aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_addr.c1
-rw-r--r--fs/9p/vfs_dentry.c1
-rw-r--r--fs/9p/vfs_dir.c1
-rw-r--r--fs/9p/vfs_file.c1
-rw-r--r--fs/9p/vfs_inode.c1
-rw-r--r--fs/9p/vfs_super.c1
-rw-r--r--fs/Kconfig18
-rw-r--r--fs/Kconfig.binfmt2
-rw-r--r--fs/adfs/super.c3
-rw-r--r--fs/affs/file.c6
-rw-r--r--fs/affs/super.c3
-rw-r--r--fs/afs/Makefile5
-rw-r--r--fs/afs/afs_fs.h2
-rw-r--r--fs/afs/callback.c11
-rw-r--r--fs/afs/cmservice.c1
-rw-r--r--fs/afs/dir.c22
-rw-r--r--fs/afs/file.c107
-rw-r--r--fs/afs/fsclient.c368
-rw-r--r--fs/afs/inode.c70
-rw-r--r--fs/afs/internal.h104
-rw-r--r--fs/afs/main.c4
-rw-r--r--fs/afs/misc.c1
-rw-r--r--fs/afs/mntpt.c16
-rw-r--r--fs/afs/netdevices.c68
-rw-r--r--fs/afs/rxrpc.c82
-rw-r--r--fs/afs/security.c12
-rw-r--r--fs/afs/server.c3
-rw-r--r--fs/afs/super.c106
-rw-r--r--fs/afs/use-rtnetlink.c473
-rw-r--r--fs/afs/vlocation.c19
-rw-r--r--fs/afs/vnode.c121
-rw-r--r--fs/afs/write.c835
-rw-r--r--fs/aio.c13
-rw-r--r--fs/attr.c1
-rw-r--r--fs/autofs4/inode.c1
-rw-r--r--fs/autofs4/root.c1
-rw-r--r--fs/bad_inode.c1
-rw-r--r--fs/befs/linuxvfs.c3
-rw-r--r--fs/bfs/inode.c3
-rw-r--r--fs/binfmt_elf.c5
-rw-r--r--fs/binfmt_elf_fdpic.c1
-rw-r--r--fs/binfmt_em86.c1
-rw-r--r--fs/binfmt_misc.c17
-rw-r--r--fs/binfmt_script.c1
-rw-r--r--fs/bio.c44
-rw-r--r--fs/block_dev.c15
-rw-r--r--fs/buffer.c84
-rw-r--r--fs/cifs/CHANGES22
-rw-r--r--fs/cifs/README43
-rw-r--r--fs/cifs/TODO69
-rw-r--r--fs/cifs/cifs_fs_sb.h14
-rw-r--r--fs/cifs/cifs_unicode.c4
-rw-r--r--fs/cifs/cifsfs.c82
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h2
-rw-r--r--fs/cifs/cifspdu.h32
-rw-r--r--fs/cifs/cifsproto.h7
-rw-r--r--fs/cifs/cifssmb.c130
-rw-r--r--fs/cifs/connect.c140
-rw-r--r--fs/cifs/dir.c2
-rw-r--r--fs/cifs/file.c130
-rw-r--r--fs/cifs/inode.c282
-rw-r--r--fs/cifs/netmisc.c24
-rw-r--r--fs/cifs/readdir.c162
-rw-r--r--fs/coda/inode.c3
-rw-r--r--fs/compat.c224
-rw-r--r--fs/compat_ioctl.c1025
-rw-r--r--fs/configfs/file.c33
-rw-r--r--fs/configfs/mount.c2
-rw-r--r--fs/cramfs/inode.c3
-rw-r--r--fs/dcache.c144
-rw-r--r--fs/debugfs/inode.c2
-rw-r--r--fs/devpts/inode.c5
-rw-r--r--fs/direct-io.c10
-rw-r--r--fs/dlm/Kconfig31
-rw-r--r--fs/dlm/Makefile6
-rw-r--r--fs/dlm/ast.c1
-rw-r--r--fs/dlm/config.c10
-rw-r--r--fs/dlm/config.h3
-rw-r--r--fs/dlm/dlm_internal.h11
-rw-r--r--fs/dlm/lock.c955
-rw-r--r--fs/dlm/lock.h2
-rw-r--r--fs/dlm/lockspace.c6
-rw-r--r--fs/dlm/lowcomms-sctp.c1210
-rw-r--r--fs/dlm/lowcomms.c (renamed from fs/dlm/lowcomms-tcp.c)788
-rw-r--r--fs/dlm/user.c163
-rw-r--r--fs/dquot.c10
-rw-r--r--fs/ecryptfs/file.c1
-rw-r--r--fs/ecryptfs/main.c15
-rw-r--r--fs/ecryptfs/mmap.c11
-rw-r--r--fs/efs/super.c3
-rw-r--r--fs/eventpoll.c238
-rw-r--r--fs/exec.c51
-rw-r--r--fs/exportfs/expfs.c1
-rw-r--r--fs/ext2/dir.c4
-rw-r--r--fs/ext2/ext2.h1
-rw-r--r--fs/ext2/fsync.c1
-rw-r--r--fs/ext2/inode.c26
-rw-r--r--fs/ext2/ioctl.c1
-rw-r--r--fs/ext2/super.c3
-rw-r--r--fs/ext2/xattr_security.c1
-rw-r--r--fs/ext2/xattr_trusted.c1
-rw-r--r--fs/ext3/dir.c1
-rw-r--r--fs/ext3/inode.c39
-rw-r--r--fs/ext3/ioctl.c1
-rw-r--r--fs/ext3/namei.c27
-rw-r--r--fs/ext3/resize.c1
-rw-r--r--fs/ext3/super.c7
-rw-r--r--fs/ext3/xattr_security.c1
-rw-r--r--fs/ext3/xattr_trusted.c1
-rw-r--r--fs/ext3/xattr_user.c1
-rw-r--r--fs/ext4/dir.c1
-rw-r--r--fs/ext4/extents.c1
-rw-r--r--fs/ext4/inode.c7
-rw-r--r--fs/ext4/namei.c27
-rw-r--r--fs/ext4/resize.c1
-rw-r--r--fs/ext4/super.c7
-rw-r--r--fs/ext4/xattr_security.c1
-rw-r--r--fs/ext4/xattr_trusted.c1
-rw-r--r--fs/ext4/xattr_user.c1
-rw-r--r--fs/fat/cache.c3
-rw-r--r--fs/fat/dir.c199
-rw-r--r--fs/fat/inode.c24
-rw-r--r--fs/fifo.c1
-rw-r--r--fs/file_table.c1
-rw-r--r--fs/filesystems.c21
-rw-r--r--fs/freevxfs/vxfs_bmap.c2
-rw-r--r--fs/freevxfs/vxfs_inode.c2
-rw-r--r--fs/freevxfs/vxfs_subr.c3
-rw-r--r--fs/fuse/file.c3
-rw-r--r--fs/fuse/inode.c9
-rw-r--r--fs/gfs2/dir.c38
-rw-r--r--fs/gfs2/glock.c619
-rw-r--r--fs/gfs2/glock.h8
-rw-r--r--fs/gfs2/glops.c5
-rw-r--r--fs/gfs2/incore.h14
-rw-r--r--fs/gfs2/locking/dlm/lock.c14
-rw-r--r--fs/gfs2/locking/dlm/lock_dlm.h3
-rw-r--r--fs/gfs2/locking/dlm/plock.c109
-rw-r--r--fs/gfs2/locking/dlm/sysfs.c2
-rw-r--r--fs/gfs2/locking/nolock/main.c9
-rw-r--r--fs/gfs2/lops.c20
-rw-r--r--fs/gfs2/main.c10
-rw-r--r--fs/gfs2/mount.c239
-rw-r--r--fs/gfs2/ops_address.c21
-rw-r--r--fs/gfs2/ops_dentry.c1
-rw-r--r--fs/gfs2/ops_file.c13
-rw-r--r--fs/gfs2/ops_fstype.c4
-rw-r--r--fs/gfs2/ops_super.c28
-rw-r--r--fs/gfs2/rgrp.c12
-rw-r--r--fs/gfs2/sys.c2
-rw-r--r--fs/hfs/btree.c3
-rw-r--r--fs/hfs/super.c2
-rw-r--r--fs/hfsplus/btree.c3
-rw-r--r--fs/hfsplus/super.c2
-rw-r--r--fs/hostfs/hostfs.h15
-rw-r--r--fs/hostfs/hostfs_kern.c192
-rw-r--r--fs/hostfs/hostfs_user.c229
-rw-r--r--fs/hpfs/super.c3
-rw-r--r--fs/hugetlbfs/inode.c20
-rw-r--r--fs/inode.c47
-rw-r--r--fs/inotify.c2
-rw-r--r--fs/internal.h10
-rw-r--r--fs/ioctl.c2
-rw-r--r--fs/isofs/inode.c3
-rw-r--r--fs/jbd/checkpoint.c2
-rw-r--r--fs/jbd/commit.c1
-rw-r--r--fs/jbd/journal.c14
-rw-r--r--fs/jbd/recovery.c2
-rw-r--r--fs/jbd/revoke.c3
-rw-r--r--fs/jbd/transaction.c3
-rw-r--r--fs/jbd2/checkpoint.c2
-rw-r--r--fs/jbd2/commit.c1
-rw-r--r--fs/jbd2/journal.c14
-rw-r--r--fs/jbd2/recovery.c2
-rw-r--r--fs/jbd2/revoke.c3
-rw-r--r--fs/jbd2/transaction.c3
-rw-r--r--fs/jffs2/super.c3
-rw-r--r--fs/jfs/inode.c1
-rw-r--r--fs/jfs/ioctl.c2
-rw-r--r--fs/jfs/jfs_dmap.c2
-rw-r--r--fs/jfs/jfs_imap.c6
-rw-r--r--fs/jfs/jfs_inode.c18
-rw-r--r--fs/jfs/jfs_inode.h1
-rw-r--r--fs/jfs/jfs_lock.h2
-rw-r--r--fs/jfs/jfs_logmgr.c10
-rw-r--r--fs/jfs/jfs_metapage.c3
-rw-r--r--fs/jfs/jfs_txnmgr.c7
-rw-r--r--fs/jfs/super.c3
-rw-r--r--fs/libfs.c28
-rw-r--r--fs/lockd/clntproc.c1
-rw-r--r--fs/lockd/mon.c10
-rw-r--r--fs/lockd/svc4proc.c6
-rw-r--r--fs/lockd/svclock.c275
-rw-r--r--fs/lockd/svcproc.c7
-rw-r--r--fs/lockd/svcsubs.c2
-rw-r--r--fs/lockd/xdr.c20
-rw-r--r--fs/lockd/xdr4.c24
-rw-r--r--fs/locks.c267
-rw-r--r--fs/minix/dir.c1
-rw-r--r--fs/minix/inode.c3
-rw-r--r--fs/mpage.c31
-rw-r--r--fs/namei.c32
-rw-r--r--fs/namespace.c41
-rw-r--r--fs/ncpfs/file.c1
-rw-r--r--fs/ncpfs/inode.c3
-rw-r--r--fs/nfs/client.c4
-rw-r--r--fs/nfs/dir.c25
-rw-r--r--fs/nfs/direct.c6
-rw-r--r--fs/nfs/file.c7
-rw-r--r--fs/nfs/getroot.c1
-rw-r--r--fs/nfs/inode.c3
-rw-r--r--fs/nfs/internal.h12
-rw-r--r--fs/nfs/mount_clnt.c7
-rw-r--r--fs/nfs/nfs2xdr.c7
-rw-r--r--fs/nfs/nfs3proc.c1
-rw-r--r--fs/nfs/nfs3xdr.c13
-rw-r--r--fs/nfs/nfs4proc.c4
-rw-r--r--fs/nfs/nfs4renewd.c1
-rw-r--r--fs/nfs/nfs4xdr.c7
-rw-r--r--fs/nfs/nfsroot.c2
-rw-r--r--fs/nfs/pagelist.c242
-rw-r--r--fs/nfs/proc.c1
-rw-r--r--fs/nfs/read.c92
-rw-r--r--fs/nfs/super.c10
-rw-r--r--fs/nfs/symlink.c7
-rw-r--r--fs/nfs/write.c263
-rw-r--r--fs/nfsd/Makefile1
-rw-r--r--fs/nfsd/export.c14
-rw-r--r--fs/nfsd/nfs3proc.c2
-rw-r--r--fs/nfsd/nfs3xdr.c71
-rw-r--r--fs/nfsd/nfs4acl.c17
-rw-r--r--fs/nfsd/nfs4callback.c7
-rw-r--r--fs/nfsd/nfs4idmap.c1
-rw-r--r--fs/nfsd/nfs4state.c32
-rw-r--r--fs/nfsd/nfs4xdr.c1
-rw-r--r--fs/nfsd/nfsfh.c57
-rw-r--r--fs/nfsd/nfsproc.c2
-rw-r--r--fs/nfsd/nfsxdr.c53
-rw-r--r--fs/ntfs/aops.h3
-rw-r--r--fs/ntfs/attrib.c18
-rw-r--r--fs/ntfs/dir.c1
-rw-r--r--fs/ntfs/file.c24
-rw-r--r--fs/ntfs/inode.c1
-rw-r--r--fs/ntfs/super.c33
-rw-r--r--fs/ocfs2/alloc.c6
-rw-r--r--fs/ocfs2/aops.c11
-rw-r--r--fs/ocfs2/cluster/heartbeat.c2
-rw-r--r--fs/ocfs2/cluster/masklog.c4
-rw-r--r--fs/ocfs2/cluster/masklog.h2
-rw-r--r--fs/ocfs2/cluster/sys.c7
-rw-r--r--fs/ocfs2/cluster/tcp.c10
-rw-r--r--fs/ocfs2/dir.c7
-rw-r--r--fs/ocfs2/dlm/dlmast.c12
-rw-r--r--fs/ocfs2/dlm/dlmfs.c4
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c4
-rw-r--r--fs/ocfs2/dlm/dlmthread.c2
-rw-r--r--fs/ocfs2/dlmglue.c55
-rw-r--r--fs/ocfs2/dlmglue.h7
-rw-r--r--fs/ocfs2/export.c6
-rw-r--r--fs/ocfs2/file.c17
-rw-r--r--fs/ocfs2/file.h5
-rw-r--r--fs/ocfs2/inode.c36
-rw-r--r--fs/ocfs2/inode.h1
-rw-r--r--fs/ocfs2/ioctl.c24
-rw-r--r--fs/ocfs2/ioctl.h1
-rw-r--r--fs/ocfs2/journal.c7
-rw-r--r--fs/ocfs2/namei.c5
-rw-r--r--fs/ocfs2/ocfs2.h12
-rw-r--r--fs/ocfs2/ocfs2_fs.h2
-rw-r--r--fs/ocfs2/slot_map.c1
-rw-r--r--fs/ocfs2/suballoc.c10
-rw-r--r--fs/ocfs2/super.c5
-rw-r--r--fs/ocfs2/symlink.c7
-rw-r--r--fs/ocfs2/vote.c1
-rw-r--r--fs/open.c4
-rw-r--r--fs/openpromfs/inode.c3
-rw-r--r--fs/partitions/Kconfig9
-rw-r--r--fs/partitions/Makefile1
-rw-r--r--fs/partitions/acorn.c2
-rw-r--r--fs/partitions/check.c13
-rw-r--r--fs/partitions/sysv68.c92
-rw-r--r--fs/partitions/sysv68.h1
-rw-r--r--fs/pipe.c18
-rw-r--r--fs/pnode.c2
-rw-r--r--fs/proc/array.c4
-rw-r--r--fs/proc/base.c304
-rw-r--r--fs/proc/generic.c13
-rw-r--r--fs/proc/inode.c17
-rw-r--r--fs/proc/internal.h2
-rw-r--r--fs/proc/proc_devtree.c2
-rw-r--r--fs/proc/proc_misc.c12
-rw-r--r--fs/proc/proc_sysctl.c7
-rw-r--r--fs/proc/proc_tty.c3
-rw-r--r--fs/proc/task_mmu.c151
-rw-r--r--fs/proc/task_nommu.c7
-rw-r--r--fs/proc/vmcore.c2
-rw-r--r--fs/qnx4/inode.c3
-rw-r--r--fs/quota.c1
-rw-r--r--fs/ramfs/file-nommu.c1
-rw-r--r--fs/ramfs/inode.c1
-rw-r--r--fs/read_write.c16
-rw-r--r--fs/readdir.c9
-rw-r--r--fs/reiserfs/dir.c1
-rw-r--r--fs/reiserfs/file.c40
-rw-r--r--fs/reiserfs/inode.c13
-rw-r--r--fs/reiserfs/journal.c6
-rw-r--r--fs/reiserfs/namei.c1
-rw-r--r--fs/reiserfs/procfs.c3
-rw-r--r--fs/reiserfs/resize.c4
-rw-r--r--fs/reiserfs/stree.c1
-rw-r--r--fs/reiserfs/super.c10
-rw-r--r--fs/reiserfs/xattr.c6
-rw-r--r--fs/romfs/inode.c3
-rw-r--r--fs/select.c13
-rw-r--r--fs/smbfs/inode.c3
-rw-r--r--fs/smbfs/request.c7
-rw-r--r--fs/smbfs/smbiod.c3
-rw-r--r--fs/smbfs/sock.c1
-rw-r--r--fs/smbfs/symlink.c1
-rw-r--r--fs/splice.c17
-rw-r--r--fs/stat.c1
-rw-r--r--fs/super.c27
-rw-r--r--fs/sync.c2
-rw-r--r--fs/sysfs/bin.c2
-rw-r--r--fs/sysfs/file.c44
-rw-r--r--fs/sysv/dir.c10
-rw-r--r--fs/sysv/inode.c3
-rw-r--r--fs/sysv/namei.c1
-rw-r--r--fs/udf/balloc.c177
-rw-r--r--fs/udf/dir.c39
-rw-r--r--fs/udf/directory.c30
-rw-r--r--fs/udf/fsync.c1
-rw-r--r--fs/udf/inode.c581
-rw-r--r--fs/udf/misc.c6
-rw-r--r--fs/udf/namei.c226
-rw-r--r--fs/udf/partition.c2
-rw-r--r--fs/udf/super.c79
-rw-r--r--fs/udf/symlink.c2
-rw-r--r--fs/udf/truncate.c206
-rw-r--r--fs/udf/udf_sb.h2
-rw-r--r--fs/udf/udfdecl.h25
-rw-r--r--fs/ufs/dir.c7
-rw-r--r--fs/ufs/super.c3
-rw-r--r--fs/ufs/util.c6
-rw-r--r--fs/utimes.c162
-rw-r--r--fs/xattr.c1
-rw-r--r--fs/xfs/linux-2.6/mrlock.h12
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c89
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c10
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h3
-rw-r--r--fs/xfs/linux-2.6/xfs_fs_subr.c21
-rw-r--r--fs/xfs/linux-2.6/xfs_fs_subr.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c185
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h2
-rw-r--r--fs/xfs/quota/xfs_dquot.c3
-rw-r--r--fs/xfs/quota/xfs_qm.c16
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c19
-rw-r--r--fs/xfs/quota/xfs_trans_dquot.c4
-rw-r--r--fs/xfs/support/debug.c17
-rw-r--r--fs/xfs/support/debug.h2
-rw-r--r--fs/xfs/xfs_alloc.c2
-rw-r--r--fs/xfs/xfs_attr.c12
-rw-r--r--fs/xfs/xfs_attr_leaf.c2
-rw-r--r--fs/xfs/xfs_bmap.c28
-rw-r--r--fs/xfs/xfs_dfrag.c6
-rw-r--r--fs/xfs/xfs_dir2_block.c14
-rw-r--r--fs/xfs/xfs_dir2_data.c7
-rw-r--r--fs/xfs/xfs_dir2_data.h2
-rw-r--r--fs/xfs/xfs_dir2_leaf.c7
-rw-r--r--fs/xfs/xfs_dir2_node.c4
-rw-r--r--fs/xfs/xfs_error.c2
-rw-r--r--fs/xfs/xfs_fsops.c4
-rw-r--r--fs/xfs/xfs_iget.c15
-rw-r--r--fs/xfs/xfs_inode.c58
-rw-r--r--fs/xfs/xfs_inode.h65
-rw-r--r--fs/xfs/xfs_iocore.c2
-rw-r--r--fs/xfs/xfs_iomap.c15
-rw-r--r--fs/xfs/xfs_iomap.h1
-rw-r--r--fs/xfs/xfs_itable.c2
-rw-r--r--fs/xfs/xfs_log_recover.c15
-rw-r--r--fs/xfs/xfs_mount.c5
-rw-r--r--fs/xfs/xfs_qmops.c2
-rw-r--r--fs/xfs/xfs_quota.h3
-rw-r--r--fs/xfs/xfs_rename.c2
-rw-r--r--fs/xfs/xfs_rtalloc.c6
-rw-r--r--fs/xfs/xfs_rw.c4
-rw-r--r--fs/xfs/xfs_trans.c6
-rw-r--r--fs/xfs/xfs_trans.h4
-rw-r--r--fs/xfs/xfs_utils.c11
-rw-r--r--fs/xfs/xfs_vfsops.c6
-rw-r--r--fs/xfs/xfs_vnodeops.c125
393 files changed, 9749 insertions, 6649 deletions
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index bed48fa96521..3128aa948a4e 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -29,7 +29,6 @@
29#include <linux/file.h> 29#include <linux/file.h>
30#include <linux/stat.h> 30#include <linux/stat.h>
31#include <linux/string.h> 31#include <linux/string.h>
32#include <linux/smp_lock.h>
33#include <linux/inet.h> 32#include <linux/inet.h>
34#include <linux/pagemap.h> 33#include <linux/pagemap.h>
35#include <linux/idr.h> 34#include <linux/idr.h>
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index ddffd8aa902d..775e26e82cbc 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -30,7 +30,6 @@
30#include <linux/pagemap.h> 30#include <linux/pagemap.h>
31#include <linux/stat.h> 31#include <linux/stat.h>
32#include <linux/string.h> 32#include <linux/string.h>
33#include <linux/smp_lock.h>
34#include <linux/inet.h> 33#include <linux/inet.h>
35#include <linux/namei.h> 34#include <linux/namei.h>
36#include <linux/idr.h> 35#include <linux/idr.h>
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 3129688143ea..1dd86ee90bc5 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -29,7 +29,6 @@
29#include <linux/file.h> 29#include <linux/file.h>
30#include <linux/stat.h> 30#include <linux/stat.h>
31#include <linux/string.h> 31#include <linux/string.h>
32#include <linux/smp_lock.h>
33#include <linux/sched.h> 32#include <linux/sched.h>
34#include <linux/inet.h> 33#include <linux/inet.h>
35#include <linux/idr.h> 34#include <linux/idr.h>
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index c7b677253843..6e7678e4852f 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -30,7 +30,6 @@
30#include <linux/file.h> 30#include <linux/file.h>
31#include <linux/stat.h> 31#include <linux/stat.h>
32#include <linux/string.h> 32#include <linux/string.h>
33#include <linux/smp_lock.h>
34#include <linux/inet.h> 33#include <linux/inet.h>
35#include <linux/list.h> 34#include <linux/list.h>
36#include <asm/uaccess.h> 35#include <asm/uaccess.h>
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index b01b0a457932..7624821729a0 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -30,7 +30,6 @@
30#include <linux/pagemap.h> 30#include <linux/pagemap.h>
31#include <linux/stat.h> 31#include <linux/stat.h>
32#include <linux/string.h> 32#include <linux/string.h>
33#include <linux/smp_lock.h>
34#include <linux/inet.h> 33#include <linux/inet.h>
35#include <linux/namei.h> 34#include <linux/namei.h>
36#include <linux/idr.h> 35#include <linux/idr.h>
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 0ec42f665457..8eb9263a67b9 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -31,7 +31,6 @@
31#include <linux/file.h> 31#include <linux/file.h>
32#include <linux/stat.h> 32#include <linux/stat.h>
33#include <linux/string.h> 33#include <linux/string.h>
34#include <linux/smp_lock.h>
35#include <linux/inet.h> 34#include <linux/inet.h>
36#include <linux/pagemap.h> 35#include <linux/pagemap.h>
37#include <linux/seq_file.h> 36#include <linux/seq_file.h>
diff --git a/fs/Kconfig b/fs/Kconfig
index e33c08924572..0fa0c1193e81 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -314,7 +314,7 @@ config REISERFS_CHECK
314 314
315config REISERFS_PROC_INFO 315config REISERFS_PROC_INFO
316 bool "Stats in /proc/fs/reiserfs" 316 bool "Stats in /proc/fs/reiserfs"
317 depends on REISERFS_FS 317 depends on REISERFS_FS && PROC_FS
318 help 318 help
319 Create under /proc/fs/reiserfs a hierarchy of files, displaying 319 Create under /proc/fs/reiserfs a hierarchy of files, displaying
320 various ReiserFS statistics and internal data at the expense of 320 various ReiserFS statistics and internal data at the expense of
@@ -724,10 +724,6 @@ config FAT_FS
724 file system and use GNU tar's M option. GNU tar is a program 724 file system and use GNU tar's M option. GNU tar is a program
725 available for Unix and DOS ("man tar" or "info tar"). 725 available for Unix and DOS ("man tar" or "info tar").
726 726
727 It is now also becoming possible to read and write compressed FAT
728 file systems; read <file:Documentation/filesystems/fat_cvf.txt> for
729 details.
730
731 The FAT support will enlarge your kernel by about 37 KB. If unsure, 727 The FAT support will enlarge your kernel by about 37 KB. If unsure,
732 say Y. 728 say Y.
733 729
@@ -1734,6 +1730,18 @@ config SUNRPC
1734config SUNRPC_GSS 1730config SUNRPC_GSS
1735 tristate 1731 tristate
1736 1732
1733config SUNRPC_BIND34
1734 bool "Support for rpcbind versions 3 & 4 (EXPERIMENTAL)"
1735 depends on SUNRPC && EXPERIMENTAL
1736 help
1737 Provides kernel support for querying rpcbind servers via versions 3
1738 and 4 of the rpcbind protocol. The kernel automatically falls back
1739 to version 2 if a remote rpcbind service does not support versions
1740 3 or 4.
1741
1742 If unsure, say N to get traditional behavior (version 2 rpcbind
1743 requests only).
1744
1737config RPCSEC_GSS_KRB5 1745config RPCSEC_GSS_KRB5
1738 tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)" 1746 tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)"
1739 depends on SUNRPC && EXPERIMENTAL 1747 depends on SUNRPC && EXPERIMENTAL
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index f3d3d81eb7e9..74c64409ddbc 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -26,7 +26,7 @@ config BINFMT_ELF
26config BINFMT_ELF_FDPIC 26config BINFMT_ELF_FDPIC
27 bool "Kernel support for FDPIC ELF binaries" 27 bool "Kernel support for FDPIC ELF binaries"
28 default y 28 default y
29 depends on FRV 29 depends on (FRV || BLACKFIN)
30 help 30 help
31 ELF FDPIC binaries are based on ELF, but allow the individual load 31 ELF FDPIC binaries are based on ELF, but allow the individual load
32 segments of a binary to be located in memory independently of each 32 segments of a binary to be located in memory independently of each
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 2e5f2c8371ee..30c296508497 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -232,8 +232,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
232{ 232{
233 struct adfs_inode_info *ei = (struct adfs_inode_info *) foo; 233 struct adfs_inode_info *ei = (struct adfs_inode_info *) foo;
234 234
235 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 235 if (flags & SLAB_CTOR_CONSTRUCTOR)
236 SLAB_CTOR_CONSTRUCTOR)
237 inode_init_once(&ei->vfs_inode); 236 inode_init_once(&ei->vfs_inode);
238} 237}
239 238
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 4aa8079e71be..c8796906f584 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -628,11 +628,7 @@ static int affs_prepare_write_ofs(struct file *file, struct page *page, unsigned
628 return err; 628 return err;
629 } 629 }
630 if (to < PAGE_CACHE_SIZE) { 630 if (to < PAGE_CACHE_SIZE) {
631 char *kaddr = kmap_atomic(page, KM_USER0); 631 zero_user_page(page, to, PAGE_CACHE_SIZE - to, KM_USER0);
632
633 memset(kaddr + to, 0, PAGE_CACHE_SIZE - to);
634 flush_dcache_page(page);
635 kunmap_atomic(kaddr, KM_USER0);
636 if (size > offset + to) { 632 if (size > offset + to) {
637 if (size < offset + PAGE_CACHE_SIZE) 633 if (size < offset + PAGE_CACHE_SIZE)
638 tmp = size & ~PAGE_CACHE_MASK; 634 tmp = size & ~PAGE_CACHE_MASK;
diff --git a/fs/affs/super.c b/fs/affs/super.c
index c3986a1911b0..beff7d21e6e2 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -87,8 +87,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
87{ 87{
88 struct affs_inode_info *ei = (struct affs_inode_info *) foo; 88 struct affs_inode_info *ei = (struct affs_inode_info *) foo;
89 89
90 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 90 if (flags & SLAB_CTOR_CONSTRUCTOR) {
91 SLAB_CTOR_CONSTRUCTOR) {
92 init_MUTEX(&ei->i_link_lock); 91 init_MUTEX(&ei->i_link_lock);
93 init_MUTEX(&ei->i_ext_lock); 92 init_MUTEX(&ei->i_ext_lock);
94 inode_init_once(&ei->vfs_inode); 93 inode_init_once(&ei->vfs_inode);
diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index 01545eb1d872..73ce561f3ea0 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -18,10 +18,11 @@ kafs-objs := \
18 security.o \ 18 security.o \
19 server.o \ 19 server.o \
20 super.o \ 20 super.o \
21 use-rtnetlink.o \ 21 netdevices.o \
22 vlclient.o \ 22 vlclient.o \
23 vlocation.o \ 23 vlocation.o \
24 vnode.o \ 24 vnode.o \
25 volume.o 25 volume.o \
26 write.o
26 27
27obj-$(CONFIG_AFS_FS) := kafs.o 28obj-$(CONFIG_AFS_FS) := kafs.o
diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h
index 89e0d1650a72..2198006d2d03 100644
--- a/fs/afs/afs_fs.h
+++ b/fs/afs/afs_fs.h
@@ -18,6 +18,8 @@
18enum AFS_FS_Operations { 18enum AFS_FS_Operations {
19 FSFETCHDATA = 130, /* AFS Fetch file data */ 19 FSFETCHDATA = 130, /* AFS Fetch file data */
20 FSFETCHSTATUS = 132, /* AFS Fetch file status */ 20 FSFETCHSTATUS = 132, /* AFS Fetch file status */
21 FSSTOREDATA = 133, /* AFS Store file data */
22 FSSTORESTATUS = 135, /* AFS Store file status */
21 FSREMOVEFILE = 136, /* AFS Remove a file */ 23 FSREMOVEFILE = 136, /* AFS Remove a file */
22 FSCREATEFILE = 137, /* AFS Create a file */ 24 FSCREATEFILE = 137, /* AFS Create a file */
23 FSRENAME = 138, /* AFS Rename or move a file or directory */ 25 FSRENAME = 138, /* AFS Rename or move a file or directory */
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 639399f0ab6f..f64e40fefc02 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -44,7 +44,7 @@ void afs_init_callback_state(struct afs_server *server)
44 while (!RB_EMPTY_ROOT(&server->cb_promises)) { 44 while (!RB_EMPTY_ROOT(&server->cb_promises)) {
45 vnode = rb_entry(server->cb_promises.rb_node, 45 vnode = rb_entry(server->cb_promises.rb_node,
46 struct afs_vnode, cb_promise); 46 struct afs_vnode, cb_promise);
47 _debug("UNPROMISE { vid=%x vn=%u uq=%u}", 47 _debug("UNPROMISE { vid=%x:%u uq=%u}",
48 vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique); 48 vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
49 rb_erase(&vnode->cb_promise, &server->cb_promises); 49 rb_erase(&vnode->cb_promise, &server->cb_promises);
50 vnode->cb_promised = false; 50 vnode->cb_promised = false;
@@ -84,11 +84,8 @@ void afs_broken_callback_work(struct work_struct *work)
84 84
85 /* if the vnode's data version number changed then its contents 85 /* if the vnode's data version number changed then its contents
86 * are different */ 86 * are different */
87 if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) { 87 if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags))
88 _debug("zap data {%x:%u}", 88 afs_zap_data(vnode);
89 vnode->fid.vid, vnode->fid.vnode);
90 invalidate_remote_inode(&vnode->vfs_inode);
91 }
92 } 89 }
93 90
94out: 91out:
@@ -468,7 +465,7 @@ int __init afs_callback_update_init(void)
468/* 465/*
469 * shut down the callback update process 466 * shut down the callback update process
470 */ 467 */
471void __exit afs_callback_update_kill(void) 468void afs_callback_update_kill(void)
472{ 469{
473 destroy_workqueue(afs_callback_update_worker); 470 destroy_workqueue(afs_callback_update_worker);
474} 471}
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 6685f4cbccb3..d5b2ad6575bc 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -443,6 +443,7 @@ static void SRXAFSCB_GetCapabilities(struct work_struct *work)
443 reply.ia.netmask[loop] = ifs[loop].netmask.s_addr; 443 reply.ia.netmask[loop] = ifs[loop].netmask.s_addr;
444 reply.ia.mtu[loop] = htonl(ifs[loop].mtu); 444 reply.ia.mtu[loop] = htonl(ifs[loop].mtu);
445 } 445 }
446 kfree(ifs);
446 } 447 }
447 448
448 reply.cap.capcount = htonl(1); 449 reply.cap.capcount = htonl(1);
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index dac5b990c0cd..2fb31276196b 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -55,7 +55,8 @@ const struct inode_operations afs_dir_inode_operations = {
55 .rmdir = afs_rmdir, 55 .rmdir = afs_rmdir,
56 .rename = afs_rename, 56 .rename = afs_rename,
57 .permission = afs_permission, 57 .permission = afs_permission,
58 .getattr = afs_inode_getattr, 58 .getattr = afs_getattr,
59 .setattr = afs_setattr,
59}; 60};
60 61
61static struct dentry_operations afs_fs_dentry_operations = { 62static struct dentry_operations afs_fs_dentry_operations = {
@@ -194,10 +195,7 @@ static struct page *afs_dir_get_page(struct inode *dir, unsigned long index,
194 195
195 page = read_mapping_page(dir->i_mapping, index, &file); 196 page = read_mapping_page(dir->i_mapping, index, &file);
196 if (!IS_ERR(page)) { 197 if (!IS_ERR(page)) {
197 wait_on_page_locked(page);
198 kmap(page); 198 kmap(page);
199 if (!PageUptodate(page))
200 goto fail;
201 if (!PageChecked(page)) 199 if (!PageChecked(page))
202 afs_dir_check_page(dir, page); 200 afs_dir_check_page(dir, page);
203 if (PageError(page)) 201 if (PageError(page))
@@ -494,7 +492,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
494 492
495 vnode = AFS_FS_I(dir); 493 vnode = AFS_FS_I(dir);
496 494
497 _enter("{%x:%d},%p{%s},", 495 _enter("{%x:%u},%p{%s},",
498 vnode->fid.vid, vnode->fid.vnode, dentry, dentry->d_name.name); 496 vnode->fid.vid, vnode->fid.vnode, dentry, dentry->d_name.name);
499 497
500 ASSERTCMP(dentry->d_inode, ==, NULL); 498 ASSERTCMP(dentry->d_inode, ==, NULL);
@@ -734,7 +732,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
734 732
735 dvnode = AFS_FS_I(dir); 733 dvnode = AFS_FS_I(dir);
736 734
737 _enter("{%x:%d},{%s},%o", 735 _enter("{%x:%u},{%s},%o",
738 dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode); 736 dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode);
739 737
740 ret = -ENAMETOOLONG; 738 ret = -ENAMETOOLONG;
@@ -799,7 +797,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
799 797
800 dvnode = AFS_FS_I(dir); 798 dvnode = AFS_FS_I(dir);
801 799
802 _enter("{%x:%d},{%s}", 800 _enter("{%x:%u},{%s}",
803 dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name); 801 dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name);
804 802
805 ret = -ENAMETOOLONG; 803 ret = -ENAMETOOLONG;
@@ -845,7 +843,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
845 843
846 dvnode = AFS_FS_I(dir); 844 dvnode = AFS_FS_I(dir);
847 845
848 _enter("{%x:%d},{%s}", 846 _enter("{%x:%u},{%s}",
849 dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name); 847 dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name);
850 848
851 ret = -ENAMETOOLONG; 849 ret = -ENAMETOOLONG;
@@ -919,7 +917,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, int mode,
919 917
920 dvnode = AFS_FS_I(dir); 918 dvnode = AFS_FS_I(dir);
921 919
922 _enter("{%x:%d},{%s},%o,", 920 _enter("{%x:%u},{%s},%o,",
923 dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode); 921 dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode);
924 922
925 ret = -ENAMETOOLONG; 923 ret = -ENAMETOOLONG;
@@ -986,7 +984,7 @@ static int afs_link(struct dentry *from, struct inode *dir,
986 vnode = AFS_FS_I(from->d_inode); 984 vnode = AFS_FS_I(from->d_inode);
987 dvnode = AFS_FS_I(dir); 985 dvnode = AFS_FS_I(dir);
988 986
989 _enter("{%x:%d},{%x:%d},{%s}", 987 _enter("{%x:%u},{%x:%u},{%s}",
990 vnode->fid.vid, vnode->fid.vnode, 988 vnode->fid.vid, vnode->fid.vnode,
991 dvnode->fid.vid, dvnode->fid.vnode, 989 dvnode->fid.vid, dvnode->fid.vnode,
992 dentry->d_name.name); 990 dentry->d_name.name);
@@ -1035,7 +1033,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
1035 1033
1036 dvnode = AFS_FS_I(dir); 1034 dvnode = AFS_FS_I(dir);
1037 1035
1038 _enter("{%x:%d},{%s},%s", 1036 _enter("{%x:%u},{%s},%s",
1039 dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, 1037 dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name,
1040 content); 1038 content);
1041 1039
@@ -1107,7 +1105,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
1107 orig_dvnode = AFS_FS_I(old_dir); 1105 orig_dvnode = AFS_FS_I(old_dir);
1108 new_dvnode = AFS_FS_I(new_dir); 1106 new_dvnode = AFS_FS_I(new_dir);
1109 1107
1110 _enter("{%x:%d},{%x:%d},{%x:%d},{%s}", 1108 _enter("{%x:%u},{%x:%u},{%x:%u},{%s}",
1111 orig_dvnode->fid.vid, orig_dvnode->fid.vnode, 1109 orig_dvnode->fid.vid, orig_dvnode->fid.vnode,
1112 vnode->fid.vid, vnode->fid.vnode, 1110 vnode->fid.vid, vnode->fid.vnode,
1113 new_dvnode->fid.vid, new_dvnode->fid.vnode, 1111 new_dvnode->fid.vid, new_dvnode->fid.vnode,
diff --git a/fs/afs/file.c b/fs/afs/file.c
index ae256498f4f7..3e25795e5a42 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -15,32 +15,43 @@
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/fs.h> 16#include <linux/fs.h>
17#include <linux/pagemap.h> 17#include <linux/pagemap.h>
18#include <linux/writeback.h>
18#include "internal.h" 19#include "internal.h"
19 20
20static int afs_file_readpage(struct file *file, struct page *page); 21static int afs_readpage(struct file *file, struct page *page);
21static void afs_file_invalidatepage(struct page *page, unsigned long offset); 22static void afs_invalidatepage(struct page *page, unsigned long offset);
22static int afs_file_releasepage(struct page *page, gfp_t gfp_flags); 23static int afs_releasepage(struct page *page, gfp_t gfp_flags);
24static int afs_launder_page(struct page *page);
23 25
24const struct file_operations afs_file_operations = { 26const struct file_operations afs_file_operations = {
25 .open = afs_open, 27 .open = afs_open,
26 .release = afs_release, 28 .release = afs_release,
27 .llseek = generic_file_llseek, 29 .llseek = generic_file_llseek,
28 .read = do_sync_read, 30 .read = do_sync_read,
31 .write = do_sync_write,
29 .aio_read = generic_file_aio_read, 32 .aio_read = generic_file_aio_read,
33 .aio_write = afs_file_write,
30 .mmap = generic_file_readonly_mmap, 34 .mmap = generic_file_readonly_mmap,
31 .sendfile = generic_file_sendfile, 35 .sendfile = generic_file_sendfile,
36 .fsync = afs_fsync,
32}; 37};
33 38
34const struct inode_operations afs_file_inode_operations = { 39const struct inode_operations afs_file_inode_operations = {
35 .getattr = afs_inode_getattr, 40 .getattr = afs_getattr,
41 .setattr = afs_setattr,
36 .permission = afs_permission, 42 .permission = afs_permission,
37}; 43};
38 44
39const struct address_space_operations afs_fs_aops = { 45const struct address_space_operations afs_fs_aops = {
40 .readpage = afs_file_readpage, 46 .readpage = afs_readpage,
41 .set_page_dirty = __set_page_dirty_nobuffers, 47 .set_page_dirty = afs_set_page_dirty,
42 .releasepage = afs_file_releasepage, 48 .launder_page = afs_launder_page,
43 .invalidatepage = afs_file_invalidatepage, 49 .releasepage = afs_releasepage,
50 .invalidatepage = afs_invalidatepage,
51 .prepare_write = afs_prepare_write,
52 .commit_write = afs_commit_write,
53 .writepage = afs_writepage,
54 .writepages = afs_writepages,
44}; 55};
45 56
46/* 57/*
@@ -52,7 +63,7 @@ int afs_open(struct inode *inode, struct file *file)
52 struct key *key; 63 struct key *key;
53 int ret; 64 int ret;
54 65
55 _enter("{%x:%x},", vnode->fid.vid, vnode->fid.vnode); 66 _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode);
56 67
57 key = afs_request_key(vnode->volume->cell); 68 key = afs_request_key(vnode->volume->cell);
58 if (IS_ERR(key)) { 69 if (IS_ERR(key)) {
@@ -78,7 +89,7 @@ int afs_release(struct inode *inode, struct file *file)
78{ 89{
79 struct afs_vnode *vnode = AFS_FS_I(inode); 90 struct afs_vnode *vnode = AFS_FS_I(inode);
80 91
81 _enter("{%x:%x},", vnode->fid.vid, vnode->fid.vnode); 92 _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode);
82 93
83 key_put(file->private_data); 94 key_put(file->private_data);
84 _leave(" = 0"); 95 _leave(" = 0");
@@ -89,10 +100,10 @@ int afs_release(struct inode *inode, struct file *file)
89 * deal with notification that a page was read from the cache 100 * deal with notification that a page was read from the cache
90 */ 101 */
91#ifdef AFS_CACHING_SUPPORT 102#ifdef AFS_CACHING_SUPPORT
92static void afs_file_readpage_read_complete(void *cookie_data, 103static void afs_readpage_read_complete(void *cookie_data,
93 struct page *page, 104 struct page *page,
94 void *data, 105 void *data,
95 int error) 106 int error)
96{ 107{
97 _enter("%p,%p,%p,%d", cookie_data, page, data, error); 108 _enter("%p,%p,%p,%d", cookie_data, page, data, error);
98 109
@@ -109,10 +120,10 @@ static void afs_file_readpage_read_complete(void *cookie_data,
109 * deal with notification that a page was written to the cache 120 * deal with notification that a page was written to the cache
110 */ 121 */
111#ifdef AFS_CACHING_SUPPORT 122#ifdef AFS_CACHING_SUPPORT
112static void afs_file_readpage_write_complete(void *cookie_data, 123static void afs_readpage_write_complete(void *cookie_data,
113 struct page *page, 124 struct page *page,
114 void *data, 125 void *data,
115 int error) 126 int error)
116{ 127{
117 _enter("%p,%p,%p,%d", cookie_data, page, data, error); 128 _enter("%p,%p,%p,%d", cookie_data, page, data, error);
118 129
@@ -121,9 +132,9 @@ static void afs_file_readpage_write_complete(void *cookie_data,
121#endif 132#endif
122 133
123/* 134/*
124 * AFS read page from file (or symlink) 135 * AFS read page from file, directory or symlink
125 */ 136 */
126static int afs_file_readpage(struct file *file, struct page *page) 137static int afs_readpage(struct file *file, struct page *page)
127{ 138{
128 struct afs_vnode *vnode; 139 struct afs_vnode *vnode;
129 struct inode *inode; 140 struct inode *inode;
@@ -219,39 +230,17 @@ error:
219} 230}
220 231
221/* 232/*
222 * get a page cookie for the specified page
223 */
224#ifdef AFS_CACHING_SUPPORT
225int afs_cache_get_page_cookie(struct page *page,
226 struct cachefs_page **_page_cookie)
227{
228 int ret;
229
230 _enter("");
231 ret = cachefs_page_get_private(page,_page_cookie, GFP_NOIO);
232
233 _leave(" = %d", ret);
234 return ret;
235}
236#endif
237
238/*
239 * invalidate part or all of a page 233 * invalidate part or all of a page
240 */ 234 */
241static void afs_file_invalidatepage(struct page *page, unsigned long offset) 235static void afs_invalidatepage(struct page *page, unsigned long offset)
242{ 236{
243 int ret = 1; 237 int ret = 1;
244 238
245 _enter("{%lu},%lu", page->index, offset); 239 kenter("{%lu},%lu", page->index, offset);
246 240
247 BUG_ON(!PageLocked(page)); 241 BUG_ON(!PageLocked(page));
248 242
249 if (PagePrivate(page)) { 243 if (PagePrivate(page)) {
250#ifdef AFS_CACHING_SUPPORT
251 struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
252 cachefs_uncache_page(vnode->cache,page);
253#endif
254
255 /* We release buffers only if the entire page is being 244 /* We release buffers only if the entire page is being
256 * invalidated. 245 * invalidated.
257 * The get_block cached value has been unconditionally 246 * The get_block cached value has been unconditionally
@@ -272,25 +261,33 @@ static void afs_file_invalidatepage(struct page *page, unsigned long offset)
272} 261}
273 262
274/* 263/*
264 * write back a dirty page
265 */
266static int afs_launder_page(struct page *page)
267{
268 _enter("{%lu}", page->index);
269
270 return 0;
271}
272
273/*
275 * release a page and cleanup its private data 274 * release a page and cleanup its private data
276 */ 275 */
277static int afs_file_releasepage(struct page *page, gfp_t gfp_flags) 276static int afs_releasepage(struct page *page, gfp_t gfp_flags)
278{ 277{
279 struct cachefs_page *pageio; 278 struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
279 struct afs_writeback *wb;
280 280
281 _enter("{%lu},%x", page->index, gfp_flags); 281 _enter("{{%x:%u}[%lu],%lx},%x",
282 vnode->fid.vid, vnode->fid.vnode, page->index, page->flags,
283 gfp_flags);
282 284
283 if (PagePrivate(page)) { 285 if (PagePrivate(page)) {
284#ifdef AFS_CACHING_SUPPORT 286 wb = (struct afs_writeback *) page_private(page);
285 struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); 287 ASSERT(wb != NULL);
286 cachefs_uncache_page(vnode->cache, page);
287#endif
288
289 pageio = (struct cachefs_page *) page_private(page);
290 set_page_private(page, 0); 288 set_page_private(page, 0);
291 ClearPagePrivate(page); 289 ClearPagePrivate(page);
292 290 afs_put_writeback(wb);
293 kfree(pageio);
294 } 291 }
295 292
296 _leave(" = 0"); 293 _leave(" = 0");
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 2393d2a08d79..025b1903d9e1 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -33,8 +33,10 @@ static void xdr_decode_AFSFid(const __be32 **_bp, struct afs_fid *fid)
33 */ 33 */
34static void xdr_decode_AFSFetchStatus(const __be32 **_bp, 34static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
35 struct afs_file_status *status, 35 struct afs_file_status *status,
36 struct afs_vnode *vnode) 36 struct afs_vnode *vnode,
37 afs_dataversion_t *store_version)
37{ 38{
39 afs_dataversion_t expected_version;
38 const __be32 *bp = *_bp; 40 const __be32 *bp = *_bp;
39 umode_t mode; 41 umode_t mode;
40 u64 data_version, size; 42 u64 data_version, size;
@@ -101,7 +103,11 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
101 vnode->vfs_inode.i_atime = vnode->vfs_inode.i_ctime; 103 vnode->vfs_inode.i_atime = vnode->vfs_inode.i_ctime;
102 } 104 }
103 105
104 if (status->data_version != data_version) { 106 expected_version = status->data_version;
107 if (store_version)
108 expected_version = *store_version;
109
110 if (expected_version != data_version) {
105 status->data_version = data_version; 111 status->data_version = data_version;
106 if (vnode && !test_bit(AFS_VNODE_UNSET, &vnode->flags)) { 112 if (vnode && !test_bit(AFS_VNODE_UNSET, &vnode->flags)) {
107 _debug("vnode modified %llx on {%x:%u}", 113 _debug("vnode modified %llx on {%x:%u}",
@@ -110,6 +116,8 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
110 set_bit(AFS_VNODE_MODIFIED, &vnode->flags); 116 set_bit(AFS_VNODE_MODIFIED, &vnode->flags);
111 set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags); 117 set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags);
112 } 118 }
119 } else if (store_version) {
120 status->data_version = data_version;
113 } 121 }
114} 122}
115 123
@@ -156,6 +164,44 @@ static void xdr_decode_AFSVolSync(const __be32 **_bp,
156} 164}
157 165
158/* 166/*
167 * encode the requested attributes into an AFSStoreStatus block
168 */
169static void xdr_encode_AFS_StoreStatus(__be32 **_bp, struct iattr *attr)
170{
171 __be32 *bp = *_bp;
172 u32 mask = 0, mtime = 0, owner = 0, group = 0, mode = 0;
173
174 mask = 0;
175 if (attr->ia_valid & ATTR_MTIME) {
176 mask |= AFS_SET_MTIME;
177 mtime = attr->ia_mtime.tv_sec;
178 }
179
180 if (attr->ia_valid & ATTR_UID) {
181 mask |= AFS_SET_OWNER;
182 owner = attr->ia_uid;
183 }
184
185 if (attr->ia_valid & ATTR_GID) {
186 mask |= AFS_SET_GROUP;
187 group = attr->ia_gid;
188 }
189
190 if (attr->ia_valid & ATTR_MODE) {
191 mask |= AFS_SET_MODE;
192 mode = attr->ia_mode & S_IALLUGO;
193 }
194
195 *bp++ = htonl(mask);
196 *bp++ = htonl(mtime);
197 *bp++ = htonl(owner);
198 *bp++ = htonl(group);
199 *bp++ = htonl(mode);
200 *bp++ = 0; /* segment size */
201 *_bp = bp;
202}
203
204/*
159 * deliver reply data to an FS.FetchStatus 205 * deliver reply data to an FS.FetchStatus
160 */ 206 */
161static int afs_deliver_fs_fetch_status(struct afs_call *call, 207static int afs_deliver_fs_fetch_status(struct afs_call *call,
@@ -175,7 +221,7 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call,
175 221
176 /* unmarshall the reply once we've received all of it */ 222 /* unmarshall the reply once we've received all of it */
177 bp = call->buffer; 223 bp = call->buffer;
178 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode); 224 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
179 xdr_decode_AFSCallBack(&bp, vnode); 225 xdr_decode_AFSCallBack(&bp, vnode);
180 if (call->reply2) 226 if (call->reply2)
181 xdr_decode_AFSVolSync(&bp, call->reply2); 227 xdr_decode_AFSVolSync(&bp, call->reply2);
@@ -206,7 +252,7 @@ int afs_fs_fetch_file_status(struct afs_server *server,
206 struct afs_call *call; 252 struct afs_call *call;
207 __be32 *bp; 253 __be32 *bp;
208 254
209 _enter(",%x,{%x:%d},,", 255 _enter(",%x,{%x:%u},,",
210 key_serial(key), vnode->fid.vid, vnode->fid.vnode); 256 key_serial(key), vnode->fid.vid, vnode->fid.vnode);
211 257
212 call = afs_alloc_flat_call(&afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4); 258 call = afs_alloc_flat_call(&afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4);
@@ -265,24 +311,20 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
265 call->offset = 0; 311 call->offset = 0;
266 call->unmarshall++; 312 call->unmarshall++;
267 313
268 if (call->count < PAGE_SIZE) {
269 buffer = kmap_atomic(call->reply3, KM_USER0);
270 memset(buffer + PAGE_SIZE - call->count, 0,
271 call->count);
272 kunmap_atomic(buffer, KM_USER0);
273 }
274
275 /* extract the returned data */ 314 /* extract the returned data */
276 case 2: 315 case 2:
277 _debug("extract data"); 316 _debug("extract data");
278 page = call->reply3; 317 if (call->count > 0) {
279 buffer = kmap_atomic(page, KM_USER0); 318 page = call->reply3;
280 ret = afs_extract_data(call, skb, last, buffer, call->count); 319 buffer = kmap_atomic(page, KM_USER0);
281 kunmap_atomic(buffer, KM_USER0); 320 ret = afs_extract_data(call, skb, last, buffer,
282 switch (ret) { 321 call->count);
283 case 0: break; 322 kunmap_atomic(buffer, KM_USER0);
284 case -EAGAIN: return 0; 323 switch (ret) {
285 default: return ret; 324 case 0: break;
325 case -EAGAIN: return 0;
326 default: return ret;
327 }
286 } 328 }
287 329
288 call->offset = 0; 330 call->offset = 0;
@@ -299,7 +341,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
299 } 341 }
300 342
301 bp = call->buffer; 343 bp = call->buffer;
302 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode); 344 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
303 xdr_decode_AFSCallBack(&bp, vnode); 345 xdr_decode_AFSCallBack(&bp, vnode);
304 if (call->reply2) 346 if (call->reply2)
305 xdr_decode_AFSVolSync(&bp, call->reply2); 347 xdr_decode_AFSVolSync(&bp, call->reply2);
@@ -317,6 +359,14 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
317 if (!last) 359 if (!last)
318 return 0; 360 return 0;
319 361
362 if (call->count < PAGE_SIZE) {
363 _debug("clear");
364 page = call->reply3;
365 buffer = kmap_atomic(page, KM_USER0);
366 memset(buffer + call->count, 0, PAGE_SIZE - call->count);
367 kunmap_atomic(buffer, KM_USER0);
368 }
369
320 _leave(" = 0 [done]"); 370 _leave(" = 0 [done]");
321 return 0; 371 return 0;
322} 372}
@@ -475,8 +525,8 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call,
475 /* unmarshall the reply once we've received all of it */ 525 /* unmarshall the reply once we've received all of it */
476 bp = call->buffer; 526 bp = call->buffer;
477 xdr_decode_AFSFid(&bp, call->reply2); 527 xdr_decode_AFSFid(&bp, call->reply2);
478 xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL); 528 xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL, NULL);
479 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode); 529 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
480 xdr_decode_AFSCallBack_raw(&bp, call->reply4); 530 xdr_decode_AFSCallBack_raw(&bp, call->reply4);
481 /* xdr_decode_AFSVolSync(&bp, call->replyX); */ 531 /* xdr_decode_AFSVolSync(&bp, call->replyX); */
482 532
@@ -573,7 +623,7 @@ static int afs_deliver_fs_remove(struct afs_call *call,
573 623
574 /* unmarshall the reply once we've received all of it */ 624 /* unmarshall the reply once we've received all of it */
575 bp = call->buffer; 625 bp = call->buffer;
576 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode); 626 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
577 /* xdr_decode_AFSVolSync(&bp, call->replyX); */ 627 /* xdr_decode_AFSVolSync(&bp, call->replyX); */
578 628
579 _leave(" = 0 [done]"); 629 _leave(" = 0 [done]");
@@ -656,8 +706,8 @@ static int afs_deliver_fs_link(struct afs_call *call,
656 706
657 /* unmarshall the reply once we've received all of it */ 707 /* unmarshall the reply once we've received all of it */
658 bp = call->buffer; 708 bp = call->buffer;
659 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode); 709 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
660 xdr_decode_AFSFetchStatus(&bp, &dvnode->status, dvnode); 710 xdr_decode_AFSFetchStatus(&bp, &dvnode->status, dvnode, NULL);
661 /* xdr_decode_AFSVolSync(&bp, call->replyX); */ 711 /* xdr_decode_AFSVolSync(&bp, call->replyX); */
662 712
663 _leave(" = 0 [done]"); 713 _leave(" = 0 [done]");
@@ -745,8 +795,8 @@ static int afs_deliver_fs_symlink(struct afs_call *call,
745 /* unmarshall the reply once we've received all of it */ 795 /* unmarshall the reply once we've received all of it */
746 bp = call->buffer; 796 bp = call->buffer;
747 xdr_decode_AFSFid(&bp, call->reply2); 797 xdr_decode_AFSFid(&bp, call->reply2);
748 xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL); 798 xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL, NULL);
749 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode); 799 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
750 /* xdr_decode_AFSVolSync(&bp, call->replyX); */ 800 /* xdr_decode_AFSVolSync(&bp, call->replyX); */
751 801
752 _leave(" = 0 [done]"); 802 _leave(" = 0 [done]");
@@ -851,9 +901,10 @@ static int afs_deliver_fs_rename(struct afs_call *call,
851 901
852 /* unmarshall the reply once we've received all of it */ 902 /* unmarshall the reply once we've received all of it */
853 bp = call->buffer; 903 bp = call->buffer;
854 xdr_decode_AFSFetchStatus(&bp, &orig_dvnode->status, orig_dvnode); 904 xdr_decode_AFSFetchStatus(&bp, &orig_dvnode->status, orig_dvnode, NULL);
855 if (new_dvnode != orig_dvnode) 905 if (new_dvnode != orig_dvnode)
856 xdr_decode_AFSFetchStatus(&bp, &new_dvnode->status, new_dvnode); 906 xdr_decode_AFSFetchStatus(&bp, &new_dvnode->status, new_dvnode,
907 NULL);
857 /* xdr_decode_AFSVolSync(&bp, call->replyX); */ 908 /* xdr_decode_AFSVolSync(&bp, call->replyX); */
858 909
859 _leave(" = 0 [done]"); 910 _leave(" = 0 [done]");
@@ -935,3 +986,262 @@ int afs_fs_rename(struct afs_server *server,
935 986
936 return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode); 987 return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
937} 988}
989
990/*
991 * deliver reply data to an FS.StoreData
992 */
993static int afs_deliver_fs_store_data(struct afs_call *call,
994 struct sk_buff *skb, bool last)
995{
996 struct afs_vnode *vnode = call->reply;
997 const __be32 *bp;
998
999 _enter(",,%u", last);
1000
1001 afs_transfer_reply(call, skb);
1002 if (!last) {
1003 _leave(" = 0 [more]");
1004 return 0;
1005 }
1006
1007 if (call->reply_size != call->reply_max) {
1008 _leave(" = -EBADMSG [%u != %u]",
1009 call->reply_size, call->reply_max);
1010 return -EBADMSG;
1011 }
1012
1013 /* unmarshall the reply once we've received all of it */
1014 bp = call->buffer;
1015 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode,
1016 &call->store_version);
1017 /* xdr_decode_AFSVolSync(&bp, call->replyX); */
1018
1019 afs_pages_written_back(vnode, call);
1020
1021 _leave(" = 0 [done]");
1022 return 0;
1023}
1024
1025/*
1026 * FS.StoreData operation type
1027 */
1028static const struct afs_call_type afs_RXFSStoreData = {
1029 .name = "FS.StoreData",
1030 .deliver = afs_deliver_fs_store_data,
1031 .abort_to_error = afs_abort_to_error,
1032 .destructor = afs_flat_call_destructor,
1033};
1034
1035/*
1036 * store a set of pages
1037 */
1038int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
1039 pgoff_t first, pgoff_t last,
1040 unsigned offset, unsigned to,
1041 const struct afs_wait_mode *wait_mode)
1042{
1043 struct afs_vnode *vnode = wb->vnode;
1044 struct afs_call *call;
1045 loff_t size, pos, i_size;
1046 __be32 *bp;
1047
1048 _enter(",%x,{%x:%u},,",
1049 key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode);
1050
1051 size = to - offset;
1052 if (first != last)
1053 size += (loff_t)(last - first) << PAGE_SHIFT;
1054 pos = (loff_t)first << PAGE_SHIFT;
1055 pos += offset;
1056
1057 i_size = i_size_read(&vnode->vfs_inode);
1058 if (pos + size > i_size)
1059 i_size = size + pos;
1060
1061 _debug("size %llx, at %llx, i_size %llx",
1062 (unsigned long long) size, (unsigned long long) pos,
1063 (unsigned long long) i_size);
1064
1065 BUG_ON(i_size > 0xffffffff); // TODO: use 64-bit store
1066
1067 call = afs_alloc_flat_call(&afs_RXFSStoreData,
1068 (4 + 6 + 3) * 4,
1069 (21 + 6) * 4);
1070 if (!call)
1071 return -ENOMEM;
1072
1073 call->wb = wb;
1074 call->key = wb->key;
1075 call->reply = vnode;
1076 call->service_id = FS_SERVICE;
1077 call->port = htons(AFS_FS_PORT);
1078 call->mapping = vnode->vfs_inode.i_mapping;
1079 call->first = first;
1080 call->last = last;
1081 call->first_offset = offset;
1082 call->last_to = to;
1083 call->send_pages = true;
1084 call->store_version = vnode->status.data_version + 1;
1085
1086 /* marshall the parameters */
1087 bp = call->request;
1088 *bp++ = htonl(FSSTOREDATA);
1089 *bp++ = htonl(vnode->fid.vid);
1090 *bp++ = htonl(vnode->fid.vnode);
1091 *bp++ = htonl(vnode->fid.unique);
1092
1093 *bp++ = 0; /* mask */
1094 *bp++ = 0; /* mtime */
1095 *bp++ = 0; /* owner */
1096 *bp++ = 0; /* group */
1097 *bp++ = 0; /* unix mode */
1098 *bp++ = 0; /* segment size */
1099
1100 *bp++ = htonl(pos);
1101 *bp++ = htonl(size);
1102 *bp++ = htonl(i_size);
1103
1104 return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
1105}
1106
1107/*
1108 * deliver reply data to an FS.StoreStatus
1109 */
1110static int afs_deliver_fs_store_status(struct afs_call *call,
1111 struct sk_buff *skb, bool last)
1112{
1113 afs_dataversion_t *store_version;
1114 struct afs_vnode *vnode = call->reply;
1115 const __be32 *bp;
1116
1117 _enter(",,%u", last);
1118
1119 afs_transfer_reply(call, skb);
1120 if (!last) {
1121 _leave(" = 0 [more]");
1122 return 0;
1123 }
1124
1125 if (call->reply_size != call->reply_max) {
1126 _leave(" = -EBADMSG [%u != %u]",
1127 call->reply_size, call->reply_max);
1128 return -EBADMSG;
1129 }
1130
1131 /* unmarshall the reply once we've received all of it */
1132 store_version = NULL;
1133 if (call->operation_ID == FSSTOREDATA)
1134 store_version = &call->store_version;
1135
1136 bp = call->buffer;
1137 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, store_version);
1138 /* xdr_decode_AFSVolSync(&bp, call->replyX); */
1139
1140 _leave(" = 0 [done]");
1141 return 0;
1142}
1143
1144/*
1145 * FS.StoreStatus operation type
1146 */
1147static const struct afs_call_type afs_RXFSStoreStatus = {
1148 .name = "FS.StoreStatus",
1149 .deliver = afs_deliver_fs_store_status,
1150 .abort_to_error = afs_abort_to_error,
1151 .destructor = afs_flat_call_destructor,
1152};
1153
1154static const struct afs_call_type afs_RXFSStoreData_as_Status = {
1155 .name = "FS.StoreData",
1156 .deliver = afs_deliver_fs_store_status,
1157 .abort_to_error = afs_abort_to_error,
1158 .destructor = afs_flat_call_destructor,
1159};
1160
1161/*
1162 * set the attributes on a file, using FS.StoreData rather than FS.StoreStatus
1163 * so as to alter the file size also
1164 */
1165static int afs_fs_setattr_size(struct afs_server *server, struct key *key,
1166 struct afs_vnode *vnode, struct iattr *attr,
1167 const struct afs_wait_mode *wait_mode)
1168{
1169 struct afs_call *call;
1170 __be32 *bp;
1171
1172 _enter(",%x,{%x:%u},,",
1173 key_serial(key), vnode->fid.vid, vnode->fid.vnode);
1174
1175 ASSERT(attr->ia_valid & ATTR_SIZE);
1176 ASSERTCMP(attr->ia_size, <=, 0xffffffff); // TODO: use 64-bit store
1177
1178 call = afs_alloc_flat_call(&afs_RXFSStoreData_as_Status,
1179 (4 + 6 + 3) * 4,
1180 (21 + 6) * 4);
1181 if (!call)
1182 return -ENOMEM;
1183
1184 call->key = key;
1185 call->reply = vnode;
1186 call->service_id = FS_SERVICE;
1187 call->port = htons(AFS_FS_PORT);
1188 call->store_version = vnode->status.data_version + 1;
1189 call->operation_ID = FSSTOREDATA;
1190
1191 /* marshall the parameters */
1192 bp = call->request;
1193 *bp++ = htonl(FSSTOREDATA);
1194 *bp++ = htonl(vnode->fid.vid);
1195 *bp++ = htonl(vnode->fid.vnode);
1196 *bp++ = htonl(vnode->fid.unique);
1197
1198 xdr_encode_AFS_StoreStatus(&bp, attr);
1199
1200 *bp++ = 0; /* position of start of write */
1201 *bp++ = 0; /* size of write */
1202 *bp++ = htonl(attr->ia_size); /* new file length */
1203
1204 return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
1205}
1206
1207/*
1208 * set the attributes on a file, using FS.StoreData if there's a change in file
1209 * size, and FS.StoreStatus otherwise
1210 */
1211int afs_fs_setattr(struct afs_server *server, struct key *key,
1212 struct afs_vnode *vnode, struct iattr *attr,
1213 const struct afs_wait_mode *wait_mode)
1214{
1215 struct afs_call *call;
1216 __be32 *bp;
1217
1218 if (attr->ia_valid & ATTR_SIZE)
1219 return afs_fs_setattr_size(server, key, vnode, attr,
1220 wait_mode);
1221
1222 _enter(",%x,{%x:%u},,",
1223 key_serial(key), vnode->fid.vid, vnode->fid.vnode);
1224
1225 call = afs_alloc_flat_call(&afs_RXFSStoreStatus,
1226 (4 + 6) * 4,
1227 (21 + 6) * 4);
1228 if (!call)
1229 return -ENOMEM;
1230
1231 call->key = key;
1232 call->reply = vnode;
1233 call->service_id = FS_SERVICE;
1234 call->port = htons(AFS_FS_PORT);
1235 call->operation_ID = FSSTORESTATUS;
1236
1237 /* marshall the parameters */
1238 bp = call->request;
1239 *bp++ = htonl(FSSTORESTATUS);
1240 *bp++ = htonl(vnode->fid.vid);
1241 *bp++ = htonl(vnode->fid.vnode);
1242 *bp++ = htonl(vnode->fid.unique);
1243
1244 xdr_encode_AFS_StoreStatus(&bp, attr);
1245
1246 return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
1247}
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index c184a4ee5995..515a5d12d8fb 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -125,7 +125,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
125 struct inode *inode; 125 struct inode *inode;
126 int ret; 126 int ret;
127 127
128 _enter(",{%u,%u,%u},,", fid->vid, fid->vnode, fid->unique); 128 _enter(",{%x:%u.%u},,", fid->vid, fid->vnode, fid->unique);
129 129
130 as = sb->s_fs_info; 130 as = sb->s_fs_info;
131 data.volume = as->volume; 131 data.volume = as->volume;
@@ -204,6 +204,19 @@ bad_inode:
204} 204}
205 205
206/* 206/*
207 * mark the data attached to an inode as obsolete due to a write on the server
208 * - might also want to ditch all the outstanding writes and dirty pages
209 */
210void afs_zap_data(struct afs_vnode *vnode)
211{
212 _enter("zap data {%x:%u}", vnode->fid.vid, vnode->fid.vnode);
213
214 /* nuke all the non-dirty pages that aren't locked, mapped or being
215 * written back */
216 invalidate_remote_inode(&vnode->vfs_inode);
217}
218
219/*
207 * validate a vnode/inode 220 * validate a vnode/inode
208 * - there are several things we need to check 221 * - there are several things we need to check
209 * - parent dir data changes (rm, rmdir, rename, mkdir, create, link, 222 * - parent dir data changes (rm, rmdir, rename, mkdir, create, link,
@@ -258,10 +271,8 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
258 271
259 /* if the vnode's data version number changed then its contents are 272 /* if the vnode's data version number changed then its contents are
260 * different */ 273 * different */
261 if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) { 274 if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags))
262 _debug("zap data {%x:%d}", vnode->fid.vid, vnode->fid.vnode); 275 afs_zap_data(vnode);
263 invalidate_remote_inode(&vnode->vfs_inode);
264 }
265 276
266 clear_bit(AFS_VNODE_MODIFIED, &vnode->flags); 277 clear_bit(AFS_VNODE_MODIFIED, &vnode->flags);
267 mutex_unlock(&vnode->validate_lock); 278 mutex_unlock(&vnode->validate_lock);
@@ -278,7 +289,7 @@ error_unlock:
278/* 289/*
279 * read the attributes of an inode 290 * read the attributes of an inode
280 */ 291 */
281int afs_inode_getattr(struct vfsmount *mnt, struct dentry *dentry, 292int afs_getattr(struct vfsmount *mnt, struct dentry *dentry,
282 struct kstat *stat) 293 struct kstat *stat)
283{ 294{
284 struct inode *inode; 295 struct inode *inode;
@@ -301,7 +312,7 @@ void afs_clear_inode(struct inode *inode)
301 312
302 vnode = AFS_FS_I(inode); 313 vnode = AFS_FS_I(inode);
303 314
304 _enter("{%x:%d.%d} v=%u x=%u t=%u }", 315 _enter("{%x:%u.%d} v=%u x=%u t=%u }",
305 vnode->fid.vid, 316 vnode->fid.vid,
306 vnode->fid.vnode, 317 vnode->fid.vnode,
307 vnode->fid.unique, 318 vnode->fid.unique,
@@ -323,6 +334,7 @@ void afs_clear_inode(struct inode *inode)
323 vnode->server = NULL; 334 vnode->server = NULL;
324 } 335 }
325 336
337 ASSERT(list_empty(&vnode->writebacks));
326 ASSERT(!vnode->cb_promised); 338 ASSERT(!vnode->cb_promised);
327 339
328#ifdef AFS_CACHING_SUPPORT 340#ifdef AFS_CACHING_SUPPORT
@@ -339,3 +351,47 @@ void afs_clear_inode(struct inode *inode)
339 351
340 _leave(""); 352 _leave("");
341} 353}
354
355/*
356 * set the attributes of an inode
357 */
358int afs_setattr(struct dentry *dentry, struct iattr *attr)
359{
360 struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
361 struct key *key;
362 int ret;
363
364 _enter("{%x:%u},{n=%s},%x",
365 vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name,
366 attr->ia_valid);
367
368 if (!(attr->ia_valid & (ATTR_SIZE | ATTR_MODE | ATTR_UID | ATTR_GID |
369 ATTR_MTIME))) {
370 _leave(" = 0 [unsupported]");
371 return 0;
372 }
373
374 /* flush any dirty data outstanding on a regular file */
375 if (S_ISREG(vnode->vfs_inode.i_mode)) {
376 filemap_write_and_wait(vnode->vfs_inode.i_mapping);
377 afs_writeback_all(vnode);
378 }
379
380 if (attr->ia_valid & ATTR_FILE) {
381 key = attr->ia_file->private_data;
382 } else {
383 key = afs_request_key(vnode->volume->cell);
384 if (IS_ERR(key)) {
385 ret = PTR_ERR(key);
386 goto error;
387 }
388 }
389
390 ret = afs_vnode_setattr(vnode, key, attr);
391 if (!(attr->ia_valid & ATTR_FILE))
392 key_put(key);
393
394error:
395 _leave(" = %d", ret);
396 return ret;
397}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 6dd3197d1d8d..a30d4fa768e3 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -21,6 +21,7 @@
21 21
22#define AFS_CELL_MAX_ADDRS 15 22#define AFS_CELL_MAX_ADDRS 15
23 23
24struct pagevec;
24struct afs_call; 25struct afs_call;
25 26
26typedef enum { 27typedef enum {
@@ -75,12 +76,15 @@ struct afs_call {
75 struct key *key; /* security for this call */ 76 struct key *key; /* security for this call */
76 struct afs_server *server; /* server affected by incoming CM call */ 77 struct afs_server *server; /* server affected by incoming CM call */
77 void *request; /* request data (first part) */ 78 void *request; /* request data (first part) */
78 void *request2; /* request data (second part) */ 79 struct address_space *mapping; /* page set */
80 struct afs_writeback *wb; /* writeback being performed */
79 void *buffer; /* reply receive buffer */ 81 void *buffer; /* reply receive buffer */
80 void *reply; /* reply buffer (first part) */ 82 void *reply; /* reply buffer (first part) */
81 void *reply2; /* reply buffer (second part) */ 83 void *reply2; /* reply buffer (second part) */
82 void *reply3; /* reply buffer (third part) */ 84 void *reply3; /* reply buffer (third part) */
83 void *reply4; /* reply buffer (fourth part) */ 85 void *reply4; /* reply buffer (fourth part) */
86 pgoff_t first; /* first page in mapping to deal with */
87 pgoff_t last; /* last page in mapping to deal with */
84 enum { /* call state */ 88 enum { /* call state */
85 AFS_CALL_REQUESTING, /* request is being sent for outgoing call */ 89 AFS_CALL_REQUESTING, /* request is being sent for outgoing call */
86 AFS_CALL_AWAIT_REPLY, /* awaiting reply to outgoing call */ 90 AFS_CALL_AWAIT_REPLY, /* awaiting reply to outgoing call */
@@ -97,14 +101,18 @@ struct afs_call {
97 unsigned request_size; /* size of request data */ 101 unsigned request_size; /* size of request data */
98 unsigned reply_max; /* maximum size of reply */ 102 unsigned reply_max; /* maximum size of reply */
99 unsigned reply_size; /* current size of reply */ 103 unsigned reply_size; /* current size of reply */
104 unsigned first_offset; /* offset into mapping[first] */
105 unsigned last_to; /* amount of mapping[last] */
100 unsigned short offset; /* offset into received data store */ 106 unsigned short offset; /* offset into received data store */
101 unsigned char unmarshall; /* unmarshalling phase */ 107 unsigned char unmarshall; /* unmarshalling phase */
102 bool incoming; /* T if incoming call */ 108 bool incoming; /* T if incoming call */
109 bool send_pages; /* T if data from mapping should be sent */
103 u16 service_id; /* RxRPC service ID to call */ 110 u16 service_id; /* RxRPC service ID to call */
104 __be16 port; /* target UDP port */ 111 __be16 port; /* target UDP port */
105 __be32 operation_ID; /* operation ID for an incoming call */ 112 __be32 operation_ID; /* operation ID for an incoming call */
106 u32 count; /* count for use in unmarshalling */ 113 u32 count; /* count for use in unmarshalling */
107 __be32 tmp; /* place to extract temporary data */ 114 __be32 tmp; /* place to extract temporary data */
115 afs_dataversion_t store_version; /* updated version expected from store */
108}; 116};
109 117
110struct afs_call_type { 118struct afs_call_type {
@@ -124,6 +132,32 @@ struct afs_call_type {
124}; 132};
125 133
126/* 134/*
135 * record of an outstanding writeback on a vnode
136 */
137struct afs_writeback {
138 struct list_head link; /* link in vnode->writebacks */
139 struct work_struct writer; /* work item to perform the writeback */
140 struct afs_vnode *vnode; /* vnode to which this write applies */
141 struct key *key; /* owner of this write */
142 wait_queue_head_t waitq; /* completion and ready wait queue */
143 pgoff_t first; /* first page in batch */
144 pgoff_t point; /* last page in current store op */
145 pgoff_t last; /* last page in batch (inclusive) */
146 unsigned offset_first; /* offset into first page of start of write */
147 unsigned to_last; /* offset into last page of end of write */
148 int num_conflicts; /* count of conflicting writes in list */
149 int usage;
150 bool conflicts; /* T if has dependent conflicts */
151 enum {
152 AFS_WBACK_SYNCING, /* synchronisation being performed */
153 AFS_WBACK_PENDING, /* write pending */
154 AFS_WBACK_CONFLICTING, /* conflicting writes posted */
155 AFS_WBACK_WRITING, /* writing back */
156 AFS_WBACK_COMPLETE /* the writeback record has been unlinked */
157 } state __attribute__((packed));
158};
159
160/*
127 * AFS superblock private data 161 * AFS superblock private data
128 * - there's one superblock per volume 162 * - there's one superblock per volume
129 */ 163 */
@@ -305,6 +339,7 @@ struct afs_vnode {
305 wait_queue_head_t update_waitq; /* status fetch waitqueue */ 339 wait_queue_head_t update_waitq; /* status fetch waitqueue */
306 int update_cnt; /* number of outstanding ops that will update the 340 int update_cnt; /* number of outstanding ops that will update the
307 * status */ 341 * status */
342 spinlock_t writeback_lock; /* lock for writebacks */
308 spinlock_t lock; /* waitqueue/flags lock */ 343 spinlock_t lock; /* waitqueue/flags lock */
309 unsigned long flags; 344 unsigned long flags;
310#define AFS_VNODE_CB_BROKEN 0 /* set if vnode's callback was broken */ 345#define AFS_VNODE_CB_BROKEN 0 /* set if vnode's callback was broken */
@@ -316,6 +351,8 @@ struct afs_vnode {
316 351
317 long acl_order; /* ACL check count (callback break count) */ 352 long acl_order; /* ACL check count (callback break count) */
318 353
354 struct list_head writebacks; /* alterations in pagecache that need writing */
355
319 /* outstanding callback notification on this file */ 356 /* outstanding callback notification on this file */
320 struct rb_node server_rb; /* link in server->fs_vnodes */ 357 struct rb_node server_rb; /* link in server->fs_vnodes */
321 struct rb_node cb_promise; /* link in server->cb_promises */ 358 struct rb_node cb_promise; /* link in server->cb_promises */
@@ -349,7 +386,6 @@ struct afs_permits {
349 * record of one of a system's set of network interfaces 386 * record of one of a system's set of network interfaces
350 */ 387 */
351struct afs_interface { 388struct afs_interface {
352 unsigned index; /* interface index */
353 struct in_addr address; /* IPv4 address bound to interface */ 389 struct in_addr address; /* IPv4 address bound to interface */
354 struct in_addr netmask; /* netmask applied to address */ 390 struct in_addr netmask; /* netmask applied to address */
355 unsigned mtu; /* MTU of interface */ 391 unsigned mtu; /* MTU of interface */
@@ -367,7 +403,7 @@ struct afs_uuid {
367 u32 time_low; /* low part of timestamp */ 403 u32 time_low; /* low part of timestamp */
368 u16 time_mid; /* mid part of timestamp */ 404 u16 time_mid; /* mid part of timestamp */
369 u16 time_hi_and_version; /* high part of timestamp and version */ 405 u16 time_hi_and_version; /* high part of timestamp and version */
370#define AFS_UUID_TO_UNIX_TIME 0x01b21dd213814000 406#define AFS_UUID_TO_UNIX_TIME 0x01b21dd213814000ULL
371#define AFS_UUID_TIMEHI_MASK 0x0fff 407#define AFS_UUID_TIMEHI_MASK 0x0fff
372#define AFS_UUID_VERSION_TIME 0x1000 /* time-based UUID */ 408#define AFS_UUID_VERSION_TIME 0x1000 /* time-based UUID */
373#define AFS_UUID_VERSION_NAME 0x3000 /* name-based UUID */ 409#define AFS_UUID_VERSION_NAME 0x3000 /* name-based UUID */
@@ -392,7 +428,7 @@ extern void afs_give_up_callback(struct afs_vnode *);
392extern void afs_dispatch_give_up_callbacks(struct work_struct *); 428extern void afs_dispatch_give_up_callbacks(struct work_struct *);
393extern void afs_flush_callback_breaks(struct afs_server *); 429extern void afs_flush_callback_breaks(struct afs_server *);
394extern int __init afs_callback_update_init(void); 430extern int __init afs_callback_update_init(void);
395extern void __exit afs_callback_update_kill(void); 431extern void afs_callback_update_kill(void);
396 432
397/* 433/*
398 * cell.c 434 * cell.c
@@ -434,10 +470,6 @@ extern const struct file_operations afs_file_operations;
434extern int afs_open(struct inode *, struct file *); 470extern int afs_open(struct inode *, struct file *);
435extern int afs_release(struct inode *, struct file *); 471extern int afs_release(struct inode *, struct file *);
436 472
437#ifdef AFS_CACHING_SUPPORT
438extern int afs_cache_get_page_cookie(struct page *, struct cachefs_page **);
439#endif
440
441/* 473/*
442 * fsclient.c 474 * fsclient.c
443 */ 475 */
@@ -468,6 +500,12 @@ extern int afs_fs_rename(struct afs_server *, struct key *,
468 struct afs_vnode *, const char *, 500 struct afs_vnode *, const char *,
469 struct afs_vnode *, const char *, 501 struct afs_vnode *, const char *,
470 const struct afs_wait_mode *); 502 const struct afs_wait_mode *);
503extern int afs_fs_store_data(struct afs_server *, struct afs_writeback *,
504 pgoff_t, pgoff_t, unsigned, unsigned,
505 const struct afs_wait_mode *);
506extern int afs_fs_setattr(struct afs_server *, struct key *,
507 struct afs_vnode *, struct iattr *,
508 const struct afs_wait_mode *);
471 509
472/* 510/*
473 * inode.c 511 * inode.c
@@ -475,10 +513,10 @@ extern int afs_fs_rename(struct afs_server *, struct key *,
475extern struct inode *afs_iget(struct super_block *, struct key *, 513extern struct inode *afs_iget(struct super_block *, struct key *,
476 struct afs_fid *, struct afs_file_status *, 514 struct afs_fid *, struct afs_file_status *,
477 struct afs_callback *); 515 struct afs_callback *);
516extern void afs_zap_data(struct afs_vnode *);
478extern int afs_validate(struct afs_vnode *, struct key *); 517extern int afs_validate(struct afs_vnode *, struct key *);
479extern int afs_inode_getattr(struct vfsmount *, struct dentry *, 518extern int afs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
480 struct kstat *); 519extern int afs_setattr(struct dentry *, struct iattr *);
481extern void afs_zap_permits(struct rcu_head *);
482extern void afs_clear_inode(struct inode *); 520extern void afs_clear_inode(struct inode *);
483 521
484/* 522/*
@@ -534,6 +572,7 @@ extern int afs_extract_data(struct afs_call *, struct sk_buff *, bool, void *,
534 */ 572 */
535extern void afs_clear_permits(struct afs_vnode *); 573extern void afs_clear_permits(struct afs_vnode *);
536extern void afs_cache_permit(struct afs_vnode *, struct key *, long); 574extern void afs_cache_permit(struct afs_vnode *, struct key *, long);
575extern void afs_zap_permits(struct rcu_head *);
537extern struct key *afs_request_key(struct afs_cell *); 576extern struct key *afs_request_key(struct afs_cell *);
538extern int afs_permission(struct inode *, int, struct nameidata *); 577extern int afs_permission(struct inode *, int, struct nameidata *);
539 578
@@ -564,7 +603,7 @@ extern void afs_fs_exit(void);
564 * use-rtnetlink.c 603 * use-rtnetlink.c
565 */ 604 */
566extern int afs_get_ipv4_interfaces(struct afs_interface *, size_t, bool); 605extern int afs_get_ipv4_interfaces(struct afs_interface *, size_t, bool);
567extern int afs_get_MAC_address(u8 [6]); 606extern int afs_get_MAC_address(u8 *, size_t);
568 607
569/* 608/*
570 * vlclient.c 609 * vlclient.c
@@ -591,7 +630,7 @@ extern struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *,
591 struct key *, 630 struct key *,
592 const char *, size_t); 631 const char *, size_t);
593extern void afs_put_vlocation(struct afs_vlocation *); 632extern void afs_put_vlocation(struct afs_vlocation *);
594extern void __exit afs_vlocation_purge(void); 633extern void afs_vlocation_purge(void);
595 634
596/* 635/*
597 * vnode.c 636 * vnode.c
@@ -630,6 +669,9 @@ extern int afs_vnode_symlink(struct afs_vnode *, struct key *, const char *,
630 struct afs_file_status *, struct afs_server **); 669 struct afs_file_status *, struct afs_server **);
631extern int afs_vnode_rename(struct afs_vnode *, struct afs_vnode *, 670extern int afs_vnode_rename(struct afs_vnode *, struct afs_vnode *,
632 struct key *, const char *, const char *); 671 struct key *, const char *, const char *);
672extern int afs_vnode_store_data(struct afs_writeback *, pgoff_t, pgoff_t,
673 unsigned, unsigned);
674extern int afs_vnode_setattr(struct afs_vnode *, struct key *, struct iattr *);
633 675
634/* 676/*
635 * volume.c 677 * volume.c
@@ -646,6 +688,23 @@ extern struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *);
646extern int afs_volume_release_fileserver(struct afs_vnode *, 688extern int afs_volume_release_fileserver(struct afs_vnode *,
647 struct afs_server *, int); 689 struct afs_server *, int);
648 690
691/*
692 * write.c
693 */
694extern int afs_set_page_dirty(struct page *);
695extern void afs_put_writeback(struct afs_writeback *);
696extern int afs_prepare_write(struct file *, struct page *, unsigned, unsigned);
697extern int afs_commit_write(struct file *, struct page *, unsigned, unsigned);
698extern int afs_writepage(struct page *, struct writeback_control *);
699extern int afs_writepages(struct address_space *, struct writeback_control *);
700extern int afs_write_inode(struct inode *, int);
701extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *);
702extern ssize_t afs_file_write(struct kiocb *, const struct iovec *,
703 unsigned long, loff_t);
704extern int afs_writeback_all(struct afs_vnode *);
705extern int afs_fsync(struct file *, struct dentry *, int);
706
707
649/*****************************************************************************/ 708/*****************************************************************************/
650/* 709/*
651 * debug tracing 710 * debug tracing
@@ -727,6 +786,21 @@ do { \
727 } \ 786 } \
728} while(0) 787} while(0)
729 788
789#define ASSERTRANGE(L, OP1, N, OP2, H) \
790do { \
791 if (unlikely(!((L) OP1 (N)) || !((N) OP2 (H)))) { \
792 printk(KERN_ERR "\n"); \
793 printk(KERN_ERR "AFS: Assertion failed\n"); \
794 printk(KERN_ERR "%lu "#OP1" %lu "#OP2" %lu is false\n", \
795 (unsigned long)(L), (unsigned long)(N), \
796 (unsigned long)(H)); \
797 printk(KERN_ERR "0x%lx "#OP1" 0x%lx "#OP2" 0x%lx is false\n", \
798 (unsigned long)(L), (unsigned long)(N), \
799 (unsigned long)(H)); \
800 BUG(); \
801 } \
802} while(0)
803
730#define ASSERTIF(C, X) \ 804#define ASSERTIF(C, X) \
731do { \ 805do { \
732 if (unlikely((C) && !(X))) { \ 806 if (unlikely((C) && !(X))) { \
@@ -759,6 +833,10 @@ do { \
759do { \ 833do { \
760} while(0) 834} while(0)
761 835
836#define ASSERTRANGE(L, OP1, N, OP2, H) \
837do { \
838} while(0)
839
762#define ASSERTIF(C, X) \ 840#define ASSERTIF(C, X) \
763do { \ 841do { \
764} while(0) 842} while(0)
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 40c2704e7557..f1f71ff7d5c6 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -54,7 +54,7 @@ static int __init afs_get_client_UUID(void)
54 54
55 /* read the MAC address of one of the external interfaces and construct 55 /* read the MAC address of one of the external interfaces and construct
56 * a UUID from it */ 56 * a UUID from it */
57 ret = afs_get_MAC_address(afs_uuid.node); 57 ret = afs_get_MAC_address(afs_uuid.node, sizeof(afs_uuid.node));
58 if (ret < 0) 58 if (ret < 0)
59 return ret; 59 return ret;
60 60
@@ -149,6 +149,7 @@ error_cache:
149 afs_vlocation_purge(); 149 afs_vlocation_purge();
150 afs_cell_purge(); 150 afs_cell_purge();
151 afs_proc_cleanup(); 151 afs_proc_cleanup();
152 rcu_barrier();
152 printk(KERN_ERR "kAFS: failed to register: %d\n", ret); 153 printk(KERN_ERR "kAFS: failed to register: %d\n", ret);
153 return ret; 154 return ret;
154} 155}
@@ -176,6 +177,7 @@ static void __exit afs_exit(void)
176 cachefs_unregister_netfs(&afs_cache_netfs); 177 cachefs_unregister_netfs(&afs_cache_netfs);
177#endif 178#endif
178 afs_proc_cleanup(); 179 afs_proc_cleanup();
180 rcu_barrier();
179} 181}
180 182
181module_exit(afs_exit); 183module_exit(afs_exit);
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index cdb9792d8161..d1a889c40742 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -22,6 +22,7 @@ int afs_abort_to_error(u32 abort_code)
22{ 22{
23 switch (abort_code) { 23 switch (abort_code) {
24 case 13: return -EACCES; 24 case 13: return -EACCES;
25 case 27: return -EFBIG;
25 case 30: return -EROFS; 26 case 30: return -EROFS;
26 case VSALVAGE: return -EIO; 27 case VSALVAGE: return -EIO;
27 case VNOVNODE: return -ENOENT; 28 case VNOVNODE: return -ENOENT;
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index b905ae37f912..a3684dcc76e7 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -36,7 +36,7 @@ const struct inode_operations afs_mntpt_inode_operations = {
36 .lookup = afs_mntpt_lookup, 36 .lookup = afs_mntpt_lookup,
37 .follow_link = afs_mntpt_follow_link, 37 .follow_link = afs_mntpt_follow_link,
38 .readlink = page_readlink, 38 .readlink = page_readlink,
39 .getattr = afs_inode_getattr, 39 .getattr = afs_getattr,
40}; 40};
41 41
42static LIST_HEAD(afs_vfsmounts); 42static LIST_HEAD(afs_vfsmounts);
@@ -58,7 +58,8 @@ int afs_mntpt_check_symlink(struct afs_vnode *vnode, struct key *key)
58 char *buf; 58 char *buf;
59 int ret; 59 int ret;
60 60
61 _enter("{%u,%u}", vnode->fid.vnode, vnode->fid.unique); 61 _enter("{%x:%u,%u}",
62 vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
62 63
63 /* read the contents of the symlink into the pagecache */ 64 /* read the contents of the symlink into the pagecache */
64 page = read_mapping_page(AFS_VNODE_TO_I(vnode)->i_mapping, 0, &file); 65 page = read_mapping_page(AFS_VNODE_TO_I(vnode)->i_mapping, 0, &file);
@@ -68,13 +69,11 @@ int afs_mntpt_check_symlink(struct afs_vnode *vnode, struct key *key)
68 } 69 }
69 70
70 ret = -EIO; 71 ret = -EIO;
71 wait_on_page_locked(page);
72 buf = kmap(page);
73 if (!PageUptodate(page))
74 goto out_free;
75 if (PageError(page)) 72 if (PageError(page))
76 goto out_free; 73 goto out_free;
77 74
75 buf = kmap(page);
76
78 /* examine the symlink's contents */ 77 /* examine the symlink's contents */
79 size = vnode->status.size; 78 size = vnode->status.size;
80 _debug("symlink to %*.*s", (int) size, (int) size, buf); 79 _debug("symlink to %*.*s", (int) size, (int) size, buf);
@@ -91,8 +90,8 @@ int afs_mntpt_check_symlink(struct afs_vnode *vnode, struct key *key)
91 90
92 ret = 0; 91 ret = 0;
93 92
94out_free:
95 kunmap(page); 93 kunmap(page);
94out_free:
96 page_cache_release(page); 95 page_cache_release(page);
97out: 96out:
98 _leave(" = %d", ret); 97 _leave(" = %d", ret);
@@ -171,8 +170,7 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
171 } 170 }
172 171
173 ret = -EIO; 172 ret = -EIO;
174 wait_on_page_locked(page); 173 if (PageError(page))
175 if (!PageUptodate(page) || PageError(page))
176 goto error; 174 goto error;
177 175
178 buf = kmap(page); 176 buf = kmap(page);
diff --git a/fs/afs/netdevices.c b/fs/afs/netdevices.c
new file mode 100644
index 000000000000..fc27d4b52e5f
--- /dev/null
+++ b/fs/afs/netdevices.c
@@ -0,0 +1,68 @@
1/* AFS network device helpers
2 *
3 * Copyright (c) 2007 Patrick McHardy <kaber@trash.net>
4 */
5
6#include <linux/string.h>
7#include <linux/rtnetlink.h>
8#include <linux/inetdevice.h>
9#include <linux/netdevice.h>
10#include <linux/if_arp.h>
11#include "internal.h"
12
13/*
14 * get a MAC address from a random ethernet interface that has a real one
15 * - the buffer will normally be 6 bytes in size
16 */
17int afs_get_MAC_address(u8 *mac, size_t maclen)
18{
19 struct net_device *dev;
20 int ret = -ENODEV;
21
22 if (maclen != ETH_ALEN)
23 BUG();
24
25 rtnl_lock();
26 dev = __dev_getfirstbyhwtype(ARPHRD_ETHER);
27 if (dev) {
28 memcpy(mac, dev->dev_addr, maclen);
29 ret = 0;
30 }
31 rtnl_unlock();
32 return ret;
33}
34
35/*
36 * get a list of this system's interface IPv4 addresses, netmasks and MTUs
37 * - maxbufs must be at least 1
38 * - returns the number of interface records in the buffer
39 */
40int afs_get_ipv4_interfaces(struct afs_interface *bufs, size_t maxbufs,
41 bool wantloopback)
42{
43 struct net_device *dev;
44 struct in_device *idev;
45 int n = 0;
46
47 ASSERT(maxbufs > 0);
48
49 rtnl_lock();
50 for_each_netdev(dev) {
51 if (dev->type == ARPHRD_LOOPBACK && !wantloopback)
52 continue;
53 idev = __in_dev_get_rtnl(dev);
54 if (!idev)
55 continue;
56 for_primary_ifa(idev) {
57 bufs[n].address.s_addr = ifa->ifa_address;
58 bufs[n].netmask.s_addr = ifa->ifa_mask;
59 bufs[n].mtu = dev->mtu;
60 n++;
61 if (n >= maxbufs)
62 goto out;
63 } endfor_ifa(idev);
64 }
65out:
66 rtnl_unlock();
67 return n;
68}
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index e7b047328a39..04189c47d6a0 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -237,6 +237,70 @@ void afs_flat_call_destructor(struct afs_call *call)
237} 237}
238 238
239/* 239/*
240 * attach the data from a bunch of pages on an inode to a call
241 */
242int afs_send_pages(struct afs_call *call, struct msghdr *msg, struct kvec *iov)
243{
244 struct page *pages[8];
245 unsigned count, n, loop, offset, to;
246 pgoff_t first = call->first, last = call->last;
247 int ret;
248
249 _enter("");
250
251 offset = call->first_offset;
252 call->first_offset = 0;
253
254 do {
255 _debug("attach %lx-%lx", first, last);
256
257 count = last - first + 1;
258 if (count > ARRAY_SIZE(pages))
259 count = ARRAY_SIZE(pages);
260 n = find_get_pages_contig(call->mapping, first, count, pages);
261 ASSERTCMP(n, ==, count);
262
263 loop = 0;
264 do {
265 msg->msg_flags = 0;
266 to = PAGE_SIZE;
267 if (first + loop >= last)
268 to = call->last_to;
269 else
270 msg->msg_flags = MSG_MORE;
271 iov->iov_base = kmap(pages[loop]) + offset;
272 iov->iov_len = to - offset;
273 offset = 0;
274
275 _debug("- range %u-%u%s",
276 offset, to, msg->msg_flags ? " [more]" : "");
277 msg->msg_iov = (struct iovec *) iov;
278 msg->msg_iovlen = 1;
279
280 /* have to change the state *before* sending the last
281 * packet as RxRPC might give us the reply before it
282 * returns from sending the request */
283 if (first + loop >= last)
284 call->state = AFS_CALL_AWAIT_REPLY;
285 ret = rxrpc_kernel_send_data(call->rxcall, msg,
286 to - offset);
287 kunmap(pages[loop]);
288 if (ret < 0)
289 break;
290 } while (++loop < count);
291 first += count;
292
293 for (loop = 0; loop < count; loop++)
294 put_page(pages[loop]);
295 if (ret < 0)
296 break;
297 } while (first < last);
298
299 _leave(" = %d", ret);
300 return ret;
301}
302
303/*
240 * initiate a call 304 * initiate a call
241 */ 305 */
242int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, 306int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
@@ -253,8 +317,9 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
253 ASSERT(call->type != NULL); 317 ASSERT(call->type != NULL);
254 ASSERT(call->type->name != NULL); 318 ASSERT(call->type->name != NULL);
255 319
256 _debug("MAKE %p{%s} [%d]", 320 _debug("____MAKE %p{%s,%x} [%d]____",
257 call, call->type->name, atomic_read(&afs_outstanding_calls)); 321 call, call->type->name, key_serial(call->key),
322 atomic_read(&afs_outstanding_calls));
258 323
259 call->wait_mode = wait_mode; 324 call->wait_mode = wait_mode;
260 INIT_WORK(&call->async_work, afs_process_async_call); 325 INIT_WORK(&call->async_work, afs_process_async_call);
@@ -289,16 +354,23 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
289 msg.msg_iovlen = 1; 354 msg.msg_iovlen = 1;
290 msg.msg_control = NULL; 355 msg.msg_control = NULL;
291 msg.msg_controllen = 0; 356 msg.msg_controllen = 0;
292 msg.msg_flags = 0; 357 msg.msg_flags = (call->send_pages ? MSG_MORE : 0);
293 358
294 /* have to change the state *before* sending the last packet as RxRPC 359 /* have to change the state *before* sending the last packet as RxRPC
295 * might give us the reply before it returns from sending the 360 * might give us the reply before it returns from sending the
296 * request */ 361 * request */
297 call->state = AFS_CALL_AWAIT_REPLY; 362 if (!call->send_pages)
363 call->state = AFS_CALL_AWAIT_REPLY;
298 ret = rxrpc_kernel_send_data(rxcall, &msg, call->request_size); 364 ret = rxrpc_kernel_send_data(rxcall, &msg, call->request_size);
299 if (ret < 0) 365 if (ret < 0)
300 goto error_do_abort; 366 goto error_do_abort;
301 367
368 if (call->send_pages) {
369 ret = afs_send_pages(call, &msg, iov);
370 if (ret < 0)
371 goto error_do_abort;
372 }
373
302 /* at this point, an async call may no longer exist as it may have 374 /* at this point, an async call may no longer exist as it may have
303 * already completed */ 375 * already completed */
304 return wait_mode->wait(call); 376 return wait_mode->wait(call);
@@ -772,7 +844,7 @@ int afs_extract_data(struct afs_call *call, struct sk_buff *skb,
772 844
773 if (call->offset < count) { 845 if (call->offset < count) {
774 if (last) { 846 if (last) {
775 _leave(" = -EBADMSG [%d < %lu]", call->offset, count); 847 _leave(" = -EBADMSG [%d < %zu]", call->offset, count);
776 return -EBADMSG; 848 return -EBADMSG;
777 } 849 }
778 _leave(" = -EAGAIN"); 850 _leave(" = -EAGAIN");
diff --git a/fs/afs/security.c b/fs/afs/security.c
index f9f424d80458..e0ea88b63ebf 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -109,7 +109,7 @@ void afs_clear_permits(struct afs_vnode *vnode)
109{ 109{
110 struct afs_permits *permits; 110 struct afs_permits *permits;
111 111
112 _enter("{%x}", vnode->fid.vnode); 112 _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
113 113
114 mutex_lock(&vnode->permits_lock); 114 mutex_lock(&vnode->permits_lock);
115 permits = vnode->permits; 115 permits = vnode->permits;
@@ -132,7 +132,8 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key, long acl_order)
132 struct afs_vnode *auth_vnode; 132 struct afs_vnode *auth_vnode;
133 int count, loop; 133 int count, loop;
134 134
135 _enter("{%x},%x,%lx", vnode->fid.vnode, key_serial(key), acl_order); 135 _enter("{%x:%u},%x,%lx",
136 vnode->fid.vid, vnode->fid.vnode, key_serial(key), acl_order);
136 137
137 auth_vnode = afs_get_auth_inode(vnode, key); 138 auth_vnode = afs_get_auth_inode(vnode, key);
138 if (IS_ERR(auth_vnode)) { 139 if (IS_ERR(auth_vnode)) {
@@ -220,7 +221,8 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
220 bool valid; 221 bool valid;
221 int loop, ret; 222 int loop, ret;
222 223
223 _enter(""); 224 _enter("{%x:%u},%x",
225 vnode->fid.vid, vnode->fid.vnode, key_serial(key));
224 226
225 auth_vnode = afs_get_auth_inode(vnode, key); 227 auth_vnode = afs_get_auth_inode(vnode, key);
226 if (IS_ERR(auth_vnode)) { 228 if (IS_ERR(auth_vnode)) {
@@ -268,9 +270,9 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
268 _leave(" = %d", ret); 270 _leave(" = %d", ret);
269 return ret; 271 return ret;
270 } 272 }
273 *_access = vnode->status.caller_access;
271 } 274 }
272 275
273 *_access = vnode->status.caller_access;
274 iput(&auth_vnode->vfs_inode); 276 iput(&auth_vnode->vfs_inode);
275 _leave(" = 0 [access %x]", *_access); 277 _leave(" = 0 [access %x]", *_access);
276 return 0; 278 return 0;
@@ -288,7 +290,7 @@ int afs_permission(struct inode *inode, int mask, struct nameidata *nd)
288 struct key *key; 290 struct key *key;
289 int ret; 291 int ret;
290 292
291 _enter("{{%x:%x},%lx},%x,", 293 _enter("{{%x:%u},%lx},%x,",
292 vnode->fid.vid, vnode->fid.vnode, vnode->flags, mask); 294 vnode->fid.vid, vnode->fid.vnode, vnode->flags, mask);
293 295
294 key = afs_request_key(vnode->volume->cell); 296 key = afs_request_key(vnode->volume->cell);
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 96bb23b476a2..231ae4150279 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -252,6 +252,9 @@ static void afs_destroy_server(struct afs_server *server)
252{ 252{
253 _enter("%p", server); 253 _enter("%p", server);
254 254
255 ASSERTIF(server->cb_break_head != server->cb_break_tail,
256 delayed_work_pending(&server->cb_break_work));
257
255 ASSERTCMP(server->fs_vnodes.rb_node, ==, NULL); 258 ASSERTCMP(server->fs_vnodes.rb_node, ==, NULL);
256 ASSERTCMP(server->cb_promises.rb_node, ==, NULL); 259 ASSERTCMP(server->cb_promises.rb_node, ==, NULL);
257 ASSERTCMP(server->cb_break_head, ==, server->cb_break_tail); 260 ASSERTCMP(server->cb_break_head, ==, server->cb_break_tail);
diff --git a/fs/afs/super.c b/fs/afs/super.c
index cebd03c91f57..d24be334b608 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -20,6 +20,7 @@
20#include <linux/slab.h> 20#include <linux/slab.h>
21#include <linux/fs.h> 21#include <linux/fs.h>
22#include <linux/pagemap.h> 22#include <linux/pagemap.h>
23#include <linux/parser.h>
23#include "internal.h" 24#include "internal.h"
24 25
25#define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */ 26#define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */
@@ -42,13 +43,14 @@ struct file_system_type afs_fs_type = {
42 .name = "afs", 43 .name = "afs",
43 .get_sb = afs_get_sb, 44 .get_sb = afs_get_sb,
44 .kill_sb = kill_anon_super, 45 .kill_sb = kill_anon_super,
45 .fs_flags = FS_BINARY_MOUNTDATA, 46 .fs_flags = 0,
46}; 47};
47 48
48static const struct super_operations afs_super_ops = { 49static const struct super_operations afs_super_ops = {
49 .statfs = simple_statfs, 50 .statfs = simple_statfs,
50 .alloc_inode = afs_alloc_inode, 51 .alloc_inode = afs_alloc_inode,
51 .drop_inode = generic_delete_inode, 52 .drop_inode = generic_delete_inode,
53 .write_inode = afs_write_inode,
52 .destroy_inode = afs_destroy_inode, 54 .destroy_inode = afs_destroy_inode,
53 .clear_inode = afs_clear_inode, 55 .clear_inode = afs_clear_inode,
54 .umount_begin = afs_umount_begin, 56 .umount_begin = afs_umount_begin,
@@ -58,6 +60,20 @@ static const struct super_operations afs_super_ops = {
58static struct kmem_cache *afs_inode_cachep; 60static struct kmem_cache *afs_inode_cachep;
59static atomic_t afs_count_active_inodes; 61static atomic_t afs_count_active_inodes;
60 62
63enum {
64 afs_no_opt,
65 afs_opt_cell,
66 afs_opt_rwpath,
67 afs_opt_vol,
68};
69
70static match_table_t afs_options_list = {
71 { afs_opt_cell, "cell=%s" },
72 { afs_opt_rwpath, "rwpath" },
73 { afs_opt_vol, "vol=%s" },
74 { afs_no_opt, NULL },
75};
76
61/* 77/*
62 * initialise the filesystem 78 * initialise the filesystem
63 */ 79 */
@@ -115,31 +131,6 @@ void __exit afs_fs_exit(void)
115} 131}
116 132
117/* 133/*
118 * check that an argument has a value
119 */
120static int want_arg(char **_value, const char *option)
121{
122 if (!_value || !*_value || !**_value) {
123 printk(KERN_NOTICE "kAFS: %s: argument missing\n", option);
124 return 0;
125 }
126 return 1;
127}
128
129/*
130 * check that there's no subsequent value
131 */
132static int want_no_value(char *const *_value, const char *option)
133{
134 if (*_value && **_value) {
135 printk(KERN_NOTICE "kAFS: %s: Invalid argument: %s\n",
136 option, *_value);
137 return 0;
138 }
139 return 1;
140}
141
142/*
143 * parse the mount options 134 * parse the mount options
144 * - this function has been shamelessly adapted from the ext3 fs which 135 * - this function has been shamelessly adapted from the ext3 fs which
145 * shamelessly adapted it from the msdos fs 136 * shamelessly adapted it from the msdos fs
@@ -148,48 +139,46 @@ static int afs_parse_options(struct afs_mount_params *params,
148 char *options, const char **devname) 139 char *options, const char **devname)
149{ 140{
150 struct afs_cell *cell; 141 struct afs_cell *cell;
151 char *key, *value; 142 substring_t args[MAX_OPT_ARGS];
152 int ret; 143 char *p;
144 int token;
153 145
154 _enter("%s", options); 146 _enter("%s", options);
155 147
156 options[PAGE_SIZE - 1] = 0; 148 options[PAGE_SIZE - 1] = 0;
157 149
158 ret = 0; 150 while ((p = strsep(&options, ","))) {
159 while ((key = strsep(&options, ","))) { 151 if (!*p)
160 value = strchr(key, '='); 152 continue;
161 if (value)
162 *value++ = 0;
163
164 _debug("kAFS: KEY: %s, VAL:%s", key, value ?: "-");
165 153
166 if (strcmp(key, "rwpath") == 0) { 154 token = match_token(p, afs_options_list, args);
167 if (!want_no_value(&value, "rwpath")) 155 switch (token) {
168 return -EINVAL; 156 case afs_opt_cell:
169 params->rwpath = 1; 157 cell = afs_cell_lookup(args[0].from,
170 } else if (strcmp(key, "vol") == 0) { 158 args[0].to - args[0].from);
171 if (!want_arg(&value, "vol"))
172 return -EINVAL;
173 *devname = value;
174 } else if (strcmp(key, "cell") == 0) {
175 if (!want_arg(&value, "cell"))
176 return -EINVAL;
177 cell = afs_cell_lookup(value, strlen(value));
178 if (IS_ERR(cell)) 159 if (IS_ERR(cell))
179 return PTR_ERR(cell); 160 return PTR_ERR(cell);
180 afs_put_cell(params->cell); 161 afs_put_cell(params->cell);
181 params->cell = cell; 162 params->cell = cell;
182 } else { 163 break;
183 printk("kAFS: Unknown mount option: '%s'\n", key); 164
184 ret = -EINVAL; 165 case afs_opt_rwpath:
185 goto error; 166 params->rwpath = 1;
167 break;
168
169 case afs_opt_vol:
170 *devname = args[0].from;
171 break;
172
173 default:
174 printk(KERN_ERR "kAFS:"
175 " Unknown or invalid mount option: '%s'\n", p);
176 return -EINVAL;
186 } 177 }
187 } 178 }
188 179
189 ret = 0; 180 _leave(" = 0");
190error: 181 return 0;
191 _leave(" = %d", ret);
192 return ret;
193} 182}
194 183
195/* 184/*
@@ -361,7 +350,6 @@ error:
361 350
362/* 351/*
363 * get an AFS superblock 352 * get an AFS superblock
364 * - TODO: don't use get_sb_nodev(), but rather call sget() directly
365 */ 353 */
366static int afs_get_sb(struct file_system_type *fs_type, 354static int afs_get_sb(struct file_system_type *fs_type,
367 int flags, 355 int flags,
@@ -386,7 +374,6 @@ static int afs_get_sb(struct file_system_type *fs_type,
386 goto error; 374 goto error;
387 } 375 }
388 376
389
390 ret = afs_parse_device_name(&params, dev_name); 377 ret = afs_parse_device_name(&params, dev_name);
391 if (ret < 0) 378 if (ret < 0)
392 goto error; 379 goto error;
@@ -467,14 +454,15 @@ static void afs_i_init_once(void *_vnode, struct kmem_cache *cachep,
467{ 454{
468 struct afs_vnode *vnode = _vnode; 455 struct afs_vnode *vnode = _vnode;
469 456
470 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 457 if (flags & SLAB_CTOR_CONSTRUCTOR) {
471 SLAB_CTOR_CONSTRUCTOR) {
472 memset(vnode, 0, sizeof(*vnode)); 458 memset(vnode, 0, sizeof(*vnode));
473 inode_init_once(&vnode->vfs_inode); 459 inode_init_once(&vnode->vfs_inode);
474 init_waitqueue_head(&vnode->update_waitq); 460 init_waitqueue_head(&vnode->update_waitq);
475 mutex_init(&vnode->permits_lock); 461 mutex_init(&vnode->permits_lock);
476 mutex_init(&vnode->validate_lock); 462 mutex_init(&vnode->validate_lock);
463 spin_lock_init(&vnode->writeback_lock);
477 spin_lock_init(&vnode->lock); 464 spin_lock_init(&vnode->lock);
465 INIT_LIST_HEAD(&vnode->writebacks);
478 INIT_WORK(&vnode->cb_broken_work, afs_broken_callback_work); 466 INIT_WORK(&vnode->cb_broken_work, afs_broken_callback_work);
479 } 467 }
480} 468}
diff --git a/fs/afs/use-rtnetlink.c b/fs/afs/use-rtnetlink.c
deleted file mode 100644
index 82f0daa28970..000000000000
--- a/fs/afs/use-rtnetlink.c
+++ /dev/null
@@ -1,473 +0,0 @@
1/* RTNETLINK client
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/netlink.h>
12#include <linux/rtnetlink.h>
13#include <linux/if_addr.h>
14#include <linux/if_arp.h>
15#include <linux/inetdevice.h>
16#include <net/netlink.h>
17#include "internal.h"
18
19struct afs_rtm_desc {
20 struct socket *nlsock;
21 struct afs_interface *bufs;
22 u8 *mac;
23 size_t nbufs;
24 size_t maxbufs;
25 void *data;
26 ssize_t datalen;
27 size_t datamax;
28 int msg_seq;
29 unsigned mac_index;
30 bool wantloopback;
31 int (*parse)(struct afs_rtm_desc *, struct nlmsghdr *);
32};
33
34/*
35 * parse an RTM_GETADDR response
36 */
37static int afs_rtm_getaddr_parse(struct afs_rtm_desc *desc,
38 struct nlmsghdr *nlhdr)
39{
40 struct afs_interface *this;
41 struct ifaddrmsg *ifa;
42 struct rtattr *rtattr;
43 const char *name;
44 size_t len;
45
46 ifa = (struct ifaddrmsg *) NLMSG_DATA(nlhdr);
47
48 _enter("{ix=%d,af=%d}", ifa->ifa_index, ifa->ifa_family);
49
50 if (ifa->ifa_family != AF_INET) {
51 _leave(" = 0 [family %d]", ifa->ifa_family);
52 return 0;
53 }
54 if (desc->nbufs >= desc->maxbufs) {
55 _leave(" = 0 [max %zu/%zu]", desc->nbufs, desc->maxbufs);
56 return 0;
57 }
58
59 this = &desc->bufs[desc->nbufs];
60
61 this->index = ifa->ifa_index;
62 this->netmask.s_addr = inet_make_mask(ifa->ifa_prefixlen);
63 this->mtu = 0;
64
65 rtattr = NLMSG_DATA(nlhdr) + NLMSG_ALIGN(sizeof(struct ifaddrmsg));
66 len = NLMSG_PAYLOAD(nlhdr, sizeof(struct ifaddrmsg));
67
68 name = "unknown";
69 for (; RTA_OK(rtattr, len); rtattr = RTA_NEXT(rtattr, len)) {
70 switch (rtattr->rta_type) {
71 case IFA_ADDRESS:
72 memcpy(&this->address, RTA_DATA(rtattr), 4);
73 break;
74 case IFA_LABEL:
75 name = RTA_DATA(rtattr);
76 break;
77 }
78 }
79
80 _debug("%s: "NIPQUAD_FMT"/"NIPQUAD_FMT,
81 name, NIPQUAD(this->address), NIPQUAD(this->netmask));
82
83 desc->nbufs++;
84 _leave(" = 0");
85 return 0;
86}
87
88/*
89 * parse an RTM_GETLINK response for MTUs
90 */
91static int afs_rtm_getlink_if_parse(struct afs_rtm_desc *desc,
92 struct nlmsghdr *nlhdr)
93{
94 struct afs_interface *this;
95 struct ifinfomsg *ifi;
96 struct rtattr *rtattr;
97 const char *name;
98 size_t len, loop;
99
100 ifi = (struct ifinfomsg *) NLMSG_DATA(nlhdr);
101
102 _enter("{ix=%d}", ifi->ifi_index);
103
104 for (loop = 0; loop < desc->nbufs; loop++) {
105 this = &desc->bufs[loop];
106 if (this->index == ifi->ifi_index)
107 goto found;
108 }
109
110 _leave(" = 0 [no match]");
111 return 0;
112
113found:
114 if (ifi->ifi_type == ARPHRD_LOOPBACK && !desc->wantloopback) {
115 _leave(" = 0 [loopback]");
116 return 0;
117 }
118
119 rtattr = NLMSG_DATA(nlhdr) + NLMSG_ALIGN(sizeof(struct ifinfomsg));
120 len = NLMSG_PAYLOAD(nlhdr, sizeof(struct ifinfomsg));
121
122 name = "unknown";
123 for (; RTA_OK(rtattr, len); rtattr = RTA_NEXT(rtattr, len)) {
124 switch (rtattr->rta_type) {
125 case IFLA_MTU:
126 memcpy(&this->mtu, RTA_DATA(rtattr), 4);
127 break;
128 case IFLA_IFNAME:
129 name = RTA_DATA(rtattr);
130 break;
131 }
132 }
133
134 _debug("%s: "NIPQUAD_FMT"/"NIPQUAD_FMT" mtu %u",
135 name, NIPQUAD(this->address), NIPQUAD(this->netmask),
136 this->mtu);
137
138 _leave(" = 0");
139 return 0;
140}
141
142/*
143 * parse an RTM_GETLINK response for the MAC address belonging to the lowest
144 * non-internal interface
145 */
146static int afs_rtm_getlink_mac_parse(struct afs_rtm_desc *desc,
147 struct nlmsghdr *nlhdr)
148{
149 struct ifinfomsg *ifi;
150 struct rtattr *rtattr;
151 const char *name;
152 size_t remain, len;
153 bool set;
154
155 ifi = (struct ifinfomsg *) NLMSG_DATA(nlhdr);
156
157 _enter("{ix=%d}", ifi->ifi_index);
158
159 if (ifi->ifi_index >= desc->mac_index) {
160 _leave(" = 0 [high]");
161 return 0;
162 }
163 if (ifi->ifi_type == ARPHRD_LOOPBACK) {
164 _leave(" = 0 [loopback]");
165 return 0;
166 }
167
168 rtattr = NLMSG_DATA(nlhdr) + NLMSG_ALIGN(sizeof(struct ifinfomsg));
169 remain = NLMSG_PAYLOAD(nlhdr, sizeof(struct ifinfomsg));
170
171 name = "unknown";
172 set = false;
173 for (; RTA_OK(rtattr, remain); rtattr = RTA_NEXT(rtattr, remain)) {
174 switch (rtattr->rta_type) {
175 case IFLA_ADDRESS:
176 len = RTA_PAYLOAD(rtattr);
177 memcpy(desc->mac, RTA_DATA(rtattr),
178 min_t(size_t, len, 6));
179 desc->mac_index = ifi->ifi_index;
180 set = true;
181 break;
182 case IFLA_IFNAME:
183 name = RTA_DATA(rtattr);
184 break;
185 }
186 }
187
188 if (set)
189 _debug("%s: %02x:%02x:%02x:%02x:%02x:%02x",
190 name,
191 desc->mac[0], desc->mac[1], desc->mac[2],
192 desc->mac[3], desc->mac[4], desc->mac[5]);
193
194 _leave(" = 0");
195 return 0;
196}
197
198/*
199 * read the rtnetlink response and pass to parsing routine
200 */
201static int afs_read_rtm(struct afs_rtm_desc *desc)
202{
203 struct nlmsghdr *nlhdr, tmphdr;
204 struct msghdr msg;
205 struct kvec iov[1];
206 void *data;
207 bool last = false;
208 int len, ret, remain;
209
210 _enter("");
211
212 do {
213 /* first of all peek to see how big the packet is */
214 memset(&msg, 0, sizeof(msg));
215 iov[0].iov_base = &tmphdr;
216 iov[0].iov_len = sizeof(tmphdr);
217 len = kernel_recvmsg(desc->nlsock, &msg, iov, 1,
218 sizeof(tmphdr), MSG_PEEK | MSG_TRUNC);
219 if (len < 0) {
220 _leave(" = %d [peek]", len);
221 return len;
222 }
223 if (len == 0)
224 continue;
225 if (len < sizeof(tmphdr) || len < NLMSG_PAYLOAD(&tmphdr, 0)) {
226 _leave(" = -EMSGSIZE");
227 return -EMSGSIZE;
228 }
229
230 if (desc->datamax < len) {
231 kfree(desc->data);
232 desc->data = NULL;
233 data = kmalloc(len, GFP_KERNEL);
234 if (!data)
235 return -ENOMEM;
236 desc->data = data;
237 }
238 desc->datamax = len;
239
240 /* read all the data from this packet */
241 iov[0].iov_base = desc->data;
242 iov[0].iov_len = desc->datamax;
243 desc->datalen = kernel_recvmsg(desc->nlsock, &msg, iov, 1,
244 desc->datamax, 0);
245 if (desc->datalen < 0) {
246 _leave(" = %ld [recv]", desc->datalen);
247 return desc->datalen;
248 }
249
250 nlhdr = desc->data;
251
252 /* check if the header is valid */
253 if (!NLMSG_OK(nlhdr, desc->datalen) ||
254 nlhdr->nlmsg_type == NLMSG_ERROR) {
255 _leave(" = -EIO");
256 return -EIO;
257 }
258
259 /* see if this is the last message */
260 if (nlhdr->nlmsg_type == NLMSG_DONE ||
261 !(nlhdr->nlmsg_flags & NLM_F_MULTI))
262 last = true;
263
264 /* parse the bits we got this time */
265 nlmsg_for_each_msg(nlhdr, desc->data, desc->datalen, remain) {
266 ret = desc->parse(desc, nlhdr);
267 if (ret < 0) {
268 _leave(" = %d [parse]", ret);
269 return ret;
270 }
271 }
272
273 } while (!last);
274
275 _leave(" = 0");
276 return 0;
277}
278
279/*
280 * list the interface bound addresses to get the address and netmask
281 */
282static int afs_rtm_getaddr(struct afs_rtm_desc *desc)
283{
284 struct msghdr msg;
285 struct kvec iov[1];
286 int ret;
287
288 struct {
289 struct nlmsghdr nl_msg __attribute__((aligned(NLMSG_ALIGNTO)));
290 struct ifaddrmsg addr_msg __attribute__((aligned(NLMSG_ALIGNTO)));
291 } request;
292
293 _enter("");
294
295 memset(&request, 0, sizeof(request));
296
297 request.nl_msg.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
298 request.nl_msg.nlmsg_type = RTM_GETADDR;
299 request.nl_msg.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
300 request.nl_msg.nlmsg_seq = desc->msg_seq++;
301 request.nl_msg.nlmsg_pid = 0;
302
303 memset(&msg, 0, sizeof(msg));
304 iov[0].iov_base = &request;
305 iov[0].iov_len = sizeof(request);
306
307 ret = kernel_sendmsg(desc->nlsock, &msg, iov, 1, iov[0].iov_len);
308 _leave(" = %d", ret);
309 return ret;
310}
311
312/*
313 * list the interface link statuses to get the MTUs
314 */
315static int afs_rtm_getlink(struct afs_rtm_desc *desc)
316{
317 struct msghdr msg;
318 struct kvec iov[1];
319 int ret;
320
321 struct {
322 struct nlmsghdr nl_msg __attribute__((aligned(NLMSG_ALIGNTO)));
323 struct ifinfomsg link_msg __attribute__((aligned(NLMSG_ALIGNTO)));
324 } request;
325
326 _enter("");
327
328 memset(&request, 0, sizeof(request));
329
330 request.nl_msg.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
331 request.nl_msg.nlmsg_type = RTM_GETLINK;
332 request.nl_msg.nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
333 request.nl_msg.nlmsg_seq = desc->msg_seq++;
334 request.nl_msg.nlmsg_pid = 0;
335
336 memset(&msg, 0, sizeof(msg));
337 iov[0].iov_base = &request;
338 iov[0].iov_len = sizeof(request);
339
340 ret = kernel_sendmsg(desc->nlsock, &msg, iov, 1, iov[0].iov_len);
341 _leave(" = %d", ret);
342 return ret;
343}
344
345/*
346 * cull any interface records for which there isn't an MTU value
347 */
348static void afs_cull_interfaces(struct afs_rtm_desc *desc)
349{
350 struct afs_interface *bufs = desc->bufs;
351 size_t nbufs = desc->nbufs;
352 int loop, point = 0;
353
354 _enter("{%zu}", nbufs);
355
356 for (loop = 0; loop < nbufs; loop++) {
357 if (desc->bufs[loop].mtu != 0) {
358 if (loop != point) {
359 ASSERTCMP(loop, >, point);
360 bufs[point] = bufs[loop];
361 }
362 point++;
363 }
364 }
365
366 desc->nbufs = point;
367 _leave(" [%zu/%zu]", desc->nbufs, nbufs);
368}
369
370/*
371 * get a list of this system's interface IPv4 addresses, netmasks and MTUs
372 * - returns the number of interface records in the buffer
373 */
374int afs_get_ipv4_interfaces(struct afs_interface *bufs, size_t maxbufs,
375 bool wantloopback)
376{
377 struct afs_rtm_desc desc;
378 int ret, loop;
379
380 _enter("");
381
382 memset(&desc, 0, sizeof(desc));
383 desc.bufs = bufs;
384 desc.maxbufs = maxbufs;
385 desc.wantloopback = wantloopback;
386
387 ret = sock_create_kern(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE,
388 &desc.nlsock);
389 if (ret < 0) {
390 _leave(" = %d [sock]", ret);
391 return ret;
392 }
393
394 /* issue RTM_GETADDR */
395 desc.parse = afs_rtm_getaddr_parse;
396 ret = afs_rtm_getaddr(&desc);
397 if (ret < 0)
398 goto error;
399 ret = afs_read_rtm(&desc);
400 if (ret < 0)
401 goto error;
402
403 /* issue RTM_GETLINK */
404 desc.parse = afs_rtm_getlink_if_parse;
405 ret = afs_rtm_getlink(&desc);
406 if (ret < 0)
407 goto error;
408 ret = afs_read_rtm(&desc);
409 if (ret < 0)
410 goto error;
411
412 afs_cull_interfaces(&desc);
413 ret = desc.nbufs;
414
415 for (loop = 0; loop < ret; loop++)
416 _debug("[%d] "NIPQUAD_FMT"/"NIPQUAD_FMT" mtu %u",
417 bufs[loop].index,
418 NIPQUAD(bufs[loop].address),
419 NIPQUAD(bufs[loop].netmask),
420 bufs[loop].mtu);
421
422error:
423 kfree(desc.data);
424 sock_release(desc.nlsock);
425 _leave(" = %d", ret);
426 return ret;
427}
428
429/*
430 * get a MAC address from a random ethernet interface that has a real one
431 * - the buffer should be 6 bytes in size
432 */
433int afs_get_MAC_address(u8 mac[6])
434{
435 struct afs_rtm_desc desc;
436 int ret;
437
438 _enter("");
439
440 memset(&desc, 0, sizeof(desc));
441 desc.mac = mac;
442 desc.mac_index = UINT_MAX;
443
444 ret = sock_create_kern(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE,
445 &desc.nlsock);
446 if (ret < 0) {
447 _leave(" = %d [sock]", ret);
448 return ret;
449 }
450
451 /* issue RTM_GETLINK */
452 desc.parse = afs_rtm_getlink_mac_parse;
453 ret = afs_rtm_getlink(&desc);
454 if (ret < 0)
455 goto error;
456 ret = afs_read_rtm(&desc);
457 if (ret < 0)
458 goto error;
459
460 if (desc.mac_index < UINT_MAX) {
461 /* got a MAC address */
462 _debug("[%d] %02x:%02x:%02x:%02x:%02x:%02x",
463 desc.mac_index,
464 mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
465 } else {
466 ret = -ENONET;
467 }
468
469error:
470 sock_release(desc.nlsock);
471 _leave(" = %d", ret);
472 return ret;
473}
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index 74cce174882a..3370cdb72566 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -416,8 +416,8 @@ fill_in_record:
416 goto error_abandon; 416 goto error_abandon;
417 spin_lock(&vl->lock); 417 spin_lock(&vl->lock);
418 vl->state = AFS_VL_VALID; 418 vl->state = AFS_VL_VALID;
419 wake_up(&vl->waitq);
420 spin_unlock(&vl->lock); 419 spin_unlock(&vl->lock);
420 wake_up(&vl->waitq);
421 421
422 /* schedule for regular updates */ 422 /* schedule for regular updates */
423 afs_vlocation_queue_for_updates(vl); 423 afs_vlocation_queue_for_updates(vl);
@@ -442,7 +442,7 @@ found_in_memory:
442 442
443 _debug("invalid [state %d]", state); 443 _debug("invalid [state %d]", state);
444 444
445 if ((state == AFS_VL_NEW || state == AFS_VL_NO_VOLUME)) { 445 if (state == AFS_VL_NEW || state == AFS_VL_NO_VOLUME) {
446 vl->state = AFS_VL_CREATING; 446 vl->state = AFS_VL_CREATING;
447 spin_unlock(&vl->lock); 447 spin_unlock(&vl->lock);
448 goto fill_in_record; 448 goto fill_in_record;
@@ -453,11 +453,10 @@ found_in_memory:
453 _debug("wait"); 453 _debug("wait");
454 454
455 spin_unlock(&vl->lock); 455 spin_unlock(&vl->lock);
456 ret = wait_event_interruptible( 456 ret = wait_event_interruptible(vl->waitq,
457 vl->waitq, 457 vl->state == AFS_VL_NEW ||
458 vl->state == AFS_VL_NEW || 458 vl->state == AFS_VL_VALID ||
459 vl->state == AFS_VL_VALID || 459 vl->state == AFS_VL_NO_VOLUME);
460 vl->state == AFS_VL_NO_VOLUME);
461 if (ret < 0) 460 if (ret < 0)
462 goto error; 461 goto error;
463 spin_lock(&vl->lock); 462 spin_lock(&vl->lock);
@@ -471,8 +470,8 @@ success:
471error_abandon: 470error_abandon:
472 spin_lock(&vl->lock); 471 spin_lock(&vl->lock);
473 vl->state = AFS_VL_NEW; 472 vl->state = AFS_VL_NEW;
474 wake_up(&vl->waitq);
475 spin_unlock(&vl->lock); 473 spin_unlock(&vl->lock);
474 wake_up(&vl->waitq);
476error: 475error:
477 ASSERT(vl != NULL); 476 ASSERT(vl != NULL);
478 afs_put_vlocation(vl); 477 afs_put_vlocation(vl);
@@ -603,7 +602,7 @@ int __init afs_vlocation_update_init(void)
603/* 602/*
604 * discard all the volume location records for rmmod 603 * discard all the volume location records for rmmod
605 */ 604 */
606void __exit afs_vlocation_purge(void) 605void afs_vlocation_purge(void)
607{ 606{
608 afs_vlocation_timeout = 0; 607 afs_vlocation_timeout = 0;
609 608
@@ -675,7 +674,6 @@ static void afs_vlocation_updater(struct work_struct *work)
675 case 0: 674 case 0:
676 afs_vlocation_apply_update(vl, &vldb); 675 afs_vlocation_apply_update(vl, &vldb);
677 vl->state = AFS_VL_VALID; 676 vl->state = AFS_VL_VALID;
678 wake_up(&vl->waitq);
679 break; 677 break;
680 case -ENOMEDIUM: 678 case -ENOMEDIUM:
681 vl->state = AFS_VL_VOLUME_DELETED; 679 vl->state = AFS_VL_VOLUME_DELETED;
@@ -685,6 +683,7 @@ static void afs_vlocation_updater(struct work_struct *work)
685 break; 683 break;
686 } 684 }
687 spin_unlock(&vl->lock); 685 spin_unlock(&vl->lock);
686 wake_up(&vl->waitq);
688 687
689 /* and then reschedule */ 688 /* and then reschedule */
690 _debug("reschedule"); 689 _debug("reschedule");
diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c
index a1904ab8426a..ec814660209f 100644
--- a/fs/afs/vnode.c
+++ b/fs/afs/vnode.c
@@ -261,7 +261,7 @@ int afs_vnode_fetch_status(struct afs_vnode *vnode,
261 261
262 DECLARE_WAITQUEUE(myself, current); 262 DECLARE_WAITQUEUE(myself, current);
263 263
264 _enter("%s,{%u,%u,%u}", 264 _enter("%s,{%x:%u.%u}",
265 vnode->volume->vlocation->vldb.name, 265 vnode->volume->vlocation->vldb.name,
266 vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique); 266 vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
267 267
@@ -389,7 +389,7 @@ int afs_vnode_fetch_data(struct afs_vnode *vnode, struct key *key,
389 struct afs_server *server; 389 struct afs_server *server;
390 int ret; 390 int ret;
391 391
392 _enter("%s{%u,%u,%u},%x,,,", 392 _enter("%s{%x:%u.%u},%x,,,",
393 vnode->volume->vlocation->vldb.name, 393 vnode->volume->vlocation->vldb.name,
394 vnode->fid.vid, 394 vnode->fid.vid,
395 vnode->fid.vnode, 395 vnode->fid.vnode,
@@ -446,7 +446,7 @@ int afs_vnode_create(struct afs_vnode *vnode, struct key *key,
446 struct afs_server *server; 446 struct afs_server *server;
447 int ret; 447 int ret;
448 448
449 _enter("%s{%u,%u,%u},%x,%s,,", 449 _enter("%s{%x:%u.%u},%x,%s,,",
450 vnode->volume->vlocation->vldb.name, 450 vnode->volume->vlocation->vldb.name,
451 vnode->fid.vid, 451 vnode->fid.vid,
452 vnode->fid.vnode, 452 vnode->fid.vnode,
@@ -502,7 +502,7 @@ int afs_vnode_remove(struct afs_vnode *vnode, struct key *key, const char *name,
502 struct afs_server *server; 502 struct afs_server *server;
503 int ret; 503 int ret;
504 504
505 _enter("%s{%u,%u,%u},%x,%s", 505 _enter("%s{%x:%u.%u},%x,%s",
506 vnode->volume->vlocation->vldb.name, 506 vnode->volume->vlocation->vldb.name,
507 vnode->fid.vid, 507 vnode->fid.vid,
508 vnode->fid.vnode, 508 vnode->fid.vnode,
@@ -557,7 +557,7 @@ extern int afs_vnode_link(struct afs_vnode *dvnode, struct afs_vnode *vnode,
557 struct afs_server *server; 557 struct afs_server *server;
558 int ret; 558 int ret;
559 559
560 _enter("%s{%u,%u,%u},%s{%u,%u,%u},%x,%s", 560 _enter("%s{%x:%u.%u},%s{%x:%u.%u},%x,%s",
561 dvnode->volume->vlocation->vldb.name, 561 dvnode->volume->vlocation->vldb.name,
562 dvnode->fid.vid, 562 dvnode->fid.vid,
563 dvnode->fid.vnode, 563 dvnode->fid.vnode,
@@ -628,7 +628,7 @@ int afs_vnode_symlink(struct afs_vnode *vnode, struct key *key,
628 struct afs_server *server; 628 struct afs_server *server;
629 int ret; 629 int ret;
630 630
631 _enter("%s{%u,%u,%u},%x,%s,%s,,,", 631 _enter("%s{%x:%u.%u},%x,%s,%s,,,",
632 vnode->volume->vlocation->vldb.name, 632 vnode->volume->vlocation->vldb.name,
633 vnode->fid.vid, 633 vnode->fid.vid,
634 vnode->fid.vnode, 634 vnode->fid.vnode,
@@ -687,7 +687,7 @@ int afs_vnode_rename(struct afs_vnode *orig_dvnode,
687 struct afs_server *server; 687 struct afs_server *server;
688 int ret; 688 int ret;
689 689
690 _enter("%s{%u,%u,%u},%s{%u,%u,%u},%x,%s,%s", 690 _enter("%s{%x:%u.%u},%s{%u,%u,%u},%x,%s,%s",
691 orig_dvnode->volume->vlocation->vldb.name, 691 orig_dvnode->volume->vlocation->vldb.name,
692 orig_dvnode->fid.vid, 692 orig_dvnode->fid.vid,
693 orig_dvnode->fid.vnode, 693 orig_dvnode->fid.vnode,
@@ -753,3 +753,110 @@ no_server:
753 _leave(" = %ld [cnt %d]", PTR_ERR(server), orig_dvnode->update_cnt); 753 _leave(" = %ld [cnt %d]", PTR_ERR(server), orig_dvnode->update_cnt);
754 return PTR_ERR(server); 754 return PTR_ERR(server);
755} 755}
756
757/*
758 * write to a file
759 */
760int afs_vnode_store_data(struct afs_writeback *wb, pgoff_t first, pgoff_t last,
761 unsigned offset, unsigned to)
762{
763 struct afs_server *server;
764 struct afs_vnode *vnode = wb->vnode;
765 int ret;
766
767 _enter("%s{%x:%u.%u},%x,%lx,%lx,%x,%x",
768 vnode->volume->vlocation->vldb.name,
769 vnode->fid.vid,
770 vnode->fid.vnode,
771 vnode->fid.unique,
772 key_serial(wb->key),
773 first, last, offset, to);
774
775 /* this op will fetch the status */
776 spin_lock(&vnode->lock);
777 vnode->update_cnt++;
778 spin_unlock(&vnode->lock);
779
780 do {
781 /* pick a server to query */
782 server = afs_volume_pick_fileserver(vnode);
783 if (IS_ERR(server))
784 goto no_server;
785
786 _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
787
788 ret = afs_fs_store_data(server, wb, first, last, offset, to,
789 &afs_sync_call);
790
791 } while (!afs_volume_release_fileserver(vnode, server, ret));
792
793 /* adjust the flags */
794 if (ret == 0) {
795 afs_vnode_finalise_status_update(vnode, server);
796 afs_put_server(server);
797 } else {
798 afs_vnode_status_update_failed(vnode, ret);
799 }
800
801 _leave(" = %d", ret);
802 return ret;
803
804no_server:
805 spin_lock(&vnode->lock);
806 vnode->update_cnt--;
807 ASSERTCMP(vnode->update_cnt, >=, 0);
808 spin_unlock(&vnode->lock);
809 return PTR_ERR(server);
810}
811
812/*
813 * set the attributes on a file
814 */
815int afs_vnode_setattr(struct afs_vnode *vnode, struct key *key,
816 struct iattr *attr)
817{
818 struct afs_server *server;
819 int ret;
820
821 _enter("%s{%x:%u.%u},%x",
822 vnode->volume->vlocation->vldb.name,
823 vnode->fid.vid,
824 vnode->fid.vnode,
825 vnode->fid.unique,
826 key_serial(key));
827
828 /* this op will fetch the status */
829 spin_lock(&vnode->lock);
830 vnode->update_cnt++;
831 spin_unlock(&vnode->lock);
832
833 do {
834 /* pick a server to query */
835 server = afs_volume_pick_fileserver(vnode);
836 if (IS_ERR(server))
837 goto no_server;
838
839 _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
840
841 ret = afs_fs_setattr(server, key, vnode, attr, &afs_sync_call);
842
843 } while (!afs_volume_release_fileserver(vnode, server, ret));
844
845 /* adjust the flags */
846 if (ret == 0) {
847 afs_vnode_finalise_status_update(vnode, server);
848 afs_put_server(server);
849 } else {
850 afs_vnode_status_update_failed(vnode, ret);
851 }
852
853 _leave(" = %d", ret);
854 return ret;
855
856no_server:
857 spin_lock(&vnode->lock);
858 vnode->update_cnt--;
859 ASSERTCMP(vnode->update_cnt, >=, 0);
860 spin_unlock(&vnode->lock);
861 return PTR_ERR(server);
862}
diff --git a/fs/afs/write.c b/fs/afs/write.c
new file mode 100644
index 000000000000..83ff29262816
--- /dev/null
+++ b/fs/afs/write.c
@@ -0,0 +1,835 @@
1/* handling of writes to regular files and writing back to the server
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/slab.h>
13#include <linux/fs.h>
14#include <linux/pagemap.h>
15#include <linux/writeback.h>
16#include <linux/pagevec.h>
17#include "internal.h"
18
19static int afs_write_back_from_locked_page(struct afs_writeback *wb,
20 struct page *page);
21
22/*
23 * mark a page as having been made dirty and thus needing writeback
24 */
25int afs_set_page_dirty(struct page *page)
26{
27 _enter("");
28 return __set_page_dirty_nobuffers(page);
29}
30
31/*
32 * unlink a writeback record because its usage has reached zero
33 * - must be called with the wb->vnode->writeback_lock held
34 */
35static void afs_unlink_writeback(struct afs_writeback *wb)
36{
37 struct afs_writeback *front;
38 struct afs_vnode *vnode = wb->vnode;
39
40 list_del_init(&wb->link);
41 if (!list_empty(&vnode->writebacks)) {
42 /* if an fsync rises to the front of the queue then wake it
43 * up */
44 front = list_entry(vnode->writebacks.next,
45 struct afs_writeback, link);
46 if (front->state == AFS_WBACK_SYNCING) {
47 _debug("wake up sync");
48 front->state = AFS_WBACK_COMPLETE;
49 wake_up(&front->waitq);
50 }
51 }
52}
53
54/*
55 * free a writeback record
56 */
57static void afs_free_writeback(struct afs_writeback *wb)
58{
59 _enter("");
60 key_put(wb->key);
61 kfree(wb);
62}
63
64/*
65 * dispose of a reference to a writeback record
66 */
67void afs_put_writeback(struct afs_writeback *wb)
68{
69 struct afs_vnode *vnode = wb->vnode;
70
71 _enter("{%d}", wb->usage);
72
73 spin_lock(&vnode->writeback_lock);
74 if (--wb->usage == 0)
75 afs_unlink_writeback(wb);
76 else
77 wb = NULL;
78 spin_unlock(&vnode->writeback_lock);
79 if (wb)
80 afs_free_writeback(wb);
81}
82
83/*
84 * partly or wholly fill a page that's under preparation for writing
85 */
86static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
87 unsigned start, unsigned len, struct page *page)
88{
89 int ret;
90
91 _enter(",,%u,%u", start, len);
92
93 ASSERTCMP(start + len, <=, PAGE_SIZE);
94
95 ret = afs_vnode_fetch_data(vnode, key, start, len, page);
96 if (ret < 0) {
97 if (ret == -ENOENT) {
98 _debug("got NOENT from server"
99 " - marking file deleted and stale");
100 set_bit(AFS_VNODE_DELETED, &vnode->flags);
101 ret = -ESTALE;
102 }
103 }
104
105 _leave(" = %d", ret);
106 return ret;
107}
108
109/*
110 * prepare a page for being written to
111 */
112static int afs_prepare_page(struct afs_vnode *vnode, struct page *page,
113 struct key *key, unsigned offset, unsigned to)
114{
115 unsigned eof, tail, start, stop, len;
116 loff_t i_size, pos;
117 void *p;
118 int ret;
119
120 _enter("");
121
122 if (offset == 0 && to == PAGE_SIZE)
123 return 0;
124
125 p = kmap(page);
126
127 i_size = i_size_read(&vnode->vfs_inode);
128 pos = (loff_t) page->index << PAGE_SHIFT;
129 if (pos >= i_size) {
130 /* partial write, page beyond EOF */
131 _debug("beyond");
132 if (offset > 0)
133 memset(p, 0, offset);
134 if (to < PAGE_SIZE)
135 memset(p + to, 0, PAGE_SIZE - to);
136 kunmap(page);
137 return 0;
138 }
139
140 if (i_size - pos >= PAGE_SIZE) {
141 /* partial write, page entirely before EOF */
142 _debug("before");
143 tail = eof = PAGE_SIZE;
144 } else {
145 /* partial write, page overlaps EOF */
146 eof = i_size - pos;
147 _debug("overlap %u", eof);
148 tail = max(eof, to);
149 if (tail < PAGE_SIZE)
150 memset(p + tail, 0, PAGE_SIZE - tail);
151 if (offset > eof)
152 memset(p + eof, 0, PAGE_SIZE - eof);
153 }
154
155 kunmap(p);
156
157 ret = 0;
158 if (offset > 0 || eof > to) {
159 /* need to fill one or two bits that aren't going to be written
160 * (cover both fillers in one read if there are two) */
161 start = (offset > 0) ? 0 : to;
162 stop = (eof > to) ? eof : offset;
163 len = stop - start;
164 _debug("wr=%u-%u av=0-%u rd=%u@%u",
165 offset, to, eof, start, len);
166 ret = afs_fill_page(vnode, key, start, len, page);
167 }
168
169 _leave(" = %d", ret);
170 return ret;
171}
172
173/*
174 * prepare to perform part of a write to a page
175 * - the caller holds the page locked, preventing it from being written out or
176 * modified by anyone else
177 */
178int afs_prepare_write(struct file *file, struct page *page,
179 unsigned offset, unsigned to)
180{
181 struct afs_writeback *candidate, *wb;
182 struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode);
183 struct key *key = file->private_data;
184 pgoff_t index;
185 int ret;
186
187 _enter("{%x:%u},{%lx},%u,%u",
188 vnode->fid.vid, vnode->fid.vnode, page->index, offset, to);
189
190 candidate = kzalloc(sizeof(*candidate), GFP_KERNEL);
191 if (!candidate)
192 return -ENOMEM;
193 candidate->vnode = vnode;
194 candidate->first = candidate->last = page->index;
195 candidate->offset_first = offset;
196 candidate->to_last = to;
197 candidate->usage = 1;
198 candidate->state = AFS_WBACK_PENDING;
199 init_waitqueue_head(&candidate->waitq);
200
201 if (!PageUptodate(page)) {
202 _debug("not up to date");
203 ret = afs_prepare_page(vnode, page, key, offset, to);
204 if (ret < 0) {
205 kfree(candidate);
206 _leave(" = %d [prep]", ret);
207 return ret;
208 }
209 SetPageUptodate(page);
210 }
211
212try_again:
213 index = page->index;
214 spin_lock(&vnode->writeback_lock);
215
216 /* see if this page is already pending a writeback under a suitable key
217 * - if so we can just join onto that one */
218 wb = (struct afs_writeback *) page_private(page);
219 if (wb) {
220 if (wb->key == key && wb->state == AFS_WBACK_PENDING)
221 goto subsume_in_current_wb;
222 goto flush_conflicting_wb;
223 }
224
225 if (index > 0) {
226 /* see if we can find an already pending writeback that we can
227 * append this page to */
228 list_for_each_entry(wb, &vnode->writebacks, link) {
229 if (wb->last == index - 1 && wb->key == key &&
230 wb->state == AFS_WBACK_PENDING)
231 goto append_to_previous_wb;
232 }
233 }
234
235 list_add_tail(&candidate->link, &vnode->writebacks);
236 candidate->key = key_get(key);
237 spin_unlock(&vnode->writeback_lock);
238 SetPagePrivate(page);
239 set_page_private(page, (unsigned long) candidate);
240 _leave(" = 0 [new]");
241 return 0;
242
243subsume_in_current_wb:
244 _debug("subsume");
245 ASSERTRANGE(wb->first, <=, index, <=, wb->last);
246 if (index == wb->first && offset < wb->offset_first)
247 wb->offset_first = offset;
248 if (index == wb->last && to > wb->to_last)
249 wb->to_last = to;
250 spin_unlock(&vnode->writeback_lock);
251 kfree(candidate);
252 _leave(" = 0 [sub]");
253 return 0;
254
255append_to_previous_wb:
256 _debug("append into %lx-%lx", wb->first, wb->last);
257 wb->usage++;
258 wb->last++;
259 wb->to_last = to;
260 spin_unlock(&vnode->writeback_lock);
261 SetPagePrivate(page);
262 set_page_private(page, (unsigned long) wb);
263 kfree(candidate);
264 _leave(" = 0 [app]");
265 return 0;
266
267 /* the page is currently bound to another context, so if it's dirty we
268 * need to flush it before we can use the new context */
269flush_conflicting_wb:
270 _debug("flush conflict");
271 if (wb->state == AFS_WBACK_PENDING)
272 wb->state = AFS_WBACK_CONFLICTING;
273 spin_unlock(&vnode->writeback_lock);
274 if (PageDirty(page)) {
275 ret = afs_write_back_from_locked_page(wb, page);
276 if (ret < 0) {
277 afs_put_writeback(candidate);
278 _leave(" = %d", ret);
279 return ret;
280 }
281 }
282
283 /* the page holds a ref on the writeback record */
284 afs_put_writeback(wb);
285 set_page_private(page, 0);
286 ClearPagePrivate(page);
287 goto try_again;
288}
289
290/*
291 * finalise part of a write to a page
292 */
293int afs_commit_write(struct file *file, struct page *page,
294 unsigned offset, unsigned to)
295{
296 struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode);
297 loff_t i_size, maybe_i_size;
298
299 _enter("{%x:%u},{%lx},%u,%u",
300 vnode->fid.vid, vnode->fid.vnode, page->index, offset, to);
301
302 maybe_i_size = (loff_t) page->index << PAGE_SHIFT;
303 maybe_i_size += to;
304
305 i_size = i_size_read(&vnode->vfs_inode);
306 if (maybe_i_size > i_size) {
307 spin_lock(&vnode->writeback_lock);
308 i_size = i_size_read(&vnode->vfs_inode);
309 if (maybe_i_size > i_size)
310 i_size_write(&vnode->vfs_inode, maybe_i_size);
311 spin_unlock(&vnode->writeback_lock);
312 }
313
314 set_page_dirty(page);
315
316 if (PageDirty(page))
317 _debug("dirtied");
318
319 return 0;
320}
321
322/*
323 * kill all the pages in the given range
324 */
325static void afs_kill_pages(struct afs_vnode *vnode, bool error,
326 pgoff_t first, pgoff_t last)
327{
328 struct pagevec pv;
329 unsigned count, loop;
330
331 _enter("{%x:%u},%lx-%lx",
332 vnode->fid.vid, vnode->fid.vnode, first, last);
333
334 pagevec_init(&pv, 0);
335
336 do {
337 _debug("kill %lx-%lx", first, last);
338
339 count = last - first + 1;
340 if (count > PAGEVEC_SIZE)
341 count = PAGEVEC_SIZE;
342 pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping,
343 first, count, pv.pages);
344 ASSERTCMP(pv.nr, ==, count);
345
346 for (loop = 0; loop < count; loop++) {
347 ClearPageUptodate(pv.pages[loop]);
348 if (error)
349 SetPageError(pv.pages[loop]);
350 end_page_writeback(pv.pages[loop]);
351 }
352
353 __pagevec_release(&pv);
354 } while (first < last);
355
356 _leave("");
357}
358
359/*
360 * synchronously write back the locked page and any subsequent non-locked dirty
361 * pages also covered by the same writeback record
362 */
363static int afs_write_back_from_locked_page(struct afs_writeback *wb,
364 struct page *primary_page)
365{
366 struct page *pages[8], *page;
367 unsigned long count;
368 unsigned n, offset, to;
369 pgoff_t start, first, last;
370 int loop, ret;
371
372 _enter(",%lx", primary_page->index);
373
374 count = 1;
375 if (!clear_page_dirty_for_io(primary_page))
376 BUG();
377 if (test_set_page_writeback(primary_page))
378 BUG();
379
380 /* find all consecutive lockable dirty pages, stopping when we find a
381 * page that is not immediately lockable, is not dirty or is missing,
382 * or we reach the end of the range */
383 start = primary_page->index;
384 if (start >= wb->last)
385 goto no_more;
386 start++;
387 do {
388 _debug("more %lx [%lx]", start, count);
389 n = wb->last - start + 1;
390 if (n > ARRAY_SIZE(pages))
391 n = ARRAY_SIZE(pages);
392 n = find_get_pages_contig(wb->vnode->vfs_inode.i_mapping,
393 start, n, pages);
394 _debug("fgpc %u", n);
395 if (n == 0)
396 goto no_more;
397 if (pages[0]->index != start) {
398 for (n--; n >= 0; n--)
399 put_page(pages[n]);
400 goto no_more;
401 }
402
403 for (loop = 0; loop < n; loop++) {
404 page = pages[loop];
405 if (page->index > wb->last)
406 break;
407 if (TestSetPageLocked(page))
408 break;
409 if (!PageDirty(page) ||
410 page_private(page) != (unsigned long) wb) {
411 unlock_page(page);
412 break;
413 }
414 if (!clear_page_dirty_for_io(page))
415 BUG();
416 if (test_set_page_writeback(page))
417 BUG();
418 unlock_page(page);
419 put_page(page);
420 }
421 count += loop;
422 if (loop < n) {
423 for (; loop < n; loop++)
424 put_page(pages[loop]);
425 goto no_more;
426 }
427
428 start += loop;
429 } while (start <= wb->last && count < 65536);
430
431no_more:
432 /* we now have a contiguous set of dirty pages, each with writeback set
433 * and the dirty mark cleared; the first page is locked and must remain
434 * so, all the rest are unlocked */
435 first = primary_page->index;
436 last = first + count - 1;
437
438 offset = (first == wb->first) ? wb->offset_first : 0;
439 to = (last == wb->last) ? wb->to_last : PAGE_SIZE;
440
441 _debug("write back %lx[%u..] to %lx[..%u]", first, offset, last, to);
442
443 ret = afs_vnode_store_data(wb, first, last, offset, to);
444 if (ret < 0) {
445 switch (ret) {
446 case -EDQUOT:
447 case -ENOSPC:
448 set_bit(AS_ENOSPC,
449 &wb->vnode->vfs_inode.i_mapping->flags);
450 break;
451 case -EROFS:
452 case -EIO:
453 case -EREMOTEIO:
454 case -EFBIG:
455 case -ENOENT:
456 case -ENOMEDIUM:
457 case -ENXIO:
458 afs_kill_pages(wb->vnode, true, first, last);
459 set_bit(AS_EIO, &wb->vnode->vfs_inode.i_mapping->flags);
460 break;
461 case -EACCES:
462 case -EPERM:
463 case -ENOKEY:
464 case -EKEYEXPIRED:
465 case -EKEYREJECTED:
466 case -EKEYREVOKED:
467 afs_kill_pages(wb->vnode, false, first, last);
468 break;
469 default:
470 break;
471 }
472 } else {
473 ret = count;
474 }
475
476 _leave(" = %d", ret);
477 return ret;
478}
479
480/*
481 * write a page back to the server
482 * - the caller locked the page for us
483 */
484int afs_writepage(struct page *page, struct writeback_control *wbc)
485{
486 struct backing_dev_info *bdi = page->mapping->backing_dev_info;
487 struct afs_writeback *wb;
488 int ret;
489
490 _enter("{%lx},", page->index);
491
492 if (wbc->sync_mode != WB_SYNC_NONE)
493 wait_on_page_writeback(page);
494
495 if (PageWriteback(page) || !PageDirty(page)) {
496 unlock_page(page);
497 return 0;
498 }
499
500 wb = (struct afs_writeback *) page_private(page);
501 ASSERT(wb != NULL);
502
503 ret = afs_write_back_from_locked_page(wb, page);
504 unlock_page(page);
505 if (ret < 0) {
506 _leave(" = %d", ret);
507 return 0;
508 }
509
510 wbc->nr_to_write -= ret;
511 if (wbc->nonblocking && bdi_write_congested(bdi))
512 wbc->encountered_congestion = 1;
513
514 _leave(" = 0");
515 return 0;
516}
517
/*
 * write a region of pages back to the server
 * - index..end is the (inclusive) range of page indices to consider
 * - *_next is set to the index at which a later pass should resume
 * - returns 0 on success or a negative error from the store operation
 */
int afs_writepages_region(struct address_space *mapping,
			  struct writeback_control *wbc,
			  pgoff_t index, pgoff_t end, pgoff_t *_next)
{
	struct backing_dev_info *bdi = mapping->backing_dev_info;
	struct afs_writeback *wb;
	struct page *page;
	int ret, n;

	_enter(",,%lx,%lx,", index, end);

	do {
		/* fetch the next dirty-tagged page at or after 'index'; the
		 * lookup advances 'index' for the next iteration */
		n = find_get_pages_tag(mapping, &index, PAGECACHE_TAG_DIRTY,
				       1, &page);
		if (!n)
			break;

		_debug("wback %lx", page->index);

		/* the dirty page found may lie beyond the requested region */
		if (page->index > end) {
			*_next = index;
			page_cache_release(page);
			_leave(" = 0 [%lx]", *_next);
			return 0;
		}

		/* at this point we hold neither mapping->tree_lock nor lock on
		 * the page itself: the page may be truncated or invalidated
		 * (changing page->mapping to NULL), or even swizzled back from
		 * swapper_space to tmpfs file mapping
		 */
		lock_page(page);

		/* the page was truncated or remapped whilst we were unlocked;
		 * drop it and move on */
		if (page->mapping != mapping) {
			unlock_page(page);
			page_cache_release(page);
			continue;
		}

		/* for a data-integrity sync, wait for any write already in
		 * flight rather than skipping the page */
		if (wbc->sync_mode != WB_SYNC_NONE)
			wait_on_page_writeback(page);

		if (PageWriteback(page) || !PageDirty(page)) {
			unlock_page(page);
			continue;
		}

		/* a dirty AFS page carries its afs_writeback record in
		 * page_private */
		wb = (struct afs_writeback *) page_private(page);
		ASSERT(wb != NULL);

		spin_lock(&wb->vnode->writeback_lock);
		wb->state = AFS_WBACK_WRITING;
		spin_unlock(&wb->vnode->writeback_lock);

		ret = afs_write_back_from_locked_page(wb, page);
		unlock_page(page);
		page_cache_release(page);
		if (ret < 0) {
			_leave(" = %d", ret);
			return ret;
		}

		/* ret (>= 0) is treated as the number of pages written */
		wbc->nr_to_write -= ret;

		/* give up early if the backing device became congested and
		 * the caller asked not to block */
		if (wbc->nonblocking && bdi_write_congested(bdi)) {
			wbc->encountered_congestion = 1;
			break;
		}

		cond_resched();
	} while (index < end && wbc->nr_to_write > 0);

	*_next = index;
	_leave(" = 0 [%lx]", *_next);
	return 0;
}
597
598/*
599 * write some of the pending data back to the server
600 */
601int afs_writepages(struct address_space *mapping,
602 struct writeback_control *wbc)
603{
604 struct backing_dev_info *bdi = mapping->backing_dev_info;
605 pgoff_t start, end, next;
606 int ret;
607
608 _enter("");
609
610 if (wbc->nonblocking && bdi_write_congested(bdi)) {
611 wbc->encountered_congestion = 1;
612 _leave(" = 0 [congest]");
613 return 0;
614 }
615
616 if (wbc->range_cyclic) {
617 start = mapping->writeback_index;
618 end = -1;
619 ret = afs_writepages_region(mapping, wbc, start, end, &next);
620 if (start > 0 && wbc->nr_to_write > 0 && ret == 0 &&
621 !(wbc->nonblocking && wbc->encountered_congestion))
622 ret = afs_writepages_region(mapping, wbc, 0, start,
623 &next);
624 mapping->writeback_index = next;
625 } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
626 end = (pgoff_t)(LLONG_MAX >> PAGE_CACHE_SHIFT);
627 ret = afs_writepages_region(mapping, wbc, 0, end, &next);
628 if (wbc->nr_to_write > 0)
629 mapping->writeback_index = next;
630 } else {
631 start = wbc->range_start >> PAGE_CACHE_SHIFT;
632 end = wbc->range_end >> PAGE_CACHE_SHIFT;
633 ret = afs_writepages_region(mapping, wbc, start, end, &next);
634 }
635
636 _leave(" = %d", ret);
637 return ret;
638}
639
640/*
641 * write an inode back
642 */
643int afs_write_inode(struct inode *inode, int sync)
644{
645 struct afs_vnode *vnode = AFS_FS_I(inode);
646 int ret;
647
648 _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode);
649
650 ret = 0;
651 if (sync) {
652 ret = filemap_fdatawait(inode->i_mapping);
653 if (ret < 0)
654 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
655 }
656
657 _leave(" = %d", ret);
658 return ret;
659}
660
/*
 * completion of write to server
 * - detaches the pages in the range call->first..call->last from the
 *   writeback record once the server has accepted them, ending page
 *   writeback and releasing the record when its usage count hits zero
 */
void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call)
{
	struct afs_writeback *wb = call->wb;
	struct pagevec pv;
	unsigned count, loop;
	pgoff_t first = call->first, last = call->last;
	bool free_wb;

	_enter("{%x:%u},{%lx-%lx}",
	       vnode->fid.vid, vnode->fid.vnode, first, last);

	ASSERT(wb != NULL);

	pagevec_init(&pv, 0);

	do {
		_debug("attach %lx-%lx", first, last);

		/* process the written range in pagevec-sized batches */
		count = last - first + 1;
		if (count > PAGEVEC_SIZE)
			count = PAGEVEC_SIZE;
		pv.nr = find_get_pages_contig(call->mapping, first, count,
					      pv.pages);
		/* the pages were pinned for writeback, so all of them are
		 * expected to still be present */
		ASSERTCMP(pv.nr, ==, count);

		spin_lock(&vnode->writeback_lock);
		for (loop = 0; loop < count; loop++) {
			struct page *page = pv.pages[loop];
			end_page_writeback(page);
			/* detach the record from the page and drop one usage
			 * count - but only if the page still points at this
			 * particular record */
			if (page_private(page) == (unsigned long) wb) {
				set_page_private(page, 0);
				ClearPagePrivate(page);
				wb->usage--;
			}
		}
		/* note whether the record became unused; the actual free is
		 * deferred until after the lock is dropped */
		free_wb = false;
		if (wb->usage == 0) {
			afs_unlink_writeback(wb);
			free_wb = true;
		}
		spin_unlock(&vnode->writeback_lock);
		first += count;
		if (free_wb) {
			afs_free_writeback(wb);
			wb = NULL;
		}

		__pagevec_release(&pv);
	} while (first < last);

	_leave("");
}
716
717/*
718 * write to an AFS file
719 */
720ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov,
721 unsigned long nr_segs, loff_t pos)
722{
723 struct dentry *dentry = iocb->ki_filp->f_path.dentry;
724 struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
725 ssize_t result;
726 size_t count = iov_length(iov, nr_segs);
727 int ret;
728
729 _enter("{%x.%u},{%zu},%lu,",
730 vnode->fid.vid, vnode->fid.vnode, count, nr_segs);
731
732 if (IS_SWAPFILE(&vnode->vfs_inode)) {
733 printk(KERN_INFO
734 "AFS: Attempt to write to active swap file!\n");
735 return -EBUSY;
736 }
737
738 if (!count)
739 return 0;
740
741 result = generic_file_aio_write(iocb, iov, nr_segs, pos);
742 if (IS_ERR_VALUE(result)) {
743 _leave(" = %zd", result);
744 return result;
745 }
746
747 /* return error values for O_SYNC and IS_SYNC() */
748 if (IS_SYNC(&vnode->vfs_inode) || iocb->ki_filp->f_flags & O_SYNC) {
749 ret = afs_fsync(iocb->ki_filp, dentry, 1);
750 if (ret < 0)
751 result = ret;
752 }
753
754 _leave(" = %zd", result);
755 return result;
756}
757
758/*
759 * flush the vnode to the fileserver
760 */
761int afs_writeback_all(struct afs_vnode *vnode)
762{
763 struct address_space *mapping = vnode->vfs_inode.i_mapping;
764 struct writeback_control wbc = {
765 .bdi = mapping->backing_dev_info,
766 .sync_mode = WB_SYNC_ALL,
767 .nr_to_write = LONG_MAX,
768 .for_writepages = 1,
769 .range_cyclic = 1,
770 };
771 int ret;
772
773 _enter("");
774
775 ret = mapping->a_ops->writepages(mapping, &wbc);
776 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
777
778 _leave(" = %d", ret);
779 return ret;
780}
781
/*
 * flush any dirty pages for this process, and check for write errors.
 * - the return status from this call provides a reliable indication of
 *   whether any write errors occurred for this process.
 * - works by queuing a marker writeback record behind all outstanding
 *   writes and waiting for it to reach the front of the queue
 */
int afs_fsync(struct file *file, struct dentry *dentry, int datasync)
{
	struct afs_writeback *wb, *xwb;
	struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
	int ret;

	_enter("{%x:%u},{n=%s},%d",
	       vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name,
	       datasync);

	/* use a writeback record as a marker in the queue - when this reaches
	 * the front of the queue, all the outstanding writes are either
	 * completed or rejected */
	wb = kzalloc(sizeof(*wb), GFP_KERNEL);
	if (!wb)
		return -ENOMEM;
	/* the marker nominally spans the whole file: page 0 through
	 * page (pgoff_t) -1 */
	wb->vnode = vnode;
	wb->first = 0;
	wb->last = -1;
	wb->offset_first = 0;
	wb->to_last = PAGE_SIZE;
	wb->usage = 1;
	wb->state = AFS_WBACK_SYNCING;
	init_waitqueue_head(&wb->waitq);

	spin_lock(&vnode->writeback_lock);
	/* flag all as-yet-unstarted writebacks as conflicting
	 * (NOTE(review): presumably to stop them being merged with writes
	 * made after this sync point - confirm against the write path) */
	list_for_each_entry(xwb, &vnode->writebacks, link) {
		if (xwb->state == AFS_WBACK_PENDING)
			xwb->state = AFS_WBACK_CONFLICTING;
	}
	list_add_tail(&wb->link, &vnode->writebacks);
	spin_unlock(&vnode->writeback_lock);

	/* push all the outstanding writebacks to the server */
	ret = afs_writeback_all(vnode);
	if (ret < 0) {
		afs_put_writeback(wb);
		_leave(" = %d [wb]", ret);
		return ret;
	}

	/* wait for the preceding writes to actually complete: either the
	 * marker is flagged complete or it has become the head of the
	 * vnode's writeback list */
	ret = wait_event_interruptible(wb->waitq,
				       wb->state == AFS_WBACK_COMPLETE ||
				       vnode->writebacks.next == &wb->link);
	afs_put_writeback(wb);
	_leave(" = %d", ret);
	return ret;
}
diff --git a/fs/aio.c b/fs/aio.c
index e4598d6d49dd..ac1c1587aa02 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -68,10 +68,8 @@ static void aio_queue_work(struct kioctx *);
68 */ 68 */
69static int __init aio_setup(void) 69static int __init aio_setup(void)
70{ 70{
71 kiocb_cachep = kmem_cache_create("kiocb", sizeof(struct kiocb), 71 kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
72 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); 72 kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
73 kioctx_cachep = kmem_cache_create("kioctx", sizeof(struct kioctx),
74 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
75 73
76 aio_wq = create_workqueue("aio"); 74 aio_wq = create_workqueue("aio");
77 75
@@ -348,10 +346,9 @@ void fastcall exit_aio(struct mm_struct *mm)
348 346
349 wait_for_all_aios(ctx); 347 wait_for_all_aios(ctx);
350 /* 348 /*
351 * this is an overkill, but ensures we don't leave 349 * Ensure we don't leave the ctx on the aio_wq
352 * the ctx on the aio_wq
353 */ 350 */
354 flush_workqueue(aio_wq); 351 cancel_work_sync(&ctx->wq.work);
355 352
356 if (1 != atomic_read(&ctx->users)) 353 if (1 != atomic_read(&ctx->users))
357 printk(KERN_DEBUG 354 printk(KERN_DEBUG
@@ -374,7 +371,7 @@ void fastcall __put_ioctx(struct kioctx *ctx)
374 BUG_ON(ctx->reqs_active); 371 BUG_ON(ctx->reqs_active);
375 372
376 cancel_delayed_work(&ctx->wq); 373 cancel_delayed_work(&ctx->wq);
377 flush_workqueue(aio_wq); 374 cancel_work_sync(&ctx->wq.work);
378 aio_free_ring(ctx); 375 aio_free_ring(ctx);
379 mmdrop(ctx->mm); 376 mmdrop(ctx->mm);
380 ctx->mm = NULL; 377 ctx->mm = NULL;
diff --git a/fs/attr.c b/fs/attr.c
index 97de94670878..a0a0c7b07ba3 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -9,7 +9,6 @@
9#include <linux/time.h> 9#include <linux/time.h>
10#include <linux/mm.h> 10#include <linux/mm.h>
11#include <linux/string.h> 11#include <linux/string.h>
12#include <linux/smp_lock.h>
13#include <linux/capability.h> 12#include <linux/capability.h>
14#include <linux/fsnotify.h> 13#include <linux/fsnotify.h>
15#include <linux/fcntl.h> 14#include <linux/fcntl.h>
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 26063dc84a2a..5769a2f9ad60 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -18,7 +18,6 @@
18#include <linux/pagemap.h> 18#include <linux/pagemap.h>
19#include <linux/parser.h> 19#include <linux/parser.h>
20#include <linux/bitops.h> 20#include <linux/bitops.h>
21#include <linux/smp_lock.h>
22#include <linux/magic.h> 21#include <linux/magic.h>
23#include "autofs_i.h" 22#include "autofs_i.h"
24#include <linux/module.h> 23#include <linux/module.h>
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index d0e9b3a3905d..15170f4e13a7 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -17,7 +17,6 @@
17#include <linux/stat.h> 17#include <linux/stat.h>
18#include <linux/param.h> 18#include <linux/param.h>
19#include <linux/time.h> 19#include <linux/time.h>
20#include <linux/smp_lock.h>
21#include "autofs_i.h" 20#include "autofs_i.h"
22 21
23static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *); 22static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *);
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index efeab2fab40b..329ee473eede 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -12,7 +12,6 @@
12#include <linux/module.h> 12#include <linux/module.h>
13#include <linux/stat.h> 13#include <linux/stat.h>
14#include <linux/time.h> 14#include <linux/time.h>
15#include <linux/smp_lock.h>
16#include <linux/namei.h> 15#include <linux/namei.h>
17#include <linux/poll.h> 16#include <linux/poll.h>
18 17
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index cc6cc8ed2e39..fe96108a788d 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -293,8 +293,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
293{ 293{
294 struct befs_inode_info *bi = (struct befs_inode_info *) foo; 294 struct befs_inode_info *bi = (struct befs_inode_info *) foo;
295 295
296 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 296 if (flags & SLAB_CTOR_CONSTRUCTOR) {
297 SLAB_CTOR_CONSTRUCTOR) {
298 inode_init_once(&bi->vfs_inode); 297 inode_init_once(&bi->vfs_inode);
299 } 298 }
300} 299}
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 93d6219243ad..edc08d89aabc 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -248,8 +248,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
248{ 248{
249 struct bfs_inode_info *bi = foo; 249 struct bfs_inode_info *bi = foo;
250 250
251 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 251 if (flags & SLAB_CTOR_CONSTRUCTOR)
252 SLAB_CTOR_CONSTRUCTOR)
253 inode_init_once(&bi->vfs_inode); 252 inode_init_once(&bi->vfs_inode);
254} 253}
255 254
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 9cc4f0a8aaae..fa8ea33ab0be 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -31,7 +31,6 @@
31#include <linux/init.h> 31#include <linux/init.h>
32#include <linux/highuid.h> 32#include <linux/highuid.h>
33#include <linux/smp.h> 33#include <linux/smp.h>
34#include <linux/smp_lock.h>
35#include <linux/compiler.h> 34#include <linux/compiler.h>
36#include <linux/highmem.h> 35#include <linux/highmem.h>
37#include <linux/pagemap.h> 36#include <linux/pagemap.h>
@@ -39,6 +38,7 @@
39#include <linux/syscalls.h> 38#include <linux/syscalls.h>
40#include <linux/random.h> 39#include <linux/random.h>
41#include <linux/elf.h> 40#include <linux/elf.h>
41#include <linux/utsname.h>
42#include <asm/uaccess.h> 42#include <asm/uaccess.h>
43#include <asm/param.h> 43#include <asm/param.h>
44#include <asm/page.h> 44#include <asm/page.h>
@@ -871,6 +871,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
871 elf_prot, elf_flags); 871 elf_prot, elf_flags);
872 if (BAD_ADDR(error)) { 872 if (BAD_ADDR(error)) {
873 send_sig(SIGKILL, current, 0); 873 send_sig(SIGKILL, current, 0);
874 retval = IS_ERR((void *)error) ?
875 PTR_ERR((void*)error) : -EINVAL;
874 goto out_free_dentry; 876 goto out_free_dentry;
875 } 877 }
876 878
@@ -900,6 +902,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
900 TASK_SIZE - elf_ppnt->p_memsz < k) { 902 TASK_SIZE - elf_ppnt->p_memsz < k) {
901 /* set_brk can never work. Avoid overflows. */ 903 /* set_brk can never work. Avoid overflows. */
902 send_sig(SIGKILL, current, 0); 904 send_sig(SIGKILL, current, 0);
905 retval = -EINVAL;
903 goto out_free_dentry; 906 goto out_free_dentry;
904 } 907 }
905 908
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index f3ddca4a387b..9d62fbad3d4b 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -30,7 +30,6 @@
30#include <linux/personality.h> 30#include <linux/personality.h>
31#include <linux/ptrace.h> 31#include <linux/ptrace.h>
32#include <linux/init.h> 32#include <linux/init.h>
33#include <linux/smp_lock.h>
34#include <linux/elf.h> 33#include <linux/elf.h>
35#include <linux/elf-fdpic.h> 34#include <linux/elf-fdpic.h>
36#include <linux/elfcore.h> 35#include <linux/elfcore.h>
diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c
index 1f2d1ad63319..576dd7de2278 100644
--- a/fs/binfmt_em86.c
+++ b/fs/binfmt_em86.c
@@ -12,7 +12,6 @@
12#include <linux/string.h> 12#include <linux/string.h>
13#include <linux/stat.h> 13#include <linux/stat.h>
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/smp_lock.h>
16#include <linux/binfmts.h> 15#include <linux/binfmts.h>
17#include <linux/elf.h> 16#include <linux/elf.h>
18#include <linux/init.h> 17#include <linux/init.h>
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index e6f57990b121..72d0b412c376 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -675,19 +675,8 @@ static ssize_t
675bm_status_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) 675bm_status_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
676{ 676{
677 char *s = enabled ? "enabled" : "disabled"; 677 char *s = enabled ? "enabled" : "disabled";
678 int len = strlen(s);
679 loff_t pos = *ppos;
680 678
681 if (pos < 0) 679 return simple_read_from_buffer(buf, nbytes, ppos, s, strlen(s));
682 return -EINVAL;
683 if (pos >= len)
684 return 0;
685 if (len < pos + nbytes)
686 nbytes = len - pos;
687 if (copy_to_user(buf, s + pos, nbytes))
688 return -EFAULT;
689 *ppos = pos + nbytes;
690 return nbytes;
691} 680}
692 681
693static ssize_t bm_status_write(struct file * file, const char __user * buffer, 682static ssize_t bm_status_write(struct file * file, const char __user * buffer,
@@ -727,8 +716,8 @@ static const struct super_operations s_ops = {
727static int bm_fill_super(struct super_block * sb, void * data, int silent) 716static int bm_fill_super(struct super_block * sb, void * data, int silent)
728{ 717{
729 static struct tree_descr bm_files[] = { 718 static struct tree_descr bm_files[] = {
730 [1] = {"status", &bm_status_operations, S_IWUSR|S_IRUGO}, 719 [2] = {"status", &bm_status_operations, S_IWUSR|S_IRUGO},
731 [2] = {"register", &bm_register_operations, S_IWUSR}, 720 [3] = {"register", &bm_register_operations, S_IWUSR},
732 /* last one */ {""} 721 /* last one */ {""}
733 }; 722 };
734 int err = simple_fill_super(sb, 0x42494e4d, bm_files); 723 int err = simple_fill_super(sb, 0x42494e4d, bm_files);
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index 1edbcca25a73..304c88544d89 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -12,7 +12,6 @@
12#include <linux/binfmts.h> 12#include <linux/binfmts.h>
13#include <linux/init.h> 13#include <linux/init.h>
14#include <linux/file.h> 14#include <linux/file.h>
15#include <linux/smp_lock.h>
16#include <linux/err.h> 15#include <linux/err.h>
17#include <linux/fs.h> 16#include <linux/fs.h>
18 17
diff --git a/fs/bio.c b/fs/bio.c
index 7618bcb18368..093345f00128 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -28,7 +28,7 @@
28#include <linux/blktrace_api.h> 28#include <linux/blktrace_api.h>
29#include <scsi/sg.h> /* for struct sg_iovec */ 29#include <scsi/sg.h> /* for struct sg_iovec */
30 30
31#define BIO_POOL_SIZE 256 31#define BIO_POOL_SIZE 2
32 32
33static struct kmem_cache *bio_slab __read_mostly; 33static struct kmem_cache *bio_slab __read_mostly;
34 34
@@ -38,7 +38,7 @@ static struct kmem_cache *bio_slab __read_mostly;
38 * a small number of entries is fine, not going to be performance critical. 38 * a small number of entries is fine, not going to be performance critical.
39 * basically we just need to survive 39 * basically we just need to survive
40 */ 40 */
41#define BIO_SPLIT_ENTRIES 8 41#define BIO_SPLIT_ENTRIES 2
42mempool_t *bio_split_pool __read_mostly; 42mempool_t *bio_split_pool __read_mostly;
43 43
44struct biovec_slab { 44struct biovec_slab {
@@ -1120,7 +1120,7 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors)
1120 * create memory pools for biovec's in a bio_set. 1120 * create memory pools for biovec's in a bio_set.
1121 * use the global biovec slabs created for general use. 1121 * use the global biovec slabs created for general use.
1122 */ 1122 */
1123static int biovec_create_pools(struct bio_set *bs, int pool_entries, int scale) 1123static int biovec_create_pools(struct bio_set *bs, int pool_entries)
1124{ 1124{
1125 int i; 1125 int i;
1126 1126
@@ -1128,9 +1128,6 @@ static int biovec_create_pools(struct bio_set *bs, int pool_entries, int scale)
1128 struct biovec_slab *bp = bvec_slabs + i; 1128 struct biovec_slab *bp = bvec_slabs + i;
1129 mempool_t **bvp = bs->bvec_pools + i; 1129 mempool_t **bvp = bs->bvec_pools + i;
1130 1130
1131 if (pool_entries > 1 && i >= scale)
1132 pool_entries >>= 1;
1133
1134 *bvp = mempool_create_slab_pool(pool_entries, bp->slab); 1131 *bvp = mempool_create_slab_pool(pool_entries, bp->slab);
1135 if (!*bvp) 1132 if (!*bvp)
1136 return -ENOMEM; 1133 return -ENOMEM;
@@ -1161,7 +1158,7 @@ void bioset_free(struct bio_set *bs)
1161 kfree(bs); 1158 kfree(bs);
1162} 1159}
1163 1160
1164struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size, int scale) 1161struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size)
1165{ 1162{
1166 struct bio_set *bs = kzalloc(sizeof(*bs), GFP_KERNEL); 1163 struct bio_set *bs = kzalloc(sizeof(*bs), GFP_KERNEL);
1167 1164
@@ -1172,7 +1169,7 @@ struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size, int scale)
1172 if (!bs->bio_pool) 1169 if (!bs->bio_pool)
1173 goto bad; 1170 goto bad;
1174 1171
1175 if (!biovec_create_pools(bs, bvec_pool_size, scale)) 1172 if (!biovec_create_pools(bs, bvec_pool_size))
1176 return bs; 1173 return bs;
1177 1174
1178bad: 1175bad:
@@ -1196,38 +1193,11 @@ static void __init biovec_init_slabs(void)
1196 1193
1197static int __init init_bio(void) 1194static int __init init_bio(void)
1198{ 1195{
1199 int megabytes, bvec_pool_entries; 1196 bio_slab = KMEM_CACHE(bio, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
1200 int scale = BIOVEC_NR_POOLS;
1201
1202 bio_slab = kmem_cache_create("bio", sizeof(struct bio), 0,
1203 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
1204 1197
1205 biovec_init_slabs(); 1198 biovec_init_slabs();
1206 1199
1207 megabytes = nr_free_pages() >> (20 - PAGE_SHIFT); 1200 fs_bio_set = bioset_create(BIO_POOL_SIZE, 2);
1208
1209 /*
1210 * find out where to start scaling
1211 */
1212 if (megabytes <= 16)
1213 scale = 0;
1214 else if (megabytes <= 32)
1215 scale = 1;
1216 else if (megabytes <= 64)
1217 scale = 2;
1218 else if (megabytes <= 96)
1219 scale = 3;
1220 else if (megabytes <= 128)
1221 scale = 4;
1222
1223 /*
1224 * Limit number of entries reserved -- mempools are only used when
1225 * the system is completely unable to allocate memory, so we only
1226 * need enough to make progress.
1227 */
1228 bvec_pool_entries = 1 + scale;
1229
1230 fs_bio_set = bioset_create(BIO_POOL_SIZE, bvec_pool_entries, scale);
1231 if (!fs_bio_set) 1201 if (!fs_bio_set)
1232 panic("bio: can't allocate bios\n"); 1202 panic("bio: can't allocate bios\n");
1233 1203
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 575076c018f4..742899240872 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -22,6 +22,7 @@
22#include <linux/mount.h> 22#include <linux/mount.h>
23#include <linux/uio.h> 23#include <linux/uio.h>
24#include <linux/namei.h> 24#include <linux/namei.h>
25#include <linux/log2.h>
25#include <asm/uaccess.h> 26#include <asm/uaccess.h>
26#include "internal.h" 27#include "internal.h"
27 28
@@ -55,17 +56,19 @@ static sector_t max_block(struct block_device *bdev)
55 return retval; 56 return retval;
56} 57}
57 58
58/* Kill _all_ buffers, dirty or not.. */ 59/* Kill _all_ buffers and pagecache , dirty or not.. */
59static void kill_bdev(struct block_device *bdev) 60static void kill_bdev(struct block_device *bdev)
60{ 61{
61 invalidate_bdev(bdev, 1); 62 if (bdev->bd_inode->i_mapping->nrpages == 0)
63 return;
64 invalidate_bh_lrus();
62 truncate_inode_pages(bdev->bd_inode->i_mapping, 0); 65 truncate_inode_pages(bdev->bd_inode->i_mapping, 0);
63} 66}
64 67
65int set_blocksize(struct block_device *bdev, int size) 68int set_blocksize(struct block_device *bdev, int size)
66{ 69{
67 /* Size must be a power of two, and between 512 and PAGE_SIZE */ 70 /* Size must be a power of two, and between 512 and PAGE_SIZE */
68 if (size > PAGE_SIZE || size < 512 || (size & (size-1))) 71 if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
69 return -EINVAL; 72 return -EINVAL;
70 73
71 /* Size cannot be smaller than the size supported by the device */ 74 /* Size cannot be smaller than the size supported by the device */
@@ -455,9 +458,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
455 struct bdev_inode *ei = (struct bdev_inode *) foo; 458 struct bdev_inode *ei = (struct bdev_inode *) foo;
456 struct block_device *bdev = &ei->bdev; 459 struct block_device *bdev = &ei->bdev;
457 460
458 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 461 if (flags & SLAB_CTOR_CONSTRUCTOR) {
459 SLAB_CTOR_CONSTRUCTOR)
460 {
461 memset(bdev, 0, sizeof(*bdev)); 462 memset(bdev, 0, sizeof(*bdev));
462 mutex_init(&bdev->bd_mutex); 463 mutex_init(&bdev->bd_mutex);
463 sema_init(&bdev->bd_mount_sem, 1); 464 sema_init(&bdev->bd_mount_sem, 1);
@@ -1478,7 +1479,7 @@ int __invalidate_device(struct block_device *bdev)
1478 res = invalidate_inodes(sb); 1479 res = invalidate_inodes(sb);
1479 drop_super(sb); 1480 drop_super(sb);
1480 } 1481 }
1481 invalidate_bdev(bdev, 0); 1482 invalidate_bdev(bdev);
1482 return res; 1483 return res;
1483} 1484}
1484EXPORT_SYMBOL(__invalidate_device); 1485EXPORT_SYMBOL(__invalidate_device);
diff --git a/fs/buffer.c b/fs/buffer.c
index 1d0852fa728b..aecd057cd0e0 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -24,7 +24,6 @@
24#include <linux/mm.h> 24#include <linux/mm.h>
25#include <linux/percpu.h> 25#include <linux/percpu.h>
26#include <linux/slab.h> 26#include <linux/slab.h>
27#include <linux/smp_lock.h>
28#include <linux/capability.h> 27#include <linux/capability.h>
29#include <linux/blkdev.h> 28#include <linux/blkdev.h>
30#include <linux/file.h> 29#include <linux/file.h>
@@ -44,7 +43,6 @@
44#include <linux/bit_spinlock.h> 43#include <linux/bit_spinlock.h>
45 44
46static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); 45static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
47static void invalidate_bh_lrus(void);
48 46
49#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers) 47#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
50 48
@@ -333,7 +331,7 @@ out:
333 we think the disk contains more recent information than the buffercache. 331 we think the disk contains more recent information than the buffercache.
334 The update == 1 pass marks the buffers we need to update, the update == 2 332 The update == 1 pass marks the buffers we need to update, the update == 2
335 pass does the actual I/O. */ 333 pass does the actual I/O. */
336void invalidate_bdev(struct block_device *bdev, int destroy_dirty_buffers) 334void invalidate_bdev(struct block_device *bdev)
337{ 335{
338 struct address_space *mapping = bdev->bd_inode->i_mapping; 336 struct address_space *mapping = bdev->bd_inode->i_mapping;
339 337
@@ -341,11 +339,6 @@ void invalidate_bdev(struct block_device *bdev, int destroy_dirty_buffers)
341 return; 339 return;
342 340
343 invalidate_bh_lrus(); 341 invalidate_bh_lrus();
344 /*
345 * FIXME: what about destroy_dirty_buffers?
346 * We really want to use invalidate_inode_pages2() for
347 * that, but not until that's cleaned up.
348 */
349 invalidate_mapping_pages(mapping, 0, -1); 342 invalidate_mapping_pages(mapping, 0, -1);
350} 343}
351 344
@@ -1408,7 +1401,7 @@ static void invalidate_bh_lru(void *arg)
1408 put_cpu_var(bh_lrus); 1401 put_cpu_var(bh_lrus);
1409} 1402}
1410 1403
1411static void invalidate_bh_lrus(void) 1404void invalidate_bh_lrus(void)
1412{ 1405{
1413 on_each_cpu(invalidate_bh_lru, NULL, 1, 1); 1406 on_each_cpu(invalidate_bh_lru, NULL, 1, 1);
1414} 1407}
@@ -1700,17 +1693,8 @@ done:
1700 * clean. Someone wrote them back by hand with 1693 * clean. Someone wrote them back by hand with
1701 * ll_rw_block/submit_bh. A rare case. 1694 * ll_rw_block/submit_bh. A rare case.
1702 */ 1695 */
1703 int uptodate = 1;
1704 do {
1705 if (!buffer_uptodate(bh)) {
1706 uptodate = 0;
1707 break;
1708 }
1709 bh = bh->b_this_page;
1710 } while (bh != head);
1711 if (uptodate)
1712 SetPageUptodate(page);
1713 end_page_writeback(page); 1696 end_page_writeback(page);
1697
1714 /* 1698 /*
1715 * The page and buffer_heads can be released at any time from 1699 * The page and buffer_heads can be released at any time from
1716 * here on. 1700 * here on.
@@ -1742,6 +1726,7 @@ recover:
1742 } while ((bh = bh->b_this_page) != head); 1726 } while ((bh = bh->b_this_page) != head);
1743 SetPageError(page); 1727 SetPageError(page);
1744 BUG_ON(PageWriteback(page)); 1728 BUG_ON(PageWriteback(page));
1729 mapping_set_error(page->mapping, err);
1745 set_page_writeback(page); 1730 set_page_writeback(page);
1746 do { 1731 do {
1747 struct buffer_head *next = bh->b_this_page; 1732 struct buffer_head *next = bh->b_this_page;
@@ -1861,13 +1846,8 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
1861 if (block_start >= to) 1846 if (block_start >= to)
1862 break; 1847 break;
1863 if (buffer_new(bh)) { 1848 if (buffer_new(bh)) {
1864 void *kaddr;
1865
1866 clear_buffer_new(bh); 1849 clear_buffer_new(bh);
1867 kaddr = kmap_atomic(page, KM_USER0); 1850 zero_user_page(page, block_start, bh->b_size, KM_USER0);
1868 memset(kaddr+block_start, 0, bh->b_size);
1869 flush_dcache_page(page);
1870 kunmap_atomic(kaddr, KM_USER0);
1871 set_buffer_uptodate(bh); 1851 set_buffer_uptodate(bh);
1872 mark_buffer_dirty(bh); 1852 mark_buffer_dirty(bh);
1873 } 1853 }
@@ -1955,10 +1935,8 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
1955 SetPageError(page); 1935 SetPageError(page);
1956 } 1936 }
1957 if (!buffer_mapped(bh)) { 1937 if (!buffer_mapped(bh)) {
1958 void *kaddr = kmap_atomic(page, KM_USER0); 1938 zero_user_page(page, i * blocksize, blocksize,
1959 memset(kaddr + i * blocksize, 0, blocksize); 1939 KM_USER0);
1960 flush_dcache_page(page);
1961 kunmap_atomic(kaddr, KM_USER0);
1962 if (!err) 1940 if (!err)
1963 set_buffer_uptodate(bh); 1941 set_buffer_uptodate(bh);
1964 continue; 1942 continue;
@@ -2101,7 +2079,6 @@ int cont_prepare_write(struct page *page, unsigned offset,
2101 long status; 2079 long status;
2102 unsigned zerofrom; 2080 unsigned zerofrom;
2103 unsigned blocksize = 1 << inode->i_blkbits; 2081 unsigned blocksize = 1 << inode->i_blkbits;
2104 void *kaddr;
2105 2082
2106 while(page->index > (pgpos = *bytes>>PAGE_CACHE_SHIFT)) { 2083 while(page->index > (pgpos = *bytes>>PAGE_CACHE_SHIFT)) {
2107 status = -ENOMEM; 2084 status = -ENOMEM;
@@ -2123,10 +2100,8 @@ int cont_prepare_write(struct page *page, unsigned offset,
2123 PAGE_CACHE_SIZE, get_block); 2100 PAGE_CACHE_SIZE, get_block);
2124 if (status) 2101 if (status)
2125 goto out_unmap; 2102 goto out_unmap;
2126 kaddr = kmap_atomic(new_page, KM_USER0); 2103 zero_user_page(page, zerofrom, PAGE_CACHE_SIZE - zerofrom,
2127 memset(kaddr+zerofrom, 0, PAGE_CACHE_SIZE-zerofrom); 2104 KM_USER0);
2128 flush_dcache_page(new_page);
2129 kunmap_atomic(kaddr, KM_USER0);
2130 generic_commit_write(NULL, new_page, zerofrom, PAGE_CACHE_SIZE); 2105 generic_commit_write(NULL, new_page, zerofrom, PAGE_CACHE_SIZE);
2131 unlock_page(new_page); 2106 unlock_page(new_page);
2132 page_cache_release(new_page); 2107 page_cache_release(new_page);
@@ -2153,10 +2128,7 @@ int cont_prepare_write(struct page *page, unsigned offset,
2153 if (status) 2128 if (status)
2154 goto out1; 2129 goto out1;
2155 if (zerofrom < offset) { 2130 if (zerofrom < offset) {
2156 kaddr = kmap_atomic(page, KM_USER0); 2131 zero_user_page(page, zerofrom, offset - zerofrom, KM_USER0);
2157 memset(kaddr+zerofrom, 0, offset-zerofrom);
2158 flush_dcache_page(page);
2159 kunmap_atomic(kaddr, KM_USER0);
2160 __block_commit_write(inode, page, zerofrom, offset); 2132 __block_commit_write(inode, page, zerofrom, offset);
2161 } 2133 }
2162 return 0; 2134 return 0;
@@ -2355,10 +2327,7 @@ failed:
2355 * Error recovery is pretty slack. Clear the page and mark it dirty 2327 * Error recovery is pretty slack. Clear the page and mark it dirty
2356 * so we'll later zero out any blocks which _were_ allocated. 2328 * so we'll later zero out any blocks which _were_ allocated.
2357 */ 2329 */
2358 kaddr = kmap_atomic(page, KM_USER0); 2330 zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
2359 memset(kaddr, 0, PAGE_CACHE_SIZE);
2360 flush_dcache_page(page);
2361 kunmap_atomic(kaddr, KM_USER0);
2362 SetPageUptodate(page); 2331 SetPageUptodate(page);
2363 set_page_dirty(page); 2332 set_page_dirty(page);
2364 return ret; 2333 return ret;
@@ -2397,7 +2366,6 @@ int nobh_writepage(struct page *page, get_block_t *get_block,
2397 loff_t i_size = i_size_read(inode); 2366 loff_t i_size = i_size_read(inode);
2398 const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; 2367 const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
2399 unsigned offset; 2368 unsigned offset;
2400 void *kaddr;
2401 int ret; 2369 int ret;
2402 2370
2403 /* Is the page fully inside i_size? */ 2371 /* Is the page fully inside i_size? */
@@ -2428,10 +2396,7 @@ int nobh_writepage(struct page *page, get_block_t *get_block,
2428 * the page size, the remaining memory is zeroed when mapped, and 2396 * the page size, the remaining memory is zeroed when mapped, and
2429 * writes to that region are not written out to the file." 2397 * writes to that region are not written out to the file."
2430 */ 2398 */
2431 kaddr = kmap_atomic(page, KM_USER0); 2399 zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, KM_USER0);
2432 memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
2433 flush_dcache_page(page);
2434 kunmap_atomic(kaddr, KM_USER0);
2435out: 2400out:
2436 ret = mpage_writepage(page, get_block, wbc); 2401 ret = mpage_writepage(page, get_block, wbc);
2437 if (ret == -EAGAIN) 2402 if (ret == -EAGAIN)
@@ -2452,7 +2417,6 @@ int nobh_truncate_page(struct address_space *mapping, loff_t from)
2452 unsigned to; 2417 unsigned to;
2453 struct page *page; 2418 struct page *page;
2454 const struct address_space_operations *a_ops = mapping->a_ops; 2419 const struct address_space_operations *a_ops = mapping->a_ops;
2455 char *kaddr;
2456 int ret = 0; 2420 int ret = 0;
2457 2421
2458 if ((offset & (blocksize - 1)) == 0) 2422 if ((offset & (blocksize - 1)) == 0)
@@ -2466,10 +2430,8 @@ int nobh_truncate_page(struct address_space *mapping, loff_t from)
2466 to = (offset + blocksize) & ~(blocksize - 1); 2430 to = (offset + blocksize) & ~(blocksize - 1);
2467 ret = a_ops->prepare_write(NULL, page, offset, to); 2431 ret = a_ops->prepare_write(NULL, page, offset, to);
2468 if (ret == 0) { 2432 if (ret == 0) {
2469 kaddr = kmap_atomic(page, KM_USER0); 2433 zero_user_page(page, offset, PAGE_CACHE_SIZE - offset,
2470 memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); 2434 KM_USER0);
2471 flush_dcache_page(page);
2472 kunmap_atomic(kaddr, KM_USER0);
2473 /* 2435 /*
2474 * It would be more correct to call aops->commit_write() 2436 * It would be more correct to call aops->commit_write()
2475 * here, but this is more efficient. 2437 * here, but this is more efficient.
@@ -2495,7 +2457,6 @@ int block_truncate_page(struct address_space *mapping,
2495 struct inode *inode = mapping->host; 2457 struct inode *inode = mapping->host;
2496 struct page *page; 2458 struct page *page;
2497 struct buffer_head *bh; 2459 struct buffer_head *bh;
2498 void *kaddr;
2499 int err; 2460 int err;
2500 2461
2501 blocksize = 1 << inode->i_blkbits; 2462 blocksize = 1 << inode->i_blkbits;
@@ -2549,11 +2510,7 @@ int block_truncate_page(struct address_space *mapping,
2549 goto unlock; 2510 goto unlock;
2550 } 2511 }
2551 2512
2552 kaddr = kmap_atomic(page, KM_USER0); 2513 zero_user_page(page, offset, length, KM_USER0);
2553 memset(kaddr + offset, 0, length);
2554 flush_dcache_page(page);
2555 kunmap_atomic(kaddr, KM_USER0);
2556
2557 mark_buffer_dirty(bh); 2514 mark_buffer_dirty(bh);
2558 err = 0; 2515 err = 0;
2559 2516
@@ -2574,7 +2531,6 @@ int block_write_full_page(struct page *page, get_block_t *get_block,
2574 loff_t i_size = i_size_read(inode); 2531 loff_t i_size = i_size_read(inode);
2575 const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; 2532 const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
2576 unsigned offset; 2533 unsigned offset;
2577 void *kaddr;
2578 2534
2579 /* Is the page fully inside i_size? */ 2535 /* Is the page fully inside i_size? */
2580 if (page->index < end_index) 2536 if (page->index < end_index)
@@ -2600,10 +2556,7 @@ int block_write_full_page(struct page *page, get_block_t *get_block,
2600 * the page size, the remaining memory is zeroed when mapped, and 2556 * the page size, the remaining memory is zeroed when mapped, and
2601 * writes to that region are not written out to the file." 2557 * writes to that region are not written out to the file."
2602 */ 2558 */
2603 kaddr = kmap_atomic(page, KM_USER0); 2559 zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, KM_USER0);
2604 memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
2605 flush_dcache_page(page);
2606 kunmap_atomic(kaddr, KM_USER0);
2607 return __block_write_full_page(inode, page, get_block, wbc); 2560 return __block_write_full_page(inode, page, get_block, wbc);
2608} 2561}
2609 2562
@@ -2968,8 +2921,7 @@ EXPORT_SYMBOL(free_buffer_head);
2968static void 2921static void
2969init_buffer_head(void *data, struct kmem_cache *cachep, unsigned long flags) 2922init_buffer_head(void *data, struct kmem_cache *cachep, unsigned long flags)
2970{ 2923{
2971 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 2924 if (flags & SLAB_CTOR_CONSTRUCTOR) {
2972 SLAB_CTOR_CONSTRUCTOR) {
2973 struct buffer_head * bh = (struct buffer_head *)data; 2925 struct buffer_head * bh = (struct buffer_head *)data;
2974 2926
2975 memset(bh, 0, sizeof(*bh)); 2927 memset(bh, 0, sizeof(*bh));
@@ -2994,7 +2946,7 @@ static void buffer_exit_cpu(int cpu)
2994static int buffer_cpu_notify(struct notifier_block *self, 2946static int buffer_cpu_notify(struct notifier_block *self,
2995 unsigned long action, void *hcpu) 2947 unsigned long action, void *hcpu)
2996{ 2948{
2997 if (action == CPU_DEAD) 2949 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
2998 buffer_exit_cpu((unsigned long)hcpu); 2950 buffer_exit_cpu((unsigned long)hcpu);
2999 return NOTIFY_OK; 2951 return NOTIFY_OK;
3000} 2952}
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 5d1f4873d701..a9b6bc5157b8 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,4 +1,16 @@
1Verison 1.48 1Version 1.49
2------------
3IPv6 support. Enable ipv6 addresses to be passed on mount (put the ipv6
4address after the "ip=" mount option, at least until mount.cifs is fixed to
5handle DNS host to ipv6 name translation). Accept override of uid or gid
6on mount even when Unix Extensions are negotiated (it used to be ignored
7when Unix Extensions were ignored). This allows users to override the
8default uid and gid for files when they are certain that the uids or
9gids on the server do not match those of the client. Make "sec=none"
10mount override username (so that null user connection is attempted)
11to match what documentation said.
12
13Version 1.48
2------------ 14------------
3Fix mtime bouncing around from local idea of last write times to remote time. 15Fix mtime bouncing around from local idea of last write times to remote time.
4Fix hang (in i_size_read) when simultaneous size update of same remote file 16Fix hang (in i_size_read) when simultaneous size update of same remote file
@@ -9,7 +21,13 @@ from read-only back to read-write, reflect this change in default file mode
9(we had been leaving a file's mode read-only until the inode were reloaded). 21(we had been leaving a file's mode read-only until the inode were reloaded).
10Allow setting of attribute back to ATTR_NORMAL (removing readonly dos attribute 22Allow setting of attribute back to ATTR_NORMAL (removing readonly dos attribute
11when archive dos attribute not set and we are changing mode back to writeable 23when archive dos attribute not set and we are changing mode back to writeable
12on server which does not support the Unix Extensions). 24on server which does not support the Unix Extensions). Remove read only dos
25attribute on chmod when adding any write permission (ie on any of
26user/group/other (not all of user/group/other ie 0222) when
27mounted to windows. Add support for POSIX MkDir (slight performance
28enhancement and eliminates the network race between the mkdir and set
29path info of the mode).
30
13 31
14Version 1.47 32Version 1.47
15------------ 33------------
diff --git a/fs/cifs/README b/fs/cifs/README
index 080c5eba112b..4d01697722cc 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -257,13 +257,19 @@ A partial list of the supported mount options follows:
257 mount. 257 mount.
258 domain Set the SMB/CIFS workgroup name prepended to the 258 domain Set the SMB/CIFS workgroup name prepended to the
259 username during CIFS session establishment 259 username during CIFS session establishment
260 uid If CIFS Unix extensions are not supported by the server 260 uid Set the default uid for inodes. For mounts to servers
261 this overrides the default uid for inodes. For mounts to 261 which do support the CIFS Unix extensions, such as a
262 servers which do support the CIFS Unix extensions, such 262 properly configured Samba server, the server provides
263 as a properly configured Samba server, the server provides 263 the uid, gid and mode so this parameter should not be
264 the uid, gid and mode. For servers which do not support 264 specified unless the server and clients uid and gid
265 the Unix extensions, the default uid (and gid) returned on 265 numbering differ. If the server and client are in the
266 lookup of existing files is the uid (gid) of the person 266 same domain (e.g. running winbind or nss_ldap) and
267 the server supports the Unix Extensions then the uid
268 and gid can be retrieved from the server (and uid
269 and gid would not have to be specifed on the mount.
270 For servers which do not support the CIFS Unix
271 extensions, the default uid (and gid) returned on lookup
272 of existing files will be the uid (gid) of the person
267 who executed the mount (root, except when mount.cifs 273 who executed the mount (root, except when mount.cifs
268 is configured setuid for user mounts) unless the "uid=" 274 is configured setuid for user mounts) unless the "uid="
269 (gid) mount option is specified. For the uid (gid) of newly 275 (gid) mount option is specified. For the uid (gid) of newly
@@ -281,8 +287,7 @@ A partial list of the supported mount options follows:
281 the client. Note that the mount.cifs helper must be 287 the client. Note that the mount.cifs helper must be
282 at version 1.10 or higher to support specifying the uid 288 at version 1.10 or higher to support specifying the uid
283 (or gid) in non-numberic form. 289 (or gid) in non-numberic form.
284 gid If CIFS Unix extensions are not supported by the server 290 gid Set the default gid for inodes (similar to above).
285 this overrides the default gid for inodes.
286 file_mode If CIFS Unix extensions are not supported by the server 291 file_mode If CIFS Unix extensions are not supported by the server
287 this overrides the default mode for file inodes. 292 this overrides the default mode for file inodes.
288 dir_mode If CIFS Unix extensions are not supported by the server 293 dir_mode If CIFS Unix extensions are not supported by the server
@@ -467,7 +472,7 @@ including:
467 -V print mount.cifs version 472 -V print mount.cifs version
468 -? display simple usage information 473 -? display simple usage information
469 474
470With recent 2.6 kernel versions of modutils, the version of the cifs kernel 475With most 2.6 kernel versions of modutils, the version of the cifs kernel
471module can be displayed via modinfo. 476module can be displayed via modinfo.
472 477
473Misc /proc/fs/cifs Flags and Debug Info 478Misc /proc/fs/cifs Flags and Debug Info
@@ -516,8 +521,22 @@ SecurityFlags Flags which control security negotiation and
516 must use plaintext passwords 0x20020 521 must use plaintext passwords 0x20020
517 (reserved for future packet encryption) 0x00040 522 (reserved for future packet encryption) 0x00040
518 523
519cifsFYI If set to one, additional debug information is 524cifsFYI If set to non-zero value, additional debug information
520 logged to the system error log. (default 0) 525 will be logged to the system error log. This field
526 contains three flags controlling different classes of
527 debugging entries. The maximum value it can be set
528 to is 7 which enables all debugging points (default 0).
529 Some debugging statements are not compiled into the
530 cifs kernel unless CONFIG_CIFS_DEBUG2 is enabled in the
531 kernel configuration. cifsFYI may be set to one or
532 nore of the following flags (7 sets them all):
533
534 log cifs informational messages 0x01
535 log return codes from cifs entry points 0x02
536 log slow responses (ie which take longer than 1 second)
537 CONFIG_CIFS_STATS2 must be enabled in .config 0x04
538
539
521traceSMB If set to one, debug information is logged to the 540traceSMB If set to one, debug information is logged to the
522 system error log with the start of smb requests 541 system error log with the start of smb requests
523 and responses (default 0) 542 and responses (default 0)
diff --git a/fs/cifs/TODO b/fs/cifs/TODO
index d7b9c27c942d..78b620e332bd 100644
--- a/fs/cifs/TODO
+++ b/fs/cifs/TODO
@@ -1,4 +1,4 @@
1Version 1.39 November 30, 2005 1Version 1.49 April 26, 2007
2 2
3A Partial List of Missing Features 3A Partial List of Missing Features
4================================== 4==================================
@@ -18,7 +18,7 @@ better)
18 18
19d) Kerberos/SPNEGO session setup support - (started) 19d) Kerberos/SPNEGO session setup support - (started)
20 20
21e) NTLMv2 authentication (mostly implemented - double check 21e) More testing of NTLMv2 authentication (mostly implemented - double check
22that NTLMv2 signing works, also need to cleanup now unneeded SessSetup code in 22that NTLMv2 signing works, also need to cleanup now unneeded SessSetup code in
23fs/cifs/connect.c) 23fs/cifs/connect.c)
24 24
@@ -27,55 +27,44 @@ used (Kerberos or NTLMSSP). Signing alreadyimplemented for NTLM
27and raw NTLMSSP already. This is important when enabling 27and raw NTLMSSP already. This is important when enabling
28extended security and mounting to Windows 2003 Servers 28extended security and mounting to Windows 2003 Servers
29 29
30f) Directory entry caching relies on a 1 second timer, rather than 30g) Directory entry caching relies on a 1 second timer, rather than
31using FindNotify or equivalent. - (started) 31using FindNotify or equivalent. - (started)
32 32
33g) A few byte range testcases fail due to POSIX vs. Windows/CIFS 33h) quota support (needs minor kernel change since quota calls
34style byte range lock differences. Save byte range locks so
35reconnect can replay them.
36
37h) Support unlock all (unlock 0,MAX_OFFSET)
38by unlocking all known byte range locks that we locked on the file.
39
40i) quota support (needs minor kernel change since quota calls
41to make it to network filesystems or deviceless filesystems) 34to make it to network filesystems or deviceless filesystems)
42 35
43j) investigate sync behavior (including syncpage) and check 36i) investigate sync behavior (including syncpage) and check
44for proper behavior of intr/nointr 37for proper behavior of intr/nointr
45 38
46k) hook lower into the sockets api (as NFS/SunRPC does) to avoid the 39j) hook lower into the sockets api (as NFS/SunRPC does) to avoid the
47extra copy in/out of the socket buffers in some cases. 40extra copy in/out of the socket buffers in some cases.
48 41
49l) finish support for IPv6. This is mostly complete but 42k) Better optimize open (and pathbased setfilesize) to reduce the
50needs a simple conversion of ipv6 to sin6_addr from the
51address in string representation.
52
53m) Better optimize open (and pathbased setfilesize) to reduce the
54oplock breaks coming from windows srv. Piggyback identical file 43oplock breaks coming from windows srv. Piggyback identical file
55opens on top of each other by incrementing reference count rather 44opens on top of each other by incrementing reference count rather
56than resending (helps reduce server resource utilization and avoid 45than resending (helps reduce server resource utilization and avoid
57spurious oplock breaks). 46spurious oplock breaks).
58 47
59o) Improve performance of readpages by sending more than one read 48l) Improve performance of readpages by sending more than one read
60at a time when 8 pages or more are requested. In conjuntion 49at a time when 8 pages or more are requested. In conjuntion
61add support for async_cifs_readpages. 50add support for async_cifs_readpages.
62 51
63p) Add support for storing symlink info to Windows servers 52m) Add support for storing symlink info to Windows servers
64in the Extended Attribute format their SFU clients would recognize. 53in the Extended Attribute format their SFU clients would recognize.
65 54
66q) Finish fcntl D_NOTIFY support so kde and gnome file list windows 55n) Finish fcntl D_NOTIFY support so kde and gnome file list windows
67will autorefresh (partially complete by Asser). Needs minor kernel 56will autorefresh (partially complete by Asser). Needs minor kernel
68vfs change to support removing D_NOTIFY on a file. 57vfs change to support removing D_NOTIFY on a file.
69 58
70r) Add GUI tool to configure /proc/fs/cifs settings and for display of 59o) Add GUI tool to configure /proc/fs/cifs settings and for display of
71the CIFS statistics (started) 60the CIFS statistics (started)
72 61
73s) implement support for security and trusted categories of xattrs 62p) implement support for security and trusted categories of xattrs
74(requires minor protocol extension) to enable better support for SELINUX 63(requires minor protocol extension) to enable better support for SELINUX
75 64
76t) Implement O_DIRECT flag on open (already supported on mount) 65q) Implement O_DIRECT flag on open (already supported on mount)
77 66
78u) Create UID mapping facility so server UIDs can be mapped on a per 67r) Create UID mapping facility so server UIDs can be mapped on a per
79mount or a per server basis to client UIDs or nobody if no mapping 68mount or a per server basis to client UIDs or nobody if no mapping
80exists. This is helpful when Unix extensions are negotiated to 69exists. This is helpful when Unix extensions are negotiated to
81allow better permission checking when UIDs differ on the server 70allow better permission checking when UIDs differ on the server
@@ -83,19 +72,26 @@ and client. Add new protocol request to the CIFS protocol
83standard for asking the server for the corresponding name of a 72standard for asking the server for the corresponding name of a
84particular uid. 73particular uid.
85 74
86v) Add support for CIFS Unix and also the newer POSIX extensions to the 75s) Add support for CIFS Unix and also the newer POSIX extensions to the
87server side for Samba 4. 76server side for Samba 4.
88 77
89w) Finish up the dos time conversion routines needed to return old server 78t) In support for OS/2 (LANMAN 1.2 and LANMAN2.1 based SMB servers)
90time to the client (default time, of now or time 0 is used now for these
91very old servers)
92
93x) In support for OS/2 (LANMAN 1.2 and LANMAN2.1 based SMB servers)
94need to add ability to set time to server (utimes command) 79need to add ability to set time to server (utimes command)
95 80
96y) Finish testing of Windows 9x/Windows ME server support (started). 81u) DOS attrs - returned as pseudo-xattr in Samba format (check VFAT and NTFS for this too)
82
83v) mount check for unmatched uids
84
85w) Add mount option for Linux extension disable per mount, and partial
86disable per mount (uid off, symlink/fifo/mknod on but what about posix acls?)
97 87
98KNOWN BUGS (updated February 26, 2007) 88x) Fix Samba 3 server to handle Linux kernel aio so dbench with lots of
89processes can proceed better in parallel (on the server)
90
91y) Fix Samba 3 to handle reads/writes over 127K (and remove the cifs mount
92restriction of wsize max being 127K)
93
94KNOWN BUGS (updated April 24, 2007)
99==================================== 95====================================
100See http://bugzilla.samba.org - search on product "CifsVFS" for 96See http://bugzilla.samba.org - search on product "CifsVFS" for
101current bug list. 97current bug list.
@@ -127,10 +123,3 @@ negotiated size) and send larger write sizes to modern servers.
1274) More exhaustively test against less common servers. More testing 1234) More exhaustively test against less common servers. More testing
128against Windows 9x, Windows ME servers. 124against Windows 9x, Windows ME servers.
129 125
130DOS attrs - returned as pseudo-xattr in Samba format (check VFAT and NTFS for this too)
131
132mount check for unmatched uids - and uid override
133
134Add mount option for Linux extension disable per mount, and partial disable per mount (uid off, symlink/fifo/mknod on but what about posix acls?)
135
136Free threads at umount --force that are stuck on the sesSem
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index fd1e52ebcee6..4cc2012e9322 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -22,12 +22,14 @@
22#define CIFS_MOUNT_SET_UID 2 /* set current->euid in create etc. */ 22#define CIFS_MOUNT_SET_UID 2 /* set current->euid in create etc. */
23#define CIFS_MOUNT_SERVER_INUM 4 /* inode numbers from uniqueid from server */ 23#define CIFS_MOUNT_SERVER_INUM 4 /* inode numbers from uniqueid from server */
24#define CIFS_MOUNT_DIRECT_IO 8 /* do not write nor read through page cache */ 24#define CIFS_MOUNT_DIRECT_IO 8 /* do not write nor read through page cache */
25#define CIFS_MOUNT_NO_XATTR 0x10 /* if set - disable xattr support */ 25#define CIFS_MOUNT_NO_XATTR 0x10 /* if set - disable xattr support */
26#define CIFS_MOUNT_MAP_SPECIAL_CHR 0x20 /* remap illegal chars in filenames */ 26#define CIFS_MOUNT_MAP_SPECIAL_CHR 0x20 /* remap illegal chars in filenames */
27#define CIFS_MOUNT_POSIX_PATHS 0x40 /* Negotiate posix pathnames if possible. */ 27#define CIFS_MOUNT_POSIX_PATHS 0x40 /* Negotiate posix pathnames if possible*/
28#define CIFS_MOUNT_UNX_EMUL 0x80 /* Network compat with SFUnix emulation */ 28#define CIFS_MOUNT_UNX_EMUL 0x80 /* Network compat with SFUnix emulation */
29#define CIFS_MOUNT_NO_BRL 0x100 /* No sending byte range locks to srv */ 29#define CIFS_MOUNT_NO_BRL 0x100 /* No sending byte range locks to srv */
30#define CIFS_MOUNT_CIFS_ACL 0x200 /* send ACL requests to non-POSIX srv */ 30#define CIFS_MOUNT_CIFS_ACL 0x200 /* send ACL requests to non-POSIX srv */
31#define CIFS_MOUNT_OVERR_UID 0x400 /* override uid returned from server */
32#define CIFS_MOUNT_OVERR_GID 0x800 /* override gid returned from server */
31 33
32struct cifs_sb_info { 34struct cifs_sb_info {
33 struct cifsTconInfo *tcon; /* primary mount */ 35 struct cifsTconInfo *tcon; /* primary mount */
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index d2a8b2941fc2..793c4b95c164 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -74,8 +74,8 @@ cifs_strtoUCS(__le16 * to, const char *from, int len,
74 charlen = codepage->char2uni(from, len, &wchar_to[i]); 74 charlen = codepage->char2uni(from, len, &wchar_to[i]);
75 if (charlen < 1) { 75 if (charlen < 1) {
76 cERROR(1, 76 cERROR(1,
77 ("cifs_strtoUCS: char2uni returned %d", 77 ("strtoUCS: char2uni of %d returned %d",
78 charlen)); 78 (int)*from, charlen));
79 /* A question mark */ 79 /* A question mark */
80 to[i] = cpu_to_le16(0x003f); 80 to[i] = cpu_to_le16(0x003f);
81 charlen = 1; 81 charlen = 1;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index faba4d69fe91..8568e100953c 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -100,7 +100,7 @@ cifs_read_super(struct super_block *sb, void *data,
100 sb->s_flags |= MS_NODIRATIME | MS_NOATIME; 100 sb->s_flags |= MS_NODIRATIME | MS_NOATIME;
101 sb->s_fs_info = kzalloc(sizeof(struct cifs_sb_info),GFP_KERNEL); 101 sb->s_fs_info = kzalloc(sizeof(struct cifs_sb_info),GFP_KERNEL);
102 cifs_sb = CIFS_SB(sb); 102 cifs_sb = CIFS_SB(sb);
103 if(cifs_sb == NULL) 103 if (cifs_sb == NULL)
104 return -ENOMEM; 104 return -ENOMEM;
105 105
106 rc = cifs_mount(sb, cifs_sb, data, devname); 106 rc = cifs_mount(sb, cifs_sb, data, devname);
@@ -115,10 +115,10 @@ cifs_read_super(struct super_block *sb, void *data,
115 sb->s_magic = CIFS_MAGIC_NUMBER; 115 sb->s_magic = CIFS_MAGIC_NUMBER;
116 sb->s_op = &cifs_super_ops; 116 sb->s_op = &cifs_super_ops;
117#ifdef CONFIG_CIFS_EXPERIMENTAL 117#ifdef CONFIG_CIFS_EXPERIMENTAL
118 if(experimEnabled != 0) 118 if (experimEnabled != 0)
119 sb->s_export_op = &cifs_export_ops; 119 sb->s_export_op = &cifs_export_ops;
120#endif /* EXPERIMENTAL */ 120#endif /* EXPERIMENTAL */
121/* if(cifs_sb->tcon->ses->server->maxBuf > MAX_CIFS_HDR_SIZE + 512) 121/* if (cifs_sb->tcon->ses->server->maxBuf > MAX_CIFS_HDR_SIZE + 512)
122 sb->s_blocksize = cifs_sb->tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE; */ 122 sb->s_blocksize = cifs_sb->tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE; */
123#ifdef CONFIG_CIFS_QUOTA 123#ifdef CONFIG_CIFS_QUOTA
124 sb->s_qcop = &cifs_quotactl_ops; 124 sb->s_qcop = &cifs_quotactl_ops;
@@ -147,8 +147,8 @@ out_no_root:
147 iput(inode); 147 iput(inode);
148 148
149out_mount_failed: 149out_mount_failed:
150 if(cifs_sb) { 150 if (cifs_sb) {
151 if(cifs_sb->local_nls) 151 if (cifs_sb->local_nls)
152 unload_nls(cifs_sb->local_nls); 152 unload_nls(cifs_sb->local_nls);
153 kfree(cifs_sb); 153 kfree(cifs_sb);
154 } 154 }
@@ -163,7 +163,7 @@ cifs_put_super(struct super_block *sb)
163 163
164 cFYI(1, ("In cifs_put_super")); 164 cFYI(1, ("In cifs_put_super"));
165 cifs_sb = CIFS_SB(sb); 165 cifs_sb = CIFS_SB(sb);
166 if(cifs_sb == NULL) { 166 if (cifs_sb == NULL) {
167 cFYI(1,("Empty cifs superblock info passed to unmount")); 167 cFYI(1,("Empty cifs superblock info passed to unmount"));
168 return; 168 return;
169 } 169 }
@@ -208,14 +208,14 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
208 208
209 /* Only need to call the old QFSInfo if failed 209 /* Only need to call the old QFSInfo if failed
210 on newer one */ 210 on newer one */
211 if(rc) 211 if (rc)
212 if(pTcon->ses->capabilities & CAP_NT_SMBS) 212 if (pTcon->ses->capabilities & CAP_NT_SMBS)
213 rc = CIFSSMBQFSInfo(xid, pTcon, buf); /* not supported by OS2 */ 213 rc = CIFSSMBQFSInfo(xid, pTcon, buf); /* not supported by OS2 */
214 214
215 /* Some old Windows servers also do not support level 103, retry with 215 /* Some old Windows servers also do not support level 103, retry with
216 older level one if old server failed the previous call or we 216 older level one if old server failed the previous call or we
217 bypassed it because we detected that this was an older LANMAN sess */ 217 bypassed it because we detected that this was an older LANMAN sess */
218 if(rc) 218 if (rc)
219 rc = SMBOldQFSInfo(xid, pTcon, buf); 219 rc = SMBOldQFSInfo(xid, pTcon, buf);
220 /* 220 /*
221 int f_type; 221 int f_type;
@@ -301,11 +301,19 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
301 if (cifs_sb->tcon->ses->userName) 301 if (cifs_sb->tcon->ses->userName)
302 seq_printf(s, ",username=%s", 302 seq_printf(s, ",username=%s",
303 cifs_sb->tcon->ses->userName); 303 cifs_sb->tcon->ses->userName);
304 if(cifs_sb->tcon->ses->domainName) 304 if (cifs_sb->tcon->ses->domainName)
305 seq_printf(s, ",domain=%s", 305 seq_printf(s, ",domain=%s",
306 cifs_sb->tcon->ses->domainName); 306 cifs_sb->tcon->ses->domainName);
307 } 307 }
308 } 308 }
309 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)
310 seq_printf(s, ",posixpaths");
311 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) ||
312 !(cifs_sb->tcon->ses->capabilities & CAP_UNIX))
313 seq_printf(s, ",uid=%d", cifs_sb->mnt_uid);
314 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) ||
315 !(cifs_sb->tcon->ses->capabilities & CAP_UNIX))
316 seq_printf(s, ",gid=%d", cifs_sb->mnt_gid);
309 seq_printf(s, ",rsize=%d",cifs_sb->rsize); 317 seq_printf(s, ",rsize=%d",cifs_sb->rsize);
310 seq_printf(s, ",wsize=%d",cifs_sb->wsize); 318 seq_printf(s, ",wsize=%d",cifs_sb->wsize);
311 } 319 }
@@ -321,14 +329,14 @@ int cifs_xquota_set(struct super_block * sb, int quota_type, qid_t qid,
321 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 329 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
322 struct cifsTconInfo *pTcon; 330 struct cifsTconInfo *pTcon;
323 331
324 if(cifs_sb) 332 if (cifs_sb)
325 pTcon = cifs_sb->tcon; 333 pTcon = cifs_sb->tcon;
326 else 334 else
327 return -EIO; 335 return -EIO;
328 336
329 337
330 xid = GetXid(); 338 xid = GetXid();
331 if(pTcon) { 339 if (pTcon) {
332 cFYI(1,("set type: 0x%x id: %d",quota_type,qid)); 340 cFYI(1,("set type: 0x%x id: %d",quota_type,qid));
333 } else { 341 } else {
334 return -EIO; 342 return -EIO;
@@ -346,13 +354,13 @@ int cifs_xquota_get(struct super_block * sb, int quota_type, qid_t qid,
346 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 354 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
347 struct cifsTconInfo *pTcon; 355 struct cifsTconInfo *pTcon;
348 356
349 if(cifs_sb) 357 if (cifs_sb)
350 pTcon = cifs_sb->tcon; 358 pTcon = cifs_sb->tcon;
351 else 359 else
352 return -EIO; 360 return -EIO;
353 361
354 xid = GetXid(); 362 xid = GetXid();
355 if(pTcon) { 363 if (pTcon) {
356 cFYI(1,("set type: 0x%x id: %d",quota_type,qid)); 364 cFYI(1,("set type: 0x%x id: %d",quota_type,qid));
357 } else { 365 } else {
358 rc = -EIO; 366 rc = -EIO;
@@ -369,13 +377,13 @@ int cifs_xstate_set(struct super_block * sb, unsigned int flags, int operation)
369 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 377 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
370 struct cifsTconInfo *pTcon; 378 struct cifsTconInfo *pTcon;
371 379
372 if(cifs_sb) 380 if (cifs_sb)
373 pTcon = cifs_sb->tcon; 381 pTcon = cifs_sb->tcon;
374 else 382 else
375 return -EIO; 383 return -EIO;
376 384
377 xid = GetXid(); 385 xid = GetXid();
378 if(pTcon) { 386 if (pTcon) {
379 cFYI(1,("flags: 0x%x operation: 0x%x",flags,operation)); 387 cFYI(1,("flags: 0x%x operation: 0x%x",flags,operation));
380 } else { 388 } else {
381 rc = -EIO; 389 rc = -EIO;
@@ -392,13 +400,13 @@ int cifs_xstate_get(struct super_block * sb, struct fs_quota_stat *qstats)
392 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 400 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
393 struct cifsTconInfo *pTcon; 401 struct cifsTconInfo *pTcon;
394 402
395 if(cifs_sb) { 403 if (cifs_sb) {
396 pTcon = cifs_sb->tcon; 404 pTcon = cifs_sb->tcon;
397 } else { 405 } else {
398 return -EIO; 406 return -EIO;
399 } 407 }
400 xid = GetXid(); 408 xid = GetXid();
401 if(pTcon) { 409 if (pTcon) {
402 cFYI(1,("pqstats %p",qstats)); 410 cFYI(1,("pqstats %p",qstats));
403 } else { 411 } else {
404 rc = -EIO; 412 rc = -EIO;
@@ -424,11 +432,11 @@ static void cifs_umount_begin(struct vfsmount * vfsmnt, int flags)
424 if (!(flags & MNT_FORCE)) 432 if (!(flags & MNT_FORCE))
425 return; 433 return;
426 cifs_sb = CIFS_SB(vfsmnt->mnt_sb); 434 cifs_sb = CIFS_SB(vfsmnt->mnt_sb);
427 if(cifs_sb == NULL) 435 if (cifs_sb == NULL)
428 return; 436 return;
429 437
430 tcon = cifs_sb->tcon; 438 tcon = cifs_sb->tcon;
431 if(tcon == NULL) 439 if (tcon == NULL)
432 return; 440 return;
433 down(&tcon->tconSem); 441 down(&tcon->tconSem);
434 if (atomic_read(&tcon->useCount) == 1) 442 if (atomic_read(&tcon->useCount) == 1)
@@ -437,7 +445,7 @@ static void cifs_umount_begin(struct vfsmount * vfsmnt, int flags)
437 445
438 /* cancel_brl_requests(tcon); */ /* BB mark all brl mids as exiting */ 446 /* cancel_brl_requests(tcon); */ /* BB mark all brl mids as exiting */
439 /* cancel_notify_requests(tcon); */ 447 /* cancel_notify_requests(tcon); */
440 if(tcon->ses && tcon->ses->server) 448 if (tcon->ses && tcon->ses->server)
441 { 449 {
442 cFYI(1,("wake up tasks now - umount begin not complete")); 450 cFYI(1,("wake up tasks now - umount begin not complete"));
443 wake_up_all(&tcon->ses->server->request_q); 451 wake_up_all(&tcon->ses->server->request_q);
@@ -529,8 +537,7 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int origin)
529 /* some applications poll for the file length in this strange 537 /* some applications poll for the file length in this strange
530 way so we must seek to end on non-oplocked files by 538 way so we must seek to end on non-oplocked files by
531 setting the revalidate time to zero */ 539 setting the revalidate time to zero */
532 if(file->f_path.dentry->d_inode) 540 CIFS_I(file->f_path.dentry->d_inode)->time = 0;
533 CIFS_I(file->f_path.dentry->d_inode)->time = 0;
534 541
535 retval = cifs_revalidate(file->f_path.dentry); 542 retval = cifs_revalidate(file->f_path.dentry);
536 if (retval < 0) 543 if (retval < 0)
@@ -694,8 +701,7 @@ cifs_init_once(void *inode, struct kmem_cache * cachep, unsigned long flags)
694{ 701{
695 struct cifsInodeInfo *cifsi = inode; 702 struct cifsInodeInfo *cifsi = inode;
696 703
697 if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == 704 if (flags & SLAB_CTOR_CONSTRUCTOR) {
698 SLAB_CTOR_CONSTRUCTOR) {
699 inode_init_once(&cifsi->vfs_inode); 705 inode_init_once(&cifsi->vfs_inode);
700 INIT_LIST_HEAD(&cifsi->lockList); 706 INIT_LIST_HEAD(&cifsi->lockList);
701 } 707 }
@@ -724,7 +730,7 @@ cifs_destroy_inodecache(void)
724static int 730static int
725cifs_init_request_bufs(void) 731cifs_init_request_bufs(void)
726{ 732{
727 if(CIFSMaxBufSize < 8192) { 733 if (CIFSMaxBufSize < 8192) {
728 /* Buffer size can not be smaller than 2 * PATH_MAX since maximum 734 /* Buffer size can not be smaller than 2 * PATH_MAX since maximum
729 Unicode path name has to fit in any SMB/CIFS path based frames */ 735 Unicode path name has to fit in any SMB/CIFS path based frames */
730 CIFSMaxBufSize = 8192; 736 CIFSMaxBufSize = 8192;
@@ -741,7 +747,7 @@ cifs_init_request_bufs(void)
741 if (cifs_req_cachep == NULL) 747 if (cifs_req_cachep == NULL)
742 return -ENOMEM; 748 return -ENOMEM;
743 749
744 if(cifs_min_rcv < 1) 750 if (cifs_min_rcv < 1)
745 cifs_min_rcv = 1; 751 cifs_min_rcv = 1;
746 else if (cifs_min_rcv > 64) { 752 else if (cifs_min_rcv > 64) {
747 cifs_min_rcv = 64; 753 cifs_min_rcv = 64;
@@ -751,7 +757,7 @@ cifs_init_request_bufs(void)
751 cifs_req_poolp = mempool_create_slab_pool(cifs_min_rcv, 757 cifs_req_poolp = mempool_create_slab_pool(cifs_min_rcv,
752 cifs_req_cachep); 758 cifs_req_cachep);
753 759
754 if(cifs_req_poolp == NULL) { 760 if (cifs_req_poolp == NULL) {
755 kmem_cache_destroy(cifs_req_cachep); 761 kmem_cache_destroy(cifs_req_cachep);
756 return -ENOMEM; 762 return -ENOMEM;
757 } 763 }
@@ -772,7 +778,7 @@ cifs_init_request_bufs(void)
772 return -ENOMEM; 778 return -ENOMEM;
773 } 779 }
774 780
775 if(cifs_min_small < 2) 781 if (cifs_min_small < 2)
776 cifs_min_small = 2; 782 cifs_min_small = 2;
777 else if (cifs_min_small > 256) { 783 else if (cifs_min_small > 256) {
778 cifs_min_small = 256; 784 cifs_min_small = 256;
@@ -782,7 +788,7 @@ cifs_init_request_bufs(void)
782 cifs_sm_req_poolp = mempool_create_slab_pool(cifs_min_small, 788 cifs_sm_req_poolp = mempool_create_slab_pool(cifs_min_small,
783 cifs_sm_req_cachep); 789 cifs_sm_req_cachep);
784 790
785 if(cifs_sm_req_poolp == NULL) { 791 if (cifs_sm_req_poolp == NULL) {
786 mempool_destroy(cifs_req_poolp); 792 mempool_destroy(cifs_req_poolp);
787 kmem_cache_destroy(cifs_req_cachep); 793 kmem_cache_destroy(cifs_req_cachep);
788 kmem_cache_destroy(cifs_sm_req_cachep); 794 kmem_cache_destroy(cifs_sm_req_cachep);
@@ -812,7 +818,7 @@ cifs_init_mids(void)
812 818
813 /* 3 is a reasonable minimum number of simultaneous operations */ 819 /* 3 is a reasonable minimum number of simultaneous operations */
814 cifs_mid_poolp = mempool_create_slab_pool(3, cifs_mid_cachep); 820 cifs_mid_poolp = mempool_create_slab_pool(3, cifs_mid_cachep);
815 if(cifs_mid_poolp == NULL) { 821 if (cifs_mid_poolp == NULL) {
816 kmem_cache_destroy(cifs_mid_cachep); 822 kmem_cache_destroy(cifs_mid_cachep);
817 return -ENOMEM; 823 return -ENOMEM;
818 } 824 }
@@ -850,14 +856,14 @@ static int cifs_oplock_thread(void * dummyarg)
850 continue; 856 continue;
851 857
852 spin_lock(&GlobalMid_Lock); 858 spin_lock(&GlobalMid_Lock);
853 if(list_empty(&GlobalOplock_Q)) { 859 if (list_empty(&GlobalOplock_Q)) {
854 spin_unlock(&GlobalMid_Lock); 860 spin_unlock(&GlobalMid_Lock);
855 set_current_state(TASK_INTERRUPTIBLE); 861 set_current_state(TASK_INTERRUPTIBLE);
856 schedule_timeout(39*HZ); 862 schedule_timeout(39*HZ);
857 } else { 863 } else {
858 oplock_item = list_entry(GlobalOplock_Q.next, 864 oplock_item = list_entry(GlobalOplock_Q.next,
859 struct oplock_q_entry, qhead); 865 struct oplock_q_entry, qhead);
860 if(oplock_item) { 866 if (oplock_item) {
861 cFYI(1,("found oplock item to write out")); 867 cFYI(1,("found oplock item to write out"));
862 pTcon = oplock_item->tcon; 868 pTcon = oplock_item->tcon;
863 inode = oplock_item->pinode; 869 inode = oplock_item->pinode;
@@ -871,7 +877,7 @@ static int cifs_oplock_thread(void * dummyarg)
871 /* mutex_lock(&inode->i_mutex);*/ 877 /* mutex_lock(&inode->i_mutex);*/
872 if (S_ISREG(inode->i_mode)) { 878 if (S_ISREG(inode->i_mode)) {
873 rc = filemap_fdatawrite(inode->i_mapping); 879 rc = filemap_fdatawrite(inode->i_mapping);
874 if(CIFS_I(inode)->clientCanCacheRead == 0) { 880 if (CIFS_I(inode)->clientCanCacheRead == 0) {
875 filemap_fdatawait(inode->i_mapping); 881 filemap_fdatawait(inode->i_mapping);
876 invalidate_remote_inode(inode); 882 invalidate_remote_inode(inode);
877 } 883 }
@@ -888,7 +894,7 @@ static int cifs_oplock_thread(void * dummyarg)
888 not bother sending an oplock release if session 894 not bother sending an oplock release if session
889 to server still is disconnected since oplock 895 to server still is disconnected since oplock
890 already released by the server in that case */ 896 already released by the server in that case */
891 if(pTcon->tidStatus != CifsNeedReconnect) { 897 if (pTcon->tidStatus != CifsNeedReconnect) {
892 rc = CIFSSMBLock(0, pTcon, netfid, 898 rc = CIFSSMBLock(0, pTcon, netfid,
893 0 /* len */ , 0 /* offset */, 0, 899 0 /* len */ , 0 /* offset */, 0,
894 0, LOCKING_ANDX_OPLOCK_RELEASE, 900 0, LOCKING_ANDX_OPLOCK_RELEASE,
@@ -922,7 +928,7 @@ static int cifs_dnotify_thread(void * dummyarg)
922 list_for_each(tmp, &GlobalSMBSessionList) { 928 list_for_each(tmp, &GlobalSMBSessionList) {
923 ses = list_entry(tmp, struct cifsSesInfo, 929 ses = list_entry(tmp, struct cifsSesInfo,
924 cifsSessionList); 930 cifsSessionList);
925 if(ses && ses->server && 931 if (ses && ses->server &&
926 atomic_read(&ses->server->inFlight)) 932 atomic_read(&ses->server->inFlight))
927 wake_up_all(&ses->server->response_q); 933 wake_up_all(&ses->server->response_q);
928 } 934 }
@@ -971,10 +977,10 @@ init_cifs(void)
971 rwlock_init(&GlobalSMBSeslock); 977 rwlock_init(&GlobalSMBSeslock);
972 spin_lock_init(&GlobalMid_Lock); 978 spin_lock_init(&GlobalMid_Lock);
973 979
974 if(cifs_max_pending < 2) { 980 if (cifs_max_pending < 2) {
975 cifs_max_pending = 2; 981 cifs_max_pending = 2;
976 cFYI(1,("cifs_max_pending set to min of 2")); 982 cFYI(1,("cifs_max_pending set to min of 2"));
977 } else if(cifs_max_pending > 256) { 983 } else if (cifs_max_pending > 256) {
978 cifs_max_pending = 256; 984 cifs_max_pending = 256;
979 cFYI(1,("cifs_max_pending set to max of 256")); 985 cFYI(1,("cifs_max_pending set to max of 256"));
980 } 986 }
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 2c2c384894d8..c235d32ad4a8 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -100,5 +100,5 @@ extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t);
100extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); 100extern ssize_t cifs_listxattr(struct dentry *, char *, size_t);
101extern int cifs_ioctl (struct inode * inode, struct file * filep, 101extern int cifs_ioctl (struct inode * inode, struct file * filep,
102 unsigned int command, unsigned long arg); 102 unsigned int command, unsigned long arg);
103#define CIFS_VERSION "1.48" 103#define CIFS_VERSION "1.49"
104#endif /* _CIFSFS_H */ 104#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index e4de8eba4780..23655de2f4a4 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -311,7 +311,7 @@ struct cifsFileInfo {
311 /* lock scope id (0 if none) */ 311 /* lock scope id (0 if none) */
312 struct file * pfile; /* needed for writepage */ 312 struct file * pfile; /* needed for writepage */
313 struct inode * pInode; /* needed for oplock break */ 313 struct inode * pInode; /* needed for oplock break */
314 struct semaphore lock_sem; 314 struct mutex lock_mutex;
315 struct list_head llist; /* list of byte range locks we have. */ 315 struct list_head llist; /* list of byte range locks we have. */
316 unsigned closePend:1; /* file is marked to close */ 316 unsigned closePend:1; /* file is marked to close */
317 unsigned invalidHandle:1; /* file closed via session abend */ 317 unsigned invalidHandle:1; /* file closed via session abend */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 4d8948e8762c..d619ca7d1416 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -1388,7 +1388,7 @@ struct smb_t2_rsp {
1388#define SMB_SET_POSIX_LOCK 0x208 1388#define SMB_SET_POSIX_LOCK 0x208
1389#define SMB_POSIX_OPEN 0x209 1389#define SMB_POSIX_OPEN 0x209
1390#define SMB_POSIX_UNLINK 0x20a 1390#define SMB_POSIX_UNLINK 0x20a
1391#define SMB_SET_FILE_UNIX_INFO2 1391#define SMB_SET_FILE_UNIX_INFO2 0x20b
1392#define SMB_SET_FILE_BASIC_INFO2 0x3ec 1392#define SMB_SET_FILE_BASIC_INFO2 0x3ec
1393#define SMB_SET_FILE_RENAME_INFORMATION 0x3f2 /* BB check if qpathinfo too */ 1393#define SMB_SET_FILE_RENAME_INFORMATION 0x3f2 /* BB check if qpathinfo too */
1394#define SMB_FILE_ALL_INFO2 0x3fa 1394#define SMB_FILE_ALL_INFO2 0x3fa
@@ -2109,22 +2109,40 @@ struct cifs_posix_acl { /* access conrol list (ACL) */
2109 2109
2110/* end of POSIX ACL definitions */ 2110/* end of POSIX ACL definitions */
2111 2111
2112/* POSIX Open Flags */
2113#define SMB_O_RDONLY 0x1
2114#define SMB_O_WRONLY 0x2
2115#define SMB_O_RDWR 0x4
2116#define SMB_O_CREAT 0x10
2117#define SMB_O_EXCL 0x20
2118#define SMB_O_TRUNC 0x40
2119#define SMB_O_APPEND 0x80
2120#define SMB_O_SYNC 0x100
2121#define SMB_O_DIRECTORY 0x200
2122#define SMB_O_NOFOLLOW 0x400
2123#define SMB_O_DIRECT 0x800
2124
2112typedef struct { 2125typedef struct {
2113 __u32 OpenFlags; /* same as NT CreateX */ 2126 __le32 OpenFlags; /* same as NT CreateX */
2114 __u32 PosixOpenFlags; 2127 __le32 PosixOpenFlags;
2115 __u32 Mode; 2128 __le64 Permissions;
2116 __u16 Level; /* reply level requested (see QPathInfo levels) */ 2129 __le16 Level; /* reply level requested (see QPathInfo levels) */
2117 __u16 Pad; /* reserved - MBZ */
2118} __attribute__((packed)) OPEN_PSX_REQ; /* level 0x209 SetPathInfo data */ 2130} __attribute__((packed)) OPEN_PSX_REQ; /* level 0x209 SetPathInfo data */
2119 2131
2120typedef struct { 2132typedef struct {
2121 /* reply varies based on requested level */ 2133 __le16 OplockFlags;
2134 __u16 Fid;
2135 __le32 CreateAction;
2136 __le16 ReturnedLevel;
2137 __le16 Pad;
2138 /* struct following varies based on requested level */
2122} __attribute__((packed)) OPEN_PSX_RSP; /* level 0x209 SetPathInfo data */ 2139} __attribute__((packed)) OPEN_PSX_RSP; /* level 0x209 SetPathInfo data */
2123 2140
2124 2141
2125struct file_internal_info { 2142struct file_internal_info {
2126 __u64 UniqueId; /* inode number */ 2143 __u64 UniqueId; /* inode number */
2127} __attribute__((packed)); /* level 0x3ee */ 2144} __attribute__((packed)); /* level 0x3ee */
2145
2128struct file_mode_info { 2146struct file_mode_info {
2129 __le32 Mode; 2147 __le32 Mode;
2130} __attribute__((packed)); /* level 0x3f8 */ 2148} __attribute__((packed)); /* level 0x3f8 */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 32eb1acab630..5d163e2b6143 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -1,7 +1,7 @@
1/* 1/*
2 * fs/cifs/cifsproto.h 2 * fs/cifs/cifsproto.h
3 * 3 *
4 * Copyright (c) International Business Machines Corp., 2002,2006 4 * Copyright (c) International Business Machines Corp., 2002,2007
5 * Author(s): Steve French (sfrench@us.ibm.com) 5 * Author(s): Steve French (sfrench@us.ibm.com)
6 * 6 *
7 * This library is free software; you can redistribute it and/or modify 7 * This library is free software; you can redistribute it and/or modify
@@ -244,6 +244,11 @@ extern int SMBLegacyOpen(const int xid, struct cifsTconInfo *tcon,
244 const int access_flags, const int omode, 244 const int access_flags, const int omode,
245 __u16 * netfid, int *pOplock, FILE_ALL_INFO *, 245 __u16 * netfid, int *pOplock, FILE_ALL_INFO *,
246 const struct nls_table *nls_codepage, int remap); 246 const struct nls_table *nls_codepage, int remap);
247extern int CIFSPOSIXCreate(const int xid, struct cifsTconInfo *tcon,
248 u32 posix_flags, __u64 mode, __u16 * netfid,
249 FILE_UNIX_BASIC_INFO *pRetData,
250 __u32 *pOplock, const char *name,
251 const struct nls_table *nls_codepage, int remap);
247extern int CIFSSMBClose(const int xid, struct cifsTconInfo *tcon, 252extern int CIFSSMBClose(const int xid, struct cifsTconInfo *tcon,
248 const int smb_file_id); 253 const int smb_file_id);
249 254
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 48fc0c2ab0e5..14de58fa1437 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * fs/cifs/cifssmb.c 2 * fs/cifs/cifssmb.c
3 * 3 *
4 * Copyright (C) International Business Machines Corp., 2002,2006 4 * Copyright (C) International Business Machines Corp., 2002,2007
5 * Author(s): Steve French (sfrench@us.ibm.com) 5 * Author(s): Steve French (sfrench@us.ibm.com)
6 * 6 *
7 * Contains the routines for constructing the SMB PDUs themselves 7 * Contains the routines for constructing the SMB PDUs themselves
@@ -24,8 +24,8 @@
24 /* SMB/CIFS PDU handling routines here - except for leftovers in connect.c */ 24 /* SMB/CIFS PDU handling routines here - except for leftovers in connect.c */
25 /* These are mostly routines that operate on a pathname, or on a tree id */ 25 /* These are mostly routines that operate on a pathname, or on a tree id */
26 /* (mounted volume), but there are eight handle based routines which must be */ 26 /* (mounted volume), but there are eight handle based routines which must be */
27 /* treated slightly different for reconnection purposes since we never want */ 27 /* treated slightly differently for reconnection purposes since we never */
28 /* to reuse a stale file handle and the caller knows the file handle */ 28 /* want to reuse a stale file handle and only the caller knows the file info */
29 29
30#include <linux/fs.h> 30#include <linux/fs.h>
31#include <linux/kernel.h> 31#include <linux/kernel.h>
@@ -913,6 +913,130 @@ MkDirRetry:
913 return rc; 913 return rc;
914} 914}
915 915
916int
917CIFSPOSIXCreate(const int xid, struct cifsTconInfo *tcon, __u32 posix_flags,
918 __u64 mode, __u16 * netfid, FILE_UNIX_BASIC_INFO *pRetData,
919 __u32 *pOplock, const char *name,
920 const struct nls_table *nls_codepage, int remap)
921{
922 TRANSACTION2_SPI_REQ *pSMB = NULL;
923 TRANSACTION2_SPI_RSP *pSMBr = NULL;
924 int name_len;
925 int rc = 0;
926 int bytes_returned = 0;
927 char *data_offset;
928 __u16 params, param_offset, offset, byte_count, count;
929 OPEN_PSX_REQ * pdata;
930 OPEN_PSX_RSP * psx_rsp;
931
932 cFYI(1, ("In POSIX Create"));
933PsxCreat:
934 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
935 (void **) &pSMBr);
936 if (rc)
937 return rc;
938
939 if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) {
940 name_len =
941 cifsConvertToUCS((__le16 *) pSMB->FileName, name,
942 PATH_MAX, nls_codepage, remap);
943 name_len++; /* trailing null */
944 name_len *= 2;
945 } else { /* BB improve the check for buffer overruns BB */
946 name_len = strnlen(name, PATH_MAX);
947 name_len++; /* trailing null */
948 strncpy(pSMB->FileName, name, name_len);
949 }
950
951 params = 6 + name_len;
952 count = sizeof(OPEN_PSX_REQ);
953 pSMB->MaxParameterCount = cpu_to_le16(2);
954 pSMB->MaxDataCount = cpu_to_le16(1000); /* large enough */
955 pSMB->MaxSetupCount = 0;
956 pSMB->Reserved = 0;
957 pSMB->Flags = 0;
958 pSMB->Timeout = 0;
959 pSMB->Reserved2 = 0;
960 param_offset = offsetof(struct smb_com_transaction2_spi_req,
961 InformationLevel) - 4;
962 offset = param_offset + params;
963 data_offset = (char *) (&pSMB->hdr.Protocol) + offset;
964 pdata = (OPEN_PSX_REQ *)(((char *)&pSMB->hdr.Protocol) + offset);
965 pdata->Level = SMB_QUERY_FILE_UNIX_BASIC;
966 pdata->Permissions = cpu_to_le64(mode);
967 pdata->PosixOpenFlags = cpu_to_le32(posix_flags);
968 pdata->OpenFlags = cpu_to_le32(*pOplock);
969 pSMB->ParameterOffset = cpu_to_le16(param_offset);
970 pSMB->DataOffset = cpu_to_le16(offset);
971 pSMB->SetupCount = 1;
972 pSMB->Reserved3 = 0;
973 pSMB->SubCommand = cpu_to_le16(TRANS2_SET_PATH_INFORMATION);
974 byte_count = 3 /* pad */ + params + count;
975
976 pSMB->DataCount = cpu_to_le16(count);
977 pSMB->ParameterCount = cpu_to_le16(params);
978 pSMB->TotalDataCount = pSMB->DataCount;
979 pSMB->TotalParameterCount = pSMB->ParameterCount;
980 pSMB->InformationLevel = cpu_to_le16(SMB_POSIX_OPEN);
981 pSMB->Reserved4 = 0;
982 pSMB->hdr.smb_buf_length += byte_count;
983 pSMB->ByteCount = cpu_to_le16(byte_count);
984 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
985 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
986 if (rc) {
987 cFYI(1, ("Posix create returned %d", rc));
988 goto psx_create_err;
989 }
990
991 cFYI(1,("copying inode info"));
992 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
993
994 if (rc || (pSMBr->ByteCount < sizeof(OPEN_PSX_RSP))) {
995 rc = -EIO; /* bad smb */
996 goto psx_create_err;
997 }
998
999 /* copy return information to pRetData */
1000 psx_rsp = (OPEN_PSX_RSP *)((char *) &pSMBr->hdr.Protocol
1001 + le16_to_cpu(pSMBr->t2.DataOffset));
1002
1003 *pOplock = le16_to_cpu(psx_rsp->OplockFlags);
1004 if(netfid)
1005 *netfid = psx_rsp->Fid; /* cifs fid stays in le */
1006 /* Let caller know file was created so we can set the mode. */
1007 /* Do we care about the CreateAction in any other cases? */
1008 if(cpu_to_le32(FILE_CREATE) == psx_rsp->CreateAction)
1009 *pOplock |= CIFS_CREATE_ACTION;
1010 /* check to make sure response data is there */
1011 if(psx_rsp->ReturnedLevel != SMB_QUERY_FILE_UNIX_BASIC) {
1012 pRetData->Type = -1; /* unknown */
1013#ifdef CONFIG_CIFS_DEBUG2
1014 cFYI(1,("unknown type"));
1015#endif
1016 } else {
1017 if(pSMBr->ByteCount < sizeof(OPEN_PSX_RSP)
1018 + sizeof(FILE_UNIX_BASIC_INFO)) {
1019 cERROR(1,("Open response data too small"));
1020 pRetData->Type = -1;
1021 goto psx_create_err;
1022 }
1023 memcpy((char *) pRetData,
1024 (char *)psx_rsp + sizeof(OPEN_PSX_RSP),
1025 sizeof (FILE_UNIX_BASIC_INFO));
1026 }
1027
1028
1029psx_create_err:
1030 cifs_buf_release(pSMB);
1031
1032 cifs_stats_inc(&tcon->num_mkdirs);
1033
1034 if (rc == -EAGAIN)
1035 goto PsxCreat;
1036
1037 return rc;
1038}
1039
916static __u16 convert_disposition(int disposition) 1040static __u16 convert_disposition(int disposition)
917{ 1041{
918 __u16 ofun = 0; 1042 __u16 ofun = 0;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 20ba7dcc9959..216fb625843f 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -30,6 +30,7 @@
30#include <linux/mempool.h> 30#include <linux/mempool.h>
31#include <linux/delay.h> 31#include <linux/delay.h>
32#include <linux/completion.h> 32#include <linux/completion.h>
33#include <linux/kthread.h>
33#include <linux/pagevec.h> 34#include <linux/pagevec.h>
34#include <linux/freezer.h> 35#include <linux/freezer.h>
35#include <asm/uaccess.h> 36#include <asm/uaccess.h>
@@ -74,6 +75,8 @@ struct smb_vol {
74 unsigned retry:1; 75 unsigned retry:1;
75 unsigned intr:1; 76 unsigned intr:1;
76 unsigned setuids:1; 77 unsigned setuids:1;
78 unsigned override_uid:1;
79 unsigned override_gid:1;
77 unsigned noperm:1; 80 unsigned noperm:1;
78 unsigned no_psx_acl:1; /* set if posix acl support should be disabled */ 81 unsigned no_psx_acl:1; /* set if posix acl support should be disabled */
79 unsigned cifs_acl:1; 82 unsigned cifs_acl:1;
@@ -120,7 +123,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
120 struct mid_q_entry * mid_entry; 123 struct mid_q_entry * mid_entry;
121 124
122 spin_lock(&GlobalMid_Lock); 125 spin_lock(&GlobalMid_Lock);
123 if(server->tcpStatus == CifsExiting) { 126 if( kthread_should_stop() ) {
124 /* the demux thread will exit normally 127 /* the demux thread will exit normally
125 next time through the loop */ 128 next time through the loop */
126 spin_unlock(&GlobalMid_Lock); 129 spin_unlock(&GlobalMid_Lock);
@@ -182,7 +185,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
182 spin_unlock(&GlobalMid_Lock); 185 spin_unlock(&GlobalMid_Lock);
183 up(&server->tcpSem); 186 up(&server->tcpSem);
184 187
185 while ((server->tcpStatus != CifsExiting) && (server->tcpStatus != CifsGood)) 188 while ( (!kthread_should_stop()) && (server->tcpStatus != CifsGood))
186 { 189 {
187 try_to_freeze(); 190 try_to_freeze();
188 if(server->protocolType == IPV6) { 191 if(server->protocolType == IPV6) {
@@ -199,7 +202,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
199 } else { 202 } else {
200 atomic_inc(&tcpSesReconnectCount); 203 atomic_inc(&tcpSesReconnectCount);
201 spin_lock(&GlobalMid_Lock); 204 spin_lock(&GlobalMid_Lock);
202 if(server->tcpStatus != CifsExiting) 205 if( !kthread_should_stop() )
203 server->tcpStatus = CifsGood; 206 server->tcpStatus = CifsGood;
204 server->sequence_number = 0; 207 server->sequence_number = 0;
205 spin_unlock(&GlobalMid_Lock); 208 spin_unlock(&GlobalMid_Lock);
@@ -345,7 +348,6 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
345 int isMultiRsp; 348 int isMultiRsp;
346 int reconnect; 349 int reconnect;
347 350
348 daemonize("cifsd");
349 allow_signal(SIGKILL); 351 allow_signal(SIGKILL);
350 current->flags |= PF_MEMALLOC; 352 current->flags |= PF_MEMALLOC;
351 server->tsk = current; /* save process info to wake at shutdown */ 353 server->tsk = current; /* save process info to wake at shutdown */
@@ -361,7 +363,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
361 GFP_KERNEL); 363 GFP_KERNEL);
362 } 364 }
363 365
364 while (server->tcpStatus != CifsExiting) { 366 while (!kthread_should_stop()) {
365 if (try_to_freeze()) 367 if (try_to_freeze())
366 continue; 368 continue;
367 if (bigbuf == NULL) { 369 if (bigbuf == NULL) {
@@ -400,7 +402,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
400 kernel_recvmsg(csocket, &smb_msg, 402 kernel_recvmsg(csocket, &smb_msg,
401 &iov, 1, 4, 0 /* BB see socket.h flags */); 403 &iov, 1, 4, 0 /* BB see socket.h flags */);
402 404
403 if (server->tcpStatus == CifsExiting) { 405 if ( kthread_should_stop() ) {
404 break; 406 break;
405 } else if (server->tcpStatus == CifsNeedReconnect) { 407 } else if (server->tcpStatus == CifsNeedReconnect) {
406 cFYI(1, ("Reconnect after server stopped responding")); 408 cFYI(1, ("Reconnect after server stopped responding"));
@@ -524,7 +526,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
524 total_read += length) { 526 total_read += length) {
525 length = kernel_recvmsg(csocket, &smb_msg, &iov, 1, 527 length = kernel_recvmsg(csocket, &smb_msg, &iov, 1,
526 pdu_length - total_read, 0); 528 pdu_length - total_read, 0);
527 if((server->tcpStatus == CifsExiting) || 529 if( kthread_should_stop() ||
528 (length == -EINTR)) { 530 (length == -EINTR)) {
529 /* then will exit */ 531 /* then will exit */
530 reconnect = 2; 532 reconnect = 2;
@@ -757,7 +759,6 @@ multi_t2_fnd:
757 GFP_KERNEL); 759 GFP_KERNEL);
758 } 760 }
759 761
760 complete_and_exit(&cifsd_complete, 0);
761 return 0; 762 return 0;
762} 763}
763 764
@@ -973,7 +974,7 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
973 } 974 }
974 if ((temp_len = strnlen(value, 300)) < 300) { 975 if ((temp_len = strnlen(value, 300)) < 300) {
975 vol->UNC = kmalloc(temp_len+1,GFP_KERNEL); 976 vol->UNC = kmalloc(temp_len+1,GFP_KERNEL);
976 if(vol->UNC == NULL) 977 if (vol->UNC == NULL)
977 return 1; 978 return 1;
978 strcpy(vol->UNC,value); 979 strcpy(vol->UNC,value);
979 if (strncmp(vol->UNC, "//", 2) == 0) { 980 if (strncmp(vol->UNC, "//", 2) == 0) {
@@ -1010,12 +1011,12 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
1010 return 1; /* needs_arg; */ 1011 return 1; /* needs_arg; */
1011 } 1012 }
1012 if ((temp_len = strnlen(value, 1024)) < 1024) { 1013 if ((temp_len = strnlen(value, 1024)) < 1024) {
1013 if(value[0] != '/') 1014 if (value[0] != '/')
1014 temp_len++; /* missing leading slash */ 1015 temp_len++; /* missing leading slash */
1015 vol->prepath = kmalloc(temp_len+1,GFP_KERNEL); 1016 vol->prepath = kmalloc(temp_len+1,GFP_KERNEL);
1016 if(vol->prepath == NULL) 1017 if (vol->prepath == NULL)
1017 return 1; 1018 return 1;
1018 if(value[0] != '/') { 1019 if (value[0] != '/') {
1019 vol->prepath[0] = '/'; 1020 vol->prepath[0] = '/';
1020 strcpy(vol->prepath+1,value); 1021 strcpy(vol->prepath+1,value);
1021 } else 1022 } else
@@ -1031,7 +1032,7 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
1031 return 1; /* needs_arg; */ 1032 return 1; /* needs_arg; */
1032 } 1033 }
1033 if (strnlen(value, 65) < 65) { 1034 if (strnlen(value, 65) < 65) {
1034 if(strnicmp(value,"default",7)) 1035 if (strnicmp(value,"default",7))
1035 vol->iocharset = value; 1036 vol->iocharset = value;
1036 /* if iocharset not set load_nls_default used by caller */ 1037 /* if iocharset not set load_nls_default used by caller */
1037 cFYI(1, ("iocharset set to %s",value)); 1038 cFYI(1, ("iocharset set to %s",value));
@@ -1043,11 +1044,13 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
1043 if (value && *value) { 1044 if (value && *value) {
1044 vol->linux_uid = 1045 vol->linux_uid =
1045 simple_strtoul(value, &value, 0); 1046 simple_strtoul(value, &value, 0);
1047 vol->override_uid = 1;
1046 } 1048 }
1047 } else if (strnicmp(data, "gid", 3) == 0) { 1049 } else if (strnicmp(data, "gid", 3) == 0) {
1048 if (value && *value) { 1050 if (value && *value) {
1049 vol->linux_gid = 1051 vol->linux_gid =
1050 simple_strtoul(value, &value, 0); 1052 simple_strtoul(value, &value, 0);
1053 vol->override_gid = 1;
1051 } 1054 }
1052 } else if (strnicmp(data, "file_mode", 4) == 0) { 1055 } else if (strnicmp(data, "file_mode", 4) == 0) {
1053 if (value && *value) { 1056 if (value && *value) {
@@ -1102,7 +1105,7 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
1102 } 1105 }
1103 /* The string has 16th byte zero still from 1106 /* The string has 16th byte zero still from
1104 set at top of the function */ 1107 set at top of the function */
1105 if((i==15) && (value[i] != 0)) 1108 if ((i==15) && (value[i] != 0))
1106 printk(KERN_WARNING "CIFS: netbiosname longer than 15 truncated.\n"); 1109 printk(KERN_WARNING "CIFS: netbiosname longer than 15 truncated.\n");
1107 } 1110 }
1108 } else if (strnicmp(data, "servern", 7) == 0) { 1111 } else if (strnicmp(data, "servern", 7) == 0) {
@@ -1126,7 +1129,7 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
1126 } 1129 }
1127 /* The string has 16th byte zero still from 1130 /* The string has 16th byte zero still from
1128 set at top of the function */ 1131 set at top of the function */
1129 if((i==15) && (value[i] != 0)) 1132 if ((i==15) && (value[i] != 0))
1130 printk(KERN_WARNING "CIFS: server netbiosname longer than 15 truncated.\n"); 1133 printk(KERN_WARNING "CIFS: server netbiosname longer than 15 truncated.\n");
1131 } 1134 }
1132 } else if (strnicmp(data, "credentials", 4) == 0) { 1135 } else if (strnicmp(data, "credentials", 4) == 0) {
@@ -1233,13 +1236,13 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
1233 printk(KERN_WARNING "CIFS: Unknown mount option %s\n",data); 1236 printk(KERN_WARNING "CIFS: Unknown mount option %s\n",data);
1234 } 1237 }
1235 if (vol->UNC == NULL) { 1238 if (vol->UNC == NULL) {
1236 if(devname == NULL) { 1239 if (devname == NULL) {
1237 printk(KERN_WARNING "CIFS: Missing UNC name for mount target\n"); 1240 printk(KERN_WARNING "CIFS: Missing UNC name for mount target\n");
1238 return 1; 1241 return 1;
1239 } 1242 }
1240 if ((temp_len = strnlen(devname, 300)) < 300) { 1243 if ((temp_len = strnlen(devname, 300)) < 300) {
1241 vol->UNC = kmalloc(temp_len+1,GFP_KERNEL); 1244 vol->UNC = kmalloc(temp_len+1,GFP_KERNEL);
1242 if(vol->UNC == NULL) 1245 if (vol->UNC == NULL)
1243 return 1; 1246 return 1;
1244 strcpy(vol->UNC,devname); 1247 strcpy(vol->UNC,devname);
1245 if (strncmp(vol->UNC, "//", 2) == 0) { 1248 if (strncmp(vol->UNC, "//", 2) == 0) {
@@ -1663,7 +1666,13 @@ void reset_cifs_unix_caps(int xid, struct cifsTconInfo * tcon,
1663 CIFS_SB(sb)->mnt_cifs_flags |= 1666 CIFS_SB(sb)->mnt_cifs_flags |=
1664 CIFS_MOUNT_POSIX_PATHS; 1667 CIFS_MOUNT_POSIX_PATHS;
1665 } 1668 }
1666 1669
1670 /* We might be setting the path sep back to a different
1671 form if we are reconnecting and the server switched its
1672 posix path capability for this share */
1673 if(sb && (CIFS_SB(sb)->prepathlen > 0))
1674 CIFS_SB(sb)->prepath[0] = CIFS_DIR_SEP(CIFS_SB(sb));
1675
1667 cFYI(1,("Negotiate caps 0x%x",(int)cap)); 1676 cFYI(1,("Negotiate caps 0x%x",(int)cap));
1668#ifdef CONFIG_CIFS_DEBUG2 1677#ifdef CONFIG_CIFS_DEBUG2
1669 if(cap & CIFS_UNIX_FCNTL_CAP) 1678 if(cap & CIFS_UNIX_FCNTL_CAP)
@@ -1712,12 +1721,12 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1712 return -EINVAL; 1721 return -EINVAL;
1713 } 1722 }
1714 1723
1715 if (volume_info.username) { 1724 if (volume_info.nullauth) {
1725 cFYI(1,("null user"));
1726 volume_info.username = NULL;
1727 } else if (volume_info.username) {
1716 /* BB fixme parse for domain name here */ 1728 /* BB fixme parse for domain name here */
1717 cFYI(1, ("Username: %s ", volume_info.username)); 1729 cFYI(1, ("Username: %s ", volume_info.username));
1718
1719 } else if (volume_info.nullauth) {
1720 cFYI(1,("null user"));
1721 } else { 1730 } else {
1722 cifserror("No username specified"); 1731 cifserror("No username specified");
1723 /* In userspace mount helper we can get user name from alternate 1732 /* In userspace mount helper we can get user name from alternate
@@ -1791,11 +1800,12 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1791 existingCifsSes = cifs_find_tcp_session(&sin_server.sin_addr, 1800 existingCifsSes = cifs_find_tcp_session(&sin_server.sin_addr,
1792 NULL /* no ipv6 addr */, 1801 NULL /* no ipv6 addr */,
1793 volume_info.username, &srvTcp); 1802 volume_info.username, &srvTcp);
1794 else if(address_type == AF_INET6) 1803 else if(address_type == AF_INET6) {
1804 cFYI(1,("looking for ipv6 address"));
1795 existingCifsSes = cifs_find_tcp_session(NULL /* no ipv4 addr */, 1805 existingCifsSes = cifs_find_tcp_session(NULL /* no ipv4 addr */,
1796 &sin_server6.sin6_addr, 1806 &sin_server6.sin6_addr,
1797 volume_info.username, &srvTcp); 1807 volume_info.username, &srvTcp);
1798 else { 1808 } else {
1799 kfree(volume_info.UNC); 1809 kfree(volume_info.UNC);
1800 kfree(volume_info.password); 1810 kfree(volume_info.password);
1801 kfree(volume_info.prepath); 1811 kfree(volume_info.prepath);
@@ -1807,17 +1817,23 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1807 if (srvTcp) { 1817 if (srvTcp) {
1808 cFYI(1, ("Existing tcp session with server found")); 1818 cFYI(1, ("Existing tcp session with server found"));
1809 } else { /* create socket */ 1819 } else { /* create socket */
1810 if(volume_info.port) 1820 if (volume_info.port)
1811 sin_server.sin_port = htons(volume_info.port); 1821 sin_server.sin_port = htons(volume_info.port);
1812 else 1822 else
1813 sin_server.sin_port = 0; 1823 sin_server.sin_port = 0;
1814 rc = ipv4_connect(&sin_server,&csocket, 1824 if (address_type == AF_INET6) {
1825 cFYI(1,("attempting ipv6 connect"));
1826 /* BB should we allow ipv6 on port 139? */
1827 /* other OS never observed in Wild doing 139 with v6 */
1828 rc = ipv6_connect(&sin_server6,&csocket);
1829 } else
1830 rc = ipv4_connect(&sin_server,&csocket,
1815 volume_info.source_rfc1001_name, 1831 volume_info.source_rfc1001_name,
1816 volume_info.target_rfc1001_name); 1832 volume_info.target_rfc1001_name);
1817 if (rc < 0) { 1833 if (rc < 0) {
1818 cERROR(1, 1834 cERROR(1,
1819 ("Error connecting to IPv4 socket. Aborting operation")); 1835 ("Error connecting to IPv4 socket. Aborting operation"));
1820 if(csocket != NULL) 1836 if (csocket != NULL)
1821 sock_release(csocket); 1837 sock_release(csocket);
1822 kfree(volume_info.UNC); 1838 kfree(volume_info.UNC);
1823 kfree(volume_info.password); 1839 kfree(volume_info.password);
@@ -1850,10 +1866,11 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1850 so no need to spinlock this init of tcpStatus */ 1866 so no need to spinlock this init of tcpStatus */
1851 srvTcp->tcpStatus = CifsNew; 1867 srvTcp->tcpStatus = CifsNew;
1852 init_MUTEX(&srvTcp->tcpSem); 1868 init_MUTEX(&srvTcp->tcpSem);
1853 rc = (int)kernel_thread((void *)(void *)cifs_demultiplex_thread, srvTcp, 1869 srvTcp->tsk = kthread_run((void *)(void *)cifs_demultiplex_thread, srvTcp, "cifsd");
1854 CLONE_FS | CLONE_FILES | CLONE_VM); 1870 if ( IS_ERR(srvTcp->tsk) ) {
1855 if(rc < 0) { 1871 rc = PTR_ERR(srvTcp->tsk);
1856 rc = -ENOMEM; 1872 cERROR(1,("error %d create cifsd thread", rc));
1873 srvTcp->tsk = NULL;
1857 sock_release(csocket); 1874 sock_release(csocket);
1858 kfree(volume_info.UNC); 1875 kfree(volume_info.UNC);
1859 kfree(volume_info.password); 1876 kfree(volume_info.password);
@@ -1896,7 +1913,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1896 int len = strlen(volume_info.domainname); 1913 int len = strlen(volume_info.domainname);
1897 pSesInfo->domainName = 1914 pSesInfo->domainName =
1898 kmalloc(len + 1, GFP_KERNEL); 1915 kmalloc(len + 1, GFP_KERNEL);
1899 if(pSesInfo->domainName) 1916 if (pSesInfo->domainName)
1900 strcpy(pSesInfo->domainName, 1917 strcpy(pSesInfo->domainName,
1901 volume_info.domainname); 1918 volume_info.domainname);
1902 } 1919 }
@@ -1906,7 +1923,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1906 /* BB FIXME need to pass vol->secFlgs BB */ 1923 /* BB FIXME need to pass vol->secFlgs BB */
1907 rc = cifs_setup_session(xid,pSesInfo, cifs_sb->local_nls); 1924 rc = cifs_setup_session(xid,pSesInfo, cifs_sb->local_nls);
1908 up(&pSesInfo->sesSem); 1925 up(&pSesInfo->sesSem);
1909 if(!rc) 1926 if (!rc)
1910 atomic_inc(&srvTcp->socketUseCount); 1927 atomic_inc(&srvTcp->socketUseCount);
1911 } else 1928 } else
1912 kfree(volume_info.password); 1929 kfree(volume_info.password);
@@ -1914,7 +1931,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1914 1931
1915 /* search for existing tcon to this server share */ 1932 /* search for existing tcon to this server share */
1916 if (!rc) { 1933 if (!rc) {
1917 if(volume_info.rsize > CIFSMaxBufSize) { 1934 if (volume_info.rsize > CIFSMaxBufSize) {
1918 cERROR(1,("rsize %d too large, using MaxBufSize", 1935 cERROR(1,("rsize %d too large, using MaxBufSize",
1919 volume_info.rsize)); 1936 volume_info.rsize));
1920 cifs_sb->rsize = CIFSMaxBufSize; 1937 cifs_sb->rsize = CIFSMaxBufSize;
@@ -1923,11 +1940,11 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1923 else /* default */ 1940 else /* default */
1924 cifs_sb->rsize = CIFSMaxBufSize; 1941 cifs_sb->rsize = CIFSMaxBufSize;
1925 1942
1926 if(volume_info.wsize > PAGEVEC_SIZE * PAGE_CACHE_SIZE) { 1943 if (volume_info.wsize > PAGEVEC_SIZE * PAGE_CACHE_SIZE) {
1927 cERROR(1,("wsize %d too large using 4096 instead", 1944 cERROR(1,("wsize %d too large using 4096 instead",
1928 volume_info.wsize)); 1945 volume_info.wsize));
1929 cifs_sb->wsize = 4096; 1946 cifs_sb->wsize = 4096;
1930 } else if(volume_info.wsize) 1947 } else if (volume_info.wsize)
1931 cifs_sb->wsize = volume_info.wsize; 1948 cifs_sb->wsize = volume_info.wsize;
1932 else 1949 else
1933 cifs_sb->wsize = 1950 cifs_sb->wsize =
@@ -1940,14 +1957,14 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1940 conjunction with 52K kvec constraint on arch with 4K 1957 conjunction with 52K kvec constraint on arch with 4K
1941 page size */ 1958 page size */
1942 1959
1943 if(cifs_sb->rsize < 2048) { 1960 if (cifs_sb->rsize < 2048) {
1944 cifs_sb->rsize = 2048; 1961 cifs_sb->rsize = 2048;
1945 /* Windows ME may prefer this */ 1962 /* Windows ME may prefer this */
1946 cFYI(1,("readsize set to minimum 2048")); 1963 cFYI(1,("readsize set to minimum 2048"));
1947 } 1964 }
1948 /* calculate prepath */ 1965 /* calculate prepath */
1949 cifs_sb->prepath = volume_info.prepath; 1966 cifs_sb->prepath = volume_info.prepath;
1950 if(cifs_sb->prepath) { 1967 if (cifs_sb->prepath) {
1951 cifs_sb->prepathlen = strlen(cifs_sb->prepath); 1968 cifs_sb->prepathlen = strlen(cifs_sb->prepath);
1952 cifs_sb->prepath[0] = CIFS_DIR_SEP(cifs_sb); 1969 cifs_sb->prepath[0] = CIFS_DIR_SEP(cifs_sb);
1953 volume_info.prepath = NULL; 1970 volume_info.prepath = NULL;
@@ -1960,24 +1977,27 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1960 cFYI(1,("file mode: 0x%x dir mode: 0x%x", 1977 cFYI(1,("file mode: 0x%x dir mode: 0x%x",
1961 cifs_sb->mnt_file_mode,cifs_sb->mnt_dir_mode)); 1978 cifs_sb->mnt_file_mode,cifs_sb->mnt_dir_mode));
1962 1979
1963 if(volume_info.noperm) 1980 if (volume_info.noperm)
1964 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM; 1981 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM;
1965 if(volume_info.setuids) 1982 if (volume_info.setuids)
1966 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_SET_UID; 1983 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_SET_UID;
1967 if(volume_info.server_ino) 1984 if (volume_info.server_ino)
1968 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_SERVER_INUM; 1985 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_SERVER_INUM;
1969 if(volume_info.remap) 1986 if (volume_info.remap)
1970 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MAP_SPECIAL_CHR; 1987 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MAP_SPECIAL_CHR;
1971 if(volume_info.no_xattr) 1988 if (volume_info.no_xattr)
1972 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_XATTR; 1989 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_XATTR;
1973 if(volume_info.sfu_emul) 1990 if (volume_info.sfu_emul)
1974 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UNX_EMUL; 1991 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UNX_EMUL;
1975 if(volume_info.nobrl) 1992 if (volume_info.nobrl)
1976 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_BRL; 1993 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_BRL;
1977 if(volume_info.cifs_acl) 1994 if (volume_info.cifs_acl)
1978 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_ACL; 1995 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_ACL;
1979 1996 if (volume_info.override_uid)
1980 if(volume_info.direct_io) { 1997 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_UID;
1998 if (volume_info.override_gid)
1999 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_GID;
2000 if (volume_info.direct_io) {
1981 cFYI(1,("mounting share using direct i/o")); 2001 cFYI(1,("mounting share using direct i/o"));
1982 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO; 2002 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO;
1983 } 2003 }
@@ -2030,7 +2050,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2030 } 2050 }
2031 } 2051 }
2032 } 2052 }
2033 if(pSesInfo) { 2053 if (pSesInfo) {
2034 if (pSesInfo->capabilities & CAP_LARGE_FILES) { 2054 if (pSesInfo->capabilities & CAP_LARGE_FILES) {
2035 sb->s_maxbytes = (u64) 1 << 63; 2055 sb->s_maxbytes = (u64) 1 << 63;
2036 } else 2056 } else
@@ -2044,13 +2064,13 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2044 if (rc) { 2064 if (rc) {
2045 /* if session setup failed, use count is zero but 2065 /* if session setup failed, use count is zero but
2046 we still need to free cifsd thread */ 2066 we still need to free cifsd thread */
2047 if(atomic_read(&srvTcp->socketUseCount) == 0) { 2067 if (atomic_read(&srvTcp->socketUseCount) == 0) {
2048 spin_lock(&GlobalMid_Lock); 2068 spin_lock(&GlobalMid_Lock);
2049 srvTcp->tcpStatus = CifsExiting; 2069 srvTcp->tcpStatus = CifsExiting;
2050 spin_unlock(&GlobalMid_Lock); 2070 spin_unlock(&GlobalMid_Lock);
2051 if(srvTcp->tsk) { 2071 if (srvTcp->tsk) {
2052 send_sig(SIGKILL,srvTcp->tsk,1); 2072 send_sig(SIGKILL,srvTcp->tsk,1);
2053 wait_for_completion(&cifsd_complete); 2073 kthread_stop(srvTcp->tsk);
2054 } 2074 }
2055 } 2075 }
2056 /* If find_unc succeeded then rc == 0 so we can not end */ 2076 /* If find_unc succeeded then rc == 0 so we can not end */
@@ -2063,10 +2083,10 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2063 int temp_rc; 2083 int temp_rc;
2064 temp_rc = CIFSSMBLogoff(xid, pSesInfo); 2084 temp_rc = CIFSSMBLogoff(xid, pSesInfo);
2065 /* if the socketUseCount is now zero */ 2085 /* if the socketUseCount is now zero */
2066 if((temp_rc == -ESHUTDOWN) && 2086 if ((temp_rc == -ESHUTDOWN) &&
2067 (pSesInfo->server->tsk)) { 2087 (pSesInfo->server) && (pSesInfo->server->tsk)) {
2068 send_sig(SIGKILL,pSesInfo->server->tsk,1); 2088 send_sig(SIGKILL,pSesInfo->server->tsk,1);
2069 wait_for_completion(&cifsd_complete); 2089 kthread_stop(pSesInfo->server->tsk);
2070 } 2090 }
2071 } else 2091 } else
2072 cFYI(1, ("No session or bad tcon")); 2092 cFYI(1, ("No session or bad tcon"));
@@ -2127,7 +2147,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2127 __u16 count; 2147 __u16 count;
2128 2148
2129 cFYI(1, ("In sesssetup")); 2149 cFYI(1, ("In sesssetup"));
2130 if(ses == NULL) 2150 if (ses == NULL)
2131 return -EINVAL; 2151 return -EINVAL;
2132 user = ses->userName; 2152 user = ses->userName;
2133 domain = ses->domainName; 2153 domain = ses->domainName;
@@ -2182,7 +2202,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2182 *bcc_ptr = 0; 2202 *bcc_ptr = 0;
2183 bcc_ptr++; 2203 bcc_ptr++;
2184 } 2204 }
2185 if(user == NULL) 2205 if (user == NULL)
2186 bytes_returned = 0; /* skip null user */ 2206 bytes_returned = 0; /* skip null user */
2187 else 2207 else
2188 bytes_returned = 2208 bytes_returned =
@@ -2216,7 +2236,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2216 bcc_ptr += 2 * bytes_returned; 2236 bcc_ptr += 2 * bytes_returned;
2217 bcc_ptr += 2; 2237 bcc_ptr += 2;
2218 } else { 2238 } else {
2219 if(user != NULL) { 2239 if (user != NULL) {
2220 strncpy(bcc_ptr, user, 200); 2240 strncpy(bcc_ptr, user, 200);
2221 bcc_ptr += strnlen(user, 200); 2241 bcc_ptr += strnlen(user, 200);
2222 } 2242 }
@@ -3316,7 +3336,7 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
3316 cFYI(1,("Waking up socket by sending it signal")); 3336 cFYI(1,("Waking up socket by sending it signal"));
3317 if(cifsd_task) { 3337 if(cifsd_task) {
3318 send_sig(SIGKILL,cifsd_task,1); 3338 send_sig(SIGKILL,cifsd_task,1);
3319 wait_for_completion(&cifsd_complete); 3339 kthread_stop(cifsd_task);
3320 } 3340 }
3321 rc = 0; 3341 rc = 0;
3322 } /* else - we have an smb session 3342 } /* else - we have an smb session
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 3fad638d26d3..e5210519ac4b 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -274,7 +274,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
274 pCifsFile->invalidHandle = FALSE; 274 pCifsFile->invalidHandle = FALSE;
275 pCifsFile->closePend = FALSE; 275 pCifsFile->closePend = FALSE;
276 init_MUTEX(&pCifsFile->fh_sem); 276 init_MUTEX(&pCifsFile->fh_sem);
277 init_MUTEX(&pCifsFile->lock_sem); 277 mutex_init(&pCifsFile->lock_mutex);
278 INIT_LIST_HEAD(&pCifsFile->llist); 278 INIT_LIST_HEAD(&pCifsFile->llist);
279 atomic_set(&pCifsFile->wrtPending,0); 279 atomic_set(&pCifsFile->wrtPending,0);
280 280
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 2d3275bedb55..94d5b49049df 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -27,7 +27,6 @@
27#include <linux/fcntl.h> 27#include <linux/fcntl.h>
28#include <linux/pagemap.h> 28#include <linux/pagemap.h>
29#include <linux/pagevec.h> 29#include <linux/pagevec.h>
30#include <linux/smp_lock.h>
31#include <linux/writeback.h> 30#include <linux/writeback.h>
32#include <linux/task_io_accounting_ops.h> 31#include <linux/task_io_accounting_ops.h>
33#include <linux/delay.h> 32#include <linux/delay.h>
@@ -48,7 +47,7 @@ static inline struct cifsFileInfo *cifs_init_private(
48 private_data->netfid = netfid; 47 private_data->netfid = netfid;
49 private_data->pid = current->tgid; 48 private_data->pid = current->tgid;
50 init_MUTEX(&private_data->fh_sem); 49 init_MUTEX(&private_data->fh_sem);
51 init_MUTEX(&private_data->lock_sem); 50 mutex_init(&private_data->lock_mutex);
52 INIT_LIST_HEAD(&private_data->llist); 51 INIT_LIST_HEAD(&private_data->llist);
53 private_data->pfile = file; /* needed for writepage */ 52 private_data->pfile = file; /* needed for writepage */
54 private_data->pInode = inode; 53 private_data->pInode = inode;
@@ -338,8 +337,7 @@ static int cifs_relock_file(struct cifsFileInfo *cifsFile)
338 return rc; 337 return rc;
339} 338}
340 339
341static int cifs_reopen_file(struct inode *inode, struct file *file, 340static int cifs_reopen_file(struct file *file, int can_flush)
342 int can_flush)
343{ 341{
344 int rc = -EACCES; 342 int rc = -EACCES;
345 int xid, oplock; 343 int xid, oplock;
@@ -347,13 +345,12 @@ static int cifs_reopen_file(struct inode *inode, struct file *file,
347 struct cifsTconInfo *pTcon; 345 struct cifsTconInfo *pTcon;
348 struct cifsFileInfo *pCifsFile; 346 struct cifsFileInfo *pCifsFile;
349 struct cifsInodeInfo *pCifsInode; 347 struct cifsInodeInfo *pCifsInode;
348 struct inode * inode;
350 char *full_path = NULL; 349 char *full_path = NULL;
351 int desiredAccess; 350 int desiredAccess;
352 int disposition = FILE_OPEN; 351 int disposition = FILE_OPEN;
353 __u16 netfid; 352 __u16 netfid;
354 353
355 if (inode == NULL)
356 return -EBADF;
357 if (file->private_data) { 354 if (file->private_data) {
358 pCifsFile = (struct cifsFileInfo *)file->private_data; 355 pCifsFile = (struct cifsFileInfo *)file->private_data;
359 } else 356 } else
@@ -368,25 +365,37 @@ static int cifs_reopen_file(struct inode *inode, struct file *file,
368 } 365 }
369 366
370 if (file->f_path.dentry == NULL) { 367 if (file->f_path.dentry == NULL) {
371 up(&pCifsFile->fh_sem); 368 cERROR(1, ("no valid name if dentry freed"));
372 cFYI(1, ("failed file reopen, no valid name if dentry freed")); 369 dump_stack();
373 FreeXid(xid); 370 rc = -EBADF;
374 return -EBADF; 371 goto reopen_error_exit;
375 } 372 }
373
374 inode = file->f_path.dentry->d_inode;
375 if(inode == NULL) {
376 cERROR(1, ("inode not valid"));
377 dump_stack();
378 rc = -EBADF;
379 goto reopen_error_exit;
380 }
381
376 cifs_sb = CIFS_SB(inode->i_sb); 382 cifs_sb = CIFS_SB(inode->i_sb);
377 pTcon = cifs_sb->tcon; 383 pTcon = cifs_sb->tcon;
384
378/* can not grab rename sem here because various ops, including 385/* can not grab rename sem here because various ops, including
379 those that already have the rename sem can end up causing writepage 386 those that already have the rename sem can end up causing writepage
380 to get called and if the server was down that means we end up here, 387 to get called and if the server was down that means we end up here,
381 and we can never tell if the caller already has the rename_sem */ 388 and we can never tell if the caller already has the rename_sem */
382 full_path = build_path_from_dentry(file->f_path.dentry); 389 full_path = build_path_from_dentry(file->f_path.dentry);
383 if (full_path == NULL) { 390 if (full_path == NULL) {
391 rc = -ENOMEM;
392reopen_error_exit:
384 up(&pCifsFile->fh_sem); 393 up(&pCifsFile->fh_sem);
385 FreeXid(xid); 394 FreeXid(xid);
386 return -ENOMEM; 395 return rc;
387 } 396 }
388 397
389 cFYI(1, (" inode = 0x%p file flags are 0x%x for %s", 398 cFYI(1, ("inode = 0x%p file flags 0x%x for %s",
390 inode, file->f_flags,full_path)); 399 inode, file->f_flags,full_path));
391 desiredAccess = cifs_convert_flags(file->f_flags); 400 desiredAccess = cifs_convert_flags(file->f_flags);
392 401
@@ -401,13 +410,6 @@ static int cifs_reopen_file(struct inode *inode, struct file *file,
401 and server version of file size can be stale. If we knew for sure 410 and server version of file size can be stale. If we knew for sure
402 that inode was not dirty locally we could do this */ 411 that inode was not dirty locally we could do this */
403 412
404/* buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
405 if (buf == 0) {
406 up(&pCifsFile->fh_sem);
407 kfree(full_path);
408 FreeXid(xid);
409 return -ENOMEM;
410 } */
411 rc = CIFSSMBOpen(xid, pTcon, full_path, disposition, desiredAccess, 413 rc = CIFSSMBOpen(xid, pTcon, full_path, disposition, desiredAccess,
412 CREATE_NOT_DIR, &netfid, &oplock, NULL, 414 CREATE_NOT_DIR, &netfid, &oplock, NULL,
413 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & 415 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
@@ -508,12 +510,12 @@ int cifs_close(struct inode *inode, struct file *file)
508 510
509 /* Delete any outstanding lock records. 511 /* Delete any outstanding lock records.
510 We'll lose them when the file is closed anyway. */ 512 We'll lose them when the file is closed anyway. */
511 down(&pSMBFile->lock_sem); 513 mutex_lock(&pSMBFile->lock_mutex);
512 list_for_each_entry_safe(li, tmp, &pSMBFile->llist, llist) { 514 list_for_each_entry_safe(li, tmp, &pSMBFile->llist, llist) {
513 list_del(&li->llist); 515 list_del(&li->llist);
514 kfree(li); 516 kfree(li);
515 } 517 }
516 up(&pSMBFile->lock_sem); 518 mutex_unlock(&pSMBFile->lock_mutex);
517 519
518 write_lock(&GlobalSMBSeslock); 520 write_lock(&GlobalSMBSeslock);
519 list_del(&pSMBFile->flist); 521 list_del(&pSMBFile->flist);
@@ -598,9 +600,9 @@ static int store_file_lock(struct cifsFileInfo *fid, __u64 len,
598 li->offset = offset; 600 li->offset = offset;
599 li->length = len; 601 li->length = len;
600 li->type = lockType; 602 li->type = lockType;
601 down(&fid->lock_sem); 603 mutex_lock(&fid->lock_mutex);
602 list_add(&li->llist, &fid->llist); 604 list_add(&li->llist, &fid->llist);
603 up(&fid->lock_sem); 605 mutex_unlock(&fid->lock_mutex);
604 return 0; 606 return 0;
605} 607}
606 608
@@ -757,7 +759,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
757 struct cifsLockInfo *li, *tmp; 759 struct cifsLockInfo *li, *tmp;
758 760
759 rc = 0; 761 rc = 0;
760 down(&fid->lock_sem); 762 mutex_lock(&fid->lock_mutex);
761 list_for_each_entry_safe(li, tmp, &fid->llist, llist) { 763 list_for_each_entry_safe(li, tmp, &fid->llist, llist) {
762 if (pfLock->fl_start <= li->offset && 764 if (pfLock->fl_start <= li->offset &&
763 length >= li->length) { 765 length >= li->length) {
@@ -771,7 +773,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
771 kfree(li); 773 kfree(li);
772 } 774 }
773 } 775 }
774 up(&fid->lock_sem); 776 mutex_unlock(&fid->lock_mutex);
775 } 777 }
776 } 778 }
777 779
@@ -792,12 +794,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
792 int xid, long_op; 794 int xid, long_op;
793 struct cifsFileInfo *open_file; 795 struct cifsFileInfo *open_file;
794 796
795 if (file->f_path.dentry == NULL)
796 return -EBADF;
797
798 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 797 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
799 if (cifs_sb == NULL)
800 return -EBADF;
801 798
802 pTcon = cifs_sb->tcon; 799 pTcon = cifs_sb->tcon;
803 800
@@ -807,14 +804,9 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
807 804
808 if (file->private_data == NULL) 805 if (file->private_data == NULL)
809 return -EBADF; 806 return -EBADF;
810 else 807 open_file = (struct cifsFileInfo *) file->private_data;
811 open_file = (struct cifsFileInfo *) file->private_data;
812 808
813 xid = GetXid(); 809 xid = GetXid();
814 if (file->f_path.dentry->d_inode == NULL) {
815 FreeXid(xid);
816 return -EBADF;
817 }
818 810
819 if (*poffset > file->f_path.dentry->d_inode->i_size) 811 if (*poffset > file->f_path.dentry->d_inode->i_size)
820 long_op = 2; /* writes past end of file can take a long time */ 812 long_op = 2; /* writes past end of file can take a long time */
@@ -841,17 +833,11 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
841 return -EBADF; 833 return -EBADF;
842 } 834 }
843 if (open_file->invalidHandle) { 835 if (open_file->invalidHandle) {
844 if ((file->f_path.dentry == NULL) ||
845 (file->f_path.dentry->d_inode == NULL)) {
846 FreeXid(xid);
847 return total_written;
848 }
849 /* we could deadlock if we called 836 /* we could deadlock if we called
850 filemap_fdatawait from here so tell 837 filemap_fdatawait from here so tell
851 reopen_file not to flush data to server 838 reopen_file not to flush data to server
852 now */ 839 now */
853 rc = cifs_reopen_file(file->f_path.dentry->d_inode, 840 rc = cifs_reopen_file(file, FALSE);
854 file, FALSE);
855 if (rc != 0) 841 if (rc != 0)
856 break; 842 break;
857 } 843 }
@@ -908,12 +894,7 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
908 int xid, long_op; 894 int xid, long_op;
909 struct cifsFileInfo *open_file; 895 struct cifsFileInfo *open_file;
910 896
911 if (file->f_path.dentry == NULL)
912 return -EBADF;
913
914 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 897 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
915 if (cifs_sb == NULL)
916 return -EBADF;
917 898
918 pTcon = cifs_sb->tcon; 899 pTcon = cifs_sb->tcon;
919 900
@@ -922,14 +903,9 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
922 903
923 if (file->private_data == NULL) 904 if (file->private_data == NULL)
924 return -EBADF; 905 return -EBADF;
925 else 906 open_file = (struct cifsFileInfo *)file->private_data;
926 open_file = (struct cifsFileInfo *)file->private_data;
927 907
928 xid = GetXid(); 908 xid = GetXid();
929 if (file->f_path.dentry->d_inode == NULL) {
930 FreeXid(xid);
931 return -EBADF;
932 }
933 909
934 if (*poffset > file->f_path.dentry->d_inode->i_size) 910 if (*poffset > file->f_path.dentry->d_inode->i_size)
935 long_op = 2; /* writes past end of file can take a long time */ 911 long_op = 2; /* writes past end of file can take a long time */
@@ -957,17 +933,11 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
957 return -EBADF; 933 return -EBADF;
958 } 934 }
959 if (open_file->invalidHandle) { 935 if (open_file->invalidHandle) {
960 if ((file->f_path.dentry == NULL) ||
961 (file->f_path.dentry->d_inode == NULL)) {
962 FreeXid(xid);
963 return total_written;
964 }
965 /* we could deadlock if we called 936 /* we could deadlock if we called
966 filemap_fdatawait from here so tell 937 filemap_fdatawait from here so tell
967 reopen_file not to flush data to 938 reopen_file not to flush data to
968 server now */ 939 server now */
969 rc = cifs_reopen_file(file->f_path.dentry->d_inode, 940 rc = cifs_reopen_file(file, FALSE);
970 file, FALSE);
971 if (rc != 0) 941 if (rc != 0)
972 break; 942 break;
973 } 943 }
@@ -1056,8 +1026,7 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode)
1056 read_unlock(&GlobalSMBSeslock); 1026 read_unlock(&GlobalSMBSeslock);
1057 if((open_file->invalidHandle) && 1027 if((open_file->invalidHandle) &&
1058 (!open_file->closePend) /* BB fixme -since the second clause can not be true remove it BB */) { 1028 (!open_file->closePend) /* BB fixme -since the second clause can not be true remove it BB */) {
1059 rc = cifs_reopen_file(&cifs_inode->vfs_inode, 1029 rc = cifs_reopen_file(open_file->pfile, FALSE);
1060 open_file->pfile, FALSE);
1061 /* if it fails, try another handle - might be */ 1030 /* if it fails, try another handle - might be */
1062 /* dangerous to hold up writepages with retry */ 1031 /* dangerous to hold up writepages with retry */
1063 if(rc) { 1032 if(rc) {
@@ -1404,32 +1373,6 @@ static int cifs_commit_write(struct file *file, struct page *page,
1404 spin_lock(&inode->i_lock); 1373 spin_lock(&inode->i_lock);
1405 if (position > inode->i_size) { 1374 if (position > inode->i_size) {
1406 i_size_write(inode, position); 1375 i_size_write(inode, position);
1407 /* if (file->private_data == NULL) {
1408 rc = -EBADF;
1409 } else {
1410 open_file = (struct cifsFileInfo *)file->private_data;
1411 cifs_sb = CIFS_SB(inode->i_sb);
1412 rc = -EAGAIN;
1413 while (rc == -EAGAIN) {
1414 if ((open_file->invalidHandle) &&
1415 (!open_file->closePend)) {
1416 rc = cifs_reopen_file(
1417 file->f_path.dentry->d_inode, file);
1418 if (rc != 0)
1419 break;
1420 }
1421 if (!open_file->closePend) {
1422 rc = CIFSSMBSetFileSize(xid,
1423 cifs_sb->tcon, position,
1424 open_file->netfid,
1425 open_file->pid, FALSE);
1426 } else {
1427 rc = -EBADF;
1428 break;
1429 }
1430 }
1431 cFYI(1, (" SetEOF (commit write) rc = %d", rc));
1432 } */
1433 } 1376 }
1434 spin_unlock(&inode->i_lock); 1377 spin_unlock(&inode->i_lock);
1435 if (!PageUptodate(page)) { 1378 if (!PageUptodate(page)) {
@@ -1573,8 +1516,7 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data,
1573 int buf_type = CIFS_NO_BUFFER; 1516 int buf_type = CIFS_NO_BUFFER;
1574 if ((open_file->invalidHandle) && 1517 if ((open_file->invalidHandle) &&
1575 (!open_file->closePend)) { 1518 (!open_file->closePend)) {
1576 rc = cifs_reopen_file(file->f_path.dentry->d_inode, 1519 rc = cifs_reopen_file(file, TRUE);
1577 file, TRUE);
1578 if (rc != 0) 1520 if (rc != 0)
1579 break; 1521 break;
1580 } 1522 }
@@ -1660,8 +1602,7 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
1660 while (rc == -EAGAIN) { 1602 while (rc == -EAGAIN) {
1661 if ((open_file->invalidHandle) && 1603 if ((open_file->invalidHandle) &&
1662 (!open_file->closePend)) { 1604 (!open_file->closePend)) {
1663 rc = cifs_reopen_file(file->f_path.dentry->d_inode, 1605 rc = cifs_reopen_file(file, TRUE);
1664 file, TRUE);
1665 if (rc != 0) 1606 if (rc != 0)
1666 break; 1607 break;
1667 } 1608 }
@@ -1817,8 +1758,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
1817 while (rc == -EAGAIN) { 1758 while (rc == -EAGAIN) {
1818 if ((open_file->invalidHandle) && 1759 if ((open_file->invalidHandle) &&
1819 (!open_file->closePend)) { 1760 (!open_file->closePend)) {
1820 rc = cifs_reopen_file(file->f_path.dentry->d_inode, 1761 rc = cifs_reopen_file(file, TRUE);
1821 file, TRUE);
1822 if (rc != 0) 1762 if (rc != 0)
1823 break; 1763 break;
1824 } 1764 }
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index f414526e476a..3e87dad3367c 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * fs/cifs/inode.c 2 * fs/cifs/inode.c
3 * 3 *
4 * Copyright (C) International Business Machines Corp., 2002,2005 4 * Copyright (C) International Business Machines Corp., 2002,2007
5 * Author(s): Steve French (sfrench@us.ibm.com) 5 * Author(s): Steve French (sfrench@us.ibm.com)
6 * 6 *
7 * This library is free software; you can redistribute it and/or modify 7 * This library is free software; you can redistribute it and/or modify
@@ -90,7 +90,7 @@ int cifs_get_inode_info_unix(struct inode **pinode,
90 (*pinode)->i_ino = 90 (*pinode)->i_ino =
91 (unsigned long)findData.UniqueId; 91 (unsigned long)findData.UniqueId;
92 } /* note ino incremented to unique num in new_inode */ 92 } /* note ino incremented to unique num in new_inode */
93 if(sb->s_flags & MS_NOATIME) 93 if (sb->s_flags & MS_NOATIME)
94 (*pinode)->i_flags |= S_NOATIME | S_NOCMTIME; 94 (*pinode)->i_flags |= S_NOATIME | S_NOCMTIME;
95 95
96 insert_inode_hash(*pinode); 96 insert_inode_hash(*pinode);
@@ -139,8 +139,17 @@ int cifs_get_inode_info_unix(struct inode **pinode,
139 inode->i_mode |= S_IFREG; 139 inode->i_mode |= S_IFREG;
140 cFYI(1,("unknown type %d",type)); 140 cFYI(1,("unknown type %d",type));
141 } 141 }
142 inode->i_uid = le64_to_cpu(findData.Uid); 142
143 inode->i_gid = le64_to_cpu(findData.Gid); 143 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)
144 inode->i_uid = cifs_sb->mnt_uid;
145 else
146 inode->i_uid = le64_to_cpu(findData.Uid);
147
148 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)
149 inode->i_gid = cifs_sb->mnt_gid;
150 else
151 inode->i_gid = le64_to_cpu(findData.Gid);
152
144 inode->i_nlink = le64_to_cpu(findData.Nlinks); 153 inode->i_nlink = le64_to_cpu(findData.Nlinks);
145 154
146 spin_lock(&inode->i_lock); 155 spin_lock(&inode->i_lock);
@@ -178,13 +187,13 @@ int cifs_get_inode_info_unix(struct inode **pinode,
178 &cifs_file_direct_nobrl_ops; 187 &cifs_file_direct_nobrl_ops;
179 else 188 else
180 inode->i_fop = &cifs_file_direct_ops; 189 inode->i_fop = &cifs_file_direct_ops;
181 } else if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) 190 } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
182 inode->i_fop = &cifs_file_nobrl_ops; 191 inode->i_fop = &cifs_file_nobrl_ops;
183 else /* not direct, send byte range locks */ 192 else /* not direct, send byte range locks */
184 inode->i_fop = &cifs_file_ops; 193 inode->i_fop = &cifs_file_ops;
185 194
186 /* check if server can support readpages */ 195 /* check if server can support readpages */
187 if(pTcon->ses->server->maxBuf < 196 if (pTcon->ses->server->maxBuf <
188 PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE) 197 PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE)
189 inode->i_data.a_ops = &cifs_addr_ops_smallbuf; 198 inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
190 else 199 else
@@ -220,7 +229,7 @@ static int decode_sfu_inode(struct inode * inode, __u64 size,
220 229
221 pbuf = buf; 230 pbuf = buf;
222 231
223 if(size == 0) { 232 if (size == 0) {
224 inode->i_mode |= S_IFIFO; 233 inode->i_mode |= S_IFIFO;
225 return 0; 234 return 0;
226 } else if (size < 8) { 235 } else if (size < 8) {
@@ -239,11 +248,11 @@ static int decode_sfu_inode(struct inode * inode, __u64 size,
239 netfid, 248 netfid,
240 24 /* length */, 0 /* offset */, 249 24 /* length */, 0 /* offset */,
241 &bytes_read, &pbuf, &buf_type); 250 &bytes_read, &pbuf, &buf_type);
242 if((rc == 0) && (bytes_read >= 8)) { 251 if ((rc == 0) && (bytes_read >= 8)) {
243 if(memcmp("IntxBLK", pbuf, 8) == 0) { 252 if (memcmp("IntxBLK", pbuf, 8) == 0) {
244 cFYI(1,("Block device")); 253 cFYI(1,("Block device"));
245 inode->i_mode |= S_IFBLK; 254 inode->i_mode |= S_IFBLK;
246 if(bytes_read == 24) { 255 if (bytes_read == 24) {
247 /* we have enough to decode dev num */ 256 /* we have enough to decode dev num */
248 __u64 mjr; /* major */ 257 __u64 mjr; /* major */
249 __u64 mnr; /* minor */ 258 __u64 mnr; /* minor */
@@ -251,10 +260,10 @@ static int decode_sfu_inode(struct inode * inode, __u64 size,
251 mnr = le64_to_cpu(*(__le64 *)(pbuf+16)); 260 mnr = le64_to_cpu(*(__le64 *)(pbuf+16));
252 inode->i_rdev = MKDEV(mjr, mnr); 261 inode->i_rdev = MKDEV(mjr, mnr);
253 } 262 }
254 } else if(memcmp("IntxCHR", pbuf, 8) == 0) { 263 } else if (memcmp("IntxCHR", pbuf, 8) == 0) {
255 cFYI(1,("Char device")); 264 cFYI(1,("Char device"));
256 inode->i_mode |= S_IFCHR; 265 inode->i_mode |= S_IFCHR;
257 if(bytes_read == 24) { 266 if (bytes_read == 24) {
258 /* we have enough to decode dev num */ 267 /* we have enough to decode dev num */
259 __u64 mjr; /* major */ 268 __u64 mjr; /* major */
260 __u64 mnr; /* minor */ 269 __u64 mnr; /* minor */
@@ -262,7 +271,7 @@ static int decode_sfu_inode(struct inode * inode, __u64 size,
262 mnr = le64_to_cpu(*(__le64 *)(pbuf+16)); 271 mnr = le64_to_cpu(*(__le64 *)(pbuf+16));
263 inode->i_rdev = MKDEV(mjr, mnr); 272 inode->i_rdev = MKDEV(mjr, mnr);
264 } 273 }
265 } else if(memcmp("IntxLNK", pbuf, 7) == 0) { 274 } else if (memcmp("IntxLNK", pbuf, 7) == 0) {
266 cFYI(1,("Symlink")); 275 cFYI(1,("Symlink"));
267 inode->i_mode |= S_IFLNK; 276 inode->i_mode |= S_IFLNK;
268 } else { 277 } else {
@@ -293,7 +302,7 @@ static int get_sfu_uid_mode(struct inode * inode,
293 rc = CIFSSMBQueryEA(xid, cifs_sb->tcon, path, "SETFILEBITS", 302 rc = CIFSSMBQueryEA(xid, cifs_sb->tcon, path, "SETFILEBITS",
294 ea_value, 4 /* size of buf */, cifs_sb->local_nls, 303 ea_value, 4 /* size of buf */, cifs_sb->local_nls,
295 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 304 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
296 if(rc < 0) 305 if (rc < 0)
297 return (int)rc; 306 return (int)rc;
298 else if (rc > 3) { 307 else if (rc > 3) {
299 mode = le32_to_cpu(*((__le32 *)ea_value)); 308 mode = le32_to_cpu(*((__le32 *)ea_value));
@@ -348,7 +357,7 @@ int cifs_get_inode_info(struct inode **pinode,
348 /* BB optimize code so we do not make the above call 357 /* BB optimize code so we do not make the above call
349 when server claims no NT SMB support and the above call 358 when server claims no NT SMB support and the above call
350 failed at least once - set flag in tcon or mount */ 359 failed at least once - set flag in tcon or mount */
351 if((rc == -EOPNOTSUPP) || (rc == -EINVAL)) { 360 if ((rc == -EOPNOTSUPP) || (rc == -EINVAL)) {
352 rc = SMBQueryInformation(xid, pTcon, search_path, 361 rc = SMBQueryInformation(xid, pTcon, search_path,
353 pfindData, cifs_sb->local_nls, 362 pfindData, cifs_sb->local_nls,
354 cifs_sb->mnt_cifs_flags & 363 cifs_sb->mnt_cifs_flags &
@@ -425,7 +434,7 @@ int cifs_get_inode_info(struct inode **pinode,
425 } else /* do we need cast or hash to ino? */ 434 } else /* do we need cast or hash to ino? */
426 (*pinode)->i_ino = inode_num; 435 (*pinode)->i_ino = inode_num;
427 } /* else ino incremented to unique num in new_inode*/ 436 } /* else ino incremented to unique num in new_inode*/
428 if(sb->s_flags & MS_NOATIME) 437 if (sb->s_flags & MS_NOATIME)
429 (*pinode)->i_flags |= S_NOATIME | S_NOCMTIME; 438 (*pinode)->i_flags |= S_NOATIME | S_NOCMTIME;
430 insert_inode_hash(*pinode); 439 insert_inode_hash(*pinode);
431 } 440 }
@@ -442,7 +451,7 @@ int cifs_get_inode_info(struct inode **pinode,
442 (pTcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE) & 0xFFFFFE00;*/ 451 (pTcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE) & 0xFFFFFE00;*/
443 452
444 /* Linux can not store file creation time so ignore it */ 453 /* Linux can not store file creation time so ignore it */
445 if(pfindData->LastAccessTime) 454 if (pfindData->LastAccessTime)
446 inode->i_atime = cifs_NTtimeToUnix 455 inode->i_atime = cifs_NTtimeToUnix
447 (le64_to_cpu(pfindData->LastAccessTime)); 456 (le64_to_cpu(pfindData->LastAccessTime));
448 else /* do not need to use current_fs_time - time not stored */ 457 else /* do not need to use current_fs_time - time not stored */
@@ -452,7 +461,7 @@ int cifs_get_inode_info(struct inode **pinode,
452 inode->i_ctime = 461 inode->i_ctime =
453 cifs_NTtimeToUnix(le64_to_cpu(pfindData->ChangeTime)); 462 cifs_NTtimeToUnix(le64_to_cpu(pfindData->ChangeTime));
454 cFYI(0, ("Attributes came in as 0x%x", attr)); 463 cFYI(0, ("Attributes came in as 0x%x", attr));
455 if(adjustTZ && (pTcon->ses) && (pTcon->ses->server)) { 464 if (adjustTZ && (pTcon->ses) && (pTcon->ses->server)) {
456 inode->i_ctime.tv_sec += pTcon->ses->server->timeAdj; 465 inode->i_ctime.tv_sec += pTcon->ses->server->timeAdj;
457 inode->i_mtime.tv_sec += pTcon->ses->server->timeAdj; 466 inode->i_mtime.tv_sec += pTcon->ses->server->timeAdj;
458 } 467 }
@@ -521,8 +530,10 @@ int cifs_get_inode_info(struct inode **pinode,
521 530
522 /* BB fill in uid and gid here? with help from winbind? 531 /* BB fill in uid and gid here? with help from winbind?
523 or retrieve from NTFS stream extended attribute */ 532 or retrieve from NTFS stream extended attribute */
524 if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { 533 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) {
525 /* fill in uid, gid, mode from server ACL */ 534 /* fill in uid, gid, mode from server ACL */
535 /* BB FIXME this should also take into account the
536 * default uid specified on mount if present */
526 get_sfu_uid_mode(inode, search_path, cifs_sb, xid); 537 get_sfu_uid_mode(inode, search_path, cifs_sb, xid);
527 } else if (atomic_read(&cifsInfo->inUse) == 0) { 538 } else if (atomic_read(&cifsInfo->inUse) == 0) {
528 inode->i_uid = cifs_sb->mnt_uid; 539 inode->i_uid = cifs_sb->mnt_uid;
@@ -541,12 +552,12 @@ int cifs_get_inode_info(struct inode **pinode,
541 &cifs_file_direct_nobrl_ops; 552 &cifs_file_direct_nobrl_ops;
542 else 553 else
543 inode->i_fop = &cifs_file_direct_ops; 554 inode->i_fop = &cifs_file_direct_ops;
544 } else if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) 555 } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
545 inode->i_fop = &cifs_file_nobrl_ops; 556 inode->i_fop = &cifs_file_nobrl_ops;
546 else /* not direct, send byte range locks */ 557 else /* not direct, send byte range locks */
547 inode->i_fop = &cifs_file_ops; 558 inode->i_fop = &cifs_file_ops;
548 559
549 if(pTcon->ses->server->maxBuf < 560 if (pTcon->ses->server->maxBuf <
550 PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE) 561 PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE)
551 inode->i_data.a_ops = &cifs_addr_ops_smallbuf; 562 inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
552 else 563 else
@@ -597,7 +608,7 @@ int cifs_unlink(struct inode *inode, struct dentry *direntry)
597 608
598 xid = GetXid(); 609 xid = GetXid();
599 610
600 if(inode) 611 if (inode)
601 cifs_sb = CIFS_SB(inode->i_sb); 612 cifs_sb = CIFS_SB(inode->i_sb);
602 else 613 else
603 cifs_sb = CIFS_SB(direntry->d_sb); 614 cifs_sb = CIFS_SB(direntry->d_sb);
@@ -723,7 +734,7 @@ int cifs_unlink(struct inode *inode, struct dentry *direntry)
723 when needed */ 734 when needed */
724 direntry->d_inode->i_ctime = current_fs_time(inode->i_sb); 735 direntry->d_inode->i_ctime = current_fs_time(inode->i_sb);
725 } 736 }
726 if(inode) { 737 if (inode) {
727 inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb); 738 inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb);
728 cifsInode = CIFS_I(inode); 739 cifsInode = CIFS_I(inode);
729 cifsInode->time = 0; /* force revalidate of dir as well */ 740 cifsInode->time = 0; /* force revalidate of dir as well */
@@ -734,6 +745,136 @@ int cifs_unlink(struct inode *inode, struct dentry *direntry)
734 return rc; 745 return rc;
735} 746}
736 747
748static void posix_fill_in_inode(struct inode *tmp_inode,
749 FILE_UNIX_BASIC_INFO *pData, int *pobject_type, int isNewInode)
750{
751 loff_t local_size;
752 struct timespec local_mtime;
753
754 struct cifsInodeInfo *cifsInfo = CIFS_I(tmp_inode);
755 struct cifs_sb_info *cifs_sb = CIFS_SB(tmp_inode->i_sb);
756
757 __u32 type = le32_to_cpu(pData->Type);
758 __u64 num_of_bytes = le64_to_cpu(pData->NumOfBytes);
759 __u64 end_of_file = le64_to_cpu(pData->EndOfFile);
760 cifsInfo->time = jiffies;
761 atomic_inc(&cifsInfo->inUse);
762
763 /* save mtime and size */
764 local_mtime = tmp_inode->i_mtime;
765 local_size = tmp_inode->i_size;
766
767 tmp_inode->i_atime =
768 cifs_NTtimeToUnix(le64_to_cpu(pData->LastAccessTime));
769 tmp_inode->i_mtime =
770 cifs_NTtimeToUnix(le64_to_cpu(pData->LastModificationTime));
771 tmp_inode->i_ctime =
772 cifs_NTtimeToUnix(le64_to_cpu(pData->LastStatusChange));
773
774 tmp_inode->i_mode = le64_to_cpu(pData->Permissions);
775 /* since we set the inode type below we need to mask off type
776 to avoid strange results if bits above were corrupt */
777 tmp_inode->i_mode &= ~S_IFMT;
778 if (type == UNIX_FILE) {
779 *pobject_type = DT_REG;
780 tmp_inode->i_mode |= S_IFREG;
781 } else if (type == UNIX_SYMLINK) {
782 *pobject_type = DT_LNK;
783 tmp_inode->i_mode |= S_IFLNK;
784 } else if (type == UNIX_DIR) {
785 *pobject_type = DT_DIR;
786 tmp_inode->i_mode |= S_IFDIR;
787 } else if (type == UNIX_CHARDEV) {
788 *pobject_type = DT_CHR;
789 tmp_inode->i_mode |= S_IFCHR;
790 tmp_inode->i_rdev = MKDEV(le64_to_cpu(pData->DevMajor),
791 le64_to_cpu(pData->DevMinor) & MINORMASK);
792 } else if (type == UNIX_BLOCKDEV) {
793 *pobject_type = DT_BLK;
794 tmp_inode->i_mode |= S_IFBLK;
795 tmp_inode->i_rdev = MKDEV(le64_to_cpu(pData->DevMajor),
796 le64_to_cpu(pData->DevMinor) & MINORMASK);
797 } else if (type == UNIX_FIFO) {
798 *pobject_type = DT_FIFO;
799 tmp_inode->i_mode |= S_IFIFO;
800 } else if (type == UNIX_SOCKET) {
801 *pobject_type = DT_SOCK;
802 tmp_inode->i_mode |= S_IFSOCK;
803 } else {
804 /* safest to just call it a file */
805 *pobject_type = DT_REG;
806 tmp_inode->i_mode |= S_IFREG;
807 cFYI(1,("unknown inode type %d",type));
808 }
809
810#ifdef CONFIG_CIFS_DEBUG2
811 cFYI(1,("object type: %d", type));
812#endif
813 tmp_inode->i_uid = le64_to_cpu(pData->Uid);
814 tmp_inode->i_gid = le64_to_cpu(pData->Gid);
815 tmp_inode->i_nlink = le64_to_cpu(pData->Nlinks);
816
817 spin_lock(&tmp_inode->i_lock);
818 if (is_size_safe_to_change(cifsInfo, end_of_file)) {
819 /* can not safely change the file size here if the
820 client is writing to it due to potential races */
821 i_size_write(tmp_inode, end_of_file);
822
823 /* 512 bytes (2**9) is the fake blocksize that must be used */
824 /* for this calculation, not the real blocksize */
825 tmp_inode->i_blocks = (512 - 1 + num_of_bytes) >> 9;
826 }
827 spin_unlock(&tmp_inode->i_lock);
828
829 if (S_ISREG(tmp_inode->i_mode)) {
830 cFYI(1, ("File inode"));
831 tmp_inode->i_op = &cifs_file_inode_ops;
832
833 if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
834 if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
835 tmp_inode->i_fop = &cifs_file_direct_nobrl_ops;
836 else
837 tmp_inode->i_fop = &cifs_file_direct_ops;
838
839 } else if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
840 tmp_inode->i_fop = &cifs_file_nobrl_ops;
841 else
842 tmp_inode->i_fop = &cifs_file_ops;
843
844 if((cifs_sb->tcon) && (cifs_sb->tcon->ses) &&
845 (cifs_sb->tcon->ses->server->maxBuf <
846 PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE))
847 tmp_inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
848 else
849 tmp_inode->i_data.a_ops = &cifs_addr_ops;
850
851 if(isNewInode)
852 return; /* No sense invalidating pages for new inode since we
853 have not started caching readahead file data yet */
854
855 if (timespec_equal(&tmp_inode->i_mtime, &local_mtime) &&
856 (local_size == tmp_inode->i_size)) {
857 cFYI(1, ("inode exists but unchanged"));
858 } else {
859 /* file may have changed on server */
860 cFYI(1, ("invalidate inode, readdir detected change"));
861 invalidate_remote_inode(tmp_inode);
862 }
863 } else if (S_ISDIR(tmp_inode->i_mode)) {
864 cFYI(1, ("Directory inode"));
865 tmp_inode->i_op = &cifs_dir_inode_ops;
866 tmp_inode->i_fop = &cifs_dir_ops;
867 } else if (S_ISLNK(tmp_inode->i_mode)) {
868 cFYI(1, ("Symbolic Link inode"));
869 tmp_inode->i_op = &cifs_symlink_inode_ops;
870/* tmp_inode->i_fop = *//* do not need to set to anything */
871 } else {
872 cFYI(1, ("Special inode"));
873 init_special_inode(tmp_inode, tmp_inode->i_mode,
874 tmp_inode->i_rdev);
875 }
876}
877
737int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) 878int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
738{ 879{
739 int rc = 0; 880 int rc = 0;
@@ -755,6 +896,71 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
755 FreeXid(xid); 896 FreeXid(xid);
756 return -ENOMEM; 897 return -ENOMEM;
757 } 898 }
899
900 if((pTcon->ses->capabilities & CAP_UNIX) &&
901 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
902 le64_to_cpu(pTcon->fsUnixInfo.Capability))) {
903 u32 oplock = 0;
904 FILE_UNIX_BASIC_INFO * pInfo =
905 kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
906 if(pInfo == NULL) {
907 rc = -ENOMEM;
908 goto mkdir_out;
909 }
910
911 rc = CIFSPOSIXCreate(xid, pTcon, SMB_O_DIRECTORY | SMB_O_CREAT,
912 mode, NULL /* netfid */, pInfo, &oplock,
913 full_path, cifs_sb->local_nls,
914 cifs_sb->mnt_cifs_flags &
915 CIFS_MOUNT_MAP_SPECIAL_CHR);
916 if (rc) {
917 cFYI(1, ("posix mkdir returned 0x%x", rc));
918 d_drop(direntry);
919 } else {
920 int obj_type;
921 if (pInfo->Type == -1) /* no return info - go query */
922 goto mkdir_get_info;
923/*BB check (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID ) to see if need to set uid/gid */
924 inc_nlink(inode);
925 if (pTcon->nocase)
926 direntry->d_op = &cifs_ci_dentry_ops;
927 else
928 direntry->d_op = &cifs_dentry_ops;
929
930 newinode = new_inode(inode->i_sb);
931 if (newinode == NULL)
932 goto mkdir_get_info;
933 /* Is an i_ino of zero legal? */
934 /* Are there sanity checks we can use to ensure that
935 the server is really filling in that field? */
936 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
937 newinode->i_ino =
938 (unsigned long)pInfo->UniqueId;
939 } /* note ino incremented to unique num in new_inode */
940 if(inode->i_sb->s_flags & MS_NOATIME)
941 newinode->i_flags |= S_NOATIME | S_NOCMTIME;
942 newinode->i_nlink = 2;
943
944 insert_inode_hash(newinode);
945 d_instantiate(direntry, newinode);
946
947 /* we already checked in POSIXCreate whether
948 frame was long enough */
949 posix_fill_in_inode(direntry->d_inode,
950 pInfo, &obj_type, 1 /* NewInode */);
951#ifdef CONFIG_CIFS_DEBUG2
952 cFYI(1,("instantiated dentry %p %s to inode %p",
953 direntry, direntry->d_name.name, newinode));
954
955 if(newinode->i_nlink != 2)
956 cFYI(1,("unexpected number of links %d",
957 newinode->i_nlink));
958#endif
959 }
960 kfree(pInfo);
961 goto mkdir_out;
962 }
963
758 /* BB add setting the equivalent of mode via CreateX w/ACLs */ 964 /* BB add setting the equivalent of mode via CreateX w/ACLs */
759 rc = CIFSSMBMkDir(xid, pTcon, full_path, cifs_sb->local_nls, 965 rc = CIFSSMBMkDir(xid, pTcon, full_path, cifs_sb->local_nls,
760 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 966 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
@@ -762,6 +968,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
762 cFYI(1, ("cifs_mkdir returned 0x%x", rc)); 968 cFYI(1, ("cifs_mkdir returned 0x%x", rc));
763 d_drop(direntry); 969 d_drop(direntry);
764 } else { 970 } else {
971mkdir_get_info:
765 inc_nlink(inode); 972 inc_nlink(inode);
766 if (pTcon->ses->capabilities & CAP_UNIX) 973 if (pTcon->ses->capabilities & CAP_UNIX)
767 rc = cifs_get_inode_info_unix(&newinode, full_path, 974 rc = cifs_get_inode_info_unix(&newinode, full_path,
@@ -775,8 +982,10 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
775 else 982 else
776 direntry->d_op = &cifs_dentry_ops; 983 direntry->d_op = &cifs_dentry_ops;
777 d_instantiate(direntry, newinode); 984 d_instantiate(direntry, newinode);
778 if (direntry->d_inode) 985 /* setting nlink not necessary except in cases where we
779 direntry->d_inode->i_nlink = 2; 986 * failed to get it from the server or was set bogus */
987 if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2))
988 direntry->d_inode->i_nlink = 2;
780 if (cifs_sb->tcon->ses->capabilities & CAP_UNIX) 989 if (cifs_sb->tcon->ses->capabilities & CAP_UNIX)
781 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { 990 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
782 CIFSSMBUnixSetPerms(xid, pTcon, full_path, 991 CIFSSMBUnixSetPerms(xid, pTcon, full_path,
@@ -812,6 +1021,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
812 } 1021 }
813 } 1022 }
814 } 1023 }
1024mkdir_out:
815 kfree(full_path); 1025 kfree(full_path);
816 FreeXid(xid); 1026 FreeXid(xid);
817 return rc; 1027 return rc;
@@ -1339,17 +1549,17 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1339 cpu_to_le32(cifsInode->cifsAttrs | 1549 cpu_to_le32(cifsInode->cifsAttrs |
1340 ATTR_READONLY); 1550 ATTR_READONLY);
1341 } 1551 }
1342 } else if ((mode & S_IWUGO) == S_IWUGO) { 1552 } else if (cifsInode->cifsAttrs & ATTR_READONLY) {
1343 if (cifsInode->cifsAttrs & ATTR_READONLY) { 1553 /* If file is readonly on server, we would
1344 set_dosattr = TRUE; 1554 not be able to write to it - so if any write
1345 time_buf.Attributes = 1555 bit is enabled for user or group or other we
1346 cpu_to_le32(cifsInode->cifsAttrs & 1556 need to at least try to remove r/o dos attr */
1347 (~ATTR_READONLY)); 1557 set_dosattr = TRUE;
1348 /* Windows ignores set to zero */ 1558 time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs &
1349 if(time_buf.Attributes == 0) 1559 (~ATTR_READONLY));
1350 time_buf.Attributes |= 1560 /* Windows ignores set to zero */
1351 cpu_to_le32(ATTR_NORMAL); 1561 if(time_buf.Attributes == 0)
1352 } 1562 time_buf.Attributes |= cpu_to_le32(ATTR_NORMAL);
1353 } 1563 }
1354 /* BB to be implemented - 1564 /* BB to be implemented -
1355 via Windows security descriptors or streams */ 1565 via Windows security descriptors or streams */
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 992e80edc720..53e304d59544 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -30,6 +30,9 @@
30#include <linux/fs.h> 30#include <linux/fs.h>
31#include <asm/div64.h> 31#include <asm/div64.h>
32#include <asm/byteorder.h> 32#include <asm/byteorder.h>
33#ifdef CONFIG_CIFS_EXPERIMENTAL
34#include <linux/inet.h>
35#endif
33#include "cifsfs.h" 36#include "cifsfs.h"
34#include "cifspdu.h" 37#include "cifspdu.h"
35#include "cifsglob.h" 38#include "cifsglob.h"
@@ -129,11 +132,27 @@ static const struct smb_to_posix_error mapping_table_ERRHRD[] = {
129/* Convert string containing dotted ip address to binary form */ 132/* Convert string containing dotted ip address to binary form */
130/* returns 0 if invalid address */ 133/* returns 0 if invalid address */
131 134
132/* BB add address family, change rc to status flag and return union or for ipv6 */
133/* will need parent to call something like inet_pton to convert ipv6 address BB */
134int 135int
135cifs_inet_pton(int address_family, char *cp,void *dst) 136cifs_inet_pton(int address_family, char *cp,void *dst)
136{ 137{
138#ifdef CONFIG_CIFS_EXPERIMENTAL
139 int ret = 0;
140
141 /* calculate length by finding first slash or NULL */
142 /* BB Should we convert '/' slash to '\' here since it seems already done
143 before this */
144 if( address_family == AF_INET ){
145 ret = in4_pton(cp, -1 /* len */, dst , '\\', NULL);
146 } else if( address_family == AF_INET6 ){
147 ret = in6_pton(cp, -1 /* len */, dst , '\\', NULL);
148 }
149#ifdef CONFIG_CIFS_DEBUG2
150 cFYI(1,("address conversion returned %d for %s", ret, cp));
151#endif
152 if (ret > 0)
153 ret = 1;
154 return ret;
155#else
137 int value; 156 int value;
138 int digit; 157 int digit;
139 int i; 158 int i;
@@ -192,6 +211,7 @@ cifs_inet_pton(int address_family, char *cp,void *dst)
192 211
193 *((__be32 *)dst) = *((__be32 *) bytes) | htonl(value); 212 *((__be32 *)dst) = *((__be32 *) bytes) | htonl(value);
194 return 1; /* success */ 213 return 1; /* success */
214#endif /* EXPERIMENTAL */
195} 215}
196 216
197/***************************************************************************** 217/*****************************************************************************
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 2a374d5215ab..c08bda9fcac6 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -23,7 +23,6 @@
23#include <linux/fs.h> 23#include <linux/fs.h>
24#include <linux/pagemap.h> 24#include <linux/pagemap.h>
25#include <linux/stat.h> 25#include <linux/stat.h>
26#include <linux/smp_lock.h>
27#include "cifspdu.h" 26#include "cifspdu.h"
28#include "cifsglob.h" 27#include "cifsglob.h"
29#include "cifsproto.h" 28#include "cifsproto.h"
@@ -37,19 +36,19 @@ static void dump_cifs_file_struct(struct file *file, char *label)
37{ 36{
38 struct cifsFileInfo * cf; 37 struct cifsFileInfo * cf;
39 38
40 if(file) { 39 if (file) {
41 cf = file->private_data; 40 cf = file->private_data;
42 if(cf == NULL) { 41 if (cf == NULL) {
43 cFYI(1,("empty cifs private file data")); 42 cFYI(1,("empty cifs private file data"));
44 return; 43 return;
45 } 44 }
46 if(cf->invalidHandle) { 45 if (cf->invalidHandle) {
47 cFYI(1,("invalid handle")); 46 cFYI(1,("invalid handle"));
48 } 47 }
49 if(cf->srch_inf.endOfSearch) { 48 if (cf->srch_inf.endOfSearch) {
50 cFYI(1,("end of search")); 49 cFYI(1,("end of search"));
51 } 50 }
52 if(cf->srch_inf.emptyDir) { 51 if (cf->srch_inf.emptyDir) {
53 cFYI(1,("empty dir")); 52 cFYI(1,("empty dir"));
54 } 53 }
55 54
@@ -77,17 +76,17 @@ static int construct_dentry(struct qstr *qstring, struct file *file,
77 cFYI(0, ("existing dentry with inode 0x%p", tmp_dentry->d_inode)); 76 cFYI(0, ("existing dentry with inode 0x%p", tmp_dentry->d_inode));
78 *ptmp_inode = tmp_dentry->d_inode; 77 *ptmp_inode = tmp_dentry->d_inode;
79/* BB overwrite old name? i.e. tmp_dentry->d_name and tmp_dentry->d_name.len??*/ 78/* BB overwrite old name? i.e. tmp_dentry->d_name and tmp_dentry->d_name.len??*/
80 if(*ptmp_inode == NULL) { 79 if (*ptmp_inode == NULL) {
81 *ptmp_inode = new_inode(file->f_path.dentry->d_sb); 80 *ptmp_inode = new_inode(file->f_path.dentry->d_sb);
82 if(*ptmp_inode == NULL) 81 if (*ptmp_inode == NULL)
83 return rc; 82 return rc;
84 rc = 1; 83 rc = 1;
85 } 84 }
86 if(file->f_path.dentry->d_sb->s_flags & MS_NOATIME) 85 if (file->f_path.dentry->d_sb->s_flags & MS_NOATIME)
87 (*ptmp_inode)->i_flags |= S_NOATIME | S_NOCMTIME; 86 (*ptmp_inode)->i_flags |= S_NOATIME | S_NOCMTIME;
88 } else { 87 } else {
89 tmp_dentry = d_alloc(file->f_path.dentry, qstring); 88 tmp_dentry = d_alloc(file->f_path.dentry, qstring);
90 if(tmp_dentry == NULL) { 89 if (tmp_dentry == NULL) {
91 cERROR(1,("Failed allocating dentry")); 90 cERROR(1,("Failed allocating dentry"));
92 *ptmp_inode = NULL; 91 *ptmp_inode = NULL;
93 return rc; 92 return rc;
@@ -98,9 +97,9 @@ static int construct_dentry(struct qstr *qstring, struct file *file,
98 tmp_dentry->d_op = &cifs_ci_dentry_ops; 97 tmp_dentry->d_op = &cifs_ci_dentry_ops;
99 else 98 else
100 tmp_dentry->d_op = &cifs_dentry_ops; 99 tmp_dentry->d_op = &cifs_dentry_ops;
101 if(*ptmp_inode == NULL) 100 if (*ptmp_inode == NULL)
102 return rc; 101 return rc;
103 if(file->f_path.dentry->d_sb->s_flags & MS_NOATIME) 102 if (file->f_path.dentry->d_sb->s_flags & MS_NOATIME)
104 (*ptmp_inode)->i_flags |= S_NOATIME | S_NOCMTIME; 103 (*ptmp_inode)->i_flags |= S_NOATIME | S_NOCMTIME;
105 rc = 2; 104 rc = 2;
106 } 105 }
@@ -112,7 +111,7 @@ static int construct_dentry(struct qstr *qstring, struct file *file,
112 111
113static void AdjustForTZ(struct cifsTconInfo * tcon, struct inode * inode) 112static void AdjustForTZ(struct cifsTconInfo * tcon, struct inode * inode)
114{ 113{
115 if((tcon) && (tcon->ses) && (tcon->ses->server)) { 114 if ((tcon) && (tcon->ses) && (tcon->ses->server)) {
116 inode->i_ctime.tv_sec += tcon->ses->server->timeAdj; 115 inode->i_ctime.tv_sec += tcon->ses->server->timeAdj;
117 inode->i_mtime.tv_sec += tcon->ses->server->timeAdj; 116 inode->i_mtime.tv_sec += tcon->ses->server->timeAdj;
118 inode->i_atime.tv_sec += tcon->ses->server->timeAdj; 117 inode->i_atime.tv_sec += tcon->ses->server->timeAdj;
@@ -137,7 +136,7 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
137 local_mtime = tmp_inode->i_mtime; 136 local_mtime = tmp_inode->i_mtime;
138 local_size = tmp_inode->i_size; 137 local_size = tmp_inode->i_size;
139 138
140 if(new_buf_type) { 139 if (new_buf_type) {
141 FILE_DIRECTORY_INFO *pfindData = (FILE_DIRECTORY_INFO *)buf; 140 FILE_DIRECTORY_INFO *pfindData = (FILE_DIRECTORY_INFO *)buf;
142 141
143 attr = le32_to_cpu(pfindData->ExtFileAttributes); 142 attr = le32_to_cpu(pfindData->ExtFileAttributes);
@@ -193,7 +192,7 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
193 if (attr & ATTR_DIRECTORY) { 192 if (attr & ATTR_DIRECTORY) {
194 *pobject_type = DT_DIR; 193 *pobject_type = DT_DIR;
195 /* override default perms since we do not lock dirs */ 194 /* override default perms since we do not lock dirs */
196 if(atomic_read(&cifsInfo->inUse) == 0) { 195 if (atomic_read(&cifsInfo->inUse) == 0) {
197 tmp_inode->i_mode = cifs_sb->mnt_dir_mode; 196 tmp_inode->i_mode = cifs_sb->mnt_dir_mode;
198 } 197 }
199 tmp_inode->i_mode |= S_IFDIR; 198 tmp_inode->i_mode |= S_IFDIR;
@@ -250,25 +249,25 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
250 if (S_ISREG(tmp_inode->i_mode)) { 249 if (S_ISREG(tmp_inode->i_mode)) {
251 cFYI(1, ("File inode")); 250 cFYI(1, ("File inode"));
252 tmp_inode->i_op = &cifs_file_inode_ops; 251 tmp_inode->i_op = &cifs_file_inode_ops;
253 if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) { 252 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
254 if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) 253 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
255 tmp_inode->i_fop = &cifs_file_direct_nobrl_ops; 254 tmp_inode->i_fop = &cifs_file_direct_nobrl_ops;
256 else 255 else
257 tmp_inode->i_fop = &cifs_file_direct_ops; 256 tmp_inode->i_fop = &cifs_file_direct_ops;
258 257
259 } else if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) 258 } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
260 tmp_inode->i_fop = &cifs_file_nobrl_ops; 259 tmp_inode->i_fop = &cifs_file_nobrl_ops;
261 else 260 else
262 tmp_inode->i_fop = &cifs_file_ops; 261 tmp_inode->i_fop = &cifs_file_ops;
263 262
264 if((cifs_sb->tcon) && (cifs_sb->tcon->ses) && 263 if ((cifs_sb->tcon) && (cifs_sb->tcon->ses) &&
265 (cifs_sb->tcon->ses->server->maxBuf < 264 (cifs_sb->tcon->ses->server->maxBuf <
266 PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE)) 265 PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE))
267 tmp_inode->i_data.a_ops = &cifs_addr_ops_smallbuf; 266 tmp_inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
268 else 267 else
269 tmp_inode->i_data.a_ops = &cifs_addr_ops; 268 tmp_inode->i_data.a_ops = &cifs_addr_ops;
270 269
271 if(isNewInode) 270 if (isNewInode)
272 return; /* No sense invalidating pages for new inode 271 return; /* No sense invalidating pages for new inode
273 since have not started caching readahead file 272 since have not started caching readahead file
274 data yet */ 273 data yet */
@@ -357,8 +356,14 @@ static void unix_fill_in_inode(struct inode *tmp_inode,
357 cFYI(1,("unknown inode type %d",type)); 356 cFYI(1,("unknown inode type %d",type));
358 } 357 }
359 358
360 tmp_inode->i_uid = le64_to_cpu(pfindData->Uid); 359 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)
361 tmp_inode->i_gid = le64_to_cpu(pfindData->Gid); 360 tmp_inode->i_uid = cifs_sb->mnt_uid;
361 else
362 tmp_inode->i_uid = le64_to_cpu(pfindData->Uid);
363 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)
364 tmp_inode->i_gid = cifs_sb->mnt_gid;
365 else
366 tmp_inode->i_gid = le64_to_cpu(pfindData->Gid);
362 tmp_inode->i_nlink = le64_to_cpu(pfindData->Nlinks); 367 tmp_inode->i_nlink = le64_to_cpu(pfindData->Nlinks);
363 368
364 spin_lock(&tmp_inode->i_lock); 369 spin_lock(&tmp_inode->i_lock);
@@ -377,25 +382,24 @@ static void unix_fill_in_inode(struct inode *tmp_inode,
377 cFYI(1, ("File inode")); 382 cFYI(1, ("File inode"));
378 tmp_inode->i_op = &cifs_file_inode_ops; 383 tmp_inode->i_op = &cifs_file_inode_ops;
379 384
380 if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) { 385 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
381 if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) 386 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
382 tmp_inode->i_fop = &cifs_file_direct_nobrl_ops; 387 tmp_inode->i_fop = &cifs_file_direct_nobrl_ops;
383 else 388 else
384 tmp_inode->i_fop = &cifs_file_direct_ops; 389 tmp_inode->i_fop = &cifs_file_direct_ops;
385 390 } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
386 } else if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
387 tmp_inode->i_fop = &cifs_file_nobrl_ops; 391 tmp_inode->i_fop = &cifs_file_nobrl_ops;
388 else 392 else
389 tmp_inode->i_fop = &cifs_file_ops; 393 tmp_inode->i_fop = &cifs_file_ops;
390 394
391 if((cifs_sb->tcon) && (cifs_sb->tcon->ses) && 395 if ((cifs_sb->tcon) && (cifs_sb->tcon->ses) &&
392 (cifs_sb->tcon->ses->server->maxBuf < 396 (cifs_sb->tcon->ses->server->maxBuf <
393 PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE)) 397 PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE))
394 tmp_inode->i_data.a_ops = &cifs_addr_ops_smallbuf; 398 tmp_inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
395 else 399 else
396 tmp_inode->i_data.a_ops = &cifs_addr_ops; 400 tmp_inode->i_data.a_ops = &cifs_addr_ops;
397 401
398 if(isNewInode) 402 if (isNewInode)
399 return; /* No sense invalidating pages for new inode since we 403 return; /* No sense invalidating pages for new inode since we
400 have not started caching readahead file data yet */ 404 have not started caching readahead file data yet */
401 405
@@ -430,34 +434,28 @@ static int initiate_cifs_search(const int xid, struct file *file)
430 struct cifs_sb_info *cifs_sb; 434 struct cifs_sb_info *cifs_sb;
431 struct cifsTconInfo *pTcon; 435 struct cifsTconInfo *pTcon;
432 436
433 if(file->private_data == NULL) { 437 if (file->private_data == NULL) {
434 file->private_data = 438 file->private_data =
435 kmalloc(sizeof(struct cifsFileInfo),GFP_KERNEL); 439 kzalloc(sizeof(struct cifsFileInfo),GFP_KERNEL);
436 } 440 }
437 441
438 if(file->private_data == NULL) { 442 if (file->private_data == NULL)
439 return -ENOMEM; 443 return -ENOMEM;
440 } else {
441 memset(file->private_data,0,sizeof(struct cifsFileInfo));
442 }
443 cifsFile = file->private_data; 444 cifsFile = file->private_data;
444 cifsFile->invalidHandle = TRUE; 445 cifsFile->invalidHandle = TRUE;
445 cifsFile->srch_inf.endOfSearch = FALSE; 446 cifsFile->srch_inf.endOfSearch = FALSE;
446 447
447 if(file->f_path.dentry == NULL)
448 return -ENOENT;
449
450 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 448 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
451 if(cifs_sb == NULL) 449 if (cifs_sb == NULL)
452 return -EINVAL; 450 return -EINVAL;
453 451
454 pTcon = cifs_sb->tcon; 452 pTcon = cifs_sb->tcon;
455 if(pTcon == NULL) 453 if (pTcon == NULL)
456 return -EINVAL; 454 return -EINVAL;
457 455
458 full_path = build_path_from_dentry(file->f_path.dentry); 456 full_path = build_path_from_dentry(file->f_path.dentry);
459 457
460 if(full_path == NULL) { 458 if (full_path == NULL) {
461 return -ENOMEM; 459 return -ENOMEM;
462 } 460 }
463 461
@@ -480,9 +478,9 @@ ffirst_retry:
480 &cifsFile->netfid, &cifsFile->srch_inf, 478 &cifsFile->netfid, &cifsFile->srch_inf,
481 cifs_sb->mnt_cifs_flags & 479 cifs_sb->mnt_cifs_flags &
482 CIFS_MOUNT_MAP_SPECIAL_CHR, CIFS_DIR_SEP(cifs_sb)); 480 CIFS_MOUNT_MAP_SPECIAL_CHR, CIFS_DIR_SEP(cifs_sb));
483 if(rc == 0) 481 if (rc == 0)
484 cifsFile->invalidHandle = FALSE; 482 cifsFile->invalidHandle = FALSE;
485 if((rc == -EOPNOTSUPP) && 483 if ((rc == -EOPNOTSUPP) &&
486 (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)) { 484 (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)) {
487 cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SERVER_INUM; 485 cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SERVER_INUM;
488 goto ffirst_retry; 486 goto ffirst_retry;
@@ -498,7 +496,7 @@ static int cifs_unicode_bytelen(char *str)
498 __le16 * ustr = (__le16 *)str; 496 __le16 * ustr = (__le16 *)str;
499 497
500 for(len=0;len <= PATH_MAX;len++) { 498 for(len=0;len <= PATH_MAX;len++) {
501 if(ustr[len] == 0) 499 if (ustr[len] == 0)
502 return len << 1; 500 return len << 1;
503 } 501 }
504 cFYI(1,("Unicode string longer than PATH_MAX found")); 502 cFYI(1,("Unicode string longer than PATH_MAX found"));
@@ -510,7 +508,7 @@ static char *nxt_dir_entry(char *old_entry, char *end_of_smb, int level)
510 char * new_entry; 508 char * new_entry;
511 FILE_DIRECTORY_INFO * pDirInfo = (FILE_DIRECTORY_INFO *)old_entry; 509 FILE_DIRECTORY_INFO * pDirInfo = (FILE_DIRECTORY_INFO *)old_entry;
512 510
513 if(level == SMB_FIND_FILE_INFO_STANDARD) { 511 if (level == SMB_FIND_FILE_INFO_STANDARD) {
514 FIND_FILE_STANDARD_INFO * pfData; 512 FIND_FILE_STANDARD_INFO * pfData;
515 pfData = (FIND_FILE_STANDARD_INFO *)pDirInfo; 513 pfData = (FIND_FILE_STANDARD_INFO *)pDirInfo;
516 514
@@ -520,12 +518,12 @@ static char *nxt_dir_entry(char *old_entry, char *end_of_smb, int level)
520 new_entry = old_entry + le32_to_cpu(pDirInfo->NextEntryOffset); 518 new_entry = old_entry + le32_to_cpu(pDirInfo->NextEntryOffset);
521 cFYI(1,("new entry %p old entry %p",new_entry,old_entry)); 519 cFYI(1,("new entry %p old entry %p",new_entry,old_entry));
522 /* validate that new_entry is not past end of SMB */ 520 /* validate that new_entry is not past end of SMB */
523 if(new_entry >= end_of_smb) { 521 if (new_entry >= end_of_smb) {
524 cERROR(1, 522 cERROR(1,
525 ("search entry %p began after end of SMB %p old entry %p", 523 ("search entry %p began after end of SMB %p old entry %p",
526 new_entry, end_of_smb, old_entry)); 524 new_entry, end_of_smb, old_entry));
527 return NULL; 525 return NULL;
528 } else if(((level == SMB_FIND_FILE_INFO_STANDARD) && 526 } else if (((level == SMB_FIND_FILE_INFO_STANDARD) &&
529 (new_entry + sizeof(FIND_FILE_STANDARD_INFO) > end_of_smb)) || 527 (new_entry + sizeof(FIND_FILE_STANDARD_INFO) > end_of_smb)) ||
530 ((level != SMB_FIND_FILE_INFO_STANDARD) && 528 ((level != SMB_FIND_FILE_INFO_STANDARD) &&
531 (new_entry + sizeof(FILE_DIRECTORY_INFO) > end_of_smb))) { 529 (new_entry + sizeof(FILE_DIRECTORY_INFO) > end_of_smb))) {
@@ -546,39 +544,39 @@ static int cifs_entry_is_dot(char *current_entry, struct cifsFileInfo *cfile)
546 char * filename = NULL; 544 char * filename = NULL;
547 int len = 0; 545 int len = 0;
548 546
549 if(cfile->srch_inf.info_level == SMB_FIND_FILE_UNIX) { 547 if (cfile->srch_inf.info_level == SMB_FIND_FILE_UNIX) {
550 FILE_UNIX_INFO * pFindData = (FILE_UNIX_INFO *)current_entry; 548 FILE_UNIX_INFO * pFindData = (FILE_UNIX_INFO *)current_entry;
551 filename = &pFindData->FileName[0]; 549 filename = &pFindData->FileName[0];
552 if(cfile->srch_inf.unicode) { 550 if (cfile->srch_inf.unicode) {
553 len = cifs_unicode_bytelen(filename); 551 len = cifs_unicode_bytelen(filename);
554 } else { 552 } else {
555 /* BB should we make this strnlen of PATH_MAX? */ 553 /* BB should we make this strnlen of PATH_MAX? */
556 len = strnlen(filename, 5); 554 len = strnlen(filename, 5);
557 } 555 }
558 } else if(cfile->srch_inf.info_level == SMB_FIND_FILE_DIRECTORY_INFO) { 556 } else if (cfile->srch_inf.info_level == SMB_FIND_FILE_DIRECTORY_INFO) {
559 FILE_DIRECTORY_INFO * pFindData = 557 FILE_DIRECTORY_INFO * pFindData =
560 (FILE_DIRECTORY_INFO *)current_entry; 558 (FILE_DIRECTORY_INFO *)current_entry;
561 filename = &pFindData->FileName[0]; 559 filename = &pFindData->FileName[0];
562 len = le32_to_cpu(pFindData->FileNameLength); 560 len = le32_to_cpu(pFindData->FileNameLength);
563 } else if(cfile->srch_inf.info_level == 561 } else if (cfile->srch_inf.info_level ==
564 SMB_FIND_FILE_FULL_DIRECTORY_INFO) { 562 SMB_FIND_FILE_FULL_DIRECTORY_INFO) {
565 FILE_FULL_DIRECTORY_INFO * pFindData = 563 FILE_FULL_DIRECTORY_INFO * pFindData =
566 (FILE_FULL_DIRECTORY_INFO *)current_entry; 564 (FILE_FULL_DIRECTORY_INFO *)current_entry;
567 filename = &pFindData->FileName[0]; 565 filename = &pFindData->FileName[0];
568 len = le32_to_cpu(pFindData->FileNameLength); 566 len = le32_to_cpu(pFindData->FileNameLength);
569 } else if(cfile->srch_inf.info_level == 567 } else if (cfile->srch_inf.info_level ==
570 SMB_FIND_FILE_ID_FULL_DIR_INFO) { 568 SMB_FIND_FILE_ID_FULL_DIR_INFO) {
571 SEARCH_ID_FULL_DIR_INFO * pFindData = 569 SEARCH_ID_FULL_DIR_INFO * pFindData =
572 (SEARCH_ID_FULL_DIR_INFO *)current_entry; 570 (SEARCH_ID_FULL_DIR_INFO *)current_entry;
573 filename = &pFindData->FileName[0]; 571 filename = &pFindData->FileName[0];
574 len = le32_to_cpu(pFindData->FileNameLength); 572 len = le32_to_cpu(pFindData->FileNameLength);
575 } else if(cfile->srch_inf.info_level == 573 } else if (cfile->srch_inf.info_level ==
576 SMB_FIND_FILE_BOTH_DIRECTORY_INFO) { 574 SMB_FIND_FILE_BOTH_DIRECTORY_INFO) {
577 FILE_BOTH_DIRECTORY_INFO * pFindData = 575 FILE_BOTH_DIRECTORY_INFO * pFindData =
578 (FILE_BOTH_DIRECTORY_INFO *)current_entry; 576 (FILE_BOTH_DIRECTORY_INFO *)current_entry;
579 filename = &pFindData->FileName[0]; 577 filename = &pFindData->FileName[0];
580 len = le32_to_cpu(pFindData->FileNameLength); 578 len = le32_to_cpu(pFindData->FileNameLength);
581 } else if(cfile->srch_inf.info_level == SMB_FIND_FILE_INFO_STANDARD) { 579 } else if (cfile->srch_inf.info_level == SMB_FIND_FILE_INFO_STANDARD) {
582 FIND_FILE_STANDARD_INFO * pFindData = 580 FIND_FILE_STANDARD_INFO * pFindData =
583 (FIND_FILE_STANDARD_INFO *)current_entry; 581 (FIND_FILE_STANDARD_INFO *)current_entry;
584 filename = &pFindData->FileName[0]; 582 filename = &pFindData->FileName[0];
@@ -587,25 +585,25 @@ static int cifs_entry_is_dot(char *current_entry, struct cifsFileInfo *cfile)
587 cFYI(1,("Unknown findfirst level %d",cfile->srch_inf.info_level)); 585 cFYI(1,("Unknown findfirst level %d",cfile->srch_inf.info_level));
588 } 586 }
589 587
590 if(filename) { 588 if (filename) {
591 if(cfile->srch_inf.unicode) { 589 if (cfile->srch_inf.unicode) {
592 __le16 *ufilename = (__le16 *)filename; 590 __le16 *ufilename = (__le16 *)filename;
593 if(len == 2) { 591 if (len == 2) {
594 /* check for . */ 592 /* check for . */
595 if(ufilename[0] == UNICODE_DOT) 593 if (ufilename[0] == UNICODE_DOT)
596 rc = 1; 594 rc = 1;
597 } else if(len == 4) { 595 } else if (len == 4) {
598 /* check for .. */ 596 /* check for .. */
599 if((ufilename[0] == UNICODE_DOT) 597 if ((ufilename[0] == UNICODE_DOT)
600 &&(ufilename[1] == UNICODE_DOT)) 598 &&(ufilename[1] == UNICODE_DOT))
601 rc = 2; 599 rc = 2;
602 } 600 }
603 } else /* ASCII */ { 601 } else /* ASCII */ {
604 if(len == 1) { 602 if (len == 1) {
605 if(filename[0] == '.') 603 if (filename[0] == '.')
606 rc = 1; 604 rc = 1;
607 } else if(len == 2) { 605 } else if (len == 2) {
608 if((filename[0] == '.') && (filename[1] == '.')) 606 if((filename[0] == '.') && (filename[1] == '.'))
609 rc = 2; 607 rc = 2;
610 } 608 }
611 } 609 }
@@ -618,20 +616,10 @@ static int cifs_entry_is_dot(char *current_entry, struct cifsFileInfo *cfile)
618 whether we can use the cached search results from the previous search */ 616 whether we can use the cached search results from the previous search */
619static int is_dir_changed(struct file * file) 617static int is_dir_changed(struct file * file)
620{ 618{
621 struct inode * inode; 619 struct inode *inode = file->f_path.dentry->d_inode;
622 struct cifsInodeInfo *cifsInfo; 620 struct cifsInodeInfo *cifsInfo = CIFS_I(inode);
623 621
624 if(file->f_path.dentry == NULL) 622 if (cifsInfo->time == 0)
625 return 0;
626
627 inode = file->f_path.dentry->d_inode;
628
629 if(inode == NULL)
630 return 0;
631
632 cifsInfo = CIFS_I(inode);
633
634 if(cifsInfo->time == 0)
635 return 1; /* directory was changed, perhaps due to unlink */ 623 return 1; /* directory was changed, perhaps due to unlink */
636 else 624 else
637 return 0; 625 return 0;
@@ -654,7 +642,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
654 struct cifsFileInfo * cifsFile = file->private_data; 642 struct cifsFileInfo * cifsFile = file->private_data;
655 /* check if index in the buffer */ 643 /* check if index in the buffer */
656 644
657 if((cifsFile == NULL) || (ppCurrentEntry == NULL) || 645 if ((cifsFile == NULL) || (ppCurrentEntry == NULL) ||
658 (num_to_ret == NULL)) 646 (num_to_ret == NULL))
659 return -ENOENT; 647 return -ENOENT;
660 648
@@ -672,7 +660,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
672#ifdef CONFIG_CIFS_DEBUG2 660#ifdef CONFIG_CIFS_DEBUG2
673 dump_cifs_file_struct(file, "In fce "); 661 dump_cifs_file_struct(file, "In fce ");
674#endif 662#endif
675 if(((index_to_find < cifsFile->srch_inf.index_of_last_entry) && 663 if (((index_to_find < cifsFile->srch_inf.index_of_last_entry) &&
676 is_dir_changed(file)) || 664 is_dir_changed(file)) ||
677 (index_to_find < first_entry_in_buffer)) { 665 (index_to_find < first_entry_in_buffer)) {
678 /* close and restart search */ 666 /* close and restart search */
@@ -681,9 +669,9 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
681 CIFSFindClose(xid, pTcon, cifsFile->netfid); 669 CIFSFindClose(xid, pTcon, cifsFile->netfid);
682 kfree(cifsFile->search_resume_name); 670 kfree(cifsFile->search_resume_name);
683 cifsFile->search_resume_name = NULL; 671 cifsFile->search_resume_name = NULL;
684 if(cifsFile->srch_inf.ntwrk_buf_start) { 672 if (cifsFile->srch_inf.ntwrk_buf_start) {
685 cFYI(1,("freeing SMB ff cache buf on search rewind")); 673 cFYI(1,("freeing SMB ff cache buf on search rewind"));
686 if(cifsFile->srch_inf.smallBuf) 674 if (cifsFile->srch_inf.smallBuf)
687 cifs_small_buf_release(cifsFile->srch_inf. 675 cifs_small_buf_release(cifsFile->srch_inf.
688 ntwrk_buf_start); 676 ntwrk_buf_start);
689 else 677 else
@@ -691,7 +679,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
691 ntwrk_buf_start); 679 ntwrk_buf_start);
692 } 680 }
693 rc = initiate_cifs_search(xid,file); 681 rc = initiate_cifs_search(xid,file);
694 if(rc) { 682 if (rc) {
695 cFYI(1,("error %d reinitiating a search on rewind",rc)); 683 cFYI(1,("error %d reinitiating a search on rewind",rc));
696 return rc; 684 return rc;
697 } 685 }
@@ -702,10 +690,10 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
702 cFYI(1,("calling findnext2")); 690 cFYI(1,("calling findnext2"));
703 rc = CIFSFindNext(xid,pTcon,cifsFile->netfid, 691 rc = CIFSFindNext(xid,pTcon,cifsFile->netfid,
704 &cifsFile->srch_inf); 692 &cifsFile->srch_inf);
705 if(rc) 693 if (rc)
706 return -ENOENT; 694 return -ENOENT;
707 } 695 }
708 if(index_to_find < cifsFile->srch_inf.index_of_last_entry) { 696 if (index_to_find < cifsFile->srch_inf.index_of_last_entry) {
709 /* we found the buffer that contains the entry */ 697 /* we found the buffer that contains the entry */
710 /* scan and find it */ 698 /* scan and find it */
711 int i; 699 int i;
@@ -851,9 +839,6 @@ static int cifs_filldir(char *pfindEntry, struct file *file,
851 if((scratch_buf == NULL) || (pfindEntry == NULL) || (pCifsF == NULL)) 839 if((scratch_buf == NULL) || (pfindEntry == NULL) || (pCifsF == NULL))
852 return -ENOENT; 840 return -ENOENT;
853 841
854 if(file->f_path.dentry == NULL)
855 return -ENOENT;
856
857 rc = cifs_entry_is_dot(pfindEntry,pCifsF); 842 rc = cifs_entry_is_dot(pfindEntry,pCifsF);
858 /* skip . and .. since we added them first */ 843 /* skip . and .. since we added them first */
859 if(rc != 0) 844 if(rc != 0)
@@ -997,11 +982,6 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
997 982
998 xid = GetXid(); 983 xid = GetXid();
999 984
1000 if(file->f_path.dentry == NULL) {
1001 FreeXid(xid);
1002 return -EIO;
1003 }
1004
1005 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 985 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1006 pTcon = cifs_sb->tcon; 986 pTcon = cifs_sb->tcon;
1007 if(pTcon == NULL) 987 if(pTcon == NULL)
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 614175a3b02e..0aaff3651d14 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -62,8 +62,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
62{ 62{
63 struct coda_inode_info *ei = (struct coda_inode_info *) foo; 63 struct coda_inode_info *ei = (struct coda_inode_info *) foo;
64 64
65 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 65 if (flags & SLAB_CTOR_CONSTRUCTOR)
66 SLAB_CTOR_CONSTRUCTOR)
67 inode_init_once(&ei->vfs_inode); 66 inode_init_once(&ei->vfs_inode);
68} 67}
69 68
diff --git a/fs/compat.c b/fs/compat.c
index 040a8be38a48..9cf75df9b2bb 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -15,6 +15,7 @@
15 * published by the Free Software Foundation. 15 * published by the Free Software Foundation.
16 */ 16 */
17 17
18#include <linux/kernel.h>
18#include <linux/linkage.h> 19#include <linux/linkage.h>
19#include <linux/compat.h> 20#include <linux/compat.h>
20#include <linux/errno.h> 21#include <linux/errno.h>
@@ -24,10 +25,8 @@
24#include <linux/namei.h> 25#include <linux/namei.h>
25#include <linux/file.h> 26#include <linux/file.h>
26#include <linux/vfs.h> 27#include <linux/vfs.h>
27#include <linux/ioctl32.h>
28#include <linux/ioctl.h> 28#include <linux/ioctl.h>
29#include <linux/init.h> 29#include <linux/init.h>
30#include <linux/sockios.h> /* for SIOCDEVPRIVATE */
31#include <linux/smb.h> 30#include <linux/smb.h>
32#include <linux/smb_mount.h> 31#include <linux/smb_mount.h>
33#include <linux/ncp_mount.h> 32#include <linux/ncp_mount.h>
@@ -45,13 +44,12 @@
45#include <linux/personality.h> 44#include <linux/personality.h>
46#include <linux/rwsem.h> 45#include <linux/rwsem.h>
47#include <linux/tsacct_kern.h> 46#include <linux/tsacct_kern.h>
47#include <linux/security.h>
48#include <linux/highmem.h> 48#include <linux/highmem.h>
49#include <linux/poll.h> 49#include <linux/poll.h>
50#include <linux/mm.h> 50#include <linux/mm.h>
51#include <linux/eventpoll.h> 51#include <linux/eventpoll.h>
52 52
53#include <net/sock.h> /* siocdevprivate_ioctl */
54
55#include <asm/uaccess.h> 53#include <asm/uaccess.h>
56#include <asm/mmu_context.h> 54#include <asm/mmu_context.h>
57#include <asm/ioctls.h> 55#include <asm/ioctls.h>
@@ -79,30 +77,57 @@ int compat_printk(const char *fmt, ...)
79 */ 77 */
80asmlinkage long compat_sys_utime(char __user *filename, struct compat_utimbuf __user *t) 78asmlinkage long compat_sys_utime(char __user *filename, struct compat_utimbuf __user *t)
81{ 79{
82 struct timeval tv[2]; 80 struct timespec tv[2];
83 81
84 if (t) { 82 if (t) {
85 if (get_user(tv[0].tv_sec, &t->actime) || 83 if (get_user(tv[0].tv_sec, &t->actime) ||
86 get_user(tv[1].tv_sec, &t->modtime)) 84 get_user(tv[1].tv_sec, &t->modtime))
87 return -EFAULT; 85 return -EFAULT;
88 tv[0].tv_usec = 0; 86 tv[0].tv_nsec = 0;
89 tv[1].tv_usec = 0; 87 tv[1].tv_nsec = 0;
88 }
89 return do_utimes(AT_FDCWD, filename, t ? tv : NULL, 0);
90}
91
92asmlinkage long compat_sys_utimensat(unsigned int dfd, char __user *filename, struct compat_timespec __user *t, int flags)
93{
94 struct timespec tv[2];
95
96 if (t) {
97 if (get_compat_timespec(&tv[0], &t[0]) ||
98 get_compat_timespec(&tv[1], &t[1]))
99 return -EFAULT;
100
101 if ((tv[0].tv_nsec == UTIME_OMIT || tv[0].tv_nsec == UTIME_NOW)
102 && tv[0].tv_sec != 0)
103 return -EINVAL;
104 if ((tv[1].tv_nsec == UTIME_OMIT || tv[1].tv_nsec == UTIME_NOW)
105 && tv[1].tv_sec != 0)
106 return -EINVAL;
107
108 if (tv[0].tv_nsec == UTIME_OMIT && tv[1].tv_nsec == UTIME_OMIT)
109 return 0;
90 } 110 }
91 return do_utimes(AT_FDCWD, filename, t ? tv : NULL); 111 return do_utimes(dfd, filename, t ? tv : NULL, flags);
92} 112}
93 113
94asmlinkage long compat_sys_futimesat(unsigned int dfd, char __user *filename, struct compat_timeval __user *t) 114asmlinkage long compat_sys_futimesat(unsigned int dfd, char __user *filename, struct compat_timeval __user *t)
95{ 115{
96 struct timeval tv[2]; 116 struct timespec tv[2];
97 117
98 if (t) { 118 if (t) {
99 if (get_user(tv[0].tv_sec, &t[0].tv_sec) || 119 if (get_user(tv[0].tv_sec, &t[0].tv_sec) ||
100 get_user(tv[0].tv_usec, &t[0].tv_usec) || 120 get_user(tv[0].tv_nsec, &t[0].tv_usec) ||
101 get_user(tv[1].tv_sec, &t[1].tv_sec) || 121 get_user(tv[1].tv_sec, &t[1].tv_sec) ||
102 get_user(tv[1].tv_usec, &t[1].tv_usec)) 122 get_user(tv[1].tv_nsec, &t[1].tv_usec))
103 return -EFAULT; 123 return -EFAULT;
124 if (tv[0].tv_nsec >= 1000000 || tv[0].tv_nsec < 0 ||
125 tv[1].tv_nsec >= 1000000 || tv[1].tv_nsec < 0)
126 return -EINVAL;
127 tv[0].tv_nsec *= 1000;
128 tv[1].tv_nsec *= 1000;
104 } 129 }
105 return do_utimes(dfd, filename, t ? tv : NULL); 130 return do_utimes(dfd, filename, t ? tv : NULL, 0);
106} 131}
107 132
108asmlinkage long compat_sys_utimes(char __user *filename, struct compat_timeval __user *t) 133asmlinkage long compat_sys_utimes(char __user *filename, struct compat_timeval __user *t)
@@ -312,162 +337,6 @@ out:
312 return error; 337 return error;
313} 338}
314 339
315/* ioctl32 stuff, used by sparc64, parisc, s390x, ppc64, x86_64, MIPS */
316
317#define IOCTL_HASHSIZE 256
318static struct ioctl_trans *ioctl32_hash_table[IOCTL_HASHSIZE];
319
320static inline unsigned long ioctl32_hash(unsigned long cmd)
321{
322 return (((cmd >> 6) ^ (cmd >> 4) ^ cmd)) % IOCTL_HASHSIZE;
323}
324
325static void ioctl32_insert_translation(struct ioctl_trans *trans)
326{
327 unsigned long hash;
328 struct ioctl_trans *t;
329
330 hash = ioctl32_hash (trans->cmd);
331 if (!ioctl32_hash_table[hash])
332 ioctl32_hash_table[hash] = trans;
333 else {
334 t = ioctl32_hash_table[hash];
335 while (t->next)
336 t = t->next;
337 trans->next = NULL;
338 t->next = trans;
339 }
340}
341
342static int __init init_sys32_ioctl(void)
343{
344 int i;
345
346 for (i = 0; i < ioctl_table_size; i++) {
347 if (ioctl_start[i].next != 0) {
348 printk("ioctl translation %d bad\n",i);
349 return -1;
350 }
351
352 ioctl32_insert_translation(&ioctl_start[i]);
353 }
354 return 0;
355}
356
357__initcall(init_sys32_ioctl);
358
359static void compat_ioctl_error(struct file *filp, unsigned int fd,
360 unsigned int cmd, unsigned long arg)
361{
362 char buf[10];
363 char *fn = "?";
364 char *path;
365
366 /* find the name of the device. */
367 path = (char *)__get_free_page(GFP_KERNEL);
368 if (path) {
369 fn = d_path(filp->f_path.dentry, filp->f_path.mnt, path, PAGE_SIZE);
370 if (IS_ERR(fn))
371 fn = "?";
372 }
373
374 sprintf(buf,"'%c'", (cmd>>24) & 0x3f);
375 if (!isprint(buf[1]))
376 sprintf(buf, "%02x", buf[1]);
377 compat_printk("ioctl32(%s:%d): Unknown cmd fd(%d) "
378 "cmd(%08x){%s} arg(%08x) on %s\n",
379 current->comm, current->pid,
380 (int)fd, (unsigned int)cmd, buf,
381 (unsigned int)arg, fn);
382
383 if (path)
384 free_page((unsigned long)path);
385}
386
387asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
388 unsigned long arg)
389{
390 struct file *filp;
391 int error = -EBADF;
392 struct ioctl_trans *t;
393 int fput_needed;
394
395 filp = fget_light(fd, &fput_needed);
396 if (!filp)
397 goto out;
398
399 /* RED-PEN how should LSM module know it's handling 32bit? */
400 error = security_file_ioctl(filp, cmd, arg);
401 if (error)
402 goto out_fput;
403
404 /*
405 * To allow the compat_ioctl handlers to be self contained
406 * we need to check the common ioctls here first.
407 * Just handle them with the standard handlers below.
408 */
409 switch (cmd) {
410 case FIOCLEX:
411 case FIONCLEX:
412 case FIONBIO:
413 case FIOASYNC:
414 case FIOQSIZE:
415 break;
416
417 case FIBMAP:
418 case FIGETBSZ:
419 case FIONREAD:
420 if (S_ISREG(filp->f_path.dentry->d_inode->i_mode))
421 break;
422 /*FALL THROUGH*/
423
424 default:
425 if (filp->f_op && filp->f_op->compat_ioctl) {
426 error = filp->f_op->compat_ioctl(filp, cmd, arg);
427 if (error != -ENOIOCTLCMD)
428 goto out_fput;
429 }
430
431 if (!filp->f_op ||
432 (!filp->f_op->ioctl && !filp->f_op->unlocked_ioctl))
433 goto do_ioctl;
434 break;
435 }
436
437 for (t = ioctl32_hash_table[ioctl32_hash(cmd)]; t; t = t->next) {
438 if (t->cmd == cmd)
439 goto found_handler;
440 }
441
442 if (S_ISSOCK(filp->f_path.dentry->d_inode->i_mode) &&
443 cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
444 error = siocdevprivate_ioctl(fd, cmd, arg);
445 } else {
446 static int count;
447
448 if (++count <= 50)
449 compat_ioctl_error(filp, fd, cmd, arg);
450 error = -EINVAL;
451 }
452
453 goto out_fput;
454
455 found_handler:
456 if (t->handler) {
457 lock_kernel();
458 error = t->handler(fd, cmd, arg, filp);
459 unlock_kernel();
460 goto out_fput;
461 }
462
463 do_ioctl:
464 error = vfs_ioctl(filp, fd, cmd, arg);
465 out_fput:
466 fput_light(filp, fput_needed);
467 out:
468 return error;
469}
470
471static int get_compat_flock(struct flock *kfl, struct compat_flock __user *ufl) 340static int get_compat_flock(struct flock *kfl, struct compat_flock __user *ufl)
472{ 341{
473 if (!access_ok(VERIFY_READ, ufl, sizeof(*ufl)) || 342 if (!access_ok(VERIFY_READ, ufl, sizeof(*ufl)) ||
@@ -901,8 +770,6 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name,
901} 770}
902 771
903#define NAME_OFFSET(de) ((int) ((de)->d_name - (char __user *) (de))) 772#define NAME_OFFSET(de) ((int) ((de)->d_name - (char __user *) (de)))
904#define COMPAT_ROUND_UP(x) (((x)+sizeof(compat_long_t)-1) & \
905 ~(sizeof(compat_long_t)-1))
906 773
907struct compat_old_linux_dirent { 774struct compat_old_linux_dirent {
908 compat_ulong_t d_ino; 775 compat_ulong_t d_ino;
@@ -990,7 +857,7 @@ static int compat_filldir(void *__buf, const char *name, int namlen,
990 struct compat_linux_dirent __user * dirent; 857 struct compat_linux_dirent __user * dirent;
991 struct compat_getdents_callback *buf = __buf; 858 struct compat_getdents_callback *buf = __buf;
992 compat_ulong_t d_ino; 859 compat_ulong_t d_ino;
993 int reclen = COMPAT_ROUND_UP(NAME_OFFSET(dirent) + namlen + 2); 860 int reclen = ALIGN(NAME_OFFSET(dirent) + namlen + 2, sizeof(compat_long_t));
994 861
995 buf->error = -EINVAL; /* only used if we fail.. */ 862 buf->error = -EINVAL; /* only used if we fail.. */
996 if (reclen > buf->count) 863 if (reclen > buf->count)
@@ -1065,7 +932,6 @@ out:
1065} 932}
1066 933
1067#ifndef __ARCH_OMIT_COMPAT_SYS_GETDENTS64 934#ifndef __ARCH_OMIT_COMPAT_SYS_GETDENTS64
1068#define COMPAT_ROUND_UP64(x) (((x)+sizeof(u64)-1) & ~(sizeof(u64)-1))
1069 935
1070struct compat_getdents_callback64 { 936struct compat_getdents_callback64 {
1071 struct linux_dirent64 __user *current_dir; 937 struct linux_dirent64 __user *current_dir;
@@ -1080,7 +946,7 @@ static int compat_filldir64(void * __buf, const char * name, int namlen, loff_t
1080 struct linux_dirent64 __user *dirent; 946 struct linux_dirent64 __user *dirent;
1081 struct compat_getdents_callback64 *buf = __buf; 947 struct compat_getdents_callback64 *buf = __buf;
1082 int jj = NAME_OFFSET(dirent); 948 int jj = NAME_OFFSET(dirent);
1083 int reclen = COMPAT_ROUND_UP64(jj + namlen + 1); 949 int reclen = ALIGN(jj + namlen + 1, sizeof(u64));
1084 u64 off; 950 u64 off;
1085 951
1086 buf->error = -EINVAL; /* only used if we fail.. */ 952 buf->error = -EINVAL; /* only used if we fail.. */
@@ -1593,8 +1459,6 @@ out_ret:
1593 1459
1594#define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t)) 1460#define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t))
1595 1461
1596#define ROUND_UP(x,y) (((x)+(y)-1)/(y))
1597
1598/* 1462/*
1599 * Ooo, nasty. We need here to frob 32-bit unsigned longs to 1463 * Ooo, nasty. We need here to frob 32-bit unsigned longs to
1600 * 64-bit unsigned longs. 1464 * 64-bit unsigned longs.
@@ -1603,7 +1467,7 @@ static
1603int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset, 1467int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
1604 unsigned long *fdset) 1468 unsigned long *fdset)
1605{ 1469{
1606 nr = ROUND_UP(nr, __COMPAT_NFDBITS); 1470 nr = DIV_ROUND_UP(nr, __COMPAT_NFDBITS);
1607 if (ufdset) { 1471 if (ufdset) {
1608 unsigned long odd; 1472 unsigned long odd;
1609 1473
@@ -1637,7 +1501,7 @@ int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
1637 unsigned long *fdset) 1501 unsigned long *fdset)
1638{ 1502{
1639 unsigned long odd; 1503 unsigned long odd;
1640 nr = ROUND_UP(nr, __COMPAT_NFDBITS); 1504 nr = DIV_ROUND_UP(nr, __COMPAT_NFDBITS);
1641 1505
1642 if (!ufdset) 1506 if (!ufdset)
1643 return 0; 1507 return 0;
@@ -1759,7 +1623,7 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
1759 if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS) 1623 if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS)
1760 timeout = -1; /* infinite */ 1624 timeout = -1; /* infinite */
1761 else { 1625 else {
1762 timeout = ROUND_UP(tv.tv_usec, 1000000/HZ); 1626 timeout = DIV_ROUND_UP(tv.tv_usec, 1000000/HZ);
1763 timeout += tv.tv_sec * HZ; 1627 timeout += tv.tv_sec * HZ;
1764 } 1628 }
1765 } 1629 }
@@ -1827,7 +1691,7 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
1827 do { 1691 do {
1828 if (tsp) { 1692 if (tsp) {
1829 if ((unsigned long)ts.tv_sec < MAX_SELECT_SECONDS) { 1693 if ((unsigned long)ts.tv_sec < MAX_SELECT_SECONDS) {
1830 timeout = ROUND_UP(ts.tv_nsec, 1000000000/HZ); 1694 timeout = DIV_ROUND_UP(ts.tv_nsec, 1000000000/HZ);
1831 timeout += ts.tv_sec * (unsigned long)HZ; 1695 timeout += ts.tv_sec * (unsigned long)HZ;
1832 ts.tv_sec = 0; 1696 ts.tv_sec = 0;
1833 ts.tv_nsec = 0; 1697 ts.tv_nsec = 0;
@@ -1923,7 +1787,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
1923 /* We assume that ts.tv_sec is always lower than 1787 /* We assume that ts.tv_sec is always lower than
1924 the number of seconds that can be expressed in 1788 the number of seconds that can be expressed in
1925 an s64. Otherwise the compiler bitches at us */ 1789 an s64. Otherwise the compiler bitches at us */
1926 timeout = ROUND_UP(ts.tv_nsec, 1000000000/HZ); 1790 timeout = DIV_ROUND_UP(ts.tv_nsec, 1000000000/HZ);
1927 timeout += ts.tv_sec * HZ; 1791 timeout += ts.tv_sec * HZ;
1928 } 1792 }
1929 1793
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index c68b055fa26e..d92bc3eb7afc 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -17,7 +17,6 @@
17#include <linux/compiler.h> 17#include <linux/compiler.h>
18#include <linux/sched.h> 18#include <linux/sched.h>
19#include <linux/smp.h> 19#include <linux/smp.h>
20#include <linux/smp_lock.h>
21#include <linux/ioctl.h> 20#include <linux/ioctl.h>
22#include <linux/if.h> 21#include <linux/if.h>
23#include <linux/if_bridge.h> 22#include <linux/if_bridge.h>
@@ -58,7 +57,6 @@
58#include <linux/serial.h> 57#include <linux/serial.h>
59#include <linux/if_tun.h> 58#include <linux/if_tun.h>
60#include <linux/ctype.h> 59#include <linux/ctype.h>
61#include <linux/ioctl32.h>
62#include <linux/syscalls.h> 60#include <linux/syscalls.h>
63#include <linux/i2c.h> 61#include <linux/i2c.h>
64#include <linux/i2c-dev.h> 62#include <linux/i2c-dev.h>
@@ -66,7 +64,6 @@
66#include <linux/atalk.h> 64#include <linux/atalk.h>
67#include <linux/blktrace_api.h> 65#include <linux/blktrace_api.h>
68 66
69#include <net/sock.h> /* siocdevprivate_ioctl */
70#include <net/bluetooth/bluetooth.h> 67#include <net/bluetooth/bluetooth.h>
71#include <net/bluetooth/hci.h> 68#include <net/bluetooth/hci.h>
72#include <net/bluetooth/rfcomm.h> 69#include <net/bluetooth/rfcomm.h>
@@ -475,7 +472,7 @@ static int bond_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
475 }; 472 };
476} 473}
477 474
478int siocdevprivate_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) 475static int siocdevprivate_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
479{ 476{
480 struct ifreq __user *u_ifreq64; 477 struct ifreq __user *u_ifreq64;
481 struct ifreq32 __user *u_ifreq32 = compat_ptr(arg); 478 struct ifreq32 __user *u_ifreq32 = compat_ptr(arg);
@@ -687,8 +684,10 @@ static int hdio_getgeo(unsigned int fd, unsigned int cmd, unsigned long arg)
687 if (!err) { 684 if (!err) {
688 err = copy_to_user (ugeo, &geo, 4); 685 err = copy_to_user (ugeo, &geo, 4);
689 err |= __put_user (geo.start, &ugeo->start); 686 err |= __put_user (geo.start, &ugeo->start);
687 if (err)
688 err = -EFAULT;
690 } 689 }
691 return err ? -EFAULT : 0; 690 return err;
692} 691}
693 692
694static int hdio_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg) 693static int hdio_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
@@ -2385,6 +2384,16 @@ lp_timeout_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
2385 return sys_ioctl(fd, cmd, (unsigned long)tn); 2384 return sys_ioctl(fd, cmd, (unsigned long)tn);
2386} 2385}
2387 2386
2387
2388typedef int (*ioctl_trans_handler_t)(unsigned int, unsigned int,
2389 unsigned long, struct file *);
2390
2391struct ioctl_trans {
2392 unsigned long cmd;
2393 ioctl_trans_handler_t handler;
2394 struct ioctl_trans *next;
2395};
2396
2388#define HANDLE_IOCTL(cmd,handler) \ 2397#define HANDLE_IOCTL(cmd,handler) \
2389 { (cmd), (ioctl_trans_handler_t)(handler) }, 2398 { (cmd), (ioctl_trans_handler_t)(handler) },
2390 2399
@@ -2396,9 +2405,844 @@ lp_timeout_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
2396#define ULONG_IOCTL(cmd) \ 2405#define ULONG_IOCTL(cmd) \
2397 { (cmd), (ioctl_trans_handler_t)sys_ioctl }, 2406 { (cmd), (ioctl_trans_handler_t)sys_ioctl },
2398 2407
2399 2408/* ioctl should not be warned about even if it's not implemented.
2400struct ioctl_trans ioctl_start[] = { 2409 Valid reasons to use this:
2401#include <linux/compat_ioctl.h> 2410 - It is implemented with ->compat_ioctl on some device, but programs
2411 call it on others too.
2412 - The ioctl is not implemented in the native kernel, but programs
2413 call it commonly anyways.
2414 Most other reasons are not valid. */
2415#define IGNORE_IOCTL(cmd) COMPATIBLE_IOCTL(cmd)
2416
2417static struct ioctl_trans ioctl_start[] = {
2418/* compatible ioctls first */
2419COMPATIBLE_IOCTL(0x4B50) /* KDGHWCLK - not in the kernel, but don't complain */
2420COMPATIBLE_IOCTL(0x4B51) /* KDSHWCLK - not in the kernel, but don't complain */
2421
2422/* Big T */
2423COMPATIBLE_IOCTL(TCGETA)
2424COMPATIBLE_IOCTL(TCSETA)
2425COMPATIBLE_IOCTL(TCSETAW)
2426COMPATIBLE_IOCTL(TCSETAF)
2427COMPATIBLE_IOCTL(TCSBRK)
2428ULONG_IOCTL(TCSBRKP)
2429COMPATIBLE_IOCTL(TCXONC)
2430COMPATIBLE_IOCTL(TCFLSH)
2431COMPATIBLE_IOCTL(TCGETS)
2432COMPATIBLE_IOCTL(TCSETS)
2433COMPATIBLE_IOCTL(TCSETSW)
2434COMPATIBLE_IOCTL(TCSETSF)
2435COMPATIBLE_IOCTL(TIOCLINUX)
2436COMPATIBLE_IOCTL(TIOCSBRK)
2437COMPATIBLE_IOCTL(TIOCCBRK)
2438ULONG_IOCTL(TIOCMIWAIT)
2439COMPATIBLE_IOCTL(TIOCGICOUNT)
2440/* Little t */
2441COMPATIBLE_IOCTL(TIOCGETD)
2442COMPATIBLE_IOCTL(TIOCSETD)
2443COMPATIBLE_IOCTL(TIOCEXCL)
2444COMPATIBLE_IOCTL(TIOCNXCL)
2445COMPATIBLE_IOCTL(TIOCCONS)
2446COMPATIBLE_IOCTL(TIOCGSOFTCAR)
2447COMPATIBLE_IOCTL(TIOCSSOFTCAR)
2448COMPATIBLE_IOCTL(TIOCSWINSZ)
2449COMPATIBLE_IOCTL(TIOCGWINSZ)
2450COMPATIBLE_IOCTL(TIOCMGET)
2451COMPATIBLE_IOCTL(TIOCMBIC)
2452COMPATIBLE_IOCTL(TIOCMBIS)
2453COMPATIBLE_IOCTL(TIOCMSET)
2454COMPATIBLE_IOCTL(TIOCPKT)
2455COMPATIBLE_IOCTL(TIOCNOTTY)
2456COMPATIBLE_IOCTL(TIOCSTI)
2457COMPATIBLE_IOCTL(TIOCOUTQ)
2458COMPATIBLE_IOCTL(TIOCSPGRP)
2459COMPATIBLE_IOCTL(TIOCGPGRP)
2460ULONG_IOCTL(TIOCSCTTY)
2461COMPATIBLE_IOCTL(TIOCGPTN)
2462COMPATIBLE_IOCTL(TIOCSPTLCK)
2463COMPATIBLE_IOCTL(TIOCSERGETLSR)
2464/* Little f */
2465COMPATIBLE_IOCTL(FIOCLEX)
2466COMPATIBLE_IOCTL(FIONCLEX)
2467COMPATIBLE_IOCTL(FIOASYNC)
2468COMPATIBLE_IOCTL(FIONBIO)
2469COMPATIBLE_IOCTL(FIONREAD) /* This is also TIOCINQ */
2470/* 0x00 */
2471COMPATIBLE_IOCTL(FIBMAP)
2472COMPATIBLE_IOCTL(FIGETBSZ)
2473/* 0x03 -- HD/IDE ioctl's used by hdparm and friends.
2474 * Some need translations, these do not.
2475 */
2476COMPATIBLE_IOCTL(HDIO_GET_IDENTITY)
2477COMPATIBLE_IOCTL(HDIO_DRIVE_TASK)
2478COMPATIBLE_IOCTL(HDIO_DRIVE_CMD)
2479ULONG_IOCTL(HDIO_SET_MULTCOUNT)
2480ULONG_IOCTL(HDIO_SET_UNMASKINTR)
2481ULONG_IOCTL(HDIO_SET_KEEPSETTINGS)
2482ULONG_IOCTL(HDIO_SET_32BIT)
2483ULONG_IOCTL(HDIO_SET_NOWERR)
2484ULONG_IOCTL(HDIO_SET_DMA)
2485ULONG_IOCTL(HDIO_SET_PIO_MODE)
2486ULONG_IOCTL(HDIO_SET_NICE)
2487ULONG_IOCTL(HDIO_SET_WCACHE)
2488ULONG_IOCTL(HDIO_SET_ACOUSTIC)
2489ULONG_IOCTL(HDIO_SET_BUSSTATE)
2490ULONG_IOCTL(HDIO_SET_ADDRESS)
2491COMPATIBLE_IOCTL(HDIO_SCAN_HWIF)
2492/* 0x330 is reserved -- it used to be HDIO_GETGEO_BIG */
2493COMPATIBLE_IOCTL(0x330)
2494/* 0x02 -- Floppy ioctls */
2495COMPATIBLE_IOCTL(FDMSGON)
2496COMPATIBLE_IOCTL(FDMSGOFF)
2497COMPATIBLE_IOCTL(FDSETEMSGTRESH)
2498COMPATIBLE_IOCTL(FDFLUSH)
2499COMPATIBLE_IOCTL(FDWERRORCLR)
2500COMPATIBLE_IOCTL(FDSETMAXERRS)
2501COMPATIBLE_IOCTL(FDGETMAXERRS)
2502COMPATIBLE_IOCTL(FDGETDRVTYP)
2503COMPATIBLE_IOCTL(FDEJECT)
2504COMPATIBLE_IOCTL(FDCLRPRM)
2505COMPATIBLE_IOCTL(FDFMTBEG)
2506COMPATIBLE_IOCTL(FDFMTEND)
2507COMPATIBLE_IOCTL(FDRESET)
2508COMPATIBLE_IOCTL(FDTWADDLE)
2509COMPATIBLE_IOCTL(FDFMTTRK)
2510COMPATIBLE_IOCTL(FDRAWCMD)
2511/* 0x12 */
2512#ifdef CONFIG_BLOCK
2513COMPATIBLE_IOCTL(BLKRASET)
2514COMPATIBLE_IOCTL(BLKROSET)
2515COMPATIBLE_IOCTL(BLKROGET)
2516COMPATIBLE_IOCTL(BLKRRPART)
2517COMPATIBLE_IOCTL(BLKFLSBUF)
2518COMPATIBLE_IOCTL(BLKSECTSET)
2519COMPATIBLE_IOCTL(BLKSSZGET)
2520COMPATIBLE_IOCTL(BLKTRACESTART)
2521COMPATIBLE_IOCTL(BLKTRACESTOP)
2522COMPATIBLE_IOCTL(BLKTRACESETUP)
2523COMPATIBLE_IOCTL(BLKTRACETEARDOWN)
2524ULONG_IOCTL(BLKRASET)
2525ULONG_IOCTL(BLKFRASET)
2526#endif
2527/* RAID */
2528COMPATIBLE_IOCTL(RAID_VERSION)
2529COMPATIBLE_IOCTL(GET_ARRAY_INFO)
2530COMPATIBLE_IOCTL(GET_DISK_INFO)
2531COMPATIBLE_IOCTL(PRINT_RAID_DEBUG)
2532COMPATIBLE_IOCTL(RAID_AUTORUN)
2533COMPATIBLE_IOCTL(CLEAR_ARRAY)
2534COMPATIBLE_IOCTL(ADD_NEW_DISK)
2535ULONG_IOCTL(HOT_REMOVE_DISK)
2536COMPATIBLE_IOCTL(SET_ARRAY_INFO)
2537COMPATIBLE_IOCTL(SET_DISK_INFO)
2538COMPATIBLE_IOCTL(WRITE_RAID_INFO)
2539COMPATIBLE_IOCTL(UNPROTECT_ARRAY)
2540COMPATIBLE_IOCTL(PROTECT_ARRAY)
2541ULONG_IOCTL(HOT_ADD_DISK)
2542ULONG_IOCTL(SET_DISK_FAULTY)
2543COMPATIBLE_IOCTL(RUN_ARRAY)
2544COMPATIBLE_IOCTL(STOP_ARRAY)
2545COMPATIBLE_IOCTL(STOP_ARRAY_RO)
2546COMPATIBLE_IOCTL(RESTART_ARRAY_RW)
2547COMPATIBLE_IOCTL(GET_BITMAP_FILE)
2548ULONG_IOCTL(SET_BITMAP_FILE)
2549/* DM */
2550COMPATIBLE_IOCTL(DM_VERSION_32)
2551COMPATIBLE_IOCTL(DM_REMOVE_ALL_32)
2552COMPATIBLE_IOCTL(DM_LIST_DEVICES_32)
2553COMPATIBLE_IOCTL(DM_DEV_CREATE_32)
2554COMPATIBLE_IOCTL(DM_DEV_REMOVE_32)
2555COMPATIBLE_IOCTL(DM_DEV_RENAME_32)
2556COMPATIBLE_IOCTL(DM_DEV_SUSPEND_32)
2557COMPATIBLE_IOCTL(DM_DEV_STATUS_32)
2558COMPATIBLE_IOCTL(DM_DEV_WAIT_32)
2559COMPATIBLE_IOCTL(DM_TABLE_LOAD_32)
2560COMPATIBLE_IOCTL(DM_TABLE_CLEAR_32)
2561COMPATIBLE_IOCTL(DM_TABLE_DEPS_32)
2562COMPATIBLE_IOCTL(DM_TABLE_STATUS_32)
2563COMPATIBLE_IOCTL(DM_LIST_VERSIONS_32)
2564COMPATIBLE_IOCTL(DM_TARGET_MSG_32)
2565COMPATIBLE_IOCTL(DM_DEV_SET_GEOMETRY_32)
2566COMPATIBLE_IOCTL(DM_VERSION)
2567COMPATIBLE_IOCTL(DM_REMOVE_ALL)
2568COMPATIBLE_IOCTL(DM_LIST_DEVICES)
2569COMPATIBLE_IOCTL(DM_DEV_CREATE)
2570COMPATIBLE_IOCTL(DM_DEV_REMOVE)
2571COMPATIBLE_IOCTL(DM_DEV_RENAME)
2572COMPATIBLE_IOCTL(DM_DEV_SUSPEND)
2573COMPATIBLE_IOCTL(DM_DEV_STATUS)
2574COMPATIBLE_IOCTL(DM_DEV_WAIT)
2575COMPATIBLE_IOCTL(DM_TABLE_LOAD)
2576COMPATIBLE_IOCTL(DM_TABLE_CLEAR)
2577COMPATIBLE_IOCTL(DM_TABLE_DEPS)
2578COMPATIBLE_IOCTL(DM_TABLE_STATUS)
2579COMPATIBLE_IOCTL(DM_LIST_VERSIONS)
2580COMPATIBLE_IOCTL(DM_TARGET_MSG)
2581COMPATIBLE_IOCTL(DM_DEV_SET_GEOMETRY)
2582/* Big K */
2583COMPATIBLE_IOCTL(PIO_FONT)
2584COMPATIBLE_IOCTL(GIO_FONT)
2585ULONG_IOCTL(KDSIGACCEPT)
2586COMPATIBLE_IOCTL(KDGETKEYCODE)
2587COMPATIBLE_IOCTL(KDSETKEYCODE)
2588ULONG_IOCTL(KIOCSOUND)
2589ULONG_IOCTL(KDMKTONE)
2590COMPATIBLE_IOCTL(KDGKBTYPE)
2591ULONG_IOCTL(KDSETMODE)
2592COMPATIBLE_IOCTL(KDGETMODE)
2593ULONG_IOCTL(KDSKBMODE)
2594COMPATIBLE_IOCTL(KDGKBMODE)
2595ULONG_IOCTL(KDSKBMETA)
2596COMPATIBLE_IOCTL(KDGKBMETA)
2597COMPATIBLE_IOCTL(KDGKBENT)
2598COMPATIBLE_IOCTL(KDSKBENT)
2599COMPATIBLE_IOCTL(KDGKBSENT)
2600COMPATIBLE_IOCTL(KDSKBSENT)
2601COMPATIBLE_IOCTL(KDGKBDIACR)
2602COMPATIBLE_IOCTL(KDSKBDIACR)
2603COMPATIBLE_IOCTL(KDKBDREP)
2604COMPATIBLE_IOCTL(KDGKBLED)
2605ULONG_IOCTL(KDSKBLED)
2606COMPATIBLE_IOCTL(KDGETLED)
2607ULONG_IOCTL(KDSETLED)
2608COMPATIBLE_IOCTL(GIO_SCRNMAP)
2609COMPATIBLE_IOCTL(PIO_SCRNMAP)
2610COMPATIBLE_IOCTL(GIO_UNISCRNMAP)
2611COMPATIBLE_IOCTL(PIO_UNISCRNMAP)
2612COMPATIBLE_IOCTL(PIO_FONTRESET)
2613COMPATIBLE_IOCTL(PIO_UNIMAPCLR)
2614/* Big S */
2615COMPATIBLE_IOCTL(SCSI_IOCTL_GET_IDLUN)
2616COMPATIBLE_IOCTL(SCSI_IOCTL_DOORLOCK)
2617COMPATIBLE_IOCTL(SCSI_IOCTL_DOORUNLOCK)
2618COMPATIBLE_IOCTL(SCSI_IOCTL_TEST_UNIT_READY)
2619COMPATIBLE_IOCTL(SCSI_IOCTL_GET_BUS_NUMBER)
2620COMPATIBLE_IOCTL(SCSI_IOCTL_SEND_COMMAND)
2621COMPATIBLE_IOCTL(SCSI_IOCTL_PROBE_HOST)
2622COMPATIBLE_IOCTL(SCSI_IOCTL_GET_PCI)
2623/* Big T */
2624COMPATIBLE_IOCTL(TUNSETNOCSUM)
2625COMPATIBLE_IOCTL(TUNSETDEBUG)
2626COMPATIBLE_IOCTL(TUNSETPERSIST)
2627COMPATIBLE_IOCTL(TUNSETOWNER)
2628/* Big V */
2629COMPATIBLE_IOCTL(VT_SETMODE)
2630COMPATIBLE_IOCTL(VT_GETMODE)
2631COMPATIBLE_IOCTL(VT_GETSTATE)
2632COMPATIBLE_IOCTL(VT_OPENQRY)
2633ULONG_IOCTL(VT_ACTIVATE)
2634ULONG_IOCTL(VT_WAITACTIVE)
2635ULONG_IOCTL(VT_RELDISP)
2636ULONG_IOCTL(VT_DISALLOCATE)
2637COMPATIBLE_IOCTL(VT_RESIZE)
2638COMPATIBLE_IOCTL(VT_RESIZEX)
2639COMPATIBLE_IOCTL(VT_LOCKSWITCH)
2640COMPATIBLE_IOCTL(VT_UNLOCKSWITCH)
2641COMPATIBLE_IOCTL(VT_GETHIFONTMASK)
2642/* Little p (/dev/rtc, /dev/envctrl, etc.) */
2643COMPATIBLE_IOCTL(RTC_AIE_ON)
2644COMPATIBLE_IOCTL(RTC_AIE_OFF)
2645COMPATIBLE_IOCTL(RTC_UIE_ON)
2646COMPATIBLE_IOCTL(RTC_UIE_OFF)
2647COMPATIBLE_IOCTL(RTC_PIE_ON)
2648COMPATIBLE_IOCTL(RTC_PIE_OFF)
2649COMPATIBLE_IOCTL(RTC_WIE_ON)
2650COMPATIBLE_IOCTL(RTC_WIE_OFF)
2651COMPATIBLE_IOCTL(RTC_ALM_SET)
2652COMPATIBLE_IOCTL(RTC_ALM_READ)
2653COMPATIBLE_IOCTL(RTC_RD_TIME)
2654COMPATIBLE_IOCTL(RTC_SET_TIME)
2655COMPATIBLE_IOCTL(RTC_WKALM_SET)
2656COMPATIBLE_IOCTL(RTC_WKALM_RD)
2657/*
2658 * These two are only for the sbus rtc driver, but
2659 * hwclock tries them on every rtc device first when
2660 * running on sparc. On other architectures the entries
2661 * are useless but harmless.
2662 */
2663COMPATIBLE_IOCTL(_IOR('p', 20, int[7])) /* RTCGET */
2664COMPATIBLE_IOCTL(_IOW('p', 21, int[7])) /* RTCSET */
2665/* Little m */
2666COMPATIBLE_IOCTL(MTIOCTOP)
2667/* Socket level stuff */
2668COMPATIBLE_IOCTL(FIOQSIZE)
2669COMPATIBLE_IOCTL(FIOSETOWN)
2670COMPATIBLE_IOCTL(SIOCSPGRP)
2671COMPATIBLE_IOCTL(FIOGETOWN)
2672COMPATIBLE_IOCTL(SIOCGPGRP)
2673COMPATIBLE_IOCTL(SIOCATMARK)
2674COMPATIBLE_IOCTL(SIOCSIFLINK)
2675COMPATIBLE_IOCTL(SIOCSIFENCAP)
2676COMPATIBLE_IOCTL(SIOCGIFENCAP)
2677COMPATIBLE_IOCTL(SIOCSIFNAME)
2678COMPATIBLE_IOCTL(SIOCSARP)
2679COMPATIBLE_IOCTL(SIOCGARP)
2680COMPATIBLE_IOCTL(SIOCDARP)
2681COMPATIBLE_IOCTL(SIOCSRARP)
2682COMPATIBLE_IOCTL(SIOCGRARP)
2683COMPATIBLE_IOCTL(SIOCDRARP)
2684COMPATIBLE_IOCTL(SIOCADDDLCI)
2685COMPATIBLE_IOCTL(SIOCDELDLCI)
2686COMPATIBLE_IOCTL(SIOCGMIIPHY)
2687COMPATIBLE_IOCTL(SIOCGMIIREG)
2688COMPATIBLE_IOCTL(SIOCSMIIREG)
2689COMPATIBLE_IOCTL(SIOCGIFVLAN)
2690COMPATIBLE_IOCTL(SIOCSIFVLAN)
2691COMPATIBLE_IOCTL(SIOCBRADDBR)
2692COMPATIBLE_IOCTL(SIOCBRDELBR)
2693/* SG stuff */
2694COMPATIBLE_IOCTL(SG_SET_TIMEOUT)
2695COMPATIBLE_IOCTL(SG_GET_TIMEOUT)
2696COMPATIBLE_IOCTL(SG_EMULATED_HOST)
2697ULONG_IOCTL(SG_SET_TRANSFORM)
2698COMPATIBLE_IOCTL(SG_GET_TRANSFORM)
2699COMPATIBLE_IOCTL(SG_SET_RESERVED_SIZE)
2700COMPATIBLE_IOCTL(SG_GET_RESERVED_SIZE)
2701COMPATIBLE_IOCTL(SG_GET_SCSI_ID)
2702COMPATIBLE_IOCTL(SG_SET_FORCE_LOW_DMA)
2703COMPATIBLE_IOCTL(SG_GET_LOW_DMA)
2704COMPATIBLE_IOCTL(SG_SET_FORCE_PACK_ID)
2705COMPATIBLE_IOCTL(SG_GET_PACK_ID)
2706COMPATIBLE_IOCTL(SG_GET_NUM_WAITING)
2707COMPATIBLE_IOCTL(SG_SET_DEBUG)
2708COMPATIBLE_IOCTL(SG_GET_SG_TABLESIZE)
2709COMPATIBLE_IOCTL(SG_GET_COMMAND_Q)
2710COMPATIBLE_IOCTL(SG_SET_COMMAND_Q)
2711COMPATIBLE_IOCTL(SG_GET_VERSION_NUM)
2712COMPATIBLE_IOCTL(SG_NEXT_CMD_LEN)
2713COMPATIBLE_IOCTL(SG_SCSI_RESET)
2714COMPATIBLE_IOCTL(SG_GET_REQUEST_TABLE)
2715COMPATIBLE_IOCTL(SG_SET_KEEP_ORPHAN)
2716COMPATIBLE_IOCTL(SG_GET_KEEP_ORPHAN)
2717/* PPP stuff */
2718COMPATIBLE_IOCTL(PPPIOCGFLAGS)
2719COMPATIBLE_IOCTL(PPPIOCSFLAGS)
2720COMPATIBLE_IOCTL(PPPIOCGASYNCMAP)
2721COMPATIBLE_IOCTL(PPPIOCSASYNCMAP)
2722COMPATIBLE_IOCTL(PPPIOCGUNIT)
2723COMPATIBLE_IOCTL(PPPIOCGRASYNCMAP)
2724COMPATIBLE_IOCTL(PPPIOCSRASYNCMAP)
2725COMPATIBLE_IOCTL(PPPIOCGMRU)
2726COMPATIBLE_IOCTL(PPPIOCSMRU)
2727COMPATIBLE_IOCTL(PPPIOCSMAXCID)
2728COMPATIBLE_IOCTL(PPPIOCGXASYNCMAP)
2729COMPATIBLE_IOCTL(PPPIOCSXASYNCMAP)
2730COMPATIBLE_IOCTL(PPPIOCXFERUNIT)
2731/* PPPIOCSCOMPRESS is translated */
2732COMPATIBLE_IOCTL(PPPIOCGNPMODE)
2733COMPATIBLE_IOCTL(PPPIOCSNPMODE)
2734COMPATIBLE_IOCTL(PPPIOCGDEBUG)
2735COMPATIBLE_IOCTL(PPPIOCSDEBUG)
2736/* PPPIOCSPASS is translated */
2737/* PPPIOCSACTIVE is translated */
2738/* PPPIOCGIDLE is translated */
2739COMPATIBLE_IOCTL(PPPIOCNEWUNIT)
2740COMPATIBLE_IOCTL(PPPIOCATTACH)
2741COMPATIBLE_IOCTL(PPPIOCDETACH)
2742COMPATIBLE_IOCTL(PPPIOCSMRRU)
2743COMPATIBLE_IOCTL(PPPIOCCONNECT)
2744COMPATIBLE_IOCTL(PPPIOCDISCONN)
2745COMPATIBLE_IOCTL(PPPIOCATTCHAN)
2746COMPATIBLE_IOCTL(PPPIOCGCHAN)
2747/* PPPOX */
2748COMPATIBLE_IOCTL(PPPOEIOCSFWD)
2749COMPATIBLE_IOCTL(PPPOEIOCDFWD)
2750/* LP */
2751COMPATIBLE_IOCTL(LPGETSTATUS)
2752/* ppdev */
2753COMPATIBLE_IOCTL(PPSETMODE)
2754COMPATIBLE_IOCTL(PPRSTATUS)
2755COMPATIBLE_IOCTL(PPRCONTROL)
2756COMPATIBLE_IOCTL(PPWCONTROL)
2757COMPATIBLE_IOCTL(PPFCONTROL)
2758COMPATIBLE_IOCTL(PPRDATA)
2759COMPATIBLE_IOCTL(PPWDATA)
2760COMPATIBLE_IOCTL(PPCLAIM)
2761COMPATIBLE_IOCTL(PPRELEASE)
2762COMPATIBLE_IOCTL(PPYIELD)
2763COMPATIBLE_IOCTL(PPEXCL)
2764COMPATIBLE_IOCTL(PPDATADIR)
2765COMPATIBLE_IOCTL(PPNEGOT)
2766COMPATIBLE_IOCTL(PPWCTLONIRQ)
2767COMPATIBLE_IOCTL(PPCLRIRQ)
2768COMPATIBLE_IOCTL(PPSETPHASE)
2769COMPATIBLE_IOCTL(PPGETMODES)
2770COMPATIBLE_IOCTL(PPGETMODE)
2771COMPATIBLE_IOCTL(PPGETPHASE)
2772COMPATIBLE_IOCTL(PPGETFLAGS)
2773COMPATIBLE_IOCTL(PPSETFLAGS)
2774/* CDROM stuff */
2775COMPATIBLE_IOCTL(CDROMPAUSE)
2776COMPATIBLE_IOCTL(CDROMRESUME)
2777COMPATIBLE_IOCTL(CDROMPLAYMSF)
2778COMPATIBLE_IOCTL(CDROMPLAYTRKIND)
2779COMPATIBLE_IOCTL(CDROMREADTOCHDR)
2780COMPATIBLE_IOCTL(CDROMREADTOCENTRY)
2781COMPATIBLE_IOCTL(CDROMSTOP)
2782COMPATIBLE_IOCTL(CDROMSTART)
2783COMPATIBLE_IOCTL(CDROMEJECT)
2784COMPATIBLE_IOCTL(CDROMVOLCTRL)
2785COMPATIBLE_IOCTL(CDROMSUBCHNL)
2786ULONG_IOCTL(CDROMEJECT_SW)
2787COMPATIBLE_IOCTL(CDROMMULTISESSION)
2788COMPATIBLE_IOCTL(CDROM_GET_MCN)
2789COMPATIBLE_IOCTL(CDROMRESET)
2790COMPATIBLE_IOCTL(CDROMVOLREAD)
2791COMPATIBLE_IOCTL(CDROMSEEK)
2792COMPATIBLE_IOCTL(CDROMPLAYBLK)
2793COMPATIBLE_IOCTL(CDROMCLOSETRAY)
2794ULONG_IOCTL(CDROM_SET_OPTIONS)
2795ULONG_IOCTL(CDROM_CLEAR_OPTIONS)
2796ULONG_IOCTL(CDROM_SELECT_SPEED)
2797ULONG_IOCTL(CDROM_SELECT_DISC)
2798ULONG_IOCTL(CDROM_MEDIA_CHANGED)
2799ULONG_IOCTL(CDROM_DRIVE_STATUS)
2800COMPATIBLE_IOCTL(CDROM_DISC_STATUS)
2801COMPATIBLE_IOCTL(CDROM_CHANGER_NSLOTS)
2802ULONG_IOCTL(CDROM_LOCKDOOR)
2803ULONG_IOCTL(CDROM_DEBUG)
2804COMPATIBLE_IOCTL(CDROM_GET_CAPABILITY)
2805/* Ignore cdrom.h about these next 5 ioctls, they absolutely do
2806 * not take a struct cdrom_read, instead they take a struct cdrom_msf
2807 * which is compatible.
2808 */
2809COMPATIBLE_IOCTL(CDROMREADMODE2)
2810COMPATIBLE_IOCTL(CDROMREADMODE1)
2811COMPATIBLE_IOCTL(CDROMREADRAW)
2812COMPATIBLE_IOCTL(CDROMREADCOOKED)
2813COMPATIBLE_IOCTL(CDROMREADALL)
2814/* DVD ioctls */
2815COMPATIBLE_IOCTL(DVD_READ_STRUCT)
2816COMPATIBLE_IOCTL(DVD_WRITE_STRUCT)
2817COMPATIBLE_IOCTL(DVD_AUTH)
2818/* pktcdvd */
2819COMPATIBLE_IOCTL(PACKET_CTRL_CMD)
2820/* Big A */
2821/* sparc only */
2822/* Big Q for sound/OSS */
2823COMPATIBLE_IOCTL(SNDCTL_SEQ_RESET)
2824COMPATIBLE_IOCTL(SNDCTL_SEQ_SYNC)
2825COMPATIBLE_IOCTL(SNDCTL_SYNTH_INFO)
2826COMPATIBLE_IOCTL(SNDCTL_SEQ_CTRLRATE)
2827COMPATIBLE_IOCTL(SNDCTL_SEQ_GETOUTCOUNT)
2828COMPATIBLE_IOCTL(SNDCTL_SEQ_GETINCOUNT)
2829COMPATIBLE_IOCTL(SNDCTL_SEQ_PERCMODE)
2830COMPATIBLE_IOCTL(SNDCTL_FM_LOAD_INSTR)
2831COMPATIBLE_IOCTL(SNDCTL_SEQ_TESTMIDI)
2832COMPATIBLE_IOCTL(SNDCTL_SEQ_RESETSAMPLES)
2833COMPATIBLE_IOCTL(SNDCTL_SEQ_NRSYNTHS)
2834COMPATIBLE_IOCTL(SNDCTL_SEQ_NRMIDIS)
2835COMPATIBLE_IOCTL(SNDCTL_MIDI_INFO)
2836COMPATIBLE_IOCTL(SNDCTL_SEQ_THRESHOLD)
2837COMPATIBLE_IOCTL(SNDCTL_SYNTH_MEMAVL)
2838COMPATIBLE_IOCTL(SNDCTL_FM_4OP_ENABLE)
2839COMPATIBLE_IOCTL(SNDCTL_SEQ_PANIC)
2840COMPATIBLE_IOCTL(SNDCTL_SEQ_OUTOFBAND)
2841COMPATIBLE_IOCTL(SNDCTL_SEQ_GETTIME)
2842COMPATIBLE_IOCTL(SNDCTL_SYNTH_ID)
2843COMPATIBLE_IOCTL(SNDCTL_SYNTH_CONTROL)
2844COMPATIBLE_IOCTL(SNDCTL_SYNTH_REMOVESAMPLE)
2845/* Big T for sound/OSS */
2846COMPATIBLE_IOCTL(SNDCTL_TMR_TIMEBASE)
2847COMPATIBLE_IOCTL(SNDCTL_TMR_START)
2848COMPATIBLE_IOCTL(SNDCTL_TMR_STOP)
2849COMPATIBLE_IOCTL(SNDCTL_TMR_CONTINUE)
2850COMPATIBLE_IOCTL(SNDCTL_TMR_TEMPO)
2851COMPATIBLE_IOCTL(SNDCTL_TMR_SOURCE)
2852COMPATIBLE_IOCTL(SNDCTL_TMR_METRONOME)
2853COMPATIBLE_IOCTL(SNDCTL_TMR_SELECT)
2854/* Little m for sound/OSS */
2855COMPATIBLE_IOCTL(SNDCTL_MIDI_PRETIME)
2856COMPATIBLE_IOCTL(SNDCTL_MIDI_MPUMODE)
2857COMPATIBLE_IOCTL(SNDCTL_MIDI_MPUCMD)
2858/* Big P for sound/OSS */
2859COMPATIBLE_IOCTL(SNDCTL_DSP_RESET)
2860COMPATIBLE_IOCTL(SNDCTL_DSP_SYNC)
2861COMPATIBLE_IOCTL(SNDCTL_DSP_SPEED)
2862COMPATIBLE_IOCTL(SNDCTL_DSP_STEREO)
2863COMPATIBLE_IOCTL(SNDCTL_DSP_GETBLKSIZE)
2864COMPATIBLE_IOCTL(SNDCTL_DSP_CHANNELS)
2865COMPATIBLE_IOCTL(SOUND_PCM_WRITE_FILTER)
2866COMPATIBLE_IOCTL(SNDCTL_DSP_POST)
2867COMPATIBLE_IOCTL(SNDCTL_DSP_SUBDIVIDE)
2868COMPATIBLE_IOCTL(SNDCTL_DSP_SETFRAGMENT)
2869COMPATIBLE_IOCTL(SNDCTL_DSP_GETFMTS)
2870COMPATIBLE_IOCTL(SNDCTL_DSP_SETFMT)
2871COMPATIBLE_IOCTL(SNDCTL_DSP_GETOSPACE)
2872COMPATIBLE_IOCTL(SNDCTL_DSP_GETISPACE)
2873COMPATIBLE_IOCTL(SNDCTL_DSP_NONBLOCK)
2874COMPATIBLE_IOCTL(SNDCTL_DSP_GETCAPS)
2875COMPATIBLE_IOCTL(SNDCTL_DSP_GETTRIGGER)
2876COMPATIBLE_IOCTL(SNDCTL_DSP_SETTRIGGER)
2877COMPATIBLE_IOCTL(SNDCTL_DSP_GETIPTR)
2878COMPATIBLE_IOCTL(SNDCTL_DSP_GETOPTR)
2879/* SNDCTL_DSP_MAPINBUF, XXX needs translation */
2880/* SNDCTL_DSP_MAPOUTBUF, XXX needs translation */
2881COMPATIBLE_IOCTL(SNDCTL_DSP_SETSYNCRO)
2882COMPATIBLE_IOCTL(SNDCTL_DSP_SETDUPLEX)
2883COMPATIBLE_IOCTL(SNDCTL_DSP_GETODELAY)
2884COMPATIBLE_IOCTL(SNDCTL_DSP_PROFILE)
2885COMPATIBLE_IOCTL(SOUND_PCM_READ_RATE)
2886COMPATIBLE_IOCTL(SOUND_PCM_READ_CHANNELS)
2887COMPATIBLE_IOCTL(SOUND_PCM_READ_BITS)
2888COMPATIBLE_IOCTL(SOUND_PCM_READ_FILTER)
2889/* Big C for sound/OSS */
2890COMPATIBLE_IOCTL(SNDCTL_COPR_RESET)
2891COMPATIBLE_IOCTL(SNDCTL_COPR_LOAD)
2892COMPATIBLE_IOCTL(SNDCTL_COPR_RDATA)
2893COMPATIBLE_IOCTL(SNDCTL_COPR_RCODE)
2894COMPATIBLE_IOCTL(SNDCTL_COPR_WDATA)
2895COMPATIBLE_IOCTL(SNDCTL_COPR_WCODE)
2896COMPATIBLE_IOCTL(SNDCTL_COPR_RUN)
2897COMPATIBLE_IOCTL(SNDCTL_COPR_HALT)
2898COMPATIBLE_IOCTL(SNDCTL_COPR_SENDMSG)
2899COMPATIBLE_IOCTL(SNDCTL_COPR_RCVMSG)
2900/* Big M for sound/OSS */
2901COMPATIBLE_IOCTL(SOUND_MIXER_READ_VOLUME)
2902COMPATIBLE_IOCTL(SOUND_MIXER_READ_BASS)
2903COMPATIBLE_IOCTL(SOUND_MIXER_READ_TREBLE)
2904COMPATIBLE_IOCTL(SOUND_MIXER_READ_SYNTH)
2905COMPATIBLE_IOCTL(SOUND_MIXER_READ_PCM)
2906COMPATIBLE_IOCTL(SOUND_MIXER_READ_SPEAKER)
2907COMPATIBLE_IOCTL(SOUND_MIXER_READ_LINE)
2908COMPATIBLE_IOCTL(SOUND_MIXER_READ_MIC)
2909COMPATIBLE_IOCTL(SOUND_MIXER_READ_CD)
2910COMPATIBLE_IOCTL(SOUND_MIXER_READ_IMIX)
2911COMPATIBLE_IOCTL(SOUND_MIXER_READ_ALTPCM)
2912COMPATIBLE_IOCTL(SOUND_MIXER_READ_RECLEV)
2913COMPATIBLE_IOCTL(SOUND_MIXER_READ_IGAIN)
2914COMPATIBLE_IOCTL(SOUND_MIXER_READ_OGAIN)
2915COMPATIBLE_IOCTL(SOUND_MIXER_READ_LINE1)
2916COMPATIBLE_IOCTL(SOUND_MIXER_READ_LINE2)
2917COMPATIBLE_IOCTL(SOUND_MIXER_READ_LINE3)
2918COMPATIBLE_IOCTL(MIXER_READ(SOUND_MIXER_DIGITAL1))
2919COMPATIBLE_IOCTL(MIXER_READ(SOUND_MIXER_DIGITAL2))
2920COMPATIBLE_IOCTL(MIXER_READ(SOUND_MIXER_DIGITAL3))
2921COMPATIBLE_IOCTL(MIXER_READ(SOUND_MIXER_PHONEIN))
2922COMPATIBLE_IOCTL(MIXER_READ(SOUND_MIXER_PHONEOUT))
2923COMPATIBLE_IOCTL(MIXER_READ(SOUND_MIXER_VIDEO))
2924COMPATIBLE_IOCTL(MIXER_READ(SOUND_MIXER_RADIO))
2925COMPATIBLE_IOCTL(MIXER_READ(SOUND_MIXER_MONITOR))
2926COMPATIBLE_IOCTL(SOUND_MIXER_READ_MUTE)
2927/* SOUND_MIXER_READ_ENHANCE, same value as READ_MUTE */
2928/* SOUND_MIXER_READ_LOUD, same value as READ_MUTE */
2929COMPATIBLE_IOCTL(SOUND_MIXER_READ_RECSRC)
2930COMPATIBLE_IOCTL(SOUND_MIXER_READ_DEVMASK)
2931COMPATIBLE_IOCTL(SOUND_MIXER_READ_RECMASK)
2932COMPATIBLE_IOCTL(SOUND_MIXER_READ_STEREODEVS)
2933COMPATIBLE_IOCTL(SOUND_MIXER_READ_CAPS)
2934COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_VOLUME)
2935COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_BASS)
2936COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_TREBLE)
2937COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_SYNTH)
2938COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_PCM)
2939COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_SPEAKER)
2940COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_LINE)
2941COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_MIC)
2942COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_CD)
2943COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_IMIX)
2944COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_ALTPCM)
2945COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_RECLEV)
2946COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_IGAIN)
2947COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_OGAIN)
2948COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_LINE1)
2949COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_LINE2)
2950COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_LINE3)
2951COMPATIBLE_IOCTL(MIXER_WRITE(SOUND_MIXER_DIGITAL1))
2952COMPATIBLE_IOCTL(MIXER_WRITE(SOUND_MIXER_DIGITAL2))
2953COMPATIBLE_IOCTL(MIXER_WRITE(SOUND_MIXER_DIGITAL3))
2954COMPATIBLE_IOCTL(MIXER_WRITE(SOUND_MIXER_PHONEIN))
2955COMPATIBLE_IOCTL(MIXER_WRITE(SOUND_MIXER_PHONEOUT))
2956COMPATIBLE_IOCTL(MIXER_WRITE(SOUND_MIXER_VIDEO))
2957COMPATIBLE_IOCTL(MIXER_WRITE(SOUND_MIXER_RADIO))
2958COMPATIBLE_IOCTL(MIXER_WRITE(SOUND_MIXER_MONITOR))
2959COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_MUTE)
2960/* SOUND_MIXER_WRITE_ENHANCE, same value as WRITE_MUTE */
2961/* SOUND_MIXER_WRITE_LOUD, same value as WRITE_MUTE */
2962COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_RECSRC)
2963COMPATIBLE_IOCTL(SOUND_MIXER_INFO)
2964COMPATIBLE_IOCTL(SOUND_OLD_MIXER_INFO)
2965COMPATIBLE_IOCTL(SOUND_MIXER_ACCESS)
2966COMPATIBLE_IOCTL(SOUND_MIXER_AGC)
2967COMPATIBLE_IOCTL(SOUND_MIXER_3DSE)
2968COMPATIBLE_IOCTL(SOUND_MIXER_PRIVATE1)
2969COMPATIBLE_IOCTL(SOUND_MIXER_PRIVATE2)
2970COMPATIBLE_IOCTL(SOUND_MIXER_PRIVATE3)
2971COMPATIBLE_IOCTL(SOUND_MIXER_PRIVATE4)
2972COMPATIBLE_IOCTL(SOUND_MIXER_PRIVATE5)
2973COMPATIBLE_IOCTL(SOUND_MIXER_GETLEVELS)
2974COMPATIBLE_IOCTL(SOUND_MIXER_SETLEVELS)
2975COMPATIBLE_IOCTL(OSS_GETVERSION)
2976/* AUTOFS */
2977ULONG_IOCTL(AUTOFS_IOC_READY)
2978ULONG_IOCTL(AUTOFS_IOC_FAIL)
2979COMPATIBLE_IOCTL(AUTOFS_IOC_CATATONIC)
2980COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOVER)
2981COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE)
2982COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE_MULTI)
2983COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOSUBVER)
2984COMPATIBLE_IOCTL(AUTOFS_IOC_ASKREGHOST)
2985COMPATIBLE_IOCTL(AUTOFS_IOC_TOGGLEREGHOST)
2986COMPATIBLE_IOCTL(AUTOFS_IOC_ASKUMOUNT)
2987/* Raw devices */
2988COMPATIBLE_IOCTL(RAW_SETBIND)
2989COMPATIBLE_IOCTL(RAW_GETBIND)
2990/* SMB ioctls which do not need any translations */
2991COMPATIBLE_IOCTL(SMB_IOC_NEWCONN)
2992/* Little a */
2993COMPATIBLE_IOCTL(ATMSIGD_CTRL)
2994COMPATIBLE_IOCTL(ATMARPD_CTRL)
2995COMPATIBLE_IOCTL(ATMLEC_CTRL)
2996COMPATIBLE_IOCTL(ATMLEC_MCAST)
2997COMPATIBLE_IOCTL(ATMLEC_DATA)
2998COMPATIBLE_IOCTL(ATM_SETSC)
2999COMPATIBLE_IOCTL(SIOCSIFATMTCP)
3000COMPATIBLE_IOCTL(SIOCMKCLIP)
3001COMPATIBLE_IOCTL(ATMARP_MKIP)
3002COMPATIBLE_IOCTL(ATMARP_SETENTRY)
3003COMPATIBLE_IOCTL(ATMARP_ENCAP)
3004COMPATIBLE_IOCTL(ATMTCP_CREATE)
3005COMPATIBLE_IOCTL(ATMTCP_REMOVE)
3006COMPATIBLE_IOCTL(ATMMPC_CTRL)
3007COMPATIBLE_IOCTL(ATMMPC_DATA)
3008/* Watchdog */
3009COMPATIBLE_IOCTL(WDIOC_GETSUPPORT)
3010COMPATIBLE_IOCTL(WDIOC_GETSTATUS)
3011COMPATIBLE_IOCTL(WDIOC_GETBOOTSTATUS)
3012COMPATIBLE_IOCTL(WDIOC_GETTEMP)
3013COMPATIBLE_IOCTL(WDIOC_SETOPTIONS)
3014COMPATIBLE_IOCTL(WDIOC_KEEPALIVE)
3015COMPATIBLE_IOCTL(WDIOC_SETTIMEOUT)
3016COMPATIBLE_IOCTL(WDIOC_GETTIMEOUT)
3017/* Big R */
3018COMPATIBLE_IOCTL(RNDGETENTCNT)
3019COMPATIBLE_IOCTL(RNDADDTOENTCNT)
3020COMPATIBLE_IOCTL(RNDGETPOOL)
3021COMPATIBLE_IOCTL(RNDADDENTROPY)
3022COMPATIBLE_IOCTL(RNDZAPENTCNT)
3023COMPATIBLE_IOCTL(RNDCLEARPOOL)
3024/* Bluetooth */
3025COMPATIBLE_IOCTL(HCIDEVUP)
3026COMPATIBLE_IOCTL(HCIDEVDOWN)
3027COMPATIBLE_IOCTL(HCIDEVRESET)
3028COMPATIBLE_IOCTL(HCIDEVRESTAT)
3029COMPATIBLE_IOCTL(HCIGETDEVLIST)
3030COMPATIBLE_IOCTL(HCIGETDEVINFO)
3031COMPATIBLE_IOCTL(HCIGETCONNLIST)
3032COMPATIBLE_IOCTL(HCIGETCONNINFO)
3033COMPATIBLE_IOCTL(HCISETRAW)
3034COMPATIBLE_IOCTL(HCISETSCAN)
3035COMPATIBLE_IOCTL(HCISETAUTH)
3036COMPATIBLE_IOCTL(HCISETENCRYPT)
3037COMPATIBLE_IOCTL(HCISETPTYPE)
3038COMPATIBLE_IOCTL(HCISETLINKPOL)
3039COMPATIBLE_IOCTL(HCISETLINKMODE)
3040COMPATIBLE_IOCTL(HCISETACLMTU)
3041COMPATIBLE_IOCTL(HCISETSCOMTU)
3042COMPATIBLE_IOCTL(HCIINQUIRY)
3043COMPATIBLE_IOCTL(HCIUARTSETPROTO)
3044COMPATIBLE_IOCTL(HCIUARTGETPROTO)
3045COMPATIBLE_IOCTL(RFCOMMCREATEDEV)
3046COMPATIBLE_IOCTL(RFCOMMRELEASEDEV)
3047COMPATIBLE_IOCTL(RFCOMMGETDEVLIST)
3048COMPATIBLE_IOCTL(RFCOMMGETDEVINFO)
3049COMPATIBLE_IOCTL(RFCOMMSTEALDLC)
3050COMPATIBLE_IOCTL(BNEPCONNADD)
3051COMPATIBLE_IOCTL(BNEPCONNDEL)
3052COMPATIBLE_IOCTL(BNEPGETCONNLIST)
3053COMPATIBLE_IOCTL(BNEPGETCONNINFO)
3054COMPATIBLE_IOCTL(CMTPCONNADD)
3055COMPATIBLE_IOCTL(CMTPCONNDEL)
3056COMPATIBLE_IOCTL(CMTPGETCONNLIST)
3057COMPATIBLE_IOCTL(CMTPGETCONNINFO)
3058COMPATIBLE_IOCTL(HIDPCONNADD)
3059COMPATIBLE_IOCTL(HIDPCONNDEL)
3060COMPATIBLE_IOCTL(HIDPGETCONNLIST)
3061COMPATIBLE_IOCTL(HIDPGETCONNINFO)
3062/* CAPI */
3063COMPATIBLE_IOCTL(CAPI_REGISTER)
3064COMPATIBLE_IOCTL(CAPI_GET_MANUFACTURER)
3065COMPATIBLE_IOCTL(CAPI_GET_VERSION)
3066COMPATIBLE_IOCTL(CAPI_GET_SERIAL)
3067COMPATIBLE_IOCTL(CAPI_GET_PROFILE)
3068COMPATIBLE_IOCTL(CAPI_MANUFACTURER_CMD)
3069COMPATIBLE_IOCTL(CAPI_GET_ERRCODE)
3070COMPATIBLE_IOCTL(CAPI_INSTALLED)
3071COMPATIBLE_IOCTL(CAPI_GET_FLAGS)
3072COMPATIBLE_IOCTL(CAPI_SET_FLAGS)
3073COMPATIBLE_IOCTL(CAPI_CLR_FLAGS)
3074COMPATIBLE_IOCTL(CAPI_NCCI_OPENCOUNT)
3075COMPATIBLE_IOCTL(CAPI_NCCI_GETUNIT)
3076/* Siemens Gigaset */
3077COMPATIBLE_IOCTL(GIGASET_REDIR)
3078COMPATIBLE_IOCTL(GIGASET_CONFIG)
3079COMPATIBLE_IOCTL(GIGASET_BRKCHARS)
3080COMPATIBLE_IOCTL(GIGASET_VERSION)
3081/* Misc. */
3082COMPATIBLE_IOCTL(0x41545900) /* ATYIO_CLKR */
3083COMPATIBLE_IOCTL(0x41545901) /* ATYIO_CLKW */
3084COMPATIBLE_IOCTL(PCIIOC_CONTROLLER)
3085COMPATIBLE_IOCTL(PCIIOC_MMAP_IS_IO)
3086COMPATIBLE_IOCTL(PCIIOC_MMAP_IS_MEM)
3087COMPATIBLE_IOCTL(PCIIOC_WRITE_COMBINE)
3088/* USB */
3089COMPATIBLE_IOCTL(USBDEVFS_RESETEP)
3090COMPATIBLE_IOCTL(USBDEVFS_SETINTERFACE)
3091COMPATIBLE_IOCTL(USBDEVFS_SETCONFIGURATION)
3092COMPATIBLE_IOCTL(USBDEVFS_GETDRIVER)
3093COMPATIBLE_IOCTL(USBDEVFS_DISCARDURB)
3094COMPATIBLE_IOCTL(USBDEVFS_CLAIMINTERFACE)
3095COMPATIBLE_IOCTL(USBDEVFS_RELEASEINTERFACE)
3096COMPATIBLE_IOCTL(USBDEVFS_CONNECTINFO)
3097COMPATIBLE_IOCTL(USBDEVFS_HUB_PORTINFO)
3098COMPATIBLE_IOCTL(USBDEVFS_RESET)
3099COMPATIBLE_IOCTL(USBDEVFS_SUBMITURB32)
3100COMPATIBLE_IOCTL(USBDEVFS_REAPURB32)
3101COMPATIBLE_IOCTL(USBDEVFS_REAPURBNDELAY32)
3102COMPATIBLE_IOCTL(USBDEVFS_CLEAR_HALT)
3103/* MTD */
3104COMPATIBLE_IOCTL(MEMGETINFO)
3105COMPATIBLE_IOCTL(MEMERASE)
3106COMPATIBLE_IOCTL(MEMLOCK)
3107COMPATIBLE_IOCTL(MEMUNLOCK)
3108COMPATIBLE_IOCTL(MEMGETREGIONCOUNT)
3109COMPATIBLE_IOCTL(MEMGETREGIONINFO)
3110COMPATIBLE_IOCTL(MEMGETBADBLOCK)
3111COMPATIBLE_IOCTL(MEMSETBADBLOCK)
3112/* NBD */
3113ULONG_IOCTL(NBD_SET_SOCK)
3114ULONG_IOCTL(NBD_SET_BLKSIZE)
3115ULONG_IOCTL(NBD_SET_SIZE)
3116COMPATIBLE_IOCTL(NBD_DO_IT)
3117COMPATIBLE_IOCTL(NBD_CLEAR_SOCK)
3118COMPATIBLE_IOCTL(NBD_CLEAR_QUE)
3119COMPATIBLE_IOCTL(NBD_PRINT_DEBUG)
3120ULONG_IOCTL(NBD_SET_SIZE_BLOCKS)
3121COMPATIBLE_IOCTL(NBD_DISCONNECT)
3122/* i2c */
3123COMPATIBLE_IOCTL(I2C_SLAVE)
3124COMPATIBLE_IOCTL(I2C_SLAVE_FORCE)
3125COMPATIBLE_IOCTL(I2C_TENBIT)
3126COMPATIBLE_IOCTL(I2C_PEC)
3127COMPATIBLE_IOCTL(I2C_RETRIES)
3128COMPATIBLE_IOCTL(I2C_TIMEOUT)
3129/* wireless */
3130COMPATIBLE_IOCTL(SIOCSIWCOMMIT)
3131COMPATIBLE_IOCTL(SIOCGIWNAME)
3132COMPATIBLE_IOCTL(SIOCSIWNWID)
3133COMPATIBLE_IOCTL(SIOCGIWNWID)
3134COMPATIBLE_IOCTL(SIOCSIWFREQ)
3135COMPATIBLE_IOCTL(SIOCGIWFREQ)
3136COMPATIBLE_IOCTL(SIOCSIWMODE)
3137COMPATIBLE_IOCTL(SIOCGIWMODE)
3138COMPATIBLE_IOCTL(SIOCSIWSENS)
3139COMPATIBLE_IOCTL(SIOCGIWSENS)
3140COMPATIBLE_IOCTL(SIOCSIWRANGE)
3141COMPATIBLE_IOCTL(SIOCSIWPRIV)
3142COMPATIBLE_IOCTL(SIOCGIWPRIV)
3143COMPATIBLE_IOCTL(SIOCSIWSTATS)
3144COMPATIBLE_IOCTL(SIOCGIWSTATS)
3145COMPATIBLE_IOCTL(SIOCSIWAP)
3146COMPATIBLE_IOCTL(SIOCGIWAP)
3147COMPATIBLE_IOCTL(SIOCSIWSCAN)
3148COMPATIBLE_IOCTL(SIOCSIWRATE)
3149COMPATIBLE_IOCTL(SIOCGIWRATE)
3150COMPATIBLE_IOCTL(SIOCSIWRTS)
3151COMPATIBLE_IOCTL(SIOCGIWRTS)
3152COMPATIBLE_IOCTL(SIOCSIWFRAG)
3153COMPATIBLE_IOCTL(SIOCGIWFRAG)
3154COMPATIBLE_IOCTL(SIOCSIWTXPOW)
3155COMPATIBLE_IOCTL(SIOCGIWTXPOW)
3156COMPATIBLE_IOCTL(SIOCSIWRETRY)
3157COMPATIBLE_IOCTL(SIOCGIWRETRY)
3158COMPATIBLE_IOCTL(SIOCSIWPOWER)
3159COMPATIBLE_IOCTL(SIOCGIWPOWER)
3160/* hiddev */
3161COMPATIBLE_IOCTL(HIDIOCGVERSION)
3162COMPATIBLE_IOCTL(HIDIOCAPPLICATION)
3163COMPATIBLE_IOCTL(HIDIOCGDEVINFO)
3164COMPATIBLE_IOCTL(HIDIOCGSTRING)
3165COMPATIBLE_IOCTL(HIDIOCINITREPORT)
3166COMPATIBLE_IOCTL(HIDIOCGREPORT)
3167COMPATIBLE_IOCTL(HIDIOCSREPORT)
3168COMPATIBLE_IOCTL(HIDIOCGREPORTINFO)
3169COMPATIBLE_IOCTL(HIDIOCGFIELDINFO)
3170COMPATIBLE_IOCTL(HIDIOCGUSAGE)
3171COMPATIBLE_IOCTL(HIDIOCSUSAGE)
3172COMPATIBLE_IOCTL(HIDIOCGUCODE)
3173COMPATIBLE_IOCTL(HIDIOCGFLAG)
3174COMPATIBLE_IOCTL(HIDIOCSFLAG)
3175COMPATIBLE_IOCTL(HIDIOCGCOLLECTIONINDEX)
3176COMPATIBLE_IOCTL(HIDIOCGCOLLECTIONINFO)
3177/* dvb */
3178COMPATIBLE_IOCTL(AUDIO_STOP)
3179COMPATIBLE_IOCTL(AUDIO_PLAY)
3180COMPATIBLE_IOCTL(AUDIO_PAUSE)
3181COMPATIBLE_IOCTL(AUDIO_CONTINUE)
3182COMPATIBLE_IOCTL(AUDIO_SELECT_SOURCE)
3183COMPATIBLE_IOCTL(AUDIO_SET_MUTE)
3184COMPATIBLE_IOCTL(AUDIO_SET_AV_SYNC)
3185COMPATIBLE_IOCTL(AUDIO_SET_BYPASS_MODE)
3186COMPATIBLE_IOCTL(AUDIO_CHANNEL_SELECT)
3187COMPATIBLE_IOCTL(AUDIO_GET_STATUS)
3188COMPATIBLE_IOCTL(AUDIO_GET_CAPABILITIES)
3189COMPATIBLE_IOCTL(AUDIO_CLEAR_BUFFER)
3190COMPATIBLE_IOCTL(AUDIO_SET_ID)
3191COMPATIBLE_IOCTL(AUDIO_SET_MIXER)
3192COMPATIBLE_IOCTL(AUDIO_SET_STREAMTYPE)
3193COMPATIBLE_IOCTL(AUDIO_SET_EXT_ID)
3194COMPATIBLE_IOCTL(AUDIO_SET_ATTRIBUTES)
3195COMPATIBLE_IOCTL(AUDIO_SET_KARAOKE)
3196COMPATIBLE_IOCTL(DMX_START)
3197COMPATIBLE_IOCTL(DMX_STOP)
3198COMPATIBLE_IOCTL(DMX_SET_FILTER)
3199COMPATIBLE_IOCTL(DMX_SET_PES_FILTER)
3200COMPATIBLE_IOCTL(DMX_SET_BUFFER_SIZE)
3201COMPATIBLE_IOCTL(DMX_GET_PES_PIDS)
3202COMPATIBLE_IOCTL(DMX_GET_CAPS)
3203COMPATIBLE_IOCTL(DMX_SET_SOURCE)
3204COMPATIBLE_IOCTL(DMX_GET_STC)
3205COMPATIBLE_IOCTL(FE_GET_INFO)
3206COMPATIBLE_IOCTL(FE_DISEQC_RESET_OVERLOAD)
3207COMPATIBLE_IOCTL(FE_DISEQC_SEND_MASTER_CMD)
3208COMPATIBLE_IOCTL(FE_DISEQC_RECV_SLAVE_REPLY)
3209COMPATIBLE_IOCTL(FE_DISEQC_SEND_BURST)
3210COMPATIBLE_IOCTL(FE_SET_TONE)
3211COMPATIBLE_IOCTL(FE_SET_VOLTAGE)
3212COMPATIBLE_IOCTL(FE_ENABLE_HIGH_LNB_VOLTAGE)
3213COMPATIBLE_IOCTL(FE_READ_STATUS)
3214COMPATIBLE_IOCTL(FE_READ_BER)
3215COMPATIBLE_IOCTL(FE_READ_SIGNAL_STRENGTH)
3216COMPATIBLE_IOCTL(FE_READ_SNR)
3217COMPATIBLE_IOCTL(FE_READ_UNCORRECTED_BLOCKS)
3218COMPATIBLE_IOCTL(FE_SET_FRONTEND)
3219COMPATIBLE_IOCTL(FE_GET_FRONTEND)
3220COMPATIBLE_IOCTL(FE_GET_EVENT)
3221COMPATIBLE_IOCTL(FE_DISHNETWORK_SEND_LEGACY_CMD)
3222COMPATIBLE_IOCTL(VIDEO_STOP)
3223COMPATIBLE_IOCTL(VIDEO_PLAY)
3224COMPATIBLE_IOCTL(VIDEO_FREEZE)
3225COMPATIBLE_IOCTL(VIDEO_CONTINUE)
3226COMPATIBLE_IOCTL(VIDEO_SELECT_SOURCE)
3227COMPATIBLE_IOCTL(VIDEO_SET_BLANK)
3228COMPATIBLE_IOCTL(VIDEO_GET_STATUS)
3229COMPATIBLE_IOCTL(VIDEO_SET_DISPLAY_FORMAT)
3230COMPATIBLE_IOCTL(VIDEO_FAST_FORWARD)
3231COMPATIBLE_IOCTL(VIDEO_SLOWMOTION)
3232COMPATIBLE_IOCTL(VIDEO_GET_CAPABILITIES)
3233COMPATIBLE_IOCTL(VIDEO_CLEAR_BUFFER)
3234COMPATIBLE_IOCTL(VIDEO_SET_ID)
3235COMPATIBLE_IOCTL(VIDEO_SET_STREAMTYPE)
3236COMPATIBLE_IOCTL(VIDEO_SET_FORMAT)
3237COMPATIBLE_IOCTL(VIDEO_SET_SYSTEM)
3238COMPATIBLE_IOCTL(VIDEO_SET_HIGHLIGHT)
3239COMPATIBLE_IOCTL(VIDEO_SET_SPU)
3240COMPATIBLE_IOCTL(VIDEO_GET_NAVI)
3241COMPATIBLE_IOCTL(VIDEO_SET_ATTRIBUTES)
3242COMPATIBLE_IOCTL(VIDEO_GET_SIZE)
3243COMPATIBLE_IOCTL(VIDEO_GET_FRAME_RATE)
3244
3245/* now things that need handlers */
2402HANDLE_IOCTL(MEMREADOOB32, mtd_rw_oob) 3246HANDLE_IOCTL(MEMREADOOB32, mtd_rw_oob)
2403HANDLE_IOCTL(MEMWRITEOOB32, mtd_rw_oob) 3247HANDLE_IOCTL(MEMWRITEOOB32, mtd_rw_oob)
2404#ifdef CONFIG_NET 3248#ifdef CONFIG_NET
@@ -2594,6 +3438,8 @@ HANDLE_IOCTL(SIOCGIWENCODEEXT, do_wireless_ioctl)
2594HANDLE_IOCTL(SIOCSIWPMKSA, do_wireless_ioctl) 3438HANDLE_IOCTL(SIOCSIWPMKSA, do_wireless_ioctl)
2595HANDLE_IOCTL(SIOCSIFBR, old_bridge_ioctl) 3439HANDLE_IOCTL(SIOCSIFBR, old_bridge_ioctl)
2596HANDLE_IOCTL(SIOCGIFBR, old_bridge_ioctl) 3440HANDLE_IOCTL(SIOCGIFBR, old_bridge_ioctl)
3441/* Not implemented in the native kernel */
3442IGNORE_IOCTL(SIOCGIFCOUNT)
2597HANDLE_IOCTL(RTC_IRQP_READ32, rtc_ioctl) 3443HANDLE_IOCTL(RTC_IRQP_READ32, rtc_ioctl)
2598HANDLE_IOCTL(RTC_IRQP_SET32, rtc_ioctl) 3444HANDLE_IOCTL(RTC_IRQP_SET32, rtc_ioctl)
2599HANDLE_IOCTL(RTC_EPOCH_READ32, rtc_ioctl) 3445HANDLE_IOCTL(RTC_EPOCH_READ32, rtc_ioctl)
@@ -2617,6 +3463,167 @@ COMPATIBLE_IOCTL(LPRESET)
2617/*LPGETSTATS not implemented, but no kernels seem to compile it in anyways*/ 3463/*LPGETSTATS not implemented, but no kernels seem to compile it in anyways*/
2618COMPATIBLE_IOCTL(LPGETFLAGS) 3464COMPATIBLE_IOCTL(LPGETFLAGS)
2619HANDLE_IOCTL(LPSETTIMEOUT, lp_timeout_trans) 3465HANDLE_IOCTL(LPSETTIMEOUT, lp_timeout_trans)
3466
3467/* fat 'r' ioctls. These are handled by fat with ->compat_ioctl,
3468 but we don't want warnings on other file systems. So declare
3469 them as compatible here. */
3470#define VFAT_IOCTL_READDIR_BOTH32 _IOR('r', 1, struct compat_dirent[2])
3471#define VFAT_IOCTL_READDIR_SHORT32 _IOR('r', 2, struct compat_dirent[2])
3472
3473IGNORE_IOCTL(VFAT_IOCTL_READDIR_BOTH32)
3474IGNORE_IOCTL(VFAT_IOCTL_READDIR_SHORT32)
2620}; 3475};
2621 3476
#define IOCTL_HASHSIZE 256
static struct ioctl_trans *ioctl32_hash_table[IOCTL_HASHSIZE];

/*
 * Fold a 32-bit ioctl command number into a hash-table bucket index.
 * Mixes the command with two right-shifted copies of itself before
 * reducing modulo the (power-of-two) table size.
 */
static inline unsigned long ioctl32_hash(unsigned long cmd)
{
	unsigned long mixed = cmd ^ (cmd >> 4) ^ (cmd >> 6);

	return mixed % IOCTL_HASHSIZE;
}
3484
3485static void compat_ioctl_error(struct file *filp, unsigned int fd,
3486 unsigned int cmd, unsigned long arg)
3487{
3488 char buf[10];
3489 char *fn = "?";
3490 char *path;
3491
3492 /* find the name of the device. */
3493 path = (char *)__get_free_page(GFP_KERNEL);
3494 if (path) {
3495 fn = d_path(filp->f_path.dentry, filp->f_path.mnt, path, PAGE_SIZE);
3496 if (IS_ERR(fn))
3497 fn = "?";
3498 }
3499
3500 sprintf(buf,"'%c'", (cmd>>_IOC_TYPESHIFT) & _IOC_TYPEMASK);
3501 if (!isprint(buf[1]))
3502 sprintf(buf, "%02x", buf[1]);
3503 compat_printk("ioctl32(%s:%d): Unknown cmd fd(%d) "
3504 "cmd(%08x){t:%s;sz:%u} arg(%08x) on %s\n",
3505 current->comm, current->pid,
3506 (int)fd, (unsigned int)cmd, buf,
3507 (cmd >> _IOC_SIZESHIFT) & _IOC_SIZEMASK,
3508 (unsigned int)arg, fn);
3509
3510 if (path)
3511 free_page((unsigned long)path);
3512}
3513
/*
 * 32-bit ioctl(2) entry point for 64-bit kernels.
 *
 * Resolution order for @cmd:
 *   1. a short list of generic file ioctls handled natively below;
 *   2. the file's own ->compat_ioctl method (if it returns
 *      -ENOIOCTLCMD, fall through to the translation table);
 *   3. the ioctl32_hash_table of registered translation handlers;
 *   4. for sockets, the SIOCDEVPRIVATE range via
 *      siocdevprivate_ioctl();
 * otherwise log (up to 50 times) and fail with -EINVAL.
 *
 * Returns the handler's result, -EBADF for a bad @fd, or the
 * security-hook error.
 */
asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
				unsigned long arg)
{
	struct file *filp;
	int error = -EBADF;
	struct ioctl_trans *t;
	int fput_needed;

	filp = fget_light(fd, &fput_needed);
	if (!filp)
		goto out;

	/* RED-PEN how should LSM module know it's handling 32bit? */
	error = security_file_ioctl(filp, cmd, arg);
	if (error)
		goto out_fput;

	/*
	 * To allow the compat_ioctl handlers to be self contained
	 * we need to check the common ioctls here first.
	 * Just handle them with the standard handlers below.
	 */
	switch (cmd) {
	case FIOCLEX:
	case FIONCLEX:
	case FIONBIO:
	case FIOASYNC:
	case FIOQSIZE:
		break;

	/* These three only go to vfs_ioctl() directly for regular files;
	 * for anything else they fall through to the driver paths. */
	case FIBMAP:
	case FIGETBSZ:
	case FIONREAD:
		if (S_ISREG(filp->f_path.dentry->d_inode->i_mode))
			break;
		/*FALL THROUGH*/

	default:
		/* Prefer the driver's native compat handler when present. */
		if (filp->f_op && filp->f_op->compat_ioctl) {
			error = filp->f_op->compat_ioctl(filp, cmd, arg);
			if (error != -ENOIOCTLCMD)
				goto out_fput;
		}

		/* No ioctl method at all: skip the translation table. */
		if (!filp->f_op ||
		    (!filp->f_op->ioctl && !filp->f_op->unlocked_ioctl))
			goto do_ioctl;
		break;
	}

	/* Walk the bucket's chain looking for a registered translator. */
	for (t = ioctl32_hash_table[ioctl32_hash(cmd)]; t; t = t->next) {
		if (t->cmd == cmd)
			goto found_handler;
	}

	if (S_ISSOCK(filp->f_path.dentry->d_inode->i_mode) &&
	    cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
		error = siocdevprivate_ioctl(fd, cmd, arg);
	} else {
		/* Rate-limit the "unknown cmd" complaint to 50 messages. */
		static int count;

		if (++count <= 50)
			compat_ioctl_error(filp, fd, cmd, arg);
		error = -EINVAL;
	}

	goto out_fput;

 found_handler:
	if (t->handler) {
		/* Translation handlers still run under the BKL. */
		lock_kernel();
		error = t->handler(fd, cmd, arg, filp);
		unlock_kernel();
		goto out_fput;
	}

 do_ioctl:
	error = vfs_ioctl(filp, fd, cmd, arg);
 out_fput:
	fput_light(filp, fput_needed);
 out:
	return error;
}
3597
3598static void ioctl32_insert_translation(struct ioctl_trans *trans)
3599{
3600 unsigned long hash;
3601 struct ioctl_trans *t;
3602
3603 hash = ioctl32_hash (trans->cmd);
3604 if (!ioctl32_hash_table[hash])
3605 ioctl32_hash_table[hash] = trans;
3606 else {
3607 t = ioctl32_hash_table[hash];
3608 while (t->next)
3609 t = t->next;
3610 trans->next = NULL;
3611 t->next = trans;
3612 }
3613}
3614
3615static int __init init_sys32_ioctl(void)
3616{
3617 int i;
3618
3619 for (i = 0; i < ARRAY_SIZE(ioctl_start); i++) {
3620 if (ioctl_start[i].next != 0) {
3621 printk("ioctl translation %d bad\n",i);
3622 return -1;
3623 }
3624
3625 ioctl32_insert_translation(&ioctl_start[i]);
3626 }
3627 return 0;
3628}
3629__initcall(init_sys32_ioctl);
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index d98be5e01328..3527c7c6def8 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -77,36 +77,6 @@ static int fill_read_buffer(struct dentry * dentry, struct configfs_buffer * buf
77 return ret; 77 return ret;
78} 78}
79 79
80
81/**
82 * flush_read_buffer - push buffer to userspace.
83 * @buffer: data buffer for file.
84 * @userbuf: user-passed buffer.
85 * @count: number of bytes requested.
86 * @ppos: file position.
87 *
88 * Copy the buffer we filled in fill_read_buffer() to userspace.
89 * This is done at the reader's leisure, copying and advancing
90 * the amount they specify each time.
91 * This may be called continuously until the buffer is empty.
92 */
93static int flush_read_buffer(struct configfs_buffer * buffer, char __user * buf,
94 size_t count, loff_t * ppos)
95{
96 int error;
97
98 if (*ppos > buffer->count)
99 return 0;
100
101 if (count > (buffer->count - *ppos))
102 count = buffer->count - *ppos;
103
104 error = copy_to_user(buf,buffer->page + *ppos,count);
105 if (!error)
106 *ppos += count;
107 return error ? -EFAULT : count;
108}
109
110/** 80/**
111 * configfs_read_file - read an attribute. 81 * configfs_read_file - read an attribute.
112 * @file: file pointer. 82 * @file: file pointer.
@@ -139,7 +109,8 @@ configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *pp
139 } 109 }
140 pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n", 110 pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n",
141 __FUNCTION__, count, *ppos, buffer->page); 111 __FUNCTION__, count, *ppos, buffer->page);
142 retval = flush_read_buffer(buffer,buf,count,ppos); 112 retval = simple_read_from_buffer(buf, count, ppos, buffer->page,
113 buffer->count);
143out: 114out:
144 up(&buffer->sem); 115 up(&buffer->sem);
145 return retval; 116 return retval;
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index 6f573004cd7d..b00d962de833 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -140,7 +140,7 @@ static int __init configfs_init(void)
140 if (!configfs_dir_cachep) 140 if (!configfs_dir_cachep)
141 goto out; 141 goto out;
142 142
143 kset_set_kset_s(&config_subsys, kernel_subsys); 143 kobj_set_kset_s(&config_subsys, kernel_subsys);
144 err = subsystem_register(&config_subsys); 144 err = subsystem_register(&config_subsys);
145 if (err) { 145 if (err) {
146 kmem_cache_destroy(configfs_dir_cachep); 146 kmem_cache_destroy(configfs_dir_cachep);
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index facd0c89be8f..3d194a2be3f5 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -180,7 +180,8 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i
180 struct page *page = NULL; 180 struct page *page = NULL;
181 181
182 if (blocknr + i < devsize) { 182 if (blocknr + i < devsize) {
183 page = read_mapping_page(mapping, blocknr + i, NULL); 183 page = read_mapping_page_async(mapping, blocknr + i,
184 NULL);
184 /* synchronous error? */ 185 /* synchronous error? */
185 if (IS_ERR(page)) 186 if (IS_ERR(page))
186 page = NULL; 187 page = NULL;
diff --git a/fs/dcache.c b/fs/dcache.c
index d68631f18df1..0e73aa0a0e8b 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -21,7 +21,6 @@
21#include <linux/fsnotify.h> 21#include <linux/fsnotify.h>
22#include <linux/slab.h> 22#include <linux/slab.h>
23#include <linux/init.h> 23#include <linux/init.h>
24#include <linux/smp_lock.h>
25#include <linux/hash.h> 24#include <linux/hash.h>
26#include <linux/cache.h> 25#include <linux/cache.h>
27#include <linux/module.h> 26#include <linux/module.h>
@@ -121,6 +120,28 @@ static void dentry_iput(struct dentry * dentry)
121 } 120 }
122} 121}
123 122
123/**
124 * d_kill - kill dentry and return parent
125 * @dentry: dentry to kill
126 *
127 * Called with dcache_lock and d_lock, releases both. The dentry must
128 * already be unhashed and removed from the LRU.
129 *
130 * If this is the root of the dentry tree, return NULL.
131 */
132static struct dentry *d_kill(struct dentry *dentry)
133{
134 struct dentry *parent;
135
136 list_del(&dentry->d_u.d_child);
137 dentry_stat.nr_dentry--; /* For d_free, below */
138 /*drops the locks, at that point nobody can reach this dentry */
139 dentry_iput(dentry);
140 parent = dentry->d_parent;
141 d_free(dentry);
142 return dentry == parent ? NULL : parent;
143}
144
124/* 145/*
125 * This is dput 146 * This is dput
126 * 147 *
@@ -189,28 +210,17 @@ repeat:
189 210
190unhash_it: 211unhash_it:
191 __d_drop(dentry); 212 __d_drop(dentry);
192 213kill_it:
193kill_it: { 214 /* If dentry was on d_lru list
194 struct dentry *parent; 215 * delete it from there
195 216 */
196 /* If dentry was on d_lru list 217 if (!list_empty(&dentry->d_lru)) {
197 * delete it from there 218 list_del(&dentry->d_lru);
198 */ 219 dentry_stat.nr_unused--;
199 if (!list_empty(&dentry->d_lru)) {
200 list_del(&dentry->d_lru);
201 dentry_stat.nr_unused--;
202 }
203 list_del(&dentry->d_u.d_child);
204 dentry_stat.nr_dentry--; /* For d_free, below */
205 /*drops the locks, at that point nobody can reach this dentry */
206 dentry_iput(dentry);
207 parent = dentry->d_parent;
208 d_free(dentry);
209 if (dentry == parent)
210 return;
211 dentry = parent;
212 goto repeat;
213 } 220 }
221 dentry = d_kill(dentry);
222 if (dentry)
223 goto repeat;
214} 224}
215 225
216/** 226/**
@@ -371,22 +381,40 @@ restart:
371 * Throw away a dentry - free the inode, dput the parent. This requires that 381 * Throw away a dentry - free the inode, dput the parent. This requires that
372 * the LRU list has already been removed. 382 * the LRU list has already been removed.
373 * 383 *
384 * If prune_parents is true, try to prune ancestors as well.
385 *
374 * Called with dcache_lock, drops it and then regains. 386 * Called with dcache_lock, drops it and then regains.
375 * Called with dentry->d_lock held, drops it. 387 * Called with dentry->d_lock held, drops it.
376 */ 388 */
377static void prune_one_dentry(struct dentry * dentry) 389static void prune_one_dentry(struct dentry * dentry, int prune_parents)
378{ 390{
379 struct dentry * parent;
380
381 __d_drop(dentry); 391 __d_drop(dentry);
382 list_del(&dentry->d_u.d_child); 392 dentry = d_kill(dentry);
383 dentry_stat.nr_dentry--; /* For d_free, below */ 393 if (!prune_parents) {
384 dentry_iput(dentry); 394 dput(dentry);
385 parent = dentry->d_parent; 395 spin_lock(&dcache_lock);
386 d_free(dentry); 396 return;
387 if (parent != dentry) 397 }
388 dput(parent); 398
399 /*
400 * Prune ancestors. Locking is simpler than in dput(),
401 * because dcache_lock needs to be taken anyway.
402 */
389 spin_lock(&dcache_lock); 403 spin_lock(&dcache_lock);
404 while (dentry) {
405 if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock))
406 return;
407
408 if (dentry->d_op && dentry->d_op->d_delete)
409 dentry->d_op->d_delete(dentry);
410 if (!list_empty(&dentry->d_lru)) {
411 list_del(&dentry->d_lru);
412 dentry_stat.nr_unused--;
413 }
414 __d_drop(dentry);
415 dentry = d_kill(dentry);
416 spin_lock(&dcache_lock);
417 }
390} 418}
391 419
392/** 420/**
@@ -394,6 +422,7 @@ static void prune_one_dentry(struct dentry * dentry)
394 * @count: number of entries to try and free 422 * @count: number of entries to try and free
395 * @sb: if given, ignore dentries for other superblocks 423 * @sb: if given, ignore dentries for other superblocks
396 * which are being unmounted. 424 * which are being unmounted.
425 * @prune_parents: if true, try to prune ancestors as well in one go
397 * 426 *
398 * Shrink the dcache. This is done when we need 427 * Shrink the dcache. This is done when we need
399 * more memory, or simply when we need to unmount 428 * more memory, or simply when we need to unmount
@@ -404,7 +433,7 @@ static void prune_one_dentry(struct dentry * dentry)
404 * all the dentries are in use. 433 * all the dentries are in use.
405 */ 434 */
406 435
407static void prune_dcache(int count, struct super_block *sb) 436static void prune_dcache(int count, struct super_block *sb, int prune_parents)
408{ 437{
409 spin_lock(&dcache_lock); 438 spin_lock(&dcache_lock);
410 for (; count ; count--) { 439 for (; count ; count--) {
@@ -464,7 +493,7 @@ static void prune_dcache(int count, struct super_block *sb)
464 * without taking the s_umount lock (I already hold it). 493 * without taking the s_umount lock (I already hold it).
465 */ 494 */
466 if (sb && dentry->d_sb == sb) { 495 if (sb && dentry->d_sb == sb) {
467 prune_one_dentry(dentry); 496 prune_one_dentry(dentry, prune_parents);
468 continue; 497 continue;
469 } 498 }
470 /* 499 /*
@@ -479,7 +508,7 @@ static void prune_dcache(int count, struct super_block *sb)
479 s_umount = &dentry->d_sb->s_umount; 508 s_umount = &dentry->d_sb->s_umount;
480 if (down_read_trylock(s_umount)) { 509 if (down_read_trylock(s_umount)) {
481 if (dentry->d_sb->s_root != NULL) { 510 if (dentry->d_sb->s_root != NULL) {
482 prune_one_dentry(dentry); 511 prune_one_dentry(dentry, prune_parents);
483 up_read(s_umount); 512 up_read(s_umount);
484 continue; 513 continue;
485 } 514 }
@@ -550,7 +579,7 @@ repeat:
550 spin_unlock(&dentry->d_lock); 579 spin_unlock(&dentry->d_lock);
551 continue; 580 continue;
552 } 581 }
553 prune_one_dentry(dentry); 582 prune_one_dentry(dentry, 1);
554 cond_resched_lock(&dcache_lock); 583 cond_resched_lock(&dcache_lock);
555 goto repeat; 584 goto repeat;
556 } 585 }
@@ -829,7 +858,7 @@ void shrink_dcache_parent(struct dentry * parent)
829 int found; 858 int found;
830 859
831 while ((found = select_parent(parent)) != 0) 860 while ((found = select_parent(parent)) != 0)
832 prune_dcache(found, parent->d_sb); 861 prune_dcache(found, parent->d_sb, 1);
833} 862}
834 863
835/* 864/*
@@ -849,7 +878,7 @@ static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
849 if (nr) { 878 if (nr) {
850 if (!(gfp_mask & __GFP_FS)) 879 if (!(gfp_mask & __GFP_FS))
851 return -1; 880 return -1;
852 prune_dcache(nr, NULL); 881 prune_dcache(nr, NULL, 1);
853 } 882 }
854 return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; 883 return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
855} 884}
@@ -1823,6 +1852,16 @@ char * d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
1823 struct vfsmount *rootmnt; 1852 struct vfsmount *rootmnt;
1824 struct dentry *root; 1853 struct dentry *root;
1825 1854
1855 /*
1856 * We have various synthetic filesystems that never get mounted. On
1857 * these filesystems dentries are never used for lookup purposes, and
1858 * thus don't need to be hashed. They also don't need a name until a
1859 * user wants to identify the object in /proc/pid/fd/. The little hack
1860 * below allows us to generate a name for these objects on demand:
1861 */
1862 if (dentry->d_op && dentry->d_op->d_dname)
1863 return dentry->d_op->d_dname(dentry, buf, buflen);
1864
1826 read_lock(&current->fs->lock); 1865 read_lock(&current->fs->lock);
1827 rootmnt = mntget(current->fs->rootmnt); 1866 rootmnt = mntget(current->fs->rootmnt);
1828 root = dget(current->fs->root); 1867 root = dget(current->fs->root);
@@ -1836,6 +1875,27 @@ char * d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
1836} 1875}
1837 1876
1838/* 1877/*
1878 * Helper function for dentry_operations.d_dname() members
1879 */
1880char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
1881 const char *fmt, ...)
1882{
1883 va_list args;
1884 char temp[64];
1885 int sz;
1886
1887 va_start(args, fmt);
1888 sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
1889 va_end(args);
1890
1891 if (sz > sizeof(temp) || sz > buflen)
1892 return ERR_PTR(-ENAMETOOLONG);
1893
1894 buffer += buflen - sz;
1895 return memcpy(buffer, temp, sz);
1896}
1897
1898/*
1839 * NOTE! The user-level library version returns a 1899 * NOTE! The user-level library version returns a
1840 * character pointer. The kernel system call just 1900 * character pointer. The kernel system call just
1841 * returns the length of the buffer filled (which 1901 * returns the length of the buffer filled (which
@@ -2052,12 +2112,8 @@ static void __init dcache_init(unsigned long mempages)
2052 * but it is probably not worth it because of the cache nature 2112 * but it is probably not worth it because of the cache nature
2053 * of the dcache. 2113 * of the dcache.
2054 */ 2114 */
2055 dentry_cache = kmem_cache_create("dentry_cache", 2115 dentry_cache = KMEM_CACHE(dentry,
2056 sizeof(struct dentry), 2116 SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
2057 0,
2058 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
2059 SLAB_MEM_SPREAD),
2060 NULL, NULL);
2061 2117
2062 set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory); 2118 set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory);
2063 2119
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 7b324cfebcb1..ec8896b264de 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -374,7 +374,7 @@ static int __init debugfs_init(void)
374{ 374{
375 int retval; 375 int retval;
376 376
377 kset_set_kset_s(&debug_subsys, kernel_subsys); 377 kobj_set_kset_s(&debug_subsys, kernel_subsys);
378 retval = subsystem_register(&debug_subsys); 378 retval = subsystem_register(&debug_subsys);
379 if (retval) 379 if (retval)
380 return retval; 380 return retval;
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 643e57b622bd..06ef9a255c76 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -19,6 +19,7 @@
19#include <linux/tty.h> 19#include <linux/tty.h>
20#include <linux/devpts_fs.h> 20#include <linux/devpts_fs.h>
21#include <linux/parser.h> 21#include <linux/parser.h>
22#include <linux/fsnotify.h>
22 23
23#define DEVPTS_SUPER_MAGIC 0x1cd1 24#define DEVPTS_SUPER_MAGIC 0x1cd1
24 25
@@ -178,8 +179,10 @@ int devpts_pty_new(struct tty_struct *tty)
178 inode->i_private = tty; 179 inode->i_private = tty;
179 180
180 dentry = get_node(number); 181 dentry = get_node(number);
181 if (!IS_ERR(dentry) && !dentry->d_inode) 182 if (!IS_ERR(dentry) && !dentry->d_inode) {
182 d_instantiate(dentry, inode); 183 d_instantiate(dentry, inode);
184 fsnotify_create(devpts_root->d_inode, dentry);
185 }
183 186
184 mutex_unlock(&devpts_root->d_inode->i_mutex); 187 mutex_unlock(&devpts_root->d_inode->i_mutex);
185 188
diff --git a/fs/direct-io.c b/fs/direct-io.c
index d9d0833444f5..8593f3dfd299 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -439,7 +439,7 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio)
439 * Wait on and process all in-flight BIOs. This must only be called once 439 * Wait on and process all in-flight BIOs. This must only be called once
440 * all bios have been issued so that the refcount can only decrease. 440 * all bios have been issued so that the refcount can only decrease.
441 * This just waits for all bios to make it through dio_bio_complete. IO 441 * This just waits for all bios to make it through dio_bio_complete. IO
442 * errors are propogated through dio->io_error and should be propogated via 442 * errors are propagated through dio->io_error and should be propagated via
443 * dio_complete(). 443 * dio_complete().
444 */ 444 */
445static void dio_await_completion(struct dio *dio) 445static void dio_await_completion(struct dio *dio)
@@ -867,7 +867,6 @@ static int do_direct_IO(struct dio *dio)
867do_holes: 867do_holes:
868 /* Handle holes */ 868 /* Handle holes */
869 if (!buffer_mapped(map_bh)) { 869 if (!buffer_mapped(map_bh)) {
870 char *kaddr;
871 loff_t i_size_aligned; 870 loff_t i_size_aligned;
872 871
873 /* AKPM: eargh, -ENOTBLK is a hack */ 872 /* AKPM: eargh, -ENOTBLK is a hack */
@@ -888,11 +887,8 @@ do_holes:
888 page_cache_release(page); 887 page_cache_release(page);
889 goto out; 888 goto out;
890 } 889 }
891 kaddr = kmap_atomic(page, KM_USER0); 890 zero_user_page(page, block_in_page << blkbits,
892 memset(kaddr + (block_in_page << blkbits), 891 1 << blkbits, KM_USER0);
893 0, 1 << blkbits);
894 flush_dcache_page(page);
895 kunmap_atomic(kaddr, KM_USER0);
896 dio->block_in_file++; 892 dio->block_in_file++;
897 block_in_page++; 893 block_in_page++;
898 goto next_block; 894 goto next_block;
diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig
index 6fa7b0d5c043..69a94690e493 100644
--- a/fs/dlm/Kconfig
+++ b/fs/dlm/Kconfig
@@ -3,36 +3,19 @@ menu "Distributed Lock Manager"
3 3
4config DLM 4config DLM
5 tristate "Distributed Lock Manager (DLM)" 5 tristate "Distributed Lock Manager (DLM)"
6 depends on SYSFS && (IPV6 || IPV6=n) 6 depends on IPV6 || IPV6=n
7 select CONFIGFS_FS 7 select CONFIGFS_FS
8 select IP_SCTP if DLM_SCTP 8 select IP_SCTP
9 help 9 help
10 A general purpose distributed lock manager for kernel or userspace 10 A general purpose distributed lock manager for kernel or userspace
11 applications. 11 applications.
12
13choice
14 prompt "Select DLM communications protocol"
15 depends on DLM
16 default DLM_TCP
17 help
18 The DLM Can use TCP or SCTP for it's network communications.
19 SCTP supports multi-homed operations whereas TCP doesn't.
20 However, SCTP seems to have stability problems at the moment.
21
22config DLM_TCP
23 bool "TCP/IP"
24
25config DLM_SCTP
26 bool "SCTP"
27
28endchoice
29 12
30config DLM_DEBUG 13config DLM_DEBUG
31 bool "DLM debugging" 14 bool "DLM debugging"
32 depends on DLM 15 depends on DLM
33 help 16 help
34 Under the debugfs mount point, the name of each lockspace will 17 Under the debugfs mount point, the name of each lockspace will
35 appear as a file in the "dlm" directory. The output is the 18 appear as a file in the "dlm" directory. The output is the
36 list of resource and locks the local node knows about. 19 list of resource and locks the local node knows about.
37 20
38endmenu 21endmenu
diff --git a/fs/dlm/Makefile b/fs/dlm/Makefile
index 65388944eba0..604cf7dc5f39 100644
--- a/fs/dlm/Makefile
+++ b/fs/dlm/Makefile
@@ -8,14 +8,12 @@ dlm-y := ast.o \
8 member.o \ 8 member.o \
9 memory.o \ 9 memory.o \
10 midcomms.o \ 10 midcomms.o \
11 lowcomms.o \
11 rcom.o \ 12 rcom.o \
12 recover.o \ 13 recover.o \
13 recoverd.o \ 14 recoverd.o \
14 requestqueue.o \ 15 requestqueue.o \
15 user.o \ 16 user.o \
16 util.o 17 util.o
17dlm-$(CONFIG_DLM_DEBUG) += debug_fs.o 18dlm-$(CONFIG_DLM_DEBUG) += debug_fs.o
18 19
19dlm-$(CONFIG_DLM_TCP) += lowcomms-tcp.o
20
21dlm-$(CONFIG_DLM_SCTP) += lowcomms-sctp.o \ No newline at end of file
diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c
index f91d39cb1e0b..6308122890ca 100644
--- a/fs/dlm/ast.c
+++ b/fs/dlm/ast.c
@@ -14,6 +14,7 @@
14#include "dlm_internal.h" 14#include "dlm_internal.h"
15#include "lock.h" 15#include "lock.h"
16#include "user.h" 16#include "user.h"
17#include "ast.h"
17 18
18#define WAKE_ASTS 0 19#define WAKE_ASTS 0
19 20
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 8665c88e5af2..822abdcd1434 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -2,7 +2,7 @@
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. 5** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
6** 6**
7** This copyrighted material is made available to anyone wishing to use, 7** This copyrighted material is made available to anyone wishing to use,
8** modify, copy, or redistribute it subject to the terms and conditions 8** modify, copy, or redistribute it subject to the terms and conditions
@@ -89,6 +89,7 @@ struct cluster {
89 unsigned int cl_toss_secs; 89 unsigned int cl_toss_secs;
90 unsigned int cl_scan_secs; 90 unsigned int cl_scan_secs;
91 unsigned int cl_log_debug; 91 unsigned int cl_log_debug;
92 unsigned int cl_protocol;
92}; 93};
93 94
94enum { 95enum {
@@ -101,6 +102,7 @@ enum {
101 CLUSTER_ATTR_TOSS_SECS, 102 CLUSTER_ATTR_TOSS_SECS,
102 CLUSTER_ATTR_SCAN_SECS, 103 CLUSTER_ATTR_SCAN_SECS,
103 CLUSTER_ATTR_LOG_DEBUG, 104 CLUSTER_ATTR_LOG_DEBUG,
105 CLUSTER_ATTR_PROTOCOL,
104}; 106};
105 107
106struct cluster_attribute { 108struct cluster_attribute {
@@ -159,6 +161,7 @@ CLUSTER_ATTR(recover_timer, 1);
159CLUSTER_ATTR(toss_secs, 1); 161CLUSTER_ATTR(toss_secs, 1);
160CLUSTER_ATTR(scan_secs, 1); 162CLUSTER_ATTR(scan_secs, 1);
161CLUSTER_ATTR(log_debug, 0); 163CLUSTER_ATTR(log_debug, 0);
164CLUSTER_ATTR(protocol, 0);
162 165
163static struct configfs_attribute *cluster_attrs[] = { 166static struct configfs_attribute *cluster_attrs[] = {
164 [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr, 167 [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
@@ -170,6 +173,7 @@ static struct configfs_attribute *cluster_attrs[] = {
170 [CLUSTER_ATTR_TOSS_SECS] = &cluster_attr_toss_secs.attr, 173 [CLUSTER_ATTR_TOSS_SECS] = &cluster_attr_toss_secs.attr,
171 [CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr, 174 [CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr,
172 [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr, 175 [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr,
176 [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr,
173 NULL, 177 NULL,
174}; 178};
175 179
@@ -904,6 +908,7 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
904#define DEFAULT_TOSS_SECS 10 908#define DEFAULT_TOSS_SECS 10
905#define DEFAULT_SCAN_SECS 5 909#define DEFAULT_SCAN_SECS 5
906#define DEFAULT_LOG_DEBUG 0 910#define DEFAULT_LOG_DEBUG 0
911#define DEFAULT_PROTOCOL 0
907 912
908struct dlm_config_info dlm_config = { 913struct dlm_config_info dlm_config = {
909 .ci_tcp_port = DEFAULT_TCP_PORT, 914 .ci_tcp_port = DEFAULT_TCP_PORT,
@@ -914,6 +919,7 @@ struct dlm_config_info dlm_config = {
914 .ci_recover_timer = DEFAULT_RECOVER_TIMER, 919 .ci_recover_timer = DEFAULT_RECOVER_TIMER,
915 .ci_toss_secs = DEFAULT_TOSS_SECS, 920 .ci_toss_secs = DEFAULT_TOSS_SECS,
916 .ci_scan_secs = DEFAULT_SCAN_SECS, 921 .ci_scan_secs = DEFAULT_SCAN_SECS,
917 .ci_log_debug = DEFAULT_LOG_DEBUG 922 .ci_log_debug = DEFAULT_LOG_DEBUG,
923 .ci_protocol = DEFAULT_PROTOCOL
918}; 924};
919 925
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index 1e978611a96e..967cc3d72e5e 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -2,7 +2,7 @@
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. 5** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
6** 6**
7** This copyrighted material is made available to anyone wishing to use, 7** This copyrighted material is made available to anyone wishing to use,
8** modify, copy, or redistribute it subject to the terms and conditions 8** modify, copy, or redistribute it subject to the terms and conditions
@@ -26,6 +26,7 @@ struct dlm_config_info {
26 int ci_toss_secs; 26 int ci_toss_secs;
27 int ci_scan_secs; 27 int ci_scan_secs;
28 int ci_log_debug; 28 int ci_log_debug;
29 int ci_protocol;
29}; 30};
30 31
31extern struct dlm_config_info dlm_config; 32extern struct dlm_config_info dlm_config;
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 61d93201e1b2..30994d68f6a0 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -2,7 +2,7 @@
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. 5** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
6** 6**
7** This copyrighted material is made available to anyone wishing to use, 7** This copyrighted material is made available to anyone wishing to use,
8** modify, copy, or redistribute it subject to the terms and conditions 8** modify, copy, or redistribute it subject to the terms and conditions
@@ -210,6 +210,9 @@ struct dlm_args {
210#define DLM_IFL_MSTCPY 0x00010000 210#define DLM_IFL_MSTCPY 0x00010000
211#define DLM_IFL_RESEND 0x00020000 211#define DLM_IFL_RESEND 0x00020000
212#define DLM_IFL_DEAD 0x00040000 212#define DLM_IFL_DEAD 0x00040000
213#define DLM_IFL_OVERLAP_UNLOCK 0x00080000
214#define DLM_IFL_OVERLAP_CANCEL 0x00100000
215#define DLM_IFL_ENDOFLIFE 0x00200000
213#define DLM_IFL_USER 0x00000001 216#define DLM_IFL_USER 0x00000001
214#define DLM_IFL_ORPHAN 0x00000002 217#define DLM_IFL_ORPHAN 0x00000002
215 218
@@ -230,8 +233,8 @@ struct dlm_lkb {
230 int8_t lkb_grmode; /* granted lock mode */ 233 int8_t lkb_grmode; /* granted lock mode */
231 int8_t lkb_bastmode; /* requested mode */ 234 int8_t lkb_bastmode; /* requested mode */
232 int8_t lkb_highbast; /* highest mode bast sent for */ 235 int8_t lkb_highbast; /* highest mode bast sent for */
233
234 int8_t lkb_wait_type; /* type of reply waiting for */ 236 int8_t lkb_wait_type; /* type of reply waiting for */
237 int8_t lkb_wait_count;
235 int8_t lkb_ast_type; /* type of ast queued for */ 238 int8_t lkb_ast_type; /* type of ast queued for */
236 239
237 struct list_head lkb_idtbl_list; /* lockspace lkbtbl */ 240 struct list_head lkb_idtbl_list; /* lockspace lkbtbl */
@@ -339,6 +342,7 @@ struct dlm_header {
339#define DLM_MSG_LOOKUP 11 342#define DLM_MSG_LOOKUP 11
340#define DLM_MSG_REMOVE 12 343#define DLM_MSG_REMOVE 12
341#define DLM_MSG_LOOKUP_REPLY 13 344#define DLM_MSG_LOOKUP_REPLY 13
345#define DLM_MSG_PURGE 14
342 346
343struct dlm_message { 347struct dlm_message {
344 struct dlm_header m_header; 348 struct dlm_header m_header;
@@ -440,6 +444,9 @@ struct dlm_ls {
440 struct mutex ls_waiters_mutex; 444 struct mutex ls_waiters_mutex;
441 struct list_head ls_waiters; /* lkbs needing a reply */ 445 struct list_head ls_waiters; /* lkbs needing a reply */
442 446
447 struct mutex ls_orphans_mutex;
448 struct list_head ls_orphans;
449
443 struct list_head ls_nodes; /* current nodes in ls */ 450 struct list_head ls_nodes; /* current nodes in ls */
444 struct list_head ls_nodes_gone; /* dead node list, recovery */ 451 struct list_head ls_nodes_gone; /* dead node list, recovery */
445 int ls_num_nodes; /* number of nodes in ls */ 452 int ls_num_nodes; /* number of nodes in ls */
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index e725005fafd0..d8d6e729f96b 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -1,7 +1,7 @@
1/****************************************************************************** 1/******************************************************************************
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) 2005 Red Hat, Inc. All rights reserved. 4** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved.
5** 5**
6** This copyrighted material is made available to anyone wishing to use, 6** This copyrighted material is made available to anyone wishing to use,
7** modify, copy, or redistribute it subject to the terms and conditions 7** modify, copy, or redistribute it subject to the terms and conditions
@@ -85,6 +85,7 @@ static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
85static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, 85static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
86 struct dlm_message *ms); 86 struct dlm_message *ms);
87static int receive_extralen(struct dlm_message *ms); 87static int receive_extralen(struct dlm_message *ms);
88static void do_purge(struct dlm_ls *ls, int nodeid, int pid);
88 89
89/* 90/*
90 * Lock compatibilty matrix - thanks Steve 91 * Lock compatibilty matrix - thanks Steve
@@ -223,6 +224,16 @@ static inline int is_demoted(struct dlm_lkb *lkb)
223 return (lkb->lkb_sbflags & DLM_SBF_DEMOTED); 224 return (lkb->lkb_sbflags & DLM_SBF_DEMOTED);
224} 225}
225 226
227static inline int is_altmode(struct dlm_lkb *lkb)
228{
229 return (lkb->lkb_sbflags & DLM_SBF_ALTMODE);
230}
231
232static inline int is_granted(struct dlm_lkb *lkb)
233{
234 return (lkb->lkb_status == DLM_LKSTS_GRANTED);
235}
236
226static inline int is_remote(struct dlm_rsb *r) 237static inline int is_remote(struct dlm_rsb *r)
227{ 238{
228 DLM_ASSERT(r->res_nodeid >= 0, dlm_print_rsb(r);); 239 DLM_ASSERT(r->res_nodeid >= 0, dlm_print_rsb(r););
@@ -254,6 +265,22 @@ static inline int down_conversion(struct dlm_lkb *lkb)
254 return (!middle_conversion(lkb) && lkb->lkb_rqmode < lkb->lkb_grmode); 265 return (!middle_conversion(lkb) && lkb->lkb_rqmode < lkb->lkb_grmode);
255} 266}
256 267
268static inline int is_overlap_unlock(struct dlm_lkb *lkb)
269{
270 return lkb->lkb_flags & DLM_IFL_OVERLAP_UNLOCK;
271}
272
273static inline int is_overlap_cancel(struct dlm_lkb *lkb)
274{
275 return lkb->lkb_flags & DLM_IFL_OVERLAP_CANCEL;
276}
277
278static inline int is_overlap(struct dlm_lkb *lkb)
279{
280 return (lkb->lkb_flags & (DLM_IFL_OVERLAP_UNLOCK |
281 DLM_IFL_OVERLAP_CANCEL));
282}
283
257static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv) 284static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
258{ 285{
259 if (is_master_copy(lkb)) 286 if (is_master_copy(lkb))
@@ -267,6 +294,12 @@ static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
267 dlm_add_ast(lkb, AST_COMP); 294 dlm_add_ast(lkb, AST_COMP);
268} 295}
269 296
297static inline void queue_cast_overlap(struct dlm_rsb *r, struct dlm_lkb *lkb)
298{
299 queue_cast(r, lkb,
300 is_overlap_unlock(lkb) ? -DLM_EUNLOCK : -DLM_ECANCEL);
301}
302
270static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode) 303static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode)
271{ 304{
272 if (is_master_copy(lkb)) 305 if (is_master_copy(lkb))
@@ -547,6 +580,7 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
547 lkb->lkb_grmode = DLM_LOCK_IV; 580 lkb->lkb_grmode = DLM_LOCK_IV;
548 kref_init(&lkb->lkb_ref); 581 kref_init(&lkb->lkb_ref);
549 INIT_LIST_HEAD(&lkb->lkb_ownqueue); 582 INIT_LIST_HEAD(&lkb->lkb_ownqueue);
583 INIT_LIST_HEAD(&lkb->lkb_rsb_lookup);
550 584
551 get_random_bytes(&bucket, sizeof(bucket)); 585 get_random_bytes(&bucket, sizeof(bucket));
552 bucket &= (ls->ls_lkbtbl_size - 1); 586 bucket &= (ls->ls_lkbtbl_size - 1);
@@ -556,7 +590,7 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
556 /* counter can roll over so we must verify lkid is not in use */ 590 /* counter can roll over so we must verify lkid is not in use */
557 591
558 while (lkid == 0) { 592 while (lkid == 0) {
559 lkid = bucket | (ls->ls_lkbtbl[bucket].counter++ << 16); 593 lkid = (bucket << 16) | ls->ls_lkbtbl[bucket].counter++;
560 594
561 list_for_each_entry(tmp, &ls->ls_lkbtbl[bucket].list, 595 list_for_each_entry(tmp, &ls->ls_lkbtbl[bucket].list,
562 lkb_idtbl_list) { 596 lkb_idtbl_list) {
@@ -577,8 +611,8 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
577 611
578static struct dlm_lkb *__find_lkb(struct dlm_ls *ls, uint32_t lkid) 612static struct dlm_lkb *__find_lkb(struct dlm_ls *ls, uint32_t lkid)
579{ 613{
580 uint16_t bucket = lkid & 0xFFFF;
581 struct dlm_lkb *lkb; 614 struct dlm_lkb *lkb;
615 uint16_t bucket = (lkid >> 16);
582 616
583 list_for_each_entry(lkb, &ls->ls_lkbtbl[bucket].list, lkb_idtbl_list) { 617 list_for_each_entry(lkb, &ls->ls_lkbtbl[bucket].list, lkb_idtbl_list) {
584 if (lkb->lkb_id == lkid) 618 if (lkb->lkb_id == lkid)
@@ -590,7 +624,7 @@ static struct dlm_lkb *__find_lkb(struct dlm_ls *ls, uint32_t lkid)
590static int find_lkb(struct dlm_ls *ls, uint32_t lkid, struct dlm_lkb **lkb_ret) 624static int find_lkb(struct dlm_ls *ls, uint32_t lkid, struct dlm_lkb **lkb_ret)
591{ 625{
592 struct dlm_lkb *lkb; 626 struct dlm_lkb *lkb;
593 uint16_t bucket = lkid & 0xFFFF; 627 uint16_t bucket = (lkid >> 16);
594 628
595 if (bucket >= ls->ls_lkbtbl_size) 629 if (bucket >= ls->ls_lkbtbl_size)
596 return -EBADSLT; 630 return -EBADSLT;
@@ -620,7 +654,7 @@ static void kill_lkb(struct kref *kref)
620 654
621static int __put_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb) 655static int __put_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb)
622{ 656{
623 uint16_t bucket = lkb->lkb_id & 0xFFFF; 657 uint16_t bucket = (lkb->lkb_id >> 16);
624 658
625 write_lock(&ls->ls_lkbtbl[bucket].lock); 659 write_lock(&ls->ls_lkbtbl[bucket].lock);
626 if (kref_put(&lkb->lkb_ref, kill_lkb)) { 660 if (kref_put(&lkb->lkb_ref, kill_lkb)) {
@@ -735,23 +769,75 @@ static void move_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb, int sts)
735 unhold_lkb(lkb); 769 unhold_lkb(lkb);
736} 770}
737 771
772static int msg_reply_type(int mstype)
773{
774 switch (mstype) {
775 case DLM_MSG_REQUEST:
776 return DLM_MSG_REQUEST_REPLY;
777 case DLM_MSG_CONVERT:
778 return DLM_MSG_CONVERT_REPLY;
779 case DLM_MSG_UNLOCK:
780 return DLM_MSG_UNLOCK_REPLY;
781 case DLM_MSG_CANCEL:
782 return DLM_MSG_CANCEL_REPLY;
783 case DLM_MSG_LOOKUP:
784 return DLM_MSG_LOOKUP_REPLY;
785 }
786 return -1;
787}
788
738/* add/remove lkb from global waiters list of lkb's waiting for 789/* add/remove lkb from global waiters list of lkb's waiting for
739 a reply from a remote node */ 790 a reply from a remote node */
740 791
741static void add_to_waiters(struct dlm_lkb *lkb, int mstype) 792static int add_to_waiters(struct dlm_lkb *lkb, int mstype)
742{ 793{
743 struct dlm_ls *ls = lkb->lkb_resource->res_ls; 794 struct dlm_ls *ls = lkb->lkb_resource->res_ls;
795 int error = 0;
744 796
745 mutex_lock(&ls->ls_waiters_mutex); 797 mutex_lock(&ls->ls_waiters_mutex);
746 if (lkb->lkb_wait_type) { 798
747 log_print("add_to_waiters error %d", lkb->lkb_wait_type); 799 if (is_overlap_unlock(lkb) ||
800 (is_overlap_cancel(lkb) && (mstype == DLM_MSG_CANCEL))) {
801 error = -EINVAL;
802 goto out;
803 }
804
805 if (lkb->lkb_wait_type || is_overlap_cancel(lkb)) {
806 switch (mstype) {
807 case DLM_MSG_UNLOCK:
808 lkb->lkb_flags |= DLM_IFL_OVERLAP_UNLOCK;
809 break;
810 case DLM_MSG_CANCEL:
811 lkb->lkb_flags |= DLM_IFL_OVERLAP_CANCEL;
812 break;
813 default:
814 error = -EBUSY;
815 goto out;
816 }
817 lkb->lkb_wait_count++;
818 hold_lkb(lkb);
819
820 log_debug(ls, "add overlap %x cur %d new %d count %d flags %x",
821 lkb->lkb_id, lkb->lkb_wait_type, mstype,
822 lkb->lkb_wait_count, lkb->lkb_flags);
748 goto out; 823 goto out;
749 } 824 }
825
826 DLM_ASSERT(!lkb->lkb_wait_count,
827 dlm_print_lkb(lkb);
828 printk("wait_count %d\n", lkb->lkb_wait_count););
829
830 lkb->lkb_wait_count++;
750 lkb->lkb_wait_type = mstype; 831 lkb->lkb_wait_type = mstype;
751 kref_get(&lkb->lkb_ref); 832 hold_lkb(lkb);
752 list_add(&lkb->lkb_wait_reply, &ls->ls_waiters); 833 list_add(&lkb->lkb_wait_reply, &ls->ls_waiters);
753 out: 834 out:
835 if (error)
836 log_error(ls, "add_to_waiters %x error %d flags %x %d %d %s",
837 lkb->lkb_id, error, lkb->lkb_flags, mstype,
838 lkb->lkb_wait_type, lkb->lkb_resource->res_name);
754 mutex_unlock(&ls->ls_waiters_mutex); 839 mutex_unlock(&ls->ls_waiters_mutex);
840 return error;
755} 841}
756 842
757/* We clear the RESEND flag because we might be taking an lkb off the waiters 843/* We clear the RESEND flag because we might be taking an lkb off the waiters
@@ -759,34 +845,85 @@ static void add_to_waiters(struct dlm_lkb *lkb, int mstype)
759 request reply on the requestqueue) between dlm_recover_waiters_pre() which 845 request reply on the requestqueue) between dlm_recover_waiters_pre() which
760 set RESEND and dlm_recover_waiters_post() */ 846 set RESEND and dlm_recover_waiters_post() */
761 847
762static int _remove_from_waiters(struct dlm_lkb *lkb) 848static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype)
763{ 849{
764 int error = 0; 850 struct dlm_ls *ls = lkb->lkb_resource->res_ls;
851 int overlap_done = 0;
765 852
766 if (!lkb->lkb_wait_type) { 853 if (is_overlap_unlock(lkb) && (mstype == DLM_MSG_UNLOCK_REPLY)) {
767 log_print("remove_from_waiters error"); 854 lkb->lkb_flags &= ~DLM_IFL_OVERLAP_UNLOCK;
768 error = -EINVAL; 855 overlap_done = 1;
769 goto out; 856 goto out_del;
857 }
858
859 if (is_overlap_cancel(lkb) && (mstype == DLM_MSG_CANCEL_REPLY)) {
860 lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL;
861 overlap_done = 1;
862 goto out_del;
863 }
864
865 /* N.B. type of reply may not always correspond to type of original
866 msg due to lookup->request optimization, verify others? */
867
868 if (lkb->lkb_wait_type) {
869 lkb->lkb_wait_type = 0;
870 goto out_del;
871 }
872
873 log_error(ls, "remove_from_waiters lkid %x flags %x types %d %d",
874 lkb->lkb_id, lkb->lkb_flags, mstype, lkb->lkb_wait_type);
875 return -1;
876
877 out_del:
878 /* the force-unlock/cancel has completed and we haven't recvd a reply
879 to the op that was in progress prior to the unlock/cancel; we
880 give up on any reply to the earlier op. FIXME: not sure when/how
881 this would happen */
882
883 if (overlap_done && lkb->lkb_wait_type) {
884 log_error(ls, "remove_from_waiters %x reply %d give up on %d",
885 lkb->lkb_id, mstype, lkb->lkb_wait_type);
886 lkb->lkb_wait_count--;
887 lkb->lkb_wait_type = 0;
770 } 888 }
771 lkb->lkb_wait_type = 0; 889
890 DLM_ASSERT(lkb->lkb_wait_count, dlm_print_lkb(lkb););
891
772 lkb->lkb_flags &= ~DLM_IFL_RESEND; 892 lkb->lkb_flags &= ~DLM_IFL_RESEND;
773 list_del(&lkb->lkb_wait_reply); 893 lkb->lkb_wait_count--;
894 if (!lkb->lkb_wait_count)
895 list_del_init(&lkb->lkb_wait_reply);
774 unhold_lkb(lkb); 896 unhold_lkb(lkb);
775 out: 897 return 0;
776 return error;
777} 898}
778 899
779static int remove_from_waiters(struct dlm_lkb *lkb) 900static int remove_from_waiters(struct dlm_lkb *lkb, int mstype)
780{ 901{
781 struct dlm_ls *ls = lkb->lkb_resource->res_ls; 902 struct dlm_ls *ls = lkb->lkb_resource->res_ls;
782 int error; 903 int error;
783 904
784 mutex_lock(&ls->ls_waiters_mutex); 905 mutex_lock(&ls->ls_waiters_mutex);
785 error = _remove_from_waiters(lkb); 906 error = _remove_from_waiters(lkb, mstype);
786 mutex_unlock(&ls->ls_waiters_mutex); 907 mutex_unlock(&ls->ls_waiters_mutex);
787 return error; 908 return error;
788} 909}
789 910
911/* Handles situations where we might be processing a "fake" or "stub" reply in
912 which we can't try to take waiters_mutex again. */
913
914static int remove_from_waiters_ms(struct dlm_lkb *lkb, struct dlm_message *ms)
915{
916 struct dlm_ls *ls = lkb->lkb_resource->res_ls;
917 int error;
918
919 if (ms != &ls->ls_stub_ms)
920 mutex_lock(&ls->ls_waiters_mutex);
921 error = _remove_from_waiters(lkb, ms->m_type);
922 if (ms != &ls->ls_stub_ms)
923 mutex_unlock(&ls->ls_waiters_mutex);
924 return error;
925}
926
790static void dir_remove(struct dlm_rsb *r) 927static void dir_remove(struct dlm_rsb *r)
791{ 928{
792 int to_nodeid; 929 int to_nodeid;
@@ -988,8 +1125,14 @@ static void remove_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb)
988 _remove_lock(r, lkb); 1125 _remove_lock(r, lkb);
989} 1126}
990 1127
991static void revert_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) 1128/* returns: 0 did nothing
1129 1 moved lock to granted
1130 -1 removed lock */
1131
1132static int revert_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
992{ 1133{
1134 int rv = 0;
1135
993 lkb->lkb_rqmode = DLM_LOCK_IV; 1136 lkb->lkb_rqmode = DLM_LOCK_IV;
994 1137
995 switch (lkb->lkb_status) { 1138 switch (lkb->lkb_status) {
@@ -997,6 +1140,7 @@ static void revert_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
997 break; 1140 break;
998 case DLM_LKSTS_CONVERT: 1141 case DLM_LKSTS_CONVERT:
999 move_lkb(r, lkb, DLM_LKSTS_GRANTED); 1142 move_lkb(r, lkb, DLM_LKSTS_GRANTED);
1143 rv = 1;
1000 break; 1144 break;
1001 case DLM_LKSTS_WAITING: 1145 case DLM_LKSTS_WAITING:
1002 del_lkb(r, lkb); 1146 del_lkb(r, lkb);
@@ -1004,15 +1148,17 @@ static void revert_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
1004 /* this unhold undoes the original ref from create_lkb() 1148 /* this unhold undoes the original ref from create_lkb()
1005 so this leads to the lkb being freed */ 1149 so this leads to the lkb being freed */
1006 unhold_lkb(lkb); 1150 unhold_lkb(lkb);
1151 rv = -1;
1007 break; 1152 break;
1008 default: 1153 default:
1009 log_print("invalid status for revert %d", lkb->lkb_status); 1154 log_print("invalid status for revert %d", lkb->lkb_status);
1010 } 1155 }
1156 return rv;
1011} 1157}
1012 1158
1013static void revert_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb) 1159static int revert_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb)
1014{ 1160{
1015 revert_lock(r, lkb); 1161 return revert_lock(r, lkb);
1016} 1162}
1017 1163
1018static void _grant_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) 1164static void _grant_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
@@ -1055,6 +1201,50 @@ static void grant_lock_pending(struct dlm_rsb *r, struct dlm_lkb *lkb)
1055 queue_cast(r, lkb, 0); 1201 queue_cast(r, lkb, 0);
1056} 1202}
1057 1203
1204/* The special CONVDEADLK, ALTPR and ALTCW flags allow the master to
1205 change the granted/requested modes. We're munging things accordingly in
1206 the process copy.
1207 CONVDEADLK: our grmode may have been forced down to NL to resolve a
1208 conversion deadlock
1209 ALTPR/ALTCW: our rqmode may have been changed to PR or CW to become
1210 compatible with other granted locks */
1211
1212static void munge_demoted(struct dlm_lkb *lkb, struct dlm_message *ms)
1213{
1214 if (ms->m_type != DLM_MSG_CONVERT_REPLY) {
1215 log_print("munge_demoted %x invalid reply type %d",
1216 lkb->lkb_id, ms->m_type);
1217 return;
1218 }
1219
1220 if (lkb->lkb_rqmode == DLM_LOCK_IV || lkb->lkb_grmode == DLM_LOCK_IV) {
1221 log_print("munge_demoted %x invalid modes gr %d rq %d",
1222 lkb->lkb_id, lkb->lkb_grmode, lkb->lkb_rqmode);
1223 return;
1224 }
1225
1226 lkb->lkb_grmode = DLM_LOCK_NL;
1227}
1228
1229static void munge_altmode(struct dlm_lkb *lkb, struct dlm_message *ms)
1230{
1231 if (ms->m_type != DLM_MSG_REQUEST_REPLY &&
1232 ms->m_type != DLM_MSG_GRANT) {
1233 log_print("munge_altmode %x invalid reply type %d",
1234 lkb->lkb_id, ms->m_type);
1235 return;
1236 }
1237
1238 if (lkb->lkb_exflags & DLM_LKF_ALTPR)
1239 lkb->lkb_rqmode = DLM_LOCK_PR;
1240 else if (lkb->lkb_exflags & DLM_LKF_ALTCW)
1241 lkb->lkb_rqmode = DLM_LOCK_CW;
1242 else {
1243 log_print("munge_altmode invalid exflags %x", lkb->lkb_exflags);
1244 dlm_print_lkb(lkb);
1245 }
1246}
1247
1058static inline int first_in_list(struct dlm_lkb *lkb, struct list_head *head) 1248static inline int first_in_list(struct dlm_lkb *lkb, struct list_head *head)
1059{ 1249{
1060 struct dlm_lkb *first = list_entry(head->next, struct dlm_lkb, 1250 struct dlm_lkb *first = list_entry(head->next, struct dlm_lkb,
@@ -1499,7 +1689,7 @@ static void process_lookup_list(struct dlm_rsb *r)
1499 struct dlm_lkb *lkb, *safe; 1689 struct dlm_lkb *lkb, *safe;
1500 1690
1501 list_for_each_entry_safe(lkb, safe, &r->res_lookup, lkb_rsb_lookup) { 1691 list_for_each_entry_safe(lkb, safe, &r->res_lookup, lkb_rsb_lookup) {
1502 list_del(&lkb->lkb_rsb_lookup); 1692 list_del_init(&lkb->lkb_rsb_lookup);
1503 _request_lock(r, lkb); 1693 _request_lock(r, lkb);
1504 schedule(); 1694 schedule();
1505 } 1695 }
@@ -1530,7 +1720,7 @@ static void confirm_master(struct dlm_rsb *r, int error)
1530 if (!list_empty(&r->res_lookup)) { 1720 if (!list_empty(&r->res_lookup)) {
1531 lkb = list_entry(r->res_lookup.next, struct dlm_lkb, 1721 lkb = list_entry(r->res_lookup.next, struct dlm_lkb,
1532 lkb_rsb_lookup); 1722 lkb_rsb_lookup);
1533 list_del(&lkb->lkb_rsb_lookup); 1723 list_del_init(&lkb->lkb_rsb_lookup);
1534 r->res_first_lkid = lkb->lkb_id; 1724 r->res_first_lkid = lkb->lkb_id;
1535 _request_lock(r, lkb); 1725 _request_lock(r, lkb);
1536 } else 1726 } else
@@ -1614,6 +1804,9 @@ static int set_unlock_args(uint32_t flags, void *astarg, struct dlm_args *args)
1614 DLM_LKF_FORCEUNLOCK)) 1804 DLM_LKF_FORCEUNLOCK))
1615 return -EINVAL; 1805 return -EINVAL;
1616 1806
1807 if (flags & DLM_LKF_CANCEL && flags & DLM_LKF_FORCEUNLOCK)
1808 return -EINVAL;
1809
1617 args->flags = flags; 1810 args->flags = flags;
1618 args->astparam = (long) astarg; 1811 args->astparam = (long) astarg;
1619 return 0; 1812 return 0;
@@ -1638,6 +1831,9 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
1638 1831
1639 if (lkb->lkb_wait_type) 1832 if (lkb->lkb_wait_type)
1640 goto out; 1833 goto out;
1834
1835 if (is_overlap(lkb))
1836 goto out;
1641 } 1837 }
1642 1838
1643 lkb->lkb_exflags = args->flags; 1839 lkb->lkb_exflags = args->flags;
@@ -1654,35 +1850,126 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
1654 return rv; 1850 return rv;
1655} 1851}
1656 1852
1853/* when dlm_unlock() sees -EBUSY with CANCEL/FORCEUNLOCK it returns 0
1854 for success */
1855
1856/* note: it's valid for lkb_nodeid/res_nodeid to be -1 when we get here
1857 because there may be a lookup in progress and it's valid to do
1858 cancel/unlockf on it */
1859
1657static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args) 1860static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
1658{ 1861{
1862 struct dlm_ls *ls = lkb->lkb_resource->res_ls;
1659 int rv = -EINVAL; 1863 int rv = -EINVAL;
1660 1864
1661 if (lkb->lkb_flags & DLM_IFL_MSTCPY) 1865 if (lkb->lkb_flags & DLM_IFL_MSTCPY) {
1866 log_error(ls, "unlock on MSTCPY %x", lkb->lkb_id);
1867 dlm_print_lkb(lkb);
1662 goto out; 1868 goto out;
1869 }
1663 1870
1664 if (args->flags & DLM_LKF_FORCEUNLOCK) 1871 /* an lkb may still exist even though the lock is EOL'ed due to a
1665 goto out_ok; 1872 cancel, unlock or failed noqueue request; an app can't use these
1873 locks; return same error as if the lkid had not been found at all */
1666 1874
1667 if (args->flags & DLM_LKF_CANCEL && 1875 if (lkb->lkb_flags & DLM_IFL_ENDOFLIFE) {
1668 lkb->lkb_status == DLM_LKSTS_GRANTED) 1876 log_debug(ls, "unlock on ENDOFLIFE %x", lkb->lkb_id);
1877 rv = -ENOENT;
1669 goto out; 1878 goto out;
1879 }
1670 1880
1671 if (!(args->flags & DLM_LKF_CANCEL) && 1881 /* an lkb may be waiting for an rsb lookup to complete where the
1672 lkb->lkb_status != DLM_LKSTS_GRANTED) 1882 lookup was initiated by another lock */
1673 goto out; 1883
1884 if (args->flags & (DLM_LKF_CANCEL | DLM_LKF_FORCEUNLOCK)) {
1885 if (!list_empty(&lkb->lkb_rsb_lookup)) {
1886 log_debug(ls, "unlock on rsb_lookup %x", lkb->lkb_id);
1887 list_del_init(&lkb->lkb_rsb_lookup);
1888 queue_cast(lkb->lkb_resource, lkb,
1889 args->flags & DLM_LKF_CANCEL ?
1890 -DLM_ECANCEL : -DLM_EUNLOCK);
1891 unhold_lkb(lkb); /* undoes create_lkb() */
1892 rv = -EBUSY;
1893 goto out;
1894 }
1895 }
1896
1897 /* cancel not allowed with another cancel/unlock in progress */
1898
1899 if (args->flags & DLM_LKF_CANCEL) {
1900 if (lkb->lkb_exflags & DLM_LKF_CANCEL)
1901 goto out;
1902
1903 if (is_overlap(lkb))
1904 goto out;
1905
1906 if (lkb->lkb_flags & DLM_IFL_RESEND) {
1907 lkb->lkb_flags |= DLM_IFL_OVERLAP_CANCEL;
1908 rv = -EBUSY;
1909 goto out;
1910 }
1911
1912 switch (lkb->lkb_wait_type) {
1913 case DLM_MSG_LOOKUP:
1914 case DLM_MSG_REQUEST:
1915 lkb->lkb_flags |= DLM_IFL_OVERLAP_CANCEL;
1916 rv = -EBUSY;
1917 goto out;
1918 case DLM_MSG_UNLOCK:
1919 case DLM_MSG_CANCEL:
1920 goto out;
1921 }
1922 /* add_to_waiters() will set OVERLAP_CANCEL */
1923 goto out_ok;
1924 }
1925
1926 /* do we need to allow a force-unlock if there's a normal unlock
1927 already in progress? in what conditions could the normal unlock
1928 fail such that we'd want to send a force-unlock to be sure? */
1929
1930 if (args->flags & DLM_LKF_FORCEUNLOCK) {
1931 if (lkb->lkb_exflags & DLM_LKF_FORCEUNLOCK)
1932 goto out;
1933
1934 if (is_overlap_unlock(lkb))
1935 goto out;
1674 1936
1937 if (lkb->lkb_flags & DLM_IFL_RESEND) {
1938 lkb->lkb_flags |= DLM_IFL_OVERLAP_UNLOCK;
1939 rv = -EBUSY;
1940 goto out;
1941 }
1942
1943 switch (lkb->lkb_wait_type) {
1944 case DLM_MSG_LOOKUP:
1945 case DLM_MSG_REQUEST:
1946 lkb->lkb_flags |= DLM_IFL_OVERLAP_UNLOCK;
1947 rv = -EBUSY;
1948 goto out;
1949 case DLM_MSG_UNLOCK:
1950 goto out;
1951 }
1952 /* add_to_waiters() will set OVERLAP_UNLOCK */
1953 goto out_ok;
1954 }
1955
1956 /* normal unlock not allowed if there's any op in progress */
1675 rv = -EBUSY; 1957 rv = -EBUSY;
1676 if (lkb->lkb_wait_type) 1958 if (lkb->lkb_wait_type || lkb->lkb_wait_count)
1677 goto out; 1959 goto out;
1678 1960
1679 out_ok: 1961 out_ok:
1680 lkb->lkb_exflags = args->flags; 1962 /* an overlapping op shouldn't blow away exflags from other op */
1963 lkb->lkb_exflags |= args->flags;
1681 lkb->lkb_sbflags = 0; 1964 lkb->lkb_sbflags = 0;
1682 lkb->lkb_astparam = args->astparam; 1965 lkb->lkb_astparam = args->astparam;
1683
1684 rv = 0; 1966 rv = 0;
1685 out: 1967 out:
1968 if (rv)
1969 log_debug(ls, "validate_unlock_args %d %x %x %x %x %d %s", rv,
1970 lkb->lkb_id, lkb->lkb_flags, lkb->lkb_exflags,
1971 args->flags, lkb->lkb_wait_type,
1972 lkb->lkb_resource->res_name);
1686 return rv; 1973 return rv;
1687} 1974}
1688 1975
@@ -1732,9 +2019,24 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
1732 goto out; 2019 goto out;
1733 } 2020 }
1734 2021
1735 if (can_be_queued(lkb)) { 2022 /* is_demoted() means the can_be_granted() above set the grmode
1736 if (is_demoted(lkb)) 2023 to NL, and left us on the granted queue. This auto-demotion
2024 (due to CONVDEADLK) might mean other locks, and/or this lock, are
2025 now grantable. We have to try to grant other converting locks
2026 before we try again to grant this one. */
2027
2028 if (is_demoted(lkb)) {
2029 grant_pending_convert(r, DLM_LOCK_IV);
2030 if (_can_be_granted(r, lkb, 1)) {
2031 grant_lock(r, lkb);
2032 queue_cast(r, lkb, 0);
1737 grant_pending_locks(r); 2033 grant_pending_locks(r);
2034 goto out;
2035 }
2036 /* else fall through and move to convert queue */
2037 }
2038
2039 if (can_be_queued(lkb)) {
1738 error = -EINPROGRESS; 2040 error = -EINPROGRESS;
1739 del_lkb(r, lkb); 2041 del_lkb(r, lkb);
1740 add_lkb(r, lkb, DLM_LKSTS_CONVERT); 2042 add_lkb(r, lkb, DLM_LKSTS_CONVERT);
@@ -1759,17 +2061,19 @@ static int do_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb)
1759 return -DLM_EUNLOCK; 2061 return -DLM_EUNLOCK;
1760} 2062}
1761 2063
1762/* FIXME: if revert_lock() finds that the lkb is granted, we should 2064/* returns: 0 did nothing, -DLM_ECANCEL canceled lock */
1763 skip the queue_cast(ECANCEL). It indicates that the request/convert
1764 completed (and queued a normal ast) just before the cancel; we don't
1765 want to clobber the sb_result for the normal ast with ECANCEL. */
1766 2065
1767static int do_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb) 2066static int do_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb)
1768{ 2067{
1769 revert_lock(r, lkb); 2068 int error;
1770 queue_cast(r, lkb, -DLM_ECANCEL); 2069
1771 grant_pending_locks(r); 2070 error = revert_lock(r, lkb);
1772 return -DLM_ECANCEL; 2071 if (error) {
2072 queue_cast(r, lkb, -DLM_ECANCEL);
2073 grant_pending_locks(r);
2074 return -DLM_ECANCEL;
2075 }
2076 return 0;
1773} 2077}
1774 2078
1775/* 2079/*
@@ -2035,6 +2339,8 @@ int dlm_unlock(dlm_lockspace_t *lockspace,
2035 2339
2036 if (error == -DLM_EUNLOCK || error == -DLM_ECANCEL) 2340 if (error == -DLM_EUNLOCK || error == -DLM_ECANCEL)
2037 error = 0; 2341 error = 0;
2342 if (error == -EBUSY && (flags & (DLM_LKF_CANCEL | DLM_LKF_FORCEUNLOCK)))
2343 error = 0;
2038 out_put: 2344 out_put:
2039 dlm_put_lkb(lkb); 2345 dlm_put_lkb(lkb);
2040 out: 2346 out:
@@ -2065,31 +2371,14 @@ int dlm_unlock(dlm_lockspace_t *lockspace,
2065 * receive_lookup_reply send_lookup_reply 2371 * receive_lookup_reply send_lookup_reply
2066 */ 2372 */
2067 2373
2068static int create_message(struct dlm_rsb *r, struct dlm_lkb *lkb, 2374static int _create_message(struct dlm_ls *ls, int mb_len,
2069 int to_nodeid, int mstype, 2375 int to_nodeid, int mstype,
2070 struct dlm_message **ms_ret, 2376 struct dlm_message **ms_ret,
2071 struct dlm_mhandle **mh_ret) 2377 struct dlm_mhandle **mh_ret)
2072{ 2378{
2073 struct dlm_message *ms; 2379 struct dlm_message *ms;
2074 struct dlm_mhandle *mh; 2380 struct dlm_mhandle *mh;
2075 char *mb; 2381 char *mb;
2076 int mb_len = sizeof(struct dlm_message);
2077
2078 switch (mstype) {
2079 case DLM_MSG_REQUEST:
2080 case DLM_MSG_LOOKUP:
2081 case DLM_MSG_REMOVE:
2082 mb_len += r->res_length;
2083 break;
2084 case DLM_MSG_CONVERT:
2085 case DLM_MSG_UNLOCK:
2086 case DLM_MSG_REQUEST_REPLY:
2087 case DLM_MSG_CONVERT_REPLY:
2088 case DLM_MSG_GRANT:
2089 if (lkb && lkb->lkb_lvbptr)
2090 mb_len += r->res_ls->ls_lvblen;
2091 break;
2092 }
2093 2382
2094 /* get_buffer gives us a message handle (mh) that we need to 2383 /* get_buffer gives us a message handle (mh) that we need to
2095 pass into lowcomms_commit and a message buffer (mb) that we 2384 pass into lowcomms_commit and a message buffer (mb) that we
@@ -2104,7 +2393,7 @@ static int create_message(struct dlm_rsb *r, struct dlm_lkb *lkb,
2104 ms = (struct dlm_message *) mb; 2393 ms = (struct dlm_message *) mb;
2105 2394
2106 ms->m_header.h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR); 2395 ms->m_header.h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR);
2107 ms->m_header.h_lockspace = r->res_ls->ls_global_id; 2396 ms->m_header.h_lockspace = ls->ls_global_id;
2108 ms->m_header.h_nodeid = dlm_our_nodeid(); 2397 ms->m_header.h_nodeid = dlm_our_nodeid();
2109 ms->m_header.h_length = mb_len; 2398 ms->m_header.h_length = mb_len;
2110 ms->m_header.h_cmd = DLM_MSG; 2399 ms->m_header.h_cmd = DLM_MSG;
@@ -2116,6 +2405,33 @@ static int create_message(struct dlm_rsb *r, struct dlm_lkb *lkb,
2116 return 0; 2405 return 0;
2117} 2406}
2118 2407
2408static int create_message(struct dlm_rsb *r, struct dlm_lkb *lkb,
2409 int to_nodeid, int mstype,
2410 struct dlm_message **ms_ret,
2411 struct dlm_mhandle **mh_ret)
2412{
2413 int mb_len = sizeof(struct dlm_message);
2414
2415 switch (mstype) {
2416 case DLM_MSG_REQUEST:
2417 case DLM_MSG_LOOKUP:
2418 case DLM_MSG_REMOVE:
2419 mb_len += r->res_length;
2420 break;
2421 case DLM_MSG_CONVERT:
2422 case DLM_MSG_UNLOCK:
2423 case DLM_MSG_REQUEST_REPLY:
2424 case DLM_MSG_CONVERT_REPLY:
2425 case DLM_MSG_GRANT:
2426 if (lkb && lkb->lkb_lvbptr)
2427 mb_len += r->res_ls->ls_lvblen;
2428 break;
2429 }
2430
2431 return _create_message(r->res_ls, mb_len, to_nodeid, mstype,
2432 ms_ret, mh_ret);
2433}
2434
2119/* further lowcomms enhancements or alternate implementations may make 2435/* further lowcomms enhancements or alternate implementations may make
2120 the return value from this function useful at some point */ 2436 the return value from this function useful at some point */
2121 2437
@@ -2176,7 +2492,9 @@ static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype)
2176 struct dlm_mhandle *mh; 2492 struct dlm_mhandle *mh;
2177 int to_nodeid, error; 2493 int to_nodeid, error;
2178 2494
2179 add_to_waiters(lkb, mstype); 2495 error = add_to_waiters(lkb, mstype);
2496 if (error)
2497 return error;
2180 2498
2181 to_nodeid = r->res_nodeid; 2499 to_nodeid = r->res_nodeid;
2182 2500
@@ -2192,7 +2510,7 @@ static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype)
2192 return 0; 2510 return 0;
2193 2511
2194 fail: 2512 fail:
2195 remove_from_waiters(lkb); 2513 remove_from_waiters(lkb, msg_reply_type(mstype));
2196 return error; 2514 return error;
2197} 2515}
2198 2516
@@ -2209,7 +2527,8 @@ static int send_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
2209 2527
2210 /* down conversions go without a reply from the master */ 2528 /* down conversions go without a reply from the master */
2211 if (!error && down_conversion(lkb)) { 2529 if (!error && down_conversion(lkb)) {
2212 remove_from_waiters(lkb); 2530 remove_from_waiters(lkb, DLM_MSG_CONVERT_REPLY);
2531 r->res_ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY;
2213 r->res_ls->ls_stub_ms.m_result = 0; 2532 r->res_ls->ls_stub_ms.m_result = 0;
2214 r->res_ls->ls_stub_ms.m_flags = lkb->lkb_flags; 2533 r->res_ls->ls_stub_ms.m_flags = lkb->lkb_flags;
2215 __receive_convert_reply(r, lkb, &r->res_ls->ls_stub_ms); 2534 __receive_convert_reply(r, lkb, &r->res_ls->ls_stub_ms);
@@ -2280,7 +2599,9 @@ static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb)
2280 struct dlm_mhandle *mh; 2599 struct dlm_mhandle *mh;
2281 int to_nodeid, error; 2600 int to_nodeid, error;
2282 2601
2283 add_to_waiters(lkb, DLM_MSG_LOOKUP); 2602 error = add_to_waiters(lkb, DLM_MSG_LOOKUP);
2603 if (error)
2604 return error;
2284 2605
2285 to_nodeid = dlm_dir_nodeid(r); 2606 to_nodeid = dlm_dir_nodeid(r);
2286 2607
@@ -2296,7 +2617,7 @@ static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb)
2296 return 0; 2617 return 0;
2297 2618
2298 fail: 2619 fail:
2299 remove_from_waiters(lkb); 2620 remove_from_waiters(lkb, DLM_MSG_LOOKUP_REPLY);
2300 return error; 2621 return error;
2301} 2622}
2302 2623
@@ -2656,6 +2977,8 @@ static void receive_grant(struct dlm_ls *ls, struct dlm_message *ms)
2656 lock_rsb(r); 2977 lock_rsb(r);
2657 2978
2658 receive_flags_reply(lkb, ms); 2979 receive_flags_reply(lkb, ms);
2980 if (is_altmode(lkb))
2981 munge_altmode(lkb, ms);
2659 grant_lock_pc(r, lkb, ms); 2982 grant_lock_pc(r, lkb, ms);
2660 queue_cast(r, lkb, 0); 2983 queue_cast(r, lkb, 0);
2661 2984
@@ -2736,11 +3059,16 @@ static void receive_remove(struct dlm_ls *ls, struct dlm_message *ms)
2736 dlm_dir_remove_entry(ls, from_nodeid, ms->m_extra, len); 3059 dlm_dir_remove_entry(ls, from_nodeid, ms->m_extra, len);
2737} 3060}
2738 3061
3062static void receive_purge(struct dlm_ls *ls, struct dlm_message *ms)
3063{
3064 do_purge(ls, ms->m_nodeid, ms->m_pid);
3065}
3066
2739static void receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms) 3067static void receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms)
2740{ 3068{
2741 struct dlm_lkb *lkb; 3069 struct dlm_lkb *lkb;
2742 struct dlm_rsb *r; 3070 struct dlm_rsb *r;
2743 int error, mstype; 3071 int error, mstype, result;
2744 3072
2745 error = find_lkb(ls, ms->m_remid, &lkb); 3073 error = find_lkb(ls, ms->m_remid, &lkb);
2746 if (error) { 3074 if (error) {
@@ -2749,20 +3077,15 @@ static void receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms)
2749 } 3077 }
2750 DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); 3078 DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););
2751 3079
2752 mstype = lkb->lkb_wait_type;
2753 error = remove_from_waiters(lkb);
2754 if (error) {
2755 log_error(ls, "receive_request_reply not on waiters");
2756 goto out;
2757 }
2758
2759 /* this is the value returned from do_request() on the master */
2760 error = ms->m_result;
2761
2762 r = lkb->lkb_resource; 3080 r = lkb->lkb_resource;
2763 hold_rsb(r); 3081 hold_rsb(r);
2764 lock_rsb(r); 3082 lock_rsb(r);
2765 3083
3084 mstype = lkb->lkb_wait_type;
3085 error = remove_from_waiters(lkb, DLM_MSG_REQUEST_REPLY);
3086 if (error)
3087 goto out;
3088
2766 /* Optimization: the dir node was also the master, so it took our 3089 /* Optimization: the dir node was also the master, so it took our
2767 lookup as a request and sent request reply instead of lookup reply */ 3090 lookup as a request and sent request reply instead of lookup reply */
2768 if (mstype == DLM_MSG_LOOKUP) { 3091 if (mstype == DLM_MSG_LOOKUP) {
@@ -2770,14 +3093,15 @@ static void receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms)
2770 lkb->lkb_nodeid = r->res_nodeid; 3093 lkb->lkb_nodeid = r->res_nodeid;
2771 } 3094 }
2772 3095
2773 switch (error) { 3096 /* this is the value returned from do_request() on the master */
3097 result = ms->m_result;
3098
3099 switch (result) {
2774 case -EAGAIN: 3100 case -EAGAIN:
2775 /* request would block (be queued) on remote master; 3101 /* request would block (be queued) on remote master */
2776 the unhold undoes the original ref from create_lkb()
2777 so it leads to the lkb being freed */
2778 queue_cast(r, lkb, -EAGAIN); 3102 queue_cast(r, lkb, -EAGAIN);
2779 confirm_master(r, -EAGAIN); 3103 confirm_master(r, -EAGAIN);
2780 unhold_lkb(lkb); 3104 unhold_lkb(lkb); /* undoes create_lkb() */
2781 break; 3105 break;
2782 3106
2783 case -EINPROGRESS: 3107 case -EINPROGRESS:
@@ -2785,41 +3109,64 @@ static void receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms)
2785 /* request was queued or granted on remote master */ 3109 /* request was queued or granted on remote master */
2786 receive_flags_reply(lkb, ms); 3110 receive_flags_reply(lkb, ms);
2787 lkb->lkb_remid = ms->m_lkid; 3111 lkb->lkb_remid = ms->m_lkid;
2788 if (error) 3112 if (is_altmode(lkb))
3113 munge_altmode(lkb, ms);
3114 if (result)
2789 add_lkb(r, lkb, DLM_LKSTS_WAITING); 3115 add_lkb(r, lkb, DLM_LKSTS_WAITING);
2790 else { 3116 else {
2791 grant_lock_pc(r, lkb, ms); 3117 grant_lock_pc(r, lkb, ms);
2792 queue_cast(r, lkb, 0); 3118 queue_cast(r, lkb, 0);
2793 } 3119 }
2794 confirm_master(r, error); 3120 confirm_master(r, result);
2795 break; 3121 break;
2796 3122
2797 case -EBADR: 3123 case -EBADR:
2798 case -ENOTBLK: 3124 case -ENOTBLK:
2799 /* find_rsb failed to find rsb or rsb wasn't master */ 3125 /* find_rsb failed to find rsb or rsb wasn't master */
3126 log_debug(ls, "receive_request_reply %x %x master diff %d %d",
3127 lkb->lkb_id, lkb->lkb_flags, r->res_nodeid, result);
2800 r->res_nodeid = -1; 3128 r->res_nodeid = -1;
2801 lkb->lkb_nodeid = -1; 3129 lkb->lkb_nodeid = -1;
2802 _request_lock(r, lkb); 3130
3131 if (is_overlap(lkb)) {
3132 /* we'll ignore error in cancel/unlock reply */
3133 queue_cast_overlap(r, lkb);
3134 unhold_lkb(lkb); /* undoes create_lkb() */
3135 } else
3136 _request_lock(r, lkb);
2803 break; 3137 break;
2804 3138
2805 default: 3139 default:
2806 log_error(ls, "receive_request_reply error %d", error); 3140 log_error(ls, "receive_request_reply %x error %d",
3141 lkb->lkb_id, result);
2807 } 3142 }
2808 3143
3144 if (is_overlap_unlock(lkb) && (result == 0 || result == -EINPROGRESS)) {
3145 log_debug(ls, "receive_request_reply %x result %d unlock",
3146 lkb->lkb_id, result);
3147 lkb->lkb_flags &= ~DLM_IFL_OVERLAP_UNLOCK;
3148 lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL;
3149 send_unlock(r, lkb);
3150 } else if (is_overlap_cancel(lkb) && (result == -EINPROGRESS)) {
3151 log_debug(ls, "receive_request_reply %x cancel", lkb->lkb_id);
3152 lkb->lkb_flags &= ~DLM_IFL_OVERLAP_UNLOCK;
3153 lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL;
3154 send_cancel(r, lkb);
3155 } else {
3156 lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL;
3157 lkb->lkb_flags &= ~DLM_IFL_OVERLAP_UNLOCK;
3158 }
3159 out:
2809 unlock_rsb(r); 3160 unlock_rsb(r);
2810 put_rsb(r); 3161 put_rsb(r);
2811 out:
2812 dlm_put_lkb(lkb); 3162 dlm_put_lkb(lkb);
2813} 3163}
2814 3164
2815static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, 3165static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
2816 struct dlm_message *ms) 3166 struct dlm_message *ms)
2817{ 3167{
2818 int error = ms->m_result;
2819
2820 /* this is the value returned from do_convert() on the master */ 3168 /* this is the value returned from do_convert() on the master */
2821 3169 switch (ms->m_result) {
2822 switch (error) {
2823 case -EAGAIN: 3170 case -EAGAIN:
2824 /* convert would block (be queued) on remote master */ 3171 /* convert would block (be queued) on remote master */
2825 queue_cast(r, lkb, -EAGAIN); 3172 queue_cast(r, lkb, -EAGAIN);
@@ -2827,6 +3174,9 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
2827 3174
2828 case -EINPROGRESS: 3175 case -EINPROGRESS:
2829 /* convert was queued on remote master */ 3176 /* convert was queued on remote master */
3177 receive_flags_reply(lkb, ms);
3178 if (is_demoted(lkb))
3179 munge_demoted(lkb, ms);
2830 del_lkb(r, lkb); 3180 del_lkb(r, lkb);
2831 add_lkb(r, lkb, DLM_LKSTS_CONVERT); 3181 add_lkb(r, lkb, DLM_LKSTS_CONVERT);
2832 break; 3182 break;
@@ -2834,24 +3184,33 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
2834 case 0: 3184 case 0:
2835 /* convert was granted on remote master */ 3185 /* convert was granted on remote master */
2836 receive_flags_reply(lkb, ms); 3186 receive_flags_reply(lkb, ms);
3187 if (is_demoted(lkb))
3188 munge_demoted(lkb, ms);
2837 grant_lock_pc(r, lkb, ms); 3189 grant_lock_pc(r, lkb, ms);
2838 queue_cast(r, lkb, 0); 3190 queue_cast(r, lkb, 0);
2839 break; 3191 break;
2840 3192
2841 default: 3193 default:
2842 log_error(r->res_ls, "receive_convert_reply error %d", error); 3194 log_error(r->res_ls, "receive_convert_reply %x error %d",
3195 lkb->lkb_id, ms->m_result);
2843 } 3196 }
2844} 3197}
2845 3198
2846static void _receive_convert_reply(struct dlm_lkb *lkb, struct dlm_message *ms) 3199static void _receive_convert_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
2847{ 3200{
2848 struct dlm_rsb *r = lkb->lkb_resource; 3201 struct dlm_rsb *r = lkb->lkb_resource;
3202 int error;
2849 3203
2850 hold_rsb(r); 3204 hold_rsb(r);
2851 lock_rsb(r); 3205 lock_rsb(r);
2852 3206
2853 __receive_convert_reply(r, lkb, ms); 3207 /* stub reply can happen with waiters_mutex held */
3208 error = remove_from_waiters_ms(lkb, ms);
3209 if (error)
3210 goto out;
2854 3211
3212 __receive_convert_reply(r, lkb, ms);
3213 out:
2855 unlock_rsb(r); 3214 unlock_rsb(r);
2856 put_rsb(r); 3215 put_rsb(r);
2857} 3216}
@@ -2868,37 +3227,38 @@ static void receive_convert_reply(struct dlm_ls *ls, struct dlm_message *ms)
2868 } 3227 }
2869 DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); 3228 DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););
2870 3229
2871 error = remove_from_waiters(lkb);
2872 if (error) {
2873 log_error(ls, "receive_convert_reply not on waiters");
2874 goto out;
2875 }
2876
2877 _receive_convert_reply(lkb, ms); 3230 _receive_convert_reply(lkb, ms);
2878 out:
2879 dlm_put_lkb(lkb); 3231 dlm_put_lkb(lkb);
2880} 3232}
2881 3233
2882static void _receive_unlock_reply(struct dlm_lkb *lkb, struct dlm_message *ms) 3234static void _receive_unlock_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
2883{ 3235{
2884 struct dlm_rsb *r = lkb->lkb_resource; 3236 struct dlm_rsb *r = lkb->lkb_resource;
2885 int error = ms->m_result; 3237 int error;
2886 3238
2887 hold_rsb(r); 3239 hold_rsb(r);
2888 lock_rsb(r); 3240 lock_rsb(r);
2889 3241
3242 /* stub reply can happen with waiters_mutex held */
3243 error = remove_from_waiters_ms(lkb, ms);
3244 if (error)
3245 goto out;
3246
2890 /* this is the value returned from do_unlock() on the master */ 3247 /* this is the value returned from do_unlock() on the master */
2891 3248
2892 switch (error) { 3249 switch (ms->m_result) {
2893 case -DLM_EUNLOCK: 3250 case -DLM_EUNLOCK:
2894 receive_flags_reply(lkb, ms); 3251 receive_flags_reply(lkb, ms);
2895 remove_lock_pc(r, lkb); 3252 remove_lock_pc(r, lkb);
2896 queue_cast(r, lkb, -DLM_EUNLOCK); 3253 queue_cast(r, lkb, -DLM_EUNLOCK);
2897 break; 3254 break;
3255 case -ENOENT:
3256 break;
2898 default: 3257 default:
2899 log_error(r->res_ls, "receive_unlock_reply error %d", error); 3258 log_error(r->res_ls, "receive_unlock_reply %x error %d",
3259 lkb->lkb_id, ms->m_result);
2900 } 3260 }
2901 3261 out:
2902 unlock_rsb(r); 3262 unlock_rsb(r);
2903 put_rsb(r); 3263 put_rsb(r);
2904} 3264}
@@ -2915,37 +3275,39 @@ static void receive_unlock_reply(struct dlm_ls *ls, struct dlm_message *ms)
2915 } 3275 }
2916 DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); 3276 DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););
2917 3277
2918 error = remove_from_waiters(lkb);
2919 if (error) {
2920 log_error(ls, "receive_unlock_reply not on waiters");
2921 goto out;
2922 }
2923
2924 _receive_unlock_reply(lkb, ms); 3278 _receive_unlock_reply(lkb, ms);
2925 out:
2926 dlm_put_lkb(lkb); 3279 dlm_put_lkb(lkb);
2927} 3280}
2928 3281
2929static void _receive_cancel_reply(struct dlm_lkb *lkb, struct dlm_message *ms) 3282static void _receive_cancel_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
2930{ 3283{
2931 struct dlm_rsb *r = lkb->lkb_resource; 3284 struct dlm_rsb *r = lkb->lkb_resource;
2932 int error = ms->m_result; 3285 int error;
2933 3286
2934 hold_rsb(r); 3287 hold_rsb(r);
2935 lock_rsb(r); 3288 lock_rsb(r);
2936 3289
3290 /* stub reply can happen with waiters_mutex held */
3291 error = remove_from_waiters_ms(lkb, ms);
3292 if (error)
3293 goto out;
3294
2937 /* this is the value returned from do_cancel() on the master */ 3295 /* this is the value returned from do_cancel() on the master */
2938 3296
2939 switch (error) { 3297 switch (ms->m_result) {
2940 case -DLM_ECANCEL: 3298 case -DLM_ECANCEL:
2941 receive_flags_reply(lkb, ms); 3299 receive_flags_reply(lkb, ms);
2942 revert_lock_pc(r, lkb); 3300 revert_lock_pc(r, lkb);
2943 queue_cast(r, lkb, -DLM_ECANCEL); 3301 if (ms->m_result)
3302 queue_cast(r, lkb, -DLM_ECANCEL);
3303 break;
3304 case 0:
2944 break; 3305 break;
2945 default: 3306 default:
2946 log_error(r->res_ls, "receive_cancel_reply error %d", error); 3307 log_error(r->res_ls, "receive_cancel_reply %x error %d",
3308 lkb->lkb_id, ms->m_result);
2947 } 3309 }
2948 3310 out:
2949 unlock_rsb(r); 3311 unlock_rsb(r);
2950 put_rsb(r); 3312 put_rsb(r);
2951} 3313}
@@ -2962,14 +3324,7 @@ static void receive_cancel_reply(struct dlm_ls *ls, struct dlm_message *ms)
2962 } 3324 }
2963 DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); 3325 DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););
2964 3326
2965 error = remove_from_waiters(lkb);
2966 if (error) {
2967 log_error(ls, "receive_cancel_reply not on waiters");
2968 goto out;
2969 }
2970
2971 _receive_cancel_reply(lkb, ms); 3327 _receive_cancel_reply(lkb, ms);
2972 out:
2973 dlm_put_lkb(lkb); 3328 dlm_put_lkb(lkb);
2974} 3329}
2975 3330
@@ -2985,20 +3340,17 @@ static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms)
2985 return; 3340 return;
2986 } 3341 }
2987 3342
2988 error = remove_from_waiters(lkb); 3343 /* ms->m_result is the value returned by dlm_dir_lookup on dir node
2989 if (error) {
2990 log_error(ls, "receive_lookup_reply not on waiters");
2991 goto out;
2992 }
2993
2994 /* this is the value returned by dlm_dir_lookup on dir node
2995 FIXME: will a non-zero error ever be returned? */ 3344 FIXME: will a non-zero error ever be returned? */
2996 error = ms->m_result;
2997 3345
2998 r = lkb->lkb_resource; 3346 r = lkb->lkb_resource;
2999 hold_rsb(r); 3347 hold_rsb(r);
3000 lock_rsb(r); 3348 lock_rsb(r);
3001 3349
3350 error = remove_from_waiters(lkb, DLM_MSG_LOOKUP_REPLY);
3351 if (error)
3352 goto out;
3353
3002 ret_nodeid = ms->m_nodeid; 3354 ret_nodeid = ms->m_nodeid;
3003 if (ret_nodeid == dlm_our_nodeid()) { 3355 if (ret_nodeid == dlm_our_nodeid()) {
3004 r->res_nodeid = 0; 3356 r->res_nodeid = 0;
@@ -3009,14 +3361,22 @@ static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms)
3009 r->res_nodeid = ret_nodeid; 3361 r->res_nodeid = ret_nodeid;
3010 } 3362 }
3011 3363
3364 if (is_overlap(lkb)) {
3365 log_debug(ls, "receive_lookup_reply %x unlock %x",
3366 lkb->lkb_id, lkb->lkb_flags);
3367 queue_cast_overlap(r, lkb);
3368 unhold_lkb(lkb); /* undoes create_lkb() */
3369 goto out_list;
3370 }
3371
3012 _request_lock(r, lkb); 3372 _request_lock(r, lkb);
3013 3373
3374 out_list:
3014 if (!ret_nodeid) 3375 if (!ret_nodeid)
3015 process_lookup_list(r); 3376 process_lookup_list(r);
3016 3377 out:
3017 unlock_rsb(r); 3378 unlock_rsb(r);
3018 put_rsb(r); 3379 put_rsb(r);
3019 out:
3020 dlm_put_lkb(lkb); 3380 dlm_put_lkb(lkb);
3021} 3381}
3022 3382
@@ -3133,6 +3493,12 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
3133 receive_lookup_reply(ls, ms); 3493 receive_lookup_reply(ls, ms);
3134 break; 3494 break;
3135 3495
3496 /* other messages */
3497
3498 case DLM_MSG_PURGE:
3499 receive_purge(ls, ms);
3500 break;
3501
3136 default: 3502 default:
3137 log_error(ls, "unknown message type %d", ms->m_type); 3503 log_error(ls, "unknown message type %d", ms->m_type);
3138 } 3504 }
@@ -3153,9 +3519,9 @@ static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb)
3153{ 3519{
3154 if (middle_conversion(lkb)) { 3520 if (middle_conversion(lkb)) {
3155 hold_lkb(lkb); 3521 hold_lkb(lkb);
3522 ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY;
3156 ls->ls_stub_ms.m_result = -EINPROGRESS; 3523 ls->ls_stub_ms.m_result = -EINPROGRESS;
3157 ls->ls_stub_ms.m_flags = lkb->lkb_flags; 3524 ls->ls_stub_ms.m_flags = lkb->lkb_flags;
3158 _remove_from_waiters(lkb);
3159 _receive_convert_reply(lkb, &ls->ls_stub_ms); 3525 _receive_convert_reply(lkb, &ls->ls_stub_ms);
3160 3526
3161 /* Same special case as in receive_rcom_lock_args() */ 3527 /* Same special case as in receive_rcom_lock_args() */
@@ -3227,18 +3593,18 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
3227 3593
3228 case DLM_MSG_UNLOCK: 3594 case DLM_MSG_UNLOCK:
3229 hold_lkb(lkb); 3595 hold_lkb(lkb);
3596 ls->ls_stub_ms.m_type = DLM_MSG_UNLOCK_REPLY;
3230 ls->ls_stub_ms.m_result = -DLM_EUNLOCK; 3597 ls->ls_stub_ms.m_result = -DLM_EUNLOCK;
3231 ls->ls_stub_ms.m_flags = lkb->lkb_flags; 3598 ls->ls_stub_ms.m_flags = lkb->lkb_flags;
3232 _remove_from_waiters(lkb);
3233 _receive_unlock_reply(lkb, &ls->ls_stub_ms); 3599 _receive_unlock_reply(lkb, &ls->ls_stub_ms);
3234 dlm_put_lkb(lkb); 3600 dlm_put_lkb(lkb);
3235 break; 3601 break;
3236 3602
3237 case DLM_MSG_CANCEL: 3603 case DLM_MSG_CANCEL:
3238 hold_lkb(lkb); 3604 hold_lkb(lkb);
3605 ls->ls_stub_ms.m_type = DLM_MSG_CANCEL_REPLY;
3239 ls->ls_stub_ms.m_result = -DLM_ECANCEL; 3606 ls->ls_stub_ms.m_result = -DLM_ECANCEL;
3240 ls->ls_stub_ms.m_flags = lkb->lkb_flags; 3607 ls->ls_stub_ms.m_flags = lkb->lkb_flags;
3241 _remove_from_waiters(lkb);
3242 _receive_cancel_reply(lkb, &ls->ls_stub_ms); 3608 _receive_cancel_reply(lkb, &ls->ls_stub_ms);
3243 dlm_put_lkb(lkb); 3609 dlm_put_lkb(lkb);
3244 break; 3610 break;
@@ -3252,37 +3618,47 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
3252 mutex_unlock(&ls->ls_waiters_mutex); 3618 mutex_unlock(&ls->ls_waiters_mutex);
3253} 3619}
3254 3620
3255static int remove_resend_waiter(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) 3621static struct dlm_lkb *find_resend_waiter(struct dlm_ls *ls)
3256{ 3622{
3257 struct dlm_lkb *lkb; 3623 struct dlm_lkb *lkb;
3258 int rv = 0; 3624 int found = 0;
3259 3625
3260 mutex_lock(&ls->ls_waiters_mutex); 3626 mutex_lock(&ls->ls_waiters_mutex);
3261 list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) { 3627 list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
3262 if (lkb->lkb_flags & DLM_IFL_RESEND) { 3628 if (lkb->lkb_flags & DLM_IFL_RESEND) {
3263 rv = lkb->lkb_wait_type; 3629 hold_lkb(lkb);
3264 _remove_from_waiters(lkb); 3630 found = 1;
3265 lkb->lkb_flags &= ~DLM_IFL_RESEND;
3266 break; 3631 break;
3267 } 3632 }
3268 } 3633 }
3269 mutex_unlock(&ls->ls_waiters_mutex); 3634 mutex_unlock(&ls->ls_waiters_mutex);
3270 3635
3271 if (!rv) 3636 if (!found)
3272 lkb = NULL; 3637 lkb = NULL;
3273 *lkb_ret = lkb; 3638 return lkb;
3274 return rv;
3275} 3639}
3276 3640
3277/* Deal with lookups and lkb's marked RESEND from _pre. We may now be the 3641/* Deal with lookups and lkb's marked RESEND from _pre. We may now be the
3278 master or dir-node for r. Processing the lkb may result in it being placed 3642 master or dir-node for r. Processing the lkb may result in it being placed
3279 back on waiters. */ 3643 back on waiters. */
3280 3644
3645/* We do this after normal locking has been enabled and any saved messages
3646 (in requestqueue) have been processed. We should be confident that at
3647 this point we won't get or process a reply to any of these waiting
3648 operations. But, new ops may be coming in on the rsbs/locks here from
3649 userspace or remotely. */
3650
3651/* there may have been an overlap unlock/cancel prior to recovery or after
3652 recovery. if before, the lkb may still have a pos wait_count; if after, the
3653 overlap flag would just have been set and nothing new sent. we can be
3654 confident here than any replies to either the initial op or overlap ops
3655 prior to recovery have been received. */
3656
3281int dlm_recover_waiters_post(struct dlm_ls *ls) 3657int dlm_recover_waiters_post(struct dlm_ls *ls)
3282{ 3658{
3283 struct dlm_lkb *lkb; 3659 struct dlm_lkb *lkb;
3284 struct dlm_rsb *r; 3660 struct dlm_rsb *r;
3285 int error = 0, mstype; 3661 int error = 0, mstype, err, oc, ou;
3286 3662
3287 while (1) { 3663 while (1) {
3288 if (dlm_locking_stopped(ls)) { 3664 if (dlm_locking_stopped(ls)) {
@@ -3291,48 +3667,78 @@ int dlm_recover_waiters_post(struct dlm_ls *ls)
3291 break; 3667 break;
3292 } 3668 }
3293 3669
3294 mstype = remove_resend_waiter(ls, &lkb); 3670 lkb = find_resend_waiter(ls);
3295 if (!mstype) 3671 if (!lkb)
3296 break; 3672 break;
3297 3673
3298 r = lkb->lkb_resource; 3674 r = lkb->lkb_resource;
3675 hold_rsb(r);
3676 lock_rsb(r);
3677
3678 mstype = lkb->lkb_wait_type;
3679 oc = is_overlap_cancel(lkb);
3680 ou = is_overlap_unlock(lkb);
3681 err = 0;
3299 3682
3300 log_debug(ls, "recover_waiters_post %x type %d flags %x %s", 3683 log_debug(ls, "recover_waiters_post %x type %d flags %x %s",
3301 lkb->lkb_id, mstype, lkb->lkb_flags, r->res_name); 3684 lkb->lkb_id, mstype, lkb->lkb_flags, r->res_name);
3302 3685
3303 switch (mstype) { 3686 /* At this point we assume that we won't get a reply to any
3304 3687 previous op or overlap op on this lock. First, do a big
3305 case DLM_MSG_LOOKUP: 3688 remove_from_waiters() for all previous ops. */
3306 hold_rsb(r); 3689
3307 lock_rsb(r); 3690 lkb->lkb_flags &= ~DLM_IFL_RESEND;
3308 _request_lock(r, lkb); 3691 lkb->lkb_flags &= ~DLM_IFL_OVERLAP_UNLOCK;
3309 if (is_master(r)) 3692 lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL;
3310 confirm_master(r, 0); 3693 lkb->lkb_wait_type = 0;
3311 unlock_rsb(r); 3694 lkb->lkb_wait_count = 0;
3312 put_rsb(r); 3695 mutex_lock(&ls->ls_waiters_mutex);
3313 break; 3696 list_del_init(&lkb->lkb_wait_reply);
3314 3697 mutex_unlock(&ls->ls_waiters_mutex);
3315 case DLM_MSG_REQUEST: 3698 unhold_lkb(lkb); /* for waiters list */
3316 hold_rsb(r); 3699
3317 lock_rsb(r); 3700 if (oc || ou) {
3318 _request_lock(r, lkb); 3701 /* do an unlock or cancel instead of resending */
3319 if (is_master(r)) 3702 switch (mstype) {
3320 confirm_master(r, 0); 3703 case DLM_MSG_LOOKUP:
3321 unlock_rsb(r); 3704 case DLM_MSG_REQUEST:
3322 put_rsb(r); 3705 queue_cast(r, lkb, ou ? -DLM_EUNLOCK :
3323 break; 3706 -DLM_ECANCEL);
3324 3707 unhold_lkb(lkb); /* undoes create_lkb() */
3325 case DLM_MSG_CONVERT: 3708 break;
3326 hold_rsb(r); 3709 case DLM_MSG_CONVERT:
3327 lock_rsb(r); 3710 if (oc) {
3328 _convert_lock(r, lkb); 3711 queue_cast(r, lkb, -DLM_ECANCEL);
3329 unlock_rsb(r); 3712 } else {
3330 put_rsb(r); 3713 lkb->lkb_exflags |= DLM_LKF_FORCEUNLOCK;
3331 break; 3714 _unlock_lock(r, lkb);
3332 3715 }
3333 default: 3716 break;
3334 log_error(ls, "recover_waiters_post type %d", mstype); 3717 default:
3718 err = 1;
3719 }
3720 } else {
3721 switch (mstype) {
3722 case DLM_MSG_LOOKUP:
3723 case DLM_MSG_REQUEST:
3724 _request_lock(r, lkb);
3725 if (is_master(r))
3726 confirm_master(r, 0);
3727 break;
3728 case DLM_MSG_CONVERT:
3729 _convert_lock(r, lkb);
3730 break;
3731 default:
3732 err = 1;
3733 }
3335 } 3734 }
3735
3736 if (err)
3737 log_error(ls, "recover_waiters_post %x %d %x %d %d",
3738 lkb->lkb_id, mstype, lkb->lkb_flags, oc, ou);
3739 unlock_rsb(r);
3740 put_rsb(r);
3741 dlm_put_lkb(lkb);
3336 } 3742 }
3337 3743
3338 return error; 3744 return error;
@@ -3684,7 +4090,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
3684 4090
3685 /* add this new lkb to the per-process list of locks */ 4091 /* add this new lkb to the per-process list of locks */
3686 spin_lock(&ua->proc->locks_spin); 4092 spin_lock(&ua->proc->locks_spin);
3687 kref_get(&lkb->lkb_ref); 4093 hold_lkb(lkb);
3688 list_add_tail(&lkb->lkb_ownqueue, &ua->proc->locks); 4094 list_add_tail(&lkb->lkb_ownqueue, &ua->proc->locks);
3689 spin_unlock(&ua->proc->locks_spin); 4095 spin_unlock(&ua->proc->locks_spin);
3690 out: 4096 out:
@@ -3774,6 +4180,9 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
3774 4180
3775 if (error == -DLM_EUNLOCK) 4181 if (error == -DLM_EUNLOCK)
3776 error = 0; 4182 error = 0;
4183 /* from validate_unlock_args() */
4184 if (error == -EBUSY && (flags & DLM_LKF_FORCEUNLOCK))
4185 error = 0;
3777 if (error) 4186 if (error)
3778 goto out_put; 4187 goto out_put;
3779 4188
@@ -3786,6 +4195,7 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
3786 dlm_put_lkb(lkb); 4195 dlm_put_lkb(lkb);
3787 out: 4196 out:
3788 unlock_recovery(ls); 4197 unlock_recovery(ls);
4198 kfree(ua_tmp);
3789 return error; 4199 return error;
3790} 4200}
3791 4201
@@ -3815,33 +4225,37 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
3815 4225
3816 if (error == -DLM_ECANCEL) 4226 if (error == -DLM_ECANCEL)
3817 error = 0; 4227 error = 0;
3818 if (error) 4228 /* from validate_unlock_args() */
3819 goto out_put; 4229 if (error == -EBUSY)
3820 4230 error = 0;
3821 /* this lkb was removed from the WAITING queue */
3822 if (lkb->lkb_grmode == DLM_LOCK_IV) {
3823 spin_lock(&ua->proc->locks_spin);
3824 list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking);
3825 spin_unlock(&ua->proc->locks_spin);
3826 }
3827 out_put: 4231 out_put:
3828 dlm_put_lkb(lkb); 4232 dlm_put_lkb(lkb);
3829 out: 4233 out:
3830 unlock_recovery(ls); 4234 unlock_recovery(ls);
4235 kfree(ua_tmp);
3831 return error; 4236 return error;
3832} 4237}
3833 4238
4239/* lkb's that are removed from the waiters list by revert are just left on the
4240 orphans list with the granted orphan locks, to be freed by purge */
4241
3834static int orphan_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb) 4242static int orphan_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb)
3835{ 4243{
3836 struct dlm_user_args *ua = (struct dlm_user_args *)lkb->lkb_astparam; 4244 struct dlm_user_args *ua = (struct dlm_user_args *)lkb->lkb_astparam;
4245 struct dlm_args args;
4246 int error;
3837 4247
3838 if (ua->lksb.sb_lvbptr) 4248 hold_lkb(lkb);
3839 kfree(ua->lksb.sb_lvbptr); 4249 mutex_lock(&ls->ls_orphans_mutex);
3840 kfree(ua); 4250 list_add_tail(&lkb->lkb_ownqueue, &ls->ls_orphans);
3841 lkb->lkb_astparam = (long)NULL; 4251 mutex_unlock(&ls->ls_orphans_mutex);
3842 4252
3843 /* TODO: propogate to master if needed */ 4253 set_unlock_args(0, ua, &args);
3844 return 0; 4254
4255 error = cancel_lock(ls, lkb, &args);
4256 if (error == -DLM_ECANCEL)
4257 error = 0;
4258 return error;
3845} 4259}
3846 4260
3847/* The force flag allows the unlock to go ahead even if the lkb isn't granted. 4261/* The force flag allows the unlock to go ahead even if the lkb isn't granted.
@@ -3853,10 +4267,6 @@ static int unlock_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb)
3853 struct dlm_args args; 4267 struct dlm_args args;
3854 int error; 4268 int error;
3855 4269
3856 /* FIXME: we need to handle the case where the lkb is in limbo
3857 while the rsb is being looked up, currently we assert in
3858 _unlock_lock/is_remote because rsb nodeid is -1. */
3859
3860 set_unlock_args(DLM_LKF_FORCEUNLOCK, ua, &args); 4270 set_unlock_args(DLM_LKF_FORCEUNLOCK, ua, &args);
3861 4271
3862 error = unlock_lock(ls, lkb, &args); 4272 error = unlock_lock(ls, lkb, &args);
@@ -3865,6 +4275,31 @@ static int unlock_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb)
3865 return error; 4275 return error;
3866} 4276}
3867 4277
4278/* We have to release clear_proc_locks mutex before calling unlock_proc_lock()
4279 (which does lock_rsb) due to deadlock with receiving a message that does
4280 lock_rsb followed by dlm_user_add_ast() */
4281
4282static struct dlm_lkb *del_proc_lock(struct dlm_ls *ls,
4283 struct dlm_user_proc *proc)
4284{
4285 struct dlm_lkb *lkb = NULL;
4286
4287 mutex_lock(&ls->ls_clear_proc_locks);
4288 if (list_empty(&proc->locks))
4289 goto out;
4290
4291 lkb = list_entry(proc->locks.next, struct dlm_lkb, lkb_ownqueue);
4292 list_del_init(&lkb->lkb_ownqueue);
4293
4294 if (lkb->lkb_exflags & DLM_LKF_PERSISTENT)
4295 lkb->lkb_flags |= DLM_IFL_ORPHAN;
4296 else
4297 lkb->lkb_flags |= DLM_IFL_DEAD;
4298 out:
4299 mutex_unlock(&ls->ls_clear_proc_locks);
4300 return lkb;
4301}
4302
3868/* The ls_clear_proc_locks mutex protects against dlm_user_add_asts() which 4303/* The ls_clear_proc_locks mutex protects against dlm_user_add_asts() which
3869 1) references lkb->ua which we free here and 2) adds lkbs to proc->asts, 4304 1) references lkb->ua which we free here and 2) adds lkbs to proc->asts,
3870 which we clear here. */ 4305 which we clear here. */
@@ -3880,18 +4315,15 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
3880 struct dlm_lkb *lkb, *safe; 4315 struct dlm_lkb *lkb, *safe;
3881 4316
3882 lock_recovery(ls); 4317 lock_recovery(ls);
3883 mutex_lock(&ls->ls_clear_proc_locks);
3884 4318
3885 list_for_each_entry_safe(lkb, safe, &proc->locks, lkb_ownqueue) { 4319 while (1) {
3886 list_del_init(&lkb->lkb_ownqueue); 4320 lkb = del_proc_lock(ls, proc);
3887 4321 if (!lkb)
3888 if (lkb->lkb_exflags & DLM_LKF_PERSISTENT) { 4322 break;
3889 lkb->lkb_flags |= DLM_IFL_ORPHAN; 4323 if (lkb->lkb_exflags & DLM_LKF_PERSISTENT)
3890 orphan_proc_lock(ls, lkb); 4324 orphan_proc_lock(ls, lkb);
3891 } else { 4325 else
3892 lkb->lkb_flags |= DLM_IFL_DEAD;
3893 unlock_proc_lock(ls, lkb); 4326 unlock_proc_lock(ls, lkb);
3894 }
3895 4327
3896 /* this removes the reference for the proc->locks list 4328 /* this removes the reference for the proc->locks list
3897 added by dlm_user_request, it may result in the lkb 4329 added by dlm_user_request, it may result in the lkb
@@ -3900,6 +4332,8 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
3900 dlm_put_lkb(lkb); 4332 dlm_put_lkb(lkb);
3901 } 4333 }
3902 4334
4335 mutex_lock(&ls->ls_clear_proc_locks);
4336
3903 /* in-progress unlocks */ 4337 /* in-progress unlocks */
3904 list_for_each_entry_safe(lkb, safe, &proc->unlocking, lkb_ownqueue) { 4338 list_for_each_entry_safe(lkb, safe, &proc->unlocking, lkb_ownqueue) {
3905 list_del_init(&lkb->lkb_ownqueue); 4339 list_del_init(&lkb->lkb_ownqueue);
@@ -3916,3 +4350,92 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
3916 unlock_recovery(ls); 4350 unlock_recovery(ls);
3917} 4351}
3918 4352
4353static void purge_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
4354{
4355 struct dlm_lkb *lkb, *safe;
4356
4357 while (1) {
4358 lkb = NULL;
4359 spin_lock(&proc->locks_spin);
4360 if (!list_empty(&proc->locks)) {
4361 lkb = list_entry(proc->locks.next, struct dlm_lkb,
4362 lkb_ownqueue);
4363 list_del_init(&lkb->lkb_ownqueue);
4364 }
4365 spin_unlock(&proc->locks_spin);
4366
4367 if (!lkb)
4368 break;
4369
4370 lkb->lkb_flags |= DLM_IFL_DEAD;
4371 unlock_proc_lock(ls, lkb);
4372 dlm_put_lkb(lkb); /* ref from proc->locks list */
4373 }
4374
4375 spin_lock(&proc->locks_spin);
4376 list_for_each_entry_safe(lkb, safe, &proc->unlocking, lkb_ownqueue) {
4377 list_del_init(&lkb->lkb_ownqueue);
4378 lkb->lkb_flags |= DLM_IFL_DEAD;
4379 dlm_put_lkb(lkb);
4380 }
4381 spin_unlock(&proc->locks_spin);
4382
4383 spin_lock(&proc->asts_spin);
4384 list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) {
4385 list_del(&lkb->lkb_astqueue);
4386 dlm_put_lkb(lkb);
4387 }
4388 spin_unlock(&proc->asts_spin);
4389}
4390
4391/* pid of 0 means purge all orphans */
4392
4393static void do_purge(struct dlm_ls *ls, int nodeid, int pid)
4394{
4395 struct dlm_lkb *lkb, *safe;
4396
4397 mutex_lock(&ls->ls_orphans_mutex);
4398 list_for_each_entry_safe(lkb, safe, &ls->ls_orphans, lkb_ownqueue) {
4399 if (pid && lkb->lkb_ownpid != pid)
4400 continue;
4401 unlock_proc_lock(ls, lkb);
4402 list_del_init(&lkb->lkb_ownqueue);
4403 dlm_put_lkb(lkb);
4404 }
4405 mutex_unlock(&ls->ls_orphans_mutex);
4406}
4407
4408static int send_purge(struct dlm_ls *ls, int nodeid, int pid)
4409{
4410 struct dlm_message *ms;
4411 struct dlm_mhandle *mh;
4412 int error;
4413
4414 error = _create_message(ls, sizeof(struct dlm_message), nodeid,
4415 DLM_MSG_PURGE, &ms, &mh);
4416 if (error)
4417 return error;
4418 ms->m_nodeid = nodeid;
4419 ms->m_pid = pid;
4420
4421 return send_message(mh, ms);
4422}
4423
4424int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc,
4425 int nodeid, int pid)
4426{
4427 int error = 0;
4428
4429 if (nodeid != dlm_our_nodeid()) {
4430 error = send_purge(ls, nodeid, pid);
4431 } else {
4432 lock_recovery(ls);
4433 if (pid == current->pid)
4434 purge_proc_locks(ls, proc);
4435 else
4436 do_purge(ls, nodeid, pid);
4437 unlock_recovery(ls);
4438 }
4439 return error;
4440}
4441
diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h
index 0843a3073ec3..64fc4ec40668 100644
--- a/fs/dlm/lock.h
+++ b/fs/dlm/lock.h
@@ -41,6 +41,8 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
41 uint32_t flags, uint32_t lkid, char *lvb_in); 41 uint32_t flags, uint32_t lkid, char *lvb_in);
42int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, 42int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
43 uint32_t flags, uint32_t lkid); 43 uint32_t flags, uint32_t lkid);
44int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc,
45 int nodeid, int pid);
44void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc); 46void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc);
45 47
46static inline int is_master(struct dlm_rsb *r) 48static inline int is_master(struct dlm_rsb *r)
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index f40817b53c6f..a677b2a5eed4 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -2,7 +2,7 @@
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. 5** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
6** 6**
7** This copyrighted material is made available to anyone wishing to use, 7** This copyrighted material is made available to anyone wishing to use,
8** modify, copy, or redistribute it subject to the terms and conditions 8** modify, copy, or redistribute it subject to the terms and conditions
@@ -167,7 +167,6 @@ static struct kobj_type dlm_ktype = {
167}; 167};
168 168
169static struct kset dlm_kset = { 169static struct kset dlm_kset = {
170 .subsys = &kernel_subsys,
171 .kobj = {.name = "dlm",}, 170 .kobj = {.name = "dlm",},
172 .ktype = &dlm_ktype, 171 .ktype = &dlm_ktype,
173}; 172};
@@ -218,6 +217,7 @@ int dlm_lockspace_init(void)
218 INIT_LIST_HEAD(&lslist); 217 INIT_LIST_HEAD(&lslist);
219 spin_lock_init(&lslist_lock); 218 spin_lock_init(&lslist_lock);
220 219
220 kobj_set_kset_s(&dlm_kset, kernel_subsys);
221 error = kset_register(&dlm_kset); 221 error = kset_register(&dlm_kset);
222 if (error) 222 if (error)
223 printk("dlm_lockspace_init: cannot register kset %d\n", error); 223 printk("dlm_lockspace_init: cannot register kset %d\n", error);
@@ -459,6 +459,8 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
459 459
460 INIT_LIST_HEAD(&ls->ls_waiters); 460 INIT_LIST_HEAD(&ls->ls_waiters);
461 mutex_init(&ls->ls_waiters_mutex); 461 mutex_init(&ls->ls_waiters_mutex);
462 INIT_LIST_HEAD(&ls->ls_orphans);
463 mutex_init(&ls->ls_orphans_mutex);
462 464
463 INIT_LIST_HEAD(&ls->ls_nodes); 465 INIT_LIST_HEAD(&ls->ls_nodes);
464 INIT_LIST_HEAD(&ls->ls_nodes_gone); 466 INIT_LIST_HEAD(&ls->ls_nodes_gone);
diff --git a/fs/dlm/lowcomms-sctp.c b/fs/dlm/lowcomms-sctp.c
deleted file mode 100644
index dc83a9d979b5..000000000000
--- a/fs/dlm/lowcomms-sctp.c
+++ /dev/null
@@ -1,1210 +0,0 @@
1/******************************************************************************
2*******************************************************************************
3**
4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5** Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
6**
7** This copyrighted material is made available to anyone wishing to use,
8** modify, copy, or redistribute it subject to the terms and conditions
9** of the GNU General Public License v.2.
10**
11*******************************************************************************
12******************************************************************************/
13
14/*
15 * lowcomms.c
16 *
17 * This is the "low-level" comms layer.
18 *
19 * It is responsible for sending/receiving messages
20 * from other nodes in the cluster.
21 *
22 * Cluster nodes are referred to by their nodeids. nodeids are
23 * simply 32 bit numbers to the locking module - if they need to
24 * be expanded for the cluster infrastructure then that is it's
25 * responsibility. It is this layer's
26 * responsibility to resolve these into IP address or
27 * whatever it needs for inter-node communication.
28 *
29 * The comms level is two kernel threads that deal mainly with
30 * the receiving of messages from other nodes and passing them
31 * up to the mid-level comms layer (which understands the
32 * message format) for execution by the locking core, and
33 * a send thread which does all the setting up of connections
34 * to remote nodes and the sending of data. Threads are not allowed
35 * to send their own data because it may cause them to wait in times
36 * of high load. Also, this way, the sending thread can collect together
37 * messages bound for one node and send them in one block.
38 *
39 * I don't see any problem with the recv thread executing the locking
40 * code on behalf of remote processes as the locking code is
41 * short, efficient and never (well, hardly ever) waits.
42 *
43 */
44
45#include <asm/ioctls.h>
46#include <net/sock.h>
47#include <net/tcp.h>
48#include <net/sctp/user.h>
49#include <linux/pagemap.h>
50#include <linux/socket.h>
51#include <linux/idr.h>
52
53#include "dlm_internal.h"
54#include "lowcomms.h"
55#include "config.h"
56#include "midcomms.h"
57
58static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT];
59static int dlm_local_count;
60static int dlm_local_nodeid;
61
62/* One of these per connected node */
63
64#define NI_INIT_PENDING 1
65#define NI_WRITE_PENDING 2
66
67struct nodeinfo {
68 spinlock_t lock;
69 sctp_assoc_t assoc_id;
70 unsigned long flags;
71 struct list_head write_list; /* nodes with pending writes */
72 struct list_head writequeue; /* outgoing writequeue_entries */
73 spinlock_t writequeue_lock;
74 int nodeid;
75 struct work_struct swork; /* Send workqueue */
76 struct work_struct lwork; /* Locking workqueue */
77};
78
79static DEFINE_IDR(nodeinfo_idr);
80static DECLARE_RWSEM(nodeinfo_lock);
81static int max_nodeid;
82
83struct cbuf {
84 unsigned int base;
85 unsigned int len;
86 unsigned int mask;
87};
88
89/* Just the one of these, now. But this struct keeps
90 the connection-specific variables together */
91
92#define CF_READ_PENDING 1
93
94struct connection {
95 struct socket *sock;
96 unsigned long flags;
97 struct page *rx_page;
98 atomic_t waiting_requests;
99 struct cbuf cb;
100 int eagain_flag;
101 struct work_struct work; /* Send workqueue */
102};
103
104/* An entry waiting to be sent */
105
106struct writequeue_entry {
107 struct list_head list;
108 struct page *page;
109 int offset;
110 int len;
111 int end;
112 int users;
113 struct nodeinfo *ni;
114};
115
116static void cbuf_add(struct cbuf *cb, int n)
117{
118 cb->len += n;
119}
120
121static int cbuf_data(struct cbuf *cb)
122{
123 return ((cb->base + cb->len) & cb->mask);
124}
125
126static void cbuf_init(struct cbuf *cb, int size)
127{
128 cb->base = cb->len = 0;
129 cb->mask = size-1;
130}
131
132static void cbuf_eat(struct cbuf *cb, int n)
133{
134 cb->len -= n;
135 cb->base += n;
136 cb->base &= cb->mask;
137}
138
139/* List of nodes which have writes pending */
140static LIST_HEAD(write_nodes);
141static DEFINE_SPINLOCK(write_nodes_lock);
142
143
144/* Maximum number of incoming messages to process before
145 * doing a schedule()
146 */
147#define MAX_RX_MSG_COUNT 25
148
149/* Work queues */
150static struct workqueue_struct *recv_workqueue;
151static struct workqueue_struct *send_workqueue;
152static struct workqueue_struct *lock_workqueue;
153
154/* The SCTP connection */
155static struct connection sctp_con;
156
157static void process_send_sockets(struct work_struct *work);
158static void process_recv_sockets(struct work_struct *work);
159static void process_lock_request(struct work_struct *work);
160
161static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
162{
163 struct sockaddr_storage addr;
164 int error;
165
166 if (!dlm_local_count)
167 return -1;
168
169 error = dlm_nodeid_to_addr(nodeid, &addr);
170 if (error)
171 return error;
172
173 if (dlm_local_addr[0]->ss_family == AF_INET) {
174 struct sockaddr_in *in4 = (struct sockaddr_in *) &addr;
175 struct sockaddr_in *ret4 = (struct sockaddr_in *) retaddr;
176 ret4->sin_addr.s_addr = in4->sin_addr.s_addr;
177 } else {
178 struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &addr;
179 struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr;
180 memcpy(&ret6->sin6_addr, &in6->sin6_addr,
181 sizeof(in6->sin6_addr));
182 }
183
184 return 0;
185}
186
187/* If alloc is 0 here we will not attempt to allocate a new
188 nodeinfo struct */
189static struct nodeinfo *nodeid2nodeinfo(int nodeid, gfp_t alloc)
190{
191 struct nodeinfo *ni;
192 int r;
193 int n;
194
195 down_read(&nodeinfo_lock);
196 ni = idr_find(&nodeinfo_idr, nodeid);
197 up_read(&nodeinfo_lock);
198
199 if (ni || !alloc)
200 return ni;
201
202 down_write(&nodeinfo_lock);
203
204 ni = idr_find(&nodeinfo_idr, nodeid);
205 if (ni)
206 goto out_up;
207
208 r = idr_pre_get(&nodeinfo_idr, alloc);
209 if (!r)
210 goto out_up;
211
212 ni = kmalloc(sizeof(struct nodeinfo), alloc);
213 if (!ni)
214 goto out_up;
215
216 r = idr_get_new_above(&nodeinfo_idr, ni, nodeid, &n);
217 if (r) {
218 kfree(ni);
219 ni = NULL;
220 goto out_up;
221 }
222 if (n != nodeid) {
223 idr_remove(&nodeinfo_idr, n);
224 kfree(ni);
225 ni = NULL;
226 goto out_up;
227 }
228 memset(ni, 0, sizeof(struct nodeinfo));
229 spin_lock_init(&ni->lock);
230 INIT_LIST_HEAD(&ni->writequeue);
231 spin_lock_init(&ni->writequeue_lock);
232 INIT_WORK(&ni->lwork, process_lock_request);
233 INIT_WORK(&ni->swork, process_send_sockets);
234 ni->nodeid = nodeid;
235
236 if (nodeid > max_nodeid)
237 max_nodeid = nodeid;
238out_up:
239 up_write(&nodeinfo_lock);
240
241 return ni;
242}
243
244/* Don't call this too often... */
245static struct nodeinfo *assoc2nodeinfo(sctp_assoc_t assoc)
246{
247 int i;
248 struct nodeinfo *ni;
249
250 for (i=1; i<=max_nodeid; i++) {
251 ni = nodeid2nodeinfo(i, 0);
252 if (ni && ni->assoc_id == assoc)
253 return ni;
254 }
255 return NULL;
256}
257
258/* Data or notification available on socket */
259static void lowcomms_data_ready(struct sock *sk, int count_unused)
260{
261 if (test_and_set_bit(CF_READ_PENDING, &sctp_con.flags))
262 queue_work(recv_workqueue, &sctp_con.work);
263}
264
265
266/* Add the port number to an IP6 or 4 sockaddr and return the address length.
267 Also padd out the struct with zeros to make comparisons meaningful */
268
269static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
270 int *addr_len)
271{
272 struct sockaddr_in *local4_addr;
273 struct sockaddr_in6 *local6_addr;
274
275 if (!dlm_local_count)
276 return;
277
278 if (!port) {
279 if (dlm_local_addr[0]->ss_family == AF_INET) {
280 local4_addr = (struct sockaddr_in *)dlm_local_addr[0];
281 port = be16_to_cpu(local4_addr->sin_port);
282 } else {
283 local6_addr = (struct sockaddr_in6 *)dlm_local_addr[0];
284 port = be16_to_cpu(local6_addr->sin6_port);
285 }
286 }
287
288 saddr->ss_family = dlm_local_addr[0]->ss_family;
289 if (dlm_local_addr[0]->ss_family == AF_INET) {
290 struct sockaddr_in *in4_addr = (struct sockaddr_in *)saddr;
291 in4_addr->sin_port = cpu_to_be16(port);
292 memset(&in4_addr->sin_zero, 0, sizeof(in4_addr->sin_zero));
293 memset(in4_addr+1, 0, sizeof(struct sockaddr_storage) -
294 sizeof(struct sockaddr_in));
295 *addr_len = sizeof(struct sockaddr_in);
296 } else {
297 struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)saddr;
298 in6_addr->sin6_port = cpu_to_be16(port);
299 memset(in6_addr+1, 0, sizeof(struct sockaddr_storage) -
300 sizeof(struct sockaddr_in6));
301 *addr_len = sizeof(struct sockaddr_in6);
302 }
303}
304
305/* Close the connection and tidy up */
306static void close_connection(void)
307{
308 if (sctp_con.sock) {
309 sock_release(sctp_con.sock);
310 sctp_con.sock = NULL;
311 }
312
313 if (sctp_con.rx_page) {
314 __free_page(sctp_con.rx_page);
315 sctp_con.rx_page = NULL;
316 }
317}
318
319/* We only send shutdown messages to nodes that are not part of the cluster */
320static void send_shutdown(sctp_assoc_t associd)
321{
322 static char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
323 struct msghdr outmessage;
324 struct cmsghdr *cmsg;
325 struct sctp_sndrcvinfo *sinfo;
326 int ret;
327
328 outmessage.msg_name = NULL;
329 outmessage.msg_namelen = 0;
330 outmessage.msg_control = outcmsg;
331 outmessage.msg_controllen = sizeof(outcmsg);
332 outmessage.msg_flags = MSG_EOR;
333
334 cmsg = CMSG_FIRSTHDR(&outmessage);
335 cmsg->cmsg_level = IPPROTO_SCTP;
336 cmsg->cmsg_type = SCTP_SNDRCV;
337 cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
338 outmessage.msg_controllen = cmsg->cmsg_len;
339 sinfo = CMSG_DATA(cmsg);
340 memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo));
341
342 sinfo->sinfo_flags |= MSG_EOF;
343 sinfo->sinfo_assoc_id = associd;
344
345 ret = kernel_sendmsg(sctp_con.sock, &outmessage, NULL, 0, 0);
346
347 if (ret != 0)
348 log_print("send EOF to node failed: %d", ret);
349}
350
351
352/* INIT failed but we don't know which node...
353 restart INIT on all pending nodes */
354static void init_failed(void)
355{
356 int i;
357 struct nodeinfo *ni;
358
359 for (i=1; i<=max_nodeid; i++) {
360 ni = nodeid2nodeinfo(i, 0);
361 if (!ni)
362 continue;
363
364 if (test_and_clear_bit(NI_INIT_PENDING, &ni->flags)) {
365 ni->assoc_id = 0;
366 if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) {
367 spin_lock_bh(&write_nodes_lock);
368 list_add_tail(&ni->write_list, &write_nodes);
369 spin_unlock_bh(&write_nodes_lock);
370 queue_work(send_workqueue, &ni->swork);
371 }
372 }
373 }
374}
375
376/* Something happened to an association */
377static void process_sctp_notification(struct msghdr *msg, char *buf)
378{
379 union sctp_notification *sn = (union sctp_notification *)buf;
380
381 if (sn->sn_header.sn_type == SCTP_ASSOC_CHANGE) {
382 switch (sn->sn_assoc_change.sac_state) {
383
384 case SCTP_COMM_UP:
385 case SCTP_RESTART:
386 {
387 /* Check that the new node is in the lockspace */
388 struct sctp_prim prim;
389 mm_segment_t fs;
390 int nodeid;
391 int prim_len, ret;
392 int addr_len;
393 struct nodeinfo *ni;
394
395 /* This seems to happen when we received a connection
396 * too early... or something... anyway, it happens but
397 * we always seem to get a real message too, see
398 * receive_from_sock */
399
400 if ((int)sn->sn_assoc_change.sac_assoc_id <= 0) {
401 log_print("COMM_UP for invalid assoc ID %d",
402 (int)sn->sn_assoc_change.sac_assoc_id);
403 init_failed();
404 return;
405 }
406 memset(&prim, 0, sizeof(struct sctp_prim));
407 prim_len = sizeof(struct sctp_prim);
408 prim.ssp_assoc_id = sn->sn_assoc_change.sac_assoc_id;
409
410 fs = get_fs();
411 set_fs(get_ds());
412 ret = sctp_con.sock->ops->getsockopt(sctp_con.sock,
413 IPPROTO_SCTP,
414 SCTP_PRIMARY_ADDR,
415 (char*)&prim,
416 &prim_len);
417 set_fs(fs);
418 if (ret < 0) {
419 struct nodeinfo *ni;
420
421 log_print("getsockopt/sctp_primary_addr on "
422 "new assoc %d failed : %d",
423 (int)sn->sn_assoc_change.sac_assoc_id,
424 ret);
425
426 /* Retry INIT later */
427 ni = assoc2nodeinfo(sn->sn_assoc_change.sac_assoc_id);
428 if (ni)
429 clear_bit(NI_INIT_PENDING, &ni->flags);
430 return;
431 }
432 make_sockaddr(&prim.ssp_addr, 0, &addr_len);
433 if (dlm_addr_to_nodeid(&prim.ssp_addr, &nodeid)) {
434 log_print("reject connect from unknown addr");
435 send_shutdown(prim.ssp_assoc_id);
436 return;
437 }
438
439 ni = nodeid2nodeinfo(nodeid, GFP_KERNEL);
440 if (!ni)
441 return;
442
443 /* Save the assoc ID */
444 ni->assoc_id = sn->sn_assoc_change.sac_assoc_id;
445
446 log_print("got new/restarted association %d nodeid %d",
447 (int)sn->sn_assoc_change.sac_assoc_id, nodeid);
448
449 /* Send any pending writes */
450 clear_bit(NI_INIT_PENDING, &ni->flags);
451 if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) {
452 spin_lock_bh(&write_nodes_lock);
453 list_add_tail(&ni->write_list, &write_nodes);
454 spin_unlock_bh(&write_nodes_lock);
455 queue_work(send_workqueue, &ni->swork);
456 }
457 }
458 break;
459
460 case SCTP_COMM_LOST:
461 case SCTP_SHUTDOWN_COMP:
462 {
463 struct nodeinfo *ni;
464
465 ni = assoc2nodeinfo(sn->sn_assoc_change.sac_assoc_id);
466 if (ni) {
467 spin_lock(&ni->lock);
468 ni->assoc_id = 0;
469 spin_unlock(&ni->lock);
470 }
471 }
472 break;
473
474 /* We don't know which INIT failed, so clear the PENDING flags
475 * on them all. if assoc_id is zero then it will then try
476 * again */
477
478 case SCTP_CANT_STR_ASSOC:
479 {
480 log_print("Can't start SCTP association - retrying");
481 init_failed();
482 }
483 break;
484
485 default:
486 log_print("unexpected SCTP assoc change id=%d state=%d",
487 (int)sn->sn_assoc_change.sac_assoc_id,
488 sn->sn_assoc_change.sac_state);
489 }
490 }
491}
492
493/* Data received from remote end */
494static int receive_from_sock(void)
495{
496 int ret = 0;
497 struct msghdr msg;
498 struct kvec iov[2];
499 unsigned len;
500 int r;
501 struct sctp_sndrcvinfo *sinfo;
502 struct cmsghdr *cmsg;
503 struct nodeinfo *ni;
504
505 /* These two are marginally too big for stack allocation, but this
506 * function is (currently) only called by dlm_recvd so static should be
507 * OK.
508 */
509 static struct sockaddr_storage msgname;
510 static char incmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
511
512 if (sctp_con.sock == NULL)
513 goto out;
514
515 if (sctp_con.rx_page == NULL) {
516 /*
517 * This doesn't need to be atomic, but I think it should
518 * improve performance if it is.
519 */
520 sctp_con.rx_page = alloc_page(GFP_ATOMIC);
521 if (sctp_con.rx_page == NULL)
522 goto out_resched;
523 cbuf_init(&sctp_con.cb, PAGE_CACHE_SIZE);
524 }
525
526 memset(&incmsg, 0, sizeof(incmsg));
527 memset(&msgname, 0, sizeof(msgname));
528
529 msg.msg_name = &msgname;
530 msg.msg_namelen = sizeof(msgname);
531 msg.msg_flags = 0;
532 msg.msg_control = incmsg;
533 msg.msg_controllen = sizeof(incmsg);
534 msg.msg_iovlen = 1;
535
536 /* I don't see why this circular buffer stuff is necessary for SCTP
537 * which is a packet-based protocol, but the whole thing breaks under
538 * load without it! The overhead is minimal (and is in the TCP lowcomms
539 * anyway, of course) so I'll leave it in until I can figure out what's
540 * really happening.
541 */
542
543 /*
544 * iov[0] is the bit of the circular buffer between the current end
545 * point (cb.base + cb.len) and the end of the buffer.
546 */
547 iov[0].iov_len = sctp_con.cb.base - cbuf_data(&sctp_con.cb);
548 iov[0].iov_base = page_address(sctp_con.rx_page) +
549 cbuf_data(&sctp_con.cb);
550 iov[1].iov_len = 0;
551
552 /*
553 * iov[1] is the bit of the circular buffer between the start of the
554 * buffer and the start of the currently used section (cb.base)
555 */
556 if (cbuf_data(&sctp_con.cb) >= sctp_con.cb.base) {
557 iov[0].iov_len = PAGE_CACHE_SIZE - cbuf_data(&sctp_con.cb);
558 iov[1].iov_len = sctp_con.cb.base;
559 iov[1].iov_base = page_address(sctp_con.rx_page);
560 msg.msg_iovlen = 2;
561 }
562 len = iov[0].iov_len + iov[1].iov_len;
563
564 r = ret = kernel_recvmsg(sctp_con.sock, &msg, iov, msg.msg_iovlen, len,
565 MSG_NOSIGNAL | MSG_DONTWAIT);
566 if (ret <= 0)
567 goto out_close;
568
569 msg.msg_control = incmsg;
570 msg.msg_controllen = sizeof(incmsg);
571 cmsg = CMSG_FIRSTHDR(&msg);
572 sinfo = CMSG_DATA(cmsg);
573
574 if (msg.msg_flags & MSG_NOTIFICATION) {
575 process_sctp_notification(&msg, page_address(sctp_con.rx_page));
576 return 0;
577 }
578
579 /* Is this a new association ? */
580 ni = nodeid2nodeinfo(le32_to_cpu(sinfo->sinfo_ppid), GFP_KERNEL);
581 if (ni) {
582 ni->assoc_id = sinfo->sinfo_assoc_id;
583 if (test_and_clear_bit(NI_INIT_PENDING, &ni->flags)) {
584
585 if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) {
586 spin_lock_bh(&write_nodes_lock);
587 list_add_tail(&ni->write_list, &write_nodes);
588 spin_unlock_bh(&write_nodes_lock);
589 queue_work(send_workqueue, &ni->swork);
590 }
591 }
592 }
593
594 /* INIT sends a message with length of 1 - ignore it */
595 if (r == 1)
596 return 0;
597
598 cbuf_add(&sctp_con.cb, ret);
599 // PJC: TODO: Add to node's workqueue....can we ??
600 ret = dlm_process_incoming_buffer(cpu_to_le32(sinfo->sinfo_ppid),
601 page_address(sctp_con.rx_page),
602 sctp_con.cb.base, sctp_con.cb.len,
603 PAGE_CACHE_SIZE);
604 if (ret < 0)
605 goto out_close;
606 cbuf_eat(&sctp_con.cb, ret);
607
608out:
609 ret = 0;
610 goto out_ret;
611
612out_resched:
613 lowcomms_data_ready(sctp_con.sock->sk, 0);
614 ret = 0;
615 cond_resched();
616 goto out_ret;
617
618out_close:
619 if (ret != -EAGAIN)
620 log_print("error reading from sctp socket: %d", ret);
621out_ret:
622 return ret;
623}
624
625/* Bind to an IP address. SCTP allows multiple address so it can do multi-homing */
626static int add_bind_addr(struct sockaddr_storage *addr, int addr_len, int num)
627{
628 mm_segment_t fs;
629 int result = 0;
630
631 fs = get_fs();
632 set_fs(get_ds());
633 if (num == 1)
634 result = sctp_con.sock->ops->bind(sctp_con.sock,
635 (struct sockaddr *) addr,
636 addr_len);
637 else
638 result = sctp_con.sock->ops->setsockopt(sctp_con.sock, SOL_SCTP,
639 SCTP_SOCKOPT_BINDX_ADD,
640 (char *)addr, addr_len);
641 set_fs(fs);
642
643 if (result < 0)
644 log_print("Can't bind to port %d addr number %d",
645 dlm_config.ci_tcp_port, num);
646
647 return result;
648}
649
650static void init_local(void)
651{
652 struct sockaddr_storage sas, *addr;
653 int i;
654
655 dlm_local_nodeid = dlm_our_nodeid();
656
657 for (i = 0; i < DLM_MAX_ADDR_COUNT - 1; i++) {
658 if (dlm_our_addr(&sas, i))
659 break;
660
661 addr = kmalloc(sizeof(*addr), GFP_KERNEL);
662 if (!addr)
663 break;
664 memcpy(addr, &sas, sizeof(*addr));
665 dlm_local_addr[dlm_local_count++] = addr;
666 }
667}
668
669/* Initialise SCTP socket and bind to all interfaces */
670static int init_sock(void)
671{
672 mm_segment_t fs;
673 struct socket *sock = NULL;
674 struct sockaddr_storage localaddr;
675 struct sctp_event_subscribe subscribe;
676 int result = -EINVAL, num = 1, i, addr_len;
677
678 if (!dlm_local_count) {
679 init_local();
680 if (!dlm_local_count) {
681 log_print("no local IP address has been set");
682 goto out;
683 }
684 }
685
686 result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_SEQPACKET,
687 IPPROTO_SCTP, &sock);
688 if (result < 0) {
689 log_print("Can't create comms socket, check SCTP is loaded");
690 goto out;
691 }
692
693 /* Listen for events */
694 memset(&subscribe, 0, sizeof(subscribe));
695 subscribe.sctp_data_io_event = 1;
696 subscribe.sctp_association_event = 1;
697 subscribe.sctp_send_failure_event = 1;
698 subscribe.sctp_shutdown_event = 1;
699 subscribe.sctp_partial_delivery_event = 1;
700
701 fs = get_fs();
702 set_fs(get_ds());
703 result = sock->ops->setsockopt(sock, SOL_SCTP, SCTP_EVENTS,
704 (char *)&subscribe, sizeof(subscribe));
705 set_fs(fs);
706
707 if (result < 0) {
708 log_print("Failed to set SCTP_EVENTS on socket: result=%d",
709 result);
710 goto create_delsock;
711 }
712
713 /* Init con struct */
714 sock->sk->sk_user_data = &sctp_con;
715 sctp_con.sock = sock;
716 sctp_con.sock->sk->sk_data_ready = lowcomms_data_ready;
717
718 /* Bind to all interfaces. */
719 for (i = 0; i < dlm_local_count; i++) {
720 memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr));
721 make_sockaddr(&localaddr, dlm_config.ci_tcp_port, &addr_len);
722
723 result = add_bind_addr(&localaddr, addr_len, num);
724 if (result)
725 goto create_delsock;
726 ++num;
727 }
728
729 result = sock->ops->listen(sock, 5);
730 if (result < 0) {
731 log_print("Can't set socket listening");
732 goto create_delsock;
733 }
734
735 return 0;
736
737create_delsock:
738 sock_release(sock);
739 sctp_con.sock = NULL;
740out:
741 return result;
742}
743
744
745static struct writequeue_entry *new_writequeue_entry(gfp_t allocation)
746{
747 struct writequeue_entry *entry;
748
749 entry = kmalloc(sizeof(struct writequeue_entry), allocation);
750 if (!entry)
751 return NULL;
752
753 entry->page = alloc_page(allocation);
754 if (!entry->page) {
755 kfree(entry);
756 return NULL;
757 }
758
759 entry->offset = 0;
760 entry->len = 0;
761 entry->end = 0;
762 entry->users = 0;
763
764 return entry;
765}
766
767void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc)
768{
769 struct writequeue_entry *e;
770 int offset = 0;
771 int users = 0;
772 struct nodeinfo *ni;
773
774 ni = nodeid2nodeinfo(nodeid, allocation);
775 if (!ni)
776 return NULL;
777
778 spin_lock(&ni->writequeue_lock);
779 e = list_entry(ni->writequeue.prev, struct writequeue_entry, list);
780 if ((&e->list == &ni->writequeue) ||
781 (PAGE_CACHE_SIZE - e->end < len)) {
782 e = NULL;
783 } else {
784 offset = e->end;
785 e->end += len;
786 users = e->users++;
787 }
788 spin_unlock(&ni->writequeue_lock);
789
790 if (e) {
791 got_one:
792 if (users == 0)
793 kmap(e->page);
794 *ppc = page_address(e->page) + offset;
795 return e;
796 }
797
798 e = new_writequeue_entry(allocation);
799 if (e) {
800 spin_lock(&ni->writequeue_lock);
801 offset = e->end;
802 e->end += len;
803 e->ni = ni;
804 users = e->users++;
805 list_add_tail(&e->list, &ni->writequeue);
806 spin_unlock(&ni->writequeue_lock);
807 goto got_one;
808 }
809 return NULL;
810}
811
812void dlm_lowcomms_commit_buffer(void *arg)
813{
814 struct writequeue_entry *e = (struct writequeue_entry *) arg;
815 int users;
816 struct nodeinfo *ni = e->ni;
817
818 spin_lock(&ni->writequeue_lock);
819 users = --e->users;
820 if (users)
821 goto out;
822 e->len = e->end - e->offset;
823 kunmap(e->page);
824 spin_unlock(&ni->writequeue_lock);
825
826 if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) {
827 spin_lock_bh(&write_nodes_lock);
828 list_add_tail(&ni->write_list, &write_nodes);
829 spin_unlock_bh(&write_nodes_lock);
830
831 queue_work(send_workqueue, &ni->swork);
832 }
833 return;
834
835out:
836 spin_unlock(&ni->writequeue_lock);
837 return;
838}
839
840static void free_entry(struct writequeue_entry *e)
841{
842 __free_page(e->page);
843 kfree(e);
844}
845
846/* Initiate an SCTP association. In theory we could just use sendmsg() on
847 the first IP address and it should work, but this allows us to set up the
848 association before sending any valuable data that we can't afford to lose.
849 It also keeps the send path clean as it can now always use the association ID */
850static void initiate_association(int nodeid)
851{
852 struct sockaddr_storage rem_addr;
853 static char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
854 struct msghdr outmessage;
855 struct cmsghdr *cmsg;
856 struct sctp_sndrcvinfo *sinfo;
857 int ret;
858 int addrlen;
859 char buf[1];
860 struct kvec iov[1];
861 struct nodeinfo *ni;
862
863 log_print("Initiating association with node %d", nodeid);
864
865 ni = nodeid2nodeinfo(nodeid, GFP_KERNEL);
866 if (!ni)
867 return;
868
869 if (nodeid_to_addr(nodeid, (struct sockaddr *)&rem_addr)) {
870 log_print("no address for nodeid %d", nodeid);
871 return;
872 }
873
874 make_sockaddr(&rem_addr, dlm_config.ci_tcp_port, &addrlen);
875
876 outmessage.msg_name = &rem_addr;
877 outmessage.msg_namelen = addrlen;
878 outmessage.msg_control = outcmsg;
879 outmessage.msg_controllen = sizeof(outcmsg);
880 outmessage.msg_flags = MSG_EOR;
881
882 iov[0].iov_base = buf;
883 iov[0].iov_len = 1;
884
885 /* Real INIT messages seem to cause trouble. Just send a 1 byte message
886 we can afford to lose */
887 cmsg = CMSG_FIRSTHDR(&outmessage);
888 cmsg->cmsg_level = IPPROTO_SCTP;
889 cmsg->cmsg_type = SCTP_SNDRCV;
890 cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
891 sinfo = CMSG_DATA(cmsg);
892 memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo));
893 sinfo->sinfo_ppid = cpu_to_le32(dlm_local_nodeid);
894
895 outmessage.msg_controllen = cmsg->cmsg_len;
896 ret = kernel_sendmsg(sctp_con.sock, &outmessage, iov, 1, 1);
897 if (ret < 0) {
898 log_print("send INIT to node failed: %d", ret);
899 /* Try again later */
900 clear_bit(NI_INIT_PENDING, &ni->flags);
901 }
902}
903
904/* Send a message */
905static void send_to_sock(struct nodeinfo *ni)
906{
907 int ret = 0;
908 struct writequeue_entry *e;
909 int len, offset;
910 struct msghdr outmsg;
911 static char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
912 struct cmsghdr *cmsg;
913 struct sctp_sndrcvinfo *sinfo;
914 struct kvec iov;
915
916 /* See if we need to init an association before we start
917 sending precious messages */
918 spin_lock(&ni->lock);
919 if (!ni->assoc_id && !test_and_set_bit(NI_INIT_PENDING, &ni->flags)) {
920 spin_unlock(&ni->lock);
921 initiate_association(ni->nodeid);
922 return;
923 }
924 spin_unlock(&ni->lock);
925
926 outmsg.msg_name = NULL; /* We use assoc_id */
927 outmsg.msg_namelen = 0;
928 outmsg.msg_control = outcmsg;
929 outmsg.msg_controllen = sizeof(outcmsg);
930 outmsg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL | MSG_EOR;
931
932 cmsg = CMSG_FIRSTHDR(&outmsg);
933 cmsg->cmsg_level = IPPROTO_SCTP;
934 cmsg->cmsg_type = SCTP_SNDRCV;
935 cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
936 sinfo = CMSG_DATA(cmsg);
937 memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo));
938 sinfo->sinfo_ppid = cpu_to_le32(dlm_local_nodeid);
939 sinfo->sinfo_assoc_id = ni->assoc_id;
940 outmsg.msg_controllen = cmsg->cmsg_len;
941
942 spin_lock(&ni->writequeue_lock);
943 for (;;) {
944 if (list_empty(&ni->writequeue))
945 break;
946 e = list_entry(ni->writequeue.next, struct writequeue_entry,
947 list);
948 len = e->len;
949 offset = e->offset;
950 BUG_ON(len == 0 && e->users == 0);
951 spin_unlock(&ni->writequeue_lock);
952 kmap(e->page);
953
954 ret = 0;
955 if (len) {
956 iov.iov_base = page_address(e->page)+offset;
957 iov.iov_len = len;
958
959 ret = kernel_sendmsg(sctp_con.sock, &outmsg, &iov, 1,
960 len);
961 if (ret == -EAGAIN) {
962 sctp_con.eagain_flag = 1;
963 goto out;
964 } else if (ret < 0)
965 goto send_error;
966 } else {
967 /* Don't starve people filling buffers */
968 cond_resched();
969 }
970
971 spin_lock(&ni->writequeue_lock);
972 e->offset += ret;
973 e->len -= ret;
974
975 if (e->len == 0 && e->users == 0) {
976 list_del(&e->list);
977 kunmap(e->page);
978 free_entry(e);
979 continue;
980 }
981 }
982 spin_unlock(&ni->writequeue_lock);
983out:
984 return;
985
986send_error:
987 log_print("Error sending to node %d %d", ni->nodeid, ret);
988 spin_lock(&ni->lock);
989 if (!test_and_set_bit(NI_INIT_PENDING, &ni->flags)) {
990 ni->assoc_id = 0;
991 spin_unlock(&ni->lock);
992 initiate_association(ni->nodeid);
993 } else
994 spin_unlock(&ni->lock);
995
996 return;
997}
998
999/* Try to send any messages that are pending */
1000static void process_output_queue(void)
1001{
1002 struct list_head *list;
1003 struct list_head *temp;
1004
1005 spin_lock_bh(&write_nodes_lock);
1006 list_for_each_safe(list, temp, &write_nodes) {
1007 struct nodeinfo *ni =
1008 list_entry(list, struct nodeinfo, write_list);
1009 clear_bit(NI_WRITE_PENDING, &ni->flags);
1010 list_del(&ni->write_list);
1011
1012 spin_unlock_bh(&write_nodes_lock);
1013
1014 send_to_sock(ni);
1015 spin_lock_bh(&write_nodes_lock);
1016 }
1017 spin_unlock_bh(&write_nodes_lock);
1018}
1019
1020/* Called after we've had -EAGAIN and been woken up */
1021static void refill_write_queue(void)
1022{
1023 int i;
1024
1025 for (i=1; i<=max_nodeid; i++) {
1026 struct nodeinfo *ni = nodeid2nodeinfo(i, 0);
1027
1028 if (ni) {
1029 if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) {
1030 spin_lock_bh(&write_nodes_lock);
1031 list_add_tail(&ni->write_list, &write_nodes);
1032 spin_unlock_bh(&write_nodes_lock);
1033 }
1034 }
1035 }
1036}
1037
1038static void clean_one_writequeue(struct nodeinfo *ni)
1039{
1040 struct list_head *list;
1041 struct list_head *temp;
1042
1043 spin_lock(&ni->writequeue_lock);
1044 list_for_each_safe(list, temp, &ni->writequeue) {
1045 struct writequeue_entry *e =
1046 list_entry(list, struct writequeue_entry, list);
1047 list_del(&e->list);
1048 free_entry(e);
1049 }
1050 spin_unlock(&ni->writequeue_lock);
1051}
1052
1053static void clean_writequeues(void)
1054{
1055 int i;
1056
1057 for (i=1; i<=max_nodeid; i++) {
1058 struct nodeinfo *ni = nodeid2nodeinfo(i, 0);
1059 if (ni)
1060 clean_one_writequeue(ni);
1061 }
1062}
1063
1064
1065static void dealloc_nodeinfo(void)
1066{
1067 int i;
1068
1069 for (i=1; i<=max_nodeid; i++) {
1070 struct nodeinfo *ni = nodeid2nodeinfo(i, 0);
1071 if (ni) {
1072 idr_remove(&nodeinfo_idr, i);
1073 kfree(ni);
1074 }
1075 }
1076}
1077
1078int dlm_lowcomms_close(int nodeid)
1079{
1080 struct nodeinfo *ni;
1081
1082 ni = nodeid2nodeinfo(nodeid, 0);
1083 if (!ni)
1084 return -1;
1085
1086 spin_lock(&ni->lock);
1087 if (ni->assoc_id) {
1088 ni->assoc_id = 0;
1089 /* Don't send shutdown here, sctp will just queue it
1090 till the node comes back up! */
1091 }
1092 spin_unlock(&ni->lock);
1093
1094 clean_one_writequeue(ni);
1095 clear_bit(NI_INIT_PENDING, &ni->flags);
1096 return 0;
1097}
1098
1099// PJC: The work queue function for receiving.
1100static void process_recv_sockets(struct work_struct *work)
1101{
1102 if (test_and_clear_bit(CF_READ_PENDING, &sctp_con.flags)) {
1103 int ret;
1104 int count = 0;
1105
1106 do {
1107 ret = receive_from_sock();
1108
1109 /* Don't starve out everyone else */
1110 if (++count >= MAX_RX_MSG_COUNT) {
1111 cond_resched();
1112 count = 0;
1113 }
1114 } while (!kthread_should_stop() && ret >=0);
1115 }
1116 cond_resched();
1117}
1118
1119// PJC: the work queue function for sending
1120static void process_send_sockets(struct work_struct *work)
1121{
1122 if (sctp_con.eagain_flag) {
1123 sctp_con.eagain_flag = 0;
1124 refill_write_queue();
1125 }
1126 process_output_queue();
1127}
1128
1129// PJC: Process lock requests from a particular node.
1130// TODO: can we optimise this out on UP ??
1131static void process_lock_request(struct work_struct *work)
1132{
1133}
1134
1135static void daemons_stop(void)
1136{
1137 destroy_workqueue(recv_workqueue);
1138 destroy_workqueue(send_workqueue);
1139 destroy_workqueue(lock_workqueue);
1140}
1141
1142static int daemons_start(void)
1143{
1144 int error;
1145 recv_workqueue = create_workqueue("dlm_recv");
1146 error = IS_ERR(recv_workqueue);
1147 if (error) {
1148 log_print("can't start dlm_recv %d", error);
1149 return error;
1150 }
1151
1152 send_workqueue = create_singlethread_workqueue("dlm_send");
1153 error = IS_ERR(send_workqueue);
1154 if (error) {
1155 log_print("can't start dlm_send %d", error);
1156 destroy_workqueue(recv_workqueue);
1157 return error;
1158 }
1159
1160 lock_workqueue = create_workqueue("dlm_rlock");
1161 error = IS_ERR(lock_workqueue);
1162 if (error) {
1163 log_print("can't start dlm_rlock %d", error);
1164 destroy_workqueue(send_workqueue);
1165 destroy_workqueue(recv_workqueue);
1166 return error;
1167 }
1168
1169 return 0;
1170}
1171
1172/*
1173 * This is quite likely to sleep...
1174 */
1175int dlm_lowcomms_start(void)
1176{
1177 int error;
1178
1179 INIT_WORK(&sctp_con.work, process_recv_sockets);
1180
1181 error = init_sock();
1182 if (error)
1183 goto fail_sock;
1184 error = daemons_start();
1185 if (error)
1186 goto fail_sock;
1187 return 0;
1188
1189fail_sock:
1190 close_connection();
1191 return error;
1192}
1193
1194void dlm_lowcomms_stop(void)
1195{
1196 int i;
1197
1198 sctp_con.flags = 0x7;
1199 daemons_stop();
1200 clean_writequeues();
1201 close_connection();
1202 dealloc_nodeinfo();
1203 max_nodeid = 0;
1204
1205 dlm_local_count = 0;
1206 dlm_local_nodeid = 0;
1207
1208 for (i = 0; i < dlm_local_count; i++)
1209 kfree(dlm_local_addr[i]);
1210}
diff --git a/fs/dlm/lowcomms-tcp.c b/fs/dlm/lowcomms.c
index 07e0a122c32f..27970a58d29b 100644
--- a/fs/dlm/lowcomms-tcp.c
+++ b/fs/dlm/lowcomms.c
@@ -36,30 +36,36 @@
36 * of high load. Also, this way, the sending thread can collect together 36 * of high load. Also, this way, the sending thread can collect together
37 * messages bound for one node and send them in one block. 37 * messages bound for one node and send them in one block.
38 * 38 *
39 * I don't see any problem with the recv thread executing the locking 39 * lowcomms will choose to use wither TCP or SCTP as its transport layer
40 * code on behalf of remote processes as the locking code is 40 * depending on the configuration variable 'protocol'. This should be set
41 * short, efficient and never waits. 41 * to 0 (default) for TCP or 1 for SCTP. It shouldbe configured using a
42 * cluster-wide mechanism as it must be the same on all nodes of the cluster
43 * for the DLM to function.
42 * 44 *
43 */ 45 */
44 46
45
46#include <asm/ioctls.h> 47#include <asm/ioctls.h>
47#include <net/sock.h> 48#include <net/sock.h>
48#include <net/tcp.h> 49#include <net/tcp.h>
49#include <linux/pagemap.h> 50#include <linux/pagemap.h>
51#include <linux/idr.h>
52#include <linux/file.h>
53#include <linux/sctp.h>
54#include <net/sctp/user.h>
50 55
51#include "dlm_internal.h" 56#include "dlm_internal.h"
52#include "lowcomms.h" 57#include "lowcomms.h"
53#include "midcomms.h" 58#include "midcomms.h"
54#include "config.h" 59#include "config.h"
55 60
61#define NEEDED_RMEM (4*1024*1024)
62
56struct cbuf { 63struct cbuf {
57 unsigned int base; 64 unsigned int base;
58 unsigned int len; 65 unsigned int len;
59 unsigned int mask; 66 unsigned int mask;
60}; 67};
61 68
62#define NODE_INCREMENT 32
63static void cbuf_add(struct cbuf *cb, int n) 69static void cbuf_add(struct cbuf *cb, int n)
64{ 70{
65 cb->len += n; 71 cb->len += n;
@@ -88,28 +94,25 @@ static bool cbuf_empty(struct cbuf *cb)
88 return cb->len == 0; 94 return cb->len == 0;
89} 95}
90 96
91/* Maximum number of incoming messages to process before
92 doing a cond_resched()
93*/
94#define MAX_RX_MSG_COUNT 25
95
96struct connection { 97struct connection {
97 struct socket *sock; /* NULL if not connected */ 98 struct socket *sock; /* NULL if not connected */
98 uint32_t nodeid; /* So we know who we are in the list */ 99 uint32_t nodeid; /* So we know who we are in the list */
99 struct mutex sock_mutex; 100 struct mutex sock_mutex;
100 unsigned long flags; /* bit 1,2 = We are on the read/write lists */ 101 unsigned long flags;
101#define CF_READ_PENDING 1 102#define CF_READ_PENDING 1
102#define CF_WRITE_PENDING 2 103#define CF_WRITE_PENDING 2
103#define CF_CONNECT_PENDING 3 104#define CF_CONNECT_PENDING 3
104#define CF_IS_OTHERCON 4 105#define CF_INIT_PENDING 4
106#define CF_IS_OTHERCON 5
105 struct list_head writequeue; /* List of outgoing writequeue_entries */ 107 struct list_head writequeue; /* List of outgoing writequeue_entries */
106 struct list_head listenlist; /* List of allocated listening sockets */
107 spinlock_t writequeue_lock; 108 spinlock_t writequeue_lock;
108 int (*rx_action) (struct connection *); /* What to do when active */ 109 int (*rx_action) (struct connection *); /* What to do when active */
110 void (*connect_action) (struct connection *); /* What to do to connect */
109 struct page *rx_page; 111 struct page *rx_page;
110 struct cbuf cb; 112 struct cbuf cb;
111 int retries; 113 int retries;
112#define MAX_CONNECT_RETRIES 3 114#define MAX_CONNECT_RETRIES 3
115 int sctp_assoc;
113 struct connection *othercon; 116 struct connection *othercon;
114 struct work_struct rwork; /* Receive workqueue */ 117 struct work_struct rwork; /* Receive workqueue */
115 struct work_struct swork; /* Send workqueue */ 118 struct work_struct swork; /* Send workqueue */
@@ -127,68 +130,136 @@ struct writequeue_entry {
127 struct connection *con; 130 struct connection *con;
128}; 131};
129 132
130static struct sockaddr_storage dlm_local_addr; 133static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT];
134static int dlm_local_count;
131 135
132/* Work queues */ 136/* Work queues */
133static struct workqueue_struct *recv_workqueue; 137static struct workqueue_struct *recv_workqueue;
134static struct workqueue_struct *send_workqueue; 138static struct workqueue_struct *send_workqueue;
135 139
136/* An array of pointers to connections, indexed by NODEID */ 140static DEFINE_IDR(connections_idr);
137static struct connection **connections;
138static DECLARE_MUTEX(connections_lock); 141static DECLARE_MUTEX(connections_lock);
142static int max_nodeid;
139static struct kmem_cache *con_cache; 143static struct kmem_cache *con_cache;
140static int conn_array_size;
141 144
142static void process_recv_sockets(struct work_struct *work); 145static void process_recv_sockets(struct work_struct *work);
143static void process_send_sockets(struct work_struct *work); 146static void process_send_sockets(struct work_struct *work);
144 147
145static struct connection *nodeid2con(int nodeid, gfp_t allocation) 148/*
149 * If 'allocation' is zero then we don't attempt to create a new
150 * connection structure for this node.
151 */
152static struct connection *__nodeid2con(int nodeid, gfp_t alloc)
146{ 153{
147 struct connection *con = NULL; 154 struct connection *con = NULL;
155 int r;
156 int n;
148 157
149 down(&connections_lock); 158 con = idr_find(&connections_idr, nodeid);
150 if (nodeid >= conn_array_size) { 159 if (con || !alloc)
151 int new_size = nodeid + NODE_INCREMENT; 160 return con;
152 struct connection **new_conns;
153 161
154 new_conns = kzalloc(sizeof(struct connection *) * 162 r = idr_pre_get(&connections_idr, alloc);
155 new_size, allocation); 163 if (!r)
156 if (!new_conns) 164 return NULL;
157 goto finish; 165
166 con = kmem_cache_zalloc(con_cache, alloc);
167 if (!con)
168 return NULL;
158 169
159 memcpy(new_conns, connections, sizeof(struct connection *) * conn_array_size); 170 r = idr_get_new_above(&connections_idr, con, nodeid, &n);
160 conn_array_size = new_size; 171 if (r) {
161 kfree(connections); 172 kmem_cache_free(con_cache, con);
162 connections = new_conns; 173 return NULL;
174 }
163 175
176 if (n != nodeid) {
177 idr_remove(&connections_idr, n);
178 kmem_cache_free(con_cache, con);
179 return NULL;
164 } 180 }
165 181
166 con = connections[nodeid]; 182 con->nodeid = nodeid;
167 if (con == NULL && allocation) { 183 mutex_init(&con->sock_mutex);
168 con = kmem_cache_zalloc(con_cache, allocation); 184 INIT_LIST_HEAD(&con->writequeue);
169 if (!con) 185 spin_lock_init(&con->writequeue_lock);
170 goto finish; 186 INIT_WORK(&con->swork, process_send_sockets);
187 INIT_WORK(&con->rwork, process_recv_sockets);
171 188
172 con->nodeid = nodeid; 189 /* Setup action pointers for child sockets */
173 mutex_init(&con->sock_mutex); 190 if (con->nodeid) {
174 INIT_LIST_HEAD(&con->writequeue); 191 struct connection *zerocon = idr_find(&connections_idr, 0);
175 spin_lock_init(&con->writequeue_lock);
176 INIT_WORK(&con->swork, process_send_sockets);
177 INIT_WORK(&con->rwork, process_recv_sockets);
178 192
179 connections[nodeid] = con; 193 con->connect_action = zerocon->connect_action;
194 if (!con->rx_action)
195 con->rx_action = zerocon->rx_action;
180 } 196 }
181 197
182finish: 198 if (nodeid > max_nodeid)
199 max_nodeid = nodeid;
200
201 return con;
202}
203
204static struct connection *nodeid2con(int nodeid, gfp_t allocation)
205{
206 struct connection *con;
207
208 down(&connections_lock);
209 con = __nodeid2con(nodeid, allocation);
183 up(&connections_lock); 210 up(&connections_lock);
211
184 return con; 212 return con;
185} 213}
186 214
215/* This is a bit drastic, but only called when things go wrong */
216static struct connection *assoc2con(int assoc_id)
217{
218 int i;
219 struct connection *con;
220
221 down(&connections_lock);
222 for (i=0; i<=max_nodeid; i++) {
223 con = __nodeid2con(i, 0);
224 if (con && con->sctp_assoc == assoc_id) {
225 up(&connections_lock);
226 return con;
227 }
228 }
229 up(&connections_lock);
230 return NULL;
231}
232
233static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
234{
235 struct sockaddr_storage addr;
236 int error;
237
238 if (!dlm_local_count)
239 return -1;
240
241 error = dlm_nodeid_to_addr(nodeid, &addr);
242 if (error)
243 return error;
244
245 if (dlm_local_addr[0]->ss_family == AF_INET) {
246 struct sockaddr_in *in4 = (struct sockaddr_in *) &addr;
247 struct sockaddr_in *ret4 = (struct sockaddr_in *) retaddr;
248 ret4->sin_addr.s_addr = in4->sin_addr.s_addr;
249 } else {
250 struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &addr;
251 struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr;
252 memcpy(&ret6->sin6_addr, &in6->sin6_addr,
253 sizeof(in6->sin6_addr));
254 }
255
256 return 0;
257}
258
187/* Data available on socket or listen socket received a connect */ 259/* Data available on socket or listen socket received a connect */
188static void lowcomms_data_ready(struct sock *sk, int count_unused) 260static void lowcomms_data_ready(struct sock *sk, int count_unused)
189{ 261{
190 struct connection *con = sock2con(sk); 262 struct connection *con = sock2con(sk);
191
192 if (!test_and_set_bit(CF_READ_PENDING, &con->flags)) 263 if (!test_and_set_bit(CF_READ_PENDING, &con->flags))
193 queue_work(recv_workqueue, &con->rwork); 264 queue_work(recv_workqueue, &con->rwork);
194} 265}
@@ -222,20 +293,21 @@ static int add_sock(struct socket *sock, struct connection *con)
222 con->sock->sk->sk_data_ready = lowcomms_data_ready; 293 con->sock->sk->sk_data_ready = lowcomms_data_ready;
223 con->sock->sk->sk_write_space = lowcomms_write_space; 294 con->sock->sk->sk_write_space = lowcomms_write_space;
224 con->sock->sk->sk_state_change = lowcomms_state_change; 295 con->sock->sk->sk_state_change = lowcomms_state_change;
225 296 con->sock->sk->sk_user_data = con;
226 return 0; 297 return 0;
227} 298}
228 299
229/* Add the port number to an IP6 or 4 sockaddr and return the address 300/* Add the port number to an IPv6 or 4 sockaddr and return the address
230 length */ 301 length */
231static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port, 302static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
232 int *addr_len) 303 int *addr_len)
233{ 304{
234 saddr->ss_family = dlm_local_addr.ss_family; 305 saddr->ss_family = dlm_local_addr[0]->ss_family;
235 if (saddr->ss_family == AF_INET) { 306 if (saddr->ss_family == AF_INET) {
236 struct sockaddr_in *in4_addr = (struct sockaddr_in *)saddr; 307 struct sockaddr_in *in4_addr = (struct sockaddr_in *)saddr;
237 in4_addr->sin_port = cpu_to_be16(port); 308 in4_addr->sin_port = cpu_to_be16(port);
238 *addr_len = sizeof(struct sockaddr_in); 309 *addr_len = sizeof(struct sockaddr_in);
310 memset(&in4_addr->sin_zero, 0, sizeof(in4_addr->sin_zero));
239 } else { 311 } else {
240 struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)saddr; 312 struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)saddr;
241 in6_addr->sin6_port = cpu_to_be16(port); 313 in6_addr->sin6_port = cpu_to_be16(port);
@@ -264,6 +336,193 @@ static void close_connection(struct connection *con, bool and_other)
264 mutex_unlock(&con->sock_mutex); 336 mutex_unlock(&con->sock_mutex);
265} 337}
266 338
339/* We only send shutdown messages to nodes that are not part of the cluster */
340static void sctp_send_shutdown(sctp_assoc_t associd)
341{
342 static char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
343 struct msghdr outmessage;
344 struct cmsghdr *cmsg;
345 struct sctp_sndrcvinfo *sinfo;
346 int ret;
347 struct connection *con;
348
349 con = nodeid2con(0,0);
350 BUG_ON(con == NULL);
351
352 outmessage.msg_name = NULL;
353 outmessage.msg_namelen = 0;
354 outmessage.msg_control = outcmsg;
355 outmessage.msg_controllen = sizeof(outcmsg);
356 outmessage.msg_flags = MSG_EOR;
357
358 cmsg = CMSG_FIRSTHDR(&outmessage);
359 cmsg->cmsg_level = IPPROTO_SCTP;
360 cmsg->cmsg_type = SCTP_SNDRCV;
361 cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
362 outmessage.msg_controllen = cmsg->cmsg_len;
363 sinfo = CMSG_DATA(cmsg);
364 memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo));
365
366 sinfo->sinfo_flags |= MSG_EOF;
367 sinfo->sinfo_assoc_id = associd;
368
369 ret = kernel_sendmsg(con->sock, &outmessage, NULL, 0, 0);
370
371 if (ret != 0)
372 log_print("send EOF to node failed: %d", ret);
373}
374
375/* INIT failed but we don't know which node...
376 restart INIT on all pending nodes */
377static void sctp_init_failed(void)
378{
379 int i;
380 struct connection *con;
381
382 down(&connections_lock);
383 for (i=1; i<=max_nodeid; i++) {
384 con = __nodeid2con(i, 0);
385 if (!con)
386 continue;
387 con->sctp_assoc = 0;
388 if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) {
389 if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) {
390 queue_work(send_workqueue, &con->swork);
391 }
392 }
393 }
394 up(&connections_lock);
395}
396
397/* Something happened to an association */
398static void process_sctp_notification(struct connection *con,
399 struct msghdr *msg, char *buf)
400{
401 union sctp_notification *sn = (union sctp_notification *)buf;
402
403 if (sn->sn_header.sn_type == SCTP_ASSOC_CHANGE) {
404 switch (sn->sn_assoc_change.sac_state) {
405
406 case SCTP_COMM_UP:
407 case SCTP_RESTART:
408 {
409 /* Check that the new node is in the lockspace */
410 struct sctp_prim prim;
411 int nodeid;
412 int prim_len, ret;
413 int addr_len;
414 struct connection *new_con;
415 struct file *file;
416 sctp_peeloff_arg_t parg;
417 int parglen = sizeof(parg);
418
419 /*
420 * We get this before any data for an association.
421 * We verify that the node is in the cluster and
422 * then peel off a socket for it.
423 */
424 if ((int)sn->sn_assoc_change.sac_assoc_id <= 0) {
425 log_print("COMM_UP for invalid assoc ID %d",
426 (int)sn->sn_assoc_change.sac_assoc_id);
427 sctp_init_failed();
428 return;
429 }
430 memset(&prim, 0, sizeof(struct sctp_prim));
431 prim_len = sizeof(struct sctp_prim);
432 prim.ssp_assoc_id = sn->sn_assoc_change.sac_assoc_id;
433
434 ret = kernel_getsockopt(con->sock,
435 IPPROTO_SCTP,
436 SCTP_PRIMARY_ADDR,
437 (char*)&prim,
438 &prim_len);
439 if (ret < 0) {
440 log_print("getsockopt/sctp_primary_addr on "
441 "new assoc %d failed : %d",
442 (int)sn->sn_assoc_change.sac_assoc_id,
443 ret);
444
445 /* Retry INIT later */
446 new_con = assoc2con(sn->sn_assoc_change.sac_assoc_id);
447 if (new_con)
448 clear_bit(CF_CONNECT_PENDING, &con->flags);
449 return;
450 }
451 make_sockaddr(&prim.ssp_addr, 0, &addr_len);
452 if (dlm_addr_to_nodeid(&prim.ssp_addr, &nodeid)) {
453 int i;
454 unsigned char *b=(unsigned char *)&prim.ssp_addr;
455 log_print("reject connect from unknown addr");
456 for (i=0; i<sizeof(struct sockaddr_storage);i++)
457 printk("%02x ", b[i]);
458 printk("\n");
459 sctp_send_shutdown(prim.ssp_assoc_id);
460 return;
461 }
462
463 new_con = nodeid2con(nodeid, GFP_KERNEL);
464 if (!new_con)
465 return;
466
467 /* Peel off a new sock */
468 parg.associd = sn->sn_assoc_change.sac_assoc_id;
469 ret = kernel_getsockopt(con->sock, IPPROTO_SCTP,
470 SCTP_SOCKOPT_PEELOFF,
471 (void *)&parg, &parglen);
472 if (ret) {
473 log_print("Can't peel off a socket for "
474 "connection %d to node %d: err=%d\n",
475 parg.associd, nodeid, ret);
476 }
477 file = fget(parg.sd);
478 new_con->sock = SOCKET_I(file->f_dentry->d_inode);
479 add_sock(new_con->sock, new_con);
480 fput(file);
481 put_unused_fd(parg.sd);
482
483 log_print("got new/restarted association %d nodeid %d",
484 (int)sn->sn_assoc_change.sac_assoc_id, nodeid);
485
486 /* Send any pending writes */
487 clear_bit(CF_CONNECT_PENDING, &new_con->flags);
488 clear_bit(CF_INIT_PENDING, &con->flags);
489 if (!test_and_set_bit(CF_WRITE_PENDING, &new_con->flags)) {
490 queue_work(send_workqueue, &new_con->swork);
491 }
492 if (!test_and_set_bit(CF_READ_PENDING, &new_con->flags))
493 queue_work(recv_workqueue, &new_con->rwork);
494 }
495 break;
496
497 case SCTP_COMM_LOST:
498 case SCTP_SHUTDOWN_COMP:
499 {
500 con = assoc2con(sn->sn_assoc_change.sac_assoc_id);
501 if (con) {
502 con->sctp_assoc = 0;
503 }
504 }
505 break;
506
507 /* We don't know which INIT failed, so clear the PENDING flags
508 * on them all. if assoc_id is zero then it will then try
509 * again */
510
511 case SCTP_CANT_STR_ASSOC:
512 {
513 log_print("Can't start SCTP association - retrying");
514 sctp_init_failed();
515 }
516 break;
517
518 default:
519 log_print("unexpected SCTP assoc change id=%d state=%d",
520 (int)sn->sn_assoc_change.sac_assoc_id,
521 sn->sn_assoc_change.sac_state);
522 }
523 }
524}
525
267/* Data received from remote end */ 526/* Data received from remote end */
268static int receive_from_sock(struct connection *con) 527static int receive_from_sock(struct connection *con)
269{ 528{
@@ -274,6 +533,7 @@ static int receive_from_sock(struct connection *con)
274 int r; 533 int r;
275 int call_again_soon = 0; 534 int call_again_soon = 0;
276 int nvec; 535 int nvec;
536 char incmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
277 537
278 mutex_lock(&con->sock_mutex); 538 mutex_lock(&con->sock_mutex);
279 539
@@ -293,12 +553,18 @@ static int receive_from_sock(struct connection *con)
293 cbuf_init(&con->cb, PAGE_CACHE_SIZE); 553 cbuf_init(&con->cb, PAGE_CACHE_SIZE);
294 } 554 }
295 555
556 /* Only SCTP needs these really */
557 memset(&incmsg, 0, sizeof(incmsg));
558 msg.msg_control = incmsg;
559 msg.msg_controllen = sizeof(incmsg);
560
296 /* 561 /*
297 * iov[0] is the bit of the circular buffer between the current end 562 * iov[0] is the bit of the circular buffer between the current end
298 * point (cb.base + cb.len) and the end of the buffer. 563 * point (cb.base + cb.len) and the end of the buffer.
299 */ 564 */
300 iov[0].iov_len = con->cb.base - cbuf_data(&con->cb); 565 iov[0].iov_len = con->cb.base - cbuf_data(&con->cb);
301 iov[0].iov_base = page_address(con->rx_page) + cbuf_data(&con->cb); 566 iov[0].iov_base = page_address(con->rx_page) + cbuf_data(&con->cb);
567 iov[1].iov_len = 0;
302 nvec = 1; 568 nvec = 1;
303 569
304 /* 570 /*
@@ -315,11 +581,20 @@ static int receive_from_sock(struct connection *con)
315 581
316 r = ret = kernel_recvmsg(con->sock, &msg, iov, nvec, len, 582 r = ret = kernel_recvmsg(con->sock, &msg, iov, nvec, len,
317 MSG_DONTWAIT | MSG_NOSIGNAL); 583 MSG_DONTWAIT | MSG_NOSIGNAL);
318
319 if (ret <= 0) 584 if (ret <= 0)
320 goto out_close; 585 goto out_close;
321 if (ret == -EAGAIN) 586
322 goto out_resched; 587 /* Process SCTP notifications */
588 if (msg.msg_flags & MSG_NOTIFICATION) {
589 msg.msg_control = incmsg;
590 msg.msg_controllen = sizeof(incmsg);
591
592 process_sctp_notification(con, &msg,
593 page_address(con->rx_page) + con->cb.base);
594 mutex_unlock(&con->sock_mutex);
595 return 0;
596 }
597 BUG_ON(con->nodeid == 0);
323 598
324 if (ret == len) 599 if (ret == len)
325 call_again_soon = 1; 600 call_again_soon = 1;
@@ -329,10 +604,10 @@ static int receive_from_sock(struct connection *con)
329 con->cb.base, con->cb.len, 604 con->cb.base, con->cb.len,
330 PAGE_CACHE_SIZE); 605 PAGE_CACHE_SIZE);
331 if (ret == -EBADMSG) { 606 if (ret == -EBADMSG) {
332 printk(KERN_INFO "dlm: lowcomms: addr=%p, base=%u, len=%u, " 607 log_print("lowcomms: addr=%p, base=%u, len=%u, "
333 "iov_len=%u, iov_base[0]=%p, read=%d\n", 608 "iov_len=%u, iov_base[0]=%p, read=%d",
334 page_address(con->rx_page), con->cb.base, con->cb.len, 609 page_address(con->rx_page), con->cb.base, con->cb.len,
335 len, iov[0].iov_base, r); 610 len, iov[0].iov_base, r);
336 } 611 }
337 if (ret < 0) 612 if (ret < 0)
338 goto out_close; 613 goto out_close;
@@ -368,7 +643,7 @@ out_close:
368} 643}
369 644
370/* Listening socket is busy, accept a connection */ 645/* Listening socket is busy, accept a connection */
371static int accept_from_sock(struct connection *con) 646static int tcp_accept_from_sock(struct connection *con)
372{ 647{
373 int result; 648 int result;
374 struct sockaddr_storage peeraddr; 649 struct sockaddr_storage peeraddr;
@@ -379,7 +654,7 @@ static int accept_from_sock(struct connection *con)
379 struct connection *addcon; 654 struct connection *addcon;
380 655
381 memset(&peeraddr, 0, sizeof(peeraddr)); 656 memset(&peeraddr, 0, sizeof(peeraddr));
382 result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM, 657 result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM,
383 IPPROTO_TCP, &newsock); 658 IPPROTO_TCP, &newsock);
384 if (result < 0) 659 if (result < 0)
385 return -ENOMEM; 660 return -ENOMEM;
@@ -408,7 +683,7 @@ static int accept_from_sock(struct connection *con)
408 /* Get the new node's NODEID */ 683 /* Get the new node's NODEID */
409 make_sockaddr(&peeraddr, 0, &len); 684 make_sockaddr(&peeraddr, 0, &len);
410 if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) { 685 if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) {
411 printk("dlm: connect from non cluster node\n"); 686 log_print("connect from non cluster node");
412 sock_release(newsock); 687 sock_release(newsock);
413 mutex_unlock(&con->sock_mutex); 688 mutex_unlock(&con->sock_mutex);
414 return -1; 689 return -1;
@@ -419,7 +694,6 @@ static int accept_from_sock(struct connection *con)
419 /* Check to see if we already have a connection to this node. This 694 /* Check to see if we already have a connection to this node. This
420 * could happen if the two nodes initiate a connection at roughly 695 * could happen if the two nodes initiate a connection at roughly
421 * the same time and the connections cross on the wire. 696 * the same time and the connections cross on the wire.
422 * TEMPORARY FIX:
423 * In this case we store the incoming one in "othercon" 697 * In this case we store the incoming one in "othercon"
424 */ 698 */
425 newcon = nodeid2con(nodeid, GFP_KERNEL); 699 newcon = nodeid2con(nodeid, GFP_KERNEL);
@@ -434,7 +708,7 @@ static int accept_from_sock(struct connection *con)
434 if (!othercon) { 708 if (!othercon) {
435 othercon = kmem_cache_zalloc(con_cache, GFP_KERNEL); 709 othercon = kmem_cache_zalloc(con_cache, GFP_KERNEL);
436 if (!othercon) { 710 if (!othercon) {
437 printk("dlm: failed to allocate incoming socket\n"); 711 log_print("failed to allocate incoming socket");
438 mutex_unlock(&newcon->sock_mutex); 712 mutex_unlock(&newcon->sock_mutex);
439 result = -ENOMEM; 713 result = -ENOMEM;
440 goto accept_err; 714 goto accept_err;
@@ -477,12 +751,107 @@ accept_err:
477 sock_release(newsock); 751 sock_release(newsock);
478 752
479 if (result != -EAGAIN) 753 if (result != -EAGAIN)
480 printk("dlm: error accepting connection from node: %d\n", result); 754 log_print("error accepting connection from node: %d", result);
481 return result; 755 return result;
482} 756}
483 757
758static void free_entry(struct writequeue_entry *e)
759{
760 __free_page(e->page);
761 kfree(e);
762}
763
764/* Initiate an SCTP association.
765 This is a special case of send_to_sock() in that we don't yet have a
766 peeled-off socket for this association, so we use the listening socket
767 and add the primary IP address of the remote node.
768 */
769static void sctp_init_assoc(struct connection *con)
770{
771 struct sockaddr_storage rem_addr;
772 char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
773 struct msghdr outmessage;
774 struct cmsghdr *cmsg;
775 struct sctp_sndrcvinfo *sinfo;
776 struct connection *base_con;
777 struct writequeue_entry *e;
778 int len, offset;
779 int ret;
780 int addrlen;
781 struct kvec iov[1];
782
783 if (test_and_set_bit(CF_INIT_PENDING, &con->flags))
784 return;
785
786 if (con->retries++ > MAX_CONNECT_RETRIES)
787 return;
788
789 log_print("Initiating association with node %d", con->nodeid);
790
791 if (nodeid_to_addr(con->nodeid, (struct sockaddr *)&rem_addr)) {
792 log_print("no address for nodeid %d", con->nodeid);
793 return;
794 }
795 base_con = nodeid2con(0, 0);
796 BUG_ON(base_con == NULL);
797
798 make_sockaddr(&rem_addr, dlm_config.ci_tcp_port, &addrlen);
799
800 outmessage.msg_name = &rem_addr;
801 outmessage.msg_namelen = addrlen;
802 outmessage.msg_control = outcmsg;
803 outmessage.msg_controllen = sizeof(outcmsg);
804 outmessage.msg_flags = MSG_EOR;
805
806 spin_lock(&con->writequeue_lock);
807 e = list_entry(con->writequeue.next, struct writequeue_entry,
808 list);
809
810 BUG_ON((struct list_head *) e == &con->writequeue);
811
812 len = e->len;
813 offset = e->offset;
814 spin_unlock(&con->writequeue_lock);
815 kmap(e->page);
816
817 /* Send the first block off the write queue */
818 iov[0].iov_base = page_address(e->page)+offset;
819 iov[0].iov_len = len;
820
821 cmsg = CMSG_FIRSTHDR(&outmessage);
822 cmsg->cmsg_level = IPPROTO_SCTP;
823 cmsg->cmsg_type = SCTP_SNDRCV;
824 cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
825 sinfo = CMSG_DATA(cmsg);
826 memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo));
827 sinfo->sinfo_ppid = cpu_to_le32(dlm_our_nodeid());
828 outmessage.msg_controllen = cmsg->cmsg_len;
829
830 ret = kernel_sendmsg(base_con->sock, &outmessage, iov, 1, len);
831 if (ret < 0) {
832 log_print("Send first packet to node %d failed: %d",
833 con->nodeid, ret);
834
835 /* Try again later */
836 clear_bit(CF_CONNECT_PENDING, &con->flags);
837 clear_bit(CF_INIT_PENDING, &con->flags);
838 }
839 else {
840 spin_lock(&con->writequeue_lock);
841 e->offset += ret;
842 e->len -= ret;
843
844 if (e->len == 0 && e->users == 0) {
845 list_del(&e->list);
846 kunmap(e->page);
847 free_entry(e);
848 }
849 spin_unlock(&con->writequeue_lock);
850 }
851}
852
484/* Connect a new socket to its peer */ 853/* Connect a new socket to its peer */
485static void connect_to_sock(struct connection *con) 854static void tcp_connect_to_sock(struct connection *con)
486{ 855{
487 int result = -EHOSTUNREACH; 856 int result = -EHOSTUNREACH;
488 struct sockaddr_storage saddr; 857 struct sockaddr_storage saddr;
@@ -505,7 +874,7 @@ static void connect_to_sock(struct connection *con)
505 } 874 }
506 875
507 /* Create a socket to communicate with */ 876 /* Create a socket to communicate with */
508 result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM, 877 result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM,
509 IPPROTO_TCP, &sock); 878 IPPROTO_TCP, &sock);
510 if (result < 0) 879 if (result < 0)
511 goto out_err; 880 goto out_err;
@@ -516,11 +885,11 @@ static void connect_to_sock(struct connection *con)
516 885
517 sock->sk->sk_user_data = con; 886 sock->sk->sk_user_data = con;
518 con->rx_action = receive_from_sock; 887 con->rx_action = receive_from_sock;
888 con->connect_action = tcp_connect_to_sock;
889 add_sock(sock, con);
519 890
520 make_sockaddr(&saddr, dlm_config.ci_tcp_port, &addr_len); 891 make_sockaddr(&saddr, dlm_config.ci_tcp_port, &addr_len);
521 892
522 add_sock(sock, con);
523
524 log_print("connecting to %d", con->nodeid); 893 log_print("connecting to %d", con->nodeid);
525 result = 894 result =
526 sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len, 895 sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len,
@@ -550,64 +919,57 @@ out:
550 return; 919 return;
551} 920}
552 921
553static struct socket *create_listen_sock(struct connection *con, 922static struct socket *tcp_create_listen_sock(struct connection *con,
554 struct sockaddr_storage *saddr) 923 struct sockaddr_storage *saddr)
555{ 924{
556 struct socket *sock = NULL; 925 struct socket *sock = NULL;
557 mm_segment_t fs;
558 int result = 0; 926 int result = 0;
559 int one = 1; 927 int one = 1;
560 int addr_len; 928 int addr_len;
561 929
562 if (dlm_local_addr.ss_family == AF_INET) 930 if (dlm_local_addr[0]->ss_family == AF_INET)
563 addr_len = sizeof(struct sockaddr_in); 931 addr_len = sizeof(struct sockaddr_in);
564 else 932 else
565 addr_len = sizeof(struct sockaddr_in6); 933 addr_len = sizeof(struct sockaddr_in6);
566 934
567 /* Create a socket to communicate with */ 935 /* Create a socket to communicate with */
568 result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM, IPPROTO_TCP, &sock); 936 result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM,
937 IPPROTO_TCP, &sock);
569 if (result < 0) { 938 if (result < 0) {
570 printk("dlm: Can't create listening comms socket\n"); 939 log_print("Can't create listening comms socket");
571 goto create_out; 940 goto create_out;
572 } 941 }
573 942
574 fs = get_fs(); 943 result = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
575 set_fs(get_ds()); 944 (char *)&one, sizeof(one));
576 result = sock_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, 945
577 (char *)&one, sizeof(one));
578 set_fs(fs);
579 if (result < 0) { 946 if (result < 0) {
580 printk("dlm: Failed to set SO_REUSEADDR on socket: result=%d\n", 947 log_print("Failed to set SO_REUSEADDR on socket: %d", result);
581 result);
582 } 948 }
583 sock->sk->sk_user_data = con; 949 sock->sk->sk_user_data = con;
584 con->rx_action = accept_from_sock; 950 con->rx_action = tcp_accept_from_sock;
951 con->connect_action = tcp_connect_to_sock;
585 con->sock = sock; 952 con->sock = sock;
586 953
587 /* Bind to our port */ 954 /* Bind to our port */
588 make_sockaddr(saddr, dlm_config.ci_tcp_port, &addr_len); 955 make_sockaddr(saddr, dlm_config.ci_tcp_port, &addr_len);
589 result = sock->ops->bind(sock, (struct sockaddr *) saddr, addr_len); 956 result = sock->ops->bind(sock, (struct sockaddr *) saddr, addr_len);
590 if (result < 0) { 957 if (result < 0) {
591 printk("dlm: Can't bind to port %d\n", dlm_config.ci_tcp_port); 958 log_print("Can't bind to port %d", dlm_config.ci_tcp_port);
592 sock_release(sock); 959 sock_release(sock);
593 sock = NULL; 960 sock = NULL;
594 con->sock = NULL; 961 con->sock = NULL;
595 goto create_out; 962 goto create_out;
596 } 963 }
597 964 result = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
598 fs = get_fs();
599 set_fs(get_ds());
600
601 result = sock_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
602 (char *)&one, sizeof(one)); 965 (char *)&one, sizeof(one));
603 set_fs(fs);
604 if (result < 0) { 966 if (result < 0) {
605 printk("dlm: Set keepalive failed: %d\n", result); 967 log_print("Set keepalive failed: %d", result);
606 } 968 }
607 969
608 result = sock->ops->listen(sock, 5); 970 result = sock->ops->listen(sock, 5);
609 if (result < 0) { 971 if (result < 0) {
610 printk("dlm: Can't listen on port %d\n", dlm_config.ci_tcp_port); 972 log_print("Can't listen on port %d", dlm_config.ci_tcp_port);
611 sock_release(sock); 973 sock_release(sock);
612 sock = NULL; 974 sock = NULL;
613 goto create_out; 975 goto create_out;
@@ -617,18 +979,146 @@ create_out:
617 return sock; 979 return sock;
618} 980}
619 981
982/* Get local addresses */
983static void init_local(void)
984{
985 struct sockaddr_storage sas, *addr;
986 int i;
987
988 dlm_local_count = 0;
989 for (i = 0; i < DLM_MAX_ADDR_COUNT - 1; i++) {
990 if (dlm_our_addr(&sas, i))
991 break;
992
993 addr = kmalloc(sizeof(*addr), GFP_KERNEL);
994 if (!addr)
995 break;
996 memcpy(addr, &sas, sizeof(*addr));
997 dlm_local_addr[dlm_local_count++] = addr;
998 }
999}
1000
1001/* Bind to an IP address. SCTP allows multiple address so it can do
1002 multi-homing */
1003static int add_sctp_bind_addr(struct connection *sctp_con,
1004 struct sockaddr_storage *addr,
1005 int addr_len, int num)
1006{
1007 int result = 0;
1008
1009 if (num == 1)
1010 result = kernel_bind(sctp_con->sock,
1011 (struct sockaddr *) addr,
1012 addr_len);
1013 else
1014 result = kernel_setsockopt(sctp_con->sock, SOL_SCTP,
1015 SCTP_SOCKOPT_BINDX_ADD,
1016 (char *)addr, addr_len);
1017
1018 if (result < 0)
1019 log_print("Can't bind to port %d addr number %d",
1020 dlm_config.ci_tcp_port, num);
1021
1022 return result;
1023}
620 1024
621/* Listen on all interfaces */ 1025/* Initialise SCTP socket and bind to all interfaces */
622static int listen_for_all(void) 1026static int sctp_listen_for_all(void)
1027{
1028 struct socket *sock = NULL;
1029 struct sockaddr_storage localaddr;
1030 struct sctp_event_subscribe subscribe;
1031 int result = -EINVAL, num = 1, i, addr_len;
1032 struct connection *con = nodeid2con(0, GFP_KERNEL);
1033 int bufsize = NEEDED_RMEM;
1034
1035 if (!con)
1036 return -ENOMEM;
1037
1038 log_print("Using SCTP for communications");
1039
1040 result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_SEQPACKET,
1041 IPPROTO_SCTP, &sock);
1042 if (result < 0) {
1043 log_print("Can't create comms socket, check SCTP is loaded");
1044 goto out;
1045 }
1046
1047 /* Listen for events */
1048 memset(&subscribe, 0, sizeof(subscribe));
1049 subscribe.sctp_data_io_event = 1;
1050 subscribe.sctp_association_event = 1;
1051 subscribe.sctp_send_failure_event = 1;
1052 subscribe.sctp_shutdown_event = 1;
1053 subscribe.sctp_partial_delivery_event = 1;
1054
1055 result = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUF,
1056 (char *)&bufsize, sizeof(bufsize));
1057 if (result)
1058 log_print("Error increasing buffer space on socket %d", result);
1059
1060 result = kernel_setsockopt(sock, SOL_SCTP, SCTP_EVENTS,
1061 (char *)&subscribe, sizeof(subscribe));
1062 if (result < 0) {
1063 log_print("Failed to set SCTP_EVENTS on socket: result=%d",
1064 result);
1065 goto create_delsock;
1066 }
1067
1068 /* Init con struct */
1069 sock->sk->sk_user_data = con;
1070 con->sock = sock;
1071 con->sock->sk->sk_data_ready = lowcomms_data_ready;
1072 con->rx_action = receive_from_sock;
1073 con->connect_action = sctp_init_assoc;
1074
1075 /* Bind to all interfaces. */
1076 for (i = 0; i < dlm_local_count; i++) {
1077 memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr));
1078 make_sockaddr(&localaddr, dlm_config.ci_tcp_port, &addr_len);
1079
1080 result = add_sctp_bind_addr(con, &localaddr, addr_len, num);
1081 if (result)
1082 goto create_delsock;
1083 ++num;
1084 }
1085
1086 result = sock->ops->listen(sock, 5);
1087 if (result < 0) {
1088 log_print("Can't set socket listening");
1089 goto create_delsock;
1090 }
1091
1092 return 0;
1093
1094create_delsock:
1095 sock_release(sock);
1096 con->sock = NULL;
1097out:
1098 return result;
1099}
1100
1101static int tcp_listen_for_all(void)
623{ 1102{
624 struct socket *sock = NULL; 1103 struct socket *sock = NULL;
625 struct connection *con = nodeid2con(0, GFP_KERNEL); 1104 struct connection *con = nodeid2con(0, GFP_KERNEL);
626 int result = -EINVAL; 1105 int result = -EINVAL;
627 1106
1107 if (!con)
1108 return -ENOMEM;
1109
628 /* We don't support multi-homed hosts */ 1110 /* We don't support multi-homed hosts */
1111 if (dlm_local_addr[1] != NULL) {
1112 log_print("TCP protocol can't handle multi-homed hosts, "
1113 "try SCTP");
1114 return -EINVAL;
1115 }
1116
1117 log_print("Using TCP for communications");
1118
629 set_bit(CF_IS_OTHERCON, &con->flags); 1119 set_bit(CF_IS_OTHERCON, &con->flags);
630 1120
631 sock = create_listen_sock(con, &dlm_local_addr); 1121 sock = tcp_create_listen_sock(con, dlm_local_addr[0]);
632 if (sock) { 1122 if (sock) {
633 add_sock(sock, con); 1123 add_sock(sock, con);
634 result = 0; 1124 result = 0;
@@ -666,8 +1156,7 @@ static struct writequeue_entry *new_writequeue_entry(struct connection *con,
666 return entry; 1156 return entry;
667} 1157}
668 1158
669void *dlm_lowcomms_get_buffer(int nodeid, int len, 1159void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc)
670 gfp_t allocation, char **ppc)
671{ 1160{
672 struct connection *con; 1161 struct connection *con;
673 struct writequeue_entry *e; 1162 struct writequeue_entry *e;
@@ -735,12 +1224,6 @@ out:
735 return; 1224 return;
736} 1225}
737 1226
738static void free_entry(struct writequeue_entry *e)
739{
740 __free_page(e->page);
741 kfree(e);
742}
743
744/* Send a message */ 1227/* Send a message */
745static void send_to_sock(struct connection *con) 1228static void send_to_sock(struct connection *con)
746{ 1229{
@@ -777,8 +1260,7 @@ static void send_to_sock(struct connection *con)
777 goto out; 1260 goto out;
778 if (ret <= 0) 1261 if (ret <= 0)
779 goto send_error; 1262 goto send_error;
780 } 1263 } else {
781 else {
782 /* Don't starve people filling buffers */ 1264 /* Don't starve people filling buffers */
783 cond_resched(); 1265 cond_resched();
784 } 1266 }
@@ -807,7 +1289,8 @@ send_error:
807 1289
808out_connect: 1290out_connect:
809 mutex_unlock(&con->sock_mutex); 1291 mutex_unlock(&con->sock_mutex);
810 connect_to_sock(con); 1292 if (!test_bit(CF_INIT_PENDING, &con->flags))
1293 lowcomms_connect_sock(con);
811 return; 1294 return;
812} 1295}
813 1296
@@ -832,9 +1315,6 @@ int dlm_lowcomms_close(int nodeid)
832{ 1315{
833 struct connection *con; 1316 struct connection *con;
834 1317
835 if (!connections)
836 goto out;
837
838 log_print("closing connection to node %d", nodeid); 1318 log_print("closing connection to node %d", nodeid);
839 con = nodeid2con(nodeid, 0); 1319 con = nodeid2con(nodeid, 0);
840 if (con) { 1320 if (con) {
@@ -842,12 +1322,9 @@ int dlm_lowcomms_close(int nodeid)
842 close_connection(con, true); 1322 close_connection(con, true);
843 } 1323 }
844 return 0; 1324 return 0;
845
846out:
847 return -1;
848} 1325}
849 1326
850/* Look for activity on active sockets */ 1327/* Receive workqueue function */
851static void process_recv_sockets(struct work_struct *work) 1328static void process_recv_sockets(struct work_struct *work)
852{ 1329{
853 struct connection *con = container_of(work, struct connection, rwork); 1330 struct connection *con = container_of(work, struct connection, rwork);
@@ -859,15 +1336,14 @@ static void process_recv_sockets(struct work_struct *work)
859 } while (!err); 1336 } while (!err);
860} 1337}
861 1338
862 1339/* Send workqueue function */
863static void process_send_sockets(struct work_struct *work) 1340static void process_send_sockets(struct work_struct *work)
864{ 1341{
865 struct connection *con = container_of(work, struct connection, swork); 1342 struct connection *con = container_of(work, struct connection, swork);
866 1343
867 if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) { 1344 if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) {
868 connect_to_sock(con); 1345 con->connect_action(con);
869 } 1346 }
870
871 clear_bit(CF_WRITE_PENDING, &con->flags); 1347 clear_bit(CF_WRITE_PENDING, &con->flags);
872 send_to_sock(con); 1348 send_to_sock(con);
873} 1349}
@@ -878,8 +1354,8 @@ static void clean_writequeues(void)
878{ 1354{
879 int nodeid; 1355 int nodeid;
880 1356
881 for (nodeid = 1; nodeid < conn_array_size; nodeid++) { 1357 for (nodeid = 1; nodeid <= max_nodeid; nodeid++) {
882 struct connection *con = nodeid2con(nodeid, 0); 1358 struct connection *con = __nodeid2con(nodeid, 0);
883 1359
884 if (con) 1360 if (con)
885 clean_one_writequeue(con); 1361 clean_one_writequeue(con);
@@ -916,64 +1392,67 @@ static int work_start(void)
916void dlm_lowcomms_stop(void) 1392void dlm_lowcomms_stop(void)
917{ 1393{
918 int i; 1394 int i;
1395 struct connection *con;
919 1396
920 /* Set all the flags to prevent any 1397 /* Set all the flags to prevent any
921 socket activity. 1398 socket activity.
922 */ 1399 */
923 for (i = 0; i < conn_array_size; i++) { 1400 down(&connections_lock);
924 if (connections[i]) 1401 for (i = 0; i <= max_nodeid; i++) {
925 connections[i]->flags |= 0xFF; 1402 con = __nodeid2con(i, 0);
1403 if (con)
1404 con->flags |= 0xFF;
926 } 1405 }
1406 up(&connections_lock);
927 1407
928 work_stop(); 1408 work_stop();
1409
1410 down(&connections_lock);
929 clean_writequeues(); 1411 clean_writequeues();
930 1412
931 for (i = 0; i < conn_array_size; i++) { 1413 for (i = 0; i <= max_nodeid; i++) {
932 if (connections[i]) { 1414 con = __nodeid2con(i, 0);
933 close_connection(connections[i], true); 1415 if (con) {
934 if (connections[i]->othercon) 1416 close_connection(con, true);
935 kmem_cache_free(con_cache, connections[i]->othercon); 1417 if (con->othercon)
936 kmem_cache_free(con_cache, connections[i]); 1418 kmem_cache_free(con_cache, con->othercon);
1419 kmem_cache_free(con_cache, con);
937 } 1420 }
938 } 1421 }
939 1422 max_nodeid = 0;
940 kfree(connections); 1423 up(&connections_lock);
941 connections = NULL;
942
943 kmem_cache_destroy(con_cache); 1424 kmem_cache_destroy(con_cache);
1425 idr_init(&connections_idr);
944} 1426}
945 1427
946/* This is quite likely to sleep... */
947int dlm_lowcomms_start(void) 1428int dlm_lowcomms_start(void)
948{ 1429{
949 int error = 0; 1430 int error = -EINVAL;
950 1431 struct connection *con;
951 error = -ENOMEM;
952 connections = kzalloc(sizeof(struct connection *) *
953 NODE_INCREMENT, GFP_KERNEL);
954 if (!connections)
955 goto out;
956
957 conn_array_size = NODE_INCREMENT;
958 1432
959 if (dlm_our_addr(&dlm_local_addr, 0)) { 1433 init_local();
1434 if (!dlm_local_count) {
1435 error = -ENOTCONN;
960 log_print("no local IP address has been set"); 1436 log_print("no local IP address has been set");
961 goto fail_free_conn; 1437 goto out;
962 }
963 if (!dlm_our_addr(&dlm_local_addr, 1)) {
964 log_print("This dlm comms module does not support multi-homed clustering");
965 goto fail_free_conn;
966 } 1438 }
967 1439
1440 error = -ENOMEM;
968 con_cache = kmem_cache_create("dlm_conn", sizeof(struct connection), 1441 con_cache = kmem_cache_create("dlm_conn", sizeof(struct connection),
969 __alignof__(struct connection), 0, 1442 __alignof__(struct connection), 0,
970 NULL, NULL); 1443 NULL, NULL);
971 if (!con_cache) 1444 if (!con_cache)
972 goto fail_free_conn; 1445 goto out;
973 1446
1447 /* Set some sysctl minima */
1448 if (sysctl_rmem_max < NEEDED_RMEM)
1449 sysctl_rmem_max = NEEDED_RMEM;
974 1450
975 /* Start listening */ 1451 /* Start listening */
976 error = listen_for_all(); 1452 if (dlm_config.ci_protocol == 0)
1453 error = tcp_listen_for_all();
1454 else
1455 error = sctp_listen_for_all();
977 if (error) 1456 if (error)
978 goto fail_unlisten; 1457 goto fail_unlisten;
979 1458
@@ -984,24 +1463,13 @@ int dlm_lowcomms_start(void)
984 return 0; 1463 return 0;
985 1464
986fail_unlisten: 1465fail_unlisten:
987 close_connection(connections[0], false); 1466 con = nodeid2con(0,0);
988 kmem_cache_free(con_cache, connections[0]); 1467 if (con) {
1468 close_connection(con, false);
1469 kmem_cache_free(con_cache, con);
1470 }
989 kmem_cache_destroy(con_cache); 1471 kmem_cache_destroy(con_cache);
990 1472
991fail_free_conn:
992 kfree(connections);
993
994out: 1473out:
995 return error; 1474 return error;
996} 1475}
997
998/*
999 * Overrides for Emacs so that we follow Linus's tabbing style.
1000 * Emacs will notice this stuff at the end of the file and automatically
1001 * adjust the settings for this buffer only. This must remain at the end
1002 * of the file.
1003 * ---------------------------------------------------------------------------
1004 * Local variables:
1005 * c-file-style: "linux"
1006 * End:
1007 */
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 3870150b83a4..b0201ec325a7 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2006 Red Hat, Inc. All rights reserved. 2 * Copyright (C) 2006-2007 Red Hat, Inc. All rights reserved.
3 * 3 *
4 * This copyrighted material is made available to anyone wishing to use, 4 * This copyrighted material is made available to anyone wishing to use,
5 * modify, copy, or redistribute it subject to the terms and conditions 5 * modify, copy, or redistribute it subject to the terms and conditions
@@ -56,6 +56,7 @@ struct dlm_write_request32 {
56 union { 56 union {
57 struct dlm_lock_params32 lock; 57 struct dlm_lock_params32 lock;
58 struct dlm_lspace_params lspace; 58 struct dlm_lspace_params lspace;
59 struct dlm_purge_params purge;
59 } i; 60 } i;
60}; 61};
61 62
@@ -92,6 +93,9 @@ static void compat_input(struct dlm_write_request *kb,
92 kb->i.lspace.flags = kb32->i.lspace.flags; 93 kb->i.lspace.flags = kb32->i.lspace.flags;
93 kb->i.lspace.minor = kb32->i.lspace.minor; 94 kb->i.lspace.minor = kb32->i.lspace.minor;
94 strcpy(kb->i.lspace.name, kb32->i.lspace.name); 95 strcpy(kb->i.lspace.name, kb32->i.lspace.name);
96 } else if (kb->cmd == DLM_USER_PURGE) {
97 kb->i.purge.nodeid = kb32->i.purge.nodeid;
98 kb->i.purge.pid = kb32->i.purge.pid;
95 } else { 99 } else {
96 kb->i.lock.mode = kb32->i.lock.mode; 100 kb->i.lock.mode = kb32->i.lock.mode;
97 kb->i.lock.namelen = kb32->i.lock.namelen; 101 kb->i.lock.namelen = kb32->i.lock.namelen;
@@ -111,8 +115,6 @@ static void compat_input(struct dlm_write_request *kb,
111static void compat_output(struct dlm_lock_result *res, 115static void compat_output(struct dlm_lock_result *res,
112 struct dlm_lock_result32 *res32) 116 struct dlm_lock_result32 *res32)
113{ 117{
114 res32->length = res->length - (sizeof(struct dlm_lock_result) -
115 sizeof(struct dlm_lock_result32));
116 res32->user_astaddr = (__u32)(long)res->user_astaddr; 118 res32->user_astaddr = (__u32)(long)res->user_astaddr;
117 res32->user_astparam = (__u32)(long)res->user_astparam; 119 res32->user_astparam = (__u32)(long)res->user_astparam;
118 res32->user_lksb = (__u32)(long)res->user_lksb; 120 res32->user_lksb = (__u32)(long)res->user_lksb;
@@ -128,35 +130,30 @@ static void compat_output(struct dlm_lock_result *res,
128} 130}
129#endif 131#endif
130 132
133/* we could possibly check if the cancel of an orphan has resulted in the lkb
134 being removed and then remove that lkb from the orphans list and free it */
131 135
132void dlm_user_add_ast(struct dlm_lkb *lkb, int type) 136void dlm_user_add_ast(struct dlm_lkb *lkb, int type)
133{ 137{
134 struct dlm_ls *ls; 138 struct dlm_ls *ls;
135 struct dlm_user_args *ua; 139 struct dlm_user_args *ua;
136 struct dlm_user_proc *proc; 140 struct dlm_user_proc *proc;
137 int remove_ownqueue = 0; 141 int eol = 0, ast_type;
138 142
139 /* dlm_clear_proc_locks() sets ORPHAN/DEAD flag on each 143 if (lkb->lkb_flags & (DLM_IFL_ORPHAN | DLM_IFL_DEAD))
140 lkb before dealing with it. We need to check this
141 flag before taking ls_clear_proc_locks mutex because if
142 it's set, dlm_clear_proc_locks() holds the mutex. */
143
144 if (lkb->lkb_flags & (DLM_IFL_ORPHAN | DLM_IFL_DEAD)) {
145 /* log_print("user_add_ast skip1 %x", lkb->lkb_flags); */
146 return; 144 return;
147 }
148 145
149 ls = lkb->lkb_resource->res_ls; 146 ls = lkb->lkb_resource->res_ls;
150 mutex_lock(&ls->ls_clear_proc_locks); 147 mutex_lock(&ls->ls_clear_proc_locks);
151 148
152 /* If ORPHAN/DEAD flag is set, it means the process is dead so an ast 149 /* If ORPHAN/DEAD flag is set, it means the process is dead so an ast
153 can't be delivered. For ORPHAN's, dlm_clear_proc_locks() freed 150 can't be delivered. For ORPHAN's, dlm_clear_proc_locks() freed
154 lkb->ua so we can't try to use it. */ 151 lkb->ua so we can't try to use it. This second check is necessary
152 for cases where a completion ast is received for an operation that
153 began before clear_proc_locks did its cancel/unlock. */
155 154
156 if (lkb->lkb_flags & (DLM_IFL_ORPHAN | DLM_IFL_DEAD)) { 155 if (lkb->lkb_flags & (DLM_IFL_ORPHAN | DLM_IFL_DEAD))
157 /* log_print("user_add_ast skip2 %x", lkb->lkb_flags); */
158 goto out; 156 goto out;
159 }
160 157
161 DLM_ASSERT(lkb->lkb_astparam, dlm_print_lkb(lkb);); 158 DLM_ASSERT(lkb->lkb_astparam, dlm_print_lkb(lkb););
162 ua = (struct dlm_user_args *)lkb->lkb_astparam; 159 ua = (struct dlm_user_args *)lkb->lkb_astparam;
@@ -166,28 +163,42 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type)
166 goto out; 163 goto out;
167 164
168 spin_lock(&proc->asts_spin); 165 spin_lock(&proc->asts_spin);
169 if (!(lkb->lkb_ast_type & (AST_COMP | AST_BAST))) { 166
167 ast_type = lkb->lkb_ast_type;
168 lkb->lkb_ast_type |= type;
169
170 if (!ast_type) {
170 kref_get(&lkb->lkb_ref); 171 kref_get(&lkb->lkb_ref);
171 list_add_tail(&lkb->lkb_astqueue, &proc->asts); 172 list_add_tail(&lkb->lkb_astqueue, &proc->asts);
172 lkb->lkb_ast_type |= type;
173 wake_up_interruptible(&proc->wait); 173 wake_up_interruptible(&proc->wait);
174 } 174 }
175 175 if (type == AST_COMP && (ast_type & AST_COMP))
176 /* noqueue requests that fail may need to be removed from the 176 log_debug(ls, "ast overlap %x status %x %x",
177 proc's locks list, there should be a better way of detecting 177 lkb->lkb_id, ua->lksb.sb_status, lkb->lkb_flags);
178 this situation than checking all these things... */ 178
179 179 /* Figure out if this lock is at the end of its life and no longer
180 if (type == AST_COMP && lkb->lkb_grmode == DLM_LOCK_IV && 180 available for the application to use. The lkb still exists until
181 ua->lksb.sb_status == -EAGAIN && !list_empty(&lkb->lkb_ownqueue)) 181 the final ast is read. A lock becomes EOL in three situations:
182 remove_ownqueue = 1; 182 1. a noqueue request fails with EAGAIN
183 183 2. an unlock completes with EUNLOCK
184 /* unlocks or cancels of waiting requests need to be removed from the 184 3. a cancel of a waiting request completes with ECANCEL
185 proc's unlocking list, again there must be a better way... */ 185 An EOL lock needs to be removed from the process's list of locks.
186 186 And we can't allow any new operation on an EOL lock. This is
187 if (ua->lksb.sb_status == -DLM_EUNLOCK || 187 not related to the lifetime of the lkb struct which is managed
188 entirely by refcount. */
189
190 if (type == AST_COMP &&
191 lkb->lkb_grmode == DLM_LOCK_IV &&
192 ua->lksb.sb_status == -EAGAIN)
193 eol = 1;
194 else if (ua->lksb.sb_status == -DLM_EUNLOCK ||
188 (ua->lksb.sb_status == -DLM_ECANCEL && 195 (ua->lksb.sb_status == -DLM_ECANCEL &&
189 lkb->lkb_grmode == DLM_LOCK_IV)) 196 lkb->lkb_grmode == DLM_LOCK_IV))
190 remove_ownqueue = 1; 197 eol = 1;
198 if (eol) {
199 lkb->lkb_ast_type &= ~AST_BAST;
200 lkb->lkb_flags |= DLM_IFL_ENDOFLIFE;
201 }
191 202
192 /* We want to copy the lvb to userspace when the completion 203 /* We want to copy the lvb to userspace when the completion
193 ast is read if the status is 0, the lock has an lvb and 204 ast is read if the status is 0, the lock has an lvb and
@@ -204,11 +215,13 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type)
204 215
205 spin_unlock(&proc->asts_spin); 216 spin_unlock(&proc->asts_spin);
206 217
207 if (remove_ownqueue) { 218 if (eol) {
208 spin_lock(&ua->proc->locks_spin); 219 spin_lock(&ua->proc->locks_spin);
209 list_del_init(&lkb->lkb_ownqueue); 220 if (!list_empty(&lkb->lkb_ownqueue)) {
221 list_del_init(&lkb->lkb_ownqueue);
222 dlm_put_lkb(lkb);
223 }
210 spin_unlock(&ua->proc->locks_spin); 224 spin_unlock(&ua->proc->locks_spin);
211 dlm_put_lkb(lkb);
212 } 225 }
213 out: 226 out:
214 mutex_unlock(&ls->ls_clear_proc_locks); 227 mutex_unlock(&ls->ls_clear_proc_locks);
@@ -286,47 +299,71 @@ static int device_user_unlock(struct dlm_user_proc *proc,
286 return error; 299 return error;
287} 300}
288 301
289static int device_create_lockspace(struct dlm_lspace_params *params) 302static int create_misc_device(struct dlm_ls *ls, char *name)
290{ 303{
291 dlm_lockspace_t *lockspace;
292 struct dlm_ls *ls;
293 int error, len; 304 int error, len;
294 305
295 if (!capable(CAP_SYS_ADMIN))
296 return -EPERM;
297
298 error = dlm_new_lockspace(params->name, strlen(params->name),
299 &lockspace, 0, DLM_USER_LVB_LEN);
300 if (error)
301 return error;
302
303 ls = dlm_find_lockspace_local(lockspace);
304 if (!ls)
305 return -ENOENT;
306
307 error = -ENOMEM; 306 error = -ENOMEM;
308 len = strlen(params->name) + strlen(name_prefix) + 2; 307 len = strlen(name) + strlen(name_prefix) + 2;
309 ls->ls_device.name = kzalloc(len, GFP_KERNEL); 308 ls->ls_device.name = kzalloc(len, GFP_KERNEL);
310 if (!ls->ls_device.name) 309 if (!ls->ls_device.name)
311 goto fail; 310 goto fail;
311
312 snprintf((char *)ls->ls_device.name, len, "%s_%s", name_prefix, 312 snprintf((char *)ls->ls_device.name, len, "%s_%s", name_prefix,
313 params->name); 313 name);
314 ls->ls_device.fops = &device_fops; 314 ls->ls_device.fops = &device_fops;
315 ls->ls_device.minor = MISC_DYNAMIC_MINOR; 315 ls->ls_device.minor = MISC_DYNAMIC_MINOR;
316 316
317 error = misc_register(&ls->ls_device); 317 error = misc_register(&ls->ls_device);
318 if (error) { 318 if (error) {
319 kfree(ls->ls_device.name); 319 kfree(ls->ls_device.name);
320 goto fail;
321 } 320 }
321fail:
322 return error;
323}
324
325static int device_user_purge(struct dlm_user_proc *proc,
326 struct dlm_purge_params *params)
327{
328 struct dlm_ls *ls;
329 int error;
330
331 ls = dlm_find_lockspace_local(proc->lockspace);
332 if (!ls)
333 return -ENOENT;
334
335 error = dlm_user_purge(ls, proc, params->nodeid, params->pid);
322 336
323 error = ls->ls_device.minor;
324 dlm_put_lockspace(ls); 337 dlm_put_lockspace(ls);
325 return error; 338 return error;
339}
340
341static int device_create_lockspace(struct dlm_lspace_params *params)
342{
343 dlm_lockspace_t *lockspace;
344 struct dlm_ls *ls;
345 int error;
326 346
327 fail: 347 if (!capable(CAP_SYS_ADMIN))
348 return -EPERM;
349
350 error = dlm_new_lockspace(params->name, strlen(params->name),
351 &lockspace, 0, DLM_USER_LVB_LEN);
352 if (error)
353 return error;
354
355 ls = dlm_find_lockspace_local(lockspace);
356 if (!ls)
357 return -ENOENT;
358
359 error = create_misc_device(ls, params->name);
328 dlm_put_lockspace(ls); 360 dlm_put_lockspace(ls);
329 dlm_release_lockspace(lockspace, 0); 361
362 if (error)
363 dlm_release_lockspace(lockspace, 0);
364 else
365 error = ls->ls_device.minor;
366
330 return error; 367 return error;
331} 368}
332 369
@@ -343,6 +380,10 @@ static int device_remove_lockspace(struct dlm_lspace_params *params)
343 if (!ls) 380 if (!ls)
344 return -ENOENT; 381 return -ENOENT;
345 382
383 /* Deregister the misc device first, so we don't have
384 * a device that's not attached to a lockspace. If
385 * dlm_release_lockspace fails then we can recreate it
386 */
346 error = misc_deregister(&ls->ls_device); 387 error = misc_deregister(&ls->ls_device);
347 if (error) { 388 if (error) {
348 dlm_put_lockspace(ls); 389 dlm_put_lockspace(ls);
@@ -361,6 +402,8 @@ static int device_remove_lockspace(struct dlm_lspace_params *params)
361 402
362 dlm_put_lockspace(ls); 403 dlm_put_lockspace(ls);
363 error = dlm_release_lockspace(lockspace, force); 404 error = dlm_release_lockspace(lockspace, force);
405 if (error)
406 create_misc_device(ls, ls->ls_name);
364 out: 407 out:
365 return error; 408 return error;
366} 409}
@@ -497,6 +540,14 @@ static ssize_t device_write(struct file *file, const char __user *buf,
497 error = device_remove_lockspace(&kbuf->i.lspace); 540 error = device_remove_lockspace(&kbuf->i.lspace);
498 break; 541 break;
499 542
543 case DLM_USER_PURGE:
544 if (!proc) {
545 log_print("no locking on control device");
546 goto out_sig;
547 }
548 error = device_user_purge(proc, &kbuf->i.purge);
549 break;
550
500 default: 551 default:
501 log_print("Unknown command passed to DLM device : %d\n", 552 log_print("Unknown command passed to DLM device : %d\n",
502 kbuf->cmd); 553 kbuf->cmd);
diff --git a/fs/dquot.c b/fs/dquot.c
index b16f991662c1..3a995841de90 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -69,7 +69,6 @@
69#include <linux/file.h> 69#include <linux/file.h>
70#include <linux/slab.h> 70#include <linux/slab.h>
71#include <linux/sysctl.h> 71#include <linux/sysctl.h>
72#include <linux/smp_lock.h>
73#include <linux/init.h> 72#include <linux/init.h>
74#include <linux/module.h> 73#include <linux/module.h>
75#include <linux/proc_fs.h> 74#include <linux/proc_fs.h>
@@ -475,7 +474,7 @@ int vfs_quota_sync(struct super_block *sb, int type)
475 spin_lock(&dq_list_lock); 474 spin_lock(&dq_list_lock);
476 dirty = &dqopt->info[cnt].dqi_dirty_list; 475 dirty = &dqopt->info[cnt].dqi_dirty_list;
477 while (!list_empty(dirty)) { 476 while (!list_empty(dirty)) {
478 dquot = list_entry(dirty->next, struct dquot, dq_dirty); 477 dquot = list_first_entry(dirty, struct dquot, dq_dirty);
479 /* Dirty and inactive can be only bad dquot... */ 478 /* Dirty and inactive can be only bad dquot... */
480 if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) { 479 if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) {
481 clear_dquot_dirty(dquot); 480 clear_dquot_dirty(dquot);
@@ -721,7 +720,8 @@ static inline int dqput_blocks(struct dquot *dquot)
721 720
722/* Remove references to dquots from inode - add dquot to list for freeing if needed */ 721/* Remove references to dquots from inode - add dquot to list for freeing if needed */
723/* We can't race with anybody because we hold dqptr_sem for writing... */ 722/* We can't race with anybody because we hold dqptr_sem for writing... */
724int remove_inode_dquot_ref(struct inode *inode, int type, struct list_head *tofree_head) 723static int remove_inode_dquot_ref(struct inode *inode, int type,
724 struct list_head *tofree_head)
725{ 725{
726 struct dquot *dquot = inode->i_dquot[type]; 726 struct dquot *dquot = inode->i_dquot[type];
727 727
@@ -1432,7 +1432,7 @@ int vfs_quota_off(struct super_block *sb, int type)
1432 mutex_unlock(&dqopt->dqonoff_mutex); 1432 mutex_unlock(&dqopt->dqonoff_mutex);
1433 } 1433 }
1434 if (sb->s_bdev) 1434 if (sb->s_bdev)
1435 invalidate_bdev(sb->s_bdev, 0); 1435 invalidate_bdev(sb->s_bdev);
1436 return 0; 1436 return 0;
1437} 1437}
1438 1438
@@ -1468,7 +1468,7 @@ static int vfs_quota_on_inode(struct inode *inode, int type, int format_id)
1468 * we see all the changes from userspace... */ 1468 * we see all the changes from userspace... */
1469 write_inode_now(inode, 1); 1469 write_inode_now(inode, 1);
1470 /* And now flush the block cache so that kernel sees the changes */ 1470 /* And now flush the block cache so that kernel sees the changes */
1471 invalidate_bdev(sb->s_bdev, 0); 1471 invalidate_bdev(sb->s_bdev);
1472 mutex_lock(&inode->i_mutex); 1472 mutex_lock(&inode->i_mutex);
1473 mutex_lock(&dqopt->dqonoff_mutex); 1473 mutex_lock(&dqopt->dqonoff_mutex);
1474 if (sb_has_quota_enabled(sb, type)) { 1474 if (sb_has_quota_enabled(sb, type)) {
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 7a7d25d541e7..9881b5c5de59 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -28,7 +28,6 @@
28#include <linux/mount.h> 28#include <linux/mount.h>
29#include <linux/pagemap.h> 29#include <linux/pagemap.h>
30#include <linux/security.h> 30#include <linux/security.h>
31#include <linux/smp_lock.h>
32#include <linux/compat.h> 31#include <linux/compat.h>
33#include <linux/fs_stack.h> 32#include <linux/fs_stack.h>
34#include "ecryptfs_kernel.h" 33#include "ecryptfs_kernel.h"
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index fc4a3a224641..8cbf3f69ebe5 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -583,8 +583,7 @@ inode_info_init_once(void *vptr, struct kmem_cache *cachep, unsigned long flags)
583{ 583{
584 struct ecryptfs_inode_info *ei = (struct ecryptfs_inode_info *)vptr; 584 struct ecryptfs_inode_info *ei = (struct ecryptfs_inode_info *)vptr;
585 585
586 if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == 586 if (flags & SLAB_CTOR_CONSTRUCTOR)
587 SLAB_CTOR_CONSTRUCTOR)
588 inode_init_once(&ei->vfs_inode); 587 inode_init_once(&ei->vfs_inode);
589} 588}
590 589
@@ -793,7 +792,7 @@ static int do_sysfs_registration(void)
793 "Unable to register ecryptfs sysfs subsystem\n"); 792 "Unable to register ecryptfs sysfs subsystem\n");
794 goto out; 793 goto out;
795 } 794 }
796 rc = sysfs_create_file(&ecryptfs_subsys.kset.kobj, 795 rc = sysfs_create_file(&ecryptfs_subsys.kobj,
797 &sysfs_attr_version.attr); 796 &sysfs_attr_version.attr);
798 if (rc) { 797 if (rc) {
799 printk(KERN_ERR 798 printk(KERN_ERR
@@ -801,12 +800,12 @@ static int do_sysfs_registration(void)
801 subsystem_unregister(&ecryptfs_subsys); 800 subsystem_unregister(&ecryptfs_subsys);
802 goto out; 801 goto out;
803 } 802 }
804 rc = sysfs_create_file(&ecryptfs_subsys.kset.kobj, 803 rc = sysfs_create_file(&ecryptfs_subsys.kobj,
805 &sysfs_attr_version_str.attr); 804 &sysfs_attr_version_str.attr);
806 if (rc) { 805 if (rc) {
807 printk(KERN_ERR 806 printk(KERN_ERR
808 "Unable to create ecryptfs version_str attribute\n"); 807 "Unable to create ecryptfs version_str attribute\n");
809 sysfs_remove_file(&ecryptfs_subsys.kset.kobj, 808 sysfs_remove_file(&ecryptfs_subsys.kobj,
810 &sysfs_attr_version.attr); 809 &sysfs_attr_version.attr);
811 subsystem_unregister(&ecryptfs_subsys); 810 subsystem_unregister(&ecryptfs_subsys);
812 goto out; 811 goto out;
@@ -841,7 +840,7 @@ static int __init ecryptfs_init(void)
841 ecryptfs_free_kmem_caches(); 840 ecryptfs_free_kmem_caches();
842 goto out; 841 goto out;
843 } 842 }
844 kset_set_kset_s(&ecryptfs_subsys, fs_subsys); 843 kobj_set_kset_s(&ecryptfs_subsys, fs_subsys);
845 sysfs_attr_version.attr.owner = THIS_MODULE; 844 sysfs_attr_version.attr.owner = THIS_MODULE;
846 sysfs_attr_version_str.attr.owner = THIS_MODULE; 845 sysfs_attr_version_str.attr.owner = THIS_MODULE;
847 rc = do_sysfs_registration(); 846 rc = do_sysfs_registration();
@@ -862,9 +861,9 @@ out:
862 861
863static void __exit ecryptfs_exit(void) 862static void __exit ecryptfs_exit(void)
864{ 863{
865 sysfs_remove_file(&ecryptfs_subsys.kset.kobj, 864 sysfs_remove_file(&ecryptfs_subsys.kobj,
866 &sysfs_attr_version.attr); 865 &sysfs_attr_version.attr);
867 sysfs_remove_file(&ecryptfs_subsys.kset.kobj, 866 sysfs_remove_file(&ecryptfs_subsys.kobj,
868 &sysfs_attr_version_str.attr); 867 &sysfs_attr_version_str.attr);
869 subsystem_unregister(&ecryptfs_subsys); 868 subsystem_unregister(&ecryptfs_subsys);
870 ecryptfs_release_messaging(ecryptfs_transport); 869 ecryptfs_release_messaging(ecryptfs_transport);
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index b731b09499cb..0770c4b66f53 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -46,7 +46,6 @@ struct kmem_cache *ecryptfs_lower_page_cache;
46 */ 46 */
47static struct page *ecryptfs_get1page(struct file *file, int index) 47static struct page *ecryptfs_get1page(struct file *file, int index)
48{ 48{
49 struct page *page;
50 struct dentry *dentry; 49 struct dentry *dentry;
51 struct inode *inode; 50 struct inode *inode;
52 struct address_space *mapping; 51 struct address_space *mapping;
@@ -54,14 +53,7 @@ static struct page *ecryptfs_get1page(struct file *file, int index)
54 dentry = file->f_path.dentry; 53 dentry = file->f_path.dentry;
55 inode = dentry->d_inode; 54 inode = dentry->d_inode;
56 mapping = inode->i_mapping; 55 mapping = inode->i_mapping;
57 page = read_cache_page(mapping, index, 56 return read_mapping_page(mapping, index, (void *)file);
58 (filler_t *)mapping->a_ops->readpage,
59 (void *)file);
60 if (IS_ERR(page))
61 goto out;
62 wait_on_page_locked(page);
63out:
64 return page;
65} 57}
66 58
67static 59static
@@ -233,7 +225,6 @@ int ecryptfs_do_readpage(struct file *file, struct page *page,
233 ecryptfs_printk(KERN_ERR, "Error reading from page cache\n"); 225 ecryptfs_printk(KERN_ERR, "Error reading from page cache\n");
234 goto out; 226 goto out;
235 } 227 }
236 wait_on_page_locked(lower_page);
237 page_data = kmap_atomic(page, KM_USER0); 228 page_data = kmap_atomic(page, KM_USER0);
238 lower_page_data = kmap_atomic(lower_page, KM_USER1); 229 lower_page_data = kmap_atomic(lower_page, KM_USER1);
239 memcpy(page_data, lower_page_data, PAGE_CACHE_SIZE); 230 memcpy(page_data, lower_page_data, PAGE_CACHE_SIZE);
diff --git a/fs/efs/super.c b/fs/efs/super.c
index c2235e46edcd..ba7a8b9da0c1 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -72,8 +72,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
72{ 72{
73 struct efs_inode_info *ei = (struct efs_inode_info *) foo; 73 struct efs_inode_info *ei = (struct efs_inode_info *) foo;
74 74
75 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 75 if (flags & SLAB_CTOR_CONSTRUCTOR)
76 SLAB_CTOR_CONSTRUCTOR)
77 inode_init_once(&ei->vfs_inode); 76 inode_init_once(&ei->vfs_inode);
78} 77}
79 78
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 3ae644e7e860..b5c7ca584939 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -22,7 +22,6 @@
22#include <linux/mm.h> 22#include <linux/mm.h>
23#include <linux/slab.h> 23#include <linux/slab.h>
24#include <linux/poll.h> 24#include <linux/poll.h>
25#include <linux/smp_lock.h>
26#include <linux/string.h> 25#include <linux/string.h>
27#include <linux/list.h> 26#include <linux/list.h>
28#include <linux/hash.h> 27#include <linux/hash.h>
@@ -185,7 +184,7 @@ struct eppoll_entry {
185 184
186/* 185/*
187 * Each file descriptor added to the eventpoll interface will 186 * Each file descriptor added to the eventpoll interface will
188 * have an entry of this type linked to the hash. 187 * have an entry of this type linked to the "rbr" RB tree.
189 */ 188 */
190struct epitem { 189struct epitem {
191 /* RB-Tree node used to link this structure to the eventpoll rb-tree */ 190 /* RB-Tree node used to link this structure to the eventpoll rb-tree */
@@ -217,15 +216,6 @@ struct epitem {
217 216
218 /* List header used to link this item to the "struct file" items list */ 217 /* List header used to link this item to the "struct file" items list */
219 struct list_head fllink; 218 struct list_head fllink;
220
221 /* List header used to link the item to the transfer list */
222 struct list_head txlink;
223
224 /*
225 * This is used during the collection/transfer of events to userspace
226 * to pin items empty events set.
227 */
228 unsigned int revents;
229}; 219};
230 220
231/* Wrapper struct used by poll queueing */ 221/* Wrapper struct used by poll queueing */
@@ -258,11 +248,8 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi);
258static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *key); 248static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *key);
259static int ep_eventpoll_close(struct inode *inode, struct file *file); 249static int ep_eventpoll_close(struct inode *inode, struct file *file);
260static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait); 250static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait);
261static int ep_collect_ready_items(struct eventpoll *ep,
262 struct list_head *txlist, int maxevents);
263static int ep_send_events(struct eventpoll *ep, struct list_head *txlist, 251static int ep_send_events(struct eventpoll *ep, struct list_head *txlist,
264 struct epoll_event __user *events); 252 struct epoll_event __user *events, int maxevents);
265static void ep_reinject_items(struct eventpoll *ep, struct list_head *txlist);
266static int ep_events_transfer(struct eventpoll *ep, 253static int ep_events_transfer(struct eventpoll *ep,
267 struct epoll_event __user *events, 254 struct epoll_event __user *events,
268 int maxevents); 255 int maxevents);
@@ -355,17 +342,6 @@ static inline int ep_rb_linked(struct rb_node *n)
355 return rb_parent(n) != n; 342 return rb_parent(n) != n;
356} 343}
357 344
358/*
359 * Remove the item from the list and perform its initialization.
360 * This is useful for us because we can test if the item is linked
361 * using "ep_is_linked(p)".
362 */
363static inline void ep_list_del(struct list_head *p)
364{
365 list_del(p);
366 INIT_LIST_HEAD(p);
367}
368
369/* Tells us if the item is currently linked */ 345/* Tells us if the item is currently linked */
370static inline int ep_is_linked(struct list_head *p) 346static inline int ep_is_linked(struct list_head *p)
371{ 347{
@@ -385,7 +361,7 @@ static inline struct epitem * ep_item_from_epqueue(poll_table *p)
385} 361}
386 362
387/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */ 363/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */
388static inline int ep_op_hash_event(int op) 364static inline int ep_op_has_event(int op)
389{ 365{
390 return op != EPOLL_CTL_DEL; 366 return op != EPOLL_CTL_DEL;
391} 367}
@@ -477,10 +453,10 @@ void eventpoll_release_file(struct file *file)
477 mutex_lock(&epmutex); 453 mutex_lock(&epmutex);
478 454
479 while (!list_empty(lsthead)) { 455 while (!list_empty(lsthead)) {
480 epi = list_entry(lsthead->next, struct epitem, fllink); 456 epi = list_first_entry(lsthead, struct epitem, fllink);
481 457
482 ep = epi->ep; 458 ep = epi->ep;
483 ep_list_del(&epi->fllink); 459 list_del_init(&epi->fllink);
484 down_write(&ep->sem); 460 down_write(&ep->sem);
485 ep_remove(ep, epi); 461 ep_remove(ep, epi);
486 up_write(&ep->sem); 462 up_write(&ep->sem);
@@ -557,7 +533,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
557 current, epfd, op, fd, event)); 533 current, epfd, op, fd, event));
558 534
559 error = -EFAULT; 535 error = -EFAULT;
560 if (ep_op_hash_event(op) && 536 if (ep_op_has_event(op) &&
561 copy_from_user(&epds, event, sizeof(struct epoll_event))) 537 copy_from_user(&epds, event, sizeof(struct epoll_event)))
562 goto eexit_1; 538 goto eexit_1;
563 539
@@ -594,7 +570,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
594 570
595 down_write(&ep->sem); 571 down_write(&ep->sem);
596 572
597 /* Try to lookup the file inside our hash table */ 573 /* Try to lookup the file inside our RB tree */
598 epi = ep_find(ep, tfile, fd); 574 epi = ep_find(ep, tfile, fd);
599 575
600 error = -EINVAL; 576 error = -EINVAL;
@@ -876,7 +852,7 @@ static void ep_free(struct eventpoll *ep)
876 } 852 }
877 853
878 /* 854 /*
879 * Walks through the whole hash by freeing each "struct epitem". At this 855 * Walks through the whole tree by freeing each "struct epitem". At this
880 * point we are sure no poll callbacks will be lingering around, and also by 856 * point we are sure no poll callbacks will be lingering around, and also by
881 * write-holding "sem" we can be sure that no file cleanup code will hit 857 * write-holding "sem" we can be sure that no file cleanup code will hit
882 * us during this operation. So we can avoid the lock on "ep->lock". 858 * us during this operation. So we can avoid the lock on "ep->lock".
@@ -891,7 +867,7 @@ static void ep_free(struct eventpoll *ep)
891 867
892 868
893/* 869/*
894 * Search the file inside the eventpoll hash. It add usage count to 870 * Search the file inside the eventpoll tree. It add usage count to
895 * the returned item, so the caller must call ep_release_epitem() 871 * the returned item, so the caller must call ep_release_epitem()
896 * after finished using the "struct epitem". 872 * after finished using the "struct epitem".
897 */ 873 */
@@ -1011,7 +987,6 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
1011 ep_rb_initnode(&epi->rbn); 987 ep_rb_initnode(&epi->rbn);
1012 INIT_LIST_HEAD(&epi->rdllink); 988 INIT_LIST_HEAD(&epi->rdllink);
1013 INIT_LIST_HEAD(&epi->fllink); 989 INIT_LIST_HEAD(&epi->fllink);
1014 INIT_LIST_HEAD(&epi->txlink);
1015 INIT_LIST_HEAD(&epi->pwqlist); 990 INIT_LIST_HEAD(&epi->pwqlist);
1016 epi->ep = ep; 991 epi->ep = ep;
1017 ep_set_ffd(&epi->ffd, tfile, fd); 992 ep_set_ffd(&epi->ffd, tfile, fd);
@@ -1080,7 +1055,7 @@ eexit_2:
1080 */ 1055 */
1081 write_lock_irqsave(&ep->lock, flags); 1056 write_lock_irqsave(&ep->lock, flags);
1082 if (ep_is_linked(&epi->rdllink)) 1057 if (ep_is_linked(&epi->rdllink))
1083 ep_list_del(&epi->rdllink); 1058 list_del_init(&epi->rdllink);
1084 write_unlock_irqrestore(&ep->lock, flags); 1059 write_unlock_irqrestore(&ep->lock, flags);
1085 1060
1086 kmem_cache_free(epi_cache, epi); 1061 kmem_cache_free(epi_cache, epi);
@@ -1119,7 +1094,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
1119 epi->event.data = event->data; 1094 epi->event.data = event->data;
1120 1095
1121 /* 1096 /*
1122 * If the item is not linked to the hash it means that it's on its 1097 * If the item is not linked to the RB tree it means that it's on its
1123 * way toward the removal. Do nothing in this case. 1098 * way toward the removal. Do nothing in this case.
1124 */ 1099 */
1125 if (ep_rb_linked(&epi->rbn)) { 1100 if (ep_rb_linked(&epi->rbn)) {
@@ -1168,9 +1143,9 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
1168 1143
1169 if (nwait) { 1144 if (nwait) {
1170 while (!list_empty(lsthead)) { 1145 while (!list_empty(lsthead)) {
1171 pwq = list_entry(lsthead->next, struct eppoll_entry, llink); 1146 pwq = list_first_entry(lsthead, struct eppoll_entry, llink);
1172 1147
1173 ep_list_del(&pwq->llink); 1148 list_del_init(&pwq->llink);
1174 remove_wait_queue(pwq->whead, &pwq->wait); 1149 remove_wait_queue(pwq->whead, &pwq->wait);
1175 kmem_cache_free(pwq_cache, pwq); 1150 kmem_cache_free(pwq_cache, pwq);
1176 } 1151 }
@@ -1213,7 +1188,7 @@ static int ep_unlink(struct eventpoll *ep, struct epitem *epi)
1213 * we want to remove it from this list to avoid stale events. 1188 * we want to remove it from this list to avoid stale events.
1214 */ 1189 */
1215 if (ep_is_linked(&epi->rdllink)) 1190 if (ep_is_linked(&epi->rdllink))
1216 ep_list_del(&epi->rdllink); 1191 list_del_init(&epi->rdllink);
1217 1192
1218 error = 0; 1193 error = 0;
1219eexit_1: 1194eexit_1:
@@ -1226,7 +1201,7 @@ eexit_1:
1226 1201
1227 1202
1228/* 1203/*
1229 * Removes a "struct epitem" from the eventpoll hash and deallocates 1204 * Removes a "struct epitem" from the eventpoll RB tree and deallocates
1230 * all the associated resources. 1205 * all the associated resources.
1231 */ 1206 */
1232static int ep_remove(struct eventpoll *ep, struct epitem *epi) 1207static int ep_remove(struct eventpoll *ep, struct epitem *epi)
@@ -1248,13 +1223,13 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
1248 /* Remove the current item from the list of epoll hooks */ 1223 /* Remove the current item from the list of epoll hooks */
1249 spin_lock(&file->f_ep_lock); 1224 spin_lock(&file->f_ep_lock);
1250 if (ep_is_linked(&epi->fllink)) 1225 if (ep_is_linked(&epi->fllink))
1251 ep_list_del(&epi->fllink); 1226 list_del_init(&epi->fllink);
1252 spin_unlock(&file->f_ep_lock); 1227 spin_unlock(&file->f_ep_lock);
1253 1228
1254 /* We need to acquire the write IRQ lock before calling ep_unlink() */ 1229 /* We need to acquire the write IRQ lock before calling ep_unlink() */
1255 write_lock_irqsave(&ep->lock, flags); 1230 write_lock_irqsave(&ep->lock, flags);
1256 1231
1257 /* Really unlink the item from the hash */ 1232 /* Really unlink the item from the RB tree */
1258 error = ep_unlink(ep, epi); 1233 error = ep_unlink(ep, epi);
1259 1234
1260 write_unlock_irqrestore(&ep->lock, flags); 1235 write_unlock_irqrestore(&ep->lock, flags);
@@ -1362,71 +1337,30 @@ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
1362 1337
1363 1338
1364/* 1339/*
1365 * Since we have to release the lock during the __copy_to_user() operation and
1366 * during the f_op->poll() call, we try to collect the maximum number of items
1367 * by reducing the irqlock/irqunlock switching rate.
1368 */
1369static int ep_collect_ready_items(struct eventpoll *ep, struct list_head *txlist, int maxevents)
1370{
1371 int nepi;
1372 unsigned long flags;
1373 struct list_head *lsthead = &ep->rdllist, *lnk;
1374 struct epitem *epi;
1375
1376 write_lock_irqsave(&ep->lock, flags);
1377
1378 for (nepi = 0, lnk = lsthead->next; lnk != lsthead && nepi < maxevents;) {
1379 epi = list_entry(lnk, struct epitem, rdllink);
1380
1381 lnk = lnk->next;
1382
1383 /* If this file is already in the ready list we exit soon */
1384 if (!ep_is_linked(&epi->txlink)) {
1385 /*
1386 * This is initialized in this way so that the default
1387 * behaviour of the reinjecting code will be to push back
1388 * the item inside the ready list.
1389 */
1390 epi->revents = epi->event.events;
1391
1392 /* Link the ready item into the transfer list */
1393 list_add(&epi->txlink, txlist);
1394 nepi++;
1395
1396 /*
1397 * Unlink the item from the ready list.
1398 */
1399 ep_list_del(&epi->rdllink);
1400 }
1401 }
1402
1403 write_unlock_irqrestore(&ep->lock, flags);
1404
1405 return nepi;
1406}
1407
1408
1409/*
1410 * This function is called without holding the "ep->lock" since the call to 1340 * This function is called without holding the "ep->lock" since the call to
1411 * __copy_to_user() might sleep, and also f_op->poll() might reenable the IRQ 1341 * __copy_to_user() might sleep, and also f_op->poll() might reenable the IRQ
1412 * because of the way poll() is traditionally implemented in Linux. 1342 * because of the way poll() is traditionally implemented in Linux.
1413 */ 1343 */
1414static int ep_send_events(struct eventpoll *ep, struct list_head *txlist, 1344static int ep_send_events(struct eventpoll *ep, struct list_head *txlist,
1415 struct epoll_event __user *events) 1345 struct epoll_event __user *events, int maxevents)
1416{ 1346{
1417 int eventcnt = 0; 1347 int eventcnt, error = -EFAULT, pwake = 0;
1418 unsigned int revents; 1348 unsigned int revents;
1419 struct list_head *lnk; 1349 unsigned long flags;
1420 struct epitem *epi; 1350 struct epitem *epi;
1351 struct list_head injlist;
1352
1353 INIT_LIST_HEAD(&injlist);
1421 1354
1422 /* 1355 /*
1423 * We can loop without lock because this is a task private list. 1356 * We can loop without lock because this is a task private list.
1424 * The test done during the collection loop will guarantee us that 1357 * We just splice'd out the ep->rdllist in ep_collect_ready_items().
1425 * another task will not try to collect this file. Also, items 1358 * Items cannot vanish during the loop because we are holding "sem" in
1426 * cannot vanish during the loop because we are holding "sem". 1359 * read.
1427 */ 1360 */
1428 list_for_each(lnk, txlist) { 1361 for (eventcnt = 0; !list_empty(txlist) && eventcnt < maxevents;) {
1429 epi = list_entry(lnk, struct epitem, txlink); 1362 epi = list_first_entry(txlist, struct epitem, rdllink);
1363 prefetch(epi->rdllink.next);
1430 1364
1431 /* 1365 /*
1432 * Get the ready file event set. We can safely use the file 1366 * Get the ready file event set. We can safely use the file
@@ -1434,64 +1368,65 @@ static int ep_send_events(struct eventpoll *ep, struct list_head *txlist,
1434 * guarantee that both the file and the item will not vanish. 1368 * guarantee that both the file and the item will not vanish.
1435 */ 1369 */
1436 revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL); 1370 revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL);
1371 revents &= epi->event.events;
1437 1372
1438 /* 1373 /*
1439 * Set the return event set for the current file descriptor. 1374 * Is the event mask intersect the caller-requested one,
1440 * Note that only the task task was successfully able to link 1375 * deliver the event to userspace. Again, we are holding
1441 * the item to its "txlist" will write this field. 1376 * "sem" in read, so no operations coming from userspace
1377 * can change the item.
1442 */ 1378 */
1443 epi->revents = revents & epi->event.events; 1379 if (revents) {
1444 1380 if (__put_user(revents,
1445 if (epi->revents) {
1446 if (__put_user(epi->revents,
1447 &events[eventcnt].events) || 1381 &events[eventcnt].events) ||
1448 __put_user(epi->event.data, 1382 __put_user(epi->event.data,
1449 &events[eventcnt].data)) 1383 &events[eventcnt].data))
1450 return -EFAULT; 1384 goto errxit;
1451 if (epi->event.events & EPOLLONESHOT) 1385 if (epi->event.events & EPOLLONESHOT)
1452 epi->event.events &= EP_PRIVATE_BITS; 1386 epi->event.events &= EP_PRIVATE_BITS;
1453 eventcnt++; 1387 eventcnt++;
1454 } 1388 }
1455 }
1456 return eventcnt;
1457}
1458
1459
1460/*
1461 * Walk through the transfer list we collected with ep_collect_ready_items()
1462 * and, if 1) the item is still "alive" 2) its event set is not empty 3) it's
1463 * not already linked, links it to the ready list. Same as above, we are holding
1464 * "sem" so items cannot vanish underneath our nose.
1465 */
1466static void ep_reinject_items(struct eventpoll *ep, struct list_head *txlist)
1467{
1468 int ricnt = 0, pwake = 0;
1469 unsigned long flags;
1470 struct epitem *epi;
1471
1472 write_lock_irqsave(&ep->lock, flags);
1473
1474 while (!list_empty(txlist)) {
1475 epi = list_entry(txlist->next, struct epitem, txlink);
1476
1477 /* Unlink the current item from the transfer list */
1478 ep_list_del(&epi->txlink);
1479 1389
1480 /* 1390 /*
1481 * If the item is no more linked to the interest set, we don't 1391 * This is tricky. We are holding the "sem" in read, and this
1482 * have to push it inside the ready list because the following 1392 * means that the operations that can change the "linked" status
1483 * ep_release_epitem() is going to drop it. Also, if the current 1393 * of the epoll item (epi->rbn and epi->rdllink), cannot touch
1484 * item is set to have an Edge Triggered behaviour, we don't have 1394 * them. Also, since we are "linked" from a epi->rdllink POV
1485 * to push it back either. 1395 * (the item is linked to our transmission list we just
1396 * spliced), the ep_poll_callback() cannot touch us either,
1397 * because of the check present in there. Another parallel
1398 * epoll_wait() will not get the same result set, since we
1399 * spliced the ready list before. Note that list_del() still
1400 * shows the item as linked to the test in ep_poll_callback().
1486 */ 1401 */
1487 if (ep_rb_linked(&epi->rbn) && !(epi->event.events & EPOLLET) && 1402 list_del(&epi->rdllink);
1488 (epi->revents & epi->event.events) && !ep_is_linked(&epi->rdllink)) { 1403 if (!(epi->event.events & EPOLLET) &&
1489 list_add_tail(&epi->rdllink, &ep->rdllist); 1404 (revents & epi->event.events))
1490 ricnt++; 1405 list_add_tail(&epi->rdllink, &injlist);
1406 else {
1407 /*
1408 * Be sure the item is totally detached before re-init
1409 * the list_head. After INIT_LIST_HEAD() is committed,
1410 * the ep_poll_callback() can requeue the item again,
1411 * but we don't care since we are already past it.
1412 */
1413 smp_mb();
1414 INIT_LIST_HEAD(&epi->rdllink);
1491 } 1415 }
1492 } 1416 }
1417 error = 0;
1493 1418
1494 if (ricnt) { 1419 errxit:
1420
1421 /*
1422 * If the re-injection list or the txlist are not empty, re-splice
1423 * them to the ready list and do proper wakeups.
1424 */
1425 if (!list_empty(&injlist) || !list_empty(txlist)) {
1426 write_lock_irqsave(&ep->lock, flags);
1427
1428 list_splice(txlist, &ep->rdllist);
1429 list_splice(&injlist, &ep->rdllist);
1495 /* 1430 /*
1496 * Wake up ( if active ) both the eventpoll wait list and the ->poll() 1431 * Wake up ( if active ) both the eventpoll wait list and the ->poll()
1497 * wait list. 1432 * wait list.
@@ -1501,13 +1436,15 @@ static void ep_reinject_items(struct eventpoll *ep, struct list_head *txlist)
1501 TASK_INTERRUPTIBLE); 1436 TASK_INTERRUPTIBLE);
1502 if (waitqueue_active(&ep->poll_wait)) 1437 if (waitqueue_active(&ep->poll_wait))
1503 pwake++; 1438 pwake++;
1504 }
1505 1439
1506 write_unlock_irqrestore(&ep->lock, flags); 1440 write_unlock_irqrestore(&ep->lock, flags);
1441 }
1507 1442
1508 /* We have to call this outside the lock */ 1443 /* We have to call this outside the lock */
1509 if (pwake) 1444 if (pwake)
1510 ep_poll_safewake(&psw, &ep->poll_wait); 1445 ep_poll_safewake(&psw, &ep->poll_wait);
1446
1447 return eventcnt == 0 ? error: eventcnt;
1511} 1448}
1512 1449
1513 1450
@@ -1517,7 +1454,8 @@ static void ep_reinject_items(struct eventpoll *ep, struct list_head *txlist)
1517static int ep_events_transfer(struct eventpoll *ep, 1454static int ep_events_transfer(struct eventpoll *ep,
1518 struct epoll_event __user *events, int maxevents) 1455 struct epoll_event __user *events, int maxevents)
1519{ 1456{
1520 int eventcnt = 0; 1457 int eventcnt;
1458 unsigned long flags;
1521 struct list_head txlist; 1459 struct list_head txlist;
1522 1460
1523 INIT_LIST_HEAD(&txlist); 1461 INIT_LIST_HEAD(&txlist);
@@ -1528,14 +1466,17 @@ static int ep_events_transfer(struct eventpoll *ep,
1528 */ 1466 */
1529 down_read(&ep->sem); 1467 down_read(&ep->sem);
1530 1468
1531 /* Collect/extract ready items */ 1469 /*
1532 if (ep_collect_ready_items(ep, &txlist, maxevents) > 0) { 1470 * Steal the ready list, and re-init the original one to the
1533 /* Build result set in userspace */ 1471 * empty list.
1534 eventcnt = ep_send_events(ep, &txlist, events); 1472 */
1473 write_lock_irqsave(&ep->lock, flags);
1474 list_splice(&ep->rdllist, &txlist);
1475 INIT_LIST_HEAD(&ep->rdllist);
1476 write_unlock_irqrestore(&ep->lock, flags);
1535 1477
1536 /* Reinject ready items into the ready list */ 1478 /* Build result set in userspace */
1537 ep_reinject_items(ep, &txlist); 1479 eventcnt = ep_send_events(ep, &txlist, events, maxevents);
1538 }
1539 1480
1540 up_read(&ep->sem); 1481 up_read(&ep->sem);
1541 1482
@@ -1612,14 +1553,12 @@ retry:
1612 return res; 1553 return res;
1613} 1554}
1614 1555
1615
1616static int eventpollfs_delete_dentry(struct dentry *dentry) 1556static int eventpollfs_delete_dentry(struct dentry *dentry)
1617{ 1557{
1618 1558
1619 return 1; 1559 return 1;
1620} 1560}
1621 1561
1622
1623static struct inode *ep_eventpoll_inode(void) 1562static struct inode *ep_eventpoll_inode(void)
1624{ 1563{
1625 int error = -ENOMEM; 1564 int error = -ENOMEM;
@@ -1647,7 +1586,6 @@ eexit_1:
1647 return ERR_PTR(error); 1586 return ERR_PTR(error);
1648} 1587}
1649 1588
1650
1651static int 1589static int
1652eventpollfs_get_sb(struct file_system_type *fs_type, int flags, 1590eventpollfs_get_sb(struct file_system_type *fs_type, int flags,
1653 const char *dev_name, void *data, struct vfsmount *mnt) 1591 const char *dev_name, void *data, struct vfsmount *mnt)
diff --git a/fs/exec.c b/fs/exec.c
index 3155e915307a..1ba85c7fc6af 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -100,6 +100,7 @@ int unregister_binfmt(struct linux_binfmt * fmt)
100 while (*tmp) { 100 while (*tmp) {
101 if (fmt == *tmp) { 101 if (fmt == *tmp) {
102 *tmp = fmt->next; 102 *tmp = fmt->next;
103 fmt->next = NULL;
103 write_unlock(&binfmt_lock); 104 write_unlock(&binfmt_lock);
104 return 0; 105 return 0;
105 } 106 }
@@ -982,33 +983,51 @@ void compute_creds(struct linux_binprm *bprm)
982 task_unlock(current); 983 task_unlock(current);
983 security_bprm_post_apply_creds(bprm); 984 security_bprm_post_apply_creds(bprm);
984} 985}
985
986EXPORT_SYMBOL(compute_creds); 986EXPORT_SYMBOL(compute_creds);
987 987
988/*
989 * Arguments are '\0' separated strings found at the location bprm->p
990 * points to; chop off the first by relocating brpm->p to right after
991 * the first '\0' encountered.
992 */
988void remove_arg_zero(struct linux_binprm *bprm) 993void remove_arg_zero(struct linux_binprm *bprm)
989{ 994{
990 if (bprm->argc) { 995 if (bprm->argc) {
991 unsigned long offset; 996 char ch;
992 char * kaddr;
993 struct page *page;
994 997
995 offset = bprm->p % PAGE_SIZE; 998 do {
996 goto inside; 999 unsigned long offset;
1000 unsigned long index;
1001 char *kaddr;
1002 struct page *page;
997 1003
998 while (bprm->p++, *(kaddr+offset++)) { 1004 offset = bprm->p & ~PAGE_MASK;
999 if (offset != PAGE_SIZE) 1005 index = bprm->p >> PAGE_SHIFT;
1000 continue; 1006
1001 offset = 0; 1007 page = bprm->page[index];
1002 kunmap_atomic(kaddr, KM_USER0);
1003inside:
1004 page = bprm->page[bprm->p/PAGE_SIZE];
1005 kaddr = kmap_atomic(page, KM_USER0); 1008 kaddr = kmap_atomic(page, KM_USER0);
1006 } 1009
1007 kunmap_atomic(kaddr, KM_USER0); 1010 /* run through page until we reach end or find NUL */
1011 do {
1012 ch = *(kaddr + offset);
1013
1014 /* discard that character... */
1015 bprm->p++;
1016 offset++;
1017 } while (offset < PAGE_SIZE && ch != '\0');
1018
1019 kunmap_atomic(kaddr, KM_USER0);
1020
1021 /* free the old page */
1022 if (offset == PAGE_SIZE) {
1023 __free_page(page);
1024 bprm->page[index] = NULL;
1025 }
1026 } while (ch != '\0');
1027
1008 bprm->argc--; 1028 bprm->argc--;
1009 } 1029 }
1010} 1030}
1011
1012EXPORT_SYMBOL(remove_arg_zero); 1031EXPORT_SYMBOL(remove_arg_zero);
1013 1032
1014/* 1033/*
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 93e77c3d2490..e98f6cd7200c 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -2,7 +2,6 @@
2#include <linux/fs.h> 2#include <linux/fs.h>
3#include <linux/file.h> 3#include <linux/file.h>
4#include <linux/module.h> 4#include <linux/module.h>
5#include <linux/smp_lock.h>
6#include <linux/namei.h> 5#include <linux/namei.h>
7 6
8struct export_operations export_op_default; 7struct export_operations export_op_default;
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index e89bfc8cf957..2bf49d7ef841 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -23,7 +23,6 @@
23 23
24#include "ext2.h" 24#include "ext2.h"
25#include <linux/pagemap.h> 25#include <linux/pagemap.h>
26#include <linux/smp_lock.h>
27 26
28typedef struct ext2_dir_entry_2 ext2_dirent; 27typedef struct ext2_dir_entry_2 ext2_dirent;
29 28
@@ -161,10 +160,7 @@ static struct page * ext2_get_page(struct inode *dir, unsigned long n)
161 struct address_space *mapping = dir->i_mapping; 160 struct address_space *mapping = dir->i_mapping;
162 struct page *page = read_mapping_page(mapping, n, NULL); 161 struct page *page = read_mapping_page(mapping, n, NULL);
163 if (!IS_ERR(page)) { 162 if (!IS_ERR(page)) {
164 wait_on_page_locked(page);
165 kmap(page); 163 kmap(page);
166 if (!PageUptodate(page))
167 goto fail;
168 if (!PageChecked(page)) 164 if (!PageChecked(page))
169 ext2_check_page(page); 165 ext2_check_page(page);
170 if (PageError(page)) 166 if (PageError(page))
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index e2a0ea50af1d..9fd0ec5ba0d0 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -133,6 +133,7 @@ extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int);
133extern void ext2_truncate (struct inode *); 133extern void ext2_truncate (struct inode *);
134extern int ext2_setattr (struct dentry *, struct iattr *); 134extern int ext2_setattr (struct dentry *, struct iattr *);
135extern void ext2_set_inode_flags(struct inode *inode); 135extern void ext2_set_inode_flags(struct inode *inode);
136extern void ext2_get_inode_flags(struct ext2_inode_info *);
136 137
137/* ioctl.c */ 138/* ioctl.c */
138extern int ext2_ioctl (struct inode *, struct file *, unsigned int, 139extern int ext2_ioctl (struct inode *, struct file *, unsigned int,
diff --git a/fs/ext2/fsync.c b/fs/ext2/fsync.c
index 7806b9e8155b..fc66c93fcb5c 100644
--- a/fs/ext2/fsync.c
+++ b/fs/ext2/fsync.c
@@ -23,7 +23,6 @@
23 */ 23 */
24 24
25#include "ext2.h" 25#include "ext2.h"
26#include <linux/smp_lock.h>
27#include <linux/buffer_head.h> /* for sync_mapping_buffers() */ 26#include <linux/buffer_head.h> /* for sync_mapping_buffers() */
28 27
29 28
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index dd4e14c221e0..0079b2cd5314 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1055,6 +1055,25 @@ void ext2_set_inode_flags(struct inode *inode)
1055 inode->i_flags |= S_DIRSYNC; 1055 inode->i_flags |= S_DIRSYNC;
1056} 1056}
1057 1057
1058/* Propagate flags from i_flags to EXT2_I(inode)->i_flags */
1059void ext2_get_inode_flags(struct ext2_inode_info *ei)
1060{
1061 unsigned int flags = ei->vfs_inode.i_flags;
1062
1063 ei->i_flags &= ~(EXT2_SYNC_FL|EXT2_APPEND_FL|
1064 EXT2_IMMUTABLE_FL|EXT2_NOATIME_FL|EXT2_DIRSYNC_FL);
1065 if (flags & S_SYNC)
1066 ei->i_flags |= EXT2_SYNC_FL;
1067 if (flags & S_APPEND)
1068 ei->i_flags |= EXT2_APPEND_FL;
1069 if (flags & S_IMMUTABLE)
1070 ei->i_flags |= EXT2_IMMUTABLE_FL;
1071 if (flags & S_NOATIME)
1072 ei->i_flags |= EXT2_NOATIME_FL;
1073 if (flags & S_DIRSYNC)
1074 ei->i_flags |= EXT2_DIRSYNC_FL;
1075}
1076
1058void ext2_read_inode (struct inode * inode) 1077void ext2_read_inode (struct inode * inode)
1059{ 1078{
1060 struct ext2_inode_info *ei = EXT2_I(inode); 1079 struct ext2_inode_info *ei = EXT2_I(inode);
@@ -1079,9 +1098,9 @@ void ext2_read_inode (struct inode * inode)
1079 } 1098 }
1080 inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); 1099 inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
1081 inode->i_size = le32_to_cpu(raw_inode->i_size); 1100 inode->i_size = le32_to_cpu(raw_inode->i_size);
1082 inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime); 1101 inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
1083 inode->i_ctime.tv_sec = le32_to_cpu(raw_inode->i_ctime); 1102 inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime);
1084 inode->i_mtime.tv_sec = le32_to_cpu(raw_inode->i_mtime); 1103 inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime);
1085 inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = 0; 1104 inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = 0;
1086 ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); 1105 ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
1087 /* We now have enough fields to check if the inode was active or not. 1106 /* We now have enough fields to check if the inode was active or not.
@@ -1188,6 +1207,7 @@ static int ext2_update_inode(struct inode * inode, int do_sync)
1188 if (ei->i_state & EXT2_STATE_NEW) 1207 if (ei->i_state & EXT2_STATE_NEW)
1189 memset(raw_inode, 0, EXT2_SB(sb)->s_inode_size); 1208 memset(raw_inode, 0, EXT2_SB(sb)->s_inode_size);
1190 1209
1210 ext2_get_inode_flags(ei);
1191 raw_inode->i_mode = cpu_to_le16(inode->i_mode); 1211 raw_inode->i_mode = cpu_to_le16(inode->i_mode);
1192 if (!(test_opt(sb, NO_UID32))) { 1212 if (!(test_opt(sb, NO_UID32))) {
1193 raw_inode->i_uid_low = cpu_to_le16(low_16_bits(uid)); 1213 raw_inode->i_uid_low = cpu_to_le16(low_16_bits(uid));
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
index 4b099d310712..e85c48218239 100644
--- a/fs/ext2/ioctl.c
+++ b/fs/ext2/ioctl.c
@@ -27,6 +27,7 @@ int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
27 27
28 switch (cmd) { 28 switch (cmd) {
29 case EXT2_IOC_GETFLAGS: 29 case EXT2_IOC_GETFLAGS:
30 ext2_get_inode_flags(ei);
30 flags = ei->i_flags & EXT2_FL_USER_VISIBLE; 31 flags = ei->i_flags & EXT2_FL_USER_VISIBLE;
31 return put_user(flags, (int __user *) arg); 32 return put_user(flags, (int __user *) arg);
32 case EXT2_IOC_SETFLAGS: { 33 case EXT2_IOC_SETFLAGS: {
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index a046a419d8af..685a1c287177 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -160,8 +160,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
160{ 160{
161 struct ext2_inode_info *ei = (struct ext2_inode_info *) foo; 161 struct ext2_inode_info *ei = (struct ext2_inode_info *) foo;
162 162
163 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 163 if (flags & SLAB_CTOR_CONSTRUCTOR) {
164 SLAB_CTOR_CONSTRUCTOR) {
165 rwlock_init(&ei->i_meta_lock); 164 rwlock_init(&ei->i_meta_lock);
166#ifdef CONFIG_EXT2_FS_XATTR 165#ifdef CONFIG_EXT2_FS_XATTR
167 init_rwsem(&ei->xattr_sem); 166 init_rwsem(&ei->xattr_sem);
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index a26612798471..eaa23d2d5213 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -6,7 +6,6 @@
6#include <linux/module.h> 6#include <linux/module.h>
7#include <linux/string.h> 7#include <linux/string.h>
8#include <linux/fs.h> 8#include <linux/fs.h>
9#include <linux/smp_lock.h>
10#include <linux/ext2_fs.h> 9#include <linux/ext2_fs.h>
11#include <linux/security.h> 10#include <linux/security.h>
12#include "xattr.h" 11#include "xattr.h"
diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c
index f28a6a499c96..83ee149f353d 100644
--- a/fs/ext2/xattr_trusted.c
+++ b/fs/ext2/xattr_trusted.c
@@ -9,7 +9,6 @@
9#include <linux/string.h> 9#include <linux/string.h>
10#include <linux/capability.h> 10#include <linux/capability.h>
11#include <linux/fs.h> 11#include <linux/fs.h>
12#include <linux/smp_lock.h>
13#include <linux/ext2_fs.h> 12#include <linux/ext2_fs.h>
14#include "xattr.h" 13#include "xattr.h"
15 14
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 665adee99b31..852869840f24 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -25,7 +25,6 @@
25#include <linux/jbd.h> 25#include <linux/jbd.h>
26#include <linux/ext3_fs.h> 26#include <linux/ext3_fs.h>
27#include <linux/buffer_head.h> 27#include <linux/buffer_head.h>
28#include <linux/smp_lock.h>
29#include <linux/slab.h> 28#include <linux/slab.h>
30#include <linux/rbtree.h> 29#include <linux/rbtree.h>
31 30
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index a5b150f7e8a2..a6cb6171c3af 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -27,7 +27,6 @@
27#include <linux/time.h> 27#include <linux/time.h>
28#include <linux/ext3_jbd.h> 28#include <linux/ext3_jbd.h>
29#include <linux/jbd.h> 29#include <linux/jbd.h>
30#include <linux/smp_lock.h>
31#include <linux/highuid.h> 30#include <linux/highuid.h>
32#include <linux/pagemap.h> 31#include <linux/pagemap.h>
33#include <linux/quotaops.h> 32#include <linux/quotaops.h>
@@ -1768,7 +1767,6 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page,
1768 struct inode *inode = mapping->host; 1767 struct inode *inode = mapping->host;
1769 struct buffer_head *bh; 1768 struct buffer_head *bh;
1770 int err = 0; 1769 int err = 0;
1771 void *kaddr;
1772 1770
1773 blocksize = inode->i_sb->s_blocksize; 1771 blocksize = inode->i_sb->s_blocksize;
1774 length = blocksize - (offset & (blocksize - 1)); 1772 length = blocksize - (offset & (blocksize - 1));
@@ -1780,10 +1778,7 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page,
1780 */ 1778 */
1781 if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) && 1779 if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) &&
1782 ext3_should_writeback_data(inode) && PageUptodate(page)) { 1780 ext3_should_writeback_data(inode) && PageUptodate(page)) {
1783 kaddr = kmap_atomic(page, KM_USER0); 1781 zero_user_page(page, offset, length, KM_USER0);
1784 memset(kaddr + offset, 0, length);
1785 flush_dcache_page(page);
1786 kunmap_atomic(kaddr, KM_USER0);
1787 set_page_dirty(page); 1782 set_page_dirty(page);
1788 goto unlock; 1783 goto unlock;
1789 } 1784 }
@@ -1836,11 +1831,7 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page,
1836 goto unlock; 1831 goto unlock;
1837 } 1832 }
1838 1833
1839 kaddr = kmap_atomic(page, KM_USER0); 1834 zero_user_page(page, offset, length, KM_USER0);
1840 memset(kaddr + offset, 0, length);
1841 flush_dcache_page(page);
1842 kunmap_atomic(kaddr, KM_USER0);
1843
1844 BUFFER_TRACE(bh, "zeroed end of block"); 1835 BUFFER_TRACE(bh, "zeroed end of block");
1845 1836
1846 err = 0; 1837 err = 0;
@@ -2581,6 +2572,25 @@ void ext3_set_inode_flags(struct inode *inode)
2581 inode->i_flags |= S_DIRSYNC; 2572 inode->i_flags |= S_DIRSYNC;
2582} 2573}
2583 2574
2575/* Propagate flags from i_flags to EXT3_I(inode)->i_flags */
2576void ext3_get_inode_flags(struct ext3_inode_info *ei)
2577{
2578 unsigned int flags = ei->vfs_inode.i_flags;
2579
2580 ei->i_flags &= ~(EXT3_SYNC_FL|EXT3_APPEND_FL|
2581 EXT3_IMMUTABLE_FL|EXT3_NOATIME_FL|EXT3_DIRSYNC_FL);
2582 if (flags & S_SYNC)
2583 ei->i_flags |= EXT3_SYNC_FL;
2584 if (flags & S_APPEND)
2585 ei->i_flags |= EXT3_APPEND_FL;
2586 if (flags & S_IMMUTABLE)
2587 ei->i_flags |= EXT3_IMMUTABLE_FL;
2588 if (flags & S_NOATIME)
2589 ei->i_flags |= EXT3_NOATIME_FL;
2590 if (flags & S_DIRSYNC)
2591 ei->i_flags |= EXT3_DIRSYNC_FL;
2592}
2593
2584void ext3_read_inode(struct inode * inode) 2594void ext3_read_inode(struct inode * inode)
2585{ 2595{
2586 struct ext3_iloc iloc; 2596 struct ext3_iloc iloc;
@@ -2608,9 +2618,9 @@ void ext3_read_inode(struct inode * inode)
2608 } 2618 }
2609 inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); 2619 inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
2610 inode->i_size = le32_to_cpu(raw_inode->i_size); 2620 inode->i_size = le32_to_cpu(raw_inode->i_size);
2611 inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime); 2621 inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
2612 inode->i_ctime.tv_sec = le32_to_cpu(raw_inode->i_ctime); 2622 inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime);
2613 inode->i_mtime.tv_sec = le32_to_cpu(raw_inode->i_mtime); 2623 inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime);
2614 inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0; 2624 inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0;
2615 2625
2616 ei->i_state = 0; 2626 ei->i_state = 0;
@@ -2736,6 +2746,7 @@ static int ext3_do_update_inode(handle_t *handle,
2736 if (ei->i_state & EXT3_STATE_NEW) 2746 if (ei->i_state & EXT3_STATE_NEW)
2737 memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size); 2747 memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size);
2738 2748
2749 ext3_get_inode_flags(ei);
2739 raw_inode->i_mode = cpu_to_le16(inode->i_mode); 2750 raw_inode->i_mode = cpu_to_le16(inode->i_mode);
2740 if(!(test_opt(inode->i_sb, NO_UID32))) { 2751 if(!(test_opt(inode->i_sb, NO_UID32))) {
2741 raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); 2752 raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid));
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index 9b8090d94e6c..965006dba6be 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -28,6 +28,7 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
28 28
29 switch (cmd) { 29 switch (cmd) {
30 case EXT3_IOC_GETFLAGS: 30 case EXT3_IOC_GETFLAGS:
31 ext3_get_inode_flags(ei);
31 flags = ei->i_flags & EXT3_FL_USER_VISIBLE; 32 flags = ei->i_flags & EXT3_FL_USER_VISIBLE;
32 return put_user(flags, (int __user *) arg); 33 return put_user(flags, (int __user *) arg);
33 case EXT3_IOC_SETFLAGS: { 34 case EXT3_IOC_SETFLAGS: {
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 49159f13cc1f..9bb046df827a 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -36,7 +36,6 @@
36#include <linux/quotaops.h> 36#include <linux/quotaops.h>
37#include <linux/buffer_head.h> 37#include <linux/buffer_head.h>
38#include <linux/bio.h> 38#include <linux/bio.h>
39#include <linux/smp_lock.h>
40 39
41#include "namei.h" 40#include "namei.h"
42#include "xattr.h" 41#include "xattr.h"
@@ -969,6 +968,7 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
969 (block<<EXT3_BLOCK_SIZE_BITS(sb)) 968 (block<<EXT3_BLOCK_SIZE_BITS(sb))
970 +((char *)de - bh->b_data))) { 969 +((char *)de - bh->b_data))) {
971 brelse (bh); 970 brelse (bh);
971 *err = ERR_BAD_DX_DIR;
972 goto errout; 972 goto errout;
973 } 973 }
974 *res_dir = de; 974 *res_dir = de;
@@ -1134,9 +1134,9 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1134 char *data1 = (*bh)->b_data, *data2; 1134 char *data1 = (*bh)->b_data, *data2;
1135 unsigned split; 1135 unsigned split;
1136 struct ext3_dir_entry_2 *de = NULL, *de2; 1136 struct ext3_dir_entry_2 *de = NULL, *de2;
1137 int err; 1137 int err = 0;
1138 1138
1139 bh2 = ext3_append (handle, dir, &newblock, error); 1139 bh2 = ext3_append (handle, dir, &newblock, &err);
1140 if (!(bh2)) { 1140 if (!(bh2)) {
1141 brelse(*bh); 1141 brelse(*bh);
1142 *bh = NULL; 1142 *bh = NULL;
@@ -1145,14 +1145,9 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1145 1145
1146 BUFFER_TRACE(*bh, "get_write_access"); 1146 BUFFER_TRACE(*bh, "get_write_access");
1147 err = ext3_journal_get_write_access(handle, *bh); 1147 err = ext3_journal_get_write_access(handle, *bh);
1148 if (err) { 1148 if (err)
1149 journal_error: 1149 goto journal_error;
1150 brelse(*bh); 1150
1151 brelse(bh2);
1152 *bh = NULL;
1153 ext3_std_error(dir->i_sb, err);
1154 goto errout;
1155 }
1156 BUFFER_TRACE(frame->bh, "get_write_access"); 1151 BUFFER_TRACE(frame->bh, "get_write_access");
1157 err = ext3_journal_get_write_access(handle, frame->bh); 1152 err = ext3_journal_get_write_access(handle, frame->bh);
1158 if (err) 1153 if (err)
@@ -1195,8 +1190,16 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1195 goto journal_error; 1190 goto journal_error;
1196 brelse (bh2); 1191 brelse (bh2);
1197 dxtrace(dx_show_index ("frame", frame->entries)); 1192 dxtrace(dx_show_index ("frame", frame->entries));
1198errout:
1199 return de; 1193 return de;
1194
1195journal_error:
1196 brelse(*bh);
1197 brelse(bh2);
1198 *bh = NULL;
1199 ext3_std_error(dir->i_sb, err);
1200errout:
1201 *error = err;
1202 return NULL;
1200} 1203}
1201#endif 1204#endif
1202 1205
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index ecf89904c113..2c97e09c6c6b 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -11,7 +11,6 @@
11 11
12#define EXT3FS_DEBUG 12#define EXT3FS_DEBUG
13 13
14#include <linux/smp_lock.h>
15#include <linux/ext3_jbd.h> 14#include <linux/ext3_jbd.h>
16 15
17#include <linux/errno.h> 16#include <linux/errno.h>
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 4a4fcd6868c7..54d3c9041259 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -420,7 +420,7 @@ static void ext3_put_super (struct super_block * sb)
420 dump_orphan_list(sb, sbi); 420 dump_orphan_list(sb, sbi);
421 J_ASSERT(list_empty(&sbi->s_orphan)); 421 J_ASSERT(list_empty(&sbi->s_orphan));
422 422
423 invalidate_bdev(sb->s_bdev, 0); 423 invalidate_bdev(sb->s_bdev);
424 if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) { 424 if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
425 /* 425 /*
426 * Invalidate the journal device's buffers. We don't want them 426 * Invalidate the journal device's buffers. We don't want them
@@ -428,7 +428,7 @@ static void ext3_put_super (struct super_block * sb)
428 * hotswapped, and it breaks the `ro-after' testing code. 428 * hotswapped, and it breaks the `ro-after' testing code.
429 */ 429 */
430 sync_blockdev(sbi->journal_bdev); 430 sync_blockdev(sbi->journal_bdev);
431 invalidate_bdev(sbi->journal_bdev, 0); 431 invalidate_bdev(sbi->journal_bdev);
432 ext3_blkdev_remove(sbi); 432 ext3_blkdev_remove(sbi);
433 } 433 }
434 sb->s_fs_info = NULL; 434 sb->s_fs_info = NULL;
@@ -466,8 +466,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
466{ 466{
467 struct ext3_inode_info *ei = (struct ext3_inode_info *) foo; 467 struct ext3_inode_info *ei = (struct ext3_inode_info *) foo;
468 468
469 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 469 if (flags & SLAB_CTOR_CONSTRUCTOR) {
470 SLAB_CTOR_CONSTRUCTOR) {
471 INIT_LIST_HEAD(&ei->i_orphan); 470 INIT_LIST_HEAD(&ei->i_orphan);
472#ifdef CONFIG_EXT3_FS_XATTR 471#ifdef CONFIG_EXT3_FS_XATTR
473 init_rwsem(&ei->xattr_sem); 472 init_rwsem(&ei->xattr_sem);
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index b9c40c15647b..821efaf2b94e 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -6,7 +6,6 @@
6#include <linux/module.h> 6#include <linux/module.h>
7#include <linux/string.h> 7#include <linux/string.h>
8#include <linux/fs.h> 8#include <linux/fs.h>
9#include <linux/smp_lock.h>
10#include <linux/ext3_jbd.h> 9#include <linux/ext3_jbd.h>
11#include <linux/ext3_fs.h> 10#include <linux/ext3_fs.h>
12#include <linux/security.h> 11#include <linux/security.h>
diff --git a/fs/ext3/xattr_trusted.c b/fs/ext3/xattr_trusted.c
index 86d91f1186dc..0327497a55ce 100644
--- a/fs/ext3/xattr_trusted.c
+++ b/fs/ext3/xattr_trusted.c
@@ -9,7 +9,6 @@
9#include <linux/string.h> 9#include <linux/string.h>
10#include <linux/capability.h> 10#include <linux/capability.h>
11#include <linux/fs.h> 11#include <linux/fs.h>
12#include <linux/smp_lock.h>
13#include <linux/ext3_jbd.h> 12#include <linux/ext3_jbd.h>
14#include <linux/ext3_fs.h> 13#include <linux/ext3_fs.h>
15#include "xattr.h" 14#include "xattr.h"
diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c
index a85a0a17c4fd..1abd8f92c440 100644
--- a/fs/ext3/xattr_user.c
+++ b/fs/ext3/xattr_user.c
@@ -8,7 +8,6 @@
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/string.h> 9#include <linux/string.h>
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/smp_lock.h>
12#include <linux/ext3_jbd.h> 11#include <linux/ext3_jbd.h>
13#include <linux/ext3_fs.h> 12#include <linux/ext3_fs.h>
14#include "xattr.h" 13#include "xattr.h"
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index da80368b66f0..e8ad06e28318 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -25,7 +25,6 @@
25#include <linux/jbd2.h> 25#include <linux/jbd2.h>
26#include <linux/ext4_fs.h> 26#include <linux/ext4_fs.h>
27#include <linux/buffer_head.h> 27#include <linux/buffer_head.h>
28#include <linux/smp_lock.h>
29#include <linux/slab.h> 28#include <linux/slab.h>
30#include <linux/rbtree.h> 29#include <linux/rbtree.h>
31 30
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 7916b50f9a13..a0f0c04e79b2 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -34,7 +34,6 @@
34#include <linux/time.h> 34#include <linux/time.h>
35#include <linux/ext4_jbd2.h> 35#include <linux/ext4_jbd2.h>
36#include <linux/jbd.h> 36#include <linux/jbd.h>
37#include <linux/smp_lock.h>
38#include <linux/highuid.h> 37#include <linux/highuid.h>
39#include <linux/pagemap.h> 38#include <linux/pagemap.h>
40#include <linux/quotaops.h> 39#include <linux/quotaops.h>
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 810b6d6474bf..b34182b6ee4d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -27,7 +27,6 @@
27#include <linux/time.h> 27#include <linux/time.h>
28#include <linux/ext4_jbd2.h> 28#include <linux/ext4_jbd2.h>
29#include <linux/jbd2.h> 29#include <linux/jbd2.h>
30#include <linux/smp_lock.h>
31#include <linux/highuid.h> 30#include <linux/highuid.h>
32#include <linux/pagemap.h> 31#include <linux/pagemap.h>
33#include <linux/quotaops.h> 32#include <linux/quotaops.h>
@@ -2611,9 +2610,9 @@ void ext4_read_inode(struct inode * inode)
2611 } 2610 }
2612 inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); 2611 inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
2613 inode->i_size = le32_to_cpu(raw_inode->i_size); 2612 inode->i_size = le32_to_cpu(raw_inode->i_size);
2614 inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime); 2613 inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
2615 inode->i_ctime.tv_sec = le32_to_cpu(raw_inode->i_ctime); 2614 inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime);
2616 inode->i_mtime.tv_sec = le32_to_cpu(raw_inode->i_mtime); 2615 inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime);
2617 inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0; 2616 inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0;
2618 2617
2619 ei->i_state = 0; 2618 ei->i_state = 0;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index e7e1d79a7d75..4ec57be5baf5 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -36,7 +36,6 @@
36#include <linux/quotaops.h> 36#include <linux/quotaops.h>
37#include <linux/buffer_head.h> 37#include <linux/buffer_head.h>
38#include <linux/bio.h> 38#include <linux/bio.h>
39#include <linux/smp_lock.h>
40 39
41#include "namei.h" 40#include "namei.h"
42#include "xattr.h" 41#include "xattr.h"
@@ -967,6 +966,7 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
967 (block<<EXT4_BLOCK_SIZE_BITS(sb)) 966 (block<<EXT4_BLOCK_SIZE_BITS(sb))
968 +((char *)de - bh->b_data))) { 967 +((char *)de - bh->b_data))) {
969 brelse (bh); 968 brelse (bh);
969 *err = ERR_BAD_DX_DIR;
970 goto errout; 970 goto errout;
971 } 971 }
972 *res_dir = de; 972 *res_dir = de;
@@ -1132,9 +1132,9 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1132 char *data1 = (*bh)->b_data, *data2; 1132 char *data1 = (*bh)->b_data, *data2;
1133 unsigned split; 1133 unsigned split;
1134 struct ext4_dir_entry_2 *de = NULL, *de2; 1134 struct ext4_dir_entry_2 *de = NULL, *de2;
1135 int err; 1135 int err = 0;
1136 1136
1137 bh2 = ext4_append (handle, dir, &newblock, error); 1137 bh2 = ext4_append (handle, dir, &newblock, &err);
1138 if (!(bh2)) { 1138 if (!(bh2)) {
1139 brelse(*bh); 1139 brelse(*bh);
1140 *bh = NULL; 1140 *bh = NULL;
@@ -1143,14 +1143,9 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1143 1143
1144 BUFFER_TRACE(*bh, "get_write_access"); 1144 BUFFER_TRACE(*bh, "get_write_access");
1145 err = ext4_journal_get_write_access(handle, *bh); 1145 err = ext4_journal_get_write_access(handle, *bh);
1146 if (err) { 1146 if (err)
1147 journal_error: 1147 goto journal_error;
1148 brelse(*bh); 1148
1149 brelse(bh2);
1150 *bh = NULL;
1151 ext4_std_error(dir->i_sb, err);
1152 goto errout;
1153 }
1154 BUFFER_TRACE(frame->bh, "get_write_access"); 1149 BUFFER_TRACE(frame->bh, "get_write_access");
1155 err = ext4_journal_get_write_access(handle, frame->bh); 1150 err = ext4_journal_get_write_access(handle, frame->bh);
1156 if (err) 1151 if (err)
@@ -1193,8 +1188,16 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1193 goto journal_error; 1188 goto journal_error;
1194 brelse (bh2); 1189 brelse (bh2);
1195 dxtrace(dx_show_index ("frame", frame->entries)); 1190 dxtrace(dx_show_index ("frame", frame->entries));
1196errout:
1197 return de; 1191 return de;
1192
1193journal_error:
1194 brelse(*bh);
1195 brelse(bh2);
1196 *bh = NULL;
1197 ext4_std_error(dir->i_sb, err);
1198errout:
1199 *error = err;
1200 return NULL;
1198} 1201}
1199#endif 1202#endif
1200 1203
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index ea99f6c97f56..aa11d7dbe970 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -11,7 +11,6 @@
11 11
12#define EXT4FS_DEBUG 12#define EXT4FS_DEBUG
13 13
14#include <linux/smp_lock.h>
15#include <linux/ext4_jbd2.h> 14#include <linux/ext4_jbd2.h>
16 15
17#include <linux/errno.h> 16#include <linux/errno.h>
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 61c4718e4a53..719126932354 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -470,7 +470,7 @@ static void ext4_put_super (struct super_block * sb)
470 dump_orphan_list(sb, sbi); 470 dump_orphan_list(sb, sbi);
471 J_ASSERT(list_empty(&sbi->s_orphan)); 471 J_ASSERT(list_empty(&sbi->s_orphan));
472 472
473 invalidate_bdev(sb->s_bdev, 0); 473 invalidate_bdev(sb->s_bdev);
474 if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) { 474 if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
475 /* 475 /*
476 * Invalidate the journal device's buffers. We don't want them 476 * Invalidate the journal device's buffers. We don't want them
@@ -478,7 +478,7 @@ static void ext4_put_super (struct super_block * sb)
478 * hotswapped, and it breaks the `ro-after' testing code. 478 * hotswapped, and it breaks the `ro-after' testing code.
479 */ 479 */
480 sync_blockdev(sbi->journal_bdev); 480 sync_blockdev(sbi->journal_bdev);
481 invalidate_bdev(sbi->journal_bdev, 0); 481 invalidate_bdev(sbi->journal_bdev);
482 ext4_blkdev_remove(sbi); 482 ext4_blkdev_remove(sbi);
483 } 483 }
484 sb->s_fs_info = NULL; 484 sb->s_fs_info = NULL;
@@ -517,8 +517,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
517{ 517{
518 struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; 518 struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
519 519
520 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 520 if (flags & SLAB_CTOR_CONSTRUCTOR) {
521 SLAB_CTOR_CONSTRUCTOR) {
522 INIT_LIST_HEAD(&ei->i_orphan); 521 INIT_LIST_HEAD(&ei->i_orphan);
523#ifdef CONFIG_EXT4DEV_FS_XATTR 522#ifdef CONFIG_EXT4DEV_FS_XATTR
524 init_rwsem(&ei->xattr_sem); 523 init_rwsem(&ei->xattr_sem);
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index b6a6861951f9..f17eaf2321b9 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -6,7 +6,6 @@
6#include <linux/module.h> 6#include <linux/module.h>
7#include <linux/string.h> 7#include <linux/string.h>
8#include <linux/fs.h> 8#include <linux/fs.h>
9#include <linux/smp_lock.h>
10#include <linux/ext4_jbd2.h> 9#include <linux/ext4_jbd2.h>
11#include <linux/ext4_fs.h> 10#include <linux/ext4_fs.h>
12#include <linux/security.h> 11#include <linux/security.h>
diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c
index b76f2dbc82da..e0f05acdafec 100644
--- a/fs/ext4/xattr_trusted.c
+++ b/fs/ext4/xattr_trusted.c
@@ -9,7 +9,6 @@
9#include <linux/string.h> 9#include <linux/string.h>
10#include <linux/capability.h> 10#include <linux/capability.h>
11#include <linux/fs.h> 11#include <linux/fs.h>
12#include <linux/smp_lock.h>
13#include <linux/ext4_jbd2.h> 12#include <linux/ext4_jbd2.h>
14#include <linux/ext4_fs.h> 13#include <linux/ext4_fs.h>
15#include "xattr.h" 14#include "xattr.h"
diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c
index c53cded0761a..7ed3d8ebf096 100644
--- a/fs/ext4/xattr_user.c
+++ b/fs/ext4/xattr_user.c
@@ -8,7 +8,6 @@
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/string.h> 9#include <linux/string.h>
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/smp_lock.h>
12#include <linux/ext4_jbd2.h> 11#include <linux/ext4_jbd2.h>
13#include <linux/ext4_fs.h> 12#include <linux/ext4_fs.h>
14#include "xattr.h" 13#include "xattr.h"
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 05c2941c74f2..1959143c1d27 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -40,8 +40,7 @@ static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
40{ 40{
41 struct fat_cache *cache = (struct fat_cache *)foo; 41 struct fat_cache *cache = (struct fat_cache *)foo;
42 42
43 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 43 if (flags & SLAB_CTOR_CONSTRUCTOR)
44 SLAB_CTOR_CONSTRUCTOR)
45 INIT_LIST_HEAD(&cache->cache_list); 44 INIT_LIST_HEAD(&cache->cache_list);
46} 45}
47 46
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index c16af246d245..ccf161dffb63 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -422,7 +422,7 @@ EODir:
422EXPORT_SYMBOL_GPL(fat_search_long); 422EXPORT_SYMBOL_GPL(fat_search_long);
423 423
424struct fat_ioctl_filldir_callback { 424struct fat_ioctl_filldir_callback {
425 struct dirent __user *dirent; 425 void __user *dirent;
426 int result; 426 int result;
427 /* for dir ioctl */ 427 /* for dir ioctl */
428 const char *longname; 428 const char *longname;
@@ -647,62 +647,85 @@ static int fat_readdir(struct file *filp, void *dirent, filldir_t filldir)
647 return __fat_readdir(inode, filp, dirent, filldir, 0, 0); 647 return __fat_readdir(inode, filp, dirent, filldir, 0, 0);
648} 648}
649 649
650static int fat_ioctl_filldir(void *__buf, const char *name, int name_len, 650#define FAT_IOCTL_FILLDIR_FUNC(func, dirent_type) \
651 loff_t offset, u64 ino, unsigned int d_type) 651static int func(void *__buf, const char *name, int name_len, \
652 loff_t offset, u64 ino, unsigned int d_type) \
653{ \
654 struct fat_ioctl_filldir_callback *buf = __buf; \
655 struct dirent_type __user *d1 = buf->dirent; \
656 struct dirent_type __user *d2 = d1 + 1; \
657 \
658 if (buf->result) \
659 return -EINVAL; \
660 buf->result++; \
661 \
662 if (name != NULL) { \
663 /* dirent has only short name */ \
664 if (name_len >= sizeof(d1->d_name)) \
665 name_len = sizeof(d1->d_name) - 1; \
666 \
667 if (put_user(0, d2->d_name) || \
668 put_user(0, &d2->d_reclen) || \
669 copy_to_user(d1->d_name, name, name_len) || \
670 put_user(0, d1->d_name + name_len) || \
671 put_user(name_len, &d1->d_reclen)) \
672 goto efault; \
673 } else { \
674 /* dirent has short and long name */ \
675 const char *longname = buf->longname; \
676 int long_len = buf->long_len; \
677 const char *shortname = buf->shortname; \
678 int short_len = buf->short_len; \
679 \
680 if (long_len >= sizeof(d1->d_name)) \
681 long_len = sizeof(d1->d_name) - 1; \
682 if (short_len >= sizeof(d1->d_name)) \
683 short_len = sizeof(d1->d_name) - 1; \
684 \
685 if (copy_to_user(d2->d_name, longname, long_len) || \
686 put_user(0, d2->d_name + long_len) || \
687 put_user(long_len, &d2->d_reclen) || \
688 put_user(ino, &d2->d_ino) || \
689 put_user(offset, &d2->d_off) || \
690 copy_to_user(d1->d_name, shortname, short_len) || \
691 put_user(0, d1->d_name + short_len) || \
692 put_user(short_len, &d1->d_reclen)) \
693 goto efault; \
694 } \
695 return 0; \
696efault: \
697 buf->result = -EFAULT; \
698 return -EFAULT; \
699}
700
701FAT_IOCTL_FILLDIR_FUNC(fat_ioctl_filldir, dirent)
702
703static int fat_ioctl_readdir(struct inode *inode, struct file *filp,
704 void __user *dirent, filldir_t filldir,
705 int short_only, int both)
652{ 706{
653 struct fat_ioctl_filldir_callback *buf = __buf; 707 struct fat_ioctl_filldir_callback buf;
654 struct dirent __user *d1 = buf->dirent; 708 int ret;
655 struct dirent __user *d2 = d1 + 1; 709
656 710 buf.dirent = dirent;
657 if (buf->result) 711 buf.result = 0;
658 return -EINVAL; 712 mutex_lock(&inode->i_mutex);
659 buf->result++; 713 ret = -ENOENT;
660 714 if (!IS_DEADDIR(inode)) {
661 if (name != NULL) { 715 ret = __fat_readdir(inode, filp, &buf, filldir,
662 /* dirent has only short name */ 716 short_only, both);
663 if (name_len >= sizeof(d1->d_name))
664 name_len = sizeof(d1->d_name) - 1;
665
666 if (put_user(0, d2->d_name) ||
667 put_user(0, &d2->d_reclen) ||
668 copy_to_user(d1->d_name, name, name_len) ||
669 put_user(0, d1->d_name + name_len) ||
670 put_user(name_len, &d1->d_reclen))
671 goto efault;
672 } else {
673 /* dirent has short and long name */
674 const char *longname = buf->longname;
675 int long_len = buf->long_len;
676 const char *shortname = buf->shortname;
677 int short_len = buf->short_len;
678
679 if (long_len >= sizeof(d1->d_name))
680 long_len = sizeof(d1->d_name) - 1;
681 if (short_len >= sizeof(d1->d_name))
682 short_len = sizeof(d1->d_name) - 1;
683
684 if (copy_to_user(d2->d_name, longname, long_len) ||
685 put_user(0, d2->d_name + long_len) ||
686 put_user(long_len, &d2->d_reclen) ||
687 put_user(ino, &d2->d_ino) ||
688 put_user(offset, &d2->d_off) ||
689 copy_to_user(d1->d_name, shortname, short_len) ||
690 put_user(0, d1->d_name + short_len) ||
691 put_user(short_len, &d1->d_reclen))
692 goto efault;
693 } 717 }
694 return 0; 718 mutex_unlock(&inode->i_mutex);
695efault: 719 if (ret >= 0)
696 buf->result = -EFAULT; 720 ret = buf.result;
697 return -EFAULT; 721 return ret;
698} 722}
699 723
700static int fat_dir_ioctl(struct inode * inode, struct file * filp, 724static int fat_dir_ioctl(struct inode *inode, struct file *filp,
701 unsigned int cmd, unsigned long arg) 725 unsigned int cmd, unsigned long arg)
702{ 726{
703 struct fat_ioctl_filldir_callback buf; 727 struct dirent __user *d1 = (struct dirent __user *)arg;
704 struct dirent __user *d1; 728 int short_only, both;
705 int ret, short_only, both;
706 729
707 switch (cmd) { 730 switch (cmd) {
708 case VFAT_IOCTL_READDIR_SHORT: 731 case VFAT_IOCTL_READDIR_SHORT:
@@ -717,7 +740,6 @@ static int fat_dir_ioctl(struct inode * inode, struct file * filp,
717 return fat_generic_ioctl(inode, filp, cmd, arg); 740 return fat_generic_ioctl(inode, filp, cmd, arg);
718 } 741 }
719 742
720 d1 = (struct dirent __user *)arg;
721 if (!access_ok(VERIFY_WRITE, d1, sizeof(struct dirent[2]))) 743 if (!access_ok(VERIFY_WRITE, d1, sizeof(struct dirent[2])))
722 return -EFAULT; 744 return -EFAULT;
723 /* 745 /*
@@ -728,69 +750,48 @@ static int fat_dir_ioctl(struct inode * inode, struct file * filp,
728 if (put_user(0, &d1->d_reclen)) 750 if (put_user(0, &d1->d_reclen))
729 return -EFAULT; 751 return -EFAULT;
730 752
731 buf.dirent = d1; 753 return fat_ioctl_readdir(inode, filp, d1, fat_ioctl_filldir,
732 buf.result = 0; 754 short_only, both);
733 mutex_lock(&inode->i_mutex);
734 ret = -ENOENT;
735 if (!IS_DEADDIR(inode)) {
736 ret = __fat_readdir(inode, filp, &buf, fat_ioctl_filldir,
737 short_only, both);
738 }
739 mutex_unlock(&inode->i_mutex);
740 if (ret >= 0)
741 ret = buf.result;
742 return ret;
743} 755}
744 756
745#ifdef CONFIG_COMPAT 757#ifdef CONFIG_COMPAT
746#define VFAT_IOCTL_READDIR_BOTH32 _IOR('r', 1, struct compat_dirent[2]) 758#define VFAT_IOCTL_READDIR_BOTH32 _IOR('r', 1, struct compat_dirent[2])
747#define VFAT_IOCTL_READDIR_SHORT32 _IOR('r', 2, struct compat_dirent[2]) 759#define VFAT_IOCTL_READDIR_SHORT32 _IOR('r', 2, struct compat_dirent[2])
748 760
749static long fat_compat_put_dirent32(struct dirent *d, 761FAT_IOCTL_FILLDIR_FUNC(fat_compat_ioctl_filldir, compat_dirent)
750 struct compat_dirent __user *d32)
751{
752 if (!access_ok(VERIFY_WRITE, d32, sizeof(struct compat_dirent)))
753 return -EFAULT;
754
755 __put_user(d->d_ino, &d32->d_ino);
756 __put_user(d->d_off, &d32->d_off);
757 __put_user(d->d_reclen, &d32->d_reclen);
758 if (__copy_to_user(d32->d_name, d->d_name, d->d_reclen))
759 return -EFAULT;
760 762
761 return 0; 763static long fat_compat_dir_ioctl(struct file *filp, unsigned cmd,
762}
763
764static long fat_compat_dir_ioctl(struct file *file, unsigned cmd,
765 unsigned long arg) 764 unsigned long arg)
766{ 765{
767 struct compat_dirent __user *p = compat_ptr(arg); 766 struct inode *inode = filp->f_path.dentry->d_inode;
768 int ret; 767 struct compat_dirent __user *d1 = compat_ptr(arg);
769 mm_segment_t oldfs = get_fs(); 768 int short_only, both;
770 struct dirent d[2];
771 769
772 switch (cmd) { 770 switch (cmd) {
773 case VFAT_IOCTL_READDIR_BOTH32:
774 cmd = VFAT_IOCTL_READDIR_BOTH;
775 break;
776 case VFAT_IOCTL_READDIR_SHORT32: 771 case VFAT_IOCTL_READDIR_SHORT32:
777 cmd = VFAT_IOCTL_READDIR_SHORT; 772 short_only = 1;
773 both = 0;
774 break;
775 case VFAT_IOCTL_READDIR_BOTH32:
776 short_only = 0;
777 both = 1;
778 break; 778 break;
779 default: 779 default:
780 return -ENOIOCTLCMD; 780 return -ENOIOCTLCMD;
781 } 781 }
782 782
783 set_fs(KERNEL_DS); 783 if (!access_ok(VERIFY_WRITE, d1, sizeof(struct compat_dirent[2])))
784 lock_kernel(); 784 return -EFAULT;
785 ret = fat_dir_ioctl(file->f_path.dentry->d_inode, file, 785 /*
786 cmd, (unsigned long) &d); 786 * Yes, we don't need this put_user() absolutely. However old
787 unlock_kernel(); 787 * code didn't return the right value. So, app use this value,
788 set_fs(oldfs); 788 * in order to check whether it is EOF.
789 if (ret >= 0) { 789 */
790 ret |= fat_compat_put_dirent32(&d[0], p); 790 if (put_user(0, &d1->d_reclen))
791 ret |= fat_compat_put_dirent32(&d[1], p + 1); 791 return -EFAULT;
792 } 792
793 return ret; 793 return fat_ioctl_readdir(inode, filp, d1, fat_compat_ioctl_filldir,
794 short_only, both);
794} 795}
795#endif /* CONFIG_COMPAT */ 796#endif /* CONFIG_COMPAT */
796 797
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 9bfe607c892e..2c55e8dce793 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -25,6 +25,7 @@
25#include <linux/parser.h> 25#include <linux/parser.h>
26#include <linux/uio.h> 26#include <linux/uio.h>
27#include <linux/writeback.h> 27#include <linux/writeback.h>
28#include <linux/log2.h>
28#include <asm/unaligned.h> 29#include <asm/unaligned.h>
29 30
30#ifndef CONFIG_FAT_DEFAULT_IOCHARSET 31#ifndef CONFIG_FAT_DEFAULT_IOCHARSET
@@ -499,8 +500,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
499{ 500{
500 struct msdos_inode_info *ei = (struct msdos_inode_info *)foo; 501 struct msdos_inode_info *ei = (struct msdos_inode_info *)foo;
501 502
502 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 503 if (flags & SLAB_CTOR_CONSTRUCTOR) {
503 SLAB_CTOR_CONSTRUCTOR) {
504 spin_lock_init(&ei->cache_lru_lock); 504 spin_lock_init(&ei->cache_lru_lock);
505 ei->nr_caches = 0; 505 ei->nr_caches = 0;
506 ei->cache_valid_id = FAT_CACHE_VALID + 1; 506 ei->cache_valid_id = FAT_CACHE_VALID + 1;
@@ -825,6 +825,8 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt)
825 } 825 }
826 if (opts->name_check != 'n') 826 if (opts->name_check != 'n')
827 seq_printf(m, ",check=%c", opts->name_check); 827 seq_printf(m, ",check=%c", opts->name_check);
828 if (opts->usefree)
829 seq_puts(m, ",usefree");
828 if (opts->quiet) 830 if (opts->quiet)
829 seq_puts(m, ",quiet"); 831 seq_puts(m, ",quiet");
830 if (opts->showexec) 832 if (opts->showexec)
@@ -850,7 +852,7 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt)
850 852
851enum { 853enum {
852 Opt_check_n, Opt_check_r, Opt_check_s, Opt_uid, Opt_gid, 854 Opt_check_n, Opt_check_r, Opt_check_s, Opt_uid, Opt_gid,
853 Opt_umask, Opt_dmask, Opt_fmask, Opt_codepage, Opt_nocase, 855 Opt_umask, Opt_dmask, Opt_fmask, Opt_codepage, Opt_usefree, Opt_nocase,
854 Opt_quiet, Opt_showexec, Opt_debug, Opt_immutable, 856 Opt_quiet, Opt_showexec, Opt_debug, Opt_immutable,
855 Opt_dots, Opt_nodots, 857 Opt_dots, Opt_nodots,
856 Opt_charset, Opt_shortname_lower, Opt_shortname_win95, 858 Opt_charset, Opt_shortname_lower, Opt_shortname_win95,
@@ -872,6 +874,7 @@ static match_table_t fat_tokens = {
872 {Opt_dmask, "dmask=%o"}, 874 {Opt_dmask, "dmask=%o"},
873 {Opt_fmask, "fmask=%o"}, 875 {Opt_fmask, "fmask=%o"},
874 {Opt_codepage, "codepage=%u"}, 876 {Opt_codepage, "codepage=%u"},
877 {Opt_usefree, "usefree"},
875 {Opt_nocase, "nocase"}, 878 {Opt_nocase, "nocase"},
876 {Opt_quiet, "quiet"}, 879 {Opt_quiet, "quiet"},
877 {Opt_showexec, "showexec"}, 880 {Opt_showexec, "showexec"},
@@ -951,7 +954,7 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
951 opts->quiet = opts->showexec = opts->sys_immutable = opts->dotsOK = 0; 954 opts->quiet = opts->showexec = opts->sys_immutable = opts->dotsOK = 0;
952 opts->utf8 = opts->unicode_xlate = 0; 955 opts->utf8 = opts->unicode_xlate = 0;
953 opts->numtail = 1; 956 opts->numtail = 1;
954 opts->nocase = 0; 957 opts->usefree = opts->nocase = 0;
955 *debug = 0; 958 *debug = 0;
956 959
957 if (!options) 960 if (!options)
@@ -979,6 +982,9 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
979 case Opt_check_n: 982 case Opt_check_n:
980 opts->name_check = 'n'; 983 opts->name_check = 'n';
981 break; 984 break;
985 case Opt_usefree:
986 opts->usefree = 1;
987 break;
982 case Opt_nocase: 988 case Opt_nocase:
983 if (!is_vfat) 989 if (!is_vfat)
984 opts->nocase = 1; 990 opts->nocase = 1;
@@ -1218,8 +1224,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1218 } 1224 }
1219 logical_sector_size = 1225 logical_sector_size =
1220 le16_to_cpu(get_unaligned((__le16 *)&b->sector_size)); 1226 le16_to_cpu(get_unaligned((__le16 *)&b->sector_size));
1221 if (!logical_sector_size 1227 if (!is_power_of_2(logical_sector_size)
1222 || (logical_sector_size & (logical_sector_size - 1))
1223 || (logical_sector_size < 512) 1228 || (logical_sector_size < 512)
1224 || (PAGE_CACHE_SIZE < logical_sector_size)) { 1229 || (PAGE_CACHE_SIZE < logical_sector_size)) {
1225 if (!silent) 1230 if (!silent)
@@ -1229,8 +1234,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1229 goto out_invalid; 1234 goto out_invalid;
1230 } 1235 }
1231 sbi->sec_per_clus = b->sec_per_clus; 1236 sbi->sec_per_clus = b->sec_per_clus;
1232 if (!sbi->sec_per_clus 1237 if (!is_power_of_2(sbi->sec_per_clus)) {
1233 || (sbi->sec_per_clus & (sbi->sec_per_clus - 1))) {
1234 if (!silent) 1238 if (!silent)
1235 printk(KERN_ERR "FAT: bogus sectors per cluster %u\n", 1239 printk(KERN_ERR "FAT: bogus sectors per cluster %u\n",
1236 sbi->sec_per_clus); 1240 sbi->sec_per_clus);
@@ -1306,7 +1310,9 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1306 le32_to_cpu(fsinfo->signature2), 1310 le32_to_cpu(fsinfo->signature2),
1307 sbi->fsinfo_sector); 1311 sbi->fsinfo_sector);
1308 } else { 1312 } else {
1309 sbi->free_clusters = le32_to_cpu(fsinfo->free_clusters); 1313 if (sbi->options.usefree)
1314 sbi->free_clusters =
1315 le32_to_cpu(fsinfo->free_clusters);
1310 sbi->prev_free = le32_to_cpu(fsinfo->next_cluster); 1316 sbi->prev_free = le32_to_cpu(fsinfo->next_cluster);
1311 } 1317 }
1312 1318
diff --git a/fs/fifo.c b/fs/fifo.c
index 49035b174b48..6e7df7256782 100644
--- a/fs/fifo.c
+++ b/fs/fifo.c
@@ -11,7 +11,6 @@
11 11
12#include <linux/mm.h> 12#include <linux/mm.h>
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/smp_lock.h>
15#include <linux/fs.h> 14#include <linux/fs.h>
16#include <linux/pipe_fs_i.h> 15#include <linux/pipe_fs_i.h>
17 16
diff --git a/fs/file_table.c b/fs/file_table.c
index 4c17a18d8c10..d17fd691b832 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -10,7 +10,6 @@
10#include <linux/file.h> 10#include <linux/file.h>
11#include <linux/init.h> 11#include <linux/init.h>
12#include <linux/module.h> 12#include <linux/module.h>
13#include <linux/smp_lock.h>
14#include <linux/fs.h> 13#include <linux/fs.h>
15#include <linux/security.h> 14#include <linux/security.h>
16#include <linux/eventpoll.h> 15#include <linux/eventpoll.h>
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 7a4f61aa05f8..f37f87262837 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -41,11 +41,12 @@ void put_filesystem(struct file_system_type *fs)
41 module_put(fs->owner); 41 module_put(fs->owner);
42} 42}
43 43
44static struct file_system_type **find_filesystem(const char *name) 44static struct file_system_type **find_filesystem(const char *name, unsigned len)
45{ 45{
46 struct file_system_type **p; 46 struct file_system_type **p;
47 for (p=&file_systems; *p; p=&(*p)->next) 47 for (p=&file_systems; *p; p=&(*p)->next)
48 if (strcmp((*p)->name,name) == 0) 48 if (strlen((*p)->name) == len &&
49 strncmp((*p)->name, name, len) == 0)
49 break; 50 break;
50 return p; 51 return p;
51} 52}
@@ -68,11 +69,12 @@ int register_filesystem(struct file_system_type * fs)
68 int res = 0; 69 int res = 0;
69 struct file_system_type ** p; 70 struct file_system_type ** p;
70 71
72 BUG_ON(strchr(fs->name, '.'));
71 if (fs->next) 73 if (fs->next)
72 return -EBUSY; 74 return -EBUSY;
73 INIT_LIST_HEAD(&fs->fs_supers); 75 INIT_LIST_HEAD(&fs->fs_supers);
74 write_lock(&file_systems_lock); 76 write_lock(&file_systems_lock);
75 p = find_filesystem(fs->name); 77 p = find_filesystem(fs->name, strlen(fs->name));
76 if (*p) 78 if (*p)
77 res = -EBUSY; 79 res = -EBUSY;
78 else 80 else
@@ -215,19 +217,26 @@ int get_filesystem_list(char * buf)
215struct file_system_type *get_fs_type(const char *name) 217struct file_system_type *get_fs_type(const char *name)
216{ 218{
217 struct file_system_type *fs; 219 struct file_system_type *fs;
220 const char *dot = strchr(name, '.');
221 unsigned len = dot ? dot - name : strlen(name);
218 222
219 read_lock(&file_systems_lock); 223 read_lock(&file_systems_lock);
220 fs = *(find_filesystem(name)); 224 fs = *(find_filesystem(name, len));
221 if (fs && !try_module_get(fs->owner)) 225 if (fs && !try_module_get(fs->owner))
222 fs = NULL; 226 fs = NULL;
223 read_unlock(&file_systems_lock); 227 read_unlock(&file_systems_lock);
224 if (!fs && (request_module("%s", name) == 0)) { 228 if (!fs && (request_module("%.*s", len, name) == 0)) {
225 read_lock(&file_systems_lock); 229 read_lock(&file_systems_lock);
226 fs = *(find_filesystem(name)); 230 fs = *(find_filesystem(name, len));
227 if (fs && !try_module_get(fs->owner)) 231 if (fs && !try_module_get(fs->owner))
228 fs = NULL; 232 fs = NULL;
229 read_unlock(&file_systems_lock); 233 read_unlock(&file_systems_lock);
230 } 234 }
235
236 if (dot && fs && !(fs->fs_flags & FS_HAS_SUBTYPE)) {
237 put_filesystem(fs);
238 fs = NULL;
239 }
231 return fs; 240 return fs;
232} 241}
233 242
diff --git a/fs/freevxfs/vxfs_bmap.c b/fs/freevxfs/vxfs_bmap.c
index 2d71128bd8d6..f86fd3cacd5a 100644
--- a/fs/freevxfs/vxfs_bmap.c
+++ b/fs/freevxfs/vxfs_bmap.c
@@ -137,7 +137,7 @@ vxfs_bmap_indir(struct inode *ip, long indir, int size, long block)
137 137
138 bp = sb_bread(ip->i_sb, 138 bp = sb_bread(ip->i_sb,
139 indir + (i / VXFS_TYPED_PER_BLOCK(ip->i_sb))); 139 indir + (i / VXFS_TYPED_PER_BLOCK(ip->i_sb)));
140 if (!buffer_mapped(bp)) 140 if (!bp || !buffer_mapped(bp))
141 return 0; 141 return 0;
142 142
143 typ = ((struct vxfs_typed *)bp->b_data) + 143 typ = ((struct vxfs_typed *)bp->b_data) +
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 098a915fd9a1..d1f7c5b5b3c3 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -99,7 +99,7 @@ vxfs_blkiget(struct super_block *sbp, u_long extent, ino_t ino)
99 offset = ((ino % (sbp->s_blocksize / VXFS_ISIZE)) * VXFS_ISIZE); 99 offset = ((ino % (sbp->s_blocksize / VXFS_ISIZE)) * VXFS_ISIZE);
100 bp = sb_bread(sbp, block); 100 bp = sb_bread(sbp, block);
101 101
102 if (buffer_mapped(bp)) { 102 if (bp && buffer_mapped(bp)) {
103 struct vxfs_inode_info *vip; 103 struct vxfs_inode_info *vip;
104 struct vxfs_dinode *dip; 104 struct vxfs_dinode *dip;
105 105
diff --git a/fs/freevxfs/vxfs_subr.c b/fs/freevxfs/vxfs_subr.c
index decac62efe57..ed8f0b0dd880 100644
--- a/fs/freevxfs/vxfs_subr.c
+++ b/fs/freevxfs/vxfs_subr.c
@@ -74,10 +74,7 @@ vxfs_get_page(struct address_space *mapping, u_long n)
74 pp = read_mapping_page(mapping, n, NULL); 74 pp = read_mapping_page(mapping, n, NULL);
75 75
76 if (!IS_ERR(pp)) { 76 if (!IS_ERR(pp)) {
77 wait_on_page_locked(pp);
78 kmap(pp); 77 kmap(pp);
79 if (!PageUptodate(pp))
80 goto fail;
81 /** if (!PageChecked(pp)) **/ 78 /** if (!PageChecked(pp)) **/
82 /** vxfs_check_page(pp); **/ 79 /** vxfs_check_page(pp); **/
83 if (PageError(pp)) 80 if (PageError(pp))
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 2fd06927e851..acfad65a6e8e 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -738,8 +738,7 @@ static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl)
738 738
739 if (cmd == F_GETLK) { 739 if (cmd == F_GETLK) {
740 if (fc->no_lock) { 740 if (fc->no_lock) {
741 if (!posix_test_lock(file, fl, fl)) 741 posix_test_lock(file, fl);
742 fl->fl_type = F_UNLCK;
743 err = 0; 742 err = 0;
744 } else 743 } else
745 err = fuse_getlk(file, fl); 744 err = fuse_getlk(file, fl);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 608db81219a0..1397018ff476 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -636,6 +636,7 @@ static int fuse_get_sb(struct file_system_type *fs_type,
636static struct file_system_type fuse_fs_type = { 636static struct file_system_type fuse_fs_type = {
637 .owner = THIS_MODULE, 637 .owner = THIS_MODULE,
638 .name = "fuse", 638 .name = "fuse",
639 .fs_flags = FS_HAS_SUBTYPE,
639 .get_sb = fuse_get_sb, 640 .get_sb = fuse_get_sb,
640 .kill_sb = kill_anon_super, 641 .kill_sb = kill_anon_super,
641}; 642};
@@ -652,6 +653,7 @@ static int fuse_get_sb_blk(struct file_system_type *fs_type,
652static struct file_system_type fuseblk_fs_type = { 653static struct file_system_type fuseblk_fs_type = {
653 .owner = THIS_MODULE, 654 .owner = THIS_MODULE,
654 .name = "fuseblk", 655 .name = "fuseblk",
656 .fs_flags = FS_HAS_SUBTYPE,
655 .get_sb = fuse_get_sb_blk, 657 .get_sb = fuse_get_sb_blk,
656 .kill_sb = kill_block_super, 658 .kill_sb = kill_block_super,
657 .fs_flags = FS_REQUIRES_DEV, 659 .fs_flags = FS_REQUIRES_DEV,
@@ -685,8 +687,7 @@ static void fuse_inode_init_once(void *foo, struct kmem_cache *cachep,
685{ 687{
686 struct inode * inode = foo; 688 struct inode * inode = foo;
687 689
688 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 690 if (flags & SLAB_CTOR_CONSTRUCTOR)
689 SLAB_CTOR_CONSTRUCTOR)
690 inode_init_once(inode); 691 inode_init_once(inode);
691} 692}
692 693
@@ -731,12 +732,12 @@ static int fuse_sysfs_init(void)
731{ 732{
732 int err; 733 int err;
733 734
734 kset_set_kset_s(&fuse_subsys, fs_subsys); 735 kobj_set_kset_s(&fuse_subsys, fs_subsys);
735 err = subsystem_register(&fuse_subsys); 736 err = subsystem_register(&fuse_subsys);
736 if (err) 737 if (err)
737 goto out_err; 738 goto out_err;
738 739
739 kset_set_kset_s(&connections_subsys, fuse_subsys); 740 kobj_set_kset_s(&connections_subsys, fuse_subsys);
740 err = subsystem_register(&connections_subsys); 741 err = subsystem_register(&connections_subsys);
741 if (err) 742 if (err)
742 goto out_fuse_unregister; 743 goto out_fuse_unregister;
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 82a1ac7895a2..a96fa07b3f3b 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -1262,9 +1262,10 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
1262 u64 leaf_no) 1262 u64 leaf_no)
1263{ 1263{
1264 struct gfs2_inode *ip = GFS2_I(inode); 1264 struct gfs2_inode *ip = GFS2_I(inode);
1265 struct gfs2_sbd *sdp = GFS2_SB(inode);
1265 struct buffer_head *bh; 1266 struct buffer_head *bh;
1266 struct gfs2_leaf *lf; 1267 struct gfs2_leaf *lf;
1267 unsigned entries = 0; 1268 unsigned entries = 0, entries2 = 0;
1268 unsigned leaves = 0; 1269 unsigned leaves = 0;
1269 const struct gfs2_dirent **darr, *dent; 1270 const struct gfs2_dirent **darr, *dent;
1270 struct dirent_gather g; 1271 struct dirent_gather g;
@@ -1290,7 +1291,13 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
1290 return 0; 1291 return 0;
1291 1292
1292 error = -ENOMEM; 1293 error = -ENOMEM;
1293 larr = vmalloc((leaves + entries) * sizeof(void *)); 1294 /*
1295 * The extra 99 entries are not normally used, but are a buffer
1296 * zone in case the number of entries in the leaf is corrupt.
1297 * 99 is the maximum number of entries that can fit in a single
1298 * leaf block.
1299 */
1300 larr = vmalloc((leaves + entries + 99) * sizeof(void *));
1294 if (!larr) 1301 if (!larr)
1295 goto out; 1302 goto out;
1296 darr = (const struct gfs2_dirent **)(larr + leaves); 1303 darr = (const struct gfs2_dirent **)(larr + leaves);
@@ -1305,10 +1312,20 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
1305 lf = (struct gfs2_leaf *)bh->b_data; 1312 lf = (struct gfs2_leaf *)bh->b_data;
1306 lfn = be64_to_cpu(lf->lf_next); 1313 lfn = be64_to_cpu(lf->lf_next);
1307 if (lf->lf_entries) { 1314 if (lf->lf_entries) {
1315 entries2 += be16_to_cpu(lf->lf_entries);
1308 dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, 1316 dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size,
1309 gfs2_dirent_gather, NULL, &g); 1317 gfs2_dirent_gather, NULL, &g);
1310 error = PTR_ERR(dent); 1318 error = PTR_ERR(dent);
1311 if (IS_ERR(dent)) { 1319 if (IS_ERR(dent))
1320 goto out_kfree;
1321 if (entries2 != g.offset) {
1322 fs_warn(sdp, "Number of entries corrupt in dir "
1323 "leaf %llu, entries2 (%u) != "
1324 "g.offset (%u)\n",
1325 (unsigned long long)bh->b_blocknr,
1326 entries2, g.offset);
1327
1328 error = -EIO;
1312 goto out_kfree; 1329 goto out_kfree;
1313 } 1330 }
1314 error = 0; 1331 error = 0;
@@ -1318,6 +1335,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
1318 } 1335 }
1319 } while(lfn); 1336 } while(lfn);
1320 1337
1338 BUG_ON(entries2 != entries);
1321 error = do_filldir_main(ip, offset, opaque, filldir, darr, 1339 error = do_filldir_main(ip, offset, opaque, filldir, darr,
1322 entries, copied); 1340 entries, copied);
1323out_kfree: 1341out_kfree:
@@ -1401,6 +1419,7 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
1401 filldir_t filldir) 1419 filldir_t filldir)
1402{ 1420{
1403 struct gfs2_inode *dip = GFS2_I(inode); 1421 struct gfs2_inode *dip = GFS2_I(inode);
1422 struct gfs2_sbd *sdp = GFS2_SB(inode);
1404 struct dirent_gather g; 1423 struct dirent_gather g;
1405 const struct gfs2_dirent **darr, *dent; 1424 const struct gfs2_dirent **darr, *dent;
1406 struct buffer_head *dibh; 1425 struct buffer_head *dibh;
@@ -1423,8 +1442,8 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
1423 return error; 1442 return error;
1424 1443
1425 error = -ENOMEM; 1444 error = -ENOMEM;
1426 darr = kmalloc(dip->i_di.di_entries * sizeof(struct gfs2_dirent *), 1445 /* 96 is max number of dirents which can be stuffed into an inode */
1427 GFP_KERNEL); 1446 darr = kmalloc(96 * sizeof(struct gfs2_dirent *), GFP_KERNEL);
1428 if (darr) { 1447 if (darr) {
1429 g.pdent = darr; 1448 g.pdent = darr;
1430 g.offset = 0; 1449 g.offset = 0;
@@ -1434,6 +1453,15 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
1434 error = PTR_ERR(dent); 1453 error = PTR_ERR(dent);
1435 goto out; 1454 goto out;
1436 } 1455 }
1456 if (dip->i_di.di_entries != g.offset) {
1457 fs_warn(sdp, "Number of entries corrupt in dir %llu, "
1458 "ip->i_di.di_entries (%u) != g.offset (%u)\n",
1459 (unsigned long long)dip->i_num.no_addr,
1460 dip->i_di.di_entries,
1461 g.offset);
1462 error = -EIO;
1463 goto out;
1464 }
1437 error = do_filldir_main(dip, offset, opaque, filldir, darr, 1465 error = do_filldir_main(dip, offset, opaque, filldir, darr,
1438 dip->i_di.di_entries, &copied); 1466 dip->i_di.di_entries, &copied);
1439out: 1467out:
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 12accb08fe02..1815429a2978 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -23,6 +23,10 @@
23#include <linux/module.h> 23#include <linux/module.h>
24#include <linux/rwsem.h> 24#include <linux/rwsem.h>
25#include <asm/uaccess.h> 25#include <asm/uaccess.h>
26#include <linux/seq_file.h>
27#include <linux/debugfs.h>
28#include <linux/module.h>
29#include <linux/kallsyms.h>
26 30
27#include "gfs2.h" 31#include "gfs2.h"
28#include "incore.h" 32#include "incore.h"
@@ -40,20 +44,30 @@ struct gfs2_gl_hash_bucket {
40 struct hlist_head hb_list; 44 struct hlist_head hb_list;
41}; 45};
42 46
47struct glock_iter {
48 int hash; /* hash bucket index */
49 struct gfs2_sbd *sdp; /* incore superblock */
50 struct gfs2_glock *gl; /* current glock struct */
51 struct hlist_head *hb_list; /* current hash bucket ptr */
52 struct seq_file *seq; /* sequence file for debugfs */
53 char string[512]; /* scratch space */
54};
55
43typedef void (*glock_examiner) (struct gfs2_glock * gl); 56typedef void (*glock_examiner) (struct gfs2_glock * gl);
44 57
45static int gfs2_dump_lockstate(struct gfs2_sbd *sdp); 58static int gfs2_dump_lockstate(struct gfs2_sbd *sdp);
46static int dump_glock(struct gfs2_glock *gl); 59static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl);
47static int dump_inode(struct gfs2_inode *ip); 60static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh);
48static void gfs2_glock_xmote_th(struct gfs2_holder *gh);
49static void gfs2_glock_drop_th(struct gfs2_glock *gl); 61static void gfs2_glock_drop_th(struct gfs2_glock *gl);
50static DECLARE_RWSEM(gfs2_umount_flush_sem); 62static DECLARE_RWSEM(gfs2_umount_flush_sem);
63static struct dentry *gfs2_root;
51 64
52#define GFS2_GL_HASH_SHIFT 15 65#define GFS2_GL_HASH_SHIFT 15
53#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT) 66#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT)
54#define GFS2_GL_HASH_MASK (GFS2_GL_HASH_SIZE - 1) 67#define GFS2_GL_HASH_MASK (GFS2_GL_HASH_SIZE - 1)
55 68
56static struct gfs2_gl_hash_bucket gl_hash_table[GFS2_GL_HASH_SIZE]; 69static struct gfs2_gl_hash_bucket gl_hash_table[GFS2_GL_HASH_SIZE];
70static struct dentry *gfs2_root;
57 71
58/* 72/*
59 * Despite what you might think, the numbers below are not arbitrary :-) 73 * Despite what you might think, the numbers below are not arbitrary :-)
@@ -202,7 +216,6 @@ int gfs2_glock_put(struct gfs2_glock *gl)
202 gfs2_assert(sdp, list_empty(&gl->gl_reclaim)); 216 gfs2_assert(sdp, list_empty(&gl->gl_reclaim));
203 gfs2_assert(sdp, list_empty(&gl->gl_holders)); 217 gfs2_assert(sdp, list_empty(&gl->gl_holders));
204 gfs2_assert(sdp, list_empty(&gl->gl_waiters1)); 218 gfs2_assert(sdp, list_empty(&gl->gl_waiters1));
205 gfs2_assert(sdp, list_empty(&gl->gl_waiters2));
206 gfs2_assert(sdp, list_empty(&gl->gl_waiters3)); 219 gfs2_assert(sdp, list_empty(&gl->gl_waiters3));
207 glock_free(gl); 220 glock_free(gl);
208 rv = 1; 221 rv = 1;
@@ -303,7 +316,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
303 atomic_set(&gl->gl_ref, 1); 316 atomic_set(&gl->gl_ref, 1);
304 gl->gl_state = LM_ST_UNLOCKED; 317 gl->gl_state = LM_ST_UNLOCKED;
305 gl->gl_hash = hash; 318 gl->gl_hash = hash;
306 gl->gl_owner = NULL; 319 gl->gl_owner_pid = 0;
307 gl->gl_ip = 0; 320 gl->gl_ip = 0;
308 gl->gl_ops = glops; 321 gl->gl_ops = glops;
309 gl->gl_req_gh = NULL; 322 gl->gl_req_gh = NULL;
@@ -367,7 +380,7 @@ void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags,
367 INIT_LIST_HEAD(&gh->gh_list); 380 INIT_LIST_HEAD(&gh->gh_list);
368 gh->gh_gl = gl; 381 gh->gh_gl = gl;
369 gh->gh_ip = (unsigned long)__builtin_return_address(0); 382 gh->gh_ip = (unsigned long)__builtin_return_address(0);
370 gh->gh_owner = current; 383 gh->gh_owner_pid = current->pid;
371 gh->gh_state = state; 384 gh->gh_state = state;
372 gh->gh_flags = flags; 385 gh->gh_flags = flags;
373 gh->gh_error = 0; 386 gh->gh_error = 0;
@@ -389,7 +402,7 @@ void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder *
389{ 402{
390 gh->gh_state = state; 403 gh->gh_state = state;
391 gh->gh_flags = flags; 404 gh->gh_flags = flags;
392 gh->gh_iflags &= 1 << HIF_ALLOCED; 405 gh->gh_iflags = 0;
393 gh->gh_ip = (unsigned long)__builtin_return_address(0); 406 gh->gh_ip = (unsigned long)__builtin_return_address(0);
394} 407}
395 408
@@ -406,54 +419,8 @@ void gfs2_holder_uninit(struct gfs2_holder *gh)
406 gh->gh_ip = 0; 419 gh->gh_ip = 0;
407} 420}
408 421
409/** 422static void gfs2_holder_wake(struct gfs2_holder *gh)
410 * gfs2_holder_get - get a struct gfs2_holder structure
411 * @gl: the glock
412 * @state: the state we're requesting
413 * @flags: the modifier flags
414 * @gfp_flags:
415 *
416 * Figure out how big an impact this function has. Either:
417 * 1) Replace it with a cache of structures hanging off the struct gfs2_sbd
418 * 2) Leave it like it is
419 *
420 * Returns: the holder structure, NULL on ENOMEM
421 */
422
423static struct gfs2_holder *gfs2_holder_get(struct gfs2_glock *gl,
424 unsigned int state,
425 int flags, gfp_t gfp_flags)
426{
427 struct gfs2_holder *gh;
428
429 gh = kmalloc(sizeof(struct gfs2_holder), gfp_flags);
430 if (!gh)
431 return NULL;
432
433 gfs2_holder_init(gl, state, flags, gh);
434 set_bit(HIF_ALLOCED, &gh->gh_iflags);
435 gh->gh_ip = (unsigned long)__builtin_return_address(0);
436 return gh;
437}
438
439/**
440 * gfs2_holder_put - get rid of a struct gfs2_holder structure
441 * @gh: the holder structure
442 *
443 */
444
445static void gfs2_holder_put(struct gfs2_holder *gh)
446{ 423{
447 gfs2_holder_uninit(gh);
448 kfree(gh);
449}
450
451static void gfs2_holder_dispose_or_wake(struct gfs2_holder *gh)
452{
453 if (test_bit(HIF_DEALLOC, &gh->gh_iflags)) {
454 gfs2_holder_put(gh);
455 return;
456 }
457 clear_bit(HIF_WAIT, &gh->gh_iflags); 424 clear_bit(HIF_WAIT, &gh->gh_iflags);
458 smp_mb(); 425 smp_mb();
459 wake_up_bit(&gh->gh_iflags, HIF_WAIT); 426 wake_up_bit(&gh->gh_iflags, HIF_WAIT);
@@ -519,7 +486,7 @@ static int rq_promote(struct gfs2_holder *gh)
519 gfs2_reclaim_glock(sdp); 486 gfs2_reclaim_glock(sdp);
520 } 487 }
521 488
522 gfs2_glock_xmote_th(gh); 489 gfs2_glock_xmote_th(gh->gh_gl, gh);
523 spin_lock(&gl->gl_spin); 490 spin_lock(&gl->gl_spin);
524 } 491 }
525 return 1; 492 return 1;
@@ -542,7 +509,7 @@ static int rq_promote(struct gfs2_holder *gh)
542 gh->gh_error = 0; 509 gh->gh_error = 0;
543 set_bit(HIF_HOLDER, &gh->gh_iflags); 510 set_bit(HIF_HOLDER, &gh->gh_iflags);
544 511
545 gfs2_holder_dispose_or_wake(gh); 512 gfs2_holder_wake(gh);
546 513
547 return 0; 514 return 0;
548} 515}
@@ -554,32 +521,24 @@ static int rq_promote(struct gfs2_holder *gh)
554 * Returns: 1 if the queue is blocked 521 * Returns: 1 if the queue is blocked
555 */ 522 */
556 523
557static int rq_demote(struct gfs2_holder *gh) 524static int rq_demote(struct gfs2_glock *gl)
558{ 525{
559 struct gfs2_glock *gl = gh->gh_gl;
560
561 if (!list_empty(&gl->gl_holders)) 526 if (!list_empty(&gl->gl_holders))
562 return 1; 527 return 1;
563 528
564 if (gl->gl_state == gh->gh_state || gl->gl_state == LM_ST_UNLOCKED) { 529 if (gl->gl_state == gl->gl_demote_state ||
565 list_del_init(&gh->gh_list); 530 gl->gl_state == LM_ST_UNLOCKED) {
566 gh->gh_error = 0; 531 clear_bit(GLF_DEMOTE, &gl->gl_flags);
567 spin_unlock(&gl->gl_spin); 532 return 0;
568 gfs2_holder_dispose_or_wake(gh);
569 spin_lock(&gl->gl_spin);
570 } else {
571 gl->gl_req_gh = gh;
572 set_bit(GLF_LOCK, &gl->gl_flags);
573 spin_unlock(&gl->gl_spin);
574
575 if (gh->gh_state == LM_ST_UNLOCKED ||
576 gl->gl_state != LM_ST_EXCLUSIVE)
577 gfs2_glock_drop_th(gl);
578 else
579 gfs2_glock_xmote_th(gh);
580
581 spin_lock(&gl->gl_spin);
582 } 533 }
534 set_bit(GLF_LOCK, &gl->gl_flags);
535 spin_unlock(&gl->gl_spin);
536 if (gl->gl_demote_state == LM_ST_UNLOCKED ||
537 gl->gl_state != LM_ST_EXCLUSIVE)
538 gfs2_glock_drop_th(gl);
539 else
540 gfs2_glock_xmote_th(gl, NULL);
541 spin_lock(&gl->gl_spin);
583 542
584 return 0; 543 return 0;
585} 544}
@@ -607,16 +566,8 @@ static void run_queue(struct gfs2_glock *gl)
607 else 566 else
608 gfs2_assert_warn(gl->gl_sbd, 0); 567 gfs2_assert_warn(gl->gl_sbd, 0);
609 568
610 } else if (!list_empty(&gl->gl_waiters2) && 569 } else if (test_bit(GLF_DEMOTE, &gl->gl_flags)) {
611 !test_bit(GLF_SKIP_WAITERS2, &gl->gl_flags)) { 570 blocked = rq_demote(gl);
612 gh = list_entry(gl->gl_waiters2.next,
613 struct gfs2_holder, gh_list);
614
615 if (test_bit(HIF_DEMOTE, &gh->gh_iflags))
616 blocked = rq_demote(gh);
617 else
618 gfs2_assert_warn(gl->gl_sbd, 0);
619
620 } else if (!list_empty(&gl->gl_waiters3)) { 571 } else if (!list_empty(&gl->gl_waiters3)) {
621 gh = list_entry(gl->gl_waiters3.next, 572 gh = list_entry(gl->gl_waiters3.next,
622 struct gfs2_holder, gh_list); 573 struct gfs2_holder, gh_list);
@@ -654,7 +605,7 @@ static void gfs2_glmutex_lock(struct gfs2_glock *gl)
654 if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { 605 if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
655 list_add_tail(&gh.gh_list, &gl->gl_waiters1); 606 list_add_tail(&gh.gh_list, &gl->gl_waiters1);
656 } else { 607 } else {
657 gl->gl_owner = current; 608 gl->gl_owner_pid = current->pid;
658 gl->gl_ip = (unsigned long)__builtin_return_address(0); 609 gl->gl_ip = (unsigned long)__builtin_return_address(0);
659 clear_bit(HIF_WAIT, &gh.gh_iflags); 610 clear_bit(HIF_WAIT, &gh.gh_iflags);
660 smp_mb(); 611 smp_mb();
@@ -681,7 +632,7 @@ static int gfs2_glmutex_trylock(struct gfs2_glock *gl)
681 if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { 632 if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
682 acquired = 0; 633 acquired = 0;
683 } else { 634 } else {
684 gl->gl_owner = current; 635 gl->gl_owner_pid = current->pid;
685 gl->gl_ip = (unsigned long)__builtin_return_address(0); 636 gl->gl_ip = (unsigned long)__builtin_return_address(0);
686 } 637 }
687 spin_unlock(&gl->gl_spin); 638 spin_unlock(&gl->gl_spin);
@@ -699,7 +650,7 @@ static void gfs2_glmutex_unlock(struct gfs2_glock *gl)
699{ 650{
700 spin_lock(&gl->gl_spin); 651 spin_lock(&gl->gl_spin);
701 clear_bit(GLF_LOCK, &gl->gl_flags); 652 clear_bit(GLF_LOCK, &gl->gl_flags);
702 gl->gl_owner = NULL; 653 gl->gl_owner_pid = 0;
703 gl->gl_ip = 0; 654 gl->gl_ip = 0;
704 run_queue(gl); 655 run_queue(gl);
705 BUG_ON(!spin_is_locked(&gl->gl_spin)); 656 BUG_ON(!spin_is_locked(&gl->gl_spin));
@@ -707,50 +658,24 @@ static void gfs2_glmutex_unlock(struct gfs2_glock *gl)
707} 658}
708 659
709/** 660/**
710 * handle_callback - add a demote request to a lock's queue 661 * handle_callback - process a demote request
711 * @gl: the glock 662 * @gl: the glock
712 * @state: the state the caller wants us to change to 663 * @state: the state the caller wants us to change to
713 * 664 *
714 * Note: This may fail sliently if we are out of memory. 665 * There are only two requests that we are going to see in actual
666 * practise: LM_ST_SHARED and LM_ST_UNLOCKED
715 */ 667 */
716 668
717static void handle_callback(struct gfs2_glock *gl, unsigned int state) 669static void handle_callback(struct gfs2_glock *gl, unsigned int state)
718{ 670{
719 struct gfs2_holder *gh, *new_gh = NULL;
720
721restart:
722 spin_lock(&gl->gl_spin); 671 spin_lock(&gl->gl_spin);
723 672 if (test_and_set_bit(GLF_DEMOTE, &gl->gl_flags) == 0) {
724 list_for_each_entry(gh, &gl->gl_waiters2, gh_list) { 673 gl->gl_demote_state = state;
725 if (test_bit(HIF_DEMOTE, &gh->gh_iflags) && 674 gl->gl_demote_time = jiffies;
726 gl->gl_req_gh != gh) { 675 } else if (gl->gl_demote_state != LM_ST_UNLOCKED) {
727 if (gh->gh_state != state) 676 gl->gl_demote_state = state;
728 gh->gh_state = LM_ST_UNLOCKED;
729 goto out;
730 }
731 }
732
733 if (new_gh) {
734 list_add_tail(&new_gh->gh_list, &gl->gl_waiters2);
735 new_gh = NULL;
736 } else {
737 spin_unlock(&gl->gl_spin);
738
739 new_gh = gfs2_holder_get(gl, state, LM_FLAG_TRY, GFP_NOFS);
740 if (!new_gh)
741 return;
742 set_bit(HIF_DEMOTE, &new_gh->gh_iflags);
743 set_bit(HIF_DEALLOC, &new_gh->gh_iflags);
744 set_bit(HIF_WAIT, &new_gh->gh_iflags);
745
746 goto restart;
747 } 677 }
748
749out:
750 spin_unlock(&gl->gl_spin); 678 spin_unlock(&gl->gl_spin);
751
752 if (new_gh)
753 gfs2_holder_put(new_gh);
754} 679}
755 680
756/** 681/**
@@ -810,56 +735,37 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
810 735
811 /* Deal with each possible exit condition */ 736 /* Deal with each possible exit condition */
812 737
813 if (!gh) 738 if (!gh) {
814 gl->gl_stamp = jiffies; 739 gl->gl_stamp = jiffies;
815 else if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) { 740 if (ret & LM_OUT_CANCELED)
741 op_done = 0;
742 else
743 clear_bit(GLF_DEMOTE, &gl->gl_flags);
744 } else {
816 spin_lock(&gl->gl_spin); 745 spin_lock(&gl->gl_spin);
817 list_del_init(&gh->gh_list); 746 list_del_init(&gh->gh_list);
818 gh->gh_error = -EIO; 747 gh->gh_error = -EIO;
819 spin_unlock(&gl->gl_spin); 748 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
820 } else if (test_bit(HIF_DEMOTE, &gh->gh_iflags)) { 749 goto out;
821 spin_lock(&gl->gl_spin); 750 gh->gh_error = GLR_CANCELED;
822 list_del_init(&gh->gh_list); 751 if (ret & LM_OUT_CANCELED)
823 if (gl->gl_state == gh->gh_state || 752 goto out;
824 gl->gl_state == LM_ST_UNLOCKED) { 753 if (relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) {
754 list_add_tail(&gh->gh_list, &gl->gl_holders);
825 gh->gh_error = 0; 755 gh->gh_error = 0;
826 } else { 756 set_bit(HIF_HOLDER, &gh->gh_iflags);
827 if (gfs2_assert_warn(sdp, gh->gh_flags & 757 set_bit(HIF_FIRST, &gh->gh_iflags);
828 (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) == -1) 758 op_done = 0;
829 fs_warn(sdp, "ret = 0x%.8X\n", ret); 759 goto out;
830 gh->gh_error = GLR_TRYFAILED;
831 } 760 }
832 spin_unlock(&gl->gl_spin);
833
834 if (ret & LM_OUT_CANCELED)
835 handle_callback(gl, LM_ST_UNLOCKED);
836
837 } else if (ret & LM_OUT_CANCELED) {
838 spin_lock(&gl->gl_spin);
839 list_del_init(&gh->gh_list);
840 gh->gh_error = GLR_CANCELED;
841 spin_unlock(&gl->gl_spin);
842
843 } else if (relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) {
844 spin_lock(&gl->gl_spin);
845 list_move_tail(&gh->gh_list, &gl->gl_holders);
846 gh->gh_error = 0;
847 set_bit(HIF_HOLDER, &gh->gh_iflags);
848 spin_unlock(&gl->gl_spin);
849
850 set_bit(HIF_FIRST, &gh->gh_iflags);
851
852 op_done = 0;
853
854 } else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
855 spin_lock(&gl->gl_spin);
856 list_del_init(&gh->gh_list);
857 gh->gh_error = GLR_TRYFAILED; 761 gh->gh_error = GLR_TRYFAILED;
858 spin_unlock(&gl->gl_spin); 762 if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))
859 763 goto out;
860 } else { 764 gh->gh_error = -EINVAL;
861 if (gfs2_assert_withdraw(sdp, 0) == -1) 765 if (gfs2_assert_withdraw(sdp, 0) == -1)
862 fs_err(sdp, "ret = 0x%.8X\n", ret); 766 fs_err(sdp, "ret = 0x%.8X\n", ret);
767out:
768 spin_unlock(&gl->gl_spin);
863 } 769 }
864 770
865 if (glops->go_xmote_bh) 771 if (glops->go_xmote_bh)
@@ -877,7 +783,7 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
877 gfs2_glock_put(gl); 783 gfs2_glock_put(gl);
878 784
879 if (gh) 785 if (gh)
880 gfs2_holder_dispose_or_wake(gh); 786 gfs2_holder_wake(gh);
881} 787}
882 788
883/** 789/**
@@ -888,12 +794,11 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
888 * 794 *
889 */ 795 */
890 796
891void gfs2_glock_xmote_th(struct gfs2_holder *gh) 797void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh)
892{ 798{
893 struct gfs2_glock *gl = gh->gh_gl;
894 struct gfs2_sbd *sdp = gl->gl_sbd; 799 struct gfs2_sbd *sdp = gl->gl_sbd;
895 int flags = gh->gh_flags; 800 int flags = gh ? gh->gh_flags : 0;
896 unsigned state = gh->gh_state; 801 unsigned state = gh ? gh->gh_state : gl->gl_demote_state;
897 const struct gfs2_glock_operations *glops = gl->gl_ops; 802 const struct gfs2_glock_operations *glops = gl->gl_ops;
898 int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB | 803 int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB |
899 LM_FLAG_NOEXP | LM_FLAG_ANY | 804 LM_FLAG_NOEXP | LM_FLAG_ANY |
@@ -943,6 +848,7 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
943 gfs2_assert_warn(sdp, !ret); 848 gfs2_assert_warn(sdp, !ret);
944 849
945 state_change(gl, LM_ST_UNLOCKED); 850 state_change(gl, LM_ST_UNLOCKED);
851 clear_bit(GLF_DEMOTE, &gl->gl_flags);
946 852
947 if (glops->go_inval) 853 if (glops->go_inval)
948 glops->go_inval(gl, DIO_METADATA); 854 glops->go_inval(gl, DIO_METADATA);
@@ -964,7 +870,7 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
964 gfs2_glock_put(gl); 870 gfs2_glock_put(gl);
965 871
966 if (gh) 872 if (gh)
967 gfs2_holder_dispose_or_wake(gh); 873 gfs2_holder_wake(gh);
968} 874}
969 875
970/** 876/**
@@ -1097,18 +1003,32 @@ static int glock_wait_internal(struct gfs2_holder *gh)
1097} 1003}
1098 1004
1099static inline struct gfs2_holder * 1005static inline struct gfs2_holder *
1100find_holder_by_owner(struct list_head *head, struct task_struct *owner) 1006find_holder_by_owner(struct list_head *head, pid_t pid)
1101{ 1007{
1102 struct gfs2_holder *gh; 1008 struct gfs2_holder *gh;
1103 1009
1104 list_for_each_entry(gh, head, gh_list) { 1010 list_for_each_entry(gh, head, gh_list) {
1105 if (gh->gh_owner == owner) 1011 if (gh->gh_owner_pid == pid)
1106 return gh; 1012 return gh;
1107 } 1013 }
1108 1014
1109 return NULL; 1015 return NULL;
1110} 1016}
1111 1017
1018static void print_dbg(struct glock_iter *gi, const char *fmt, ...)
1019{
1020 va_list args;
1021
1022 va_start(args, fmt);
1023 if (gi) {
1024 vsprintf(gi->string, fmt, args);
1025 seq_printf(gi->seq, gi->string);
1026 }
1027 else
1028 vprintk(fmt, args);
1029 va_end(args);
1030}
1031
1112/** 1032/**
1113 * add_to_queue - Add a holder to the wait queue (but look for recursion) 1033 * add_to_queue - Add a holder to the wait queue (but look for recursion)
1114 * @gh: the holder structure to add 1034 * @gh: the holder structure to add
@@ -1120,24 +1040,24 @@ static void add_to_queue(struct gfs2_holder *gh)
1120 struct gfs2_glock *gl = gh->gh_gl; 1040 struct gfs2_glock *gl = gh->gh_gl;
1121 struct gfs2_holder *existing; 1041 struct gfs2_holder *existing;
1122 1042
1123 BUG_ON(!gh->gh_owner); 1043 BUG_ON(!gh->gh_owner_pid);
1124 if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags)) 1044 if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags))
1125 BUG(); 1045 BUG();
1126 1046
1127 existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner); 1047 existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner_pid);
1128 if (existing) { 1048 if (existing) {
1129 print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip); 1049 print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip);
1130 printk(KERN_INFO "pid : %d\n", existing->gh_owner->pid); 1050 printk(KERN_INFO "pid : %d\n", existing->gh_owner_pid);
1131 printk(KERN_INFO "lock type : %d lock state : %d\n", 1051 printk(KERN_INFO "lock type : %d lock state : %d\n",
1132 existing->gh_gl->gl_name.ln_type, existing->gh_gl->gl_state); 1052 existing->gh_gl->gl_name.ln_type, existing->gh_gl->gl_state);
1133 print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); 1053 print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip);
1134 printk(KERN_INFO "pid : %d\n", gh->gh_owner->pid); 1054 printk(KERN_INFO "pid : %d\n", gh->gh_owner_pid);
1135 printk(KERN_INFO "lock type : %d lock state : %d\n", 1055 printk(KERN_INFO "lock type : %d lock state : %d\n",
1136 gl->gl_name.ln_type, gl->gl_state); 1056 gl->gl_name.ln_type, gl->gl_state);
1137 BUG(); 1057 BUG();
1138 } 1058 }
1139 1059
1140 existing = find_holder_by_owner(&gl->gl_waiters3, gh->gh_owner); 1060 existing = find_holder_by_owner(&gl->gl_waiters3, gh->gh_owner_pid);
1141 if (existing) { 1061 if (existing) {
1142 print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip); 1062 print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip);
1143 print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); 1063 print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip);
@@ -1267,9 +1187,8 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
1267 if (glops->go_unlock) 1187 if (glops->go_unlock)
1268 glops->go_unlock(gh); 1188 glops->go_unlock(gh);
1269 1189
1270 gl->gl_stamp = jiffies;
1271
1272 spin_lock(&gl->gl_spin); 1190 spin_lock(&gl->gl_spin);
1191 gl->gl_stamp = jiffies;
1273 } 1192 }
1274 1193
1275 clear_bit(GLF_LOCK, &gl->gl_flags); 1194 clear_bit(GLF_LOCK, &gl->gl_flags);
@@ -1841,6 +1760,15 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait)
1841 * Diagnostic routines to help debug distributed deadlock 1760 * Diagnostic routines to help debug distributed deadlock
1842 */ 1761 */
1843 1762
1763static void gfs2_print_symbol(struct glock_iter *gi, const char *fmt,
1764 unsigned long address)
1765{
1766 char buffer[KSYM_SYMBOL_LEN];
1767
1768 sprint_symbol(buffer, address);
1769 print_dbg(gi, fmt, buffer);
1770}
1771
1844/** 1772/**
1845 * dump_holder - print information about a glock holder 1773 * dump_holder - print information about a glock holder
1846 * @str: a string naming the type of holder 1774 * @str: a string naming the type of holder
@@ -1849,31 +1777,37 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait)
1849 * Returns: 0 on success, -ENOBUFS when we run out of space 1777 * Returns: 0 on success, -ENOBUFS when we run out of space
1850 */ 1778 */
1851 1779
1852static int dump_holder(char *str, struct gfs2_holder *gh) 1780static int dump_holder(struct glock_iter *gi, char *str,
1781 struct gfs2_holder *gh)
1853{ 1782{
1854 unsigned int x; 1783 unsigned int x;
1855 int error = -ENOBUFS; 1784 struct task_struct *gh_owner;
1856 1785
1857 printk(KERN_INFO " %s\n", str); 1786 print_dbg(gi, " %s\n", str);
1858 printk(KERN_INFO " owner = %ld\n", 1787 if (gh->gh_owner_pid) {
1859 (gh->gh_owner) ? (long)gh->gh_owner->pid : -1); 1788 print_dbg(gi, " owner = %ld ", (long)gh->gh_owner_pid);
1860 printk(KERN_INFO " gh_state = %u\n", gh->gh_state); 1789 gh_owner = find_task_by_pid(gh->gh_owner_pid);
1861 printk(KERN_INFO " gh_flags ="); 1790 if (gh_owner)
1791 print_dbg(gi, "(%s)\n", gh_owner->comm);
1792 else
1793 print_dbg(gi, "(ended)\n");
1794 } else
1795 print_dbg(gi, " owner = -1\n");
1796 print_dbg(gi, " gh_state = %u\n", gh->gh_state);
1797 print_dbg(gi, " gh_flags =");
1862 for (x = 0; x < 32; x++) 1798 for (x = 0; x < 32; x++)
1863 if (gh->gh_flags & (1 << x)) 1799 if (gh->gh_flags & (1 << x))
1864 printk(" %u", x); 1800 print_dbg(gi, " %u", x);
1865 printk(" \n"); 1801 print_dbg(gi, " \n");
1866 printk(KERN_INFO " error = %d\n", gh->gh_error); 1802 print_dbg(gi, " error = %d\n", gh->gh_error);
1867 printk(KERN_INFO " gh_iflags ="); 1803 print_dbg(gi, " gh_iflags =");
1868 for (x = 0; x < 32; x++) 1804 for (x = 0; x < 32; x++)
1869 if (test_bit(x, &gh->gh_iflags)) 1805 if (test_bit(x, &gh->gh_iflags))
1870 printk(" %u", x); 1806 print_dbg(gi, " %u", x);
1871 printk(" \n"); 1807 print_dbg(gi, " \n");
1872 print_symbol(KERN_INFO " initialized at: %s\n", gh->gh_ip); 1808 gfs2_print_symbol(gi, " initialized at: %s\n", gh->gh_ip);
1873
1874 error = 0;
1875 1809
1876 return error; 1810 return 0;
1877} 1811}
1878 1812
1879/** 1813/**
@@ -1883,25 +1817,20 @@ static int dump_holder(char *str, struct gfs2_holder *gh)
1883 * Returns: 0 on success, -ENOBUFS when we run out of space 1817 * Returns: 0 on success, -ENOBUFS when we run out of space
1884 */ 1818 */
1885 1819
1886static int dump_inode(struct gfs2_inode *ip) 1820static int dump_inode(struct glock_iter *gi, struct gfs2_inode *ip)
1887{ 1821{
1888 unsigned int x; 1822 unsigned int x;
1889 int error = -ENOBUFS;
1890 1823
1891 printk(KERN_INFO " Inode:\n"); 1824 print_dbg(gi, " Inode:\n");
1892 printk(KERN_INFO " num = %llu %llu\n", 1825 print_dbg(gi, " num = %llu/%llu\n",
1893 (unsigned long long)ip->i_num.no_formal_ino, 1826 ip->i_num.no_formal_ino, ip->i_num.no_addr);
1894 (unsigned long long)ip->i_num.no_addr); 1827 print_dbg(gi, " type = %u\n", IF2DT(ip->i_inode.i_mode));
1895 printk(KERN_INFO " type = %u\n", IF2DT(ip->i_inode.i_mode)); 1828 print_dbg(gi, " i_flags =");
1896 printk(KERN_INFO " i_flags =");
1897 for (x = 0; x < 32; x++) 1829 for (x = 0; x < 32; x++)
1898 if (test_bit(x, &ip->i_flags)) 1830 if (test_bit(x, &ip->i_flags))
1899 printk(" %u", x); 1831 print_dbg(gi, " %u", x);
1900 printk(" \n"); 1832 print_dbg(gi, " \n");
1901 1833 return 0;
1902 error = 0;
1903
1904 return error;
1905} 1834}
1906 1835
1907/** 1836/**
@@ -1912,74 +1841,86 @@ static int dump_inode(struct gfs2_inode *ip)
1912 * Returns: 0 on success, -ENOBUFS when we run out of space 1841 * Returns: 0 on success, -ENOBUFS when we run out of space
1913 */ 1842 */
1914 1843
1915static int dump_glock(struct gfs2_glock *gl) 1844static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl)
1916{ 1845{
1917 struct gfs2_holder *gh; 1846 struct gfs2_holder *gh;
1918 unsigned int x; 1847 unsigned int x;
1919 int error = -ENOBUFS; 1848 int error = -ENOBUFS;
1849 struct task_struct *gl_owner;
1920 1850
1921 spin_lock(&gl->gl_spin); 1851 spin_lock(&gl->gl_spin);
1922 1852
1923 printk(KERN_INFO "Glock 0x%p (%u, %llu)\n", gl, gl->gl_name.ln_type, 1853 print_dbg(gi, "Glock 0x%p (%u, %llu)\n", gl, gl->gl_name.ln_type,
1924 (unsigned long long)gl->gl_name.ln_number); 1854 (unsigned long long)gl->gl_name.ln_number);
1925 printk(KERN_INFO " gl_flags ="); 1855 print_dbg(gi, " gl_flags =");
1926 for (x = 0; x < 32; x++) { 1856 for (x = 0; x < 32; x++) {
1927 if (test_bit(x, &gl->gl_flags)) 1857 if (test_bit(x, &gl->gl_flags))
1928 printk(" %u", x); 1858 print_dbg(gi, " %u", x);
1929 } 1859 }
1930 printk(" \n"); 1860 if (!test_bit(GLF_LOCK, &gl->gl_flags))
1931 printk(KERN_INFO " gl_ref = %d\n", atomic_read(&gl->gl_ref)); 1861 print_dbg(gi, " (unlocked)");
1932 printk(KERN_INFO " gl_state = %u\n", gl->gl_state); 1862 print_dbg(gi, " \n");
1933 printk(KERN_INFO " gl_owner = %s\n", gl->gl_owner->comm); 1863 print_dbg(gi, " gl_ref = %d\n", atomic_read(&gl->gl_ref));
1934 print_symbol(KERN_INFO " gl_ip = %s\n", gl->gl_ip); 1864 print_dbg(gi, " gl_state = %u\n", gl->gl_state);
1935 printk(KERN_INFO " req_gh = %s\n", (gl->gl_req_gh) ? "yes" : "no"); 1865 if (gl->gl_owner_pid) {
1936 printk(KERN_INFO " req_bh = %s\n", (gl->gl_req_bh) ? "yes" : "no"); 1866 gl_owner = find_task_by_pid(gl->gl_owner_pid);
1937 printk(KERN_INFO " lvb_count = %d\n", atomic_read(&gl->gl_lvb_count)); 1867 if (gl_owner)
1938 printk(KERN_INFO " object = %s\n", (gl->gl_object) ? "yes" : "no"); 1868 print_dbg(gi, " gl_owner = pid %d (%s)\n",
1939 printk(KERN_INFO " le = %s\n", 1869 gl->gl_owner_pid, gl_owner->comm);
1870 else
1871 print_dbg(gi, " gl_owner = %d (ended)\n",
1872 gl->gl_owner_pid);
1873 } else
1874 print_dbg(gi, " gl_owner = -1\n");
1875 print_dbg(gi, " gl_ip = %lu\n", gl->gl_ip);
1876 print_dbg(gi, " req_gh = %s\n", (gl->gl_req_gh) ? "yes" : "no");
1877 print_dbg(gi, " req_bh = %s\n", (gl->gl_req_bh) ? "yes" : "no");
1878 print_dbg(gi, " lvb_count = %d\n", atomic_read(&gl->gl_lvb_count));
1879 print_dbg(gi, " object = %s\n", (gl->gl_object) ? "yes" : "no");
1880 print_dbg(gi, " le = %s\n",
1940 (list_empty(&gl->gl_le.le_list)) ? "no" : "yes"); 1881 (list_empty(&gl->gl_le.le_list)) ? "no" : "yes");
1941 printk(KERN_INFO " reclaim = %s\n", 1882 print_dbg(gi, " reclaim = %s\n",
1942 (list_empty(&gl->gl_reclaim)) ? "no" : "yes"); 1883 (list_empty(&gl->gl_reclaim)) ? "no" : "yes");
1943 if (gl->gl_aspace) 1884 if (gl->gl_aspace)
1944 printk(KERN_INFO " aspace = 0x%p nrpages = %lu\n", gl->gl_aspace, 1885 print_dbg(gi, " aspace = 0x%p nrpages = %lu\n", gl->gl_aspace,
1945 gl->gl_aspace->i_mapping->nrpages); 1886 gl->gl_aspace->i_mapping->nrpages);
1946 else 1887 else
1947 printk(KERN_INFO " aspace = no\n"); 1888 print_dbg(gi, " aspace = no\n");
1948 printk(KERN_INFO " ail = %d\n", atomic_read(&gl->gl_ail_count)); 1889 print_dbg(gi, " ail = %d\n", atomic_read(&gl->gl_ail_count));
1949 if (gl->gl_req_gh) { 1890 if (gl->gl_req_gh) {
1950 error = dump_holder("Request", gl->gl_req_gh); 1891 error = dump_holder(gi, "Request", gl->gl_req_gh);
1951 if (error) 1892 if (error)
1952 goto out; 1893 goto out;
1953 } 1894 }
1954 list_for_each_entry(gh, &gl->gl_holders, gh_list) { 1895 list_for_each_entry(gh, &gl->gl_holders, gh_list) {
1955 error = dump_holder("Holder", gh); 1896 error = dump_holder(gi, "Holder", gh);
1956 if (error) 1897 if (error)
1957 goto out; 1898 goto out;
1958 } 1899 }
1959 list_for_each_entry(gh, &gl->gl_waiters1, gh_list) { 1900 list_for_each_entry(gh, &gl->gl_waiters1, gh_list) {
1960 error = dump_holder("Waiter1", gh); 1901 error = dump_holder(gi, "Waiter1", gh);
1961 if (error)
1962 goto out;
1963 }
1964 list_for_each_entry(gh, &gl->gl_waiters2, gh_list) {
1965 error = dump_holder("Waiter2", gh);
1966 if (error) 1902 if (error)
1967 goto out; 1903 goto out;
1968 } 1904 }
1969 list_for_each_entry(gh, &gl->gl_waiters3, gh_list) { 1905 list_for_each_entry(gh, &gl->gl_waiters3, gh_list) {
1970 error = dump_holder("Waiter3", gh); 1906 error = dump_holder(gi, "Waiter3", gh);
1971 if (error) 1907 if (error)
1972 goto out; 1908 goto out;
1973 } 1909 }
1910 if (test_bit(GLF_DEMOTE, &gl->gl_flags)) {
1911 print_dbg(gi, " Demotion req to state %u (%llu uS ago)\n",
1912 gl->gl_demote_state,
1913 (u64)(jiffies - gl->gl_demote_time)*(1000000/HZ));
1914 }
1974 if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) { 1915 if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) {
1975 if (!test_bit(GLF_LOCK, &gl->gl_flags) && 1916 if (!test_bit(GLF_LOCK, &gl->gl_flags) &&
1976 list_empty(&gl->gl_holders)) { 1917 list_empty(&gl->gl_holders)) {
1977 error = dump_inode(gl->gl_object); 1918 error = dump_inode(gi, gl->gl_object);
1978 if (error) 1919 if (error)
1979 goto out; 1920 goto out;
1980 } else { 1921 } else {
1981 error = -ENOBUFS; 1922 error = -ENOBUFS;
1982 printk(KERN_INFO " Inode: busy\n"); 1923 print_dbg(gi, " Inode: busy\n");
1983 } 1924 }
1984 } 1925 }
1985 1926
@@ -2014,7 +1955,7 @@ static int gfs2_dump_lockstate(struct gfs2_sbd *sdp)
2014 if (gl->gl_sbd != sdp) 1955 if (gl->gl_sbd != sdp)
2015 continue; 1956 continue;
2016 1957
2017 error = dump_glock(gl); 1958 error = dump_glock(NULL, gl);
2018 if (error) 1959 if (error)
2019 break; 1960 break;
2020 } 1961 }
@@ -2043,3 +1984,189 @@ int __init gfs2_glock_init(void)
2043 return 0; 1984 return 0;
2044} 1985}
2045 1986
1987static int gfs2_glock_iter_next(struct glock_iter *gi)
1988{
1989 read_lock(gl_lock_addr(gi->hash));
1990 while (1) {
1991 if (!gi->hb_list) { /* If we don't have a hash bucket yet */
1992 gi->hb_list = &gl_hash_table[gi->hash].hb_list;
1993 if (hlist_empty(gi->hb_list)) {
1994 read_unlock(gl_lock_addr(gi->hash));
1995 gi->hash++;
1996 read_lock(gl_lock_addr(gi->hash));
1997 gi->hb_list = NULL;
1998 if (gi->hash >= GFS2_GL_HASH_SIZE) {
1999 read_unlock(gl_lock_addr(gi->hash));
2000 return 1;
2001 }
2002 else
2003 continue;
2004 }
2005 if (!hlist_empty(gi->hb_list)) {
2006 gi->gl = list_entry(gi->hb_list->first,
2007 struct gfs2_glock,
2008 gl_list);
2009 }
2010 } else {
2011 if (gi->gl->gl_list.next == NULL) {
2012 read_unlock(gl_lock_addr(gi->hash));
2013 gi->hash++;
2014 read_lock(gl_lock_addr(gi->hash));
2015 gi->hb_list = NULL;
2016 continue;
2017 }
2018 gi->gl = list_entry(gi->gl->gl_list.next,
2019 struct gfs2_glock, gl_list);
2020 }
2021 if (gi->gl)
2022 break;
2023 }
2024 read_unlock(gl_lock_addr(gi->hash));
2025 return 0;
2026}
2027
2028static void gfs2_glock_iter_free(struct glock_iter *gi)
2029{
2030 kfree(gi);
2031}
2032
2033static struct glock_iter *gfs2_glock_iter_init(struct gfs2_sbd *sdp)
2034{
2035 struct glock_iter *gi;
2036
2037 gi = kmalloc(sizeof (*gi), GFP_KERNEL);
2038 if (!gi)
2039 return NULL;
2040
2041 gi->sdp = sdp;
2042 gi->hash = 0;
2043 gi->gl = NULL;
2044 gi->hb_list = NULL;
2045 gi->seq = NULL;
2046 memset(gi->string, 0, sizeof(gi->string));
2047
2048 if (gfs2_glock_iter_next(gi)) {
2049 gfs2_glock_iter_free(gi);
2050 return NULL;
2051 }
2052
2053 return gi;
2054}
2055
2056static void *gfs2_glock_seq_start(struct seq_file *file, loff_t *pos)
2057{
2058 struct glock_iter *gi;
2059 loff_t n = *pos;
2060
2061 gi = gfs2_glock_iter_init(file->private);
2062 if (!gi)
2063 return NULL;
2064
2065 while (n--) {
2066 if (gfs2_glock_iter_next(gi)) {
2067 gfs2_glock_iter_free(gi);
2068 return NULL;
2069 }
2070 }
2071
2072 return gi;
2073}
2074
2075static void *gfs2_glock_seq_next(struct seq_file *file, void *iter_ptr,
2076 loff_t *pos)
2077{
2078 struct glock_iter *gi = iter_ptr;
2079
2080 (*pos)++;
2081
2082 if (gfs2_glock_iter_next(gi)) {
2083 gfs2_glock_iter_free(gi);
2084 return NULL;
2085 }
2086
2087 return gi;
2088}
2089
2090static void gfs2_glock_seq_stop(struct seq_file *file, void *iter_ptr)
2091{
2092 /* nothing for now */
2093}
2094
2095static int gfs2_glock_seq_show(struct seq_file *file, void *iter_ptr)
2096{
2097 struct glock_iter *gi = iter_ptr;
2098
2099 gi->seq = file;
2100 dump_glock(gi, gi->gl);
2101
2102 return 0;
2103}
2104
2105static struct seq_operations gfs2_glock_seq_ops = {
2106 .start = gfs2_glock_seq_start,
2107 .next = gfs2_glock_seq_next,
2108 .stop = gfs2_glock_seq_stop,
2109 .show = gfs2_glock_seq_show,
2110};
2111
2112static int gfs2_debugfs_open(struct inode *inode, struct file *file)
2113{
2114 struct seq_file *seq;
2115 int ret;
2116
2117 ret = seq_open(file, &gfs2_glock_seq_ops);
2118 if (ret)
2119 return ret;
2120
2121 seq = file->private_data;
2122 seq->private = inode->i_private;
2123
2124 return 0;
2125}
2126
2127static const struct file_operations gfs2_debug_fops = {
2128 .owner = THIS_MODULE,
2129 .open = gfs2_debugfs_open,
2130 .read = seq_read,
2131 .llseek = seq_lseek,
2132 .release = seq_release
2133};
2134
2135int gfs2_create_debugfs_file(struct gfs2_sbd *sdp)
2136{
2137 sdp->debugfs_dir = debugfs_create_dir(sdp->sd_table_name, gfs2_root);
2138 if (!sdp->debugfs_dir)
2139 return -ENOMEM;
2140 sdp->debugfs_dentry_glocks = debugfs_create_file("glocks",
2141 S_IFREG | S_IRUGO,
2142 sdp->debugfs_dir, sdp,
2143 &gfs2_debug_fops);
2144 if (!sdp->debugfs_dentry_glocks)
2145 return -ENOMEM;
2146
2147 return 0;
2148}
2149
2150void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp)
2151{
2152 if (sdp && sdp->debugfs_dir) {
2153 if (sdp->debugfs_dentry_glocks) {
2154 debugfs_remove(sdp->debugfs_dentry_glocks);
2155 sdp->debugfs_dentry_glocks = NULL;
2156 }
2157 debugfs_remove(sdp->debugfs_dir);
2158 sdp->debugfs_dir = NULL;
2159 }
2160}
2161
2162int gfs2_register_debugfs(void)
2163{
2164 gfs2_root = debugfs_create_dir("gfs2", NULL);
2165 return gfs2_root ? 0 : -ENOMEM;
2166}
2167
2168void gfs2_unregister_debugfs(void)
2169{
2170 debugfs_remove(gfs2_root);
2171 gfs2_root = NULL;
2172}
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index f50e40ceca43..11477ca3a3c0 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -38,7 +38,7 @@ static inline int gfs2_glock_is_locked_by_me(struct gfs2_glock *gl)
38 /* Look in glock's list of holders for one with current task as owner */ 38 /* Look in glock's list of holders for one with current task as owner */
39 spin_lock(&gl->gl_spin); 39 spin_lock(&gl->gl_spin);
40 list_for_each_entry(gh, &gl->gl_holders, gh_list) { 40 list_for_each_entry(gh, &gl->gl_holders, gh_list) {
41 if (gh->gh_owner == current) { 41 if (gh->gh_owner_pid == current->pid) {
42 locked = 1; 42 locked = 1;
43 break; 43 break;
44 } 44 }
@@ -67,7 +67,7 @@ static inline int gfs2_glock_is_blocking(struct gfs2_glock *gl)
67{ 67{
68 int ret; 68 int ret;
69 spin_lock(&gl->gl_spin); 69 spin_lock(&gl->gl_spin);
70 ret = !list_empty(&gl->gl_waiters2) || !list_empty(&gl->gl_waiters3); 70 ret = test_bit(GLF_DEMOTE, &gl->gl_flags) || !list_empty(&gl->gl_waiters3);
71 spin_unlock(&gl->gl_spin); 71 spin_unlock(&gl->gl_spin);
72 return ret; 72 return ret;
73} 73}
@@ -135,5 +135,9 @@ void gfs2_scand_internal(struct gfs2_sbd *sdp);
135void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait); 135void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait);
136 136
137int __init gfs2_glock_init(void); 137int __init gfs2_glock_init(void);
138int gfs2_create_debugfs_file(struct gfs2_sbd *sdp);
139void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp);
140int gfs2_register_debugfs(void);
141void gfs2_unregister_debugfs(void);
138 142
139#endif /* __GLOCK_DOT_H__ */ 143#endif /* __GLOCK_DOT_H__ */
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 39c8ae23bd9c..7b82657a9910 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -163,10 +163,7 @@ static void inode_go_sync(struct gfs2_glock *gl)
163 if (ip) { 163 if (ip) {
164 struct address_space *mapping = ip->i_inode.i_mapping; 164 struct address_space *mapping = ip->i_inode.i_mapping;
165 int error = filemap_fdatawait(mapping); 165 int error = filemap_fdatawait(mapping);
166 if (error == -ENOSPC) 166 mapping_set_error(mapping, error);
167 set_bit(AS_ENOSPC, &mapping->flags);
168 else if (error)
169 set_bit(AS_EIO, &mapping->flags);
170 } 167 }
171 clear_bit(GLF_DIRTY, &gl->gl_flags); 168 clear_bit(GLF_DIRTY, &gl->gl_flags);
172 gfs2_ail_empty_gl(gl); 169 gfs2_ail_empty_gl(gl);
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 49f0dbf40d86..d995441373ab 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -115,11 +115,8 @@ enum {
115 /* Actions */ 115 /* Actions */
116 HIF_MUTEX = 0, 116 HIF_MUTEX = 0,
117 HIF_PROMOTE = 1, 117 HIF_PROMOTE = 1,
118 HIF_DEMOTE = 2,
119 118
120 /* States */ 119 /* States */
121 HIF_ALLOCED = 4,
122 HIF_DEALLOC = 5,
123 HIF_HOLDER = 6, 120 HIF_HOLDER = 6,
124 HIF_FIRST = 7, 121 HIF_FIRST = 7,
125 HIF_ABORTED = 9, 122 HIF_ABORTED = 9,
@@ -130,7 +127,7 @@ struct gfs2_holder {
130 struct list_head gh_list; 127 struct list_head gh_list;
131 128
132 struct gfs2_glock *gh_gl; 129 struct gfs2_glock *gh_gl;
133 struct task_struct *gh_owner; 130 pid_t gh_owner_pid;
134 unsigned int gh_state; 131 unsigned int gh_state;
135 unsigned gh_flags; 132 unsigned gh_flags;
136 133
@@ -142,8 +139,8 @@ struct gfs2_holder {
142enum { 139enum {
143 GLF_LOCK = 1, 140 GLF_LOCK = 1,
144 GLF_STICKY = 2, 141 GLF_STICKY = 2,
142 GLF_DEMOTE = 3,
145 GLF_DIRTY = 5, 143 GLF_DIRTY = 5,
146 GLF_SKIP_WAITERS2 = 6,
147}; 144};
148 145
149struct gfs2_glock { 146struct gfs2_glock {
@@ -156,11 +153,12 @@ struct gfs2_glock {
156 153
157 unsigned int gl_state; 154 unsigned int gl_state;
158 unsigned int gl_hash; 155 unsigned int gl_hash;
159 struct task_struct *gl_owner; 156 unsigned int gl_demote_state; /* state requested by remote node */
157 unsigned long gl_demote_time; /* time of first demote request */
158 pid_t gl_owner_pid;
160 unsigned long gl_ip; 159 unsigned long gl_ip;
161 struct list_head gl_holders; 160 struct list_head gl_holders;
162 struct list_head gl_waiters1; /* HIF_MUTEX */ 161 struct list_head gl_waiters1; /* HIF_MUTEX */
163 struct list_head gl_waiters2; /* HIF_DEMOTE */
164 struct list_head gl_waiters3; /* HIF_PROMOTE */ 162 struct list_head gl_waiters3; /* HIF_PROMOTE */
165 163
166 const struct gfs2_glock_operations *gl_ops; 164 const struct gfs2_glock_operations *gl_ops;
@@ -611,6 +609,8 @@ struct gfs2_sbd {
611 609
612 unsigned long sd_last_warning; 610 unsigned long sd_last_warning;
613 struct vfsmount *sd_gfs2mnt; 611 struct vfsmount *sd_gfs2mnt;
612 struct dentry *debugfs_dir; /* debugfs directory */
613 struct dentry *debugfs_dentry_glocks; /* for debugfs */
614}; 614};
615 615
616#endif /* __INCORE_DOT_H__ */ 616#endif /* __INCORE_DOT_H__ */
diff --git a/fs/gfs2/locking/dlm/lock.c b/fs/gfs2/locking/dlm/lock.c
index b167addf9fd1..c305255bfe8a 100644
--- a/fs/gfs2/locking/dlm/lock.c
+++ b/fs/gfs2/locking/dlm/lock.c
@@ -151,7 +151,7 @@ static inline unsigned int make_flags(struct gdlm_lock *lp,
151 151
152/* make_strname - convert GFS lock numbers to a string */ 152/* make_strname - convert GFS lock numbers to a string */
153 153
154static inline void make_strname(struct lm_lockname *lockname, 154static inline void make_strname(const struct lm_lockname *lockname,
155 struct gdlm_strname *str) 155 struct gdlm_strname *str)
156{ 156{
157 sprintf(str->name, "%8x%16llx", lockname->ln_type, 157 sprintf(str->name, "%8x%16llx", lockname->ln_type,
@@ -169,6 +169,7 @@ static int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name,
169 return -ENOMEM; 169 return -ENOMEM;
170 170
171 lp->lockname = *name; 171 lp->lockname = *name;
172 make_strname(name, &lp->strname);
172 lp->ls = ls; 173 lp->ls = ls;
173 lp->cur = DLM_LOCK_IV; 174 lp->cur = DLM_LOCK_IV;
174 lp->lvb = NULL; 175 lp->lvb = NULL;
@@ -227,7 +228,6 @@ void gdlm_put_lock(void *lock)
227unsigned int gdlm_do_lock(struct gdlm_lock *lp) 228unsigned int gdlm_do_lock(struct gdlm_lock *lp)
228{ 229{
229 struct gdlm_ls *ls = lp->ls; 230 struct gdlm_ls *ls = lp->ls;
230 struct gdlm_strname str;
231 int error, bast = 1; 231 int error, bast = 1;
232 232
233 /* 233 /*
@@ -249,8 +249,6 @@ unsigned int gdlm_do_lock(struct gdlm_lock *lp)
249 if (test_bit(LFL_NOBAST, &lp->flags)) 249 if (test_bit(LFL_NOBAST, &lp->flags))
250 bast = 0; 250 bast = 0;
251 251
252 make_strname(&lp->lockname, &str);
253
254 set_bit(LFL_ACTIVE, &lp->flags); 252 set_bit(LFL_ACTIVE, &lp->flags);
255 253
256 log_debug("lk %x,%llx id %x %d,%d %x", lp->lockname.ln_type, 254 log_debug("lk %x,%llx id %x %d,%d %x", lp->lockname.ln_type,
@@ -258,8 +256,8 @@ unsigned int gdlm_do_lock(struct gdlm_lock *lp)
258 lp->cur, lp->req, lp->lkf); 256 lp->cur, lp->req, lp->lkf);
259 257
260 error = dlm_lock(ls->dlm_lockspace, lp->req, &lp->lksb, lp->lkf, 258 error = dlm_lock(ls->dlm_lockspace, lp->req, &lp->lksb, lp->lkf,
261 str.name, str.namelen, 0, gdlm_ast, lp, 259 lp->strname.name, lp->strname.namelen, 0, gdlm_ast,
262 bast ? gdlm_bast : NULL); 260 lp, bast ? gdlm_bast : NULL);
263 261
264 if ((error == -EAGAIN) && (lp->lkf & DLM_LKF_NOQUEUE)) { 262 if ((error == -EAGAIN) && (lp->lkf & DLM_LKF_NOQUEUE)) {
265 lp->lksb.sb_status = -EAGAIN; 263 lp->lksb.sb_status = -EAGAIN;
@@ -268,7 +266,7 @@ unsigned int gdlm_do_lock(struct gdlm_lock *lp)
268 } 266 }
269 267
270 if (error) { 268 if (error) {
271 log_debug("%s: gdlm_lock %x,%llx err=%d cur=%d req=%d lkf=%x " 269 log_error("%s: gdlm_lock %x,%llx err=%d cur=%d req=%d lkf=%x "
272 "flags=%lx", ls->fsname, lp->lockname.ln_type, 270 "flags=%lx", ls->fsname, lp->lockname.ln_type,
273 (unsigned long long)lp->lockname.ln_number, error, 271 (unsigned long long)lp->lockname.ln_number, error,
274 lp->cur, lp->req, lp->lkf, lp->flags); 272 lp->cur, lp->req, lp->lkf, lp->flags);
@@ -296,7 +294,7 @@ static unsigned int gdlm_do_unlock(struct gdlm_lock *lp)
296 error = dlm_unlock(ls->dlm_lockspace, lp->lksb.sb_lkid, lkf, NULL, lp); 294 error = dlm_unlock(ls->dlm_lockspace, lp->lksb.sb_lkid, lkf, NULL, lp);
297 295
298 if (error) { 296 if (error) {
299 log_debug("%s: gdlm_unlock %x,%llx err=%d cur=%d req=%d lkf=%x " 297 log_error("%s: gdlm_unlock %x,%llx err=%d cur=%d req=%d lkf=%x "
300 "flags=%lx", ls->fsname, lp->lockname.ln_type, 298 "flags=%lx", ls->fsname, lp->lockname.ln_type,
301 (unsigned long long)lp->lockname.ln_number, error, 299 (unsigned long long)lp->lockname.ln_number, error,
302 lp->cur, lp->req, lp->lkf, lp->flags); 300 lp->cur, lp->req, lp->lkf, lp->flags);
diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h
index a87c7bf3c568..d074c6e6f9bf 100644
--- a/fs/gfs2/locking/dlm/lock_dlm.h
+++ b/fs/gfs2/locking/dlm/lock_dlm.h
@@ -36,7 +36,7 @@
36 36
37#define GDLM_STRNAME_BYTES 24 37#define GDLM_STRNAME_BYTES 24
38#define GDLM_LVB_SIZE 32 38#define GDLM_LVB_SIZE 32
39#define GDLM_DROP_COUNT 200000 39#define GDLM_DROP_COUNT 0
40#define GDLM_DROP_PERIOD 60 40#define GDLM_DROP_PERIOD 60
41#define GDLM_NAME_LEN 128 41#define GDLM_NAME_LEN 128
42 42
@@ -106,6 +106,7 @@ enum {
106struct gdlm_lock { 106struct gdlm_lock {
107 struct gdlm_ls *ls; 107 struct gdlm_ls *ls;
108 struct lm_lockname lockname; 108 struct lm_lockname lockname;
109 struct gdlm_strname strname;
109 char *lvb; 110 char *lvb;
110 struct dlm_lksb lksb; 111 struct dlm_lksb lksb;
111 112
diff --git a/fs/gfs2/locking/dlm/plock.c b/fs/gfs2/locking/dlm/plock.c
index 1dd4215b83d0..f82495e18c2d 100644
--- a/fs/gfs2/locking/dlm/plock.c
+++ b/fs/gfs2/locking/dlm/plock.c
@@ -25,6 +25,15 @@ struct plock_op {
25 struct gdlm_plock_info info; 25 struct gdlm_plock_info info;
26}; 26};
27 27
28struct plock_xop {
29 struct plock_op xop;
30 void *callback;
31 void *fl;
32 void *file;
33 struct file_lock flc;
34};
35
36
28static inline void set_version(struct gdlm_plock_info *info) 37static inline void set_version(struct gdlm_plock_info *info)
29{ 38{
30 info->version[0] = GDLM_PLOCK_VERSION_MAJOR; 39 info->version[0] = GDLM_PLOCK_VERSION_MAJOR;
@@ -64,12 +73,14 @@ int gdlm_plock(void *lockspace, struct lm_lockname *name,
64{ 73{
65 struct gdlm_ls *ls = lockspace; 74 struct gdlm_ls *ls = lockspace;
66 struct plock_op *op; 75 struct plock_op *op;
76 struct plock_xop *xop;
67 int rv; 77 int rv;
68 78
69 op = kzalloc(sizeof(*op), GFP_KERNEL); 79 xop = kzalloc(sizeof(*xop), GFP_KERNEL);
70 if (!op) 80 if (!xop)
71 return -ENOMEM; 81 return -ENOMEM;
72 82
83 op = &xop->xop;
73 op->info.optype = GDLM_PLOCK_OP_LOCK; 84 op->info.optype = GDLM_PLOCK_OP_LOCK;
74 op->info.pid = fl->fl_pid; 85 op->info.pid = fl->fl_pid;
75 op->info.ex = (fl->fl_type == F_WRLCK); 86 op->info.ex = (fl->fl_type == F_WRLCK);
@@ -79,9 +90,21 @@ int gdlm_plock(void *lockspace, struct lm_lockname *name,
79 op->info.start = fl->fl_start; 90 op->info.start = fl->fl_start;
80 op->info.end = fl->fl_end; 91 op->info.end = fl->fl_end;
81 op->info.owner = (__u64)(long) fl->fl_owner; 92 op->info.owner = (__u64)(long) fl->fl_owner;
93 if (fl->fl_lmops && fl->fl_lmops->fl_grant) {
94 xop->callback = fl->fl_lmops->fl_grant;
95 locks_init_lock(&xop->flc);
96 locks_copy_lock(&xop->flc, fl);
97 xop->fl = fl;
98 xop->file = file;
99 } else
100 xop->callback = NULL;
82 101
83 send_op(op); 102 send_op(op);
84 wait_event(recv_wq, (op->done != 0)); 103
104 if (xop->callback == NULL)
105 wait_event(recv_wq, (op->done != 0));
106 else
107 return -EINPROGRESS;
85 108
86 spin_lock(&ops_lock); 109 spin_lock(&ops_lock);
87 if (!list_empty(&op->list)) { 110 if (!list_empty(&op->list)) {
@@ -99,7 +122,63 @@ int gdlm_plock(void *lockspace, struct lm_lockname *name,
99 (unsigned long long)name->ln_number); 122 (unsigned long long)name->ln_number);
100 } 123 }
101 124
102 kfree(op); 125 kfree(xop);
126 return rv;
127}
128
129/* Returns failure iff a succesful lock operation should be canceled */
130static int gdlm_plock_callback(struct plock_op *op)
131{
132 struct file *file;
133 struct file_lock *fl;
134 struct file_lock *flc;
135 int (*notify)(void *, void *, int) = NULL;
136 struct plock_xop *xop = (struct plock_xop *)op;
137 int rv = 0;
138
139 spin_lock(&ops_lock);
140 if (!list_empty(&op->list)) {
141 printk(KERN_INFO "plock op on list\n");
142 list_del(&op->list);
143 }
144 spin_unlock(&ops_lock);
145
146 /* check if the following 2 are still valid or make a copy */
147 file = xop->file;
148 flc = &xop->flc;
149 fl = xop->fl;
150 notify = xop->callback;
151
152 if (op->info.rv) {
153 notify(flc, NULL, op->info.rv);
154 goto out;
155 }
156
157 /* got fs lock; bookkeep locally as well: */
158 flc->fl_flags &= ~FL_SLEEP;
159 if (posix_lock_file(file, flc, NULL)) {
160 /*
161 * This can only happen in the case of kmalloc() failure.
162 * The filesystem's own lock is the authoritative lock,
163 * so a failure to get the lock locally is not a disaster.
164 * As long as GFS cannot reliably cancel locks (especially
165 * in a low-memory situation), we're better off ignoring
166 * this failure than trying to recover.
167 */
168 log_error("gdlm_plock: vfs lock error file %p fl %p",
169 file, fl);
170 }
171
172 rv = notify(flc, NULL, 0);
173 if (rv) {
174 /* XXX: We need to cancel the fs lock here: */
175 printk("gfs2 lock granted after lock request failed;"
176 " dangling lock!\n");
177 goto out;
178 }
179
180out:
181 kfree(xop);
103 return rv; 182 return rv;
104} 183}
105 184
@@ -138,6 +217,9 @@ int gdlm_punlock(void *lockspace, struct lm_lockname *name,
138 217
139 rv = op->info.rv; 218 rv = op->info.rv;
140 219
220 if (rv == -ENOENT)
221 rv = 0;
222
141 kfree(op); 223 kfree(op);
142 return rv; 224 return rv;
143} 225}
@@ -161,6 +243,7 @@ int gdlm_plock_get(void *lockspace, struct lm_lockname *name,
161 op->info.start = fl->fl_start; 243 op->info.start = fl->fl_start;
162 op->info.end = fl->fl_end; 244 op->info.end = fl->fl_end;
163 245
246
164 send_op(op); 247 send_op(op);
165 wait_event(recv_wq, (op->done != 0)); 248 wait_event(recv_wq, (op->done != 0));
166 249
@@ -173,9 +256,10 @@ int gdlm_plock_get(void *lockspace, struct lm_lockname *name,
173 256
174 rv = op->info.rv; 257 rv = op->info.rv;
175 258
176 if (rv == 0) 259 fl->fl_type = F_UNLCK;
177 fl->fl_type = F_UNLCK; 260 if (rv == -ENOENT)
178 else if (rv > 0) { 261 rv = 0;
262 else if (rv == 0 && op->info.pid != fl->fl_pid) {
179 fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK; 263 fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
180 fl->fl_pid = op->info.pid; 264 fl->fl_pid = op->info.pid;
181 fl->fl_start = op->info.start; 265 fl->fl_start = op->info.start;
@@ -243,9 +327,14 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
243 } 327 }
244 spin_unlock(&ops_lock); 328 spin_unlock(&ops_lock);
245 329
246 if (found) 330 if (found) {
247 wake_up(&recv_wq); 331 struct plock_xop *xop;
248 else 332 xop = (struct plock_xop *)op;
333 if (xop->callback)
334 count = gdlm_plock_callback(op);
335 else
336 wake_up(&recv_wq);
337 } else
249 printk(KERN_INFO "gdlm dev_write no op %x %llx\n", info.fsid, 338 printk(KERN_INFO "gdlm dev_write no op %x %llx\n", info.fsid,
250 (unsigned long long)info.number); 339 (unsigned long long)info.number);
251 return count; 340 return count;
diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c
index 4746b884662d..d9fe3ca40e18 100644
--- a/fs/gfs2/locking/dlm/sysfs.c
+++ b/fs/gfs2/locking/dlm/sysfs.c
@@ -190,7 +190,6 @@ static struct kobj_type gdlm_ktype = {
190}; 190};
191 191
192static struct kset gdlm_kset = { 192static struct kset gdlm_kset = {
193 .subsys = &kernel_subsys,
194 .kobj = {.name = "lock_dlm",}, 193 .kobj = {.name = "lock_dlm",},
195 .ktype = &gdlm_ktype, 194 .ktype = &gdlm_ktype,
196}; 195};
@@ -225,6 +224,7 @@ int gdlm_sysfs_init(void)
225{ 224{
226 int error; 225 int error;
227 226
227 kobj_set_kset_s(&gdlm_kset, kernel_subsys);
228 error = kset_register(&gdlm_kset); 228 error = kset_register(&gdlm_kset);
229 if (error) 229 if (error)
230 printk("lock_dlm: cannot register kset %d\n", error); 230 printk("lock_dlm: cannot register kset %d\n", error);
diff --git a/fs/gfs2/locking/nolock/main.c b/fs/gfs2/locking/nolock/main.c
index acfbc941f319..0d149c8c493a 100644
--- a/fs/gfs2/locking/nolock/main.c
+++ b/fs/gfs2/locking/nolock/main.c
@@ -13,7 +13,6 @@
13#include <linux/init.h> 13#include <linux/init.h>
14#include <linux/types.h> 14#include <linux/types.h>
15#include <linux/fs.h> 15#include <linux/fs.h>
16#include <linux/smp_lock.h>
17#include <linux/lm_interface.h> 16#include <linux/lm_interface.h>
18 17
19struct nolock_lockspace { 18struct nolock_lockspace {
@@ -164,13 +163,7 @@ static void nolock_unhold_lvb(void *lock, char *lvb)
164static int nolock_plock_get(void *lockspace, struct lm_lockname *name, 163static int nolock_plock_get(void *lockspace, struct lm_lockname *name,
165 struct file *file, struct file_lock *fl) 164 struct file *file, struct file_lock *fl)
166{ 165{
167 struct file_lock tmp; 166 posix_test_lock(file, fl);
168 int ret;
169
170 ret = posix_test_lock(file, fl, &tmp);
171 fl->fl_type = F_UNLCK;
172 if (ret)
173 memcpy(fl, &tmp, sizeof(struct file_lock));
174 167
175 return 0; 168 return 0;
176} 169}
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 16bb4b4561ae..f82d84d05d23 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -33,16 +33,17 @@ static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
33 33
34 tr->tr_touched = 1; 34 tr->tr_touched = 1;
35 35
36 if (!list_empty(&le->le_list))
37 return;
38
39 gl = container_of(le, struct gfs2_glock, gl_le); 36 gl = container_of(le, struct gfs2_glock, gl_le);
40 if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl))) 37 if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl)))
41 return; 38 return;
42 gfs2_glock_hold(gl);
43 set_bit(GLF_DIRTY, &gl->gl_flags);
44 39
45 gfs2_log_lock(sdp); 40 gfs2_log_lock(sdp);
41 if (!list_empty(&le->le_list)){
42 gfs2_log_unlock(sdp);
43 return;
44 }
45 gfs2_glock_hold(gl);
46 set_bit(GLF_DIRTY, &gl->gl_flags);
46 sdp->sd_log_num_gl++; 47 sdp->sd_log_num_gl++;
47 list_add(&le->le_list, &sdp->sd_log_le_gl); 48 list_add(&le->le_list, &sdp->sd_log_le_gl);
48 gfs2_log_unlock(sdp); 49 gfs2_log_unlock(sdp);
@@ -415,13 +416,14 @@ static void rg_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
415 416
416 tr->tr_touched = 1; 417 tr->tr_touched = 1;
417 418
418 if (!list_empty(&le->le_list))
419 return;
420
421 rgd = container_of(le, struct gfs2_rgrpd, rd_le); 419 rgd = container_of(le, struct gfs2_rgrpd, rd_le);
422 gfs2_rgrp_bh_hold(rgd);
423 420
424 gfs2_log_lock(sdp); 421 gfs2_log_lock(sdp);
422 if (!list_empty(&le->le_list)){
423 gfs2_log_unlock(sdp);
424 return;
425 }
426 gfs2_rgrp_bh_hold(rgd);
425 sdp->sd_log_num_rg++; 427 sdp->sd_log_num_rg++;
426 list_add(&le->le_list, &sdp->sd_log_le_rg); 428 list_add(&le->le_list, &sdp->sd_log_le_rg);
427 gfs2_log_unlock(sdp); 429 gfs2_log_unlock(sdp);
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 6e8a59809abf..e460487c0557 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -27,8 +27,7 @@
27static void gfs2_init_inode_once(void *foo, struct kmem_cache *cachep, unsigned long flags) 27static void gfs2_init_inode_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
28{ 28{
29 struct gfs2_inode *ip = foo; 29 struct gfs2_inode *ip = foo;
30 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 30 if (flags & SLAB_CTOR_CONSTRUCTOR) {
31 SLAB_CTOR_CONSTRUCTOR) {
32 inode_init_once(&ip->i_inode); 31 inode_init_once(&ip->i_inode);
33 spin_lock_init(&ip->i_spin); 32 spin_lock_init(&ip->i_spin);
34 init_rwsem(&ip->i_rw_mutex); 33 init_rwsem(&ip->i_rw_mutex);
@@ -39,13 +38,11 @@ static void gfs2_init_inode_once(void *foo, struct kmem_cache *cachep, unsigned
39static void gfs2_init_glock_once(void *foo, struct kmem_cache *cachep, unsigned long flags) 38static void gfs2_init_glock_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
40{ 39{
41 struct gfs2_glock *gl = foo; 40 struct gfs2_glock *gl = foo;
42 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 41 if (flags & SLAB_CTOR_CONSTRUCTOR) {
43 SLAB_CTOR_CONSTRUCTOR) {
44 INIT_HLIST_NODE(&gl->gl_list); 42 INIT_HLIST_NODE(&gl->gl_list);
45 spin_lock_init(&gl->gl_spin); 43 spin_lock_init(&gl->gl_spin);
46 INIT_LIST_HEAD(&gl->gl_holders); 44 INIT_LIST_HEAD(&gl->gl_holders);
47 INIT_LIST_HEAD(&gl->gl_waiters1); 45 INIT_LIST_HEAD(&gl->gl_waiters1);
48 INIT_LIST_HEAD(&gl->gl_waiters2);
49 INIT_LIST_HEAD(&gl->gl_waiters3); 46 INIT_LIST_HEAD(&gl->gl_waiters3);
50 gl->gl_lvb = NULL; 47 gl->gl_lvb = NULL;
51 atomic_set(&gl->gl_lvb_count, 0); 48 atomic_set(&gl->gl_lvb_count, 0);
@@ -103,6 +100,8 @@ static int __init init_gfs2_fs(void)
103 if (error) 100 if (error)
104 goto fail_unregister; 101 goto fail_unregister;
105 102
103 gfs2_register_debugfs();
104
106 printk("GFS2 (built %s %s) installed\n", __DATE__, __TIME__); 105 printk("GFS2 (built %s %s) installed\n", __DATE__, __TIME__);
107 106
108 return 0; 107 return 0;
@@ -130,6 +129,7 @@ fail:
130 129
131static void __exit exit_gfs2_fs(void) 130static void __exit exit_gfs2_fs(void)
132{ 131{
132 gfs2_unregister_debugfs();
133 unregister_filesystem(&gfs2_fs_type); 133 unregister_filesystem(&gfs2_fs_type);
134 unregister_filesystem(&gfs2meta_fs_type); 134 unregister_filesystem(&gfs2meta_fs_type);
135 135
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c
index 32caecd20300..4864659555d4 100644
--- a/fs/gfs2/mount.c
+++ b/fs/gfs2/mount.c
@@ -13,6 +13,7 @@
13#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
14#include <linux/gfs2_ondisk.h> 14#include <linux/gfs2_ondisk.h>
15#include <linux/lm_interface.h> 15#include <linux/lm_interface.h>
16#include <linux/parser.h>
16 17
17#include "gfs2.h" 18#include "gfs2.h"
18#include "incore.h" 19#include "incore.h"
@@ -20,6 +21,52 @@
20#include "sys.h" 21#include "sys.h"
21#include "util.h" 22#include "util.h"
22 23
24enum {
25 Opt_lockproto,
26 Opt_locktable,
27 Opt_hostdata,
28 Opt_spectator,
29 Opt_ignore_local_fs,
30 Opt_localflocks,
31 Opt_localcaching,
32 Opt_debug,
33 Opt_nodebug,
34 Opt_upgrade,
35 Opt_num_glockd,
36 Opt_acl,
37 Opt_noacl,
38 Opt_quota_off,
39 Opt_quota_account,
40 Opt_quota_on,
41 Opt_suiddir,
42 Opt_nosuiddir,
43 Opt_data_writeback,
44 Opt_data_ordered,
45};
46
47static match_table_t tokens = {
48 {Opt_lockproto, "lockproto=%s"},
49 {Opt_locktable, "locktable=%s"},
50 {Opt_hostdata, "hostdata=%s"},
51 {Opt_spectator, "spectator"},
52 {Opt_ignore_local_fs, "ignore_local_fs"},
53 {Opt_localflocks, "localflocks"},
54 {Opt_localcaching, "localcaching"},
55 {Opt_debug, "debug"},
56 {Opt_nodebug, "nodebug"},
57 {Opt_upgrade, "upgrade"},
58 {Opt_num_glockd, "num_glockd=%d"},
59 {Opt_acl, "acl"},
60 {Opt_noacl, "noacl"},
61 {Opt_quota_off, "quota=off"},
62 {Opt_quota_account, "quota=account"},
63 {Opt_quota_on, "quota=on"},
64 {Opt_suiddir, "suiddir"},
65 {Opt_nosuiddir, "nosuiddir"},
66 {Opt_data_writeback, "data=writeback"},
67 {Opt_data_ordered, "data=ordered"}
68};
69
23/** 70/**
24 * gfs2_mount_args - Parse mount options 71 * gfs2_mount_args - Parse mount options
25 * @sdp: 72 * @sdp:
@@ -54,146 +101,150 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
54 process them */ 101 process them */
55 102
56 for (options = data; (o = strsep(&options, ",")); ) { 103 for (options = data; (o = strsep(&options, ",")); ) {
104 int token, option;
105 substring_t tmp[MAX_OPT_ARGS];
106
57 if (!*o) 107 if (!*o)
58 continue; 108 continue;
59 109
60 v = strchr(o, '='); 110 token = match_token(o, tokens, tmp);
61 if (v) 111 switch (token) {
62 *v++ = 0; 112 case Opt_lockproto:
113 v = match_strdup(&tmp[0]);
114 if (!v) {
115 fs_info(sdp, "no memory for lockproto\n");
116 error = -ENOMEM;
117 goto out_error;
118 }
63 119
64 if (!strcmp(o, "lockproto")) { 120 if (remount && strcmp(v, args->ar_lockproto)) {
65 if (!v) 121 kfree(v);
66 goto need_value;
67 if (remount && strcmp(v, args->ar_lockproto))
68 goto cant_remount; 122 goto cant_remount;
123 }
124
69 strncpy(args->ar_lockproto, v, GFS2_LOCKNAME_LEN); 125 strncpy(args->ar_lockproto, v, GFS2_LOCKNAME_LEN);
70 args->ar_lockproto[GFS2_LOCKNAME_LEN - 1] = 0; 126 args->ar_lockproto[GFS2_LOCKNAME_LEN - 1] = 0;
71 } 127 kfree(v);
128 break;
129 case Opt_locktable:
130 v = match_strdup(&tmp[0]);
131 if (!v) {
132 fs_info(sdp, "no memory for locktable\n");
133 error = -ENOMEM;
134 goto out_error;
135 }
72 136
73 else if (!strcmp(o, "locktable")) { 137 if (remount && strcmp(v, args->ar_locktable)) {
74 if (!v) 138 kfree(v);
75 goto need_value;
76 if (remount && strcmp(v, args->ar_locktable))
77 goto cant_remount; 139 goto cant_remount;
140 }
141
78 strncpy(args->ar_locktable, v, GFS2_LOCKNAME_LEN); 142 strncpy(args->ar_locktable, v, GFS2_LOCKNAME_LEN);
79 args->ar_locktable[GFS2_LOCKNAME_LEN - 1] = 0; 143 args->ar_locktable[GFS2_LOCKNAME_LEN - 1] = 0;
80 } 144 kfree(v);
145 break;
146 case Opt_hostdata:
147 v = match_strdup(&tmp[0]);
148 if (!v) {
149 fs_info(sdp, "no memory for hostdata\n");
150 error = -ENOMEM;
151 goto out_error;
152 }
81 153
82 else if (!strcmp(o, "hostdata")) { 154 if (remount && strcmp(v, args->ar_hostdata)) {
83 if (!v) 155 kfree(v);
84 goto need_value;
85 if (remount && strcmp(v, args->ar_hostdata))
86 goto cant_remount; 156 goto cant_remount;
157 }
158
87 strncpy(args->ar_hostdata, v, GFS2_LOCKNAME_LEN); 159 strncpy(args->ar_hostdata, v, GFS2_LOCKNAME_LEN);
88 args->ar_hostdata[GFS2_LOCKNAME_LEN - 1] = 0; 160 args->ar_hostdata[GFS2_LOCKNAME_LEN - 1] = 0;
89 } 161 kfree(v);
90 162 break;
91 else if (!strcmp(o, "spectator")) { 163 case Opt_spectator:
92 if (remount && !args->ar_spectator) 164 if (remount && !args->ar_spectator)
93 goto cant_remount; 165 goto cant_remount;
94 args->ar_spectator = 1; 166 args->ar_spectator = 1;
95 sdp->sd_vfs->s_flags |= MS_RDONLY; 167 sdp->sd_vfs->s_flags |= MS_RDONLY;
96 } 168 break;
97 169 case Opt_ignore_local_fs:
98 else if (!strcmp(o, "ignore_local_fs")) {
99 if (remount && !args->ar_ignore_local_fs) 170 if (remount && !args->ar_ignore_local_fs)
100 goto cant_remount; 171 goto cant_remount;
101 args->ar_ignore_local_fs = 1; 172 args->ar_ignore_local_fs = 1;
102 } 173 break;
103 174 case Opt_localflocks:
104 else if (!strcmp(o, "localflocks")) {
105 if (remount && !args->ar_localflocks) 175 if (remount && !args->ar_localflocks)
106 goto cant_remount; 176 goto cant_remount;
107 args->ar_localflocks = 1; 177 args->ar_localflocks = 1;
108 } 178 break;
109 179 case Opt_localcaching:
110 else if (!strcmp(o, "localcaching")) {
111 if (remount && !args->ar_localcaching) 180 if (remount && !args->ar_localcaching)
112 goto cant_remount; 181 goto cant_remount;
113 args->ar_localcaching = 1; 182 args->ar_localcaching = 1;
114 } 183 break;
115 184 case Opt_debug:
116 else if (!strcmp(o, "debug"))
117 args->ar_debug = 1; 185 args->ar_debug = 1;
118 186 break;
119 else if (!strcmp(o, "nodebug")) 187 case Opt_nodebug:
120 args->ar_debug = 0; 188 args->ar_debug = 0;
121 189 break;
122 else if (!strcmp(o, "upgrade")) { 190 case Opt_upgrade:
123 if (remount && !args->ar_upgrade) 191 if (remount && !args->ar_upgrade)
124 goto cant_remount; 192 goto cant_remount;
125 args->ar_upgrade = 1; 193 args->ar_upgrade = 1;
126 } 194 break;
195 case Opt_num_glockd:
196 if ((error = match_int(&tmp[0], &option))) {
197 fs_info(sdp, "problem getting num_glockd\n");
198 goto out_error;
199 }
127 200
128 else if (!strcmp(o, "num_glockd")) { 201 if (remount && option != args->ar_num_glockd)
129 unsigned int x;
130 if (!v)
131 goto need_value;
132 sscanf(v, "%u", &x);
133 if (remount && x != args->ar_num_glockd)
134 goto cant_remount; 202 goto cant_remount;
135 if (!x || x > GFS2_GLOCKD_MAX) { 203 if (!option || option > GFS2_GLOCKD_MAX) {
136 fs_info(sdp, "0 < num_glockd <= %u (not %u)\n", 204 fs_info(sdp, "0 < num_glockd <= %u (not %u)\n",
137 GFS2_GLOCKD_MAX, x); 205 GFS2_GLOCKD_MAX, option);
138 error = -EINVAL; 206 error = -EINVAL;
139 break; 207 goto out_error;
140 } 208 }
141 args->ar_num_glockd = x; 209 args->ar_num_glockd = option;
142 } 210 break;
143 211 case Opt_acl:
144 else if (!strcmp(o, "acl")) {
145 args->ar_posix_acl = 1; 212 args->ar_posix_acl = 1;
146 sdp->sd_vfs->s_flags |= MS_POSIXACL; 213 sdp->sd_vfs->s_flags |= MS_POSIXACL;
147 } 214 break;
148 215 case Opt_noacl:
149 else if (!strcmp(o, "noacl")) {
150 args->ar_posix_acl = 0; 216 args->ar_posix_acl = 0;
151 sdp->sd_vfs->s_flags &= ~MS_POSIXACL; 217 sdp->sd_vfs->s_flags &= ~MS_POSIXACL;
152 } 218 break;
153 219 case Opt_quota_off:
154 else if (!strcmp(o, "quota")) { 220 args->ar_quota = GFS2_QUOTA_OFF;
155 if (!v) 221 break;
156 goto need_value; 222 case Opt_quota_account:
157 if (!strcmp(v, "off")) 223 args->ar_quota = GFS2_QUOTA_ACCOUNT;
158 args->ar_quota = GFS2_QUOTA_OFF; 224 break;
159 else if (!strcmp(v, "account")) 225 case Opt_quota_on:
160 args->ar_quota = GFS2_QUOTA_ACCOUNT; 226 args->ar_quota = GFS2_QUOTA_ON;
161 else if (!strcmp(v, "on")) 227 break;
162 args->ar_quota = GFS2_QUOTA_ON; 228 case Opt_suiddir:
163 else {
164 fs_info(sdp, "invalid value for quota\n");
165 error = -EINVAL;
166 break;
167 }
168 }
169
170 else if (!strcmp(o, "suiddir"))
171 args->ar_suiddir = 1; 229 args->ar_suiddir = 1;
172 230 break;
173 else if (!strcmp(o, "nosuiddir")) 231 case Opt_nosuiddir:
174 args->ar_suiddir = 0; 232 args->ar_suiddir = 0;
175 233 break;
176 else if (!strcmp(o, "data")) { 234 case Opt_data_writeback:
177 if (!v) 235 args->ar_data = GFS2_DATA_WRITEBACK;
178 goto need_value; 236 break;
179 if (!strcmp(v, "writeback")) 237 case Opt_data_ordered:
180 args->ar_data = GFS2_DATA_WRITEBACK; 238 args->ar_data = GFS2_DATA_ORDERED;
181 else if (!strcmp(v, "ordered")) 239 break;
182 args->ar_data = GFS2_DATA_ORDERED; 240 default:
183 else {
184 fs_info(sdp, "invalid value for data\n");
185 error = -EINVAL;
186 break;
187 }
188 }
189
190 else {
191 fs_info(sdp, "unknown option: %s\n", o); 241 fs_info(sdp, "unknown option: %s\n", o);
192 error = -EINVAL; 242 error = -EINVAL;
193 break; 243 goto out_error;
194 } 244 }
195 } 245 }
196 246
247out_error:
197 if (error) 248 if (error)
198 fs_info(sdp, "invalid mount option(s)\n"); 249 fs_info(sdp, "invalid mount option(s)\n");
199 250
@@ -202,10 +253,6 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
202 253
203 return error; 254 return error;
204 255
205need_value:
206 fs_info(sdp, "need value for option %s\n", o);
207 return -EINVAL;
208
209cant_remount: 256cant_remount:
210 fs_info(sdp, "can't remount with option %s\n", o); 257 fs_info(sdp, "can't remount with option %s\n", o);
211 return -EINVAL; 258 return -EINVAL;
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index b3b7e8475359..30c15622174f 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -197,7 +197,19 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
197 void *kaddr; 197 void *kaddr;
198 int error; 198 int error;
199 199
200 BUG_ON(page->index); 200 /*
201 * Due to the order of unstuffing files and ->nopage(), we can be
202 * asked for a zero page in the case of a stuffed file being extended,
203 * so we need to supply one here. It doesn't happen often.
204 */
205 if (unlikely(page->index)) {
206 kaddr = kmap_atomic(page, KM_USER0);
207 memset(kaddr, 0, PAGE_CACHE_SIZE);
208 kunmap_atomic(kaddr, KM_USER0);
209 flush_dcache_page(page);
210 SetPageUptodate(page);
211 return 0;
212 }
201 213
202 error = gfs2_meta_inode_buffer(ip, &dibh); 214 error = gfs2_meta_inode_buffer(ip, &dibh);
203 if (error) 215 if (error)
@@ -208,9 +220,8 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
208 ip->i_di.di_size); 220 ip->i_di.di_size);
209 memset(kaddr + ip->i_di.di_size, 0, PAGE_CACHE_SIZE - ip->i_di.di_size); 221 memset(kaddr + ip->i_di.di_size, 0, PAGE_CACHE_SIZE - ip->i_di.di_size);
210 kunmap_atomic(kaddr, KM_USER0); 222 kunmap_atomic(kaddr, KM_USER0);
211 223 flush_dcache_page(page);
212 brelse(dibh); 224 brelse(dibh);
213
214 SetPageUptodate(page); 225 SetPageUptodate(page);
215 226
216 return 0; 227 return 0;
@@ -507,7 +518,9 @@ static int gfs2_commit_write(struct file *file, struct page *page,
507 gfs2_quota_unlock(ip); 518 gfs2_quota_unlock(ip);
508 gfs2_alloc_put(ip); 519 gfs2_alloc_put(ip);
509 } 520 }
521 unlock_page(page);
510 gfs2_glock_dq_m(1, &ip->i_gh); 522 gfs2_glock_dq_m(1, &ip->i_gh);
523 lock_page(page);
511 gfs2_holder_uninit(&ip->i_gh); 524 gfs2_holder_uninit(&ip->i_gh);
512 return 0; 525 return 0;
513 526
@@ -520,7 +533,9 @@ fail_endtrans:
520 gfs2_quota_unlock(ip); 533 gfs2_quota_unlock(ip);
521 gfs2_alloc_put(ip); 534 gfs2_alloc_put(ip);
522 } 535 }
536 unlock_page(page);
523 gfs2_glock_dq_m(1, &ip->i_gh); 537 gfs2_glock_dq_m(1, &ip->i_gh);
538 lock_page(page);
524 gfs2_holder_uninit(&ip->i_gh); 539 gfs2_holder_uninit(&ip->i_gh);
525fail_nounlock: 540fail_nounlock:
526 ClearPageUptodate(page); 541 ClearPageUptodate(page);
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c
index c6bac6b69420..a6fdc52f554a 100644
--- a/fs/gfs2/ops_dentry.c
+++ b/fs/gfs2/ops_dentry.c
@@ -11,7 +11,6 @@
11#include <linux/spinlock.h> 11#include <linux/spinlock.h>
12#include <linux/completion.h> 12#include <linux/completion.h>
13#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
14#include <linux/smp_lock.h>
15#include <linux/gfs2_ondisk.h> 14#include <linux/gfs2_ondisk.h>
16#include <linux/crc32.h> 15#include <linux/crc32.h>
17#include <linux/lm_interface.h> 16#include <linux/lm_interface.h>
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index b50180e22779..064df8804582 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -15,7 +15,6 @@
15#include <linux/uio.h> 15#include <linux/uio.h>
16#include <linux/blkdev.h> 16#include <linux/blkdev.h>
17#include <linux/mm.h> 17#include <linux/mm.h>
18#include <linux/smp_lock.h>
19#include <linux/fs.h> 18#include <linux/fs.h>
20#include <linux/gfs2_ondisk.h> 19#include <linux/gfs2_ondisk.h>
21#include <linux/ext2_fs.h> 20#include <linux/ext2_fs.h>
@@ -513,18 +512,18 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
513 512
514 if (sdp->sd_args.ar_localflocks) { 513 if (sdp->sd_args.ar_localflocks) {
515 if (IS_GETLK(cmd)) { 514 if (IS_GETLK(cmd)) {
516 struct file_lock tmp; 515 posix_test_lock(file, fl);
517 int ret;
518 ret = posix_test_lock(file, fl, &tmp);
519 fl->fl_type = F_UNLCK;
520 if (ret)
521 memcpy(fl, &tmp, sizeof(struct file_lock));
522 return 0; 516 return 0;
523 } else { 517 } else {
524 return posix_lock_file_wait(file, fl); 518 return posix_lock_file_wait(file, fl);
525 } 519 }
526 } 520 }
527 521
522 if (cmd == F_CANCELLK) {
523 /* Hack: */
524 cmd = F_SETLK;
525 fl->fl_type = F_UNLCK;
526 }
528 if (IS_GETLK(cmd)) 527 if (IS_GETLK(cmd))
529 return gfs2_lm_plock_get(sdp, &name, file, fl); 528 return gfs2_lm_plock_get(sdp, &name, file, fl);
530 else if (fl->fl_type == F_UNLCK) 529 else if (fl->fl_type == F_UNLCK)
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index ee54cb667083..2c5f8e7def0d 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -690,6 +690,8 @@ static int fill_super(struct super_block *sb, void *data, int silent)
690 if (error) 690 if (error)
691 goto fail; 691 goto fail;
692 692
693 gfs2_create_debugfs_file(sdp);
694
693 error = gfs2_sys_fs_add(sdp); 695 error = gfs2_sys_fs_add(sdp);
694 if (error) 696 if (error)
695 goto fail; 697 goto fail;
@@ -754,6 +756,7 @@ fail_lm:
754fail_sys: 756fail_sys:
755 gfs2_sys_fs_del(sdp); 757 gfs2_sys_fs_del(sdp);
756fail: 758fail:
759 gfs2_delete_debugfs_file(sdp);
757 kfree(sdp); 760 kfree(sdp);
758 sb->s_fs_info = NULL; 761 sb->s_fs_info = NULL;
759 return error; 762 return error;
@@ -896,6 +899,7 @@ error:
896 899
897static void gfs2_kill_sb(struct super_block *sb) 900static void gfs2_kill_sb(struct super_block *sb)
898{ 901{
902 gfs2_delete_debugfs_file(sb->s_fs_info);
899 kill_block_super(sb); 903 kill_block_super(sb);
900} 904}
901 905
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index b89999d3a767..485ce3d49923 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -284,6 +284,31 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
284} 284}
285 285
286/** 286/**
287 * gfs2_drop_inode - Drop an inode (test for remote unlink)
288 * @inode: The inode to drop
289 *
290 * If we've received a callback on an iopen lock then its because a
291 * remote node tried to deallocate the inode but failed due to this node
292 * still having the inode open. Here we mark the link count zero
293 * since we know that it must have reached zero if the GLF_DEMOTE flag
294 * is set on the iopen glock. If we didn't do a disk read since the
295 * remote node removed the final link then we might otherwise miss
296 * this event. This check ensures that this node will deallocate the
297 * inode's blocks, or alternatively pass the baton on to another
298 * node for later deallocation.
299 */
300static void gfs2_drop_inode(struct inode *inode)
301{
302 if (inode->i_private && inode->i_nlink) {
303 struct gfs2_inode *ip = GFS2_I(inode);
304 struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
305 if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags))
306 clear_nlink(inode);
307 }
308 generic_drop_inode(inode);
309}
310
311/**
287 * gfs2_clear_inode - Deallocate an inode when VFS is done with it 312 * gfs2_clear_inode - Deallocate an inode when VFS is done with it
288 * @inode: The VFS inode 313 * @inode: The VFS inode
289 * 314 *
@@ -441,7 +466,7 @@ out_unlock:
441out_uninit: 466out_uninit:
442 gfs2_holder_uninit(&ip->i_iopen_gh); 467 gfs2_holder_uninit(&ip->i_iopen_gh);
443 gfs2_glock_dq_uninit(&gh); 468 gfs2_glock_dq_uninit(&gh);
444 if (error) 469 if (error && error != GLR_TRYFAILED)
445 fs_warn(sdp, "gfs2_delete_inode: %d\n", error); 470 fs_warn(sdp, "gfs2_delete_inode: %d\n", error);
446out: 471out:
447 truncate_inode_pages(&inode->i_data, 0); 472 truncate_inode_pages(&inode->i_data, 0);
@@ -481,6 +506,7 @@ const struct super_operations gfs2_super_ops = {
481 .statfs = gfs2_statfs, 506 .statfs = gfs2_statfs,
482 .remount_fs = gfs2_remount_fs, 507 .remount_fs = gfs2_remount_fs,
483 .clear_inode = gfs2_clear_inode, 508 .clear_inode = gfs2_clear_inode,
509 .drop_inode = gfs2_drop_inode,
484 .show_options = gfs2_show_options, 510 .show_options = gfs2_show_options,
485}; 511};
486 512
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 8d9c08b5c4b6..1727f5012efe 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -27,6 +27,7 @@
27#include "trans.h" 27#include "trans.h"
28#include "ops_file.h" 28#include "ops_file.h"
29#include "util.h" 29#include "util.h"
30#include "log.h"
30 31
31#define BFITNOENT ((u32)~0) 32#define BFITNOENT ((u32)~0)
32 33
@@ -697,8 +698,6 @@ struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip)
697 * @al: the struct gfs2_alloc structure describing the reservation 698 * @al: the struct gfs2_alloc structure describing the reservation
698 * 699 *
699 * If there's room for the requested blocks to be allocated from the RG: 700 * If there's room for the requested blocks to be allocated from the RG:
700 * Sets the $al_reserved_data field in @al.
701 * Sets the $al_reserved_meta field in @al.
702 * Sets the $al_rgd field in @al. 701 * Sets the $al_rgd field in @al.
703 * 702 *
704 * Returns: 1 on success (it fits), 0 on failure (it doesn't fit) 703 * Returns: 1 on success (it fits), 0 on failure (it doesn't fit)
@@ -709,6 +708,9 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al)
709 struct gfs2_sbd *sdp = rgd->rd_sbd; 708 struct gfs2_sbd *sdp = rgd->rd_sbd;
710 int ret = 0; 709 int ret = 0;
711 710
711 if (rgd->rd_rg.rg_flags & GFS2_RGF_NOALLOC)
712 return 0;
713
712 spin_lock(&sdp->sd_rindex_spin); 714 spin_lock(&sdp->sd_rindex_spin);
713 if (rgd->rd_free_clone >= al->al_requested) { 715 if (rgd->rd_free_clone >= al->al_requested) {
714 al->al_rgd = rgd; 716 al->al_rgd = rgd;
@@ -941,9 +943,13 @@ static int get_local_rgrp(struct gfs2_inode *ip)
941 rgd = gfs2_rgrpd_get_first(sdp); 943 rgd = gfs2_rgrpd_get_first(sdp);
942 944
943 if (rgd == begin) { 945 if (rgd == begin) {
944 if (++loops >= 2 || !skipped) 946 if (++loops >= 3)
945 return -ENOSPC; 947 return -ENOSPC;
948 if (!skipped)
949 loops++;
946 flags = 0; 950 flags = 0;
951 if (loops == 2)
952 gfs2_log_flush(sdp, NULL);
947 } 953 }
948 } 954 }
949 955
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index d01f9f0fda26..c26c21b53c19 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -222,7 +222,6 @@ static struct kobj_type gfs2_ktype = {
222}; 222};
223 223
224static struct kset gfs2_kset = { 224static struct kset gfs2_kset = {
225 .subsys = &fs_subsys,
226 .kobj = {.name = "gfs2"}, 225 .kobj = {.name = "gfs2"},
227 .ktype = &gfs2_ktype, 226 .ktype = &gfs2_ktype,
228}; 227};
@@ -554,6 +553,7 @@ int gfs2_sys_init(void)
554{ 553{
555 gfs2_sys_margs = NULL; 554 gfs2_sys_margs = NULL;
556 spin_lock_init(&gfs2_sys_margs_lock); 555 spin_lock_init(&gfs2_sys_margs_lock);
556 kobj_set_kset_s(&gfs2_kset, fs_subsys);
557 return kset_register(&gfs2_kset); 557 return kset_register(&gfs2_kset);
558} 558}
559 559
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c
index 5fd0ed71f923..8a3a650abc87 100644
--- a/fs/hfs/btree.c
+++ b/fs/hfs/btree.c
@@ -9,6 +9,7 @@
9 */ 9 */
10 10
11#include <linux/pagemap.h> 11#include <linux/pagemap.h>
12#include <linux/log2.h>
12 13
13#include "btree.h" 14#include "btree.h"
14 15
@@ -76,7 +77,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke
76 tree->depth = be16_to_cpu(head->depth); 77 tree->depth = be16_to_cpu(head->depth);
77 78
78 size = tree->node_size; 79 size = tree->node_size;
79 if (!size || size & (size - 1)) 80 if (!is_power_of_2(size))
80 goto fail_page; 81 goto fail_page;
81 if (!tree->node_count) 82 if (!tree->node_count)
82 goto fail_page; 83 goto fail_page;
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 623f509f1d47..4f1888f16cf0 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -434,7 +434,7 @@ static void hfs_init_once(void *p, struct kmem_cache *cachep, unsigned long flag
434{ 434{
435 struct hfs_inode_info *i = p; 435 struct hfs_inode_info *i = p;
436 436
437 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) 437 if (flags & SLAB_CTOR_CONSTRUCTOR)
438 inode_init_once(&i->vfs_inode); 438 inode_init_once(&i->vfs_inode);
439} 439}
440 440
diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c
index a9b9e872e29a..90ebab753d30 100644
--- a/fs/hfsplus/btree.c
+++ b/fs/hfsplus/btree.c
@@ -10,6 +10,7 @@
10 10
11#include <linux/slab.h> 11#include <linux/slab.h>
12#include <linux/pagemap.h> 12#include <linux/pagemap.h>
13#include <linux/log2.h>
13 14
14#include "hfsplus_fs.h" 15#include "hfsplus_fs.h"
15#include "hfsplus_raw.h" 16#include "hfsplus_raw.h"
@@ -69,7 +70,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id)
69 } 70 }
70 71
71 size = tree->node_size; 72 size = tree->node_size;
72 if (!size || size & (size - 1)) 73 if (!is_power_of_2(size))
73 goto fail_page; 74 goto fail_page;
74 if (!tree->node_count) 75 if (!tree->node_count)
75 goto fail_page; 76 goto fail_page;
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 1a97f9293447..37afbec8a761 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -470,7 +470,7 @@ static void hfsplus_init_once(void *p, struct kmem_cache *cachep, unsigned long
470{ 470{
471 struct hfsplus_inode_info *i = p; 471 struct hfsplus_inode_info *i = p;
472 472
473 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) 473 if (flags & SLAB_CTOR_CONSTRUCTOR)
474 inode_init_once(&i->vfs_inode); 474 inode_init_once(&i->vfs_inode);
475} 475}
476 476
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index 70543b17e4c7..06e5930515fe 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -55,7 +55,7 @@ extern int stat_file(const char *path, unsigned long long *inode_out,
55 int *mode_out, int *nlink_out, int *uid_out, int *gid_out, 55 int *mode_out, int *nlink_out, int *uid_out, int *gid_out,
56 unsigned long long *size_out, struct timespec *atime_out, 56 unsigned long long *size_out, struct timespec *atime_out,
57 struct timespec *mtime_out, struct timespec *ctime_out, 57 struct timespec *mtime_out, struct timespec *ctime_out,
58 int *blksize_out, unsigned long long *blocks_out); 58 int *blksize_out, unsigned long long *blocks_out, int fd);
59extern int access_file(char *path, int r, int w, int x); 59extern int access_file(char *path, int r, int w, int x);
60extern int open_file(char *path, int r, int w, int append); 60extern int open_file(char *path, int r, int w, int append);
61extern int file_type(const char *path, int *maj, int *min); 61extern int file_type(const char *path, int *maj, int *min);
@@ -71,7 +71,7 @@ extern int lseek_file(int fd, long long offset, int whence);
71extern int fsync_file(int fd, int datasync); 71extern int fsync_file(int fd, int datasync);
72extern int file_create(char *name, int ur, int uw, int ux, int gr, 72extern int file_create(char *name, int ur, int uw, int ux, int gr,
73 int gw, int gx, int or, int ow, int ox); 73 int gw, int gx, int or, int ow, int ox);
74extern int set_attr(const char *file, struct hostfs_iattr *attrs); 74extern int set_attr(const char *file, struct hostfs_iattr *attrs, int fd);
75extern int make_symlink(const char *from, const char *to); 75extern int make_symlink(const char *from, const char *to);
76extern int unlink_file(const char *file); 76extern int unlink_file(const char *file);
77extern int do_mkdir(const char *file, int mode); 77extern int do_mkdir(const char *file, int mode);
@@ -87,14 +87,3 @@ extern int do_statfs(char *root, long *bsize_out, long long *blocks_out,
87 long *spare_out); 87 long *spare_out);
88 88
89#endif 89#endif
90
91/*
92 * Overrides for Emacs so that we follow Linus's tabbing style.
93 * Emacs will notice this stuff at the end of the file and automatically
94 * adjust the settings for this buffer only. This must remain at the end
95 * of the file.
96 * ---------------------------------------------------------------------------
97 * Local variables:
98 * c-file-style: "linux"
99 * End:
100 */
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index fd301a910122..8286491dbf31 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) 2 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
3 * Licensed under the GPL 3 * Licensed under the GPL
4 * 4 *
5 * Ported the filesystem routines to 2.5. 5 * Ported the filesystem routines to 2.5.
@@ -31,14 +31,14 @@ struct hostfs_inode_info {
31 31
32static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode) 32static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode)
33{ 33{
34 return(list_entry(inode, struct hostfs_inode_info, vfs_inode)); 34 return list_entry(inode, struct hostfs_inode_info, vfs_inode);
35} 35}
36 36
37#define FILE_HOSTFS_I(file) HOSTFS_I((file)->f_path.dentry->d_inode) 37#define FILE_HOSTFS_I(file) HOSTFS_I((file)->f_path.dentry->d_inode)
38 38
39int hostfs_d_delete(struct dentry *dentry) 39int hostfs_d_delete(struct dentry *dentry)
40{ 40{
41 return(1); 41 return 1;
42} 42}
43 43
44struct dentry_operations hostfs_dentry_ops = { 44struct dentry_operations hostfs_dentry_ops = {
@@ -79,7 +79,7 @@ static int __init hostfs_args(char *options, int *add)
79 } 79 }
80 options = ptr; 80 options = ptr;
81 } 81 }
82 return(0); 82 return 0;
83} 83}
84 84
85__uml_setup("hostfs=", hostfs_args, 85__uml_setup("hostfs=", hostfs_args,
@@ -110,7 +110,8 @@ static char *dentry_name(struct dentry *dentry, int extra)
110 root = HOSTFS_I(parent->d_inode)->host_filename; 110 root = HOSTFS_I(parent->d_inode)->host_filename;
111 len += strlen(root); 111 len += strlen(root);
112 name = kmalloc(len + extra + 1, GFP_KERNEL); 112 name = kmalloc(len + extra + 1, GFP_KERNEL);
113 if(name == NULL) return(NULL); 113 if(name == NULL)
114 return NULL;
114 115
115 name[len] = '\0'; 116 name[len] = '\0';
116 parent = dentry; 117 parent = dentry;
@@ -122,7 +123,7 @@ static char *dentry_name(struct dentry *dentry, int extra)
122 parent = parent->d_parent; 123 parent = parent->d_parent;
123 } 124 }
124 strncpy(name, root, strlen(root)); 125 strncpy(name, root, strlen(root));
125 return(name); 126 return name;
126} 127}
127 128
128static char *inode_name(struct inode *ino, int extra) 129static char *inode_name(struct inode *ino, int extra)
@@ -130,7 +131,7 @@ static char *inode_name(struct inode *ino, int extra)
130 struct dentry *dentry; 131 struct dentry *dentry;
131 132
132 dentry = list_entry(ino->i_dentry.next, struct dentry, d_alias); 133 dentry = list_entry(ino->i_dentry.next, struct dentry, d_alias);
133 return(dentry_name(dentry, extra)); 134 return dentry_name(dentry, extra);
134} 135}
135 136
136static int read_name(struct inode *ino, char *name) 137static int read_name(struct inode *ino, char *name)
@@ -147,16 +148,16 @@ static int read_name(struct inode *ino, char *name)
147 148
148 err = stat_file(name, &i_ino, &i_mode, &i_nlink, &ino->i_uid, 149 err = stat_file(name, &i_ino, &i_mode, &i_nlink, &ino->i_uid,
149 &ino->i_gid, &i_size, &ino->i_atime, &ino->i_mtime, 150 &ino->i_gid, &i_size, &ino->i_atime, &ino->i_mtime,
150 &ino->i_ctime, &i_blksize, &i_blocks); 151 &ino->i_ctime, &i_blksize, &i_blocks, -1);
151 if(err) 152 if(err)
152 return(err); 153 return err;
153 154
154 ino->i_ino = i_ino; 155 ino->i_ino = i_ino;
155 ino->i_mode = i_mode; 156 ino->i_mode = i_mode;
156 ino->i_nlink = i_nlink; 157 ino->i_nlink = i_nlink;
157 ino->i_size = i_size; 158 ino->i_size = i_size;
158 ino->i_blocks = i_blocks; 159 ino->i_blocks = i_blocks;
159 return(0); 160 return 0;
160} 161}
161 162
162static char *follow_link(char *link) 163static char *follow_link(char *link)
@@ -181,11 +182,11 @@ static char *follow_link(char *link)
181 goto out_free; 182 goto out_free;
182 183
183 if(*name == '/') 184 if(*name == '/')
184 return(name); 185 return name;
185 186
186 end = strrchr(link, '/'); 187 end = strrchr(link, '/');
187 if(end == NULL) 188 if(end == NULL)
188 return(name); 189 return name;
189 190
190 *(end + 1) = '\0'; 191 *(end + 1) = '\0';
191 len = strlen(link) + strlen(name) + 1; 192 len = strlen(link) + strlen(name) + 1;
@@ -199,12 +200,12 @@ static char *follow_link(char *link)
199 sprintf(resolved, "%s%s", link, name); 200 sprintf(resolved, "%s%s", link, name);
200 kfree(name); 201 kfree(name);
201 kfree(link); 202 kfree(link);
202 return(resolved); 203 return resolved;
203 204
204 out_free: 205 out_free:
205 kfree(name); 206 kfree(name);
206 out: 207 out:
207 return(ERR_PTR(n)); 208 return ERR_PTR(n);
208} 209}
209 210
210static int read_inode(struct inode *ino) 211static int read_inode(struct inode *ino)
@@ -234,7 +235,7 @@ static int read_inode(struct inode *ino)
234 err = read_name(ino, name); 235 err = read_name(ino, name);
235 kfree(name); 236 kfree(name);
236 out: 237 out:
237 return(err); 238 return err;
238} 239}
239 240
240int hostfs_statfs(struct dentry *dentry, struct kstatfs *sf) 241int hostfs_statfs(struct dentry *dentry, struct kstatfs *sf)
@@ -254,14 +255,15 @@ int hostfs_statfs(struct dentry *dentry, struct kstatfs *sf)
254 &sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files, 255 &sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files,
255 &f_ffree, &sf->f_fsid, sizeof(sf->f_fsid), 256 &f_ffree, &sf->f_fsid, sizeof(sf->f_fsid),
256 &sf->f_namelen, sf->f_spare); 257 &sf->f_namelen, sf->f_spare);
257 if(err) return(err); 258 if(err)
259 return err;
258 sf->f_blocks = f_blocks; 260 sf->f_blocks = f_blocks;
259 sf->f_bfree = f_bfree; 261 sf->f_bfree = f_bfree;
260 sf->f_bavail = f_bavail; 262 sf->f_bavail = f_bavail;
261 sf->f_files = f_files; 263 sf->f_files = f_files;
262 sf->f_ffree = f_ffree; 264 sf->f_ffree = f_ffree;
263 sf->f_type = HOSTFS_SUPER_MAGIC; 265 sf->f_type = HOSTFS_SUPER_MAGIC;
264 return(0); 266 return 0;
265} 267}
266 268
267static struct inode *hostfs_alloc_inode(struct super_block *sb) 269static struct inode *hostfs_alloc_inode(struct super_block *sb)
@@ -270,13 +272,13 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb)
270 272
271 hi = kmalloc(sizeof(*hi), GFP_KERNEL); 273 hi = kmalloc(sizeof(*hi), GFP_KERNEL);
272 if(hi == NULL) 274 if(hi == NULL)
273 return(NULL); 275 return NULL;
274 276
275 *hi = ((struct hostfs_inode_info) { .host_filename = NULL, 277 *hi = ((struct hostfs_inode_info) { .host_filename = NULL,
276 .fd = -1, 278 .fd = -1,
277 .mode = 0 }); 279 .mode = 0 });
278 inode_init_once(&hi->vfs_inode); 280 inode_init_once(&hi->vfs_inode);
279 return(&hi->vfs_inode); 281 return &hi->vfs_inode;
280} 282}
281 283
282static void hostfs_delete_inode(struct inode *inode) 284static void hostfs_delete_inode(struct inode *inode)
@@ -325,10 +327,12 @@ int hostfs_readdir(struct file *file, void *ent, filldir_t filldir)
325 int error, len; 327 int error, len;
326 328
327 name = dentry_name(file->f_path.dentry, 0); 329 name = dentry_name(file->f_path.dentry, 0);
328 if(name == NULL) return(-ENOMEM); 330 if(name == NULL)
331 return -ENOMEM;
329 dir = open_dir(name, &error); 332 dir = open_dir(name, &error);
330 kfree(name); 333 kfree(name);
331 if(dir == NULL) return(-error); 334 if(dir == NULL)
335 return -error;
332 next = file->f_pos; 336 next = file->f_pos;
333 while((name = read_dir(dir, &next, &ino, &len)) != NULL){ 337 while((name = read_dir(dir, &next, &ino, &len)) != NULL){
334 error = (*filldir)(ent, name, len, file->f_pos, 338 error = (*filldir)(ent, name, len, file->f_pos,
@@ -337,7 +341,7 @@ int hostfs_readdir(struct file *file, void *ent, filldir_t filldir)
337 file->f_pos = next; 341 file->f_pos = next;
338 } 342 }
339 close_dir(dir); 343 close_dir(dir);
340 return(0); 344 return 0;
341} 345}
342 346
343int hostfs_file_open(struct inode *ino, struct file *file) 347int hostfs_file_open(struct inode *ino, struct file *file)
@@ -347,7 +351,7 @@ int hostfs_file_open(struct inode *ino, struct file *file)
347 351
348 mode = file->f_mode & (FMODE_READ | FMODE_WRITE); 352 mode = file->f_mode & (FMODE_READ | FMODE_WRITE);
349 if((mode & HOSTFS_I(ino)->mode) == mode) 353 if((mode & HOSTFS_I(ino)->mode) == mode)
350 return(0); 354 return 0;
351 355
352 /* The file may already have been opened, but with the wrong access, 356 /* The file may already have been opened, but with the wrong access,
353 * so this resets things and reopens the file with the new access. 357 * so this resets things and reopens the file with the new access.
@@ -367,14 +371,15 @@ int hostfs_file_open(struct inode *ino, struct file *file)
367 371
368 name = dentry_name(file->f_path.dentry, 0); 372 name = dentry_name(file->f_path.dentry, 0);
369 if(name == NULL) 373 if(name == NULL)
370 return(-ENOMEM); 374 return -ENOMEM;
371 375
372 fd = open_file(name, r, w, append); 376 fd = open_file(name, r, w, append);
373 kfree(name); 377 kfree(name);
374 if(fd < 0) return(fd); 378 if(fd < 0)
379 return fd;
375 FILE_HOSTFS_I(file)->fd = fd; 380 FILE_HOSTFS_I(file)->fd = fd;
376 381
377 return(0); 382 return 0;
378} 383}
379 384
380int hostfs_fsync(struct file *file, struct dentry *dentry, int datasync) 385int hostfs_fsync(struct file *file, struct dentry *dentry, int datasync)
@@ -458,7 +463,7 @@ int hostfs_readpage(struct file *file, struct page *page)
458 out: 463 out:
459 kunmap(page); 464 kunmap(page);
460 unlock_page(page); 465 unlock_page(page);
461 return(err); 466 return err;
462} 467}
463 468
464int hostfs_prepare_write(struct file *file, struct page *page, 469int hostfs_prepare_write(struct file *file, struct page *page,
@@ -485,7 +490,7 @@ int hostfs_prepare_write(struct file *file, struct page *page,
485 err = 0; 490 err = 0;
486 out: 491 out:
487 kunmap(page); 492 kunmap(page);
488 return(err); 493 return err;
489} 494}
490 495
491int hostfs_commit_write(struct file *file, struct page *page, unsigned from, 496int hostfs_commit_write(struct file *file, struct page *page, unsigned from,
@@ -511,7 +516,7 @@ int hostfs_commit_write(struct file *file, struct page *page, unsigned from,
511 inode->i_size = start; 516 inode->i_size = start;
512 517
513 kunmap(page); 518 kunmap(page);
514 return(err); 519 return err;
515} 520}
516 521
517static const struct address_space_operations hostfs_aops = { 522static const struct address_space_operations hostfs_aops = {
@@ -569,7 +574,7 @@ static int init_inode(struct inode *inode, struct dentry *dentry)
569 break; 574 break;
570 } 575 }
571 out: 576 out:
572 return(err); 577 return err;
573} 578}
574 579
575int hostfs_create(struct inode *dir, struct dentry *dentry, int mode, 580int hostfs_create(struct inode *dir, struct dentry *dentry, int mode,
@@ -607,16 +612,16 @@ int hostfs_create(struct inode *dir, struct dentry *dentry, int mode,
607 HOSTFS_I(inode)->fd = fd; 612 HOSTFS_I(inode)->fd = fd;
608 HOSTFS_I(inode)->mode = FMODE_READ | FMODE_WRITE; 613 HOSTFS_I(inode)->mode = FMODE_READ | FMODE_WRITE;
609 d_instantiate(dentry, inode); 614 d_instantiate(dentry, inode);
610 return(0); 615 return 0;
611 616
612 out_put: 617 out_put:
613 iput(inode); 618 iput(inode);
614 out: 619 out:
615 return(error); 620 return error;
616} 621}
617 622
618struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry, 623struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry,
619 struct nameidata *nd) 624 struct nameidata *nd)
620{ 625{
621 struct inode *inode; 626 struct inode *inode;
622 char *name; 627 char *name;
@@ -647,44 +652,45 @@ struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry,
647 652
648 d_add(dentry, inode); 653 d_add(dentry, inode);
649 dentry->d_op = &hostfs_dentry_ops; 654 dentry->d_op = &hostfs_dentry_ops;
650 return(NULL); 655 return NULL;
651 656
652 out_put: 657 out_put:
653 iput(inode); 658 iput(inode);
654 out: 659 out:
655 return(ERR_PTR(err)); 660 return ERR_PTR(err);
656} 661}
657 662
658static char *inode_dentry_name(struct inode *ino, struct dentry *dentry) 663static char *inode_dentry_name(struct inode *ino, struct dentry *dentry)
659{ 664{
660 char *file; 665 char *file;
661 int len; 666 int len;
662 667
663 file = inode_name(ino, dentry->d_name.len + 1); 668 file = inode_name(ino, dentry->d_name.len + 1);
664 if(file == NULL) return(NULL); 669 if(file == NULL)
665 strcat(file, "/"); 670 return NULL;
671 strcat(file, "/");
666 len = strlen(file); 672 len = strlen(file);
667 strncat(file, dentry->d_name.name, dentry->d_name.len); 673 strncat(file, dentry->d_name.name, dentry->d_name.len);
668 file[len + dentry->d_name.len] = '\0'; 674 file[len + dentry->d_name.len] = '\0';
669 return(file); 675 return file;
670} 676}
671 677
672int hostfs_link(struct dentry *to, struct inode *ino, struct dentry *from) 678int hostfs_link(struct dentry *to, struct inode *ino, struct dentry *from)
673{ 679{
674 char *from_name, *to_name; 680 char *from_name, *to_name;
675 int err; 681 int err;
676 682
677 if((from_name = inode_dentry_name(ino, from)) == NULL) 683 if((from_name = inode_dentry_name(ino, from)) == NULL)
678 return(-ENOMEM); 684 return -ENOMEM;
679 to_name = dentry_name(to, 0); 685 to_name = dentry_name(to, 0);
680 if(to_name == NULL){ 686 if(to_name == NULL){
681 kfree(from_name); 687 kfree(from_name);
682 return(-ENOMEM); 688 return -ENOMEM;
683 } 689 }
684 err = link_file(to_name, from_name); 690 err = link_file(to_name, from_name);
685 kfree(from_name); 691 kfree(from_name);
686 kfree(to_name); 692 kfree(to_name);
687 return(err); 693 return err;
688} 694}
689 695
690int hostfs_unlink(struct inode *ino, struct dentry *dentry) 696int hostfs_unlink(struct inode *ino, struct dentry *dentry)
@@ -692,13 +698,14 @@ int hostfs_unlink(struct inode *ino, struct dentry *dentry)
692 char *file; 698 char *file;
693 int err; 699 int err;
694 700
695 if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM); 701 if((file = inode_dentry_name(ino, dentry)) == NULL)
702 return -ENOMEM;
696 if(append) 703 if(append)
697 return(-EPERM); 704 return -EPERM;
698 705
699 err = unlink_file(file); 706 err = unlink_file(file);
700 kfree(file); 707 kfree(file);
701 return(err); 708 return err;
702} 709}
703 710
704int hostfs_symlink(struct inode *ino, struct dentry *dentry, const char *to) 711int hostfs_symlink(struct inode *ino, struct dentry *dentry, const char *to)
@@ -706,10 +713,11 @@ int hostfs_symlink(struct inode *ino, struct dentry *dentry, const char *to)
706 char *file; 713 char *file;
707 int err; 714 int err;
708 715
709 if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM); 716 if((file = inode_dentry_name(ino, dentry)) == NULL)
717 return -ENOMEM;
710 err = make_symlink(file, to); 718 err = make_symlink(file, to);
711 kfree(file); 719 kfree(file);
712 return(err); 720 return err;
713} 721}
714 722
715int hostfs_mkdir(struct inode *ino, struct dentry *dentry, int mode) 723int hostfs_mkdir(struct inode *ino, struct dentry *dentry, int mode)
@@ -717,10 +725,11 @@ int hostfs_mkdir(struct inode *ino, struct dentry *dentry, int mode)
717 char *file; 725 char *file;
718 int err; 726 int err;
719 727
720 if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM); 728 if((file = inode_dentry_name(ino, dentry)) == NULL)
729 return -ENOMEM;
721 err = do_mkdir(file, mode); 730 err = do_mkdir(file, mode);
722 kfree(file); 731 kfree(file);
723 return(err); 732 return err;
724} 733}
725 734
726int hostfs_rmdir(struct inode *ino, struct dentry *dentry) 735int hostfs_rmdir(struct inode *ino, struct dentry *dentry)
@@ -728,10 +737,11 @@ int hostfs_rmdir(struct inode *ino, struct dentry *dentry)
728 char *file; 737 char *file;
729 int err; 738 int err;
730 739
731 if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM); 740 if((file = inode_dentry_name(ino, dentry)) == NULL)
741 return -ENOMEM;
732 err = do_rmdir(file); 742 err = do_rmdir(file);
733 kfree(file); 743 kfree(file);
734 return(err); 744 return err;
735} 745}
736 746
737int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) 747int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
@@ -764,14 +774,14 @@ int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
764 goto out_put; 774 goto out_put;
765 775
766 d_instantiate(dentry, inode); 776 d_instantiate(dentry, inode);
767 return(0); 777 return 0;
768 778
769 out_free: 779 out_free:
770 kfree(name); 780 kfree(name);
771 out_put: 781 out_put:
772 iput(inode); 782 iput(inode);
773 out: 783 out:
774 return(err); 784 return err;
775} 785}
776 786
777int hostfs_rename(struct inode *from_ino, struct dentry *from, 787int hostfs_rename(struct inode *from_ino, struct dentry *from,
@@ -781,15 +791,15 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from,
781 int err; 791 int err;
782 792
783 if((from_name = inode_dentry_name(from_ino, from)) == NULL) 793 if((from_name = inode_dentry_name(from_ino, from)) == NULL)
784 return(-ENOMEM); 794 return -ENOMEM;
785 if((to_name = inode_dentry_name(to_ino, to)) == NULL){ 795 if((to_name = inode_dentry_name(to_ino, to)) == NULL){
786 kfree(from_name); 796 kfree(from_name);
787 return(-ENOMEM); 797 return -ENOMEM;
788 } 798 }
789 err = rename_file(from_name, to_name); 799 err = rename_file(from_name, to_name);
790 kfree(from_name); 800 kfree(from_name);
791 kfree(to_name); 801 kfree(to_name);
792 return(err); 802 return err;
793} 803}
794 804
795int hostfs_permission(struct inode *ino, int desired, struct nameidata *nd) 805int hostfs_permission(struct inode *ino, int desired, struct nameidata *nd)
@@ -801,7 +811,8 @@ int hostfs_permission(struct inode *ino, int desired, struct nameidata *nd)
801 if (desired & MAY_WRITE) w = 1; 811 if (desired & MAY_WRITE) w = 1;
802 if (desired & MAY_EXEC) x = 1; 812 if (desired & MAY_EXEC) x = 1;
803 name = inode_name(ino, 0); 813 name = inode_name(ino, 0);
804 if (name == NULL) return(-ENOMEM); 814 if (name == NULL)
815 return -ENOMEM;
805 816
806 if (S_ISCHR(ino->i_mode) || S_ISBLK(ino->i_mode) || 817 if (S_ISCHR(ino->i_mode) || S_ISBLK(ino->i_mode) ||
807 S_ISFIFO(ino->i_mode) || S_ISSOCK(ino->i_mode)) 818 S_ISFIFO(ino->i_mode) || S_ISSOCK(ino->i_mode))
@@ -820,6 +831,8 @@ int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
820 char *name; 831 char *name;
821 int err; 832 int err;
822 833
834 int fd = HOSTFS_I(dentry->d_inode)->fd;
835
823 err = inode_change_ok(dentry->d_inode, attr); 836 err = inode_change_ok(dentry->d_inode, attr);
824 if (err) 837 if (err)
825 return err; 838 return err;
@@ -863,20 +876,21 @@ int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
863 attrs.ia_valid |= HOSTFS_ATTR_MTIME_SET; 876 attrs.ia_valid |= HOSTFS_ATTR_MTIME_SET;
864 } 877 }
865 name = dentry_name(dentry, 0); 878 name = dentry_name(dentry, 0);
866 if(name == NULL) return(-ENOMEM); 879 if(name == NULL)
867 err = set_attr(name, &attrs); 880 return -ENOMEM;
881 err = set_attr(name, &attrs, fd);
868 kfree(name); 882 kfree(name);
869 if(err) 883 if(err)
870 return(err); 884 return err;
871 885
872 return(inode_setattr(dentry->d_inode, attr)); 886 return inode_setattr(dentry->d_inode, attr);
873} 887}
874 888
875int hostfs_getattr(struct vfsmount *mnt, struct dentry *dentry, 889int hostfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
876 struct kstat *stat) 890 struct kstat *stat)
877{ 891{
878 generic_fillattr(dentry->d_inode, stat); 892 generic_fillattr(dentry->d_inode, stat);
879 return(0); 893 return 0;
880} 894}
881 895
882static const struct inode_operations hostfs_iops = { 896static const struct inode_operations hostfs_iops = {
@@ -915,7 +929,8 @@ int hostfs_link_readpage(struct file *file, struct page *page)
915 929
916 buffer = kmap(page); 930 buffer = kmap(page);
917 name = inode_name(page->mapping->host, 0); 931 name = inode_name(page->mapping->host, 0);
918 if(name == NULL) return(-ENOMEM); 932 if(name == NULL)
933 return -ENOMEM;
919 err = do_readlink(name, buffer, PAGE_CACHE_SIZE); 934 err = do_readlink(name, buffer, PAGE_CACHE_SIZE);
920 kfree(name); 935 kfree(name);
921 if(err == PAGE_CACHE_SIZE) 936 if(err == PAGE_CACHE_SIZE)
@@ -928,7 +943,7 @@ int hostfs_link_readpage(struct file *file, struct page *page)
928 } 943 }
929 kunmap(page); 944 kunmap(page);
930 unlock_page(page); 945 unlock_page(page);
931 return(err); 946 return err;
932} 947}
933 948
934static const struct address_space_operations hostfs_link_aops = { 949static const struct address_space_operations hostfs_link_aops = {
@@ -978,20 +993,20 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
978 993
979 err = read_inode(root_inode); 994 err = read_inode(root_inode);
980 if(err){ 995 if(err){
981 /* No iput in this case because the dput does that for us */ 996 /* No iput in this case because the dput does that for us */
982 dput(sb->s_root); 997 dput(sb->s_root);
983 sb->s_root = NULL; 998 sb->s_root = NULL;
984 goto out; 999 goto out;
985 } 1000 }
986 1001
987 return(0); 1002 return 0;
988 1003
989 out_put: 1004out_put:
990 iput(root_inode); 1005 iput(root_inode);
991 out_free: 1006out_free:
992 kfree(host_root_path); 1007 kfree(host_root_path);
993 out: 1008out:
994 return(err); 1009 return err;
995} 1010}
996 1011
997static int hostfs_read_sb(struct file_system_type *type, 1012static int hostfs_read_sb(struct file_system_type *type,
@@ -1011,7 +1026,7 @@ static struct file_system_type hostfs_type = {
1011 1026
1012static int __init init_hostfs(void) 1027static int __init init_hostfs(void)
1013{ 1028{
1014 return(register_filesystem(&hostfs_type)); 1029 return register_filesystem(&hostfs_type);
1015} 1030}
1016 1031
1017static void __exit exit_hostfs(void) 1032static void __exit exit_hostfs(void)
@@ -1022,14 +1037,3 @@ static void __exit exit_hostfs(void)
1022module_init(init_hostfs) 1037module_init(init_hostfs)
1023module_exit(exit_hostfs) 1038module_exit(exit_hostfs)
1024MODULE_LICENSE("GPL"); 1039MODULE_LICENSE("GPL");
1025
1026/*
1027 * Overrides for Emacs so that we follow Linus's tabbing style.
1028 * Emacs will notice this stuff at the end of the file and automatically
1029 * adjust the settings for this buffer only. This must remain at the end
1030 * of the file.
1031 * ---------------------------------------------------------------------------
1032 * Local variables:
1033 * c-file-style: "linux"
1034 * End:
1035 */
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index 1ed5ea389f15..5625e2481dd3 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) 2 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
3 * Licensed under the GPL 3 * Licensed under the GPL
4 */ 4 */
5 5
@@ -21,12 +21,16 @@ int stat_file(const char *path, unsigned long long *inode_out, int *mode_out,
21 int *nlink_out, int *uid_out, int *gid_out, 21 int *nlink_out, int *uid_out, int *gid_out,
22 unsigned long long *size_out, struct timespec *atime_out, 22 unsigned long long *size_out, struct timespec *atime_out,
23 struct timespec *mtime_out, struct timespec *ctime_out, 23 struct timespec *mtime_out, struct timespec *ctime_out,
24 int *blksize_out, unsigned long long *blocks_out) 24 int *blksize_out, unsigned long long *blocks_out, int fd)
25{ 25{
26 struct stat64 buf; 26 struct stat64 buf;
27 27
28 if(lstat64(path, &buf) < 0) 28 if(fd >= 0) {
29 return(-errno); 29 if (fstat64(fd, &buf) < 0)
30 return -errno;
31 } else if(lstat64(path, &buf) < 0) {
32 return -errno;
33 }
30 34
31 if(inode_out != NULL) *inode_out = buf.st_ino; 35 if(inode_out != NULL) *inode_out = buf.st_ino;
32 if(mode_out != NULL) *mode_out = buf.st_mode; 36 if(mode_out != NULL) *mode_out = buf.st_mode;
@@ -48,7 +52,7 @@ int stat_file(const char *path, unsigned long long *inode_out, int *mode_out,
48 } 52 }
49 if(blksize_out != NULL) *blksize_out = buf.st_blksize; 53 if(blksize_out != NULL) *blksize_out = buf.st_blksize;
50 if(blocks_out != NULL) *blocks_out = buf.st_blocks; 54 if(blocks_out != NULL) *blocks_out = buf.st_blocks;
51 return(0); 55 return 0;
52} 56}
53 57
54int file_type(const char *path, int *maj, int *min) 58int file_type(const char *path, int *maj, int *min)
@@ -56,7 +60,7 @@ int file_type(const char *path, int *maj, int *min)
56 struct stat64 buf; 60 struct stat64 buf;
57 61
58 if(lstat64(path, &buf) < 0) 62 if(lstat64(path, &buf) < 0)
59 return(-errno); 63 return -errno;
60 /*We cannot pass rdev as is because glibc and the kernel disagree 64 /*We cannot pass rdev as is because glibc and the kernel disagree
61 *about its definition.*/ 65 *about its definition.*/
62 if(maj != NULL) 66 if(maj != NULL)
@@ -64,13 +68,13 @@ int file_type(const char *path, int *maj, int *min)
64 if(min != NULL) 68 if(min != NULL)
65 *min = minor(buf.st_rdev); 69 *min = minor(buf.st_rdev);
66 70
67 if(S_ISDIR(buf.st_mode)) return(OS_TYPE_DIR); 71 if(S_ISDIR(buf.st_mode)) return OS_TYPE_DIR;
68 else if(S_ISLNK(buf.st_mode)) return(OS_TYPE_SYMLINK); 72 else if(S_ISLNK(buf.st_mode)) return OS_TYPE_SYMLINK;
69 else if(S_ISCHR(buf.st_mode)) return(OS_TYPE_CHARDEV); 73 else if(S_ISCHR(buf.st_mode)) return OS_TYPE_CHARDEV;
70 else if(S_ISBLK(buf.st_mode)) return(OS_TYPE_BLOCKDEV); 74 else if(S_ISBLK(buf.st_mode)) return OS_TYPE_BLOCKDEV;
71 else if(S_ISFIFO(buf.st_mode))return(OS_TYPE_FIFO); 75 else if(S_ISFIFO(buf.st_mode))return OS_TYPE_FIFO;
72 else if(S_ISSOCK(buf.st_mode))return(OS_TYPE_SOCK); 76 else if(S_ISSOCK(buf.st_mode))return OS_TYPE_SOCK;
73 else return(OS_TYPE_FILE); 77 else return OS_TYPE_FILE;
74} 78}
75 79
76int access_file(char *path, int r, int w, int x) 80int access_file(char *path, int r, int w, int x)
@@ -80,8 +84,9 @@ int access_file(char *path, int r, int w, int x)
80 if(r) mode = R_OK; 84 if(r) mode = R_OK;
81 if(w) mode |= W_OK; 85 if(w) mode |= W_OK;
82 if(x) mode |= X_OK; 86 if(x) mode |= X_OK;
83 if(access(path, mode) != 0) return(-errno); 87 if(access(path, mode) != 0)
84 else return(0); 88 return -errno;
89 else return 0;
85} 90}
86 91
87int open_file(char *path, int r, int w, int append) 92int open_file(char *path, int r, int w, int append)
@@ -99,8 +104,9 @@ int open_file(char *path, int r, int w, int append)
99 if(append) 104 if(append)
100 mode |= O_APPEND; 105 mode |= O_APPEND;
101 fd = open64(path, mode); 106 fd = open64(path, mode);
102 if(fd < 0) return(-errno); 107 if(fd < 0)
103 else return(fd); 108 return -errno;
109 else return fd;
104} 110}
105 111
106void *open_dir(char *path, int *err_out) 112void *open_dir(char *path, int *err_out)
@@ -109,8 +115,9 @@ void *open_dir(char *path, int *err_out)
109 115
110 dir = opendir(path); 116 dir = opendir(path);
111 *err_out = errno; 117 *err_out = errno;
112 if(dir == NULL) return(NULL); 118 if(dir == NULL)
113 return(dir); 119 return NULL;
120 return dir;
114} 121}
115 122
116char *read_dir(void *stream, unsigned long long *pos, 123char *read_dir(void *stream, unsigned long long *pos,
@@ -121,11 +128,12 @@ char *read_dir(void *stream, unsigned long long *pos,
121 128
122 seekdir(dir, *pos); 129 seekdir(dir, *pos);
123 ent = readdir(dir); 130 ent = readdir(dir);
124 if(ent == NULL) return(NULL); 131 if(ent == NULL)
132 return NULL;
125 *len_out = strlen(ent->d_name); 133 *len_out = strlen(ent->d_name);
126 *ino_out = ent->d_ino; 134 *ino_out = ent->d_ino;
127 *pos = telldir(dir); 135 *pos = telldir(dir);
128 return(ent->d_name); 136 return ent->d_name;
129} 137}
130 138
131int read_file(int fd, unsigned long long *offset, char *buf, int len) 139int read_file(int fd, unsigned long long *offset, char *buf, int len)
@@ -133,9 +141,10 @@ int read_file(int fd, unsigned long long *offset, char *buf, int len)
133 int n; 141 int n;
134 142
135 n = pread64(fd, buf, len, *offset); 143 n = pread64(fd, buf, len, *offset);
136 if(n < 0) return(-errno); 144 if(n < 0)
145 return -errno;
137 *offset += n; 146 *offset += n;
138 return(n); 147 return n;
139} 148}
140 149
141int write_file(int fd, unsigned long long *offset, const char *buf, int len) 150int write_file(int fd, unsigned long long *offset, const char *buf, int len)
@@ -143,9 +152,10 @@ int write_file(int fd, unsigned long long *offset, const char *buf, int len)
143 int n; 152 int n;
144 153
145 n = pwrite64(fd, buf, len, *offset); 154 n = pwrite64(fd, buf, len, *offset);
146 if(n < 0) return(-errno); 155 if(n < 0)
156 return -errno;
147 *offset += n; 157 *offset += n;
148 return(n); 158 return n;
149} 159}
150 160
151int lseek_file(int fd, long long offset, int whence) 161int lseek_file(int fd, long long offset, int whence)
@@ -154,8 +164,8 @@ int lseek_file(int fd, long long offset, int whence)
154 164
155 ret = lseek64(fd, offset, whence); 165 ret = lseek64(fd, offset, whence);
156 if(ret < 0) 166 if(ret < 0)
157 return(-errno); 167 return -errno;
158 return(0); 168 return 0;
159} 169}
160 170
161int fsync_file(int fd, int datasync) 171int fsync_file(int fd, int datasync)
@@ -198,65 +208,90 @@ int file_create(char *name, int ur, int uw, int ux, int gr,
198 mode |= ox ? S_IXOTH : 0; 208 mode |= ox ? S_IXOTH : 0;
199 fd = open64(name, O_CREAT | O_RDWR, mode); 209 fd = open64(name, O_CREAT | O_RDWR, mode);
200 if(fd < 0) 210 if(fd < 0)
201 return(-errno); 211 return -errno;
202 return(fd); 212 return fd;
203} 213}
204 214
205int set_attr(const char *file, struct hostfs_iattr *attrs) 215int set_attr(const char *file, struct hostfs_iattr *attrs, int fd)
206{ 216{
207 struct utimbuf buf; 217 struct timeval times[2];
218 struct timespec atime_ts, mtime_ts;
208 int err, ma; 219 int err, ma;
209 220
210 if(attrs->ia_valid & HOSTFS_ATTR_MODE){ 221 if (attrs->ia_valid & HOSTFS_ATTR_MODE) {
211 if(chmod(file, attrs->ia_mode) != 0) return(-errno); 222 if (fd >= 0) {
212 } 223 if (fchmod(fd, attrs->ia_mode) != 0)
213 if(attrs->ia_valid & HOSTFS_ATTR_UID){ 224 return (-errno);
214 if(chown(file, attrs->ia_uid, -1)) return(-errno); 225 } else if (chmod(file, attrs->ia_mode) != 0) {
226 return -errno;
227 }
215 } 228 }
216 if(attrs->ia_valid & HOSTFS_ATTR_GID){ 229 if (attrs->ia_valid & HOSTFS_ATTR_UID) {
217 if(chown(file, -1, attrs->ia_gid)) return(-errno); 230 if (fd >= 0) {
231 if (fchown(fd, attrs->ia_uid, -1))
232 return -errno;
233 } else if(chown(file, attrs->ia_uid, -1)) {
234 return -errno;
235 }
218 } 236 }
219 if(attrs->ia_valid & HOSTFS_ATTR_SIZE){ 237 if (attrs->ia_valid & HOSTFS_ATTR_GID) {
220 if(truncate(file, attrs->ia_size)) return(-errno); 238 if (fd >= 0) {
239 if (fchown(fd, -1, attrs->ia_gid))
240 return -errno;
241 } else if (chown(file, -1, attrs->ia_gid)) {
242 return -errno;
243 }
221 } 244 }
222 ma = HOSTFS_ATTR_ATIME_SET | HOSTFS_ATTR_MTIME_SET; 245 if (attrs->ia_valid & HOSTFS_ATTR_SIZE) {
223 if((attrs->ia_valid & ma) == ma){ 246 if (fd >= 0) {
224 buf.actime = attrs->ia_atime.tv_sec; 247 if (ftruncate(fd, attrs->ia_size))
225 buf.modtime = attrs->ia_mtime.tv_sec; 248 return -errno;
226 if(utime(file, &buf) != 0) return(-errno); 249 } else if (truncate(file, attrs->ia_size)) {
250 return -errno;
251 }
227 } 252 }
228 else { 253
229 struct timespec ts; 254 /* Update accessed and/or modified time, in two parts: first set
230 255 * times according to the changes to perform, and then call futimes()
231 if(attrs->ia_valid & HOSTFS_ATTR_ATIME_SET){ 256 * or utimes() to apply them. */
232 err = stat_file(file, NULL, NULL, NULL, NULL, NULL, 257 ma = (HOSTFS_ATTR_ATIME_SET | HOSTFS_ATTR_MTIME_SET);
233 NULL, NULL, &ts, NULL, NULL, NULL); 258 if (attrs->ia_valid & ma) {
234 if(err != 0) 259 err = stat_file(file, NULL, NULL, NULL, NULL, NULL, NULL,
235 return(err); 260 &atime_ts, &mtime_ts, NULL, NULL, NULL, fd);
236 buf.actime = attrs->ia_atime.tv_sec; 261 if (err != 0)
237 buf.modtime = ts.tv_sec; 262 return err;
238 if(utime(file, &buf) != 0) 263
239 return(-errno); 264 times[0].tv_sec = atime_ts.tv_sec;
265 times[0].tv_usec = atime_ts.tv_nsec * 1000;
266 times[1].tv_sec = mtime_ts.tv_sec;
267 times[1].tv_usec = mtime_ts.tv_nsec * 1000;
268
269 if (attrs->ia_valid & HOSTFS_ATTR_ATIME_SET) {
270 times[0].tv_sec = attrs->ia_atime.tv_sec;
271 times[0].tv_usec = attrs->ia_atime.tv_nsec * 1000;
272 }
273 if (attrs->ia_valid & HOSTFS_ATTR_MTIME_SET) {
274 times[1].tv_sec = attrs->ia_mtime.tv_sec;
275 times[1].tv_usec = attrs->ia_mtime.tv_nsec * 1000;
240 } 276 }
241 if(attrs->ia_valid & HOSTFS_ATTR_MTIME_SET){ 277
242 err = stat_file(file, NULL, NULL, NULL, NULL, NULL, 278 if (fd >= 0) {
243 NULL, &ts, NULL, NULL, NULL, NULL); 279 if (futimes(fd, times) != 0)
244 if(err != 0) 280 return -errno;
245 return(err); 281 } else if (utimes(file, times) != 0) {
246 buf.actime = ts.tv_sec; 282 return -errno;
247 buf.modtime = attrs->ia_mtime.tv_sec;
248 if(utime(file, &buf) != 0)
249 return(-errno);
250 } 283 }
251 } 284 }
285
252 if(attrs->ia_valid & HOSTFS_ATTR_CTIME) ; 286 if(attrs->ia_valid & HOSTFS_ATTR_CTIME) ;
253 if(attrs->ia_valid & (HOSTFS_ATTR_ATIME | HOSTFS_ATTR_MTIME)){ 287 if(attrs->ia_valid & (HOSTFS_ATTR_ATIME | HOSTFS_ATTR_MTIME)){
254 err = stat_file(file, NULL, NULL, NULL, NULL, NULL, NULL, 288 err = stat_file(file, NULL, NULL, NULL, NULL, NULL, NULL,
255 &attrs->ia_atime, &attrs->ia_mtime, NULL, 289 &attrs->ia_atime, &attrs->ia_mtime, NULL,
256 NULL, NULL); 290 NULL, NULL, fd);
257 if(err != 0) return(err); 291 if(err != 0)
292 return err;
258 } 293 }
259 return(0); 294 return 0;
260} 295}
261 296
262int make_symlink(const char *from, const char *to) 297int make_symlink(const char *from, const char *to)
@@ -264,8 +299,9 @@ int make_symlink(const char *from, const char *to)
264 int err; 299 int err;
265 300
266 err = symlink(to, from); 301 err = symlink(to, from);
267 if(err) return(-errno); 302 if(err)
268 return(0); 303 return -errno;
304 return 0;
269} 305}
270 306
271int unlink_file(const char *file) 307int unlink_file(const char *file)
@@ -273,8 +309,9 @@ int unlink_file(const char *file)
273 int err; 309 int err;
274 310
275 err = unlink(file); 311 err = unlink(file);
276 if(err) return(-errno); 312 if(err)
277 return(0); 313 return -errno;
314 return 0;
278} 315}
279 316
280int do_mkdir(const char *file, int mode) 317int do_mkdir(const char *file, int mode)
@@ -282,8 +319,9 @@ int do_mkdir(const char *file, int mode)
282 int err; 319 int err;
283 320
284 err = mkdir(file, mode); 321 err = mkdir(file, mode);
285 if(err) return(-errno); 322 if(err)
286 return(0); 323 return -errno;
324 return 0;
287} 325}
288 326
289int do_rmdir(const char *file) 327int do_rmdir(const char *file)
@@ -291,8 +329,9 @@ int do_rmdir(const char *file)
291 int err; 329 int err;
292 330
293 err = rmdir(file); 331 err = rmdir(file);
294 if(err) return(-errno); 332 if(err)
295 return(0); 333 return -errno;
334 return 0;
296} 335}
297 336
298int do_mknod(const char *file, int mode, unsigned int major, unsigned int minor) 337int do_mknod(const char *file, int mode, unsigned int major, unsigned int minor)
@@ -300,8 +339,9 @@ int do_mknod(const char *file, int mode, unsigned int major, unsigned int minor)
300 int err; 339 int err;
301 340
302 err = mknod(file, mode, makedev(major, minor)); 341 err = mknod(file, mode, makedev(major, minor));
303 if(err) return(-errno); 342 if(err)
304 return(0); 343 return -errno;
344 return 0;
305} 345}
306 346
307int link_file(const char *to, const char *from) 347int link_file(const char *to, const char *from)
@@ -309,8 +349,9 @@ int link_file(const char *to, const char *from)
309 int err; 349 int err;
310 350
311 err = link(to, from); 351 err = link(to, from);
312 if(err) return(-errno); 352 if(err)
313 return(0); 353 return -errno;
354 return 0;
314} 355}
315 356
316int do_readlink(char *file, char *buf, int size) 357int do_readlink(char *file, char *buf, int size)
@@ -319,10 +360,10 @@ int do_readlink(char *file, char *buf, int size)
319 360
320 n = readlink(file, buf, size); 361 n = readlink(file, buf, size);
321 if(n < 0) 362 if(n < 0)
322 return(-errno); 363 return -errno;
323 if(n < size) 364 if(n < size)
324 buf[n] = '\0'; 365 buf[n] = '\0';
325 return(n); 366 return n;
326} 367}
327 368
328int rename_file(char *from, char *to) 369int rename_file(char *from, char *to)
@@ -330,8 +371,9 @@ int rename_file(char *from, char *to)
330 int err; 371 int err;
331 372
332 err = rename(from, to); 373 err = rename(from, to);
333 if(err < 0) return(-errno); 374 if(err < 0)
334 return(0); 375 return -errno;
376 return 0;
335} 377}
336 378
337int do_statfs(char *root, long *bsize_out, long long *blocks_out, 379int do_statfs(char *root, long *bsize_out, long long *blocks_out,
@@ -344,7 +386,9 @@ int do_statfs(char *root, long *bsize_out, long long *blocks_out,
344 int err; 386 int err;
345 387
346 err = statfs64(root, &buf); 388 err = statfs64(root, &buf);
347 if(err < 0) return(-errno); 389 if(err < 0)
390 return -errno;
391
348 *bsize_out = buf.f_bsize; 392 *bsize_out = buf.f_bsize;
349 *blocks_out = buf.f_blocks; 393 *blocks_out = buf.f_blocks;
350 *bfree_out = buf.f_bfree; 394 *bfree_out = buf.f_bfree;
@@ -360,16 +404,5 @@ int do_statfs(char *root, long *bsize_out, long long *blocks_out,
360 spare_out[2] = buf.f_spare[2]; 404 spare_out[2] = buf.f_spare[2];
361 spare_out[3] = buf.f_spare[3]; 405 spare_out[3] = buf.f_spare[3];
362 spare_out[4] = buf.f_spare[4]; 406 spare_out[4] = buf.f_spare[4];
363 return(0); 407 return 0;
364} 408}
365
366/*
367 * Overrides for Emacs so that we follow Linus's tabbing style.
368 * Emacs will notice this stuff at the end of the file and automatically
369 * adjust the settings for this buffer only. This must remain at the end
370 * of the file.
371 * ---------------------------------------------------------------------------
372 * Local variables:
373 * c-file-style: "linux"
374 * End:
375 */
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index e0174e338526..1b95f39fbc37 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -176,8 +176,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
176{ 176{
177 struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo; 177 struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo;
178 178
179 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 179 if (flags & SLAB_CTOR_CONSTRUCTOR) {
180 SLAB_CTOR_CONSTRUCTOR) {
181 mutex_init(&ei->i_mutex); 180 mutex_init(&ei->i_mutex);
182 mutex_init(&ei->i_parent_mutex); 181 mutex_init(&ei->i_parent_mutex);
183 inode_init_once(&ei->vfs_inode); 182 inode_init_once(&ei->vfs_inode);
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 8c718a3d413f..98959b87cdf8 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -22,6 +22,7 @@
22#include <linux/backing-dev.h> 22#include <linux/backing-dev.h>
23#include <linux/hugetlb.h> 23#include <linux/hugetlb.h>
24#include <linux/pagevec.h> 24#include <linux/pagevec.h>
25#include <linux/mman.h>
25#include <linux/quotaops.h> 26#include <linux/quotaops.h>
26#include <linux/slab.h> 27#include <linux/slab.h>
27#include <linux/dnotify.h> 28#include <linux/dnotify.h>
@@ -98,10 +99,7 @@ out:
98 * Called under down_write(mmap_sem). 99 * Called under down_write(mmap_sem).
99 */ 100 */
100 101
101#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA 102#ifndef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
102unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
103 unsigned long len, unsigned long pgoff, unsigned long flags);
104#else
105static unsigned long 103static unsigned long
106hugetlb_get_unmapped_area(struct file *file, unsigned long addr, 104hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
107 unsigned long len, unsigned long pgoff, unsigned long flags) 105 unsigned long len, unsigned long pgoff, unsigned long flags)
@@ -115,6 +113,12 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
115 if (len > TASK_SIZE) 113 if (len > TASK_SIZE)
116 return -ENOMEM; 114 return -ENOMEM;
117 115
116 if (flags & MAP_FIXED) {
117 if (prepare_hugepage_range(addr, len, pgoff))
118 return -EINVAL;
119 return addr;
120 }
121
118 if (addr) { 122 if (addr) {
119 addr = ALIGN(addr, HPAGE_SIZE); 123 addr = ALIGN(addr, HPAGE_SIZE);
120 vma = find_vma(mm, addr); 124 vma = find_vma(mm, addr);
@@ -453,7 +457,7 @@ static int hugetlbfs_symlink(struct inode *dir,
453 */ 457 */
454static int hugetlbfs_set_page_dirty(struct page *page) 458static int hugetlbfs_set_page_dirty(struct page *page)
455{ 459{
456 struct page *head = (struct page *)page_private(page); 460 struct page *head = compound_head(page);
457 461
458 SetPageDirty(head); 462 SetPageDirty(head);
459 return 0; 463 return 0;
@@ -552,8 +556,7 @@ static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
552{ 556{
553 struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo; 557 struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo;
554 558
555 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 559 if (flags & SLAB_CTOR_CONSTRUCTOR)
556 SLAB_CTOR_CONSTRUCTOR)
557 inode_init_once(&ei->vfs_inode); 560 inode_init_once(&ei->vfs_inode);
558} 561}
559 562
@@ -744,6 +747,9 @@ struct file *hugetlb_zero_setup(size_t size)
744 char buf[16]; 747 char buf[16];
745 static atomic_t counter; 748 static atomic_t counter;
746 749
750 if (!hugetlbfs_vfsmount)
751 return ERR_PTR(-ENOENT);
752
747 if (!can_do_hugetlb_shm()) 753 if (!can_do_hugetlb_shm())
748 return ERR_PTR(-EPERM); 754 return ERR_PTR(-EPERM);
749 755
diff --git a/fs/inode.c b/fs/inode.c
index 5abb097ab1b0..df2ef15d03d2 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -213,8 +213,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
213{ 213{
214 struct inode * inode = (struct inode *) foo; 214 struct inode * inode = (struct inode *) foo;
215 215
216 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 216 if (flags & SLAB_CTOR_CONSTRUCTOR)
217 SLAB_CTOR_CONSTRUCTOR)
218 inode_init_once(inode); 217 inode_init_once(inode);
219} 218}
220 219
@@ -251,7 +250,7 @@ void clear_inode(struct inode *inode)
251 BUG_ON(inode->i_state & I_CLEAR); 250 BUG_ON(inode->i_state & I_CLEAR);
252 wait_on_inode(inode); 251 wait_on_inode(inode);
253 DQUOT_DROP(inode); 252 DQUOT_DROP(inode);
254 if (inode->i_sb && inode->i_sb->s_op->clear_inode) 253 if (inode->i_sb->s_op->clear_inode)
255 inode->i_sb->s_op->clear_inode(inode); 254 inode->i_sb->s_op->clear_inode(inode);
256 if (S_ISBLK(inode->i_mode) && inode->i_bdev) 255 if (S_ISBLK(inode->i_mode) && inode->i_bdev)
257 bd_forget(inode); 256 bd_forget(inode);
@@ -276,7 +275,7 @@ static void dispose_list(struct list_head *head)
276 while (!list_empty(head)) { 275 while (!list_empty(head)) {
277 struct inode *inode; 276 struct inode *inode;
278 277
279 inode = list_entry(head->next, struct inode, i_list); 278 inode = list_first_entry(head, struct inode, i_list);
280 list_del(&inode->i_list); 279 list_del(&inode->i_list);
281 280
282 if (inode->i_data.nrpages) 281 if (inode->i_data.nrpages)
@@ -525,7 +524,12 @@ repeat:
525 */ 524 */
526struct inode *new_inode(struct super_block *sb) 525struct inode *new_inode(struct super_block *sb)
527{ 526{
528 static unsigned long last_ino; 527 /*
528 * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
529 * error if st_ino won't fit in target struct field. Use 32bit counter
530 * here to attempt to avoid that.
531 */
532 static unsigned int last_ino;
529 struct inode * inode; 533 struct inode * inode;
530 534
531 spin_lock_prefetch(&inode_lock); 535 spin_lock_prefetch(&inode_lock);
@@ -684,27 +688,28 @@ static unsigned long hash(struct super_block *sb, unsigned long hashval)
684 */ 688 */
685ino_t iunique(struct super_block *sb, ino_t max_reserved) 689ino_t iunique(struct super_block *sb, ino_t max_reserved)
686{ 690{
687 static ino_t counter; 691 /*
692 * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
693 * error if st_ino won't fit in target struct field. Use 32bit counter
694 * here to attempt to avoid that.
695 */
696 static unsigned int counter;
688 struct inode *inode; 697 struct inode *inode;
689 struct hlist_head * head; 698 struct hlist_head *head;
690 ino_t res; 699 ino_t res;
700
691 spin_lock(&inode_lock); 701 spin_lock(&inode_lock);
692retry: 702 do {
693 if (counter > max_reserved) { 703 if (counter <= max_reserved)
694 head = inode_hashtable + hash(sb,counter); 704 counter = max_reserved + 1;
695 res = counter++; 705 res = counter++;
706 head = inode_hashtable + hash(sb, res);
696 inode = find_inode_fast(sb, head, res); 707 inode = find_inode_fast(sb, head, res);
697 if (!inode) { 708 } while (inode != NULL);
698 spin_unlock(&inode_lock); 709 spin_unlock(&inode_lock);
699 return res;
700 }
701 } else {
702 counter = max_reserved + 1;
703 }
704 goto retry;
705
706}
707 710
711 return res;
712}
708EXPORT_SYMBOL(iunique); 713EXPORT_SYMBOL(iunique);
709 714
710struct inode *igrab(struct inode *inode) 715struct inode *igrab(struct inode *inode)
@@ -1041,7 +1046,7 @@ static void generic_forget_inode(struct inode *inode)
1041 if (!(inode->i_state & (I_DIRTY|I_LOCK))) 1046 if (!(inode->i_state & (I_DIRTY|I_LOCK)))
1042 list_move(&inode->i_list, &inode_unused); 1047 list_move(&inode->i_list, &inode_unused);
1043 inodes_stat.nr_unused++; 1048 inodes_stat.nr_unused++;
1044 if (!sb || (sb->s_flags & MS_ACTIVE)) { 1049 if (sb->s_flags & MS_ACTIVE) {
1045 spin_unlock(&inode_lock); 1050 spin_unlock(&inode_lock);
1046 return; 1051 return;
1047 } 1052 }
diff --git a/fs/inotify.c b/fs/inotify.c
index f5099d86fd91..7457501b9565 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -509,7 +509,7 @@ void inotify_destroy(struct inotify_handle *ih)
509 mutex_unlock(&ih->mutex); 509 mutex_unlock(&ih->mutex);
510 break; 510 break;
511 } 511 }
512 watch = list_entry(watches->next, struct inotify_watch, h_list); 512 watch = list_first_entry(watches, struct inotify_watch, h_list);
513 get_inotify_watch(watch); 513 get_inotify_watch(watch);
514 mutex_unlock(&ih->mutex); 514 mutex_unlock(&ih->mutex);
515 515
diff --git a/fs/internal.h b/fs/internal.h
index ea00126c9a59..392e8ccd6fc4 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -9,8 +9,6 @@
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11 11
12#include <linux/ioctl32.h>
13
14struct super_block; 12struct super_block;
15 13
16/* 14/*
@@ -42,14 +40,6 @@ static inline int sb_is_blkdev_sb(struct super_block *sb)
42extern void __init chrdev_init(void); 40extern void __init chrdev_init(void);
43 41
44/* 42/*
45 * compat_ioctl.c
46 */
47#ifdef CONFIG_COMPAT
48extern struct ioctl_trans ioctl_start[];
49extern int ioctl_table_size;
50#endif
51
52/*
53 * namespace.c 43 * namespace.c
54 */ 44 */
55extern int copy_mount_options(const void __user *, unsigned long *); 45extern int copy_mount_options(const void __user *, unsigned long *);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index ff61772ceedd..479c1038ed4a 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -67,8 +67,6 @@ static int file_ioctl(struct file *filp, unsigned int cmd,
67 return put_user(res, p); 67 return put_user(res, p);
68 } 68 }
69 case FIGETBSZ: 69 case FIGETBSZ:
70 if (inode->i_sb == NULL)
71 return -EBADF;
72 return put_user(inode->i_sb->s_blocksize, p); 70 return put_user(inode->i_sb->s_blocksize, p);
73 case FIONREAD: 71 case FIONREAD:
74 return put_user(i_size_read(inode) - filp->f_pos, p); 72 return put_user(i_size_read(inode) - filp->f_pos, p);
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 64a96cdfe3a4..e99f7ff4ecb4 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -77,8 +77,7 @@ static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags
77{ 77{
78 struct iso_inode_info *ei = foo; 78 struct iso_inode_info *ei = foo;
79 79
80 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 80 if (flags & SLAB_CTOR_CONSTRUCTOR)
81 SLAB_CTOR_CONSTRUCTOR)
82 inode_init_once(&ei->vfs_inode); 81 inode_init_once(&ei->vfs_inode);
83} 82}
84 83
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index 0208cc7ac5d0..47552d4a6324 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/checkpoint.c 2 * linux/fs/jbd/checkpoint.c
3 * 3 *
4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
5 * 5 *
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index be4648bc7a2f..1facfaff97cb 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -20,7 +20,6 @@
20#include <linux/slab.h> 20#include <linux/slab.h>
21#include <linux/mm.h> 21#include <linux/mm.h>
22#include <linux/pagemap.h> 22#include <linux/pagemap.h>
23#include <linux/smp_lock.h>
24 23
25/* 24/*
26 * Default IO end handler for temporary BJ_IO buffer_heads. 25 * Default IO end handler for temporary BJ_IO buffer_heads.
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 10fff9443938..46fe7439fb91 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -28,7 +28,6 @@
28#include <linux/jbd.h> 28#include <linux/jbd.h>
29#include <linux/errno.h> 29#include <linux/errno.h>
30#include <linux/slab.h> 30#include <linux/slab.h>
31#include <linux/smp_lock.h>
32#include <linux/init.h> 31#include <linux/init.h>
33#include <linux/mm.h> 32#include <linux/mm.h>
34#include <linux/freezer.h> 33#include <linux/freezer.h>
@@ -211,10 +210,16 @@ end_loop:
211 return 0; 210 return 0;
212} 211}
213 212
214static void journal_start_thread(journal_t *journal) 213static int journal_start_thread(journal_t *journal)
215{ 214{
216 kthread_run(kjournald, journal, "kjournald"); 215 struct task_struct *t;
216
217 t = kthread_run(kjournald, journal, "kjournald");
218 if (IS_ERR(t))
219 return PTR_ERR(t);
220
217 wait_event(journal->j_wait_done_commit, journal->j_task != 0); 221 wait_event(journal->j_wait_done_commit, journal->j_task != 0);
222 return 0;
218} 223}
219 224
220static void journal_kill_thread(journal_t *journal) 225static void journal_kill_thread(journal_t *journal)
@@ -840,8 +845,7 @@ static int journal_reset(journal_t *journal)
840 845
841 /* Add the dynamic fields and write it to disk. */ 846 /* Add the dynamic fields and write it to disk. */
842 journal_update_superblock(journal, 1); 847 journal_update_superblock(journal, 1);
843 journal_start_thread(journal); 848 return journal_start_thread(journal);
844 return 0;
845} 849}
846 850
847/** 851/**
diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
index 11563fe2a52b..2a5f4b833e35 100644
--- a/fs/jbd/recovery.c
+++ b/fs/jbd/recovery.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/recovery.c 2 * linux/fs/jbd/recovery.c
3 * 3 *
4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
5 * 5 *
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index d204ab394f36..824e3b7d4ec1 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/revoke.c 2 * linux/fs/jbd/revoke.c
3 * 3 *
4 * Written by Stephen C. Tweedie <sct@redhat.com>, 2000 4 * Written by Stephen C. Tweedie <sct@redhat.com>, 2000
5 * 5 *
@@ -66,7 +66,6 @@
66#include <linux/errno.h> 66#include <linux/errno.h>
67#include <linux/slab.h> 67#include <linux/slab.h>
68#include <linux/list.h> 68#include <linux/list.h>
69#include <linux/smp_lock.h>
70#include <linux/init.h> 69#include <linux/init.h>
71#endif 70#endif
72 71
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index cceaf57e3778..772b6531a2a2 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/transaction.c 2 * linux/fs/jbd/transaction.c
3 * 3 *
4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1998 4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
5 * 5 *
@@ -23,7 +23,6 @@
23#include <linux/errno.h> 23#include <linux/errno.h>
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/timer.h> 25#include <linux/timer.h>
26#include <linux/smp_lock.h>
27#include <linux/mm.h> 26#include <linux/mm.h>
28#include <linux/highmem.h> 27#include <linux/highmem.h>
29 28
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 68039fa9a566..3fccde7ba008 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/checkpoint.c 2 * linux/fs/jbd2/checkpoint.c
3 * 3 *
4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
5 * 5 *
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 6bd8005e3d34..2856e1100a5f 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -20,7 +20,6 @@
20#include <linux/slab.h> 20#include <linux/slab.h>
21#include <linux/mm.h> 21#include <linux/mm.h>
22#include <linux/pagemap.h> 22#include <linux/pagemap.h>
23#include <linux/smp_lock.h>
24 23
25/* 24/*
26 * Default IO end handler for temporary BJ_IO buffer_heads. 25 * Default IO end handler for temporary BJ_IO buffer_heads.
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 44fc32bfd7f1..78d63b818f0b 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -28,7 +28,6 @@
28#include <linux/jbd2.h> 28#include <linux/jbd2.h>
29#include <linux/errno.h> 29#include <linux/errno.h>
30#include <linux/slab.h> 30#include <linux/slab.h>
31#include <linux/smp_lock.h>
32#include <linux/init.h> 31#include <linux/init.h>
33#include <linux/mm.h> 32#include <linux/mm.h>
34#include <linux/freezer.h> 33#include <linux/freezer.h>
@@ -211,10 +210,16 @@ end_loop:
211 return 0; 210 return 0;
212} 211}
213 212
214static void jbd2_journal_start_thread(journal_t *journal) 213static int jbd2_journal_start_thread(journal_t *journal)
215{ 214{
216 kthread_run(kjournald2, journal, "kjournald2"); 215 struct task_struct *t;
216
217 t = kthread_run(kjournald2, journal, "kjournald2");
218 if (IS_ERR(t))
219 return PTR_ERR(t);
220
217 wait_event(journal->j_wait_done_commit, journal->j_task != 0); 221 wait_event(journal->j_wait_done_commit, journal->j_task != 0);
222 return 0;
218} 223}
219 224
220static void journal_kill_thread(journal_t *journal) 225static void journal_kill_thread(journal_t *journal)
@@ -840,8 +845,7 @@ static int journal_reset(journal_t *journal)
840 845
841 /* Add the dynamic fields and write it to disk. */ 846 /* Add the dynamic fields and write it to disk. */
842 jbd2_journal_update_superblock(journal, 1); 847 jbd2_journal_update_superblock(journal, 1);
843 jbd2_journal_start_thread(journal); 848 return jbd2_journal_start_thread(journal);
844 return 0;
845} 849}
846 850
847/** 851/**
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 9f10acafaf70..395c92a04ac9 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/recovery.c 2 * linux/fs/jbd2/recovery.c
3 * 3 *
4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
5 * 5 *
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index f506646ad0ff..9246e763da78 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/revoke.c 2 * linux/fs/jbd2/revoke.c
3 * 3 *
4 * Written by Stephen C. Tweedie <sct@redhat.com>, 2000 4 * Written by Stephen C. Tweedie <sct@redhat.com>, 2000
5 * 5 *
@@ -66,7 +66,6 @@
66#include <linux/errno.h> 66#include <linux/errno.h>
67#include <linux/slab.h> 67#include <linux/slab.h>
68#include <linux/list.h> 68#include <linux/list.h>
69#include <linux/smp_lock.h>
70#include <linux/init.h> 69#include <linux/init.h>
71#endif 70#endif
72 71
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 3a8700153cb0..7946ff43fc40 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/fs/transaction.c 2 * linux/fs/jbd2/transaction.c
3 * 3 *
4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1998 4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
5 * 5 *
@@ -23,7 +23,6 @@
23#include <linux/errno.h> 23#include <linux/errno.h>
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/timer.h> 25#include <linux/timer.h>
26#include <linux/smp_lock.h>
27#include <linux/mm.h> 26#include <linux/mm.h>
28#include <linux/highmem.h> 27#include <linux/highmem.h>
29 28
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index e51164a8a8d4..45368f8bbe72 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -47,8 +47,7 @@ static void jffs2_i_init_once(void * foo, struct kmem_cache * cachep, unsigned l
47{ 47{
48 struct jffs2_inode_info *ei = (struct jffs2_inode_info *) foo; 48 struct jffs2_inode_info *ei = (struct jffs2_inode_info *) foo;
49 49
50 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 50 if (flags & SLAB_CTOR_CONSTRUCTOR) {
51 SLAB_CTOR_CONSTRUCTOR) {
52 init_MUTEX(&ei->sem); 51 init_MUTEX(&ei->sem);
53 inode_init_once(&ei->vfs_inode); 52 inode_init_once(&ei->vfs_inode);
54 } 53 }
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index e285022f006c..3467dde27e5a 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -55,7 +55,6 @@ void jfs_read_inode(struct inode *inode)
55 inode->i_op = &jfs_file_inode_operations; 55 inode->i_op = &jfs_file_inode_operations;
56 init_special_inode(inode, inode->i_mode, inode->i_rdev); 56 init_special_inode(inode, inode->i_mode, inode->i_rdev);
57 } 57 }
58 jfs_set_inode_flags(inode);
59} 58}
60 59
61/* 60/*
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
index ed814b1ff4d9..fe063af6fd2f 100644
--- a/fs/jfs/ioctl.c
+++ b/fs/jfs/ioctl.c
@@ -59,6 +59,7 @@ int jfs_ioctl(struct inode * inode, struct file * filp, unsigned int cmd,
59 59
60 switch (cmd) { 60 switch (cmd) {
61 case JFS_IOC_GETFLAGS: 61 case JFS_IOC_GETFLAGS:
62 jfs_get_inode_flags(jfs_inode);
62 flags = jfs_inode->mode2 & JFS_FL_USER_VISIBLE; 63 flags = jfs_inode->mode2 & JFS_FL_USER_VISIBLE;
63 flags = jfs_map_ext2(flags, 0); 64 flags = jfs_map_ext2(flags, 0);
64 return put_user(flags, (int __user *) arg); 65 return put_user(flags, (int __user *) arg);
@@ -78,6 +79,7 @@ int jfs_ioctl(struct inode * inode, struct file * filp, unsigned int cmd,
78 if (!S_ISDIR(inode->i_mode)) 79 if (!S_ISDIR(inode->i_mode))
79 flags &= ~JFS_DIRSYNC_FL; 80 flags &= ~JFS_DIRSYNC_FL;
80 81
82 jfs_get_inode_flags(jfs_inode);
81 oldflags = jfs_inode->mode2; 83 oldflags = jfs_inode->mode2;
82 84
83 /* 85 /*
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index 82b0544bd76d..f3b1ebb22280 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -1507,7 +1507,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results)
1507 if (l2nb < budmin) { 1507 if (l2nb < budmin) {
1508 1508
1509 /* search the lower level dmap control pages to get 1509 /* search the lower level dmap control pages to get
1510 * the starting block number of the the dmap that 1510 * the starting block number of the dmap that
1511 * contains or starts off the free space. 1511 * contains or starts off the free space.
1512 */ 1512 */
1513 if ((rc = 1513 if ((rc =
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index aa5124b643b1..c6530227cda6 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -386,7 +386,7 @@ int diRead(struct inode *ip)
386 return -EIO; 386 return -EIO;
387 } 387 }
388 388
389 /* locate the the disk inode requested */ 389 /* locate the disk inode requested */
390 dp = (struct dinode *) mp->data; 390 dp = (struct dinode *) mp->data;
391 dp += rel_inode; 391 dp += rel_inode;
392 392
@@ -1407,7 +1407,7 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip)
1407 inum = pip->i_ino + 1; 1407 inum = pip->i_ino + 1;
1408 ino = inum & (INOSPERIAG - 1); 1408 ino = inum & (INOSPERIAG - 1);
1409 1409
1410 /* back off the the hint if it is outside of the iag */ 1410 /* back off the hint if it is outside of the iag */
1411 if (ino == 0) 1411 if (ino == 0)
1412 inum = pip->i_ino; 1412 inum = pip->i_ino;
1413 1413
@@ -3078,6 +3078,7 @@ static int copy_from_dinode(struct dinode * dip, struct inode *ip)
3078 3078
3079 jfs_ip->fileset = le32_to_cpu(dip->di_fileset); 3079 jfs_ip->fileset = le32_to_cpu(dip->di_fileset);
3080 jfs_ip->mode2 = le32_to_cpu(dip->di_mode); 3080 jfs_ip->mode2 = le32_to_cpu(dip->di_mode);
3081 jfs_set_inode_flags(ip);
3081 3082
3082 ip->i_mode = le32_to_cpu(dip->di_mode) & 0xffff; 3083 ip->i_mode = le32_to_cpu(dip->di_mode) & 0xffff;
3083 if (sbi->umask != -1) { 3084 if (sbi->umask != -1) {
@@ -3174,6 +3175,7 @@ static void copy_to_dinode(struct dinode * dip, struct inode *ip)
3174 dip->di_gid = cpu_to_le32(ip->i_gid); 3175 dip->di_gid = cpu_to_le32(ip->i_gid);
3175 else 3176 else
3176 dip->di_gid = cpu_to_le32(jfs_ip->saved_gid); 3177 dip->di_gid = cpu_to_le32(jfs_ip->saved_gid);
3178 jfs_get_inode_flags(jfs_ip);
3177 /* 3179 /*
3178 * mode2 is only needed for storing the higher order bits. 3180 * mode2 is only needed for storing the higher order bits.
3179 * Trust i_mode for the lower order ones 3181 * Trust i_mode for the lower order ones
diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c
index 4c67ed97682b..ed6574bee51a 100644
--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -45,6 +45,24 @@ void jfs_set_inode_flags(struct inode *inode)
45 inode->i_flags |= S_SYNC; 45 inode->i_flags |= S_SYNC;
46} 46}
47 47
48void jfs_get_inode_flags(struct jfs_inode_info *jfs_ip)
49{
50 unsigned int flags = jfs_ip->vfs_inode.i_flags;
51
52 jfs_ip->mode2 &= ~(JFS_IMMUTABLE_FL | JFS_APPEND_FL | JFS_NOATIME_FL |
53 JFS_DIRSYNC_FL | JFS_SYNC_FL);
54 if (flags & S_IMMUTABLE)
55 jfs_ip->mode2 |= JFS_IMMUTABLE_FL;
56 if (flags & S_APPEND)
57 jfs_ip->mode2 |= JFS_APPEND_FL;
58 if (flags & S_NOATIME)
59 jfs_ip->mode2 |= JFS_NOATIME_FL;
60 if (flags & S_DIRSYNC)
61 jfs_ip->mode2 |= JFS_DIRSYNC_FL;
62 if (flags & S_SYNC)
63 jfs_ip->mode2 |= JFS_SYNC_FL;
64}
65
48/* 66/*
49 * NAME: ialloc() 67 * NAME: ialloc()
50 * 68 *
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index 6802837f757e..2374b595f2e1 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
@@ -31,6 +31,7 @@ extern void jfs_truncate(struct inode *);
31extern void jfs_truncate_nolock(struct inode *, loff_t); 31extern void jfs_truncate_nolock(struct inode *, loff_t);
32extern void jfs_free_zero_link(struct inode *); 32extern void jfs_free_zero_link(struct inode *);
33extern struct dentry *jfs_get_parent(struct dentry *dentry); 33extern struct dentry *jfs_get_parent(struct dentry *dentry);
34extern void jfs_get_inode_flags(struct jfs_inode_info *);
34extern void jfs_set_inode_flags(struct inode *); 35extern void jfs_set_inode_flags(struct inode *);
35extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int); 36extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
36 37
diff --git a/fs/jfs/jfs_lock.h b/fs/jfs/jfs_lock.h
index df48ece4b7a3..ecf04882265e 100644
--- a/fs/jfs/jfs_lock.h
+++ b/fs/jfs/jfs_lock.h
@@ -45,7 +45,7 @@ do { \
45 io_schedule(); \ 45 io_schedule(); \
46 lock_cmd; \ 46 lock_cmd; \
47 } \ 47 } \
48 current->state = TASK_RUNNING; \ 48 __set_current_state(TASK_RUNNING); \
49 remove_wait_queue(&wq, &__wait); \ 49 remove_wait_queue(&wq, &__wait); \
50} while (0) 50} while (0)
51 51
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 5065baa530b6..44a2f33cb98d 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -62,7 +62,6 @@
62#include <linux/fs.h> 62#include <linux/fs.h>
63#include <linux/blkdev.h> 63#include <linux/blkdev.h>
64#include <linux/interrupt.h> 64#include <linux/interrupt.h>
65#include <linux/smp_lock.h>
66#include <linux/completion.h> 65#include <linux/completion.h>
67#include <linux/kthread.h> 66#include <linux/kthread.h>
68#include <linux/buffer_head.h> /* for sync_blockdev() */ 67#include <linux/buffer_head.h> /* for sync_blockdev() */
@@ -1590,7 +1589,7 @@ void jfs_flush_journal(struct jfs_log *log, int wait)
1590 set_current_state(TASK_UNINTERRUPTIBLE); 1589 set_current_state(TASK_UNINTERRUPTIBLE);
1591 LOGGC_UNLOCK(log); 1590 LOGGC_UNLOCK(log);
1592 schedule(); 1591 schedule();
1593 current->state = TASK_RUNNING; 1592 __set_current_state(TASK_RUNNING);
1594 LOGGC_LOCK(log); 1593 LOGGC_LOCK(log);
1595 remove_wait_queue(&target->gcwait, &__wait); 1594 remove_wait_queue(&target->gcwait, &__wait);
1596 } 1595 }
@@ -1961,7 +1960,7 @@ static void lbmfree(struct lbuf * bp)
1961/* 1960/*
1962 * NAME: lbmRedrive 1961 * NAME: lbmRedrive
1963 * 1962 *
1964 * FUNCTION: add a log buffer to the the log redrive list 1963 * FUNCTION: add a log buffer to the log redrive list
1965 * 1964 *
1966 * PARAMETER: 1965 * PARAMETER:
1967 * bp - log buffer 1966 * bp - log buffer
@@ -2354,14 +2353,15 @@ int jfsIOWait(void *arg)
2354 lbmStartIO(bp); 2353 lbmStartIO(bp);
2355 spin_lock_irq(&log_redrive_lock); 2354 spin_lock_irq(&log_redrive_lock);
2356 } 2355 }
2357 spin_unlock_irq(&log_redrive_lock);
2358 2356
2359 if (freezing(current)) { 2357 if (freezing(current)) {
2358 spin_unlock_irq(&log_redrive_lock);
2360 refrigerator(); 2359 refrigerator();
2361 } else { 2360 } else {
2362 set_current_state(TASK_INTERRUPTIBLE); 2361 set_current_state(TASK_INTERRUPTIBLE);
2362 spin_unlock_irq(&log_redrive_lock);
2363 schedule(); 2363 schedule();
2364 current->state = TASK_RUNNING; 2364 __set_current_state(TASK_RUNNING);
2365 } 2365 }
2366 } while (!kthread_should_stop()); 2366 } while (!kthread_should_stop());
2367 2367
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 58deae007507..6b3acb0b5781 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -184,8 +184,7 @@ static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
184{ 184{
185 struct metapage *mp = (struct metapage *)foo; 185 struct metapage *mp = (struct metapage *)foo;
186 186
187 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 187 if (flags & SLAB_CTOR_CONSTRUCTOR) {
188 SLAB_CTOR_CONSTRUCTOR) {
189 mp->lid = 0; 188 mp->lid = 0;
190 mp->lsn = 0; 189 mp->lsn = 0;
191 mp->flag = 0; 190 mp->flag = 0;
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index 03893acbfda4..25430d0b0d59 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -44,7 +44,6 @@
44 44
45#include <linux/fs.h> 45#include <linux/fs.h>
46#include <linux/vmalloc.h> 46#include <linux/vmalloc.h>
47#include <linux/smp_lock.h>
48#include <linux/completion.h> 47#include <linux/completion.h>
49#include <linux/freezer.h> 48#include <linux/freezer.h>
50#include <linux/module.h> 49#include <linux/module.h>
@@ -136,7 +135,7 @@ static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event)
136 set_current_state(TASK_UNINTERRUPTIBLE); 135 set_current_state(TASK_UNINTERRUPTIBLE);
137 TXN_UNLOCK(); 136 TXN_UNLOCK();
138 io_schedule(); 137 io_schedule();
139 current->state = TASK_RUNNING; 138 __set_current_state(TASK_RUNNING);
140 remove_wait_queue(event, &wait); 139 remove_wait_queue(event, &wait);
141} 140}
142 141
@@ -2798,7 +2797,7 @@ int jfs_lazycommit(void *arg)
2798 set_current_state(TASK_INTERRUPTIBLE); 2797 set_current_state(TASK_INTERRUPTIBLE);
2799 LAZY_UNLOCK(flags); 2798 LAZY_UNLOCK(flags);
2800 schedule(); 2799 schedule();
2801 current->state = TASK_RUNNING; 2800 __set_current_state(TASK_RUNNING);
2802 remove_wait_queue(&jfs_commit_thread_wait, &wq); 2801 remove_wait_queue(&jfs_commit_thread_wait, &wq);
2803 } 2802 }
2804 } while (!kthread_should_stop()); 2803 } while (!kthread_should_stop());
@@ -2990,7 +2989,7 @@ int jfs_sync(void *arg)
2990 set_current_state(TASK_INTERRUPTIBLE); 2989 set_current_state(TASK_INTERRUPTIBLE);
2991 TXN_UNLOCK(); 2990 TXN_UNLOCK();
2992 schedule(); 2991 schedule();
2993 current->state = TASK_RUNNING; 2992 __set_current_state(TASK_RUNNING);
2994 } 2993 }
2995 } while (!kthread_should_stop()); 2994 } while (!kthread_should_stop());
2996 2995
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 52d73d54a931..ea9dc3e65dcf 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -752,8 +752,7 @@ static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags
752{ 752{
753 struct jfs_inode_info *jfs_ip = (struct jfs_inode_info *) foo; 753 struct jfs_inode_info *jfs_ip = (struct jfs_inode_info *) foo;
754 754
755 if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == 755 if (flags & SLAB_CTOR_CONSTRUCTOR) {
756 SLAB_CTOR_CONSTRUCTOR) {
757 memset(jfs_ip, 0, sizeof(struct jfs_inode_info)); 756 memset(jfs_ip, 0, sizeof(struct jfs_inode_info));
758 INIT_LIST_HEAD(&jfs_ip->anon_inode_list); 757 INIT_LIST_HEAD(&jfs_ip->anon_inode_list);
759 init_rwsem(&jfs_ip->rdwrlock); 758 init_rwsem(&jfs_ip->rdwrlock);
diff --git a/fs/libfs.c b/fs/libfs.c
index d93842d3c0a0..5294de1f40c4 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -159,7 +159,10 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
159 continue; 159 continue;
160 160
161 spin_unlock(&dcache_lock); 161 spin_unlock(&dcache_lock);
162 if (filldir(dirent, next->d_name.name, next->d_name.len, filp->f_pos, next->d_inode->i_ino, dt_type(next->d_inode)) < 0) 162 if (filldir(dirent, next->d_name.name,
163 next->d_name.len, filp->f_pos,
164 next->d_inode->i_ino,
165 dt_type(next->d_inode)) < 0)
163 return 0; 166 return 0;
164 spin_lock(&dcache_lock); 167 spin_lock(&dcache_lock);
165 /* next is still alive */ 168 /* next is still alive */
@@ -220,6 +223,12 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name,
220 root = new_inode(s); 223 root = new_inode(s);
221 if (!root) 224 if (!root)
222 goto Enomem; 225 goto Enomem;
226 /*
227 * since this is the first inode, make it number 1. New inodes created
228 * after this must take care not to collide with it (by passing
229 * max_reserved of 1 to iunique).
230 */
231 root->i_ino = 1;
223 root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR; 232 root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
224 root->i_uid = root->i_gid = 0; 233 root->i_uid = root->i_gid = 0;
225 root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME; 234 root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME;
@@ -360,6 +369,11 @@ int simple_commit_write(struct file *file, struct page *page,
360 return 0; 369 return 0;
361} 370}
362 371
372/*
373 * the inodes created here are not hashed. If you use iunique to generate
374 * unique inode values later for this filesystem, then you must take care
375 * to pass it an appropriate max_reserved value to avoid collisions.
376 */
363int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files) 377int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files)
364{ 378{
365 struct inode *inode; 379 struct inode *inode;
@@ -376,6 +390,11 @@ int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files
376 inode = new_inode(s); 390 inode = new_inode(s);
377 if (!inode) 391 if (!inode)
378 return -ENOMEM; 392 return -ENOMEM;
393 /*
394 * because the root inode is 1, the files array must not contain an
395 * entry at index 1
396 */
397 inode->i_ino = 1;
379 inode->i_mode = S_IFDIR | 0755; 398 inode->i_mode = S_IFDIR | 0755;
380 inode->i_uid = inode->i_gid = 0; 399 inode->i_uid = inode->i_gid = 0;
381 inode->i_blocks = 0; 400 inode->i_blocks = 0;
@@ -391,6 +410,13 @@ int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files
391 for (i = 0; !files->name || files->name[0]; i++, files++) { 410 for (i = 0; !files->name || files->name[0]; i++, files++) {
392 if (!files->name) 411 if (!files->name)
393 continue; 412 continue;
413
414 /* warn if it tries to conflict with the root inode */
415 if (unlikely(i == 1))
416 printk(KERN_WARNING "%s: %s passed in a files array"
417 "with an index of 1!\n", __func__,
418 s->s_type->name);
419
394 dentry = d_alloc_name(root, files->name); 420 dentry = d_alloc_name(root, files->name);
395 if (!dentry) 421 if (!dentry)
396 goto out; 422 goto out;
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index a5c019e1a447..a10343bed160 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -12,7 +12,6 @@
12#include <linux/fs.h> 12#include <linux/fs.h>
13#include <linux/nfs_fs.h> 13#include <linux/nfs_fs.h>
14#include <linux/utsname.h> 14#include <linux/utsname.h>
15#include <linux/smp_lock.h>
16#include <linux/freezer.h> 15#include <linux/freezer.h>
17#include <linux/sunrpc/clnt.h> 16#include <linux/sunrpc/clnt.h>
18#include <linux/sunrpc/svc.h> 17#include <linux/sunrpc/svc.h>
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index eb243edf8932..2102e2d0134d 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -225,16 +225,13 @@ xdr_decode_stat(struct rpc_rqst *rqstp, __be32 *p, struct nsm_res *resp)
225#define SM_monres_sz 2 225#define SM_monres_sz 2
226#define SM_unmonres_sz 1 226#define SM_unmonres_sz 1
227 227
228#ifndef MAX
229# define MAX(a, b) (((a) > (b))? (a) : (b))
230#endif
231
232static struct rpc_procinfo nsm_procedures[] = { 228static struct rpc_procinfo nsm_procedures[] = {
233[SM_MON] = { 229[SM_MON] = {
234 .p_proc = SM_MON, 230 .p_proc = SM_MON,
235 .p_encode = (kxdrproc_t) xdr_encode_mon, 231 .p_encode = (kxdrproc_t) xdr_encode_mon,
236 .p_decode = (kxdrproc_t) xdr_decode_stat_res, 232 .p_decode = (kxdrproc_t) xdr_decode_stat_res,
237 .p_bufsiz = MAX(SM_mon_sz, SM_monres_sz) << 2, 233 .p_arglen = SM_mon_sz,
234 .p_replen = SM_monres_sz,
238 .p_statidx = SM_MON, 235 .p_statidx = SM_MON,
239 .p_name = "MONITOR", 236 .p_name = "MONITOR",
240 }, 237 },
@@ -242,7 +239,8 @@ static struct rpc_procinfo nsm_procedures[] = {
242 .p_proc = SM_UNMON, 239 .p_proc = SM_UNMON,
243 .p_encode = (kxdrproc_t) xdr_encode_unmon, 240 .p_encode = (kxdrproc_t) xdr_encode_unmon,
244 .p_decode = (kxdrproc_t) xdr_decode_stat, 241 .p_decode = (kxdrproc_t) xdr_decode_stat,
245 .p_bufsiz = MAX(SM_mon_id_sz, SM_unmonres_sz) << 2, 242 .p_arglen = SM_mon_id_sz,
243 .p_replen = SM_unmonres_sz,
246 .p_statidx = SM_UNMON, 244 .p_statidx = SM_UNMON,
247 .p_name = "UNMONITOR", 245 .p_name = "UNMONITOR",
248 }, 246 },
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 47a66aa5d55b..bf27b6c6cb6b 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -99,7 +99,9 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
99 return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; 99 return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
100 100
101 /* Now check for conflicting locks */ 101 /* Now check for conflicting locks */
102 resp->status = nlmsvc_testlock(file, &argp->lock, &resp->lock); 102 resp->status = nlmsvc_testlock(rqstp, file, &argp->lock, &resp->lock, &resp->cookie);
103 if (resp->status == nlm_drop_reply)
104 return rpc_drop_reply;
103 105
104 dprintk("lockd: TEST4 status %d\n", ntohl(resp->status)); 106 dprintk("lockd: TEST4 status %d\n", ntohl(resp->status));
105 nlm_release_host(host); 107 nlm_release_host(host);
@@ -143,6 +145,8 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
143 /* Now try to lock the file */ 145 /* Now try to lock the file */
144 resp->status = nlmsvc_lock(rqstp, file, &argp->lock, 146 resp->status = nlmsvc_lock(rqstp, file, &argp->lock,
145 argp->block, &argp->cookie); 147 argp->block, &argp->cookie);
148 if (resp->status == nlm_drop_reply)
149 return rpc_drop_reply;
146 150
147 dprintk("lockd: LOCK status %d\n", ntohl(resp->status)); 151 dprintk("lockd: LOCK status %d\n", ntohl(resp->status));
148 nlm_release_host(host); 152 nlm_release_host(host);
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index cf51f849e76c..b3efa4536cc5 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -173,7 +173,7 @@ found:
173 */ 173 */
174static inline struct nlm_block * 174static inline struct nlm_block *
175nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file, 175nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
176 struct nlm_lock *lock, struct nlm_cookie *cookie) 176 struct nlm_lock *lock, struct nlm_cookie *cookie)
177{ 177{
178 struct nlm_block *block; 178 struct nlm_block *block;
179 struct nlm_host *host; 179 struct nlm_host *host;
@@ -210,6 +210,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
210 block->b_daemon = rqstp->rq_server; 210 block->b_daemon = rqstp->rq_server;
211 block->b_host = host; 211 block->b_host = host;
212 block->b_file = file; 212 block->b_file = file;
213 block->b_fl = NULL;
213 file->f_count++; 214 file->f_count++;
214 215
215 /* Add to file's list of blocks */ 216 /* Add to file's list of blocks */
@@ -261,6 +262,7 @@ static void nlmsvc_free_block(struct kref *kref)
261 nlmsvc_freegrantargs(block->b_call); 262 nlmsvc_freegrantargs(block->b_call);
262 nlm_release_call(block->b_call); 263 nlm_release_call(block->b_call);
263 nlm_release_file(block->b_file); 264 nlm_release_file(block->b_file);
265 kfree(block->b_fl);
264 kfree(block); 266 kfree(block);
265} 267}
266 268
@@ -331,6 +333,31 @@ static void nlmsvc_freegrantargs(struct nlm_rqst *call)
331} 333}
332 334
333/* 335/*
336 * Deferred lock request handling for non-blocking lock
337 */
338static u32
339nlmsvc_defer_lock_rqst(struct svc_rqst *rqstp, struct nlm_block *block)
340{
341 u32 status = nlm_lck_denied_nolocks;
342
343 block->b_flags |= B_QUEUED;
344
345 nlmsvc_insert_block(block, NLM_TIMEOUT);
346
347 block->b_cache_req = &rqstp->rq_chandle;
348 if (rqstp->rq_chandle.defer) {
349 block->b_deferred_req =
350 rqstp->rq_chandle.defer(block->b_cache_req);
351 if (block->b_deferred_req != NULL)
352 status = nlm_drop_reply;
353 }
354 dprintk("lockd: nlmsvc_defer_lock_rqst block %p flags %d status %d\n",
355 block, block->b_flags, status);
356
357 return status;
358}
359
360/*
334 * Attempt to establish a lock, and if it can't be granted, block it 361 * Attempt to establish a lock, and if it can't be granted, block it
335 * if required. 362 * if required.
336 */ 363 */
@@ -338,7 +365,7 @@ __be32
338nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, 365nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
339 struct nlm_lock *lock, int wait, struct nlm_cookie *cookie) 366 struct nlm_lock *lock, int wait, struct nlm_cookie *cookie)
340{ 367{
341 struct nlm_block *block, *newblock = NULL; 368 struct nlm_block *block = NULL;
342 int error; 369 int error;
343 __be32 ret; 370 __be32 ret;
344 371
@@ -351,29 +378,58 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
351 wait); 378 wait);
352 379
353 380
354 lock->fl.fl_flags &= ~FL_SLEEP;
355again:
356 /* Lock file against concurrent access */ 381 /* Lock file against concurrent access */
357 mutex_lock(&file->f_mutex); 382 mutex_lock(&file->f_mutex);
358 /* Get existing block (in case client is busy-waiting) */ 383 /* Get existing block (in case client is busy-waiting)
384 * or create new block
385 */
359 block = nlmsvc_lookup_block(file, lock); 386 block = nlmsvc_lookup_block(file, lock);
360 if (block == NULL) { 387 if (block == NULL) {
361 if (newblock != NULL) 388 block = nlmsvc_create_block(rqstp, file, lock, cookie);
362 lock = &newblock->b_call->a_args.lock; 389 ret = nlm_lck_denied_nolocks;
363 } else 390 if (block == NULL)
391 goto out;
364 lock = &block->b_call->a_args.lock; 392 lock = &block->b_call->a_args.lock;
393 } else
394 lock->fl.fl_flags &= ~FL_SLEEP;
365 395
366 error = posix_lock_file(file->f_file, &lock->fl); 396 if (block->b_flags & B_QUEUED) {
367 lock->fl.fl_flags &= ~FL_SLEEP; 397 dprintk("lockd: nlmsvc_lock deferred block %p flags %d\n",
398 block, block->b_flags);
399 if (block->b_granted) {
400 nlmsvc_unlink_block(block);
401 ret = nlm_granted;
402 goto out;
403 }
404 if (block->b_flags & B_TIMED_OUT) {
405 nlmsvc_unlink_block(block);
406 ret = nlm_lck_denied;
407 goto out;
408 }
409 ret = nlm_drop_reply;
410 goto out;
411 }
368 412
369 dprintk("lockd: posix_lock_file returned %d\n", error); 413 if (!wait)
414 lock->fl.fl_flags &= ~FL_SLEEP;
415 error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL);
416 lock->fl.fl_flags &= ~FL_SLEEP;
370 417
418 dprintk("lockd: vfs_lock_file returned %d\n", error);
371 switch(error) { 419 switch(error) {
372 case 0: 420 case 0:
373 ret = nlm_granted; 421 ret = nlm_granted;
374 goto out; 422 goto out;
375 case -EAGAIN: 423 case -EAGAIN:
424 ret = nlm_lck_denied;
376 break; 425 break;
426 case -EINPROGRESS:
427 if (wait)
428 break;
429 /* Filesystem lock operation is in progress
430 Add it to the queue waiting for callback */
431 ret = nlmsvc_defer_lock_rqst(rqstp, block);
432 goto out;
377 case -EDEADLK: 433 case -EDEADLK:
378 ret = nlm_deadlock; 434 ret = nlm_deadlock;
379 goto out; 435 goto out;
@@ -387,26 +443,11 @@ again:
387 goto out; 443 goto out;
388 444
389 ret = nlm_lck_blocked; 445 ret = nlm_lck_blocked;
390 if (block != NULL)
391 goto out;
392
393 /* If we don't have a block, create and initialize it. Then
394 * retry because we may have slept in kmalloc. */
395 /* We have to release f_mutex as nlmsvc_create_block may try to
396 * to claim it while doing host garbage collection */
397 if (newblock == NULL) {
398 mutex_unlock(&file->f_mutex);
399 dprintk("lockd: blocking on this lock (allocating).\n");
400 if (!(newblock = nlmsvc_create_block(rqstp, file, lock, cookie)))
401 return nlm_lck_denied_nolocks;
402 goto again;
403 }
404 446
405 /* Append to list of blocked */ 447 /* Append to list of blocked */
406 nlmsvc_insert_block(newblock, NLM_NEVER); 448 nlmsvc_insert_block(block, NLM_NEVER);
407out: 449out:
408 mutex_unlock(&file->f_mutex); 450 mutex_unlock(&file->f_mutex);
409 nlmsvc_release_block(newblock);
410 nlmsvc_release_block(block); 451 nlmsvc_release_block(block);
411 dprintk("lockd: nlmsvc_lock returned %u\n", ret); 452 dprintk("lockd: nlmsvc_lock returned %u\n", ret);
412 return ret; 453 return ret;
@@ -416,9 +457,14 @@ out:
416 * Test for presence of a conflicting lock. 457 * Test for presence of a conflicting lock.
417 */ 458 */
418__be32 459__be32
419nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock, 460nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
420 struct nlm_lock *conflock) 461 struct nlm_lock *lock, struct nlm_lock *conflock,
462 struct nlm_cookie *cookie)
421{ 463{
464 struct nlm_block *block = NULL;
465 int error;
466 __be32 ret;
467
422 dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n", 468 dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n",
423 file->f_file->f_path.dentry->d_inode->i_sb->s_id, 469 file->f_file->f_path.dentry->d_inode->i_sb->s_id,
424 file->f_file->f_path.dentry->d_inode->i_ino, 470 file->f_file->f_path.dentry->d_inode->i_ino,
@@ -426,19 +472,70 @@ nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock,
426 (long long)lock->fl.fl_start, 472 (long long)lock->fl.fl_start,
427 (long long)lock->fl.fl_end); 473 (long long)lock->fl.fl_end);
428 474
429 if (posix_test_lock(file->f_file, &lock->fl, &conflock->fl)) { 475 /* Get existing block (in case client is busy-waiting) */
430 dprintk("lockd: conflicting lock(ty=%d, %Ld-%Ld)\n", 476 block = nlmsvc_lookup_block(file, lock);
431 conflock->fl.fl_type, 477
432 (long long)conflock->fl.fl_start, 478 if (block == NULL) {
433 (long long)conflock->fl.fl_end); 479 struct file_lock *conf = kzalloc(sizeof(*conf), GFP_KERNEL);
434 conflock->caller = "somehost"; /* FIXME */ 480
435 conflock->len = strlen(conflock->caller); 481 if (conf == NULL)
436 conflock->oh.len = 0; /* don't return OH info */ 482 return nlm_granted;
437 conflock->svid = conflock->fl.fl_pid; 483 block = nlmsvc_create_block(rqstp, file, lock, cookie);
438 return nlm_lck_denied; 484 if (block == NULL) {
485 kfree(conf);
486 return nlm_granted;
487 }
488 block->b_fl = conf;
489 }
490 if (block->b_flags & B_QUEUED) {
491 dprintk("lockd: nlmsvc_testlock deferred block %p flags %d fl %p\n",
492 block, block->b_flags, block->b_fl);
493 if (block->b_flags & B_TIMED_OUT) {
494 nlmsvc_unlink_block(block);
495 return nlm_lck_denied;
496 }
497 if (block->b_flags & B_GOT_CALLBACK) {
498 if (block->b_fl != NULL
499 && block->b_fl->fl_type != F_UNLCK) {
500 lock->fl = *block->b_fl;
501 goto conf_lock;
502 }
503 else {
504 nlmsvc_unlink_block(block);
505 return nlm_granted;
506 }
507 }
508 return nlm_drop_reply;
439 } 509 }
440 510
441 return nlm_granted; 511 error = vfs_test_lock(file->f_file, &lock->fl);
512 if (error == -EINPROGRESS)
513 return nlmsvc_defer_lock_rqst(rqstp, block);
514 if (error) {
515 ret = nlm_lck_denied_nolocks;
516 goto out;
517 }
518 if (lock->fl.fl_type == F_UNLCK) {
519 ret = nlm_granted;
520 goto out;
521 }
522
523conf_lock:
524 dprintk("lockd: conflicting lock(ty=%d, %Ld-%Ld)\n",
525 lock->fl.fl_type, (long long)lock->fl.fl_start,
526 (long long)lock->fl.fl_end);
527 conflock->caller = "somehost"; /* FIXME */
528 conflock->len = strlen(conflock->caller);
529 conflock->oh.len = 0; /* don't return OH info */
530 conflock->svid = lock->fl.fl_pid;
531 conflock->fl.fl_type = lock->fl.fl_type;
532 conflock->fl.fl_start = lock->fl.fl_start;
533 conflock->fl.fl_end = lock->fl.fl_end;
534 ret = nlm_lck_denied;
535out:
536 if (block)
537 nlmsvc_release_block(block);
538 return ret;
442} 539}
443 540
444/* 541/*
@@ -464,7 +561,7 @@ nlmsvc_unlock(struct nlm_file *file, struct nlm_lock *lock)
464 nlmsvc_cancel_blocked(file, lock); 561 nlmsvc_cancel_blocked(file, lock);
465 562
466 lock->fl.fl_type = F_UNLCK; 563 lock->fl.fl_type = F_UNLCK;
467 error = posix_lock_file(file->f_file, &lock->fl); 564 error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL);
468 565
469 return (error < 0)? nlm_lck_denied_nolocks : nlm_granted; 566 return (error < 0)? nlm_lck_denied_nolocks : nlm_granted;
470} 567}
@@ -493,6 +590,8 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock)
493 block = nlmsvc_lookup_block(file, lock); 590 block = nlmsvc_lookup_block(file, lock);
494 mutex_unlock(&file->f_mutex); 591 mutex_unlock(&file->f_mutex);
495 if (block != NULL) { 592 if (block != NULL) {
593 vfs_cancel_lock(block->b_file->f_file,
594 &block->b_call->a_args.lock.fl);
496 status = nlmsvc_unlink_block(block); 595 status = nlmsvc_unlink_block(block);
497 nlmsvc_release_block(block); 596 nlmsvc_release_block(block);
498 } 597 }
@@ -500,6 +599,63 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock)
500} 599}
501 600
502/* 601/*
602 * This is a callback from the filesystem for VFS file lock requests.
603 * It will be used if fl_grant is defined and the filesystem can not
604 * respond to the request immediately.
605 * For GETLK request it will copy the reply to the nlm_block.
606 * For SETLK or SETLKW request it will get the local posix lock.
607 * In all cases it will move the block to the head of nlm_blocked q where
608 * nlmsvc_retry_blocked() can send back a reply for SETLKW or revisit the
609 * deferred rpc for GETLK and SETLK.
610 */
611static void
612nlmsvc_update_deferred_block(struct nlm_block *block, struct file_lock *conf,
613 int result)
614{
615 block->b_flags |= B_GOT_CALLBACK;
616 if (result == 0)
617 block->b_granted = 1;
618 else
619 block->b_flags |= B_TIMED_OUT;
620 if (conf) {
621 if (block->b_fl)
622 locks_copy_lock(block->b_fl, conf);
623 }
624}
625
626static int nlmsvc_grant_deferred(struct file_lock *fl, struct file_lock *conf,
627 int result)
628{
629 struct nlm_block *block;
630 int rc = -ENOENT;
631
632 lock_kernel();
633 list_for_each_entry(block, &nlm_blocked, b_list) {
634 if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) {
635 dprintk("lockd: nlmsvc_notify_blocked block %p flags %d\n",
636 block, block->b_flags);
637 if (block->b_flags & B_QUEUED) {
638 if (block->b_flags & B_TIMED_OUT) {
639 rc = -ENOLCK;
640 break;
641 }
642 nlmsvc_update_deferred_block(block, conf, result);
643 } else if (result == 0)
644 block->b_granted = 1;
645
646 nlmsvc_insert_block(block, 0);
647 svc_wake_up(block->b_daemon);
648 rc = 0;
649 break;
650 }
651 }
652 unlock_kernel();
653 if (rc == -ENOENT)
654 printk(KERN_WARNING "lockd: grant for unknown block\n");
655 return rc;
656}
657
658/*
503 * Unblock a blocked lock request. This is a callback invoked from the 659 * Unblock a blocked lock request. This is a callback invoked from the
504 * VFS layer when a lock on which we blocked is removed. 660 * VFS layer when a lock on which we blocked is removed.
505 * 661 *
@@ -531,6 +687,7 @@ static int nlmsvc_same_owner(struct file_lock *fl1, struct file_lock *fl2)
531struct lock_manager_operations nlmsvc_lock_operations = { 687struct lock_manager_operations nlmsvc_lock_operations = {
532 .fl_compare_owner = nlmsvc_same_owner, 688 .fl_compare_owner = nlmsvc_same_owner,
533 .fl_notify = nlmsvc_notify_blocked, 689 .fl_notify = nlmsvc_notify_blocked,
690 .fl_grant = nlmsvc_grant_deferred,
534}; 691};
535 692
536/* 693/*
@@ -553,6 +710,8 @@ nlmsvc_grant_blocked(struct nlm_block *block)
553 710
554 dprintk("lockd: grant blocked lock %p\n", block); 711 dprintk("lockd: grant blocked lock %p\n", block);
555 712
713 kref_get(&block->b_count);
714
556 /* Unlink block request from list */ 715 /* Unlink block request from list */
557 nlmsvc_unlink_block(block); 716 nlmsvc_unlink_block(block);
558 717
@@ -566,20 +725,23 @@ nlmsvc_grant_blocked(struct nlm_block *block)
566 725
567 /* Try the lock operation again */ 726 /* Try the lock operation again */
568 lock->fl.fl_flags |= FL_SLEEP; 727 lock->fl.fl_flags |= FL_SLEEP;
569 error = posix_lock_file(file->f_file, &lock->fl); 728 error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL);
570 lock->fl.fl_flags &= ~FL_SLEEP; 729 lock->fl.fl_flags &= ~FL_SLEEP;
571 730
572 switch (error) { 731 switch (error) {
573 case 0: 732 case 0:
574 break; 733 break;
575 case -EAGAIN: 734 case -EAGAIN:
576 dprintk("lockd: lock still blocked\n"); 735 case -EINPROGRESS:
736 dprintk("lockd: lock still blocked error %d\n", error);
577 nlmsvc_insert_block(block, NLM_NEVER); 737 nlmsvc_insert_block(block, NLM_NEVER);
738 nlmsvc_release_block(block);
578 return; 739 return;
579 default: 740 default:
580 printk(KERN_WARNING "lockd: unexpected error %d in %s!\n", 741 printk(KERN_WARNING "lockd: unexpected error %d in %s!\n",
581 -error, __FUNCTION__); 742 -error, __FUNCTION__);
582 nlmsvc_insert_block(block, 10 * HZ); 743 nlmsvc_insert_block(block, 10 * HZ);
744 nlmsvc_release_block(block);
583 return; 745 return;
584 } 746 }
585 747
@@ -592,7 +754,6 @@ callback:
592 nlmsvc_insert_block(block, 30 * HZ); 754 nlmsvc_insert_block(block, 30 * HZ);
593 755
594 /* Call the client */ 756 /* Call the client */
595 kref_get(&block->b_count);
596 nlm_async_call(block->b_call, NLMPROC_GRANTED_MSG, &nlmsvc_grant_ops); 757 nlm_async_call(block->b_call, NLMPROC_GRANTED_MSG, &nlmsvc_grant_ops);
597} 758}
598 759
@@ -665,6 +826,23 @@ nlmsvc_grant_reply(struct nlm_cookie *cookie, __be32 status)
665 nlmsvc_release_block(block); 826 nlmsvc_release_block(block);
666} 827}
667 828
829/* Helper function to handle retry of a deferred block.
830 * If it is a blocking lock, call grant_blocked.
831 * For a non-blocking lock or test lock, revisit the request.
832 */
833static void
834retry_deferred_block(struct nlm_block *block)
835{
836 if (!(block->b_flags & B_GOT_CALLBACK))
837 block->b_flags |= B_TIMED_OUT;
838 nlmsvc_insert_block(block, NLM_TIMEOUT);
839 dprintk("revisit block %p flags %d\n", block, block->b_flags);
840 if (block->b_deferred_req) {
841 block->b_deferred_req->revisit(block->b_deferred_req, 0);
842 block->b_deferred_req = NULL;
843 }
844}
845
668/* 846/*
669 * Retry all blocked locks that have been notified. This is where lockd 847 * Retry all blocked locks that have been notified. This is where lockd
670 * picks up locks that can be granted, or grant notifications that must 848 * picks up locks that can be granted, or grant notifications that must
@@ -688,9 +866,12 @@ nlmsvc_retry_blocked(void)
688 866
689 dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n", 867 dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n",
690 block, block->b_when); 868 block, block->b_when);
691 kref_get(&block->b_count); 869 if (block->b_flags & B_QUEUED) {
692 nlmsvc_grant_blocked(block); 870 dprintk("nlmsvc_retry_blocked delete block (%p, granted=%d, flags=%d)\n",
693 nlmsvc_release_block(block); 871 block, block->b_granted, block->b_flags);
872 retry_deferred_block(block);
873 } else
874 nlmsvc_grant_blocked(block);
694 } 875 }
695 876
696 return timeout; 877 return timeout;
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 31cb48425733..9cd5c8b37593 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -33,6 +33,7 @@ cast_to_nlm(__be32 status, u32 vers)
33 case nlm_lck_denied_nolocks: 33 case nlm_lck_denied_nolocks:
34 case nlm_lck_blocked: 34 case nlm_lck_blocked:
35 case nlm_lck_denied_grace_period: 35 case nlm_lck_denied_grace_period:
36 case nlm_drop_reply:
36 break; 37 break;
37 case nlm4_deadlock: 38 case nlm4_deadlock:
38 status = nlm_lck_denied; 39 status = nlm_lck_denied;
@@ -127,7 +128,9 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
127 return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; 128 return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
128 129
129 /* Now check for conflicting locks */ 130 /* Now check for conflicting locks */
130 resp->status = cast_status(nlmsvc_testlock(file, &argp->lock, &resp->lock)); 131 resp->status = cast_status(nlmsvc_testlock(rqstp, file, &argp->lock, &resp->lock, &resp->cookie));
132 if (resp->status == nlm_drop_reply)
133 return rpc_drop_reply;
131 134
132 dprintk("lockd: TEST status %d vers %d\n", 135 dprintk("lockd: TEST status %d vers %d\n",
133 ntohl(resp->status), rqstp->rq_vers); 136 ntohl(resp->status), rqstp->rq_vers);
@@ -172,6 +175,8 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
172 /* Now try to lock the file */ 175 /* Now try to lock the file */
173 resp->status = cast_status(nlmsvc_lock(rqstp, file, &argp->lock, 176 resp->status = cast_status(nlmsvc_lock(rqstp, file, &argp->lock,
174 argp->block, &argp->cookie)); 177 argp->block, &argp->cookie));
178 if (resp->status == nlm_drop_reply)
179 return rpc_drop_reply;
175 180
176 dprintk("lockd: LOCK status %d\n", ntohl(resp->status)); 181 dprintk("lockd: LOCK status %d\n", ntohl(resp->status));
177 nlm_release_host(host); 182 nlm_release_host(host);
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index c0df00c74ce3..84ebba33b98d 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -182,7 +182,7 @@ again:
182 lock.fl_type = F_UNLCK; 182 lock.fl_type = F_UNLCK;
183 lock.fl_start = 0; 183 lock.fl_start = 0;
184 lock.fl_end = OFFSET_MAX; 184 lock.fl_end = OFFSET_MAX;
185 if (posix_lock_file(file->f_file, &lock) < 0) { 185 if (vfs_lock_file(file->f_file, F_SETLK, &lock, NULL) < 0) {
186 printk("lockd: unlock failure in %s:%d\n", 186 printk("lockd: unlock failure in %s:%d\n",
187 __FILE__, __LINE__); 187 __FILE__, __LINE__);
188 return 1; 188 return 1;
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 34dae5d70738..9702956d206c 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -510,17 +510,20 @@ nlmclt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
510 return 0; 510 return 0;
511} 511}
512 512
513#if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ)
514# error "NLM host name cannot be larger than XDR_MAX_NETOBJ!"
515#endif
516
513/* 517/*
514 * Buffer requirements for NLM 518 * Buffer requirements for NLM
515 */ 519 */
516#define NLM_void_sz 0 520#define NLM_void_sz 0
517#define NLM_cookie_sz 1+XDR_QUADLEN(NLM_MAXCOOKIELEN) 521#define NLM_cookie_sz 1+XDR_QUADLEN(NLM_MAXCOOKIELEN)
518#define NLM_caller_sz 1+XDR_QUADLEN(sizeof(utsname()->nodename)) 522#define NLM_caller_sz 1+XDR_QUADLEN(NLMCLNT_OHSIZE)
519#define NLM_netobj_sz 1+XDR_QUADLEN(XDR_MAX_NETOBJ) 523#define NLM_owner_sz 1+XDR_QUADLEN(NLMCLNT_OHSIZE)
520/* #define NLM_owner_sz 1+XDR_QUADLEN(NLM_MAXOWNER) */
521#define NLM_fhandle_sz 1+XDR_QUADLEN(NFS2_FHSIZE) 524#define NLM_fhandle_sz 1+XDR_QUADLEN(NFS2_FHSIZE)
522#define NLM_lock_sz 3+NLM_caller_sz+NLM_netobj_sz+NLM_fhandle_sz 525#define NLM_lock_sz 3+NLM_caller_sz+NLM_owner_sz+NLM_fhandle_sz
523#define NLM_holder_sz 4+NLM_netobj_sz 526#define NLM_holder_sz 4+NLM_owner_sz
524 527
525#define NLM_testargs_sz NLM_cookie_sz+1+NLM_lock_sz 528#define NLM_testargs_sz NLM_cookie_sz+1+NLM_lock_sz
526#define NLM_lockargs_sz NLM_cookie_sz+4+NLM_lock_sz 529#define NLM_lockargs_sz NLM_cookie_sz+4+NLM_lock_sz
@@ -531,10 +534,6 @@ nlmclt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
531#define NLM_res_sz NLM_cookie_sz+1 534#define NLM_res_sz NLM_cookie_sz+1
532#define NLM_norep_sz 0 535#define NLM_norep_sz 0
533 536
534#ifndef MAX
535# define MAX(a, b) (((a) > (b))? (a) : (b))
536#endif
537
538/* 537/*
539 * For NLM, a void procedure really returns nothing 538 * For NLM, a void procedure really returns nothing
540 */ 539 */
@@ -545,7 +544,8 @@ nlmclt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
545 .p_proc = NLMPROC_##proc, \ 544 .p_proc = NLMPROC_##proc, \
546 .p_encode = (kxdrproc_t) nlmclt_encode_##argtype, \ 545 .p_encode = (kxdrproc_t) nlmclt_encode_##argtype, \
547 .p_decode = (kxdrproc_t) nlmclt_decode_##restype, \ 546 .p_decode = (kxdrproc_t) nlmclt_decode_##restype, \
548 .p_bufsiz = MAX(NLM_##argtype##_sz, NLM_##restype##_sz) << 2, \ 547 .p_arglen = NLM_##argtype##_sz, \
548 .p_replen = NLM_##restype##_sz, \
549 .p_statidx = NLMPROC_##proc, \ 549 .p_statidx = NLMPROC_##proc, \
550 .p_name = #proc, \ 550 .p_name = #proc, \
551 } 551 }
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index a78240551219..ce1efdbe1b3a 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -516,17 +516,24 @@ nlm4clt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
516 return 0; 516 return 0;
517} 517}
518 518
519#if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ)
520# error "NLM host name cannot be larger than XDR_MAX_NETOBJ!"
521#endif
522
523#if (NLMCLNT_OHSIZE > NLM_MAXSTRLEN)
524# error "NLM host name cannot be larger than NLM's maximum string length!"
525#endif
526
519/* 527/*
520 * Buffer requirements for NLM 528 * Buffer requirements for NLM
521 */ 529 */
522#define NLM4_void_sz 0 530#define NLM4_void_sz 0
523#define NLM4_cookie_sz 1+XDR_QUADLEN(NLM_MAXCOOKIELEN) 531#define NLM4_cookie_sz 1+XDR_QUADLEN(NLM_MAXCOOKIELEN)
524#define NLM4_caller_sz 1+XDR_QUADLEN(NLM_MAXSTRLEN) 532#define NLM4_caller_sz 1+XDR_QUADLEN(NLMCLNT_OHSIZE)
525#define NLM4_netobj_sz 1+XDR_QUADLEN(XDR_MAX_NETOBJ) 533#define NLM4_owner_sz 1+XDR_QUADLEN(NLMCLNT_OHSIZE)
526/* #define NLM4_owner_sz 1+XDR_QUADLEN(NLM4_MAXOWNER) */
527#define NLM4_fhandle_sz 1+XDR_QUADLEN(NFS3_FHSIZE) 534#define NLM4_fhandle_sz 1+XDR_QUADLEN(NFS3_FHSIZE)
528#define NLM4_lock_sz 5+NLM4_caller_sz+NLM4_netobj_sz+NLM4_fhandle_sz 535#define NLM4_lock_sz 5+NLM4_caller_sz+NLM4_owner_sz+NLM4_fhandle_sz
529#define NLM4_holder_sz 6+NLM4_netobj_sz 536#define NLM4_holder_sz 6+NLM4_owner_sz
530 537
531#define NLM4_testargs_sz NLM4_cookie_sz+1+NLM4_lock_sz 538#define NLM4_testargs_sz NLM4_cookie_sz+1+NLM4_lock_sz
532#define NLM4_lockargs_sz NLM4_cookie_sz+4+NLM4_lock_sz 539#define NLM4_lockargs_sz NLM4_cookie_sz+4+NLM4_lock_sz
@@ -537,10 +544,6 @@ nlm4clt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
537#define NLM4_res_sz NLM4_cookie_sz+1 544#define NLM4_res_sz NLM4_cookie_sz+1
538#define NLM4_norep_sz 0 545#define NLM4_norep_sz 0
539 546
540#ifndef MAX
541# define MAX(a,b) (((a) > (b))? (a) : (b))
542#endif
543
544/* 547/*
545 * For NLM, a void procedure really returns nothing 548 * For NLM, a void procedure really returns nothing
546 */ 549 */
@@ -551,7 +554,8 @@ nlm4clt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
551 .p_proc = NLMPROC_##proc, \ 554 .p_proc = NLMPROC_##proc, \
552 .p_encode = (kxdrproc_t) nlm4clt_encode_##argtype, \ 555 .p_encode = (kxdrproc_t) nlm4clt_encode_##argtype, \
553 .p_decode = (kxdrproc_t) nlm4clt_decode_##restype, \ 556 .p_decode = (kxdrproc_t) nlm4clt_decode_##restype, \
554 .p_bufsiz = MAX(NLM4_##argtype##_sz, NLM4_##restype##_sz) << 2, \ 557 .p_arglen = NLM4_##argtype##_sz, \
558 .p_replen = NLM4_##restype##_sz, \
555 .p_statidx = NLMPROC_##proc, \ 559 .p_statidx = NLMPROC_##proc, \
556 .p_name = #proc, \ 560 .p_name = #proc, \
557 } 561 }
diff --git a/fs/locks.c b/fs/locks.c
index 52a81005dab4..671a034dc999 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -203,8 +203,7 @@ static void init_once(void *foo, struct kmem_cache *cache, unsigned long flags)
203{ 203{
204 struct file_lock *lock = (struct file_lock *) foo; 204 struct file_lock *lock = (struct file_lock *) foo;
205 205
206 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) != 206 if (!(flags & SLAB_CTOR_CONSTRUCTOR))
207 SLAB_CTOR_CONSTRUCTOR)
208 return; 207 return;
209 208
210 locks_init_lock(lock); 209 locks_init_lock(lock);
@@ -666,11 +665,11 @@ static int locks_block_on_timeout(struct file_lock *blocker, struct file_lock *w
666} 665}
667 666
668int 667int
669posix_test_lock(struct file *filp, struct file_lock *fl, 668posix_test_lock(struct file *filp, struct file_lock *fl)
670 struct file_lock *conflock)
671{ 669{
672 struct file_lock *cfl; 670 struct file_lock *cfl;
673 671
672 fl->fl_type = F_UNLCK;
674 lock_kernel(); 673 lock_kernel();
675 for (cfl = filp->f_path.dentry->d_inode->i_flock; cfl; cfl = cfl->fl_next) { 674 for (cfl = filp->f_path.dentry->d_inode->i_flock; cfl; cfl = cfl->fl_next) {
676 if (!IS_POSIX(cfl)) 675 if (!IS_POSIX(cfl))
@@ -679,7 +678,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl,
679 break; 678 break;
680 } 679 }
681 if (cfl) { 680 if (cfl) {
682 __locks_copy_lock(conflock, cfl); 681 __locks_copy_lock(fl, cfl);
683 unlock_kernel(); 682 unlock_kernel();
684 return 1; 683 return 1;
685 } 684 }
@@ -801,7 +800,7 @@ out:
801 return error; 800 return error;
802} 801}
803 802
804static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request, struct file_lock *conflock) 803static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock)
805{ 804{
806 struct file_lock *fl; 805 struct file_lock *fl;
807 struct file_lock *new_fl = NULL; 806 struct file_lock *new_fl = NULL;
@@ -1007,6 +1006,7 @@ static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request
1007 * posix_lock_file - Apply a POSIX-style lock to a file 1006 * posix_lock_file - Apply a POSIX-style lock to a file
1008 * @filp: The file to apply the lock to 1007 * @filp: The file to apply the lock to
1009 * @fl: The lock to be applied 1008 * @fl: The lock to be applied
1009 * @conflock: Place to return a copy of the conflicting lock, if found.
1010 * 1010 *
1011 * Add a POSIX style lock to a file. 1011 * Add a POSIX style lock to a file.
1012 * We merge adjacent & overlapping locks whenever possible. 1012 * We merge adjacent & overlapping locks whenever possible.
@@ -1016,26 +1016,12 @@ static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request
1016 * whether or not a lock was successfully freed by testing the return 1016 * whether or not a lock was successfully freed by testing the return
1017 * value for -ENOENT. 1017 * value for -ENOENT.
1018 */ 1018 */
1019int posix_lock_file(struct file *filp, struct file_lock *fl) 1019int posix_lock_file(struct file *filp, struct file_lock *fl,
1020{
1021 return __posix_lock_file_conf(filp->f_path.dentry->d_inode, fl, NULL);
1022}
1023EXPORT_SYMBOL(posix_lock_file);
1024
1025/**
1026 * posix_lock_file_conf - Apply a POSIX-style lock to a file
1027 * @filp: The file to apply the lock to
1028 * @fl: The lock to be applied
1029 * @conflock: Place to return a copy of the conflicting lock, if found.
1030 *
1031 * Except for the conflock parameter, acts just like posix_lock_file.
1032 */
1033int posix_lock_file_conf(struct file *filp, struct file_lock *fl,
1034 struct file_lock *conflock) 1020 struct file_lock *conflock)
1035{ 1021{
1036 return __posix_lock_file_conf(filp->f_path.dentry->d_inode, fl, conflock); 1022 return __posix_lock_file(filp->f_path.dentry->d_inode, fl, conflock);
1037} 1023}
1038EXPORT_SYMBOL(posix_lock_file_conf); 1024EXPORT_SYMBOL(posix_lock_file);
1039 1025
1040/** 1026/**
1041 * posix_lock_file_wait - Apply a POSIX-style lock to a file 1027 * posix_lock_file_wait - Apply a POSIX-style lock to a file
@@ -1051,7 +1037,7 @@ int posix_lock_file_wait(struct file *filp, struct file_lock *fl)
1051 int error; 1037 int error;
1052 might_sleep (); 1038 might_sleep ();
1053 for (;;) { 1039 for (;;) {
1054 error = posix_lock_file(filp, fl); 1040 error = posix_lock_file(filp, fl, NULL);
1055 if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP)) 1041 if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP))
1056 break; 1042 break;
1057 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); 1043 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
@@ -1123,7 +1109,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
1123 fl.fl_end = offset + count - 1; 1109 fl.fl_end = offset + count - 1;
1124 1110
1125 for (;;) { 1111 for (;;) {
1126 error = __posix_lock_file_conf(inode, &fl, NULL); 1112 error = __posix_lock_file(inode, &fl, NULL);
1127 if (error != -EAGAIN) 1113 if (error != -EAGAIN)
1128 break; 1114 break;
1129 if (!(fl.fl_flags & FL_SLEEP)) 1115 if (!(fl.fl_flags & FL_SLEEP))
@@ -1611,12 +1597,62 @@ asmlinkage long sys_flock(unsigned int fd, unsigned int cmd)
1611 return error; 1597 return error;
1612} 1598}
1613 1599
1600/**
1601 * vfs_test_lock - test file byte range lock
1602 * @filp: The file to test lock for
1603 * @fl: The lock to test
1604 * @conf: Place to return a copy of the conflicting lock, if found
1605 *
1606 * Returns -ERRNO on failure. Indicates presence of conflicting lock by
1607 * setting conf->fl_type to something other than F_UNLCK.
1608 */
1609int vfs_test_lock(struct file *filp, struct file_lock *fl)
1610{
1611 if (filp->f_op && filp->f_op->lock)
1612 return filp->f_op->lock(filp, F_GETLK, fl);
1613 posix_test_lock(filp, fl);
1614 return 0;
1615}
1616EXPORT_SYMBOL_GPL(vfs_test_lock);
1617
1618static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl)
1619{
1620 flock->l_pid = fl->fl_pid;
1621#if BITS_PER_LONG == 32
1622 /*
1623 * Make sure we can represent the posix lock via
1624 * legacy 32bit flock.
1625 */
1626 if (fl->fl_start > OFFT_OFFSET_MAX)
1627 return -EOVERFLOW;
1628 if (fl->fl_end != OFFSET_MAX && fl->fl_end > OFFT_OFFSET_MAX)
1629 return -EOVERFLOW;
1630#endif
1631 flock->l_start = fl->fl_start;
1632 flock->l_len = fl->fl_end == OFFSET_MAX ? 0 :
1633 fl->fl_end - fl->fl_start + 1;
1634 flock->l_whence = 0;
1635 return 0;
1636}
1637
1638#if BITS_PER_LONG == 32
1639static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl)
1640{
1641 flock->l_pid = fl->fl_pid;
1642 flock->l_start = fl->fl_start;
1643 flock->l_len = fl->fl_end == OFFSET_MAX ? 0 :
1644 fl->fl_end - fl->fl_start + 1;
1645 flock->l_whence = 0;
1646 flock->l_type = fl->fl_type;
1647}
1648#endif
1649
1614/* Report the first existing lock that would conflict with l. 1650/* Report the first existing lock that would conflict with l.
1615 * This implements the F_GETLK command of fcntl(). 1651 * This implements the F_GETLK command of fcntl().
1616 */ 1652 */
1617int fcntl_getlk(struct file *filp, struct flock __user *l) 1653int fcntl_getlk(struct file *filp, struct flock __user *l)
1618{ 1654{
1619 struct file_lock *fl, cfl, file_lock; 1655 struct file_lock file_lock;
1620 struct flock flock; 1656 struct flock flock;
1621 int error; 1657 int error;
1622 1658
@@ -1631,38 +1667,15 @@ int fcntl_getlk(struct file *filp, struct flock __user *l)
1631 if (error) 1667 if (error)
1632 goto out; 1668 goto out;
1633 1669
1634 if (filp->f_op && filp->f_op->lock) { 1670 error = vfs_test_lock(filp, &file_lock);
1635 error = filp->f_op->lock(filp, F_GETLK, &file_lock); 1671 if (error)
1636 if (file_lock.fl_ops && file_lock.fl_ops->fl_release_private) 1672 goto out;
1637 file_lock.fl_ops->fl_release_private(&file_lock);
1638 if (error < 0)
1639 goto out;
1640 else
1641 fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock);
1642 } else {
1643 fl = (posix_test_lock(filp, &file_lock, &cfl) ? &cfl : NULL);
1644 }
1645 1673
1646 flock.l_type = F_UNLCK; 1674 flock.l_type = file_lock.fl_type;
1647 if (fl != NULL) { 1675 if (file_lock.fl_type != F_UNLCK) {
1648 flock.l_pid = fl->fl_pid; 1676 error = posix_lock_to_flock(&flock, &file_lock);
1649#if BITS_PER_LONG == 32 1677 if (error)
1650 /*
1651 * Make sure we can represent the posix lock via
1652 * legacy 32bit flock.
1653 */
1654 error = -EOVERFLOW;
1655 if (fl->fl_start > OFFT_OFFSET_MAX)
1656 goto out;
1657 if ((fl->fl_end != OFFSET_MAX)
1658 && (fl->fl_end > OFFT_OFFSET_MAX))
1659 goto out; 1678 goto out;
1660#endif
1661 flock.l_start = fl->fl_start;
1662 flock.l_len = fl->fl_end == OFFSET_MAX ? 0 :
1663 fl->fl_end - fl->fl_start + 1;
1664 flock.l_whence = 0;
1665 flock.l_type = fl->fl_type;
1666 } 1679 }
1667 error = -EFAULT; 1680 error = -EFAULT;
1668 if (!copy_to_user(l, &flock, sizeof(flock))) 1681 if (!copy_to_user(l, &flock, sizeof(flock)))
@@ -1671,6 +1684,48 @@ out:
1671 return error; 1684 return error;
1672} 1685}
1673 1686
1687/**
1688 * vfs_lock_file - file byte range lock
1689 * @filp: The file to apply the lock to
1690 * @cmd: type of locking operation (F_SETLK, F_GETLK, etc.)
1691 * @fl: The lock to be applied
1692 * @conf: Place to return a copy of the conflicting lock, if found.
1693 *
1694 * A caller that doesn't care about the conflicting lock may pass NULL
1695 * as the final argument.
1696 *
1697 * If the filesystem defines a private ->lock() method, then @conf will
1698 * be left unchanged; so a caller that cares should initialize it to
1699 * some acceptable default.
1700 *
1701 * To avoid blocking kernel daemons, such as lockd, that need to acquire POSIX
1702 * locks, the ->lock() interface may return asynchronously, before the lock has
1703 * been granted or denied by the underlying filesystem, if (and only if)
1704 * fl_grant is set. Callers expecting ->lock() to return asynchronously
1705 * will only use F_SETLK, not F_SETLKW; they will set FL_SLEEP if (and only if)
1706 * the request is for a blocking lock. When ->lock() does return asynchronously,
1707 * it must return -EINPROGRESS, and call ->fl_grant() when the lock
1708 * request completes.
1709 * If the request is for non-blocking lock the file system should return
1710 * -EINPROGRESS then try to get the lock and call the callback routine with
1711 * the result. If the request timed out the callback routine will return a
1712 * nonzero return code and the file system should release the lock. The file
1713 * system is also responsible to keep a corresponding posix lock when it
1714 * grants a lock so the VFS can find out which locks are locally held and do
1715 * the correct lock cleanup when required.
1716 * The underlying filesystem must not drop the kernel lock or call
1717 * ->fl_grant() before returning to the caller with a -EINPROGRESS
1718 * return code.
1719 */
1720int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
1721{
1722 if (filp->f_op && filp->f_op->lock)
1723 return filp->f_op->lock(filp, cmd, fl);
1724 else
1725 return posix_lock_file(filp, fl, conf);
1726}
1727EXPORT_SYMBOL_GPL(vfs_lock_file);
1728
1674/* Apply the lock described by l to an open file descriptor. 1729/* Apply the lock described by l to an open file descriptor.
1675 * This implements both the F_SETLK and F_SETLKW commands of fcntl(). 1730 * This implements both the F_SETLK and F_SETLKW commands of fcntl().
1676 */ 1731 */
@@ -1733,21 +1788,17 @@ again:
1733 if (error) 1788 if (error)
1734 goto out; 1789 goto out;
1735 1790
1736 if (filp->f_op && filp->f_op->lock != NULL) 1791 for (;;) {
1737 error = filp->f_op->lock(filp, cmd, file_lock); 1792 error = vfs_lock_file(filp, cmd, file_lock, NULL);
1738 else { 1793 if (error != -EAGAIN || cmd == F_SETLK)
1739 for (;;) {
1740 error = posix_lock_file(filp, file_lock);
1741 if ((error != -EAGAIN) || (cmd == F_SETLK))
1742 break;
1743 error = wait_event_interruptible(file_lock->fl_wait,
1744 !file_lock->fl_next);
1745 if (!error)
1746 continue;
1747
1748 locks_delete_block(file_lock);
1749 break; 1794 break;
1750 } 1795 error = wait_event_interruptible(file_lock->fl_wait,
1796 !file_lock->fl_next);
1797 if (!error)
1798 continue;
1799
1800 locks_delete_block(file_lock);
1801 break;
1751 } 1802 }
1752 1803
1753 /* 1804 /*
@@ -1770,7 +1821,7 @@ out:
1770 */ 1821 */
1771int fcntl_getlk64(struct file *filp, struct flock64 __user *l) 1822int fcntl_getlk64(struct file *filp, struct flock64 __user *l)
1772{ 1823{
1773 struct file_lock *fl, cfl, file_lock; 1824 struct file_lock file_lock;
1774 struct flock64 flock; 1825 struct flock64 flock;
1775 int error; 1826 int error;
1776 1827
@@ -1785,27 +1836,14 @@ int fcntl_getlk64(struct file *filp, struct flock64 __user *l)
1785 if (error) 1836 if (error)
1786 goto out; 1837 goto out;
1787 1838
1788 if (filp->f_op && filp->f_op->lock) { 1839 error = vfs_test_lock(filp, &file_lock);
1789 error = filp->f_op->lock(filp, F_GETLK, &file_lock); 1840 if (error)
1790 if (file_lock.fl_ops && file_lock.fl_ops->fl_release_private) 1841 goto out;
1791 file_lock.fl_ops->fl_release_private(&file_lock); 1842
1792 if (error < 0) 1843 flock.l_type = file_lock.fl_type;
1793 goto out; 1844 if (file_lock.fl_type != F_UNLCK)
1794 else 1845 posix_lock_to_flock64(&flock, &file_lock);
1795 fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock); 1846
1796 } else {
1797 fl = (posix_test_lock(filp, &file_lock, &cfl) ? &cfl : NULL);
1798 }
1799
1800 flock.l_type = F_UNLCK;
1801 if (fl != NULL) {
1802 flock.l_pid = fl->fl_pid;
1803 flock.l_start = fl->fl_start;
1804 flock.l_len = fl->fl_end == OFFSET_MAX ? 0 :
1805 fl->fl_end - fl->fl_start + 1;
1806 flock.l_whence = 0;
1807 flock.l_type = fl->fl_type;
1808 }
1809 error = -EFAULT; 1847 error = -EFAULT;
1810 if (!copy_to_user(l, &flock, sizeof(flock))) 1848 if (!copy_to_user(l, &flock, sizeof(flock)))
1811 error = 0; 1849 error = 0;
@@ -1876,21 +1914,17 @@ again:
1876 if (error) 1914 if (error)
1877 goto out; 1915 goto out;
1878 1916
1879 if (filp->f_op && filp->f_op->lock != NULL) 1917 for (;;) {
1880 error = filp->f_op->lock(filp, cmd, file_lock); 1918 error = vfs_lock_file(filp, cmd, file_lock, NULL);
1881 else { 1919 if (error != -EAGAIN || cmd == F_SETLK64)
1882 for (;;) {
1883 error = posix_lock_file(filp, file_lock);
1884 if ((error != -EAGAIN) || (cmd == F_SETLK64))
1885 break;
1886 error = wait_event_interruptible(file_lock->fl_wait,
1887 !file_lock->fl_next);
1888 if (!error)
1889 continue;
1890
1891 locks_delete_block(file_lock);
1892 break; 1920 break;
1893 } 1921 error = wait_event_interruptible(file_lock->fl_wait,
1922 !file_lock->fl_next);
1923 if (!error)
1924 continue;
1925
1926 locks_delete_block(file_lock);
1927 break;
1894 } 1928 }
1895 1929
1896 /* 1930 /*
@@ -1935,10 +1969,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
1935 lock.fl_ops = NULL; 1969 lock.fl_ops = NULL;
1936 lock.fl_lmops = NULL; 1970 lock.fl_lmops = NULL;
1937 1971
1938 if (filp->f_op && filp->f_op->lock != NULL) 1972 vfs_lock_file(filp, F_SETLK, &lock, NULL);
1939 filp->f_op->lock(filp, F_SETLK, &lock);
1940 else
1941 posix_lock_file(filp, &lock);
1942 1973
1943 if (lock.fl_ops && lock.fl_ops->fl_release_private) 1974 if (lock.fl_ops && lock.fl_ops->fl_release_private)
1944 lock.fl_ops->fl_release_private(&lock); 1975 lock.fl_ops->fl_release_private(&lock);
@@ -2015,6 +2046,22 @@ posix_unblock_lock(struct file *filp, struct file_lock *waiter)
2015 2046
2016EXPORT_SYMBOL(posix_unblock_lock); 2047EXPORT_SYMBOL(posix_unblock_lock);
2017 2048
2049/**
2050 * vfs_cancel_lock - file byte range unblock lock
2051 * @filp: The file to apply the unblock to
2052 * @fl: The lock to be unblocked
2053 *
2054 * Used by lock managers to cancel blocked requests
2055 */
2056int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
2057{
2058 if (filp->f_op && filp->f_op->lock)
2059 return filp->f_op->lock(filp, F_CANCELLK, fl);
2060 return 0;
2061}
2062
2063EXPORT_SYMBOL_GPL(vfs_cancel_lock);
2064
2018static void lock_get_status(char* out, struct file_lock *fl, int id, char *pfx) 2065static void lock_get_status(char* out, struct file_lock *fl, int id, char *pfx)
2019{ 2066{
2020 struct inode *inode = NULL; 2067 struct inode *inode = NULL;
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index cb4cb571fddf..e207cbe70951 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -65,7 +65,6 @@ static struct page * dir_get_page(struct inode *dir, unsigned long n)
65 struct address_space *mapping = dir->i_mapping; 65 struct address_space *mapping = dir->i_mapping;
66 struct page *page = read_mapping_page(mapping, n, NULL); 66 struct page *page = read_mapping_page(mapping, n, NULL);
67 if (!IS_ERR(page)) { 67 if (!IS_ERR(page)) {
68 wait_on_page_locked(page);
69 kmap(page); 68 kmap(page);
70 if (!PageUptodate(page)) 69 if (!PageUptodate(page))
71 goto fail; 70 goto fail;
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 92e383af3709..2f4d43a2a310 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -73,8 +73,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
73{ 73{
74 struct minix_inode_info *ei = (struct minix_inode_info *) foo; 74 struct minix_inode_info *ei = (struct minix_inode_info *) foo;
75 75
76 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 76 if (flags & SLAB_CTOR_CONSTRUCTOR)
77 SLAB_CTOR_CONSTRUCTOR)
78 inode_init_once(&ei->vfs_inode); 77 inode_init_once(&ei->vfs_inode);
79} 78}
80 79
diff --git a/fs/mpage.c b/fs/mpage.c
index 692a3e578fc8..0fb914fc2ee0 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -284,11 +284,9 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
284 } 284 }
285 285
286 if (first_hole != blocks_per_page) { 286 if (first_hole != blocks_per_page) {
287 char *kaddr = kmap_atomic(page, KM_USER0); 287 zero_user_page(page, first_hole << blkbits,
288 memset(kaddr + (first_hole << blkbits), 0, 288 PAGE_CACHE_SIZE - (first_hole << blkbits),
289 PAGE_CACHE_SIZE - (first_hole << blkbits)); 289 KM_USER0);
290 flush_dcache_page(page);
291 kunmap_atomic(kaddr, KM_USER0);
292 if (first_hole == 0) { 290 if (first_hole == 0) {
293 SetPageUptodate(page); 291 SetPageUptodate(page);
294 unlock_page(page); 292 unlock_page(page);
@@ -576,14 +574,11 @@ page_is_mapped:
576 * written out to the file." 574 * written out to the file."
577 */ 575 */
578 unsigned offset = i_size & (PAGE_CACHE_SIZE - 1); 576 unsigned offset = i_size & (PAGE_CACHE_SIZE - 1);
579 char *kaddr;
580 577
581 if (page->index > end_index || !offset) 578 if (page->index > end_index || !offset)
582 goto confused; 579 goto confused;
583 kaddr = kmap_atomic(page, KM_USER0); 580 zero_user_page(page, offset, PAGE_CACHE_SIZE - offset,
584 memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); 581 KM_USER0);
585 flush_dcache_page(page);
586 kunmap_atomic(kaddr, KM_USER0);
587 } 582 }
588 583
589 /* 584 /*
@@ -663,12 +658,7 @@ confused:
663 /* 658 /*
664 * The caller has a ref on the inode, so *mapping is stable 659 * The caller has a ref on the inode, so *mapping is stable
665 */ 660 */
666 if (*ret) { 661 mapping_set_error(mapping, *ret);
667 if (*ret == -ENOSPC)
668 set_bit(AS_ENOSPC, &mapping->flags);
669 else
670 set_bit(AS_EIO, &mapping->flags);
671 }
672out: 662out:
673 return bio; 663 return bio;
674} 664}
@@ -776,14 +766,7 @@ retry:
776 766
777 if (writepage) { 767 if (writepage) {
778 ret = (*writepage)(page, wbc); 768 ret = (*writepage)(page, wbc);
779 if (ret) { 769 mapping_set_error(mapping, ret);
780 if (ret == -ENOSPC)
781 set_bit(AS_ENOSPC,
782 &mapping->flags);
783 else
784 set_bit(AS_EIO,
785 &mapping->flags);
786 }
787 } else { 770 } else {
788 bio = __mpage_writepage(bio, page, get_block, 771 bio = __mpage_writepage(bio, page, get_block,
789 &last_block_in_bio, &ret, wbc, 772 &last_block_in_bio, &ret, wbc,
diff --git a/fs/namei.c b/fs/namei.c
index 880052cadbcd..b3780e3fc88e 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -22,7 +22,6 @@
22#include <linux/quotaops.h> 22#include <linux/quotaops.h>
23#include <linux/pagemap.h> 23#include <linux/pagemap.h>
24#include <linux/fsnotify.h> 24#include <linux/fsnotify.h>
25#include <linux/smp_lock.h>
26#include <linux/personality.h> 25#include <linux/personality.h>
27#include <linux/security.h> 26#include <linux/security.h>
28#include <linux/syscalls.h> 27#include <linux/syscalls.h>
@@ -1153,14 +1152,12 @@ static int fastcall do_path_lookup(int dfd, const char *name,
1153 1152
1154 fput_light(file, fput_needed); 1153 fput_light(file, fput_needed);
1155 } 1154 }
1156 current->total_link_count = 0; 1155
1157 retval = link_path_walk(name, nd); 1156 retval = path_walk(name, nd);
1158out: 1157out:
1159 if (likely(retval == 0)) { 1158 if (unlikely(!retval && !audit_dummy_context() && nd->dentry &&
1160 if (unlikely(!audit_dummy_context() && nd && nd->dentry &&
1161 nd->dentry->d_inode)) 1159 nd->dentry->d_inode))
1162 audit_inode(name, nd->dentry->d_inode); 1160 audit_inode(name, nd->dentry->d_inode);
1163 }
1164out_fail: 1161out_fail:
1165 return retval; 1162 return retval;
1166 1163
@@ -1350,17 +1347,6 @@ struct dentry *lookup_one_len_kern(const char *name, struct dentry *base, int le
1350 return __lookup_hash_kern(&this, base, NULL); 1347 return __lookup_hash_kern(&this, base, NULL);
1351} 1348}
1352 1349
1353/*
1354 * namei()
1355 *
1356 * is used by most simple commands to get the inode of a specified name.
1357 * Open, link etc use their own routines, but this is enough for things
1358 * like 'chmod' etc.
1359 *
1360 * namei exists in two versions: namei/lnamei. The only difference is
1361 * that namei follows links, while lnamei does not.
1362 * SMP-safe
1363 */
1364int fastcall __user_walk_fd(int dfd, const char __user *name, unsigned flags, 1350int fastcall __user_walk_fd(int dfd, const char __user *name, unsigned flags,
1365 struct nameidata *nd) 1351 struct nameidata *nd)
1366{ 1352{
@@ -2671,19 +2657,9 @@ static char *page_getlink(struct dentry * dentry, struct page **ppage)
2671 struct address_space *mapping = dentry->d_inode->i_mapping; 2657 struct address_space *mapping = dentry->d_inode->i_mapping;
2672 page = read_mapping_page(mapping, 0, NULL); 2658 page = read_mapping_page(mapping, 0, NULL);
2673 if (IS_ERR(page)) 2659 if (IS_ERR(page))
2674 goto sync_fail; 2660 return (char*)page;
2675 wait_on_page_locked(page);
2676 if (!PageUptodate(page))
2677 goto async_fail;
2678 *ppage = page; 2661 *ppage = page;
2679 return kmap(page); 2662 return kmap(page);
2680
2681async_fail:
2682 page_cache_release(page);
2683 return ERR_PTR(-EIO);
2684
2685sync_fail:
2686 return (char*)page;
2687} 2663}
2688 2664
2689int page_readlink(struct dentry *dentry, char __user *buffer, int buflen) 2665int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
diff --git a/fs/namespace.c b/fs/namespace.c
index fd999cab7b57..b696e3a0d18f 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -377,6 +377,10 @@ static int show_vfsmnt(struct seq_file *m, void *v)
377 seq_path(m, mnt, mnt->mnt_root, " \t\n\\"); 377 seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
378 seq_putc(m, ' '); 378 seq_putc(m, ' ');
379 mangle(m, mnt->mnt_sb->s_type->name); 379 mangle(m, mnt->mnt_sb->s_type->name);
380 if (mnt->mnt_sb->s_subtype && mnt->mnt_sb->s_subtype[0]) {
381 seq_putc(m, '.');
382 mangle(m, mnt->mnt_sb->s_subtype);
383 }
380 seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw"); 384 seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw");
381 for (fs_infop = fs_info; fs_infop->flag; fs_infop++) { 385 for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
382 if (mnt->mnt_sb->s_flags & fs_infop->flag) 386 if (mnt->mnt_sb->s_flags & fs_infop->flag)
@@ -495,7 +499,7 @@ void release_mounts(struct list_head *head)
495{ 499{
496 struct vfsmount *mnt; 500 struct vfsmount *mnt;
497 while (!list_empty(head)) { 501 while (!list_empty(head)) {
498 mnt = list_entry(head->next, struct vfsmount, mnt_hash); 502 mnt = list_first_entry(head, struct vfsmount, mnt_hash);
499 list_del_init(&mnt->mnt_hash); 503 list_del_init(&mnt->mnt_hash);
500 if (mnt->mnt_parent != mnt) { 504 if (mnt->mnt_parent != mnt) {
501 struct dentry *dentry; 505 struct dentry *dentry;
@@ -882,6 +886,9 @@ static int do_change_type(struct nameidata *nd, int flag)
882 int recurse = flag & MS_REC; 886 int recurse = flag & MS_REC;
883 int type = flag & ~MS_REC; 887 int type = flag & ~MS_REC;
884 888
889 if (!capable(CAP_SYS_ADMIN))
890 return -EPERM;
891
885 if (nd->dentry != nd->mnt->mnt_root) 892 if (nd->dentry != nd->mnt->mnt_root)
886 return -EINVAL; 893 return -EINVAL;
887 894
@@ -1173,7 +1180,7 @@ static void expire_mount_list(struct list_head *graveyard, struct list_head *mou
1173 1180
1174 while (!list_empty(graveyard)) { 1181 while (!list_empty(graveyard)) {
1175 LIST_HEAD(umounts); 1182 LIST_HEAD(umounts);
1176 mnt = list_entry(graveyard->next, struct vfsmount, mnt_expire); 1183 mnt = list_first_entry(graveyard, struct vfsmount, mnt_expire);
1177 list_del_init(&mnt->mnt_expire); 1184 list_del_init(&mnt->mnt_expire);
1178 1185
1179 /* don't do anything if the namespace is dead - all the 1186 /* don't do anything if the namespace is dead - all the
@@ -1441,10 +1448,9 @@ dput_out:
1441 * Allocate a new namespace structure and populate it with contents 1448 * Allocate a new namespace structure and populate it with contents
1442 * copied from the namespace of the passed in task structure. 1449 * copied from the namespace of the passed in task structure.
1443 */ 1450 */
1444struct mnt_namespace *dup_mnt_ns(struct task_struct *tsk, 1451static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
1445 struct fs_struct *fs) 1452 struct fs_struct *fs)
1446{ 1453{
1447 struct mnt_namespace *mnt_ns = tsk->nsproxy->mnt_ns;
1448 struct mnt_namespace *new_ns; 1454 struct mnt_namespace *new_ns;
1449 struct vfsmount *rootmnt = NULL, *pwdmnt = NULL, *altrootmnt = NULL; 1455 struct vfsmount *rootmnt = NULL, *pwdmnt = NULL, *altrootmnt = NULL;
1450 struct vfsmount *p, *q; 1456 struct vfsmount *p, *q;
@@ -1509,36 +1515,21 @@ struct mnt_namespace *dup_mnt_ns(struct task_struct *tsk,
1509 return new_ns; 1515 return new_ns;
1510} 1516}
1511 1517
1512int copy_mnt_ns(int flags, struct task_struct *tsk) 1518struct mnt_namespace *copy_mnt_ns(int flags, struct mnt_namespace *ns,
1519 struct fs_struct *new_fs)
1513{ 1520{
1514 struct mnt_namespace *ns = tsk->nsproxy->mnt_ns;
1515 struct mnt_namespace *new_ns; 1521 struct mnt_namespace *new_ns;
1516 int err = 0;
1517
1518 if (!ns)
1519 return 0;
1520 1522
1523 BUG_ON(!ns);
1521 get_mnt_ns(ns); 1524 get_mnt_ns(ns);
1522 1525
1523 if (!(flags & CLONE_NEWNS)) 1526 if (!(flags & CLONE_NEWNS))
1524 return 0; 1527 return ns;
1525 1528
1526 if (!capable(CAP_SYS_ADMIN)) { 1529 new_ns = dup_mnt_ns(ns, new_fs);
1527 err = -EPERM;
1528 goto out;
1529 }
1530
1531 new_ns = dup_mnt_ns(tsk, tsk->fs);
1532 if (!new_ns) {
1533 err = -ENOMEM;
1534 goto out;
1535 }
1536 1530
1537 tsk->nsproxy->mnt_ns = new_ns;
1538
1539out:
1540 put_mnt_ns(ns); 1531 put_mnt_ns(ns);
1541 return err; 1532 return new_ns;
1542} 1533}
1543 1534
1544asmlinkage long sys_mount(char __user * dev_name, char __user * dir_name, 1535asmlinkage long sys_mount(char __user * dev_name, char __user * dir_name,
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index 6b1f6d27099a..addfd3147ea7 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -17,7 +17,6 @@
17#include <linux/mm.h> 17#include <linux/mm.h>
18#include <linux/slab.h> 18#include <linux/slab.h>
19#include <linux/vmalloc.h> 19#include <linux/vmalloc.h>
20#include <linux/smp_lock.h>
21 20
22#include <linux/ncp_fs.h> 21#include <linux/ncp_fs.h>
23#include "ncplib_kernel.h" 22#include "ncplib_kernel.h"
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 7285c94956c4..c29f00ad495d 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -60,8 +60,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
60{ 60{
61 struct ncp_inode_info *ei = (struct ncp_inode_info *) foo; 61 struct ncp_inode_info *ei = (struct ncp_inode_info *) foo;
62 62
63 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 63 if (flags & SLAB_CTOR_CONSTRUCTOR) {
64 SLAB_CTOR_CONSTRUCTOR) {
65 mutex_init(&ei->open_mutex); 64 mutex_init(&ei->open_mutex);
66 inode_init_once(&ei->vfs_inode); 65 inode_init_once(&ei->vfs_inode);
67 } 66 }
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 2190e6c2792e..50c6821bad26 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -27,7 +27,6 @@
27#include <linux/nfs_mount.h> 27#include <linux/nfs_mount.h>
28#include <linux/nfs4_mount.h> 28#include <linux/nfs4_mount.h>
29#include <linux/lockd/bind.h> 29#include <linux/lockd/bind.h>
30#include <linux/smp_lock.h>
31#include <linux/seq_file.h> 30#include <linux/seq_file.h>
32#include <linux/mount.h> 31#include <linux/mount.h>
33#include <linux/nfs_idmap.h> 32#include <linux/nfs_idmap.h>
@@ -618,7 +617,8 @@ static int nfs_init_server(struct nfs_server *server, const struct nfs_mount_dat
618 if (clp->cl_nfsversion == 3) { 617 if (clp->cl_nfsversion == 3) {
619 if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) 618 if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN)
620 server->namelen = NFS3_MAXNAMLEN; 619 server->namelen = NFS3_MAXNAMLEN;
621 server->caps |= NFS_CAP_READDIRPLUS; 620 if (!(data->flags & NFS_MOUNT_NORDIRPLUS))
621 server->caps |= NFS_CAP_READDIRPLUS;
622 } else { 622 } else {
623 if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN) 623 if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
624 server->namelen = NFS2_MAXNAMLEN; 624 server->namelen = NFS2_MAXNAMLEN;
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index cd3469720cbf..625d8e5fb39d 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -154,6 +154,8 @@ typedef struct {
154 decode_dirent_t decode; 154 decode_dirent_t decode;
155 int plus; 155 int plus;
156 int error; 156 int error;
157 unsigned long timestamp;
158 int timestamp_valid;
157} nfs_readdir_descriptor_t; 159} nfs_readdir_descriptor_t;
158 160
159/* Now we cache directories properly, by stuffing the dirent 161/* Now we cache directories properly, by stuffing the dirent
@@ -195,6 +197,8 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
195 } 197 }
196 goto error; 198 goto error;
197 } 199 }
200 desc->timestamp = timestamp;
201 desc->timestamp_valid = 1;
198 SetPageUptodate(page); 202 SetPageUptodate(page);
199 spin_lock(&inode->i_lock); 203 spin_lock(&inode->i_lock);
200 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; 204 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
@@ -225,6 +229,10 @@ int dir_decode(nfs_readdir_descriptor_t *desc)
225 if (IS_ERR(p)) 229 if (IS_ERR(p))
226 return PTR_ERR(p); 230 return PTR_ERR(p);
227 desc->ptr = p; 231 desc->ptr = p;
232 if (desc->timestamp_valid)
233 desc->entry->fattr->time_start = desc->timestamp;
234 else
235 desc->entry->fattr->valid &= ~NFS_ATTR_FATTR;
228 return 0; 236 return 0;
229} 237}
230 238
@@ -316,14 +324,16 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc)
316 __FUNCTION__, desc->page_index, 324 __FUNCTION__, desc->page_index,
317 (long long) *desc->dir_cookie); 325 (long long) *desc->dir_cookie);
318 326
327 /* If we find the page in the page_cache, we cannot be sure
328 * how fresh the data is, so we will ignore readdir_plus attributes.
329 */
330 desc->timestamp_valid = 0;
319 page = read_cache_page(inode->i_mapping, desc->page_index, 331 page = read_cache_page(inode->i_mapping, desc->page_index,
320 (filler_t *)nfs_readdir_filler, desc); 332 (filler_t *)nfs_readdir_filler, desc);
321 if (IS_ERR(page)) { 333 if (IS_ERR(page)) {
322 status = PTR_ERR(page); 334 status = PTR_ERR(page);
323 goto out; 335 goto out;
324 } 336 }
325 if (!PageUptodate(page))
326 goto read_error;
327 337
328 /* NOTE: Someone else may have changed the READDIRPLUS flag */ 338 /* NOTE: Someone else may have changed the READDIRPLUS flag */
329 desc->page = page; 339 desc->page = page;
@@ -337,9 +347,6 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc)
337 out: 347 out:
338 dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __FUNCTION__, status); 348 dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __FUNCTION__, status);
339 return status; 349 return status;
340 read_error:
341 page_cache_release(page);
342 return -EIO;
343} 350}
344 351
345/* 352/*
@@ -468,6 +475,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
468 struct rpc_cred *cred = nfs_file_cred(file); 475 struct rpc_cred *cred = nfs_file_cred(file);
469 struct page *page = NULL; 476 struct page *page = NULL;
470 int status; 477 int status;
478 unsigned long timestamp;
471 479
472 dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", 480 dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
473 (unsigned long long)*desc->dir_cookie); 481 (unsigned long long)*desc->dir_cookie);
@@ -477,6 +485,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
477 status = -ENOMEM; 485 status = -ENOMEM;
478 goto out; 486 goto out;
479 } 487 }
488 timestamp = jiffies;
480 desc->error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, *desc->dir_cookie, 489 desc->error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, *desc->dir_cookie,
481 page, 490 page,
482 NFS_SERVER(inode)->dtsize, 491 NFS_SERVER(inode)->dtsize,
@@ -487,6 +496,8 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
487 desc->page = page; 496 desc->page = page;
488 desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ 497 desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */
489 if (desc->error >= 0) { 498 if (desc->error >= 0) {
499 desc->timestamp = timestamp;
500 desc->timestamp_valid = 1;
490 if ((status = dir_decode(desc)) == 0) 501 if ((status = dir_decode(desc)) == 0)
491 desc->entry->prev_cookie = *desc->dir_cookie; 502 desc->entry->prev_cookie = *desc->dir_cookie;
492 } else 503 } else
@@ -849,6 +860,10 @@ static int nfs_dentry_delete(struct dentry *dentry)
849static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode) 860static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode)
850{ 861{
851 nfs_inode_return_delegation(inode); 862 nfs_inode_return_delegation(inode);
863 if (S_ISDIR(inode->i_mode))
864 /* drop any readdir cache as it could easily be old */
865 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA;
866
852 if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { 867 if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
853 lock_kernel(); 868 lock_kernel();
854 drop_nlink(inode); 869 drop_nlink(inode);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 2877744cb606..345aa5c0f382 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -41,7 +41,6 @@
41#include <linux/errno.h> 41#include <linux/errno.h>
42#include <linux/sched.h> 42#include <linux/sched.h>
43#include <linux/kernel.h> 43#include <linux/kernel.h>
44#include <linux/smp_lock.h>
45#include <linux/file.h> 44#include <linux/file.h>
46#include <linux/pagemap.h> 45#include <linux/pagemap.h>
47#include <linux/kref.h> 46#include <linux/kref.h>
@@ -54,6 +53,7 @@
54#include <asm/uaccess.h> 53#include <asm/uaccess.h>
55#include <asm/atomic.h> 54#include <asm/atomic.h>
56 55
56#include "internal.h"
57#include "iostat.h" 57#include "iostat.h"
58 58
59#define NFSDBG_FACILITY NFSDBG_VFS 59#define NFSDBG_FACILITY NFSDBG_VFS
@@ -271,7 +271,7 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo
271 bytes = min(rsize,count); 271 bytes = min(rsize,count);
272 272
273 result = -ENOMEM; 273 result = -ENOMEM;
274 data = nfs_readdata_alloc(pgbase + bytes); 274 data = nfs_readdata_alloc(nfs_page_array_len(pgbase, bytes));
275 if (unlikely(!data)) 275 if (unlikely(!data))
276 break; 276 break;
277 277
@@ -602,7 +602,7 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l
602 bytes = min(wsize,count); 602 bytes = min(wsize,count);
603 603
604 result = -ENOMEM; 604 result = -ENOMEM;
605 data = nfs_writedata_alloc(pgbase + bytes); 605 data = nfs_writedata_alloc(nfs_page_array_len(pgbase, bytes));
606 if (unlikely(!data)) 606 if (unlikely(!data))
607 break; 607 break;
608 608
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 8e66b5a2d490..5eaee6dd040b 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -391,17 +391,12 @@ out_swapfile:
391 391
392static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) 392static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
393{ 393{
394 struct file_lock cfl;
395 struct inode *inode = filp->f_mapping->host; 394 struct inode *inode = filp->f_mapping->host;
396 int status = 0; 395 int status = 0;
397 396
398 lock_kernel(); 397 lock_kernel();
399 /* Try local locking first */ 398 /* Try local locking first */
400 if (posix_test_lock(filp, fl, &cfl)) { 399 if (posix_test_lock(filp, fl)) {
401 fl->fl_start = cfl.fl_start;
402 fl->fl_end = cfl.fl_end;
403 fl->fl_type = cfl.fl_type;
404 fl->fl_pid = cfl.fl_pid;
405 goto out; 400 goto out;
406 } 401 }
407 402
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 6ef268f7c300..234778576f09 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -25,7 +25,6 @@
25#include <linux/nfs_mount.h> 25#include <linux/nfs_mount.h>
26#include <linux/nfs4_mount.h> 26#include <linux/nfs4_mount.h>
27#include <linux/lockd/bind.h> 27#include <linux/lockd/bind.h>
28#include <linux/smp_lock.h>
29#include <linux/seq_file.h> 28#include <linux/seq_file.h>
30#include <linux/mount.h> 29#include <linux/mount.h>
31#include <linux/nfs_idmap.h> 30#include <linux/nfs_idmap.h>
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 44aa9b726573..1e9a915d1fea 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1167,8 +1167,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
1167{ 1167{
1168 struct nfs_inode *nfsi = (struct nfs_inode *) foo; 1168 struct nfs_inode *nfsi = (struct nfs_inode *) foo;
1169 1169
1170 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 1170 if (flags & SLAB_CTOR_CONSTRUCTOR) {
1171 SLAB_CTOR_CONSTRUCTOR) {
1172 inode_init_once(&nfsi->vfs_inode); 1171 inode_init_once(&nfsi->vfs_inode);
1173 spin_lock_init(&nfsi->req_lock); 1172 spin_lock_init(&nfsi->req_lock);
1174 INIT_LIST_HEAD(&nfsi->dirty); 1173 INIT_LIST_HEAD(&nfsi->dirty);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 6610f2b02077..ad2b40db1e65 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -231,3 +231,15 @@ unsigned int nfs_page_length(struct page *page)
231 } 231 }
232 return 0; 232 return 0;
233} 233}
234
235/*
236 * Determine the number of pages in an array of length 'len' and
237 * with a base offset of 'base'
238 */
239static inline
240unsigned int nfs_page_array_len(unsigned int base, size_t len)
241{
242 return ((unsigned long)len + (unsigned long)base +
243 PAGE_SIZE - 1) >> PAGE_SHIFT;
244}
245
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index f75fe72b4160..ca5a266a3140 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -133,13 +133,15 @@ xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res)
133 133
134#define MNT_dirpath_sz (1 + 256) 134#define MNT_dirpath_sz (1 + 256)
135#define MNT_fhstatus_sz (1 + 8) 135#define MNT_fhstatus_sz (1 + 8)
136#define MNT_fhstatus3_sz (1 + 16)
136 137
137static struct rpc_procinfo mnt_procedures[] = { 138static struct rpc_procinfo mnt_procedures[] = {
138[MNTPROC_MNT] = { 139[MNTPROC_MNT] = {
139 .p_proc = MNTPROC_MNT, 140 .p_proc = MNTPROC_MNT,
140 .p_encode = (kxdrproc_t) xdr_encode_dirpath, 141 .p_encode = (kxdrproc_t) xdr_encode_dirpath,
141 .p_decode = (kxdrproc_t) xdr_decode_fhstatus, 142 .p_decode = (kxdrproc_t) xdr_decode_fhstatus,
142 .p_bufsiz = MNT_dirpath_sz << 2, 143 .p_arglen = MNT_dirpath_sz,
144 .p_replen = MNT_fhstatus_sz,
143 .p_statidx = MNTPROC_MNT, 145 .p_statidx = MNTPROC_MNT,
144 .p_name = "MOUNT", 146 .p_name = "MOUNT",
145 }, 147 },
@@ -150,7 +152,8 @@ static struct rpc_procinfo mnt3_procedures[] = {
150 .p_proc = MOUNTPROC3_MNT, 152 .p_proc = MOUNTPROC3_MNT,
151 .p_encode = (kxdrproc_t) xdr_encode_dirpath, 153 .p_encode = (kxdrproc_t) xdr_encode_dirpath,
152 .p_decode = (kxdrproc_t) xdr_decode_fhstatus3, 154 .p_decode = (kxdrproc_t) xdr_decode_fhstatus3,
153 .p_bufsiz = MNT_dirpath_sz << 2, 155 .p_arglen = MNT_dirpath_sz,
156 .p_replen = MNT_fhstatus3_sz,
154 .p_statidx = MOUNTPROC3_MNT, 157 .p_statidx = MOUNTPROC3_MNT,
155 .p_name = "MOUNT", 158 .p_name = "MOUNT",
156 }, 159 },
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 3be4e72a0227..abd9f8b48943 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -687,16 +687,13 @@ nfs_stat_to_errno(int stat)
687 return nfs_errtbl[i].errno; 687 return nfs_errtbl[i].errno;
688} 688}
689 689
690#ifndef MAX
691# define MAX(a, b) (((a) > (b))? (a) : (b))
692#endif
693
694#define PROC(proc, argtype, restype, timer) \ 690#define PROC(proc, argtype, restype, timer) \
695[NFSPROC_##proc] = { \ 691[NFSPROC_##proc] = { \
696 .p_proc = NFSPROC_##proc, \ 692 .p_proc = NFSPROC_##proc, \
697 .p_encode = (kxdrproc_t) nfs_xdr_##argtype, \ 693 .p_encode = (kxdrproc_t) nfs_xdr_##argtype, \
698 .p_decode = (kxdrproc_t) nfs_xdr_##restype, \ 694 .p_decode = (kxdrproc_t) nfs_xdr_##restype, \
699 .p_bufsiz = MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2, \ 695 .p_arglen = NFS_##argtype##_sz, \
696 .p_replen = NFS_##restype##_sz, \
700 .p_timer = timer, \ 697 .p_timer = timer, \
701 .p_statidx = NFSPROC_##proc, \ 698 .p_statidx = NFSPROC_##proc, \
702 .p_name = #proc, \ 699 .p_name = #proc, \
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 7d0371e2bad5..45268d6def2e 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -16,7 +16,6 @@
16#include <linux/nfs_fs.h> 16#include <linux/nfs_fs.h>
17#include <linux/nfs_page.h> 17#include <linux/nfs_page.h>
18#include <linux/lockd/bind.h> 18#include <linux/lockd/bind.h>
19#include <linux/smp_lock.h>
20#include <linux/nfs_mount.h> 19#include <linux/nfs_mount.h>
21 20
22#include "iostat.h" 21#include "iostat.h"
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 0ace092d126f..b51df8eb9f01 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1102,16 +1102,13 @@ nfs3_xdr_setaclres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
1102} 1102}
1103#endif /* CONFIG_NFS_V3_ACL */ 1103#endif /* CONFIG_NFS_V3_ACL */
1104 1104
1105#ifndef MAX
1106# define MAX(a, b) (((a) > (b))? (a) : (b))
1107#endif
1108
1109#define PROC(proc, argtype, restype, timer) \ 1105#define PROC(proc, argtype, restype, timer) \
1110[NFS3PROC_##proc] = { \ 1106[NFS3PROC_##proc] = { \
1111 .p_proc = NFS3PROC_##proc, \ 1107 .p_proc = NFS3PROC_##proc, \
1112 .p_encode = (kxdrproc_t) nfs3_xdr_##argtype, \ 1108 .p_encode = (kxdrproc_t) nfs3_xdr_##argtype, \
1113 .p_decode = (kxdrproc_t) nfs3_xdr_##restype, \ 1109 .p_decode = (kxdrproc_t) nfs3_xdr_##restype, \
1114 .p_bufsiz = MAX(NFS3_##argtype##_sz,NFS3_##restype##_sz) << 2, \ 1110 .p_arglen = NFS3_##argtype##_sz, \
1111 .p_replen = NFS3_##restype##_sz, \
1115 .p_timer = timer, \ 1112 .p_timer = timer, \
1116 .p_statidx = NFS3PROC_##proc, \ 1113 .p_statidx = NFS3PROC_##proc, \
1117 .p_name = #proc, \ 1114 .p_name = #proc, \
@@ -1153,7 +1150,8 @@ static struct rpc_procinfo nfs3_acl_procedures[] = {
1153 .p_proc = ACLPROC3_GETACL, 1150 .p_proc = ACLPROC3_GETACL,
1154 .p_encode = (kxdrproc_t) nfs3_xdr_getaclargs, 1151 .p_encode = (kxdrproc_t) nfs3_xdr_getaclargs,
1155 .p_decode = (kxdrproc_t) nfs3_xdr_getaclres, 1152 .p_decode = (kxdrproc_t) nfs3_xdr_getaclres,
1156 .p_bufsiz = MAX(ACL3_getaclargs_sz, ACL3_getaclres_sz) << 2, 1153 .p_arglen = ACL3_getaclargs_sz,
1154 .p_replen = ACL3_getaclres_sz,
1157 .p_timer = 1, 1155 .p_timer = 1,
1158 .p_name = "GETACL", 1156 .p_name = "GETACL",
1159 }, 1157 },
@@ -1161,7 +1159,8 @@ static struct rpc_procinfo nfs3_acl_procedures[] = {
1161 .p_proc = ACLPROC3_SETACL, 1159 .p_proc = ACLPROC3_SETACL,
1162 .p_encode = (kxdrproc_t) nfs3_xdr_setaclargs, 1160 .p_encode = (kxdrproc_t) nfs3_xdr_setaclargs,
1163 .p_decode = (kxdrproc_t) nfs3_xdr_setaclres, 1161 .p_decode = (kxdrproc_t) nfs3_xdr_setaclres,
1164 .p_bufsiz = MAX(ACL3_setaclargs_sz, ACL3_setaclres_sz) << 2, 1162 .p_arglen = ACL3_setaclargs_sz,
1163 .p_replen = ACL3_setaclres_sz,
1165 .p_timer = 0, 1164 .p_timer = 0,
1166 .p_name = "SETACL", 1165 .p_name = "SETACL",
1167 }, 1166 },
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f52cf5c33c6c..d6a30e965787 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2647,8 +2647,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
2647 nfs_inode_return_delegation(inode); 2647 nfs_inode_return_delegation(inode);
2648 buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); 2648 buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
2649 ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); 2649 ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
2650 if (ret == 0) 2650 nfs_zap_caches(inode);
2651 nfs4_write_cached_acl(inode, buf, buflen);
2652 return ret; 2651 return ret;
2653} 2652}
2654 2653
@@ -3018,6 +3017,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
3018 case -NFS4ERR_DENIED: 3017 case -NFS4ERR_DENIED:
3019 status = 0; 3018 status = 0;
3020 } 3019 }
3020 request->fl_ops->fl_release_private(request);
3021out: 3021out:
3022 up_read(&clp->cl_sem); 3022 up_read(&clp->cl_sem);
3023 return status; 3023 return status;
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index f5f4430fb2a4..0505ca124034 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -43,7 +43,6 @@
43 * child task framework of the RPC layer? 43 * child task framework of the RPC layer?
44 */ 44 */
45 45
46#include <linux/smp_lock.h>
47#include <linux/mm.h> 46#include <linux/mm.h>
48#include <linux/pagemap.h> 47#include <linux/pagemap.h>
49#include <linux/sunrpc/sched.h> 48#include <linux/sunrpc/sched.h>
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index f02d522fd788..b8c28f2380a5 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4546,16 +4546,13 @@ nfs4_stat_to_errno(int stat)
4546 return stat; 4546 return stat;
4547} 4547}
4548 4548
4549#ifndef MAX
4550# define MAX(a, b) (((a) > (b))? (a) : (b))
4551#endif
4552
4553#define PROC(proc, argtype, restype) \ 4549#define PROC(proc, argtype, restype) \
4554[NFSPROC4_CLNT_##proc] = { \ 4550[NFSPROC4_CLNT_##proc] = { \
4555 .p_proc = NFSPROC4_COMPOUND, \ 4551 .p_proc = NFSPROC4_COMPOUND, \
4556 .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \ 4552 .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \
4557 .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \ 4553 .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \
4558 .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2, \ 4554 .p_arglen = NFS4_##argtype##_sz, \
4555 .p_replen = NFS4_##restype##_sz, \
4559 .p_statidx = NFSPROC4_CLNT_##proc, \ 4556 .p_statidx = NFSPROC4_CLNT_##proc, \
4560 .p_name = #proc, \ 4557 .p_name = #proc, \
4561 } 4558 }
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 75f819dc0255..49d1008ce1d7 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -428,7 +428,7 @@ static int __init root_nfs_getport(int program, int version, int proto)
428 printk(KERN_NOTICE "Looking up port of RPC %d/%d on %u.%u.%u.%u\n", 428 printk(KERN_NOTICE "Looking up port of RPC %d/%d on %u.%u.%u.%u\n",
429 program, version, NIPQUAD(servaddr)); 429 program, version, NIPQUAD(servaddr));
430 set_sockaddr(&sin, servaddr, 0); 430 set_sockaddr(&sin, servaddr, 0);
431 return rpc_getport_external(&sin, program, version, proto); 431 return rpcb_getport_external(&sin, program, version, proto);
432} 432}
433 433
434 434
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index ca4b1d4ff42b..388950118f59 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -17,7 +17,8 @@
17#include <linux/nfs_page.h> 17#include <linux/nfs_page.h>
18#include <linux/nfs_fs.h> 18#include <linux/nfs_fs.h>
19#include <linux/nfs_mount.h> 19#include <linux/nfs_mount.h>
20#include <linux/writeback.h> 20
21#include "internal.h"
21 22
22#define NFS_PARANOIA 1 23#define NFS_PARANOIA 1
23 24
@@ -50,9 +51,7 @@ nfs_page_free(struct nfs_page *p)
50 * @count: number of bytes to read/write 51 * @count: number of bytes to read/write
51 * 52 *
52 * The page must be locked by the caller. This makes sure we never 53 * The page must be locked by the caller. This makes sure we never
53 * create two different requests for the same page, and avoids 54 * create two different requests for the same page.
54 * a possible deadlock when we reach the hard limit on the number
55 * of dirty pages.
56 * User should ensure it is safe to sleep in this function. 55 * User should ensure it is safe to sleep in this function.
57 */ 56 */
58struct nfs_page * 57struct nfs_page *
@@ -63,16 +62,12 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
63 struct nfs_server *server = NFS_SERVER(inode); 62 struct nfs_server *server = NFS_SERVER(inode);
64 struct nfs_page *req; 63 struct nfs_page *req;
65 64
66 /* Deal with hard limits. */
67 for (;;) { 65 for (;;) {
68 /* try to allocate the request struct */ 66 /* try to allocate the request struct */
69 req = nfs_page_alloc(); 67 req = nfs_page_alloc();
70 if (req != NULL) 68 if (req != NULL)
71 break; 69 break;
72 70
73 /* Try to free up at least one request in order to stay
74 * below the hard limit
75 */
76 if (signalled() && (server->flags & NFS_MOUNT_INTR)) 71 if (signalled() && (server->flags & NFS_MOUNT_INTR))
77 return ERR_PTR(-ERESTARTSYS); 72 return ERR_PTR(-ERESTARTSYS);
78 yield(); 73 yield();
@@ -223,124 +218,151 @@ out:
223} 218}
224 219
225/** 220/**
226 * nfs_coalesce_requests - Split coalesced requests out from a list. 221 * nfs_pageio_init - initialise a page io descriptor
227 * @head: source list 222 * @desc: pointer to descriptor
228 * @dst: destination list 223 * @inode: pointer to inode
229 * @nmax: maximum number of requests to coalesce 224 * @doio: pointer to io function
230 * 225 * @bsize: io block size
231 * Moves a maximum of 'nmax' elements from one list to another. 226 * @io_flags: extra parameters for the io function
232 * The elements are checked to ensure that they form a contiguous set
233 * of pages, and that the RPC credentials are the same.
234 */ 227 */
235int 228void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
236nfs_coalesce_requests(struct list_head *head, struct list_head *dst, 229 struct inode *inode,
237 unsigned int nmax) 230 int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int),
231 size_t bsize,
232 int io_flags)
238{ 233{
239 struct nfs_page *req = NULL; 234 INIT_LIST_HEAD(&desc->pg_list);
240 unsigned int npages = 0; 235 desc->pg_bytes_written = 0;
241 236 desc->pg_count = 0;
242 while (!list_empty(head)) { 237 desc->pg_bsize = bsize;
243 struct nfs_page *prev = req; 238 desc->pg_base = 0;
244 239 desc->pg_inode = inode;
245 req = nfs_list_entry(head->next); 240 desc->pg_doio = doio;
246 if (prev) { 241 desc->pg_ioflags = io_flags;
247 if (req->wb_context->cred != prev->wb_context->cred) 242 desc->pg_error = 0;
248 break;
249 if (req->wb_context->lockowner != prev->wb_context->lockowner)
250 break;
251 if (req->wb_context->state != prev->wb_context->state)
252 break;
253 if (req->wb_index != (prev->wb_index + 1))
254 break;
255
256 if (req->wb_pgbase != 0)
257 break;
258 }
259 nfs_list_remove_request(req);
260 nfs_list_add_request(req, dst);
261 npages++;
262 if (req->wb_pgbase + req->wb_bytes != PAGE_CACHE_SIZE)
263 break;
264 if (npages >= nmax)
265 break;
266 }
267 return npages;
268} 243}
269 244
270#define NFS_SCAN_MAXENTRIES 16
271/** 245/**
272 * nfs_scan_dirty - Scan the radix tree for dirty requests 246 * nfs_can_coalesce_requests - test two requests for compatibility
273 * @mapping: pointer to address space 247 * @prev: pointer to nfs_page
274 * @wbc: writeback_control structure 248 * @req: pointer to nfs_page
275 * @dst: Destination list
276 * 249 *
277 * Moves elements from one of the inode request lists. 250 * The nfs_page structures 'prev' and 'req' are compared to ensure that the
278 * If the number of requests is set to 0, the entire address_space 251 * page data area they describe is contiguous, and that their RPC
279 * starting at index idx_start, is scanned. 252 * credentials, NFSv4 open state, and lockowners are the same.
280 * The requests are *not* checked to ensure that they form a contiguous set. 253 *
281 * You must be holding the inode's req_lock when calling this function 254 * Return 'true' if this is the case, else return 'false'.
282 */ 255 */
283long nfs_scan_dirty(struct address_space *mapping, 256static int nfs_can_coalesce_requests(struct nfs_page *prev,
284 struct writeback_control *wbc, 257 struct nfs_page *req)
285 struct list_head *dst)
286{ 258{
287 struct nfs_inode *nfsi = NFS_I(mapping->host); 259 if (req->wb_context->cred != prev->wb_context->cred)
288 struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
289 struct nfs_page *req;
290 pgoff_t idx_start, idx_end;
291 long res = 0;
292 int found, i;
293
294 if (nfsi->ndirty == 0)
295 return 0; 260 return 0;
296 if (wbc->range_cyclic) { 261 if (req->wb_context->lockowner != prev->wb_context->lockowner)
297 idx_start = 0; 262 return 0;
298 idx_end = ULONG_MAX; 263 if (req->wb_context->state != prev->wb_context->state)
299 } else if (wbc->range_end == 0) { 264 return 0;
300 idx_start = wbc->range_start >> PAGE_CACHE_SHIFT; 265 if (req->wb_index != (prev->wb_index + 1))
301 idx_end = ULONG_MAX; 266 return 0;
302 } else { 267 if (req->wb_pgbase != 0)
303 idx_start = wbc->range_start >> PAGE_CACHE_SHIFT; 268 return 0;
304 idx_end = wbc->range_end >> PAGE_CACHE_SHIFT; 269 if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
305 } 270 return 0;
271 return 1;
272}
306 273
307 for (;;) { 274/**
308 unsigned int toscan = NFS_SCAN_MAXENTRIES; 275 * nfs_pageio_do_add_request - Attempt to coalesce a request into a page list.
276 * @desc: destination io descriptor
277 * @req: request
278 *
279 * Returns true if the request 'req' was successfully coalesced into the
280 * existing list of pages 'desc'.
281 */
282static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
283 struct nfs_page *req)
284{
285 size_t newlen = req->wb_bytes;
309 286
310 found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, 287 if (desc->pg_count != 0) {
311 (void **)&pgvec[0], idx_start, toscan, 288 struct nfs_page *prev;
312 NFS_PAGE_TAG_DIRTY);
313 289
314 /* Did we make progress? */ 290 /*
315 if (found <= 0) 291 * FIXME: ideally we should be able to coalesce all requests
316 break; 292 * that are not block boundary aligned, but currently this
293 * is problematic for the case of bsize < PAGE_CACHE_SIZE,
294 * since nfs_flush_multi and nfs_pagein_multi assume you
295 * can have only one struct nfs_page.
296 */
297 if (desc->pg_bsize < PAGE_SIZE)
298 return 0;
299 newlen += desc->pg_count;
300 if (newlen > desc->pg_bsize)
301 return 0;
302 prev = nfs_list_entry(desc->pg_list.prev);
303 if (!nfs_can_coalesce_requests(prev, req))
304 return 0;
305 } else
306 desc->pg_base = req->wb_pgbase;
307 nfs_list_remove_request(req);
308 nfs_list_add_request(req, &desc->pg_list);
309 desc->pg_count = newlen;
310 return 1;
311}
317 312
318 for (i = 0; i < found; i++) { 313/*
319 req = pgvec[i]; 314 * Helper for nfs_pageio_add_request and nfs_pageio_complete
320 if (!wbc->range_cyclic && req->wb_index > idx_end) 315 */
321 goto out; 316static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
317{
318 if (!list_empty(&desc->pg_list)) {
319 int error = desc->pg_doio(desc->pg_inode,
320 &desc->pg_list,
321 nfs_page_array_len(desc->pg_base,
322 desc->pg_count),
323 desc->pg_count,
324 desc->pg_ioflags);
325 if (error < 0)
326 desc->pg_error = error;
327 else
328 desc->pg_bytes_written += desc->pg_count;
329 }
330 if (list_empty(&desc->pg_list)) {
331 desc->pg_count = 0;
332 desc->pg_base = 0;
333 }
334}
322 335
323 /* Try to lock request and mark it for writeback */ 336/**
324 if (!nfs_set_page_writeback_locked(req)) 337 * nfs_pageio_add_request - Attempt to coalesce a request into a page list.
325 goto next; 338 * @desc: destination io descriptor
326 radix_tree_tag_clear(&nfsi->nfs_page_tree, 339 * @req: request
327 req->wb_index, NFS_PAGE_TAG_DIRTY); 340 *
328 nfsi->ndirty--; 341 * Returns true if the request 'req' was successfully coalesced into the
329 nfs_list_remove_request(req); 342 * existing list of pages 'desc'.
330 nfs_list_add_request(req, dst); 343 */
331 res++; 344int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
332 if (res == LONG_MAX) 345 struct nfs_page *req)
333 goto out; 346{
334next: 347 while (!nfs_pageio_do_add_request(desc, req)) {
335 idx_start = req->wb_index + 1; 348 nfs_pageio_doio(desc);
336 } 349 if (desc->pg_error < 0)
350 return 0;
337 } 351 }
338out: 352 return 1;
339 WARN_ON ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty));
340 return res;
341} 353}
342 354
343/** 355/**
356 * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor
357 * @desc: pointer to io descriptor
358 */
359void nfs_pageio_complete(struct nfs_pageio_descriptor *desc)
360{
361 nfs_pageio_doio(desc);
362}
363
364#define NFS_SCAN_MAXENTRIES 16
365/**
344 * nfs_scan_list - Scan a list for matching requests 366 * nfs_scan_list - Scan a list for matching requests
345 * @nfsi: NFS inode 367 * @nfsi: NFS inode
346 * @head: One of the NFS inode request lists 368 * @head: One of the NFS inode request lists
@@ -355,12 +377,12 @@ out:
355 * You must be holding the inode's req_lock when calling this function 377 * You must be holding the inode's req_lock when calling this function
356 */ 378 */
357int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, 379int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head,
358 struct list_head *dst, unsigned long idx_start, 380 struct list_head *dst, pgoff_t idx_start,
359 unsigned int npages) 381 unsigned int npages)
360{ 382{
361 struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES]; 383 struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
362 struct nfs_page *req; 384 struct nfs_page *req;
363 unsigned long idx_end; 385 pgoff_t idx_end;
364 int found, i; 386 int found, i;
365 int res; 387 int res;
366 388
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 1dcf56de9482..7be0ee2782cb 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -43,7 +43,6 @@
43#include <linux/nfs_fs.h> 43#include <linux/nfs_fs.h>
44#include <linux/nfs_page.h> 44#include <linux/nfs_page.h>
45#include <linux/lockd/bind.h> 45#include <linux/lockd/bind.h>
46#include <linux/smp_lock.h>
47#include "internal.h" 46#include "internal.h"
48 47
49#define NFSDBG_FACILITY NFSDBG_PROC 48#define NFSDBG_FACILITY NFSDBG_PROC
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 6ab4d5a9edf2..9a55807b2a70 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -27,7 +27,8 @@
27 27
28#define NFSDBG_FACILITY NFSDBG_PAGECACHE 28#define NFSDBG_FACILITY NFSDBG_PAGECACHE
29 29
30static int nfs_pagein_one(struct list_head *, struct inode *); 30static int nfs_pagein_multi(struct inode *, struct list_head *, unsigned int, size_t, int);
31static int nfs_pagein_one(struct inode *, struct list_head *, unsigned int, size_t, int);
31static const struct rpc_call_ops nfs_read_partial_ops; 32static const struct rpc_call_ops nfs_read_partial_ops;
32static const struct rpc_call_ops nfs_read_full_ops; 33static const struct rpc_call_ops nfs_read_full_ops;
33 34
@@ -36,9 +37,8 @@ static mempool_t *nfs_rdata_mempool;
36 37
37#define MIN_POOL_READ (32) 38#define MIN_POOL_READ (32)
38 39
39struct nfs_read_data *nfs_readdata_alloc(size_t len) 40struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
40{ 41{
41 unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
42 struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_NOFS); 42 struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_NOFS);
43 43
44 if (p) { 44 if (p) {
@@ -133,7 +133,10 @@ static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
133 memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len); 133 memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);
134 134
135 nfs_list_add_request(new, &one_request); 135 nfs_list_add_request(new, &one_request);
136 nfs_pagein_one(&one_request, inode); 136 if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
137 nfs_pagein_multi(inode, &one_request, 1, len, 0);
138 else
139 nfs_pagein_one(inode, &one_request, 1, len, 0);
137 return 0; 140 return 0;
138} 141}
139 142
@@ -230,7 +233,7 @@ static void nfs_execute_read(struct nfs_read_data *data)
230 * won't see the new data until our attribute cache is updated. This is more 233 * won't see the new data until our attribute cache is updated. This is more
231 * or less conventional NFS client behavior. 234 * or less conventional NFS client behavior.
232 */ 235 */
233static int nfs_pagein_multi(struct list_head *head, struct inode *inode) 236static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags)
234{ 237{
235 struct nfs_page *req = nfs_list_entry(head->next); 238 struct nfs_page *req = nfs_list_entry(head->next);
236 struct page *page = req->wb_page; 239 struct page *page = req->wb_page;
@@ -242,11 +245,11 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode)
242 245
243 nfs_list_remove_request(req); 246 nfs_list_remove_request(req);
244 247
245 nbytes = req->wb_bytes; 248 nbytes = count;
246 do { 249 do {
247 size_t len = min(nbytes,rsize); 250 size_t len = min(nbytes,rsize);
248 251
249 data = nfs_readdata_alloc(len); 252 data = nfs_readdata_alloc(1);
250 if (!data) 253 if (!data)
251 goto out_bad; 254 goto out_bad;
252 INIT_LIST_HEAD(&data->pages); 255 INIT_LIST_HEAD(&data->pages);
@@ -258,23 +261,19 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode)
258 261
259 ClearPageError(page); 262 ClearPageError(page);
260 offset = 0; 263 offset = 0;
261 nbytes = req->wb_bytes; 264 nbytes = count;
262 do { 265 do {
263 data = list_entry(list.next, struct nfs_read_data, pages); 266 data = list_entry(list.next, struct nfs_read_data, pages);
264 list_del_init(&data->pages); 267 list_del_init(&data->pages);
265 268
266 data->pagevec[0] = page; 269 data->pagevec[0] = page;
267 270
268 if (nbytes > rsize) { 271 if (nbytes < rsize)
269 nfs_read_rpcsetup(req, data, &nfs_read_partial_ops, 272 rsize = nbytes;
270 rsize, offset); 273 nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
271 offset += rsize; 274 rsize, offset);
272 nbytes -= rsize; 275 offset += rsize;
273 } else { 276 nbytes -= rsize;
274 nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
275 nbytes, offset);
276 nbytes = 0;
277 }
278 nfs_execute_read(data); 277 nfs_execute_read(data);
279 } while (nbytes != 0); 278 } while (nbytes != 0);
280 279
@@ -291,30 +290,24 @@ out_bad:
291 return -ENOMEM; 290 return -ENOMEM;
292} 291}
293 292
294static int nfs_pagein_one(struct list_head *head, struct inode *inode) 293static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags)
295{ 294{
296 struct nfs_page *req; 295 struct nfs_page *req;
297 struct page **pages; 296 struct page **pages;
298 struct nfs_read_data *data; 297 struct nfs_read_data *data;
299 unsigned int count;
300 298
301 if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) 299 data = nfs_readdata_alloc(npages);
302 return nfs_pagein_multi(head, inode);
303
304 data = nfs_readdata_alloc(NFS_SERVER(inode)->rsize);
305 if (!data) 300 if (!data)
306 goto out_bad; 301 goto out_bad;
307 302
308 INIT_LIST_HEAD(&data->pages); 303 INIT_LIST_HEAD(&data->pages);
309 pages = data->pagevec; 304 pages = data->pagevec;
310 count = 0;
311 while (!list_empty(head)) { 305 while (!list_empty(head)) {
312 req = nfs_list_entry(head->next); 306 req = nfs_list_entry(head->next);
313 nfs_list_remove_request(req); 307 nfs_list_remove_request(req);
314 nfs_list_add_request(req, &data->pages); 308 nfs_list_add_request(req, &data->pages);
315 ClearPageError(req->wb_page); 309 ClearPageError(req->wb_page);
316 *pages++ = req->wb_page; 310 *pages++ = req->wb_page;
317 count += req->wb_bytes;
318 } 311 }
319 req = nfs_list_entry(data->pages.next); 312 req = nfs_list_entry(data->pages.next);
320 313
@@ -327,28 +320,6 @@ out_bad:
327 return -ENOMEM; 320 return -ENOMEM;
328} 321}
329 322
330static int
331nfs_pagein_list(struct list_head *head, int rpages)
332{
333 LIST_HEAD(one_request);
334 struct nfs_page *req;
335 int error = 0;
336 unsigned int pages = 0;
337
338 while (!list_empty(head)) {
339 pages += nfs_coalesce_requests(head, &one_request, rpages);
340 req = nfs_list_entry(one_request.next);
341 error = nfs_pagein_one(&one_request, req->wb_context->dentry->d_inode);
342 if (error < 0)
343 break;
344 }
345 if (error >= 0)
346 return pages;
347
348 nfs_async_read_error(head);
349 return error;
350}
351
352/* 323/*
353 * This is the callback from RPC telling us whether a reply was 324 * This is the callback from RPC telling us whether a reply was
354 * received or some error occurred (timeout or socket shutdown). 325 * received or some error occurred (timeout or socket shutdown).
@@ -538,7 +509,7 @@ out_error:
538} 509}
539 510
540struct nfs_readdesc { 511struct nfs_readdesc {
541 struct list_head *head; 512 struct nfs_pageio_descriptor *pgio;
542 struct nfs_open_context *ctx; 513 struct nfs_open_context *ctx;
543}; 514};
544 515
@@ -562,19 +533,21 @@ readpage_async_filler(void *data, struct page *page)
562 } 533 }
563 if (len < PAGE_CACHE_SIZE) 534 if (len < PAGE_CACHE_SIZE)
564 memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len); 535 memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);
565 nfs_list_add_request(new, desc->head); 536 nfs_pageio_add_request(desc->pgio, new);
566 return 0; 537 return 0;
567} 538}
568 539
569int nfs_readpages(struct file *filp, struct address_space *mapping, 540int nfs_readpages(struct file *filp, struct address_space *mapping,
570 struct list_head *pages, unsigned nr_pages) 541 struct list_head *pages, unsigned nr_pages)
571{ 542{
572 LIST_HEAD(head); 543 struct nfs_pageio_descriptor pgio;
573 struct nfs_readdesc desc = { 544 struct nfs_readdesc desc = {
574 .head = &head, 545 .pgio = &pgio,
575 }; 546 };
576 struct inode *inode = mapping->host; 547 struct inode *inode = mapping->host;
577 struct nfs_server *server = NFS_SERVER(inode); 548 struct nfs_server *server = NFS_SERVER(inode);
549 size_t rsize = server->rsize;
550 unsigned long npages;
578 int ret = -ESTALE; 551 int ret = -ESTALE;
579 552
580 dprintk("NFS: nfs_readpages (%s/%Ld %d)\n", 553 dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
@@ -593,13 +566,16 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
593 } else 566 } else
594 desc.ctx = get_nfs_open_context((struct nfs_open_context *) 567 desc.ctx = get_nfs_open_context((struct nfs_open_context *)
595 filp->private_data); 568 filp->private_data);
569 if (rsize < PAGE_CACHE_SIZE)
570 nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
571 else
572 nfs_pageio_init(&pgio, inode, nfs_pagein_one, rsize, 0);
573
596 ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); 574 ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
597 if (!list_empty(&head)) { 575
598 int err = nfs_pagein_list(&head, server->rpages); 576 nfs_pageio_complete(&pgio);
599 if (!ret) 577 npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
600 nfs_add_stats(inode, NFSIOS_READPAGES, err); 578 nfs_add_stats(inode, NFSIOS_READPAGES, npages);
601 ret = err;
602 }
603 put_nfs_open_context(desc.ctx); 579 put_nfs_open_context(desc.ctx);
604out: 580out:
605 return ret; 581 return ret;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index f1eae44b9a1a..ca20d3cc2609 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -204,9 +204,9 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
204 lock_kernel(); 204 lock_kernel();
205 205
206 error = server->nfs_client->rpc_ops->statfs(server, fh, &res); 206 error = server->nfs_client->rpc_ops->statfs(server, fh, &res);
207 buf->f_type = NFS_SUPER_MAGIC;
208 if (error < 0) 207 if (error < 0)
209 goto out_err; 208 goto out_err;
209 buf->f_type = NFS_SUPER_MAGIC;
210 210
211 /* 211 /*
212 * Current versions of glibc do not correctly handle the 212 * Current versions of glibc do not correctly handle the
@@ -233,15 +233,14 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
233 buf->f_ffree = res.afiles; 233 buf->f_ffree = res.afiles;
234 234
235 buf->f_namelen = server->namelen; 235 buf->f_namelen = server->namelen;
236 out: 236
237 unlock_kernel(); 237 unlock_kernel();
238 return 0; 238 return 0;
239 239
240 out_err: 240 out_err:
241 dprintk("%s: statfs error = %d\n", __FUNCTION__, -error); 241 dprintk("%s: statfs error = %d\n", __FUNCTION__, -error);
242 buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; 242 unlock_kernel();
243 goto out; 243 return error;
244
245} 244}
246 245
247/* 246/*
@@ -291,6 +290,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
291 { NFS_MOUNT_NOAC, ",noac", "" }, 290 { NFS_MOUNT_NOAC, ",noac", "" },
292 { NFS_MOUNT_NONLM, ",nolock", "" }, 291 { NFS_MOUNT_NONLM, ",nolock", "" },
293 { NFS_MOUNT_NOACL, ",noacl", "" }, 292 { NFS_MOUNT_NOACL, ",noacl", "" },
293 { NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" },
294 { 0, NULL, NULL } 294 { 0, NULL, NULL }
295 }; 295 };
296 const struct proc_nfs_info *nfs_infop; 296 const struct proc_nfs_info *nfs_infop;
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index f4a0548b9ce8..83e865a16ad1 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -22,7 +22,6 @@
22#include <linux/mm.h> 22#include <linux/mm.h>
23#include <linux/slab.h> 23#include <linux/slab.h>
24#include <linux/string.h> 24#include <linux/string.h>
25#include <linux/smp_lock.h>
26#include <linux/namei.h> 25#include <linux/namei.h>
27 26
28/* Symlink caching in the page cache is even more simplistic 27/* Symlink caching in the page cache is even more simplistic
@@ -61,15 +60,9 @@ static void *nfs_follow_link(struct dentry *dentry, struct nameidata *nd)
61 err = page; 60 err = page;
62 goto read_failed; 61 goto read_failed;
63 } 62 }
64 if (!PageUptodate(page)) {
65 err = ERR_PTR(-EIO);
66 goto getlink_read_error;
67 }
68 nd_set_link(nd, kmap(page)); 63 nd_set_link(nd, kmap(page));
69 return page; 64 return page;
70 65
71getlink_read_error:
72 page_cache_release(page);
73read_failed: 66read_failed:
74 nd_set_link(nd, err); 67 nd_set_link(nd, err);
75 return NULL; 68 return NULL;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 797558941745..de92b9509d94 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -21,7 +21,6 @@
21#include <linux/backing-dev.h> 21#include <linux/backing-dev.h>
22 22
23#include <asm/uaccess.h> 23#include <asm/uaccess.h>
24#include <linux/smp_lock.h>
25 24
26#include "delegation.h" 25#include "delegation.h"
27#include "internal.h" 26#include "internal.h"
@@ -38,7 +37,8 @@
38static struct nfs_page * nfs_update_request(struct nfs_open_context*, 37static struct nfs_page * nfs_update_request(struct nfs_open_context*,
39 struct page *, 38 struct page *,
40 unsigned int, unsigned int); 39 unsigned int, unsigned int);
41static long nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how); 40static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc,
41 struct inode *inode, int ioflags);
42static const struct rpc_call_ops nfs_write_partial_ops; 42static const struct rpc_call_ops nfs_write_partial_ops;
43static const struct rpc_call_ops nfs_write_full_ops; 43static const struct rpc_call_ops nfs_write_full_ops;
44static const struct rpc_call_ops nfs_commit_ops; 44static const struct rpc_call_ops nfs_commit_ops;
@@ -71,9 +71,8 @@ void nfs_commit_free(struct nfs_write_data *wdata)
71 call_rcu_bh(&wdata->task.u.tk_rcu, nfs_commit_rcu_free); 71 call_rcu_bh(&wdata->task.u.tk_rcu, nfs_commit_rcu_free);
72} 72}
73 73
74struct nfs_write_data *nfs_writedata_alloc(size_t len) 74struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
75{ 75{
76 unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
77 struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS); 76 struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS);
78 77
79 if (p) { 78 if (p) {
@@ -139,7 +138,7 @@ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int c
139{ 138{
140 struct inode *inode = page->mapping->host; 139 struct inode *inode = page->mapping->host;
141 loff_t end, i_size = i_size_read(inode); 140 loff_t end, i_size = i_size_read(inode);
142 unsigned long end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; 141 pgoff_t end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
143 142
144 if (i_size > 0 && page->index < end_index) 143 if (i_size > 0 && page->index < end_index)
145 return; 144 return;
@@ -201,7 +200,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
201static int wb_priority(struct writeback_control *wbc) 200static int wb_priority(struct writeback_control *wbc)
202{ 201{
203 if (wbc->for_reclaim) 202 if (wbc->for_reclaim)
204 return FLUSH_HIGHPRI; 203 return FLUSH_HIGHPRI | FLUSH_STABLE;
205 if (wbc->for_kupdate) 204 if (wbc->for_kupdate)
206 return FLUSH_LOWPRI; 205 return FLUSH_LOWPRI;
207 return 0; 206 return 0;
@@ -225,7 +224,7 @@ static int nfs_set_page_writeback(struct page *page)
225 struct inode *inode = page->mapping->host; 224 struct inode *inode = page->mapping->host;
226 struct nfs_server *nfss = NFS_SERVER(inode); 225 struct nfs_server *nfss = NFS_SERVER(inode);
227 226
228 if (atomic_inc_return(&nfss->writeback) > 227 if (atomic_long_inc_return(&nfss->writeback) >
229 NFS_CONGESTION_ON_THRESH) 228 NFS_CONGESTION_ON_THRESH)
230 set_bdi_congested(&nfss->backing_dev_info, WRITE); 229 set_bdi_congested(&nfss->backing_dev_info, WRITE);
231 } 230 }
@@ -238,7 +237,7 @@ static void nfs_end_page_writeback(struct page *page)
238 struct nfs_server *nfss = NFS_SERVER(inode); 237 struct nfs_server *nfss = NFS_SERVER(inode);
239 238
240 end_page_writeback(page); 239 end_page_writeback(page);
241 if (atomic_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) { 240 if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) {
242 clear_bdi_congested(&nfss->backing_dev_info, WRITE); 241 clear_bdi_congested(&nfss->backing_dev_info, WRITE);
243 congestion_end(WRITE); 242 congestion_end(WRITE);
244 } 243 }
@@ -251,7 +250,8 @@ static void nfs_end_page_writeback(struct page *page)
251 * was not tagged. 250 * was not tagged.
252 * May also return an error if the user signalled nfs_wait_on_request(). 251 * May also return an error if the user signalled nfs_wait_on_request().
253 */ 252 */
254static int nfs_page_mark_flush(struct page *page) 253static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
254 struct page *page)
255{ 255{
256 struct nfs_page *req; 256 struct nfs_page *req;
257 struct nfs_inode *nfsi = NFS_I(page->mapping->host); 257 struct nfs_inode *nfsi = NFS_I(page->mapping->host);
@@ -273,6 +273,8 @@ static int nfs_page_mark_flush(struct page *page)
273 * request as dirty (in which case we don't care). 273 * request as dirty (in which case we don't care).
274 */ 274 */
275 spin_unlock(req_lock); 275 spin_unlock(req_lock);
276 /* Prevent deadlock! */
277 nfs_pageio_complete(pgio);
276 ret = nfs_wait_on_request(req); 278 ret = nfs_wait_on_request(req);
277 nfs_release_request(req); 279 nfs_release_request(req);
278 if (ret != 0) 280 if (ret != 0)
@@ -283,21 +285,18 @@ static int nfs_page_mark_flush(struct page *page)
283 /* This request is marked for commit */ 285 /* This request is marked for commit */
284 spin_unlock(req_lock); 286 spin_unlock(req_lock);
285 nfs_unlock_request(req); 287 nfs_unlock_request(req);
288 nfs_pageio_complete(pgio);
286 return 1; 289 return 1;
287 } 290 }
288 if (nfs_set_page_writeback(page) == 0) { 291 if (nfs_set_page_writeback(page) != 0) {
289 nfs_list_remove_request(req);
290 /* add the request to the inode's dirty list. */
291 radix_tree_tag_set(&nfsi->nfs_page_tree,
292 req->wb_index, NFS_PAGE_TAG_DIRTY);
293 nfs_list_add_request(req, &nfsi->dirty);
294 nfsi->ndirty++;
295 spin_unlock(req_lock);
296 __mark_inode_dirty(page->mapping->host, I_DIRTY_PAGES);
297 } else
298 spin_unlock(req_lock); 292 spin_unlock(req_lock);
293 BUG();
294 }
295 radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
296 NFS_PAGE_TAG_WRITEBACK);
299 ret = test_bit(PG_NEED_FLUSH, &req->wb_flags); 297 ret = test_bit(PG_NEED_FLUSH, &req->wb_flags);
300 nfs_unlock_request(req); 298 spin_unlock(req_lock);
299 nfs_pageio_add_request(pgio, req);
301 return ret; 300 return ret;
302} 301}
303 302
@@ -306,6 +305,7 @@ static int nfs_page_mark_flush(struct page *page)
306 */ 305 */
307static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc) 306static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc)
308{ 307{
308 struct nfs_pageio_descriptor mypgio, *pgio;
309 struct nfs_open_context *ctx; 309 struct nfs_open_context *ctx;
310 struct inode *inode = page->mapping->host; 310 struct inode *inode = page->mapping->host;
311 unsigned offset; 311 unsigned offset;
@@ -314,7 +314,14 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc
314 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); 314 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
315 nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); 315 nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
316 316
317 err = nfs_page_mark_flush(page); 317 if (wbc->for_writepages)
318 pgio = wbc->fs_private;
319 else {
320 nfs_pageio_init_write(&mypgio, inode, wb_priority(wbc));
321 pgio = &mypgio;
322 }
323
324 err = nfs_page_async_flush(pgio, page);
318 if (err <= 0) 325 if (err <= 0)
319 goto out; 326 goto out;
320 err = 0; 327 err = 0;
@@ -331,12 +338,12 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc
331 put_nfs_open_context(ctx); 338 put_nfs_open_context(ctx);
332 if (err != 0) 339 if (err != 0)
333 goto out; 340 goto out;
334 err = nfs_page_mark_flush(page); 341 err = nfs_page_async_flush(pgio, page);
335 if (err > 0) 342 if (err > 0)
336 err = 0; 343 err = 0;
337out: 344out:
338 if (!wbc->for_writepages) 345 if (!wbc->for_writepages)
339 nfs_flush_mapping(page->mapping, wbc, FLUSH_STABLE|wb_priority(wbc)); 346 nfs_pageio_complete(pgio);
340 return err; 347 return err;
341} 348}
342 349
@@ -352,20 +359,20 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc)
352int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) 359int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
353{ 360{
354 struct inode *inode = mapping->host; 361 struct inode *inode = mapping->host;
362 struct nfs_pageio_descriptor pgio;
355 int err; 363 int err;
356 364
357 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); 365 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
358 366
367 nfs_pageio_init_write(&pgio, inode, wb_priority(wbc));
368 wbc->fs_private = &pgio;
359 err = generic_writepages(mapping, wbc); 369 err = generic_writepages(mapping, wbc);
370 nfs_pageio_complete(&pgio);
360 if (err) 371 if (err)
361 return err; 372 return err;
362 err = nfs_flush_mapping(mapping, wbc, wb_priority(wbc)); 373 if (pgio.pg_error)
363 if (err < 0) 374 return pgio.pg_error;
364 goto out; 375 return 0;
365 nfs_add_stats(inode, NFSIOS_WRITEPAGES, err);
366 err = 0;
367out:
368 return err;
369} 376}
370 377
371/* 378/*
@@ -503,11 +510,11 @@ int nfs_reschedule_unstable_write(struct nfs_page *req)
503 * 510 *
504 * Interruptible by signals only if mounted with intr flag. 511 * Interruptible by signals only if mounted with intr flag.
505 */ 512 */
506static int nfs_wait_on_requests_locked(struct inode *inode, unsigned long idx_start, unsigned int npages) 513static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, unsigned int npages)
507{ 514{
508 struct nfs_inode *nfsi = NFS_I(inode); 515 struct nfs_inode *nfsi = NFS_I(inode);
509 struct nfs_page *req; 516 struct nfs_page *req;
510 unsigned long idx_end, next; 517 pgoff_t idx_end, next;
511 unsigned int res = 0; 518 unsigned int res = 0;
512 int error; 519 int error;
513 520
@@ -536,18 +543,6 @@ static int nfs_wait_on_requests_locked(struct inode *inode, unsigned long idx_st
536 return res; 543 return res;
537} 544}
538 545
539static void nfs_cancel_dirty_list(struct list_head *head)
540{
541 struct nfs_page *req;
542 while(!list_empty(head)) {
543 req = nfs_list_entry(head->next);
544 nfs_list_remove_request(req);
545 nfs_end_page_writeback(req->wb_page);
546 nfs_inode_remove_request(req);
547 nfs_clear_page_writeback(req);
548 }
549}
550
551static void nfs_cancel_commit_list(struct list_head *head) 546static void nfs_cancel_commit_list(struct list_head *head)
552{ 547{
553 struct nfs_page *req; 548 struct nfs_page *req;
@@ -574,7 +569,7 @@ static void nfs_cancel_commit_list(struct list_head *head)
574 * The requests are *not* checked to ensure that they form a contiguous set. 569 * The requests are *not* checked to ensure that they form a contiguous set.
575 */ 570 */
576static int 571static int
577nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages) 572nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
578{ 573{
579 struct nfs_inode *nfsi = NFS_I(inode); 574 struct nfs_inode *nfsi = NFS_I(inode);
580 int res = 0; 575 int res = 0;
@@ -588,40 +583,12 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_st
588 return res; 583 return res;
589} 584}
590#else 585#else
591static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages) 586static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
592{ 587{
593 return 0; 588 return 0;
594} 589}
595#endif 590#endif
596 591
597static int nfs_wait_on_write_congestion(struct address_space *mapping)
598{
599 struct inode *inode = mapping->host;
600 struct backing_dev_info *bdi = mapping->backing_dev_info;
601 int ret = 0;
602
603 might_sleep();
604
605 if (!bdi_write_congested(bdi))
606 return 0;
607
608 nfs_inc_stats(inode, NFSIOS_CONGESTIONWAIT);
609
610 do {
611 struct rpc_clnt *clnt = NFS_CLIENT(inode);
612 sigset_t oldset;
613
614 rpc_clnt_sigmask(clnt, &oldset);
615 ret = congestion_wait_interruptible(WRITE, HZ/10);
616 rpc_clnt_sigunmask(clnt, &oldset);
617 if (ret == -ERESTARTSYS)
618 break;
619 ret = 0;
620 } while (bdi_write_congested(bdi));
621
622 return ret;
623}
624
625/* 592/*
626 * Try to update any existing write request, or create one if there is none. 593 * Try to update any existing write request, or create one if there is none.
627 * In order to match, the request's credentials must match those of 594 * In order to match, the request's credentials must match those of
@@ -636,12 +603,10 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
636 struct inode *inode = mapping->host; 603 struct inode *inode = mapping->host;
637 struct nfs_inode *nfsi = NFS_I(inode); 604 struct nfs_inode *nfsi = NFS_I(inode);
638 struct nfs_page *req, *new = NULL; 605 struct nfs_page *req, *new = NULL;
639 unsigned long rqend, end; 606 pgoff_t rqend, end;
640 607
641 end = offset + bytes; 608 end = offset + bytes;
642 609
643 if (nfs_wait_on_write_congestion(mapping))
644 return ERR_PTR(-ERESTARTSYS);
645 for (;;) { 610 for (;;) {
646 /* Loop over all inode entries and see if we find 611 /* Loop over all inode entries and see if we find
647 * A request for the page we wish to update 612 * A request for the page we wish to update
@@ -865,7 +830,7 @@ static void nfs_execute_write(struct nfs_write_data *data)
865 * Generate multiple small requests to write out a single 830 * Generate multiple small requests to write out a single
866 * contiguous dirty area on one page. 831 * contiguous dirty area on one page.
867 */ 832 */
868static int nfs_flush_multi(struct inode *inode, struct list_head *head, int how) 833static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how)
869{ 834{
870 struct nfs_page *req = nfs_list_entry(head->next); 835 struct nfs_page *req = nfs_list_entry(head->next);
871 struct page *page = req->wb_page; 836 struct page *page = req->wb_page;
@@ -877,11 +842,11 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, int how)
877 842
878 nfs_list_remove_request(req); 843 nfs_list_remove_request(req);
879 844
880 nbytes = req->wb_bytes; 845 nbytes = count;
881 do { 846 do {
882 size_t len = min(nbytes, wsize); 847 size_t len = min(nbytes, wsize);
883 848
884 data = nfs_writedata_alloc(len); 849 data = nfs_writedata_alloc(1);
885 if (!data) 850 if (!data)
886 goto out_bad; 851 goto out_bad;
887 list_add(&data->pages, &list); 852 list_add(&data->pages, &list);
@@ -892,23 +857,19 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, int how)
892 857
893 ClearPageError(page); 858 ClearPageError(page);
894 offset = 0; 859 offset = 0;
895 nbytes = req->wb_bytes; 860 nbytes = count;
896 do { 861 do {
897 data = list_entry(list.next, struct nfs_write_data, pages); 862 data = list_entry(list.next, struct nfs_write_data, pages);
898 list_del_init(&data->pages); 863 list_del_init(&data->pages);
899 864
900 data->pagevec[0] = page; 865 data->pagevec[0] = page;
901 866
902 if (nbytes > wsize) { 867 if (nbytes < wsize)
903 nfs_write_rpcsetup(req, data, &nfs_write_partial_ops, 868 wsize = nbytes;
904 wsize, offset, how); 869 nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
905 offset += wsize; 870 wsize, offset, how);
906 nbytes -= wsize; 871 offset += wsize;
907 } else { 872 nbytes -= wsize;
908 nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
909 nbytes, offset, how);
910 nbytes = 0;
911 }
912 nfs_execute_write(data); 873 nfs_execute_write(data);
913 } while (nbytes != 0); 874 } while (nbytes != 0);
914 875
@@ -934,26 +895,23 @@ out_bad:
934 * This is the case if nfs_updatepage detects a conflicting request 895 * This is the case if nfs_updatepage detects a conflicting request
935 * that has been written but not committed. 896 * that has been written but not committed.
936 */ 897 */
937static int nfs_flush_one(struct inode *inode, struct list_head *head, int how) 898static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how)
938{ 899{
939 struct nfs_page *req; 900 struct nfs_page *req;
940 struct page **pages; 901 struct page **pages;
941 struct nfs_write_data *data; 902 struct nfs_write_data *data;
942 unsigned int count;
943 903
944 data = nfs_writedata_alloc(NFS_SERVER(inode)->wsize); 904 data = nfs_writedata_alloc(npages);
945 if (!data) 905 if (!data)
946 goto out_bad; 906 goto out_bad;
947 907
948 pages = data->pagevec; 908 pages = data->pagevec;
949 count = 0;
950 while (!list_empty(head)) { 909 while (!list_empty(head)) {
951 req = nfs_list_entry(head->next); 910 req = nfs_list_entry(head->next);
952 nfs_list_remove_request(req); 911 nfs_list_remove_request(req);
953 nfs_list_add_request(req, &data->pages); 912 nfs_list_add_request(req, &data->pages);
954 ClearPageError(req->wb_page); 913 ClearPageError(req->wb_page);
955 *pages++ = req->wb_page; 914 *pages++ = req->wb_page;
956 count += req->wb_bytes;
957 } 915 }
958 req = nfs_list_entry(data->pages.next); 916 req = nfs_list_entry(data->pages.next);
959 917
@@ -973,40 +931,15 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, int how)
973 return -ENOMEM; 931 return -ENOMEM;
974} 932}
975 933
976static int nfs_flush_list(struct inode *inode, struct list_head *head, int npages, int how) 934static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
935 struct inode *inode, int ioflags)
977{ 936{
978 LIST_HEAD(one_request);
979 int (*flush_one)(struct inode *, struct list_head *, int);
980 struct nfs_page *req;
981 int wpages = NFS_SERVER(inode)->wpages;
982 int wsize = NFS_SERVER(inode)->wsize; 937 int wsize = NFS_SERVER(inode)->wsize;
983 int error;
984 938
985 flush_one = nfs_flush_one;
986 if (wsize < PAGE_CACHE_SIZE) 939 if (wsize < PAGE_CACHE_SIZE)
987 flush_one = nfs_flush_multi; 940 nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
988 /* For single writes, FLUSH_STABLE is more efficient */ 941 else
989 if (npages <= wpages && npages == NFS_I(inode)->npages 942 nfs_pageio_init(pgio, inode, nfs_flush_one, wsize, ioflags);
990 && nfs_list_entry(head->next)->wb_bytes <= wsize)
991 how |= FLUSH_STABLE;
992
993 do {
994 nfs_coalesce_requests(head, &one_request, wpages);
995 req = nfs_list_entry(one_request.next);
996 error = flush_one(inode, &one_request, how);
997 if (error < 0)
998 goto out_err;
999 } while (!list_empty(head));
1000 return 0;
1001out_err:
1002 while (!list_empty(head)) {
1003 req = nfs_list_entry(head->next);
1004 nfs_list_remove_request(req);
1005 nfs_redirty_request(req);
1006 nfs_end_page_writeback(req->wb_page);
1007 nfs_clear_page_writeback(req);
1008 }
1009 return error;
1010} 943}
1011 944
1012/* 945/*
@@ -1330,31 +1263,7 @@ static const struct rpc_call_ops nfs_commit_ops = {
1330 .rpc_call_done = nfs_commit_done, 1263 .rpc_call_done = nfs_commit_done,
1331 .rpc_release = nfs_commit_release, 1264 .rpc_release = nfs_commit_release,
1332}; 1265};
1333#else
1334static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1335{
1336 return 0;
1337}
1338#endif
1339
1340static long nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how)
1341{
1342 struct nfs_inode *nfsi = NFS_I(mapping->host);
1343 LIST_HEAD(head);
1344 long res;
1345
1346 spin_lock(&nfsi->req_lock);
1347 res = nfs_scan_dirty(mapping, wbc, &head);
1348 spin_unlock(&nfsi->req_lock);
1349 if (res) {
1350 int error = nfs_flush_list(mapping->host, &head, res, how);
1351 if (error < 0)
1352 return error;
1353 }
1354 return res;
1355}
1356 1266
1357#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
1358int nfs_commit_inode(struct inode *inode, int how) 1267int nfs_commit_inode(struct inode *inode, int how)
1359{ 1268{
1360 struct nfs_inode *nfsi = NFS_I(inode); 1269 struct nfs_inode *nfsi = NFS_I(inode);
@@ -1371,13 +1280,18 @@ int nfs_commit_inode(struct inode *inode, int how)
1371 } 1280 }
1372 return res; 1281 return res;
1373} 1282}
1283#else
1284static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1285{
1286 return 0;
1287}
1374#endif 1288#endif
1375 1289
1376long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how) 1290long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how)
1377{ 1291{
1378 struct inode *inode = mapping->host; 1292 struct inode *inode = mapping->host;
1379 struct nfs_inode *nfsi = NFS_I(inode); 1293 struct nfs_inode *nfsi = NFS_I(inode);
1380 unsigned long idx_start, idx_end; 1294 pgoff_t idx_start, idx_end;
1381 unsigned int npages = 0; 1295 unsigned int npages = 0;
1382 LIST_HEAD(head); 1296 LIST_HEAD(head);
1383 int nocommit = how & FLUSH_NOCOMMIT; 1297 int nocommit = how & FLUSH_NOCOMMIT;
@@ -1390,41 +1304,24 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
1390 idx_start = wbc->range_start >> PAGE_CACHE_SHIFT; 1304 idx_start = wbc->range_start >> PAGE_CACHE_SHIFT;
1391 idx_end = wbc->range_end >> PAGE_CACHE_SHIFT; 1305 idx_end = wbc->range_end >> PAGE_CACHE_SHIFT;
1392 if (idx_end > idx_start) { 1306 if (idx_end > idx_start) {
1393 unsigned long l_npages = 1 + idx_end - idx_start; 1307 pgoff_t l_npages = 1 + idx_end - idx_start;
1394 npages = l_npages; 1308 npages = l_npages;
1395 if (sizeof(npages) != sizeof(l_npages) && 1309 if (sizeof(npages) != sizeof(l_npages) &&
1396 (unsigned long)npages != l_npages) 1310 (pgoff_t)npages != l_npages)
1397 npages = 0; 1311 npages = 0;
1398 } 1312 }
1399 } 1313 }
1400 how &= ~FLUSH_NOCOMMIT; 1314 how &= ~FLUSH_NOCOMMIT;
1401 spin_lock(&nfsi->req_lock); 1315 spin_lock(&nfsi->req_lock);
1402 do { 1316 do {
1403 wbc->pages_skipped = 0;
1404 ret = nfs_wait_on_requests_locked(inode, idx_start, npages); 1317 ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
1405 if (ret != 0) 1318 if (ret != 0)
1406 continue; 1319 continue;
1407 pages = nfs_scan_dirty(mapping, wbc, &head);
1408 if (pages != 0) {
1409 spin_unlock(&nfsi->req_lock);
1410 if (how & FLUSH_INVALIDATE) {
1411 nfs_cancel_dirty_list(&head);
1412 ret = pages;
1413 } else
1414 ret = nfs_flush_list(inode, &head, pages, how);
1415 spin_lock(&nfsi->req_lock);
1416 continue;
1417 }
1418 if (wbc->pages_skipped != 0)
1419 continue;
1420 if (nocommit) 1320 if (nocommit)
1421 break; 1321 break;
1422 pages = nfs_scan_commit(inode, &head, idx_start, npages); 1322 pages = nfs_scan_commit(inode, &head, idx_start, npages);
1423 if (pages == 0) { 1323 if (pages == 0)
1424 if (wbc->pages_skipped != 0)
1425 continue;
1426 break; 1324 break;
1427 }
1428 if (how & FLUSH_INVALIDATE) { 1325 if (how & FLUSH_INVALIDATE) {
1429 spin_unlock(&nfsi->req_lock); 1326 spin_unlock(&nfsi->req_lock);
1430 nfs_cancel_commit_list(&head); 1327 nfs_cancel_commit_list(&head);
@@ -1456,7 +1353,7 @@ int nfs_wb_all(struct inode *inode)
1456 }; 1353 };
1457 int ret; 1354 int ret;
1458 1355
1459 ret = generic_writepages(mapping, &wbc); 1356 ret = nfs_writepages(mapping, &wbc);
1460 if (ret < 0) 1357 if (ret < 0)
1461 goto out; 1358 goto out;
1462 ret = nfs_sync_mapping_wait(mapping, &wbc, 0); 1359 ret = nfs_sync_mapping_wait(mapping, &wbc, 0);
@@ -1479,11 +1376,9 @@ int nfs_sync_mapping_range(struct address_space *mapping, loff_t range_start, lo
1479 }; 1376 };
1480 int ret; 1377 int ret;
1481 1378
1482 if (!(how & FLUSH_NOWRITEPAGE)) { 1379 ret = nfs_writepages(mapping, &wbc);
1483 ret = generic_writepages(mapping, &wbc); 1380 if (ret < 0)
1484 if (ret < 0) 1381 goto out;
1485 goto out;
1486 }
1487 ret = nfs_sync_mapping_wait(mapping, &wbc, how); 1382 ret = nfs_sync_mapping_wait(mapping, &wbc, how);
1488 if (ret >= 0) 1383 if (ret >= 0)
1489 return 0; 1384 return 0;
@@ -1506,7 +1401,7 @@ int nfs_wb_page_priority(struct inode *inode, struct page *page, int how)
1506 int ret; 1401 int ret;
1507 1402
1508 BUG_ON(!PageLocked(page)); 1403 BUG_ON(!PageLocked(page));
1509 if (!(how & FLUSH_NOWRITEPAGE) && clear_page_dirty_for_io(page)) { 1404 if (clear_page_dirty_for_io(page)) {
1510 ret = nfs_writepage_locked(page, &wbc); 1405 ret = nfs_writepage_locked(page, &wbc);
1511 if (ret < 0) 1406 if (ret < 0)
1512 goto out; 1407 goto out;
@@ -1531,10 +1426,18 @@ int nfs_wb_page(struct inode *inode, struct page* page)
1531 1426
1532int nfs_set_page_dirty(struct page *page) 1427int nfs_set_page_dirty(struct page *page)
1533{ 1428{
1534 spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock; 1429 struct address_space *mapping = page->mapping;
1430 struct inode *inode;
1431 spinlock_t *req_lock;
1535 struct nfs_page *req; 1432 struct nfs_page *req;
1536 int ret; 1433 int ret;
1537 1434
1435 if (!mapping)
1436 goto out_raced;
1437 inode = mapping->host;
1438 if (!inode)
1439 goto out_raced;
1440 req_lock = &NFS_I(inode)->req_lock;
1538 spin_lock(req_lock); 1441 spin_lock(req_lock);
1539 req = nfs_page_find_request_locked(page); 1442 req = nfs_page_find_request_locked(page);
1540 if (req != NULL) { 1443 if (req != NULL) {
@@ -1547,6 +1450,8 @@ int nfs_set_page_dirty(struct page *page)
1547 ret = __set_page_dirty_nobuffers(page); 1450 ret = __set_page_dirty_nobuffers(page);
1548 spin_unlock(req_lock); 1451 spin_unlock(req_lock);
1549 return ret; 1452 return ret;
1453out_raced:
1454 return !TestSetPageDirty(page);
1550} 1455}
1551 1456
1552 1457
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
index ce341dc76d5e..9b118ee20193 100644
--- a/fs/nfsd/Makefile
+++ b/fs/nfsd/Makefile
@@ -11,4 +11,3 @@ nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o
11nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o 11nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
12nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ 12nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
13 nfs4acl.o nfs4callback.o nfs4recover.o 13 nfs4acl.o nfs4callback.o nfs4recover.o
14nfsd-objs := $(nfsd-y)
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 6f24768272a1..79bd03b8bbf8 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -469,6 +469,13 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
469 nd.dentry = NULL; 469 nd.dentry = NULL;
470 exp.ex_path = NULL; 470 exp.ex_path = NULL;
471 471
472 /* fs locations */
473 exp.ex_fslocs.locations = NULL;
474 exp.ex_fslocs.locations_count = 0;
475 exp.ex_fslocs.migrated = 0;
476
477 exp.ex_uuid = NULL;
478
472 if (mesg[mlen-1] != '\n') 479 if (mesg[mlen-1] != '\n')
473 return -EINVAL; 480 return -EINVAL;
474 mesg[mlen-1] = 0; 481 mesg[mlen-1] = 0;
@@ -509,13 +516,6 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
509 if (exp.h.expiry_time == 0) 516 if (exp.h.expiry_time == 0)
510 goto out; 517 goto out;
511 518
512 /* fs locations */
513 exp.ex_fslocs.locations = NULL;
514 exp.ex_fslocs.locations_count = 0;
515 exp.ex_fslocs.migrated = 0;
516
517 exp.ex_uuid = NULL;
518
519 /* flags */ 519 /* flags */
520 err = get_int(&mesg, &an_int); 520 err = get_int(&mesg, &an_int);
521 if (err == -ENOENT) 521 if (err == -ENOENT)
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 7f5bad0393b1..eac82830bfd7 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -177,7 +177,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
177 if (max_blocksize < resp->count) 177 if (max_blocksize < resp->count)
178 resp->count = max_blocksize; 178 resp->count = max_blocksize;
179 179
180 svc_reserve(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4); 180 svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4);
181 181
182 fh_copy(&resp->fh, &argp->fh); 182 fh_copy(&resp->fh, &argp->fh);
183 nfserr = nfsd_read(rqstp, &resp->fh, NULL, 183 nfserr = nfsd_read(rqstp, &resp->fh, NULL,
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 7e4bb0af24d7..10f6e7dcf633 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -239,7 +239,7 @@ static __be32 *
239encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) 239encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
240{ 240{
241 struct dentry *dentry = fhp->fh_dentry; 241 struct dentry *dentry = fhp->fh_dentry;
242 if (dentry && dentry->d_inode != NULL) { 242 if (dentry && dentry->d_inode) {
243 int err; 243 int err;
244 struct kstat stat; 244 struct kstat stat;
245 245
@@ -300,9 +300,9 @@ int
300nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p, 300nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p,
301 struct nfsd3_sattrargs *args) 301 struct nfsd3_sattrargs *args)
302{ 302{
303 if (!(p = decode_fh(p, &args->fh)) 303 if (!(p = decode_fh(p, &args->fh)))
304 || !(p = decode_sattr3(p, &args->attrs)))
305 return 0; 304 return 0;
305 p = decode_sattr3(p, &args->attrs);
306 306
307 if ((args->check_guard = ntohl(*p++)) != 0) { 307 if ((args->check_guard = ntohl(*p++)) != 0) {
308 struct timespec time; 308 struct timespec time;
@@ -343,9 +343,9 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
343 int v,pn; 343 int v,pn;
344 u32 max_blocksize = svc_max_payload(rqstp); 344 u32 max_blocksize = svc_max_payload(rqstp);
345 345
346 if (!(p = decode_fh(p, &args->fh)) 346 if (!(p = decode_fh(p, &args->fh)))
347 || !(p = xdr_decode_hyper(p, &args->offset)))
348 return 0; 347 return 0;
348 p = xdr_decode_hyper(p, &args->offset);
349 349
350 len = args->count = ntohl(*p++); 350 len = args->count = ntohl(*p++);
351 351
@@ -369,28 +369,44 @@ int
369nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, 369nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
370 struct nfsd3_writeargs *args) 370 struct nfsd3_writeargs *args)
371{ 371{
372 unsigned int len, v, hdr; 372 unsigned int len, v, hdr, dlen;
373 u32 max_blocksize = svc_max_payload(rqstp); 373 u32 max_blocksize = svc_max_payload(rqstp);
374 374
375 if (!(p = decode_fh(p, &args->fh)) 375 if (!(p = decode_fh(p, &args->fh)))
376 || !(p = xdr_decode_hyper(p, &args->offset)))
377 return 0; 376 return 0;
377 p = xdr_decode_hyper(p, &args->offset);
378 378
379 args->count = ntohl(*p++); 379 args->count = ntohl(*p++);
380 args->stable = ntohl(*p++); 380 args->stable = ntohl(*p++);
381 len = args->len = ntohl(*p++); 381 len = args->len = ntohl(*p++);
382 /*
383 * The count must equal the amount of data passed.
384 */
385 if (args->count != args->len)
386 return 0;
382 387
388 /*
389 * Check to make sure that we got the right number of
390 * bytes.
391 */
383 hdr = (void*)p - rqstp->rq_arg.head[0].iov_base; 392 hdr = (void*)p - rqstp->rq_arg.head[0].iov_base;
384 if (rqstp->rq_arg.len < hdr || 393 dlen = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len
385 rqstp->rq_arg.len - hdr < len) 394 - hdr;
395 /*
396 * Round the length of the data which was specified up to
397 * the next multiple of XDR units and then compare that
398 * against the length which was actually received.
399 */
400 if (dlen != XDR_QUADLEN(len)*4)
386 return 0; 401 return 0;
387 402
403 if (args->count > max_blocksize) {
404 args->count = max_blocksize;
405 len = args->len = max_blocksize;
406 }
388 rqstp->rq_vec[0].iov_base = (void*)p; 407 rqstp->rq_vec[0].iov_base = (void*)p;
389 rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr; 408 rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr;
390 409 v = 0;
391 if (len > max_blocksize)
392 len = max_blocksize;
393 v= 0;
394 while (len > rqstp->rq_vec[v].iov_len) { 410 while (len > rqstp->rq_vec[v].iov_len) {
395 len -= rqstp->rq_vec[v].iov_len; 411 len -= rqstp->rq_vec[v].iov_len;
396 v++; 412 v++;
@@ -398,9 +414,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
398 rqstp->rq_vec[v].iov_len = PAGE_SIZE; 414 rqstp->rq_vec[v].iov_len = PAGE_SIZE;
399 } 415 }
400 rqstp->rq_vec[v].iov_len = len; 416 rqstp->rq_vec[v].iov_len = len;
401 args->vlen = v+1; 417 args->vlen = v + 1;
402 418 return 1;
403 return args->count == args->len && rqstp->rq_vec[0].iov_len > 0;
404} 419}
405 420
406int 421int
@@ -414,8 +429,7 @@ nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p,
414 switch (args->createmode = ntohl(*p++)) { 429 switch (args->createmode = ntohl(*p++)) {
415 case NFS3_CREATE_UNCHECKED: 430 case NFS3_CREATE_UNCHECKED:
416 case NFS3_CREATE_GUARDED: 431 case NFS3_CREATE_GUARDED:
417 if (!(p = decode_sattr3(p, &args->attrs))) 432 p = decode_sattr3(p, &args->attrs);
418 return 0;
419 break; 433 break;
420 case NFS3_CREATE_EXCLUSIVE: 434 case NFS3_CREATE_EXCLUSIVE:
421 args->verf = p; 435 args->verf = p;
@@ -431,10 +445,10 @@ int
431nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p, 445nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p,
432 struct nfsd3_createargs *args) 446 struct nfsd3_createargs *args)
433{ 447{
434 if (!(p = decode_fh(p, &args->fh)) 448 if (!(p = decode_fh(p, &args->fh)) ||
435 || !(p = decode_filename(p, &args->name, &args->len)) 449 !(p = decode_filename(p, &args->name, &args->len)))
436 || !(p = decode_sattr3(p, &args->attrs)))
437 return 0; 450 return 0;
451 p = decode_sattr3(p, &args->attrs);
438 452
439 return xdr_argsize_check(rqstp, p); 453 return xdr_argsize_check(rqstp, p);
440} 454}
@@ -448,11 +462,12 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
448 char *old, *new; 462 char *old, *new;
449 struct kvec *vec; 463 struct kvec *vec;
450 464
451 if (!(p = decode_fh(p, &args->ffh)) 465 if (!(p = decode_fh(p, &args->ffh)) ||
452 || !(p = decode_filename(p, &args->fname, &args->flen)) 466 !(p = decode_filename(p, &args->fname, &args->flen))
453 || !(p = decode_sattr3(p, &args->attrs))
454 ) 467 )
455 return 0; 468 return 0;
469 p = decode_sattr3(p, &args->attrs);
470
456 /* now decode the pathname, which might be larger than the first page. 471 /* now decode the pathname, which might be larger than the first page.
457 * As we have to check for nul's anyway, we copy it into a new page 472 * As we have to check for nul's anyway, we copy it into a new page
458 * This page appears in the rq_res.pages list, but as pages_len is always 473 * This page appears in the rq_res.pages list, but as pages_len is always
@@ -502,10 +517,8 @@ nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p,
502 args->ftype = ntohl(*p++); 517 args->ftype = ntohl(*p++);
503 518
504 if (args->ftype == NF3BLK || args->ftype == NF3CHR 519 if (args->ftype == NF3BLK || args->ftype == NF3CHR
505 || args->ftype == NF3SOCK || args->ftype == NF3FIFO) { 520 || args->ftype == NF3SOCK || args->ftype == NF3FIFO)
506 if (!(p = decode_sattr3(p, &args->attrs))) 521 p = decode_sattr3(p, &args->attrs);
507 return 0;
508 }
509 522
510 if (args->ftype == NF3BLK || args->ftype == NF3CHR) { 523 if (args->ftype == NF3BLK || args->ftype == NF3CHR) {
511 args->major = ntohl(*p++); 524 args->major = ntohl(*p++);
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 673a53c014a3..cc3b7badd486 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -137,7 +137,6 @@ struct ace_container {
137static short ace2type(struct nfs4_ace *); 137static short ace2type(struct nfs4_ace *);
138static void _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *, 138static void _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *,
139 unsigned int); 139 unsigned int);
140void nfs4_acl_add_ace(struct nfs4_acl *, u32, u32, u32, int, uid_t);
141 140
142struct nfs4_acl * 141struct nfs4_acl *
143nfs4_acl_posix_to_nfsv4(struct posix_acl *pacl, struct posix_acl *dpacl, 142nfs4_acl_posix_to_nfsv4(struct posix_acl *pacl, struct posix_acl *dpacl,
@@ -785,21 +784,6 @@ nfs4_acl_new(int n)
785 return acl; 784 return acl;
786} 785}
787 786
788void
789nfs4_acl_add_ace(struct nfs4_acl *acl, u32 type, u32 flag, u32 access_mask,
790 int whotype, uid_t who)
791{
792 struct nfs4_ace *ace = acl->aces + acl->naces;
793
794 ace->type = type;
795 ace->flag = flag;
796 ace->access_mask = access_mask;
797 ace->whotype = whotype;
798 ace->who = who;
799
800 acl->naces++;
801}
802
803static struct { 787static struct {
804 char *string; 788 char *string;
805 int stringlen; 789 int stringlen;
@@ -851,6 +835,5 @@ nfs4_acl_write_who(int who, char *p)
851} 835}
852 836
853EXPORT_SYMBOL(nfs4_acl_new); 837EXPORT_SYMBOL(nfs4_acl_new);
854EXPORT_SYMBOL(nfs4_acl_add_ace);
855EXPORT_SYMBOL(nfs4_acl_get_whotype); 838EXPORT_SYMBOL(nfs4_acl_get_whotype);
856EXPORT_SYMBOL(nfs4_acl_write_who); 839EXPORT_SYMBOL(nfs4_acl_write_who);
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index fb14d68eacab..32ffea033c7a 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -315,16 +315,13 @@ out:
315/* 315/*
316 * RPC procedure tables 316 * RPC procedure tables
317 */ 317 */
318#ifndef MAX
319# define MAX(a, b) (((a) > (b))? (a) : (b))
320#endif
321
322#define PROC(proc, call, argtype, restype) \ 318#define PROC(proc, call, argtype, restype) \
323[NFSPROC4_CLNT_##proc] = { \ 319[NFSPROC4_CLNT_##proc] = { \
324 .p_proc = NFSPROC4_CB_##call, \ 320 .p_proc = NFSPROC4_CB_##call, \
325 .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \ 321 .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \
326 .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \ 322 .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \
327 .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2, \ 323 .p_arglen = NFS4_##argtype##_sz, \
324 .p_replen = NFS4_##restype##_sz, \
328 .p_statidx = NFSPROC4_CB_##call, \ 325 .p_statidx = NFSPROC4_CB_##call, \
329 .p_name = #proc, \ 326 .p_name = #proc, \
330} 327}
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index e4a83d727afd..45aa21ce6784 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -46,7 +46,6 @@
46#include <linux/nfs4.h> 46#include <linux/nfs4.h>
47#include <linux/nfs_fs.h> 47#include <linux/nfs_fs.h>
48#include <linux/nfs_page.h> 48#include <linux/nfs_page.h>
49#include <linux/smp_lock.h>
50#include <linux/sunrpc/cache.h> 49#include <linux/sunrpc/cache.h>
51#include <linux/nfsd_idmap.h> 50#include <linux/nfsd_idmap.h>
52#include <linux/list.h> 51#include <linux/list.h>
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index af360705e551..3cc8ce422ab1 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -50,6 +50,7 @@
50#include <linux/nfsd/xdr4.h> 50#include <linux/nfsd/xdr4.h>
51#include <linux/namei.h> 51#include <linux/namei.h>
52#include <linux/mutex.h> 52#include <linux/mutex.h>
53#include <linux/lockd/bind.h>
53 54
54#define NFSDDBG_FACILITY NFSDDBG_PROC 55#define NFSDDBG_FACILITY NFSDDBG_PROC
55 56
@@ -1325,8 +1326,6 @@ do_recall(void *__dp)
1325{ 1326{
1326 struct nfs4_delegation *dp = __dp; 1327 struct nfs4_delegation *dp = __dp;
1327 1328
1328 daemonize("nfsv4-recall");
1329
1330 nfsd4_cb_recall(dp); 1329 nfsd4_cb_recall(dp);
1331 return 0; 1330 return 0;
1332} 1331}
@@ -2657,6 +2656,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2657 struct file_lock conflock; 2656 struct file_lock conflock;
2658 __be32 status = 0; 2657 __be32 status = 0;
2659 unsigned int strhashval; 2658 unsigned int strhashval;
2659 unsigned int cmd;
2660 int err; 2660 int err;
2661 2661
2662 dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n", 2662 dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n",
@@ -2739,10 +2739,12 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2739 case NFS4_READ_LT: 2739 case NFS4_READ_LT:
2740 case NFS4_READW_LT: 2740 case NFS4_READW_LT:
2741 file_lock.fl_type = F_RDLCK; 2741 file_lock.fl_type = F_RDLCK;
2742 cmd = F_SETLK;
2742 break; 2743 break;
2743 case NFS4_WRITE_LT: 2744 case NFS4_WRITE_LT:
2744 case NFS4_WRITEW_LT: 2745 case NFS4_WRITEW_LT:
2745 file_lock.fl_type = F_WRLCK; 2746 file_lock.fl_type = F_WRLCK;
2747 cmd = F_SETLK;
2746 break; 2748 break;
2747 default: 2749 default:
2748 status = nfserr_inval; 2750 status = nfserr_inval;
@@ -2769,9 +2771,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2769 2771
2770 /* XXX?: Just to divert the locks_release_private at the start of 2772 /* XXX?: Just to divert the locks_release_private at the start of
2771 * locks_copy_lock: */ 2773 * locks_copy_lock: */
2772 conflock.fl_ops = NULL; 2774 locks_init_lock(&conflock);
2773 conflock.fl_lmops = NULL; 2775 err = vfs_lock_file(filp, cmd, &file_lock, &conflock);
2774 err = posix_lock_file_conf(filp, &file_lock, &conflock);
2775 switch (-err) { 2776 switch (-err) {
2776 case 0: /* success! */ 2777 case 0: /* success! */
2777 update_stateid(&lock_stp->st_stateid); 2778 update_stateid(&lock_stp->st_stateid);
@@ -2788,7 +2789,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2788 status = nfserr_deadlock; 2789 status = nfserr_deadlock;
2789 break; 2790 break;
2790 default: 2791 default:
2791 dprintk("NFSD: nfsd4_lock: posix_lock_file_conf() failed! status %d\n",err); 2792 dprintk("NFSD: nfsd4_lock: vfs_lock_file() failed! status %d\n",err);
2792 status = nfserr_resource; 2793 status = nfserr_resource;
2793 break; 2794 break;
2794 } 2795 }
@@ -2813,7 +2814,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2813 struct inode *inode; 2814 struct inode *inode;
2814 struct file file; 2815 struct file file;
2815 struct file_lock file_lock; 2816 struct file_lock file_lock;
2816 struct file_lock conflock; 2817 int error;
2817 __be32 status; 2818 __be32 status;
2818 2819
2819 if (nfs4_in_grace()) 2820 if (nfs4_in_grace())
@@ -2869,18 +2870,23 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2869 2870
2870 nfs4_transform_lock_offset(&file_lock); 2871 nfs4_transform_lock_offset(&file_lock);
2871 2872
2872 /* posix_test_lock uses the struct file _only_ to resolve the inode. 2873 /* vfs_test_lock uses the struct file _only_ to resolve the inode.
2873 * since LOCKT doesn't require an OPEN, and therefore a struct 2874 * since LOCKT doesn't require an OPEN, and therefore a struct
2874 * file may not exist, pass posix_test_lock a struct file with 2875 * file may not exist, pass vfs_test_lock a struct file with
2875 * only the dentry:inode set. 2876 * only the dentry:inode set.
2876 */ 2877 */
2877 memset(&file, 0, sizeof (struct file)); 2878 memset(&file, 0, sizeof (struct file));
2878 file.f_path.dentry = cstate->current_fh.fh_dentry; 2879 file.f_path.dentry = cstate->current_fh.fh_dentry;
2879 2880
2880 status = nfs_ok; 2881 status = nfs_ok;
2881 if (posix_test_lock(&file, &file_lock, &conflock)) { 2882 error = vfs_test_lock(&file, &file_lock);
2883 if (error) {
2884 status = nfserrno(error);
2885 goto out;
2886 }
2887 if (file_lock.fl_type != F_UNLCK) {
2882 status = nfserr_denied; 2888 status = nfserr_denied;
2883 nfs4_set_lock_denied(&conflock, &lockt->lt_denied); 2889 nfs4_set_lock_denied(&file_lock, &lockt->lt_denied);
2884 } 2890 }
2885out: 2891out:
2886 nfs4_unlock_state(); 2892 nfs4_unlock_state();
@@ -2933,9 +2939,9 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2933 /* 2939 /*
2934 * Try to unlock the file in the VFS. 2940 * Try to unlock the file in the VFS.
2935 */ 2941 */
2936 err = posix_lock_file(filp, &file_lock); 2942 err = vfs_lock_file(filp, F_SETLK, &file_lock, NULL);
2937 if (err) { 2943 if (err) {
2938 dprintk("NFSD: nfs4_locku: posix_lock_file failed!\n"); 2944 dprintk("NFSD: nfs4_locku: vfs_lock_file failed!\n");
2939 goto out_nfserr; 2945 goto out_nfserr;
2940 } 2946 }
2941 /* 2947 /*
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 5d090f11f2be..15809dfd88a5 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -44,7 +44,6 @@
44 44
45#include <linux/param.h> 45#include <linux/param.h>
46#include <linux/smp.h> 46#include <linux/smp.h>
47#include <linux/smp_lock.h>
48#include <linux/fs.h> 47#include <linux/fs.h>
49#include <linux/namei.h> 48#include <linux/namei.h>
50#include <linux/vfs.h> 49#include <linux/vfs.h>
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 8d995bcef806..6ca2d24fc216 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -10,7 +10,6 @@
10 */ 10 */
11 11
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/smp_lock.h>
14#include <linux/fs.h> 13#include <linux/fs.h>
15#include <linux/unistd.h> 14#include <linux/unistd.h>
16#include <linux/string.h> 15#include <linux/string.h>
@@ -324,7 +323,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
324 * 323 *
325 */ 324 */
326 325
327 u8 version = 1; 326 u8 version;
328 u8 fsid_type = 0; 327 u8 fsid_type = 0;
329 struct inode * inode = dentry->d_inode; 328 struct inode * inode = dentry->d_inode;
330 struct dentry *parent = dentry->d_parent; 329 struct dentry *parent = dentry->d_parent;
@@ -342,15 +341,59 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
342 * the reference filehandle (if it is in the same export) 341 * the reference filehandle (if it is in the same export)
343 * or the export options. 342 * or the export options.
344 */ 343 */
344 retry:
345 version = 1;
345 if (ref_fh && ref_fh->fh_export == exp) { 346 if (ref_fh && ref_fh->fh_export == exp) {
346 version = ref_fh->fh_handle.fh_version; 347 version = ref_fh->fh_handle.fh_version;
347 if (version == 0xca) 348 fsid_type = ref_fh->fh_handle.fh_fsid_type;
349
350 if (ref_fh == fhp)
351 fh_put(ref_fh);
352 ref_fh = NULL;
353
354 switch (version) {
355 case 0xca:
348 fsid_type = FSID_DEV; 356 fsid_type = FSID_DEV;
349 else 357 break;
350 fsid_type = ref_fh->fh_handle.fh_fsid_type; 358 case 1:
351 /* We know this version/type works for this export 359 break;
352 * so there is no need for further checks. 360 default:
361 goto retry;
362 }
363
364 /* Need to check that this type works for this
365 * export point. As the fsid -> filesystem mapping
366 * was guided by user-space, there is no guarantee
367 * that the filesystem actually supports that fsid
368 * type. If it doesn't we loop around again without
369 * ref_fh set.
353 */ 370 */
371 switch(fsid_type) {
372 case FSID_DEV:
373 if (!old_valid_dev(ex_dev))
374 goto retry;
375 /* FALL THROUGH */
376 case FSID_MAJOR_MINOR:
377 case FSID_ENCODE_DEV:
378 if (!(exp->ex_dentry->d_inode->i_sb->s_type->fs_flags
379 & FS_REQUIRES_DEV))
380 goto retry;
381 break;
382 case FSID_NUM:
383 if (! (exp->ex_flags & NFSEXP_FSID))
384 goto retry;
385 break;
386 case FSID_UUID8:
387 case FSID_UUID16:
388 if (!root_export)
389 goto retry;
390 /* fall through */
391 case FSID_UUID4_INUM:
392 case FSID_UUID16_INUM:
393 if (exp->ex_uuid == NULL)
394 goto retry;
395 break;
396 }
354 } else if (exp->ex_uuid) { 397 } else if (exp->ex_uuid) {
355 if (fhp->fh_maxsize >= 64) { 398 if (fhp->fh_maxsize >= 64) {
356 if (root_export) 399 if (root_export)
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 5cc2eec981b8..b2c7147aa921 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -155,7 +155,7 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp,
155 argp->count); 155 argp->count);
156 argp->count = NFSSVC_MAXBLKSIZE_V2; 156 argp->count = NFSSVC_MAXBLKSIZE_V2;
157 } 157 }
158 svc_reserve(rqstp, (19<<2) + argp->count + 4); 158 svc_reserve_auth(rqstp, (19<<2) + argp->count + 4);
159 159
160 resp->count = argp->count; 160 resp->count = argp->count;
161 nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh), NULL, 161 nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh), NULL,
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 0c24b9e24fe8..cb3e7fadb772 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -231,9 +231,10 @@ int
231nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p, 231nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p,
232 struct nfsd_sattrargs *args) 232 struct nfsd_sattrargs *args)
233{ 233{
234 if (!(p = decode_fh(p, &args->fh)) 234 p = decode_fh(p, &args->fh);
235 || !(p = decode_sattr(p, &args->attrs))) 235 if (!p)
236 return 0; 236 return 0;
237 p = decode_sattr(p, &args->attrs);
237 238
238 return xdr_argsize_check(rqstp, p); 239 return xdr_argsize_check(rqstp, p);
239} 240}
@@ -284,8 +285,9 @@ int
284nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, 285nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
285 struct nfsd_writeargs *args) 286 struct nfsd_writeargs *args)
286{ 287{
287 unsigned int len; 288 unsigned int len, hdr, dlen;
288 int v; 289 int v;
290
289 if (!(p = decode_fh(p, &args->fh))) 291 if (!(p = decode_fh(p, &args->fh)))
290 return 0; 292 return 0;
291 293
@@ -293,11 +295,30 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
293 args->offset = ntohl(*p++); /* offset */ 295 args->offset = ntohl(*p++); /* offset */
294 p++; /* totalcount */ 296 p++; /* totalcount */
295 len = args->len = ntohl(*p++); 297 len = args->len = ntohl(*p++);
296 rqstp->rq_vec[0].iov_base = (void*)p; 298 /*
297 rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - 299 * The protocol specifies a maximum of 8192 bytes.
298 (((void*)p) - rqstp->rq_arg.head[0].iov_base); 300 */
299 if (len > NFSSVC_MAXBLKSIZE_V2) 301 if (len > NFSSVC_MAXBLKSIZE_V2)
300 len = NFSSVC_MAXBLKSIZE_V2; 302 return 0;
303
304 /*
305 * Check to make sure that we got the right number of
306 * bytes.
307 */
308 hdr = (void*)p - rqstp->rq_arg.head[0].iov_base;
309 dlen = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len
310 - hdr;
311
312 /*
313 * Round the length of the data which was specified up to
314 * the next multiple of XDR units and then compare that
315 * against the length which was actually received.
316 */
317 if (dlen != XDR_QUADLEN(len)*4)
318 return 0;
319
320 rqstp->rq_vec[0].iov_base = (void*)p;
321 rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr;
301 v = 0; 322 v = 0;
302 while (len > rqstp->rq_vec[v].iov_len) { 323 while (len > rqstp->rq_vec[v].iov_len) {
303 len -= rqstp->rq_vec[v].iov_len; 324 len -= rqstp->rq_vec[v].iov_len;
@@ -306,18 +327,18 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
306 rqstp->rq_vec[v].iov_len = PAGE_SIZE; 327 rqstp->rq_vec[v].iov_len = PAGE_SIZE;
307 } 328 }
308 rqstp->rq_vec[v].iov_len = len; 329 rqstp->rq_vec[v].iov_len = len;
309 args->vlen = v+1; 330 args->vlen = v + 1;
310 return rqstp->rq_vec[0].iov_len > 0; 331 return 1;
311} 332}
312 333
313int 334int
314nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p, 335nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p,
315 struct nfsd_createargs *args) 336 struct nfsd_createargs *args)
316{ 337{
317 if (!(p = decode_fh(p, &args->fh)) 338 if ( !(p = decode_fh(p, &args->fh))
318 || !(p = decode_filename(p, &args->name, &args->len)) 339 || !(p = decode_filename(p, &args->name, &args->len)))
319 || !(p = decode_sattr(p, &args->attrs)))
320 return 0; 340 return 0;
341 p = decode_sattr(p, &args->attrs);
321 342
322 return xdr_argsize_check(rqstp, p); 343 return xdr_argsize_check(rqstp, p);
323} 344}
@@ -361,11 +382,11 @@ int
361nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p, 382nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
362 struct nfsd_symlinkargs *args) 383 struct nfsd_symlinkargs *args)
363{ 384{
364 if (!(p = decode_fh(p, &args->ffh)) 385 if ( !(p = decode_fh(p, &args->ffh))
365 || !(p = decode_filename(p, &args->fname, &args->flen)) 386 || !(p = decode_filename(p, &args->fname, &args->flen))
366 || !(p = decode_pathname(p, &args->tname, &args->tlen)) 387 || !(p = decode_pathname(p, &args->tname, &args->tlen)))
367 || !(p = decode_sattr(p, &args->attrs)))
368 return 0; 388 return 0;
389 p = decode_sattr(p, &args->attrs);
369 390
370 return xdr_argsize_check(rqstp, p); 391 return xdr_argsize_check(rqstp, p);
371} 392}
diff --git a/fs/ntfs/aops.h b/fs/ntfs/aops.h
index 9393f4b1e298..caecc58f529c 100644
--- a/fs/ntfs/aops.h
+++ b/fs/ntfs/aops.h
@@ -89,9 +89,8 @@ static inline struct page *ntfs_map_page(struct address_space *mapping,
89 struct page *page = read_mapping_page(mapping, index, NULL); 89 struct page *page = read_mapping_page(mapping, index, NULL);
90 90
91 if (!IS_ERR(page)) { 91 if (!IS_ERR(page)) {
92 wait_on_page_locked(page);
93 kmap(page); 92 kmap(page);
94 if (PageUptodate(page) && !PageError(page)) 93 if (!PageError(page))
95 return page; 94 return page;
96 ntfs_unmap_page(page); 95 ntfs_unmap_page(page);
97 return ERR_PTR(-EIO); 96 return ERR_PTR(-EIO);
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 7659cc192995..1c08fefe487a 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -2532,14 +2532,7 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
2532 page = read_mapping_page(mapping, idx, NULL); 2532 page = read_mapping_page(mapping, idx, NULL);
2533 if (IS_ERR(page)) { 2533 if (IS_ERR(page)) {
2534 ntfs_error(vol->sb, "Failed to read first partial " 2534 ntfs_error(vol->sb, "Failed to read first partial "
2535 "page (sync error, index 0x%lx).", idx); 2535 "page (error, index 0x%lx).", idx);
2536 return PTR_ERR(page);
2537 }
2538 wait_on_page_locked(page);
2539 if (unlikely(!PageUptodate(page))) {
2540 ntfs_error(vol->sb, "Failed to read first partial page "
2541 "(async error, index 0x%lx).", idx);
2542 page_cache_release(page);
2543 return PTR_ERR(page); 2536 return PTR_ERR(page);
2544 } 2537 }
2545 /* 2538 /*
@@ -2602,14 +2595,7 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
2602 page = read_mapping_page(mapping, idx, NULL); 2595 page = read_mapping_page(mapping, idx, NULL);
2603 if (IS_ERR(page)) { 2596 if (IS_ERR(page)) {
2604 ntfs_error(vol->sb, "Failed to read last partial page " 2597 ntfs_error(vol->sb, "Failed to read last partial page "
2605 "(sync error, index 0x%lx).", idx); 2598 "(error, index 0x%lx).", idx);
2606 return PTR_ERR(page);
2607 }
2608 wait_on_page_locked(page);
2609 if (unlikely(!PageUptodate(page))) {
2610 ntfs_error(vol->sb, "Failed to read last partial page "
2611 "(async error, index 0x%lx).", idx);
2612 page_cache_release(page);
2613 return PTR_ERR(page); 2599 return PTR_ERR(page);
2614 } 2600 }
2615 kaddr = kmap_atomic(page, KM_USER0); 2601 kaddr = kmap_atomic(page, KM_USER0);
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index 74f99a6a369b..34314b33dbd4 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -20,7 +20,6 @@
20 * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 */ 21 */
22 22
23#include <linux/smp_lock.h>
24#include <linux/buffer_head.h> 23#include <linux/buffer_head.h>
25 24
26#include "dir.h" 25#include "dir.h"
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index d69c4595ccd0..621de369e6f8 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -236,8 +236,7 @@ do_non_resident_extend:
236 err = PTR_ERR(page); 236 err = PTR_ERR(page);
237 goto init_err_out; 237 goto init_err_out;
238 } 238 }
239 wait_on_page_locked(page); 239 if (unlikely(PageError(page))) {
240 if (unlikely(!PageUptodate(page) || PageError(page))) {
241 page_cache_release(page); 240 page_cache_release(page);
242 err = -EIO; 241 err = -EIO;
243 goto init_err_out; 242 goto init_err_out;
@@ -2130,28 +2129,13 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb,
2130 struct address_space *mapping = file->f_mapping; 2129 struct address_space *mapping = file->f_mapping;
2131 struct inode *inode = mapping->host; 2130 struct inode *inode = mapping->host;
2132 loff_t pos; 2131 loff_t pos;
2133 unsigned long seg;
2134 size_t count; /* after file limit checks */ 2132 size_t count; /* after file limit checks */
2135 ssize_t written, err; 2133 ssize_t written, err;
2136 2134
2137 count = 0; 2135 count = 0;
2138 for (seg = 0; seg < nr_segs; seg++) { 2136 err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
2139 const struct iovec *iv = &iov[seg]; 2137 if (err)
2140 /* 2138 return err;
2141 * If any segment has a negative length, or the cumulative
2142 * length ever wraps negative then return -EINVAL.
2143 */
2144 count += iv->iov_len;
2145 if (unlikely((ssize_t)(count|iv->iov_len) < 0))
2146 return -EINVAL;
2147 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
2148 continue;
2149 if (!seg)
2150 return -EFAULT;
2151 nr_segs = seg;
2152 count -= iv->iov_len; /* This segment is no good */
2153 break;
2154 }
2155 pos = *ppos; 2139 pos = *ppos;
2156 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); 2140 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
2157 /* We can write back this queue in page reclaim. */ 2141 /* We can write back this queue in page reclaim. */
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index f8bf8da67ee8..074791ce4ab2 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -27,7 +27,6 @@
27#include <linux/pagemap.h> 27#include <linux/pagemap.h>
28#include <linux/quotaops.h> 28#include <linux/quotaops.h>
29#include <linux/slab.h> 29#include <linux/slab.h>
30#include <linux/smp_lock.h>
31 30
32#include "aops.h" 31#include "aops.h"
33#include "attrib.h" 32#include "attrib.h"
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 1594c90b7164..21d834e5ed73 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -2471,7 +2471,6 @@ static s64 get_nr_free_clusters(ntfs_volume *vol)
2471 s64 nr_free = vol->nr_clusters; 2471 s64 nr_free = vol->nr_clusters;
2472 u32 *kaddr; 2472 u32 *kaddr;
2473 struct address_space *mapping = vol->lcnbmp_ino->i_mapping; 2473 struct address_space *mapping = vol->lcnbmp_ino->i_mapping;
2474 filler_t *readpage = (filler_t*)mapping->a_ops->readpage;
2475 struct page *page; 2474 struct page *page;
2476 pgoff_t index, max_index; 2475 pgoff_t index, max_index;
2477 2476
@@ -2494,24 +2493,14 @@ static s64 get_nr_free_clusters(ntfs_volume *vol)
2494 * Read the page from page cache, getting it from backing store 2493 * Read the page from page cache, getting it from backing store
2495 * if necessary, and increment the use count. 2494 * if necessary, and increment the use count.
2496 */ 2495 */
2497 page = read_cache_page(mapping, index, (filler_t*)readpage, 2496 page = read_mapping_page(mapping, index, NULL);
2498 NULL);
2499 /* Ignore pages which errored synchronously. */ 2497 /* Ignore pages which errored synchronously. */
2500 if (IS_ERR(page)) { 2498 if (IS_ERR(page)) {
2501 ntfs_debug("Sync read_cache_page() error. Skipping " 2499 ntfs_debug("read_mapping_page() error. Skipping "
2502 "page (index 0x%lx).", index); 2500 "page (index 0x%lx).", index);
2503 nr_free -= PAGE_CACHE_SIZE * 8; 2501 nr_free -= PAGE_CACHE_SIZE * 8;
2504 continue; 2502 continue;
2505 } 2503 }
2506 wait_on_page_locked(page);
2507 /* Ignore pages which errored asynchronously. */
2508 if (!PageUptodate(page)) {
2509 ntfs_debug("Async read_cache_page() error. Skipping "
2510 "page (index 0x%lx).", index);
2511 page_cache_release(page);
2512 nr_free -= PAGE_CACHE_SIZE * 8;
2513 continue;
2514 }
2515 kaddr = (u32*)kmap_atomic(page, KM_USER0); 2504 kaddr = (u32*)kmap_atomic(page, KM_USER0);
2516 /* 2505 /*
2517 * For each 4 bytes, subtract the number of set bits. If this 2506 * For each 4 bytes, subtract the number of set bits. If this
@@ -2562,7 +2551,6 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol,
2562{ 2551{
2563 u32 *kaddr; 2552 u32 *kaddr;
2564 struct address_space *mapping = vol->mftbmp_ino->i_mapping; 2553 struct address_space *mapping = vol->mftbmp_ino->i_mapping;
2565 filler_t *readpage = (filler_t*)mapping->a_ops->readpage;
2566 struct page *page; 2554 struct page *page;
2567 pgoff_t index; 2555 pgoff_t index;
2568 2556
@@ -2576,21 +2564,11 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol,
2576 * Read the page from page cache, getting it from backing store 2564 * Read the page from page cache, getting it from backing store
2577 * if necessary, and increment the use count. 2565 * if necessary, and increment the use count.
2578 */ 2566 */
2579 page = read_cache_page(mapping, index, (filler_t*)readpage, 2567 page = read_mapping_page(mapping, index, NULL);
2580 NULL);
2581 /* Ignore pages which errored synchronously. */ 2568 /* Ignore pages which errored synchronously. */
2582 if (IS_ERR(page)) { 2569 if (IS_ERR(page)) {
2583 ntfs_debug("Sync read_cache_page() error. Skipping " 2570 ntfs_debug("read_mapping_page() error. Skipping "
2584 "page (index 0x%lx).", index);
2585 nr_free -= PAGE_CACHE_SIZE * 8;
2586 continue;
2587 }
2588 wait_on_page_locked(page);
2589 /* Ignore pages which errored asynchronously. */
2590 if (!PageUptodate(page)) {
2591 ntfs_debug("Async read_cache_page() error. Skipping "
2592 "page (index 0x%lx).", index); 2571 "page (index 0x%lx).", index);
2593 page_cache_release(page);
2594 nr_free -= PAGE_CACHE_SIZE * 8; 2572 nr_free -= PAGE_CACHE_SIZE * 8;
2595 continue; 2573 continue;
2596 } 2574 }
@@ -3107,8 +3085,7 @@ static void ntfs_big_inode_init_once(void *foo, struct kmem_cache *cachep,
3107{ 3085{
3108 ntfs_inode *ni = (ntfs_inode *)foo; 3086 ntfs_inode *ni = (ntfs_inode *)foo;
3109 3087
3110 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 3088 if (flags & SLAB_CTOR_CONSTRUCTOR)
3111 SLAB_CTOR_CONSTRUCTOR)
3112 inode_init_once(VFS_I(ni)); 3089 inode_init_once(VFS_I(ni));
3113} 3090}
3114 3091
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index a0c8667caa72..19712a7d145f 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -2869,7 +2869,7 @@ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
2869 tl = &tl_copy->id2.i_dealloc; 2869 tl = &tl_copy->id2.i_dealloc;
2870 num_recs = le16_to_cpu(tl->tl_used); 2870 num_recs = le16_to_cpu(tl->tl_used);
2871 mlog(0, "cleanup %u records from %llu\n", num_recs, 2871 mlog(0, "cleanup %u records from %llu\n", num_recs,
2872 (unsigned long long)tl_copy->i_blkno); 2872 (unsigned long long)le64_to_cpu(tl_copy->i_blkno));
2873 2873
2874 mutex_lock(&tl_inode->i_mutex); 2874 mutex_lock(&tl_inode->i_mutex);
2875 for(i = 0; i < num_recs; i++) { 2875 for(i = 0; i < num_recs; i++) {
@@ -3801,8 +3801,8 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb,
3801 fe = (struct ocfs2_dinode *) fe_bh->b_data; 3801 fe = (struct ocfs2_dinode *) fe_bh->b_data;
3802 3802
3803 mlog(0, "fe->i_clusters = %u, new_i_clusters = %u, fe->i_size =" 3803 mlog(0, "fe->i_clusters = %u, new_i_clusters = %u, fe->i_size ="
3804 "%llu\n", fe->i_clusters, new_i_clusters, 3804 "%llu\n", le32_to_cpu(fe->i_clusters), new_i_clusters,
3805 (unsigned long long)fe->i_size); 3805 (unsigned long long)le64_to_cpu(fe->i_size));
3806 3806
3807 *tc = kzalloc(sizeof(struct ocfs2_truncate_context), GFP_KERNEL); 3807 *tc = kzalloc(sizeof(struct ocfs2_truncate_context), GFP_KERNEL);
3808 if (!(*tc)) { 3808 if (!(*tc)) {
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 56963e6c46c0..8e7cafb5fc6c 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -78,7 +78,8 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
78 78
79 if (!OCFS2_IS_VALID_DINODE(fe)) { 79 if (!OCFS2_IS_VALID_DINODE(fe)) {
80 mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n", 80 mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n",
81 (unsigned long long)fe->i_blkno, 7, fe->i_signature); 81 (unsigned long long)le64_to_cpu(fe->i_blkno), 7,
82 fe->i_signature);
82 goto bail; 83 goto bail;
83 } 84 }
84 85
@@ -939,9 +940,9 @@ out:
939 * Returns a negative error code or the number of bytes copied into 940 * Returns a negative error code or the number of bytes copied into
940 * the page. 941 * the page.
941 */ 942 */
942int ocfs2_write_data_page(struct inode *inode, handle_t *handle, 943static int ocfs2_write_data_page(struct inode *inode, handle_t *handle,
943 u64 *p_blkno, struct page *page, 944 u64 *p_blkno, struct page *page,
944 struct ocfs2_write_ctxt *wc, int new) 945 struct ocfs2_write_ctxt *wc, int new)
945{ 946{
946 int ret, copied = 0; 947 int ret, copied = 0;
947 unsigned int from = 0, to = 0; 948 unsigned int from = 0, to = 0;
@@ -1086,7 +1087,7 @@ static ssize_t ocfs2_write(struct file *file, u32 phys, handle_t *handle,
1086 for(i = 0; i < numpages; i++) { 1087 for(i = 0; i < numpages; i++) {
1087 index = start + i; 1088 index = start + i;
1088 1089
1089 cpages[i] = grab_cache_page(mapping, index); 1090 cpages[i] = find_or_create_page(mapping, index, GFP_NOFS);
1090 if (!cpages[i]) { 1091 if (!cpages[i]) {
1091 ret = -ENOMEM; 1092 ret = -ENOMEM;
1092 mlog_errno(ret); 1093 mlog_errno(ret);
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index eba282da500e..979113479c66 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -438,7 +438,7 @@ static inline void o2hb_prepare_block(struct o2hb_region *reg,
438 hb_block)); 438 hb_block));
439 439
440 mlog(ML_HB_BIO, "our node generation = 0x%llx, cksum = 0x%x\n", 440 mlog(ML_HB_BIO, "our node generation = 0x%llx, cksum = 0x%x\n",
441 (long long)cpu_to_le64(generation), 441 (long long)generation,
442 le32_to_cpu(hb_block->hb_cksum)); 442 le32_to_cpu(hb_block->hb_cksum));
443} 443}
444 444
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index 636593bf4d17..2e975c0a35e1 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -147,7 +147,7 @@ static struct kset mlog_kset = {
147 .kobj = {.name = "logmask", .ktype = &mlog_ktype}, 147 .kobj = {.name = "logmask", .ktype = &mlog_ktype},
148}; 148};
149 149
150int mlog_sys_init(struct subsystem *o2cb_subsys) 150int mlog_sys_init(struct kset *o2cb_subsys)
151{ 151{
152 int i = 0; 152 int i = 0;
153 153
@@ -157,7 +157,7 @@ int mlog_sys_init(struct subsystem *o2cb_subsys)
157 } 157 }
158 mlog_attr_ptrs[i] = NULL; 158 mlog_attr_ptrs[i] = NULL;
159 159
160 mlog_kset.subsys = o2cb_subsys; 160 kobj_set_kset_s(&mlog_kset, o2cb_subsys);
161 return kset_register(&mlog_kset); 161 return kset_register(&mlog_kset);
162} 162}
163 163
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index a42628ba9ddf..75cd877f6d42 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -278,7 +278,7 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits;
278 278
279#include <linux/kobject.h> 279#include <linux/kobject.h>
280#include <linux/sysfs.h> 280#include <linux/sysfs.h>
281int mlog_sys_init(struct subsystem *o2cb_subsys); 281int mlog_sys_init(struct kset *o2cb_subsys);
282void mlog_sys_shutdown(void); 282void mlog_sys_shutdown(void);
283 283
284#endif /* O2CLUSTER_MASKLOG_H */ 284#endif /* O2CLUSTER_MASKLOG_H */
diff --git a/fs/ocfs2/cluster/sys.c b/fs/ocfs2/cluster/sys.c
index 1d9f6acafa2e..64f6f378fd09 100644
--- a/fs/ocfs2/cluster/sys.c
+++ b/fs/ocfs2/cluster/sys.c
@@ -42,7 +42,6 @@ struct o2cb_attribute {
42#define O2CB_ATTR(_name, _mode, _show, _store) \ 42#define O2CB_ATTR(_name, _mode, _show, _store) \
43struct o2cb_attribute o2cb_attr_##_name = __ATTR(_name, _mode, _show, _store) 43struct o2cb_attribute o2cb_attr_##_name = __ATTR(_name, _mode, _show, _store)
44 44
45#define to_o2cb_subsys(k) container_of(to_kset(k), struct subsystem, kset)
46#define to_o2cb_attr(_attr) container_of(_attr, struct o2cb_attribute, attr) 45#define to_o2cb_attr(_attr) container_of(_attr, struct o2cb_attribute, attr)
47 46
48static ssize_t o2cb_interface_revision_show(char *buf) 47static ssize_t o2cb_interface_revision_show(char *buf)
@@ -79,7 +78,7 @@ static ssize_t
79o2cb_show(struct kobject * kobj, struct attribute * attr, char * buffer) 78o2cb_show(struct kobject * kobj, struct attribute * attr, char * buffer)
80{ 79{
81 struct o2cb_attribute *o2cb_attr = to_o2cb_attr(attr); 80 struct o2cb_attribute *o2cb_attr = to_o2cb_attr(attr);
82 struct subsystem *sbs = to_o2cb_subsys(kobj); 81 struct kset *sbs = to_kset(kobj);
83 82
84 BUG_ON(sbs != &o2cb_subsys); 83 BUG_ON(sbs != &o2cb_subsys);
85 84
@@ -93,7 +92,7 @@ o2cb_store(struct kobject * kobj, struct attribute * attr,
93 const char * buffer, size_t count) 92 const char * buffer, size_t count)
94{ 93{
95 struct o2cb_attribute *o2cb_attr = to_o2cb_attr(attr); 94 struct o2cb_attribute *o2cb_attr = to_o2cb_attr(attr);
96 struct subsystem *sbs = to_o2cb_subsys(kobj); 95 struct kset *sbs = to_kset(kobj);
97 96
98 BUG_ON(sbs != &o2cb_subsys); 97 BUG_ON(sbs != &o2cb_subsys);
99 98
@@ -112,7 +111,7 @@ int o2cb_sys_init(void)
112{ 111{
113 int ret; 112 int ret;
114 113
115 o2cb_subsys.kset.kobj.ktype = &o2cb_subsys_type; 114 o2cb_subsys.kobj.ktype = &o2cb_subsys_type;
116 ret = subsystem_register(&o2cb_subsys); 115 ret = subsystem_register(&o2cb_subsys);
117 if (ret) 116 if (ret)
118 return ret; 117 return ret;
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 69caf3e12fea..0b229a9c7952 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1496,7 +1496,7 @@ static void o2net_start_connect(struct work_struct *work)
1496 sock->sk->sk_allocation = GFP_ATOMIC; 1496 sock->sk->sk_allocation = GFP_ATOMIC;
1497 1497
1498 myaddr.sin_family = AF_INET; 1498 myaddr.sin_family = AF_INET;
1499 myaddr.sin_addr.s_addr = (__force u32)mynode->nd_ipv4_address; 1499 myaddr.sin_addr.s_addr = mynode->nd_ipv4_address;
1500 myaddr.sin_port = (__force u16)htons(0); /* any port */ 1500 myaddr.sin_port = (__force u16)htons(0); /* any port */
1501 1501
1502 ret = sock->ops->bind(sock, (struct sockaddr *)&myaddr, 1502 ret = sock->ops->bind(sock, (struct sockaddr *)&myaddr,
@@ -1521,8 +1521,8 @@ static void o2net_start_connect(struct work_struct *work)
1521 spin_unlock(&nn->nn_lock); 1521 spin_unlock(&nn->nn_lock);
1522 1522
1523 remoteaddr.sin_family = AF_INET; 1523 remoteaddr.sin_family = AF_INET;
1524 remoteaddr.sin_addr.s_addr = (__force u32)node->nd_ipv4_address; 1524 remoteaddr.sin_addr.s_addr = node->nd_ipv4_address;
1525 remoteaddr.sin_port = (__force u16)node->nd_ipv4_port; 1525 remoteaddr.sin_port = node->nd_ipv4_port;
1526 1526
1527 ret = sc->sc_sock->ops->connect(sc->sc_sock, 1527 ret = sc->sc_sock->ops->connect(sc->sc_sock,
1528 (struct sockaddr *)&remoteaddr, 1528 (struct sockaddr *)&remoteaddr,
@@ -1810,8 +1810,8 @@ static int o2net_open_listening_sock(__be32 addr, __be16 port)
1810 int ret; 1810 int ret;
1811 struct sockaddr_in sin = { 1811 struct sockaddr_in sin = {
1812 .sin_family = PF_INET, 1812 .sin_family = PF_INET,
1813 .sin_addr = { .s_addr = (__force u32)addr }, 1813 .sin_addr = { .s_addr = addr },
1814 .sin_port = (__force u16)port, 1814 .sin_port = port,
1815 }; 1815 };
1816 1816
1817 ret = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); 1817 ret = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 67e6866a2a4f..c441ef1f2bad 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -403,7 +403,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
403 struct buffer_head **new_de_bh) 403 struct buffer_head **new_de_bh)
404{ 404{
405 int status = 0; 405 int status = 0;
406 int credits, num_free_extents; 406 int credits, num_free_extents, drop_alloc_sem = 0;
407 loff_t dir_i_size; 407 loff_t dir_i_size;
408 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) parent_fe_bh->b_data; 408 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
409 struct ocfs2_alloc_context *data_ac = NULL; 409 struct ocfs2_alloc_context *data_ac = NULL;
@@ -452,6 +452,9 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
452 credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS; 452 credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS;
453 } 453 }
454 454
455 down_write(&OCFS2_I(dir)->ip_alloc_sem);
456 drop_alloc_sem = 1;
457
455 handle = ocfs2_start_trans(osb, credits); 458 handle = ocfs2_start_trans(osb, credits);
456 if (IS_ERR(handle)) { 459 if (IS_ERR(handle)) {
457 status = PTR_ERR(handle); 460 status = PTR_ERR(handle);
@@ -497,6 +500,8 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
497 *new_de_bh = new_bh; 500 *new_de_bh = new_bh;
498 get_bh(*new_de_bh); 501 get_bh(*new_de_bh);
499bail: 502bail:
503 if (drop_alloc_sem)
504 up_write(&OCFS2_I(dir)->ip_alloc_sem);
500 if (handle) 505 if (handle)
501 ocfs2_commit_trans(osb, handle); 506 ocfs2_commit_trans(osb, handle);
502 507
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index 241cad342a48..2fd8bded38f3 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -312,8 +312,8 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
312 past->type != DLM_BAST) { 312 past->type != DLM_BAST) {
313 mlog(ML_ERROR, "Unknown ast type! %d, cookie=%u:%llu" 313 mlog(ML_ERROR, "Unknown ast type! %d, cookie=%u:%llu"
314 "name=%.*s\n", past->type, 314 "name=%.*s\n", past->type,
315 dlm_get_lock_cookie_node(be64_to_cpu(cookie)), 315 dlm_get_lock_cookie_node(cookie),
316 dlm_get_lock_cookie_seq(be64_to_cpu(cookie)), 316 dlm_get_lock_cookie_seq(cookie),
317 locklen, name); 317 locklen, name);
318 ret = DLM_IVLOCKID; 318 ret = DLM_IVLOCKID;
319 goto leave; 319 goto leave;
@@ -324,8 +324,8 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
324 mlog(0, "got %sast for unknown lockres! " 324 mlog(0, "got %sast for unknown lockres! "
325 "cookie=%u:%llu, name=%.*s, namelen=%u\n", 325 "cookie=%u:%llu, name=%.*s, namelen=%u\n",
326 past->type == DLM_AST ? "" : "b", 326 past->type == DLM_AST ? "" : "b",
327 dlm_get_lock_cookie_node(be64_to_cpu(cookie)), 327 dlm_get_lock_cookie_node(cookie),
328 dlm_get_lock_cookie_seq(be64_to_cpu(cookie)), 328 dlm_get_lock_cookie_seq(cookie),
329 locklen, name, locklen); 329 locklen, name, locklen);
330 ret = DLM_IVLOCKID; 330 ret = DLM_IVLOCKID;
331 goto leave; 331 goto leave;
@@ -370,8 +370,8 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
370 370
371 mlog(0, "got %sast for unknown lock! cookie=%u:%llu, " 371 mlog(0, "got %sast for unknown lock! cookie=%u:%llu, "
372 "name=%.*s, namelen=%u\n", past->type == DLM_AST ? "" : "b", 372 "name=%.*s, namelen=%u\n", past->type == DLM_AST ? "" : "b",
373 dlm_get_lock_cookie_node(be64_to_cpu(cookie)), 373 dlm_get_lock_cookie_node(cookie),
374 dlm_get_lock_cookie_seq(be64_to_cpu(cookie)), 374 dlm_get_lock_cookie_seq(cookie),
375 locklen, name, locklen); 375 locklen, name, locklen);
376 376
377 ret = DLM_NORMAL; 377 ret = DLM_NORMAL;
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index de952eba29a9..5671cf9d6383 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -42,7 +42,6 @@
42#include <linux/highmem.h> 42#include <linux/highmem.h>
43#include <linux/init.h> 43#include <linux/init.h>
44#include <linux/string.h> 44#include <linux/string.h>
45#include <linux/smp_lock.h>
46#include <linux/backing-dev.h> 45#include <linux/backing-dev.h>
47 46
48#include <asm/uaccess.h> 47#include <asm/uaccess.h>
@@ -263,8 +262,7 @@ static void dlmfs_init_once(void *foo,
263 struct dlmfs_inode_private *ip = 262 struct dlmfs_inode_private *ip =
264 (struct dlmfs_inode_private *) foo; 263 (struct dlmfs_inode_private *) foo;
265 264
266 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 265 if (flags & SLAB_CTOR_CONSTRUCTOR) {
267 SLAB_CTOR_CONSTRUCTOR) {
268 ip->ip_dlm = NULL; 266 ip->ip_dlm = NULL;
269 ip->ip_parent = NULL; 267 ip->ip_parent = NULL;
270 268
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index c1807a42c49f..671c4ed58ee2 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1769,7 +1769,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
1769 /* lock is always created locally first, and 1769 /* lock is always created locally first, and
1770 * destroyed locally last. it must be on the list */ 1770 * destroyed locally last. it must be on the list */
1771 if (!lock) { 1771 if (!lock) {
1772 u64 c = ml->cookie; 1772 __be64 c = ml->cookie;
1773 mlog(ML_ERROR, "could not find local lock " 1773 mlog(ML_ERROR, "could not find local lock "
1774 "with cookie %u:%llu!\n", 1774 "with cookie %u:%llu!\n",
1775 dlm_get_lock_cookie_node(be64_to_cpu(c)), 1775 dlm_get_lock_cookie_node(be64_to_cpu(c)),
@@ -1878,7 +1878,7 @@ skip_lvb:
1878 spin_lock(&res->spinlock); 1878 spin_lock(&res->spinlock);
1879 list_for_each_entry(lock, queue, list) { 1879 list_for_each_entry(lock, queue, list) {
1880 if (lock->ml.cookie == ml->cookie) { 1880 if (lock->ml.cookie == ml->cookie) {
1881 u64 c = lock->ml.cookie; 1881 __be64 c = lock->ml.cookie;
1882 mlog(ML_ERROR, "%s:%.*s: %u:%llu: lock already " 1882 mlog(ML_ERROR, "%s:%.*s: %u:%llu: lock already "
1883 "exists on this lockres!\n", dlm->name, 1883 "exists on this lockres!\n", dlm->name,
1884 res->lockname.len, res->lockname.name, 1884 res->lockname.len, res->lockname.name,
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 2b264c6ba039..cebd089f8955 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -76,7 +76,7 @@ repeat:
76 goto repeat; 76 goto repeat;
77 } 77 }
78 remove_wait_queue(&res->wq, &wait); 78 remove_wait_queue(&res->wq, &wait);
79 current->state = TASK_RUNNING; 79 __set_current_state(TASK_RUNNING);
80} 80}
81 81
82int __dlm_lockres_has_locks(struct dlm_lock_resource *res) 82int __dlm_lockres_has_locks(struct dlm_lock_resource *res)
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 27e43b0c0eae..d1bd305ef0d7 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -27,7 +27,6 @@
27#include <linux/slab.h> 27#include <linux/slab.h>
28#include <linux/highmem.h> 28#include <linux/highmem.h>
29#include <linux/mm.h> 29#include <linux/mm.h>
30#include <linux/smp_lock.h>
31#include <linux/crc32.h> 30#include <linux/crc32.h>
32#include <linux/kthread.h> 31#include <linux/kthread.h>
33#include <linux/pagemap.h> 32#include <linux/pagemap.h>
@@ -104,6 +103,35 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
104static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, 103static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
105 struct ocfs2_lock_res *lockres); 104 struct ocfs2_lock_res *lockres);
106 105
106
107#define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres)
108
109/* This aids in debugging situations where a bad LVB might be involved. */
110static void ocfs2_dump_meta_lvb_info(u64 level,
111 const char *function,
112 unsigned int line,
113 struct ocfs2_lock_res *lockres)
114{
115 struct ocfs2_meta_lvb *lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb;
116
117 mlog(level, "LVB information for %s (called from %s:%u):\n",
118 lockres->l_name, function, line);
119 mlog(level, "version: %u, clusters: %u, generation: 0x%x\n",
120 lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters),
121 be32_to_cpu(lvb->lvb_igeneration));
122 mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n",
123 (unsigned long long)be64_to_cpu(lvb->lvb_isize),
124 be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid),
125 be16_to_cpu(lvb->lvb_imode));
126 mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, "
127 "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink),
128 (long long)be64_to_cpu(lvb->lvb_iatime_packed),
129 (long long)be64_to_cpu(lvb->lvb_ictime_packed),
130 (long long)be64_to_cpu(lvb->lvb_imtime_packed),
131 be32_to_cpu(lvb->lvb_iattr));
132}
133
134
107/* 135/*
108 * OCFS2 Lock Resource Operations 136 * OCFS2 Lock Resource Operations
109 * 137 *
@@ -3078,28 +3106,3 @@ static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
3078 3106
3079 mlog_exit_void(); 3107 mlog_exit_void();
3080} 3108}
3081
3082/* This aids in debugging situations where a bad LVB might be involved. */
3083void ocfs2_dump_meta_lvb_info(u64 level,
3084 const char *function,
3085 unsigned int line,
3086 struct ocfs2_lock_res *lockres)
3087{
3088 struct ocfs2_meta_lvb *lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb;
3089
3090 mlog(level, "LVB information for %s (called from %s:%u):\n",
3091 lockres->l_name, function, line);
3092 mlog(level, "version: %u, clusters: %u, generation: 0x%x\n",
3093 lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters),
3094 be32_to_cpu(lvb->lvb_igeneration));
3095 mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n",
3096 (unsigned long long)be64_to_cpu(lvb->lvb_isize),
3097 be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid),
3098 be16_to_cpu(lvb->lvb_imode));
3099 mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, "
3100 "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink),
3101 (long long)be64_to_cpu(lvb->lvb_iatime_packed),
3102 (long long)be64_to_cpu(lvb->lvb_ictime_packed),
3103 (long long)be64_to_cpu(lvb->lvb_imtime_packed),
3104 be32_to_cpu(lvb->lvb_iattr));
3105}
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 59cb566e7983..492bad32a8c0 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -119,11 +119,4 @@ void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
119struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void); 119struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void);
120void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug); 120void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug);
121 121
122/* aids in debugging and tracking lvbs */
123void ocfs2_dump_meta_lvb_info(u64 level,
124 const char *function,
125 unsigned int line,
126 struct ocfs2_lock_res *lockres);
127#define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres)
128
129#endif /* DLMGLUE_H */ 122#endif /* DLMGLUE_H */
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 56e1fefc1205..bc48177bd183 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -140,7 +140,7 @@ bail:
140 return parent; 140 return parent;
141} 141}
142 142
143static int ocfs2_encode_fh(struct dentry *dentry, __be32 *fh, int *max_len, 143static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len,
144 int connectable) 144 int connectable)
145{ 145{
146 struct inode *inode = dentry->d_inode; 146 struct inode *inode = dentry->d_inode;
@@ -148,6 +148,7 @@ static int ocfs2_encode_fh(struct dentry *dentry, __be32 *fh, int *max_len,
148 int type = 1; 148 int type = 1;
149 u64 blkno; 149 u64 blkno;
150 u32 generation; 150 u32 generation;
151 __le32 *fh = (__force __le32 *) fh_in;
151 152
152 mlog_entry("(0x%p, '%.*s', 0x%p, %d, %d)\n", dentry, 153 mlog_entry("(0x%p, '%.*s', 0x%p, %d, %d)\n", dentry,
153 dentry->d_name.len, dentry->d_name.name, 154 dentry->d_name.len, dentry->d_name.name,
@@ -199,7 +200,7 @@ bail:
199 return type; 200 return type;
200} 201}
201 202
202static struct dentry *ocfs2_decode_fh(struct super_block *sb, __be32 *fh, 203static struct dentry *ocfs2_decode_fh(struct super_block *sb, u32 *fh_in,
203 int fh_len, int fileid_type, 204 int fh_len, int fileid_type,
204 int (*acceptable)(void *context, 205 int (*acceptable)(void *context,
205 struct dentry *de), 206 struct dentry *de),
@@ -207,6 +208,7 @@ static struct dentry *ocfs2_decode_fh(struct super_block *sb, __be32 *fh,
207{ 208{
208 struct ocfs2_inode_handle handle, parent; 209 struct ocfs2_inode_handle handle, parent;
209 struct dentry *ret = NULL; 210 struct dentry *ret = NULL;
211 __le32 *fh = (__force __le32 *) fh_in;
210 212
211 mlog_entry("(0x%p, 0x%p, %d, %d, 0x%p, 0x%p)\n", 213 mlog_entry("(0x%p, 0x%p, %d, %d, 0x%p, 0x%p)\n",
212 sb, fh, fh_len, fileid_type, acceptable, context); 214 sb, fh, fh_len, fileid_type, acceptable, context);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 520a2a6d7670..9395b4fa547d 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -207,10 +207,10 @@ out:
207 return ret; 207 return ret;
208} 208}
209 209
210int ocfs2_set_inode_size(handle_t *handle, 210static int ocfs2_set_inode_size(handle_t *handle,
211 struct inode *inode, 211 struct inode *inode,
212 struct buffer_head *fe_bh, 212 struct buffer_head *fe_bh,
213 u64 new_i_size) 213 u64 new_i_size)
214{ 214{
215 int status; 215 int status;
216 216
@@ -713,7 +713,8 @@ restarted_transaction:
713 } 713 }
714 714
715 mlog(0, "fe: i_clusters = %u, i_size=%llu\n", 715 mlog(0, "fe: i_clusters = %u, i_size=%llu\n",
716 fe->i_clusters, (unsigned long long)fe->i_size); 716 le32_to_cpu(fe->i_clusters),
717 (unsigned long long)le64_to_cpu(fe->i_size));
717 mlog(0, "inode: ip_clusters=%u, i_size=%lld\n", 718 mlog(0, "inode: ip_clusters=%u, i_size=%lld\n",
718 OCFS2_I(inode)->ip_clusters, i_size_read(inode)); 719 OCFS2_I(inode)->ip_clusters, i_size_read(inode));
719 720
@@ -1853,6 +1854,9 @@ const struct file_operations ocfs2_fops = {
1853 .aio_read = ocfs2_file_aio_read, 1854 .aio_read = ocfs2_file_aio_read,
1854 .aio_write = ocfs2_file_aio_write, 1855 .aio_write = ocfs2_file_aio_write,
1855 .ioctl = ocfs2_ioctl, 1856 .ioctl = ocfs2_ioctl,
1857#ifdef CONFIG_COMPAT
1858 .compat_ioctl = ocfs2_compat_ioctl,
1859#endif
1856 .splice_read = ocfs2_file_splice_read, 1860 .splice_read = ocfs2_file_splice_read,
1857 .splice_write = ocfs2_file_splice_write, 1861 .splice_write = ocfs2_file_splice_write,
1858}; 1862};
@@ -1862,4 +1866,7 @@ const struct file_operations ocfs2_dops = {
1862 .readdir = ocfs2_readdir, 1866 .readdir = ocfs2_readdir,
1863 .fsync = ocfs2_sync_file, 1867 .fsync = ocfs2_sync_file,
1864 .ioctl = ocfs2_ioctl, 1868 .ioctl = ocfs2_ioctl,
1869#ifdef CONFIG_COMPAT
1870 .compat_ioctl = ocfs2_compat_ioctl,
1871#endif
1865}; 1872};
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 2c4460fced52..a4dd1fa1822b 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -56,11 +56,6 @@ int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
56int ocfs2_permission(struct inode *inode, int mask, 56int ocfs2_permission(struct inode *inode, int mask,
57 struct nameidata *nd); 57 struct nameidata *nd);
58 58
59int ocfs2_set_inode_size(handle_t *handle,
60 struct inode *inode,
61 struct buffer_head *fe_bh,
62 u64 new_i_size);
63
64int ocfs2_should_update_atime(struct inode *inode, 59int ocfs2_should_update_atime(struct inode *inode,
65 struct vfsmount *vfsmnt); 60 struct vfsmount *vfsmnt);
66int ocfs2_update_inode_atime(struct inode *inode, 61int ocfs2_update_inode_atime(struct inode *inode,
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 21a605079c62..c53a6763bbbe 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -28,7 +28,6 @@
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <linux/highmem.h> 29#include <linux/highmem.h>
30#include <linux/pagemap.h> 30#include <linux/pagemap.h>
31#include <linux/smp_lock.h>
32 31
33#include <asm/byteorder.h> 32#include <asm/byteorder.h>
34 33
@@ -89,6 +88,25 @@ void ocfs2_set_inode_flags(struct inode *inode)
89 inode->i_flags |= S_DIRSYNC; 88 inode->i_flags |= S_DIRSYNC;
90} 89}
91 90
91/* Propagate flags from i_flags to OCFS2_I(inode)->ip_attr */
92void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi)
93{
94 unsigned int flags = oi->vfs_inode.i_flags;
95
96 oi->ip_attr &= ~(OCFS2_SYNC_FL|OCFS2_APPEND_FL|
97 OCFS2_IMMUTABLE_FL|OCFS2_NOATIME_FL|OCFS2_DIRSYNC_FL);
98 if (flags & S_SYNC)
99 oi->ip_attr |= OCFS2_SYNC_FL;
100 if (flags & S_APPEND)
101 oi->ip_attr |= OCFS2_APPEND_FL;
102 if (flags & S_IMMUTABLE)
103 oi->ip_attr |= OCFS2_IMMUTABLE_FL;
104 if (flags & S_NOATIME)
105 oi->ip_attr |= OCFS2_NOATIME_FL;
106 if (flags & S_DIRSYNC)
107 oi->ip_attr |= OCFS2_DIRSYNC_FL;
108}
109
92struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, int flags) 110struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, int flags)
93{ 111{
94 struct inode *inode = NULL; 112 struct inode *inode = NULL;
@@ -196,7 +214,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
196 int status = -EINVAL; 214 int status = -EINVAL;
197 215
198 mlog_entry("(0x%p, size:%llu)\n", inode, 216 mlog_entry("(0x%p, size:%llu)\n", inode,
199 (unsigned long long)fe->i_size); 217 (unsigned long long)le64_to_cpu(fe->i_size));
200 218
201 sb = inode->i_sb; 219 sb = inode->i_sb;
202 osb = OCFS2_SB(sb); 220 osb = OCFS2_SB(sb);
@@ -248,7 +266,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
248 mlog(ML_ERROR, 266 mlog(ML_ERROR,
249 "ip_blkno %llu != i_blkno %llu!\n", 267 "ip_blkno %llu != i_blkno %llu!\n",
250 (unsigned long long)OCFS2_I(inode)->ip_blkno, 268 (unsigned long long)OCFS2_I(inode)->ip_blkno,
251 (unsigned long long)fe->i_blkno); 269 (unsigned long long)le64_to_cpu(fe->i_blkno));
252 270
253 inode->i_nlink = le16_to_cpu(fe->i_links_count); 271 inode->i_nlink = le16_to_cpu(fe->i_links_count);
254 272
@@ -301,7 +319,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
301 * the generation argument to 319 * the generation argument to
302 * ocfs2_inode_lock_res_init() will have to change. 320 * ocfs2_inode_lock_res_init() will have to change.
303 */ 321 */
304 BUG_ON(fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)); 322 BUG_ON(le32_to_cpu(fe->i_flags) & OCFS2_SYSTEM_FL);
305 323
306 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres, 324 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres,
307 OCFS2_LOCK_TYPE_META, 0, inode); 325 OCFS2_LOCK_TYPE_META, 0, inode);
@@ -437,7 +455,8 @@ static int ocfs2_read_locked_inode(struct inode *inode,
437 fe = (struct ocfs2_dinode *) bh->b_data; 455 fe = (struct ocfs2_dinode *) bh->b_data;
438 if (!OCFS2_IS_VALID_DINODE(fe)) { 456 if (!OCFS2_IS_VALID_DINODE(fe)) {
439 mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n", 457 mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n",
440 (unsigned long long)fe->i_blkno, 7, fe->i_signature); 458 (unsigned long long)le64_to_cpu(fe->i_blkno), 7,
459 fe->i_signature);
441 goto bail; 460 goto bail;
442 } 461 }
443 462
@@ -812,8 +831,8 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
812 "Inode %llu (on-disk %llu) not orphaned! " 831 "Inode %llu (on-disk %llu) not orphaned! "
813 "Disk flags 0x%x, inode flags 0x%x\n", 832 "Disk flags 0x%x, inode flags 0x%x\n",
814 (unsigned long long)oi->ip_blkno, 833 (unsigned long long)oi->ip_blkno,
815 (unsigned long long)di->i_blkno, di->i_flags, 834 (unsigned long long)le64_to_cpu(di->i_blkno),
816 oi->ip_flags); 835 le32_to_cpu(di->i_flags), oi->ip_flags);
817 goto bail; 836 goto bail;
818 } 837 }
819 838
@@ -1106,8 +1125,10 @@ struct buffer_head *ocfs2_bread(struct inode *inode,
1106 return NULL; 1125 return NULL;
1107 } 1126 }
1108 1127
1128 down_read(&OCFS2_I(inode)->ip_alloc_sem);
1109 tmperr = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL, 1129 tmperr = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL,
1110 NULL); 1130 NULL);
1131 up_read(&OCFS2_I(inode)->ip_alloc_sem);
1111 if (tmperr < 0) { 1132 if (tmperr < 0) {
1112 mlog_errno(tmperr); 1133 mlog_errno(tmperr);
1113 goto fail; 1134 goto fail;
@@ -1197,6 +1218,7 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
1197 1218
1198 spin_lock(&OCFS2_I(inode)->ip_lock); 1219 spin_lock(&OCFS2_I(inode)->ip_lock);
1199 fe->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters); 1220 fe->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters);
1221 ocfs2_get_inode_flags(OCFS2_I(inode));
1200 fe->i_attr = cpu_to_le32(OCFS2_I(inode)->ip_attr); 1222 fe->i_attr = cpu_to_le32(OCFS2_I(inode)->ip_attr);
1201 spin_unlock(&OCFS2_I(inode)->ip_lock); 1223 spin_unlock(&OCFS2_I(inode)->ip_lock);
1202 1224
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 03ae075869ee..a41d0817121b 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -141,6 +141,7 @@ int ocfs2_aio_read(struct file *file, struct kiocb *req, struct iocb *iocb);
141int ocfs2_aio_write(struct file *file, struct kiocb *req, struct iocb *iocb); 141int ocfs2_aio_write(struct file *file, struct kiocb *req, struct iocb *iocb);
142 142
143void ocfs2_set_inode_flags(struct inode *inode); 143void ocfs2_set_inode_flags(struct inode *inode);
144void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi);
144 145
145static inline blkcnt_t ocfs2_inode_sector_count(struct inode *inode) 146static inline blkcnt_t ocfs2_inode_sector_count(struct inode *inode)
146{ 147{
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 4768be5f3086..f3ad21ad9aed 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -31,6 +31,7 @@ static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags)
31 mlog_errno(status); 31 mlog_errno(status);
32 return status; 32 return status;
33 } 33 }
34 ocfs2_get_inode_flags(OCFS2_I(inode));
34 *flags = OCFS2_I(inode)->ip_attr; 35 *flags = OCFS2_I(inode)->ip_attr;
35 ocfs2_meta_unlock(inode, 0); 36 ocfs2_meta_unlock(inode, 0);
36 37
@@ -134,3 +135,26 @@ int ocfs2_ioctl(struct inode * inode, struct file * filp,
134 } 135 }
135} 136}
136 137
138#ifdef CONFIG_COMPAT
139long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
140{
141 struct inode *inode = file->f_path.dentry->d_inode;
142 int ret;
143
144 switch (cmd) {
145 case OCFS2_IOC32_GETFLAGS:
146 cmd = OCFS2_IOC_GETFLAGS;
147 break;
148 case OCFS2_IOC32_SETFLAGS:
149 cmd = OCFS2_IOC_SETFLAGS;
150 break;
151 default:
152 return -ENOIOCTLCMD;
153 }
154
155 lock_kernel();
156 ret = ocfs2_ioctl(inode, file, cmd, arg);
157 unlock_kernel();
158 return ret;
159}
160#endif
diff --git a/fs/ocfs2/ioctl.h b/fs/ocfs2/ioctl.h
index 4a7c82931dba..4d6c4f430d0d 100644
--- a/fs/ocfs2/ioctl.h
+++ b/fs/ocfs2/ioctl.h
@@ -12,5 +12,6 @@
12 12
13int ocfs2_ioctl(struct inode * inode, struct file * filp, 13int ocfs2_ioctl(struct inode * inode, struct file * filp,
14 unsigned int cmd, unsigned long arg); 14 unsigned int cmd, unsigned long arg);
15long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg);
15 16
16#endif /* OCFS2_IOCTL_H */ 17#endif /* OCFS2_IOCTL_H */
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 5a8a90d1c787..dc1188081720 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -435,7 +435,8 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
435 * handle the errors in a specific manner, so no need 435 * handle the errors in a specific manner, so no need
436 * to call ocfs2_error() here. */ 436 * to call ocfs2_error() here. */
437 mlog(ML_ERROR, "Journal dinode %llu has invalid " 437 mlog(ML_ERROR, "Journal dinode %llu has invalid "
438 "signature: %.*s", (unsigned long long)fe->i_blkno, 7, 438 "signature: %.*s",
439 (unsigned long long)le64_to_cpu(fe->i_blkno), 7,
439 fe->i_signature); 440 fe->i_signature);
440 status = -EIO; 441 status = -EIO;
441 goto out; 442 goto out;
@@ -742,7 +743,7 @@ void ocfs2_complete_recovery(struct work_struct *work)
742 la_dinode = item->lri_la_dinode; 743 la_dinode = item->lri_la_dinode;
743 if (la_dinode) { 744 if (la_dinode) {
744 mlog(0, "Clean up local alloc %llu\n", 745 mlog(0, "Clean up local alloc %llu\n",
745 (unsigned long long)la_dinode->i_blkno); 746 (unsigned long long)le64_to_cpu(la_dinode->i_blkno));
746 747
747 ret = ocfs2_complete_local_alloc_recovery(osb, 748 ret = ocfs2_complete_local_alloc_recovery(osb,
748 la_dinode); 749 la_dinode);
@@ -755,7 +756,7 @@ void ocfs2_complete_recovery(struct work_struct *work)
755 tl_dinode = item->lri_tl_dinode; 756 tl_dinode = item->lri_tl_dinode;
756 if (tl_dinode) { 757 if (tl_dinode) {
757 mlog(0, "Clean up truncate log %llu\n", 758 mlog(0, "Clean up truncate log %llu\n",
758 (unsigned long long)tl_dinode->i_blkno); 759 (unsigned long long)le64_to_cpu(tl_dinode->i_blkno));
759 760
760 ret = ocfs2_complete_truncate_log_recovery(osb, 761 ret = ocfs2_complete_truncate_log_recovery(osb,
761 tl_dinode); 762 tl_dinode);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 2bcf353fd7c5..36289e6295ce 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -578,8 +578,9 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
578 if (ocfs2_populate_inode(inode, fe, 1) < 0) { 578 if (ocfs2_populate_inode(inode, fe, 1) < 0) {
579 mlog(ML_ERROR, "populate inode failed! bh->b_blocknr=%llu, " 579 mlog(ML_ERROR, "populate inode failed! bh->b_blocknr=%llu, "
580 "i_blkno=%llu, i_ino=%lu\n", 580 "i_blkno=%llu, i_ino=%lu\n",
581 (unsigned long long) (*new_fe_bh)->b_blocknr, 581 (unsigned long long)(*new_fe_bh)->b_blocknr,
582 (unsigned long long)fe->i_blkno, inode->i_ino); 582 (unsigned long long)le64_to_cpu(fe->i_blkno),
583 inode->i_ino);
583 BUG(); 584 BUG();
584 } 585 }
585 586
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 82cc92dcf8a6..a860633e833f 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -363,9 +363,9 @@ static inline int ocfs2_mount_local(struct ocfs2_super *osb)
363 typeof(__di) ____di = (__di); \ 363 typeof(__di) ____di = (__di); \
364 ocfs2_error((__sb), \ 364 ocfs2_error((__sb), \
365 "Dinode # %llu has bad signature %.*s", \ 365 "Dinode # %llu has bad signature %.*s", \
366 (unsigned long long)(____di)->i_blkno, 7, \ 366 (unsigned long long)le64_to_cpu((____di)->i_blkno), 7, \
367 (____di)->i_signature); \ 367 (____di)->i_signature); \
368} while (0); 368} while (0)
369 369
370#define OCFS2_IS_VALID_EXTENT_BLOCK(ptr) \ 370#define OCFS2_IS_VALID_EXTENT_BLOCK(ptr) \
371 (!strcmp((ptr)->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE)) 371 (!strcmp((ptr)->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE))
@@ -374,9 +374,9 @@ static inline int ocfs2_mount_local(struct ocfs2_super *osb)
374 typeof(__eb) ____eb = (__eb); \ 374 typeof(__eb) ____eb = (__eb); \
375 ocfs2_error((__sb), \ 375 ocfs2_error((__sb), \
376 "Extent Block # %llu has bad signature %.*s", \ 376 "Extent Block # %llu has bad signature %.*s", \
377 (unsigned long long)(____eb)->h_blkno, 7, \ 377 (unsigned long long)le64_to_cpu((____eb)->h_blkno), 7, \
378 (____eb)->h_signature); \ 378 (____eb)->h_signature); \
379} while (0); 379} while (0)
380 380
381#define OCFS2_IS_VALID_GROUP_DESC(ptr) \ 381#define OCFS2_IS_VALID_GROUP_DESC(ptr) \
382 (!strcmp((ptr)->bg_signature, OCFS2_GROUP_DESC_SIGNATURE)) 382 (!strcmp((ptr)->bg_signature, OCFS2_GROUP_DESC_SIGNATURE))
@@ -385,9 +385,9 @@ static inline int ocfs2_mount_local(struct ocfs2_super *osb)
385 typeof(__gd) ____gd = (__gd); \ 385 typeof(__gd) ____gd = (__gd); \
386 ocfs2_error((__sb), \ 386 ocfs2_error((__sb), \
387 "Group Descriptor # %llu has bad signature %.*s", \ 387 "Group Descriptor # %llu has bad signature %.*s", \
388 (unsigned long long)(____gd)->bg_blkno, 7, \ 388 (unsigned long long)le64_to_cpu((____gd)->bg_blkno), 7, \
389 (____gd)->bg_signature); \ 389 (____gd)->bg_signature); \
390} while (0); 390} while (0)
391 391
392static inline unsigned long ino_from_blkno(struct super_block *sb, 392static inline unsigned long ino_from_blkno(struct super_block *sb,
393 u64 blkno) 393 u64 blkno)
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 71306479c68f..f0d9eb08547a 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -166,6 +166,8 @@
166 */ 166 */
167#define OCFS2_IOC_GETFLAGS _IOR('f', 1, long) 167#define OCFS2_IOC_GETFLAGS _IOR('f', 1, long)
168#define OCFS2_IOC_SETFLAGS _IOW('f', 2, long) 168#define OCFS2_IOC_SETFLAGS _IOW('f', 2, long)
169#define OCFS2_IOC32_GETFLAGS _IOR('f', 1, int)
170#define OCFS2_IOC32_SETFLAGS _IOW('f', 2, int)
169 171
170/* 172/*
171 * Journal Flags (ocfs2_dinode.id1.journal1.i_flags) 173 * Journal Flags (ocfs2_dinode.id1.journal1.i_flags)
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index d921a28329dc..d8b79067dc14 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -26,7 +26,6 @@
26#include <linux/types.h> 26#include <linux/types.h>
27#include <linux/slab.h> 27#include <linux/slab.h>
28#include <linux/highmem.h> 28#include <linux/highmem.h>
29#include <linux/smp_lock.h>
30 29
31#define MLOG_MASK_PREFIX ML_SUPER 30#define MLOG_MASK_PREFIX ML_SUPER
32#include <cluster/masklog.h> 31#include <cluster/masklog.h>
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 0da655ae5d6f..e3437626d183 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -849,9 +849,9 @@ static int ocfs2_relink_block_group(handle_t *handle,
849 } 849 }
850 850
851 mlog(0, "Suballoc %llu, chain %u, move group %llu to top, prev = %llu\n", 851 mlog(0, "Suballoc %llu, chain %u, move group %llu to top, prev = %llu\n",
852 (unsigned long long)fe->i_blkno, chain, 852 (unsigned long long)le64_to_cpu(fe->i_blkno), chain,
853 (unsigned long long)bg->bg_blkno, 853 (unsigned long long)le64_to_cpu(bg->bg_blkno),
854 (unsigned long long)prev_bg->bg_blkno); 854 (unsigned long long)le64_to_cpu(prev_bg->bg_blkno));
855 855
856 fe_ptr = le64_to_cpu(fe->id2.i_chain.cl_recs[chain].c_blkno); 856 fe_ptr = le64_to_cpu(fe->id2.i_chain.cl_recs[chain].c_blkno);
857 bg_ptr = le64_to_cpu(bg->bg_next_group); 857 bg_ptr = le64_to_cpu(bg->bg_next_group);
@@ -1162,7 +1162,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1162 } 1162 }
1163 1163
1164 mlog(0, "alloc succeeds: we give %u bits from block group %llu\n", 1164 mlog(0, "alloc succeeds: we give %u bits from block group %llu\n",
1165 tmp_bits, (unsigned long long)bg->bg_blkno); 1165 tmp_bits, (unsigned long long)le64_to_cpu(bg->bg_blkno));
1166 1166
1167 *num_bits = tmp_bits; 1167 *num_bits = tmp_bits;
1168 1168
@@ -1227,7 +1227,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1227 } 1227 }
1228 1228
1229 mlog(0, "Allocated %u bits from suballocator %llu\n", *num_bits, 1229 mlog(0, "Allocated %u bits from suballocator %llu\n", *num_bits,
1230 (unsigned long long)fe->i_blkno); 1230 (unsigned long long)le64_to_cpu(fe->i_blkno));
1231 1231
1232 *bg_blkno = le64_to_cpu(bg->bg_blkno); 1232 *bg_blkno = le64_to_cpu(bg->bg_blkno);
1233 *bits_left = le16_to_cpu(bg->bg_free_bits_count); 1233 *bits_left = le16_to_cpu(bg->bg_free_bits_count);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 5c9e8243691f..7c5e3f5d6634 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -937,8 +937,7 @@ static void ocfs2_inode_init_once(void *data,
937{ 937{
938 struct ocfs2_inode_info *oi = data; 938 struct ocfs2_inode_info *oi = data;
939 939
940 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 940 if (flags & SLAB_CTOR_CONSTRUCTOR) {
941 SLAB_CTOR_CONSTRUCTOR) {
942 oi->ip_flags = 0; 941 oi->ip_flags = 0;
943 oi->ip_open_count = 0; 942 oi->ip_open_count = 0;
944 spin_lock_init(&oi->ip_lock); 943 spin_lock_init(&oi->ip_lock);
@@ -1538,7 +1537,7 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di,
1538 } else if (bh->b_blocknr != le64_to_cpu(di->i_blkno)) { 1537 } else if (bh->b_blocknr != le64_to_cpu(di->i_blkno)) {
1539 mlog(ML_ERROR, "bad block number on superblock: " 1538 mlog(ML_ERROR, "bad block number on superblock: "
1540 "found %llu, should be %llu\n", 1539 "found %llu, should be %llu\n",
1541 (unsigned long long)di->i_blkno, 1540 (unsigned long long)le64_to_cpu(di->i_blkno),
1542 (unsigned long long)bh->b_blocknr); 1541 (unsigned long long)bh->b_blocknr);
1543 } else if (le32_to_cpu(di->id2.i_super.s_clustersize_bits) < 12 || 1542 } else if (le32_to_cpu(di->id2.i_super.s_clustersize_bits) < 12 ||
1544 le32_to_cpu(di->id2.i_super.s_clustersize_bits) > 20) { 1543 le32_to_cpu(di->id2.i_super.s_clustersize_bits) > 20) {
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index 40dc1a51f4a9..7134007ba22f 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -67,16 +67,9 @@ static char *ocfs2_page_getlink(struct dentry * dentry,
67 page = read_mapping_page(mapping, 0, NULL); 67 page = read_mapping_page(mapping, 0, NULL);
68 if (IS_ERR(page)) 68 if (IS_ERR(page))
69 goto sync_fail; 69 goto sync_fail;
70 wait_on_page_locked(page);
71 if (!PageUptodate(page))
72 goto async_fail;
73 *ppage = page; 70 *ppage = page;
74 return kmap(page); 71 return kmap(page);
75 72
76async_fail:
77 page_cache_release(page);
78 return ERR_PTR(-EIO);
79
80sync_fail: 73sync_fail:
81 return (char*)page; 74 return (char*)page;
82} 75}
diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c
index 4f82a2f0efef..66a13ee63d4c 100644
--- a/fs/ocfs2/vote.c
+++ b/fs/ocfs2/vote.c
@@ -26,7 +26,6 @@
26#include <linux/types.h> 26#include <linux/types.h>
27#include <linux/slab.h> 27#include <linux/slab.h>
28#include <linux/highmem.h> 28#include <linux/highmem.h>
29#include <linux/smp_lock.h>
30#include <linux/kthread.h> 29#include <linux/kthread.h>
31 30
32#include <cluster/heartbeat.h> 31#include <cluster/heartbeat.h>
diff --git a/fs/open.c b/fs/open.c
index c989fb4cf7b9..0d515d161974 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -7,7 +7,6 @@
7#include <linux/string.h> 7#include <linux/string.h>
8#include <linux/mm.h> 8#include <linux/mm.h>
9#include <linux/file.h> 9#include <linux/file.h>
10#include <linux/smp_lock.h>
11#include <linux/quotaops.h> 10#include <linux/quotaops.h>
12#include <linux/fsnotify.h> 11#include <linux/fsnotify.h>
13#include <linux/module.h> 12#include <linux/module.h>
@@ -211,6 +210,9 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
211 newattrs.ia_valid |= ATTR_FILE; 210 newattrs.ia_valid |= ATTR_FILE;
212 } 211 }
213 212
213 /* Remove suid/sgid on truncate too */
214 newattrs.ia_valid |= should_remove_suid(dentry);
215
214 mutex_lock(&dentry->d_inode->i_mutex); 216 mutex_lock(&dentry->d_inode->i_mutex);
215 err = notify_change(dentry, &newattrs); 217 err = notify_change(dentry, &newattrs);
216 mutex_unlock(&dentry->d_inode->i_mutex); 218 mutex_unlock(&dentry->d_inode->i_mutex);
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index bde1c164417d..731a90e9f0cd 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -419,8 +419,7 @@ static void op_inode_init_once(void *data, struct kmem_cache * cachep, unsigned
419{ 419{
420 struct op_inode_info *oi = (struct op_inode_info *) data; 420 struct op_inode_info *oi = (struct op_inode_info *) data;
421 421
422 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 422 if (flags & SLAB_CTOR_CONSTRUCTOR)
423 SLAB_CTOR_CONSTRUCTOR)
424 inode_init_once(&oi->vfs_inode); 423 inode_init_once(&oi->vfs_inode);
425} 424}
426 425
diff --git a/fs/partitions/Kconfig b/fs/partitions/Kconfig
index 6e8bb66fe619..01207042048b 100644
--- a/fs/partitions/Kconfig
+++ b/fs/partitions/Kconfig
@@ -236,3 +236,12 @@ config EFI_PARTITION
236 help 236 help
237 Say Y here if you would like to use hard disks under Linux which 237 Say Y here if you would like to use hard disks under Linux which
238 were partitioned using EFI GPT. 238 were partitioned using EFI GPT.
239
240config SYSV68_PARTITION
241 bool "SYSV68 partition table support" if PARTITION_ADVANCED
242 default y if M68K
243 help
244 Say Y here if you would like to be able to read the hard disk
245 partition table format used by Motorola Delta machines (using
246 sysv68).
247 Otherwise, say N.
diff --git a/fs/partitions/Makefile b/fs/partitions/Makefile
index 67e665fdb7fc..03af8eac51da 100644
--- a/fs/partitions/Makefile
+++ b/fs/partitions/Makefile
@@ -17,3 +17,4 @@ obj-$(CONFIG_ULTRIX_PARTITION) += ultrix.o
17obj-$(CONFIG_IBM_PARTITION) += ibm.o 17obj-$(CONFIG_IBM_PARTITION) += ibm.o
18obj-$(CONFIG_EFI_PARTITION) += efi.o 18obj-$(CONFIG_EFI_PARTITION) += efi.o
19obj-$(CONFIG_KARMA_PARTITION) += karma.o 19obj-$(CONFIG_KARMA_PARTITION) += karma.o
20obj-$(CONFIG_SYSV68_PARTITION) += sysv68.o
diff --git a/fs/partitions/acorn.c b/fs/partitions/acorn.c
index 1bc9f372c7d4..e3491328596b 100644
--- a/fs/partitions/acorn.c
+++ b/fs/partitions/acorn.c
@@ -271,7 +271,7 @@ adfspart_check_ADFS(struct parsed_partitions *state, struct block_device *bdev)
271 extern void xd_set_geometry(struct block_device *, 271 extern void xd_set_geometry(struct block_device *,
272 unsigned char, unsigned char, unsigned int); 272 unsigned char, unsigned char, unsigned int);
273 xd_set_geometry(bdev, dr->secspertrack, heads, 1); 273 xd_set_geometry(bdev, dr->secspertrack, heads, 1);
274 invalidate_bdev(bdev, 1); 274 invalidate_bh_lrus();
275 truncate_inode_pages(bdev->bd_inode->i_mapping, 0); 275 truncate_inode_pages(bdev->bd_inode->i_mapping, 0);
276 } 276 }
277#endif 277#endif
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 8a7d0035ad7a..9a3a058f3553 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -34,6 +34,7 @@
34#include "ultrix.h" 34#include "ultrix.h"
35#include "efi.h" 35#include "efi.h"
36#include "karma.h" 36#include "karma.h"
37#include "sysv68.h"
37 38
38#ifdef CONFIG_BLK_DEV_MD 39#ifdef CONFIG_BLK_DEV_MD
39extern void md_autodetect_dev(dev_t dev); 40extern void md_autodetect_dev(dev_t dev);
@@ -105,6 +106,9 @@ static int (*check_part[])(struct parsed_partitions *, struct block_device *) =
105#ifdef CONFIG_KARMA_PARTITION 106#ifdef CONFIG_KARMA_PARTITION
106 karma_partition, 107 karma_partition,
107#endif 108#endif
109#ifdef CONFIG_SYSV68_PARTITION
110 sysv68_partition,
111#endif
108 NULL 112 NULL
109}; 113};
110 114
@@ -312,7 +316,7 @@ static struct attribute * default_attrs[] = {
312 NULL, 316 NULL,
313}; 317};
314 318
315extern struct subsystem block_subsys; 319extern struct kset block_subsys;
316 320
317static void part_release(struct kobject *kobj) 321static void part_release(struct kobject *kobj)
318{ 322{
@@ -388,7 +392,7 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len,
388 kobject_add(&p->kobj); 392 kobject_add(&p->kobj);
389 if (!disk->part_uevent_suppress) 393 if (!disk->part_uevent_suppress)
390 kobject_uevent(&p->kobj, KOBJ_ADD); 394 kobject_uevent(&p->kobj, KOBJ_ADD);
391 sysfs_create_link(&p->kobj, &block_subsys.kset.kobj, "subsystem"); 395 sysfs_create_link(&p->kobj, &block_subsys.kobj, "subsystem");
392 if (flags & ADDPART_FLAG_WHOLEDISK) { 396 if (flags & ADDPART_FLAG_WHOLEDISK) {
393 static struct attribute addpartattr = { 397 static struct attribute addpartattr = {
394 .name = "whole_disk", 398 .name = "whole_disk",
@@ -444,7 +448,7 @@ static int disk_sysfs_symlinks(struct gendisk *disk)
444 goto err_out_dev_link; 448 goto err_out_dev_link;
445 } 449 }
446 450
447 err = sysfs_create_link(&disk->kobj, &block_subsys.kset.kobj, 451 err = sysfs_create_link(&disk->kobj, &block_subsys.kobj,
448 "subsystem"); 452 "subsystem");
449 if (err) 453 if (err)
450 goto err_out_disk_name_lnk; 454 goto err_out_disk_name_lnk;
@@ -569,9 +573,6 @@ unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
569 page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)), 573 page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)),
570 NULL); 574 NULL);
571 if (!IS_ERR(page)) { 575 if (!IS_ERR(page)) {
572 wait_on_page_locked(page);
573 if (!PageUptodate(page))
574 goto fail;
575 if (PageError(page)) 576 if (PageError(page))
576 goto fail; 577 goto fail;
577 p->v = page; 578 p->v = page;
diff --git a/fs/partitions/sysv68.c b/fs/partitions/sysv68.c
new file mode 100644
index 000000000000..4eba27b78643
--- /dev/null
+++ b/fs/partitions/sysv68.c
@@ -0,0 +1,92 @@
1/*
2 * fs/partitions/sysv68.c
3 *
4 * Copyright (C) 2007 Philippe De Muyter <phdm@macqel.be>
5 */
6
7#include "check.h"
8#include "sysv68.h"
9
10/*
11 * Volume ID structure: on first 256-bytes sector of disk
12 */
13
14struct volumeid {
15 u8 vid_unused[248];
16 u8 vid_mac[8]; /* ASCII string "MOTOROLA" */
17};
18
19/*
20 * config block: second 256-bytes sector on disk
21 */
22
23struct dkconfig {
24 u8 ios_unused0[128];
25 __be32 ios_slcblk; /* Slice table block number */
26 __be16 ios_slccnt; /* Number of entries in slice table */
27 u8 ios_unused1[122];
28};
29
30/*
31 * combined volumeid and dkconfig block
32 */
33
34struct dkblk0 {
35 struct volumeid dk_vid;
36 struct dkconfig dk_ios;
37};
38
39/*
40 * Slice Table Structure
41 */
42
43struct slice {
44 __be32 nblocks; /* slice size (in blocks) */
45 __be32 blkoff; /* block offset of slice */
46};
47
48
49int sysv68_partition(struct parsed_partitions *state, struct block_device *bdev)
50{
51 int i, slices;
52 int slot = 1;
53 Sector sect;
54 unsigned char *data;
55 struct dkblk0 *b;
56 struct slice *slice;
57
58 data = read_dev_sector(bdev, 0, &sect);
59 if (!data)
60 return -1;
61
62 b = (struct dkblk0 *)data;
63 if (memcmp(b->dk_vid.vid_mac, "MOTOROLA", sizeof(b->dk_vid.vid_mac))) {
64 put_dev_sector(sect);
65 return 0;
66 }
67 slices = be16_to_cpu(b->dk_ios.ios_slccnt);
68 i = be32_to_cpu(b->dk_ios.ios_slcblk);
69 put_dev_sector(sect);
70
71 data = read_dev_sector(bdev, i, &sect);
72 if (!data)
73 return -1;
74
75 slices -= 1; /* last slice is the whole disk */
76 printk("sysV68: %s(s%u)", state->name, slices);
77 slice = (struct slice *)data;
78 for (i = 0; i < slices; i++, slice++) {
79 if (slot == state->limit)
80 break;
81 if (be32_to_cpu(slice->nblocks)) {
82 put_partition(state, slot,
83 be32_to_cpu(slice->blkoff),
84 be32_to_cpu(slice->nblocks));
85 printk("(s%u)", i);
86 }
87 slot++;
88 }
89 printk("\n");
90 put_dev_sector(sect);
91 return 1;
92}
diff --git a/fs/partitions/sysv68.h b/fs/partitions/sysv68.h
new file mode 100644
index 000000000000..fa733f68431b
--- /dev/null
+++ b/fs/partitions/sysv68.h
@@ -0,0 +1 @@
extern int sysv68_partition(struct parsed_partitions *state, struct block_device *bdev);
diff --git a/fs/pipe.c b/fs/pipe.c
index ebafde7d6aba..3a89592bdf57 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -841,8 +841,18 @@ static int pipefs_delete_dentry(struct dentry *dentry)
841 return 0; 841 return 0;
842} 842}
843 843
844/*
845 * pipefs_dname() is called from d_path().
846 */
847static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
848{
849 return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]",
850 dentry->d_inode->i_ino);
851}
852
844static struct dentry_operations pipefs_dentry_operations = { 853static struct dentry_operations pipefs_dentry_operations = {
845 .d_delete = pipefs_delete_dentry, 854 .d_delete = pipefs_delete_dentry,
855 .d_dname = pipefs_dname,
846}; 856};
847 857
848static struct inode * get_pipe_inode(void) 858static struct inode * get_pipe_inode(void)
@@ -888,8 +898,7 @@ struct file *create_write_pipe(void)
888 struct inode *inode; 898 struct inode *inode;
889 struct file *f; 899 struct file *f;
890 struct dentry *dentry; 900 struct dentry *dentry;
891 char name[32]; 901 struct qstr name = { .name = "" };
892 struct qstr this;
893 902
894 f = get_empty_filp(); 903 f = get_empty_filp();
895 if (!f) 904 if (!f)
@@ -899,11 +908,8 @@ struct file *create_write_pipe(void)
899 if (!inode) 908 if (!inode)
900 goto err_file; 909 goto err_file;
901 910
902 this.len = sprintf(name, "[%lu]", inode->i_ino);
903 this.name = name;
904 this.hash = 0;
905 err = -ENOMEM; 911 err = -ENOMEM;
906 dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this); 912 dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name);
907 if (!dentry) 913 if (!dentry)
908 goto err_inode; 914 goto err_inode;
909 915
diff --git a/fs/pnode.c b/fs/pnode.c
index 56aacead8362..89940f243fc2 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -59,7 +59,7 @@ static int do_make_slave(struct vfsmount *mnt)
59 } else { 59 } else {
60 struct list_head *p = &mnt->mnt_slave_list; 60 struct list_head *p = &mnt->mnt_slave_list;
61 while (!list_empty(p)) { 61 while (!list_empty(p)) {
62 slave_mnt = list_entry(p->next, 62 slave_mnt = list_first_entry(p,
63 struct vfsmount, mnt_slave); 63 struct vfsmount, mnt_slave);
64 list_del_init(&slave_mnt->mnt_slave); 64 list_del_init(&slave_mnt->mnt_slave);
65 slave_mnt->mnt_master = NULL; 65 slave_mnt->mnt_master = NULL;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 07c9cdbcdcac..74f30e0c0381 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -410,9 +410,9 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
410 /* convert nsec -> ticks */ 410 /* convert nsec -> ticks */
411 start_time = nsec_to_clock_t(start_time); 411 start_time = nsec_to_clock_t(start_time);
412 412
413 res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \ 413 res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %u %lu \
414%lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \ 414%lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
415%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu %llu\n", 415%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu\n",
416 task->pid, 416 task->pid,
417 tcomm, 417 tcomm,
418 state, 418 state,
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 989af5e55d1b..a5fa1fdafc4e 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -61,9 +61,9 @@
61#include <linux/namei.h> 61#include <linux/namei.h>
62#include <linux/mnt_namespace.h> 62#include <linux/mnt_namespace.h>
63#include <linux/mm.h> 63#include <linux/mm.h>
64#include <linux/smp_lock.h>
65#include <linux/rcupdate.h> 64#include <linux/rcupdate.h>
66#include <linux/kallsyms.h> 65#include <linux/kallsyms.h>
66#include <linux/module.h>
67#include <linux/mount.h> 67#include <linux/mount.h>
68#include <linux/security.h> 68#include <linux/security.h>
69#include <linux/ptrace.h> 69#include <linux/ptrace.h>
@@ -90,8 +90,8 @@
90#define PROC_NUMBUF 13 90#define PROC_NUMBUF 13
91 91
92struct pid_entry { 92struct pid_entry {
93 int len;
94 char *name; 93 char *name;
94 int len;
95 mode_t mode; 95 mode_t mode;
96 const struct inode_operations *iop; 96 const struct inode_operations *iop;
97 const struct file_operations *fop; 97 const struct file_operations *fop;
@@ -99,8 +99,8 @@ struct pid_entry {
99}; 99};
100 100
101#define NOD(NAME, MODE, IOP, FOP, OP) { \ 101#define NOD(NAME, MODE, IOP, FOP, OP) { \
102 .len = sizeof(NAME) - 1, \
103 .name = (NAME), \ 102 .name = (NAME), \
103 .len = sizeof(NAME) - 1, \
104 .mode = MODE, \ 104 .mode = MODE, \
105 .iop = IOP, \ 105 .iop = IOP, \
106 .fop = FOP, \ 106 .fop = FOP, \
@@ -123,6 +123,9 @@ struct pid_entry {
123 NULL, &proc_info_file_operations, \ 123 NULL, &proc_info_file_operations, \
124 { .proc_read = &proc_##OTYPE } ) 124 { .proc_read = &proc_##OTYPE } )
125 125
126int maps_protect;
127EXPORT_SYMBOL(maps_protect);
128
126static struct fs_struct *get_fs_struct(struct task_struct *task) 129static struct fs_struct *get_fs_struct(struct task_struct *task)
127{ 130{
128 struct fs_struct *fs; 131 struct fs_struct *fs;
@@ -275,17 +278,15 @@ static int proc_pid_auxv(struct task_struct *task, char *buffer)
275 */ 278 */
276static int proc_pid_wchan(struct task_struct *task, char *buffer) 279static int proc_pid_wchan(struct task_struct *task, char *buffer)
277{ 280{
278 char *modname; 281 unsigned long wchan;
279 const char *sym_name; 282 char symname[KSYM_NAME_LEN+1];
280 unsigned long wchan, size, offset;
281 char namebuf[KSYM_NAME_LEN+1];
282 283
283 wchan = get_wchan(task); 284 wchan = get_wchan(task);
284 285
285 sym_name = kallsyms_lookup(wchan, &size, &offset, &modname, namebuf); 286 if (lookup_symbol_name(wchan, symname) < 0)
286 if (sym_name) 287 return sprintf(buffer, "%lu", wchan);
287 return sprintf(buffer, "%s", sym_name); 288 else
288 return sprintf(buffer, "%lu", wchan); 289 return sprintf(buffer, "%s", symname);
289} 290}
290#endif /* CONFIG_KALLSYMS */ 291#endif /* CONFIG_KALLSYMS */
291 292
@@ -310,7 +311,9 @@ static int proc_oom_score(struct task_struct *task, char *buffer)
310 struct timespec uptime; 311 struct timespec uptime;
311 312
312 do_posix_clock_monotonic_gettime(&uptime); 313 do_posix_clock_monotonic_gettime(&uptime);
314 read_lock(&tasklist_lock);
313 points = badness(task, uptime.tv_sec); 315 points = badness(task, uptime.tv_sec);
316 read_unlock(&tasklist_lock);
314 return sprintf(buffer, "%lu\n", points); 317 return sprintf(buffer, "%lu\n", points);
315} 318}
316 319
@@ -344,11 +347,8 @@ static int proc_setattr(struct dentry *dentry, struct iattr *attr)
344 return -EPERM; 347 return -EPERM;
345 348
346 error = inode_change_ok(inode, attr); 349 error = inode_change_ok(inode, attr);
347 if (!error) { 350 if (!error)
348 error = security_inode_setattr(dentry, attr); 351 error = inode_setattr(inode, attr);
349 if (!error)
350 error = inode_setattr(inode, attr);
351 }
352 return error; 352 return error;
353} 353}
354 354
@@ -660,7 +660,6 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf,
660 char buffer[PROC_NUMBUF]; 660 char buffer[PROC_NUMBUF];
661 size_t len; 661 size_t len;
662 int oom_adjust; 662 int oom_adjust;
663 loff_t __ppos = *ppos;
664 663
665 if (!task) 664 if (!task)
666 return -ESRCH; 665 return -ESRCH;
@@ -668,14 +667,8 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf,
668 put_task_struct(task); 667 put_task_struct(task);
669 668
670 len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); 669 len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust);
671 if (__ppos >= len) 670
672 return 0; 671 return simple_read_from_buffer(buf, count, ppos, buffer, len);
673 if (count > len-__ppos)
674 count = len-__ppos;
675 if (copy_to_user(buf, buffer + __ppos, count))
676 return -EFAULT;
677 *ppos = __ppos + count;
678 return count;
679} 672}
680 673
681static ssize_t oom_adjust_write(struct file *file, const char __user *buf, 674static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
@@ -715,6 +708,42 @@ static const struct file_operations proc_oom_adjust_operations = {
715 .write = oom_adjust_write, 708 .write = oom_adjust_write,
716}; 709};
717 710
711#ifdef CONFIG_MMU
712static ssize_t clear_refs_write(struct file *file, const char __user *buf,
713 size_t count, loff_t *ppos)
714{
715 struct task_struct *task;
716 char buffer[PROC_NUMBUF], *end;
717 struct mm_struct *mm;
718
719 memset(buffer, 0, sizeof(buffer));
720 if (count > sizeof(buffer) - 1)
721 count = sizeof(buffer) - 1;
722 if (copy_from_user(buffer, buf, count))
723 return -EFAULT;
724 if (!simple_strtol(buffer, &end, 0))
725 return -EINVAL;
726 if (*end == '\n')
727 end++;
728 task = get_proc_task(file->f_path.dentry->d_inode);
729 if (!task)
730 return -ESRCH;
731 mm = get_task_mm(task);
732 if (mm) {
733 clear_refs_smap(mm);
734 mmput(mm);
735 }
736 put_task_struct(task);
737 if (end - buffer == 0)
738 return -EIO;
739 return end - buffer;
740}
741
742static struct file_operations proc_clear_refs_operations = {
743 .write = clear_refs_write,
744};
745#endif
746
718#ifdef CONFIG_AUDITSYSCALL 747#ifdef CONFIG_AUDITSYSCALL
719#define TMPBUFLEN 21 748#define TMPBUFLEN 21
720static ssize_t proc_loginuid_read(struct file * file, char __user * buf, 749static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
@@ -789,7 +818,6 @@ static ssize_t seccomp_read(struct file *file, char __user *buf,
789{ 818{
790 struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode); 819 struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode);
791 char __buf[20]; 820 char __buf[20];
792 loff_t __ppos = *ppos;
793 size_t len; 821 size_t len;
794 822
795 if (!tsk) 823 if (!tsk)
@@ -797,14 +825,8 @@ static ssize_t seccomp_read(struct file *file, char __user *buf,
797 /* no need to print the trailing zero, so use only len */ 825 /* no need to print the trailing zero, so use only len */
798 len = sprintf(__buf, "%u\n", tsk->seccomp.mode); 826 len = sprintf(__buf, "%u\n", tsk->seccomp.mode);
799 put_task_struct(tsk); 827 put_task_struct(tsk);
800 if (__ppos >= len) 828
801 return 0; 829 return simple_read_from_buffer(buf, count, ppos, __buf, len);
802 if (count > len - __ppos)
803 count = len - __ppos;
804 if (copy_to_user(buf, __buf + __ppos, count))
805 return -EFAULT;
806 *ppos = __ppos + count;
807 return count;
808} 830}
809 831
810static ssize_t seccomp_write(struct file *file, const char __user *buf, 832static ssize_t seccomp_write(struct file *file, const char __user *buf,
@@ -863,7 +885,6 @@ static ssize_t proc_fault_inject_read(struct file * file, char __user * buf,
863 char buffer[PROC_NUMBUF]; 885 char buffer[PROC_NUMBUF];
864 size_t len; 886 size_t len;
865 int make_it_fail; 887 int make_it_fail;
866 loff_t __ppos = *ppos;
867 888
868 if (!task) 889 if (!task)
869 return -ESRCH; 890 return -ESRCH;
@@ -871,14 +892,8 @@ static ssize_t proc_fault_inject_read(struct file * file, char __user * buf,
871 put_task_struct(task); 892 put_task_struct(task);
872 893
873 len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail); 894 len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail);
874 if (__ppos >= len) 895
875 return 0; 896 return simple_read_from_buffer(buf, count, ppos, buffer, len);
876 if (count > len-__ppos)
877 count = len-__ppos;
878 if (copy_to_user(buf, buffer + __ppos, count))
879 return -EFAULT;
880 *ppos = __ppos + count;
881 return count;
882} 897}
883 898
884static ssize_t proc_fault_inject_write(struct file * file, 899static ssize_t proc_fault_inject_write(struct file * file,
@@ -941,7 +956,7 @@ static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt,
941 956
942 if (!tmp) 957 if (!tmp)
943 return -ENOMEM; 958 return -ENOMEM;
944 959
945 inode = dentry->d_inode; 960 inode = dentry->d_inode;
946 path = d_path(dentry, mnt, tmp, PAGE_SIZE); 961 path = d_path(dentry, mnt, tmp, PAGE_SIZE);
947 len = PTR_ERR(path); 962 len = PTR_ERR(path);
@@ -1121,7 +1136,8 @@ static struct dentry_operations pid_dentry_operations =
1121 1136
1122/* Lookups */ 1137/* Lookups */
1123 1138
1124typedef struct dentry *instantiate_t(struct inode *, struct dentry *, struct task_struct *, void *); 1139typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
1140 struct task_struct *, const void *);
1125 1141
1126/* 1142/*
1127 * Fill a directory entry. 1143 * Fill a directory entry.
@@ -1137,7 +1153,7 @@ typedef struct dentry *instantiate_t(struct inode *, struct dentry *, struct tas
1137 */ 1153 */
1138static int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, 1154static int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
1139 char *name, int len, 1155 char *name, int len,
1140 instantiate_t instantiate, struct task_struct *task, void *ptr) 1156 instantiate_t instantiate, struct task_struct *task, const void *ptr)
1141{ 1157{
1142 struct dentry *child, *dir = filp->f_path.dentry; 1158 struct dentry *child, *dir = filp->f_path.dentry;
1143 struct inode *inode; 1159 struct inode *inode;
@@ -1199,7 +1215,10 @@ out:
1199 return ~0U; 1215 return ~0U;
1200} 1216}
1201 1217
1202static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 1218#define PROC_FDINFO_MAX 64
1219
1220static int proc_fd_info(struct inode *inode, struct dentry **dentry,
1221 struct vfsmount **mnt, char *info)
1203{ 1222{
1204 struct task_struct *task = get_proc_task(inode); 1223 struct task_struct *task = get_proc_task(inode);
1205 struct files_struct *files = NULL; 1224 struct files_struct *files = NULL;
@@ -1218,8 +1237,16 @@ static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsm
1218 spin_lock(&files->file_lock); 1237 spin_lock(&files->file_lock);
1219 file = fcheck_files(files, fd); 1238 file = fcheck_files(files, fd);
1220 if (file) { 1239 if (file) {
1221 *mnt = mntget(file->f_path.mnt); 1240 if (mnt)
1222 *dentry = dget(file->f_path.dentry); 1241 *mnt = mntget(file->f_path.mnt);
1242 if (dentry)
1243 *dentry = dget(file->f_path.dentry);
1244 if (info)
1245 snprintf(info, PROC_FDINFO_MAX,
1246 "pos:\t%lli\n"
1247 "flags:\t0%o\n",
1248 (long long) file->f_pos,
1249 file->f_flags);
1223 spin_unlock(&files->file_lock); 1250 spin_unlock(&files->file_lock);
1224 put_files_struct(files); 1251 put_files_struct(files);
1225 return 0; 1252 return 0;
@@ -1230,6 +1257,12 @@ static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsm
1230 return -ENOENT; 1257 return -ENOENT;
1231} 1258}
1232 1259
1260static int proc_fd_link(struct inode *inode, struct dentry **dentry,
1261 struct vfsmount **mnt)
1262{
1263 return proc_fd_info(inode, dentry, mnt, NULL);
1264}
1265
1233static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) 1266static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
1234{ 1267{
1235 struct inode *inode = dentry->d_inode; 1268 struct inode *inode = dentry->d_inode;
@@ -1272,9 +1305,9 @@ static struct dentry_operations tid_fd_dentry_operations =
1272}; 1305};
1273 1306
1274static struct dentry *proc_fd_instantiate(struct inode *dir, 1307static struct dentry *proc_fd_instantiate(struct inode *dir,
1275 struct dentry *dentry, struct task_struct *task, void *ptr) 1308 struct dentry *dentry, struct task_struct *task, const void *ptr)
1276{ 1309{
1277 unsigned fd = *(unsigned *)ptr; 1310 unsigned fd = *(const unsigned *)ptr;
1278 struct file *file; 1311 struct file *file;
1279 struct files_struct *files; 1312 struct files_struct *files;
1280 struct inode *inode; 1313 struct inode *inode;
@@ -1325,7 +1358,9 @@ out_iput:
1325 goto out; 1358 goto out;
1326} 1359}
1327 1360
1328static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd) 1361static struct dentry *proc_lookupfd_common(struct inode *dir,
1362 struct dentry *dentry,
1363 instantiate_t instantiate)
1329{ 1364{
1330 struct task_struct *task = get_proc_task(dir); 1365 struct task_struct *task = get_proc_task(dir);
1331 unsigned fd = name_to_int(dentry); 1366 unsigned fd = name_to_int(dentry);
@@ -1336,23 +1371,15 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
1336 if (fd == ~0U) 1371 if (fd == ~0U)
1337 goto out; 1372 goto out;
1338 1373
1339 result = proc_fd_instantiate(dir, dentry, task, &fd); 1374 result = instantiate(dir, dentry, task, &fd);
1340out: 1375out:
1341 put_task_struct(task); 1376 put_task_struct(task);
1342out_no_task: 1377out_no_task:
1343 return result; 1378 return result;
1344} 1379}
1345 1380
1346static int proc_fd_fill_cache(struct file *filp, void *dirent, filldir_t filldir, 1381static int proc_readfd_common(struct file * filp, void * dirent,
1347 struct task_struct *task, int fd) 1382 filldir_t filldir, instantiate_t instantiate)
1348{
1349 char name[PROC_NUMBUF];
1350 int len = snprintf(name, sizeof(name), "%d", fd);
1351 return proc_fill_cache(filp, dirent, filldir, name, len,
1352 proc_fd_instantiate, task, &fd);
1353}
1354
1355static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1356{ 1383{
1357 struct dentry *dentry = filp->f_path.dentry; 1384 struct dentry *dentry = filp->f_path.dentry;
1358 struct inode *inode = dentry->d_inode; 1385 struct inode *inode = dentry->d_inode;
@@ -1388,12 +1415,17 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1388 for (fd = filp->f_pos-2; 1415 for (fd = filp->f_pos-2;
1389 fd < fdt->max_fds; 1416 fd < fdt->max_fds;
1390 fd++, filp->f_pos++) { 1417 fd++, filp->f_pos++) {
1418 char name[PROC_NUMBUF];
1419 int len;
1391 1420
1392 if (!fcheck_files(files, fd)) 1421 if (!fcheck_files(files, fd))
1393 continue; 1422 continue;
1394 rcu_read_unlock(); 1423 rcu_read_unlock();
1395 1424
1396 if (proc_fd_fill_cache(filp, dirent, filldir, p, fd) < 0) { 1425 len = snprintf(name, sizeof(name), "%d", fd);
1426 if (proc_fill_cache(filp, dirent, filldir,
1427 name, len, instantiate,
1428 p, &fd) < 0) {
1397 rcu_read_lock(); 1429 rcu_read_lock();
1398 break; 1430 break;
1399 } 1431 }
@@ -1408,23 +1440,119 @@ out_no_task:
1408 return retval; 1440 return retval;
1409} 1441}
1410 1442
1443static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry,
1444 struct nameidata *nd)
1445{
1446 return proc_lookupfd_common(dir, dentry, proc_fd_instantiate);
1447}
1448
1449static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir)
1450{
1451 return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate);
1452}
1453
1454static ssize_t proc_fdinfo_read(struct file *file, char __user *buf,
1455 size_t len, loff_t *ppos)
1456{
1457 char tmp[PROC_FDINFO_MAX];
1458 int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, NULL, tmp);
1459 if (!err)
1460 err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp));
1461 return err;
1462}
1463
1464static const struct file_operations proc_fdinfo_file_operations = {
1465 .open = nonseekable_open,
1466 .read = proc_fdinfo_read,
1467};
1468
1411static const struct file_operations proc_fd_operations = { 1469static const struct file_operations proc_fd_operations = {
1412 .read = generic_read_dir, 1470 .read = generic_read_dir,
1413 .readdir = proc_readfd, 1471 .readdir = proc_readfd,
1414}; 1472};
1415 1473
1416/* 1474/*
1475 * /proc/pid/fd needs a special permission handler so that a process can still
1476 * access /proc/self/fd after it has executed a setuid().
1477 */
1478static int proc_fd_permission(struct inode *inode, int mask,
1479 struct nameidata *nd)
1480{
1481 int rv;
1482
1483 rv = generic_permission(inode, mask, NULL);
1484 if (rv == 0)
1485 return 0;
1486 if (task_pid(current) == proc_pid(inode))
1487 rv = 0;
1488 return rv;
1489}
1490
1491/*
1417 * proc directories can do almost nothing.. 1492 * proc directories can do almost nothing..
1418 */ 1493 */
1419static const struct inode_operations proc_fd_inode_operations = { 1494static const struct inode_operations proc_fd_inode_operations = {
1420 .lookup = proc_lookupfd, 1495 .lookup = proc_lookupfd,
1496 .permission = proc_fd_permission,
1497 .setattr = proc_setattr,
1498};
1499
1500static struct dentry *proc_fdinfo_instantiate(struct inode *dir,
1501 struct dentry *dentry, struct task_struct *task, const void *ptr)
1502{
1503 unsigned fd = *(unsigned *)ptr;
1504 struct inode *inode;
1505 struct proc_inode *ei;
1506 struct dentry *error = ERR_PTR(-ENOENT);
1507
1508 inode = proc_pid_make_inode(dir->i_sb, task);
1509 if (!inode)
1510 goto out;
1511 ei = PROC_I(inode);
1512 ei->fd = fd;
1513 inode->i_mode = S_IFREG | S_IRUSR;
1514 inode->i_fop = &proc_fdinfo_file_operations;
1515 dentry->d_op = &tid_fd_dentry_operations;
1516 d_add(dentry, inode);
1517 /* Close the race of the process dying before we return the dentry */
1518 if (tid_fd_revalidate(dentry, NULL))
1519 error = NULL;
1520
1521 out:
1522 return error;
1523}
1524
1525static struct dentry *proc_lookupfdinfo(struct inode *dir,
1526 struct dentry *dentry,
1527 struct nameidata *nd)
1528{
1529 return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate);
1530}
1531
1532static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir)
1533{
1534 return proc_readfd_common(filp, dirent, filldir,
1535 proc_fdinfo_instantiate);
1536}
1537
1538static const struct file_operations proc_fdinfo_operations = {
1539 .read = generic_read_dir,
1540 .readdir = proc_readfdinfo,
1541};
1542
1543/*
1544 * proc directories can do almost nothing..
1545 */
1546static const struct inode_operations proc_fdinfo_inode_operations = {
1547 .lookup = proc_lookupfdinfo,
1421 .setattr = proc_setattr, 1548 .setattr = proc_setattr,
1422}; 1549};
1423 1550
1551
1424static struct dentry *proc_pident_instantiate(struct inode *dir, 1552static struct dentry *proc_pident_instantiate(struct inode *dir,
1425 struct dentry *dentry, struct task_struct *task, void *ptr) 1553 struct dentry *dentry, struct task_struct *task, const void *ptr)
1426{ 1554{
1427 struct pid_entry *p = ptr; 1555 const struct pid_entry *p = ptr;
1428 struct inode *inode; 1556 struct inode *inode;
1429 struct proc_inode *ei; 1557 struct proc_inode *ei;
1430 struct dentry *error = ERR_PTR(-EINVAL); 1558 struct dentry *error = ERR_PTR(-EINVAL);
@@ -1453,13 +1581,13 @@ out:
1453 1581
1454static struct dentry *proc_pident_lookup(struct inode *dir, 1582static struct dentry *proc_pident_lookup(struct inode *dir,
1455 struct dentry *dentry, 1583 struct dentry *dentry,
1456 struct pid_entry *ents, 1584 const struct pid_entry *ents,
1457 unsigned int nents) 1585 unsigned int nents)
1458{ 1586{
1459 struct inode *inode; 1587 struct inode *inode;
1460 struct dentry *error; 1588 struct dentry *error;
1461 struct task_struct *task = get_proc_task(dir); 1589 struct task_struct *task = get_proc_task(dir);
1462 struct pid_entry *p, *last; 1590 const struct pid_entry *p, *last;
1463 1591
1464 error = ERR_PTR(-ENOENT); 1592 error = ERR_PTR(-ENOENT);
1465 inode = NULL; 1593 inode = NULL;
@@ -1488,8 +1616,8 @@ out_no_task:
1488 return error; 1616 return error;
1489} 1617}
1490 1618
1491static int proc_pident_fill_cache(struct file *filp, void *dirent, filldir_t filldir, 1619static int proc_pident_fill_cache(struct file *filp, void *dirent,
1492 struct task_struct *task, struct pid_entry *p) 1620 filldir_t filldir, struct task_struct *task, const struct pid_entry *p)
1493{ 1621{
1494 return proc_fill_cache(filp, dirent, filldir, p->name, p->len, 1622 return proc_fill_cache(filp, dirent, filldir, p->name, p->len,
1495 proc_pident_instantiate, task, p); 1623 proc_pident_instantiate, task, p);
@@ -1497,14 +1625,14 @@ static int proc_pident_fill_cache(struct file *filp, void *dirent, filldir_t fil
1497 1625
1498static int proc_pident_readdir(struct file *filp, 1626static int proc_pident_readdir(struct file *filp,
1499 void *dirent, filldir_t filldir, 1627 void *dirent, filldir_t filldir,
1500 struct pid_entry *ents, unsigned int nents) 1628 const struct pid_entry *ents, unsigned int nents)
1501{ 1629{
1502 int i; 1630 int i;
1503 int pid; 1631 int pid;
1504 struct dentry *dentry = filp->f_path.dentry; 1632 struct dentry *dentry = filp->f_path.dentry;
1505 struct inode *inode = dentry->d_inode; 1633 struct inode *inode = dentry->d_inode;
1506 struct task_struct *task = get_proc_task(inode); 1634 struct task_struct *task = get_proc_task(inode);
1507 struct pid_entry *p, *last; 1635 const struct pid_entry *p, *last;
1508 ino_t ino; 1636 ino_t ino;
1509 int ret; 1637 int ret;
1510 1638
@@ -1619,7 +1747,7 @@ static const struct file_operations proc_pid_attr_operations = {
1619 .write = proc_pid_attr_write, 1747 .write = proc_pid_attr_write,
1620}; 1748};
1621 1749
1622static struct pid_entry attr_dir_stuff[] = { 1750static const struct pid_entry attr_dir_stuff[] = {
1623 REG("current", S_IRUGO|S_IWUGO, pid_attr), 1751 REG("current", S_IRUGO|S_IWUGO, pid_attr),
1624 REG("prev", S_IRUGO, pid_attr), 1752 REG("prev", S_IRUGO, pid_attr),
1625 REG("exec", S_IRUGO|S_IWUGO, pid_attr), 1753 REG("exec", S_IRUGO|S_IWUGO, pid_attr),
@@ -1685,7 +1813,7 @@ static const struct inode_operations proc_self_inode_operations = {
1685 * that properly belong to the /proc filesystem, as they describe 1813 * that properly belong to the /proc filesystem, as they describe
1686 * describe something that is process related. 1814 * describe something that is process related.
1687 */ 1815 */
1688static struct pid_entry proc_base_stuff[] = { 1816static const struct pid_entry proc_base_stuff[] = {
1689 NOD("self", S_IFLNK|S_IRWXUGO, 1817 NOD("self", S_IFLNK|S_IRWXUGO,
1690 &proc_self_inode_operations, NULL, {}), 1818 &proc_self_inode_operations, NULL, {}),
1691}; 1819};
@@ -1714,9 +1842,9 @@ static struct dentry_operations proc_base_dentry_operations =
1714}; 1842};
1715 1843
1716static struct dentry *proc_base_instantiate(struct inode *dir, 1844static struct dentry *proc_base_instantiate(struct inode *dir,
1717 struct dentry *dentry, struct task_struct *task, void *ptr) 1845 struct dentry *dentry, struct task_struct *task, const void *ptr)
1718{ 1846{
1719 struct pid_entry *p = ptr; 1847 const struct pid_entry *p = ptr;
1720 struct inode *inode; 1848 struct inode *inode;
1721 struct proc_inode *ei; 1849 struct proc_inode *ei;
1722 struct dentry *error = ERR_PTR(-EINVAL); 1850 struct dentry *error = ERR_PTR(-EINVAL);
@@ -1764,7 +1892,7 @@ static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry)
1764{ 1892{
1765 struct dentry *error; 1893 struct dentry *error;
1766 struct task_struct *task = get_proc_task(dir); 1894 struct task_struct *task = get_proc_task(dir);
1767 struct pid_entry *p, *last; 1895 const struct pid_entry *p, *last;
1768 1896
1769 error = ERR_PTR(-ENOENT); 1897 error = ERR_PTR(-ENOENT);
1770 1898
@@ -1790,8 +1918,8 @@ out_no_task:
1790 return error; 1918 return error;
1791} 1919}
1792 1920
1793static int proc_base_fill_cache(struct file *filp, void *dirent, filldir_t filldir, 1921static int proc_base_fill_cache(struct file *filp, void *dirent,
1794 struct task_struct *task, struct pid_entry *p) 1922 filldir_t filldir, struct task_struct *task, const struct pid_entry *p)
1795{ 1923{
1796 return proc_fill_cache(filp, dirent, filldir, p->name, p->len, 1924 return proc_fill_cache(filp, dirent, filldir, p->name, p->len,
1797 proc_base_instantiate, task, p); 1925 proc_base_instantiate, task, p);
@@ -1828,9 +1956,10 @@ static int proc_pid_io_accounting(struct task_struct *task, char *buffer)
1828static const struct file_operations proc_task_operations; 1956static const struct file_operations proc_task_operations;
1829static const struct inode_operations proc_task_inode_operations; 1957static const struct inode_operations proc_task_inode_operations;
1830 1958
1831static struct pid_entry tgid_base_stuff[] = { 1959static const struct pid_entry tgid_base_stuff[] = {
1832 DIR("task", S_IRUGO|S_IXUGO, task), 1960 DIR("task", S_IRUGO|S_IXUGO, task),
1833 DIR("fd", S_IRUSR|S_IXUSR, fd), 1961 DIR("fd", S_IRUSR|S_IXUSR, fd),
1962 DIR("fdinfo", S_IRUSR|S_IXUSR, fdinfo),
1834 INF("environ", S_IRUSR, pid_environ), 1963 INF("environ", S_IRUSR, pid_environ),
1835 INF("auxv", S_IRUSR, pid_auxv), 1964 INF("auxv", S_IRUSR, pid_auxv),
1836 INF("status", S_IRUGO, pid_status), 1965 INF("status", S_IRUGO, pid_status),
@@ -1851,6 +1980,7 @@ static struct pid_entry tgid_base_stuff[] = {
1851 REG("mounts", S_IRUGO, mounts), 1980 REG("mounts", S_IRUGO, mounts),
1852 REG("mountstats", S_IRUSR, mountstats), 1981 REG("mountstats", S_IRUSR, mountstats),
1853#ifdef CONFIG_MMU 1982#ifdef CONFIG_MMU
1983 REG("clear_refs", S_IWUSR, clear_refs),
1854 REG("smaps", S_IRUGO, smaps), 1984 REG("smaps", S_IRUGO, smaps),
1855#endif 1985#endif
1856#ifdef CONFIG_SECURITY 1986#ifdef CONFIG_SECURITY
@@ -1970,7 +2100,7 @@ out:
1970 2100
1971static struct dentry *proc_pid_instantiate(struct inode *dir, 2101static struct dentry *proc_pid_instantiate(struct inode *dir,
1972 struct dentry * dentry, 2102 struct dentry * dentry,
1973 struct task_struct *task, void *ptr) 2103 struct task_struct *task, const void *ptr)
1974{ 2104{
1975 struct dentry *error = ERR_PTR(-ENOENT); 2105 struct dentry *error = ERR_PTR(-ENOENT);
1976 struct inode *inode; 2106 struct inode *inode;
@@ -1983,7 +2113,7 @@ static struct dentry *proc_pid_instantiate(struct inode *dir,
1983 inode->i_op = &proc_tgid_base_inode_operations; 2113 inode->i_op = &proc_tgid_base_inode_operations;
1984 inode->i_fop = &proc_tgid_base_operations; 2114 inode->i_fop = &proc_tgid_base_operations;
1985 inode->i_flags|=S_IMMUTABLE; 2115 inode->i_flags|=S_IMMUTABLE;
1986 inode->i_nlink = 4; 2116 inode->i_nlink = 5;
1987#ifdef CONFIG_SECURITY 2117#ifdef CONFIG_SECURITY
1988 inode->i_nlink += 1; 2118 inode->i_nlink += 1;
1989#endif 2119#endif
@@ -2085,7 +2215,7 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
2085 goto out_no_task; 2215 goto out_no_task;
2086 2216
2087 for (; nr < ARRAY_SIZE(proc_base_stuff); filp->f_pos++, nr++) { 2217 for (; nr < ARRAY_SIZE(proc_base_stuff); filp->f_pos++, nr++) {
2088 struct pid_entry *p = &proc_base_stuff[nr]; 2218 const struct pid_entry *p = &proc_base_stuff[nr];
2089 if (proc_base_fill_cache(filp, dirent, filldir, reaper, p) < 0) 2219 if (proc_base_fill_cache(filp, dirent, filldir, reaper, p) < 0)
2090 goto out; 2220 goto out;
2091 } 2221 }
@@ -2111,8 +2241,9 @@ out_no_task:
2111/* 2241/*
2112 * Tasks 2242 * Tasks
2113 */ 2243 */
2114static struct pid_entry tid_base_stuff[] = { 2244static const struct pid_entry tid_base_stuff[] = {
2115 DIR("fd", S_IRUSR|S_IXUSR, fd), 2245 DIR("fd", S_IRUSR|S_IXUSR, fd),
2246 DIR("fdinfo", S_IRUSR|S_IXUSR, fdinfo),
2116 INF("environ", S_IRUSR, pid_environ), 2247 INF("environ", S_IRUSR, pid_environ),
2117 INF("auxv", S_IRUSR, pid_auxv), 2248 INF("auxv", S_IRUSR, pid_auxv),
2118 INF("status", S_IRUGO, pid_status), 2249 INF("status", S_IRUGO, pid_status),
@@ -2132,6 +2263,7 @@ static struct pid_entry tid_base_stuff[] = {
2132 LNK("exe", exe), 2263 LNK("exe", exe),
2133 REG("mounts", S_IRUGO, mounts), 2264 REG("mounts", S_IRUGO, mounts),
2134#ifdef CONFIG_MMU 2265#ifdef CONFIG_MMU
2266 REG("clear_refs", S_IWUSR, clear_refs),
2135 REG("smaps", S_IRUGO, smaps), 2267 REG("smaps", S_IRUGO, smaps),
2136#endif 2268#endif
2137#ifdef CONFIG_SECURITY 2269#ifdef CONFIG_SECURITY
@@ -2180,7 +2312,7 @@ static const struct inode_operations proc_tid_base_inode_operations = {
2180}; 2312};
2181 2313
2182static struct dentry *proc_task_instantiate(struct inode *dir, 2314static struct dentry *proc_task_instantiate(struct inode *dir,
2183 struct dentry *dentry, struct task_struct *task, void *ptr) 2315 struct dentry *dentry, struct task_struct *task, const void *ptr)
2184{ 2316{
2185 struct dentry *error = ERR_PTR(-ENOENT); 2317 struct dentry *error = ERR_PTR(-ENOENT);
2186 struct inode *inode; 2318 struct inode *inode;
@@ -2192,7 +2324,7 @@ static struct dentry *proc_task_instantiate(struct inode *dir,
2192 inode->i_op = &proc_tid_base_inode_operations; 2324 inode->i_op = &proc_tid_base_inode_operations;
2193 inode->i_fop = &proc_tid_base_operations; 2325 inode->i_fop = &proc_tid_base_operations;
2194 inode->i_flags|=S_IMMUTABLE; 2326 inode->i_flags|=S_IMMUTABLE;
2195 inode->i_nlink = 3; 2327 inode->i_nlink = 4;
2196#ifdef CONFIG_SECURITY 2328#ifdef CONFIG_SECURITY
2197 inode->i_nlink += 1; 2329 inode->i_nlink += 1;
2198#endif 2330#endif
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 775fb21294d8..8a40e15f5ecb 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -398,6 +398,7 @@ struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nam
398 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { 398 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
399 unsigned int ino = de->low_ino; 399 unsigned int ino = de->low_ino;
400 400
401 de_get(de);
401 spin_unlock(&proc_subdir_lock); 402 spin_unlock(&proc_subdir_lock);
402 error = -EINVAL; 403 error = -EINVAL;
403 inode = proc_get_inode(dir->i_sb, ino, de); 404 inode = proc_get_inode(dir->i_sb, ino, de);
@@ -414,6 +415,7 @@ struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nam
414 d_add(dentry, inode); 415 d_add(dentry, inode);
415 return NULL; 416 return NULL;
416 } 417 }
418 de_put(de);
417 return ERR_PTR(error); 419 return ERR_PTR(error);
418} 420}
419 421
@@ -476,14 +478,21 @@ int proc_readdir(struct file * filp,
476 } 478 }
477 479
478 do { 480 do {
481 struct proc_dir_entry *next;
482
479 /* filldir passes info to user space */ 483 /* filldir passes info to user space */
484 de_get(de);
480 spin_unlock(&proc_subdir_lock); 485 spin_unlock(&proc_subdir_lock);
481 if (filldir(dirent, de->name, de->namelen, filp->f_pos, 486 if (filldir(dirent, de->name, de->namelen, filp->f_pos,
482 de->low_ino, de->mode >> 12) < 0) 487 de->low_ino, de->mode >> 12) < 0) {
488 de_put(de);
483 goto out; 489 goto out;
490 }
484 spin_lock(&proc_subdir_lock); 491 spin_lock(&proc_subdir_lock);
485 filp->f_pos++; 492 filp->f_pos++;
486 de = de->next; 493 next = de->next;
494 de_put(de);
495 de = next;
487 } while (de); 496 } while (de);
488 spin_unlock(&proc_subdir_lock); 497 spin_unlock(&proc_subdir_lock);
489 } 498 }
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index c372eb151a3a..b8171907c83b 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -21,7 +21,7 @@
21 21
22#include "internal.h" 22#include "internal.h"
23 23
24static inline struct proc_dir_entry * de_get(struct proc_dir_entry *de) 24struct proc_dir_entry *de_get(struct proc_dir_entry *de)
25{ 25{
26 if (de) 26 if (de)
27 atomic_inc(&de->count); 27 atomic_inc(&de->count);
@@ -31,7 +31,7 @@ static inline struct proc_dir_entry * de_get(struct proc_dir_entry *de)
31/* 31/*
32 * Decrements the use count and checks for deferred deletion. 32 * Decrements the use count and checks for deferred deletion.
33 */ 33 */
34static void de_put(struct proc_dir_entry *de) 34void de_put(struct proc_dir_entry *de)
35{ 35{
36 if (de) { 36 if (de) {
37 lock_kernel(); 37 lock_kernel();
@@ -109,8 +109,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
109{ 109{
110 struct proc_inode *ei = (struct proc_inode *) foo; 110 struct proc_inode *ei = (struct proc_inode *) foo;
111 111
112 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 112 if (flags & SLAB_CTOR_CONSTRUCTOR)
113 SLAB_CTOR_CONSTRUCTOR)
114 inode_init_once(&ei->vfs_inode); 113 inode_init_once(&ei->vfs_inode);
115} 114}
116 115
@@ -147,13 +146,6 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
147{ 146{
148 struct inode * inode; 147 struct inode * inode;
149 148
150 /*
151 * Increment the use count so the dir entry can't disappear.
152 */
153 de_get(de);
154
155 WARN_ON(de && de->deleted);
156
157 if (de != NULL && !try_module_get(de->owner)) 149 if (de != NULL && !try_module_get(de->owner))
158 goto out_mod; 150 goto out_mod;
159 151
@@ -185,7 +177,6 @@ out_ino:
185 if (de != NULL) 177 if (de != NULL)
186 module_put(de->owner); 178 module_put(de->owner);
187out_mod: 179out_mod:
188 de_put(de);
189 return NULL; 180 return NULL;
190} 181}
191 182
@@ -200,6 +191,7 @@ int proc_fill_super(struct super_block *s, void *data, int silent)
200 s->s_op = &proc_sops; 191 s->s_op = &proc_sops;
201 s->s_time_gran = 1; 192 s->s_time_gran = 1;
202 193
194 de_get(&proc_root);
203 root_inode = proc_get_inode(s, PROC_ROOT_INO, &proc_root); 195 root_inode = proc_get_inode(s, PROC_ROOT_INO, &proc_root);
204 if (!root_inode) 196 if (!root_inode)
205 goto out_no_root; 197 goto out_no_root;
@@ -213,6 +205,7 @@ int proc_fill_super(struct super_block *s, void *data, int silent)
213out_no_root: 205out_no_root:
214 printk("proc_read_super: get root inode failed\n"); 206 printk("proc_read_super: get root inode failed\n");
215 iput(root_inode); 207 iput(root_inode);
208 de_put(&proc_root);
216 return -ENOMEM; 209 return -ENOMEM;
217} 210}
218MODULE_LICENSE("GPL"); 211MODULE_LICENSE("GPL");
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index f771889183c3..b215c3524fa6 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -37,6 +37,8 @@ do { \
37extern int nommu_vma_show(struct seq_file *, struct vm_area_struct *); 37extern int nommu_vma_show(struct seq_file *, struct vm_area_struct *);
38#endif 38#endif
39 39
40extern int maps_protect;
41
40extern void create_seq_entry(char *name, mode_t mode, const struct file_operations *f); 42extern void create_seq_entry(char *name, mode_t mode, const struct file_operations *f);
41extern int proc_exe_link(struct inode *, struct dentry **, struct vfsmount **); 43extern int proc_exe_link(struct inode *, struct dentry **, struct vfsmount **);
42extern int proc_tid_stat(struct task_struct *, char *); 44extern int proc_tid_stat(struct task_struct *, char *);
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index abdf068bc27f..eca471bc8512 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -38,7 +38,7 @@ static int property_read_proc(char *page, char **start, off_t off,
38 n = count; 38 n = count;
39 else 39 else
40 *eof = 1; 40 *eof = 1;
41 memcpy(page, pp->value + off, n); 41 memcpy(page, (char *)pp->value + off, n);
42 *start = page; 42 *start = page;
43 return n; 43 return n;
44} 44}
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index e2c4c0a5c90d..5fd49e47f83a 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -35,7 +35,6 @@
35#include <linux/signal.h> 35#include <linux/signal.h>
36#include <linux/module.h> 36#include <linux/module.h>
37#include <linux/init.h> 37#include <linux/init.h>
38#include <linux/smp_lock.h>
39#include <linux/seq_file.h> 38#include <linux/seq_file.h>
40#include <linux/times.h> 39#include <linux/times.h>
41#include <linux/profile.h> 40#include <linux/profile.h>
@@ -398,8 +397,6 @@ static const struct file_operations proc_modules_operations = {
398#endif 397#endif
399 398
400#ifdef CONFIG_SLAB 399#ifdef CONFIG_SLAB
401extern struct seq_operations slabinfo_op;
402extern ssize_t slabinfo_write(struct file *, const char __user *, size_t, loff_t *);
403static int slabinfo_open(struct inode *inode, struct file *file) 400static int slabinfo_open(struct inode *inode, struct file *file)
404{ 401{
405 return seq_open(file, &slabinfo_op); 402 return seq_open(file, &slabinfo_op);
@@ -431,18 +428,11 @@ static int slabstats_open(struct inode *inode, struct file *file)
431 return ret; 428 return ret;
432} 429}
433 430
434static int slabstats_release(struct inode *inode, struct file *file)
435{
436 struct seq_file *m = file->private_data;
437 kfree(m->private);
438 return seq_release(inode, file);
439}
440
441static const struct file_operations proc_slabstats_operations = { 431static const struct file_operations proc_slabstats_operations = {
442 .open = slabstats_open, 432 .open = slabstats_open,
443 .read = seq_read, 433 .read = seq_read,
444 .llseek = seq_lseek, 434 .llseek = seq_lseek,
445 .release = slabstats_release, 435 .release = seq_release_private,
446}; 436};
447#endif 437#endif
448#endif 438#endif
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 20e8cbb34364..680c429bfa22 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -429,11 +429,8 @@ static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr)
429 return -EPERM; 429 return -EPERM;
430 430
431 error = inode_change_ok(inode, attr); 431 error = inode_change_ok(inode, attr);
432 if (!error) { 432 if (!error)
433 error = security_inode_setattr(dentry, attr); 433 error = inode_setattr(inode, attr);
434 if (!error)
435 error = inode_setattr(inode, attr);
436 }
437 434
438 return error; 435 return error;
439} 436}
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index c1bbfbeb035e..b3a473b0a191 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -108,6 +108,8 @@ static void *t_start(struct seq_file *m, loff_t *pos)
108{ 108{
109 struct list_head *p; 109 struct list_head *p;
110 loff_t l = *pos; 110 loff_t l = *pos;
111
112 mutex_lock(&tty_mutex);
111 list_for_each(p, &tty_drivers) 113 list_for_each(p, &tty_drivers)
112 if (!l--) 114 if (!l--)
113 return list_entry(p, struct tty_driver, tty_drivers); 115 return list_entry(p, struct tty_driver, tty_drivers);
@@ -124,6 +126,7 @@ static void *t_next(struct seq_file *m, void *v, loff_t *pos)
124 126
125static void t_stop(struct seq_file *m, void *v) 127static void t_stop(struct seq_file *m, void *v)
126{ 128{
129 mutex_unlock(&tty_mutex);
127} 130}
128 131
129static struct seq_operations tty_drivers_op = { 132static struct seq_operations tty_drivers_op = {
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 7445980c8022..c24d81a5a040 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -3,6 +3,7 @@
3#include <linux/mount.h> 3#include <linux/mount.h>
4#include <linux/seq_file.h> 4#include <linux/seq_file.h>
5#include <linux/highmem.h> 5#include <linux/highmem.h>
6#include <linux/ptrace.h>
6#include <linux/pagemap.h> 7#include <linux/pagemap.h>
7#include <linux/mempolicy.h> 8#include <linux/mempolicy.h>
8 9
@@ -120,6 +121,14 @@ struct mem_size_stats
120 unsigned long shared_dirty; 121 unsigned long shared_dirty;
121 unsigned long private_clean; 122 unsigned long private_clean;
122 unsigned long private_dirty; 123 unsigned long private_dirty;
124 unsigned long referenced;
125};
126
127struct pmd_walker {
128 struct vm_area_struct *vma;
129 void *private;
130 void (*action)(struct vm_area_struct *, pmd_t *, unsigned long,
131 unsigned long, void *);
123}; 132};
124 133
125static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss) 134static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss)
@@ -134,6 +143,9 @@ static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats
134 dev_t dev = 0; 143 dev_t dev = 0;
135 int len; 144 int len;
136 145
146 if (maps_protect && !ptrace_may_attach(task))
147 return -EACCES;
148
137 if (file) { 149 if (file) {
138 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 150 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
139 dev = inode->i_sb->s_dev; 151 dev = inode->i_sb->s_dev;
@@ -181,18 +193,20 @@ static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats
181 193
182 if (mss) 194 if (mss)
183 seq_printf(m, 195 seq_printf(m,
184 "Size: %8lu kB\n" 196 "Size: %8lu kB\n"
185 "Rss: %8lu kB\n" 197 "Rss: %8lu kB\n"
186 "Shared_Clean: %8lu kB\n" 198 "Shared_Clean: %8lu kB\n"
187 "Shared_Dirty: %8lu kB\n" 199 "Shared_Dirty: %8lu kB\n"
188 "Private_Clean: %8lu kB\n" 200 "Private_Clean: %8lu kB\n"
189 "Private_Dirty: %8lu kB\n", 201 "Private_Dirty: %8lu kB\n"
202 "Referenced: %8lu kB\n",
190 (vma->vm_end - vma->vm_start) >> 10, 203 (vma->vm_end - vma->vm_start) >> 10,
191 mss->resident >> 10, 204 mss->resident >> 10,
192 mss->shared_clean >> 10, 205 mss->shared_clean >> 10,
193 mss->shared_dirty >> 10, 206 mss->shared_dirty >> 10,
194 mss->private_clean >> 10, 207 mss->private_clean >> 10,
195 mss->private_dirty >> 10); 208 mss->private_dirty >> 10,
209 mss->referenced >> 10);
196 210
197 if (m->count < m->size) /* vma is copied successfully */ 211 if (m->count < m->size) /* vma is copied successfully */
198 m->version = (vma != get_gate_vma(task))? vma->vm_start: 0; 212 m->version = (vma != get_gate_vma(task))? vma->vm_start: 0;
@@ -205,15 +219,16 @@ static int show_map(struct seq_file *m, void *v)
205} 219}
206 220
207static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd, 221static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
208 unsigned long addr, unsigned long end, 222 unsigned long addr, unsigned long end,
209 struct mem_size_stats *mss) 223 void *private)
210{ 224{
225 struct mem_size_stats *mss = private;
211 pte_t *pte, ptent; 226 pte_t *pte, ptent;
212 spinlock_t *ptl; 227 spinlock_t *ptl;
213 struct page *page; 228 struct page *page;
214 229
215 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); 230 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
216 do { 231 for (; addr != end; pte++, addr += PAGE_SIZE) {
217 ptent = *pte; 232 ptent = *pte;
218 if (!pte_present(ptent)) 233 if (!pte_present(ptent))
219 continue; 234 continue;
@@ -224,6 +239,9 @@ static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
224 if (!page) 239 if (!page)
225 continue; 240 continue;
226 241
242 /* Accumulate the size in pages that have been accessed. */
243 if (pte_young(ptent) || PageReferenced(page))
244 mss->referenced += PAGE_SIZE;
227 if (page_mapcount(page) >= 2) { 245 if (page_mapcount(page) >= 2) {
228 if (pte_dirty(ptent)) 246 if (pte_dirty(ptent))
229 mss->shared_dirty += PAGE_SIZE; 247 mss->shared_dirty += PAGE_SIZE;
@@ -235,57 +253,99 @@ static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
235 else 253 else
236 mss->private_clean += PAGE_SIZE; 254 mss->private_clean += PAGE_SIZE;
237 } 255 }
238 } while (pte++, addr += PAGE_SIZE, addr != end); 256 }
239 pte_unmap_unlock(pte - 1, ptl); 257 pte_unmap_unlock(pte - 1, ptl);
240 cond_resched(); 258 cond_resched();
241} 259}
242 260
243static inline void smaps_pmd_range(struct vm_area_struct *vma, pud_t *pud, 261static void clear_refs_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
244 unsigned long addr, unsigned long end, 262 unsigned long addr, unsigned long end,
245 struct mem_size_stats *mss) 263 void *private)
264{
265 pte_t *pte, ptent;
266 spinlock_t *ptl;
267 struct page *page;
268
269 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
270 for (; addr != end; pte++, addr += PAGE_SIZE) {
271 ptent = *pte;
272 if (!pte_present(ptent))
273 continue;
274
275 page = vm_normal_page(vma, addr, ptent);
276 if (!page)
277 continue;
278
279 /* Clear accessed and referenced bits. */
280 ptep_test_and_clear_young(vma, addr, pte);
281 ClearPageReferenced(page);
282 }
283 pte_unmap_unlock(pte - 1, ptl);
284 cond_resched();
285}
286
287static inline void walk_pmd_range(struct pmd_walker *walker, pud_t *pud,
288 unsigned long addr, unsigned long end)
246{ 289{
247 pmd_t *pmd; 290 pmd_t *pmd;
248 unsigned long next; 291 unsigned long next;
249 292
250 pmd = pmd_offset(pud, addr); 293 for (pmd = pmd_offset(pud, addr); addr != end;
251 do { 294 pmd++, addr = next) {
252 next = pmd_addr_end(addr, end); 295 next = pmd_addr_end(addr, end);
253 if (pmd_none_or_clear_bad(pmd)) 296 if (pmd_none_or_clear_bad(pmd))
254 continue; 297 continue;
255 smaps_pte_range(vma, pmd, addr, next, mss); 298 walker->action(walker->vma, pmd, addr, next, walker->private);
256 } while (pmd++, addr = next, addr != end); 299 }
257} 300}
258 301
259static inline void smaps_pud_range(struct vm_area_struct *vma, pgd_t *pgd, 302static inline void walk_pud_range(struct pmd_walker *walker, pgd_t *pgd,
260 unsigned long addr, unsigned long end, 303 unsigned long addr, unsigned long end)
261 struct mem_size_stats *mss)
262{ 304{
263 pud_t *pud; 305 pud_t *pud;
264 unsigned long next; 306 unsigned long next;
265 307
266 pud = pud_offset(pgd, addr); 308 for (pud = pud_offset(pgd, addr); addr != end;
267 do { 309 pud++, addr = next) {
268 next = pud_addr_end(addr, end); 310 next = pud_addr_end(addr, end);
269 if (pud_none_or_clear_bad(pud)) 311 if (pud_none_or_clear_bad(pud))
270 continue; 312 continue;
271 smaps_pmd_range(vma, pud, addr, next, mss); 313 walk_pmd_range(walker, pud, addr, next);
272 } while (pud++, addr = next, addr != end); 314 }
273} 315}
274 316
275static inline void smaps_pgd_range(struct vm_area_struct *vma, 317/*
276 unsigned long addr, unsigned long end, 318 * walk_page_range - walk the page tables of a VMA with a callback
277 struct mem_size_stats *mss) 319 * @vma - VMA to walk
320 * @action - callback invoked for every bottom-level (PTE) page table
321 * @private - private data passed to the callback function
322 *
323 * Recursively walk the page table for the memory area in a VMA, calling
324 * a callback for every bottom-level (PTE) page table.
325 */
326static inline void walk_page_range(struct vm_area_struct *vma,
327 void (*action)(struct vm_area_struct *,
328 pmd_t *, unsigned long,
329 unsigned long, void *),
330 void *private)
278{ 331{
332 unsigned long addr = vma->vm_start;
333 unsigned long end = vma->vm_end;
334 struct pmd_walker walker = {
335 .vma = vma,
336 .private = private,
337 .action = action,
338 };
279 pgd_t *pgd; 339 pgd_t *pgd;
280 unsigned long next; 340 unsigned long next;
281 341
282 pgd = pgd_offset(vma->vm_mm, addr); 342 for (pgd = pgd_offset(vma->vm_mm, addr); addr != end;
283 do { 343 pgd++, addr = next) {
284 next = pgd_addr_end(addr, end); 344 next = pgd_addr_end(addr, end);
285 if (pgd_none_or_clear_bad(pgd)) 345 if (pgd_none_or_clear_bad(pgd))
286 continue; 346 continue;
287 smaps_pud_range(vma, pgd, addr, next, mss); 347 walk_pud_range(&walker, pgd, addr, next);
288 } while (pgd++, addr = next, addr != end); 348 }
289} 349}
290 350
291static int show_smap(struct seq_file *m, void *v) 351static int show_smap(struct seq_file *m, void *v)
@@ -295,10 +355,22 @@ static int show_smap(struct seq_file *m, void *v)
295 355
296 memset(&mss, 0, sizeof mss); 356 memset(&mss, 0, sizeof mss);
297 if (vma->vm_mm && !is_vm_hugetlb_page(vma)) 357 if (vma->vm_mm && !is_vm_hugetlb_page(vma))
298 smaps_pgd_range(vma, vma->vm_start, vma->vm_end, &mss); 358 walk_page_range(vma, smaps_pte_range, &mss);
299 return show_map_internal(m, v, &mss); 359 return show_map_internal(m, v, &mss);
300} 360}
301 361
362void clear_refs_smap(struct mm_struct *mm)
363{
364 struct vm_area_struct *vma;
365
366 down_read(&mm->mmap_sem);
367 for (vma = mm->mmap; vma; vma = vma->vm_next)
368 if (vma->vm_mm && !is_vm_hugetlb_page(vma))
369 walk_page_range(vma, clear_refs_pte_range, NULL);
370 flush_tlb_mm(mm);
371 up_read(&mm->mmap_sem);
372}
373
302static void *m_start(struct seq_file *m, loff_t *pos) 374static void *m_start(struct seq_file *m, loff_t *pos)
303{ 375{
304 struct proc_maps_private *priv = m->private; 376 struct proc_maps_private *priv = m->private;
@@ -444,11 +516,22 @@ const struct file_operations proc_maps_operations = {
444#ifdef CONFIG_NUMA 516#ifdef CONFIG_NUMA
445extern int show_numa_map(struct seq_file *m, void *v); 517extern int show_numa_map(struct seq_file *m, void *v);
446 518
519static int show_numa_map_checked(struct seq_file *m, void *v)
520{
521 struct proc_maps_private *priv = m->private;
522 struct task_struct *task = priv->task;
523
524 if (maps_protect && !ptrace_may_attach(task))
525 return -EACCES;
526
527 return show_numa_map(m, v);
528}
529
447static struct seq_operations proc_pid_numa_maps_op = { 530static struct seq_operations proc_pid_numa_maps_op = {
448 .start = m_start, 531 .start = m_start,
449 .next = m_next, 532 .next = m_next,
450 .stop = m_stop, 533 .stop = m_stop,
451 .show = show_numa_map 534 .show = show_numa_map_checked
452}; 535};
453 536
454static int numa_maps_open(struct inode *inode, struct file *file) 537static int numa_maps_open(struct inode *inode, struct file *file)
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 7cddf6b8635a..d8b8c7183c24 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -2,6 +2,7 @@
2#include <linux/mm.h> 2#include <linux/mm.h>
3#include <linux/file.h> 3#include <linux/file.h>
4#include <linux/mount.h> 4#include <linux/mount.h>
5#include <linux/ptrace.h>
5#include <linux/seq_file.h> 6#include <linux/seq_file.h>
6#include "internal.h" 7#include "internal.h"
7 8
@@ -143,6 +144,12 @@ out:
143static int show_map(struct seq_file *m, void *_vml) 144static int show_map(struct seq_file *m, void *_vml)
144{ 145{
145 struct vm_list_struct *vml = _vml; 146 struct vm_list_struct *vml = _vml;
147 struct proc_maps_private *priv = m->private;
148 struct task_struct *task = priv->task;
149
150 if (maps_protect && !ptrace_may_attach(task))
151 return -EACCES;
152
146 return nommu_vma_show(m, vml->vma); 153 return nommu_vma_show(m, vml->vma);
147} 154}
148 155
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index d96050728c43..523e1098ae88 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -514,7 +514,7 @@ static int __init parse_crash_elf64_headers(void)
514 /* Do some basic Verification. */ 514 /* Do some basic Verification. */
515 if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 || 515 if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 ||
516 (ehdr.e_type != ET_CORE) || 516 (ehdr.e_type != ET_CORE) ||
517 !elf_check_arch(&ehdr) || 517 !vmcore_elf_check_arch(&ehdr) ||
518 ehdr.e_ident[EI_CLASS] != ELFCLASS64 || 518 ehdr.e_ident[EI_CLASS] != ELFCLASS64 ||
519 ehdr.e_ident[EI_VERSION] != EV_CURRENT || 519 ehdr.e_ident[EI_VERSION] != EV_CURRENT ||
520 ehdr.e_version != EV_CURRENT || 520 ehdr.e_version != EV_CURRENT ||
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 83bc8e7824cd..75fc8498f2e2 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -536,8 +536,7 @@ static void init_once(void *foo, struct kmem_cache * cachep,
536{ 536{
537 struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo; 537 struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo;
538 538
539 if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == 539 if (flags & SLAB_CTOR_CONSTRUCTOR)
540 SLAB_CTOR_CONSTRUCTOR)
541 inode_init_once(&ei->vfs_inode); 540 inode_init_once(&ei->vfs_inode);
542} 541}
543 542
diff --git a/fs/quota.c b/fs/quota.c
index b9dae76a0b6e..e9d88fd0eca8 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -11,7 +11,6 @@
11#include <asm/current.h> 11#include <asm/current.h>
12#include <asm/uaccess.h> 12#include <asm/uaccess.h>
13#include <linux/kernel.h> 13#include <linux/kernel.h>
14#include <linux/smp_lock.h>
15#include <linux/security.h> 14#include <linux/security.h>
16#include <linux/syscalls.h> 15#include <linux/syscalls.h>
17#include <linux/buffer_head.h> 16#include <linux/buffer_head.h>
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index d3fd7c6732d2..3b481d557edb 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -16,7 +16,6 @@
16#include <linux/highmem.h> 16#include <linux/highmem.h>
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/string.h> 18#include <linux/string.h>
19#include <linux/smp_lock.h>
20#include <linux/backing-dev.h> 19#include <linux/backing-dev.h>
21#include <linux/ramfs.h> 20#include <linux/ramfs.h>
22#include <linux/quotaops.h> 21#include <linux/quotaops.h>
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index ff1f7639707b..4ace5d72eae1 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -30,7 +30,6 @@
30#include <linux/time.h> 30#include <linux/time.h>
31#include <linux/init.h> 31#include <linux/init.h>
32#include <linux/string.h> 32#include <linux/string.h>
33#include <linux/smp_lock.h>
34#include <linux/backing-dev.h> 33#include <linux/backing-dev.h>
35#include <linux/ramfs.h> 34#include <linux/ramfs.h>
36 35
diff --git a/fs/read_write.c b/fs/read_write.c
index 1f8dc373ede7..4d03008f015b 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -37,10 +37,10 @@ loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
37 37
38 mutex_lock(&inode->i_mutex); 38 mutex_lock(&inode->i_mutex);
39 switch (origin) { 39 switch (origin) {
40 case 2: 40 case SEEK_END:
41 offset += inode->i_size; 41 offset += inode->i_size;
42 break; 42 break;
43 case 1: 43 case SEEK_CUR:
44 offset += file->f_pos; 44 offset += file->f_pos;
45 } 45 }
46 retval = -EINVAL; 46 retval = -EINVAL;
@@ -63,10 +63,10 @@ loff_t remote_llseek(struct file *file, loff_t offset, int origin)
63 63
64 lock_kernel(); 64 lock_kernel();
65 switch (origin) { 65 switch (origin) {
66 case 2: 66 case SEEK_END:
67 offset += i_size_read(file->f_path.dentry->d_inode); 67 offset += i_size_read(file->f_path.dentry->d_inode);
68 break; 68 break;
69 case 1: 69 case SEEK_CUR:
70 offset += file->f_pos; 70 offset += file->f_pos;
71 } 71 }
72 retval = -EINVAL; 72 retval = -EINVAL;
@@ -94,10 +94,10 @@ loff_t default_llseek(struct file *file, loff_t offset, int origin)
94 94
95 lock_kernel(); 95 lock_kernel();
96 switch (origin) { 96 switch (origin) {
97 case 2: 97 case SEEK_END:
98 offset += i_size_read(file->f_path.dentry->d_inode); 98 offset += i_size_read(file->f_path.dentry->d_inode);
99 break; 99 break;
100 case 1: 100 case SEEK_CUR:
101 offset += file->f_pos; 101 offset += file->f_pos;
102 } 102 }
103 retval = -EINVAL; 103 retval = -EINVAL;
@@ -139,7 +139,7 @@ asmlinkage off_t sys_lseek(unsigned int fd, off_t offset, unsigned int origin)
139 goto bad; 139 goto bad;
140 140
141 retval = -EINVAL; 141 retval = -EINVAL;
142 if (origin <= 2) { 142 if (origin <= SEEK_MAX) {
143 loff_t res = vfs_llseek(file, offset, origin); 143 loff_t res = vfs_llseek(file, offset, origin);
144 retval = res; 144 retval = res;
145 if (res != (loff_t)retval) 145 if (res != (loff_t)retval)
@@ -166,7 +166,7 @@ asmlinkage long sys_llseek(unsigned int fd, unsigned long offset_high,
166 goto bad; 166 goto bad;
167 167
168 retval = -EINVAL; 168 retval = -EINVAL;
169 if (origin > 2) 169 if (origin > SEEK_MAX)
170 goto out_putf; 170 goto out_putf;
171 171
172 offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low, 172 offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low,
diff --git a/fs/readdir.c b/fs/readdir.c
index f39f5b313252..efe52e676577 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -4,13 +4,13 @@
4 * Copyright (C) 1995 Linus Torvalds 4 * Copyright (C) 1995 Linus Torvalds
5 */ 5 */
6 6
7#include <linux/kernel.h>
7#include <linux/module.h> 8#include <linux/module.h>
8#include <linux/time.h> 9#include <linux/time.h>
9#include <linux/mm.h> 10#include <linux/mm.h>
10#include <linux/errno.h> 11#include <linux/errno.h>
11#include <linux/stat.h> 12#include <linux/stat.h>
12#include <linux/file.h> 13#include <linux/file.h>
13#include <linux/smp_lock.h>
14#include <linux/fs.h> 14#include <linux/fs.h>
15#include <linux/dirent.h> 15#include <linux/dirent.h>
16#include <linux/security.h> 16#include <linux/security.h>
@@ -52,7 +52,6 @@ EXPORT_SYMBOL(vfs_readdir);
52 * case (the low-level handlers don't need to care about this). 52 * case (the low-level handlers don't need to care about this).
53 */ 53 */
54#define NAME_OFFSET(de) ((int) ((de)->d_name - (char __user *) (de))) 54#define NAME_OFFSET(de) ((int) ((de)->d_name - (char __user *) (de)))
55#define ROUND_UP(x) (((x)+sizeof(long)-1) & ~(sizeof(long)-1))
56 55
57#ifdef __ARCH_WANT_OLD_READDIR 56#ifdef __ARCH_WANT_OLD_READDIR
58 57
@@ -147,7 +146,7 @@ static int filldir(void * __buf, const char * name, int namlen, loff_t offset,
147 struct linux_dirent __user * dirent; 146 struct linux_dirent __user * dirent;
148 struct getdents_callback * buf = (struct getdents_callback *) __buf; 147 struct getdents_callback * buf = (struct getdents_callback *) __buf;
149 unsigned long d_ino; 148 unsigned long d_ino;
150 int reclen = ROUND_UP(NAME_OFFSET(dirent) + namlen + 2); 149 int reclen = ALIGN(NAME_OFFSET(dirent) + namlen + 2, sizeof(long));
151 150
152 buf->error = -EINVAL; /* only used if we fail.. */ 151 buf->error = -EINVAL; /* only used if we fail.. */
153 if (reclen > buf->count) 152 if (reclen > buf->count)
@@ -220,8 +219,6 @@ out:
220 return error; 219 return error;
221} 220}
222 221
223#define ROUND_UP64(x) (((x)+sizeof(u64)-1) & ~(sizeof(u64)-1))
224
225struct getdents_callback64 { 222struct getdents_callback64 {
226 struct linux_dirent64 __user * current_dir; 223 struct linux_dirent64 __user * current_dir;
227 struct linux_dirent64 __user * previous; 224 struct linux_dirent64 __user * previous;
@@ -234,7 +231,7 @@ static int filldir64(void * __buf, const char * name, int namlen, loff_t offset,
234{ 231{
235 struct linux_dirent64 __user *dirent; 232 struct linux_dirent64 __user *dirent;
236 struct getdents_callback64 * buf = (struct getdents_callback64 *) __buf; 233 struct getdents_callback64 * buf = (struct getdents_callback64 *) __buf;
237 int reclen = ROUND_UP64(NAME_OFFSET(dirent) + namlen + 1); 234 int reclen = ALIGN(NAME_OFFSET(dirent) + namlen + 1, sizeof(u64));
238 235
239 buf->error = -EINVAL; /* only used if we fail.. */ 236 buf->error = -EINVAL; /* only used if we fail.. */
240 if (reclen > buf->count) 237 if (reclen > buf->count)
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 96a2f8889da3..9c23fee3bae9 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -7,7 +7,6 @@
7#include <linux/fs.h> 7#include <linux/fs.h>
8#include <linux/reiserfs_fs.h> 8#include <linux/reiserfs_fs.h>
9#include <linux/stat.h> 9#include <linux/stat.h>
10#include <linux/smp_lock.h>
11#include <linux/buffer_head.h> 10#include <linux/buffer_head.h>
12#include <asm/uaccess.h> 11#include <asm/uaccess.h>
13 12
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index abfada2f52db..9e451a68580f 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -6,7 +6,6 @@
6#include <linux/reiserfs_fs.h> 6#include <linux/reiserfs_fs.h>
7#include <linux/reiserfs_acl.h> 7#include <linux/reiserfs_acl.h>
8#include <linux/reiserfs_xattr.h> 8#include <linux/reiserfs_xattr.h>
9#include <linux/smp_lock.h>
10#include <asm/uaccess.h> 9#include <asm/uaccess.h>
11#include <linux/pagemap.h> 10#include <linux/pagemap.h>
12#include <linux/swap.h> 11#include <linux/swap.h>
@@ -1060,20 +1059,12 @@ static int reiserfs_prepare_file_region_for_write(struct inode *inode
1060 maping blocks, since there is none, so we just zero out remaining 1059 maping blocks, since there is none, so we just zero out remaining
1061 parts of first and last pages in write area (if needed) */ 1060 parts of first and last pages in write area (if needed) */
1062 if ((pos & ~((loff_t) PAGE_CACHE_SIZE - 1)) > inode->i_size) { 1061 if ((pos & ~((loff_t) PAGE_CACHE_SIZE - 1)) > inode->i_size) {
1063 if (from != 0) { /* First page needs to be partially zeroed */ 1062 if (from != 0) /* First page needs to be partially zeroed */
1064 char *kaddr = kmap_atomic(prepared_pages[0], KM_USER0); 1063 zero_user_page(prepared_pages[0], 0, from, KM_USER0);
1065 memset(kaddr, 0, from); 1064
1066 kunmap_atomic(kaddr, KM_USER0); 1065 if (to != PAGE_CACHE_SIZE) /* Last page needs to be partially zeroed */
1067 flush_dcache_page(prepared_pages[0]); 1066 zero_user_page(prepared_pages[num_pages-1], to,
1068 } 1067 PAGE_CACHE_SIZE - to, KM_USER0);
1069 if (to != PAGE_CACHE_SIZE) { /* Last page needs to be partially zeroed */
1070 char *kaddr =
1071 kmap_atomic(prepared_pages[num_pages - 1],
1072 KM_USER0);
1073 memset(kaddr + to, 0, PAGE_CACHE_SIZE - to);
1074 kunmap_atomic(kaddr, KM_USER0);
1075 flush_dcache_page(prepared_pages[num_pages - 1]);
1076 }
1077 1068
1078 /* Since all blocks are new - use already calculated value */ 1069 /* Since all blocks are new - use already calculated value */
1079 return blocks; 1070 return blocks;
@@ -1200,13 +1191,9 @@ static int reiserfs_prepare_file_region_for_write(struct inode *inode
1200 ll_rw_block(READ, 1, &bh); 1191 ll_rw_block(READ, 1, &bh);
1201 *wait_bh++ = bh; 1192 *wait_bh++ = bh;
1202 } else { /* Not mapped, zero it */ 1193 } else { /* Not mapped, zero it */
1203 char *kaddr = 1194 zero_user_page(prepared_pages[0],
1204 kmap_atomic(prepared_pages[0], 1195 block_start,
1205 KM_USER0); 1196 from - block_start, KM_USER0);
1206 memset(kaddr + block_start, 0,
1207 from - block_start);
1208 kunmap_atomic(kaddr, KM_USER0);
1209 flush_dcache_page(prepared_pages[0]);
1210 set_buffer_uptodate(bh); 1197 set_buffer_uptodate(bh);
1211 } 1198 }
1212 } 1199 }
@@ -1238,13 +1225,8 @@ static int reiserfs_prepare_file_region_for_write(struct inode *inode
1238 ll_rw_block(READ, 1, &bh); 1225 ll_rw_block(READ, 1, &bh);
1239 *wait_bh++ = bh; 1226 *wait_bh++ = bh;
1240 } else { /* Not mapped, zero it */ 1227 } else { /* Not mapped, zero it */
1241 char *kaddr = 1228 zero_user_page(prepared_pages[num_pages-1],
1242 kmap_atomic(prepared_pages 1229 to, block_end - to, KM_USER0);
1243 [num_pages - 1],
1244 KM_USER0);
1245 memset(kaddr + to, 0, block_end - to);
1246 kunmap_atomic(kaddr, KM_USER0);
1247 flush_dcache_page(prepared_pages[num_pages - 1]);
1248 set_buffer_uptodate(bh); 1230 set_buffer_uptodate(bh);
1249 } 1231 }
1250 } 1232 }
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 9fcbfe316977..1272d11399fb 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2148,13 +2148,8 @@ int reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps)
2148 length = offset & (blocksize - 1); 2148 length = offset & (blocksize - 1);
2149 /* if we are not on a block boundary */ 2149 /* if we are not on a block boundary */
2150 if (length) { 2150 if (length) {
2151 char *kaddr;
2152
2153 length = blocksize - length; 2151 length = blocksize - length;
2154 kaddr = kmap_atomic(page, KM_USER0); 2152 zero_user_page(page, offset, length, KM_USER0);
2155 memset(kaddr + offset, 0, length);
2156 flush_dcache_page(page);
2157 kunmap_atomic(kaddr, KM_USER0);
2158 if (buffer_mapped(bh) && bh->b_blocknr != 0) { 2153 if (buffer_mapped(bh) && bh->b_blocknr != 0) {
2159 mark_buffer_dirty(bh); 2154 mark_buffer_dirty(bh);
2160 } 2155 }
@@ -2370,7 +2365,6 @@ static int reiserfs_write_full_page(struct page *page,
2370 ** last byte in the file 2365 ** last byte in the file
2371 */ 2366 */
2372 if (page->index >= end_index) { 2367 if (page->index >= end_index) {
2373 char *kaddr;
2374 unsigned last_offset; 2368 unsigned last_offset;
2375 2369
2376 last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1); 2370 last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
@@ -2379,10 +2373,7 @@ static int reiserfs_write_full_page(struct page *page,
2379 unlock_page(page); 2373 unlock_page(page);
2380 return 0; 2374 return 0;
2381 } 2375 }
2382 kaddr = kmap_atomic(page, KM_USER0); 2376 zero_user_page(page, last_offset, PAGE_CACHE_SIZE - last_offset, KM_USER0);
2383 memset(kaddr + last_offset, 0, PAGE_CACHE_SIZE - last_offset);
2384 flush_dcache_page(page);
2385 kunmap_atomic(kaddr, KM_USER0);
2386 } 2377 }
2387 bh = head; 2378 bh = head;
2388 block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits); 2379 block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits);
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 7280a23ef344..f25086aeef5f 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -1110,7 +1110,7 @@ static int flush_commit_list(struct super_block *s,
1110 if (!barrier) { 1110 if (!barrier) {
1111 /* If there was a write error in the journal - we can't commit 1111 /* If there was a write error in the journal - we can't commit
1112 * this transaction - it will be invalid and, if successful, 1112 * this transaction - it will be invalid and, if successful,
1113 * will just end up propogating the write error out to 1113 * will just end up propagating the write error out to
1114 * the file system. */ 1114 * the file system. */
1115 if (likely(!retval && !reiserfs_is_journal_aborted (journal))) { 1115 if (likely(!retval && !reiserfs_is_journal_aborted (journal))) {
1116 if (buffer_dirty(jl->j_commit_bh)) 1116 if (buffer_dirty(jl->j_commit_bh))
@@ -1125,7 +1125,7 @@ static int flush_commit_list(struct super_block *s,
1125 1125
1126 /* If there was a write error in the journal - we can't commit this 1126 /* If there was a write error in the journal - we can't commit this
1127 * transaction - it will be invalid and, if successful, will just end 1127 * transaction - it will be invalid and, if successful, will just end
1128 * up propogating the write error out to the filesystem. */ 1128 * up propagating the write error out to the filesystem. */
1129 if (unlikely(!buffer_uptodate(jl->j_commit_bh))) { 1129 if (unlikely(!buffer_uptodate(jl->j_commit_bh))) {
1130#ifdef CONFIG_REISERFS_CHECK 1130#ifdef CONFIG_REISERFS_CHECK
1131 reiserfs_warning(s, "journal-615: buffer write failed"); 1131 reiserfs_warning(s, "journal-615: buffer write failed");
@@ -2918,7 +2918,7 @@ static void queue_log_writer(struct super_block *s)
2918 set_current_state(TASK_UNINTERRUPTIBLE); 2918 set_current_state(TASK_UNINTERRUPTIBLE);
2919 if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) 2919 if (test_bit(J_WRITERS_QUEUED, &journal->j_state))
2920 schedule(); 2920 schedule();
2921 current->state = TASK_RUNNING; 2921 __set_current_state(TASK_RUNNING);
2922 remove_wait_queue(&journal->j_join_wait, &wait); 2922 remove_wait_queue(&journal->j_join_wait, &wait);
2923} 2923}
2924 2924
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index a2161840bc7c..b378eea332ca 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -16,7 +16,6 @@
16#include <linux/reiserfs_fs.h> 16#include <linux/reiserfs_fs.h>
17#include <linux/reiserfs_acl.h> 17#include <linux/reiserfs_acl.h>
18#include <linux/reiserfs_xattr.h> 18#include <linux/reiserfs_xattr.h>
19#include <linux/smp_lock.h>
20#include <linux/quotaops.h> 19#include <linux/quotaops.h>
21 20
22#define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) i->i_nlink=1; } 21#define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) i->i_nlink=1; }
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index ecc9943202fc..9aa7a06e093f 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -16,11 +16,10 @@
16#include <asm/uaccess.h> 16#include <asm/uaccess.h>
17#include <linux/reiserfs_fs.h> 17#include <linux/reiserfs_fs.h>
18#include <linux/reiserfs_fs_sb.h> 18#include <linux/reiserfs_fs_sb.h>
19#include <linux/smp_lock.h>
20#include <linux/init.h> 19#include <linux/init.h>
21#include <linux/proc_fs.h> 20#include <linux/proc_fs.h>
22 21
23#if defined( REISERFS_PROC_INFO ) 22#ifdef CONFIG_REISERFS_PROC_INFO
24 23
25/* 24/*
26 * LOCKING: 25 * LOCKING:
diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c
index 315684793d1d..976cc7887a0d 100644
--- a/fs/reiserfs/resize.c
+++ b/fs/reiserfs/resize.c
@@ -131,6 +131,10 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
131 /* don't use read_bitmap_block since it will cache 131 /* don't use read_bitmap_block since it will cache
132 * the uninitialized bitmap */ 132 * the uninitialized bitmap */
133 bh = sb_bread(s, i * s->s_blocksize * 8); 133 bh = sb_bread(s, i * s->s_blocksize * 8);
134 if (!bh) {
135 vfree(bitmap);
136 return -EIO;
137 }
134 memset(bh->b_data, 0, sb_blocksize(sb)); 138 memset(bh->b_data, 0, sb_blocksize(sb));
135 reiserfs_test_and_set_le_bit(0, bh->b_data); 139 reiserfs_test_and_set_le_bit(0, bh->b_data);
136 reiserfs_cache_bitmap_metadata(s, bh, bitmap + i); 140 reiserfs_cache_bitmap_metadata(s, bh, bitmap + i);
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index afb21ea45302..b6f12593c39d 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -53,7 +53,6 @@
53#include <linux/string.h> 53#include <linux/string.h>
54#include <linux/pagemap.h> 54#include <linux/pagemap.h>
55#include <linux/reiserfs_fs.h> 55#include <linux/reiserfs_fs.h>
56#include <linux/smp_lock.h>
57#include <linux/buffer_head.h> 56#include <linux/buffer_head.h>
58#include <linux/quotaops.h> 57#include <linux/quotaops.h>
59 58
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index f13a7f164dc6..c7762140c425 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -18,7 +18,6 @@
18#include <linux/reiserfs_fs.h> 18#include <linux/reiserfs_fs.h>
19#include <linux/reiserfs_acl.h> 19#include <linux/reiserfs_acl.h>
20#include <linux/reiserfs_xattr.h> 20#include <linux/reiserfs_xattr.h>
21#include <linux/smp_lock.h>
22#include <linux/init.h> 21#include <linux/init.h>
23#include <linux/blkdev.h> 22#include <linux/blkdev.h>
24#include <linux/buffer_head.h> 23#include <linux/buffer_head.h>
@@ -433,12 +432,13 @@ int remove_save_link(struct inode *inode, int truncate)
433static void reiserfs_kill_sb(struct super_block *s) 432static void reiserfs_kill_sb(struct super_block *s)
434{ 433{
435 if (REISERFS_SB(s)) { 434 if (REISERFS_SB(s)) {
435#ifdef CONFIG_REISERFS_FS_XATTR
436 if (REISERFS_SB(s)->xattr_root) { 436 if (REISERFS_SB(s)->xattr_root) {
437 d_invalidate(REISERFS_SB(s)->xattr_root); 437 d_invalidate(REISERFS_SB(s)->xattr_root);
438 dput(REISERFS_SB(s)->xattr_root); 438 dput(REISERFS_SB(s)->xattr_root);
439 REISERFS_SB(s)->xattr_root = NULL; 439 REISERFS_SB(s)->xattr_root = NULL;
440 } 440 }
441 441#endif
442 if (REISERFS_SB(s)->priv_root) { 442 if (REISERFS_SB(s)->priv_root) {
443 d_invalidate(REISERFS_SB(s)->priv_root); 443 d_invalidate(REISERFS_SB(s)->priv_root);
444 dput(REISERFS_SB(s)->priv_root); 444 dput(REISERFS_SB(s)->priv_root);
@@ -511,8 +511,7 @@ static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags
511{ 511{
512 struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo; 512 struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo;
513 513
514 if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == 514 if (flags & SLAB_CTOR_CONSTRUCTOR) {
515 SLAB_CTOR_CONSTRUCTOR) {
516 INIT_LIST_HEAD(&ei->i_prealloc_list); 515 INIT_LIST_HEAD(&ei->i_prealloc_list);
517 inode_init_once(&ei->vfs_inode); 516 inode_init_once(&ei->vfs_inode);
518#ifdef CONFIG_REISERFS_FS_POSIX_ACL 517#ifdef CONFIG_REISERFS_FS_POSIX_ACL
@@ -1563,9 +1562,10 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1563 REISERFS_SB(s)->s_alloc_options.preallocmin = 0; 1562 REISERFS_SB(s)->s_alloc_options.preallocmin = 0;
1564 /* Preallocate by 16 blocks (17-1) at once */ 1563 /* Preallocate by 16 blocks (17-1) at once */
1565 REISERFS_SB(s)->s_alloc_options.preallocsize = 17; 1564 REISERFS_SB(s)->s_alloc_options.preallocsize = 17;
1565#ifdef CONFIG_REISERFS_FS_XATTR
1566 /* Initialize the rwsem for xattr dir */ 1566 /* Initialize the rwsem for xattr dir */
1567 init_rwsem(&REISERFS_SB(s)->xattr_dir_sem); 1567 init_rwsem(&REISERFS_SB(s)->xattr_dir_sem);
1568 1568#endif
1569 /* setup default block allocator options */ 1569 /* setup default block allocator options */
1570 reiserfs_init_alloc_options(s); 1570 reiserfs_init_alloc_options(s);
1571 1571
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index c8178b7b9212..bf6e58214538 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -68,7 +68,7 @@ static struct dentry *get_xa_root(struct super_block *sb, int flags)
68 if (!privroot) 68 if (!privroot)
69 return ERR_PTR(-ENODATA); 69 return ERR_PTR(-ENODATA);
70 70
71 mutex_lock(&privroot->d_inode->i_mutex); 71 mutex_lock_nested(&privroot->d_inode->i_mutex, I_MUTEX_XATTR);
72 if (REISERFS_SB(sb)->xattr_root) { 72 if (REISERFS_SB(sb)->xattr_root) {
73 xaroot = dget(REISERFS_SB(sb)->xattr_root); 73 xaroot = dget(REISERFS_SB(sb)->xattr_root);
74 goto out; 74 goto out;
@@ -410,11 +410,7 @@ static struct page *reiserfs_get_page(struct inode *dir, unsigned long n)
410 mapping_set_gfp_mask(mapping, GFP_NOFS); 410 mapping_set_gfp_mask(mapping, GFP_NOFS);
411 page = read_mapping_page(mapping, n, NULL); 411 page = read_mapping_page(mapping, n, NULL);
412 if (!IS_ERR(page)) { 412 if (!IS_ERR(page)) {
413 wait_on_page_locked(page);
414 kmap(page); 413 kmap(page);
415 if (!PageUptodate(page))
416 goto fail;
417
418 if (PageError(page)) 414 if (PageError(page))
419 goto fail; 415 goto fail;
420 } 416 }
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index fd601014813e..804285190271 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -570,8 +570,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
570{ 570{
571 struct romfs_inode_info *ei = (struct romfs_inode_info *) foo; 571 struct romfs_inode_info *ei = (struct romfs_inode_info *) foo;
572 572
573 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 573 if (flags & SLAB_CTOR_CONSTRUCTOR)
574 SLAB_CTOR_CONSTRUCTOR)
575 inode_init_once(&ei->vfs_inode); 574 inode_init_once(&ei->vfs_inode);
576} 575}
577 576
diff --git a/fs/select.c b/fs/select.c
index fe0893afd931..a974082b0824 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -14,10 +14,10 @@
14 * of fds to overcome nfds < 16390 descriptors limit (Tigran Aivazian). 14 * of fds to overcome nfds < 16390 descriptors limit (Tigran Aivazian).
15 */ 15 */
16 16
17#include <linux/kernel.h>
17#include <linux/syscalls.h> 18#include <linux/syscalls.h>
18#include <linux/module.h> 19#include <linux/module.h>
19#include <linux/slab.h> 20#include <linux/slab.h>
20#include <linux/smp_lock.h>
21#include <linux/poll.h> 21#include <linux/poll.h>
22#include <linux/personality.h> /* for STICKY_TIMEOUTS */ 22#include <linux/personality.h> /* for STICKY_TIMEOUTS */
23#include <linux/file.h> 23#include <linux/file.h>
@@ -26,7 +26,6 @@
26 26
27#include <asm/uaccess.h> 27#include <asm/uaccess.h>
28 28
29#define ROUND_UP(x,y) (((x)+(y)-1)/(y))
30#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM) 29#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)
31 30
32struct poll_table_page { 31struct poll_table_page {
@@ -65,7 +64,7 @@ EXPORT_SYMBOL(poll_initwait);
65 64
66static void free_poll_entry(struct poll_table_entry *entry) 65static void free_poll_entry(struct poll_table_entry *entry)
67{ 66{
68 remove_wait_queue(entry->wait_address,&entry->wait); 67 remove_wait_queue(entry->wait_address, &entry->wait);
69 fput(entry->filp); 68 fput(entry->filp);
70} 69}
71 70
@@ -129,7 +128,7 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
129 entry->filp = filp; 128 entry->filp = filp;
130 entry->wait_address = wait_address; 129 entry->wait_address = wait_address;
131 init_waitqueue_entry(&entry->wait, current); 130 init_waitqueue_entry(&entry->wait, current);
132 add_wait_queue(wait_address,&entry->wait); 131 add_wait_queue(wait_address, &entry->wait);
133} 132}
134 133
135#define FDS_IN(fds, n) (fds->in + n) 134#define FDS_IN(fds, n) (fds->in + n)
@@ -399,7 +398,7 @@ asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
399 if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS) 398 if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS)
400 timeout = -1; /* infinite */ 399 timeout = -1; /* infinite */
401 else { 400 else {
402 timeout = ROUND_UP(tv.tv_usec, USEC_PER_SEC/HZ); 401 timeout = DIV_ROUND_UP(tv.tv_usec, USEC_PER_SEC/HZ);
403 timeout += tv.tv_sec * HZ; 402 timeout += tv.tv_sec * HZ;
404 } 403 }
405 } 404 }
@@ -454,7 +453,7 @@ asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp,
454 if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS) 453 if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS)
455 timeout = -1; /* infinite */ 454 timeout = -1; /* infinite */
456 else { 455 else {
457 timeout = ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ); 456 timeout = DIV_ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ);
458 timeout += ts.tv_sec * HZ; 457 timeout += ts.tv_sec * HZ;
459 } 458 }
460 } 459 }
@@ -776,7 +775,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
776 if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS) 775 if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS)
777 timeout = -1; /* infinite */ 776 timeout = -1; /* infinite */
778 else { 777 else {
779 timeout = ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ); 778 timeout = DIV_ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ);
780 timeout += ts.tv_sec * HZ; 779 timeout += ts.tv_sec * HZ;
781 } 780 }
782 } 781 }
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 5faba4f1c9ab..424a3ddf86dd 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -69,9 +69,8 @@ static void smb_destroy_inode(struct inode *inode)
69static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) 69static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
70{ 70{
71 struct smb_inode_info *ei = (struct smb_inode_info *) foo; 71 struct smb_inode_info *ei = (struct smb_inode_info *) foo;
72 unsigned long flagmask = SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR;
73 72
74 if ((flags & flagmask) == SLAB_CTOR_CONSTRUCTOR) 73 if (flags & SLAB_CTOR_CONSTRUCTOR)
75 inode_init_once(&ei->vfs_inode); 74 inode_init_once(&ei->vfs_inode);
76} 75}
77 76
diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c
index 723f7c667661..c288fbe7953d 100644
--- a/fs/smbfs/request.c
+++ b/fs/smbfs/request.c
@@ -6,6 +6,7 @@
6 * Please add a note about your changes to smbfs in the ChangeLog file. 6 * Please add a note about your changes to smbfs in the ChangeLog file.
7 */ 7 */
8 8
9#include <linux/kernel.h>
9#include <linux/types.h> 10#include <linux/types.h>
10#include <linux/fs.h> 11#include <linux/fs.h>
11#include <linux/slab.h> 12#include <linux/slab.h>
@@ -22,8 +23,6 @@
22/* #define SMB_SLAB_DEBUG (SLAB_RED_ZONE | SLAB_POISON) */ 23/* #define SMB_SLAB_DEBUG (SLAB_RED_ZONE | SLAB_POISON) */
23#define SMB_SLAB_DEBUG 0 24#define SMB_SLAB_DEBUG 0
24 25
25#define ROUND_UP(x) (((x)+3) & ~3)
26
27/* cache for request structures */ 26/* cache for request structures */
28static struct kmem_cache *req_cachep; 27static struct kmem_cache *req_cachep;
29 28
@@ -200,8 +199,8 @@ static int smb_setup_trans2request(struct smb_request *req)
200 199
201 const int smb_parameters = 15; 200 const int smb_parameters = 15;
202 const int header = SMB_HEADER_LEN + 2 * smb_parameters + 2; 201 const int header = SMB_HEADER_LEN + 2 * smb_parameters + 2;
203 const int oparam = ROUND_UP(header + 3); 202 const int oparam = ALIGN(header + 3, sizeof(u32));
204 const int odata = ROUND_UP(oparam + req->rq_lparm); 203 const int odata = ALIGN(oparam + req->rq_lparm, sizeof(u32));
205 const int bcc = (req->rq_data ? odata + req->rq_ldata : 204 const int bcc = (req->rq_data ? odata + req->rq_ldata :
206 oparam + req->rq_lparm) - header; 205 oparam + req->rq_lparm) - header;
207 206
diff --git a/fs/smbfs/smbiod.c b/fs/smbfs/smbiod.c
index 89eaf31f1d46..67176af8515f 100644
--- a/fs/smbfs/smbiod.c
+++ b/fs/smbfs/smbiod.c
@@ -16,7 +16,6 @@
16#include <linux/init.h> 16#include <linux/init.h>
17#include <linux/file.h> 17#include <linux/file.h>
18#include <linux/dcache.h> 18#include <linux/dcache.h>
19#include <linux/smp_lock.h>
20#include <linux/module.h> 19#include <linux/module.h>
21#include <linux/net.h> 20#include <linux/net.h>
22#include <linux/kthread.h> 21#include <linux/kthread.h>
@@ -299,8 +298,6 @@ out:
299 */ 298 */
300static int smbiod(void *unused) 299static int smbiod(void *unused)
301{ 300{
302 allow_signal(SIGKILL);
303
304 VERBOSE("SMB Kernel thread starting (%d) ...\n", current->pid); 301 VERBOSE("SMB Kernel thread starting (%d) ...\n", current->pid);
305 302
306 for (;;) { 303 for (;;) {
diff --git a/fs/smbfs/sock.c b/fs/smbfs/sock.c
index 92ea6b2367d7..e48bd8235a8e 100644
--- a/fs/smbfs/sock.c
+++ b/fs/smbfs/sock.c
@@ -17,7 +17,6 @@
17#include <linux/net.h> 17#include <linux/net.h>
18#include <linux/mm.h> 18#include <linux/mm.h>
19#include <linux/netdevice.h> 19#include <linux/netdevice.h>
20#include <linux/smp_lock.h>
21#include <linux/workqueue.h> 20#include <linux/workqueue.h>
22#include <net/scm.h> 21#include <net/scm.h>
23#include <net/tcp_states.h> 22#include <net/tcp_states.h>
diff --git a/fs/smbfs/symlink.c b/fs/smbfs/symlink.c
index fea20ceb8a5f..00b2909bd469 100644
--- a/fs/smbfs/symlink.c
+++ b/fs/smbfs/symlink.c
@@ -13,7 +13,6 @@
13#include <linux/mm.h> 13#include <linux/mm.h>
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/pagemap.h> 15#include <linux/pagemap.h>
16#include <linux/smp_lock.h>
17#include <linux/net.h> 16#include <linux/net.h>
18#include <linux/namei.h> 17#include <linux/namei.h>
19 18
diff --git a/fs/splice.c b/fs/splice.c
index 5428b0ff3b6f..12f28281d2b1 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -289,12 +289,10 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
289 nr_pages = PIPE_BUFFERS; 289 nr_pages = PIPE_BUFFERS;
290 290
291 /* 291 /*
292 * Initiate read-ahead on this page range. however, don't call into 292 * Don't try to 2nd guess the read-ahead logic, call into
293 * read-ahead if this is a non-zero offset (we are likely doing small 293 * page_cache_readahead() like the page cache reads would do.
294 * chunk splice and the page is already there) for a single page.
295 */ 294 */
296 if (!loff || nr_pages > 1) 295 page_cache_readahead(mapping, &in->f_ra, in, index, nr_pages);
297 page_cache_readahead(mapping, &in->f_ra, in, index, nr_pages);
298 296
299 /* 297 /*
300 * Now fill in the holes: 298 * Now fill in the holes:
@@ -378,10 +376,11 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
378 * If in nonblock mode then dont block on waiting 376 * If in nonblock mode then dont block on waiting
379 * for an in-flight io page 377 * for an in-flight io page
380 */ 378 */
381 if (flags & SPLICE_F_NONBLOCK) 379 if (flags & SPLICE_F_NONBLOCK) {
382 break; 380 if (TestSetPageLocked(page))
383 381 break;
384 lock_page(page); 382 } else
383 lock_page(page);
385 384
386 /* 385 /*
387 * page was truncated, stop here. if this isn't the 386 * page was truncated, stop here. if this isn't the
diff --git a/fs/stat.c b/fs/stat.c
index 38a8cb2a28de..68510068a641 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -8,7 +8,6 @@
8#include <linux/mm.h> 8#include <linux/mm.h>
9#include <linux/errno.h> 9#include <linux/errno.h>
10#include <linux/file.h> 10#include <linux/file.h>
11#include <linux/smp_lock.h>
12#include <linux/highuid.h> 11#include <linux/highuid.h>
13#include <linux/fs.h> 12#include <linux/fs.h>
14#include <linux/namei.h> 13#include <linux/namei.h>
diff --git a/fs/super.c b/fs/super.c
index 8341e4e1d738..5260d620c555 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -107,6 +107,7 @@ out:
107static inline void destroy_super(struct super_block *s) 107static inline void destroy_super(struct super_block *s)
108{ 108{
109 security_sb_free(s); 109 security_sb_free(s);
110 kfree(s->s_subtype);
110 kfree(s); 111 kfree(s);
111} 112}
112 113
@@ -907,6 +908,29 @@ out:
907 908
908EXPORT_SYMBOL_GPL(vfs_kern_mount); 909EXPORT_SYMBOL_GPL(vfs_kern_mount);
909 910
911static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
912{
913 int err;
914 const char *subtype = strchr(fstype, '.');
915 if (subtype) {
916 subtype++;
917 err = -EINVAL;
918 if (!subtype[0])
919 goto err;
920 } else
921 subtype = "";
922
923 mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL);
924 err = -ENOMEM;
925 if (!mnt->mnt_sb->s_subtype)
926 goto err;
927 return mnt;
928
929 err:
930 mntput(mnt);
931 return ERR_PTR(err);
932}
933
910struct vfsmount * 934struct vfsmount *
911do_kern_mount(const char *fstype, int flags, const char *name, void *data) 935do_kern_mount(const char *fstype, int flags, const char *name, void *data)
912{ 936{
@@ -915,6 +939,9 @@ do_kern_mount(const char *fstype, int flags, const char *name, void *data)
915 if (!type) 939 if (!type)
916 return ERR_PTR(-ENODEV); 940 return ERR_PTR(-ENODEV);
917 mnt = vfs_kern_mount(type, flags, name, data); 941 mnt = vfs_kern_mount(type, flags, name, data);
942 if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
943 !mnt->mnt_sb->s_subtype)
944 mnt = fs_set_subtype(mnt, fstype);
918 put_filesystem(type); 945 put_filesystem(type);
919 return mnt; 946 return mnt;
920} 947}
diff --git a/fs/sync.c b/fs/sync.c
index 5cb9e7e43383..2f97576355b8 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -229,7 +229,7 @@ asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
229 !S_ISLNK(i_mode)) 229 !S_ISLNK(i_mode))
230 goto out_put; 230 goto out_put;
231 231
232 ret = do_sync_file_range(file, offset, endbyte, flags); 232 ret = do_sync_mapping_range(file->f_mapping, offset, endbyte, flags);
233out_put: 233out_put:
234 fput_light(file, fput_needed); 234 fput_light(file, fput_needed);
235out: 235out:
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index 8ea2a51ce883..d3b9f5f07db1 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -59,7 +59,7 @@ read(struct file * file, char __user * userbuf, size_t count, loff_t * off)
59 if (copy_to_user(userbuf, buffer, count)) 59 if (copy_to_user(userbuf, buffer, count))
60 return -EFAULT; 60 return -EFAULT;
61 61
62 pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count); 62 pr_debug("offs = %lld, *off = %lld, count = %zd\n", offs, *off, count);
63 63
64 *off = offs + count; 64 *off = offs + count;
65 65
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index db0413a411d6..b502c7197ec0 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -13,8 +13,7 @@
13 13
14#include "sysfs.h" 14#include "sysfs.h"
15 15
16#define to_subsys(k) container_of(k,struct subsystem,kset.kobj) 16#define to_sattr(a) container_of(a,struct subsys_attribute, attr)
17#define to_sattr(a) container_of(a,struct subsys_attribute,attr)
18 17
19/* 18/*
20 * Subsystem file operations. 19 * Subsystem file operations.
@@ -24,12 +23,12 @@
24static ssize_t 23static ssize_t
25subsys_attr_show(struct kobject * kobj, struct attribute * attr, char * page) 24subsys_attr_show(struct kobject * kobj, struct attribute * attr, char * page)
26{ 25{
27 struct subsystem * s = to_subsys(kobj); 26 struct kset *kset = to_kset(kobj);
28 struct subsys_attribute * sattr = to_sattr(attr); 27 struct subsys_attribute * sattr = to_sattr(attr);
29 ssize_t ret = -EIO; 28 ssize_t ret = -EIO;
30 29
31 if (sattr->show) 30 if (sattr->show)
32 ret = sattr->show(s,page); 31 ret = sattr->show(kset, page);
33 return ret; 32 return ret;
34} 33}
35 34
@@ -37,12 +36,12 @@ static ssize_t
37subsys_attr_store(struct kobject * kobj, struct attribute * attr, 36subsys_attr_store(struct kobject * kobj, struct attribute * attr,
38 const char * page, size_t count) 37 const char * page, size_t count)
39{ 38{
40 struct subsystem * s = to_subsys(kobj); 39 struct kset *kset = to_kset(kobj);
41 struct subsys_attribute * sattr = to_sattr(attr); 40 struct subsys_attribute * sattr = to_sattr(attr);
42 ssize_t ret = -EIO; 41 ssize_t ret = -EIO;
43 42
44 if (sattr->store) 43 if (sattr->store)
45 ret = sattr->store(s,page,count); 44 ret = sattr->store(kset, page, count);
46 return ret; 45 return ret;
47} 46}
48 47
@@ -112,36 +111,6 @@ static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer
112 return ret; 111 return ret;
113} 112}
114 113
115
116/**
117 * flush_read_buffer - push buffer to userspace.
118 * @buffer: data buffer for file.
119 * @buf: user-passed buffer.
120 * @count: number of bytes requested.
121 * @ppos: file position.
122 *
123 * Copy the buffer we filled in fill_read_buffer() to userspace.
124 * This is done at the reader's leisure, copying and advancing
125 * the amount they specify each time.
126 * This may be called continuously until the buffer is empty.
127 */
128static int flush_read_buffer(struct sysfs_buffer * buffer, char __user * buf,
129 size_t count, loff_t * ppos)
130{
131 int error;
132
133 if (*ppos > buffer->count)
134 return 0;
135
136 if (count > (buffer->count - *ppos))
137 count = buffer->count - *ppos;
138
139 error = copy_to_user(buf,buffer->page + *ppos,count);
140 if (!error)
141 *ppos += count;
142 return error ? -EFAULT : count;
143}
144
145/** 114/**
146 * sysfs_read_file - read an attribute. 115 * sysfs_read_file - read an attribute.
147 * @file: file pointer. 116 * @file: file pointer.
@@ -178,7 +147,8 @@ sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos)
178 } 147 }
179 pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n", 148 pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n",
180 __FUNCTION__, count, *ppos, buffer->page); 149 __FUNCTION__, count, *ppos, buffer->page);
181 retval = flush_read_buffer(buffer,buf,count,ppos); 150 retval = simple_read_from_buffer(buf, count, ppos, buffer->page,
151 buffer->count);
182out: 152out:
183 up(&buffer->sem); 153 up(&buffer->sem);
184 return retval; 154 return retval;
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index ebf7007fa161..e566b387fcf9 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -54,17 +54,9 @@ static struct page * dir_get_page(struct inode *dir, unsigned long n)
54{ 54{
55 struct address_space *mapping = dir->i_mapping; 55 struct address_space *mapping = dir->i_mapping;
56 struct page *page = read_mapping_page(mapping, n, NULL); 56 struct page *page = read_mapping_page(mapping, n, NULL);
57 if (!IS_ERR(page)) { 57 if (!IS_ERR(page))
58 wait_on_page_locked(page);
59 kmap(page); 58 kmap(page);
60 if (!PageUptodate(page))
61 goto fail;
62 }
63 return page; 59 return page;
64
65fail:
66 dir_put_page(page);
67 return ERR_PTR(-EIO);
68} 60}
69 61
70static int sysv_readdir(struct file * filp, void * dirent, filldir_t filldir) 62static int sysv_readdir(struct file * filp, void * dirent, filldir_t filldir)
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 9311cac186fe..3152d7415606 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -322,8 +322,7 @@ static void init_once(void *p, struct kmem_cache *cachep, unsigned long flags)
322{ 322{
323 struct sysv_inode_info *si = (struct sysv_inode_info *)p; 323 struct sysv_inode_info *si = (struct sysv_inode_info *)p;
324 324
325 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 325 if (flags & SLAB_CTOR_CONSTRUCTOR)
326 SLAB_CTOR_CONSTRUCTOR)
327 inode_init_once(&si->vfs_inode); 326 inode_init_once(&si->vfs_inode);
328} 327}
329 328
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index 4e48abbd2b5d..6bd850b7641a 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -13,7 +13,6 @@
13 */ 13 */
14 14
15#include <linux/pagemap.h> 15#include <linux/pagemap.h>
16#include <linux/smp_lock.h>
17#include "sysv.h" 16#include "sysv.h"
18 17
19static int add_nondir(struct dentry *dentry, struct inode *inode) 18static int add_nondir(struct dentry *dentry, struct inode *inode)
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index ea521f846d97..4cec91015681 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -427,9 +427,9 @@ static void udf_table_free_blocks(struct super_block * sb,
427{ 427{
428 struct udf_sb_info *sbi = UDF_SB(sb); 428 struct udf_sb_info *sbi = UDF_SB(sb);
429 uint32_t start, end; 429 uint32_t start, end;
430 uint32_t nextoffset, oextoffset, elen; 430 uint32_t elen;
431 kernel_lb_addr nbloc, obloc, eloc; 431 kernel_lb_addr eloc;
432 struct buffer_head *obh, *nbh; 432 struct extent_position oepos, epos;
433 int8_t etype; 433 int8_t etype;
434 int i; 434 int i;
435 435
@@ -457,14 +457,13 @@ static void udf_table_free_blocks(struct super_block * sb,
457 start = bloc.logicalBlockNum + offset; 457 start = bloc.logicalBlockNum + offset;
458 end = bloc.logicalBlockNum + offset + count - 1; 458 end = bloc.logicalBlockNum + offset + count - 1;
459 459
460 oextoffset = nextoffset = sizeof(struct unallocSpaceEntry); 460 epos.offset = oepos.offset = sizeof(struct unallocSpaceEntry);
461 elen = 0; 461 elen = 0;
462 obloc = nbloc = UDF_I_LOCATION(table); 462 epos.block = oepos.block = UDF_I_LOCATION(table);
463 463 epos.bh = oepos.bh = NULL;
464 obh = nbh = NULL;
465 464
466 while (count && (etype = 465 while (count && (etype =
467 udf_next_aext(table, &nbloc, &nextoffset, &eloc, &elen, &nbh, 1)) != -1) 466 udf_next_aext(table, &epos, &eloc, &elen, 1)) != -1)
468 { 467 {
469 if (((eloc.logicalBlockNum + (elen >> sb->s_blocksize_bits)) == 468 if (((eloc.logicalBlockNum + (elen >> sb->s_blocksize_bits)) ==
470 start)) 469 start))
@@ -482,7 +481,7 @@ static void udf_table_free_blocks(struct super_block * sb,
482 start += count; 481 start += count;
483 count = 0; 482 count = 0;
484 } 483 }
485 udf_write_aext(table, obloc, &oextoffset, eloc, elen, obh, 1); 484 udf_write_aext(table, &oepos, eloc, elen, 1);
486 } 485 }
487 else if (eloc.logicalBlockNum == (end + 1)) 486 else if (eloc.logicalBlockNum == (end + 1))
488 { 487 {
@@ -502,20 +501,20 @@ static void udf_table_free_blocks(struct super_block * sb,
502 end -= count; 501 end -= count;
503 count = 0; 502 count = 0;
504 } 503 }
505 udf_write_aext(table, obloc, &oextoffset, eloc, elen, obh, 1); 504 udf_write_aext(table, &oepos, eloc, elen, 1);
506 } 505 }
507 506
508 if (nbh != obh) 507 if (epos.bh != oepos.bh)
509 { 508 {
510 i = -1; 509 i = -1;
511 obloc = nbloc; 510 oepos.block = epos.block;
512 udf_release_data(obh); 511 brelse(oepos.bh);
513 atomic_inc(&nbh->b_count); 512 get_bh(epos.bh);
514 obh = nbh; 513 oepos.bh = epos.bh;
515 oextoffset = 0; 514 oepos.offset = 0;
516 } 515 }
517 else 516 else
518 oextoffset = nextoffset; 517 oepos.offset = epos.offset;
519 } 518 }
520 519
521 if (count) 520 if (count)
@@ -547,55 +546,53 @@ static void udf_table_free_blocks(struct super_block * sb,
547 adsize = sizeof(long_ad); 546 adsize = sizeof(long_ad);
548 else 547 else
549 { 548 {
550 udf_release_data(obh); 549 brelse(oepos.bh);
551 udf_release_data(nbh); 550 brelse(epos.bh);
552 goto error_return; 551 goto error_return;
553 } 552 }
554 553
555 if (nextoffset + (2 * adsize) > sb->s_blocksize) 554 if (epos.offset + (2 * adsize) > sb->s_blocksize)
556 { 555 {
557 char *sptr, *dptr; 556 char *sptr, *dptr;
558 int loffset; 557 int loffset;
559 558
560 udf_release_data(obh); 559 brelse(oepos.bh);
561 obh = nbh; 560 oepos = epos;
562 obloc = nbloc;
563 oextoffset = nextoffset;
564 561
565 /* Steal a block from the extent being free'd */ 562 /* Steal a block from the extent being free'd */
566 nbloc.logicalBlockNum = eloc.logicalBlockNum; 563 epos.block.logicalBlockNum = eloc.logicalBlockNum;
567 eloc.logicalBlockNum ++; 564 eloc.logicalBlockNum ++;
568 elen -= sb->s_blocksize; 565 elen -= sb->s_blocksize;
569 566
570 if (!(nbh = udf_tread(sb, 567 if (!(epos.bh = udf_tread(sb,
571 udf_get_lb_pblock(sb, nbloc, 0)))) 568 udf_get_lb_pblock(sb, epos.block, 0))))
572 { 569 {
573 udf_release_data(obh); 570 brelse(oepos.bh);
574 goto error_return; 571 goto error_return;
575 } 572 }
576 aed = (struct allocExtDesc *)(nbh->b_data); 573 aed = (struct allocExtDesc *)(epos.bh->b_data);
577 aed->previousAllocExtLocation = cpu_to_le32(obloc.logicalBlockNum); 574 aed->previousAllocExtLocation = cpu_to_le32(oepos.block.logicalBlockNum);
578 if (nextoffset + adsize > sb->s_blocksize) 575 if (epos.offset + adsize > sb->s_blocksize)
579 { 576 {
580 loffset = nextoffset; 577 loffset = epos.offset;
581 aed->lengthAllocDescs = cpu_to_le32(adsize); 578 aed->lengthAllocDescs = cpu_to_le32(adsize);
582 sptr = UDF_I_DATA(inode) + nextoffset - 579 sptr = UDF_I_DATA(inode) + epos.offset -
583 udf_file_entry_alloc_offset(inode) + 580 udf_file_entry_alloc_offset(inode) +
584 UDF_I_LENEATTR(inode) - adsize; 581 UDF_I_LENEATTR(inode) - adsize;
585 dptr = nbh->b_data + sizeof(struct allocExtDesc); 582 dptr = epos.bh->b_data + sizeof(struct allocExtDesc);
586 memcpy(dptr, sptr, adsize); 583 memcpy(dptr, sptr, adsize);
587 nextoffset = sizeof(struct allocExtDesc) + adsize; 584 epos.offset = sizeof(struct allocExtDesc) + adsize;
588 } 585 }
589 else 586 else
590 { 587 {
591 loffset = nextoffset + adsize; 588 loffset = epos.offset + adsize;
592 aed->lengthAllocDescs = cpu_to_le32(0); 589 aed->lengthAllocDescs = cpu_to_le32(0);
593 sptr = (obh)->b_data + nextoffset; 590 sptr = oepos.bh->b_data + epos.offset;
594 nextoffset = sizeof(struct allocExtDesc); 591 epos.offset = sizeof(struct allocExtDesc);
595 592
596 if (obh) 593 if (oepos.bh)
597 { 594 {
598 aed = (struct allocExtDesc *)(obh)->b_data; 595 aed = (struct allocExtDesc *)oepos.bh->b_data;
599 aed->lengthAllocDescs = 596 aed->lengthAllocDescs =
600 cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) + adsize); 597 cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) + adsize);
601 } 598 }
@@ -606,11 +603,11 @@ static void udf_table_free_blocks(struct super_block * sb,
606 } 603 }
607 } 604 }
608 if (UDF_SB_UDFREV(sb) >= 0x0200) 605 if (UDF_SB_UDFREV(sb) >= 0x0200)
609 udf_new_tag(nbh->b_data, TAG_IDENT_AED, 3, 1, 606 udf_new_tag(epos.bh->b_data, TAG_IDENT_AED, 3, 1,
610 nbloc.logicalBlockNum, sizeof(tag)); 607 epos.block.logicalBlockNum, sizeof(tag));
611 else 608 else
612 udf_new_tag(nbh->b_data, TAG_IDENT_AED, 2, 1, 609 udf_new_tag(epos.bh->b_data, TAG_IDENT_AED, 2, 1,
613 nbloc.logicalBlockNum, sizeof(tag)); 610 epos.block.logicalBlockNum, sizeof(tag));
614 switch (UDF_I_ALLOCTYPE(table)) 611 switch (UDF_I_ALLOCTYPE(table))
615 { 612 {
616 case ICBTAG_FLAG_AD_SHORT: 613 case ICBTAG_FLAG_AD_SHORT:
@@ -619,7 +616,7 @@ static void udf_table_free_blocks(struct super_block * sb,
619 sad->extLength = cpu_to_le32( 616 sad->extLength = cpu_to_le32(
620 EXT_NEXT_EXTENT_ALLOCDECS | 617 EXT_NEXT_EXTENT_ALLOCDECS |
621 sb->s_blocksize); 618 sb->s_blocksize);
622 sad->extPosition = cpu_to_le32(nbloc.logicalBlockNum); 619 sad->extPosition = cpu_to_le32(epos.block.logicalBlockNum);
623 break; 620 break;
624 } 621 }
625 case ICBTAG_FLAG_AD_LONG: 622 case ICBTAG_FLAG_AD_LONG:
@@ -628,14 +625,14 @@ static void udf_table_free_blocks(struct super_block * sb,
628 lad->extLength = cpu_to_le32( 625 lad->extLength = cpu_to_le32(
629 EXT_NEXT_EXTENT_ALLOCDECS | 626 EXT_NEXT_EXTENT_ALLOCDECS |
630 sb->s_blocksize); 627 sb->s_blocksize);
631 lad->extLocation = cpu_to_lelb(nbloc); 628 lad->extLocation = cpu_to_lelb(epos.block);
632 break; 629 break;
633 } 630 }
634 } 631 }
635 if (obh) 632 if (oepos.bh)
636 { 633 {
637 udf_update_tag(obh->b_data, loffset); 634 udf_update_tag(oepos.bh->b_data, loffset);
638 mark_buffer_dirty(obh); 635 mark_buffer_dirty(oepos.bh);
639 } 636 }
640 else 637 else
641 mark_inode_dirty(table); 638 mark_inode_dirty(table);
@@ -643,26 +640,26 @@ static void udf_table_free_blocks(struct super_block * sb,
643 640
644 if (elen) /* It's possible that stealing the block emptied the extent */ 641 if (elen) /* It's possible that stealing the block emptied the extent */
645 { 642 {
646 udf_write_aext(table, nbloc, &nextoffset, eloc, elen, nbh, 1); 643 udf_write_aext(table, &epos, eloc, elen, 1);
647 644
648 if (!nbh) 645 if (!epos.bh)
649 { 646 {
650 UDF_I_LENALLOC(table) += adsize; 647 UDF_I_LENALLOC(table) += adsize;
651 mark_inode_dirty(table); 648 mark_inode_dirty(table);
652 } 649 }
653 else 650 else
654 { 651 {
655 aed = (struct allocExtDesc *)nbh->b_data; 652 aed = (struct allocExtDesc *)epos.bh->b_data;
656 aed->lengthAllocDescs = 653 aed->lengthAllocDescs =
657 cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) + adsize); 654 cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) + adsize);
658 udf_update_tag(nbh->b_data, nextoffset); 655 udf_update_tag(epos.bh->b_data, epos.offset);
659 mark_buffer_dirty(nbh); 656 mark_buffer_dirty(epos.bh);
660 } 657 }
661 } 658 }
662 } 659 }
663 660
664 udf_release_data(nbh); 661 brelse(epos.bh);
665 udf_release_data(obh); 662 brelse(oepos.bh);
666 663
667error_return: 664error_return:
668 sb->s_dirt = 1; 665 sb->s_dirt = 1;
@@ -677,9 +674,9 @@ static int udf_table_prealloc_blocks(struct super_block * sb,
677{ 674{
678 struct udf_sb_info *sbi = UDF_SB(sb); 675 struct udf_sb_info *sbi = UDF_SB(sb);
679 int alloc_count = 0; 676 int alloc_count = 0;
680 uint32_t extoffset, elen, adsize; 677 uint32_t elen, adsize;
681 kernel_lb_addr bloc, eloc; 678 kernel_lb_addr eloc;
682 struct buffer_head *bh; 679 struct extent_position epos;
683 int8_t etype = -1; 680 int8_t etype = -1;
684 681
685 if (first_block < 0 || first_block >= UDF_SB_PARTLEN(sb, partition)) 682 if (first_block < 0 || first_block >= UDF_SB_PARTLEN(sb, partition))
@@ -693,14 +690,13 @@ static int udf_table_prealloc_blocks(struct super_block * sb,
693 return 0; 690 return 0;
694 691
695 mutex_lock(&sbi->s_alloc_mutex); 692 mutex_lock(&sbi->s_alloc_mutex);
696 extoffset = sizeof(struct unallocSpaceEntry); 693 epos.offset = sizeof(struct unallocSpaceEntry);
697 bloc = UDF_I_LOCATION(table); 694 epos.block = UDF_I_LOCATION(table);
698 695 epos.bh = NULL;
699 bh = NULL;
700 eloc.logicalBlockNum = 0xFFFFFFFF; 696 eloc.logicalBlockNum = 0xFFFFFFFF;
701 697
702 while (first_block != eloc.logicalBlockNum && (etype = 698 while (first_block != eloc.logicalBlockNum && (etype =
703 udf_next_aext(table, &bloc, &extoffset, &eloc, &elen, &bh, 1)) != -1) 699 udf_next_aext(table, &epos, &eloc, &elen, 1)) != -1)
704 { 700 {
705 udf_debug("eloc=%d, elen=%d, first_block=%d\n", 701 udf_debug("eloc=%d, elen=%d, first_block=%d\n",
706 eloc.logicalBlockNum, elen, first_block); 702 eloc.logicalBlockNum, elen, first_block);
@@ -709,7 +705,7 @@ static int udf_table_prealloc_blocks(struct super_block * sb,
709 705
710 if (first_block == eloc.logicalBlockNum) 706 if (first_block == eloc.logicalBlockNum)
711 { 707 {
712 extoffset -= adsize; 708 epos.offset -= adsize;
713 709
714 alloc_count = (elen >> sb->s_blocksize_bits); 710 alloc_count = (elen >> sb->s_blocksize_bits);
715 if (inode && DQUOT_PREALLOC_BLOCK(inode, alloc_count > block_count ? block_count : alloc_count)) 711 if (inode && DQUOT_PREALLOC_BLOCK(inode, alloc_count > block_count ? block_count : alloc_count))
@@ -719,15 +715,15 @@ static int udf_table_prealloc_blocks(struct super_block * sb,
719 alloc_count = block_count; 715 alloc_count = block_count;
720 eloc.logicalBlockNum += alloc_count; 716 eloc.logicalBlockNum += alloc_count;
721 elen -= (alloc_count << sb->s_blocksize_bits); 717 elen -= (alloc_count << sb->s_blocksize_bits);
722 udf_write_aext(table, bloc, &extoffset, eloc, (etype << 30) | elen, bh, 1); 718 udf_write_aext(table, &epos, eloc, (etype << 30) | elen, 1);
723 } 719 }
724 else 720 else
725 udf_delete_aext(table, bloc, extoffset, eloc, (etype << 30) | elen, bh); 721 udf_delete_aext(table, epos, eloc, (etype << 30) | elen);
726 } 722 }
727 else 723 else
728 alloc_count = 0; 724 alloc_count = 0;
729 725
730 udf_release_data(bh); 726 brelse(epos.bh);
731 727
732 if (alloc_count && UDF_SB_LVIDBH(sb)) 728 if (alloc_count && UDF_SB_LVIDBH(sb))
733 { 729 {
@@ -747,9 +743,9 @@ static int udf_table_new_block(struct super_block * sb,
747 struct udf_sb_info *sbi = UDF_SB(sb); 743 struct udf_sb_info *sbi = UDF_SB(sb);
748 uint32_t spread = 0xFFFFFFFF, nspread = 0xFFFFFFFF; 744 uint32_t spread = 0xFFFFFFFF, nspread = 0xFFFFFFFF;
749 uint32_t newblock = 0, adsize; 745 uint32_t newblock = 0, adsize;
750 uint32_t extoffset, goal_extoffset, elen, goal_elen = 0; 746 uint32_t elen, goal_elen = 0;
751 kernel_lb_addr bloc, goal_bloc, eloc, goal_eloc; 747 kernel_lb_addr eloc, goal_eloc;
752 struct buffer_head *bh, *goal_bh; 748 struct extent_position epos, goal_epos;
753 int8_t etype; 749 int8_t etype;
754 750
755 *err = -ENOSPC; 751 *err = -ENOSPC;
@@ -770,14 +766,12 @@ static int udf_table_new_block(struct super_block * sb,
770 We store the buffer_head, bloc, and extoffset of the current closest 766 We store the buffer_head, bloc, and extoffset of the current closest
771 match and use that when we are done. 767 match and use that when we are done.
772 */ 768 */
773 769 epos.offset = sizeof(struct unallocSpaceEntry);
774 extoffset = sizeof(struct unallocSpaceEntry); 770 epos.block = UDF_I_LOCATION(table);
775 bloc = UDF_I_LOCATION(table); 771 epos.bh = goal_epos.bh = NULL;
776
777 goal_bh = bh = NULL;
778 772
779 while (spread && (etype = 773 while (spread && (etype =
780 udf_next_aext(table, &bloc, &extoffset, &eloc, &elen, &bh, 1)) != -1) 774 udf_next_aext(table, &epos, &eloc, &elen, 1)) != -1)
781 { 775 {
782 if (goal >= eloc.logicalBlockNum) 776 if (goal >= eloc.logicalBlockNum)
783 { 777 {
@@ -793,24 +787,24 @@ static int udf_table_new_block(struct super_block * sb,
793 if (nspread < spread) 787 if (nspread < spread)
794 { 788 {
795 spread = nspread; 789 spread = nspread;
796 if (goal_bh != bh) 790 if (goal_epos.bh != epos.bh)
797 { 791 {
798 udf_release_data(goal_bh); 792 brelse(goal_epos.bh);
799 goal_bh = bh; 793 goal_epos.bh = epos.bh;
800 atomic_inc(&goal_bh->b_count); 794 get_bh(goal_epos.bh);
801 } 795 }
802 goal_bloc = bloc; 796 goal_epos.block = epos.block;
803 goal_extoffset = extoffset - adsize; 797 goal_epos.offset = epos.offset - adsize;
804 goal_eloc = eloc; 798 goal_eloc = eloc;
805 goal_elen = (etype << 30) | elen; 799 goal_elen = (etype << 30) | elen;
806 } 800 }
807 } 801 }
808 802
809 udf_release_data(bh); 803 brelse(epos.bh);
810 804
811 if (spread == 0xFFFFFFFF) 805 if (spread == 0xFFFFFFFF)
812 { 806 {
813 udf_release_data(goal_bh); 807 brelse(goal_epos.bh);
814 mutex_unlock(&sbi->s_alloc_mutex); 808 mutex_unlock(&sbi->s_alloc_mutex);
815 return 0; 809 return 0;
816 } 810 }
@@ -826,17 +820,17 @@ static int udf_table_new_block(struct super_block * sb,
826 820
827 if (inode && DQUOT_ALLOC_BLOCK(inode, 1)) 821 if (inode && DQUOT_ALLOC_BLOCK(inode, 1))
828 { 822 {
829 udf_release_data(goal_bh); 823 brelse(goal_epos.bh);
830 mutex_unlock(&sbi->s_alloc_mutex); 824 mutex_unlock(&sbi->s_alloc_mutex);
831 *err = -EDQUOT; 825 *err = -EDQUOT;
832 return 0; 826 return 0;
833 } 827 }
834 828
835 if (goal_elen) 829 if (goal_elen)
836 udf_write_aext(table, goal_bloc, &goal_extoffset, goal_eloc, goal_elen, goal_bh, 1); 830 udf_write_aext(table, &goal_epos, goal_eloc, goal_elen, 1);
837 else 831 else
838 udf_delete_aext(table, goal_bloc, goal_extoffset, goal_eloc, goal_elen, goal_bh); 832 udf_delete_aext(table, goal_epos, goal_eloc, goal_elen);
839 udf_release_data(goal_bh); 833 brelse(goal_epos.bh);
840 834
841 if (UDF_SB_LVIDBH(sb)) 835 if (UDF_SB_LVIDBH(sb))
842 { 836 {
@@ -921,11 +915,14 @@ inline int udf_new_block(struct super_block * sb,
921 struct inode * inode, 915 struct inode * inode,
922 uint16_t partition, uint32_t goal, int *err) 916 uint16_t partition, uint32_t goal, int *err)
923{ 917{
918 int ret;
919
924 if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_UNALLOC_BITMAP) 920 if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_UNALLOC_BITMAP)
925 { 921 {
926 return udf_bitmap_new_block(sb, inode, 922 ret = udf_bitmap_new_block(sb, inode,
927 UDF_SB_PARTMAPS(sb)[partition].s_uspace.s_bitmap, 923 UDF_SB_PARTMAPS(sb)[partition].s_uspace.s_bitmap,
928 partition, goal, err); 924 partition, goal, err);
925 return ret;
929 } 926 }
930 else if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_UNALLOC_TABLE) 927 else if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_UNALLOC_TABLE)
931 { 928 {
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index 2391c9150c49..e45f86b5e7b0 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -111,11 +111,13 @@ do_udf_readdir(struct inode * dir, struct file *filp, filldir_t filldir, void *d
111 uint16_t liu; 111 uint16_t liu;
112 uint8_t lfi; 112 uint8_t lfi;
113 loff_t size = (udf_ext0_offset(dir) + dir->i_size) >> 2; 113 loff_t size = (udf_ext0_offset(dir) + dir->i_size) >> 2;
114 struct buffer_head * bh = NULL, * tmp, * bha[16]; 114 struct buffer_head *tmp, *bha[16];
115 kernel_lb_addr bloc, eloc; 115 kernel_lb_addr eloc;
116 uint32_t extoffset, elen, offset; 116 uint32_t elen;
117 sector_t offset;
117 int i, num; 118 int i, num;
118 unsigned int dt_type; 119 unsigned int dt_type;
120 struct extent_position epos = { NULL, 0, {0, 0}};
119 121
120 if (nf_pos >= size) 122 if (nf_pos >= size)
121 return 0; 123 return 0;
@@ -127,23 +129,22 @@ do_udf_readdir(struct inode * dir, struct file *filp, filldir_t filldir, void *d
127 if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) 129 if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB)
128 fibh.sbh = fibh.ebh = NULL; 130 fibh.sbh = fibh.ebh = NULL;
129 else if (inode_bmap(dir, nf_pos >> (dir->i_sb->s_blocksize_bits - 2), 131 else if (inode_bmap(dir, nf_pos >> (dir->i_sb->s_blocksize_bits - 2),
130 &bloc, &extoffset, &eloc, &elen, &offset, &bh) == (EXT_RECORDED_ALLOCATED >> 30)) 132 &epos, &eloc, &elen, &offset) == (EXT_RECORDED_ALLOCATED >> 30))
131 { 133 {
132 offset >>= dir->i_sb->s_blocksize_bits;
133 block = udf_get_lb_pblock(dir->i_sb, eloc, offset); 134 block = udf_get_lb_pblock(dir->i_sb, eloc, offset);
134 if ((++offset << dir->i_sb->s_blocksize_bits) < elen) 135 if ((++offset << dir->i_sb->s_blocksize_bits) < elen)
135 { 136 {
136 if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_SHORT) 137 if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_SHORT)
137 extoffset -= sizeof(short_ad); 138 epos.offset -= sizeof(short_ad);
138 else if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_LONG) 139 else if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_LONG)
139 extoffset -= sizeof(long_ad); 140 epos.offset -= sizeof(long_ad);
140 } 141 }
141 else 142 else
142 offset = 0; 143 offset = 0;
143 144
144 if (!(fibh.sbh = fibh.ebh = udf_tread(dir->i_sb, block))) 145 if (!(fibh.sbh = fibh.ebh = udf_tread(dir->i_sb, block)))
145 { 146 {
146 udf_release_data(bh); 147 brelse(epos.bh);
147 return -EIO; 148 return -EIO;
148 } 149 }
149 150
@@ -171,7 +172,7 @@ do_udf_readdir(struct inode * dir, struct file *filp, filldir_t filldir, void *d
171 } 172 }
172 else 173 else
173 { 174 {
174 udf_release_data(bh); 175 brelse(epos.bh);
175 return -ENOENT; 176 return -ENOENT;
176 } 177 }
177 178
@@ -179,14 +180,14 @@ do_udf_readdir(struct inode * dir, struct file *filp, filldir_t filldir, void *d
179 { 180 {
180 filp->f_pos = nf_pos + 1; 181 filp->f_pos = nf_pos + 1;
181 182
182 fi = udf_fileident_read(dir, &nf_pos, &fibh, &cfi, &bloc, &extoffset, &eloc, &elen, &offset, &bh); 183 fi = udf_fileident_read(dir, &nf_pos, &fibh, &cfi, &epos, &eloc, &elen, &offset);
183 184
184 if (!fi) 185 if (!fi)
185 { 186 {
186 if (fibh.sbh != fibh.ebh) 187 if (fibh.sbh != fibh.ebh)
187 udf_release_data(fibh.ebh); 188 brelse(fibh.ebh);
188 udf_release_data(fibh.sbh); 189 brelse(fibh.sbh);
189 udf_release_data(bh); 190 brelse(epos.bh);
190 return 0; 191 return 0;
191 } 192 }
192 193
@@ -244,9 +245,9 @@ do_udf_readdir(struct inode * dir, struct file *filp, filldir_t filldir, void *d
244 if (filldir(dirent, fname, flen, filp->f_pos, iblock, dt_type) < 0) 245 if (filldir(dirent, fname, flen, filp->f_pos, iblock, dt_type) < 0)
245 { 246 {
246 if (fibh.sbh != fibh.ebh) 247 if (fibh.sbh != fibh.ebh)
247 udf_release_data(fibh.ebh); 248 brelse(fibh.ebh);
248 udf_release_data(fibh.sbh); 249 brelse(fibh.sbh);
249 udf_release_data(bh); 250 brelse(epos.bh);
250 return 0; 251 return 0;
251 } 252 }
252 } 253 }
@@ -255,9 +256,9 @@ do_udf_readdir(struct inode * dir, struct file *filp, filldir_t filldir, void *d
255 filp->f_pos = nf_pos + 1; 256 filp->f_pos = nf_pos + 1;
256 257
257 if (fibh.sbh != fibh.ebh) 258 if (fibh.sbh != fibh.ebh)
258 udf_release_data(fibh.ebh); 259 brelse(fibh.ebh);
259 udf_release_data(fibh.sbh); 260 brelse(fibh.sbh);
260 udf_release_data(bh); 261 brelse(epos.bh);
261 262
262 return 0; 263 return 0;
263} 264}
diff --git a/fs/udf/directory.c b/fs/udf/directory.c
index fe751a2a0e47..198caa33027a 100644
--- a/fs/udf/directory.c
+++ b/fs/udf/directory.c
@@ -36,14 +36,14 @@ udf_filead_read(struct inode *dir, uint8_t *tmpad, uint8_t ad_size,
36 36
37 if (!ad) 37 if (!ad)
38 { 38 {
39 udf_release_data(*bh); 39 brelse(*bh);
40 *error = 1; 40 *error = 1;
41 return NULL; 41 return NULL;
42 } 42 }
43 43
44 if (*offset == dir->i_sb->s_blocksize) 44 if (*offset == dir->i_sb->s_blocksize)
45 { 45 {
46 udf_release_data(*bh); 46 brelse(*bh);
47 block = udf_get_lb_pblock(dir->i_sb, fe_loc, ++*pos); 47 block = udf_get_lb_pblock(dir->i_sb, fe_loc, ++*pos);
48 if (!block) 48 if (!block)
49 return NULL; 49 return NULL;
@@ -57,7 +57,7 @@ udf_filead_read(struct inode *dir, uint8_t *tmpad, uint8_t ad_size,
57 remainder = dir->i_sb->s_blocksize - loffset; 57 remainder = dir->i_sb->s_blocksize - loffset;
58 memcpy((uint8_t *)ad, (*bh)->b_data + loffset, remainder); 58 memcpy((uint8_t *)ad, (*bh)->b_data + loffset, remainder);
59 59
60 udf_release_data(*bh); 60 brelse(*bh);
61 block = udf_get_lb_pblock(dir->i_sb, fe_loc, ++*pos); 61 block = udf_get_lb_pblock(dir->i_sb, fe_loc, ++*pos);
62 if (!block) 62 if (!block)
63 return NULL; 63 return NULL;
@@ -75,9 +75,9 @@ struct fileIdentDesc *
75udf_fileident_read(struct inode *dir, loff_t *nf_pos, 75udf_fileident_read(struct inode *dir, loff_t *nf_pos,
76 struct udf_fileident_bh *fibh, 76 struct udf_fileident_bh *fibh,
77 struct fileIdentDesc *cfi, 77 struct fileIdentDesc *cfi,
78 kernel_lb_addr *bloc, uint32_t *extoffset, 78 struct extent_position *epos,
79 kernel_lb_addr *eloc, uint32_t *elen, 79 kernel_lb_addr *eloc, uint32_t *elen,
80 uint32_t *offset, struct buffer_head **bh) 80 sector_t *offset)
81{ 81{
82 struct fileIdentDesc *fi; 82 struct fileIdentDesc *fi;
83 int i, num, block; 83 int i, num, block;
@@ -105,13 +105,11 @@ udf_fileident_read(struct inode *dir, loff_t *nf_pos,
105 105
106 if (fibh->eoffset == dir->i_sb->s_blocksize) 106 if (fibh->eoffset == dir->i_sb->s_blocksize)
107 { 107 {
108 int lextoffset = *extoffset; 108 int lextoffset = epos->offset;
109 109
110 if (udf_next_aext(dir, bloc, extoffset, eloc, elen, bh, 1) != 110 if (udf_next_aext(dir, epos, eloc, elen, 1) !=
111 (EXT_RECORDED_ALLOCATED >> 30)) 111 (EXT_RECORDED_ALLOCATED >> 30))
112 {
113 return NULL; 112 return NULL;
114 }
115 113
116 block = udf_get_lb_pblock(dir->i_sb, *eloc, *offset); 114 block = udf_get_lb_pblock(dir->i_sb, *eloc, *offset);
117 115
@@ -120,9 +118,9 @@ udf_fileident_read(struct inode *dir, loff_t *nf_pos,
120 if ((*offset << dir->i_sb->s_blocksize_bits) >= *elen) 118 if ((*offset << dir->i_sb->s_blocksize_bits) >= *elen)
121 *offset = 0; 119 *offset = 0;
122 else 120 else
123 *extoffset = lextoffset; 121 epos->offset = lextoffset;
124 122
125 udf_release_data(fibh->sbh); 123 brelse(fibh->sbh);
126 if (!(fibh->sbh = fibh->ebh = udf_tread(dir->i_sb, block))) 124 if (!(fibh->sbh = fibh->ebh = udf_tread(dir->i_sb, block)))
127 return NULL; 125 return NULL;
128 fibh->soffset = fibh->eoffset = 0; 126 fibh->soffset = fibh->eoffset = 0;
@@ -151,7 +149,7 @@ udf_fileident_read(struct inode *dir, loff_t *nf_pos,
151 } 149 }
152 else if (fibh->sbh != fibh->ebh) 150 else if (fibh->sbh != fibh->ebh)
153 { 151 {
154 udf_release_data(fibh->sbh); 152 brelse(fibh->sbh);
155 fibh->sbh = fibh->ebh; 153 fibh->sbh = fibh->ebh;
156 } 154 }
157 155
@@ -169,13 +167,11 @@ udf_fileident_read(struct inode *dir, loff_t *nf_pos,
169 } 167 }
170 else if (fibh->eoffset > dir->i_sb->s_blocksize) 168 else if (fibh->eoffset > dir->i_sb->s_blocksize)
171 { 169 {
172 int lextoffset = *extoffset; 170 int lextoffset = epos->offset;
173 171
174 if (udf_next_aext(dir, bloc, extoffset, eloc, elen, bh, 1) != 172 if (udf_next_aext(dir, epos, eloc, elen, 1) !=
175 (EXT_RECORDED_ALLOCATED >> 30)) 173 (EXT_RECORDED_ALLOCATED >> 30))
176 {
177 return NULL; 174 return NULL;
178 }
179 175
180 block = udf_get_lb_pblock(dir->i_sb, *eloc, *offset); 176 block = udf_get_lb_pblock(dir->i_sb, *eloc, *offset);
181 177
@@ -184,7 +180,7 @@ udf_fileident_read(struct inode *dir, loff_t *nf_pos,
184 if ((*offset << dir->i_sb->s_blocksize_bits) >= *elen) 180 if ((*offset << dir->i_sb->s_blocksize_bits) >= *elen)
185 *offset = 0; 181 *offset = 0;
186 else 182 else
187 *extoffset = lextoffset; 183 epos->offset = lextoffset;
188 184
189 fibh->soffset -= dir->i_sb->s_blocksize; 185 fibh->soffset -= dir->i_sb->s_blocksize;
190 fibh->eoffset -= dir->i_sb->s_blocksize; 186 fibh->eoffset -= dir->i_sb->s_blocksize;
diff --git a/fs/udf/fsync.c b/fs/udf/fsync.c
index 5887d78cde43..6ded93e7c44f 100644
--- a/fs/udf/fsync.c
+++ b/fs/udf/fsync.c
@@ -21,7 +21,6 @@
21#include "udfdecl.h" 21#include "udfdecl.h"
22 22
23#include <linux/fs.h> 23#include <linux/fs.h>
24#include <linux/smp_lock.h>
25 24
26static int udf_fsync_inode(struct inode *, int); 25static int udf_fsync_inode(struct inode *, int);
27 26
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index ae21a0e59e95..c8461551e108 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -49,10 +49,10 @@ MODULE_LICENSE("GPL");
49static mode_t udf_convert_permissions(struct fileEntry *); 49static mode_t udf_convert_permissions(struct fileEntry *);
50static int udf_update_inode(struct inode *, int); 50static int udf_update_inode(struct inode *, int);
51static void udf_fill_inode(struct inode *, struct buffer_head *); 51static void udf_fill_inode(struct inode *, struct buffer_head *);
52static struct buffer_head *inode_getblk(struct inode *, long, int *, 52static struct buffer_head *inode_getblk(struct inode *, sector_t, int *,
53 long *, int *); 53 long *, int *);
54static int8_t udf_insert_aext(struct inode *, kernel_lb_addr, int, 54static int8_t udf_insert_aext(struct inode *, struct extent_position,
55 kernel_lb_addr, uint32_t, struct buffer_head *); 55 kernel_lb_addr, uint32_t);
56static void udf_split_extents(struct inode *, int *, int, int, 56static void udf_split_extents(struct inode *, int *, int, int,
57 kernel_long_ad [EXTENT_MERGE_SIZE], int *); 57 kernel_long_ad [EXTENT_MERGE_SIZE], int *);
58static void udf_prealloc_extents(struct inode *, int, int, 58static void udf_prealloc_extents(struct inode *, int, int,
@@ -61,7 +61,7 @@ static void udf_merge_extents(struct inode *,
61 kernel_long_ad [EXTENT_MERGE_SIZE], int *); 61 kernel_long_ad [EXTENT_MERGE_SIZE], int *);
62static void udf_update_extents(struct inode *, 62static void udf_update_extents(struct inode *,
63 kernel_long_ad [EXTENT_MERGE_SIZE], int, int, 63 kernel_long_ad [EXTENT_MERGE_SIZE], int, int,
64 kernel_lb_addr, uint32_t, struct buffer_head **); 64 struct extent_position *);
65static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int); 65static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int);
66 66
67/* 67/*
@@ -194,10 +194,11 @@ void udf_expand_file_adinicb(struct inode * inode, int newsize, int * err)
194struct buffer_head * udf_expand_dir_adinicb(struct inode *inode, int *block, int *err) 194struct buffer_head * udf_expand_dir_adinicb(struct inode *inode, int *block, int *err)
195{ 195{
196 int newblock; 196 int newblock;
197 struct buffer_head *sbh = NULL, *dbh = NULL; 197 struct buffer_head *dbh = NULL;
198 kernel_lb_addr bloc, eloc; 198 kernel_lb_addr eloc;
199 uint32_t elen, extoffset; 199 uint32_t elen;
200 uint8_t alloctype; 200 uint8_t alloctype;
201 struct extent_position epos;
201 202
202 struct udf_fileident_bh sfibh, dfibh; 203 struct udf_fileident_bh sfibh, dfibh;
203 loff_t f_pos = udf_ext0_offset(inode) >> 2; 204 loff_t f_pos = udf_ext0_offset(inode) >> 2;
@@ -237,16 +238,16 @@ struct buffer_head * udf_expand_dir_adinicb(struct inode *inode, int *block, int
237 mark_buffer_dirty_inode(dbh, inode); 238 mark_buffer_dirty_inode(dbh, inode);
238 239
239 sfibh.soffset = sfibh.eoffset = (f_pos & ((inode->i_sb->s_blocksize - 1) >> 2)) << 2; 240 sfibh.soffset = sfibh.eoffset = (f_pos & ((inode->i_sb->s_blocksize - 1) >> 2)) << 2;
240 sbh = sfibh.sbh = sfibh.ebh = NULL; 241 sfibh.sbh = sfibh.ebh = NULL;
241 dfibh.soffset = dfibh.eoffset = 0; 242 dfibh.soffset = dfibh.eoffset = 0;
242 dfibh.sbh = dfibh.ebh = dbh; 243 dfibh.sbh = dfibh.ebh = dbh;
243 while ( (f_pos < size) ) 244 while ( (f_pos < size) )
244 { 245 {
245 UDF_I_ALLOCTYPE(inode) = ICBTAG_FLAG_AD_IN_ICB; 246 UDF_I_ALLOCTYPE(inode) = ICBTAG_FLAG_AD_IN_ICB;
246 sfi = udf_fileident_read(inode, &f_pos, &sfibh, &cfi, NULL, NULL, NULL, NULL, NULL, NULL); 247 sfi = udf_fileident_read(inode, &f_pos, &sfibh, &cfi, NULL, NULL, NULL, NULL);
247 if (!sfi) 248 if (!sfi)
248 { 249 {
249 udf_release_data(dbh); 250 brelse(dbh);
250 return NULL; 251 return NULL;
251 } 252 }
252 UDF_I_ALLOCTYPE(inode) = alloctype; 253 UDF_I_ALLOCTYPE(inode) = alloctype;
@@ -258,7 +259,7 @@ struct buffer_head * udf_expand_dir_adinicb(struct inode *inode, int *block, int
258 sfi->fileIdent + le16_to_cpu(sfi->lengthOfImpUse))) 259 sfi->fileIdent + le16_to_cpu(sfi->lengthOfImpUse)))
259 { 260 {
260 UDF_I_ALLOCTYPE(inode) = ICBTAG_FLAG_AD_IN_ICB; 261 UDF_I_ALLOCTYPE(inode) = ICBTAG_FLAG_AD_IN_ICB;
261 udf_release_data(dbh); 262 brelse(dbh);
262 return NULL; 263 return NULL;
263 } 264 }
264 } 265 }
@@ -266,16 +267,17 @@ struct buffer_head * udf_expand_dir_adinicb(struct inode *inode, int *block, int
266 267
267 memset(UDF_I_DATA(inode) + UDF_I_LENEATTR(inode), 0, UDF_I_LENALLOC(inode)); 268 memset(UDF_I_DATA(inode) + UDF_I_LENEATTR(inode), 0, UDF_I_LENALLOC(inode));
268 UDF_I_LENALLOC(inode) = 0; 269 UDF_I_LENALLOC(inode) = 0;
269 bloc = UDF_I_LOCATION(inode);
270 eloc.logicalBlockNum = *block; 270 eloc.logicalBlockNum = *block;
271 eloc.partitionReferenceNum = UDF_I_LOCATION(inode).partitionReferenceNum; 271 eloc.partitionReferenceNum = UDF_I_LOCATION(inode).partitionReferenceNum;
272 elen = inode->i_size; 272 elen = inode->i_size;
273 UDF_I_LENEXTENTS(inode) = elen; 273 UDF_I_LENEXTENTS(inode) = elen;
274 extoffset = udf_file_entry_alloc_offset(inode); 274 epos.bh = NULL;
275 udf_add_aext(inode, &bloc, &extoffset, eloc, elen, &sbh, 0); 275 epos.block = UDF_I_LOCATION(inode);
276 epos.offset = udf_file_entry_alloc_offset(inode);
277 udf_add_aext(inode, &epos, eloc, elen, 0);
276 /* UniqueID stuff */ 278 /* UniqueID stuff */
277 279
278 udf_release_data(sbh); 280 brelse(epos.bh);
279 mark_inode_dirty(inode); 281 mark_inode_dirty(inode);
280 return dbh; 282 return dbh;
281} 283}
@@ -354,53 +356,153 @@ udf_getblk(struct inode *inode, long block, int create, int *err)
354 return NULL; 356 return NULL;
355} 357}
356 358
357static struct buffer_head * inode_getblk(struct inode * inode, long block, 359/* Extend the file by 'blocks' blocks, return the number of extents added */
360int udf_extend_file(struct inode *inode, struct extent_position *last_pos,
361 kernel_long_ad *last_ext, sector_t blocks)
362{
363 sector_t add;
364 int count = 0, fake = !(last_ext->extLength & UDF_EXTENT_LENGTH_MASK);
365 struct super_block *sb = inode->i_sb;
366 kernel_lb_addr prealloc_loc = {0, 0};
367 int prealloc_len = 0;
368
369 /* The previous extent is fake and we should not extend by anything
370 * - there's nothing to do... */
371 if (!blocks && fake)
372 return 0;
373 /* Round the last extent up to a multiple of block size */
374 if (last_ext->extLength & (sb->s_blocksize - 1)) {
375 last_ext->extLength =
376 (last_ext->extLength & UDF_EXTENT_FLAG_MASK) |
377 (((last_ext->extLength & UDF_EXTENT_LENGTH_MASK) +
378 sb->s_blocksize - 1) & ~(sb->s_blocksize - 1));
379 UDF_I_LENEXTENTS(inode) =
380 (UDF_I_LENEXTENTS(inode) + sb->s_blocksize - 1) &
381 ~(sb->s_blocksize - 1);
382 }
383 /* Last extent are just preallocated blocks? */
384 if ((last_ext->extLength & UDF_EXTENT_FLAG_MASK) == EXT_NOT_RECORDED_ALLOCATED) {
385 /* Save the extent so that we can reattach it to the end */
386 prealloc_loc = last_ext->extLocation;
387 prealloc_len = last_ext->extLength;
388 /* Mark the extent as a hole */
389 last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED |
390 (last_ext->extLength & UDF_EXTENT_LENGTH_MASK);
391 last_ext->extLocation.logicalBlockNum = 0;
392 last_ext->extLocation.partitionReferenceNum = 0;
393 }
394 /* Can we merge with the previous extent? */
395 if ((last_ext->extLength & UDF_EXTENT_FLAG_MASK) == EXT_NOT_RECORDED_NOT_ALLOCATED) {
396 add = ((1<<30) - sb->s_blocksize - (last_ext->extLength &
397 UDF_EXTENT_LENGTH_MASK)) >> sb->s_blocksize_bits;
398 if (add > blocks)
399 add = blocks;
400 blocks -= add;
401 last_ext->extLength += add << sb->s_blocksize_bits;
402 }
403
404 if (fake) {
405 udf_add_aext(inode, last_pos, last_ext->extLocation,
406 last_ext->extLength, 1);
407 count++;
408 }
409 else
410 udf_write_aext(inode, last_pos, last_ext->extLocation, last_ext->extLength, 1);
411 /* Managed to do everything necessary? */
412 if (!blocks)
413 goto out;
414
415 /* All further extents will be NOT_RECORDED_NOT_ALLOCATED */
416 last_ext->extLocation.logicalBlockNum = 0;
417 last_ext->extLocation.partitionReferenceNum = 0;
418 add = (1 << (30-sb->s_blocksize_bits)) - 1;
419 last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | (add << sb->s_blocksize_bits);
420 /* Create enough extents to cover the whole hole */
421 while (blocks > add) {
422 blocks -= add;
423 if (udf_add_aext(inode, last_pos, last_ext->extLocation,
424 last_ext->extLength, 1) == -1)
425 return -1;
426 count++;
427 }
428 if (blocks) {
429 last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED |
430 (blocks << sb->s_blocksize_bits);
431 if (udf_add_aext(inode, last_pos, last_ext->extLocation,
432 last_ext->extLength, 1) == -1)
433 return -1;
434 count++;
435 }
436out:
437 /* Do we have some preallocated blocks saved? */
438 if (prealloc_len) {
439 if (udf_add_aext(inode, last_pos, prealloc_loc, prealloc_len, 1) == -1)
440 return -1;
441 last_ext->extLocation = prealloc_loc;
442 last_ext->extLength = prealloc_len;
443 count++;
444 }
445 /* last_pos should point to the last written extent... */
446 if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_SHORT)
447 last_pos->offset -= sizeof(short_ad);
448 else if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_LONG)
449 last_pos->offset -= sizeof(long_ad);
450 else
451 return -1;
452 return count;
453}
454
455static struct buffer_head * inode_getblk(struct inode * inode, sector_t block,
358 int *err, long *phys, int *new) 456 int *err, long *phys, int *new)
359{ 457{
360 struct buffer_head *pbh = NULL, *cbh = NULL, *nbh = NULL, *result = NULL; 458 static sector_t last_block;
459 struct buffer_head *result = NULL;
361 kernel_long_ad laarr[EXTENT_MERGE_SIZE]; 460 kernel_long_ad laarr[EXTENT_MERGE_SIZE];
362 uint32_t pextoffset = 0, cextoffset = 0, nextoffset = 0; 461 struct extent_position prev_epos, cur_epos, next_epos;
363 int count = 0, startnum = 0, endnum = 0; 462 int count = 0, startnum = 0, endnum = 0;
364 uint32_t elen = 0; 463 uint32_t elen = 0;
365 kernel_lb_addr eloc, pbloc, cbloc, nbloc; 464 kernel_lb_addr eloc;
366 int c = 1; 465 int c = 1;
367 uint64_t lbcount = 0, b_off = 0; 466 loff_t lbcount = 0, b_off = 0;
368 uint32_t newblocknum, newblock, offset = 0; 467 uint32_t newblocknum, newblock;
468 sector_t offset = 0;
369 int8_t etype; 469 int8_t etype;
370 int goal = 0, pgoal = UDF_I_LOCATION(inode).logicalBlockNum; 470 int goal = 0, pgoal = UDF_I_LOCATION(inode).logicalBlockNum;
371 char lastblock = 0; 471 int lastblock = 0;
372 472
373 pextoffset = cextoffset = nextoffset = udf_file_entry_alloc_offset(inode); 473 prev_epos.offset = udf_file_entry_alloc_offset(inode);
374 b_off = (uint64_t)block << inode->i_sb->s_blocksize_bits; 474 prev_epos.block = UDF_I_LOCATION(inode);
375 pbloc = cbloc = nbloc = UDF_I_LOCATION(inode); 475 prev_epos.bh = NULL;
476 cur_epos = next_epos = prev_epos;
477 b_off = (loff_t)block << inode->i_sb->s_blocksize_bits;
376 478
377 /* find the extent which contains the block we are looking for. 479 /* find the extent which contains the block we are looking for.
378 alternate between laarr[0] and laarr[1] for locations of the 480 alternate between laarr[0] and laarr[1] for locations of the
379 current extent, and the previous extent */ 481 current extent, and the previous extent */
380 do 482 do
381 { 483 {
382 if (pbh != cbh) 484 if (prev_epos.bh != cur_epos.bh)
383 { 485 {
384 udf_release_data(pbh); 486 brelse(prev_epos.bh);
385 atomic_inc(&cbh->b_count); 487 get_bh(cur_epos.bh);
386 pbh = cbh; 488 prev_epos.bh = cur_epos.bh;
387 } 489 }
388 if (cbh != nbh) 490 if (cur_epos.bh != next_epos.bh)
389 { 491 {
390 udf_release_data(cbh); 492 brelse(cur_epos.bh);
391 atomic_inc(&nbh->b_count); 493 get_bh(next_epos.bh);
392 cbh = nbh; 494 cur_epos.bh = next_epos.bh;
393 } 495 }
394 496
395 lbcount += elen; 497 lbcount += elen;
396 498
397 pbloc = cbloc; 499 prev_epos.block = cur_epos.block;
398 cbloc = nbloc; 500 cur_epos.block = next_epos.block;
399 501
400 pextoffset = cextoffset; 502 prev_epos.offset = cur_epos.offset;
401 cextoffset = nextoffset; 503 cur_epos.offset = next_epos.offset;
402 504
403 if ((etype = udf_next_aext(inode, &nbloc, &nextoffset, &eloc, &elen, &nbh, 1)) == -1) 505 if ((etype = udf_next_aext(inode, &next_epos, &eloc, &elen, 1)) == -1)
404 break; 506 break;
405 507
406 c = !c; 508 c = !c;
@@ -418,6 +520,8 @@ static struct buffer_head * inode_getblk(struct inode * inode, long block,
418 520
419 b_off -= lbcount; 521 b_off -= lbcount;
420 offset = b_off >> inode->i_sb->s_blocksize_bits; 522 offset = b_off >> inode->i_sb->s_blocksize_bits;
523 /* Move into indirect extent if we are at a pointer to it */
524 udf_next_aext(inode, &prev_epos, &eloc, &elen, 0);
421 525
422 /* if the extent is allocated and recorded, return the block 526 /* if the extent is allocated and recorded, return the block
423 if the extent is not a multiple of the blocksize, round up */ 527 if the extent is not a multiple of the blocksize, round up */
@@ -429,54 +533,77 @@ static struct buffer_head * inode_getblk(struct inode * inode, long block,
429 elen = EXT_RECORDED_ALLOCATED | 533 elen = EXT_RECORDED_ALLOCATED |
430 ((elen + inode->i_sb->s_blocksize - 1) & 534 ((elen + inode->i_sb->s_blocksize - 1) &
431 ~(inode->i_sb->s_blocksize - 1)); 535 ~(inode->i_sb->s_blocksize - 1));
432 etype = udf_write_aext(inode, nbloc, &cextoffset, eloc, elen, nbh, 1); 536 etype = udf_write_aext(inode, &cur_epos, eloc, elen, 1);
433 } 537 }
434 udf_release_data(pbh); 538 brelse(prev_epos.bh);
435 udf_release_data(cbh); 539 brelse(cur_epos.bh);
436 udf_release_data(nbh); 540 brelse(next_epos.bh);
437 newblock = udf_get_lb_pblock(inode->i_sb, eloc, offset); 541 newblock = udf_get_lb_pblock(inode->i_sb, eloc, offset);
438 *phys = newblock; 542 *phys = newblock;
439 return NULL; 543 return NULL;
440 } 544 }
441 545
546 last_block = block;
547 /* Are we beyond EOF? */
442 if (etype == -1) 548 if (etype == -1)
443 { 549 {
444 endnum = startnum = ((count > 1) ? 1 : count); 550 int ret;
445 if (laarr[c].extLength & (inode->i_sb->s_blocksize - 1)) 551
446 { 552 if (count) {
447 laarr[c].extLength = 553 if (c)
448 (laarr[c].extLength & UDF_EXTENT_FLAG_MASK) | 554 laarr[0] = laarr[1];
449 (((laarr[c].extLength & UDF_EXTENT_LENGTH_MASK) + 555 startnum = 1;
450 inode->i_sb->s_blocksize - 1) & 556 }
451 ~(inode->i_sb->s_blocksize - 1)); 557 else {
452 UDF_I_LENEXTENTS(inode) = 558 /* Create a fake extent when there's not one */
453 (UDF_I_LENEXTENTS(inode) + inode->i_sb->s_blocksize - 1) & 559 memset(&laarr[0].extLocation, 0x00, sizeof(kernel_lb_addr));
454 ~(inode->i_sb->s_blocksize - 1); 560 laarr[0].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED;
561 /* Will udf_extend_file() create real extent from a fake one? */
562 startnum = (offset > 0);
563 }
564 /* Create extents for the hole between EOF and offset */
565 ret = udf_extend_file(inode, &prev_epos, laarr, offset);
566 if (ret == -1) {
567 brelse(prev_epos.bh);
568 brelse(cur_epos.bh);
569 brelse(next_epos.bh);
570 /* We don't really know the error here so we just make
571 * something up */
572 *err = -ENOSPC;
573 return NULL;
455 } 574 }
456 c = !c; 575 c = 0;
457 laarr[c].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | 576 offset = 0;
458 ((offset + 1) << inode->i_sb->s_blocksize_bits); 577 count += ret;
459 memset(&laarr[c].extLocation, 0x00, sizeof(kernel_lb_addr)); 578 /* We are not covered by a preallocated extent? */
460 count ++; 579 if ((laarr[0].extLength & UDF_EXTENT_FLAG_MASK) != EXT_NOT_RECORDED_ALLOCATED) {
461 endnum ++; 580 /* Is there any real extent? - otherwise we overwrite
581 * the fake one... */
582 if (count)
583 c = !c;
584 laarr[c].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED |
585 inode->i_sb->s_blocksize;
586 memset(&laarr[c].extLocation, 0x00, sizeof(kernel_lb_addr));
587 count ++;
588 endnum ++;
589 }
590 endnum = c+1;
462 lastblock = 1; 591 lastblock = 1;
463 } 592 }
464 else 593 else {
465 endnum = startnum = ((count > 2) ? 2 : count); 594 endnum = startnum = ((count > 2) ? 2 : count);
466 595
467 /* if the current extent is in position 0, swap it with the previous */ 596 /* if the current extent is in position 0, swap it with the previous */
468 if (!c && count != 1) 597 if (!c && count != 1)
469 { 598 {
470 laarr[2] = laarr[0]; 599 laarr[2] = laarr[0];
471 laarr[0] = laarr[1]; 600 laarr[0] = laarr[1];
472 laarr[1] = laarr[2]; 601 laarr[1] = laarr[2];
473 c = 1; 602 c = 1;
474 } 603 }
475 604
476 /* if the current block is located in a extent, read the next extent */ 605 /* if the current block is located in an extent, read the next extent */
477 if (etype != -1) 606 if ((etype = udf_next_aext(inode, &next_epos, &eloc, &elen, 0)) != -1)
478 {
479 if ((etype = udf_next_aext(inode, &nbloc, &nextoffset, &eloc, &elen, &nbh, 0)) != -1)
480 { 607 {
481 laarr[c+1].extLength = (etype << 30) | elen; 608 laarr[c+1].extLength = (etype << 30) | elen;
482 laarr[c+1].extLocation = eloc; 609 laarr[c+1].extLocation = eloc;
@@ -484,11 +611,10 @@ static struct buffer_head * inode_getblk(struct inode * inode, long block,
484 startnum ++; 611 startnum ++;
485 endnum ++; 612 endnum ++;
486 } 613 }
487 else 614 else {
488 lastblock = 1; 615 lastblock = 1;
616 }
489 } 617 }
490 udf_release_data(cbh);
491 udf_release_data(nbh);
492 618
493 /* if the current extent is not recorded but allocated, get the 619 /* if the current extent is not recorded but allocated, get the
494 block in the extent corresponding to the requested block */ 620 block in the extent corresponding to the requested block */
@@ -508,7 +634,7 @@ static struct buffer_head * inode_getblk(struct inode * inode, long block,
508 if (!(newblocknum = udf_new_block(inode->i_sb, inode, 634 if (!(newblocknum = udf_new_block(inode->i_sb, inode,
509 UDF_I_LOCATION(inode).partitionReferenceNum, goal, err))) 635 UDF_I_LOCATION(inode).partitionReferenceNum, goal, err)))
510 { 636 {
511 udf_release_data(pbh); 637 brelse(prev_epos.bh);
512 *err = -ENOSPC; 638 *err = -ENOSPC;
513 return NULL; 639 return NULL;
514 } 640 }
@@ -529,11 +655,11 @@ static struct buffer_head * inode_getblk(struct inode * inode, long block,
529 udf_merge_extents(inode, laarr, &endnum); 655 udf_merge_extents(inode, laarr, &endnum);
530 656
531 /* write back the new extents, inserting new extents if the new number 657 /* write back the new extents, inserting new extents if the new number
532 of extents is greater than the old number, and deleting extents if 658 of extents is greater than the old number, and deleting extents if
533 the new number of extents is less than the old number */ 659 the new number of extents is less than the old number */
534 udf_update_extents(inode, laarr, startnum, endnum, pbloc, pextoffset, &pbh); 660 udf_update_extents(inode, laarr, startnum, endnum, &prev_epos);
535 661
536 udf_release_data(pbh); 662 brelse(prev_epos.bh);
537 663
538 if (!(newblock = udf_get_pblock(inode->i_sb, newblocknum, 664 if (!(newblock = udf_get_pblock(inode->i_sb, newblocknum,
539 UDF_I_LOCATION(inode).partitionReferenceNum, 0))) 665 UDF_I_LOCATION(inode).partitionReferenceNum, 0)))
@@ -795,7 +921,7 @@ static void udf_merge_extents(struct inode *inode,
795 921
796static void udf_update_extents(struct inode *inode, 922static void udf_update_extents(struct inode *inode,
797 kernel_long_ad laarr[EXTENT_MERGE_SIZE], int startnum, int endnum, 923 kernel_long_ad laarr[EXTENT_MERGE_SIZE], int startnum, int endnum,
798 kernel_lb_addr pbloc, uint32_t pextoffset, struct buffer_head **pbh) 924 struct extent_position *epos)
799{ 925{
800 int start = 0, i; 926 int start = 0, i;
801 kernel_lb_addr tmploc; 927 kernel_lb_addr tmploc;
@@ -804,28 +930,26 @@ static void udf_update_extents(struct inode *inode,
804 if (startnum > endnum) 930 if (startnum > endnum)
805 { 931 {
806 for (i=0; i<(startnum-endnum); i++) 932 for (i=0; i<(startnum-endnum); i++)
807 { 933 udf_delete_aext(inode, *epos, laarr[i].extLocation,
808 udf_delete_aext(inode, pbloc, pextoffset, laarr[i].extLocation, 934 laarr[i].extLength);
809 laarr[i].extLength, *pbh);
810 }
811 } 935 }
812 else if (startnum < endnum) 936 else if (startnum < endnum)
813 { 937 {
814 for (i=0; i<(endnum-startnum); i++) 938 for (i=0; i<(endnum-startnum); i++)
815 { 939 {
816 udf_insert_aext(inode, pbloc, pextoffset, laarr[i].extLocation, 940 udf_insert_aext(inode, *epos, laarr[i].extLocation,
817 laarr[i].extLength, *pbh); 941 laarr[i].extLength);
818 udf_next_aext(inode, &pbloc, &pextoffset, &laarr[i].extLocation, 942 udf_next_aext(inode, epos, &laarr[i].extLocation,
819 &laarr[i].extLength, pbh, 1); 943 &laarr[i].extLength, 1);
820 start ++; 944 start ++;
821 } 945 }
822 } 946 }
823 947
824 for (i=start; i<endnum; i++) 948 for (i=start; i<endnum; i++)
825 { 949 {
826 udf_next_aext(inode, &pbloc, &pextoffset, &tmploc, &tmplen, pbh, 0); 950 udf_next_aext(inode, epos, &tmploc, &tmplen, 0);
827 udf_write_aext(inode, pbloc, &pextoffset, laarr[i].extLocation, 951 udf_write_aext(inode, epos, laarr[i].extLocation,
828 laarr[i].extLength, *pbh, 1); 952 laarr[i].extLength, 1);
829 } 953 }
830} 954}
831 955
@@ -931,7 +1055,7 @@ __udf_read_inode(struct inode *inode)
931 { 1055 {
932 printk(KERN_ERR "udf: udf_read_inode(ino %ld) failed ident=%d\n", 1056 printk(KERN_ERR "udf: udf_read_inode(ino %ld) failed ident=%d\n",
933 inode->i_ino, ident); 1057 inode->i_ino, ident);
934 udf_release_data(bh); 1058 brelse(bh);
935 make_bad_inode(inode); 1059 make_bad_inode(inode);
936 return; 1060 return;
937 } 1061 }
@@ -960,35 +1084,36 @@ __udf_read_inode(struct inode *inode)
960 ident == TAG_IDENT_EFE) 1084 ident == TAG_IDENT_EFE)
961 { 1085 {
962 memcpy(&UDF_I_LOCATION(inode), &loc, sizeof(kernel_lb_addr)); 1086 memcpy(&UDF_I_LOCATION(inode), &loc, sizeof(kernel_lb_addr));
963 udf_release_data(bh); 1087 brelse(bh);
964 udf_release_data(ibh); 1088 brelse(ibh);
965 udf_release_data(nbh); 1089 brelse(nbh);
966 __udf_read_inode(inode); 1090 __udf_read_inode(inode);
967 return; 1091 return;
968 } 1092 }
969 else 1093 else
970 { 1094 {
971 udf_release_data(nbh); 1095 brelse(nbh);
972 udf_release_data(ibh); 1096 brelse(ibh);
973 } 1097 }
974 } 1098 }
975 else 1099 else
976 udf_release_data(ibh); 1100 brelse(ibh);
977 } 1101 }
978 } 1102 }
979 else 1103 else
980 udf_release_data(ibh); 1104 brelse(ibh);
981 } 1105 }
982 else if (le16_to_cpu(fe->icbTag.strategyType) != 4) 1106 else if (le16_to_cpu(fe->icbTag.strategyType) != 4)
983 { 1107 {
984 printk(KERN_ERR "udf: unsupported strategy type: %d\n", 1108 printk(KERN_ERR "udf: unsupported strategy type: %d\n",
985 le16_to_cpu(fe->icbTag.strategyType)); 1109 le16_to_cpu(fe->icbTag.strategyType));
986 udf_release_data(bh); 1110 brelse(bh);
987 make_bad_inode(inode); 1111 make_bad_inode(inode);
988 return; 1112 return;
989 } 1113 }
990 udf_fill_inode(inode, bh); 1114 udf_fill_inode(inode, bh);
991 udf_release_data(bh); 1115
1116 brelse(bh);
992} 1117}
993 1118
994static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) 1119static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
@@ -1331,7 +1456,7 @@ udf_update_inode(struct inode *inode, int do_sync)
1331 use->descTag.tagChecksum += ((uint8_t *)&(use->descTag))[i]; 1456 use->descTag.tagChecksum += ((uint8_t *)&(use->descTag))[i];
1332 1457
1333 mark_buffer_dirty(bh); 1458 mark_buffer_dirty(bh);
1334 udf_release_data(bh); 1459 brelse(bh);
1335 return err; 1460 return err;
1336 } 1461 }
1337 1462
@@ -1520,7 +1645,7 @@ udf_update_inode(struct inode *inode, int do_sync)
1520 err = -EIO; 1645 err = -EIO;
1521 } 1646 }
1522 } 1647 }
1523 udf_release_data(bh); 1648 brelse(bh);
1524 return err; 1649 return err;
1525} 1650}
1526 1651
@@ -1556,8 +1681,8 @@ udf_iget(struct super_block *sb, kernel_lb_addr ino)
1556 return NULL; 1681 return NULL;
1557} 1682}
1558 1683
1559int8_t udf_add_aext(struct inode *inode, kernel_lb_addr *bloc, int *extoffset, 1684int8_t udf_add_aext(struct inode *inode, struct extent_position *epos,
1560 kernel_lb_addr eloc, uint32_t elen, struct buffer_head **bh, int inc) 1685 kernel_lb_addr eloc, uint32_t elen, int inc)
1561{ 1686{
1562 int adsize; 1687 int adsize;
1563 short_ad *sad = NULL; 1688 short_ad *sad = NULL;
@@ -1566,10 +1691,10 @@ int8_t udf_add_aext(struct inode *inode, kernel_lb_addr *bloc, int *extoffset,
1566 int8_t etype; 1691 int8_t etype;
1567 uint8_t *ptr; 1692 uint8_t *ptr;
1568 1693
1569 if (!*bh) 1694 if (!epos->bh)
1570 ptr = UDF_I_DATA(inode) + *extoffset - udf_file_entry_alloc_offset(inode) + UDF_I_LENEATTR(inode); 1695 ptr = UDF_I_DATA(inode) + epos->offset - udf_file_entry_alloc_offset(inode) + UDF_I_LENEATTR(inode);
1571 else 1696 else
1572 ptr = (*bh)->b_data + *extoffset; 1697 ptr = epos->bh->b_data + epos->offset;
1573 1698
1574 if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_SHORT) 1699 if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_SHORT)
1575 adsize = sizeof(short_ad); 1700 adsize = sizeof(short_ad);
@@ -1578,20 +1703,20 @@ int8_t udf_add_aext(struct inode *inode, kernel_lb_addr *bloc, int *extoffset,
1578 else 1703 else
1579 return -1; 1704 return -1;
1580 1705
1581 if (*extoffset + (2 * adsize) > inode->i_sb->s_blocksize) 1706 if (epos->offset + (2 * adsize) > inode->i_sb->s_blocksize)
1582 { 1707 {
1583 char *sptr, *dptr; 1708 char *sptr, *dptr;
1584 struct buffer_head *nbh; 1709 struct buffer_head *nbh;
1585 int err, loffset; 1710 int err, loffset;
1586 kernel_lb_addr obloc = *bloc; 1711 kernel_lb_addr obloc = epos->block;
1587 1712
1588 if (!(bloc->logicalBlockNum = udf_new_block(inode->i_sb, NULL, 1713 if (!(epos->block.logicalBlockNum = udf_new_block(inode->i_sb, NULL,
1589 obloc.partitionReferenceNum, obloc.logicalBlockNum, &err))) 1714 obloc.partitionReferenceNum, obloc.logicalBlockNum, &err)))
1590 { 1715 {
1591 return -1; 1716 return -1;
1592 } 1717 }
1593 if (!(nbh = udf_tgetblk(inode->i_sb, udf_get_lb_pblock(inode->i_sb, 1718 if (!(nbh = udf_tgetblk(inode->i_sb, udf_get_lb_pblock(inode->i_sb,
1594 *bloc, 0)))) 1719 epos->block, 0))))
1595 { 1720 {
1596 return -1; 1721 return -1;
1597 } 1722 }
@@ -1604,25 +1729,25 @@ int8_t udf_add_aext(struct inode *inode, kernel_lb_addr *bloc, int *extoffset,
1604 aed = (struct allocExtDesc *)(nbh->b_data); 1729 aed = (struct allocExtDesc *)(nbh->b_data);
1605 if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT)) 1730 if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT))
1606 aed->previousAllocExtLocation = cpu_to_le32(obloc.logicalBlockNum); 1731 aed->previousAllocExtLocation = cpu_to_le32(obloc.logicalBlockNum);
1607 if (*extoffset + adsize > inode->i_sb->s_blocksize) 1732 if (epos->offset + adsize > inode->i_sb->s_blocksize)
1608 { 1733 {
1609 loffset = *extoffset; 1734 loffset = epos->offset;
1610 aed->lengthAllocDescs = cpu_to_le32(adsize); 1735 aed->lengthAllocDescs = cpu_to_le32(adsize);
1611 sptr = ptr - adsize; 1736 sptr = ptr - adsize;
1612 dptr = nbh->b_data + sizeof(struct allocExtDesc); 1737 dptr = nbh->b_data + sizeof(struct allocExtDesc);
1613 memcpy(dptr, sptr, adsize); 1738 memcpy(dptr, sptr, adsize);
1614 *extoffset = sizeof(struct allocExtDesc) + adsize; 1739 epos->offset = sizeof(struct allocExtDesc) + adsize;
1615 } 1740 }
1616 else 1741 else
1617 { 1742 {
1618 loffset = *extoffset + adsize; 1743 loffset = epos->offset + adsize;
1619 aed->lengthAllocDescs = cpu_to_le32(0); 1744 aed->lengthAllocDescs = cpu_to_le32(0);
1620 sptr = ptr; 1745 sptr = ptr;
1621 *extoffset = sizeof(struct allocExtDesc); 1746 epos->offset = sizeof(struct allocExtDesc);
1622 1747
1623 if (*bh) 1748 if (epos->bh)
1624 { 1749 {
1625 aed = (struct allocExtDesc *)(*bh)->b_data; 1750 aed = (struct allocExtDesc *)epos->bh->b_data;
1626 aed->lengthAllocDescs = 1751 aed->lengthAllocDescs =
1627 cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) + adsize); 1752 cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) + adsize);
1628 } 1753 }
@@ -1634,10 +1759,10 @@ int8_t udf_add_aext(struct inode *inode, kernel_lb_addr *bloc, int *extoffset,
1634 } 1759 }
1635 if (UDF_SB_UDFREV(inode->i_sb) >= 0x0200) 1760 if (UDF_SB_UDFREV(inode->i_sb) >= 0x0200)
1636 udf_new_tag(nbh->b_data, TAG_IDENT_AED, 3, 1, 1761 udf_new_tag(nbh->b_data, TAG_IDENT_AED, 3, 1,
1637 bloc->logicalBlockNum, sizeof(tag)); 1762 epos->block.logicalBlockNum, sizeof(tag));
1638 else 1763 else
1639 udf_new_tag(nbh->b_data, TAG_IDENT_AED, 2, 1, 1764 udf_new_tag(nbh->b_data, TAG_IDENT_AED, 2, 1,
1640 bloc->logicalBlockNum, sizeof(tag)); 1765 epos->block.logicalBlockNum, sizeof(tag));
1641 switch (UDF_I_ALLOCTYPE(inode)) 1766 switch (UDF_I_ALLOCTYPE(inode))
1642 { 1767 {
1643 case ICBTAG_FLAG_AD_SHORT: 1768 case ICBTAG_FLAG_AD_SHORT:
@@ -1646,7 +1771,7 @@ int8_t udf_add_aext(struct inode *inode, kernel_lb_addr *bloc, int *extoffset,
1646 sad->extLength = cpu_to_le32( 1771 sad->extLength = cpu_to_le32(
1647 EXT_NEXT_EXTENT_ALLOCDECS | 1772 EXT_NEXT_EXTENT_ALLOCDECS |
1648 inode->i_sb->s_blocksize); 1773 inode->i_sb->s_blocksize);
1649 sad->extPosition = cpu_to_le32(bloc->logicalBlockNum); 1774 sad->extPosition = cpu_to_le32(epos->block.logicalBlockNum);
1650 break; 1775 break;
1651 } 1776 }
1652 case ICBTAG_FLAG_AD_LONG: 1777 case ICBTAG_FLAG_AD_LONG:
@@ -1655,60 +1780,57 @@ int8_t udf_add_aext(struct inode *inode, kernel_lb_addr *bloc, int *extoffset,
1655 lad->extLength = cpu_to_le32( 1780 lad->extLength = cpu_to_le32(
1656 EXT_NEXT_EXTENT_ALLOCDECS | 1781 EXT_NEXT_EXTENT_ALLOCDECS |
1657 inode->i_sb->s_blocksize); 1782 inode->i_sb->s_blocksize);
1658 lad->extLocation = cpu_to_lelb(*bloc); 1783 lad->extLocation = cpu_to_lelb(epos->block);
1659 memset(lad->impUse, 0x00, sizeof(lad->impUse)); 1784 memset(lad->impUse, 0x00, sizeof(lad->impUse));
1660 break; 1785 break;
1661 } 1786 }
1662 } 1787 }
1663 if (*bh) 1788 if (epos->bh)
1664 { 1789 {
1665 if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201) 1790 if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201)
1666 udf_update_tag((*bh)->b_data, loffset); 1791 udf_update_tag(epos->bh->b_data, loffset);
1667 else 1792 else
1668 udf_update_tag((*bh)->b_data, sizeof(struct allocExtDesc)); 1793 udf_update_tag(epos->bh->b_data, sizeof(struct allocExtDesc));
1669 mark_buffer_dirty_inode(*bh, inode); 1794 mark_buffer_dirty_inode(epos->bh, inode);
1670 udf_release_data(*bh); 1795 brelse(epos->bh);
1671 } 1796 }
1672 else 1797 else
1673 mark_inode_dirty(inode); 1798 mark_inode_dirty(inode);
1674 *bh = nbh; 1799 epos->bh = nbh;
1675 } 1800 }
1676 1801
1677 etype = udf_write_aext(inode, *bloc, extoffset, eloc, elen, *bh, inc); 1802 etype = udf_write_aext(inode, epos, eloc, elen, inc);
1678 1803
1679 if (!*bh) 1804 if (!epos->bh)
1680 { 1805 {
1681 UDF_I_LENALLOC(inode) += adsize; 1806 UDF_I_LENALLOC(inode) += adsize;
1682 mark_inode_dirty(inode); 1807 mark_inode_dirty(inode);
1683 } 1808 }
1684 else 1809 else
1685 { 1810 {
1686 aed = (struct allocExtDesc *)(*bh)->b_data; 1811 aed = (struct allocExtDesc *)epos->bh->b_data;
1687 aed->lengthAllocDescs = 1812 aed->lengthAllocDescs =
1688 cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) + adsize); 1813 cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) + adsize);
1689 if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201) 1814 if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201)
1690 udf_update_tag((*bh)->b_data, *extoffset + (inc ? 0 : adsize)); 1815 udf_update_tag(epos->bh->b_data, epos->offset + (inc ? 0 : adsize));
1691 else 1816 else
1692 udf_update_tag((*bh)->b_data, sizeof(struct allocExtDesc)); 1817 udf_update_tag(epos->bh->b_data, sizeof(struct allocExtDesc));
1693 mark_buffer_dirty_inode(*bh, inode); 1818 mark_buffer_dirty_inode(epos->bh, inode);
1694 } 1819 }
1695 1820
1696 return etype; 1821 return etype;
1697} 1822}
1698 1823
1699int8_t udf_write_aext(struct inode *inode, kernel_lb_addr bloc, int *extoffset, 1824int8_t udf_write_aext(struct inode *inode, struct extent_position *epos,
1700 kernel_lb_addr eloc, uint32_t elen, struct buffer_head *bh, int inc) 1825 kernel_lb_addr eloc, uint32_t elen, int inc)
1701{ 1826{
1702 int adsize; 1827 int adsize;
1703 uint8_t *ptr; 1828 uint8_t *ptr;
1704 1829
1705 if (!bh) 1830 if (!epos->bh)
1706 ptr = UDF_I_DATA(inode) + *extoffset - udf_file_entry_alloc_offset(inode) + UDF_I_LENEATTR(inode); 1831 ptr = UDF_I_DATA(inode) + epos->offset - udf_file_entry_alloc_offset(inode) + UDF_I_LENEATTR(inode);
1707 else 1832 else
1708 { 1833 ptr = epos->bh->b_data + epos->offset;
1709 ptr = bh->b_data + *extoffset;
1710 atomic_inc(&bh->b_count);
1711 }
1712 1834
1713 switch (UDF_I_ALLOCTYPE(inode)) 1835 switch (UDF_I_ALLOCTYPE(inode))
1714 { 1836 {
@@ -1733,40 +1855,39 @@ int8_t udf_write_aext(struct inode *inode, kernel_lb_addr bloc, int *extoffset,
1733 return -1; 1855 return -1;
1734 } 1856 }
1735 1857
1736 if (bh) 1858 if (epos->bh)
1737 { 1859 {
1738 if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201) 1860 if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201)
1739 { 1861 {
1740 struct allocExtDesc *aed = (struct allocExtDesc *)(bh)->b_data; 1862 struct allocExtDesc *aed = (struct allocExtDesc *)epos->bh->b_data;
1741 udf_update_tag((bh)->b_data, 1863 udf_update_tag(epos->bh->b_data,
1742 le32_to_cpu(aed->lengthAllocDescs) + sizeof(struct allocExtDesc)); 1864 le32_to_cpu(aed->lengthAllocDescs) + sizeof(struct allocExtDesc));
1743 } 1865 }
1744 mark_buffer_dirty_inode(bh, inode); 1866 mark_buffer_dirty_inode(epos->bh, inode);
1745 udf_release_data(bh);
1746 } 1867 }
1747 else 1868 else
1748 mark_inode_dirty(inode); 1869 mark_inode_dirty(inode);
1749 1870
1750 if (inc) 1871 if (inc)
1751 *extoffset += adsize; 1872 epos->offset += adsize;
1752 return (elen >> 30); 1873 return (elen >> 30);
1753} 1874}
1754 1875
1755int8_t udf_next_aext(struct inode *inode, kernel_lb_addr *bloc, int *extoffset, 1876int8_t udf_next_aext(struct inode *inode, struct extent_position *epos,
1756 kernel_lb_addr *eloc, uint32_t *elen, struct buffer_head **bh, int inc) 1877 kernel_lb_addr *eloc, uint32_t *elen, int inc)
1757{ 1878{
1758 int8_t etype; 1879 int8_t etype;
1759 1880
1760 while ((etype = udf_current_aext(inode, bloc, extoffset, eloc, elen, bh, inc)) == 1881 while ((etype = udf_current_aext(inode, epos, eloc, elen, inc)) ==
1761 (EXT_NEXT_EXTENT_ALLOCDECS >> 30)) 1882 (EXT_NEXT_EXTENT_ALLOCDECS >> 30))
1762 { 1883 {
1763 *bloc = *eloc; 1884 epos->block = *eloc;
1764 *extoffset = sizeof(struct allocExtDesc); 1885 epos->offset = sizeof(struct allocExtDesc);
1765 udf_release_data(*bh); 1886 brelse(epos->bh);
1766 if (!(*bh = udf_tread(inode->i_sb, udf_get_lb_pblock(inode->i_sb, *bloc, 0)))) 1887 if (!(epos->bh = udf_tread(inode->i_sb, udf_get_lb_pblock(inode->i_sb, epos->block, 0))))
1767 { 1888 {
1768 udf_debug("reading block %d failed!\n", 1889 udf_debug("reading block %d failed!\n",
1769 udf_get_lb_pblock(inode->i_sb, *bloc, 0)); 1890 udf_get_lb_pblock(inode->i_sb, epos->block, 0));
1770 return -1; 1891 return -1;
1771 } 1892 }
1772 } 1893 }
@@ -1774,26 +1895,26 @@ int8_t udf_next_aext(struct inode *inode, kernel_lb_addr *bloc, int *extoffset,
1774 return etype; 1895 return etype;
1775} 1896}
1776 1897
1777int8_t udf_current_aext(struct inode *inode, kernel_lb_addr *bloc, int *extoffset, 1898int8_t udf_current_aext(struct inode *inode, struct extent_position *epos,
1778 kernel_lb_addr *eloc, uint32_t *elen, struct buffer_head **bh, int inc) 1899 kernel_lb_addr *eloc, uint32_t *elen, int inc)
1779{ 1900{
1780 int alen; 1901 int alen;
1781 int8_t etype; 1902 int8_t etype;
1782 uint8_t *ptr; 1903 uint8_t *ptr;
1783 1904
1784 if (!*bh) 1905 if (!epos->bh)
1785 { 1906 {
1786 if (!(*extoffset)) 1907 if (!epos->offset)
1787 *extoffset = udf_file_entry_alloc_offset(inode); 1908 epos->offset = udf_file_entry_alloc_offset(inode);
1788 ptr = UDF_I_DATA(inode) + *extoffset - udf_file_entry_alloc_offset(inode) + UDF_I_LENEATTR(inode); 1909 ptr = UDF_I_DATA(inode) + epos->offset - udf_file_entry_alloc_offset(inode) + UDF_I_LENEATTR(inode);
1789 alen = udf_file_entry_alloc_offset(inode) + UDF_I_LENALLOC(inode); 1910 alen = udf_file_entry_alloc_offset(inode) + UDF_I_LENALLOC(inode);
1790 } 1911 }
1791 else 1912 else
1792 { 1913 {
1793 if (!(*extoffset)) 1914 if (!epos->offset)
1794 *extoffset = sizeof(struct allocExtDesc); 1915 epos->offset = sizeof(struct allocExtDesc);
1795 ptr = (*bh)->b_data + *extoffset; 1916 ptr = epos->bh->b_data + epos->offset;
1796 alen = sizeof(struct allocExtDesc) + le32_to_cpu(((struct allocExtDesc *)(*bh)->b_data)->lengthAllocDescs); 1917 alen = sizeof(struct allocExtDesc) + le32_to_cpu(((struct allocExtDesc *)epos->bh->b_data)->lengthAllocDescs);
1797 } 1918 }
1798 1919
1799 switch (UDF_I_ALLOCTYPE(inode)) 1920 switch (UDF_I_ALLOCTYPE(inode))
@@ -1802,7 +1923,7 @@ int8_t udf_current_aext(struct inode *inode, kernel_lb_addr *bloc, int *extoffse
1802 { 1923 {
1803 short_ad *sad; 1924 short_ad *sad;
1804 1925
1805 if (!(sad = udf_get_fileshortad(ptr, alen, extoffset, inc))) 1926 if (!(sad = udf_get_fileshortad(ptr, alen, &epos->offset, inc)))
1806 return -1; 1927 return -1;
1807 1928
1808 etype = le32_to_cpu(sad->extLength) >> 30; 1929 etype = le32_to_cpu(sad->extLength) >> 30;
@@ -1815,7 +1936,7 @@ int8_t udf_current_aext(struct inode *inode, kernel_lb_addr *bloc, int *extoffse
1815 { 1936 {
1816 long_ad *lad; 1937 long_ad *lad;
1817 1938
1818 if (!(lad = udf_get_filelongad(ptr, alen, extoffset, inc))) 1939 if (!(lad = udf_get_filelongad(ptr, alen, &epos->offset, inc)))
1819 return -1; 1940 return -1;
1820 1941
1821 etype = le32_to_cpu(lad->extLength) >> 30; 1942 etype = le32_to_cpu(lad->extLength) >> 30;
@@ -1834,41 +1955,40 @@ int8_t udf_current_aext(struct inode *inode, kernel_lb_addr *bloc, int *extoffse
1834} 1955}
1835 1956
1836static int8_t 1957static int8_t
1837udf_insert_aext(struct inode *inode, kernel_lb_addr bloc, int extoffset, 1958udf_insert_aext(struct inode *inode, struct extent_position epos,
1838 kernel_lb_addr neloc, uint32_t nelen, struct buffer_head *bh) 1959 kernel_lb_addr neloc, uint32_t nelen)
1839{ 1960{
1840 kernel_lb_addr oeloc; 1961 kernel_lb_addr oeloc;
1841 uint32_t oelen; 1962 uint32_t oelen;
1842 int8_t etype; 1963 int8_t etype;
1843 1964
1844 if (bh) 1965 if (epos.bh)
1845 atomic_inc(&bh->b_count); 1966 get_bh(epos.bh);
1846 1967
1847 while ((etype = udf_next_aext(inode, &bloc, &extoffset, &oeloc, &oelen, &bh, 0)) != -1) 1968 while ((etype = udf_next_aext(inode, &epos, &oeloc, &oelen, 0)) != -1)
1848 { 1969 {
1849 udf_write_aext(inode, bloc, &extoffset, neloc, nelen, bh, 1); 1970 udf_write_aext(inode, &epos, neloc, nelen, 1);
1850 1971
1851 neloc = oeloc; 1972 neloc = oeloc;
1852 nelen = (etype << 30) | oelen; 1973 nelen = (etype << 30) | oelen;
1853 } 1974 }
1854 udf_add_aext(inode, &bloc, &extoffset, neloc, nelen, &bh, 1); 1975 udf_add_aext(inode, &epos, neloc, nelen, 1);
1855 udf_release_data(bh); 1976 brelse(epos.bh);
1856 return (nelen >> 30); 1977 return (nelen >> 30);
1857} 1978}
1858 1979
1859int8_t udf_delete_aext(struct inode *inode, kernel_lb_addr nbloc, int nextoffset, 1980int8_t udf_delete_aext(struct inode *inode, struct extent_position epos,
1860 kernel_lb_addr eloc, uint32_t elen, struct buffer_head *nbh) 1981 kernel_lb_addr eloc, uint32_t elen)
1861{ 1982{
1862 struct buffer_head *obh; 1983 struct extent_position oepos;
1863 kernel_lb_addr obloc; 1984 int adsize;
1864 int oextoffset, adsize;
1865 int8_t etype; 1985 int8_t etype;
1866 struct allocExtDesc *aed; 1986 struct allocExtDesc *aed;
1867 1987
1868 if (nbh) 1988 if (epos.bh)
1869 { 1989 {
1870 atomic_inc(&nbh->b_count); 1990 get_bh(epos.bh);
1871 atomic_inc(&nbh->b_count); 1991 get_bh(epos.bh);
1872 } 1992 }
1873 1993
1874 if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_SHORT) 1994 if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_SHORT)
@@ -1878,80 +1998,77 @@ int8_t udf_delete_aext(struct inode *inode, kernel_lb_addr nbloc, int nextoffset
1878 else 1998 else
1879 adsize = 0; 1999 adsize = 0;
1880 2000
1881 obh = nbh; 2001 oepos = epos;
1882 obloc = nbloc; 2002 if (udf_next_aext(inode, &epos, &eloc, &elen, 1) == -1)
1883 oextoffset = nextoffset;
1884
1885 if (udf_next_aext(inode, &nbloc, &nextoffset, &eloc, &elen, &nbh, 1) == -1)
1886 return -1; 2003 return -1;
1887 2004
1888 while ((etype = udf_next_aext(inode, &nbloc, &nextoffset, &eloc, &elen, &nbh, 1)) != -1) 2005 while ((etype = udf_next_aext(inode, &epos, &eloc, &elen, 1)) != -1)
1889 { 2006 {
1890 udf_write_aext(inode, obloc, &oextoffset, eloc, (etype << 30) | elen, obh, 1); 2007 udf_write_aext(inode, &oepos, eloc, (etype << 30) | elen, 1);
1891 if (obh != nbh) 2008 if (oepos.bh != epos.bh)
1892 { 2009 {
1893 obloc = nbloc; 2010 oepos.block = epos.block;
1894 udf_release_data(obh); 2011 brelse(oepos.bh);
1895 atomic_inc(&nbh->b_count); 2012 get_bh(epos.bh);
1896 obh = nbh; 2013 oepos.bh = epos.bh;
1897 oextoffset = nextoffset - adsize; 2014 oepos.offset = epos.offset - adsize;
1898 } 2015 }
1899 } 2016 }
1900 memset(&eloc, 0x00, sizeof(kernel_lb_addr)); 2017 memset(&eloc, 0x00, sizeof(kernel_lb_addr));
1901 elen = 0; 2018 elen = 0;
1902 2019
1903 if (nbh != obh) 2020 if (epos.bh != oepos.bh)
1904 { 2021 {
1905 udf_free_blocks(inode->i_sb, inode, nbloc, 0, 1); 2022 udf_free_blocks(inode->i_sb, inode, epos.block, 0, 1);
1906 udf_write_aext(inode, obloc, &oextoffset, eloc, elen, obh, 1); 2023 udf_write_aext(inode, &oepos, eloc, elen, 1);
1907 udf_write_aext(inode, obloc, &oextoffset, eloc, elen, obh, 1); 2024 udf_write_aext(inode, &oepos, eloc, elen, 1);
1908 if (!obh) 2025 if (!oepos.bh)
1909 { 2026 {
1910 UDF_I_LENALLOC(inode) -= (adsize * 2); 2027 UDF_I_LENALLOC(inode) -= (adsize * 2);
1911 mark_inode_dirty(inode); 2028 mark_inode_dirty(inode);
1912 } 2029 }
1913 else 2030 else
1914 { 2031 {
1915 aed = (struct allocExtDesc *)(obh)->b_data; 2032 aed = (struct allocExtDesc *)oepos.bh->b_data;
1916 aed->lengthAllocDescs = 2033 aed->lengthAllocDescs =
1917 cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) - (2*adsize)); 2034 cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) - (2*adsize));
1918 if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201) 2035 if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201)
1919 udf_update_tag((obh)->b_data, oextoffset - (2*adsize)); 2036 udf_update_tag(oepos.bh->b_data, oepos.offset - (2*adsize));
1920 else 2037 else
1921 udf_update_tag((obh)->b_data, sizeof(struct allocExtDesc)); 2038 udf_update_tag(oepos.bh->b_data, sizeof(struct allocExtDesc));
1922 mark_buffer_dirty_inode(obh, inode); 2039 mark_buffer_dirty_inode(oepos.bh, inode);
1923 } 2040 }
1924 } 2041 }
1925 else 2042 else
1926 { 2043 {
1927 udf_write_aext(inode, obloc, &oextoffset, eloc, elen, obh, 1); 2044 udf_write_aext(inode, &oepos, eloc, elen, 1);
1928 if (!obh) 2045 if (!oepos.bh)
1929 { 2046 {
1930 UDF_I_LENALLOC(inode) -= adsize; 2047 UDF_I_LENALLOC(inode) -= adsize;
1931 mark_inode_dirty(inode); 2048 mark_inode_dirty(inode);
1932 } 2049 }
1933 else 2050 else
1934 { 2051 {
1935 aed = (struct allocExtDesc *)(obh)->b_data; 2052 aed = (struct allocExtDesc *)oepos.bh->b_data;
1936 aed->lengthAllocDescs = 2053 aed->lengthAllocDescs =
1937 cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) - adsize); 2054 cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) - adsize);
1938 if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201) 2055 if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201)
1939 udf_update_tag((obh)->b_data, oextoffset - adsize); 2056 udf_update_tag(oepos.bh->b_data, epos.offset - adsize);
1940 else 2057 else
1941 udf_update_tag((obh)->b_data, sizeof(struct allocExtDesc)); 2058 udf_update_tag(oepos.bh->b_data, sizeof(struct allocExtDesc));
1942 mark_buffer_dirty_inode(obh, inode); 2059 mark_buffer_dirty_inode(oepos.bh, inode);
1943 } 2060 }
1944 } 2061 }
1945 2062
1946 udf_release_data(nbh); 2063 brelse(epos.bh);
1947 udf_release_data(obh); 2064 brelse(oepos.bh);
1948 return (elen >> 30); 2065 return (elen >> 30);
1949} 2066}
1950 2067
1951int8_t inode_bmap(struct inode *inode, int block, kernel_lb_addr *bloc, uint32_t *extoffset, 2068int8_t inode_bmap(struct inode *inode, sector_t block, struct extent_position *pos,
1952 kernel_lb_addr *eloc, uint32_t *elen, uint32_t *offset, struct buffer_head **bh) 2069 kernel_lb_addr *eloc, uint32_t *elen, sector_t *offset)
1953{ 2070{
1954 uint64_t lbcount = 0, bcount = (uint64_t)block << inode->i_sb->s_blocksize_bits; 2071 loff_t lbcount = 0, bcount = (loff_t)block << inode->i_sb->s_blocksize_bits;
1955 int8_t etype; 2072 int8_t etype;
1956 2073
1957 if (block < 0) 2074 if (block < 0)
@@ -1960,42 +2077,44 @@ int8_t inode_bmap(struct inode *inode, int block, kernel_lb_addr *bloc, uint32_t
1960 return -1; 2077 return -1;
1961 } 2078 }
1962 2079
1963 *extoffset = 0; 2080 pos->offset = 0;
2081 pos->block = UDF_I_LOCATION(inode);
2082 pos->bh = NULL;
1964 *elen = 0; 2083 *elen = 0;
1965 *bloc = UDF_I_LOCATION(inode);
1966 2084
1967 do 2085 do
1968 { 2086 {
1969 if ((etype = udf_next_aext(inode, bloc, extoffset, eloc, elen, bh, 1)) == -1) 2087 if ((etype = udf_next_aext(inode, pos, eloc, elen, 1)) == -1)
1970 { 2088 {
1971 *offset = bcount - lbcount; 2089 *offset = (bcount - lbcount) >> inode->i_sb->s_blocksize_bits;
1972 UDF_I_LENEXTENTS(inode) = lbcount; 2090 UDF_I_LENEXTENTS(inode) = lbcount;
1973 return -1; 2091 return -1;
1974 } 2092 }
1975 lbcount += *elen; 2093 lbcount += *elen;
1976 } while (lbcount <= bcount); 2094 } while (lbcount <= bcount);
1977 2095
1978 *offset = bcount + *elen - lbcount; 2096 *offset = (bcount + *elen - lbcount) >> inode->i_sb->s_blocksize_bits;
1979 2097
1980 return etype; 2098 return etype;
1981} 2099}
1982 2100
1983long udf_block_map(struct inode *inode, long block) 2101long udf_block_map(struct inode *inode, sector_t block)
1984{ 2102{
1985 kernel_lb_addr eloc, bloc; 2103 kernel_lb_addr eloc;
1986 uint32_t offset, extoffset, elen; 2104 uint32_t elen;
1987 struct buffer_head *bh = NULL; 2105 sector_t offset;
2106 struct extent_position epos = { NULL, 0, { 0, 0}};
1988 int ret; 2107 int ret;
1989 2108
1990 lock_kernel(); 2109 lock_kernel();
1991 2110
1992 if (inode_bmap(inode, block, &bloc, &extoffset, &eloc, &elen, &offset, &bh) == (EXT_RECORDED_ALLOCATED >> 30)) 2111 if (inode_bmap(inode, block, &epos, &eloc, &elen, &offset) == (EXT_RECORDED_ALLOCATED >> 30))
1993 ret = udf_get_lb_pblock(inode->i_sb, eloc, offset >> inode->i_sb->s_blocksize_bits); 2112 ret = udf_get_lb_pblock(inode->i_sb, eloc, offset);
1994 else 2113 else
1995 ret = 0; 2114 ret = 0;
1996 2115
1997 unlock_kernel(); 2116 unlock_kernel();
1998 udf_release_data(bh); 2117 brelse(epos.bh);
1999 2118
2000 if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_VARCONV)) 2119 if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_VARCONV))
2001 return udf_fixed_to_variable(ret); 2120 return udf_fixed_to_variable(ret);
diff --git a/fs/udf/misc.c b/fs/udf/misc.c
index cc8ca3254db1..a2b2a98ce78a 100644
--- a/fs/udf/misc.c
+++ b/fs/udf/misc.c
@@ -274,12 +274,6 @@ udf_read_ptagged(struct super_block *sb, kernel_lb_addr loc, uint32_t offset, ui
274 loc.logicalBlockNum + offset, ident); 274 loc.logicalBlockNum + offset, ident);
275} 275}
276 276
277void udf_release_data(struct buffer_head *bh)
278{
279 if (bh)
280 brelse(bh);
281}
282
283void udf_update_tag(char *data, int length) 277void udf_update_tag(char *data, int length)
284{ 278{
285 tag *tptr = (tag *)data; 279 tag *tptr = (tag *)data;
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index fe361cd19a98..91df4928651c 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -155,9 +155,10 @@ udf_find_entry(struct inode *dir, struct dentry *dentry,
155 uint8_t lfi; 155 uint8_t lfi;
156 uint16_t liu; 156 uint16_t liu;
157 loff_t size; 157 loff_t size;
158 kernel_lb_addr bloc, eloc; 158 kernel_lb_addr eloc;
159 uint32_t extoffset, elen, offset; 159 uint32_t elen;
160 struct buffer_head *bh = NULL; 160 sector_t offset;
161 struct extent_position epos = { NULL, 0, { 0, 0}};
161 162
162 size = (udf_ext0_offset(dir) + dir->i_size) >> 2; 163 size = (udf_ext0_offset(dir) + dir->i_size) >> 2;
163 f_pos = (udf_ext0_offset(dir) >> 2); 164 f_pos = (udf_ext0_offset(dir) >> 2);
@@ -166,42 +167,41 @@ udf_find_entry(struct inode *dir, struct dentry *dentry,
166 if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) 167 if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB)
167 fibh->sbh = fibh->ebh = NULL; 168 fibh->sbh = fibh->ebh = NULL;
168 else if (inode_bmap(dir, f_pos >> (dir->i_sb->s_blocksize_bits - 2), 169 else if (inode_bmap(dir, f_pos >> (dir->i_sb->s_blocksize_bits - 2),
169 &bloc, &extoffset, &eloc, &elen, &offset, &bh) == (EXT_RECORDED_ALLOCATED >> 30)) 170 &epos, &eloc, &elen, &offset) == (EXT_RECORDED_ALLOCATED >> 30))
170 { 171 {
171 offset >>= dir->i_sb->s_blocksize_bits;
172 block = udf_get_lb_pblock(dir->i_sb, eloc, offset); 172 block = udf_get_lb_pblock(dir->i_sb, eloc, offset);
173 if ((++offset << dir->i_sb->s_blocksize_bits) < elen) 173 if ((++offset << dir->i_sb->s_blocksize_bits) < elen)
174 { 174 {
175 if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_SHORT) 175 if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_SHORT)
176 extoffset -= sizeof(short_ad); 176 epos.offset -= sizeof(short_ad);
177 else if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_LONG) 177 else if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_LONG)
178 extoffset -= sizeof(long_ad); 178 epos.offset -= sizeof(long_ad);
179 } 179 }
180 else 180 else
181 offset = 0; 181 offset = 0;
182 182
183 if (!(fibh->sbh = fibh->ebh = udf_tread(dir->i_sb, block))) 183 if (!(fibh->sbh = fibh->ebh = udf_tread(dir->i_sb, block)))
184 { 184 {
185 udf_release_data(bh); 185 brelse(epos.bh);
186 return NULL; 186 return NULL;
187 } 187 }
188 } 188 }
189 else 189 else
190 { 190 {
191 udf_release_data(bh); 191 brelse(epos.bh);
192 return NULL; 192 return NULL;
193 } 193 }
194 194
195 while ( (f_pos < size) ) 195 while ( (f_pos < size) )
196 { 196 {
197 fi = udf_fileident_read(dir, &f_pos, fibh, cfi, &bloc, &extoffset, &eloc, &elen, &offset, &bh); 197 fi = udf_fileident_read(dir, &f_pos, fibh, cfi, &epos, &eloc, &elen, &offset);
198 198
199 if (!fi) 199 if (!fi)
200 { 200 {
201 if (fibh->sbh != fibh->ebh) 201 if (fibh->sbh != fibh->ebh)
202 udf_release_data(fibh->ebh); 202 brelse(fibh->ebh);
203 udf_release_data(fibh->sbh); 203 brelse(fibh->sbh);
204 udf_release_data(bh); 204 brelse(epos.bh);
205 return NULL; 205 return NULL;
206 } 206 }
207 207
@@ -247,15 +247,15 @@ udf_find_entry(struct inode *dir, struct dentry *dentry,
247 { 247 {
248 if (udf_match(flen, fname, dentry->d_name.len, dentry->d_name.name)) 248 if (udf_match(flen, fname, dentry->d_name.len, dentry->d_name.name))
249 { 249 {
250 udf_release_data(bh); 250 brelse(epos.bh);
251 return fi; 251 return fi;
252 } 252 }
253 } 253 }
254 } 254 }
255 if (fibh->sbh != fibh->ebh) 255 if (fibh->sbh != fibh->ebh)
256 udf_release_data(fibh->ebh); 256 brelse(fibh->ebh);
257 udf_release_data(fibh->sbh); 257 brelse(fibh->sbh);
258 udf_release_data(bh); 258 brelse(epos.bh);
259 return NULL; 259 return NULL;
260} 260}
261 261
@@ -321,8 +321,8 @@ udf_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
321 if (udf_find_entry(dir, dentry, &fibh, &cfi)) 321 if (udf_find_entry(dir, dentry, &fibh, &cfi))
322 { 322 {
323 if (fibh.sbh != fibh.ebh) 323 if (fibh.sbh != fibh.ebh)
324 udf_release_data(fibh.ebh); 324 brelse(fibh.ebh);
325 udf_release_data(fibh.sbh); 325 brelse(fibh.sbh);
326 326
327 inode = udf_iget(dir->i_sb, lelb_to_cpu(cfi.icb.extLocation)); 327 inode = udf_iget(dir->i_sb, lelb_to_cpu(cfi.icb.extLocation));
328 if ( !inode ) 328 if ( !inode )
@@ -353,9 +353,10 @@ udf_add_entry(struct inode *dir, struct dentry *dentry,
353 uint8_t lfi; 353 uint8_t lfi;
354 uint16_t liu; 354 uint16_t liu;
355 int block; 355 int block;
356 kernel_lb_addr bloc, eloc; 356 kernel_lb_addr eloc;
357 uint32_t extoffset, elen, offset; 357 uint32_t elen;
358 struct buffer_head *bh = NULL; 358 sector_t offset;
359 struct extent_position epos = { NULL, 0, { 0, 0 }};
359 360
360 sb = dir->i_sb; 361 sb = dir->i_sb;
361 362
@@ -384,23 +385,22 @@ udf_add_entry(struct inode *dir, struct dentry *dentry,
384 if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) 385 if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB)
385 fibh->sbh = fibh->ebh = NULL; 386 fibh->sbh = fibh->ebh = NULL;
386 else if (inode_bmap(dir, f_pos >> (dir->i_sb->s_blocksize_bits - 2), 387 else if (inode_bmap(dir, f_pos >> (dir->i_sb->s_blocksize_bits - 2),
387 &bloc, &extoffset, &eloc, &elen, &offset, &bh) == (EXT_RECORDED_ALLOCATED >> 30)) 388 &epos, &eloc, &elen, &offset) == (EXT_RECORDED_ALLOCATED >> 30))
388 { 389 {
389 offset >>= dir->i_sb->s_blocksize_bits;
390 block = udf_get_lb_pblock(dir->i_sb, eloc, offset); 390 block = udf_get_lb_pblock(dir->i_sb, eloc, offset);
391 if ((++offset << dir->i_sb->s_blocksize_bits) < elen) 391 if ((++offset << dir->i_sb->s_blocksize_bits) < elen)
392 { 392 {
393 if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_SHORT) 393 if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_SHORT)
394 extoffset -= sizeof(short_ad); 394 epos.offset -= sizeof(short_ad);
395 else if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_LONG) 395 else if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_LONG)
396 extoffset -= sizeof(long_ad); 396 epos.offset -= sizeof(long_ad);
397 } 397 }
398 else 398 else
399 offset = 0; 399 offset = 0;
400 400
401 if (!(fibh->sbh = fibh->ebh = udf_tread(dir->i_sb, block))) 401 if (!(fibh->sbh = fibh->ebh = udf_tread(dir->i_sb, block)))
402 { 402 {
403 udf_release_data(bh); 403 brelse(epos.bh);
404 *err = -EIO; 404 *err = -EIO;
405 return NULL; 405 return NULL;
406 } 406 }
@@ -418,14 +418,14 @@ udf_add_entry(struct inode *dir, struct dentry *dentry,
418 418
419 while ( (f_pos < size) ) 419 while ( (f_pos < size) )
420 { 420 {
421 fi = udf_fileident_read(dir, &f_pos, fibh, cfi, &bloc, &extoffset, &eloc, &elen, &offset, &bh); 421 fi = udf_fileident_read(dir, &f_pos, fibh, cfi, &epos, &eloc, &elen, &offset);
422 422
423 if (!fi) 423 if (!fi)
424 { 424 {
425 if (fibh->sbh != fibh->ebh) 425 if (fibh->sbh != fibh->ebh)
426 udf_release_data(fibh->ebh); 426 brelse(fibh->ebh);
427 udf_release_data(fibh->sbh); 427 brelse(fibh->sbh);
428 udf_release_data(bh); 428 brelse(epos.bh);
429 *err = -EIO; 429 *err = -EIO;
430 return NULL; 430 return NULL;
431 } 431 }
@@ -455,7 +455,7 @@ udf_add_entry(struct inode *dir, struct dentry *dentry,
455 { 455 {
456 if (((sizeof(struct fileIdentDesc) + liu + lfi + 3) & ~3) == nfidlen) 456 if (((sizeof(struct fileIdentDesc) + liu + lfi + 3) & ~3) == nfidlen)
457 { 457 {
458 udf_release_data(bh); 458 brelse(epos.bh);
459 cfi->descTag.tagSerialNum = cpu_to_le16(1); 459 cfi->descTag.tagSerialNum = cpu_to_le16(1);
460 cfi->fileVersionNum = cpu_to_le16(1); 460 cfi->fileVersionNum = cpu_to_le16(1);
461 cfi->fileCharacteristics = 0; 461 cfi->fileCharacteristics = 0;
@@ -478,9 +478,9 @@ udf_add_entry(struct inode *dir, struct dentry *dentry,
478 udf_match(flen, fname, dentry->d_name.len, dentry->d_name.name)) 478 udf_match(flen, fname, dentry->d_name.len, dentry->d_name.name))
479 { 479 {
480 if (fibh->sbh != fibh->ebh) 480 if (fibh->sbh != fibh->ebh)
481 udf_release_data(fibh->ebh); 481 brelse(fibh->ebh);
482 udf_release_data(fibh->sbh); 482 brelse(fibh->sbh);
483 udf_release_data(bh); 483 brelse(epos.bh);
484 *err = -EEXIST; 484 *err = -EEXIST;
485 return NULL; 485 return NULL;
486 } 486 }
@@ -492,25 +492,25 @@ add:
492 if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB && 492 if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB &&
493 sb->s_blocksize - fibh->eoffset < nfidlen) 493 sb->s_blocksize - fibh->eoffset < nfidlen)
494 { 494 {
495 udf_release_data(bh); 495 brelse(epos.bh);
496 bh = NULL; 496 epos.bh = NULL;
497 fibh->soffset -= udf_ext0_offset(dir); 497 fibh->soffset -= udf_ext0_offset(dir);
498 fibh->eoffset -= udf_ext0_offset(dir); 498 fibh->eoffset -= udf_ext0_offset(dir);
499 f_pos -= (udf_ext0_offset(dir) >> 2); 499 f_pos -= (udf_ext0_offset(dir) >> 2);
500 if (fibh->sbh != fibh->ebh) 500 if (fibh->sbh != fibh->ebh)
501 udf_release_data(fibh->ebh); 501 brelse(fibh->ebh);
502 udf_release_data(fibh->sbh); 502 brelse(fibh->sbh);
503 if (!(fibh->sbh = fibh->ebh = udf_expand_dir_adinicb(dir, &block, err))) 503 if (!(fibh->sbh = fibh->ebh = udf_expand_dir_adinicb(dir, &block, err)))
504 return NULL; 504 return NULL;
505 bloc = UDF_I_LOCATION(dir); 505 epos.block = UDF_I_LOCATION(dir);
506 eloc.logicalBlockNum = block; 506 eloc.logicalBlockNum = block;
507 eloc.partitionReferenceNum = UDF_I_LOCATION(dir).partitionReferenceNum; 507 eloc.partitionReferenceNum = UDF_I_LOCATION(dir).partitionReferenceNum;
508 elen = dir->i_sb->s_blocksize; 508 elen = dir->i_sb->s_blocksize;
509 extoffset = udf_file_entry_alloc_offset(dir); 509 epos.offset = udf_file_entry_alloc_offset(dir);
510 if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_SHORT) 510 if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_SHORT)
511 extoffset += sizeof(short_ad); 511 epos.offset += sizeof(short_ad);
512 else if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_LONG) 512 else if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_LONG)
513 extoffset += sizeof(long_ad); 513 epos.offset += sizeof(long_ad);
514 } 514 }
515 515
516 if (sb->s_blocksize - fibh->eoffset >= nfidlen) 516 if (sb->s_blocksize - fibh->eoffset >= nfidlen)
@@ -519,7 +519,7 @@ add:
519 fibh->eoffset += nfidlen; 519 fibh->eoffset += nfidlen;
520 if (fibh->sbh != fibh->ebh) 520 if (fibh->sbh != fibh->ebh)
521 { 521 {
522 udf_release_data(fibh->sbh); 522 brelse(fibh->sbh);
523 fibh->sbh = fibh->ebh; 523 fibh->sbh = fibh->ebh;
524 } 524 }
525 525
@@ -541,7 +541,7 @@ add:
541 fibh->eoffset += nfidlen - sb->s_blocksize; 541 fibh->eoffset += nfidlen - sb->s_blocksize;
542 if (fibh->sbh != fibh->ebh) 542 if (fibh->sbh != fibh->ebh)
543 { 543 {
544 udf_release_data(fibh->sbh); 544 brelse(fibh->sbh);
545 fibh->sbh = fibh->ebh; 545 fibh->sbh = fibh->ebh;
546 } 546 }
547 547
@@ -550,14 +550,14 @@ add:
550 550
551 if (!(fibh->ebh = udf_bread(dir, f_pos >> (dir->i_sb->s_blocksize_bits - 2), 1, err))) 551 if (!(fibh->ebh = udf_bread(dir, f_pos >> (dir->i_sb->s_blocksize_bits - 2), 1, err)))
552 { 552 {
553 udf_release_data(bh); 553 brelse(epos.bh);
554 udf_release_data(fibh->sbh); 554 brelse(fibh->sbh);
555 return NULL; 555 return NULL;
556 } 556 }
557 557
558 if (!(fibh->soffset)) 558 if (!(fibh->soffset))
559 { 559 {
560 if (udf_next_aext(dir, &bloc, &extoffset, &eloc, &elen, &bh, 1) == 560 if (udf_next_aext(dir, &epos, &eloc, &elen, 1) ==
561 (EXT_RECORDED_ALLOCATED >> 30)) 561 (EXT_RECORDED_ALLOCATED >> 30))
562 { 562 {
563 block = eloc.logicalBlockNum + ((elen - 1) >> 563 block = eloc.logicalBlockNum + ((elen - 1) >>
@@ -566,7 +566,7 @@ add:
566 else 566 else
567 block ++; 567 block ++;
568 568
569 udf_release_data(fibh->sbh); 569 brelse(fibh->sbh);
570 fibh->sbh = fibh->ebh; 570 fibh->sbh = fibh->ebh;
571 fi = (struct fileIdentDesc *)(fibh->sbh->b_data); 571 fi = (struct fileIdentDesc *)(fibh->sbh->b_data);
572 } 572 }
@@ -587,7 +587,7 @@ add:
587 cfi->lengthOfImpUse = cpu_to_le16(0); 587 cfi->lengthOfImpUse = cpu_to_le16(0);
588 if (!udf_write_fi(dir, cfi, fi, fibh, NULL, name)) 588 if (!udf_write_fi(dir, cfi, fi, fibh, NULL, name))
589 { 589 {
590 udf_release_data(bh); 590 brelse(epos.bh);
591 dir->i_size += nfidlen; 591 dir->i_size += nfidlen;
592 if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) 592 if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB)
593 UDF_I_LENALLOC(dir) += nfidlen; 593 UDF_I_LENALLOC(dir) += nfidlen;
@@ -596,10 +596,10 @@ add:
596 } 596 }
597 else 597 else
598 { 598 {
599 udf_release_data(bh); 599 brelse(epos.bh);
600 if (fibh->sbh != fibh->ebh) 600 if (fibh->sbh != fibh->ebh)
601 udf_release_data(fibh->ebh); 601 brelse(fibh->ebh);
602 udf_release_data(fibh->sbh); 602 brelse(fibh->sbh);
603 *err = -EIO; 603 *err = -EIO;
604 return NULL; 604 return NULL;
605 } 605 }
@@ -656,8 +656,8 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode, struct
656 mark_inode_dirty(dir); 656 mark_inode_dirty(dir);
657 } 657 }
658 if (fibh.sbh != fibh.ebh) 658 if (fibh.sbh != fibh.ebh)
659 udf_release_data(fibh.ebh); 659 brelse(fibh.ebh);
660 udf_release_data(fibh.sbh); 660 brelse(fibh.sbh);
661 unlock_kernel(); 661 unlock_kernel();
662 d_instantiate(dentry, inode); 662 d_instantiate(dentry, inode);
663 return 0; 663 return 0;
@@ -701,8 +701,8 @@ static int udf_mknod(struct inode * dir, struct dentry * dentry, int mode, dev_t
701 mark_inode_dirty(inode); 701 mark_inode_dirty(inode);
702 702
703 if (fibh.sbh != fibh.ebh) 703 if (fibh.sbh != fibh.ebh)
704 udf_release_data(fibh.ebh); 704 brelse(fibh.ebh);
705 udf_release_data(fibh.sbh); 705 brelse(fibh.sbh);
706 d_instantiate(dentry, inode); 706 d_instantiate(dentry, inode);
707 err = 0; 707 err = 0;
708out: 708out:
@@ -743,7 +743,7 @@ static int udf_mkdir(struct inode * dir, struct dentry * dentry, int mode)
743 cpu_to_le32(UDF_I_UNIQUE(dir) & 0x00000000FFFFFFFFUL); 743 cpu_to_le32(UDF_I_UNIQUE(dir) & 0x00000000FFFFFFFFUL);
744 cfi.fileCharacteristics = FID_FILE_CHAR_DIRECTORY | FID_FILE_CHAR_PARENT; 744 cfi.fileCharacteristics = FID_FILE_CHAR_DIRECTORY | FID_FILE_CHAR_PARENT;
745 udf_write_fi(inode, &cfi, fi, &fibh, NULL, NULL); 745 udf_write_fi(inode, &cfi, fi, &fibh, NULL, NULL);
746 udf_release_data(fibh.sbh); 746 brelse(fibh.sbh);
747 inode->i_mode = S_IFDIR | mode; 747 inode->i_mode = S_IFDIR | mode;
748 if (dir->i_mode & S_ISGID) 748 if (dir->i_mode & S_ISGID)
749 inode->i_mode |= S_ISGID; 749 inode->i_mode |= S_ISGID;
@@ -766,8 +766,8 @@ static int udf_mkdir(struct inode * dir, struct dentry * dentry, int mode)
766 mark_inode_dirty(dir); 766 mark_inode_dirty(dir);
767 d_instantiate(dentry, inode); 767 d_instantiate(dentry, inode);
768 if (fibh.sbh != fibh.ebh) 768 if (fibh.sbh != fibh.ebh)
769 udf_release_data(fibh.ebh); 769 brelse(fibh.ebh);
770 udf_release_data(fibh.sbh); 770 brelse(fibh.sbh);
771 err = 0; 771 err = 0;
772out: 772out:
773 unlock_kernel(); 773 unlock_kernel();
@@ -781,9 +781,10 @@ static int empty_dir(struct inode *dir)
781 loff_t f_pos; 781 loff_t f_pos;
782 loff_t size = (udf_ext0_offset(dir) + dir->i_size) >> 2; 782 loff_t size = (udf_ext0_offset(dir) + dir->i_size) >> 2;
783 int block; 783 int block;
784 kernel_lb_addr bloc, eloc; 784 kernel_lb_addr eloc;
785 uint32_t extoffset, elen, offset; 785 uint32_t elen;
786 struct buffer_head *bh = NULL; 786 sector_t offset;
787 struct extent_position epos = { NULL, 0, { 0, 0}};
787 788
788 f_pos = (udf_ext0_offset(dir) >> 2); 789 f_pos = (udf_ext0_offset(dir) >> 2);
789 790
@@ -792,59 +793,58 @@ static int empty_dir(struct inode *dir)
792 if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) 793 if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB)
793 fibh.sbh = fibh.ebh = NULL; 794 fibh.sbh = fibh.ebh = NULL;
794 else if (inode_bmap(dir, f_pos >> (dir->i_sb->s_blocksize_bits - 2), 795 else if (inode_bmap(dir, f_pos >> (dir->i_sb->s_blocksize_bits - 2),
795 &bloc, &extoffset, &eloc, &elen, &offset, &bh) == (EXT_RECORDED_ALLOCATED >> 30)) 796 &epos, &eloc, &elen, &offset) == (EXT_RECORDED_ALLOCATED >> 30))
796 { 797 {
797 offset >>= dir->i_sb->s_blocksize_bits;
798 block = udf_get_lb_pblock(dir->i_sb, eloc, offset); 798 block = udf_get_lb_pblock(dir->i_sb, eloc, offset);
799 if ((++offset << dir->i_sb->s_blocksize_bits) < elen) 799 if ((++offset << dir->i_sb->s_blocksize_bits) < elen)
800 { 800 {
801 if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_SHORT) 801 if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_SHORT)
802 extoffset -= sizeof(short_ad); 802 epos.offset -= sizeof(short_ad);
803 else if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_LONG) 803 else if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_LONG)
804 extoffset -= sizeof(long_ad); 804 epos.offset -= sizeof(long_ad);
805 } 805 }
806 else 806 else
807 offset = 0; 807 offset = 0;
808 808
809 if (!(fibh.sbh = fibh.ebh = udf_tread(dir->i_sb, block))) 809 if (!(fibh.sbh = fibh.ebh = udf_tread(dir->i_sb, block)))
810 { 810 {
811 udf_release_data(bh); 811 brelse(epos.bh);
812 return 0; 812 return 0;
813 } 813 }
814 } 814 }
815 else 815 else
816 { 816 {
817 udf_release_data(bh); 817 brelse(epos.bh);
818 return 0; 818 return 0;
819 } 819 }
820 820
821 821
822 while ( (f_pos < size) ) 822 while ( (f_pos < size) )
823 { 823 {
824 fi = udf_fileident_read(dir, &f_pos, &fibh, &cfi, &bloc, &extoffset, &eloc, &elen, &offset, &bh); 824 fi = udf_fileident_read(dir, &f_pos, &fibh, &cfi, &epos, &eloc, &elen, &offset);
825 825
826 if (!fi) 826 if (!fi)
827 { 827 {
828 if (fibh.sbh != fibh.ebh) 828 if (fibh.sbh != fibh.ebh)
829 udf_release_data(fibh.ebh); 829 brelse(fibh.ebh);
830 udf_release_data(fibh.sbh); 830 brelse(fibh.sbh);
831 udf_release_data(bh); 831 brelse(epos.bh);
832 return 0; 832 return 0;
833 } 833 }
834 834
835 if (cfi.lengthFileIdent && (cfi.fileCharacteristics & FID_FILE_CHAR_DELETED) == 0) 835 if (cfi.lengthFileIdent && (cfi.fileCharacteristics & FID_FILE_CHAR_DELETED) == 0)
836 { 836 {
837 if (fibh.sbh != fibh.ebh) 837 if (fibh.sbh != fibh.ebh)
838 udf_release_data(fibh.ebh); 838 brelse(fibh.ebh);
839 udf_release_data(fibh.sbh); 839 brelse(fibh.sbh);
840 udf_release_data(bh); 840 brelse(epos.bh);
841 return 0; 841 return 0;
842 } 842 }
843 } 843 }
844 if (fibh.sbh != fibh.ebh) 844 if (fibh.sbh != fibh.ebh)
845 udf_release_data(fibh.ebh); 845 brelse(fibh.ebh);
846 udf_release_data(fibh.sbh); 846 brelse(fibh.sbh);
847 udf_release_data(bh); 847 brelse(epos.bh);
848 return 1; 848 return 1;
849} 849}
850 850
@@ -878,14 +878,14 @@ static int udf_rmdir(struct inode * dir, struct dentry * dentry)
878 inode->i_nlink); 878 inode->i_nlink);
879 clear_nlink(inode); 879 clear_nlink(inode);
880 inode->i_size = 0; 880 inode->i_size = 0;
881 inode_dec_link_count(inode); 881 inode_dec_link_count(dir);
882 inode->i_ctime = dir->i_ctime = dir->i_mtime = current_fs_time(dir->i_sb); 882 inode->i_ctime = dir->i_ctime = dir->i_mtime = current_fs_time(dir->i_sb);
883 mark_inode_dirty(dir); 883 mark_inode_dirty(dir);
884 884
885end_rmdir: 885end_rmdir:
886 if (fibh.sbh != fibh.ebh) 886 if (fibh.sbh != fibh.ebh)
887 udf_release_data(fibh.ebh); 887 brelse(fibh.ebh);
888 udf_release_data(fibh.sbh); 888 brelse(fibh.sbh);
889out: 889out:
890 unlock_kernel(); 890 unlock_kernel();
891 return retval; 891 return retval;
@@ -928,8 +928,8 @@ static int udf_unlink(struct inode * dir, struct dentry * dentry)
928 928
929end_unlink: 929end_unlink:
930 if (fibh.sbh != fibh.ebh) 930 if (fibh.sbh != fibh.ebh)
931 udf_release_data(fibh.ebh); 931 brelse(fibh.ebh);
932 udf_release_data(fibh.sbh); 932 brelse(fibh.sbh);
933out: 933out:
934 unlock_kernel(); 934 unlock_kernel();
935 return retval; 935 return retval;
@@ -941,7 +941,7 @@ static int udf_symlink(struct inode * dir, struct dentry * dentry, const char *
941 struct pathComponent *pc; 941 struct pathComponent *pc;
942 char *compstart; 942 char *compstart;
943 struct udf_fileident_bh fibh; 943 struct udf_fileident_bh fibh;
944 struct buffer_head *bh = NULL; 944 struct extent_position epos = { NULL, 0, {0, 0}};
945 int eoffset, elen = 0; 945 int eoffset, elen = 0;
946 struct fileIdentDesc *fi; 946 struct fileIdentDesc *fi;
947 struct fileIdentDesc cfi; 947 struct fileIdentDesc cfi;
@@ -961,33 +961,33 @@ static int udf_symlink(struct inode * dir, struct dentry * dentry, const char *
961 961
962 if (UDF_I_ALLOCTYPE(inode) != ICBTAG_FLAG_AD_IN_ICB) 962 if (UDF_I_ALLOCTYPE(inode) != ICBTAG_FLAG_AD_IN_ICB)
963 { 963 {
964 struct buffer_head *bh = NULL; 964 kernel_lb_addr eloc;
965 kernel_lb_addr bloc, eloc; 965 uint32_t elen;
966 uint32_t elen, extoffset;
967 966
968 block = udf_new_block(inode->i_sb, inode, 967 block = udf_new_block(inode->i_sb, inode,
969 UDF_I_LOCATION(inode).partitionReferenceNum, 968 UDF_I_LOCATION(inode).partitionReferenceNum,
970 UDF_I_LOCATION(inode).logicalBlockNum, &err); 969 UDF_I_LOCATION(inode).logicalBlockNum, &err);
971 if (!block) 970 if (!block)
972 goto out_no_entry; 971 goto out_no_entry;
973 bloc = UDF_I_LOCATION(inode); 972 epos.block = UDF_I_LOCATION(inode);
973 epos.offset = udf_file_entry_alloc_offset(inode);
974 epos.bh = NULL;
974 eloc.logicalBlockNum = block; 975 eloc.logicalBlockNum = block;
975 eloc.partitionReferenceNum = UDF_I_LOCATION(inode).partitionReferenceNum; 976 eloc.partitionReferenceNum = UDF_I_LOCATION(inode).partitionReferenceNum;
976 elen = inode->i_sb->s_blocksize; 977 elen = inode->i_sb->s_blocksize;
977 UDF_I_LENEXTENTS(inode) = elen; 978 UDF_I_LENEXTENTS(inode) = elen;
978 extoffset = udf_file_entry_alloc_offset(inode); 979 udf_add_aext(inode, &epos, eloc, elen, 0);
979 udf_add_aext(inode, &bloc, &extoffset, eloc, elen, &bh, 0); 980 brelse(epos.bh);
980 udf_release_data(bh);
981 981
982 block = udf_get_pblock(inode->i_sb, block, 982 block = udf_get_pblock(inode->i_sb, block,
983 UDF_I_LOCATION(inode).partitionReferenceNum, 0); 983 UDF_I_LOCATION(inode).partitionReferenceNum, 0);
984 bh = udf_tread(inode->i_sb, block); 984 epos.bh = udf_tread(inode->i_sb, block);
985 lock_buffer(bh); 985 lock_buffer(epos.bh);
986 memset(bh->b_data, 0x00, inode->i_sb->s_blocksize); 986 memset(epos.bh->b_data, 0x00, inode->i_sb->s_blocksize);
987 set_buffer_uptodate(bh); 987 set_buffer_uptodate(epos.bh);
988 unlock_buffer(bh); 988 unlock_buffer(epos.bh);
989 mark_buffer_dirty_inode(bh, inode); 989 mark_buffer_dirty_inode(epos.bh, inode);
990 ea = bh->b_data + udf_ext0_offset(inode); 990 ea = epos.bh->b_data + udf_ext0_offset(inode);
991 } 991 }
992 else 992 else
993 ea = UDF_I_DATA(inode) + UDF_I_LENEATTR(inode); 993 ea = UDF_I_DATA(inode) + UDF_I_LENEATTR(inode);
@@ -1060,7 +1060,7 @@ static int udf_symlink(struct inode * dir, struct dentry * dentry, const char *
1060 } 1060 }
1061 } 1061 }
1062 1062
1063 udf_release_data(bh); 1063 brelse(epos.bh);
1064 inode->i_size = elen; 1064 inode->i_size = elen;
1065 if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB) 1065 if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB)
1066 UDF_I_LENALLOC(inode) = inode->i_size; 1066 UDF_I_LENALLOC(inode) = inode->i_size;
@@ -1089,8 +1089,8 @@ static int udf_symlink(struct inode * dir, struct dentry * dentry, const char *
1089 mark_inode_dirty(dir); 1089 mark_inode_dirty(dir);
1090 } 1090 }
1091 if (fibh.sbh != fibh.ebh) 1091 if (fibh.sbh != fibh.ebh)
1092 udf_release_data(fibh.ebh); 1092 brelse(fibh.ebh);
1093 udf_release_data(fibh.sbh); 1093 brelse(fibh.sbh);
1094 d_instantiate(dentry, inode); 1094 d_instantiate(dentry, inode);
1095 err = 0; 1095 err = 0;
1096 1096
@@ -1145,8 +1145,8 @@ static int udf_link(struct dentry * old_dentry, struct inode * dir,
1145 mark_inode_dirty(dir); 1145 mark_inode_dirty(dir);
1146 } 1146 }
1147 if (fibh.sbh != fibh.ebh) 1147 if (fibh.sbh != fibh.ebh)
1148 udf_release_data(fibh.ebh); 1148 brelse(fibh.ebh);
1149 udf_release_data(fibh.sbh); 1149 brelse(fibh.sbh);
1150 inc_nlink(inode); 1150 inc_nlink(inode);
1151 inode->i_ctime = current_fs_time(inode->i_sb); 1151 inode->i_ctime = current_fs_time(inode->i_sb);
1152 mark_inode_dirty(inode); 1152 mark_inode_dirty(inode);
@@ -1174,8 +1174,8 @@ static int udf_rename (struct inode * old_dir, struct dentry * old_dentry,
1174 if ((ofi = udf_find_entry(old_dir, old_dentry, &ofibh, &ocfi))) 1174 if ((ofi = udf_find_entry(old_dir, old_dentry, &ofibh, &ocfi)))
1175 { 1175 {
1176 if (ofibh.sbh != ofibh.ebh) 1176 if (ofibh.sbh != ofibh.ebh)
1177 udf_release_data(ofibh.ebh); 1177 brelse(ofibh.ebh);
1178 udf_release_data(ofibh.sbh); 1178 brelse(ofibh.sbh);
1179 } 1179 }
1180 tloc = lelb_to_cpu(ocfi.icb.extLocation); 1180 tloc = lelb_to_cpu(ocfi.icb.extLocation);
1181 if (!ofi || udf_get_lb_pblock(old_dir->i_sb, tloc, 0) 1181 if (!ofi || udf_get_lb_pblock(old_dir->i_sb, tloc, 0)
@@ -1188,8 +1188,8 @@ static int udf_rename (struct inode * old_dir, struct dentry * old_dentry,
1188 if (!new_inode) 1188 if (!new_inode)
1189 { 1189 {
1190 if (nfibh.sbh != nfibh.ebh) 1190 if (nfibh.sbh != nfibh.ebh)
1191 udf_release_data(nfibh.ebh); 1191 brelse(nfibh.ebh);
1192 udf_release_data(nfibh.sbh); 1192 brelse(nfibh.sbh);
1193 nfi = NULL; 1193 nfi = NULL;
1194 } 1194 }
1195 } 1195 }
@@ -1290,19 +1290,19 @@ static int udf_rename (struct inode * old_dir, struct dentry * old_dentry,
1290 if (ofi) 1290 if (ofi)
1291 { 1291 {
1292 if (ofibh.sbh != ofibh.ebh) 1292 if (ofibh.sbh != ofibh.ebh)
1293 udf_release_data(ofibh.ebh); 1293 brelse(ofibh.ebh);
1294 udf_release_data(ofibh.sbh); 1294 brelse(ofibh.sbh);
1295 } 1295 }
1296 1296
1297 retval = 0; 1297 retval = 0;
1298 1298
1299end_rename: 1299end_rename:
1300 udf_release_data(dir_bh); 1300 brelse(dir_bh);
1301 if (nfi) 1301 if (nfi)
1302 { 1302 {
1303 if (nfibh.sbh != nfibh.ebh) 1303 if (nfibh.sbh != nfibh.ebh)
1304 udf_release_data(nfibh.ebh); 1304 brelse(nfibh.ebh);
1305 udf_release_data(nfibh.sbh); 1305 brelse(nfibh.sbh);
1306 } 1306 }
1307 unlock_kernel(); 1307 unlock_kernel();
1308 return retval; 1308 return retval;
diff --git a/fs/udf/partition.c b/fs/udf/partition.c
index dabf2b841db8..467a26171cd9 100644
--- a/fs/udf/partition.c
+++ b/fs/udf/partition.c
@@ -81,7 +81,7 @@ uint32_t udf_get_pblock_virt15(struct super_block *sb, uint32_t block, uint16_t
81 81
82 loc = le32_to_cpu(((__le32 *)bh->b_data)[index]); 82 loc = le32_to_cpu(((__le32 *)bh->b_data)[index]);
83 83
84 udf_release_data(bh); 84 brelse(bh);
85 85
86 if (UDF_I_LOCATION(UDF_SB_VAT(sb)).partitionReferenceNum == partition) 86 if (UDF_I_LOCATION(UDF_SB_VAT(sb)).partitionReferenceNum == partition)
87 { 87 {
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 8672b88f7ff2..9b8644a06e53 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -134,9 +134,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
134{ 134{
135 struct udf_inode_info *ei = (struct udf_inode_info *) foo; 135 struct udf_inode_info *ei = (struct udf_inode_info *) foo;
136 136
137 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 137 if (flags & SLAB_CTOR_CONSTRUCTOR) {
138 SLAB_CTOR_CONSTRUCTOR)
139 {
140 ei->i_ext.i_data = NULL; 138 ei->i_ext.i_data = NULL;
141 inode_init_once(&ei->vfs_inode); 139 inode_init_once(&ei->vfs_inode);
142 } 140 }
@@ -565,7 +563,7 @@ udf_vrs(struct super_block *sb, int silent)
565 563
566 if (vsd->stdIdent[0] == 0) 564 if (vsd->stdIdent[0] == 0)
567 { 565 {
568 udf_release_data(bh); 566 brelse(bh);
569 break; 567 break;
570 } 568 }
571 else if (!strncmp(vsd->stdIdent, VSD_STD_ID_CD001, VSD_STD_ID_LEN)) 569 else if (!strncmp(vsd->stdIdent, VSD_STD_ID_CD001, VSD_STD_ID_LEN))
@@ -598,7 +596,7 @@ udf_vrs(struct super_block *sb, int silent)
598 } 596 }
599 else if (!strncmp(vsd->stdIdent, VSD_STD_ID_TEA01, VSD_STD_ID_LEN)) 597 else if (!strncmp(vsd->stdIdent, VSD_STD_ID_TEA01, VSD_STD_ID_LEN))
600 { 598 {
601 udf_release_data(bh); 599 brelse(bh);
602 break; 600 break;
603 } 601 }
604 else if (!strncmp(vsd->stdIdent, VSD_STD_ID_NSR02, VSD_STD_ID_LEN)) 602 else if (!strncmp(vsd->stdIdent, VSD_STD_ID_NSR02, VSD_STD_ID_LEN))
@@ -609,7 +607,7 @@ udf_vrs(struct super_block *sb, int silent)
609 { 607 {
610 nsr03 = sector; 608 nsr03 = sector;
611 } 609 }
612 udf_release_data(bh); 610 brelse(bh);
613 } 611 }
614 612
615 if (nsr03) 613 if (nsr03)
@@ -675,7 +673,7 @@ udf_find_anchor(struct super_block *sb)
675 { 673 {
676 ident = le16_to_cpu(((tag *)bh->b_data)->tagIdent); 674 ident = le16_to_cpu(((tag *)bh->b_data)->tagIdent);
677 location = le32_to_cpu(((tag *)bh->b_data)->tagLocation); 675 location = le32_to_cpu(((tag *)bh->b_data)->tagLocation);
678 udf_release_data(bh); 676 brelse(bh);
679 } 677 }
680 678
681 if (ident == TAG_IDENT_AVDP) 679 if (ident == TAG_IDENT_AVDP)
@@ -710,7 +708,7 @@ udf_find_anchor(struct super_block *sb)
710 { 708 {
711 ident = le16_to_cpu(((tag *)bh->b_data)->tagIdent); 709 ident = le16_to_cpu(((tag *)bh->b_data)->tagIdent);
712 location = le32_to_cpu(((tag *)bh->b_data)->tagLocation); 710 location = le32_to_cpu(((tag *)bh->b_data)->tagLocation);
713 udf_release_data(bh); 711 brelse(bh);
714 } 712 }
715 713
716 if (ident == TAG_IDENT_AVDP && 714 if (ident == TAG_IDENT_AVDP &&
@@ -729,7 +727,7 @@ udf_find_anchor(struct super_block *sb)
729 { 727 {
730 ident = le16_to_cpu(((tag *)bh->b_data)->tagIdent); 728 ident = le16_to_cpu(((tag *)bh->b_data)->tagIdent);
731 location = le32_to_cpu(((tag *)bh->b_data)->tagLocation); 729 location = le32_to_cpu(((tag *)bh->b_data)->tagLocation);
732 udf_release_data(bh); 730 brelse(bh);
733 } 731 }
734 732
735 if (ident == TAG_IDENT_AVDP && 733 if (ident == TAG_IDENT_AVDP &&
@@ -751,7 +749,7 @@ udf_find_anchor(struct super_block *sb)
751 { 749 {
752 ident = le16_to_cpu(((tag *)bh->b_data)->tagIdent); 750 ident = le16_to_cpu(((tag *)bh->b_data)->tagIdent);
753 location = le32_to_cpu(((tag *)bh->b_data)->tagLocation); 751 location = le32_to_cpu(((tag *)bh->b_data)->tagLocation);
754 udf_release_data(bh); 752 brelse(bh);
755 753
756 if (ident == TAG_IDENT_AVDP && location == 256) 754 if (ident == TAG_IDENT_AVDP && location == 256)
757 UDF_SET_FLAG(sb, UDF_FLAG_VARCONV); 755 UDF_SET_FLAG(sb, UDF_FLAG_VARCONV);
@@ -768,7 +766,7 @@ udf_find_anchor(struct super_block *sb)
768 } 766 }
769 else 767 else
770 { 768 {
771 udf_release_data(bh); 769 brelse(bh);
772 if ((ident != TAG_IDENT_AVDP) && (i || 770 if ((ident != TAG_IDENT_AVDP) && (i ||
773 (ident != TAG_IDENT_FE && ident != TAG_IDENT_EFE))) 771 (ident != TAG_IDENT_FE && ident != TAG_IDENT_EFE)))
774 { 772 {
@@ -797,7 +795,7 @@ udf_find_fileset(struct super_block *sb, kernel_lb_addr *fileset, kernel_lb_addr
797 return 1; 795 return 1;
798 else if (ident != TAG_IDENT_FSD) 796 else if (ident != TAG_IDENT_FSD)
799 { 797 {
800 udf_release_data(bh); 798 brelse(bh);
801 return 1; 799 return 1;
802 } 800 }
803 801
@@ -836,7 +834,7 @@ udf_find_fileset(struct super_block *sb, kernel_lb_addr *fileset, kernel_lb_addr
836 newfileset.logicalBlockNum += 1 + 834 newfileset.logicalBlockNum += 1 +
837 ((le32_to_cpu(sp->numOfBytes) + sizeof(struct spaceBitmapDesc) - 1) 835 ((le32_to_cpu(sp->numOfBytes) + sizeof(struct spaceBitmapDesc) - 1)
838 >> sb->s_blocksize_bits); 836 >> sb->s_blocksize_bits);
839 udf_release_data(bh); 837 brelse(bh);
840 break; 838 break;
841 } 839 }
842 case TAG_IDENT_FSD: 840 case TAG_IDENT_FSD:
@@ -847,7 +845,7 @@ udf_find_fileset(struct super_block *sb, kernel_lb_addr *fileset, kernel_lb_addr
847 default: 845 default:
848 { 846 {
849 newfileset.logicalBlockNum ++; 847 newfileset.logicalBlockNum ++;
850 udf_release_data(bh); 848 brelse(bh);
851 bh = NULL; 849 bh = NULL;
852 break; 850 break;
853 } 851 }
@@ -867,7 +865,7 @@ udf_find_fileset(struct super_block *sb, kernel_lb_addr *fileset, kernel_lb_addr
867 865
868 UDF_SB_PARTITION(sb) = fileset->partitionReferenceNum; 866 UDF_SB_PARTITION(sb) = fileset->partitionReferenceNum;
869 udf_load_fileset(sb, bh, root); 867 udf_load_fileset(sb, bh, root);
870 udf_release_data(bh); 868 brelse(bh);
871 return 0; 869 return 0;
872 } 870 }
873 return 1; 871 return 1;
@@ -1085,7 +1083,7 @@ udf_load_logicalvol(struct super_block *sb, struct buffer_head * bh, kernel_lb_a
1085 if (ident != 0 || 1083 if (ident != 0 ||
1086 strncmp(st->sparingIdent.ident, UDF_ID_SPARING, strlen(UDF_ID_SPARING))) 1084 strncmp(st->sparingIdent.ident, UDF_ID_SPARING, strlen(UDF_ID_SPARING)))
1087 { 1085 {
1088 udf_release_data(UDF_SB_TYPESPAR(sb,i).s_spar_map[j]); 1086 brelse(UDF_SB_TYPESPAR(sb,i).s_spar_map[j]);
1089 UDF_SB_TYPESPAR(sb,i).s_spar_map[j] = NULL; 1087 UDF_SB_TYPESPAR(sb,i).s_spar_map[j] = NULL;
1090 } 1088 }
1091 } 1089 }
@@ -1139,12 +1137,12 @@ udf_load_logicalvolint(struct super_block *sb, kernel_extent_ad loc)
1139 udf_load_logicalvolint(sb, leea_to_cpu(UDF_SB_LVID(sb)->nextIntegrityExt)); 1137 udf_load_logicalvolint(sb, leea_to_cpu(UDF_SB_LVID(sb)->nextIntegrityExt));
1140 1138
1141 if (UDF_SB_LVIDBH(sb) != bh) 1139 if (UDF_SB_LVIDBH(sb) != bh)
1142 udf_release_data(bh); 1140 brelse(bh);
1143 loc.extLength -= sb->s_blocksize; 1141 loc.extLength -= sb->s_blocksize;
1144 loc.extLocation ++; 1142 loc.extLocation ++;
1145 } 1143 }
1146 if (UDF_SB_LVIDBH(sb) != bh) 1144 if (UDF_SB_LVIDBH(sb) != bh)
1147 udf_release_data(bh); 1145 brelse(bh);
1148} 1146}
1149 1147
1150/* 1148/*
@@ -1247,7 +1245,7 @@ udf_process_sequence(struct super_block *sb, long block, long lastblock, kernel_
1247 done = 1; 1245 done = 1;
1248 break; 1246 break;
1249 } 1247 }
1250 udf_release_data(bh); 1248 brelse(bh);
1251 } 1249 }
1252 for (i=0; i<VDS_POS_LENGTH; i++) 1250 for (i=0; i<VDS_POS_LENGTH; i++)
1253 { 1251 {
@@ -1269,10 +1267,10 @@ udf_process_sequence(struct super_block *sb, long block, long lastblock, kernel_
1269 gd = (struct generic_desc *)bh2->b_data; 1267 gd = (struct generic_desc *)bh2->b_data;
1270 if (ident == TAG_IDENT_PD) 1268 if (ident == TAG_IDENT_PD)
1271 udf_load_partdesc(sb, bh2); 1269 udf_load_partdesc(sb, bh2);
1272 udf_release_data(bh2); 1270 brelse(bh2);
1273 } 1271 }
1274 } 1272 }
1275 udf_release_data(bh); 1273 brelse(bh);
1276 } 1274 }
1277 } 1275 }
1278 1276
@@ -1335,7 +1333,7 @@ udf_load_partition(struct super_block *sb, kernel_lb_addr *fileset)
1335 reserve_e = reserve_e >> sb->s_blocksize_bits; 1333 reserve_e = reserve_e >> sb->s_blocksize_bits;
1336 reserve_e += reserve_s; 1334 reserve_e += reserve_s;
1337 1335
1338 udf_release_data(bh); 1336 brelse(bh);
1339 1337
1340 /* Process the main & reserve sequences */ 1338 /* Process the main & reserve sequences */
1341 /* responsible for finding the PartitionDesc(s) */ 1339 /* responsible for finding the PartitionDesc(s) */
@@ -1405,12 +1403,14 @@ udf_load_partition(struct super_block *sb, kernel_lb_addr *fileset)
1405 1403
1406 pos = udf_block_map(UDF_SB_VAT(sb), 0); 1404 pos = udf_block_map(UDF_SB_VAT(sb), 0);
1407 bh = sb_bread(sb, pos); 1405 bh = sb_bread(sb, pos);
1406 if (!bh)
1407 return 1;
1408 UDF_SB_TYPEVIRT(sb,i).s_start_offset = 1408 UDF_SB_TYPEVIRT(sb,i).s_start_offset =
1409 le16_to_cpu(((struct virtualAllocationTable20 *)bh->b_data + udf_ext0_offset(UDF_SB_VAT(sb)))->lengthHeader) + 1409 le16_to_cpu(((struct virtualAllocationTable20 *)bh->b_data + udf_ext0_offset(UDF_SB_VAT(sb)))->lengthHeader) +
1410 udf_ext0_offset(UDF_SB_VAT(sb)); 1410 udf_ext0_offset(UDF_SB_VAT(sb));
1411 UDF_SB_TYPEVIRT(sb,i).s_num_entries = (UDF_SB_VAT(sb)->i_size - 1411 UDF_SB_TYPEVIRT(sb,i).s_num_entries = (UDF_SB_VAT(sb)->i_size -
1412 UDF_SB_TYPEVIRT(sb,i).s_start_offset) >> 2; 1412 UDF_SB_TYPEVIRT(sb,i).s_start_offset) >> 2;
1413 udf_release_data(bh); 1413 brelse(bh);
1414 } 1414 }
1415 UDF_SB_PARTROOT(sb,i) = udf_get_pblock(sb, 0, i, 0); 1415 UDF_SB_PARTROOT(sb,i) = udf_get_pblock(sb, 0, i, 0);
1416 UDF_SB_PARTLEN(sb,i) = UDF_SB_PARTLEN(sb,ino.partitionReferenceNum); 1416 UDF_SB_PARTLEN(sb,i) = UDF_SB_PARTLEN(sb,ino.partitionReferenceNum);
@@ -1663,7 +1663,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
1663 iput(inode); 1663 iput(inode);
1664 goto error_out; 1664 goto error_out;
1665 } 1665 }
1666 sb->s_maxbytes = 1<<30; 1666 sb->s_maxbytes = MAX_LFS_FILESIZE;
1667 return 0; 1667 return 0;
1668 1668
1669error_out: 1669error_out:
@@ -1682,7 +1682,7 @@ error_out:
1682 if (UDF_SB_PARTTYPE(sb, UDF_SB_PARTITION(sb)) == UDF_SPARABLE_MAP15) 1682 if (UDF_SB_PARTTYPE(sb, UDF_SB_PARTITION(sb)) == UDF_SPARABLE_MAP15)
1683 { 1683 {
1684 for (i=0; i<4; i++) 1684 for (i=0; i<4; i++)
1685 udf_release_data(UDF_SB_TYPESPAR(sb, UDF_SB_PARTITION(sb)).s_spar_map[i]); 1685 brelse(UDF_SB_TYPESPAR(sb, UDF_SB_PARTITION(sb)).s_spar_map[i]);
1686 } 1686 }
1687 } 1687 }
1688#ifdef CONFIG_UDF_NLS 1688#ifdef CONFIG_UDF_NLS
@@ -1691,7 +1691,7 @@ error_out:
1691#endif 1691#endif
1692 if (!(sb->s_flags & MS_RDONLY)) 1692 if (!(sb->s_flags & MS_RDONLY))
1693 udf_close_lvid(sb); 1693 udf_close_lvid(sb);
1694 udf_release_data(UDF_SB_LVIDBH(sb)); 1694 brelse(UDF_SB_LVIDBH(sb));
1695 UDF_SB_FREE(sb); 1695 UDF_SB_FREE(sb);
1696 kfree(sbi); 1696 kfree(sbi);
1697 sb->s_fs_info = NULL; 1697 sb->s_fs_info = NULL;
@@ -1760,7 +1760,7 @@ udf_put_super(struct super_block *sb)
1760 if (UDF_SB_PARTTYPE(sb, UDF_SB_PARTITION(sb)) == UDF_SPARABLE_MAP15) 1760 if (UDF_SB_PARTTYPE(sb, UDF_SB_PARTITION(sb)) == UDF_SPARABLE_MAP15)
1761 { 1761 {
1762 for (i=0; i<4; i++) 1762 for (i=0; i<4; i++)
1763 udf_release_data(UDF_SB_TYPESPAR(sb, UDF_SB_PARTITION(sb)).s_spar_map[i]); 1763 brelse(UDF_SB_TYPESPAR(sb, UDF_SB_PARTITION(sb)).s_spar_map[i]);
1764 } 1764 }
1765 } 1765 }
1766#ifdef CONFIG_UDF_NLS 1766#ifdef CONFIG_UDF_NLS
@@ -1769,7 +1769,7 @@ udf_put_super(struct super_block *sb)
1769#endif 1769#endif
1770 if (!(sb->s_flags & MS_RDONLY)) 1770 if (!(sb->s_flags & MS_RDONLY))
1771 udf_close_lvid(sb); 1771 udf_close_lvid(sb);
1772 udf_release_data(UDF_SB_LVIDBH(sb)); 1772 brelse(UDF_SB_LVIDBH(sb));
1773 UDF_SB_FREE(sb); 1773 UDF_SB_FREE(sb);
1774 kfree(sb->s_fs_info); 1774 kfree(sb->s_fs_info);
1775 sb->s_fs_info = NULL; 1775 sb->s_fs_info = NULL;
@@ -1839,7 +1839,7 @@ udf_count_free_bitmap(struct super_block *sb, struct udf_bitmap *bitmap)
1839 } 1839 }
1840 else if (ident != TAG_IDENT_SBD) 1840 else if (ident != TAG_IDENT_SBD)
1841 { 1841 {
1842 udf_release_data(bh); 1842 brelse(bh);
1843 printk(KERN_ERR "udf: udf_count_free failed\n"); 1843 printk(KERN_ERR "udf: udf_count_free failed\n");
1844 goto out; 1844 goto out;
1845 } 1845 }
@@ -1861,7 +1861,7 @@ udf_count_free_bitmap(struct super_block *sb, struct udf_bitmap *bitmap)
1861 } 1861 }
1862 if ( bytes ) 1862 if ( bytes )
1863 { 1863 {
1864 udf_release_data(bh); 1864 brelse(bh);
1865 newblock = udf_get_lb_pblock(sb, loc, ++block); 1865 newblock = udf_get_lb_pblock(sb, loc, ++block);
1866 bh = udf_tread(sb, newblock); 1866 bh = udf_tread(sb, newblock);
1867 if (!bh) 1867 if (!bh)
@@ -1873,7 +1873,7 @@ udf_count_free_bitmap(struct super_block *sb, struct udf_bitmap *bitmap)
1873 ptr = (uint8_t *)bh->b_data; 1873 ptr = (uint8_t *)bh->b_data;
1874 } 1874 }
1875 } 1875 }
1876 udf_release_data(bh); 1876 brelse(bh);
1877 1877
1878out: 1878out:
1879 unlock_kernel(); 1879 unlock_kernel();
@@ -1885,21 +1885,20 @@ static unsigned int
1885udf_count_free_table(struct super_block *sb, struct inode * table) 1885udf_count_free_table(struct super_block *sb, struct inode * table)
1886{ 1886{
1887 unsigned int accum = 0; 1887 unsigned int accum = 0;
1888 uint32_t extoffset, elen; 1888 uint32_t elen;
1889 kernel_lb_addr bloc, eloc; 1889 kernel_lb_addr eloc;
1890 int8_t etype; 1890 int8_t etype;
1891 struct buffer_head *bh = NULL; 1891 struct extent_position epos;
1892 1892
1893 lock_kernel(); 1893 lock_kernel();
1894 1894
1895 bloc = UDF_I_LOCATION(table); 1895 epos.block = UDF_I_LOCATION(table);
1896 extoffset = sizeof(struct unallocSpaceEntry); 1896 epos.offset = sizeof(struct unallocSpaceEntry);
1897 epos.bh = NULL;
1897 1898
1898 while ((etype = udf_next_aext(table, &bloc, &extoffset, &eloc, &elen, &bh, 1)) != -1) 1899 while ((etype = udf_next_aext(table, &epos, &eloc, &elen, 1)) != -1)
1899 {
1900 accum += (elen >> table->i_sb->s_blocksize_bits); 1900 accum += (elen >> table->i_sb->s_blocksize_bits);
1901 } 1901 brelse(epos.bh);
1902 udf_release_data(bh);
1903 1902
1904 unlock_kernel(); 1903 unlock_kernel();
1905 1904
diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c
index ba068a786563..12613b680cc4 100644
--- a/fs/udf/symlink.c
+++ b/fs/udf/symlink.c
@@ -95,7 +95,7 @@ static int udf_symlink_filler(struct file *file, struct page *page)
95 } 95 }
96 96
97 udf_pc_to_char(inode->i_sb, symlink, inode->i_size, p); 97 udf_pc_to_char(inode->i_sb, symlink, inode->i_size, p);
98 udf_release_data(bh); 98 brelse(bh);
99 99
100 unlock_kernel(); 100 unlock_kernel();
101 SetPageUptodate(page); 101 SetPageUptodate(page);
diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c
index 0abd66ce36ea..77975ae291a5 100644
--- a/fs/udf/truncate.c
+++ b/fs/udf/truncate.c
@@ -28,8 +28,8 @@
28#include "udf_i.h" 28#include "udf_i.h"
29#include "udf_sb.h" 29#include "udf_sb.h"
30 30
31static void extent_trunc(struct inode * inode, kernel_lb_addr bloc, int extoffset, 31static void extent_trunc(struct inode * inode, struct extent_position *epos,
32 kernel_lb_addr eloc, int8_t etype, uint32_t elen, struct buffer_head *bh, uint32_t nelen) 32 kernel_lb_addr eloc, int8_t etype, uint32_t elen, uint32_t nelen)
33{ 33{
34 kernel_lb_addr neloc = { 0, 0 }; 34 kernel_lb_addr neloc = { 0, 0 };
35 int last_block = (elen + inode->i_sb->s_blocksize - 1) >> inode->i_sb->s_blocksize_bits; 35 int last_block = (elen + inode->i_sb->s_blocksize - 1) >> inode->i_sb->s_blocksize_bits;
@@ -49,7 +49,7 @@ static void extent_trunc(struct inode * inode, kernel_lb_addr bloc, int extoffse
49 49
50 if (elen != nelen) 50 if (elen != nelen)
51 { 51 {
52 udf_write_aext(inode, bloc, &extoffset, neloc, nelen, bh, 0); 52 udf_write_aext(inode, epos, neloc, nelen, 0);
53 if (last_block - first_block > 0) 53 if (last_block - first_block > 0)
54 { 54 {
55 if (etype == (EXT_RECORDED_ALLOCATED >> 30)) 55 if (etype == (EXT_RECORDED_ALLOCATED >> 30))
@@ -63,18 +63,16 @@ static void extent_trunc(struct inode * inode, kernel_lb_addr bloc, int extoffse
63 63
64void udf_discard_prealloc(struct inode * inode) 64void udf_discard_prealloc(struct inode * inode)
65{ 65{
66 kernel_lb_addr bloc, eloc; 66 struct extent_position epos = { NULL, 0, {0, 0}};
67 uint32_t extoffset = 0, elen, nelen; 67 kernel_lb_addr eloc;
68 uint32_t elen, nelen;
68 uint64_t lbcount = 0; 69 uint64_t lbcount = 0;
69 int8_t etype = -1, netype; 70 int8_t etype = -1, netype;
70 struct buffer_head *bh = NULL;
71 int adsize; 71 int adsize;
72 72
73 if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB || 73 if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB ||
74 inode->i_size == UDF_I_LENEXTENTS(inode)) 74 inode->i_size == UDF_I_LENEXTENTS(inode))
75 {
76 return; 75 return;
77 }
78 76
79 if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_SHORT) 77 if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_SHORT)
80 adsize = sizeof(short_ad); 78 adsize = sizeof(short_ad);
@@ -83,52 +81,58 @@ void udf_discard_prealloc(struct inode * inode)
83 else 81 else
84 adsize = 0; 82 adsize = 0;
85 83
86 bloc = UDF_I_LOCATION(inode); 84 epos.block = UDF_I_LOCATION(inode);
87 85
88 while ((netype = udf_next_aext(inode, &bloc, &extoffset, &eloc, &elen, &bh, 1)) != -1) 86 /* Find the last extent in the file */
87 while ((netype = udf_next_aext(inode, &epos, &eloc, &elen, 1)) != -1)
89 { 88 {
90 etype = netype; 89 etype = netype;
91 lbcount += elen; 90 lbcount += elen;
92 if (lbcount > inode->i_size && lbcount - inode->i_size < inode->i_sb->s_blocksize) 91 if (lbcount > inode->i_size && lbcount - elen < inode->i_size)
93 { 92 {
93 WARN_ON(lbcount - inode->i_size >= inode->i_sb->s_blocksize);
94 nelen = elen - (lbcount - inode->i_size); 94 nelen = elen - (lbcount - inode->i_size);
95 extent_trunc(inode, bloc, extoffset-adsize, eloc, etype, elen, bh, nelen); 95 epos.offset -= adsize;
96 extent_trunc(inode, &epos, eloc, etype, elen, nelen);
97 epos.offset += adsize;
96 lbcount = inode->i_size; 98 lbcount = inode->i_size;
97 } 99 }
98 } 100 }
99 if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) 101 if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) {
100 { 102 epos.offset -= adsize;
101 extoffset -= adsize;
102 lbcount -= elen; 103 lbcount -= elen;
103 extent_trunc(inode, bloc, extoffset, eloc, etype, elen, bh, 0); 104 extent_trunc(inode, &epos, eloc, etype, elen, 0);
104 if (!bh) 105 if (!epos.bh)
105 { 106 {
106 UDF_I_LENALLOC(inode) = extoffset - udf_file_entry_alloc_offset(inode); 107 UDF_I_LENALLOC(inode) = epos.offset - udf_file_entry_alloc_offset(inode);
107 mark_inode_dirty(inode); 108 mark_inode_dirty(inode);
108 } 109 }
109 else 110 else
110 { 111 {
111 struct allocExtDesc *aed = (struct allocExtDesc *)(bh->b_data); 112 struct allocExtDesc *aed = (struct allocExtDesc *)(epos.bh->b_data);
112 aed->lengthAllocDescs = cpu_to_le32(extoffset - sizeof(struct allocExtDesc)); 113 aed->lengthAllocDescs = cpu_to_le32(epos.offset - sizeof(struct allocExtDesc));
113 if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201) 114 if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201)
114 udf_update_tag(bh->b_data, extoffset); 115 udf_update_tag(epos.bh->b_data, epos.offset);
115 else 116 else
116 udf_update_tag(bh->b_data, sizeof(struct allocExtDesc)); 117 udf_update_tag(epos.bh->b_data, sizeof(struct allocExtDesc));
117 mark_buffer_dirty_inode(bh, inode); 118 mark_buffer_dirty_inode(epos.bh, inode);
118 } 119 }
119 } 120 }
120 UDF_I_LENEXTENTS(inode) = lbcount; 121 UDF_I_LENEXTENTS(inode) = lbcount;
121 122
122 udf_release_data(bh); 123 WARN_ON(lbcount != inode->i_size);
124 brelse(epos.bh);
123} 125}
124 126
125void udf_truncate_extents(struct inode * inode) 127void udf_truncate_extents(struct inode * inode)
126{ 128{
127 kernel_lb_addr bloc, eloc, neloc = { 0, 0 }; 129 struct extent_position epos;
128 uint32_t extoffset, elen, offset, nelen = 0, lelen = 0, lenalloc; 130 kernel_lb_addr eloc, neloc = { 0, 0 };
131 uint32_t elen, nelen = 0, indirect_ext_len = 0, lenalloc;
129 int8_t etype; 132 int8_t etype;
130 int first_block = inode->i_size >> inode->i_sb->s_blocksize_bits; 133 struct super_block *sb = inode->i_sb;
131 struct buffer_head *bh = NULL; 134 sector_t first_block = inode->i_size >> sb->s_blocksize_bits, offset;
135 loff_t byte_offset;
132 int adsize; 136 int adsize;
133 137
134 if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_SHORT) 138 if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_SHORT)
@@ -136,158 +140,130 @@ void udf_truncate_extents(struct inode * inode)
136 else if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_LONG) 140 else if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_LONG)
137 adsize = sizeof(long_ad); 141 adsize = sizeof(long_ad);
138 else 142 else
139 adsize = 0; 143 BUG();
140 144
141 etype = inode_bmap(inode, first_block, &bloc, &extoffset, &eloc, &elen, &offset, &bh); 145 etype = inode_bmap(inode, first_block, &epos, &eloc, &elen, &offset);
142 offset += (inode->i_size & (inode->i_sb->s_blocksize - 1)); 146 byte_offset = (offset << sb->s_blocksize_bits) + (inode->i_size & (sb->s_blocksize-1));
143 if (etype != -1) 147 if (etype != -1)
144 { 148 {
145 extoffset -= adsize; 149 epos.offset -= adsize;
146 extent_trunc(inode, bloc, extoffset, eloc, etype, elen, bh, offset); 150 extent_trunc(inode, &epos, eloc, etype, elen, byte_offset);
147 extoffset += adsize; 151 epos.offset += adsize;
148 152 if (byte_offset)
149 if (offset) 153 lenalloc = epos.offset;
150 lenalloc = extoffset;
151 else 154 else
152 lenalloc = extoffset - adsize; 155 lenalloc = epos.offset - adsize;
153 156
154 if (!bh) 157 if (!epos.bh)
155 lenalloc -= udf_file_entry_alloc_offset(inode); 158 lenalloc -= udf_file_entry_alloc_offset(inode);
156 else 159 else
157 lenalloc -= sizeof(struct allocExtDesc); 160 lenalloc -= sizeof(struct allocExtDesc);
158 161
159 while ((etype = udf_current_aext(inode, &bloc, &extoffset, &eloc, &elen, &bh, 0)) != -1) 162 while ((etype = udf_current_aext(inode, &epos, &eloc, &elen, 0)) != -1)
160 { 163 {
161 if (etype == (EXT_NEXT_EXTENT_ALLOCDECS >> 30)) 164 if (etype == (EXT_NEXT_EXTENT_ALLOCDECS >> 30))
162 { 165 {
163 udf_write_aext(inode, bloc, &extoffset, neloc, nelen, bh, 0); 166 udf_write_aext(inode, &epos, neloc, nelen, 0);
164 extoffset = 0; 167 if (indirect_ext_len)
165 if (lelen)
166 { 168 {
167 if (!bh) 169 /* We managed to free all extents in the
170 * indirect extent - free it too */
171 if (!epos.bh)
168 BUG(); 172 BUG();
169 else 173 udf_free_blocks(sb, inode, epos.block, 0, indirect_ext_len);
170 memset(bh->b_data, 0x00, sizeof(struct allocExtDesc));
171 udf_free_blocks(inode->i_sb, inode, bloc, 0, lelen);
172 } 174 }
173 else 175 else
174 { 176 {
175 if (!bh) 177 if (!epos.bh)
176 { 178 {
177 UDF_I_LENALLOC(inode) = lenalloc; 179 UDF_I_LENALLOC(inode) = lenalloc;
178 mark_inode_dirty(inode); 180 mark_inode_dirty(inode);
179 } 181 }
180 else 182 else
181 { 183 {
182 struct allocExtDesc *aed = (struct allocExtDesc *)(bh->b_data); 184 struct allocExtDesc *aed = (struct allocExtDesc *)(epos.bh->b_data);
183 aed->lengthAllocDescs = cpu_to_le32(lenalloc); 185 aed->lengthAllocDescs = cpu_to_le32(lenalloc);
184 if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201) 186 if (!UDF_QUERY_FLAG(sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(sb) >= 0x0201)
185 udf_update_tag(bh->b_data, lenalloc + 187 udf_update_tag(epos.bh->b_data, lenalloc +
186 sizeof(struct allocExtDesc)); 188 sizeof(struct allocExtDesc));
187 else 189 else
188 udf_update_tag(bh->b_data, sizeof(struct allocExtDesc)); 190 udf_update_tag(epos.bh->b_data, sizeof(struct allocExtDesc));
189 mark_buffer_dirty_inode(bh, inode); 191 mark_buffer_dirty_inode(epos.bh, inode);
190 } 192 }
191 } 193 }
192 194 brelse(epos.bh);
193 udf_release_data(bh); 195 epos.offset = sizeof(struct allocExtDesc);
194 extoffset = sizeof(struct allocExtDesc); 196 epos.block = eloc;
195 bloc = eloc; 197 epos.bh = udf_tread(sb, udf_get_lb_pblock(sb, eloc, 0));
196 bh = udf_tread(inode->i_sb, udf_get_lb_pblock(inode->i_sb, bloc, 0));
197 if (elen) 198 if (elen)
198 lelen = (elen + inode->i_sb->s_blocksize - 1) >> 199 indirect_ext_len = (elen +
199 inode->i_sb->s_blocksize_bits; 200 sb->s_blocksize - 1) >>
201 sb->s_blocksize_bits;
200 else 202 else
201 lelen = 1; 203 indirect_ext_len = 1;
202 } 204 }
203 else 205 else
204 { 206 {
205 extent_trunc(inode, bloc, extoffset, eloc, etype, elen, bh, 0); 207 extent_trunc(inode, &epos, eloc, etype, elen, 0);
206 extoffset += adsize; 208 epos.offset += adsize;
207 } 209 }
208 } 210 }
209 211
210 if (lelen) 212 if (indirect_ext_len)
211 { 213 {
212 if (!bh) 214 if (!epos.bh)
213 BUG(); 215 BUG();
214 else 216 udf_free_blocks(sb, inode, epos.block, 0, indirect_ext_len);
215 memset(bh->b_data, 0x00, sizeof(struct allocExtDesc));
216 udf_free_blocks(inode->i_sb, inode, bloc, 0, lelen);
217 } 217 }
218 else 218 else
219 { 219 {
220 if (!bh) 220 if (!epos.bh)
221 { 221 {
222 UDF_I_LENALLOC(inode) = lenalloc; 222 UDF_I_LENALLOC(inode) = lenalloc;
223 mark_inode_dirty(inode); 223 mark_inode_dirty(inode);
224 } 224 }
225 else 225 else
226 { 226 {
227 struct allocExtDesc *aed = (struct allocExtDesc *)(bh->b_data); 227 struct allocExtDesc *aed = (struct allocExtDesc *)(epos.bh->b_data);
228 aed->lengthAllocDescs = cpu_to_le32(lenalloc); 228 aed->lengthAllocDescs = cpu_to_le32(lenalloc);
229 if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201) 229 if (!UDF_QUERY_FLAG(sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(sb) >= 0x0201)
230 udf_update_tag(bh->b_data, lenalloc + 230 udf_update_tag(epos.bh->b_data, lenalloc +
231 sizeof(struct allocExtDesc)); 231 sizeof(struct allocExtDesc));
232 else 232 else
233 udf_update_tag(bh->b_data, sizeof(struct allocExtDesc)); 233 udf_update_tag(epos.bh->b_data, sizeof(struct allocExtDesc));
234 mark_buffer_dirty_inode(bh, inode); 234 mark_buffer_dirty_inode(epos.bh, inode);
235 } 235 }
236 } 236 }
237 } 237 }
238 else if (inode->i_size) 238 else if (inode->i_size)
239 { 239 {
240 if (offset) 240 if (byte_offset)
241 { 241 {
242 kernel_long_ad extent;
243
242 /* 244 /*
243 * OK, there is not extent covering inode->i_size and 245 * OK, there is not extent covering inode->i_size and
244 * no extent above inode->i_size => truncate is 246 * no extent above inode->i_size => truncate is
245 * extending the file by 'offset'. 247 * extending the file by 'offset' blocks.
246 */ 248 */
247 if ((!bh && extoffset == udf_file_entry_alloc_offset(inode)) || 249 if ((!epos.bh && epos.offset == udf_file_entry_alloc_offset(inode)) ||
248 (bh && extoffset == sizeof(struct allocExtDesc))) { 250 (epos.bh && epos.offset == sizeof(struct allocExtDesc))) {
249 /* File has no extents at all! */ 251 /* File has no extents at all or has empty last
250 memset(&eloc, 0x00, sizeof(kernel_lb_addr)); 252 * indirect extent! Create a fake extent... */
251 elen = EXT_NOT_RECORDED_NOT_ALLOCATED | offset; 253 extent.extLocation.logicalBlockNum = 0;
252 udf_add_aext(inode, &bloc, &extoffset, eloc, elen, &bh, 1); 254 extent.extLocation.partitionReferenceNum = 0;
255 extent.extLength = EXT_NOT_RECORDED_NOT_ALLOCATED;
253 } 256 }
254 else { 257 else {
255 extoffset -= adsize; 258 epos.offset -= adsize;
256 etype = udf_next_aext(inode, &bloc, &extoffset, &eloc, &elen, &bh, 1); 259 etype = udf_next_aext(inode, &epos,
257 if (etype == (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30)) 260 &extent.extLocation, &extent.extLength, 0);
258 { 261 extent.extLength |= etype << 30;
259 extoffset -= adsize;
260 elen = EXT_NOT_RECORDED_NOT_ALLOCATED | (elen + offset);
261 udf_write_aext(inode, bloc, &extoffset, eloc, elen, bh, 0);
262 }
263 else if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30))
264 {
265 kernel_lb_addr neloc = { 0, 0 };
266 extoffset -= adsize;
267 nelen = EXT_NOT_RECORDED_NOT_ALLOCATED |
268 ((elen + offset + inode->i_sb->s_blocksize - 1) &
269 ~(inode->i_sb->s_blocksize - 1));
270 udf_write_aext(inode, bloc, &extoffset, neloc, nelen, bh, 1);
271 udf_add_aext(inode, &bloc, &extoffset, eloc, (etype << 30) | elen, &bh, 1);
272 }
273 else
274 {
275 if (elen & (inode->i_sb->s_blocksize - 1))
276 {
277 extoffset -= adsize;
278 elen = EXT_RECORDED_ALLOCATED |
279 ((elen + inode->i_sb->s_blocksize - 1) &
280 ~(inode->i_sb->s_blocksize - 1));
281 udf_write_aext(inode, bloc, &extoffset, eloc, elen, bh, 1);
282 }
283 memset(&eloc, 0x00, sizeof(kernel_lb_addr));
284 elen = EXT_NOT_RECORDED_NOT_ALLOCATED | offset;
285 udf_add_aext(inode, &bloc, &extoffset, eloc, elen, &bh, 1);
286 }
287 } 262 }
263 udf_extend_file(inode, &epos, &extent, offset+((inode->i_size & (sb->s_blocksize-1)) != 0));
288 } 264 }
289 } 265 }
290 UDF_I_LENEXTENTS(inode) = inode->i_size; 266 UDF_I_LENEXTENTS(inode) = inode->i_size;
291 267
292 udf_release_data(bh); 268 brelse(epos.bh);
293} 269}
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
index 110f8d62616f..3b2e6c8cb151 100644
--- a/fs/udf/udf_sb.h
+++ b/fs/udf/udf_sb.h
@@ -93,7 +93,7 @@ static inline struct udf_sb_info *UDF_SB(struct super_block *sb)
93 for (i=0; i<nr_groups; i++)\ 93 for (i=0; i<nr_groups; i++)\
94 {\ 94 {\
95 if (UDF_SB_BITMAP(X,Y,Z,i))\ 95 if (UDF_SB_BITMAP(X,Y,Z,i))\
96 udf_release_data(UDF_SB_BITMAP(X,Y,Z,i));\ 96 brelse(UDF_SB_BITMAP(X,Y,Z,i));\
97 }\ 97 }\
98 if (size <= PAGE_SIZE)\ 98 if (size <= PAGE_SIZE)\
99 kfree(UDF_SB_PARTMAPS(X)[Y].Z.s_bitmap);\ 99 kfree(UDF_SB_PARTMAPS(X)[Y].Z.s_bitmap);\
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index ee1dece1f6f5..67ded289497c 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -77,6 +77,13 @@ struct ustr
77 uint8_t u_len; 77 uint8_t u_len;
78}; 78};
79 79
80struct extent_position {
81 struct buffer_head *bh;
82 uint32_t offset;
83 kernel_lb_addr block;
84};
85
86
80/* super.c */ 87/* super.c */
81extern void udf_error(struct super_block *, const char *, const char *, ...); 88extern void udf_error(struct super_block *, const char *, const char *, ...);
82extern void udf_warning(struct super_block *, const char *, const char *, ...); 89extern void udf_warning(struct super_block *, const char *, const char *, ...);
@@ -98,13 +105,14 @@ extern void udf_read_inode(struct inode *);
98extern void udf_delete_inode(struct inode *); 105extern void udf_delete_inode(struct inode *);
99extern void udf_clear_inode(struct inode *); 106extern void udf_clear_inode(struct inode *);
100extern int udf_write_inode(struct inode *, int); 107extern int udf_write_inode(struct inode *, int);
101extern long udf_block_map(struct inode *, long); 108extern long udf_block_map(struct inode *, sector_t);
102extern int8_t inode_bmap(struct inode *, int, kernel_lb_addr *, uint32_t *, kernel_lb_addr *, uint32_t *, uint32_t *, struct buffer_head **); 109extern int udf_extend_file(struct inode *, struct extent_position *, kernel_long_ad *, sector_t);
103extern int8_t udf_add_aext(struct inode *, kernel_lb_addr *, int *, kernel_lb_addr, uint32_t, struct buffer_head **, int); 110extern int8_t inode_bmap(struct inode *, sector_t, struct extent_position *, kernel_lb_addr *, uint32_t *, sector_t *);
104extern int8_t udf_write_aext(struct inode *, kernel_lb_addr, int *, kernel_lb_addr, uint32_t, struct buffer_head *, int); 111extern int8_t udf_add_aext(struct inode *, struct extent_position *, kernel_lb_addr, uint32_t, int);
105extern int8_t udf_delete_aext(struct inode *, kernel_lb_addr, int, kernel_lb_addr, uint32_t, struct buffer_head *); 112extern int8_t udf_write_aext(struct inode *, struct extent_position *, kernel_lb_addr, uint32_t, int);
106extern int8_t udf_next_aext(struct inode *, kernel_lb_addr *, int *, kernel_lb_addr *, uint32_t *, struct buffer_head **, int); 113extern int8_t udf_delete_aext(struct inode *, struct extent_position, kernel_lb_addr, uint32_t);
107extern int8_t udf_current_aext(struct inode *, kernel_lb_addr *, int *, kernel_lb_addr *, uint32_t *, struct buffer_head **, int); 114extern int8_t udf_next_aext(struct inode *, struct extent_position *, kernel_lb_addr *, uint32_t *, int);
115extern int8_t udf_current_aext(struct inode *, struct extent_position *, kernel_lb_addr *, uint32_t *, int);
108 116
109/* misc.c */ 117/* misc.c */
110extern struct buffer_head *udf_tgetblk(struct super_block *, int); 118extern struct buffer_head *udf_tgetblk(struct super_block *, int);
@@ -113,7 +121,6 @@ extern struct genericFormat *udf_add_extendedattr(struct inode *, uint32_t, uint
113extern struct genericFormat *udf_get_extendedattr(struct inode *, uint32_t, uint8_t); 121extern struct genericFormat *udf_get_extendedattr(struct inode *, uint32_t, uint8_t);
114extern struct buffer_head *udf_read_tagged(struct super_block *, uint32_t, uint32_t, uint16_t *); 122extern struct buffer_head *udf_read_tagged(struct super_block *, uint32_t, uint32_t, uint16_t *);
115extern struct buffer_head *udf_read_ptagged(struct super_block *, kernel_lb_addr, uint32_t, uint16_t *); 123extern struct buffer_head *udf_read_ptagged(struct super_block *, kernel_lb_addr, uint32_t, uint16_t *);
116extern void udf_release_data(struct buffer_head *);
117extern void udf_update_tag(char *, int); 124extern void udf_update_tag(char *, int);
118extern void udf_new_tag(char *, uint16_t, uint16_t, uint16_t, uint32_t, int); 125extern void udf_new_tag(char *, uint16_t, uint16_t, uint16_t, uint32_t, int);
119 126
@@ -151,7 +158,7 @@ extern int udf_new_block(struct super_block *, struct inode *, uint16_t, uint32_
151extern int udf_fsync_file(struct file *, struct dentry *, int); 158extern int udf_fsync_file(struct file *, struct dentry *, int);
152 159
153/* directory.c */ 160/* directory.c */
154extern struct fileIdentDesc * udf_fileident_read(struct inode *, loff_t *, struct udf_fileident_bh *, struct fileIdentDesc *, kernel_lb_addr *, uint32_t *, kernel_lb_addr *, uint32_t *, uint32_t *, struct buffer_head **); 161extern struct fileIdentDesc * udf_fileident_read(struct inode *, loff_t *, struct udf_fileident_bh *, struct fileIdentDesc *, struct extent_position *, kernel_lb_addr *, uint32_t *, sector_t *);
155extern struct fileIdentDesc * udf_get_fileident(void * buffer, int bufsize, int * offset); 162extern struct fileIdentDesc * udf_get_fileident(void * buffer, int bufsize, int * offset);
156extern long_ad * udf_get_filelongad(uint8_t *, int, int *, int); 163extern long_ad * udf_get_filelongad(uint8_t *, int, int *, int);
157extern short_ad * udf_get_fileshortad(uint8_t *, int, int *, int); 164extern short_ad * udf_get_fileshortad(uint8_t *, int, int *, int);
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index 4890ddf1518e..154452172f43 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -19,7 +19,6 @@
19#include <linux/time.h> 19#include <linux/time.h>
20#include <linux/fs.h> 20#include <linux/fs.h>
21#include <linux/ufs_fs.h> 21#include <linux/ufs_fs.h>
22#include <linux/smp_lock.h>
23 22
24#include "swab.h" 23#include "swab.h"
25#include "util.h" 24#include "util.h"
@@ -180,13 +179,9 @@ fail:
180static struct page *ufs_get_page(struct inode *dir, unsigned long n) 179static struct page *ufs_get_page(struct inode *dir, unsigned long n)
181{ 180{
182 struct address_space *mapping = dir->i_mapping; 181 struct address_space *mapping = dir->i_mapping;
183 struct page *page = read_cache_page(mapping, n, 182 struct page *page = read_mapping_page(mapping, n, NULL);
184 (filler_t*)mapping->a_ops->readpage, NULL);
185 if (!IS_ERR(page)) { 183 if (!IS_ERR(page)) {
186 wait_on_page_locked(page);
187 kmap(page); 184 kmap(page);
188 if (!PageUptodate(page))
189 goto fail;
190 if (!PageChecked(page)) 185 if (!PageChecked(page))
191 ufs_check_page(page); 186 ufs_check_page(page);
192 if (PageError(page)) 187 if (PageError(page))
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index b5a6461ec66b..be7c48c5f203 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1237,8 +1237,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
1237{ 1237{
1238 struct ufs_inode_info *ei = (struct ufs_inode_info *) foo; 1238 struct ufs_inode_info *ei = (struct ufs_inode_info *) foo;
1239 1239
1240 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 1240 if (flags & SLAB_CTOR_CONSTRUCTOR)
1241 SLAB_CTOR_CONSTRUCTOR)
1242 inode_init_once(&ei->vfs_inode); 1241 inode_init_once(&ei->vfs_inode);
1243} 1242}
1244 1243
diff --git a/fs/ufs/util.c b/fs/ufs/util.c
index 17437574f79c..84357f1ff0ec 100644
--- a/fs/ufs/util.c
+++ b/fs/ufs/util.c
@@ -251,13 +251,11 @@ struct page *ufs_get_locked_page(struct address_space *mapping,
251 251
252 page = find_lock_page(mapping, index); 252 page = find_lock_page(mapping, index);
253 if (!page) { 253 if (!page) {
254 page = read_cache_page(mapping, index, 254 page = read_mapping_page(mapping, index, NULL);
255 (filler_t*)mapping->a_ops->readpage,
256 NULL);
257 255
258 if (IS_ERR(page)) { 256 if (IS_ERR(page)) {
259 printk(KERN_ERR "ufs_change_blocknr: " 257 printk(KERN_ERR "ufs_change_blocknr: "
260 "read_cache_page error: ino %lu, index: %lu\n", 258 "read_mapping_page error: ino %lu, index: %lu\n",
261 mapping->host->i_ino, index); 259 mapping->host->i_ino, index);
262 goto out; 260 goto out;
263 } 261 }
diff --git a/fs/utimes.c b/fs/utimes.c
index 99cf2cb11fec..480f7c8c29da 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -1,8 +1,10 @@
1#include <linux/compiler.h> 1#include <linux/compiler.h>
2#include <linux/file.h>
2#include <linux/fs.h> 3#include <linux/fs.h>
3#include <linux/linkage.h> 4#include <linux/linkage.h>
4#include <linux/namei.h> 5#include <linux/namei.h>
5#include <linux/sched.h> 6#include <linux/sched.h>
7#include <linux/stat.h>
6#include <linux/utime.h> 8#include <linux/utime.h>
7#include <asm/uaccess.h> 9#include <asm/uaccess.h>
8#include <asm/unistd.h> 10#include <asm/unistd.h>
@@ -20,54 +22,18 @@
20 * must be owner or have write permission. 22 * must be owner or have write permission.
21 * Else, update from *times, must be owner or super user. 23 * Else, update from *times, must be owner or super user.
22 */ 24 */
23asmlinkage long sys_utime(char __user * filename, struct utimbuf __user * times) 25asmlinkage long sys_utime(char __user *filename, struct utimbuf __user *times)
24{ 26{
25 int error; 27 struct timespec tv[2];
26 struct nameidata nd;
27 struct inode * inode;
28 struct iattr newattrs;
29 28
30 error = user_path_walk(filename, &nd);
31 if (error)
32 goto out;
33 inode = nd.dentry->d_inode;
34
35 error = -EROFS;
36 if (IS_RDONLY(inode))
37 goto dput_and_out;
38
39 /* Don't worry, the checks are done in inode_change_ok() */
40 newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME;
41 if (times) { 29 if (times) {
42 error = -EPERM; 30 if (get_user(tv[0].tv_sec, &times->actime) ||
43 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) 31 get_user(tv[1].tv_sec, &times->modtime))
44 goto dput_and_out; 32 return -EFAULT;
45 33 tv[0].tv_nsec = 0;
46 error = get_user(newattrs.ia_atime.tv_sec, &times->actime); 34 tv[1].tv_nsec = 0;
47 newattrs.ia_atime.tv_nsec = 0;
48 if (!error)
49 error = get_user(newattrs.ia_mtime.tv_sec, &times->modtime);
50 newattrs.ia_mtime.tv_nsec = 0;
51 if (error)
52 goto dput_and_out;
53
54 newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
55 } else {
56 error = -EACCES;
57 if (IS_IMMUTABLE(inode))
58 goto dput_and_out;
59
60 if (current->fsuid != inode->i_uid &&
61 (error = vfs_permission(&nd, MAY_WRITE)) != 0)
62 goto dput_and_out;
63 } 35 }
64 mutex_lock(&inode->i_mutex); 36 return do_utimes(AT_FDCWD, filename, times ? tv : NULL, 0);
65 error = notify_change(nd.dentry, &newattrs);
66 mutex_unlock(&inode->i_mutex);
67dput_and_out:
68 path_release(&nd);
69out:
70 return error;
71} 37}
72 38
73#endif 39#endif
@@ -76,18 +42,38 @@ out:
76 * must be owner or have write permission. 42 * must be owner or have write permission.
77 * Else, update from *times, must be owner or super user. 43 * Else, update from *times, must be owner or super user.
78 */ 44 */
79long do_utimes(int dfd, char __user *filename, struct timeval *times) 45long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags)
80{ 46{
81 int error; 47 int error;
82 struct nameidata nd; 48 struct nameidata nd;
83 struct inode * inode; 49 struct dentry *dentry;
50 struct inode *inode;
84 struct iattr newattrs; 51 struct iattr newattrs;
52 struct file *f = NULL;
85 53
86 error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd); 54 error = -EINVAL;
87 55 if (flags & ~AT_SYMLINK_NOFOLLOW)
88 if (error)
89 goto out; 56 goto out;
90 inode = nd.dentry->d_inode; 57
58 if (filename == NULL && dfd != AT_FDCWD) {
59 error = -EINVAL;
60 if (flags & AT_SYMLINK_NOFOLLOW)
61 goto out;
62
63 error = -EBADF;
64 f = fget(dfd);
65 if (!f)
66 goto out;
67 dentry = f->f_path.dentry;
68 } else {
69 error = __user_walk_fd(dfd, filename, (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW, &nd);
70 if (error)
71 goto out;
72
73 dentry = nd.dentry;
74 }
75
76 inode = dentry->d_inode;
91 77
92 error = -EROFS; 78 error = -EROFS;
93 if (IS_RDONLY(inode)) 79 if (IS_RDONLY(inode))
@@ -100,11 +86,21 @@ long do_utimes(int dfd, char __user *filename, struct timeval *times)
100 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) 86 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
101 goto dput_and_out; 87 goto dput_and_out;
102 88
103 newattrs.ia_atime.tv_sec = times[0].tv_sec; 89 if (times[0].tv_nsec == UTIME_OMIT)
104 newattrs.ia_atime.tv_nsec = times[0].tv_usec * 1000; 90 newattrs.ia_valid &= ~ATTR_ATIME;
105 newattrs.ia_mtime.tv_sec = times[1].tv_sec; 91 else if (times[0].tv_nsec != UTIME_NOW) {
106 newattrs.ia_mtime.tv_nsec = times[1].tv_usec * 1000; 92 newattrs.ia_atime.tv_sec = times[0].tv_sec;
107 newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; 93 newattrs.ia_atime.tv_nsec = times[0].tv_nsec;
94 newattrs.ia_valid |= ATTR_ATIME_SET;
95 }
96
97 if (times[1].tv_nsec == UTIME_OMIT)
98 newattrs.ia_valid &= ~ATTR_MTIME;
99 else if (times[1].tv_nsec != UTIME_NOW) {
100 newattrs.ia_mtime.tv_sec = times[1].tv_sec;
101 newattrs.ia_mtime.tv_nsec = times[1].tv_nsec;
102 newattrs.ia_valid |= ATTR_MTIME_SET;
103 }
108 } else { 104 } else {
109 error = -EACCES; 105 error = -EACCES;
110 if (IS_IMMUTABLE(inode)) 106 if (IS_IMMUTABLE(inode))
@@ -115,21 +111,67 @@ long do_utimes(int dfd, char __user *filename, struct timeval *times)
115 goto dput_and_out; 111 goto dput_and_out;
116 } 112 }
117 mutex_lock(&inode->i_mutex); 113 mutex_lock(&inode->i_mutex);
118 error = notify_change(nd.dentry, &newattrs); 114 error = notify_change(dentry, &newattrs);
119 mutex_unlock(&inode->i_mutex); 115 mutex_unlock(&inode->i_mutex);
120dput_and_out: 116dput_and_out:
121 path_release(&nd); 117 if (f)
118 fput(f);
119 else
120 path_release(&nd);
122out: 121out:
123 return error; 122 return error;
124} 123}
125 124
125asmlinkage long sys_utimensat(int dfd, char __user *filename, struct timespec __user *utimes, int flags)
126{
127 struct timespec tstimes[2];
128
129 if (utimes) {
130 if (copy_from_user(&tstimes, utimes, sizeof(tstimes)))
131 return -EFAULT;
132 if ((tstimes[0].tv_nsec == UTIME_OMIT ||
133 tstimes[0].tv_nsec == UTIME_NOW) &&
134 tstimes[0].tv_sec != 0)
135 return -EINVAL;
136 if ((tstimes[1].tv_nsec == UTIME_OMIT ||
137 tstimes[1].tv_nsec == UTIME_NOW) &&
138 tstimes[1].tv_sec != 0)
139 return -EINVAL;
140
141 /* Nothing to do, we must not even check the path. */
142 if (tstimes[0].tv_nsec == UTIME_OMIT &&
143 tstimes[1].tv_nsec == UTIME_OMIT)
144 return 0;
145 }
146
147 return do_utimes(dfd, filename, utimes ? tstimes : NULL, flags);
148}
149
126asmlinkage long sys_futimesat(int dfd, char __user *filename, struct timeval __user *utimes) 150asmlinkage long sys_futimesat(int dfd, char __user *filename, struct timeval __user *utimes)
127{ 151{
128 struct timeval times[2]; 152 struct timeval times[2];
153 struct timespec tstimes[2];
154
155 if (utimes) {
156 if (copy_from_user(&times, utimes, sizeof(times)))
157 return -EFAULT;
158
159 /* This test is needed to catch all invalid values. If we
160 would test only in do_utimes we would miss those invalid
161 values truncated by the multiplication with 1000. Note
162 that we also catch UTIME_{NOW,OMIT} here which are only
163 valid for utimensat. */
164 if (times[0].tv_usec >= 1000000 || times[0].tv_usec < 0 ||
165 times[1].tv_usec >= 1000000 || times[1].tv_usec < 0)
166 return -EINVAL;
167
168 tstimes[0].tv_sec = times[0].tv_sec;
169 tstimes[0].tv_nsec = 1000 * times[0].tv_usec;
170 tstimes[1].tv_sec = times[1].tv_sec;
171 tstimes[1].tv_nsec = 1000 * times[1].tv_usec;
172 }
129 173
130 if (utimes && copy_from_user(&times, utimes, sizeof(times))) 174 return do_utimes(dfd, filename, utimes ? tstimes : NULL, 0);
131 return -EFAULT;
132 return do_utimes(dfd, filename, utimes ? times : NULL);
133} 175}
134 176
135asmlinkage long sys_utimes(char __user *filename, struct timeval __user *utimes) 177asmlinkage long sys_utimes(char __user *filename, struct timeval __user *utimes)
diff --git a/fs/xattr.c b/fs/xattr.c
index 38646132ab0e..9f4568b55b0f 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -9,7 +9,6 @@
9 */ 9 */
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/slab.h> 11#include <linux/slab.h>
12#include <linux/smp_lock.h>
13#include <linux/file.h> 12#include <linux/file.h>
14#include <linux/xattr.h> 13#include <linux/xattr.h>
15#include <linux/namei.h> 14#include <linux/namei.h>
diff --git a/fs/xfs/linux-2.6/mrlock.h b/fs/xfs/linux-2.6/mrlock.h
index af168a1a98c1..c110bb002665 100644
--- a/fs/xfs/linux-2.6/mrlock.h
+++ b/fs/xfs/linux-2.6/mrlock.h
@@ -43,6 +43,18 @@ static inline void mrupdate(mrlock_t *mrp)
43 mrp->mr_writer = 1; 43 mrp->mr_writer = 1;
44} 44}
45 45
46static inline void mraccess_nested(mrlock_t *mrp, int subclass)
47{
48 down_read_nested(&mrp->mr_lock, subclass);
49}
50
51static inline void mrupdate_nested(mrlock_t *mrp, int subclass)
52{
53 down_write_nested(&mrp->mr_lock, subclass);
54 mrp->mr_writer = 1;
55}
56
57
46static inline int mrtryaccess(mrlock_t *mrp) 58static inline int mrtryaccess(mrlock_t *mrp)
47{ 59{
48 return down_read_trylock(&mrp->mr_lock); 60 return down_read_trylock(&mrp->mr_lock);
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 143ffc851c9d..4475588e973a 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -141,9 +141,46 @@ xfs_destroy_ioend(
141} 141}
142 142
143/* 143/*
144 * Update on-disk file size now that data has been written to disk.
145 * The current in-memory file size is i_size. If a write is beyond
146 * eof io_new_size will be the intended file size until i_size is
147 * updated. If this write does not extend all the way to the valid
148 * file size then restrict this update to the end of the write.
149 */
150STATIC void
151xfs_setfilesize(
152 xfs_ioend_t *ioend)
153{
154 xfs_inode_t *ip;
155 xfs_fsize_t isize;
156 xfs_fsize_t bsize;
157
158 ip = xfs_vtoi(ioend->io_vnode);
159
160 ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
161 ASSERT(ioend->io_type != IOMAP_READ);
162
163 if (unlikely(ioend->io_error))
164 return;
165
166 bsize = ioend->io_offset + ioend->io_size;
167
168 xfs_ilock(ip, XFS_ILOCK_EXCL);
169
170 isize = MAX(ip->i_size, ip->i_iocore.io_new_size);
171 isize = MIN(isize, bsize);
172
173 if (ip->i_d.di_size < isize) {
174 ip->i_d.di_size = isize;
175 ip->i_update_core = 1;
176 ip->i_update_size = 1;
177 }
178
179 xfs_iunlock(ip, XFS_ILOCK_EXCL);
180}
181
182/*
144 * Buffered IO write completion for delayed allocate extents. 183 * Buffered IO write completion for delayed allocate extents.
145 * TODO: Update ondisk isize now that we know the file data
146 * has been flushed (i.e. the notorious "NULL file" problem).
147 */ 184 */
148STATIC void 185STATIC void
149xfs_end_bio_delalloc( 186xfs_end_bio_delalloc(
@@ -152,6 +189,7 @@ xfs_end_bio_delalloc(
152 xfs_ioend_t *ioend = 189 xfs_ioend_t *ioend =
153 container_of(work, xfs_ioend_t, io_work); 190 container_of(work, xfs_ioend_t, io_work);
154 191
192 xfs_setfilesize(ioend);
155 xfs_destroy_ioend(ioend); 193 xfs_destroy_ioend(ioend);
156} 194}
157 195
@@ -165,6 +203,7 @@ xfs_end_bio_written(
165 xfs_ioend_t *ioend = 203 xfs_ioend_t *ioend =
166 container_of(work, xfs_ioend_t, io_work); 204 container_of(work, xfs_ioend_t, io_work);
167 205
206 xfs_setfilesize(ioend);
168 xfs_destroy_ioend(ioend); 207 xfs_destroy_ioend(ioend);
169} 208}
170 209
@@ -184,8 +223,23 @@ xfs_end_bio_unwritten(
184 xfs_off_t offset = ioend->io_offset; 223 xfs_off_t offset = ioend->io_offset;
185 size_t size = ioend->io_size; 224 size_t size = ioend->io_size;
186 225
187 if (likely(!ioend->io_error)) 226 if (likely(!ioend->io_error)) {
188 bhv_vop_bmap(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL); 227 bhv_vop_bmap(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL);
228 xfs_setfilesize(ioend);
229 }
230 xfs_destroy_ioend(ioend);
231}
232
233/*
234 * IO read completion for regular, written extents.
235 */
236STATIC void
237xfs_end_bio_read(
238 struct work_struct *work)
239{
240 xfs_ioend_t *ioend =
241 container_of(work, xfs_ioend_t, io_work);
242
189 xfs_destroy_ioend(ioend); 243 xfs_destroy_ioend(ioend);
190} 244}
191 245
@@ -224,6 +278,8 @@ xfs_alloc_ioend(
224 INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten); 278 INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten);
225 else if (type == IOMAP_DELAY) 279 else if (type == IOMAP_DELAY)
226 INIT_WORK(&ioend->io_work, xfs_end_bio_delalloc); 280 INIT_WORK(&ioend->io_work, xfs_end_bio_delalloc);
281 else if (type == IOMAP_READ)
282 INIT_WORK(&ioend->io_work, xfs_end_bio_read);
227 else 283 else
228 INIT_WORK(&ioend->io_work, xfs_end_bio_written); 284 INIT_WORK(&ioend->io_work, xfs_end_bio_written);
229 285
@@ -913,7 +969,7 @@ xfs_page_state_convert(
913 bh = head = page_buffers(page); 969 bh = head = page_buffers(page);
914 offset = page_offset(page); 970 offset = page_offset(page);
915 flags = -1; 971 flags = -1;
916 type = 0; 972 type = IOMAP_READ;
917 973
918 /* TODO: cleanup count and page_dirty */ 974 /* TODO: cleanup count and page_dirty */
919 975
@@ -999,7 +1055,7 @@ xfs_page_state_convert(
999 * That means it must already have extents allocated 1055 * That means it must already have extents allocated
1000 * underneath it. Map the extent by reading it. 1056 * underneath it. Map the extent by reading it.
1001 */ 1057 */
1002 if (!iomap_valid || type != 0) { 1058 if (!iomap_valid || type != IOMAP_READ) {
1003 flags = BMAPI_READ; 1059 flags = BMAPI_READ;
1004 size = xfs_probe_cluster(inode, page, bh, 1060 size = xfs_probe_cluster(inode, page, bh,
1005 head, 1); 1061 head, 1);
@@ -1010,7 +1066,7 @@ xfs_page_state_convert(
1010 iomap_valid = xfs_iomap_valid(&iomap, offset); 1066 iomap_valid = xfs_iomap_valid(&iomap, offset);
1011 } 1067 }
1012 1068
1013 type = 0; 1069 type = IOMAP_READ;
1014 if (!test_and_set_bit(BH_Lock, &bh->b_state)) { 1070 if (!test_and_set_bit(BH_Lock, &bh->b_state)) {
1015 ASSERT(buffer_mapped(bh)); 1071 ASSERT(buffer_mapped(bh));
1016 if (iomap_valid) 1072 if (iomap_valid)
@@ -1356,12 +1412,21 @@ xfs_end_io_direct(
1356 * completion handler in the future, in which case all this can 1412 * completion handler in the future, in which case all this can
1357 * go away. 1413 * go away.
1358 */ 1414 */
1359 if (private && size > 0) { 1415 ioend->io_offset = offset;
1360 ioend->io_offset = offset; 1416 ioend->io_size = size;
1361 ioend->io_size = size; 1417 if (ioend->io_type == IOMAP_READ) {
1418 xfs_finish_ioend(ioend);
1419 } else if (private && size > 0) {
1362 xfs_finish_ioend(ioend); 1420 xfs_finish_ioend(ioend);
1363 } else { 1421 } else {
1364 xfs_destroy_ioend(ioend); 1422 /*
1423 * A direct I/O write ioend starts it's life in unwritten
1424 * state in case they map an unwritten extent. This write
1425 * didn't map an unwritten extent so switch it's completion
1426 * handler.
1427 */
1428 INIT_WORK(&ioend->io_work, xfs_end_bio_written);
1429 xfs_finish_ioend(ioend);
1365 } 1430 }
1366 1431
1367 /* 1432 /*
@@ -1392,15 +1457,15 @@ xfs_vm_direct_IO(
1392 if (error) 1457 if (error)
1393 return -error; 1458 return -error;
1394 1459
1395 iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN);
1396
1397 if (rw == WRITE) { 1460 if (rw == WRITE) {
1461 iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN);
1398 ret = blockdev_direct_IO_own_locking(rw, iocb, inode, 1462 ret = blockdev_direct_IO_own_locking(rw, iocb, inode,
1399 iomap.iomap_target->bt_bdev, 1463 iomap.iomap_target->bt_bdev,
1400 iov, offset, nr_segs, 1464 iov, offset, nr_segs,
1401 xfs_get_blocks_direct, 1465 xfs_get_blocks_direct,
1402 xfs_end_io_direct); 1466 xfs_end_io_direct);
1403 } else { 1467 } else {
1468 iocb->private = xfs_alloc_ioend(inode, IOMAP_READ);
1404 ret = blockdev_direct_IO_no_locking(rw, iocb, inode, 1469 ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
1405 iomap.iomap_target->bt_bdev, 1470 iomap.iomap_target->bt_bdev,
1406 iov, offset, nr_segs, 1471 iov, offset, nr_segs,
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 69e9e80735d2..fe4f66a5af14 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1426,7 +1426,7 @@ xfs_free_bufhash(
1426/* 1426/*
1427 * buftarg list for delwrite queue processing 1427 * buftarg list for delwrite queue processing
1428 */ 1428 */
1429LIST_HEAD(xfs_buftarg_list); 1429static LIST_HEAD(xfs_buftarg_list);
1430static DEFINE_SPINLOCK(xfs_buftarg_lock); 1430static DEFINE_SPINLOCK(xfs_buftarg_lock);
1431 1431
1432STATIC void 1432STATIC void
@@ -1867,3 +1867,11 @@ xfs_buf_terminate(void)
1867 ktrace_free(xfs_buf_trace_buf); 1867 ktrace_free(xfs_buf_trace_buf);
1868#endif 1868#endif
1869} 1869}
1870
1871#ifdef CONFIG_KDB_MODULES
1872struct list_head *
1873xfs_get_buftarg_list(void)
1874{
1875 return &xfs_buftarg_list;
1876}
1877#endif
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 9e8ef8fef39f..b6241f6201a5 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -411,6 +411,9 @@ extern void xfs_free_buftarg(xfs_buftarg_t *, int);
411extern void xfs_wait_buftarg(xfs_buftarg_t *); 411extern void xfs_wait_buftarg(xfs_buftarg_t *);
412extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); 412extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
413extern int xfs_flush_buftarg(xfs_buftarg_t *, int); 413extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
414#ifdef CONFIG_KDB_MODULES
415extern struct list_head *xfs_get_buftarg_list(void);
416#endif
414 417
415#define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev) 418#define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev)
416#define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev) 419#define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev)
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index dc0562828e76..2eb87cd082af 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -35,7 +35,7 @@ fs_tosspages(
35 truncate_inode_pages(ip->i_mapping, first); 35 truncate_inode_pages(ip->i_mapping, first);
36} 36}
37 37
38void 38int
39fs_flushinval_pages( 39fs_flushinval_pages(
40 bhv_desc_t *bdp, 40 bhv_desc_t *bdp,
41 xfs_off_t first, 41 xfs_off_t first,
@@ -44,13 +44,16 @@ fs_flushinval_pages(
44{ 44{
45 bhv_vnode_t *vp = BHV_TO_VNODE(bdp); 45 bhv_vnode_t *vp = BHV_TO_VNODE(bdp);
46 struct inode *ip = vn_to_inode(vp); 46 struct inode *ip = vn_to_inode(vp);
47 int ret = 0;
47 48
48 if (VN_CACHED(vp)) { 49 if (VN_CACHED(vp)) {
49 if (VN_TRUNC(vp)) 50 if (VN_TRUNC(vp))
50 VUNTRUNCATE(vp); 51 VUNTRUNCATE(vp);
51 filemap_write_and_wait(ip->i_mapping); 52 ret = filemap_write_and_wait(ip->i_mapping);
52 truncate_inode_pages(ip->i_mapping, first); 53 if (!ret)
54 truncate_inode_pages(ip->i_mapping, first);
53 } 55 }
56 return ret;
54} 57}
55 58
56int 59int
@@ -63,14 +66,18 @@ fs_flush_pages(
63{ 66{
64 bhv_vnode_t *vp = BHV_TO_VNODE(bdp); 67 bhv_vnode_t *vp = BHV_TO_VNODE(bdp);
65 struct inode *ip = vn_to_inode(vp); 68 struct inode *ip = vn_to_inode(vp);
69 int ret = 0;
70 int ret2;
66 71
67 if (VN_DIRTY(vp)) { 72 if (VN_DIRTY(vp)) {
68 if (VN_TRUNC(vp)) 73 if (VN_TRUNC(vp))
69 VUNTRUNCATE(vp); 74 VUNTRUNCATE(vp);
70 filemap_fdatawrite(ip->i_mapping); 75 ret = filemap_fdatawrite(ip->i_mapping);
71 if (flags & XFS_B_ASYNC) 76 if (flags & XFS_B_ASYNC)
72 return 0; 77 return ret;
73 filemap_fdatawait(ip->i_mapping); 78 ret2 = filemap_fdatawait(ip->i_mapping);
79 if (!ret)
80 ret = ret2;
74 } 81 }
75 return 0; 82 return ret;
76} 83}
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.h b/fs/xfs/linux-2.6/xfs_fs_subr.h
index aee9ccdd18f7..c1b53118a303 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.h
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.h
@@ -23,7 +23,7 @@ extern int fs_noerr(void);
23extern int fs_nosys(void); 23extern int fs_nosys(void);
24extern void fs_noval(void); 24extern void fs_noval(void);
25extern void fs_tosspages(bhv_desc_t *, xfs_off_t, xfs_off_t, int); 25extern void fs_tosspages(bhv_desc_t *, xfs_off_t, xfs_off_t, int);
26extern void fs_flushinval_pages(bhv_desc_t *, xfs_off_t, xfs_off_t, int); 26extern int fs_flushinval_pages(bhv_desc_t *, xfs_off_t, xfs_off_t, int);
27extern int fs_flush_pages(bhv_desc_t *, xfs_off_t, xfs_off_t, uint64_t, int); 27extern int fs_flush_pages(bhv_desc_t *, xfs_off_t, xfs_off_t, uint64_t, int);
28 28
29#endif /* __XFS_FS_SUBR_H__ */ 29#endif /* __XFS_FS_SUBR_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index ff8d64eba9f8..86fb671a8bcc 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -191,7 +191,7 @@ xfs_read(
191 struct file *file = iocb->ki_filp; 191 struct file *file = iocb->ki_filp;
192 struct inode *inode = file->f_mapping->host; 192 struct inode *inode = file->f_mapping->host;
193 size_t size = 0; 193 size_t size = 0;
194 ssize_t ret; 194 ssize_t ret = 0;
195 xfs_fsize_t n; 195 xfs_fsize_t n;
196 xfs_inode_t *ip; 196 xfs_inode_t *ip;
197 xfs_mount_t *mp; 197 xfs_mount_t *mp;
@@ -224,7 +224,7 @@ xfs_read(
224 mp->m_rtdev_targp : mp->m_ddev_targp; 224 mp->m_rtdev_targp : mp->m_ddev_targp;
225 if ((*offset & target->bt_smask) || 225 if ((*offset & target->bt_smask) ||
226 (size & target->bt_smask)) { 226 (size & target->bt_smask)) {
227 if (*offset == ip->i_d.di_size) { 227 if (*offset == ip->i_size) {
228 return (0); 228 return (0);
229 } 229 }
230 return -XFS_ERROR(EINVAL); 230 return -XFS_ERROR(EINVAL);
@@ -263,9 +263,13 @@ xfs_read(
263 263
264 if (unlikely(ioflags & IO_ISDIRECT)) { 264 if (unlikely(ioflags & IO_ISDIRECT)) {
265 if (VN_CACHED(vp)) 265 if (VN_CACHED(vp))
266 bhv_vop_flushinval_pages(vp, ctooff(offtoct(*offset)), 266 ret = bhv_vop_flushinval_pages(vp, ctooff(offtoct(*offset)),
267 -1, FI_REMAPF_LOCKED); 267 -1, FI_REMAPF_LOCKED);
268 mutex_unlock(&inode->i_mutex); 268 mutex_unlock(&inode->i_mutex);
269 if (ret) {
270 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
271 return ret;
272 }
269 } 273 }
270 274
271 xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore, 275 xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore,
@@ -383,9 +387,10 @@ xfs_splice_write(
383{ 387{
384 xfs_inode_t *ip = XFS_BHVTOI(bdp); 388 xfs_inode_t *ip = XFS_BHVTOI(bdp);
385 xfs_mount_t *mp = ip->i_mount; 389 xfs_mount_t *mp = ip->i_mount;
390 xfs_iocore_t *io = &ip->i_iocore;
386 ssize_t ret; 391 ssize_t ret;
387 struct inode *inode = outfilp->f_mapping->host; 392 struct inode *inode = outfilp->f_mapping->host;
388 xfs_fsize_t isize; 393 xfs_fsize_t isize, new_size;
389 394
390 XFS_STATS_INC(xs_write_calls); 395 XFS_STATS_INC(xs_write_calls);
391 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 396 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
@@ -406,6 +411,14 @@ xfs_splice_write(
406 return -error; 411 return -error;
407 } 412 }
408 } 413 }
414
415 new_size = *ppos + count;
416
417 xfs_ilock(ip, XFS_ILOCK_EXCL);
418 if (new_size > ip->i_size)
419 io->io_new_size = new_size;
420 xfs_iunlock(ip, XFS_ILOCK_EXCL);
421
409 xfs_rw_enter_trace(XFS_SPLICE_WRITE_ENTER, &ip->i_iocore, 422 xfs_rw_enter_trace(XFS_SPLICE_WRITE_ENTER, &ip->i_iocore,
410 pipe, count, *ppos, ioflags); 423 pipe, count, *ppos, ioflags);
411 ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); 424 ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
@@ -416,14 +429,18 @@ xfs_splice_write(
416 if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize)) 429 if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize))
417 *ppos = isize; 430 *ppos = isize;
418 431
419 if (*ppos > ip->i_d.di_size) { 432 if (*ppos > ip->i_size) {
420 xfs_ilock(ip, XFS_ILOCK_EXCL); 433 xfs_ilock(ip, XFS_ILOCK_EXCL);
421 if (*ppos > ip->i_d.di_size) { 434 if (*ppos > ip->i_size)
422 ip->i_d.di_size = *ppos; 435 ip->i_size = *ppos;
423 i_size_write(inode, *ppos); 436 xfs_iunlock(ip, XFS_ILOCK_EXCL);
424 ip->i_update_core = 1; 437 }
425 ip->i_update_size = 1; 438
426 } 439 if (io->io_new_size) {
440 xfs_ilock(ip, XFS_ILOCK_EXCL);
441 io->io_new_size = 0;
442 if (ip->i_d.di_size > ip->i_size)
443 ip->i_d.di_size = ip->i_size;
427 xfs_iunlock(ip, XFS_ILOCK_EXCL); 444 xfs_iunlock(ip, XFS_ILOCK_EXCL);
428 } 445 }
429 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 446 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
@@ -639,37 +656,21 @@ xfs_write(
639 xfs_fsize_t isize, new_size; 656 xfs_fsize_t isize, new_size;
640 xfs_iocore_t *io; 657 xfs_iocore_t *io;
641 bhv_vnode_t *vp; 658 bhv_vnode_t *vp;
642 unsigned long seg;
643 int iolock; 659 int iolock;
644 int eventsent = 0; 660 int eventsent = 0;
645 bhv_vrwlock_t locktype; 661 bhv_vrwlock_t locktype;
646 size_t ocount = 0, count; 662 size_t ocount = 0, count;
647 loff_t pos; 663 loff_t pos;
648 int need_i_mutex = 1, need_flush = 0; 664 int need_i_mutex;
649 665
650 XFS_STATS_INC(xs_write_calls); 666 XFS_STATS_INC(xs_write_calls);
651 667
652 vp = BHV_TO_VNODE(bdp); 668 vp = BHV_TO_VNODE(bdp);
653 xip = XFS_BHVTOI(bdp); 669 xip = XFS_BHVTOI(bdp);
654 670
655 for (seg = 0; seg < segs; seg++) { 671 error = generic_segment_checks(iovp, &segs, &ocount, VERIFY_READ);
656 const struct iovec *iv = &iovp[seg]; 672 if (error)
657 673 return error;
658 /*
659 * If any segment has a negative length, or the cumulative
660 * length ever wraps negative then return -EINVAL.
661 */
662 ocount += iv->iov_len;
663 if (unlikely((ssize_t)(ocount|iv->iov_len) < 0))
664 return -EINVAL;
665 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
666 continue;
667 if (seg == 0)
668 return -EFAULT;
669 segs = seg;
670 ocount -= iv->iov_len; /* This segment is no good */
671 break;
672 }
673 674
674 count = ocount; 675 count = ocount;
675 pos = *offset; 676 pos = *offset;
@@ -685,39 +686,20 @@ xfs_write(
685 if (XFS_FORCED_SHUTDOWN(mp)) 686 if (XFS_FORCED_SHUTDOWN(mp))
686 return -EIO; 687 return -EIO;
687 688
688 if (ioflags & IO_ISDIRECT) {
689 xfs_buftarg_t *target =
690 (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
691 mp->m_rtdev_targp : mp->m_ddev_targp;
692
693 if ((pos & target->bt_smask) || (count & target->bt_smask))
694 return XFS_ERROR(-EINVAL);
695
696 if (!VN_CACHED(vp) && pos < i_size_read(inode))
697 need_i_mutex = 0;
698
699 if (VN_CACHED(vp))
700 need_flush = 1;
701 }
702
703relock: 689relock:
704 if (need_i_mutex) { 690 if (ioflags & IO_ISDIRECT) {
691 iolock = XFS_IOLOCK_SHARED;
692 locktype = VRWLOCK_WRITE_DIRECT;
693 need_i_mutex = 0;
694 } else {
705 iolock = XFS_IOLOCK_EXCL; 695 iolock = XFS_IOLOCK_EXCL;
706 locktype = VRWLOCK_WRITE; 696 locktype = VRWLOCK_WRITE;
707 697 need_i_mutex = 1;
708 mutex_lock(&inode->i_mutex); 698 mutex_lock(&inode->i_mutex);
709 } else {
710 iolock = XFS_IOLOCK_SHARED;
711 locktype = VRWLOCK_WRITE_DIRECT;
712 } 699 }
713 700
714 xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); 701 xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);
715 702
716 isize = i_size_read(inode);
717
718 if (file->f_flags & O_APPEND)
719 *offset = isize;
720
721start: 703start:
722 error = -generic_write_checks(file, &pos, &count, 704 error = -generic_write_checks(file, &pos, &count,
723 S_ISBLK(inode->i_mode)); 705 S_ISBLK(inode->i_mode));
@@ -726,13 +708,8 @@ start:
726 goto out_unlock_mutex; 708 goto out_unlock_mutex;
727 } 709 }
728 710
729 new_size = pos + count;
730 if (new_size > isize)
731 io->io_new_size = new_size;
732
733 if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) && 711 if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) &&
734 !(ioflags & IO_INVIS) && !eventsent)) { 712 !(ioflags & IO_INVIS) && !eventsent)) {
735 loff_t savedsize = pos;
736 int dmflags = FILP_DELAY_FLAG(file); 713 int dmflags = FILP_DELAY_FLAG(file);
737 714
738 if (need_i_mutex) 715 if (need_i_mutex)
@@ -743,8 +720,7 @@ start:
743 pos, count, 720 pos, count,
744 dmflags, &locktype); 721 dmflags, &locktype);
745 if (error) { 722 if (error) {
746 xfs_iunlock(xip, iolock); 723 goto out_unlock_internal;
747 goto out_unlock_mutex;
748 } 724 }
749 xfs_ilock(xip, XFS_ILOCK_EXCL); 725 xfs_ilock(xip, XFS_ILOCK_EXCL);
750 eventsent = 1; 726 eventsent = 1;
@@ -756,12 +732,35 @@ start:
756 * event prevents another call to XFS_SEND_DATA, which is 732 * event prevents another call to XFS_SEND_DATA, which is
757 * what allows the size to change in the first place. 733 * what allows the size to change in the first place.
758 */ 734 */
759 if ((file->f_flags & O_APPEND) && savedsize != isize) { 735 if ((file->f_flags & O_APPEND) && pos != xip->i_size)
760 pos = isize = xip->i_d.di_size; 736 goto start;
737 }
738
739 if (ioflags & IO_ISDIRECT) {
740 xfs_buftarg_t *target =
741 (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
742 mp->m_rtdev_targp : mp->m_ddev_targp;
743
744 if ((pos & target->bt_smask) || (count & target->bt_smask)) {
745 xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
746 return XFS_ERROR(-EINVAL);
747 }
748
749 if (!need_i_mutex && (VN_CACHED(vp) || pos > xip->i_size)) {
750 xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
751 iolock = XFS_IOLOCK_EXCL;
752 locktype = VRWLOCK_WRITE;
753 need_i_mutex = 1;
754 mutex_lock(&inode->i_mutex);
755 xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);
761 goto start; 756 goto start;
762 } 757 }
763 } 758 }
764 759
760 new_size = pos + count;
761 if (new_size > xip->i_size)
762 io->io_new_size = new_size;
763
765 if (likely(!(ioflags & IO_INVIS))) { 764 if (likely(!(ioflags & IO_INVIS))) {
766 file_update_time(file); 765 file_update_time(file);
767 xfs_ichgtime_fast(xip, inode, 766 xfs_ichgtime_fast(xip, inode,
@@ -777,11 +776,11 @@ start:
777 * to zero it out up to the new size. 776 * to zero it out up to the new size.
778 */ 777 */
779 778
780 if (pos > isize) { 779 if (pos > xip->i_size) {
781 error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, pos, isize); 780 error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, pos, xip->i_size);
782 if (error) { 781 if (error) {
783 xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); 782 xfs_iunlock(xip, XFS_ILOCK_EXCL);
784 goto out_unlock_mutex; 783 goto out_unlock_internal;
785 } 784 }
786 } 785 }
787 xfs_iunlock(xip, XFS_ILOCK_EXCL); 786 xfs_iunlock(xip, XFS_ILOCK_EXCL);
@@ -801,8 +800,7 @@ start:
801 if (likely(!error)) 800 if (likely(!error))
802 error = -remove_suid(file->f_path.dentry); 801 error = -remove_suid(file->f_path.dentry);
803 if (unlikely(error)) { 802 if (unlikely(error)) {
804 xfs_iunlock(xip, iolock); 803 goto out_unlock_internal;
805 goto out_unlock_mutex;
806 } 804 }
807 } 805 }
808 806
@@ -811,11 +809,14 @@ retry:
811 current->backing_dev_info = mapping->backing_dev_info; 809 current->backing_dev_info = mapping->backing_dev_info;
812 810
813 if ((ioflags & IO_ISDIRECT)) { 811 if ((ioflags & IO_ISDIRECT)) {
814 if (need_flush) { 812 if (VN_CACHED(vp)) {
813 WARN_ON(need_i_mutex == 0);
815 xfs_inval_cached_trace(io, pos, -1, 814 xfs_inval_cached_trace(io, pos, -1,
816 ctooff(offtoct(pos)), -1); 815 ctooff(offtoct(pos)), -1);
817 bhv_vop_flushinval_pages(vp, ctooff(offtoct(pos)), 816 error = bhv_vop_flushinval_pages(vp, ctooff(offtoct(pos)),
818 -1, FI_REMAPF_LOCKED); 817 -1, FI_REMAPF_LOCKED);
818 if (error)
819 goto out_unlock_internal;
819 } 820 }
820 821
821 if (need_i_mutex) { 822 if (need_i_mutex) {
@@ -843,7 +844,6 @@ retry:
843 pos += ret; 844 pos += ret;
844 count -= ret; 845 count -= ret;
845 846
846 need_i_mutex = 1;
847 ioflags &= ~IO_ISDIRECT; 847 ioflags &= ~IO_ISDIRECT;
848 xfs_iunlock(xip, iolock); 848 xfs_iunlock(xip, iolock);
849 goto relock; 849 goto relock;
@@ -870,12 +870,12 @@ retry:
870 error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp, 870 error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp,
871 DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL, 871 DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL,
872 0, 0, 0); /* Delay flag intentionally unused */ 872 0, 0, 0); /* Delay flag intentionally unused */
873 if (error)
874 goto out_nounlocks;
875 if (need_i_mutex) 873 if (need_i_mutex)
876 mutex_lock(&inode->i_mutex); 874 mutex_lock(&inode->i_mutex);
877 xfs_rwlock(bdp, locktype); 875 xfs_rwlock(bdp, locktype);
878 pos = xip->i_d.di_size; 876 if (error)
877 goto out_unlock_internal;
878 pos = xip->i_size;
879 ret = 0; 879 ret = 0;
880 goto retry; 880 goto retry;
881 } 881 }
@@ -884,14 +884,10 @@ retry:
884 if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize)) 884 if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize))
885 *offset = isize; 885 *offset = isize;
886 886
887 if (*offset > xip->i_d.di_size) { 887 if (*offset > xip->i_size) {
888 xfs_ilock(xip, XFS_ILOCK_EXCL); 888 xfs_ilock(xip, XFS_ILOCK_EXCL);
889 if (*offset > xip->i_d.di_size) { 889 if (*offset > xip->i_size)
890 xip->i_d.di_size = *offset; 890 xip->i_size = *offset;
891 i_size_write(inode, *offset);
892 xip->i_update_core = 1;
893 xip->i_update_size = 1;
894 }
895 xfs_iunlock(xip, XFS_ILOCK_EXCL); 891 xfs_iunlock(xip, XFS_ILOCK_EXCL);
896 } 892 }
897 893
@@ -913,16 +909,31 @@ retry:
913 909
914 error = sync_page_range(inode, mapping, pos, ret); 910 error = sync_page_range(inode, mapping, pos, ret);
915 if (!error) 911 if (!error)
916 error = ret; 912 error = -ret;
917 return error; 913 if (need_i_mutex)
914 mutex_lock(&inode->i_mutex);
915 xfs_rwlock(bdp, locktype);
918 } 916 }
919 917
920 out_unlock_internal: 918 out_unlock_internal:
919 if (io->io_new_size) {
920 xfs_ilock(xip, XFS_ILOCK_EXCL);
921 io->io_new_size = 0;
922 /*
923 * If this was a direct or synchronous I/O that failed (such
924 * as ENOSPC) then part of the I/O may have been written to
925 * disk before the error occured. In this case the on-disk
926 * file size may have been adjusted beyond the in-memory file
927 * size and now needs to be truncated back.
928 */
929 if (xip->i_d.di_size > xip->i_size)
930 xip->i_d.di_size = xip->i_size;
931 xfs_iunlock(xip, XFS_ILOCK_EXCL);
932 }
921 xfs_rwunlock(bdp, locktype); 933 xfs_rwunlock(bdp, locktype);
922 out_unlock_mutex: 934 out_unlock_mutex:
923 if (need_i_mutex) 935 if (need_i_mutex)
924 mutex_unlock(&inode->i_mutex); 936 mutex_unlock(&inode->i_mutex);
925 out_nounlocks:
926 return -error; 937 return -error;
927} 938}
928 939
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 2f2c40db562e..14e2cbe5a8d5 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -360,8 +360,7 @@ xfs_fs_inode_init_once(
360 kmem_zone_t *zonep, 360 kmem_zone_t *zonep,
361 unsigned long flags) 361 unsigned long flags)
362{ 362{
363 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 363 if (flags & SLAB_CTOR_CONSTRUCTOR)
364 SLAB_CTOR_CONSTRUCTOR)
365 inode_init_once(vn_to_inode((bhv_vnode_t *)vnode)); 364 inode_init_once(vn_to_inode((bhv_vnode_t *)vnode));
366} 365}
367 366
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index b76118cf4897..d1b2d01843d1 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -194,7 +194,7 @@ typedef int (*vop_attr_list_t)(bhv_desc_t *, char *, int, int,
194typedef void (*vop_link_removed_t)(bhv_desc_t *, bhv_vnode_t *, int); 194typedef void (*vop_link_removed_t)(bhv_desc_t *, bhv_vnode_t *, int);
195typedef void (*vop_vnode_change_t)(bhv_desc_t *, bhv_vchange_t, __psint_t); 195typedef void (*vop_vnode_change_t)(bhv_desc_t *, bhv_vchange_t, __psint_t);
196typedef void (*vop_ptossvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, int); 196typedef void (*vop_ptossvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, int);
197typedef void (*vop_pflushinvalvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, int); 197typedef int (*vop_pflushinvalvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, int);
198typedef int (*vop_pflushvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, 198typedef int (*vop_pflushvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t,
199 uint64_t, int); 199 uint64_t, int);
200typedef int (*vop_iflush_t)(bhv_desc_t *, int); 200typedef int (*vop_iflush_t)(bhv_desc_t *, int);
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 4adaf13aac6f..cfdd35ee9f7a 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -753,8 +753,7 @@ xfs_qm_idtodq(
753 goto error0; 753 goto error0;
754 } 754 }
755 if (tp) { 755 if (tp) {
756 if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, 756 if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES)))
757 NULL)))
758 goto error1; 757 goto error1;
759 } 758 }
760 759
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 1de2acdc7f70..3e4a8ad8a34c 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -388,6 +388,17 @@ xfs_qm_mount_quotas(
388 return XFS_ERROR(error); 388 return XFS_ERROR(error);
389 } 389 }
390 } 390 }
391 /*
392 * If one type of quotas is off, then it will lose its
393 * quotachecked status, since we won't be doing accounting for
394 * that type anymore.
395 */
396 if (!XFS_IS_UQUOTA_ON(mp)) {
397 mp->m_qflags &= ~XFS_UQUOTA_CHKD;
398 }
399 if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp))) {
400 mp->m_qflags &= ~XFS_OQUOTA_CHKD;
401 }
391 402
392 write_changes: 403 write_changes:
393 /* 404 /*
@@ -1453,8 +1464,7 @@ xfs_qm_qino_alloc(
1453 XFS_SB_UNLOCK(mp, s); 1464 XFS_SB_UNLOCK(mp, s);
1454 xfs_mod_sb(tp, sbfields); 1465 xfs_mod_sb(tp, sbfields);
1455 1466
1456 if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, 1467 if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) {
1457 NULL))) {
1458 xfs_fs_cmn_err(CE_ALERT, mp, "XFS qino_alloc failed!"); 1468 xfs_fs_cmn_err(CE_ALERT, mp, "XFS qino_alloc failed!");
1459 return error; 1469 return error;
1460 } 1470 }
@@ -2405,7 +2415,7 @@ xfs_qm_write_sb_changes(
2405 } 2415 }
2406 2416
2407 xfs_mod_sb(tp, flags); 2417 xfs_mod_sb(tp, flags);
2408 (void) xfs_trans_commit(tp, 0, NULL); 2418 (void) xfs_trans_commit(tp, 0);
2409 2419
2410 return 0; 2420 return 0;
2411} 2421}
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 716f562aa8b2..2df67fd913e5 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -456,9 +456,7 @@ xfs_qm_scall_quotaon(
456 || 456 ||
457 ((flags & XFS_PQUOTA_ACCT) == 0 && 457 ((flags & XFS_PQUOTA_ACCT) == 0 &&
458 (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 && 458 (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 &&
459 (flags & XFS_OQUOTA_ENFD)) 459 (flags & XFS_GQUOTA_ACCT) == 0 &&
460 ||
461 ((flags & XFS_GQUOTA_ACCT) == 0 &&
462 (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 && 460 (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 &&
463 (flags & XFS_OQUOTA_ENFD))) { 461 (flags & XFS_OQUOTA_ENFD))) {
464 qdprintk("Can't enforce without acct, flags=%x sbflags=%x\n", 462 qdprintk("Can't enforce without acct, flags=%x sbflags=%x\n",
@@ -735,7 +733,7 @@ xfs_qm_scall_setqlim(
735 xfs_trans_log_dquot(tp, dqp); 733 xfs_trans_log_dquot(tp, dqp);
736 734
737 xfs_dqtrace_entry(dqp, "Q_SETQLIM: COMMIT"); 735 xfs_dqtrace_entry(dqp, "Q_SETQLIM: COMMIT");
738 xfs_trans_commit(tp, 0, NULL); 736 xfs_trans_commit(tp, 0);
739 xfs_qm_dqprint(dqp); 737 xfs_qm_dqprint(dqp);
740 xfs_qm_dqrele(dqp); 738 xfs_qm_dqrele(dqp);
741 mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); 739 mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
@@ -809,7 +807,7 @@ xfs_qm_log_quotaoff_end(
809 * We don't care about quotoff's performance. 807 * We don't care about quotoff's performance.
810 */ 808 */
811 xfs_trans_set_sync(tp); 809 xfs_trans_set_sync(tp);
812 error = xfs_trans_commit(tp, 0, NULL); 810 error = xfs_trans_commit(tp, 0);
813 return (error); 811 return (error);
814} 812}
815 813
@@ -852,7 +850,7 @@ xfs_qm_log_quotaoff(
852 * We don't care about quotoff's performance. 850 * We don't care about quotoff's performance.
853 */ 851 */
854 xfs_trans_set_sync(tp); 852 xfs_trans_set_sync(tp);
855 error = xfs_trans_commit(tp, 0, NULL); 853 error = xfs_trans_commit(tp, 0);
856 854
857error0: 855error0:
858 if (error) { 856 if (error) {
@@ -911,14 +909,19 @@ xfs_qm_export_dquot(
911 * gets turned off. No need to confuse the user level code, 909 * gets turned off. No need to confuse the user level code,
912 * so return zeroes in that case. 910 * so return zeroes in that case.
913 */ 911 */
914 if (! XFS_IS_QUOTA_ENFORCED(mp)) { 912 if ((!XFS_IS_UQUOTA_ENFORCED(mp) && src->d_flags == XFS_DQ_USER) ||
913 (!XFS_IS_OQUOTA_ENFORCED(mp) &&
914 (src->d_flags & (XFS_DQ_PROJ | XFS_DQ_GROUP)))) {
915 dst->d_btimer = 0; 915 dst->d_btimer = 0;
916 dst->d_itimer = 0; 916 dst->d_itimer = 0;
917 dst->d_rtbtimer = 0; 917 dst->d_rtbtimer = 0;
918 } 918 }
919 919
920#ifdef DEBUG 920#ifdef DEBUG
921 if (XFS_IS_QUOTA_ENFORCED(mp) && dst->d_id != 0) { 921 if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == XFS_USER_QUOTA) ||
922 (XFS_IS_OQUOTA_ENFORCED(mp) &&
923 (dst->d_flags & (XFS_PROJ_QUOTA | XFS_GROUP_QUOTA)))) &&
924 dst->d_id != 0) {
922 if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) && 925 if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) &&
923 (dst->d_blk_softlimit > 0)) { 926 (dst->d_blk_softlimit > 0)) {
924 ASSERT(dst->d_btimer != 0); 927 ASSERT(dst->d_btimer != 0);
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index d7491e7b1f3b..7de6874bf1b8 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -656,7 +656,9 @@ xfs_trans_dqresv(
656 656
657 if ((flags & XFS_QMOPT_FORCE_RES) == 0 && 657 if ((flags & XFS_QMOPT_FORCE_RES) == 0 &&
658 dqp->q_core.d_id && 658 dqp->q_core.d_id &&
659 XFS_IS_QUOTA_ENFORCED(dqp->q_mount)) { 659 ((XFS_IS_UQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISUDQ(dqp)) ||
660 (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) &&
661 (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) {
660#ifdef QUOTADEBUG 662#ifdef QUOTADEBUG
661 cmn_err(CE_DEBUG, "BLK Res: nblks=%ld + resbcount=%Ld" 663 cmn_err(CE_DEBUG, "BLK Res: nblks=%ld + resbcount=%Ld"
662 " > hardlimit=%Ld?", nblks, *resbcountp, hardlimit); 664 " > hardlimit=%Ld?", nblks, *resbcountp, hardlimit);
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
index 08bbd3cb87ae..f45a49ffd3a3 100644
--- a/fs/xfs/support/debug.c
+++ b/fs/xfs/support/debug.c
@@ -81,20 +81,3 @@ assfail(char *expr, char *file, int line)
81 printk("Assertion failed: %s, file: %s, line: %d\n", expr, file, line); 81 printk("Assertion failed: %s, file: %s, line: %d\n", expr, file, line);
82 BUG(); 82 BUG();
83} 83}
84
85#if ((defined(DEBUG) || defined(INDUCE_IO_ERRROR)) && !defined(NO_WANT_RANDOM))
86unsigned long random(void)
87{
88 static unsigned long RandomValue = 1;
89 /* cycles pseudo-randomly through all values between 1 and 2^31 - 2 */
90 register long rv = RandomValue;
91 register long lo;
92 register long hi;
93
94 hi = rv / 127773;
95 lo = rv % 127773;
96 rv = 16807 * lo - 2836 * hi;
97 if (rv <= 0) rv += 2147483647;
98 return RandomValue = rv;
99}
100#endif /* DEBUG || INDUCE_IO_ERRROR || !NO_WANT_RANDOM */
diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h
index 2a70cc605ae3..a27a7c8c0526 100644
--- a/fs/xfs/support/debug.h
+++ b/fs/xfs/support/debug.h
@@ -50,7 +50,7 @@ extern void assfail(char *expr, char *f, int l);
50#else /* DEBUG */ 50#else /* DEBUG */
51 51
52# define ASSERT(expr) ASSERT_ALWAYS(expr) 52# define ASSERT(expr) ASSERT_ALWAYS(expr)
53extern unsigned long random(void); 53# include <linux/random.h>
54 54
55#ifndef STATIC 55#ifndef STATIC
56# define STATIC noinline 56# define STATIC noinline
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index e80dda3437d1..8e9a40aa0cd3 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -764,7 +764,7 @@ xfs_alloc_ag_vextent_near(
764 */ 764 */
765 int dofirst; /* set to do first algorithm */ 765 int dofirst; /* set to do first algorithm */
766 766
767 dofirst = random() & 1; 767 dofirst = random32() & 1;
768#endif 768#endif
769 /* 769 /*
770 * Get a cursor for the by-size btree. 770 * Get a cursor for the by-size btree.
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 9d358ffce4e5..7ce44a7b88a2 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -328,8 +328,7 @@ xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen,
328 xfs_trans_set_sync(args.trans); 328 xfs_trans_set_sync(args.trans);
329 } 329 }
330 err2 = xfs_trans_commit(args.trans, 330 err2 = xfs_trans_commit(args.trans,
331 XFS_TRANS_RELEASE_LOG_RES, 331 XFS_TRANS_RELEASE_LOG_RES);
332 NULL);
333 xfs_iunlock(dp, XFS_ILOCK_EXCL); 332 xfs_iunlock(dp, XFS_ILOCK_EXCL);
334 333
335 /* 334 /*
@@ -397,8 +396,7 @@ xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen,
397 * Commit the last in the sequence of transactions. 396 * Commit the last in the sequence of transactions.
398 */ 397 */
399 xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE); 398 xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
400 error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES, 399 error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
401 NULL);
402 xfs_iunlock(dp, XFS_ILOCK_EXCL); 400 xfs_iunlock(dp, XFS_ILOCK_EXCL);
403 401
404 /* 402 /*
@@ -544,8 +542,7 @@ xfs_attr_remove_int(xfs_inode_t *dp, const char *name, int namelen, int flags)
544 * Commit the last in the sequence of transactions. 542 * Commit the last in the sequence of transactions.
545 */ 543 */
546 xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE); 544 xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
547 error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES, 545 error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
548 NULL);
549 xfs_iunlock(dp, XFS_ILOCK_EXCL); 546 xfs_iunlock(dp, XFS_ILOCK_EXCL);
550 547
551 /* 548 /*
@@ -859,8 +856,7 @@ xfs_attr_inactive(xfs_inode_t *dp)
859 * Commit the last in the sequence of transactions. 856 * Commit the last in the sequence of transactions.
860 */ 857 */
861 xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE); 858 xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE);
862 error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES, 859 error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES);
863 NULL);
864 xfs_iunlock(dp, XFS_ILOCK_EXCL); 860 xfs_iunlock(dp, XFS_ILOCK_EXCL);
865 861
866 return(error); 862 return(error);
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 8eab73e8340a..81f45dae1c57 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -3053,7 +3053,7 @@ xfs_attr_rolltrans(xfs_trans_t **transp, xfs_inode_t *dp)
3053 * is in progress. The caller takes the responsibility to cancel 3053 * is in progress. The caller takes the responsibility to cancel
3054 * the duplicate transaction that gets returned. 3054 * the duplicate transaction that gets returned.
3055 */ 3055 */
3056 if ((error = xfs_trans_commit(trans, 0, NULL))) 3056 if ((error = xfs_trans_commit(trans, 0)))
3057 return (error); 3057 return (error);
3058 3058
3059 trans = *transp; 3059 trans = *transp;
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 87795188cedf..b1ea26e40aaf 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -130,7 +130,6 @@ STATIC int /* error */
130xfs_bmap_add_extent_hole_delay( 130xfs_bmap_add_extent_hole_delay(
131 xfs_inode_t *ip, /* incore inode pointer */ 131 xfs_inode_t *ip, /* incore inode pointer */
132 xfs_extnum_t idx, /* extent number to update/insert */ 132 xfs_extnum_t idx, /* extent number to update/insert */
133 xfs_btree_cur_t *cur, /* if null, not a btree */
134 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 133 xfs_bmbt_irec_t *new, /* new data to add to file extents */
135 int *logflagsp,/* inode logging flags */ 134 int *logflagsp,/* inode logging flags */
136 xfs_extdelta_t *delta, /* Change made to incore extents */ 135 xfs_extdelta_t *delta, /* Change made to incore extents */
@@ -399,7 +398,6 @@ xfs_bmap_count_leaves(
399 398
400STATIC int 399STATIC int
401xfs_bmap_disk_count_leaves( 400xfs_bmap_disk_count_leaves(
402 xfs_ifork_t *ifp,
403 xfs_extnum_t idx, 401 xfs_extnum_t idx,
404 xfs_bmbt_block_t *block, 402 xfs_bmbt_block_t *block,
405 int numrecs, 403 int numrecs,
@@ -580,7 +578,7 @@ xfs_bmap_add_extent(
580 if (cur) 578 if (cur)
581 ASSERT((cur->bc_private.b.flags & 579 ASSERT((cur->bc_private.b.flags &
582 XFS_BTCUR_BPRV_WASDEL) == 0); 580 XFS_BTCUR_BPRV_WASDEL) == 0);
583 if ((error = xfs_bmap_add_extent_hole_delay(ip, idx, cur, new, 581 if ((error = xfs_bmap_add_extent_hole_delay(ip, idx, new,
584 &logflags, delta, rsvd))) 582 &logflags, delta, rsvd)))
585 goto done; 583 goto done;
586 } 584 }
@@ -1841,7 +1839,6 @@ STATIC int /* error */
1841xfs_bmap_add_extent_hole_delay( 1839xfs_bmap_add_extent_hole_delay(
1842 xfs_inode_t *ip, /* incore inode pointer */ 1840 xfs_inode_t *ip, /* incore inode pointer */
1843 xfs_extnum_t idx, /* extent number to update/insert */ 1841 xfs_extnum_t idx, /* extent number to update/insert */
1844 xfs_btree_cur_t *cur, /* if null, not a btree */
1845 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 1842 xfs_bmbt_irec_t *new, /* new data to add to file extents */
1846 int *logflagsp, /* inode logging flags */ 1843 int *logflagsp, /* inode logging flags */
1847 xfs_extdelta_t *delta, /* Change made to incore extents */ 1844 xfs_extdelta_t *delta, /* Change made to incore extents */
@@ -4071,7 +4068,7 @@ xfs_bmap_add_attrfork(
4071 } 4068 }
4072 if ((error = xfs_bmap_finish(&tp, &flist, &committed))) 4069 if ((error = xfs_bmap_finish(&tp, &flist, &committed)))
4073 goto error2; 4070 goto error2;
4074 error = xfs_trans_commit(tp, XFS_TRANS_PERM_LOG_RES, NULL); 4071 error = xfs_trans_commit(tp, XFS_TRANS_PERM_LOG_RES);
4075 ASSERT(ip->i_df.if_ext_max == 4072 ASSERT(ip->i_df.if_ext_max ==
4076 XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t)); 4073 XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t));
4077 return error; 4074 return error;
@@ -4227,7 +4224,7 @@ xfs_bmap_finish(
4227 logres = ntp->t_log_res; 4224 logres = ntp->t_log_res;
4228 logcount = ntp->t_log_count; 4225 logcount = ntp->t_log_count;
4229 ntp = xfs_trans_dup(*tp); 4226 ntp = xfs_trans_dup(*tp);
4230 error = xfs_trans_commit(*tp, 0, NULL); 4227 error = xfs_trans_commit(*tp, 0);
4231 *tp = ntp; 4228 *tp = ntp;
4232 *committed = 1; 4229 *committed = 1;
4233 /* 4230 /*
@@ -4447,8 +4444,11 @@ xfs_bmap_one_block(
4447 xfs_bmbt_irec_t s; /* internal version of extent */ 4444 xfs_bmbt_irec_t s; /* internal version of extent */
4448 4445
4449#ifndef DEBUG 4446#ifndef DEBUG
4450 if (whichfork == XFS_DATA_FORK) 4447 if (whichfork == XFS_DATA_FORK) {
4451 return ip->i_d.di_size == ip->i_mount->m_sb.sb_blocksize; 4448 return ((ip->i_d.di_mode & S_IFMT) == S_IFREG) ?
4449 (ip->i_size == ip->i_mount->m_sb.sb_blocksize) :
4450 (ip->i_d.di_size == ip->i_mount->m_sb.sb_blocksize);
4451 }
4452#endif /* !DEBUG */ 4452#endif /* !DEBUG */
4453 if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1) 4453 if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1)
4454 return 0; 4454 return 0;
@@ -4460,7 +4460,7 @@ xfs_bmap_one_block(
4460 xfs_bmbt_get_all(ep, &s); 4460 xfs_bmbt_get_all(ep, &s);
4461 rval = s.br_startoff == 0 && s.br_blockcount == 1; 4461 rval = s.br_startoff == 0 && s.br_blockcount == 1;
4462 if (rval && whichfork == XFS_DATA_FORK) 4462 if (rval && whichfork == XFS_DATA_FORK)
4463 ASSERT(ip->i_d.di_size == ip->i_mount->m_sb.sb_blocksize); 4463 ASSERT(ip->i_size == ip->i_mount->m_sb.sb_blocksize);
4464 return rval; 4464 return rval;
4465} 4465}
4466 4466
@@ -5820,7 +5820,7 @@ xfs_getbmap(
5820 fixlen = XFS_MAXIOFFSET(mp); 5820 fixlen = XFS_MAXIOFFSET(mp);
5821 } else { 5821 } else {
5822 prealloced = 0; 5822 prealloced = 0;
5823 fixlen = ip->i_d.di_size; 5823 fixlen = ip->i_size;
5824 } 5824 }
5825 } else { 5825 } else {
5826 prealloced = 0; 5826 prealloced = 0;
@@ -5844,7 +5844,8 @@ xfs_getbmap(
5844 5844
5845 xfs_ilock(ip, XFS_IOLOCK_SHARED); 5845 xfs_ilock(ip, XFS_IOLOCK_SHARED);
5846 5846
5847 if (whichfork == XFS_DATA_FORK && ip->i_delayed_blks) { 5847 if (whichfork == XFS_DATA_FORK &&
5848 (ip->i_delayed_blks || ip->i_size > ip->i_d.di_size)) {
5848 /* xfs_fsize_t last_byte = xfs_file_last_byte(ip); */ 5849 /* xfs_fsize_t last_byte = xfs_file_last_byte(ip); */
5849 error = bhv_vop_flush_pages(vp, (xfs_off_t)0, -1, 0, FI_REMAPF); 5850 error = bhv_vop_flush_pages(vp, (xfs_off_t)0, -1, 0, FI_REMAPF);
5850 } 5851 }
@@ -6425,8 +6426,8 @@ xfs_bmap_count_tree(
6425 for (;;) { 6426 for (;;) {
6426 nextbno = be64_to_cpu(block->bb_rightsib); 6427 nextbno = be64_to_cpu(block->bb_rightsib);
6427 numrecs = be16_to_cpu(block->bb_numrecs); 6428 numrecs = be16_to_cpu(block->bb_numrecs);
6428 if (unlikely(xfs_bmap_disk_count_leaves(ifp, 6429 if (unlikely(xfs_bmap_disk_count_leaves(0,
6429 0, block, numrecs, count) < 0)) { 6430 block, numrecs, count) < 0)) {
6430 xfs_trans_brelse(tp, bp); 6431 xfs_trans_brelse(tp, bp);
6431 XFS_ERROR_REPORT("xfs_bmap_count_tree(2)", 6432 XFS_ERROR_REPORT("xfs_bmap_count_tree(2)",
6432 XFS_ERRLEVEL_LOW, mp); 6433 XFS_ERRLEVEL_LOW, mp);
@@ -6472,7 +6473,6 @@ xfs_bmap_count_leaves(
6472 */ 6473 */
6473int 6474int
6474xfs_bmap_disk_count_leaves( 6475xfs_bmap_disk_count_leaves(
6475 xfs_ifork_t *ifp,
6476 xfs_extnum_t idx, 6476 xfs_extnum_t idx,
6477 xfs_bmbt_block_t *block, 6477 xfs_bmbt_block_t *block,
6478 int numrecs, 6478 int numrecs,
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index b847e6a7a3f0..de35d18cc002 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -199,7 +199,9 @@ xfs_swap_extents(
199 199
200 if (VN_CACHED(tvp) != 0) { 200 if (VN_CACHED(tvp) != 0) {
201 xfs_inval_cached_trace(&tip->i_iocore, 0, -1, 0, -1); 201 xfs_inval_cached_trace(&tip->i_iocore, 0, -1, 0, -1);
202 bhv_vop_flushinval_pages(tvp, 0, -1, FI_REMAPF_LOCKED); 202 error = bhv_vop_flushinval_pages(tvp, 0, -1, FI_REMAPF_LOCKED);
203 if (error)
204 goto error0;
203 } 205 }
204 206
205 /* Verify O_DIRECT for ftmp */ 207 /* Verify O_DIRECT for ftmp */
@@ -382,7 +384,7 @@ xfs_swap_extents(
382 xfs_trans_set_sync(tp); 384 xfs_trans_set_sync(tp);
383 } 385 }
384 386
385 error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT, NULL); 387 error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT);
386 locked = 0; 388 locked = 0;
387 389
388 error0: 390 error0:
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 9d7438bba30d..3accc1dcd6c9 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -282,8 +282,7 @@ xfs_dir2_block_addname(
282 * This needs to happen before the next call to use_free. 282 * This needs to happen before the next call to use_free.
283 */ 283 */
284 if (needscan) { 284 if (needscan) {
285 xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, 285 xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog);
286 &needlog, NULL);
287 needscan = 0; 286 needscan = 0;
288 } 287 }
289 } 288 }
@@ -333,7 +332,7 @@ xfs_dir2_block_addname(
333 */ 332 */
334 if (needscan) { 333 if (needscan) {
335 xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, 334 xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block,
336 &needlog, NULL); 335 &needlog);
337 needscan = 0; 336 needscan = 0;
338 } 337 }
339 /* 338 /*
@@ -418,8 +417,7 @@ xfs_dir2_block_addname(
418 * Clean up the bestfree array and log the header, tail, and entry. 417 * Clean up the bestfree array and log the header, tail, and entry.
419 */ 418 */
420 if (needscan) 419 if (needscan)
421 xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog, 420 xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog);
422 NULL);
423 if (needlog) 421 if (needlog)
424 xfs_dir2_data_log_header(tp, bp); 422 xfs_dir2_data_log_header(tp, bp);
425 xfs_dir2_block_log_tail(tp, bp); 423 xfs_dir2_block_log_tail(tp, bp);
@@ -798,8 +796,7 @@ xfs_dir2_block_removename(
798 * Fix up bestfree, log the header if necessary. 796 * Fix up bestfree, log the header if necessary.
799 */ 797 */
800 if (needscan) 798 if (needscan)
801 xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog, 799 xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog);
802 NULL);
803 if (needlog) 800 if (needlog)
804 xfs_dir2_data_log_header(tp, bp); 801 xfs_dir2_data_log_header(tp, bp);
805 xfs_dir2_data_check(dp, bp); 802 xfs_dir2_data_check(dp, bp);
@@ -996,8 +993,7 @@ xfs_dir2_leaf_to_block(
996 * Scan the bestfree if we need it and log the data block header. 993 * Scan the bestfree if we need it and log the data block header.
997 */ 994 */
998 if (needscan) 995 if (needscan)
999 xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog, 996 xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog);
1000 NULL);
1001 if (needlog) 997 if (needlog)
1002 xfs_dir2_data_log_header(tp, dbp); 998 xfs_dir2_data_log_header(tp, dbp);
1003 /* 999 /*
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index f7c799217072..c211c37ef67c 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -324,8 +324,7 @@ void
324xfs_dir2_data_freescan( 324xfs_dir2_data_freescan(
325 xfs_mount_t *mp, /* filesystem mount point */ 325 xfs_mount_t *mp, /* filesystem mount point */
326 xfs_dir2_data_t *d, /* data block pointer */ 326 xfs_dir2_data_t *d, /* data block pointer */
327 int *loghead, /* out: log data header */ 327 int *loghead) /* out: log data header */
328 char *aendp) /* in: caller's endp */
329{ 328{
330 xfs_dir2_block_tail_t *btp; /* block tail */ 329 xfs_dir2_block_tail_t *btp; /* block tail */
331 xfs_dir2_data_entry_t *dep; /* active data entry */ 330 xfs_dir2_data_entry_t *dep; /* active data entry */
@@ -346,9 +345,7 @@ xfs_dir2_data_freescan(
346 * Set up pointers. 345 * Set up pointers.
347 */ 346 */
348 p = (char *)d->u; 347 p = (char *)d->u;
349 if (aendp) 348 if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
350 endp = aendp;
351 else if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
352 btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)d); 349 btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)d);
353 endp = (char *)XFS_DIR2_BLOCK_LEAF_P(btp); 350 endp = (char *)XFS_DIR2_BLOCK_LEAF_P(btp);
354 } else 351 } else
diff --git a/fs/xfs/xfs_dir2_data.h b/fs/xfs/xfs_dir2_data.h
index a6ae2d21c40a..c94c9099cfb1 100644
--- a/fs/xfs/xfs_dir2_data.h
+++ b/fs/xfs/xfs_dir2_data.h
@@ -166,7 +166,7 @@ extern xfs_dir2_data_free_t *xfs_dir2_data_freefind(xfs_dir2_data_t *d,
166extern xfs_dir2_data_free_t *xfs_dir2_data_freeinsert(xfs_dir2_data_t *d, 166extern xfs_dir2_data_free_t *xfs_dir2_data_freeinsert(xfs_dir2_data_t *d,
167 xfs_dir2_data_unused_t *dup, int *loghead); 167 xfs_dir2_data_unused_t *dup, int *loghead);
168extern void xfs_dir2_data_freescan(struct xfs_mount *mp, xfs_dir2_data_t *d, 168extern void xfs_dir2_data_freescan(struct xfs_mount *mp, xfs_dir2_data_t *d,
169 int *loghead, char *aendp); 169 int *loghead);
170extern int xfs_dir2_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno, 170extern int xfs_dir2_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno,
171 struct xfs_dabuf **bpp); 171 struct xfs_dabuf **bpp);
172extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_dabuf *bp, 172extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_dabuf *bp,
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index b1cf1fbf423d..db14ea71459f 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -133,8 +133,7 @@ xfs_dir2_block_to_leaf(
133 */ 133 */
134 block->hdr.magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); 134 block->hdr.magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
135 if (needscan) 135 if (needscan)
136 xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog, 136 xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog);
137 NULL);
138 /* 137 /*
139 * Set up leaf tail and bests table. 138 * Set up leaf tail and bests table.
140 */ 139 */
@@ -414,7 +413,7 @@ xfs_dir2_leaf_addname(
414 * Need to scan fix up the bestfree table. 413 * Need to scan fix up the bestfree table.
415 */ 414 */
416 if (needscan) 415 if (needscan)
417 xfs_dir2_data_freescan(mp, data, &needlog, NULL); 416 xfs_dir2_data_freescan(mp, data, &needlog);
418 /* 417 /*
419 * Need to log the data block's header. 418 * Need to log the data block's header.
420 */ 419 */
@@ -1496,7 +1495,7 @@ xfs_dir2_leaf_removename(
1496 * log the data block header if necessary. 1495 * log the data block header if necessary.
1497 */ 1496 */
1498 if (needscan) 1497 if (needscan)
1499 xfs_dir2_data_freescan(mp, data, &needlog, NULL); 1498 xfs_dir2_data_freescan(mp, data, &needlog);
1500 if (needlog) 1499 if (needlog)
1501 xfs_dir2_data_log_header(tp, dbp); 1500 xfs_dir2_data_log_header(tp, dbp);
1502 /* 1501 /*
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 9ca71719b683..d083c3819934 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -904,7 +904,7 @@ xfs_dir2_leafn_remove(
904 * Log the data block header if needed. 904 * Log the data block header if needed.
905 */ 905 */
906 if (needscan) 906 if (needscan)
907 xfs_dir2_data_freescan(mp, data, &needlog, NULL); 907 xfs_dir2_data_freescan(mp, data, &needlog);
908 if (needlog) 908 if (needlog)
909 xfs_dir2_data_log_header(tp, dbp); 909 xfs_dir2_data_log_header(tp, dbp);
910 xfs_dir2_data_check(dp, dbp); 910 xfs_dir2_data_check(dp, dbp);
@@ -1705,7 +1705,7 @@ xfs_dir2_node_addname_int(
1705 * Rescan the block for bestfree if needed. 1705 * Rescan the block for bestfree if needed.
1706 */ 1706 */
1707 if (needscan) 1707 if (needscan)
1708 xfs_dir2_data_freescan(mp, data, &needlog, NULL); 1708 xfs_dir2_data_freescan(mp, data, &needlog);
1709 /* 1709 /*
1710 * Log the data block header if needed. 1710 * Log the data block header if needed.
1711 */ 1711 */
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index b1af54464f00..8c4331631337 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -80,7 +80,7 @@ xfs_error_test(int error_tag, int *fsidp, char *expression,
80 int i; 80 int i;
81 int64_t fsid; 81 int64_t fsid;
82 82
83 if (random() % randfactor) 83 if (random32() % randfactor)
84 return 0; 84 return 0;
85 85
86 memcpy(&fsid, fsidp, sizeof(xfs_fsid_t)); 86 memcpy(&fsid, fsidp, sizeof(xfs_fsid_t));
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 32c37c1c47ab..b599e6be9ec1 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -346,7 +346,7 @@ xfs_growfs_data_private(
346 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, nfree); 346 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, nfree);
347 if (dpct) 347 if (dpct)
348 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct); 348 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct);
349 error = xfs_trans_commit(tp, 0, NULL); 349 error = xfs_trans_commit(tp, 0);
350 if (error) { 350 if (error) {
351 return error; 351 return error;
352 } 352 }
@@ -605,7 +605,7 @@ xfs_fs_log_dummy(
605 xfs_trans_ihold(tp, ip); 605 xfs_trans_ihold(tp, ip);
606 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 606 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
607 xfs_trans_set_sync(tp); 607 xfs_trans_set_sync(tp);
608 xfs_trans_commit(tp, 0, NULL); 608 xfs_trans_commit(tp, 0);
609 609
610 xfs_iunlock(ip, XFS_ILOCK_EXCL); 610 xfs_iunlock(ip, XFS_ILOCK_EXCL);
611} 611}
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index c1c89dac19cc..114433a22baa 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -879,17 +879,17 @@ xfs_ilock(xfs_inode_t *ip,
879 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); 879 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
880 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != 880 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
881 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 881 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
882 ASSERT((lock_flags & ~XFS_LOCK_MASK) == 0); 882 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
883 883
884 if (lock_flags & XFS_IOLOCK_EXCL) { 884 if (lock_flags & XFS_IOLOCK_EXCL) {
885 mrupdate(&ip->i_iolock); 885 mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
886 } else if (lock_flags & XFS_IOLOCK_SHARED) { 886 } else if (lock_flags & XFS_IOLOCK_SHARED) {
887 mraccess(&ip->i_iolock); 887 mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
888 } 888 }
889 if (lock_flags & XFS_ILOCK_EXCL) { 889 if (lock_flags & XFS_ILOCK_EXCL) {
890 mrupdate(&ip->i_lock); 890 mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
891 } else if (lock_flags & XFS_ILOCK_SHARED) { 891 } else if (lock_flags & XFS_ILOCK_SHARED) {
892 mraccess(&ip->i_lock); 892 mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
893 } 893 }
894 xfs_ilock_trace(ip, 1, lock_flags, (inst_t *)__return_address); 894 xfs_ilock_trace(ip, 1, lock_flags, (inst_t *)__return_address);
895} 895}
@@ -923,7 +923,7 @@ xfs_ilock_nowait(xfs_inode_t *ip,
923 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); 923 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
924 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != 924 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
925 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 925 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
926 ASSERT((lock_flags & ~XFS_LOCK_MASK) == 0); 926 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
927 927
928 iolocked = 0; 928 iolocked = 0;
929 if (lock_flags & XFS_IOLOCK_EXCL) { 929 if (lock_flags & XFS_IOLOCK_EXCL) {
@@ -983,7 +983,8 @@ xfs_iunlock(xfs_inode_t *ip,
983 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); 983 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
984 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != 984 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
985 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 985 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
986 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_IUNLOCK_NONOTIFY)) == 0); 986 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_IUNLOCK_NONOTIFY |
987 XFS_LOCK_DEP_MASK)) == 0);
987 ASSERT(lock_flags != 0); 988 ASSERT(lock_flags != 0);
988 989
989 if (lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) { 990 if (lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) {
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 3da9829c19d5..3ca5d43b8345 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -442,6 +442,7 @@ xfs_iformat(
442 return XFS_ERROR(EFSCORRUPTED); 442 return XFS_ERROR(EFSCORRUPTED);
443 } 443 }
444 ip->i_d.di_size = 0; 444 ip->i_d.di_size = 0;
445 ip->i_size = 0;
445 ip->i_df.if_u2.if_rdev = INT_GET(dip->di_u.di_dev, ARCH_CONVERT); 446 ip->i_df.if_u2.if_rdev = INT_GET(dip->di_u.di_dev, ARCH_CONVERT);
446 break; 447 break;
447 448
@@ -980,6 +981,7 @@ xfs_iread(
980 } 981 }
981 982
982 ip->i_delayed_blks = 0; 983 ip->i_delayed_blks = 0;
984 ip->i_size = ip->i_d.di_size;
983 985
984 /* 986 /*
985 * Mark the buffer containing the inode as something to keep 987 * Mark the buffer containing the inode as something to keep
@@ -1170,6 +1172,7 @@ xfs_ialloc(
1170 } 1172 }
1171 1173
1172 ip->i_d.di_size = 0; 1174 ip->i_d.di_size = 0;
1175 ip->i_size = 0;
1173 ip->i_d.di_nextents = 0; 1176 ip->i_d.di_nextents = 0;
1174 ASSERT(ip->i_d.di_nblocks == 0); 1177 ASSERT(ip->i_d.di_nblocks == 0);
1175 xfs_ichgtime(ip, XFS_ICHGTIME_CHG|XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD); 1178 xfs_ichgtime(ip, XFS_ICHGTIME_CHG|XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD);
@@ -1340,7 +1343,7 @@ xfs_file_last_byte(
1340 } else { 1343 } else {
1341 last_block = 0; 1344 last_block = 0;
1342 } 1345 }
1343 size_last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)ip->i_d.di_size); 1346 size_last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)ip->i_size);
1344 last_block = XFS_FILEOFF_MAX(last_block, size_last_block); 1347 last_block = XFS_FILEOFF_MAX(last_block, size_last_block);
1345 1348
1346 last_byte = XFS_FSB_TO_B(mp, last_block); 1349 last_byte = XFS_FSB_TO_B(mp, last_block);
@@ -1421,7 +1424,7 @@ xfs_itrunc_trace(
1421 * must be called again with all the same restrictions as the initial 1424 * must be called again with all the same restrictions as the initial
1422 * call. 1425 * call.
1423 */ 1426 */
1424void 1427int
1425xfs_itruncate_start( 1428xfs_itruncate_start(
1426 xfs_inode_t *ip, 1429 xfs_inode_t *ip,
1427 uint flags, 1430 uint flags,
@@ -1431,9 +1434,10 @@ xfs_itruncate_start(
1431 xfs_off_t toss_start; 1434 xfs_off_t toss_start;
1432 xfs_mount_t *mp; 1435 xfs_mount_t *mp;
1433 bhv_vnode_t *vp; 1436 bhv_vnode_t *vp;
1437 int error = 0;
1434 1438
1435 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0); 1439 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0);
1436 ASSERT((new_size == 0) || (new_size <= ip->i_d.di_size)); 1440 ASSERT((new_size == 0) || (new_size <= ip->i_size));
1437 ASSERT((flags == XFS_ITRUNC_DEFINITE) || 1441 ASSERT((flags == XFS_ITRUNC_DEFINITE) ||
1438 (flags == XFS_ITRUNC_MAYBE)); 1442 (flags == XFS_ITRUNC_MAYBE));
1439 1443
@@ -1468,7 +1472,7 @@ xfs_itruncate_start(
1468 * file size, so there is no way that the data extended 1472 * file size, so there is no way that the data extended
1469 * out there. 1473 * out there.
1470 */ 1474 */
1471 return; 1475 return 0;
1472 } 1476 }
1473 last_byte = xfs_file_last_byte(ip); 1477 last_byte = xfs_file_last_byte(ip);
1474 xfs_itrunc_trace(XFS_ITRUNC_START, ip, flags, new_size, toss_start, 1478 xfs_itrunc_trace(XFS_ITRUNC_START, ip, flags, new_size, toss_start,
@@ -1477,7 +1481,7 @@ xfs_itruncate_start(
1477 if (flags & XFS_ITRUNC_DEFINITE) { 1481 if (flags & XFS_ITRUNC_DEFINITE) {
1478 bhv_vop_toss_pages(vp, toss_start, -1, FI_REMAPF_LOCKED); 1482 bhv_vop_toss_pages(vp, toss_start, -1, FI_REMAPF_LOCKED);
1479 } else { 1483 } else {
1480 bhv_vop_flushinval_pages(vp, toss_start, -1, FI_REMAPF_LOCKED); 1484 error = bhv_vop_flushinval_pages(vp, toss_start, -1, FI_REMAPF_LOCKED);
1481 } 1485 }
1482 } 1486 }
1483 1487
@@ -1486,6 +1490,7 @@ xfs_itruncate_start(
1486 ASSERT(VN_CACHED(vp) == 0); 1490 ASSERT(VN_CACHED(vp) == 0);
1487 } 1491 }
1488#endif 1492#endif
1493 return error;
1489} 1494}
1490 1495
1491/* 1496/*
@@ -1556,7 +1561,7 @@ xfs_itruncate_finish(
1556 1561
1557 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0); 1562 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0);
1558 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0); 1563 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0);
1559 ASSERT((new_size == 0) || (new_size <= ip->i_d.di_size)); 1564 ASSERT((new_size == 0) || (new_size <= ip->i_size));
1560 ASSERT(*tp != NULL); 1565 ASSERT(*tp != NULL);
1561 ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); 1566 ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
1562 ASSERT(ip->i_transp == *tp); 1567 ASSERT(ip->i_transp == *tp);
@@ -1630,8 +1635,20 @@ xfs_itruncate_finish(
1630 */ 1635 */
1631 if (fork == XFS_DATA_FORK) { 1636 if (fork == XFS_DATA_FORK) {
1632 if (ip->i_d.di_nextents > 0) { 1637 if (ip->i_d.di_nextents > 0) {
1633 ip->i_d.di_size = new_size; 1638 /*
1634 xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); 1639 * If we are not changing the file size then do
1640 * not update the on-disk file size - we may be
1641 * called from xfs_inactive_free_eofblocks(). If we
1642 * update the on-disk file size and then the system
1643 * crashes before the contents of the file are
1644 * flushed to disk then the files may be full of
1645 * holes (ie NULL files bug).
1646 */
1647 if (ip->i_size != new_size) {
1648 ip->i_d.di_size = new_size;
1649 ip->i_size = new_size;
1650 xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
1651 }
1635 } 1652 }
1636 } else if (sync) { 1653 } else if (sync) {
1637 ASSERT(!(mp->m_flags & XFS_MOUNT_WSYNC)); 1654 ASSERT(!(mp->m_flags & XFS_MOUNT_WSYNC));
@@ -1746,7 +1763,7 @@ xfs_itruncate_finish(
1746 xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); 1763 xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
1747 } 1764 }
1748 ntp = xfs_trans_dup(ntp); 1765 ntp = xfs_trans_dup(ntp);
1749 (void) xfs_trans_commit(*tp, 0, NULL); 1766 (void) xfs_trans_commit(*tp, 0);
1750 *tp = ntp; 1767 *tp = ntp;
1751 error = xfs_trans_reserve(ntp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 1768 error = xfs_trans_reserve(ntp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
1752 XFS_TRANS_PERM_LOG_RES, 1769 XFS_TRANS_PERM_LOG_RES,
@@ -1767,7 +1784,19 @@ xfs_itruncate_finish(
1767 */ 1784 */
1768 if (fork == XFS_DATA_FORK) { 1785 if (fork == XFS_DATA_FORK) {
1769 xfs_isize_check(mp, ip, new_size); 1786 xfs_isize_check(mp, ip, new_size);
1770 ip->i_d.di_size = new_size; 1787 /*
1788 * If we are not changing the file size then do
1789 * not update the on-disk file size - we may be
1790 * called from xfs_inactive_free_eofblocks(). If we
1791 * update the on-disk file size and then the system
1792 * crashes before the contents of the file are
1793 * flushed to disk then the files may be full of
1794 * holes (ie NULL files bug).
1795 */
1796 if (ip->i_size != new_size) {
1797 ip->i_d.di_size = new_size;
1798 ip->i_size = new_size;
1799 }
1771 } 1800 }
1772 xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); 1801 xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
1773 ASSERT((new_size != 0) || 1802 ASSERT((new_size != 0) ||
@@ -1800,7 +1829,7 @@ xfs_igrow_start(
1800 1829
1801 ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0); 1830 ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0);
1802 ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0); 1831 ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0);
1803 ASSERT(new_size > ip->i_d.di_size); 1832 ASSERT(new_size > ip->i_size);
1804 1833
1805 /* 1834 /*
1806 * Zero any pages that may have been created by 1835 * Zero any pages that may have been created by
@@ -1808,7 +1837,7 @@ xfs_igrow_start(
1808 * and any blocks between the old and new file sizes. 1837 * and any blocks between the old and new file sizes.
1809 */ 1838 */
1810 error = xfs_zero_eof(XFS_ITOV(ip), &ip->i_iocore, new_size, 1839 error = xfs_zero_eof(XFS_ITOV(ip), &ip->i_iocore, new_size,
1811 ip->i_d.di_size); 1840 ip->i_size);
1812 return error; 1841 return error;
1813} 1842}
1814 1843
@@ -1832,13 +1861,14 @@ xfs_igrow_finish(
1832 ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0); 1861 ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0);
1833 ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0); 1862 ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0);
1834 ASSERT(ip->i_transp == tp); 1863 ASSERT(ip->i_transp == tp);
1835 ASSERT(new_size > ip->i_d.di_size); 1864 ASSERT(new_size > ip->i_size);
1836 1865
1837 /* 1866 /*
1838 * Update the file size. Update the inode change timestamp 1867 * Update the file size. Update the inode change timestamp
1839 * if change_flag set. 1868 * if change_flag set.
1840 */ 1869 */
1841 ip->i_d.di_size = new_size; 1870 ip->i_d.di_size = new_size;
1871 ip->i_size = new_size;
1842 if (change_flag) 1872 if (change_flag)
1843 xfs_ichgtime(ip, XFS_ICHGTIME_CHG); 1873 xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
1844 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1874 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
@@ -2321,7 +2351,7 @@ xfs_ifree(
2321 ASSERT(ip->i_d.di_nlink == 0); 2351 ASSERT(ip->i_d.di_nlink == 0);
2322 ASSERT(ip->i_d.di_nextents == 0); 2352 ASSERT(ip->i_d.di_nextents == 0);
2323 ASSERT(ip->i_d.di_anextents == 0); 2353 ASSERT(ip->i_d.di_anextents == 0);
2324 ASSERT((ip->i_d.di_size == 0) || 2354 ASSERT((ip->i_d.di_size == 0 && ip->i_size == 0) ||
2325 ((ip->i_d.di_mode & S_IFMT) != S_IFREG)); 2355 ((ip->i_d.di_mode & S_IFMT) != S_IFREG));
2326 ASSERT(ip->i_d.di_nblocks == 0); 2356 ASSERT(ip->i_d.di_nblocks == 0);
2327 2357
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index bc823720d88f..f75afecef8e7 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -287,6 +287,7 @@ typedef struct xfs_inode {
287 struct xfs_inode *i_cnext; /* cluster hash link forward */ 287 struct xfs_inode *i_cnext; /* cluster hash link forward */
288 struct xfs_inode *i_cprev; /* cluster hash link backward */ 288 struct xfs_inode *i_cprev; /* cluster hash link backward */
289 289
290 xfs_fsize_t i_size; /* in-memory size */
290 /* Trace buffers per inode. */ 291 /* Trace buffers per inode. */
291#ifdef XFS_BMAP_TRACE 292#ifdef XFS_BMAP_TRACE
292 struct ktrace *i_xtrace; /* inode extent list trace */ 293 struct ktrace *i_xtrace; /* inode extent list trace */
@@ -305,6 +306,8 @@ typedef struct xfs_inode {
305#endif 306#endif
306} xfs_inode_t; 307} xfs_inode_t;
307 308
309#define XFS_ISIZE(ip) (((ip)->i_d.di_mode & S_IFMT) == S_IFREG) ? \
310 (ip)->i_size : (ip)->i_d.di_size;
308 311
309/* 312/*
310 * i_flags helper functions 313 * i_flags helper functions
@@ -379,26 +382,58 @@ xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
379 382
380/* 383/*
381 * Flags for inode locking. 384 * Flags for inode locking.
385 * Bit ranges: 1<<1 - 1<<16-1 -- iolock/ilock modes (bitfield)
386 * 1<<16 - 1<<32-1 -- lockdep annotation (integers)
382 */ 387 */
383#define XFS_IOLOCK_EXCL 0x001 388#define XFS_IOLOCK_EXCL (1<<0)
384#define XFS_IOLOCK_SHARED 0x002 389#define XFS_IOLOCK_SHARED (1<<1)
385#define XFS_ILOCK_EXCL 0x004 390#define XFS_ILOCK_EXCL (1<<2)
386#define XFS_ILOCK_SHARED 0x008 391#define XFS_ILOCK_SHARED (1<<3)
387#define XFS_IUNLOCK_NONOTIFY 0x010 392#define XFS_IUNLOCK_NONOTIFY (1<<4)
388/* XFS_IOLOCK_NESTED 0x020 */ 393/* #define XFS_IOLOCK_NESTED (1<<5) */
389#define XFS_EXTENT_TOKEN_RD 0x040 394#define XFS_EXTENT_TOKEN_RD (1<<6)
390#define XFS_SIZE_TOKEN_RD 0x080 395#define XFS_SIZE_TOKEN_RD (1<<7)
391#define XFS_EXTSIZE_RD (XFS_EXTENT_TOKEN_RD|XFS_SIZE_TOKEN_RD) 396#define XFS_EXTSIZE_RD (XFS_EXTENT_TOKEN_RD|XFS_SIZE_TOKEN_RD)
392#define XFS_WILLLEND 0x100 /* Always acquire tokens for lending */ 397#define XFS_WILLLEND (1<<8) /* Always acquire tokens for lending */
393#define XFS_EXTENT_TOKEN_WR (XFS_EXTENT_TOKEN_RD | XFS_WILLLEND) 398#define XFS_EXTENT_TOKEN_WR (XFS_EXTENT_TOKEN_RD | XFS_WILLLEND)
394#define XFS_SIZE_TOKEN_WR (XFS_SIZE_TOKEN_RD | XFS_WILLLEND) 399#define XFS_SIZE_TOKEN_WR (XFS_SIZE_TOKEN_RD | XFS_WILLLEND)
395#define XFS_EXTSIZE_WR (XFS_EXTSIZE_RD | XFS_WILLLEND) 400#define XFS_EXTSIZE_WR (XFS_EXTSIZE_RD | XFS_WILLLEND)
396/* XFS_SIZE_TOKEN_WANT 0x200 */ 401/* TODO:XFS_SIZE_TOKEN_WANT (1<<9) */
397 402
398#define XFS_LOCK_MASK \ 403#define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \
399 (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL | \ 404 | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED \
400 XFS_ILOCK_SHARED | XFS_EXTENT_TOKEN_RD | XFS_SIZE_TOKEN_RD | \ 405 | XFS_EXTENT_TOKEN_RD | XFS_SIZE_TOKEN_RD \
401 XFS_WILLLEND) 406 | XFS_WILLLEND)
407
408/*
409 * Flags for lockdep annotations.
410 *
411 * XFS_I[O]LOCK_PARENT - for operations that require locking two inodes
412 * (ie directory operations that require locking a directory inode and
413 * an entry inode). The first inode gets locked with this flag so it
414 * gets a lockdep subclass of 1 and the second lock will have a lockdep
415 * subclass of 0.
416 *
417 * XFS_I[O]LOCK_INUMORDER - for locking several inodes at the some time
418 * with xfs_lock_inodes(). This flag is used as the starting subclass
419 * and each subsequent lock acquired will increment the subclass by one.
420 * So the first lock acquired will have a lockdep subclass of 2, the
421 * second lock will have a lockdep subclass of 3, and so on.
422 */
423#define XFS_IOLOCK_SHIFT 16
424#define XFS_IOLOCK_PARENT (1 << XFS_IOLOCK_SHIFT)
425#define XFS_IOLOCK_INUMORDER (2 << XFS_IOLOCK_SHIFT)
426
427#define XFS_ILOCK_SHIFT 24
428#define XFS_ILOCK_PARENT (1 << XFS_ILOCK_SHIFT)
429#define XFS_ILOCK_INUMORDER (2 << XFS_ILOCK_SHIFT)
430
431#define XFS_IOLOCK_DEP_MASK 0x00ff0000
432#define XFS_ILOCK_DEP_MASK 0xff000000
433#define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | XFS_ILOCK_DEP_MASK)
434
435#define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT)
436#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT)
402 437
403/* 438/*
404 * Flags for xfs_iflush() 439 * Flags for xfs_iflush()
@@ -481,7 +516,7 @@ uint xfs_ip2xflags(struct xfs_inode *);
481uint xfs_dic2xflags(struct xfs_dinode_core *); 516uint xfs_dic2xflags(struct xfs_dinode_core *);
482int xfs_ifree(struct xfs_trans *, xfs_inode_t *, 517int xfs_ifree(struct xfs_trans *, xfs_inode_t *,
483 struct xfs_bmap_free *); 518 struct xfs_bmap_free *);
484void xfs_itruncate_start(xfs_inode_t *, uint, xfs_fsize_t); 519int xfs_itruncate_start(xfs_inode_t *, uint, xfs_fsize_t);
485int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *, 520int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *,
486 xfs_fsize_t, int, int); 521 xfs_fsize_t, int, int);
487int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); 522int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
diff --git a/fs/xfs/xfs_iocore.c b/fs/xfs/xfs_iocore.c
index 06d710c9ce4b..81548ec72ba6 100644
--- a/fs/xfs/xfs_iocore.c
+++ b/fs/xfs/xfs_iocore.c
@@ -52,7 +52,7 @@ STATIC xfs_fsize_t
52xfs_size_fn( 52xfs_size_fn(
53 xfs_inode_t *ip) 53 xfs_inode_t *ip)
54{ 54{
55 return (ip->i_d.di_size); 55 return XFS_ISIZE(ip);
56} 56}
57 57
58STATIC int 58STATIC int
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index cc6a7b5a9912..3f2b9f2a7b94 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -458,7 +458,7 @@ xfs_iomap_write_direct(
458 extsz = ip->i_d.di_extsize; 458 extsz = ip->i_d.di_extsize;
459 } 459 }
460 460
461 isize = ip->i_d.di_size; 461 isize = ip->i_size;
462 if (io->io_new_size > isize) 462 if (io->io_new_size > isize)
463 isize = io->io_new_size; 463 isize = io->io_new_size;
464 464
@@ -524,7 +524,7 @@ xfs_iomap_write_direct(
524 xfs_trans_ihold(tp, ip); 524 xfs_trans_ihold(tp, ip);
525 525
526 bmapi_flag = XFS_BMAPI_WRITE; 526 bmapi_flag = XFS_BMAPI_WRITE;
527 if ((flags & BMAPI_DIRECT) && (offset < ip->i_d.di_size || extsz)) 527 if ((flags & BMAPI_DIRECT) && (offset < ip->i_size || extsz))
528 bmapi_flag |= XFS_BMAPI_PREALLOC; 528 bmapi_flag |= XFS_BMAPI_PREALLOC;
529 529
530 /* 530 /*
@@ -543,7 +543,7 @@ xfs_iomap_write_direct(
543 error = xfs_bmap_finish(&tp, &free_list, &committed); 543 error = xfs_bmap_finish(&tp, &free_list, &committed);
544 if (error) 544 if (error)
545 goto error0; 545 goto error0;
546 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 546 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
547 if (error) 547 if (error)
548 goto error_out; 548 goto error_out;
549 549
@@ -676,7 +676,7 @@ xfs_iomap_write_delay(
676 offset_fsb = XFS_B_TO_FSBT(mp, offset); 676 offset_fsb = XFS_B_TO_FSBT(mp, offset);
677 677
678retry: 678retry:
679 isize = ip->i_d.di_size; 679 isize = ip->i_size;
680 if (io->io_new_size > isize) 680 if (io->io_new_size > isize)
681 isize = io->io_new_size; 681 isize = io->io_new_size;
682 682
@@ -817,7 +817,7 @@ xfs_iomap_write_allocate(
817 * we dropped the ilock in the interim. 817 * we dropped the ilock in the interim.
818 */ 818 */
819 819
820 end_fsb = XFS_B_TO_FSB(mp, ip->i_d.di_size); 820 end_fsb = XFS_B_TO_FSB(mp, ip->i_size);
821 xfs_bmap_last_offset(NULL, ip, &last_block, 821 xfs_bmap_last_offset(NULL, ip, &last_block,
822 XFS_DATA_FORK); 822 XFS_DATA_FORK);
823 last_block = XFS_FILEOFF_MAX(last_block, end_fsb); 823 last_block = XFS_FILEOFF_MAX(last_block, end_fsb);
@@ -840,8 +840,7 @@ xfs_iomap_write_allocate(
840 if (error) 840 if (error)
841 goto trans_cancel; 841 goto trans_cancel;
842 842
843 error = xfs_trans_commit(tp, 843 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
844 XFS_TRANS_RELEASE_LOG_RES, NULL);
845 if (error) 844 if (error)
846 goto error0; 845 goto error0;
847 846
@@ -948,7 +947,7 @@ xfs_iomap_write_unwritten(
948 if (error) 947 if (error)
949 goto error_on_bmapi_transaction; 948 goto error_on_bmapi_transaction;
950 949
951 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 950 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
952 xfs_iunlock(ip, XFS_ILOCK_EXCL); 951 xfs_iunlock(ip, XFS_ILOCK_EXCL);
953 if (error) 952 if (error)
954 return XFS_ERROR(error); 953 return XFS_ERROR(error);
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 3ce204a524b0..df441ee936b2 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -22,6 +22,7 @@
22 22
23 23
24typedef enum { /* iomap_flags values */ 24typedef enum { /* iomap_flags values */
25 IOMAP_READ = 0, /* mapping for a read */
25 IOMAP_EOF = 0x01, /* mapping contains EOF */ 26 IOMAP_EOF = 0x01, /* mapping contains EOF */
26 IOMAP_HOLE = 0x02, /* mapping covers a hole */ 27 IOMAP_HOLE = 0x02, /* mapping covers a hole */
27 IOMAP_DELAY = 0x04, /* mapping covers delalloc region */ 28 IOMAP_DELAY = 0x04, /* mapping covers delalloc region */
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 7775ddc0b3c6..e725ddd3de5f 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -809,7 +809,7 @@ xfs_inumbers(
809 xfs_buf_relse(agbp); 809 xfs_buf_relse(agbp);
810 agbp = NULL; 810 agbp = NULL;
811 /* 811 /*
812 * Move up the the last inode in the current 812 * Move up the last inode in the current
813 * chunk. The lookup_ge will always get 813 * chunk. The lookup_ge will always get
814 * us the first inode in the next chunk. 814 * us the first inode in the next chunk.
815 */ 815 */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index ca74d3f5910e..080fabf61c92 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1509,7 +1509,6 @@ xlog_recover_insert_item_frontq(
1509 1509
1510STATIC int 1510STATIC int
1511xlog_recover_reorder_trans( 1511xlog_recover_reorder_trans(
1512 xlog_t *log,
1513 xlog_recover_t *trans) 1512 xlog_recover_t *trans)
1514{ 1513{
1515 xlog_recover_item_t *first_item, *itemq, *itemq_next; 1514 xlog_recover_item_t *first_item, *itemq, *itemq_next;
@@ -1867,7 +1866,6 @@ xlog_recover_do_inode_buffer(
1867/*ARGSUSED*/ 1866/*ARGSUSED*/
1868STATIC void 1867STATIC void
1869xlog_recover_do_reg_buffer( 1868xlog_recover_do_reg_buffer(
1870 xfs_mount_t *mp,
1871 xlog_recover_item_t *item, 1869 xlog_recover_item_t *item,
1872 xfs_buf_t *bp, 1870 xfs_buf_t *bp,
1873 xfs_buf_log_format_t *buf_f) 1871 xfs_buf_log_format_t *buf_f)
@@ -2083,7 +2081,7 @@ xlog_recover_do_dquot_buffer(
2083 if (log->l_quotaoffs_flag & type) 2081 if (log->l_quotaoffs_flag & type)
2084 return; 2082 return;
2085 2083
2086 xlog_recover_do_reg_buffer(mp, item, bp, buf_f); 2084 xlog_recover_do_reg_buffer(item, bp, buf_f);
2087} 2085}
2088 2086
2089/* 2087/*
@@ -2184,7 +2182,7 @@ xlog_recover_do_buffer_trans(
2184 (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) { 2182 (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
2185 xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); 2183 xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
2186 } else { 2184 } else {
2187 xlog_recover_do_reg_buffer(mp, item, bp, buf_f); 2185 xlog_recover_do_reg_buffer(item, bp, buf_f);
2188 } 2186 }
2189 if (error) 2187 if (error)
2190 return XFS_ERROR(error); 2188 return XFS_ERROR(error);
@@ -2765,7 +2763,7 @@ xlog_recover_do_trans(
2765 int error = 0; 2763 int error = 0;
2766 xlog_recover_item_t *item, *first_item; 2764 xlog_recover_item_t *item, *first_item;
2767 2765
2768 if ((error = xlog_recover_reorder_trans(log, trans))) 2766 if ((error = xlog_recover_reorder_trans(trans)))
2769 return error; 2767 return error;
2770 first_item = item = trans->r_itemq; 2768 first_item = item = trans->r_itemq;
2771 do { 2769 do {
@@ -3016,7 +3014,7 @@ xlog_recover_process_efi(
3016 } 3014 }
3017 3015
3018 efip->efi_flags |= XFS_EFI_RECOVERED; 3016 efip->efi_flags |= XFS_EFI_RECOVERED;
3019 xfs_trans_commit(tp, 0, NULL); 3017 xfs_trans_commit(tp, 0);
3020} 3018}
3021 3019
3022/* 3020/*
@@ -3143,7 +3141,7 @@ xlog_recover_clear_agi_bucket(
3143 xfs_trans_log_buf(tp, agibp, offset, 3141 xfs_trans_log_buf(tp, agibp, offset,
3144 (offset + sizeof(xfs_agino_t) - 1)); 3142 (offset + sizeof(xfs_agino_t) - 1));
3145 3143
3146 (void) xfs_trans_commit(tp, 0, NULL); 3144 (void) xfs_trans_commit(tp, 0);
3147} 3145}
3148 3146
3149/* 3147/*
@@ -3886,8 +3884,7 @@ xlog_recover(
3886 * under the vfs layer, so we can get away with it unless 3884 * under the vfs layer, so we can get away with it unless
3887 * the device itself is read-only, in which case we fail. 3885 * the device itself is read-only, in which case we fail.
3888 */ 3886 */
3889 if ((error = xfs_dev_is_read_only(log->l_mp, 3887 if ((error = xfs_dev_is_read_only(log->l_mp, "recovery"))) {
3890 "recovery required"))) {
3891 return error; 3888 return error;
3892 } 3889 }
3893 3890
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 3bed0cf0d8af..a96bde6df96d 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1653,7 +1653,7 @@ xfs_mount_log_sbunit(
1653 return; 1653 return;
1654 } 1654 }
1655 xfs_mod_sb(tp, fields); 1655 xfs_mod_sb(tp, fields);
1656 xfs_trans_commit(tp, 0, NULL); 1656 xfs_trans_commit(tp, 0);
1657} 1657}
1658 1658
1659 1659
@@ -1734,11 +1734,13 @@ xfs_icsb_cpu_notify(
1734 per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu); 1734 per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu);
1735 switch (action) { 1735 switch (action) {
1736 case CPU_UP_PREPARE: 1736 case CPU_UP_PREPARE:
1737 case CPU_UP_PREPARE_FROZEN:
1737 /* Easy Case - initialize the area and locks, and 1738 /* Easy Case - initialize the area and locks, and
1738 * then rebalance when online does everything else for us. */ 1739 * then rebalance when online does everything else for us. */
1739 memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); 1740 memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
1740 break; 1741 break;
1741 case CPU_ONLINE: 1742 case CPU_ONLINE:
1743 case CPU_ONLINE_FROZEN:
1742 xfs_icsb_lock(mp); 1744 xfs_icsb_lock(mp);
1743 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0, 0); 1745 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0, 0);
1744 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0, 0); 1746 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0, 0);
@@ -1746,6 +1748,7 @@ xfs_icsb_cpu_notify(
1746 xfs_icsb_unlock(mp); 1748 xfs_icsb_unlock(mp);
1747 break; 1749 break;
1748 case CPU_DEAD: 1750 case CPU_DEAD:
1751 case CPU_DEAD_FROZEN:
1749 /* Disable all the counters, then fold the dead cpu's 1752 /* Disable all the counters, then fold the dead cpu's
1750 * count into the total on the global superblock and 1753 * count into the total on the global superblock and
1751 * re-enable the counters. */ 1754 * re-enable the counters. */
diff --git a/fs/xfs/xfs_qmops.c b/fs/xfs/xfs_qmops.c
index 320d63ff9ca2..0d594ed7efef 100644
--- a/fs/xfs/xfs_qmops.c
+++ b/fs/xfs/xfs_qmops.c
@@ -78,7 +78,7 @@ xfs_mount_reset_sbqflags(xfs_mount_t *mp)
78 return error; 78 return error;
79 } 79 }
80 xfs_mod_sb(tp, XFS_SB_QFLAGS); 80 xfs_mod_sb(tp, XFS_SB_QFLAGS);
81 error = xfs_trans_commit(tp, 0, NULL); 81 error = xfs_trans_commit(tp, 0);
82 return error; 82 return error;
83} 83}
84 84
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index 9dcb32aa4e2e..6f14df976f73 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -154,10 +154,11 @@ typedef struct xfs_qoff_logformat {
154#define XFS_ALL_QUOTA_CHKD (XFS_UQUOTA_CHKD | XFS_OQUOTA_CHKD) 154#define XFS_ALL_QUOTA_CHKD (XFS_UQUOTA_CHKD | XFS_OQUOTA_CHKD)
155 155
156#define XFS_IS_QUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT) 156#define XFS_IS_QUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT)
157#define XFS_IS_QUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ENFD)
158#define XFS_IS_UQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_UQUOTA_ACCT) 157#define XFS_IS_UQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_UQUOTA_ACCT)
159#define XFS_IS_PQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_PQUOTA_ACCT) 158#define XFS_IS_PQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_PQUOTA_ACCT)
160#define XFS_IS_GQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_GQUOTA_ACCT) 159#define XFS_IS_GQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_GQUOTA_ACCT)
160#define XFS_IS_UQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_UQUOTA_ENFD)
161#define XFS_IS_OQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_OQUOTA_ENFD)
161 162
162/* 163/*
163 * Incore only flags for quotaoff - these bits get cleared when quota(s) 164 * Incore only flags for quotaoff - these bits get cleared when quota(s)
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index 4c6573d784cd..7679d7a7022d 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -584,7 +584,7 @@ xfs_rename(
584 * trans_commit will unlock src_ip, target_ip & decrement 584 * trans_commit will unlock src_ip, target_ip & decrement
585 * the vnode references. 585 * the vnode references.
586 */ 586 */
587 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 587 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
588 if (target_ip != NULL) { 588 if (target_ip != NULL) {
589 xfs_refcache_purge_ip(target_ip); 589 xfs_refcache_purge_ip(target_ip);
590 IRELE(target_ip); 590 IRELE(target_ip);
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 6fff19dc3cf9..b3a5f07bd073 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -150,7 +150,7 @@ xfs_growfs_rt_alloc(
150 error = xfs_bmap_finish(&tp, &flist, &committed); 150 error = xfs_bmap_finish(&tp, &flist, &committed);
151 if (error) 151 if (error)
152 goto error_exit; 152 goto error_exit;
153 xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 153 xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
154 /* 154 /*
155 * Now we need to clear the allocated blocks. 155 * Now we need to clear the allocated blocks.
156 * Do this one block per transaction, to keep it simple. 156 * Do this one block per transaction, to keep it simple.
@@ -187,7 +187,7 @@ xfs_growfs_rt_alloc(
187 /* 187 /*
188 * Commit the transaction. 188 * Commit the transaction.
189 */ 189 */
190 xfs_trans_commit(tp, 0, NULL); 190 xfs_trans_commit(tp, 0);
191 } 191 }
192 /* 192 /*
193 * Go on to the next extent, if any. 193 * Go on to the next extent, if any.
@@ -2042,7 +2042,7 @@ xfs_growfs_rt(
2042 /* 2042 /*
2043 * Commit the transaction. 2043 * Commit the transaction.
2044 */ 2044 */
2045 xfs_trans_commit(tp, 0, NULL); 2045 xfs_trans_commit(tp, 0);
2046 } 2046 }
2047 2047
2048 if (error) 2048 if (error)
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index 1ea7c0ca6ae0..905d1c008be7 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -83,7 +83,7 @@ xfs_write_clear_setuid(
83 } 83 }
84 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 84 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
85 xfs_trans_set_sync(tp); 85 xfs_trans_set_sync(tp);
86 error = xfs_trans_commit(tp, 0, NULL); 86 error = xfs_trans_commit(tp, 0);
87 xfs_iunlock(ip, XFS_ILOCK_EXCL); 87 xfs_iunlock(ip, XFS_ILOCK_EXCL);
88 return 0; 88 return 0;
89} 89}
@@ -164,7 +164,7 @@ xfs_write_sync_logforce(
164 xfs_trans_ihold(tp, ip); 164 xfs_trans_ihold(tp, ip);
165 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 165 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
166 xfs_trans_set_sync(tp); 166 xfs_trans_set_sync(tp);
167 error = xfs_trans_commit(tp, 0, NULL); 167 error = xfs_trans_commit(tp, 0);
168 xfs_iunlock(ip, XFS_ILOCK_EXCL); 168 xfs_iunlock(ip, XFS_ILOCK_EXCL);
169 } 169 }
170 } 170 }
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 301ff9445b6f..cc2d60951e21 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -753,7 +753,6 @@ int
753_xfs_trans_commit( 753_xfs_trans_commit(
754 xfs_trans_t *tp, 754 xfs_trans_t *tp,
755 uint flags, 755 uint flags,
756 xfs_lsn_t *commit_lsn_p,
757 int *log_flushed) 756 int *log_flushed)
758{ 757{
759 xfs_log_iovec_t *log_vector; 758 xfs_log_iovec_t *log_vector;
@@ -812,8 +811,6 @@ shut_us_down:
812 xfs_trans_free_busy(tp); 811 xfs_trans_free_busy(tp);
813 xfs_trans_free(tp); 812 xfs_trans_free(tp);
814 XFS_STATS_INC(xs_trans_empty); 813 XFS_STATS_INC(xs_trans_empty);
815 if (commit_lsn_p)
816 *commit_lsn_p = commit_lsn;
817 return (shutdown); 814 return (shutdown);
818 } 815 }
819 ASSERT(tp->t_ticket != NULL); 816 ASSERT(tp->t_ticket != NULL);
@@ -864,9 +861,6 @@ shut_us_down:
864 kmem_free(log_vector, nvec * sizeof(xfs_log_iovec_t)); 861 kmem_free(log_vector, nvec * sizeof(xfs_log_iovec_t));
865 } 862 }
866 863
867 if (commit_lsn_p)
868 *commit_lsn_p = commit_lsn;
869
870 /* 864 /*
871 * If we got a log write error. Unpin the logitems that we 865 * If we got a log write error. Unpin the logitems that we
872 * had pinned, clean up, free trans structure, and return error. 866 * had pinned, clean up, free trans structure, and return error.
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index f1d7ab236726..7dfcc450366f 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -988,10 +988,8 @@ void xfs_trans_log_efd_extent(xfs_trans_t *,
988 xfs_extlen_t); 988 xfs_extlen_t);
989int _xfs_trans_commit(xfs_trans_t *, 989int _xfs_trans_commit(xfs_trans_t *,
990 uint flags, 990 uint flags,
991 xfs_lsn_t *,
992 int *); 991 int *);
993#define xfs_trans_commit(tp, flags, lsn) \ 992#define xfs_trans_commit(tp, flags) _xfs_trans_commit(tp, flags, NULL)
994 _xfs_trans_commit(tp, flags, lsn, NULL)
995void xfs_trans_cancel(xfs_trans_t *, int); 993void xfs_trans_cancel(xfs_trans_t *, int);
996void xfs_trans_ail_init(struct xfs_mount *); 994void xfs_trans_ail_init(struct xfs_mount *);
997xfs_lsn_t xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t); 995xfs_lsn_t xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t);
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 9014d7e44488..20ffec308e1e 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -222,7 +222,7 @@ xfs_dir_ialloc(
222 } 222 }
223 223
224 ntp = xfs_trans_dup(tp); 224 ntp = xfs_trans_dup(tp);
225 code = xfs_trans_commit(tp, 0, NULL); 225 code = xfs_trans_commit(tp, 0);
226 tp = ntp; 226 tp = ntp;
227 if (committed != NULL) { 227 if (committed != NULL) {
228 *committed = 1; 228 *committed = 1;
@@ -420,7 +420,11 @@ xfs_truncate_file(
420 * in a transaction. 420 * in a transaction.
421 */ 421 */
422 xfs_ilock(ip, XFS_IOLOCK_EXCL); 422 xfs_ilock(ip, XFS_IOLOCK_EXCL);
423 xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, (xfs_fsize_t)0); 423 error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, (xfs_fsize_t)0);
424 if (error) {
425 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
426 return error;
427 }
424 428
425 tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE); 429 tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE);
426 if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 430 if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
@@ -460,8 +464,7 @@ xfs_truncate_file(
460 XFS_TRANS_ABORT); 464 XFS_TRANS_ABORT);
461 } else { 465 } else {
462 xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 466 xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
463 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, 467 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
464 NULL);
465 } 468 }
466 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 469 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
467 470
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 29f72f613782..65c561201cb8 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -696,7 +696,7 @@ xfs_unmount_flush(
696 bhv_vnode_t *rvp = XFS_ITOV(rip); 696 bhv_vnode_t *rvp = XFS_ITOV(rip);
697 int error; 697 int error;
698 698
699 xfs_ilock(rip, XFS_ILOCK_EXCL); 699 xfs_ilock(rip, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
700 xfs_iflock(rip); 700 xfs_iflock(rip);
701 701
702 /* 702 /*
@@ -1147,7 +1147,7 @@ xfs_sync_inodes(
1147 if (XFS_FORCED_SHUTDOWN(mp)) { 1147 if (XFS_FORCED_SHUTDOWN(mp)) {
1148 bhv_vop_toss_pages(vp, 0, -1, FI_REMAPF); 1148 bhv_vop_toss_pages(vp, 0, -1, FI_REMAPF);
1149 } else { 1149 } else {
1150 bhv_vop_flushinval_pages(vp, 0, -1, FI_REMAPF); 1150 error = bhv_vop_flushinval_pages(vp, 0, -1, FI_REMAPF);
1151 } 1151 }
1152 1152
1153 xfs_ilock(ip, XFS_ILOCK_SHARED); 1153 xfs_ilock(ip, XFS_ILOCK_SHARED);
@@ -1539,7 +1539,7 @@ xfs_syncsub(
1539 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 1539 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1540 xfs_trans_ihold(tp, ip); 1540 xfs_trans_ihold(tp, ip);
1541 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1541 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1542 error = xfs_trans_commit(tp, 0, NULL); 1542 error = xfs_trans_commit(tp, 0);
1543 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1543 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1544 xfs_log_force(mp, (xfs_lsn_t)0, log_flags); 1544 xfs_log_force(mp, (xfs_lsn_t)0, log_flags);
1545 } 1545 }
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 52c41714ec54..de17aed578f0 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -133,7 +133,7 @@ xfs_getattr(
133 if (!(flags & ATTR_LAZY)) 133 if (!(flags & ATTR_LAZY))
134 xfs_ilock(ip, XFS_ILOCK_SHARED); 134 xfs_ilock(ip, XFS_ILOCK_SHARED);
135 135
136 vap->va_size = ip->i_d.di_size; 136 vap->va_size = XFS_ISIZE(ip);
137 if (vap->va_mask == XFS_AT_SIZE) 137 if (vap->va_mask == XFS_AT_SIZE)
138 goto all_done; 138 goto all_done;
139 139
@@ -496,7 +496,7 @@ xfs_setattr(
496 if (mask & XFS_AT_SIZE) { 496 if (mask & XFS_AT_SIZE) {
497 /* Short circuit the truncate case for zero length files */ 497 /* Short circuit the truncate case for zero length files */
498 if ((vap->va_size == 0) && 498 if ((vap->va_size == 0) &&
499 (ip->i_d.di_size == 0) && (ip->i_d.di_nextents == 0)) { 499 (ip->i_size == 0) && (ip->i_d.di_nextents == 0)) {
500 xfs_iunlock(ip, XFS_ILOCK_EXCL); 500 xfs_iunlock(ip, XFS_ILOCK_EXCL);
501 lock_flags &= ~XFS_ILOCK_EXCL; 501 lock_flags &= ~XFS_ILOCK_EXCL;
502 if (mask & XFS_AT_CTIME) 502 if (mask & XFS_AT_CTIME)
@@ -614,7 +614,7 @@ xfs_setattr(
614 */ 614 */
615 if (mask & XFS_AT_SIZE) { 615 if (mask & XFS_AT_SIZE) {
616 code = 0; 616 code = 0;
617 if ((vap->va_size > ip->i_d.di_size) && 617 if ((vap->va_size > ip->i_size) &&
618 (flags & ATTR_NOSIZETOK) == 0) { 618 (flags & ATTR_NOSIZETOK) == 0) {
619 code = xfs_igrow_start(ip, vap->va_size, credp); 619 code = xfs_igrow_start(ip, vap->va_size, credp);
620 } 620 }
@@ -654,10 +654,10 @@ xfs_setattr(
654 * Truncate file. Must have write permission and not be a directory. 654 * Truncate file. Must have write permission and not be a directory.
655 */ 655 */
656 if (mask & XFS_AT_SIZE) { 656 if (mask & XFS_AT_SIZE) {
657 if (vap->va_size > ip->i_d.di_size) { 657 if (vap->va_size > ip->i_size) {
658 xfs_igrow_finish(tp, ip, vap->va_size, 658 xfs_igrow_finish(tp, ip, vap->va_size,
659 !(flags & ATTR_DMI)); 659 !(flags & ATTR_DMI));
660 } else if ((vap->va_size <= ip->i_d.di_size) || 660 } else if ((vap->va_size <= ip->i_size) ||
661 ((vap->va_size == 0) && ip->i_d.di_nextents)) { 661 ((vap->va_size == 0) && ip->i_d.di_nextents)) {
662 /* 662 /*
663 * signal a sync transaction unless 663 * signal a sync transaction unless
@@ -873,7 +873,7 @@ xfs_setattr(
873 if (mp->m_flags & XFS_MOUNT_WSYNC) 873 if (mp->m_flags & XFS_MOUNT_WSYNC)
874 xfs_trans_set_sync(tp); 874 xfs_trans_set_sync(tp);
875 875
876 code = xfs_trans_commit(tp, commit_flags, NULL); 876 code = xfs_trans_commit(tp, commit_flags);
877 } 877 }
878 878
879 /* 879 /*
@@ -1176,7 +1176,7 @@ xfs_fsync(
1176 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1176 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1177 if (flag & FSYNC_WAIT) 1177 if (flag & FSYNC_WAIT)
1178 xfs_trans_set_sync(tp); 1178 xfs_trans_set_sync(tp);
1179 error = _xfs_trans_commit(tp, 0, NULL, &log_flushed); 1179 error = _xfs_trans_commit(tp, 0, &log_flushed);
1180 1180
1181 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1181 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1182 } 1182 }
@@ -1221,7 +1221,7 @@ xfs_inactive_free_eofblocks(
1221 * Figure out if there are any blocks beyond the end 1221 * Figure out if there are any blocks beyond the end
1222 * of the file. If not, then there is nothing to do. 1222 * of the file. If not, then there is nothing to do.
1223 */ 1223 */
1224 end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_d.di_size)); 1224 end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_size));
1225 last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); 1225 last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
1226 map_len = last_fsb - end_fsb; 1226 map_len = last_fsb - end_fsb;
1227 if (map_len <= 0) 1227 if (map_len <= 0)
@@ -1257,8 +1257,12 @@ xfs_inactive_free_eofblocks(
1257 * do that within a transaction. 1257 * do that within a transaction.
1258 */ 1258 */
1259 xfs_ilock(ip, XFS_IOLOCK_EXCL); 1259 xfs_ilock(ip, XFS_IOLOCK_EXCL);
1260 xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 1260 error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE,
1261 ip->i_d.di_size); 1261 ip->i_size);
1262 if (error) {
1263 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
1264 return error;
1265 }
1262 1266
1263 error = xfs_trans_reserve(tp, 0, 1267 error = xfs_trans_reserve(tp, 0,
1264 XFS_ITRUNCATE_LOG_RES(mp), 1268 XFS_ITRUNCATE_LOG_RES(mp),
@@ -1278,7 +1282,7 @@ xfs_inactive_free_eofblocks(
1278 xfs_trans_ihold(tp, ip); 1282 xfs_trans_ihold(tp, ip);
1279 1283
1280 error = xfs_itruncate_finish(&tp, ip, 1284 error = xfs_itruncate_finish(&tp, ip,
1281 ip->i_d.di_size, 1285 ip->i_size,
1282 XFS_DATA_FORK, 1286 XFS_DATA_FORK,
1283 0); 1287 0);
1284 /* 1288 /*
@@ -1291,8 +1295,7 @@ xfs_inactive_free_eofblocks(
1291 XFS_TRANS_ABORT)); 1295 XFS_TRANS_ABORT));
1292 } else { 1296 } else {
1293 error = xfs_trans_commit(tp, 1297 error = xfs_trans_commit(tp,
1294 XFS_TRANS_RELEASE_LOG_RES, 1298 XFS_TRANS_RELEASE_LOG_RES);
1295 NULL);
1296 } 1299 }
1297 xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1300 xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
1298 } 1301 }
@@ -1406,7 +1409,7 @@ xfs_inactive_symlink_rmt(
1406 * we need to unlock the inode since the new transaction doesn't 1409 * we need to unlock the inode since the new transaction doesn't
1407 * have the inode attached. 1410 * have the inode attached.
1408 */ 1411 */
1409 error = xfs_trans_commit(tp, 0, NULL); 1412 error = xfs_trans_commit(tp, 0);
1410 tp = ntp; 1413 tp = ntp;
1411 if (error) { 1414 if (error) {
1412 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1415 ASSERT(XFS_FORCED_SHUTDOWN(mp));
@@ -1503,7 +1506,7 @@ xfs_inactive_attrs(
1503 tp = *tpp; 1506 tp = *tpp;
1504 mp = ip->i_mount; 1507 mp = ip->i_mount;
1505 ASSERT(ip->i_d.di_forkoff != 0); 1508 ASSERT(ip->i_d.di_forkoff != 0);
1506 xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 1509 xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1507 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1510 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1508 1511
1509 error = xfs_attr_inactive(ip); 1512 error = xfs_attr_inactive(ip);
@@ -1565,7 +1568,7 @@ xfs_release(
1565 1568
1566 if (ip->i_d.di_nlink != 0) { 1569 if (ip->i_d.di_nlink != 0) {
1567 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && 1570 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
1568 ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 || 1571 ((ip->i_size > 0) || (VN_CACHED(vp) > 0 ||
1569 ip->i_delayed_blks > 0)) && 1572 ip->i_delayed_blks > 0)) &&
1570 (ip->i_df.if_flags & XFS_IFEXTENTS)) && 1573 (ip->i_df.if_flags & XFS_IFEXTENTS)) &&
1571 (!(ip->i_d.di_flags & 1574 (!(ip->i_d.di_flags &
@@ -1626,8 +1629,8 @@ xfs_inactive(
1626 * only one with a reference to the inode. 1629 * only one with a reference to the inode.
1627 */ 1630 */
1628 truncate = ((ip->i_d.di_nlink == 0) && 1631 truncate = ((ip->i_d.di_nlink == 0) &&
1629 ((ip->i_d.di_size != 0) || (ip->i_d.di_nextents > 0) || 1632 ((ip->i_d.di_size != 0) || (ip->i_size != 0) ||
1630 (ip->i_delayed_blks > 0)) && 1633 (ip->i_d.di_nextents > 0) || (ip->i_delayed_blks > 0)) &&
1631 ((ip->i_d.di_mode & S_IFMT) == S_IFREG)); 1634 ((ip->i_d.di_mode & S_IFMT) == S_IFREG));
1632 1635
1633 mp = ip->i_mount; 1636 mp = ip->i_mount;
@@ -1645,7 +1648,7 @@ xfs_inactive(
1645 1648
1646 if (ip->i_d.di_nlink != 0) { 1649 if (ip->i_d.di_nlink != 0) {
1647 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && 1650 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
1648 ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 || 1651 ((ip->i_size > 0) || (VN_CACHED(vp) > 0 ||
1649 ip->i_delayed_blks > 0)) && 1652 ip->i_delayed_blks > 0)) &&
1650 (ip->i_df.if_flags & XFS_IFEXTENTS) && 1653 (ip->i_df.if_flags & XFS_IFEXTENTS) &&
1651 (!(ip->i_d.di_flags & 1654 (!(ip->i_d.di_flags &
@@ -1675,7 +1678,11 @@ xfs_inactive(
1675 */ 1678 */
1676 xfs_ilock(ip, XFS_IOLOCK_EXCL); 1679 xfs_ilock(ip, XFS_IOLOCK_EXCL);
1677 1680
1678 xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 0); 1681 error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 0);
1682 if (error) {
1683 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
1684 return VN_INACTIVE_CACHE;
1685 }
1679 1686
1680 error = xfs_trans_reserve(tp, 0, 1687 error = xfs_trans_reserve(tp, 0,
1681 XFS_ITRUNCATE_LOG_RES(mp), 1688 XFS_ITRUNCATE_LOG_RES(mp),
@@ -1790,7 +1797,7 @@ xfs_inactive(
1790 * nothing we can do except to try to keep going. 1797 * nothing we can do except to try to keep going.
1791 */ 1798 */
1792 (void) xfs_bmap_finish(&tp, &free_list, &committed); 1799 (void) xfs_bmap_finish(&tp, &free_list, &committed);
1793 (void) xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 1800 (void) xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1794 } 1801 }
1795 /* 1802 /*
1796 * Release the dquots held by inode, if any. 1803 * Release the dquots held by inode, if any.
@@ -1940,7 +1947,7 @@ xfs_create(
1940 goto error_return; 1947 goto error_return;
1941 } 1948 }
1942 1949
1943 xfs_ilock(dp, XFS_ILOCK_EXCL); 1950 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
1944 1951
1945 XFS_BMAP_INIT(&free_list, &first_block); 1952 XFS_BMAP_INIT(&free_list, &first_block);
1946 1953
@@ -2026,7 +2033,7 @@ xfs_create(
2026 goto abort_rele; 2033 goto abort_rele;
2027 } 2034 }
2028 2035
2029 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 2036 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
2030 if (error) { 2037 if (error) {
2031 IRELE(ip); 2038 IRELE(ip);
2032 tp = NULL; 2039 tp = NULL;
@@ -2121,7 +2128,6 @@ int xfs_rm_attempts;
2121STATIC int 2128STATIC int
2122xfs_lock_dir_and_entry( 2129xfs_lock_dir_and_entry(
2123 xfs_inode_t *dp, 2130 xfs_inode_t *dp,
2124 bhv_vname_t *dentry,
2125 xfs_inode_t *ip) /* inode of entry 'name' */ 2131 xfs_inode_t *ip) /* inode of entry 'name' */
2126{ 2132{
2127 int attempts; 2133 int attempts;
@@ -2135,7 +2141,7 @@ xfs_lock_dir_and_entry(
2135 attempts = 0; 2141 attempts = 0;
2136 2142
2137again: 2143again:
2138 xfs_ilock(dp, XFS_ILOCK_EXCL); 2144 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
2139 2145
2140 e_inum = ip->i_ino; 2146 e_inum = ip->i_ino;
2141 2147
@@ -2204,6 +2210,21 @@ int xfs_lock_delays;
2204#endif 2210#endif
2205 2211
2206/* 2212/*
2213 * Bump the subclass so xfs_lock_inodes() acquires each lock with
2214 * a different value
2215 */
2216static inline int
2217xfs_lock_inumorder(int lock_mode, int subclass)
2218{
2219 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
2220 lock_mode |= (subclass + XFS_IOLOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
2221 if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
2222 lock_mode |= (subclass + XFS_ILOCK_INUMORDER) << XFS_ILOCK_SHIFT;
2223
2224 return lock_mode;
2225}
2226
2227/*
2207 * The following routine will lock n inodes in exclusive mode. 2228 * The following routine will lock n inodes in exclusive mode.
2208 * We assume the caller calls us with the inodes in i_ino order. 2229 * We assume the caller calls us with the inodes in i_ino order.
2209 * 2230 *
@@ -2270,7 +2291,7 @@ again:
2270 * that is in the AIL. 2291 * that is in the AIL.
2271 */ 2292 */
2272 ASSERT(i != 0); 2293 ASSERT(i != 0);
2273 if (!xfs_ilock_nowait(ips[i], lock_mode)) { 2294 if (!xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) {
2274 attempts++; 2295 attempts++;
2275 2296
2276 /* 2297 /*
@@ -2305,7 +2326,7 @@ again:
2305 goto again; 2326 goto again;
2306 } 2327 }
2307 } else { 2328 } else {
2308 xfs_ilock(ips[i], lock_mode); 2329 xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i));
2309 } 2330 }
2310 } 2331 }
2311 2332
@@ -2440,7 +2461,7 @@ xfs_remove(
2440 return error; 2461 return error;
2441 } 2462 }
2442 2463
2443 error = xfs_lock_dir_and_entry(dp, dentry, ip); 2464 error = xfs_lock_dir_and_entry(dp, ip);
2444 if (error) { 2465 if (error) {
2445 REMOVE_DEBUG_TRACE(__LINE__); 2466 REMOVE_DEBUG_TRACE(__LINE__);
2446 xfs_trans_cancel(tp, cancel_flags); 2467 xfs_trans_cancel(tp, cancel_flags);
@@ -2511,7 +2532,7 @@ xfs_remove(
2511 goto error_rele; 2532 goto error_rele;
2512 } 2533 }
2513 2534
2514 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 2535 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
2515 if (error) { 2536 if (error) {
2516 IRELE(ip); 2537 IRELE(ip);
2517 goto std_return; 2538 goto std_return;
@@ -2719,7 +2740,7 @@ xfs_link(
2719 goto abort_return; 2740 goto abort_return;
2720 } 2741 }
2721 2742
2722 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 2743 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
2723 if (error) 2744 if (error)
2724 goto std_return; 2745 goto std_return;
2725 2746
@@ -2839,7 +2860,7 @@ xfs_mkdir(
2839 goto error_return; 2860 goto error_return;
2840 } 2861 }
2841 2862
2842 xfs_ilock(dp, XFS_ILOCK_EXCL); 2863 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
2843 2864
2844 /* 2865 /*
2845 * Check for directory link count overflow. 2866 * Check for directory link count overflow.
@@ -2936,7 +2957,7 @@ xfs_mkdir(
2936 goto error2; 2957 goto error2;
2937 } 2958 }
2938 2959
2939 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 2960 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
2940 XFS_QM_DQRELE(mp, udqp); 2961 XFS_QM_DQRELE(mp, udqp);
2941 XFS_QM_DQRELE(mp, gdqp); 2962 XFS_QM_DQRELE(mp, gdqp);
2942 if (error) { 2963 if (error) {
@@ -3096,7 +3117,7 @@ xfs_rmdir(
3096 * that the directory entry for the child directory inode has 3117 * that the directory entry for the child directory inode has
3097 * not changed while we were obtaining a log reservation. 3118 * not changed while we were obtaining a log reservation.
3098 */ 3119 */
3099 error = xfs_lock_dir_and_entry(dp, dentry, cdp); 3120 error = xfs_lock_dir_and_entry(dp, cdp);
3100 if (error) { 3121 if (error) {
3101 xfs_trans_cancel(tp, cancel_flags); 3122 xfs_trans_cancel(tp, cancel_flags);
3102 IRELE(cdp); 3123 IRELE(cdp);
@@ -3190,7 +3211,7 @@ xfs_rmdir(
3190 goto std_return; 3211 goto std_return;
3191 } 3212 }
3192 3213
3193 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 3214 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
3194 if (error) { 3215 if (error) {
3195 IRELE(cdp); 3216 IRELE(cdp);
3196 goto std_return; 3217 goto std_return;
@@ -3393,7 +3414,7 @@ xfs_symlink(
3393 goto error_return; 3414 goto error_return;
3394 } 3415 }
3395 3416
3396 xfs_ilock(dp, XFS_ILOCK_EXCL); 3417 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
3397 3418
3398 /* 3419 /*
3399 * Check whether the directory allows new symlinks or not. 3420 * Check whether the directory allows new symlinks or not.
@@ -3535,7 +3556,7 @@ xfs_symlink(
3535 if (error) { 3556 if (error) {
3536 goto error2; 3557 goto error2;
3537 } 3558 }
3538 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 3559 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
3539 XFS_QM_DQRELE(mp, udqp); 3560 XFS_QM_DQRELE(mp, udqp);
3540 XFS_QM_DQRELE(mp, gdqp); 3561 XFS_QM_DQRELE(mp, gdqp);
3541 3562
@@ -3790,7 +3811,7 @@ xfs_set_dmattrs (
3790 3811
3791 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 3812 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
3792 IHOLD(ip); 3813 IHOLD(ip);
3793 error = xfs_trans_commit(tp, 0, NULL); 3814 error = xfs_trans_commit(tp, 0);
3794 3815
3795 return error; 3816 return error;
3796} 3817}
@@ -4049,14 +4070,14 @@ xfs_alloc_file_space(
4049 allocatesize_fsb = XFS_B_TO_FSB(mp, count); 4070 allocatesize_fsb = XFS_B_TO_FSB(mp, count);
4050 4071
4051 /* Generate a DMAPI event if needed. */ 4072 /* Generate a DMAPI event if needed. */
4052 if (alloc_type != 0 && offset < ip->i_d.di_size && 4073 if (alloc_type != 0 && offset < ip->i_size &&
4053 (attr_flags&ATTR_DMI) == 0 && 4074 (attr_flags&ATTR_DMI) == 0 &&
4054 DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_WRITE)) { 4075 DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_WRITE)) {
4055 xfs_off_t end_dmi_offset; 4076 xfs_off_t end_dmi_offset;
4056 4077
4057 end_dmi_offset = offset+len; 4078 end_dmi_offset = offset+len;
4058 if (end_dmi_offset > ip->i_d.di_size) 4079 if (end_dmi_offset > ip->i_size)
4059 end_dmi_offset = ip->i_d.di_size; 4080 end_dmi_offset = ip->i_size;
4060 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, XFS_ITOV(ip), 4081 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, XFS_ITOV(ip),
4061 offset, end_dmi_offset - offset, 4082 offset, end_dmi_offset - offset,
4062 0, NULL); 4083 0, NULL);
@@ -4148,7 +4169,7 @@ retry:
4148 goto error0; 4169 goto error0;
4149 } 4170 }
4150 4171
4151 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 4172 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
4152 xfs_iunlock(ip, XFS_ILOCK_EXCL); 4173 xfs_iunlock(ip, XFS_ILOCK_EXCL);
4153 if (error) { 4174 if (error) {
4154 break; 4175 break;
@@ -4283,7 +4304,6 @@ xfs_free_file_space(
4283 int error; 4304 int error;
4284 xfs_fsblock_t firstfsb; 4305 xfs_fsblock_t firstfsb;
4285 xfs_bmap_free_t free_list; 4306 xfs_bmap_free_t free_list;
4286 xfs_off_t ilen;
4287 xfs_bmbt_irec_t imap; 4307 xfs_bmbt_irec_t imap;
4288 xfs_off_t ioffset; 4308 xfs_off_t ioffset;
4289 xfs_extlen_t mod=0; 4309 xfs_extlen_t mod=0;
@@ -4312,11 +4332,11 @@ xfs_free_file_space(
4312 end_dmi_offset = offset + len; 4332 end_dmi_offset = offset + len;
4313 endoffset_fsb = XFS_B_TO_FSBT(mp, end_dmi_offset); 4333 endoffset_fsb = XFS_B_TO_FSBT(mp, end_dmi_offset);
4314 4334
4315 if (offset < ip->i_d.di_size && 4335 if (offset < ip->i_size &&
4316 (attr_flags & ATTR_DMI) == 0 && 4336 (attr_flags & ATTR_DMI) == 0 &&
4317 DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_WRITE)) { 4337 DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_WRITE)) {
4318 if (end_dmi_offset > ip->i_d.di_size) 4338 if (end_dmi_offset > ip->i_size)
4319 end_dmi_offset = ip->i_d.di_size; 4339 end_dmi_offset = ip->i_size;
4320 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, vp, 4340 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, vp,
4321 offset, end_dmi_offset - offset, 4341 offset, end_dmi_offset - offset,
4322 AT_DELAY_FLAG(attr_flags), NULL); 4342 AT_DELAY_FLAG(attr_flags), NULL);
@@ -4332,16 +4352,15 @@ xfs_free_file_space(
4332 } 4352 }
4333 4353
4334 rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, NBPP); 4354 rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, NBPP);
4335 ilen = len + (offset & (rounding - 1));
4336 ioffset = offset & ~(rounding - 1); 4355 ioffset = offset & ~(rounding - 1);
4337 if (ilen & (rounding - 1))
4338 ilen = (ilen + rounding) & ~(rounding - 1);
4339 4356
4340 if (VN_CACHED(vp) != 0) { 4357 if (VN_CACHED(vp) != 0) {
4341 xfs_inval_cached_trace(&ip->i_iocore, ioffset, -1, 4358 xfs_inval_cached_trace(&ip->i_iocore, ioffset, -1,
4342 ctooff(offtoct(ioffset)), -1); 4359 ctooff(offtoct(ioffset)), -1);
4343 bhv_vop_flushinval_pages(vp, ctooff(offtoct(ioffset)), 4360 error = bhv_vop_flushinval_pages(vp, ctooff(offtoct(ioffset)),
4344 -1, FI_REMAPF_LOCKED); 4361 -1, FI_REMAPF_LOCKED);
4362 if (error)
4363 goto out_unlock_iolock;
4345 } 4364 }
4346 4365
4347 /* 4366 /*
@@ -4455,7 +4474,7 @@ xfs_free_file_space(
4455 goto error0; 4474 goto error0;
4456 } 4475 }
4457 4476
4458 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 4477 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
4459 xfs_iunlock(ip, XFS_ILOCK_EXCL); 4478 xfs_iunlock(ip, XFS_ILOCK_EXCL);
4460 } 4479 }
4461 4480
@@ -4533,7 +4552,7 @@ xfs_change_file_space(
4533 bf->l_start += offset; 4552 bf->l_start += offset;
4534 break; 4553 break;
4535 case 2: /*SEEK_END*/ 4554 case 2: /*SEEK_END*/
4536 bf->l_start += ip->i_d.di_size; 4555 bf->l_start += ip->i_size;
4537 break; 4556 break;
4538 default: 4557 default:
4539 return XFS_ERROR(EINVAL); 4558 return XFS_ERROR(EINVAL);
@@ -4550,7 +4569,7 @@ xfs_change_file_space(
4550 bf->l_whence = 0; 4569 bf->l_whence = 0;
4551 4570
4552 startoffset = bf->l_start; 4571 startoffset = bf->l_start;
4553 fsize = ip->i_d.di_size; 4572 fsize = ip->i_size;
4554 4573
4555 /* 4574 /*
4556 * XFS_IOC_RESVSP and XFS_IOC_UNRESVSP will reserve or unreserve 4575 * XFS_IOC_RESVSP and XFS_IOC_UNRESVSP will reserve or unreserve
@@ -4649,7 +4668,7 @@ xfs_change_file_space(
4649 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 4668 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
4650 xfs_trans_set_sync(tp); 4669 xfs_trans_set_sync(tp);
4651 4670
4652 error = xfs_trans_commit(tp, 0, NULL); 4671 error = xfs_trans_commit(tp, 0);
4653 4672
4654 xfs_iunlock(ip, XFS_ILOCK_EXCL); 4673 xfs_iunlock(ip, XFS_ILOCK_EXCL);
4655 4674