aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_file.c4
-rw-r--r--fs/9p/vfs_inode.c23
-rw-r--r--fs/9p/vfs_super.c4
-rw-r--r--fs/Kconfig9
-rw-r--r--fs/adfs/adfs.h4
-rw-r--r--fs/adfs/dir.c2
-rw-r--r--fs/adfs/file.c2
-rw-r--r--fs/adfs/super.c2
-rw-r--r--fs/affs/affs.h6
-rw-r--r--fs/affs/dir.c2
-rw-r--r--fs/affs/file.c2
-rw-r--r--fs/affs/inode.c2
-rw-r--r--fs/affs/super.c2
-rw-r--r--fs/affs/symlink.c2
-rw-r--r--fs/afs/dir.c2
-rw-r--r--fs/afs/file.c2
-rw-r--r--fs/afs/internal.h6
-rw-r--r--fs/afs/mntpt.c2
-rw-r--r--fs/afs/super.c2
-rw-r--r--fs/aio.c5
-rw-r--r--fs/autofs/autofs_i.h4
-rw-r--r--fs/autofs/inode.c2
-rw-r--r--fs/autofs/root.c2
-rw-r--r--fs/autofs/symlink.c2
-rw-r--r--fs/autofs4/autofs_i.h10
-rw-r--r--fs/autofs4/inode.c2
-rw-r--r--fs/autofs4/root.c6
-rw-r--r--fs/autofs4/symlink.c2
-rw-r--r--fs/bad_inode.c2
-rw-r--r--fs/befs/linuxvfs.c4
-rw-r--r--fs/bfs/bfs.h4
-rw-r--r--fs/bfs/dir.c2
-rw-r--r--fs/bfs/file.c2
-rw-r--r--fs/bfs/inode.c2
-rw-r--r--fs/binfmt_elf_fdpic.c2
-rw-r--r--fs/binfmt_flat.c31
-rw-r--r--fs/binfmt_misc.c2
-rw-r--r--fs/block_dev.c2
-rw-r--r--fs/buffer.c19
-rw-r--r--fs/char_dev.c7
-rw-r--r--fs/cifs/CHANGES2
-rw-r--r--fs/cifs/cifsfs.c20
-rw-r--r--fs/cifs/cifsfs.h8
-rw-r--r--fs/cifs/file.c12
-rw-r--r--fs/cifs/readdir.c6
-rw-r--r--fs/cifs/smbdes.c10
-rw-r--r--fs/coda/cnode.c2
-rw-r--r--fs/coda/dir.c2
-rw-r--r--fs/coda/inode.c4
-rw-r--r--fs/coda/pioctl.c2
-rw-r--r--fs/coda/sysctl.c76
-rw-r--r--fs/configfs/configfs_internal.h4
-rw-r--r--fs/configfs/dir.c5
-rw-r--r--fs/configfs/file.c9
-rw-r--r--fs/configfs/inode.c2
-rw-r--r--fs/configfs/mount.c2
-rw-r--r--fs/configfs/symlink.c2
-rw-r--r--fs/cramfs/inode.c8
-rw-r--r--fs/dcache.c150
-rw-r--r--fs/debugfs/file.c2
-rw-r--r--fs/devpts/inode.c2
-rw-r--r--fs/dlm/Kconfig18
-rw-r--r--fs/dlm/config.c154
-rw-r--r--fs/dlm/config.h17
-rw-r--r--fs/dlm/debug_fs.c4
-rw-r--r--fs/dlm/dlm_internal.h20
-rw-r--r--fs/dlm/lock.c87
-rw-r--r--fs/dlm/lockspace.c10
-rw-r--r--fs/dlm/lowcomms-sctp.c151
-rw-r--r--fs/dlm/lowcomms-tcp.c384
-rw-r--r--fs/dlm/memory.c4
-rw-r--r--fs/dlm/midcomms.c4
-rw-r--r--fs/dlm/rcom.c85
-rw-r--r--fs/dlm/recover.c8
-rw-r--r--fs/dlm/recoverd.c22
-rw-r--r--fs/dlm/user.c15
-rw-r--r--fs/dlm/util.c4
-rw-r--r--fs/dquot.c59
-rw-r--r--fs/drop_caches.c2
-rw-r--r--fs/ecryptfs/Makefile2
-rw-r--r--fs/ecryptfs/crypto.c337
-rw-r--r--fs/ecryptfs/debug.c6
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h161
-rw-r--r--fs/ecryptfs/file.c52
-rw-r--r--fs/ecryptfs/inode.c93
-rw-r--r--fs/ecryptfs/keystore.c802
-rw-r--r--fs/ecryptfs/main.c82
-rw-r--r--fs/ecryptfs/messaging.c515
-rw-r--r--fs/ecryptfs/mmap.c378
-rw-r--r--fs/ecryptfs/netlink.c255
-rw-r--r--fs/ecryptfs/super.c2
-rw-r--r--fs/efs/dir.c2
-rw-r--r--fs/efs/super.c2
-rw-r--r--fs/exec.c4
-rw-r--r--fs/ext2/dir.c8
-rw-r--r--fs/ext2/ext2.h10
-rw-r--r--fs/ext2/file.c2
-rw-r--r--fs/ext2/namei.c4
-rw-r--r--fs/ext2/super.c6
-rw-r--r--fs/ext2/symlink.c4
-rw-r--r--fs/ext3/file.c2
-rw-r--r--fs/ext3/inode.c4
-rw-r--r--fs/ext3/namei.c31
-rw-r--r--fs/ext3/super.c22
-rw-r--r--fs/ext3/symlink.c4
-rw-r--r--fs/ext4/file.c2
-rw-r--r--fs/ext4/inode.c4
-rw-r--r--fs/ext4/namei.c31
-rw-r--r--fs/ext4/super.c22
-rw-r--r--fs/ext4/symlink.c4
-rw-r--r--fs/fat/file.c2
-rw-r--r--fs/fat/inode.c4
-rw-r--r--fs/freevxfs/vxfs_extern.h2
-rw-r--r--fs/freevxfs/vxfs_immed.c2
-rw-r--r--fs/freevxfs/vxfs_inode.c2
-rw-r--r--fs/freevxfs/vxfs_lookup.c2
-rw-r--r--fs/freevxfs/vxfs_super.c2
-rw-r--r--fs/fuse/control.c2
-rw-r--r--fs/fuse/dir.c6
-rw-r--r--fs/fuse/file.c2
-rw-r--r--fs/fuse/inode.c4
-rw-r--r--fs/gfs2/Kconfig47
-rw-r--r--fs/gfs2/bmap.c10
-rw-r--r--fs/gfs2/dir.c25
-rw-r--r--fs/gfs2/dir.h21
-rw-r--r--fs/gfs2/eattr.c8
-rw-r--r--fs/gfs2/glock.c316
-rw-r--r--fs/gfs2/glock.h11
-rw-r--r--fs/gfs2/glops.c136
-rw-r--r--fs/gfs2/incore.h18
-rw-r--r--fs/gfs2/inode.c61
-rw-r--r--fs/gfs2/lm.c8
-rw-r--r--fs/gfs2/locking/dlm/lock_dlm.h2
-rw-r--r--fs/gfs2/locking/dlm/main.c6
-rw-r--r--fs/gfs2/locking/dlm/mount.c6
-rw-r--r--fs/gfs2/locking/dlm/plock.c2
-rw-r--r--fs/gfs2/locking/dlm/sysfs.c13
-rw-r--r--fs/gfs2/lops.c14
-rw-r--r--fs/gfs2/meta_io.c3
-rw-r--r--fs/gfs2/ops_address.c134
-rw-r--r--fs/gfs2/ops_dentry.c16
-rw-r--r--fs/gfs2/ops_export.c15
-rw-r--r--fs/gfs2/ops_file.c52
-rw-r--r--fs/gfs2/ops_inode.c63
-rw-r--r--fs/gfs2/ops_inode.h8
-rw-r--r--fs/gfs2/ops_super.c13
-rw-r--r--fs/gfs2/ops_super.h2
-rw-r--r--fs/gfs2/ops_vm.c24
-rw-r--r--fs/gfs2/super.c16
-rw-r--r--fs/gfs2/sys.c10
-rw-r--r--fs/hfs/dir.c2
-rw-r--r--fs/hfs/hfs.h2
-rw-r--r--fs/hfs/hfs_fs.h2
-rw-r--r--fs/hfs/inode.c4
-rw-r--r--fs/hfs/super.c2
-rw-r--r--fs/hfsplus/dir.c2
-rw-r--r--fs/hfsplus/hfsplus_raw.h2
-rw-r--r--fs/hfsplus/inode.c4
-rw-r--r--fs/hfsplus/super.c2
-rw-r--r--fs/hostfs/hostfs_kern.c10
-rw-r--r--fs/hpfs/file.c2
-rw-r--r--fs/hpfs/hpfs_fn.h4
-rw-r--r--fs/hpfs/namei.c2
-rw-r--r--fs/hpfs/super.c2
-rw-r--r--fs/hppfs/hppfs_kern.c10
-rw-r--r--fs/hugetlbfs/inode.c17
-rw-r--r--fs/inode.c42
-rw-r--r--fs/inotify_user.c10
-rw-r--r--fs/ioprio.c18
-rw-r--r--fs/isofs/dir.c2
-rw-r--r--fs/isofs/inode.c2
-rw-r--r--fs/isofs/isofs.h2
-rw-r--r--fs/jffs/inode-v23.c16
-rw-r--r--fs/jffs2/dir.c2
-rw-r--r--fs/jffs2/file.c2
-rw-r--r--fs/jffs2/os-linux.h6
-rw-r--r--fs/jffs2/super.c2
-rw-r--r--fs/jffs2/symlink.c2
-rw-r--r--fs/jfs/file.c2
-rw-r--r--fs/jfs/inode.c6
-rw-r--r--fs/jfs/jfs_debug.h5
-rw-r--r--fs/jfs/jfs_dmap.c16
-rw-r--r--fs/jfs/jfs_imap.c16
-rw-r--r--fs/jfs/jfs_incore.h29
-rw-r--r--fs/jfs/jfs_inode.h6
-rw-r--r--fs/jfs/jfs_lock.h2
-rw-r--r--fs/jfs/jfs_metapage.c2
-rw-r--r--fs/jfs/jfs_txnmgr.c2
-rw-r--r--fs/jfs/jfs_xtree.c15
-rw-r--r--fs/jfs/namei.c50
-rw-r--r--fs/jfs/super.c4
-rw-r--r--fs/jfs/symlink.c2
-rw-r--r--fs/libfs.c6
-rw-r--r--fs/lockd/host.c2
-rw-r--r--fs/lockd/svc.c32
-rw-r--r--fs/lockd/svc4proc.c13
-rw-r--r--fs/lockd/svcproc.c13
-rw-r--r--fs/minix/bitmap.c69
-rw-r--r--fs/minix/dir.c162
-rw-r--r--fs/minix/file.c2
-rw-r--r--fs/minix/inode.c53
-rw-r--r--fs/minix/itree_common.c16
-rw-r--r--fs/minix/itree_v1.c4
-rw-r--r--fs/minix/itree_v2.c7
-rw-r--r--fs/minix/minix.h16
-rw-r--r--fs/minix/namei.c2
-rw-r--r--fs/msdos/namei.c2
-rw-r--r--fs/namei.c2
-rw-r--r--fs/namespace.c3
-rw-r--r--fs/ncpfs/dir.c2
-rw-r--r--fs/ncpfs/file.c2
-rw-r--r--fs/ncpfs/inode.c4
-rw-r--r--fs/nfs/callback.c34
-rw-r--r--fs/nfs/callback_xdr.c4
-rw-r--r--fs/nfs/client.c4
-rw-r--r--fs/nfs/dir.c6
-rw-r--r--fs/nfs/file.c4
-rw-r--r--fs/nfs/namespace.c4
-rw-r--r--fs/nfs/nfs4_fs.h2
-rw-r--r--fs/nfs/nfs4proc.c2
-rw-r--r--fs/nfs/super.c5
-rw-r--r--fs/nfs/symlink.c2
-rw-r--r--fs/nfsd/nfs4state.c18
-rw-r--r--fs/nfsd/nfscache.c2
-rw-r--r--fs/nfsd/nfsfh.c7
-rw-r--r--fs/nfsd/nfsproc.c7
-rw-r--r--fs/nfsd/nfssvc.c6
-rw-r--r--fs/ntfs/attrib.c2
-rw-r--r--fs/ntfs/file.c4
-rw-r--r--fs/ntfs/namei.c2
-rw-r--r--fs/ntfs/ntfs.h6
-rw-r--r--fs/ntfs/super.c2
-rw-r--r--fs/ocfs2/cluster/heartbeat.c158
-rw-r--r--fs/ocfs2/cluster/tcp.c35
-rw-r--r--fs/ocfs2/cluster/tcp.h6
-rw-r--r--fs/ocfs2/cluster/tcp_internal.h12
-rw-r--r--fs/ocfs2/dlm/dlmast.c14
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h130
-rw-r--r--fs/ocfs2/dlm/dlmconvert.c40
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c30
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c253
-rw-r--r--fs/ocfs2/dlm/dlmfs.c20
-rw-r--r--fs/ocfs2/dlm/dlmlock.c7
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c579
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c182
-rw-r--r--fs/ocfs2/dlm/dlmthread.c200
-rw-r--r--fs/ocfs2/dlm/dlmunlock.c15
-rw-r--r--fs/ocfs2/file.c4
-rw-r--r--fs/ocfs2/file.h4
-rw-r--r--fs/ocfs2/journal.h4
-rw-r--r--fs/ocfs2/namei.c2
-rw-r--r--fs/ocfs2/namei.h2
-rw-r--r--fs/ocfs2/super.c2
-rw-r--r--fs/ocfs2/symlink.c4
-rw-r--r--fs/ocfs2/symlink.h4
-rw-r--r--fs/ocfs2/vote.c8
-rw-r--r--fs/openpromfs/inode.c4
-rw-r--r--fs/partitions/check.c15
-rw-r--r--fs/partitions/msdos.c22
-rw-r--r--fs/partitions/sgi.c2
-rw-r--r--fs/partitions/sun.c5
-rw-r--r--fs/proc/array.c2
-rw-r--r--fs/proc/base.c56
-rw-r--r--fs/proc/generic.c10
-rw-r--r--fs/proc/inode.c2
-rw-r--r--fs/proc/internal.h12
-rw-r--r--fs/proc/nommu.c2
-rw-r--r--fs/proc/proc_misc.c54
-rw-r--r--fs/proc/proc_tty.c2
-rw-r--r--fs/proc/root.c4
-rw-r--r--fs/proc/task_mmu.c6
-rw-r--r--fs/proc/task_nommu.c2
-rw-r--r--fs/qnx4/dir.c2
-rw-r--r--fs/qnx4/file.c2
-rw-r--r--fs/qnx4/inode.c4
-rw-r--r--fs/ramfs/file-mmu.c4
-rw-r--r--fs/ramfs/file-nommu.c4
-rw-r--r--fs/ramfs/inode.c8
-rw-r--r--fs/ramfs/internal.h2
-rw-r--r--fs/read_write.c26
-rw-r--r--fs/reiserfs/do_balan.c5
-rw-r--r--fs/reiserfs/file.c2
-rw-r--r--fs/reiserfs/namei.c6
-rw-r--r--fs/reiserfs/super.c2
-rw-r--r--fs/romfs/inode.c6
-rw-r--r--fs/smbfs/dir.c4
-rw-r--r--fs/smbfs/file.c2
-rw-r--r--fs/smbfs/inode.c2
-rw-r--r--fs/smbfs/proto.h8
-rw-r--r--fs/smbfs/request.c3
-rw-r--r--fs/smbfs/symlink.c2
-rw-r--r--fs/super.c2
-rw-r--r--fs/sysfs/bin.c6
-rw-r--r--fs/sysfs/dir.c219
-rw-r--r--fs/sysfs/file.c82
-rw-r--r--fs/sysfs/group.c2
-rw-r--r--fs/sysfs/inode.c38
-rw-r--r--fs/sysfs/mount.c13
-rw-r--r--fs/sysfs/symlink.c3
-rw-r--r--fs/sysfs/sysfs.h25
-rw-r--r--fs/sysv/file.c2
-rw-r--r--fs/sysv/inode.c4
-rw-r--r--fs/sysv/namei.c2
-rw-r--r--fs/sysv/symlink.c2
-rw-r--r--fs/sysv/sysv.h8
-rw-r--r--fs/udf/file.c2
-rw-r--r--fs/udf/namei.c2
-rw-r--r--fs/udf/super.c2
-rw-r--r--fs/udf/udfdecl.h4
-rw-r--r--fs/ufs/balloc.c197
-rw-r--r--fs/ufs/dir.c21
-rw-r--r--fs/ufs/ialloc.c116
-rw-r--r--fs/ufs/inode.c193
-rw-r--r--fs/ufs/namei.c2
-rw-r--r--fs/ufs/super.c54
-rw-r--r--fs/ufs/symlink.c2
-rw-r--r--fs/ufs/truncate.c141
-rw-r--r--fs/ufs/util.h57
-rw-r--r--fs/vfat/namei.c2
-rw-r--r--fs/xfs/linux-2.6/mrlock.h6
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c18
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c142
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h4
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c51
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c12
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.h6
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h10
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c48
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c33
-rw-r--r--fs/xfs/linux-2.6/xfs_sysctl.c266
-rw-r--r--fs/xfs/linux-2.6/xfs_vfs.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h4
-rw-r--r--fs/xfs/quota/xfs_dquot.c4
-rw-r--r--fs/xfs/quota/xfs_dquot_item.c8
-rw-r--r--fs/xfs/quota/xfs_qm.c8
-rw-r--r--fs/xfs/quota/xfs_qm_bhv.c4
-rw-r--r--fs/xfs/quota/xfs_qm_stats.c2
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c4
-rw-r--r--fs/xfs/quota/xfs_trans_dquot.c2
-rw-r--r--fs/xfs/support/debug.c23
-rw-r--r--fs/xfs/support/debug.h30
-rw-r--r--fs/xfs/support/move.h4
-rw-r--r--fs/xfs/xfs_acl.c1
-rw-r--r--fs/xfs/xfs_alloc_btree.h10
-rw-r--r--fs/xfs/xfs_attr.c49
-rw-r--r--fs/xfs/xfs_attr_leaf.c54
-rw-r--r--fs/xfs/xfs_bit.c2
-rw-r--r--fs/xfs/xfs_bmap.c101
-rw-r--r--fs/xfs/xfs_bmap.h1
-rw-r--r--fs/xfs/xfs_bmap_btree.c86
-rw-r--r--fs/xfs/xfs_bmap_btree.h59
-rw-r--r--fs/xfs/xfs_btree.h6
-rw-r--r--fs/xfs/xfs_buf_item.c2
-rw-r--r--fs/xfs/xfs_buf_item.h18
-rw-r--r--fs/xfs/xfs_cap.h70
-rw-r--r--fs/xfs/xfs_da_btree.c18
-rw-r--r--fs/xfs/xfs_da_btree.h1
-rw-r--r--fs/xfs/xfs_dfrag.c1
-rw-r--r--fs/xfs/xfs_error.c26
-rw-r--r--fs/xfs/xfs_error.h3
-rw-r--r--fs/xfs/xfs_extfree_item.c4
-rw-r--r--fs/xfs/xfs_fsops.c60
-rw-r--r--fs/xfs/xfs_ialloc.c2
-rw-r--r--fs/xfs/xfs_ialloc_btree.h10
-rw-r--r--fs/xfs/xfs_inode.c30
-rw-r--r--fs/xfs/xfs_inode_item.c2
-rw-r--r--fs/xfs/xfs_iomap.c10
-rw-r--r--fs/xfs/xfs_log_recover.c61
-rw-r--r--fs/xfs/xfs_mac.h106
-rw-r--r--fs/xfs/xfs_mount.c288
-rw-r--r--fs/xfs/xfs_mount.h39
-rw-r--r--fs/xfs/xfs_rename.c2
-rw-r--r--fs/xfs/xfs_rtalloc.c110
-rw-r--r--fs/xfs/xfs_rtalloc.h18
-rw-r--r--fs/xfs/xfs_rw.c1
-rw-r--r--fs/xfs/xfs_trans.c32
-rw-r--r--fs/xfs/xfs_trans.h46
-rw-r--r--fs/xfs/xfs_trans_ail.c2
-rw-r--r--fs/xfs/xfs_vfsops.c45
-rw-r--r--fs/xfs/xfs_vnodeops.c22
384 files changed, 7187 insertions, 4506 deletions
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 9f17b0cacdd0..6c78343cf690 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -110,7 +110,7 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
110 110
111 if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) { 111 if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) {
112 filemap_write_and_wait(inode->i_mapping); 112 filemap_write_and_wait(inode->i_mapping);
113 invalidate_inode_pages(&inode->i_data); 113 invalidate_mapping_pages(&inode->i_data, 0, -1);
114 } 114 }
115 115
116 return res; 116 return res;
@@ -234,7 +234,7 @@ v9fs_file_write(struct file *filp, const char __user * data,
234 total += result; 234 total += result;
235 } while (count); 235 } while (count);
236 236
237 invalidate_inode_pages2(inode->i_mapping); 237 invalidate_inode_pages2(inode->i_mapping);
238 return total; 238 return total;
239} 239}
240 240
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 9109ba1d6969..5cf22134826b 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -41,10 +41,10 @@
41#include "v9fs_vfs.h" 41#include "v9fs_vfs.h"
42#include "fid.h" 42#include "fid.h"
43 43
44static struct inode_operations v9fs_dir_inode_operations; 44static const struct inode_operations v9fs_dir_inode_operations;
45static struct inode_operations v9fs_dir_inode_operations_ext; 45static const struct inode_operations v9fs_dir_inode_operations_ext;
46static struct inode_operations v9fs_file_inode_operations; 46static const struct inode_operations v9fs_file_inode_operations;
47static struct inode_operations v9fs_symlink_inode_operations; 47static const struct inode_operations v9fs_symlink_inode_operations;
48 48
49/** 49/**
50 * unixmode2p9mode - convert unix mode bits to plan 9 50 * unixmode2p9mode - convert unix mode bits to plan 9
@@ -585,17 +585,14 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
585 if (IS_ERR(inode)) { 585 if (IS_ERR(inode)) {
586 err = PTR_ERR(inode); 586 err = PTR_ERR(inode);
587 inode = NULL; 587 inode = NULL;
588 goto clean_up_fids; 588 v9fs_fid_destroy(vfid);
589 goto error;
589 } 590 }
590 591
591 dentry->d_op = &v9fs_dentry_operations; 592 dentry->d_op = &v9fs_dentry_operations;
592 d_instantiate(dentry, inode); 593 d_instantiate(dentry, inode);
593 return 0; 594 return 0;
594 595
595clean_up_fids:
596 if (vfid)
597 v9fs_fid_destroy(vfid);
598
599clean_up_dfid: 596clean_up_dfid:
600 v9fs_fid_clunk(v9ses, dfid); 597 v9fs_fid_clunk(v9ses, dfid);
601 598
@@ -1306,7 +1303,7 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
1306 return retval; 1303 return retval;
1307} 1304}
1308 1305
1309static struct inode_operations v9fs_dir_inode_operations_ext = { 1306static const struct inode_operations v9fs_dir_inode_operations_ext = {
1310 .create = v9fs_vfs_create, 1307 .create = v9fs_vfs_create,
1311 .lookup = v9fs_vfs_lookup, 1308 .lookup = v9fs_vfs_lookup,
1312 .symlink = v9fs_vfs_symlink, 1309 .symlink = v9fs_vfs_symlink,
@@ -1321,7 +1318,7 @@ static struct inode_operations v9fs_dir_inode_operations_ext = {
1321 .setattr = v9fs_vfs_setattr, 1318 .setattr = v9fs_vfs_setattr,
1322}; 1319};
1323 1320
1324static struct inode_operations v9fs_dir_inode_operations = { 1321static const struct inode_operations v9fs_dir_inode_operations = {
1325 .create = v9fs_vfs_create, 1322 .create = v9fs_vfs_create,
1326 .lookup = v9fs_vfs_lookup, 1323 .lookup = v9fs_vfs_lookup,
1327 .unlink = v9fs_vfs_unlink, 1324 .unlink = v9fs_vfs_unlink,
@@ -1333,12 +1330,12 @@ static struct inode_operations v9fs_dir_inode_operations = {
1333 .setattr = v9fs_vfs_setattr, 1330 .setattr = v9fs_vfs_setattr,
1334}; 1331};
1335 1332
1336static struct inode_operations v9fs_file_inode_operations = { 1333static const struct inode_operations v9fs_file_inode_operations = {
1337 .getattr = v9fs_vfs_getattr, 1334 .getattr = v9fs_vfs_getattr,
1338 .setattr = v9fs_vfs_setattr, 1335 .setattr = v9fs_vfs_setattr,
1339}; 1336};
1340 1337
1341static struct inode_operations v9fs_symlink_inode_operations = { 1338static const struct inode_operations v9fs_symlink_inode_operations = {
1342 .readlink = v9fs_vfs_readlink, 1339 .readlink = v9fs_vfs_readlink,
1343 .follow_link = v9fs_vfs_follow_link, 1340 .follow_link = v9fs_vfs_follow_link,
1344 .put_link = v9fs_vfs_put_link, 1341 .put_link = v9fs_vfs_put_link,
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 63320d4e15d2..0ec42f665457 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -45,7 +45,7 @@
45#include "fid.h" 45#include "fid.h"
46 46
47static void v9fs_clear_inode(struct inode *); 47static void v9fs_clear_inode(struct inode *);
48static struct super_operations v9fs_super_ops; 48static const struct super_operations v9fs_super_ops;
49 49
50/** 50/**
51 * v9fs_clear_inode - release an inode 51 * v9fs_clear_inode - release an inode
@@ -263,7 +263,7 @@ v9fs_umount_begin(struct vfsmount *vfsmnt, int flags)
263 v9fs_session_cancel(v9ses); 263 v9fs_session_cancel(v9ses);
264} 264}
265 265
266static struct super_operations v9fs_super_ops = { 266static const struct super_operations v9fs_super_ops = {
267 .statfs = simple_statfs, 267 .statfs = simple_statfs,
268 .clear_inode = v9fs_clear_inode, 268 .clear_inode = v9fs_clear_inode,
269 .show_options = v9fs_show_options, 269 .show_options = v9fs_show_options,
diff --git a/fs/Kconfig b/fs/Kconfig
index 8cd2417a14db..488521ed9e9b 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -426,7 +426,6 @@ config OCFS2_FS
426 select CONFIGFS_FS 426 select CONFIGFS_FS
427 select JBD 427 select JBD
428 select CRC32 428 select CRC32
429 select INET
430 help 429 help
431 OCFS2 is a general purpose extent based shared disk cluster file 430 OCFS2 is a general purpose extent based shared disk cluster file
432 system with many similarities to ext3. It supports 64 bit inode 431 system with many similarities to ext3. It supports 64 bit inode
@@ -675,12 +674,6 @@ config ZISOFS
675 necessary to create such a filesystem. Say Y here if you want to be 674 necessary to create such a filesystem. Say Y here if you want to be
676 able to read such compressed CD-ROMs. 675 able to read such compressed CD-ROMs.
677 676
678config ZISOFS_FS
679# for fs/nls/Config.in
680 tristate
681 depends on ZISOFS
682 default ISO9660_FS
683
684config UDF_FS 677config UDF_FS
685 tristate "UDF file system support" 678 tristate "UDF file system support"
686 help 679 help
@@ -1095,7 +1088,7 @@ config AFFS_FS
1095 1088
1096config ECRYPT_FS 1089config ECRYPT_FS
1097 tristate "eCrypt filesystem layer support (EXPERIMENTAL)" 1090 tristate "eCrypt filesystem layer support (EXPERIMENTAL)"
1098 depends on EXPERIMENTAL && KEYS && CRYPTO 1091 depends on EXPERIMENTAL && KEYS && CRYPTO && NET
1099 help 1092 help
1100 Encrypted filesystem that operates on the VFS layer. See 1093 Encrypted filesystem that operates on the VFS layer. See
1101 <file:Documentation/ecryptfs.txt> to learn more about 1094 <file:Documentation/ecryptfs.txt> to learn more about
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index 29217ff36d44..936f2af39c43 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -84,7 +84,7 @@ void __adfs_error(struct super_block *sb, const char *function,
84 */ 84 */
85 85
86/* dir_*.c */ 86/* dir_*.c */
87extern struct inode_operations adfs_dir_inode_operations; 87extern const struct inode_operations adfs_dir_inode_operations;
88extern const struct file_operations adfs_dir_operations; 88extern const struct file_operations adfs_dir_operations;
89extern struct dentry_operations adfs_dentry_operations; 89extern struct dentry_operations adfs_dentry_operations;
90extern struct adfs_dir_ops adfs_f_dir_ops; 90extern struct adfs_dir_ops adfs_f_dir_ops;
@@ -93,7 +93,7 @@ extern struct adfs_dir_ops adfs_fplus_dir_ops;
93extern int adfs_dir_update(struct super_block *sb, struct object_info *obj); 93extern int adfs_dir_update(struct super_block *sb, struct object_info *obj);
94 94
95/* file.c */ 95/* file.c */
96extern struct inode_operations adfs_file_inode_operations; 96extern const struct inode_operations adfs_file_inode_operations;
97extern const struct file_operations adfs_file_operations; 97extern const struct file_operations adfs_file_operations;
98 98
99static inline __u32 signed_asl(__u32 val, signed int shift) 99static inline __u32 signed_asl(__u32 val, signed int shift)
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index 2b8903893d3f..fc1a8dc64d78 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -295,7 +295,7 @@ adfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
295/* 295/*
296 * directories can handle most operations... 296 * directories can handle most operations...
297 */ 297 */
298struct inode_operations adfs_dir_inode_operations = { 298const struct inode_operations adfs_dir_inode_operations = {
299 .lookup = adfs_lookup, 299 .lookup = adfs_lookup,
300 .setattr = adfs_notify_change, 300 .setattr = adfs_notify_change,
301}; 301};
diff --git a/fs/adfs/file.c b/fs/adfs/file.c
index 6101ea679cb1..f544a2855923 100644
--- a/fs/adfs/file.c
+++ b/fs/adfs/file.c
@@ -36,6 +36,6 @@ const struct file_operations adfs_file_operations = {
36 .sendfile = generic_file_sendfile, 36 .sendfile = generic_file_sendfile,
37}; 37};
38 38
39struct inode_operations adfs_file_inode_operations = { 39const struct inode_operations adfs_file_inode_operations = {
40 .setattr = adfs_notify_change, 40 .setattr = adfs_notify_change,
41}; 41};
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 5023351a7afe..2e5f2c8371ee 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -254,7 +254,7 @@ static void destroy_inodecache(void)
254 kmem_cache_destroy(adfs_inode_cachep); 254 kmem_cache_destroy(adfs_inode_cachep);
255} 255}
256 256
257static struct super_operations adfs_sops = { 257static const struct super_operations adfs_sops = {
258 .alloc_inode = adfs_alloc_inode, 258 .alloc_inode = adfs_alloc_inode,
259 .destroy_inode = adfs_destroy_inode, 259 .destroy_inode = adfs_destroy_inode,
260 .write_inode = adfs_write_inode, 260 .write_inode = adfs_write_inode,
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 1dc8438ef389..7db2d287e9f3 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -188,9 +188,9 @@ extern void affs_dir_truncate(struct inode *);
188 188
189/* jump tables */ 189/* jump tables */
190 190
191extern struct inode_operations affs_file_inode_operations; 191extern const struct inode_operations affs_file_inode_operations;
192extern struct inode_operations affs_dir_inode_operations; 192extern const struct inode_operations affs_dir_inode_operations;
193extern struct inode_operations affs_symlink_inode_operations; 193extern const struct inode_operations affs_symlink_inode_operations;
194extern const struct file_operations affs_file_operations; 194extern const struct file_operations affs_file_operations;
195extern const struct file_operations affs_file_operations_ofs; 195extern const struct file_operations affs_file_operations_ofs;
196extern const struct file_operations affs_dir_operations; 196extern const struct file_operations affs_dir_operations;
diff --git a/fs/affs/dir.c b/fs/affs/dir.c
index cad3ee340063..6e3f282424b0 100644
--- a/fs/affs/dir.c
+++ b/fs/affs/dir.c
@@ -26,7 +26,7 @@ const struct file_operations affs_dir_operations = {
26/* 26/*
27 * directories can handle most operations... 27 * directories can handle most operations...
28 */ 28 */
29struct inode_operations affs_dir_inode_operations = { 29const struct inode_operations affs_dir_inode_operations = {
30 .create = affs_create, 30 .create = affs_create,
31 .lookup = affs_lookup, 31 .lookup = affs_lookup,
32 .link = affs_link, 32 .link = affs_link,
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 05b5e22de759..4aa8079e71be 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -38,7 +38,7 @@ const struct file_operations affs_file_operations = {
38 .sendfile = generic_file_sendfile, 38 .sendfile = generic_file_sendfile,
39}; 39};
40 40
41struct inode_operations affs_file_inode_operations = { 41const struct inode_operations affs_file_inode_operations = {
42 .truncate = affs_truncate, 42 .truncate = affs_truncate,
43 .setattr = affs_notify_change, 43 .setattr = affs_notify_change,
44}; 44};
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 44d439cb69f4..fce6848a4641 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -12,7 +12,7 @@
12 12
13#include "affs.h" 13#include "affs.h"
14 14
15extern struct inode_operations affs_symlink_inode_operations; 15extern const struct inode_operations affs_symlink_inode_operations;
16extern struct timezone sys_tz; 16extern struct timezone sys_tz;
17 17
18void 18void
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 3de93e799949..a324045d8554 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -112,7 +112,7 @@ static void destroy_inodecache(void)
112 kmem_cache_destroy(affs_inode_cachep); 112 kmem_cache_destroy(affs_inode_cachep);
113} 113}
114 114
115static struct super_operations affs_sops = { 115static const struct super_operations affs_sops = {
116 .alloc_inode = affs_alloc_inode, 116 .alloc_inode = affs_alloc_inode,
117 .destroy_inode = affs_destroy_inode, 117 .destroy_inode = affs_destroy_inode,
118 .read_inode = affs_read_inode, 118 .read_inode = affs_read_inode,
diff --git a/fs/affs/symlink.c b/fs/affs/symlink.c
index f802256a5933..41782539c907 100644
--- a/fs/affs/symlink.c
+++ b/fs/affs/symlink.c
@@ -70,7 +70,7 @@ const struct address_space_operations affs_symlink_aops = {
70 .readpage = affs_symlink_readpage, 70 .readpage = affs_symlink_readpage,
71}; 71};
72 72
73struct inode_operations affs_symlink_inode_operations = { 73const struct inode_operations affs_symlink_inode_operations = {
74 .readlink = generic_readlink, 74 .readlink = generic_readlink,
75 .follow_link = page_follow_link_light, 75 .follow_link = page_follow_link_light,
76 .put_link = page_put_link, 76 .put_link = page_put_link,
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 4acd04134055..9908462bcadc 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -37,7 +37,7 @@ const struct file_operations afs_dir_file_operations = {
37 .readdir = afs_dir_readdir, 37 .readdir = afs_dir_readdir,
38}; 38};
39 39
40struct inode_operations afs_dir_inode_operations = { 40const struct inode_operations afs_dir_inode_operations = {
41 .lookup = afs_dir_lookup, 41 .lookup = afs_dir_lookup,
42 .getattr = afs_inode_getattr, 42 .getattr = afs_inode_getattr,
43#if 0 /* TODO */ 43#if 0 /* TODO */
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 2e8c42639eaa..eeff14c3f748 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -30,7 +30,7 @@ static int afs_file_readpage(struct file *file, struct page *page);
30static void afs_file_invalidatepage(struct page *page, unsigned long offset); 30static void afs_file_invalidatepage(struct page *page, unsigned long offset);
31static int afs_file_releasepage(struct page *page, gfp_t gfp_flags); 31static int afs_file_releasepage(struct page *page, gfp_t gfp_flags);
32 32
33struct inode_operations afs_file_inode_operations = { 33const struct inode_operations afs_file_inode_operations = {
34 .getattr = afs_inode_getattr, 34 .getattr = afs_inode_getattr,
35}; 35};
36 36
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index e88b3b65ae49..5151d5da2c2f 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -63,14 +63,14 @@ extern struct cachefs_index_def afs_cache_cell_index_def;
63/* 63/*
64 * dir.c 64 * dir.c
65 */ 65 */
66extern struct inode_operations afs_dir_inode_operations; 66extern const struct inode_operations afs_dir_inode_operations;
67extern const struct file_operations afs_dir_file_operations; 67extern const struct file_operations afs_dir_file_operations;
68 68
69/* 69/*
70 * file.c 70 * file.c
71 */ 71 */
72extern const struct address_space_operations afs_fs_aops; 72extern const struct address_space_operations afs_fs_aops;
73extern struct inode_operations afs_file_inode_operations; 73extern const struct inode_operations afs_file_inode_operations;
74 74
75#ifdef AFS_CACHING_SUPPORT 75#ifdef AFS_CACHING_SUPPORT
76extern int afs_cache_get_page_cookie(struct page *page, 76extern int afs_cache_get_page_cookie(struct page *page,
@@ -104,7 +104,7 @@ extern struct cachefs_netfs afs_cache_netfs;
104/* 104/*
105 * mntpt.c 105 * mntpt.c
106 */ 106 */
107extern struct inode_operations afs_mntpt_inode_operations; 107extern const struct inode_operations afs_mntpt_inode_operations;
108extern const struct file_operations afs_mntpt_file_operations; 108extern const struct file_operations afs_mntpt_file_operations;
109extern struct afs_timer afs_mntpt_expiry_timer; 109extern struct afs_timer afs_mntpt_expiry_timer;
110extern struct afs_timer_ops afs_mntpt_expiry_timer_ops; 110extern struct afs_timer_ops afs_mntpt_expiry_timer_ops;
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 8f74e8450826..fdf23b2a2112 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -36,7 +36,7 @@ const struct file_operations afs_mntpt_file_operations = {
36 .open = afs_mntpt_open, 36 .open = afs_mntpt_open,
37}; 37};
38 38
39struct inode_operations afs_mntpt_inode_operations = { 39const struct inode_operations afs_mntpt_inode_operations = {
40 .lookup = afs_mntpt_lookup, 40 .lookup = afs_mntpt_lookup,
41 .follow_link = afs_mntpt_follow_link, 41 .follow_link = afs_mntpt_follow_link,
42 .readlink = page_readlink, 42 .readlink = page_readlink,
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 18d9b77ba40f..eb7e32349da3 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -56,7 +56,7 @@ struct file_system_type afs_fs_type = {
56 .fs_flags = FS_BINARY_MOUNTDATA, 56 .fs_flags = FS_BINARY_MOUNTDATA,
57}; 57};
58 58
59static struct super_operations afs_super_ops = { 59static const struct super_operations afs_super_ops = {
60 .statfs = simple_statfs, 60 .statfs = simple_statfs,
61 .alloc_inode = afs_alloc_inode, 61 .alloc_inode = afs_alloc_inode,
62 .drop_inode = generic_delete_inode, 62 .drop_inode = generic_delete_inode,
diff --git a/fs/aio.c b/fs/aio.c
index 55991e4132a7..0b4ee0a5c83e 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -132,7 +132,7 @@ static int aio_setup_ring(struct kioctx *ctx)
132 dprintk("attempting mmap of %lu bytes\n", info->mmap_size); 132 dprintk("attempting mmap of %lu bytes\n", info->mmap_size);
133 down_write(&ctx->mm->mmap_sem); 133 down_write(&ctx->mm->mmap_sem);
134 info->mmap_base = do_mmap(NULL, 0, info->mmap_size, 134 info->mmap_base = do_mmap(NULL, 0, info->mmap_size,
135 PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, 135 PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE,
136 0); 136 0);
137 if (IS_ERR((void *)info->mmap_base)) { 137 if (IS_ERR((void *)info->mmap_base)) {
138 up_write(&ctx->mm->mmap_sem); 138 up_write(&ctx->mm->mmap_sem);
@@ -211,11 +211,10 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
211 if ((unsigned long)nr_events > aio_max_nr) 211 if ((unsigned long)nr_events > aio_max_nr)
212 return ERR_PTR(-EAGAIN); 212 return ERR_PTR(-EAGAIN);
213 213
214 ctx = kmem_cache_alloc(kioctx_cachep, GFP_KERNEL); 214 ctx = kmem_cache_zalloc(kioctx_cachep, GFP_KERNEL);
215 if (!ctx) 215 if (!ctx)
216 return ERR_PTR(-ENOMEM); 216 return ERR_PTR(-ENOMEM);
217 217
218 memset(ctx, 0, sizeof(*ctx));
219 ctx->max_reqs = nr_events; 218 ctx->max_reqs = nr_events;
220 mm = ctx->mm = current->mm; 219 mm = ctx->mm = current->mm;
221 atomic_inc(&mm->mm_count); 220 atomic_inc(&mm->mm_count);
diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
index 906ba5ce2261..4ef544434b51 100644
--- a/fs/autofs/autofs_i.h
+++ b/fs/autofs/autofs_i.h
@@ -142,8 +142,8 @@ struct autofs_dir_ent *autofs_expire(struct super_block *,struct autofs_sb_info
142 142
143/* Operations structures */ 143/* Operations structures */
144 144
145extern struct inode_operations autofs_root_inode_operations; 145extern const struct inode_operations autofs_root_inode_operations;
146extern struct inode_operations autofs_symlink_inode_operations; 146extern const struct inode_operations autofs_symlink_inode_operations;
147extern const struct file_operations autofs_root_operations; 147extern const struct file_operations autofs_root_operations;
148 148
149/* Initializing function */ 149/* Initializing function */
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index f968d1342808..aa0b61ff8270 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -52,7 +52,7 @@ out_kill_sb:
52 52
53static void autofs_read_inode(struct inode *inode); 53static void autofs_read_inode(struct inode *inode);
54 54
55static struct super_operations autofs_sops = { 55static const struct super_operations autofs_sops = {
56 .read_inode = autofs_read_inode, 56 .read_inode = autofs_read_inode,
57 .statfs = simple_statfs, 57 .statfs = simple_statfs,
58}; 58};
diff --git a/fs/autofs/root.c b/fs/autofs/root.c
index e698c51d2b02..f2597205939d 100644
--- a/fs/autofs/root.c
+++ b/fs/autofs/root.c
@@ -32,7 +32,7 @@ const struct file_operations autofs_root_operations = {
32 .ioctl = autofs_root_ioctl, 32 .ioctl = autofs_root_ioctl,
33}; 33};
34 34
35struct inode_operations autofs_root_inode_operations = { 35const struct inode_operations autofs_root_inode_operations = {
36 .lookup = autofs_root_lookup, 36 .lookup = autofs_root_lookup,
37 .unlink = autofs_root_unlink, 37 .unlink = autofs_root_unlink,
38 .symlink = autofs_root_symlink, 38 .symlink = autofs_root_symlink,
diff --git a/fs/autofs/symlink.c b/fs/autofs/symlink.c
index c74f2eb65775..7ce9cb2c9ce2 100644
--- a/fs/autofs/symlink.c
+++ b/fs/autofs/symlink.c
@@ -20,7 +20,7 @@ static void *autofs_follow_link(struct dentry *dentry, struct nameidata *nd)
20 return NULL; 20 return NULL;
21} 21}
22 22
23struct inode_operations autofs_symlink_inode_operations = { 23const struct inode_operations autofs_symlink_inode_operations = {
24 .readlink = generic_readlink, 24 .readlink = generic_readlink,
25 .follow_link = autofs_follow_link 25 .follow_link = autofs_follow_link
26}; 26};
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 216b1a364ccb..6b4cec3f272f 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -168,11 +168,11 @@ int autofs4_expire_multi(struct super_block *, struct vfsmount *,
168 168
169/* Operations structures */ 169/* Operations structures */
170 170
171extern struct inode_operations autofs4_symlink_inode_operations; 171extern const struct inode_operations autofs4_symlink_inode_operations;
172extern struct inode_operations autofs4_dir_inode_operations; 172extern const struct inode_operations autofs4_dir_inode_operations;
173extern struct inode_operations autofs4_root_inode_operations; 173extern const struct inode_operations autofs4_root_inode_operations;
174extern struct inode_operations autofs4_indirect_root_inode_operations; 174extern const struct inode_operations autofs4_indirect_root_inode_operations;
175extern struct inode_operations autofs4_direct_root_inode_operations; 175extern const struct inode_operations autofs4_direct_root_inode_operations;
176extern const struct file_operations autofs4_dir_operations; 176extern const struct file_operations autofs4_dir_operations;
177extern const struct file_operations autofs4_root_operations; 177extern const struct file_operations autofs4_root_operations;
178 178
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index e8f6c5ad3e90..5e458e096ef6 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -196,7 +196,7 @@ static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt)
196 return 0; 196 return 0;
197} 197}
198 198
199static struct super_operations autofs4_sops = { 199static const struct super_operations autofs4_sops = {
200 .statfs = simple_statfs, 200 .statfs = simple_statfs,
201 .show_options = autofs4_show_options, 201 .show_options = autofs4_show_options,
202}; 202};
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 8d05b9f7578d..47fee96c2182 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -47,7 +47,7 @@ const struct file_operations autofs4_dir_operations = {
47 .readdir = autofs4_dir_readdir, 47 .readdir = autofs4_dir_readdir,
48}; 48};
49 49
50struct inode_operations autofs4_indirect_root_inode_operations = { 50const struct inode_operations autofs4_indirect_root_inode_operations = {
51 .lookup = autofs4_lookup, 51 .lookup = autofs4_lookup,
52 .unlink = autofs4_dir_unlink, 52 .unlink = autofs4_dir_unlink,
53 .symlink = autofs4_dir_symlink, 53 .symlink = autofs4_dir_symlink,
@@ -55,7 +55,7 @@ struct inode_operations autofs4_indirect_root_inode_operations = {
55 .rmdir = autofs4_dir_rmdir, 55 .rmdir = autofs4_dir_rmdir,
56}; 56};
57 57
58struct inode_operations autofs4_direct_root_inode_operations = { 58const struct inode_operations autofs4_direct_root_inode_operations = {
59 .lookup = autofs4_lookup, 59 .lookup = autofs4_lookup,
60 .unlink = autofs4_dir_unlink, 60 .unlink = autofs4_dir_unlink,
61 .mkdir = autofs4_dir_mkdir, 61 .mkdir = autofs4_dir_mkdir,
@@ -63,7 +63,7 @@ struct inode_operations autofs4_direct_root_inode_operations = {
63 .follow_link = autofs4_follow_link, 63 .follow_link = autofs4_follow_link,
64}; 64};
65 65
66struct inode_operations autofs4_dir_inode_operations = { 66const struct inode_operations autofs4_dir_inode_operations = {
67 .lookup = autofs4_lookup, 67 .lookup = autofs4_lookup,
68 .unlink = autofs4_dir_unlink, 68 .unlink = autofs4_dir_unlink,
69 .symlink = autofs4_dir_symlink, 69 .symlink = autofs4_dir_symlink,
diff --git a/fs/autofs4/symlink.c b/fs/autofs4/symlink.c
index 2ea2c98fd84b..b4ea82934d2e 100644
--- a/fs/autofs4/symlink.c
+++ b/fs/autofs4/symlink.c
@@ -19,7 +19,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
19 return NULL; 19 return NULL;
20} 20}
21 21
22struct inode_operations autofs4_symlink_inode_operations = { 22const struct inode_operations autofs4_symlink_inode_operations = {
23 .readlink = generic_readlink, 23 .readlink = generic_readlink,
24 .follow_link = autofs4_follow_link 24 .follow_link = autofs4_follow_link
25}; 25};
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 869f5193ecc2..efeab2fab40b 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -291,7 +291,7 @@ static int bad_inode_removexattr(struct dentry *dentry, const char *name)
291 return -EIO; 291 return -EIO;
292} 292}
293 293
294static struct inode_operations bad_inode_ops = 294static const struct inode_operations bad_inode_ops =
295{ 295{
296 .create = bad_inode_create, 296 .create = bad_inode_create,
297 .lookup = bad_inode_lookup, 297 .lookup = bad_inode_lookup,
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 481e59b9d91c..cc6cc8ed2e39 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -68,7 +68,7 @@ static const struct file_operations befs_dir_operations = {
68 .readdir = befs_readdir, 68 .readdir = befs_readdir,
69}; 69};
70 70
71static struct inode_operations befs_dir_inode_operations = { 71static const struct inode_operations befs_dir_inode_operations = {
72 .lookup = befs_lookup, 72 .lookup = befs_lookup,
73}; 73};
74 74
@@ -78,7 +78,7 @@ static const struct address_space_operations befs_aops = {
78 .bmap = befs_bmap, 78 .bmap = befs_bmap,
79}; 79};
80 80
81static struct inode_operations befs_symlink_inode_operations = { 81static const struct inode_operations befs_symlink_inode_operations = {
82 .readlink = generic_readlink, 82 .readlink = generic_readlink,
83 .follow_link = befs_follow_link, 83 .follow_link = befs_follow_link,
84 .put_link = befs_put_link, 84 .put_link = befs_put_link,
diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h
index 31973bbbf057..130f6c66c5ba 100644
--- a/fs/bfs/bfs.h
+++ b/fs/bfs/bfs.h
@@ -48,12 +48,12 @@ static inline struct bfs_inode_info *BFS_I(struct inode *inode)
48 48
49 49
50/* file.c */ 50/* file.c */
51extern struct inode_operations bfs_file_inops; 51extern const struct inode_operations bfs_file_inops;
52extern const struct file_operations bfs_file_operations; 52extern const struct file_operations bfs_file_operations;
53extern const struct address_space_operations bfs_aops; 53extern const struct address_space_operations bfs_aops;
54 54
55/* dir.c */ 55/* dir.c */
56extern struct inode_operations bfs_dir_inops; 56extern const struct inode_operations bfs_dir_inops;
57extern const struct file_operations bfs_dir_operations; 57extern const struct file_operations bfs_dir_operations;
58 58
59#endif /* _FS_BFS_BFS_H */ 59#endif /* _FS_BFS_BFS_H */
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 2a746e688df5..097f1497f743 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -260,7 +260,7 @@ end_rename:
260 return error; 260 return error;
261} 261}
262 262
263struct inode_operations bfs_dir_inops = { 263const struct inode_operations bfs_dir_inops = {
264 .create = bfs_create, 264 .create = bfs_create,
265 .lookup = bfs_lookup, 265 .lookup = bfs_lookup,
266 .link = bfs_link, 266 .link = bfs_link,
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index a9164a87f8de..ef4d1fa04e65 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -164,4 +164,4 @@ const struct address_space_operations bfs_aops = {
164 .bmap = bfs_bmap, 164 .bmap = bfs_bmap,
165}; 165};
166 166
167struct inode_operations bfs_file_inops; 167const struct inode_operations bfs_file_inops;
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 134c99941a63..93d6219243ad 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -270,7 +270,7 @@ static void destroy_inodecache(void)
270 kmem_cache_destroy(bfs_inode_cachep); 270 kmem_cache_destroy(bfs_inode_cachep);
271} 271}
272 272
273static struct super_operations bfs_sops = { 273static const struct super_operations bfs_sops = {
274 .alloc_inode = bfs_alloc_inode, 274 .alloc_inode = bfs_alloc_inode,
275 .destroy_inode = bfs_destroy_inode, 275 .destroy_inode = bfs_destroy_inode,
276 .read_inode = bfs_read_inode, 276 .read_inode = bfs_read_inode,
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index a4d933a51208..5810aa1339fd 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -372,7 +372,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
372 down_write(&current->mm->mmap_sem); 372 down_write(&current->mm->mmap_sem);
373 current->mm->start_brk = do_mmap(NULL, 0, stack_size, 373 current->mm->start_brk = do_mmap(NULL, 0, stack_size,
374 PROT_READ | PROT_WRITE | PROT_EXEC, 374 PROT_READ | PROT_WRITE | PROT_EXEC,
375 MAP_PRIVATE | MAP_ANON | MAP_GROWSDOWN, 375 MAP_PRIVATE | MAP_ANONYMOUS | MAP_GROWSDOWN,
376 0); 376 0);
377 377
378 if (IS_ERR_VALUE(current->mm->start_brk)) { 378 if (IS_ERR_VALUE(current->mm->start_brk)) {
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index ae8595d49856..7b0265d7f3a8 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -419,7 +419,7 @@ static int load_flat_file(struct linux_binprm * bprm,
419 unsigned long textpos = 0, datapos = 0, result; 419 unsigned long textpos = 0, datapos = 0, result;
420 unsigned long realdatastart = 0; 420 unsigned long realdatastart = 0;
421 unsigned long text_len, data_len, bss_len, stack_len, flags; 421 unsigned long text_len, data_len, bss_len, stack_len, flags;
422 unsigned long memp = 0; /* for finding the brk area */ 422 unsigned long len, reallen, memp = 0;
423 unsigned long extra, rlim; 423 unsigned long extra, rlim;
424 unsigned long *reloc = 0, *rp; 424 unsigned long *reloc = 0, *rp;
425 struct inode *inode; 425 struct inode *inode;
@@ -540,10 +540,18 @@ static int load_flat_file(struct linux_binprm * bprm,
540 goto err; 540 goto err;
541 } 541 }
542 542
543 len = data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long);
543 down_write(&current->mm->mmap_sem); 544 down_write(&current->mm->mmap_sem);
544 realdatastart = do_mmap(0, 0, data_len + extra + 545 realdatastart = do_mmap(0, 0, len,
545 MAX_SHARED_LIBS * sizeof(unsigned long), 546 PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0);
546 PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0); 547 /* Remap to use all available slack region space */
548 if (realdatastart && (realdatastart < (unsigned long)-4096)) {
549 reallen = ksize(realdatastart);
550 if (reallen > len) {
551 realdatastart = do_mremap(realdatastart, len,
552 reallen, MREMAP_FIXED, realdatastart);
553 }
554 }
547 up_write(&current->mm->mmap_sem); 555 up_write(&current->mm->mmap_sem);
548 556
549 if (realdatastart == 0 || realdatastart >= (unsigned long)-4096) { 557 if (realdatastart == 0 || realdatastart >= (unsigned long)-4096) {
@@ -584,11 +592,20 @@ static int load_flat_file(struct linux_binprm * bprm,
584 592
585 } else { 593 } else {
586 594
595 len = text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long);
587 down_write(&current->mm->mmap_sem); 596 down_write(&current->mm->mmap_sem);
588 textpos = do_mmap(0, 0, text_len + data_len + extra + 597 textpos = do_mmap(0, 0, len,
589 MAX_SHARED_LIBS * sizeof(unsigned long), 598 PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0);
590 PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0); 599 /* Remap to use all available slack region space */
600 if (textpos && (textpos < (unsigned long) -4096)) {
601 reallen = ksize(textpos);
602 if (reallen > len) {
603 textpos = do_mremap(textpos, len, reallen,
604 MREMAP_FIXED, textpos);
605 }
606 }
591 up_write(&current->mm->mmap_sem); 607 up_write(&current->mm->mmap_sem);
608
592 if (!textpos || textpos >= (unsigned long) -4096) { 609 if (!textpos || textpos >= (unsigned long) -4096) {
593 if (!textpos) 610 if (!textpos)
594 textpos = (unsigned long) -ENOMEM; 611 textpos = (unsigned long) -ENOMEM;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index c2e08252af35..e6f57990b121 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -719,7 +719,7 @@ static const struct file_operations bm_status_operations = {
719 719
720/* Superblock handling */ 720/* Superblock handling */
721 721
722static struct super_operations s_ops = { 722static const struct super_operations s_ops = {
723 .statfs = simple_statfs, 723 .statfs = simple_statfs,
724 .clear_inode = bm_clear_inode, 724 .clear_inode = bm_clear_inode,
725}; 725};
diff --git a/fs/block_dev.c b/fs/block_dev.c
index fc7028b685f2..0c59b703e9d5 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -489,7 +489,7 @@ static void bdev_clear_inode(struct inode *inode)
489 spin_unlock(&bdev_lock); 489 spin_unlock(&bdev_lock);
490} 490}
491 491
492static struct super_operations bdev_sops = { 492static const struct super_operations bdev_sops = {
493 .statfs = simple_statfs, 493 .statfs = simple_statfs,
494 .alloc_inode = bdev_alloc_inode, 494 .alloc_inode = bdev_alloc_inode,
495 .destroy_inode = bdev_destroy_inode, 495 .destroy_inode = bdev_destroy_inode,
diff --git a/fs/buffer.c b/fs/buffer.c
index 1ad674fd348c..f99c509697cd 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -78,6 +78,7 @@ EXPORT_SYMBOL(__lock_buffer);
78 78
79void fastcall unlock_buffer(struct buffer_head *bh) 79void fastcall unlock_buffer(struct buffer_head *bh)
80{ 80{
81 smp_mb__before_clear_bit();
81 clear_buffer_locked(bh); 82 clear_buffer_locked(bh);
82 smp_mb__after_clear_bit(); 83 smp_mb__after_clear_bit();
83 wake_up_bit(&bh->b_state, BH_Lock); 84 wake_up_bit(&bh->b_state, BH_Lock);
@@ -345,7 +346,7 @@ void invalidate_bdev(struct block_device *bdev, int destroy_dirty_buffers)
345 * We really want to use invalidate_inode_pages2() for 346 * We really want to use invalidate_inode_pages2() for
346 * that, but not until that's cleaned up. 347 * that, but not until that's cleaned up.
347 */ 348 */
348 invalidate_inode_pages(mapping); 349 invalidate_mapping_pages(mapping, 0, -1);
349} 350}
350 351
351/* 352/*
@@ -1282,11 +1283,11 @@ static void bh_lru_install(struct buffer_head *bh)
1282 * Look up the bh in this cpu's LRU. If it's there, move it to the head. 1283 * Look up the bh in this cpu's LRU. If it's there, move it to the head.
1283 */ 1284 */
1284static struct buffer_head * 1285static struct buffer_head *
1285lookup_bh_lru(struct block_device *bdev, sector_t block, int size) 1286lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
1286{ 1287{
1287 struct buffer_head *ret = NULL; 1288 struct buffer_head *ret = NULL;
1288 struct bh_lru *lru; 1289 struct bh_lru *lru;
1289 int i; 1290 unsigned int i;
1290 1291
1291 check_irqs_on(); 1292 check_irqs_on();
1292 bh_lru_lock(); 1293 bh_lru_lock();
@@ -1318,7 +1319,7 @@ lookup_bh_lru(struct block_device *bdev, sector_t block, int size)
1318 * NULL 1319 * NULL
1319 */ 1320 */
1320struct buffer_head * 1321struct buffer_head *
1321__find_get_block(struct block_device *bdev, sector_t block, int size) 1322__find_get_block(struct block_device *bdev, sector_t block, unsigned size)
1322{ 1323{
1323 struct buffer_head *bh = lookup_bh_lru(bdev, block, size); 1324 struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
1324 1325
@@ -1346,7 +1347,7 @@ EXPORT_SYMBOL(__find_get_block);
1346 * attempt is failing. FIXME, perhaps? 1347 * attempt is failing. FIXME, perhaps?
1347 */ 1348 */
1348struct buffer_head * 1349struct buffer_head *
1349__getblk(struct block_device *bdev, sector_t block, int size) 1350__getblk(struct block_device *bdev, sector_t block, unsigned size)
1350{ 1351{
1351 struct buffer_head *bh = __find_get_block(bdev, block, size); 1352 struct buffer_head *bh = __find_get_block(bdev, block, size);
1352 1353
@@ -1360,7 +1361,7 @@ EXPORT_SYMBOL(__getblk);
1360/* 1361/*
1361 * Do async read-ahead on a buffer.. 1362 * Do async read-ahead on a buffer..
1362 */ 1363 */
1363void __breadahead(struct block_device *bdev, sector_t block, int size) 1364void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
1364{ 1365{
1365 struct buffer_head *bh = __getblk(bdev, block, size); 1366 struct buffer_head *bh = __getblk(bdev, block, size);
1366 if (likely(bh)) { 1367 if (likely(bh)) {
@@ -1380,7 +1381,7 @@ EXPORT_SYMBOL(__breadahead);
1380 * It returns NULL if the block was unreadable. 1381 * It returns NULL if the block was unreadable.
1381 */ 1382 */
1382struct buffer_head * 1383struct buffer_head *
1383__bread(struct block_device *bdev, sector_t block, int size) 1384__bread(struct block_device *bdev, sector_t block, unsigned size)
1384{ 1385{
1385 struct buffer_head *bh = __getblk(bdev, block, size); 1386 struct buffer_head *bh = __getblk(bdev, block, size);
1386 1387
@@ -1439,6 +1440,7 @@ static void discard_buffer(struct buffer_head * bh)
1439 clear_buffer_req(bh); 1440 clear_buffer_req(bh);
1440 clear_buffer_new(bh); 1441 clear_buffer_new(bh);
1441 clear_buffer_delay(bh); 1442 clear_buffer_delay(bh);
1443 clear_buffer_unwritten(bh);
1442 unlock_buffer(bh); 1444 unlock_buffer(bh);
1443} 1445}
1444 1446
@@ -1822,6 +1824,7 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
1822 continue; 1824 continue;
1823 } 1825 }
1824 if (!buffer_uptodate(bh) && !buffer_delay(bh) && 1826 if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
1827 !buffer_unwritten(bh) &&
1825 (block_start < from || block_end > to)) { 1828 (block_start < from || block_end > to)) {
1826 ll_rw_block(READ, 1, &bh); 1829 ll_rw_block(READ, 1, &bh);
1827 *wait_bh++=bh; 1830 *wait_bh++=bh;
@@ -2543,7 +2546,7 @@ int block_truncate_page(struct address_space *mapping,
2543 if (PageUptodate(page)) 2546 if (PageUptodate(page))
2544 set_buffer_uptodate(bh); 2547 set_buffer_uptodate(bh);
2545 2548
2546 if (!buffer_uptodate(bh) && !buffer_delay(bh)) { 2549 if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
2547 err = -EIO; 2550 err = -EIO;
2548 ll_rw_block(READ, 1, &bh); 2551 ll_rw_block(READ, 1, &bh);
2549 wait_on_buffer(bh); 2552 wait_on_buffer(bh);
diff --git a/fs/char_dev.c b/fs/char_dev.c
index a885f46ca001..e6194e2b9bb9 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -108,6 +108,13 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor,
108 /* temporary */ 108 /* temporary */
109 if (major == 0) { 109 if (major == 0) {
110 for (i = ARRAY_SIZE(chrdevs)-1; i > 0; i--) { 110 for (i = ARRAY_SIZE(chrdevs)-1; i > 0; i--) {
111 /*
112 * Disallow the LANANA-assigned LOCAL/EXPERIMENTAL
113 * majors
114 */
115 if ((60 <= i && i <= 63) || (120 <= i && i <= 127) ||
116 (240 <= i && i <= 254))
117 continue;
111 if (chrdevs[i] == NULL) 118 if (chrdevs[i] == NULL)
112 break; 119 break;
113 } 120 }
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index d04d2f7448d9..85e3850bf2c9 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,6 +1,8 @@
1Version 1.47 1Version 1.47
2------------ 2------------
3Fix oops in list_del during mount caused by unaligned string. 3Fix oops in list_del during mount caused by unaligned string.
4Seek to SEEK_END forces check for update of file size for non-cached
5files.
4 6
5Version 1.46 7Version 1.46
6------------ 8------------
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 10c90294cd18..e8287c4c6eb3 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -64,7 +64,7 @@ extern struct task_struct * oplockThread; /* remove sparse warning */
64struct task_struct * oplockThread = NULL; 64struct task_struct * oplockThread = NULL;
65extern struct task_struct * dnotifyThread; /* remove sparse warning */ 65extern struct task_struct * dnotifyThread; /* remove sparse warning */
66struct task_struct * dnotifyThread = NULL; 66struct task_struct * dnotifyThread = NULL;
67static struct super_operations cifs_super_ops; 67static const struct super_operations cifs_super_ops;
68unsigned int CIFSMaxBufSize = CIFS_MAX_MSGSIZE; 68unsigned int CIFSMaxBufSize = CIFS_MAX_MSGSIZE;
69module_param(CIFSMaxBufSize, int, 0); 69module_param(CIFSMaxBufSize, int, 0);
70MODULE_PARM_DESC(CIFSMaxBufSize,"Network buffer size (not including header). Default: 16384 Range: 8192 to 130048"); 70MODULE_PARM_DESC(CIFSMaxBufSize,"Network buffer size (not including header). Default: 16384 Range: 8192 to 130048");
@@ -453,7 +453,7 @@ static int cifs_remount(struct super_block *sb, int *flags, char *data)
453 return 0; 453 return 0;
454} 454}
455 455
456static struct super_operations cifs_super_ops = { 456static const struct super_operations cifs_super_ops = {
457 .read_inode = cifs_read_inode, 457 .read_inode = cifs_read_inode,
458 .put_super = cifs_put_super, 458 .put_super = cifs_put_super,
459 .statfs = cifs_statfs, 459 .statfs = cifs_statfs,
@@ -511,7 +511,15 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int origin)
511{ 511{
512 /* origin == SEEK_END => we must revalidate the cached file length */ 512 /* origin == SEEK_END => we must revalidate the cached file length */
513 if (origin == SEEK_END) { 513 if (origin == SEEK_END) {
514 int retval = cifs_revalidate(file->f_path.dentry); 514 int retval;
515
516 /* some applications poll for the file length in this strange
517 way so we must seek to end on non-oplocked files by
518 setting the revalidate time to zero */
519 if(file->f_path.dentry->d_inode)
520 CIFS_I(file->f_path.dentry->d_inode)->time = 0;
521
522 retval = cifs_revalidate(file->f_path.dentry);
515 if (retval < 0) 523 if (retval < 0)
516 return (loff_t)retval; 524 return (loff_t)retval;
517 } 525 }
@@ -525,7 +533,7 @@ static struct file_system_type cifs_fs_type = {
525 .kill_sb = kill_anon_super, 533 .kill_sb = kill_anon_super,
526 /* .fs_flags */ 534 /* .fs_flags */
527}; 535};
528struct inode_operations cifs_dir_inode_ops = { 536const struct inode_operations cifs_dir_inode_ops = {
529 .create = cifs_create, 537 .create = cifs_create,
530 .lookup = cifs_lookup, 538 .lookup = cifs_lookup,
531 .getattr = cifs_getattr, 539 .getattr = cifs_getattr,
@@ -547,7 +555,7 @@ struct inode_operations cifs_dir_inode_ops = {
547#endif 555#endif
548}; 556};
549 557
550struct inode_operations cifs_file_inode_ops = { 558const struct inode_operations cifs_file_inode_ops = {
551/* revalidate:cifs_revalidate, */ 559/* revalidate:cifs_revalidate, */
552 .setattr = cifs_setattr, 560 .setattr = cifs_setattr,
553 .getattr = cifs_getattr, /* do we need this anymore? */ 561 .getattr = cifs_getattr, /* do we need this anymore? */
@@ -561,7 +569,7 @@ struct inode_operations cifs_file_inode_ops = {
561#endif 569#endif
562}; 570};
563 571
564struct inode_operations cifs_symlink_inode_ops = { 572const struct inode_operations cifs_symlink_inode_ops = {
565 .readlink = generic_readlink, 573 .readlink = generic_readlink,
566 .follow_link = cifs_follow_link, 574 .follow_link = cifs_follow_link,
567 .put_link = cifs_put_link, 575 .put_link = cifs_put_link,
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 8aa66dcf13bd..01ae24af9cfd 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -36,13 +36,13 @@ extern const struct address_space_operations cifs_addr_ops;
36extern const struct address_space_operations cifs_addr_ops_smallbuf; 36extern const struct address_space_operations cifs_addr_ops_smallbuf;
37 37
38/* Functions related to super block operations */ 38/* Functions related to super block operations */
39/* extern struct super_operations cifs_super_ops;*/ 39/* extern const struct super_operations cifs_super_ops;*/
40extern void cifs_read_inode(struct inode *); 40extern void cifs_read_inode(struct inode *);
41extern void cifs_delete_inode(struct inode *); 41extern void cifs_delete_inode(struct inode *);
42/* extern void cifs_write_inode(struct inode *); *//* BB not needed yet */ 42/* extern void cifs_write_inode(struct inode *); *//* BB not needed yet */
43 43
44/* Functions related to inodes */ 44/* Functions related to inodes */
45extern struct inode_operations cifs_dir_inode_ops; 45extern const struct inode_operations cifs_dir_inode_ops;
46extern int cifs_create(struct inode *, struct dentry *, int, 46extern int cifs_create(struct inode *, struct dentry *, int,
47 struct nameidata *); 47 struct nameidata *);
48extern struct dentry * cifs_lookup(struct inode *, struct dentry *, 48extern struct dentry * cifs_lookup(struct inode *, struct dentry *,
@@ -58,8 +58,8 @@ extern int cifs_revalidate(struct dentry *);
58extern int cifs_getattr(struct vfsmount *, struct dentry *, struct kstat *); 58extern int cifs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
59extern int cifs_setattr(struct dentry *, struct iattr *); 59extern int cifs_setattr(struct dentry *, struct iattr *);
60 60
61extern struct inode_operations cifs_file_inode_ops; 61extern const struct inode_operations cifs_file_inode_ops;
62extern struct inode_operations cifs_symlink_inode_ops; 62extern const struct inode_operations cifs_symlink_inode_ops;
63 63
64/* Functions related to files and directories */ 64/* Functions related to files and directories */
65extern const struct file_operations cifs_file_ops; 65extern const struct file_operations cifs_file_ops;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 8a49b2e77d37..e9dcf5ee29a2 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1146,7 +1146,7 @@ static int cifs_writepages(struct address_space *mapping,
1146 pgoff_t end; 1146 pgoff_t end;
1147 pgoff_t index; 1147 pgoff_t index;
1148 int range_whole = 0; 1148 int range_whole = 0;
1149 struct kvec iov[32]; 1149 struct kvec * iov;
1150 int len; 1150 int len;
1151 int n_iov = 0; 1151 int n_iov = 0;
1152 pgoff_t next; 1152 pgoff_t next;
@@ -1171,15 +1171,21 @@ static int cifs_writepages(struct address_space *mapping,
1171 if((cifs_sb->tcon->ses) && (cifs_sb->tcon->ses->server)) 1171 if((cifs_sb->tcon->ses) && (cifs_sb->tcon->ses->server))
1172 if(cifs_sb->tcon->ses->server->secMode & 1172 if(cifs_sb->tcon->ses->server->secMode &
1173 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) 1173 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
1174 if(!experimEnabled) 1174 if(!experimEnabled)
1175 return generic_writepages(mapping, wbc); 1175 return generic_writepages(mapping, wbc);
1176 1176
1177 iov = kmalloc(32 * sizeof(struct kvec), GFP_KERNEL);
1178 if(iov == NULL)
1179 return generic_writepages(mapping, wbc);
1180
1181
1177 /* 1182 /*
1178 * BB: Is this meaningful for a non-block-device file system? 1183 * BB: Is this meaningful for a non-block-device file system?
1179 * If it is, we should test it again after we do I/O 1184 * If it is, we should test it again after we do I/O
1180 */ 1185 */
1181 if (wbc->nonblocking && bdi_write_congested(bdi)) { 1186 if (wbc->nonblocking && bdi_write_congested(bdi)) {
1182 wbc->encountered_congestion = 1; 1187 wbc->encountered_congestion = 1;
1188 kfree(iov);
1183 return 0; 1189 return 0;
1184 } 1190 }
1185 1191
@@ -1345,7 +1351,7 @@ retry:
1345 mapping->writeback_index = index; 1351 mapping->writeback_index = index;
1346 1352
1347 FreeXid(xid); 1353 FreeXid(xid);
1348 1354 kfree(iov);
1349 return rc; 1355 return rc;
1350} 1356}
1351 1357
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 99dfb5337e31..782940be550f 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -156,9 +156,9 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
156 tmp_inode->i_atime = cnvrtDosUnixTm( 156 tmp_inode->i_atime = cnvrtDosUnixTm(
157 le16_to_cpu(pfindData->LastAccessDate), 157 le16_to_cpu(pfindData->LastAccessDate),
158 le16_to_cpu(pfindData->LastAccessTime)); 158 le16_to_cpu(pfindData->LastAccessTime));
159 tmp_inode->i_ctime = cnvrtDosUnixTm( 159 tmp_inode->i_ctime = cnvrtDosUnixTm(
160 le16_to_cpu(pfindData->LastWriteDate), 160 le16_to_cpu(pfindData->LastWriteDate),
161 le16_to_cpu(pfindData->LastWriteTime)); 161 le16_to_cpu(pfindData->LastWriteTime));
162 AdjustForTZ(cifs_sb->tcon, tmp_inode); 162 AdjustForTZ(cifs_sb->tcon, tmp_inode);
163 attr = le16_to_cpu(pfindData->Attributes); 163 attr = le16_to_cpu(pfindData->Attributes);
164 allocation_size = le32_to_cpu(pfindData->AllocationSize); 164 allocation_size = le32_to_cpu(pfindData->AllocationSize);
diff --git a/fs/cifs/smbdes.c b/fs/cifs/smbdes.c
index 7a1b2b961ec8..1b1daf63f062 100644
--- a/fs/cifs/smbdes.c
+++ b/fs/cifs/smbdes.c
@@ -196,7 +196,7 @@ dohash(char *out, char *in, char *key, int forw)
196 char c[28]; 196 char c[28];
197 char d[28]; 197 char d[28];
198 char *cd; 198 char *cd;
199 char ki[16][48]; 199 char (*ki)[48];
200 char *pd1; 200 char *pd1;
201 char l[32], r[32]; 201 char l[32], r[32];
202 char *rl; 202 char *rl;
@@ -206,6 +206,12 @@ dohash(char *out, char *in, char *key, int forw)
206 if(pk1 == NULL) 206 if(pk1 == NULL)
207 return; 207 return;
208 208
209 ki = kmalloc(16*48, GFP_KERNEL);
210 if(ki == NULL) {
211 kfree(pk1);
212 return;
213 }
214
209 cd = pk1 + 56; 215 cd = pk1 + 56;
210 pd1= cd + 56; 216 pd1= cd + 56;
211 rl = pd1 + 64; 217 rl = pd1 + 64;
@@ -243,6 +249,7 @@ dohash(char *out, char *in, char *key, int forw)
243 er = kmalloc(48+48+32+32+32, GFP_KERNEL); 249 er = kmalloc(48+48+32+32+32, GFP_KERNEL);
244 if(er == NULL) { 250 if(er == NULL) {
245 kfree(pk1); 251 kfree(pk1);
252 kfree(ki);
246 return; 253 return;
247 } 254 }
248 erk = er+48; 255 erk = er+48;
@@ -290,6 +297,7 @@ dohash(char *out, char *in, char *key, int forw)
290 297
291 permute(out, rl, perm6, 64); 298 permute(out, rl, perm6, 64);
292 kfree(pk1); 299 kfree(pk1);
300 kfree(ki);
293} 301}
294 302
295static void 303static void
diff --git a/fs/coda/cnode.c b/fs/coda/cnode.c
index 4c9fecbfa91f..28c872747f81 100644
--- a/fs/coda/cnode.c
+++ b/fs/coda/cnode.c
@@ -16,7 +16,7 @@ static inline int coda_fideq(struct CodaFid *fid1, struct CodaFid *fid2)
16 return memcmp(fid1, fid2, sizeof(*fid1)) == 0; 16 return memcmp(fid1, fid2, sizeof(*fid1)) == 0;
17} 17}
18 18
19static struct inode_operations coda_symlink_inode_operations = { 19static const struct inode_operations coda_symlink_inode_operations = {
20 .readlink = generic_readlink, 20 .readlink = generic_readlink,
21 .follow_link = page_follow_link_light, 21 .follow_link = page_follow_link_light,
22 .put_link = page_put_link, 22 .put_link = page_put_link,
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 0c6f7f3b3dd7..9ddf5ed62162 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -66,7 +66,7 @@ static struct dentry_operations coda_dentry_operations =
66 .d_delete = coda_dentry_delete, 66 .d_delete = coda_dentry_delete,
67}; 67};
68 68
69struct inode_operations coda_dir_inode_operations = 69const struct inode_operations coda_dir_inode_operations =
70{ 70{
71 .create = coda_create, 71 .create = coda_create,
72 .lookup = coda_lookup, 72 .lookup = coda_lookup,
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 01395defed85..614175a3b02e 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -90,7 +90,7 @@ static int coda_remount(struct super_block *sb, int *flags, char *data)
90} 90}
91 91
92/* exported operations */ 92/* exported operations */
93static struct super_operations coda_super_operations = 93static const struct super_operations coda_super_operations =
94{ 94{
95 .alloc_inode = coda_alloc_inode, 95 .alloc_inode = coda_alloc_inode,
96 .destroy_inode = coda_destroy_inode, 96 .destroy_inode = coda_destroy_inode,
@@ -271,7 +271,7 @@ int coda_setattr(struct dentry *de, struct iattr *iattr)
271 return error; 271 return error;
272} 272}
273 273
274struct inode_operations coda_file_inode_operations = { 274const struct inode_operations coda_file_inode_operations = {
275 .permission = coda_permission, 275 .permission = coda_permission,
276 .getattr = coda_getattr, 276 .getattr = coda_getattr,
277 .setattr = coda_setattr, 277 .setattr = coda_setattr,
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index 214822be87bd..2bf3026adc80 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -30,7 +30,7 @@ static int coda_pioctl(struct inode * inode, struct file * filp,
30 unsigned int cmd, unsigned long user_data); 30 unsigned int cmd, unsigned long user_data);
31 31
32/* exported from this file */ 32/* exported from this file */
33struct inode_operations coda_ioctl_inode_operations = 33const struct inode_operations coda_ioctl_inode_operations =
34{ 34{
35 .permission = coda_ioctl_permission, 35 .permission = coda_ioctl_permission,
36 .setattr = coda_setattr, 36 .setattr = coda_setattr,
diff --git a/fs/coda/sysctl.c b/fs/coda/sysctl.c
index 1c82e9a7d7c8..db3b1a9c9a5b 100644
--- a/fs/coda/sysctl.c
+++ b/fs/coda/sysctl.c
@@ -15,6 +15,7 @@
15#include <linux/mm.h> 15#include <linux/mm.h>
16#include <linux/sysctl.h> 16#include <linux/sysctl.h>
17#include <linux/proc_fs.h> 17#include <linux/proc_fs.h>
18#include <linux/seq_file.h>
18#include <linux/slab.h> 19#include <linux/slab.h>
19#include <linux/stat.h> 20#include <linux/stat.h>
20#include <linux/ctype.h> 21#include <linux/ctype.h>
@@ -84,15 +85,11 @@ static int do_reset_coda_cache_inv_stats( ctl_table * table, int write,
84 return 0; 85 return 0;
85} 86}
86 87
87static int coda_vfs_stats_get_info( char * buffer, char ** start, 88static int proc_vfs_stats_show(struct seq_file *m, void *v)
88 off_t offset, int length)
89{ 89{
90 int len=0;
91 off_t begin;
92 struct coda_vfs_stats * ps = & coda_vfs_stat; 90 struct coda_vfs_stats * ps = & coda_vfs_stat;
93 91
94 /* this works as long as we are below 1024 characters! */ 92 seq_printf(m,
95 len += sprintf( buffer,
96 "Coda VFS statistics\n" 93 "Coda VFS statistics\n"
97 "===================\n\n" 94 "===================\n\n"
98 "File Operations:\n" 95 "File Operations:\n"
@@ -132,28 +129,14 @@ static int coda_vfs_stats_get_info( char * buffer, char ** start,
132 ps->rmdir, 129 ps->rmdir,
133 ps->rename, 130 ps->rename,
134 ps->permission); 131 ps->permission);
135 132 return 0;
136 begin = offset;
137 *start = buffer + begin;
138 len -= begin;
139
140 if ( len > length )
141 len = length;
142 if ( len < 0 )
143 len = 0;
144
145 return len;
146} 133}
147 134
148static int coda_cache_inv_stats_get_info( char * buffer, char ** start, 135static int proc_cache_inv_stats_show(struct seq_file *m, void *v)
149 off_t offset, int length)
150{ 136{
151 int len=0;
152 off_t begin;
153 struct coda_cache_inv_stats * ps = & coda_cache_inv_stat; 137 struct coda_cache_inv_stats * ps = & coda_cache_inv_stat;
154 138
155 /* this works as long as we are below 1024 characters! */ 139 seq_printf(m,
156 len += sprintf( buffer,
157 "Coda cache invalidation statistics\n" 140 "Coda cache invalidation statistics\n"
158 "==================================\n\n" 141 "==================================\n\n"
159 "flush\t\t%9d\n" 142 "flush\t\t%9d\n"
@@ -170,19 +153,35 @@ static int coda_cache_inv_stats_get_info( char * buffer, char ** start,
170 ps->zap_vnode, 153 ps->zap_vnode,
171 ps->purge_fid, 154 ps->purge_fid,
172 ps->replace ); 155 ps->replace );
173 156 return 0;
174 begin = offset; 157}
175 *start = buffer + begin;
176 len -= begin;
177 158
178 if ( len > length ) 159static int proc_vfs_stats_open(struct inode *inode, struct file *file)
179 len = length; 160{
180 if ( len < 0 ) 161 return single_open(file, proc_vfs_stats_show, NULL);
181 len = 0; 162}
182 163
183 return len; 164static int proc_cache_inv_stats_open(struct inode *inode, struct file *file)
165{
166 return single_open(file, proc_cache_inv_stats_show, NULL);
184} 167}
185 168
169static const struct file_operations proc_vfs_stats_fops = {
170 .owner = THIS_MODULE,
171 .open = proc_vfs_stats_open,
172 .read = seq_read,
173 .llseek = seq_lseek,
174 .release = single_release,
175};
176
177static const struct file_operations proc_cache_inv_stats_fops = {
178 .owner = THIS_MODULE,
179 .open = proc_cache_inv_stats_open,
180 .read = seq_read,
181 .llseek = seq_lseek,
182 .release = single_release,
183};
184
186static ctl_table coda_table[] = { 185static ctl_table coda_table[] = {
187 {CODA_TIMEOUT, "timeout", &coda_timeout, sizeof(int), 0644, NULL, &proc_dointvec}, 186 {CODA_TIMEOUT, "timeout", &coda_timeout, sizeof(int), 0644, NULL, &proc_dointvec},
188 {CODA_HARD, "hard", &coda_hard, sizeof(int), 0644, NULL, &proc_dointvec}, 187 {CODA_HARD, "hard", &coda_hard, sizeof(int), 0644, NULL, &proc_dointvec},
@@ -212,9 +211,6 @@ static struct proc_dir_entry* proc_fs_coda;
212 211
213#endif 212#endif
214 213
215#define coda_proc_create(name,get_info) \
216 create_proc_info_entry(name, 0, proc_fs_coda, get_info)
217
218void coda_sysctl_init(void) 214void coda_sysctl_init(void)
219{ 215{
220 reset_coda_vfs_stats(); 216 reset_coda_vfs_stats();
@@ -223,9 +219,15 @@ void coda_sysctl_init(void)
223#ifdef CONFIG_PROC_FS 219#ifdef CONFIG_PROC_FS
224 proc_fs_coda = proc_mkdir("coda", proc_root_fs); 220 proc_fs_coda = proc_mkdir("coda", proc_root_fs);
225 if (proc_fs_coda) { 221 if (proc_fs_coda) {
222 struct proc_dir_entry *pde;
223
226 proc_fs_coda->owner = THIS_MODULE; 224 proc_fs_coda->owner = THIS_MODULE;
227 coda_proc_create("vfs_stats", coda_vfs_stats_get_info); 225 pde = create_proc_entry("vfs_stats", 0, proc_fs_coda);
228 coda_proc_create("cache_inv_stats", coda_cache_inv_stats_get_info); 226 if (pde)
227 pde->proc_fops = &proc_vfs_stats_fops;
228 pde = create_proc_entry("cache_inv_stats", 0, proc_fs_coda);
229 if (pde)
230 pde->proc_fops = &proc_cache_inv_stats_fops;
229 } 231 }
230#endif 232#endif
231 233
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index f92cd303d2c9..7b48c034b312 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -75,8 +75,8 @@ extern struct super_block * configfs_sb;
75extern const struct file_operations configfs_dir_operations; 75extern const struct file_operations configfs_dir_operations;
76extern const struct file_operations configfs_file_operations; 76extern const struct file_operations configfs_file_operations;
77extern const struct file_operations bin_fops; 77extern const struct file_operations bin_fops;
78extern struct inode_operations configfs_dir_inode_operations; 78extern const struct inode_operations configfs_dir_inode_operations;
79extern struct inode_operations configfs_symlink_inode_operations; 79extern const struct inode_operations configfs_symlink_inode_operations;
80 80
81extern int configfs_symlink(struct inode *dir, struct dentry *dentry, 81extern int configfs_symlink(struct inode *dir, struct dentry *dentry,
82 const char *symname); 82 const char *symname);
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 1814ba446809..34750d5e4ff2 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -72,11 +72,10 @@ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * pare
72{ 72{
73 struct configfs_dirent * sd; 73 struct configfs_dirent * sd;
74 74
75 sd = kmem_cache_alloc(configfs_dir_cachep, GFP_KERNEL); 75 sd = kmem_cache_zalloc(configfs_dir_cachep, GFP_KERNEL);
76 if (!sd) 76 if (!sd)
77 return NULL; 77 return NULL;
78 78
79 memset(sd, 0, sizeof(*sd));
80 atomic_set(&sd->s_count, 1); 79 atomic_set(&sd->s_count, 1);
81 INIT_LIST_HEAD(&sd->s_links); 80 INIT_LIST_HEAD(&sd->s_links);
82 INIT_LIST_HEAD(&sd->s_children); 81 INIT_LIST_HEAD(&sd->s_children);
@@ -931,7 +930,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
931 return 0; 930 return 0;
932} 931}
933 932
934struct inode_operations configfs_dir_inode_operations = { 933const struct inode_operations configfs_dir_inode_operations = {
935 .mkdir = configfs_mkdir, 934 .mkdir = configfs_mkdir,
936 .rmdir = configfs_rmdir, 935 .rmdir = configfs_rmdir,
937 .symlink = configfs_symlink, 936 .symlink = configfs_symlink,
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index 2a7cb086e80c..d98be5e01328 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -162,14 +162,17 @@ fill_write_buffer(struct configfs_buffer * buffer, const char __user * buf, size
162 int error; 162 int error;
163 163
164 if (!buffer->page) 164 if (!buffer->page)
165 buffer->page = (char *)get_zeroed_page(GFP_KERNEL); 165 buffer->page = (char *)__get_free_pages(GFP_KERNEL, 0);
166 if (!buffer->page) 166 if (!buffer->page)
167 return -ENOMEM; 167 return -ENOMEM;
168 168
169 if (count > PAGE_SIZE) 169 if (count >= PAGE_SIZE)
170 count = PAGE_SIZE; 170 count = PAGE_SIZE - 1;
171 error = copy_from_user(buffer->page,buf,count); 171 error = copy_from_user(buffer->page,buf,count);
172 buffer->needs_read_fill = 1; 172 buffer->needs_read_fill = 1;
173 /* if buf is assumed to contain a string, terminate it by \0,
174 * so e.g. sscanf() can scan the string easily */
175 buffer->page[count] = 0;
173 return error ? -EFAULT : count; 176 return error ? -EFAULT : count;
174} 177}
175 178
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index fb18917954a9..2ec9beac17cf 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -49,7 +49,7 @@ static struct backing_dev_info configfs_backing_dev_info = {
49 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, 49 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
50}; 50};
51 51
52static struct inode_operations configfs_inode_operations ={ 52static const struct inode_operations configfs_inode_operations ={
53 .setattr = configfs_setattr, 53 .setattr = configfs_setattr,
54}; 54};
55 55
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index ed678529ebb2..6f573004cd7d 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -41,7 +41,7 @@ struct super_block * configfs_sb = NULL;
41struct kmem_cache *configfs_dir_cachep; 41struct kmem_cache *configfs_dir_cachep;
42static int configfs_mnt_count = 0; 42static int configfs_mnt_count = 0;
43 43
44static struct super_operations configfs_ops = { 44static const struct super_operations configfs_ops = {
45 .statfs = simple_statfs, 45 .statfs = simple_statfs,
46 .drop_inode = generic_delete_inode, 46 .drop_inode = generic_delete_inode,
47}; 47};
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index fb65e0800a86..22700d2857da 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -272,7 +272,7 @@ static void configfs_put_link(struct dentry *dentry, struct nameidata *nd,
272 } 272 }
273} 273}
274 274
275struct inode_operations configfs_symlink_inode_operations = { 275const struct inode_operations configfs_symlink_inode_operations = {
276 .follow_link = configfs_follow_link, 276 .follow_link = configfs_follow_link,
277 .readlink = generic_readlink, 277 .readlink = generic_readlink,
278 .put_link = configfs_put_link, 278 .put_link = configfs_put_link,
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 6db03fb089dc..facd0c89be8f 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -27,8 +27,8 @@
27 27
28#include <asm/uaccess.h> 28#include <asm/uaccess.h>
29 29
30static struct super_operations cramfs_ops; 30static const struct super_operations cramfs_ops;
31static struct inode_operations cramfs_dir_inode_operations; 31static const struct inode_operations cramfs_dir_inode_operations;
32static const struct file_operations cramfs_directory_operations; 32static const struct file_operations cramfs_directory_operations;
33static const struct address_space_operations cramfs_aops; 33static const struct address_space_operations cramfs_aops;
34 34
@@ -518,11 +518,11 @@ static const struct file_operations cramfs_directory_operations = {
518 .readdir = cramfs_readdir, 518 .readdir = cramfs_readdir,
519}; 519};
520 520
521static struct inode_operations cramfs_dir_inode_operations = { 521static const struct inode_operations cramfs_dir_inode_operations = {
522 .lookup = cramfs_lookup, 522 .lookup = cramfs_lookup,
523}; 523};
524 524
525static struct super_operations cramfs_ops = { 525static const struct super_operations cramfs_ops = {
526 .put_super = cramfs_put_super, 526 .put_super = cramfs_put_super,
527 .remount_fs = cramfs_remount, 527 .remount_fs = cramfs_remount,
528 .statfs = cramfs_statfs, 528 .statfs = cramfs_statfs,
diff --git a/fs/dcache.c b/fs/dcache.c
index d68631f18df1..b5f613932912 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1739,45 +1739,41 @@ shouldnt_be_hashed:
1739 * @rootmnt: vfsmnt to which the root dentry belongs 1739 * @rootmnt: vfsmnt to which the root dentry belongs
1740 * @buffer: buffer to return value in 1740 * @buffer: buffer to return value in
1741 * @buflen: buffer length 1741 * @buflen: buffer length
1742 * @fail_deleted: what to return for deleted files
1742 * 1743 *
1743 * Convert a dentry into an ASCII path name. If the entry has been deleted 1744 * Convert a dentry into an ASCII path name. If the entry has been deleted,
1744 * the string " (deleted)" is appended. Note that this is ambiguous. 1745 * then if @fail_deleted is true, ERR_PTR(-ENOENT) is returned. Otherwise,
1746 * the string " (deleted)" is appended. Note that this is ambiguous.
1745 * 1747 *
1746 * Returns the buffer or an error code if the path was too long. 1748 * Returns the buffer or an error code.
1747 *
1748 * "buflen" should be positive. Caller holds the dcache_lock.
1749 */ 1749 */
1750static char * __d_path( struct dentry *dentry, struct vfsmount *vfsmnt, 1750static char *__d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
1751 struct dentry *root, struct vfsmount *rootmnt, 1751 struct dentry *root, struct vfsmount *rootmnt,
1752 char *buffer, int buflen) 1752 char *buffer, int buflen, int fail_deleted)
1753{ 1753{
1754 char * end = buffer+buflen; 1754 int namelen, is_slash;
1755 char * retval; 1755
1756 int namelen; 1756 if (buflen < 2)
1757 return ERR_PTR(-ENAMETOOLONG);
1758 buffer += --buflen;
1759 *buffer = '\0';
1757 1760
1758 *--end = '\0'; 1761 spin_lock(&dcache_lock);
1759 buflen--;
1760 if (!IS_ROOT(dentry) && d_unhashed(dentry)) { 1762 if (!IS_ROOT(dentry) && d_unhashed(dentry)) {
1761 buflen -= 10; 1763 if (fail_deleted) {
1762 end -= 10; 1764 buffer = ERR_PTR(-ENOENT);
1763 if (buflen < 0) 1765 goto out;
1766 }
1767 if (buflen < 10)
1764 goto Elong; 1768 goto Elong;
1765 memcpy(end, " (deleted)", 10); 1769 buflen -= 10;
1770 buffer -= 10;
1771 memcpy(buffer, " (deleted)", 10);
1766 } 1772 }
1767 1773 while (dentry != root || vfsmnt != rootmnt) {
1768 if (buflen < 1)
1769 goto Elong;
1770 /* Get '/' right */
1771 retval = end-1;
1772 *retval = '/';
1773
1774 for (;;) {
1775 struct dentry * parent; 1774 struct dentry * parent;
1776 1775
1777 if (dentry == root && vfsmnt == rootmnt)
1778 break;
1779 if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { 1776 if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
1780 /* Global root? */
1781 spin_lock(&vfsmount_lock); 1777 spin_lock(&vfsmount_lock);
1782 if (vfsmnt->mnt_parent == vfsmnt) { 1778 if (vfsmnt->mnt_parent == vfsmnt) {
1783 spin_unlock(&vfsmount_lock); 1779 spin_unlock(&vfsmount_lock);
@@ -1791,33 +1787,60 @@ static char * __d_path( struct dentry *dentry, struct vfsmount *vfsmnt,
1791 parent = dentry->d_parent; 1787 parent = dentry->d_parent;
1792 prefetch(parent); 1788 prefetch(parent);
1793 namelen = dentry->d_name.len; 1789 namelen = dentry->d_name.len;
1794 buflen -= namelen + 1; 1790 if (buflen <= namelen)
1795 if (buflen < 0)
1796 goto Elong; 1791 goto Elong;
1797 end -= namelen; 1792 buflen -= namelen + 1;
1798 memcpy(end, dentry->d_name.name, namelen); 1793 buffer -= namelen;
1799 *--end = '/'; 1794 memcpy(buffer, dentry->d_name.name, namelen);
1800 retval = end; 1795 *--buffer = '/';
1801 dentry = parent; 1796 dentry = parent;
1802 } 1797 }
1798 /* Get '/' right */
1799 if (*buffer != '/')
1800 *--buffer = '/';
1803 1801
1804 return retval; 1802out:
1803 spin_unlock(&dcache_lock);
1804 return buffer;
1805 1805
1806global_root: 1806global_root:
1807 /*
1808 * We went past the (vfsmount, dentry) we were looking for and have
1809 * either hit a root dentry, a lazily unmounted dentry, an
1810 * unconnected dentry, or the file is on a pseudo filesystem.
1811 */
1807 namelen = dentry->d_name.len; 1812 namelen = dentry->d_name.len;
1808 buflen -= namelen; 1813 is_slash = (namelen == 1 && *dentry->d_name.name == '/');
1809 if (buflen < 0) 1814 if (is_slash || (dentry->d_sb->s_flags & MS_NOUSER)) {
1815 /*
1816 * Make sure we won't return a pathname starting with '/'.
1817 *
1818 * Historically, we also glue together the root dentry and
1819 * remaining name for pseudo filesystems like pipefs, which
1820 * have the MS_NOUSER flag set. This results in pathnames
1821 * like "pipe:[439336]".
1822 */
1823 if (*buffer == '/') {
1824 buffer++;
1825 buflen++;
1826 }
1827 if (is_slash)
1828 goto out;
1829 }
1830 if (buflen < namelen)
1810 goto Elong; 1831 goto Elong;
1811 retval -= namelen-1; /* hit the slash */ 1832 buffer -= namelen;
1812 memcpy(retval, dentry->d_name.name, namelen); 1833 memcpy(buffer, dentry->d_name.name, namelen);
1813 return retval; 1834 goto out;
1835
1814Elong: 1836Elong:
1815 return ERR_PTR(-ENAMETOOLONG); 1837 buffer = ERR_PTR(-ENAMETOOLONG);
1838 goto out;
1816} 1839}
1817 1840
1818/* write full pathname into buffer and return start of pathname */ 1841/* write full pathname into buffer and return start of pathname */
1819char * d_path(struct dentry *dentry, struct vfsmount *vfsmnt, 1842char *d_path(struct dentry *dentry, struct vfsmount *vfsmnt, char *buf,
1820 char *buf, int buflen) 1843 int buflen)
1821{ 1844{
1822 char *res; 1845 char *res;
1823 struct vfsmount *rootmnt; 1846 struct vfsmount *rootmnt;
@@ -1827,9 +1850,7 @@ char * d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
1827 rootmnt = mntget(current->fs->rootmnt); 1850 rootmnt = mntget(current->fs->rootmnt);
1828 root = dget(current->fs->root); 1851 root = dget(current->fs->root);
1829 read_unlock(&current->fs->lock); 1852 read_unlock(&current->fs->lock);
1830 spin_lock(&dcache_lock); 1853 res = __d_path(dentry, vfsmnt, root, rootmnt, buf, buflen, 0);
1831 res = __d_path(dentry, vfsmnt, root, rootmnt, buf, buflen);
1832 spin_unlock(&dcache_lock);
1833 dput(root); 1854 dput(root);
1834 mntput(rootmnt); 1855 mntput(rootmnt);
1835 return res; 1856 return res;
@@ -1855,10 +1876,10 @@ char * d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
1855 */ 1876 */
1856asmlinkage long sys_getcwd(char __user *buf, unsigned long size) 1877asmlinkage long sys_getcwd(char __user *buf, unsigned long size)
1857{ 1878{
1858 int error; 1879 int error, len;
1859 struct vfsmount *pwdmnt, *rootmnt; 1880 struct vfsmount *pwdmnt, *rootmnt;
1860 struct dentry *pwd, *root; 1881 struct dentry *pwd, *root;
1861 char *page = (char *) __get_free_page(GFP_USER); 1882 char *page = (char *) __get_free_page(GFP_USER), *cwd;
1862 1883
1863 if (!page) 1884 if (!page)
1864 return -ENOMEM; 1885 return -ENOMEM;
@@ -1870,29 +1891,18 @@ asmlinkage long sys_getcwd(char __user *buf, unsigned long size)
1870 root = dget(current->fs->root); 1891 root = dget(current->fs->root);
1871 read_unlock(&current->fs->lock); 1892 read_unlock(&current->fs->lock);
1872 1893
1873 error = -ENOENT; 1894 cwd = __d_path(pwd, pwdmnt, root, rootmnt, page, PAGE_SIZE, 1);
1874 /* Has the current directory has been unlinked? */ 1895 error = PTR_ERR(cwd);
1875 spin_lock(&dcache_lock); 1896 if (IS_ERR(cwd))
1876 if (pwd->d_parent == pwd || !d_unhashed(pwd)) { 1897 goto out;
1877 unsigned long len;
1878 char * cwd;
1879
1880 cwd = __d_path(pwd, pwdmnt, root, rootmnt, page, PAGE_SIZE);
1881 spin_unlock(&dcache_lock);
1882
1883 error = PTR_ERR(cwd);
1884 if (IS_ERR(cwd))
1885 goto out;
1886 1898
1887 error = -ERANGE; 1899 error = -ERANGE;
1888 len = PAGE_SIZE + page - cwd; 1900 len = PAGE_SIZE + page - cwd;
1889 if (len <= size) { 1901 if (len <= size) {
1890 error = len; 1902 error = len;
1891 if (copy_to_user(buf, cwd, len)) 1903 if (copy_to_user(buf, cwd, len))
1892 error = -EFAULT; 1904 error = -EFAULT;
1893 } 1905 }
1894 } else
1895 spin_unlock(&dcache_lock);
1896 1906
1897out: 1907out:
1898 dput(pwd); 1908 dput(pwd);
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index bf3901ab1744..8d130cc85322 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -254,7 +254,7 @@ static ssize_t read_file_blob(struct file *file, char __user *user_buf,
254 blob->size); 254 blob->size);
255} 255}
256 256
257static struct file_operations fops_blob = { 257static const struct file_operations fops_blob = {
258 .read = read_file_blob, 258 .read = read_file_blob,
259 .open = default_open, 259 .open = default_open,
260}; 260};
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 5f7b5a6025bf..643e57b622bd 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -91,7 +91,7 @@ static int devpts_remount(struct super_block *sb, int *flags, char *data)
91 return 0; 91 return 0;
92} 92}
93 93
94static struct super_operations devpts_sops = { 94static const struct super_operations devpts_sops = {
95 .statfs = simple_statfs, 95 .statfs = simple_statfs,
96 .remount_fs = devpts_remount, 96 .remount_fs = devpts_remount,
97}; 97};
diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig
index b5654a284fef..6fa7b0d5c043 100644
--- a/fs/dlm/Kconfig
+++ b/fs/dlm/Kconfig
@@ -3,21 +3,21 @@ menu "Distributed Lock Manager"
3 3
4config DLM 4config DLM
5 tristate "Distributed Lock Manager (DLM)" 5 tristate "Distributed Lock Manager (DLM)"
6 depends on IPV6 || IPV6=n 6 depends on SYSFS && (IPV6 || IPV6=n)
7 select CONFIGFS_FS 7 select CONFIGFS_FS
8 select IP_SCTP if DLM_SCTP 8 select IP_SCTP if DLM_SCTP
9 help 9 help
10 A general purpose distributed lock manager for kernel or userspace 10 A general purpose distributed lock manager for kernel or userspace
11 applications. 11 applications.
12 12
13choice 13choice
14 prompt "Select DLM communications protocol" 14 prompt "Select DLM communications protocol"
15 depends on DLM 15 depends on DLM
16 default DLM_TCP 16 default DLM_TCP
17 help 17 help
18 The DLM can use TCP or SCTP for its network communications. 18 The DLM can use TCP or SCTP for its network communications.
19 SCTP supports multi-homed operations whereas TCP doesn't. 19 SCTP supports multi-homed operations whereas TCP doesn't.
20 However, SCTP seems to have stability problems at the moment. 20 However, SCTP seems to have stability problems at the moment.
21 21
22config DLM_TCP 22config DLM_TCP
23 bool "TCP/IP" 23 bool "TCP/IP"
@@ -31,8 +31,8 @@ config DLM_DEBUG
31 bool "DLM debugging" 31 bool "DLM debugging"
32 depends on DLM 32 depends on DLM
33 help 33 help
34 Under the debugfs mount point, the name of each lockspace will 34 Under the debugfs mount point, the name of each lockspace will
35 appear as a file in the "dlm" directory. The output is the 35 appear as a file in the "dlm" directory. The output is the
36 list of resource and locks the local node knows about. 36 list of resource and locks the local node knows about.
37 37
38endmenu 38endmenu
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 88553054bbfa..8665c88e5af2 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -54,6 +54,11 @@ static struct config_item *make_node(struct config_group *, const char *);
54static void drop_node(struct config_group *, struct config_item *); 54static void drop_node(struct config_group *, struct config_item *);
55static void release_node(struct config_item *); 55static void release_node(struct config_item *);
56 56
57static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a,
58 char *buf);
59static ssize_t store_cluster(struct config_item *i,
60 struct configfs_attribute *a,
61 const char *buf, size_t len);
57static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a, 62static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a,
58 char *buf); 63 char *buf);
59static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a, 64static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a,
@@ -73,6 +78,101 @@ static ssize_t node_nodeid_write(struct node *nd, const char *buf, size_t len);
73static ssize_t node_weight_read(struct node *nd, char *buf); 78static ssize_t node_weight_read(struct node *nd, char *buf);
74static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len); 79static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len);
75 80
81struct cluster {
82 struct config_group group;
83 unsigned int cl_tcp_port;
84 unsigned int cl_buffer_size;
85 unsigned int cl_rsbtbl_size;
86 unsigned int cl_lkbtbl_size;
87 unsigned int cl_dirtbl_size;
88 unsigned int cl_recover_timer;
89 unsigned int cl_toss_secs;
90 unsigned int cl_scan_secs;
91 unsigned int cl_log_debug;
92};
93
94enum {
95 CLUSTER_ATTR_TCP_PORT = 0,
96 CLUSTER_ATTR_BUFFER_SIZE,
97 CLUSTER_ATTR_RSBTBL_SIZE,
98 CLUSTER_ATTR_LKBTBL_SIZE,
99 CLUSTER_ATTR_DIRTBL_SIZE,
100 CLUSTER_ATTR_RECOVER_TIMER,
101 CLUSTER_ATTR_TOSS_SECS,
102 CLUSTER_ATTR_SCAN_SECS,
103 CLUSTER_ATTR_LOG_DEBUG,
104};
105
106struct cluster_attribute {
107 struct configfs_attribute attr;
108 ssize_t (*show)(struct cluster *, char *);
109 ssize_t (*store)(struct cluster *, const char *, size_t);
110};
111
112static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field,
113 unsigned int *info_field, int check_zero,
114 const char *buf, size_t len)
115{
116 unsigned int x;
117
118 if (!capable(CAP_SYS_ADMIN))
119 return -EACCES;
120
121 x = simple_strtoul(buf, NULL, 0);
122
123 if (check_zero && !x)
124 return -EINVAL;
125
126 *cl_field = x;
127 *info_field = x;
128
129 return len;
130}
131
132#define __CONFIGFS_ATTR(_name,_mode,_read,_write) { \
133 .attr = { .ca_name = __stringify(_name), \
134 .ca_mode = _mode, \
135 .ca_owner = THIS_MODULE }, \
136 .show = _read, \
137 .store = _write, \
138}
139
140#define CLUSTER_ATTR(name, check_zero) \
141static ssize_t name##_write(struct cluster *cl, const char *buf, size_t len) \
142{ \
143 return cluster_set(cl, &cl->cl_##name, &dlm_config.ci_##name, \
144 check_zero, buf, len); \
145} \
146static ssize_t name##_read(struct cluster *cl, char *buf) \
147{ \
148 return snprintf(buf, PAGE_SIZE, "%u\n", cl->cl_##name); \
149} \
150static struct cluster_attribute cluster_attr_##name = \
151__CONFIGFS_ATTR(name, 0644, name##_read, name##_write)
152
153CLUSTER_ATTR(tcp_port, 1);
154CLUSTER_ATTR(buffer_size, 1);
155CLUSTER_ATTR(rsbtbl_size, 1);
156CLUSTER_ATTR(lkbtbl_size, 1);
157CLUSTER_ATTR(dirtbl_size, 1);
158CLUSTER_ATTR(recover_timer, 1);
159CLUSTER_ATTR(toss_secs, 1);
160CLUSTER_ATTR(scan_secs, 1);
161CLUSTER_ATTR(log_debug, 0);
162
163static struct configfs_attribute *cluster_attrs[] = {
164 [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
165 [CLUSTER_ATTR_BUFFER_SIZE] = &cluster_attr_buffer_size.attr,
166 [CLUSTER_ATTR_RSBTBL_SIZE] = &cluster_attr_rsbtbl_size.attr,
167 [CLUSTER_ATTR_LKBTBL_SIZE] = &cluster_attr_lkbtbl_size.attr,
168 [CLUSTER_ATTR_DIRTBL_SIZE] = &cluster_attr_dirtbl_size.attr,
169 [CLUSTER_ATTR_RECOVER_TIMER] = &cluster_attr_recover_timer.attr,
170 [CLUSTER_ATTR_TOSS_SECS] = &cluster_attr_toss_secs.attr,
171 [CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr,
172 [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr,
173 NULL,
174};
175
76enum { 176enum {
77 COMM_ATTR_NODEID = 0, 177 COMM_ATTR_NODEID = 0,
78 COMM_ATTR_LOCAL, 178 COMM_ATTR_LOCAL,
@@ -152,10 +252,6 @@ struct clusters {
152 struct configfs_subsystem subsys; 252 struct configfs_subsystem subsys;
153}; 253};
154 254
155struct cluster {
156 struct config_group group;
157};
158
159struct spaces { 255struct spaces {
160 struct config_group ss_group; 256 struct config_group ss_group;
161}; 257};
@@ -197,6 +293,8 @@ static struct configfs_group_operations clusters_ops = {
197 293
198static struct configfs_item_operations cluster_ops = { 294static struct configfs_item_operations cluster_ops = {
199 .release = release_cluster, 295 .release = release_cluster,
296 .show_attribute = show_cluster,
297 .store_attribute = store_cluster,
200}; 298};
201 299
202static struct configfs_group_operations spaces_ops = { 300static struct configfs_group_operations spaces_ops = {
@@ -237,6 +335,7 @@ static struct config_item_type clusters_type = {
237 335
238static struct config_item_type cluster_type = { 336static struct config_item_type cluster_type = {
239 .ct_item_ops = &cluster_ops, 337 .ct_item_ops = &cluster_ops,
338 .ct_attrs = cluster_attrs,
240 .ct_owner = THIS_MODULE, 339 .ct_owner = THIS_MODULE,
241}; 340};
242 341
@@ -317,6 +416,16 @@ static struct config_group *make_cluster(struct config_group *g,
317 cl->group.default_groups[1] = &cms->cs_group; 416 cl->group.default_groups[1] = &cms->cs_group;
318 cl->group.default_groups[2] = NULL; 417 cl->group.default_groups[2] = NULL;
319 418
419 cl->cl_tcp_port = dlm_config.ci_tcp_port;
420 cl->cl_buffer_size = dlm_config.ci_buffer_size;
421 cl->cl_rsbtbl_size = dlm_config.ci_rsbtbl_size;
422 cl->cl_lkbtbl_size = dlm_config.ci_lkbtbl_size;
423 cl->cl_dirtbl_size = dlm_config.ci_dirtbl_size;
424 cl->cl_recover_timer = dlm_config.ci_recover_timer;
425 cl->cl_toss_secs = dlm_config.ci_toss_secs;
426 cl->cl_scan_secs = dlm_config.ci_scan_secs;
427 cl->cl_log_debug = dlm_config.ci_log_debug;
428
320 space_list = &sps->ss_group; 429 space_list = &sps->ss_group;
321 comm_list = &cms->cs_group; 430 comm_list = &cms->cs_group;
322 return &cl->group; 431 return &cl->group;
@@ -509,6 +618,25 @@ void dlm_config_exit(void)
509 * Functions for user space to read/write attributes 618 * Functions for user space to read/write attributes
510 */ 619 */
511 620
621static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a,
622 char *buf)
623{
624 struct cluster *cl = to_cluster(i);
625 struct cluster_attribute *cla =
626 container_of(a, struct cluster_attribute, attr);
627 return cla->show ? cla->show(cl, buf) : 0;
628}
629
630static ssize_t store_cluster(struct config_item *i,
631 struct configfs_attribute *a,
632 const char *buf, size_t len)
633{
634 struct cluster *cl = to_cluster(i);
635 struct cluster_attribute *cla =
636 container_of(a, struct cluster_attribute, attr);
637 return cla->store ? cla->store(cl, buf, len) : -EINVAL;
638}
639
512static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a, 640static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a,
513 char *buf) 641 char *buf)
514{ 642{
@@ -775,15 +903,17 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
775#define DEFAULT_RECOVER_TIMER 5 903#define DEFAULT_RECOVER_TIMER 5
776#define DEFAULT_TOSS_SECS 10 904#define DEFAULT_TOSS_SECS 10
777#define DEFAULT_SCAN_SECS 5 905#define DEFAULT_SCAN_SECS 5
906#define DEFAULT_LOG_DEBUG 0
778 907
779struct dlm_config_info dlm_config = { 908struct dlm_config_info dlm_config = {
780 .tcp_port = DEFAULT_TCP_PORT, 909 .ci_tcp_port = DEFAULT_TCP_PORT,
781 .buffer_size = DEFAULT_BUFFER_SIZE, 910 .ci_buffer_size = DEFAULT_BUFFER_SIZE,
782 .rsbtbl_size = DEFAULT_RSBTBL_SIZE, 911 .ci_rsbtbl_size = DEFAULT_RSBTBL_SIZE,
783 .lkbtbl_size = DEFAULT_LKBTBL_SIZE, 912 .ci_lkbtbl_size = DEFAULT_LKBTBL_SIZE,
784 .dirtbl_size = DEFAULT_DIRTBL_SIZE, 913 .ci_dirtbl_size = DEFAULT_DIRTBL_SIZE,
785 .recover_timer = DEFAULT_RECOVER_TIMER, 914 .ci_recover_timer = DEFAULT_RECOVER_TIMER,
786 .toss_secs = DEFAULT_TOSS_SECS, 915 .ci_toss_secs = DEFAULT_TOSS_SECS,
787 .scan_secs = DEFAULT_SCAN_SECS 916 .ci_scan_secs = DEFAULT_SCAN_SECS,
917 .ci_log_debug = DEFAULT_LOG_DEBUG
788}; 918};
789 919
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index 9da7839958a9..1e978611a96e 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -17,14 +17,15 @@
17#define DLM_MAX_ADDR_COUNT 3 17#define DLM_MAX_ADDR_COUNT 3
18 18
19struct dlm_config_info { 19struct dlm_config_info {
20 int tcp_port; 20 int ci_tcp_port;
21 int buffer_size; 21 int ci_buffer_size;
22 int rsbtbl_size; 22 int ci_rsbtbl_size;
23 int lkbtbl_size; 23 int ci_lkbtbl_size;
24 int dirtbl_size; 24 int ci_dirtbl_size;
25 int recover_timer; 25 int ci_recover_timer;
26 int toss_secs; 26 int ci_toss_secs;
27 int scan_secs; 27 int ci_scan_secs;
28 int ci_log_debug;
28}; 29};
29 30
30extern struct dlm_config_info dlm_config; 31extern struct dlm_config_info dlm_config;
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index ca94a837a5bb..61ba670b9e02 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -287,7 +287,7 @@ static int rsb_open(struct inode *inode, struct file *file)
287 return 0; 287 return 0;
288} 288}
289 289
290static struct file_operations rsb_fops = { 290static const struct file_operations rsb_fops = {
291 .owner = THIS_MODULE, 291 .owner = THIS_MODULE,
292 .open = rsb_open, 292 .open = rsb_open,
293 .read = seq_read, 293 .read = seq_read,
@@ -331,7 +331,7 @@ static ssize_t waiters_read(struct file *file, char __user *userbuf,
331 return rv; 331 return rv;
332} 332}
333 333
334static struct file_operations waiters_fops = { 334static const struct file_operations waiters_fops = {
335 .owner = THIS_MODULE, 335 .owner = THIS_MODULE,
336 .open = waiters_open, 336 .open = waiters_open,
337 .read = waiters_read 337 .read = waiters_read
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 1ee8195e6fc0..61d93201e1b2 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -41,6 +41,7 @@
41#include <asm/uaccess.h> 41#include <asm/uaccess.h>
42 42
43#include <linux/dlm.h> 43#include <linux/dlm.h>
44#include "config.h"
44 45
45#define DLM_LOCKSPACE_LEN 64 46#define DLM_LOCKSPACE_LEN 64
46 47
@@ -69,12 +70,12 @@ struct dlm_mhandle;
69#define log_error(ls, fmt, args...) \ 70#define log_error(ls, fmt, args...) \
70 printk(KERN_ERR "dlm: %s: " fmt "\n", (ls)->ls_name , ##args) 71 printk(KERN_ERR "dlm: %s: " fmt "\n", (ls)->ls_name , ##args)
71 72
72#define DLM_LOG_DEBUG 73#define log_debug(ls, fmt, args...) \
73#ifdef DLM_LOG_DEBUG 74do { \
74#define log_debug(ls, fmt, args...) log_error(ls, fmt, ##args) 75 if (dlm_config.ci_log_debug) \
75#else 76 printk(KERN_DEBUG "dlm: %s: " fmt "\n", \
76#define log_debug(ls, fmt, args...) 77 (ls)->ls_name , ##args); \
77#endif 78} while (0)
78 79
79#define DLM_ASSERT(x, do) \ 80#define DLM_ASSERT(x, do) \
80{ \ 81{ \
@@ -309,8 +310,8 @@ static inline int rsb_flag(struct dlm_rsb *r, enum rsb_flags flag)
309 310
310/* dlm_header is first element of all structs sent between nodes */ 311/* dlm_header is first element of all structs sent between nodes */
311 312
312#define DLM_HEADER_MAJOR 0x00020000 313#define DLM_HEADER_MAJOR 0x00030000
313#define DLM_HEADER_MINOR 0x00000001 314#define DLM_HEADER_MINOR 0x00000000
314 315
315#define DLM_MSG 1 316#define DLM_MSG 1
316#define DLM_RCOM 2 317#define DLM_RCOM 2
@@ -386,6 +387,8 @@ struct dlm_rcom {
386 uint32_t rc_type; /* DLM_RCOM_ */ 387 uint32_t rc_type; /* DLM_RCOM_ */
387 int rc_result; /* multi-purpose */ 388 int rc_result; /* multi-purpose */
388 uint64_t rc_id; /* match reply with request */ 389 uint64_t rc_id; /* match reply with request */
390 uint64_t rc_seq; /* sender's ls_recover_seq */
391 uint64_t rc_seq_reply; /* remote ls_recover_seq */
389 char rc_buf[0]; 392 char rc_buf[0];
390}; 393};
391 394
@@ -523,6 +526,7 @@ struct dlm_user_proc {
523 spinlock_t asts_spin; 526 spinlock_t asts_spin;
524 struct list_head locks; 527 struct list_head locks;
525 spinlock_t locks_spin; 528 spinlock_t locks_spin;
529 struct list_head unlocking;
526 wait_queue_head_t wait; 530 wait_queue_head_t wait;
527}; 531};
528 532
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 30878defaeb6..e725005fafd0 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -754,6 +754,11 @@ static void add_to_waiters(struct dlm_lkb *lkb, int mstype)
754 mutex_unlock(&ls->ls_waiters_mutex); 754 mutex_unlock(&ls->ls_waiters_mutex);
755} 755}
756 756
757/* We clear the RESEND flag because we might be taking an lkb off the waiters
758 list as part of process_requestqueue (e.g. a lookup that has an optimized
759 request reply on the requestqueue) between dlm_recover_waiters_pre() which
760 set RESEND and dlm_recover_waiters_post() */
761
757static int _remove_from_waiters(struct dlm_lkb *lkb) 762static int _remove_from_waiters(struct dlm_lkb *lkb)
758{ 763{
759 int error = 0; 764 int error = 0;
@@ -764,6 +769,7 @@ static int _remove_from_waiters(struct dlm_lkb *lkb)
764 goto out; 769 goto out;
765 } 770 }
766 lkb->lkb_wait_type = 0; 771 lkb->lkb_wait_type = 0;
772 lkb->lkb_flags &= ~DLM_IFL_RESEND;
767 list_del(&lkb->lkb_wait_reply); 773 list_del(&lkb->lkb_wait_reply);
768 unhold_lkb(lkb); 774 unhold_lkb(lkb);
769 out: 775 out:
@@ -810,7 +816,7 @@ static int shrink_bucket(struct dlm_ls *ls, int b)
810 list_for_each_entry_reverse(r, &ls->ls_rsbtbl[b].toss, 816 list_for_each_entry_reverse(r, &ls->ls_rsbtbl[b].toss,
811 res_hashchain) { 817 res_hashchain) {
812 if (!time_after_eq(jiffies, r->res_toss_time + 818 if (!time_after_eq(jiffies, r->res_toss_time +
813 dlm_config.toss_secs * HZ)) 819 dlm_config.ci_toss_secs * HZ))
814 continue; 820 continue;
815 found = 1; 821 found = 1;
816 break; 822 break;
@@ -2144,12 +2150,24 @@ static void send_args(struct dlm_rsb *r, struct dlm_lkb *lkb,
2144 if (lkb->lkb_astaddr) 2150 if (lkb->lkb_astaddr)
2145 ms->m_asts |= AST_COMP; 2151 ms->m_asts |= AST_COMP;
2146 2152
2147 if (ms->m_type == DLM_MSG_REQUEST || ms->m_type == DLM_MSG_LOOKUP) 2153 /* compare with switch in create_message; send_remove() doesn't
2148 memcpy(ms->m_extra, r->res_name, r->res_length); 2154 use send_args() */
2149 2155
2150 else if (lkb->lkb_lvbptr) 2156 switch (ms->m_type) {
2157 case DLM_MSG_REQUEST:
2158 case DLM_MSG_LOOKUP:
2159 memcpy(ms->m_extra, r->res_name, r->res_length);
2160 break;
2161 case DLM_MSG_CONVERT:
2162 case DLM_MSG_UNLOCK:
2163 case DLM_MSG_REQUEST_REPLY:
2164 case DLM_MSG_CONVERT_REPLY:
2165 case DLM_MSG_GRANT:
2166 if (!lkb->lkb_lvbptr)
2167 break;
2151 memcpy(ms->m_extra, lkb->lkb_lvbptr, r->res_ls->ls_lvblen); 2168 memcpy(ms->m_extra, lkb->lkb_lvbptr, r->res_ls->ls_lvblen);
2152 2169 break;
2170 }
2153} 2171}
2154 2172
2155static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype) 2173static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype)
@@ -2418,8 +2436,12 @@ static int receive_request_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
2418 2436
2419 DLM_ASSERT(is_master_copy(lkb), dlm_print_lkb(lkb);); 2437 DLM_ASSERT(is_master_copy(lkb), dlm_print_lkb(lkb););
2420 2438
2421 if (receive_lvb(ls, lkb, ms)) 2439 if (lkb->lkb_exflags & DLM_LKF_VALBLK) {
2422 return -ENOMEM; 2440 /* lkb was just created so there won't be an lvb yet */
2441 lkb->lkb_lvbptr = allocate_lvb(ls);
2442 if (!lkb->lkb_lvbptr)
2443 return -ENOMEM;
2444 }
2423 2445
2424 return 0; 2446 return 0;
2425} 2447}
@@ -3002,7 +3024,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
3002{ 3024{
3003 struct dlm_message *ms = (struct dlm_message *) hd; 3025 struct dlm_message *ms = (struct dlm_message *) hd;
3004 struct dlm_ls *ls; 3026 struct dlm_ls *ls;
3005 int error; 3027 int error = 0;
3006 3028
3007 if (!recovery) 3029 if (!recovery)
3008 dlm_message_in(ms); 3030 dlm_message_in(ms);
@@ -3119,7 +3141,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
3119 out: 3141 out:
3120 dlm_put_lockspace(ls); 3142 dlm_put_lockspace(ls);
3121 dlm_astd_wake(); 3143 dlm_astd_wake();
3122 return 0; 3144 return error;
3123} 3145}
3124 3146
3125 3147
@@ -3132,6 +3154,7 @@ static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb)
3132 if (middle_conversion(lkb)) { 3154 if (middle_conversion(lkb)) {
3133 hold_lkb(lkb); 3155 hold_lkb(lkb);
3134 ls->ls_stub_ms.m_result = -EINPROGRESS; 3156 ls->ls_stub_ms.m_result = -EINPROGRESS;
3157 ls->ls_stub_ms.m_flags = lkb->lkb_flags;
3135 _remove_from_waiters(lkb); 3158 _remove_from_waiters(lkb);
3136 _receive_convert_reply(lkb, &ls->ls_stub_ms); 3159 _receive_convert_reply(lkb, &ls->ls_stub_ms);
3137 3160
@@ -3205,6 +3228,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
3205 case DLM_MSG_UNLOCK: 3228 case DLM_MSG_UNLOCK:
3206 hold_lkb(lkb); 3229 hold_lkb(lkb);
3207 ls->ls_stub_ms.m_result = -DLM_EUNLOCK; 3230 ls->ls_stub_ms.m_result = -DLM_EUNLOCK;
3231 ls->ls_stub_ms.m_flags = lkb->lkb_flags;
3208 _remove_from_waiters(lkb); 3232 _remove_from_waiters(lkb);
3209 _receive_unlock_reply(lkb, &ls->ls_stub_ms); 3233 _receive_unlock_reply(lkb, &ls->ls_stub_ms);
3210 dlm_put_lkb(lkb); 3234 dlm_put_lkb(lkb);
@@ -3213,6 +3237,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
3213 case DLM_MSG_CANCEL: 3237 case DLM_MSG_CANCEL:
3214 hold_lkb(lkb); 3238 hold_lkb(lkb);
3215 ls->ls_stub_ms.m_result = -DLM_ECANCEL; 3239 ls->ls_stub_ms.m_result = -DLM_ECANCEL;
3240 ls->ls_stub_ms.m_flags = lkb->lkb_flags;
3216 _remove_from_waiters(lkb); 3241 _remove_from_waiters(lkb);
3217 _receive_cancel_reply(lkb, &ls->ls_stub_ms); 3242 _receive_cancel_reply(lkb, &ls->ls_stub_ms);
3218 dlm_put_lkb(lkb); 3243 dlm_put_lkb(lkb);
@@ -3571,6 +3596,14 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
3571 lock_rsb(r); 3596 lock_rsb(r);
3572 3597
3573 switch (error) { 3598 switch (error) {
3599 case -EBADR:
3600 /* There's a chance the new master received our lock before
3601 dlm_recover_master_reply(), this wouldn't happen if we did
3602 a barrier between recover_masters and recover_locks. */
3603 log_debug(ls, "master copy not ready %x r %lx %s", lkb->lkb_id,
3604 (unsigned long)r, r->res_name);
3605 dlm_send_rcom_lock(r, lkb);
3606 goto out;
3574 case -EEXIST: 3607 case -EEXIST:
3575 log_debug(ls, "master copy exists %x", lkb->lkb_id); 3608 log_debug(ls, "master copy exists %x", lkb->lkb_id);
3576 /* fall through */ 3609 /* fall through */
@@ -3585,7 +3618,7 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
3585 /* an ack for dlm_recover_locks() which waits for replies from 3618 /* an ack for dlm_recover_locks() which waits for replies from
3586 all the locks it sends to new masters */ 3619 all the locks it sends to new masters */
3587 dlm_recovered_lock(r); 3620 dlm_recovered_lock(r);
3588 3621 out:
3589 unlock_rsb(r); 3622 unlock_rsb(r);
3590 put_rsb(r); 3623 put_rsb(r);
3591 dlm_put_lkb(lkb); 3624 dlm_put_lkb(lkb);
@@ -3610,7 +3643,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
3610 } 3643 }
3611 3644
3612 if (flags & DLM_LKF_VALBLK) { 3645 if (flags & DLM_LKF_VALBLK) {
3613 ua->lksb.sb_lvbptr = kmalloc(DLM_USER_LVB_LEN, GFP_KERNEL); 3646 ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
3614 if (!ua->lksb.sb_lvbptr) { 3647 if (!ua->lksb.sb_lvbptr) {
3615 kfree(ua); 3648 kfree(ua);
3616 __put_lkb(ls, lkb); 3649 __put_lkb(ls, lkb);
@@ -3679,7 +3712,7 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
3679 ua = (struct dlm_user_args *)lkb->lkb_astparam; 3712 ua = (struct dlm_user_args *)lkb->lkb_astparam;
3680 3713
3681 if (flags & DLM_LKF_VALBLK && !ua->lksb.sb_lvbptr) { 3714 if (flags & DLM_LKF_VALBLK && !ua->lksb.sb_lvbptr) {
3682 ua->lksb.sb_lvbptr = kmalloc(DLM_USER_LVB_LEN, GFP_KERNEL); 3715 ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
3683 if (!ua->lksb.sb_lvbptr) { 3716 if (!ua->lksb.sb_lvbptr) {
3684 error = -ENOMEM; 3717 error = -ENOMEM;
3685 goto out_put; 3718 goto out_put;
@@ -3745,12 +3778,10 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
3745 goto out_put; 3778 goto out_put;
3746 3779
3747 spin_lock(&ua->proc->locks_spin); 3780 spin_lock(&ua->proc->locks_spin);
3748 list_del_init(&lkb->lkb_ownqueue); 3781 /* dlm_user_add_ast() may have already taken lkb off the proc list */
3782 if (!list_empty(&lkb->lkb_ownqueue))
3783 list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking);
3749 spin_unlock(&ua->proc->locks_spin); 3784 spin_unlock(&ua->proc->locks_spin);
3750
3751 /* this removes the reference for the proc->locks list added by
3752 dlm_user_request */
3753 unhold_lkb(lkb);
3754 out_put: 3785 out_put:
3755 dlm_put_lkb(lkb); 3786 dlm_put_lkb(lkb);
3756 out: 3787 out:
@@ -3790,9 +3821,8 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
3790 /* this lkb was removed from the WAITING queue */ 3821 /* this lkb was removed from the WAITING queue */
3791 if (lkb->lkb_grmode == DLM_LOCK_IV) { 3822 if (lkb->lkb_grmode == DLM_LOCK_IV) {
3792 spin_lock(&ua->proc->locks_spin); 3823 spin_lock(&ua->proc->locks_spin);
3793 list_del_init(&lkb->lkb_ownqueue); 3824 list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking);
3794 spin_unlock(&ua->proc->locks_spin); 3825 spin_unlock(&ua->proc->locks_spin);
3795 unhold_lkb(lkb);
3796 } 3826 }
3797 out_put: 3827 out_put:
3798 dlm_put_lkb(lkb); 3828 dlm_put_lkb(lkb);
@@ -3853,11 +3883,6 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
3853 mutex_lock(&ls->ls_clear_proc_locks); 3883 mutex_lock(&ls->ls_clear_proc_locks);
3854 3884
3855 list_for_each_entry_safe(lkb, safe, &proc->locks, lkb_ownqueue) { 3885 list_for_each_entry_safe(lkb, safe, &proc->locks, lkb_ownqueue) {
3856 if (lkb->lkb_ast_type) {
3857 list_del(&lkb->lkb_astqueue);
3858 unhold_lkb(lkb);
3859 }
3860
3861 list_del_init(&lkb->lkb_ownqueue); 3886 list_del_init(&lkb->lkb_ownqueue);
3862 3887
3863 if (lkb->lkb_exflags & DLM_LKF_PERSISTENT) { 3888 if (lkb->lkb_exflags & DLM_LKF_PERSISTENT) {
@@ -3874,6 +3899,20 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
3874 3899
3875 dlm_put_lkb(lkb); 3900 dlm_put_lkb(lkb);
3876 } 3901 }
3902
3903 /* in-progress unlocks */
3904 list_for_each_entry_safe(lkb, safe, &proc->unlocking, lkb_ownqueue) {
3905 list_del_init(&lkb->lkb_ownqueue);
3906 lkb->lkb_flags |= DLM_IFL_DEAD;
3907 dlm_put_lkb(lkb);
3908 }
3909
3910 list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) {
3911 list_del(&lkb->lkb_astqueue);
3912 dlm_put_lkb(lkb);
3913 }
3914
3877 mutex_unlock(&ls->ls_clear_proc_locks); 3915 mutex_unlock(&ls->ls_clear_proc_locks);
3878 unlock_recovery(ls); 3916 unlock_recovery(ls);
3879} 3917}
3918
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 59012b089e8d..f40817b53c6f 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -236,7 +236,7 @@ static int dlm_scand(void *data)
236 while (!kthread_should_stop()) { 236 while (!kthread_should_stop()) {
237 list_for_each_entry(ls, &lslist, ls_list) 237 list_for_each_entry(ls, &lslist, ls_list)
238 dlm_scan_rsbs(ls); 238 dlm_scan_rsbs(ls);
239 schedule_timeout_interruptible(dlm_config.scan_secs * HZ); 239 schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
240 } 240 }
241 return 0; 241 return 0;
242} 242}
@@ -422,7 +422,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
422 ls->ls_count = 0; 422 ls->ls_count = 0;
423 ls->ls_flags = 0; 423 ls->ls_flags = 0;
424 424
425 size = dlm_config.rsbtbl_size; 425 size = dlm_config.ci_rsbtbl_size;
426 ls->ls_rsbtbl_size = size; 426 ls->ls_rsbtbl_size = size;
427 427
428 ls->ls_rsbtbl = kmalloc(sizeof(struct dlm_rsbtable) * size, GFP_KERNEL); 428 ls->ls_rsbtbl = kmalloc(sizeof(struct dlm_rsbtable) * size, GFP_KERNEL);
@@ -434,7 +434,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
434 rwlock_init(&ls->ls_rsbtbl[i].lock); 434 rwlock_init(&ls->ls_rsbtbl[i].lock);
435 } 435 }
436 436
437 size = dlm_config.lkbtbl_size; 437 size = dlm_config.ci_lkbtbl_size;
438 ls->ls_lkbtbl_size = size; 438 ls->ls_lkbtbl_size = size;
439 439
440 ls->ls_lkbtbl = kmalloc(sizeof(struct dlm_lkbtable) * size, GFP_KERNEL); 440 ls->ls_lkbtbl = kmalloc(sizeof(struct dlm_lkbtable) * size, GFP_KERNEL);
@@ -446,7 +446,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
446 ls->ls_lkbtbl[i].counter = 1; 446 ls->ls_lkbtbl[i].counter = 1;
447 } 447 }
448 448
449 size = dlm_config.dirtbl_size; 449 size = dlm_config.ci_dirtbl_size;
450 ls->ls_dirtbl_size = size; 450 ls->ls_dirtbl_size = size;
451 451
452 ls->ls_dirtbl = kmalloc(sizeof(struct dlm_dirtable) * size, GFP_KERNEL); 452 ls->ls_dirtbl = kmalloc(sizeof(struct dlm_dirtable) * size, GFP_KERNEL);
@@ -489,7 +489,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
489 mutex_init(&ls->ls_requestqueue_mutex); 489 mutex_init(&ls->ls_requestqueue_mutex);
490 mutex_init(&ls->ls_clear_proc_locks); 490 mutex_init(&ls->ls_clear_proc_locks);
491 491
492 ls->ls_recover_buf = kmalloc(dlm_config.buffer_size, GFP_KERNEL); 492 ls->ls_recover_buf = kmalloc(dlm_config.ci_buffer_size, GFP_KERNEL);
493 if (!ls->ls_recover_buf) 493 if (!ls->ls_recover_buf)
494 goto out_dirfree; 494 goto out_dirfree;
495 495
diff --git a/fs/dlm/lowcomms-sctp.c b/fs/dlm/lowcomms-sctp.c
index fe158d7a9285..dc83a9d979b5 100644
--- a/fs/dlm/lowcomms-sctp.c
+++ b/fs/dlm/lowcomms-sctp.c
@@ -72,6 +72,8 @@ struct nodeinfo {
72 struct list_head writequeue; /* outgoing writequeue_entries */ 72 struct list_head writequeue; /* outgoing writequeue_entries */
73 spinlock_t writequeue_lock; 73 spinlock_t writequeue_lock;
74 int nodeid; 74 int nodeid;
75 struct work_struct swork; /* Send workqueue */
76 struct work_struct lwork; /* Locking workqueue */
75}; 77};
76 78
77static DEFINE_IDR(nodeinfo_idr); 79static DEFINE_IDR(nodeinfo_idr);
@@ -96,6 +98,7 @@ struct connection {
96 atomic_t waiting_requests; 98 atomic_t waiting_requests;
97 struct cbuf cb; 99 struct cbuf cb;
98 int eagain_flag; 100 int eagain_flag;
101 struct work_struct work; /* Send workqueue */
99}; 102};
100 103
101/* An entry waiting to be sent */ 104/* An entry waiting to be sent */
@@ -137,19 +140,23 @@ static void cbuf_eat(struct cbuf *cb, int n)
137static LIST_HEAD(write_nodes); 140static LIST_HEAD(write_nodes);
138static DEFINE_SPINLOCK(write_nodes_lock); 141static DEFINE_SPINLOCK(write_nodes_lock);
139 142
143
140/* Maximum number of incoming messages to process before 144/* Maximum number of incoming messages to process before
141 * doing a schedule() 145 * doing a schedule()
142 */ 146 */
143#define MAX_RX_MSG_COUNT 25 147#define MAX_RX_MSG_COUNT 25
144 148
145/* Manage daemons */ 149/* Work queues */
146static struct task_struct *recv_task; 150static struct workqueue_struct *recv_workqueue;
147static struct task_struct *send_task; 151static struct workqueue_struct *send_workqueue;
148static DECLARE_WAIT_QUEUE_HEAD(lowcomms_recv_wait); 152static struct workqueue_struct *lock_workqueue;
149 153
150/* The SCTP connection */ 154/* The SCTP connection */
151static struct connection sctp_con; 155static struct connection sctp_con;
152 156
157static void process_send_sockets(struct work_struct *work);
158static void process_recv_sockets(struct work_struct *work);
159static void process_lock_request(struct work_struct *work);
153 160
154static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr) 161static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
155{ 162{
@@ -222,6 +229,8 @@ static struct nodeinfo *nodeid2nodeinfo(int nodeid, gfp_t alloc)
222 spin_lock_init(&ni->lock); 229 spin_lock_init(&ni->lock);
223 INIT_LIST_HEAD(&ni->writequeue); 230 INIT_LIST_HEAD(&ni->writequeue);
224 spin_lock_init(&ni->writequeue_lock); 231 spin_lock_init(&ni->writequeue_lock);
232 INIT_WORK(&ni->lwork, process_lock_request);
233 INIT_WORK(&ni->swork, process_send_sockets);
225 ni->nodeid = nodeid; 234 ni->nodeid = nodeid;
226 235
227 if (nodeid > max_nodeid) 236 if (nodeid > max_nodeid)
@@ -249,11 +258,8 @@ static struct nodeinfo *assoc2nodeinfo(sctp_assoc_t assoc)
249/* Data or notification available on socket */ 258/* Data or notification available on socket */
250static void lowcomms_data_ready(struct sock *sk, int count_unused) 259static void lowcomms_data_ready(struct sock *sk, int count_unused)
251{ 260{
252 atomic_inc(&sctp_con.waiting_requests);
253 if (test_and_set_bit(CF_READ_PENDING, &sctp_con.flags)) 261 if (test_and_set_bit(CF_READ_PENDING, &sctp_con.flags))
254 return; 262 queue_work(recv_workqueue, &sctp_con.work);
255
256 wake_up_interruptible(&lowcomms_recv_wait);
257} 263}
258 264
259 265
@@ -361,10 +367,10 @@ static void init_failed(void)
361 spin_lock_bh(&write_nodes_lock); 367 spin_lock_bh(&write_nodes_lock);
362 list_add_tail(&ni->write_list, &write_nodes); 368 list_add_tail(&ni->write_list, &write_nodes);
363 spin_unlock_bh(&write_nodes_lock); 369 spin_unlock_bh(&write_nodes_lock);
370 queue_work(send_workqueue, &ni->swork);
364 } 371 }
365 } 372 }
366 } 373 }
367 wake_up_process(send_task);
368} 374}
369 375
370/* Something happened to an association */ 376/* Something happened to an association */
@@ -446,8 +452,8 @@ static void process_sctp_notification(struct msghdr *msg, char *buf)
446 spin_lock_bh(&write_nodes_lock); 452 spin_lock_bh(&write_nodes_lock);
447 list_add_tail(&ni->write_list, &write_nodes); 453 list_add_tail(&ni->write_list, &write_nodes);
448 spin_unlock_bh(&write_nodes_lock); 454 spin_unlock_bh(&write_nodes_lock);
455 queue_work(send_workqueue, &ni->swork);
449 } 456 }
450 wake_up_process(send_task);
451 } 457 }
452 break; 458 break;
453 459
@@ -580,8 +586,8 @@ static int receive_from_sock(void)
580 spin_lock_bh(&write_nodes_lock); 586 spin_lock_bh(&write_nodes_lock);
581 list_add_tail(&ni->write_list, &write_nodes); 587 list_add_tail(&ni->write_list, &write_nodes);
582 spin_unlock_bh(&write_nodes_lock); 588 spin_unlock_bh(&write_nodes_lock);
589 queue_work(send_workqueue, &ni->swork);
583 } 590 }
584 wake_up_process(send_task);
585 } 591 }
586 } 592 }
587 593
@@ -590,6 +596,7 @@ static int receive_from_sock(void)
590 return 0; 596 return 0;
591 597
592 cbuf_add(&sctp_con.cb, ret); 598 cbuf_add(&sctp_con.cb, ret);
599 // PJC: TODO: Add to node's workqueue....can we ??
593 ret = dlm_process_incoming_buffer(cpu_to_le32(sinfo->sinfo_ppid), 600 ret = dlm_process_incoming_buffer(cpu_to_le32(sinfo->sinfo_ppid),
594 page_address(sctp_con.rx_page), 601 page_address(sctp_con.rx_page),
595 sctp_con.cb.base, sctp_con.cb.len, 602 sctp_con.cb.base, sctp_con.cb.len,
@@ -635,7 +642,7 @@ static int add_bind_addr(struct sockaddr_storage *addr, int addr_len, int num)
635 642
636 if (result < 0) 643 if (result < 0)
637 log_print("Can't bind to port %d addr number %d", 644 log_print("Can't bind to port %d addr number %d",
638 dlm_config.tcp_port, num); 645 dlm_config.ci_tcp_port, num);
639 646
640 return result; 647 return result;
641} 648}
@@ -711,7 +718,7 @@ static int init_sock(void)
711 /* Bind to all interfaces. */ 718 /* Bind to all interfaces. */
712 for (i = 0; i < dlm_local_count; i++) { 719 for (i = 0; i < dlm_local_count; i++) {
713 memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr)); 720 memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr));
714 make_sockaddr(&localaddr, dlm_config.tcp_port, &addr_len); 721 make_sockaddr(&localaddr, dlm_config.ci_tcp_port, &addr_len);
715 722
716 result = add_bind_addr(&localaddr, addr_len, num); 723 result = add_bind_addr(&localaddr, addr_len, num);
717 if (result) 724 if (result)
@@ -820,7 +827,8 @@ void dlm_lowcomms_commit_buffer(void *arg)
820 spin_lock_bh(&write_nodes_lock); 827 spin_lock_bh(&write_nodes_lock);
821 list_add_tail(&ni->write_list, &write_nodes); 828 list_add_tail(&ni->write_list, &write_nodes);
822 spin_unlock_bh(&write_nodes_lock); 829 spin_unlock_bh(&write_nodes_lock);
823 wake_up_process(send_task); 830
831 queue_work(send_workqueue, &ni->swork);
824 } 832 }
825 return; 833 return;
826 834
@@ -863,7 +871,7 @@ static void initiate_association(int nodeid)
863 return; 871 return;
864 } 872 }
865 873
866 make_sockaddr(&rem_addr, dlm_config.tcp_port, &addrlen); 874 make_sockaddr(&rem_addr, dlm_config.ci_tcp_port, &addrlen);
867 875
868 outmessage.msg_name = &rem_addr; 876 outmessage.msg_name = &rem_addr;
869 outmessage.msg_namelen = addrlen; 877 outmessage.msg_namelen = addrlen;
@@ -1088,101 +1096,75 @@ int dlm_lowcomms_close(int nodeid)
1088 return 0; 1096 return 0;
1089} 1097}
1090 1098
1091static int write_list_empty(void) 1099// PJC: The work queue function for receiving.
1100static void process_recv_sockets(struct work_struct *work)
1092{ 1101{
1093 int status; 1102 if (test_and_clear_bit(CF_READ_PENDING, &sctp_con.flags)) {
1094 1103 int ret;
1095 spin_lock_bh(&write_nodes_lock);
1096 status = list_empty(&write_nodes);
1097 spin_unlock_bh(&write_nodes_lock);
1098
1099 return status;
1100}
1101
1102static int dlm_recvd(void *data)
1103{
1104 DECLARE_WAITQUEUE(wait, current);
1105
1106 while (!kthread_should_stop()) {
1107 int count = 0; 1104 int count = 0;
1108 1105
1109 set_current_state(TASK_INTERRUPTIBLE); 1106 do {
1110 add_wait_queue(&lowcomms_recv_wait, &wait); 1107 ret = receive_from_sock();
1111 if (!test_bit(CF_READ_PENDING, &sctp_con.flags))
1112 cond_resched();
1113 remove_wait_queue(&lowcomms_recv_wait, &wait);
1114 set_current_state(TASK_RUNNING);
1115
1116 if (test_and_clear_bit(CF_READ_PENDING, &sctp_con.flags)) {
1117 int ret;
1118
1119 do {
1120 ret = receive_from_sock();
1121 1108
1122 /* Don't starve out everyone else */ 1109 /* Don't starve out everyone else */
1123 if (++count >= MAX_RX_MSG_COUNT) { 1110 if (++count >= MAX_RX_MSG_COUNT) {
1124 cond_resched(); 1111 cond_resched();
1125 count = 0; 1112 count = 0;
1126 } 1113 }
1127 } while (!kthread_should_stop() && ret >=0); 1114 } while (!kthread_should_stop() && ret >=0);
1128 }
1129 cond_resched();
1130 } 1115 }
1131 1116 cond_resched();
1132 return 0;
1133} 1117}
1134 1118
1135static int dlm_sendd(void *data) 1119// PJC: the work queue function for sending
1120static void process_send_sockets(struct work_struct *work)
1136{ 1121{
1137 DECLARE_WAITQUEUE(wait, current); 1122 if (sctp_con.eagain_flag) {
1138 1123 sctp_con.eagain_flag = 0;
1139 add_wait_queue(sctp_con.sock->sk->sk_sleep, &wait); 1124 refill_write_queue();
1140
1141 while (!kthread_should_stop()) {
1142 set_current_state(TASK_INTERRUPTIBLE);
1143 if (write_list_empty())
1144 cond_resched();
1145 set_current_state(TASK_RUNNING);
1146
1147 if (sctp_con.eagain_flag) {
1148 sctp_con.eagain_flag = 0;
1149 refill_write_queue();
1150 }
1151 process_output_queue();
1152 } 1125 }
1126 process_output_queue();
1127}
1153 1128
1154 remove_wait_queue(sctp_con.sock->sk->sk_sleep, &wait); 1129// PJC: Process lock requests from a particular node.
1155 1130// TODO: can we optimise this out on UP ??
1156 return 0; 1131static void process_lock_request(struct work_struct *work)
1132{
1157} 1133}
1158 1134
1159static void daemons_stop(void) 1135static void daemons_stop(void)
1160{ 1136{
1161 kthread_stop(recv_task); 1137 destroy_workqueue(recv_workqueue);
1162 kthread_stop(send_task); 1138 destroy_workqueue(send_workqueue);
1139 destroy_workqueue(lock_workqueue);
1163} 1140}
1164 1141
1165static int daemons_start(void) 1142static int daemons_start(void)
1166{ 1143{
1167 struct task_struct *p;
1168 int error; 1144 int error;
1145 recv_workqueue = create_workqueue("dlm_recv");
1146 error = IS_ERR(recv_workqueue);
1147 if (error) {
1148 log_print("can't start dlm_recv %d", error);
1149 return error;
1150 }
1169 1151
1170 p = kthread_run(dlm_recvd, NULL, "dlm_recvd"); 1152 send_workqueue = create_singlethread_workqueue("dlm_send");
1171 error = IS_ERR(p); 1153 error = IS_ERR(send_workqueue);
1172 if (error) { 1154 if (error) {
1173 log_print("can't start dlm_recvd %d", error); 1155 log_print("can't start dlm_send %d", error);
1156 destroy_workqueue(recv_workqueue);
1174 return error; 1157 return error;
1175 } 1158 }
1176 recv_task = p;
1177 1159
1178 p = kthread_run(dlm_sendd, NULL, "dlm_sendd"); 1160 lock_workqueue = create_workqueue("dlm_rlock");
1179 error = IS_ERR(p); 1161 error = IS_ERR(lock_workqueue);
1180 if (error) { 1162 if (error) {
1181 log_print("can't start dlm_sendd %d", error); 1163 log_print("can't start dlm_rlock %d", error);
1182 kthread_stop(recv_task); 1164 destroy_workqueue(send_workqueue);
1165 destroy_workqueue(recv_workqueue);
1183 return error; 1166 return error;
1184 } 1167 }
1185 send_task = p;
1186 1168
1187 return 0; 1169 return 0;
1188} 1170}
@@ -1194,6 +1176,8 @@ int dlm_lowcomms_start(void)
1194{ 1176{
1195 int error; 1177 int error;
1196 1178
1179 INIT_WORK(&sctp_con.work, process_recv_sockets);
1180
1197 error = init_sock(); 1181 error = init_sock();
1198 if (error) 1182 if (error)
1199 goto fail_sock; 1183 goto fail_sock;
@@ -1224,4 +1208,3 @@ void dlm_lowcomms_stop(void)
1224 for (i = 0; i < dlm_local_count; i++) 1208 for (i = 0; i < dlm_local_count; i++)
1225 kfree(dlm_local_addr[i]); 1209 kfree(dlm_local_addr[i]);
1226} 1210}
1227
diff --git a/fs/dlm/lowcomms-tcp.c b/fs/dlm/lowcomms-tcp.c
index 9be3a440c42a..07e0a122c32f 100644
--- a/fs/dlm/lowcomms-tcp.c
+++ b/fs/dlm/lowcomms-tcp.c
@@ -2,7 +2,7 @@
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5** Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. 5** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
6** 6**
7** This copyrighted material is made available to anyone wishing to use, 7** This copyrighted material is made available to anyone wishing to use,
8** modify, copy, or redistribute it subject to the terms and conditions 8** modify, copy, or redistribute it subject to the terms and conditions
@@ -96,10 +96,7 @@ static bool cbuf_empty(struct cbuf *cb)
96struct connection { 96struct connection {
97 struct socket *sock; /* NULL if not connected */ 97 struct socket *sock; /* NULL if not connected */
98 uint32_t nodeid; /* So we know who we are in the list */ 98 uint32_t nodeid; /* So we know who we are in the list */
99 struct rw_semaphore sock_sem; /* Stop connect races */ 99 struct mutex sock_mutex;
100 struct list_head read_list; /* On this list when ready for reading */
101 struct list_head write_list; /* On this list when ready for writing */
102 struct list_head state_list; /* On this list when ready to connect */
103 unsigned long flags; /* bit 1,2 = We are on the read/write lists */ 100 unsigned long flags; /* bit 1,2 = We are on the read/write lists */
104#define CF_READ_PENDING 1 101#define CF_READ_PENDING 1
105#define CF_WRITE_PENDING 2 102#define CF_WRITE_PENDING 2
@@ -112,9 +109,10 @@ struct connection {
112 struct page *rx_page; 109 struct page *rx_page;
113 struct cbuf cb; 110 struct cbuf cb;
114 int retries; 111 int retries;
115 atomic_t waiting_requests;
116#define MAX_CONNECT_RETRIES 3 112#define MAX_CONNECT_RETRIES 3
117 struct connection *othercon; 113 struct connection *othercon;
114 struct work_struct rwork; /* Receive workqueue */
115 struct work_struct swork; /* Send workqueue */
118}; 116};
119#define sock2con(x) ((struct connection *)(x)->sk_user_data) 117#define sock2con(x) ((struct connection *)(x)->sk_user_data)
120 118
@@ -131,14 +129,9 @@ struct writequeue_entry {
131 129
132static struct sockaddr_storage dlm_local_addr; 130static struct sockaddr_storage dlm_local_addr;
133 131
134/* Manage daemons */ 132/* Work queues */
135static struct task_struct *recv_task; 133static struct workqueue_struct *recv_workqueue;
136static struct task_struct *send_task; 134static struct workqueue_struct *send_workqueue;
137
138static wait_queue_t lowcomms_send_waitq_head;
139static DECLARE_WAIT_QUEUE_HEAD(lowcomms_send_waitq);
140static wait_queue_t lowcomms_recv_waitq_head;
141static DECLARE_WAIT_QUEUE_HEAD(lowcomms_recv_waitq);
142 135
143/* An array of pointers to connections, indexed by NODEID */ 136/* An array of pointers to connections, indexed by NODEID */
144static struct connection **connections; 137static struct connection **connections;
@@ -146,17 +139,8 @@ static DECLARE_MUTEX(connections_lock);
146static struct kmem_cache *con_cache; 139static struct kmem_cache *con_cache;
147static int conn_array_size; 140static int conn_array_size;
148 141
149/* List of sockets that have reads pending */ 142static void process_recv_sockets(struct work_struct *work);
150static LIST_HEAD(read_sockets); 143static void process_send_sockets(struct work_struct *work);
151static DEFINE_SPINLOCK(read_sockets_lock);
152
153/* List of sockets which have writes pending */
154static LIST_HEAD(write_sockets);
155static DEFINE_SPINLOCK(write_sockets_lock);
156
157/* List of sockets which have connects pending */
158static LIST_HEAD(state_sockets);
159static DEFINE_SPINLOCK(state_sockets_lock);
160 144
161static struct connection *nodeid2con(int nodeid, gfp_t allocation) 145static struct connection *nodeid2con(int nodeid, gfp_t allocation)
162{ 146{
@@ -186,9 +170,11 @@ static struct connection *nodeid2con(int nodeid, gfp_t allocation)
186 goto finish; 170 goto finish;
187 171
188 con->nodeid = nodeid; 172 con->nodeid = nodeid;
189 init_rwsem(&con->sock_sem); 173 mutex_init(&con->sock_mutex);
190 INIT_LIST_HEAD(&con->writequeue); 174 INIT_LIST_HEAD(&con->writequeue);
191 spin_lock_init(&con->writequeue_lock); 175 spin_lock_init(&con->writequeue_lock);
176 INIT_WORK(&con->swork, process_send_sockets);
177 INIT_WORK(&con->rwork, process_recv_sockets);
192 178
193 connections[nodeid] = con; 179 connections[nodeid] = con;
194 } 180 }
@@ -203,41 +189,22 @@ static void lowcomms_data_ready(struct sock *sk, int count_unused)
203{ 189{
204 struct connection *con = sock2con(sk); 190 struct connection *con = sock2con(sk);
205 191
206 atomic_inc(&con->waiting_requests); 192 if (!test_and_set_bit(CF_READ_PENDING, &con->flags))
207 if (test_and_set_bit(CF_READ_PENDING, &con->flags)) 193 queue_work(recv_workqueue, &con->rwork);
208 return;
209
210 spin_lock_bh(&read_sockets_lock);
211 list_add_tail(&con->read_list, &read_sockets);
212 spin_unlock_bh(&read_sockets_lock);
213
214 wake_up_interruptible(&lowcomms_recv_waitq);
215} 194}
216 195
217static void lowcomms_write_space(struct sock *sk) 196static void lowcomms_write_space(struct sock *sk)
218{ 197{
219 struct connection *con = sock2con(sk); 198 struct connection *con = sock2con(sk);
220 199
221 if (test_and_set_bit(CF_WRITE_PENDING, &con->flags)) 200 if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags))
222 return; 201 queue_work(send_workqueue, &con->swork);
223
224 spin_lock_bh(&write_sockets_lock);
225 list_add_tail(&con->write_list, &write_sockets);
226 spin_unlock_bh(&write_sockets_lock);
227
228 wake_up_interruptible(&lowcomms_send_waitq);
229} 202}
230 203
231static inline void lowcomms_connect_sock(struct connection *con) 204static inline void lowcomms_connect_sock(struct connection *con)
232{ 205{
233 if (test_and_set_bit(CF_CONNECT_PENDING, &con->flags)) 206 if (!test_and_set_bit(CF_CONNECT_PENDING, &con->flags))
234 return; 207 queue_work(send_workqueue, &con->swork);
235
236 spin_lock_bh(&state_sockets_lock);
237 list_add_tail(&con->state_list, &state_sockets);
238 spin_unlock_bh(&state_sockets_lock);
239
240 wake_up_interruptible(&lowcomms_send_waitq);
241} 208}
242 209
243static void lowcomms_state_change(struct sock *sk) 210static void lowcomms_state_change(struct sock *sk)
@@ -279,7 +246,7 @@ static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
279/* Close a remote connection and tidy up */ 246/* Close a remote connection and tidy up */
280static void close_connection(struct connection *con, bool and_other) 247static void close_connection(struct connection *con, bool and_other)
281{ 248{
282 down_write(&con->sock_sem); 249 mutex_lock(&con->sock_mutex);
283 250
284 if (con->sock) { 251 if (con->sock) {
285 sock_release(con->sock); 252 sock_release(con->sock);
@@ -294,24 +261,27 @@ static void close_connection(struct connection *con, bool and_other)
294 con->rx_page = NULL; 261 con->rx_page = NULL;
295 } 262 }
296 con->retries = 0; 263 con->retries = 0;
297 up_write(&con->sock_sem); 264 mutex_unlock(&con->sock_mutex);
298} 265}
299 266
300/* Data received from remote end */ 267/* Data received from remote end */
301static int receive_from_sock(struct connection *con) 268static int receive_from_sock(struct connection *con)
302{ 269{
303 int ret = 0; 270 int ret = 0;
304 struct msghdr msg; 271 struct msghdr msg = {};
305 struct iovec iov[2]; 272 struct kvec iov[2];
306 mm_segment_t fs;
307 unsigned len; 273 unsigned len;
308 int r; 274 int r;
309 int call_again_soon = 0; 275 int call_again_soon = 0;
276 int nvec;
310 277
311 down_read(&con->sock_sem); 278 mutex_lock(&con->sock_mutex);
279
280 if (con->sock == NULL) {
281 ret = -EAGAIN;
282 goto out_close;
283 }
312 284
313 if (con->sock == NULL)
314 goto out;
315 if (con->rx_page == NULL) { 285 if (con->rx_page == NULL) {
316 /* 286 /*
317 * This doesn't need to be atomic, but I think it should 287 * This doesn't need to be atomic, but I think it should
@@ -323,21 +293,13 @@ static int receive_from_sock(struct connection *con)
323 cbuf_init(&con->cb, PAGE_CACHE_SIZE); 293 cbuf_init(&con->cb, PAGE_CACHE_SIZE);
324 } 294 }
325 295
326 msg.msg_control = NULL;
327 msg.msg_controllen = 0;
328 msg.msg_iovlen = 1;
329 msg.msg_iov = iov;
330 msg.msg_name = NULL;
331 msg.msg_namelen = 0;
332 msg.msg_flags = 0;
333
334 /* 296 /*
335 * iov[0] is the bit of the circular buffer between the current end 297 * iov[0] is the bit of the circular buffer between the current end
336 * point (cb.base + cb.len) and the end of the buffer. 298 * point (cb.base + cb.len) and the end of the buffer.
337 */ 299 */
338 iov[0].iov_len = con->cb.base - cbuf_data(&con->cb); 300 iov[0].iov_len = con->cb.base - cbuf_data(&con->cb);
339 iov[0].iov_base = page_address(con->rx_page) + cbuf_data(&con->cb); 301 iov[0].iov_base = page_address(con->rx_page) + cbuf_data(&con->cb);
340 iov[1].iov_len = 0; 302 nvec = 1;
341 303
342 /* 304 /*
343 * iov[1] is the bit of the circular buffer between the start of the 305 * iov[1] is the bit of the circular buffer between the start of the
@@ -347,18 +309,18 @@ static int receive_from_sock(struct connection *con)
347 iov[0].iov_len = PAGE_CACHE_SIZE - cbuf_data(&con->cb); 309 iov[0].iov_len = PAGE_CACHE_SIZE - cbuf_data(&con->cb);
348 iov[1].iov_len = con->cb.base; 310 iov[1].iov_len = con->cb.base;
349 iov[1].iov_base = page_address(con->rx_page); 311 iov[1].iov_base = page_address(con->rx_page);
350 msg.msg_iovlen = 2; 312 nvec = 2;
351 } 313 }
352 len = iov[0].iov_len + iov[1].iov_len; 314 len = iov[0].iov_len + iov[1].iov_len;
353 315
354 fs = get_fs(); 316 r = ret = kernel_recvmsg(con->sock, &msg, iov, nvec, len,
355 set_fs(get_ds());
356 r = ret = sock_recvmsg(con->sock, &msg, len,
357 MSG_DONTWAIT | MSG_NOSIGNAL); 317 MSG_DONTWAIT | MSG_NOSIGNAL);
358 set_fs(fs);
359 318
360 if (ret <= 0) 319 if (ret <= 0)
361 goto out_close; 320 goto out_close;
321 if (ret == -EAGAIN)
322 goto out_resched;
323
362 if (ret == len) 324 if (ret == len)
363 call_again_soon = 1; 325 call_again_soon = 1;
364 cbuf_add(&con->cb, ret); 326 cbuf_add(&con->cb, ret);
@@ -381,24 +343,26 @@ static int receive_from_sock(struct connection *con)
381 con->rx_page = NULL; 343 con->rx_page = NULL;
382 } 344 }
383 345
384out:
385 if (call_again_soon) 346 if (call_again_soon)
386 goto out_resched; 347 goto out_resched;
387 up_read(&con->sock_sem); 348 mutex_unlock(&con->sock_mutex);
388 return 0; 349 return 0;
389 350
390out_resched: 351out_resched:
391 lowcomms_data_ready(con->sock->sk, 0); 352 if (!test_and_set_bit(CF_READ_PENDING, &con->flags))
392 up_read(&con->sock_sem); 353 queue_work(recv_workqueue, &con->rwork);
393 cond_resched(); 354 mutex_unlock(&con->sock_mutex);
394 return 0; 355 return -EAGAIN;
395 356
396out_close: 357out_close:
397 up_read(&con->sock_sem); 358 mutex_unlock(&con->sock_mutex);
398 if (ret != -EAGAIN && !test_bit(CF_IS_OTHERCON, &con->flags)) { 359 if (ret != -EAGAIN && !test_bit(CF_IS_OTHERCON, &con->flags)) {
399 close_connection(con, false); 360 close_connection(con, false);
400 /* Reconnect when there is something to send */ 361 /* Reconnect when there is something to send */
401 } 362 }
363 /* Don't return success if we really got EOF */
364 if (ret == 0)
365 ret = -EAGAIN;
402 366
403 return ret; 367 return ret;
404} 368}
@@ -412,6 +376,7 @@ static int accept_from_sock(struct connection *con)
412 int len; 376 int len;
413 int nodeid; 377 int nodeid;
414 struct connection *newcon; 378 struct connection *newcon;
379 struct connection *addcon;
415 380
416 memset(&peeraddr, 0, sizeof(peeraddr)); 381 memset(&peeraddr, 0, sizeof(peeraddr));
417 result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM, 382 result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM,
@@ -419,7 +384,7 @@ static int accept_from_sock(struct connection *con)
419 if (result < 0) 384 if (result < 0)
420 return -ENOMEM; 385 return -ENOMEM;
421 386
422 down_read(&con->sock_sem); 387 mutex_lock_nested(&con->sock_mutex, 0);
423 388
424 result = -ENOTCONN; 389 result = -ENOTCONN;
425 if (con->sock == NULL) 390 if (con->sock == NULL)
@@ -445,7 +410,7 @@ static int accept_from_sock(struct connection *con)
445 if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) { 410 if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) {
446 printk("dlm: connect from non cluster node\n"); 411 printk("dlm: connect from non cluster node\n");
447 sock_release(newsock); 412 sock_release(newsock);
448 up_read(&con->sock_sem); 413 mutex_unlock(&con->sock_mutex);
449 return -1; 414 return -1;
450 } 415 }
451 416
@@ -462,7 +427,7 @@ static int accept_from_sock(struct connection *con)
462 result = -ENOMEM; 427 result = -ENOMEM;
463 goto accept_err; 428 goto accept_err;
464 } 429 }
465 down_write(&newcon->sock_sem); 430 mutex_lock_nested(&newcon->sock_mutex, 1);
466 if (newcon->sock) { 431 if (newcon->sock) {
467 struct connection *othercon = newcon->othercon; 432 struct connection *othercon = newcon->othercon;
468 433
@@ -470,41 +435,45 @@ static int accept_from_sock(struct connection *con)
470 othercon = kmem_cache_zalloc(con_cache, GFP_KERNEL); 435 othercon = kmem_cache_zalloc(con_cache, GFP_KERNEL);
471 if (!othercon) { 436 if (!othercon) {
472 printk("dlm: failed to allocate incoming socket\n"); 437 printk("dlm: failed to allocate incoming socket\n");
473 up_write(&newcon->sock_sem); 438 mutex_unlock(&newcon->sock_mutex);
474 result = -ENOMEM; 439 result = -ENOMEM;
475 goto accept_err; 440 goto accept_err;
476 } 441 }
477 othercon->nodeid = nodeid; 442 othercon->nodeid = nodeid;
478 othercon->rx_action = receive_from_sock; 443 othercon->rx_action = receive_from_sock;
479 init_rwsem(&othercon->sock_sem); 444 mutex_init(&othercon->sock_mutex);
445 INIT_WORK(&othercon->swork, process_send_sockets);
446 INIT_WORK(&othercon->rwork, process_recv_sockets);
480 set_bit(CF_IS_OTHERCON, &othercon->flags); 447 set_bit(CF_IS_OTHERCON, &othercon->flags);
481 newcon->othercon = othercon; 448 newcon->othercon = othercon;
482 } 449 }
483 othercon->sock = newsock; 450 othercon->sock = newsock;
484 newsock->sk->sk_user_data = othercon; 451 newsock->sk->sk_user_data = othercon;
485 add_sock(newsock, othercon); 452 add_sock(newsock, othercon);
453 addcon = othercon;
486 } 454 }
487 else { 455 else {
488 newsock->sk->sk_user_data = newcon; 456 newsock->sk->sk_user_data = newcon;
489 newcon->rx_action = receive_from_sock; 457 newcon->rx_action = receive_from_sock;
490 add_sock(newsock, newcon); 458 add_sock(newsock, newcon);
491 459 addcon = newcon;
492 } 460 }
493 461
494 up_write(&newcon->sock_sem); 462 mutex_unlock(&newcon->sock_mutex);
495 463
496 /* 464 /*
497 * Add it to the active queue in case we got data 465 * Add it to the active queue in case we got data
498 * beween processing the accept adding the socket 466 * beween processing the accept adding the socket
499 * to the read_sockets list 467 * to the read_sockets list
500 */ 468 */
501 lowcomms_data_ready(newsock->sk, 0); 469 if (!test_and_set_bit(CF_READ_PENDING, &addcon->flags))
502 up_read(&con->sock_sem); 470 queue_work(recv_workqueue, &addcon->rwork);
471 mutex_unlock(&con->sock_mutex);
503 472
504 return 0; 473 return 0;
505 474
506accept_err: 475accept_err:
507 up_read(&con->sock_sem); 476 mutex_unlock(&con->sock_mutex);
508 sock_release(newsock); 477 sock_release(newsock);
509 478
510 if (result != -EAGAIN) 479 if (result != -EAGAIN)
@@ -525,7 +494,7 @@ static void connect_to_sock(struct connection *con)
525 return; 494 return;
526 } 495 }
527 496
528 down_write(&con->sock_sem); 497 mutex_lock(&con->sock_mutex);
529 if (con->retries++ > MAX_CONNECT_RETRIES) 498 if (con->retries++ > MAX_CONNECT_RETRIES)
530 goto out; 499 goto out;
531 500
@@ -548,7 +517,7 @@ static void connect_to_sock(struct connection *con)
548 sock->sk->sk_user_data = con; 517 sock->sk->sk_user_data = con;
549 con->rx_action = receive_from_sock; 518 con->rx_action = receive_from_sock;
550 519
551 make_sockaddr(&saddr, dlm_config.tcp_port, &addr_len); 520 make_sockaddr(&saddr, dlm_config.ci_tcp_port, &addr_len);
552 521
553 add_sock(sock, con); 522 add_sock(sock, con);
554 523
@@ -577,7 +546,7 @@ out_err:
577 result = 0; 546 result = 0;
578 } 547 }
579out: 548out:
580 up_write(&con->sock_sem); 549 mutex_unlock(&con->sock_mutex);
581 return; 550 return;
582} 551}
583 552
@@ -616,10 +585,10 @@ static struct socket *create_listen_sock(struct connection *con,
616 con->sock = sock; 585 con->sock = sock;
617 586
618 /* Bind to our port */ 587 /* Bind to our port */
619 make_sockaddr(saddr, dlm_config.tcp_port, &addr_len); 588 make_sockaddr(saddr, dlm_config.ci_tcp_port, &addr_len);
620 result = sock->ops->bind(sock, (struct sockaddr *) saddr, addr_len); 589 result = sock->ops->bind(sock, (struct sockaddr *) saddr, addr_len);
621 if (result < 0) { 590 if (result < 0) {
622 printk("dlm: Can't bind to port %d\n", dlm_config.tcp_port); 591 printk("dlm: Can't bind to port %d\n", dlm_config.ci_tcp_port);
623 sock_release(sock); 592 sock_release(sock);
624 sock = NULL; 593 sock = NULL;
625 con->sock = NULL; 594 con->sock = NULL;
@@ -638,7 +607,7 @@ static struct socket *create_listen_sock(struct connection *con,
638 607
639 result = sock->ops->listen(sock, 5); 608 result = sock->ops->listen(sock, 5);
640 if (result < 0) { 609 if (result < 0) {
641 printk("dlm: Can't listen on port %d\n", dlm_config.tcp_port); 610 printk("dlm: Can't listen on port %d\n", dlm_config.ci_tcp_port);
642 sock_release(sock); 611 sock_release(sock);
643 sock = NULL; 612 sock = NULL;
644 goto create_out; 613 goto create_out;
@@ -709,6 +678,7 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len,
709 if (!con) 678 if (!con)
710 return NULL; 679 return NULL;
711 680
681 spin_lock(&con->writequeue_lock);
712 e = list_entry(con->writequeue.prev, struct writequeue_entry, list); 682 e = list_entry(con->writequeue.prev, struct writequeue_entry, list);
713 if ((&e->list == &con->writequeue) || 683 if ((&e->list == &con->writequeue) ||
714 (PAGE_CACHE_SIZE - e->end < len)) { 684 (PAGE_CACHE_SIZE - e->end < len)) {
@@ -747,6 +717,7 @@ void dlm_lowcomms_commit_buffer(void *mh)
747 struct connection *con = e->con; 717 struct connection *con = e->con;
748 int users; 718 int users;
749 719
720 spin_lock(&con->writequeue_lock);
750 users = --e->users; 721 users = --e->users;
751 if (users) 722 if (users)
752 goto out; 723 goto out;
@@ -754,12 +725,8 @@ void dlm_lowcomms_commit_buffer(void *mh)
754 kunmap(e->page); 725 kunmap(e->page);
755 spin_unlock(&con->writequeue_lock); 726 spin_unlock(&con->writequeue_lock);
756 727
757 if (test_and_set_bit(CF_WRITE_PENDING, &con->flags) == 0) { 728 if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) {
758 spin_lock_bh(&write_sockets_lock); 729 queue_work(send_workqueue, &con->swork);
759 list_add_tail(&con->write_list, &write_sockets);
760 spin_unlock_bh(&write_sockets_lock);
761
762 wake_up_interruptible(&lowcomms_send_waitq);
763 } 730 }
764 return; 731 return;
765 732
@@ -783,7 +750,7 @@ static void send_to_sock(struct connection *con)
783 struct writequeue_entry *e; 750 struct writequeue_entry *e;
784 int len, offset; 751 int len, offset;
785 752
786 down_read(&con->sock_sem); 753 mutex_lock(&con->sock_mutex);
787 if (con->sock == NULL) 754 if (con->sock == NULL)
788 goto out_connect; 755 goto out_connect;
789 756
@@ -800,6 +767,7 @@ static void send_to_sock(struct connection *con)
800 offset = e->offset; 767 offset = e->offset;
801 BUG_ON(len == 0 && e->users == 0); 768 BUG_ON(len == 0 && e->users == 0);
802 spin_unlock(&con->writequeue_lock); 769 spin_unlock(&con->writequeue_lock);
770 kmap(e->page);
803 771
804 ret = 0; 772 ret = 0;
805 if (len) { 773 if (len) {
@@ -828,18 +796,18 @@ static void send_to_sock(struct connection *con)
828 } 796 }
829 spin_unlock(&con->writequeue_lock); 797 spin_unlock(&con->writequeue_lock);
830out: 798out:
831 up_read(&con->sock_sem); 799 mutex_unlock(&con->sock_mutex);
832 return; 800 return;
833 801
834send_error: 802send_error:
835 up_read(&con->sock_sem); 803 mutex_unlock(&con->sock_mutex);
836 close_connection(con, false); 804 close_connection(con, false);
837 lowcomms_connect_sock(con); 805 lowcomms_connect_sock(con);
838 return; 806 return;
839 807
840out_connect: 808out_connect:
841 up_read(&con->sock_sem); 809 mutex_unlock(&con->sock_mutex);
842 lowcomms_connect_sock(con); 810 connect_to_sock(con);
843 return; 811 return;
844} 812}
845 813
@@ -872,7 +840,6 @@ int dlm_lowcomms_close(int nodeid)
872 if (con) { 840 if (con) {
873 clean_one_writequeue(con); 841 clean_one_writequeue(con);
874 close_connection(con, true); 842 close_connection(con, true);
875 atomic_set(&con->waiting_requests, 0);
876 } 843 }
877 return 0; 844 return 0;
878 845
@@ -880,102 +847,29 @@ out:
880 return -1; 847 return -1;
881} 848}
882 849
883/* API send message call, may queue the request */
884/* N.B. This is the old interface - use the new one for new calls */
885int lowcomms_send_message(int nodeid, char *buf, int len, gfp_t allocation)
886{
887 struct writequeue_entry *e;
888 char *b;
889
890 e = dlm_lowcomms_get_buffer(nodeid, len, allocation, &b);
891 if (e) {
892 memcpy(b, buf, len);
893 dlm_lowcomms_commit_buffer(e);
894 return 0;
895 }
896 return -ENOBUFS;
897}
898
899/* Look for activity on active sockets */ 850/* Look for activity on active sockets */
900static void process_sockets(void) 851static void process_recv_sockets(struct work_struct *work)
901{ 852{
902 struct list_head *list; 853 struct connection *con = container_of(work, struct connection, rwork);
903 struct list_head *temp; 854 int err;
904 int count = 0;
905
906 spin_lock_bh(&read_sockets_lock);
907 list_for_each_safe(list, temp, &read_sockets) {
908
909 struct connection *con =
910 list_entry(list, struct connection, read_list);
911 list_del(&con->read_list);
912 clear_bit(CF_READ_PENDING, &con->flags);
913
914 spin_unlock_bh(&read_sockets_lock);
915
916 /* This can reach zero if we are processing requests
917 * as they come in.
918 */
919 if (atomic_read(&con->waiting_requests) == 0) {
920 spin_lock_bh(&read_sockets_lock);
921 continue;
922 }
923
924 do {
925 con->rx_action(con);
926
927 /* Don't starve out everyone else */
928 if (++count >= MAX_RX_MSG_COUNT) {
929 cond_resched();
930 count = 0;
931 }
932 855
933 } while (!atomic_dec_and_test(&con->waiting_requests) && 856 clear_bit(CF_READ_PENDING, &con->flags);
934 !kthread_should_stop()); 857 do {
935 858 err = con->rx_action(con);
936 spin_lock_bh(&read_sockets_lock); 859 } while (!err);
937 }
938 spin_unlock_bh(&read_sockets_lock);
939} 860}
940 861
941/* Try to send any messages that are pending
942 */
943static void process_output_queue(void)
944{
945 struct list_head *list;
946 struct list_head *temp;
947
948 spin_lock_bh(&write_sockets_lock);
949 list_for_each_safe(list, temp, &write_sockets) {
950 struct connection *con =
951 list_entry(list, struct connection, write_list);
952 clear_bit(CF_WRITE_PENDING, &con->flags);
953 list_del(&con->write_list);
954 862
955 spin_unlock_bh(&write_sockets_lock); 863static void process_send_sockets(struct work_struct *work)
956 send_to_sock(con);
957 spin_lock_bh(&write_sockets_lock);
958 }
959 spin_unlock_bh(&write_sockets_lock);
960}
961
962static void process_state_queue(void)
963{ 864{
964 struct list_head *list; 865 struct connection *con = container_of(work, struct connection, swork);
965 struct list_head *temp;
966
967 spin_lock_bh(&state_sockets_lock);
968 list_for_each_safe(list, temp, &state_sockets) {
969 struct connection *con =
970 list_entry(list, struct connection, state_list);
971 list_del(&con->state_list);
972 clear_bit(CF_CONNECT_PENDING, &con->flags);
973 spin_unlock_bh(&state_sockets_lock);
974 866
867 if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) {
975 connect_to_sock(con); 868 connect_to_sock(con);
976 spin_lock_bh(&state_sockets_lock);
977 } 869 }
978 spin_unlock_bh(&state_sockets_lock); 870
871 clear_bit(CF_WRITE_PENDING, &con->flags);
872 send_to_sock(con);
979} 873}
980 874
981 875
@@ -992,109 +886,33 @@ static void clean_writequeues(void)
992 } 886 }
993} 887}
994 888
995static int read_list_empty(void) 889static void work_stop(void)
996{
997 int status;
998
999 spin_lock_bh(&read_sockets_lock);
1000 status = list_empty(&read_sockets);
1001 spin_unlock_bh(&read_sockets_lock);
1002
1003 return status;
1004}
1005
1006/* DLM Transport comms receive daemon */
1007static int dlm_recvd(void *data)
1008{ 890{
1009 init_waitqueue_entry(&lowcomms_recv_waitq_head, current); 891 destroy_workqueue(recv_workqueue);
1010 add_wait_queue(&lowcomms_recv_waitq, &lowcomms_recv_waitq_head); 892 destroy_workqueue(send_workqueue);
1011
1012 while (!kthread_should_stop()) {
1013 set_current_state(TASK_INTERRUPTIBLE);
1014 if (read_list_empty())
1015 cond_resched();
1016 set_current_state(TASK_RUNNING);
1017
1018 process_sockets();
1019 }
1020
1021 return 0;
1022} 893}
1023 894
1024static int write_and_state_lists_empty(void) 895static int work_start(void)
1025{ 896{
1026 int status;
1027
1028 spin_lock_bh(&write_sockets_lock);
1029 status = list_empty(&write_sockets);
1030 spin_unlock_bh(&write_sockets_lock);
1031
1032 spin_lock_bh(&state_sockets_lock);
1033 if (list_empty(&state_sockets) == 0)
1034 status = 0;
1035 spin_unlock_bh(&state_sockets_lock);
1036
1037 return status;
1038}
1039
1040/* DLM Transport send daemon */
1041static int dlm_sendd(void *data)
1042{
1043 init_waitqueue_entry(&lowcomms_send_waitq_head, current);
1044 add_wait_queue(&lowcomms_send_waitq, &lowcomms_send_waitq_head);
1045
1046 while (!kthread_should_stop()) {
1047 set_current_state(TASK_INTERRUPTIBLE);
1048 if (write_and_state_lists_empty())
1049 cond_resched();
1050 set_current_state(TASK_RUNNING);
1051
1052 process_state_queue();
1053 process_output_queue();
1054 }
1055
1056 return 0;
1057}
1058
1059static void daemons_stop(void)
1060{
1061 kthread_stop(recv_task);
1062 kthread_stop(send_task);
1063}
1064
1065static int daemons_start(void)
1066{
1067 struct task_struct *p;
1068 int error; 897 int error;
1069 898 recv_workqueue = create_workqueue("dlm_recv");
1070 p = kthread_run(dlm_recvd, NULL, "dlm_recvd"); 899 error = IS_ERR(recv_workqueue);
1071 error = IS_ERR(p);
1072 if (error) { 900 if (error) {
1073 log_print("can't start dlm_recvd %d", error); 901 log_print("can't start dlm_recv %d", error);
1074 return error; 902 return error;
1075 } 903 }
1076 recv_task = p;
1077 904
1078 p = kthread_run(dlm_sendd, NULL, "dlm_sendd"); 905 send_workqueue = create_singlethread_workqueue("dlm_send");
1079 error = IS_ERR(p); 906 error = IS_ERR(send_workqueue);
1080 if (error) { 907 if (error) {
1081 log_print("can't start dlm_sendd %d", error); 908 log_print("can't start dlm_send %d", error);
1082 kthread_stop(recv_task); 909 destroy_workqueue(recv_workqueue);
1083 return error; 910 return error;
1084 } 911 }
1085 send_task = p;
1086 912
1087 return 0; 913 return 0;
1088} 914}
1089 915
1090/*
1091 * Return the largest buffer size we can cope with.
1092 */
1093int lowcomms_max_buffer_size(void)
1094{
1095 return PAGE_CACHE_SIZE;
1096}
1097
1098void dlm_lowcomms_stop(void) 916void dlm_lowcomms_stop(void)
1099{ 917{
1100 int i; 918 int i;
@@ -1107,7 +925,7 @@ void dlm_lowcomms_stop(void)
1107 connections[i]->flags |= 0xFF; 925 connections[i]->flags |= 0xFF;
1108 } 926 }
1109 927
1110 daemons_stop(); 928 work_stop();
1111 clean_writequeues(); 929 clean_writequeues();
1112 930
1113 for (i = 0; i < conn_array_size; i++) { 931 for (i = 0; i < conn_array_size; i++) {
@@ -1159,7 +977,7 @@ int dlm_lowcomms_start(void)
1159 if (error) 977 if (error)
1160 goto fail_unlisten; 978 goto fail_unlisten;
1161 979
1162 error = daemons_start(); 980 error = work_start();
1163 if (error) 981 if (error)
1164 goto fail_unlisten; 982 goto fail_unlisten;
1165 983
diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c
index 5352b03ff5aa..f858fef6e41c 100644
--- a/fs/dlm/memory.c
+++ b/fs/dlm/memory.c
@@ -76,9 +76,7 @@ struct dlm_lkb *allocate_lkb(struct dlm_ls *ls)
76{ 76{
77 struct dlm_lkb *lkb; 77 struct dlm_lkb *lkb;
78 78
79 lkb = kmem_cache_alloc(lkb_cache, GFP_KERNEL); 79 lkb = kmem_cache_zalloc(lkb_cache, GFP_KERNEL);
80 if (lkb)
81 memset(lkb, 0, sizeof(*lkb));
82 return lkb; 80 return lkb;
83} 81}
84 82
diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index c9b1c3d535f4..a5126e0c68a6 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -82,7 +82,7 @@ int dlm_process_incoming_buffer(int nodeid, const void *base,
82 if (msglen < sizeof(struct dlm_header)) 82 if (msglen < sizeof(struct dlm_header))
83 break; 83 break;
84 err = -E2BIG; 84 err = -E2BIG;
85 if (msglen > dlm_config.buffer_size) { 85 if (msglen > dlm_config.ci_buffer_size) {
86 log_print("message size %d from %d too big, buf len %d", 86 log_print("message size %d from %d too big, buf len %d",
87 msglen, nodeid, len); 87 msglen, nodeid, len);
88 break; 88 break;
@@ -103,7 +103,7 @@ int dlm_process_incoming_buffer(int nodeid, const void *base,
103 103
104 if (msglen > sizeof(__tmp) && 104 if (msglen > sizeof(__tmp) &&
105 msg == (struct dlm_header *) __tmp) { 105 msg == (struct dlm_header *) __tmp) {
106 msg = kmalloc(dlm_config.buffer_size, GFP_KERNEL); 106 msg = kmalloc(dlm_config.ci_buffer_size, GFP_KERNEL);
107 if (msg == NULL) 107 if (msg == NULL)
108 return ret; 108 return ret;
109 } 109 }
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 4cc31be9cd9d..6bfbd6153809 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -56,6 +56,10 @@ static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len,
56 56
57 rc->rc_type = type; 57 rc->rc_type = type;
58 58
59 spin_lock(&ls->ls_recover_lock);
60 rc->rc_seq = ls->ls_recover_seq;
61 spin_unlock(&ls->ls_recover_lock);
62
59 *mh_ret = mh; 63 *mh_ret = mh;
60 *rc_ret = rc; 64 *rc_ret = rc;
61 return 0; 65 return 0;
@@ -78,8 +82,17 @@ static void make_config(struct dlm_ls *ls, struct rcom_config *rf)
78 rf->rf_lsflags = ls->ls_exflags; 82 rf->rf_lsflags = ls->ls_exflags;
79} 83}
80 84
81static int check_config(struct dlm_ls *ls, struct rcom_config *rf, int nodeid) 85static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
82{ 86{
87 struct rcom_config *rf = (struct rcom_config *) rc->rc_buf;
88
89 if ((rc->rc_header.h_version & 0xFFFF0000) != DLM_HEADER_MAJOR) {
90 log_error(ls, "version mismatch: %x nodeid %d: %x",
91 DLM_HEADER_MAJOR | DLM_HEADER_MINOR, nodeid,
92 rc->rc_header.h_version);
93 return -EINVAL;
94 }
95
83 if (rf->rf_lvblen != ls->ls_lvblen || 96 if (rf->rf_lvblen != ls->ls_lvblen ||
84 rf->rf_lsflags != ls->ls_exflags) { 97 rf->rf_lsflags != ls->ls_exflags) {
85 log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x", 98 log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x",
@@ -125,7 +138,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid)
125 goto out; 138 goto out;
126 139
127 allow_sync_reply(ls, &rc->rc_id); 140 allow_sync_reply(ls, &rc->rc_id);
128 memset(ls->ls_recover_buf, 0, dlm_config.buffer_size); 141 memset(ls->ls_recover_buf, 0, dlm_config.ci_buffer_size);
129 142
130 send_rcom(ls, mh, rc); 143 send_rcom(ls, mh, rc);
131 144
@@ -141,8 +154,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid)
141 log_debug(ls, "remote node %d not ready", nodeid); 154 log_debug(ls, "remote node %d not ready", nodeid);
142 rc->rc_result = 0; 155 rc->rc_result = 0;
143 } else 156 } else
144 error = check_config(ls, (struct rcom_config *) rc->rc_buf, 157 error = check_config(ls, rc, nodeid);
145 nodeid);
146 /* the caller looks at rc_result for the remote recovery status */ 158 /* the caller looks at rc_result for the remote recovery status */
147 out: 159 out:
148 return error; 160 return error;
@@ -159,6 +171,7 @@ static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in)
159 if (error) 171 if (error)
160 return; 172 return;
161 rc->rc_id = rc_in->rc_id; 173 rc->rc_id = rc_in->rc_id;
174 rc->rc_seq_reply = rc_in->rc_seq;
162 rc->rc_result = dlm_recover_status(ls); 175 rc->rc_result = dlm_recover_status(ls);
163 make_config(ls, (struct rcom_config *) rc->rc_buf); 176 make_config(ls, (struct rcom_config *) rc->rc_buf);
164 177
@@ -200,7 +213,7 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
200 if (nodeid == dlm_our_nodeid()) { 213 if (nodeid == dlm_our_nodeid()) {
201 dlm_copy_master_names(ls, last_name, last_len, 214 dlm_copy_master_names(ls, last_name, last_len,
202 ls->ls_recover_buf + len, 215 ls->ls_recover_buf + len,
203 dlm_config.buffer_size - len, nodeid); 216 dlm_config.ci_buffer_size - len, nodeid);
204 goto out; 217 goto out;
205 } 218 }
206 219
@@ -210,7 +223,7 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
210 memcpy(rc->rc_buf, last_name, last_len); 223 memcpy(rc->rc_buf, last_name, last_len);
211 224
212 allow_sync_reply(ls, &rc->rc_id); 225 allow_sync_reply(ls, &rc->rc_id);
213 memset(ls->ls_recover_buf, 0, dlm_config.buffer_size); 226 memset(ls->ls_recover_buf, 0, dlm_config.ci_buffer_size);
214 227
215 send_rcom(ls, mh, rc); 228 send_rcom(ls, mh, rc);
216 229
@@ -224,30 +237,17 @@ static void receive_rcom_names(struct dlm_ls *ls, struct dlm_rcom *rc_in)
224{ 237{
225 struct dlm_rcom *rc; 238 struct dlm_rcom *rc;
226 struct dlm_mhandle *mh; 239 struct dlm_mhandle *mh;
227 int error, inlen, outlen; 240 int error, inlen, outlen, nodeid;
228 int nodeid = rc_in->rc_header.h_nodeid;
229 uint32_t status = dlm_recover_status(ls);
230
231 /*
232 * We can't run dlm_dir_rebuild_send (which uses ls_nodes) while
233 * dlm_recoverd is running ls_nodes_reconfig (which changes ls_nodes).
234 * It could only happen in rare cases where we get a late NAMES
235 * message from a previous instance of recovery.
236 */
237
238 if (!(status & DLM_RS_NODES)) {
239 log_debug(ls, "ignoring RCOM_NAMES from %u", nodeid);
240 return;
241 }
242 241
243 nodeid = rc_in->rc_header.h_nodeid; 242 nodeid = rc_in->rc_header.h_nodeid;
244 inlen = rc_in->rc_header.h_length - sizeof(struct dlm_rcom); 243 inlen = rc_in->rc_header.h_length - sizeof(struct dlm_rcom);
245 outlen = dlm_config.buffer_size - sizeof(struct dlm_rcom); 244 outlen = dlm_config.ci_buffer_size - sizeof(struct dlm_rcom);
246 245
247 error = create_rcom(ls, nodeid, DLM_RCOM_NAMES_REPLY, outlen, &rc, &mh); 246 error = create_rcom(ls, nodeid, DLM_RCOM_NAMES_REPLY, outlen, &rc, &mh);
248 if (error) 247 if (error)
249 return; 248 return;
250 rc->rc_id = rc_in->rc_id; 249 rc->rc_id = rc_in->rc_id;
250 rc->rc_seq_reply = rc_in->rc_seq;
251 251
252 dlm_copy_master_names(ls, rc_in->rc_buf, inlen, rc->rc_buf, outlen, 252 dlm_copy_master_names(ls, rc_in->rc_buf, inlen, rc->rc_buf, outlen,
253 nodeid); 253 nodeid);
@@ -294,6 +294,7 @@ static void receive_rcom_lookup(struct dlm_ls *ls, struct dlm_rcom *rc_in)
294 ret_nodeid = error; 294 ret_nodeid = error;
295 rc->rc_result = ret_nodeid; 295 rc->rc_result = ret_nodeid;
296 rc->rc_id = rc_in->rc_id; 296 rc->rc_id = rc_in->rc_id;
297 rc->rc_seq_reply = rc_in->rc_seq;
297 298
298 send_rcom(ls, mh, rc); 299 send_rcom(ls, mh, rc);
299} 300}
@@ -375,20 +376,13 @@ static void receive_rcom_lock(struct dlm_ls *ls, struct dlm_rcom *rc_in)
375 376
376 memcpy(rc->rc_buf, rc_in->rc_buf, sizeof(struct rcom_lock)); 377 memcpy(rc->rc_buf, rc_in->rc_buf, sizeof(struct rcom_lock));
377 rc->rc_id = rc_in->rc_id; 378 rc->rc_id = rc_in->rc_id;
379 rc->rc_seq_reply = rc_in->rc_seq;
378 380
379 send_rcom(ls, mh, rc); 381 send_rcom(ls, mh, rc);
380} 382}
381 383
382static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in) 384static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
383{ 385{
384 uint32_t status = dlm_recover_status(ls);
385
386 if (!(status & DLM_RS_DIR)) {
387 log_debug(ls, "ignoring RCOM_LOCK_REPLY from %u",
388 rc_in->rc_header.h_nodeid);
389 return;
390 }
391
392 dlm_recover_process_copy(ls, rc_in); 386 dlm_recover_process_copy(ls, rc_in);
393} 387}
394 388
@@ -415,6 +409,7 @@ static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
415 409
416 rc->rc_type = DLM_RCOM_STATUS_REPLY; 410 rc->rc_type = DLM_RCOM_STATUS_REPLY;
417 rc->rc_id = rc_in->rc_id; 411 rc->rc_id = rc_in->rc_id;
412 rc->rc_seq_reply = rc_in->rc_seq;
418 rc->rc_result = -ESRCH; 413 rc->rc_result = -ESRCH;
419 414
420 rf = (struct rcom_config *) rc->rc_buf; 415 rf = (struct rcom_config *) rc->rc_buf;
@@ -426,6 +421,31 @@ static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
426 return 0; 421 return 0;
427} 422}
428 423
424static int is_old_reply(struct dlm_ls *ls, struct dlm_rcom *rc)
425{
426 uint64_t seq;
427 int rv = 0;
428
429 switch (rc->rc_type) {
430 case DLM_RCOM_STATUS_REPLY:
431 case DLM_RCOM_NAMES_REPLY:
432 case DLM_RCOM_LOOKUP_REPLY:
433 case DLM_RCOM_LOCK_REPLY:
434 spin_lock(&ls->ls_recover_lock);
435 seq = ls->ls_recover_seq;
436 spin_unlock(&ls->ls_recover_lock);
437 if (rc->rc_seq_reply != seq) {
438 log_debug(ls, "ignoring old reply %x from %d "
439 "seq_reply %llx expect %llx",
440 rc->rc_type, rc->rc_header.h_nodeid,
441 (unsigned long long)rc->rc_seq_reply,
442 (unsigned long long)seq);
443 rv = 1;
444 }
445 }
446 return rv;
447}
448
429/* Called by dlm_recvd; corresponds to dlm_receive_message() but special 449/* Called by dlm_recvd; corresponds to dlm_receive_message() but special
430 recovery-only comms are sent through here. */ 450 recovery-only comms are sent through here. */
431 451
@@ -449,11 +469,14 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
449 } 469 }
450 470
451 if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) { 471 if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) {
452 log_error(ls, "ignoring recovery message %x from %d", 472 log_debug(ls, "ignoring recovery message %x from %d",
453 rc->rc_type, nodeid); 473 rc->rc_type, nodeid);
454 goto out; 474 goto out;
455 } 475 }
456 476
477 if (is_old_reply(ls, rc))
478 goto out;
479
457 if (nodeid != rc->rc_header.h_nodeid) { 480 if (nodeid != rc->rc_header.h_nodeid) {
458 log_error(ls, "bad rcom nodeid %d from %d", 481 log_error(ls, "bad rcom nodeid %d from %d",
459 rc->rc_header.h_nodeid, nodeid); 482 rc->rc_header.h_nodeid, nodeid);
diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c
index cf9f6831bab5..c2cc7694cd16 100644
--- a/fs/dlm/recover.c
+++ b/fs/dlm/recover.c
@@ -44,7 +44,7 @@
44static void dlm_wait_timer_fn(unsigned long data) 44static void dlm_wait_timer_fn(unsigned long data)
45{ 45{
46 struct dlm_ls *ls = (struct dlm_ls *) data; 46 struct dlm_ls *ls = (struct dlm_ls *) data;
47 mod_timer(&ls->ls_timer, jiffies + (dlm_config.recover_timer * HZ)); 47 mod_timer(&ls->ls_timer, jiffies + (dlm_config.ci_recover_timer * HZ));
48 wake_up(&ls->ls_wait_general); 48 wake_up(&ls->ls_wait_general);
49} 49}
50 50
@@ -55,7 +55,7 @@ int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls))
55 init_timer(&ls->ls_timer); 55 init_timer(&ls->ls_timer);
56 ls->ls_timer.function = dlm_wait_timer_fn; 56 ls->ls_timer.function = dlm_wait_timer_fn;
57 ls->ls_timer.data = (long) ls; 57 ls->ls_timer.data = (long) ls;
58 ls->ls_timer.expires = jiffies + (dlm_config.recover_timer * HZ); 58 ls->ls_timer.expires = jiffies + (dlm_config.ci_recover_timer * HZ);
59 add_timer(&ls->ls_timer); 59 add_timer(&ls->ls_timer);
60 60
61 wait_event(ls->ls_wait_general, testfn(ls) || dlm_recovery_stopped(ls)); 61 wait_event(ls->ls_wait_general, testfn(ls) || dlm_recovery_stopped(ls));
@@ -397,7 +397,9 @@ int dlm_recover_masters(struct dlm_ls *ls)
397 397
398 if (dlm_no_directory(ls)) 398 if (dlm_no_directory(ls))
399 count += recover_master_static(r); 399 count += recover_master_static(r);
400 else if (!is_master(r) && dlm_is_removed(ls, r->res_nodeid)) { 400 else if (!is_master(r) &&
401 (dlm_is_removed(ls, r->res_nodeid) ||
402 rsb_flag(r, RSB_NEW_MASTER))) {
401 recover_master(r); 403 recover_master(r);
402 count++; 404 count++;
403 } 405 }
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 650536aa5139..3cb636d60249 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -77,7 +77,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
77 77
78 error = dlm_recover_members(ls, rv, &neg); 78 error = dlm_recover_members(ls, rv, &neg);
79 if (error) { 79 if (error) {
80 log_error(ls, "recover_members failed %d", error); 80 log_debug(ls, "recover_members failed %d", error);
81 goto fail; 81 goto fail;
82 } 82 }
83 start = jiffies; 83 start = jiffies;
@@ -89,7 +89,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
89 89
90 error = dlm_recover_directory(ls); 90 error = dlm_recover_directory(ls);
91 if (error) { 91 if (error) {
92 log_error(ls, "recover_directory failed %d", error); 92 log_debug(ls, "recover_directory failed %d", error);
93 goto fail; 93 goto fail;
94 } 94 }
95 95
@@ -99,7 +99,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
99 99
100 error = dlm_recover_directory_wait(ls); 100 error = dlm_recover_directory_wait(ls);
101 if (error) { 101 if (error) {
102 log_error(ls, "recover_directory_wait failed %d", error); 102 log_debug(ls, "recover_directory_wait failed %d", error);
103 goto fail; 103 goto fail;
104 } 104 }
105 105
@@ -129,7 +129,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
129 129
130 error = dlm_recover_masters(ls); 130 error = dlm_recover_masters(ls);
131 if (error) { 131 if (error) {
132 log_error(ls, "recover_masters failed %d", error); 132 log_debug(ls, "recover_masters failed %d", error);
133 goto fail; 133 goto fail;
134 } 134 }
135 135
@@ -139,13 +139,13 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
139 139
140 error = dlm_recover_locks(ls); 140 error = dlm_recover_locks(ls);
141 if (error) { 141 if (error) {
142 log_error(ls, "recover_locks failed %d", error); 142 log_debug(ls, "recover_locks failed %d", error);
143 goto fail; 143 goto fail;
144 } 144 }
145 145
146 error = dlm_recover_locks_wait(ls); 146 error = dlm_recover_locks_wait(ls);
147 if (error) { 147 if (error) {
148 log_error(ls, "recover_locks_wait failed %d", error); 148 log_debug(ls, "recover_locks_wait failed %d", error);
149 goto fail; 149 goto fail;
150 } 150 }
151 151
@@ -166,7 +166,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
166 166
167 error = dlm_recover_locks_wait(ls); 167 error = dlm_recover_locks_wait(ls);
168 if (error) { 168 if (error) {
169 log_error(ls, "recover_locks_wait failed %d", error); 169 log_debug(ls, "recover_locks_wait failed %d", error);
170 goto fail; 170 goto fail;
171 } 171 }
172 } 172 }
@@ -184,7 +184,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
184 dlm_set_recover_status(ls, DLM_RS_DONE); 184 dlm_set_recover_status(ls, DLM_RS_DONE);
185 error = dlm_recover_done_wait(ls); 185 error = dlm_recover_done_wait(ls);
186 if (error) { 186 if (error) {
187 log_error(ls, "recover_done_wait failed %d", error); 187 log_debug(ls, "recover_done_wait failed %d", error);
188 goto fail; 188 goto fail;
189 } 189 }
190 190
@@ -192,19 +192,19 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
192 192
193 error = enable_locking(ls, rv->seq); 193 error = enable_locking(ls, rv->seq);
194 if (error) { 194 if (error) {
195 log_error(ls, "enable_locking failed %d", error); 195 log_debug(ls, "enable_locking failed %d", error);
196 goto fail; 196 goto fail;
197 } 197 }
198 198
199 error = dlm_process_requestqueue(ls); 199 error = dlm_process_requestqueue(ls);
200 if (error) { 200 if (error) {
201 log_error(ls, "process_requestqueue failed %d", error); 201 log_debug(ls, "process_requestqueue failed %d", error);
202 goto fail; 202 goto fail;
203 } 203 }
204 204
205 error = dlm_recover_waiters_post(ls); 205 error = dlm_recover_waiters_post(ls);
206 if (error) { 206 if (error) {
207 log_error(ls, "recover_waiters_post failed %d", error); 207 log_debug(ls, "recover_waiters_post failed %d", error);
208 goto fail; 208 goto fail;
209 } 209 }
210 210
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index c37e93e4f2df..40db61dc95f2 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -25,7 +25,7 @@
25 25
26static const char *name_prefix="dlm"; 26static const char *name_prefix="dlm";
27static struct miscdevice ctl_device; 27static struct miscdevice ctl_device;
28static struct file_operations device_fops; 28static const struct file_operations device_fops;
29 29
30#ifdef CONFIG_COMPAT 30#ifdef CONFIG_COMPAT
31 31
@@ -180,6 +180,14 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type)
180 ua->lksb.sb_status == -EAGAIN && !list_empty(&lkb->lkb_ownqueue)) 180 ua->lksb.sb_status == -EAGAIN && !list_empty(&lkb->lkb_ownqueue))
181 remove_ownqueue = 1; 181 remove_ownqueue = 1;
182 182
183 /* unlocks or cancels of waiting requests need to be removed from the
184 proc's unlocking list, again there must be a better way... */
185
186 if (ua->lksb.sb_status == -DLM_EUNLOCK ||
187 (ua->lksb.sb_status == -DLM_ECANCEL &&
188 lkb->lkb_grmode == DLM_LOCK_IV))
189 remove_ownqueue = 1;
190
183 /* We want to copy the lvb to userspace when the completion 191 /* We want to copy the lvb to userspace when the completion
184 ast is read if the status is 0, the lock has an lvb and 192 ast is read if the status is 0, the lock has an lvb and
185 lvb_ops says we should. We could probably have set_lvb_lock() 193 lvb_ops says we should. We could probably have set_lvb_lock()
@@ -523,6 +531,7 @@ static int device_open(struct inode *inode, struct file *file)
523 proc->lockspace = ls->ls_local_handle; 531 proc->lockspace = ls->ls_local_handle;
524 INIT_LIST_HEAD(&proc->asts); 532 INIT_LIST_HEAD(&proc->asts);
525 INIT_LIST_HEAD(&proc->locks); 533 INIT_LIST_HEAD(&proc->locks);
534 INIT_LIST_HEAD(&proc->unlocking);
526 spin_lock_init(&proc->asts_spin); 535 spin_lock_init(&proc->asts_spin);
527 spin_lock_init(&proc->locks_spin); 536 spin_lock_init(&proc->locks_spin);
528 init_waitqueue_head(&proc->wait); 537 init_waitqueue_head(&proc->wait);
@@ -750,7 +759,7 @@ static int ctl_device_close(struct inode *inode, struct file *file)
750 return 0; 759 return 0;
751} 760}
752 761
753static struct file_operations device_fops = { 762static const struct file_operations device_fops = {
754 .open = device_open, 763 .open = device_open,
755 .release = device_close, 764 .release = device_close,
756 .read = device_read, 765 .read = device_read,
@@ -759,7 +768,7 @@ static struct file_operations device_fops = {
759 .owner = THIS_MODULE, 768 .owner = THIS_MODULE,
760}; 769};
761 770
762static struct file_operations ctl_device_fops = { 771static const struct file_operations ctl_device_fops = {
763 .open = ctl_device_open, 772 .open = ctl_device_open,
764 .release = ctl_device_close, 773 .release = ctl_device_close,
765 .write = device_write, 774 .write = device_write,
diff --git a/fs/dlm/util.c b/fs/dlm/util.c
index 767197db9944..963889cf6740 100644
--- a/fs/dlm/util.c
+++ b/fs/dlm/util.c
@@ -134,6 +134,8 @@ void dlm_rcom_out(struct dlm_rcom *rc)
134 rc->rc_type = cpu_to_le32(rc->rc_type); 134 rc->rc_type = cpu_to_le32(rc->rc_type);
135 rc->rc_result = cpu_to_le32(rc->rc_result); 135 rc->rc_result = cpu_to_le32(rc->rc_result);
136 rc->rc_id = cpu_to_le64(rc->rc_id); 136 rc->rc_id = cpu_to_le64(rc->rc_id);
137 rc->rc_seq = cpu_to_le64(rc->rc_seq);
138 rc->rc_seq_reply = cpu_to_le64(rc->rc_seq_reply);
137 139
138 if (type == DLM_RCOM_LOCK) 140 if (type == DLM_RCOM_LOCK)
139 rcom_lock_out((struct rcom_lock *) rc->rc_buf); 141 rcom_lock_out((struct rcom_lock *) rc->rc_buf);
@@ -151,6 +153,8 @@ void dlm_rcom_in(struct dlm_rcom *rc)
151 rc->rc_type = le32_to_cpu(rc->rc_type); 153 rc->rc_type = le32_to_cpu(rc->rc_type);
152 rc->rc_result = le32_to_cpu(rc->rc_result); 154 rc->rc_result = le32_to_cpu(rc->rc_result);
153 rc->rc_id = le64_to_cpu(rc->rc_id); 155 rc->rc_id = le64_to_cpu(rc->rc_id);
156 rc->rc_seq = le64_to_cpu(rc->rc_seq);
157 rc->rc_seq_reply = le64_to_cpu(rc->rc_seq_reply);
154 158
155 if (rc->rc_type == DLM_RCOM_LOCK) 159 if (rc->rc_type == DLM_RCOM_LOCK)
156 rcom_lock_in((struct rcom_lock *) rc->rc_buf); 160 rcom_lock_in((struct rcom_lock *) rc->rc_buf);
diff --git a/fs/dquot.c b/fs/dquot.c
index 0952cc474d9a..9eb166f91489 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -79,6 +79,7 @@
79#include <linux/buffer_head.h> 79#include <linux/buffer_head.h>
80#include <linux/capability.h> 80#include <linux/capability.h>
81#include <linux/quotaops.h> 81#include <linux/quotaops.h>
82#include <linux/writeback.h> /* for inode_lock, oddly enough.. */
82 83
83#include <asm/uaccess.h> 84#include <asm/uaccess.h>
84 85
@@ -600,11 +601,10 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type)
600{ 601{
601 struct dquot *dquot; 602 struct dquot *dquot;
602 603
603 dquot = kmem_cache_alloc(dquot_cachep, GFP_NOFS); 604 dquot = kmem_cache_zalloc(dquot_cachep, GFP_NOFS);
604 if(!dquot) 605 if(!dquot)
605 return NODQUOT; 606 return NODQUOT;
606 607
607 memset((caddr_t)dquot, 0, sizeof(struct dquot));
608 mutex_init(&dquot->dq_lock); 608 mutex_init(&dquot->dq_lock);
609 INIT_LIST_HEAD(&dquot->dq_free); 609 INIT_LIST_HEAD(&dquot->dq_free);
610 INIT_LIST_HEAD(&dquot->dq_inuse); 610 INIT_LIST_HEAD(&dquot->dq_inuse);
@@ -688,23 +688,27 @@ static int dqinit_needed(struct inode *inode, int type)
688/* This routine is guarded by dqonoff_mutex mutex */ 688/* This routine is guarded by dqonoff_mutex mutex */
689static void add_dquot_ref(struct super_block *sb, int type) 689static void add_dquot_ref(struct super_block *sb, int type)
690{ 690{
691 struct list_head *p; 691 struct inode *inode;
692 692
693restart: 693restart:
694 file_list_lock(); 694 spin_lock(&inode_lock);
695 list_for_each(p, &sb->s_files) { 695 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
696 struct file *filp = list_entry(p, struct file, f_u.fu_list); 696 if (!atomic_read(&inode->i_writecount))
697 struct inode *inode = filp->f_path.dentry->d_inode; 697 continue;
698 if (filp->f_mode & FMODE_WRITE && dqinit_needed(inode, type)) { 698 if (!dqinit_needed(inode, type))
699 struct dentry *dentry = dget(filp->f_path.dentry); 699 continue;
700 file_list_unlock(); 700 if (inode->i_state & (I_FREEING|I_WILL_FREE))
701 sb->dq_op->initialize(inode, type); 701 continue;
702 dput(dentry); 702
703 /* As we may have blocked we had better restart... */ 703 __iget(inode);
704 goto restart; 704 spin_unlock(&inode_lock);
705 } 705
706 sb->dq_op->initialize(inode, type);
707 iput(inode);
708 /* As we may have blocked we had better restart... */
709 goto restart;
706 } 710 }
707 file_list_unlock(); 711 spin_unlock(&inode_lock);
708} 712}
709 713
710/* Return 0 if dqput() won't block (note that 1 doesn't necessarily mean blocking) */ 714/* Return 0 if dqput() won't block (note that 1 doesn't necessarily mean blocking) */
@@ -756,15 +760,30 @@ static void put_dquot_list(struct list_head *tofree_head)
756 } 760 }
757} 761}
758 762
763static void remove_dquot_ref(struct super_block *sb, int type,
764 struct list_head *tofree_head)
765{
766 struct inode *inode;
767
768 spin_lock(&inode_lock);
769 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
770 if (!IS_NOQUOTA(inode))
771 remove_inode_dquot_ref(inode, type, tofree_head);
772 }
773 spin_unlock(&inode_lock);
774}
775
759/* Gather all references from inodes and drop them */ 776/* Gather all references from inodes and drop them */
760static void drop_dquot_ref(struct super_block *sb, int type) 777static void drop_dquot_ref(struct super_block *sb, int type)
761{ 778{
762 LIST_HEAD(tofree_head); 779 LIST_HEAD(tofree_head);
763 780
764 down_write(&sb_dqopt(sb)->dqptr_sem); 781 if (sb->dq_op) {
765 remove_dquot_ref(sb, type, &tofree_head); 782 down_write(&sb_dqopt(sb)->dqptr_sem);
766 up_write(&sb_dqopt(sb)->dqptr_sem); 783 remove_dquot_ref(sb, type, &tofree_head);
767 put_dquot_list(&tofree_head); 784 up_write(&sb_dqopt(sb)->dqptr_sem);
785 put_dquot_list(&tofree_head);
786 }
768} 787}
769 788
770static inline void dquot_incr_inodes(struct dquot *dquot, unsigned long number) 789static inline void dquot_incr_inodes(struct dquot *dquot, unsigned long number)
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 4e4762389bdc..03ea7696fe39 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -20,7 +20,7 @@ static void drop_pagecache_sb(struct super_block *sb)
20 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 20 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
21 if (inode->i_state & (I_FREEING|I_WILL_FREE)) 21 if (inode->i_state & (I_FREEING|I_WILL_FREE))
22 continue; 22 continue;
23 invalidate_inode_pages(inode->i_mapping); 23 invalidate_mapping_pages(inode->i_mapping, 0, -1);
24 } 24 }
25 spin_unlock(&inode_lock); 25 spin_unlock(&inode_lock);
26} 26}
diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile
index ca6562451eeb..1f1107237eab 100644
--- a/fs/ecryptfs/Makefile
+++ b/fs/ecryptfs/Makefile
@@ -4,4 +4,4 @@
4 4
5obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o 5obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o
6 6
7ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o crypto.o keystore.o debug.o 7ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o crypto.o keystore.o messaging.o netlink.o debug.o
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 7196f50fe152..6ac630625b70 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * Copyright (C) 1997-2004 Erez Zadok 4 * Copyright (C) 1997-2004 Erez Zadok
5 * Copyright (C) 2001-2004 Stony Brook University 5 * Copyright (C) 2001-2004 Stony Brook University
6 * Copyright (C) 2004-2006 International Business Machines Corp. 6 * Copyright (C) 2004-2007 International Business Machines Corp.
7 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com> 7 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
8 * Michael C. Thompson <mcthomps@us.ibm.com> 8 * Michael C. Thompson <mcthomps@us.ibm.com>
9 * 9 *
@@ -207,7 +207,7 @@ ecryptfs_init_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat)
207 mutex_init(&crypt_stat->cs_mutex); 207 mutex_init(&crypt_stat->cs_mutex);
208 mutex_init(&crypt_stat->cs_tfm_mutex); 208 mutex_init(&crypt_stat->cs_tfm_mutex);
209 mutex_init(&crypt_stat->cs_hash_tfm_mutex); 209 mutex_init(&crypt_stat->cs_hash_tfm_mutex);
210 ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_STRUCT_INITIALIZED); 210 crypt_stat->flags |= ECRYPTFS_STRUCT_INITIALIZED;
211} 211}
212 212
213/** 213/**
@@ -305,8 +305,7 @@ static int encrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
305 int rc = 0; 305 int rc = 0;
306 306
307 BUG_ON(!crypt_stat || !crypt_stat->tfm 307 BUG_ON(!crypt_stat || !crypt_stat->tfm
308 || !ECRYPTFS_CHECK_FLAG(crypt_stat->flags, 308 || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED));
309 ECRYPTFS_STRUCT_INITIALIZED));
310 if (unlikely(ecryptfs_verbosity > 0)) { 309 if (unlikely(ecryptfs_verbosity > 0)) {
311 ecryptfs_printk(KERN_DEBUG, "Key size [%d]; key:\n", 310 ecryptfs_printk(KERN_DEBUG, "Key size [%d]; key:\n",
312 crypt_stat->key_size); 311 crypt_stat->key_size);
@@ -429,10 +428,10 @@ static int ecryptfs_read_in_page(struct ecryptfs_page_crypt_context *ctx,
429 goto out; 428 goto out;
430 } 429 }
431 } else { 430 } else {
432 rc = ecryptfs_grab_and_map_lower_page(lower_page, NULL, 431 *lower_page = grab_cache_page(lower_inode->i_mapping,
433 lower_inode, 432 lower_page_idx);
434 lower_page_idx); 433 if (!(*lower_page)) {
435 if (rc) { 434 rc = -EINVAL;
436 ecryptfs_printk( 435 ecryptfs_printk(
437 KERN_ERR, "Error attempting to grab and map " 436 KERN_ERR, "Error attempting to grab and map "
438 "lower page with index [0x%.16x]; rc = [%d]\n", 437 "lower page with index [0x%.16x]; rc = [%d]\n",
@@ -485,7 +484,7 @@ int ecryptfs_encrypt_page(struct ecryptfs_page_crypt_context *ctx)
485 lower_inode = ecryptfs_inode_to_lower(ctx->page->mapping->host); 484 lower_inode = ecryptfs_inode_to_lower(ctx->page->mapping->host);
486 inode_info = ecryptfs_inode_to_private(ctx->page->mapping->host); 485 inode_info = ecryptfs_inode_to_private(ctx->page->mapping->host);
487 crypt_stat = &inode_info->crypt_stat; 486 crypt_stat = &inode_info->crypt_stat;
488 if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED)) { 487 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
489 rc = ecryptfs_copy_page_to_lower(ctx->page, lower_inode, 488 rc = ecryptfs_copy_page_to_lower(ctx->page, lower_inode,
490 ctx->param.lower_file); 489 ctx->param.lower_file);
491 if (rc) 490 if (rc)
@@ -617,7 +616,7 @@ int ecryptfs_decrypt_page(struct file *file, struct page *page)
617 crypt_stat = &(ecryptfs_inode_to_private( 616 crypt_stat = &(ecryptfs_inode_to_private(
618 page->mapping->host)->crypt_stat); 617 page->mapping->host)->crypt_stat);
619 lower_inode = ecryptfs_inode_to_lower(page->mapping->host); 618 lower_inode = ecryptfs_inode_to_lower(page->mapping->host);
620 if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED)) { 619 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
621 rc = ecryptfs_do_readpage(file, page, page->index); 620 rc = ecryptfs_do_readpage(file, page, page->index);
622 if (rc) 621 if (rc)
623 ecryptfs_printk(KERN_ERR, "Error attempting to copy " 622 ecryptfs_printk(KERN_ERR, "Error attempting to copy "
@@ -828,9 +827,7 @@ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat)
828 mutex_unlock(&crypt_stat->cs_tfm_mutex); 827 mutex_unlock(&crypt_stat->cs_tfm_mutex);
829 goto out; 828 goto out;
830 } 829 }
831 crypto_blkcipher_set_flags(crypt_stat->tfm, 830 crypto_blkcipher_set_flags(crypt_stat->tfm, CRYPTO_TFM_REQ_WEAK_KEY);
832 (ECRYPTFS_DEFAULT_CHAINING_MODE
833 | CRYPTO_TFM_REQ_WEAK_KEY));
834 mutex_unlock(&crypt_stat->cs_tfm_mutex); 831 mutex_unlock(&crypt_stat->cs_tfm_mutex);
835 rc = 0; 832 rc = 0;
836out: 833out:
@@ -865,7 +862,10 @@ void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat)
865 ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE; 862 ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
866 } else 863 } else
867 crypt_stat->header_extent_size = PAGE_CACHE_SIZE; 864 crypt_stat->header_extent_size = PAGE_CACHE_SIZE;
868 crypt_stat->num_header_extents_at_front = 1; 865 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
866 crypt_stat->num_header_extents_at_front = 0;
867 else
868 crypt_stat->num_header_extents_at_front = 1;
869} 869}
870 870
871/** 871/**
@@ -881,7 +881,7 @@ int ecryptfs_compute_root_iv(struct ecryptfs_crypt_stat *crypt_stat)
881 881
882 BUG_ON(crypt_stat->iv_bytes > MD5_DIGEST_SIZE); 882 BUG_ON(crypt_stat->iv_bytes > MD5_DIGEST_SIZE);
883 BUG_ON(crypt_stat->iv_bytes <= 0); 883 BUG_ON(crypt_stat->iv_bytes <= 0);
884 if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID)) { 884 if (!(crypt_stat->flags & ECRYPTFS_KEY_VALID)) {
885 rc = -EINVAL; 885 rc = -EINVAL;
886 ecryptfs_printk(KERN_WARNING, "Session key not valid; " 886 ecryptfs_printk(KERN_WARNING, "Session key not valid; "
887 "cannot generate root IV\n"); 887 "cannot generate root IV\n");
@@ -898,8 +898,7 @@ int ecryptfs_compute_root_iv(struct ecryptfs_crypt_stat *crypt_stat)
898out: 898out:
899 if (rc) { 899 if (rc) {
900 memset(crypt_stat->root_iv, 0, crypt_stat->iv_bytes); 900 memset(crypt_stat->root_iv, 0, crypt_stat->iv_bytes);
901 ECRYPTFS_SET_FLAG(crypt_stat->flags, 901 crypt_stat->flags |= ECRYPTFS_SECURITY_WARNING;
902 ECRYPTFS_SECURITY_WARNING);
903 } 902 }
904 return rc; 903 return rc;
905} 904}
@@ -907,7 +906,7 @@ out:
907static void ecryptfs_generate_new_key(struct ecryptfs_crypt_stat *crypt_stat) 906static void ecryptfs_generate_new_key(struct ecryptfs_crypt_stat *crypt_stat)
908{ 907{
909 get_random_bytes(crypt_stat->key, crypt_stat->key_size); 908 get_random_bytes(crypt_stat->key, crypt_stat->key_size);
910 ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID); 909 crypt_stat->flags |= ECRYPTFS_KEY_VALID;
911 ecryptfs_compute_root_iv(crypt_stat); 910 ecryptfs_compute_root_iv(crypt_stat);
912 if (unlikely(ecryptfs_verbosity > 0)) { 911 if (unlikely(ecryptfs_verbosity > 0)) {
913 ecryptfs_printk(KERN_DEBUG, "Generated new session key:\n"); 912 ecryptfs_printk(KERN_DEBUG, "Generated new session key:\n");
@@ -917,6 +916,22 @@ static void ecryptfs_generate_new_key(struct ecryptfs_crypt_stat *crypt_stat)
917} 916}
918 917
919/** 918/**
919 * ecryptfs_copy_mount_wide_flags_to_inode_flags
920 *
921 * This function propagates the mount-wide flags to individual inode
922 * flags.
923 */
924static void ecryptfs_copy_mount_wide_flags_to_inode_flags(
925 struct ecryptfs_crypt_stat *crypt_stat,
926 struct ecryptfs_mount_crypt_stat *mount_crypt_stat)
927{
928 if (mount_crypt_stat->flags & ECRYPTFS_XATTR_METADATA_ENABLED)
929 crypt_stat->flags |= ECRYPTFS_METADATA_IN_XATTR;
930 if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)
931 crypt_stat->flags |= ECRYPTFS_VIEW_AS_ENCRYPTED;
932}
933
934/**
920 * ecryptfs_set_default_crypt_stat_vals 935 * ecryptfs_set_default_crypt_stat_vals
921 * @crypt_stat 936 * @crypt_stat
922 * 937 *
@@ -926,10 +941,12 @@ static void ecryptfs_set_default_crypt_stat_vals(
926 struct ecryptfs_crypt_stat *crypt_stat, 941 struct ecryptfs_crypt_stat *crypt_stat,
927 struct ecryptfs_mount_crypt_stat *mount_crypt_stat) 942 struct ecryptfs_mount_crypt_stat *mount_crypt_stat)
928{ 943{
944 ecryptfs_copy_mount_wide_flags_to_inode_flags(crypt_stat,
945 mount_crypt_stat);
929 ecryptfs_set_default_sizes(crypt_stat); 946 ecryptfs_set_default_sizes(crypt_stat);
930 strcpy(crypt_stat->cipher, ECRYPTFS_DEFAULT_CIPHER); 947 strcpy(crypt_stat->cipher, ECRYPTFS_DEFAULT_CIPHER);
931 crypt_stat->key_size = ECRYPTFS_DEFAULT_KEY_BYTES; 948 crypt_stat->key_size = ECRYPTFS_DEFAULT_KEY_BYTES;
932 ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID); 949 crypt_stat->flags &= ~(ECRYPTFS_KEY_VALID);
933 crypt_stat->file_version = ECRYPTFS_FILE_VERSION; 950 crypt_stat->file_version = ECRYPTFS_FILE_VERSION;
934 crypt_stat->mount_crypt_stat = mount_crypt_stat; 951 crypt_stat->mount_crypt_stat = mount_crypt_stat;
935} 952}
@@ -969,8 +986,10 @@ int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry)
969 if (mount_crypt_stat->global_auth_tok) { 986 if (mount_crypt_stat->global_auth_tok) {
970 ecryptfs_printk(KERN_DEBUG, "Initializing context for new " 987 ecryptfs_printk(KERN_DEBUG, "Initializing context for new "
971 "file using mount_crypt_stat\n"); 988 "file using mount_crypt_stat\n");
972 ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED); 989 crypt_stat->flags |= ECRYPTFS_ENCRYPTED;
973 ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID); 990 crypt_stat->flags |= ECRYPTFS_KEY_VALID;
991 ecryptfs_copy_mount_wide_flags_to_inode_flags(crypt_stat,
992 mount_crypt_stat);
974 memcpy(crypt_stat->keysigs[crypt_stat->num_keysigs++], 993 memcpy(crypt_stat->keysigs[crypt_stat->num_keysigs++],
975 mount_crypt_stat->global_auth_tok_sig, 994 mount_crypt_stat->global_auth_tok_sig,
976 ECRYPTFS_SIG_SIZE_HEX); 995 ECRYPTFS_SIG_SIZE_HEX);
@@ -1003,7 +1022,7 @@ int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry)
1003 * 1022 *
1004 * Returns one if marker found; zero if not found 1023 * Returns one if marker found; zero if not found
1005 */ 1024 */
1006int contains_ecryptfs_marker(char *data) 1025static int contains_ecryptfs_marker(char *data)
1007{ 1026{
1008 u32 m_1, m_2; 1027 u32 m_1, m_2;
1009 1028
@@ -1029,7 +1048,8 @@ struct ecryptfs_flag_map_elem {
1029/* Add support for additional flags by adding elements here. */ 1048/* Add support for additional flags by adding elements here. */
1030static struct ecryptfs_flag_map_elem ecryptfs_flag_map[] = { 1049static struct ecryptfs_flag_map_elem ecryptfs_flag_map[] = {
1031 {0x00000001, ECRYPTFS_ENABLE_HMAC}, 1050 {0x00000001, ECRYPTFS_ENABLE_HMAC},
1032 {0x00000002, ECRYPTFS_ENCRYPTED} 1051 {0x00000002, ECRYPTFS_ENCRYPTED},
1052 {0x00000004, ECRYPTFS_METADATA_IN_XATTR}
1033}; 1053};
1034 1054
1035/** 1055/**
@@ -1052,11 +1072,9 @@ static int ecryptfs_process_flags(struct ecryptfs_crypt_stat *crypt_stat,
1052 for (i = 0; i < ((sizeof(ecryptfs_flag_map) 1072 for (i = 0; i < ((sizeof(ecryptfs_flag_map)
1053 / sizeof(struct ecryptfs_flag_map_elem))); i++) 1073 / sizeof(struct ecryptfs_flag_map_elem))); i++)
1054 if (flags & ecryptfs_flag_map[i].file_flag) { 1074 if (flags & ecryptfs_flag_map[i].file_flag) {
1055 ECRYPTFS_SET_FLAG(crypt_stat->flags, 1075 crypt_stat->flags |= ecryptfs_flag_map[i].local_flag;
1056 ecryptfs_flag_map[i].local_flag);
1057 } else 1076 } else
1058 ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, 1077 crypt_stat->flags &= ~(ecryptfs_flag_map[i].local_flag);
1059 ecryptfs_flag_map[i].local_flag);
1060 /* Version is in top 8 bits of the 32-bit flag vector */ 1078 /* Version is in top 8 bits of the 32-bit flag vector */
1061 crypt_stat->file_version = ((flags >> 24) & 0xFF); 1079 crypt_stat->file_version = ((flags >> 24) & 0xFF);
1062 (*bytes_read) = 4; 1080 (*bytes_read) = 4;
@@ -1093,8 +1111,7 @@ write_ecryptfs_flags(char *page_virt, struct ecryptfs_crypt_stat *crypt_stat,
1093 1111
1094 for (i = 0; i < ((sizeof(ecryptfs_flag_map) 1112 for (i = 0; i < ((sizeof(ecryptfs_flag_map)
1095 / sizeof(struct ecryptfs_flag_map_elem))); i++) 1113 / sizeof(struct ecryptfs_flag_map_elem))); i++)
1096 if (ECRYPTFS_CHECK_FLAG(crypt_stat->flags, 1114 if (crypt_stat->flags & ecryptfs_flag_map[i].local_flag)
1097 ecryptfs_flag_map[i].local_flag))
1098 flags |= ecryptfs_flag_map[i].file_flag; 1115 flags |= ecryptfs_flag_map[i].file_flag;
1099 /* Version is in top 8 bits of the 32-bit flag vector */ 1116 /* Version is in top 8 bits of the 32-bit flag vector */
1100 flags |= ((((u8)crypt_stat->file_version) << 24) & 0xFF000000); 1117 flags |= ((((u8)crypt_stat->file_version) << 24) & 0xFF000000);
@@ -1189,8 +1206,8 @@ int ecryptfs_cipher_code_to_string(char *str, u16 cipher_code)
1189 * 1206 *
1190 * Returns zero on success; non-zero otherwise 1207 * Returns zero on success; non-zero otherwise
1191 */ 1208 */
1192int ecryptfs_read_header_region(char *data, struct dentry *dentry, 1209static int ecryptfs_read_header_region(char *data, struct dentry *dentry,
1193 struct vfsmount *mnt) 1210 struct vfsmount *mnt)
1194{ 1211{
1195 struct file *lower_file; 1212 struct file *lower_file;
1196 mm_segment_t oldfs; 1213 mm_segment_t oldfs;
@@ -1219,9 +1236,25 @@ out:
1219 return rc; 1236 return rc;
1220} 1237}
1221 1238
1222static void 1239int ecryptfs_read_and_validate_header_region(char *data, struct dentry *dentry,
1223write_header_metadata(char *virt, struct ecryptfs_crypt_stat *crypt_stat, 1240 struct vfsmount *mnt)
1224 size_t *written) 1241{
1242 int rc;
1243
1244 rc = ecryptfs_read_header_region(data, dentry, mnt);
1245 if (rc)
1246 goto out;
1247 if (!contains_ecryptfs_marker(data + ECRYPTFS_FILE_SIZE_BYTES))
1248 rc = -EINVAL;
1249out:
1250 return rc;
1251}
1252
1253
1254void
1255ecryptfs_write_header_metadata(char *virt,
1256 struct ecryptfs_crypt_stat *crypt_stat,
1257 size_t *written)
1225{ 1258{
1226 u32 header_extent_size; 1259 u32 header_extent_size;
1227 u16 num_header_extents_at_front; 1260 u16 num_header_extents_at_front;
@@ -1270,9 +1303,9 @@ struct kmem_cache *ecryptfs_header_cache_2;
1270 * 1303 *
1271 * Returns zero on success 1304 * Returns zero on success
1272 */ 1305 */
1273int ecryptfs_write_headers_virt(char *page_virt, 1306static int ecryptfs_write_headers_virt(char *page_virt, size_t *size,
1274 struct ecryptfs_crypt_stat *crypt_stat, 1307 struct ecryptfs_crypt_stat *crypt_stat,
1275 struct dentry *ecryptfs_dentry) 1308 struct dentry *ecryptfs_dentry)
1276{ 1309{
1277 int rc; 1310 int rc;
1278 size_t written; 1311 size_t written;
@@ -1283,7 +1316,8 @@ int ecryptfs_write_headers_virt(char *page_virt,
1283 offset += written; 1316 offset += written;
1284 write_ecryptfs_flags((page_virt + offset), crypt_stat, &written); 1317 write_ecryptfs_flags((page_virt + offset), crypt_stat, &written);
1285 offset += written; 1318 offset += written;
1286 write_header_metadata((page_virt + offset), crypt_stat, &written); 1319 ecryptfs_write_header_metadata((page_virt + offset), crypt_stat,
1320 &written);
1287 offset += written; 1321 offset += written;
1288 rc = ecryptfs_generate_key_packet_set((page_virt + offset), crypt_stat, 1322 rc = ecryptfs_generate_key_packet_set((page_virt + offset), crypt_stat,
1289 ecryptfs_dentry, &written, 1323 ecryptfs_dentry, &written,
@@ -1291,11 +1325,70 @@ int ecryptfs_write_headers_virt(char *page_virt,
1291 if (rc) 1325 if (rc)
1292 ecryptfs_printk(KERN_WARNING, "Error generating key packet " 1326 ecryptfs_printk(KERN_WARNING, "Error generating key packet "
1293 "set; rc = [%d]\n", rc); 1327 "set; rc = [%d]\n", rc);
1328 if (size) {
1329 offset += written;
1330 *size = offset;
1331 }
1332 return rc;
1333}
1334
1335static int ecryptfs_write_metadata_to_contents(struct ecryptfs_crypt_stat *crypt_stat,
1336 struct file *lower_file,
1337 char *page_virt)
1338{
1339 mm_segment_t oldfs;
1340 int current_header_page;
1341 int header_pages;
1342 ssize_t size;
1343 int rc = 0;
1344
1345 lower_file->f_pos = 0;
1346 oldfs = get_fs();
1347 set_fs(get_ds());
1348 size = vfs_write(lower_file, (char __user *)page_virt, PAGE_CACHE_SIZE,
1349 &lower_file->f_pos);
1350 if (size < 0) {
1351 rc = (int)size;
1352 printk(KERN_ERR "Error attempting to write lower page; "
1353 "rc = [%d]\n", rc);
1354 set_fs(oldfs);
1355 goto out;
1356 }
1357 header_pages = ((crypt_stat->header_extent_size
1358 * crypt_stat->num_header_extents_at_front)
1359 / PAGE_CACHE_SIZE);
1360 memset(page_virt, 0, PAGE_CACHE_SIZE);
1361 current_header_page = 1;
1362 while (current_header_page < header_pages) {
1363 size = vfs_write(lower_file, (char __user *)page_virt,
1364 PAGE_CACHE_SIZE, &lower_file->f_pos);
1365 if (size < 0) {
1366 rc = (int)size;
1367 printk(KERN_ERR "Error attempting to write lower page; "
1368 "rc = [%d]\n", rc);
1369 set_fs(oldfs);
1370 goto out;
1371 }
1372 current_header_page++;
1373 }
1374 set_fs(oldfs);
1375out:
1376 return rc;
1377}
1378
1379static int ecryptfs_write_metadata_to_xattr(struct dentry *ecryptfs_dentry,
1380 struct ecryptfs_crypt_stat *crypt_stat,
1381 char *page_virt, size_t size)
1382{
1383 int rc;
1384
1385 rc = ecryptfs_setxattr(ecryptfs_dentry, ECRYPTFS_XATTR_NAME, page_virt,
1386 size, 0);
1294 return rc; 1387 return rc;
1295} 1388}
1296 1389
1297/** 1390/**
1298 * ecryptfs_write_headers 1391 * ecryptfs_write_metadata
1299 * @lower_file: The lower file struct, which was returned from dentry_open 1392 * @lower_file: The lower file struct, which was returned from dentry_open
1300 * 1393 *
1301 * Write the file headers out. This will likely involve a userspace 1394 * Write the file headers out. This will likely involve a userspace
@@ -1306,22 +1399,18 @@ int ecryptfs_write_headers_virt(char *page_virt,
1306 * 1399 *
1307 * Returns zero on success; non-zero on error 1400 * Returns zero on success; non-zero on error
1308 */ 1401 */
1309int ecryptfs_write_headers(struct dentry *ecryptfs_dentry, 1402int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry,
1310 struct file *lower_file) 1403 struct file *lower_file)
1311{ 1404{
1312 mm_segment_t oldfs;
1313 struct ecryptfs_crypt_stat *crypt_stat; 1405 struct ecryptfs_crypt_stat *crypt_stat;
1314 char *page_virt; 1406 char *page_virt;
1315 int current_header_page; 1407 size_t size;
1316 int header_pages;
1317 int rc = 0; 1408 int rc = 0;
1318 1409
1319 crypt_stat = &ecryptfs_inode_to_private( 1410 crypt_stat = &ecryptfs_inode_to_private(
1320 ecryptfs_dentry->d_inode)->crypt_stat; 1411 ecryptfs_dentry->d_inode)->crypt_stat;
1321 if (likely(ECRYPTFS_CHECK_FLAG(crypt_stat->flags, 1412 if (likely(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
1322 ECRYPTFS_ENCRYPTED))) { 1413 if (!(crypt_stat->flags & ECRYPTFS_KEY_VALID)) {
1323 if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags,
1324 ECRYPTFS_KEY_VALID)) {
1325 ecryptfs_printk(KERN_DEBUG, "Key is " 1414 ecryptfs_printk(KERN_DEBUG, "Key is "
1326 "invalid; bailing out\n"); 1415 "invalid; bailing out\n");
1327 rc = -EINVAL; 1416 rc = -EINVAL;
@@ -1334,54 +1423,42 @@ int ecryptfs_write_headers(struct dentry *ecryptfs_dentry,
1334 goto out; 1423 goto out;
1335 } 1424 }
1336 /* Released in this function */ 1425 /* Released in this function */
1337 page_virt = kmem_cache_alloc(ecryptfs_header_cache_0, GFP_USER); 1426 page_virt = kmem_cache_zalloc(ecryptfs_header_cache_0, GFP_USER);
1338 if (!page_virt) { 1427 if (!page_virt) {
1339 ecryptfs_printk(KERN_ERR, "Out of memory\n"); 1428 ecryptfs_printk(KERN_ERR, "Out of memory\n");
1340 rc = -ENOMEM; 1429 rc = -ENOMEM;
1341 goto out; 1430 goto out;
1342 } 1431 }
1343 memset(page_virt, 0, PAGE_CACHE_SIZE); 1432 rc = ecryptfs_write_headers_virt(page_virt, &size, crypt_stat,
1344 rc = ecryptfs_write_headers_virt(page_virt, crypt_stat, 1433 ecryptfs_dentry);
1345 ecryptfs_dentry);
1346 if (unlikely(rc)) { 1434 if (unlikely(rc)) {
1347 ecryptfs_printk(KERN_ERR, "Error whilst writing headers\n"); 1435 ecryptfs_printk(KERN_ERR, "Error whilst writing headers\n");
1348 memset(page_virt, 0, PAGE_CACHE_SIZE); 1436 memset(page_virt, 0, PAGE_CACHE_SIZE);
1349 goto out_free; 1437 goto out_free;
1350 } 1438 }
1351 ecryptfs_printk(KERN_DEBUG, 1439 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
1352 "Writing key packet set to underlying file\n"); 1440 rc = ecryptfs_write_metadata_to_xattr(ecryptfs_dentry,
1353 lower_file->f_pos = 0; 1441 crypt_stat, page_virt,
1354 oldfs = get_fs(); 1442 size);
1355 set_fs(get_ds()); 1443 else
1356 ecryptfs_printk(KERN_DEBUG, "Calling lower_file->f_op->" 1444 rc = ecryptfs_write_metadata_to_contents(crypt_stat, lower_file,
1357 "write() w/ header page; lower_file->f_pos = " 1445 page_virt);
1358 "[0x%.16x]\n", lower_file->f_pos); 1446 if (rc) {
1359 lower_file->f_op->write(lower_file, (char __user *)page_virt, 1447 printk(KERN_ERR "Error writing metadata out to lower file; "
1360 PAGE_CACHE_SIZE, &lower_file->f_pos); 1448 "rc = [%d]\n", rc);
1361 header_pages = ((crypt_stat->header_extent_size 1449 goto out_free;
1362 * crypt_stat->num_header_extents_at_front)
1363 / PAGE_CACHE_SIZE);
1364 memset(page_virt, 0, PAGE_CACHE_SIZE);
1365 current_header_page = 1;
1366 while (current_header_page < header_pages) {
1367 ecryptfs_printk(KERN_DEBUG, "Calling lower_file->f_op->"
1368 "write() w/ zero'd page; lower_file->f_pos = "
1369 "[0x%.16x]\n", lower_file->f_pos);
1370 lower_file->f_op->write(lower_file, (char __user *)page_virt,
1371 PAGE_CACHE_SIZE, &lower_file->f_pos);
1372 current_header_page++;
1373 } 1450 }
1374 set_fs(oldfs);
1375 ecryptfs_printk(KERN_DEBUG,
1376 "Done writing key packet set to underlying file.\n");
1377out_free: 1451out_free:
1378 kmem_cache_free(ecryptfs_header_cache_0, page_virt); 1452 kmem_cache_free(ecryptfs_header_cache_0, page_virt);
1379out: 1453out:
1380 return rc; 1454 return rc;
1381} 1455}
1382 1456
1457#define ECRYPTFS_DONT_VALIDATE_HEADER_SIZE 0
1458#define ECRYPTFS_VALIDATE_HEADER_SIZE 1
1383static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat, 1459static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat,
1384 char *virt, int *bytes_read) 1460 char *virt, int *bytes_read,
1461 int validate_header_size)
1385{ 1462{
1386 int rc = 0; 1463 int rc = 0;
1387 u32 header_extent_size; 1464 u32 header_extent_size;
@@ -1396,9 +1473,10 @@ static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat,
1396 crypt_stat->num_header_extents_at_front = 1473 crypt_stat->num_header_extents_at_front =
1397 (int)num_header_extents_at_front; 1474 (int)num_header_extents_at_front;
1398 (*bytes_read) = 6; 1475 (*bytes_read) = 6;
1399 if ((crypt_stat->header_extent_size 1476 if ((validate_header_size == ECRYPTFS_VALIDATE_HEADER_SIZE)
1400 * crypt_stat->num_header_extents_at_front) 1477 && ((crypt_stat->header_extent_size
1401 < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE) { 1478 * crypt_stat->num_header_extents_at_front)
1479 < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)) {
1402 rc = -EINVAL; 1480 rc = -EINVAL;
1403 ecryptfs_printk(KERN_WARNING, "Invalid header extent size: " 1481 ecryptfs_printk(KERN_WARNING, "Invalid header extent size: "
1404 "[%d]\n", crypt_stat->header_extent_size); 1482 "[%d]\n", crypt_stat->header_extent_size);
@@ -1429,7 +1507,8 @@ static void set_default_header_data(struct ecryptfs_crypt_stat *crypt_stat)
1429 */ 1507 */
1430static int ecryptfs_read_headers_virt(char *page_virt, 1508static int ecryptfs_read_headers_virt(char *page_virt,
1431 struct ecryptfs_crypt_stat *crypt_stat, 1509 struct ecryptfs_crypt_stat *crypt_stat,
1432 struct dentry *ecryptfs_dentry) 1510 struct dentry *ecryptfs_dentry,
1511 int validate_header_size)
1433{ 1512{
1434 int rc = 0; 1513 int rc = 0;
1435 int offset; 1514 int offset;
@@ -1463,7 +1542,7 @@ static int ecryptfs_read_headers_virt(char *page_virt,
1463 offset += bytes_read; 1542 offset += bytes_read;
1464 if (crypt_stat->file_version >= 1) { 1543 if (crypt_stat->file_version >= 1) {
1465 rc = parse_header_metadata(crypt_stat, (page_virt + offset), 1544 rc = parse_header_metadata(crypt_stat, (page_virt + offset),
1466 &bytes_read); 1545 &bytes_read, validate_header_size);
1467 if (rc) { 1546 if (rc) {
1468 ecryptfs_printk(KERN_WARNING, "Error reading header " 1547 ecryptfs_printk(KERN_WARNING, "Error reading header "
1469 "metadata; rc = [%d]\n", rc); 1548 "metadata; rc = [%d]\n", rc);
@@ -1478,12 +1557,60 @@ out:
1478} 1557}
1479 1558
1480/** 1559/**
1481 * ecryptfs_read_headers 1560 * ecryptfs_read_xattr_region
1561 *
1562 * Attempts to read the crypto metadata from the extended attribute
1563 * region of the lower file.
1564 */
1565int ecryptfs_read_xattr_region(char *page_virt, struct dentry *ecryptfs_dentry)
1566{
1567 ssize_t size;
1568 int rc = 0;
1569
1570 size = ecryptfs_getxattr(ecryptfs_dentry, ECRYPTFS_XATTR_NAME,
1571 page_virt, ECRYPTFS_DEFAULT_EXTENT_SIZE);
1572 if (size < 0) {
1573 printk(KERN_DEBUG "Error attempting to read the [%s] "
1574 "xattr from the lower file; return value = [%zd]\n",
1575 ECRYPTFS_XATTR_NAME, size);
1576 rc = -EINVAL;
1577 goto out;
1578 }
1579out:
1580 return rc;
1581}
1582
1583int ecryptfs_read_and_validate_xattr_region(char *page_virt,
1584 struct dentry *ecryptfs_dentry)
1585{
1586 int rc;
1587
1588 rc = ecryptfs_read_xattr_region(page_virt, ecryptfs_dentry);
1589 if (rc)
1590 goto out;
1591 if (!contains_ecryptfs_marker(page_virt + ECRYPTFS_FILE_SIZE_BYTES)) {
1592 printk(KERN_WARNING "Valid data found in [%s] xattr, but "
1593 "the marker is invalid\n", ECRYPTFS_XATTR_NAME);
1594 rc = -EINVAL;
1595 }
1596out:
1597 return rc;
1598}
1599
1600/**
1601 * ecryptfs_read_metadata
1602 *
1603 * Common entry point for reading file metadata. From here, we could
1604 * retrieve the header information from the header region of the file,
1605 * the xattr region of the file, or some other repostory that is
1606 * stored separately from the file itself. The current implementation
1607 * supports retrieving the metadata information from the file contents
1608 * and from the xattr region.
1482 * 1609 *
1483 * Returns zero if valid headers found and parsed; non-zero otherwise 1610 * Returns zero if valid headers found and parsed; non-zero otherwise
1484 */ 1611 */
1485int ecryptfs_read_headers(struct dentry *ecryptfs_dentry, 1612int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry,
1486 struct file *lower_file) 1613 struct file *lower_file)
1487{ 1614{
1488 int rc = 0; 1615 int rc = 0;
1489 char *page_virt = NULL; 1616 char *page_virt = NULL;
@@ -1491,7 +1618,12 @@ int ecryptfs_read_headers(struct dentry *ecryptfs_dentry,
1491 ssize_t bytes_read; 1618 ssize_t bytes_read;
1492 struct ecryptfs_crypt_stat *crypt_stat = 1619 struct ecryptfs_crypt_stat *crypt_stat =
1493 &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat; 1620 &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat;
1621 struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
1622 &ecryptfs_superblock_to_private(
1623 ecryptfs_dentry->d_sb)->mount_crypt_stat;
1494 1624
1625 ecryptfs_copy_mount_wide_flags_to_inode_flags(crypt_stat,
1626 mount_crypt_stat);
1495 /* Read the first page from the underlying file */ 1627 /* Read the first page from the underlying file */
1496 page_virt = kmem_cache_alloc(ecryptfs_header_cache_1, GFP_USER); 1628 page_virt = kmem_cache_alloc(ecryptfs_header_cache_1, GFP_USER);
1497 if (!page_virt) { 1629 if (!page_virt) {
@@ -1512,11 +1644,36 @@ int ecryptfs_read_headers(struct dentry *ecryptfs_dentry,
1512 goto out; 1644 goto out;
1513 } 1645 }
1514 rc = ecryptfs_read_headers_virt(page_virt, crypt_stat, 1646 rc = ecryptfs_read_headers_virt(page_virt, crypt_stat,
1515 ecryptfs_dentry); 1647 ecryptfs_dentry,
1648 ECRYPTFS_VALIDATE_HEADER_SIZE);
1516 if (rc) { 1649 if (rc) {
1517 ecryptfs_printk(KERN_DEBUG, "Valid eCryptfs headers not " 1650 rc = ecryptfs_read_xattr_region(page_virt,
1518 "found\n"); 1651 ecryptfs_dentry);
1519 rc = -EINVAL; 1652 if (rc) {
1653 printk(KERN_DEBUG "Valid eCryptfs headers not found in "
1654 "file header region or xattr region\n");
1655 rc = -EINVAL;
1656 goto out;
1657 }
1658 rc = ecryptfs_read_headers_virt(page_virt, crypt_stat,
1659 ecryptfs_dentry,
1660 ECRYPTFS_DONT_VALIDATE_HEADER_SIZE);
1661 if (rc) {
1662 printk(KERN_DEBUG "Valid eCryptfs headers not found in "
1663 "file xattr region either\n");
1664 rc = -EINVAL;
1665 }
1666 if (crypt_stat->mount_crypt_stat->flags
1667 & ECRYPTFS_XATTR_METADATA_ENABLED) {
1668 crypt_stat->flags |= ECRYPTFS_METADATA_IN_XATTR;
1669 } else {
1670 printk(KERN_WARNING "Attempt to access file with "
1671 "crypto metadata only in the extended attribute "
1672 "region, but eCryptfs was mounted without "
1673 "xattr support enabled. eCryptfs will not treat "
1674 "this like an encrypted file.\n");
1675 rc = -EINVAL;
1676 }
1520 } 1677 }
1521out: 1678out:
1522 if (page_virt) { 1679 if (page_virt) {
diff --git a/fs/ecryptfs/debug.c b/fs/ecryptfs/debug.c
index 61f8e894284f..434c7efd80f8 100644
--- a/fs/ecryptfs/debug.c
+++ b/fs/ecryptfs/debug.c
@@ -36,7 +36,7 @@ void ecryptfs_dump_auth_tok(struct ecryptfs_auth_tok *auth_tok)
36 36
37 ecryptfs_printk(KERN_DEBUG, "Auth tok at mem loc [%p]:\n", 37 ecryptfs_printk(KERN_DEBUG, "Auth tok at mem loc [%p]:\n",
38 auth_tok); 38 auth_tok);
39 if (ECRYPTFS_CHECK_FLAG(auth_tok->flags, ECRYPTFS_PRIVATE_KEY)) { 39 if (auth_tok->flags & ECRYPTFS_PRIVATE_KEY) {
40 ecryptfs_printk(KERN_DEBUG, " * private key type\n"); 40 ecryptfs_printk(KERN_DEBUG, " * private key type\n");
41 ecryptfs_printk(KERN_DEBUG, " * (NO PRIVATE KEY SUPPORT " 41 ecryptfs_printk(KERN_DEBUG, " * (NO PRIVATE KEY SUPPORT "
42 "IN ECRYPTFS VERSION 0.1)\n"); 42 "IN ECRYPTFS VERSION 0.1)\n");
@@ -46,8 +46,8 @@ void ecryptfs_dump_auth_tok(struct ecryptfs_auth_tok *auth_tok)
46 ECRYPTFS_SALT_SIZE); 46 ECRYPTFS_SALT_SIZE);
47 salt[ECRYPTFS_SALT_SIZE * 2] = '\0'; 47 salt[ECRYPTFS_SALT_SIZE * 2] = '\0';
48 ecryptfs_printk(KERN_DEBUG, " * salt = [%s]\n", salt); 48 ecryptfs_printk(KERN_DEBUG, " * salt = [%s]\n", salt);
49 if (ECRYPTFS_CHECK_FLAG(auth_tok->token.password.flags, 49 if (auth_tok->token.password.flags &
50 ECRYPTFS_PERSISTENT_PASSWORD)) { 50 ECRYPTFS_PERSISTENT_PASSWORD) {
51 ecryptfs_printk(KERN_DEBUG, " * persistent\n"); 51 ecryptfs_printk(KERN_DEBUG, " * persistent\n");
52 } 52 }
53 memcpy(sig, auth_tok->token.password.signature, 53 memcpy(sig, auth_tok->token.password.signature,
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index afb64bdbe6ad..b3609b7cdf11 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -4,8 +4,10 @@
4 * 4 *
5 * Copyright (C) 1997-2003 Erez Zadok 5 * Copyright (C) 1997-2003 Erez Zadok
6 * Copyright (C) 2001-2003 Stony Brook University 6 * Copyright (C) 2001-2003 Stony Brook University
7 * Copyright (C) 2004-2006 International Business Machines Corp. 7 * Copyright (C) 2004-2007 International Business Machines Corp.
8 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com> 8 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
9 * Trevor S. Highland <trevor.highland@gmail.com>
10 * Tyler Hicks <tyhicks@ou.edu>
9 * 11 *
10 * This program is free software; you can redistribute it and/or 12 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License as 13 * modify it under the terms of the GNU General Public License as
@@ -31,22 +33,25 @@
31#include <linux/fs_stack.h> 33#include <linux/fs_stack.h>
32#include <linux/namei.h> 34#include <linux/namei.h>
33#include <linux/scatterlist.h> 35#include <linux/scatterlist.h>
36#include <linux/hash.h>
34 37
35/* Version verification for shared data structures w/ userspace */ 38/* Version verification for shared data structures w/ userspace */
36#define ECRYPTFS_VERSION_MAJOR 0x00 39#define ECRYPTFS_VERSION_MAJOR 0x00
37#define ECRYPTFS_VERSION_MINOR 0x04 40#define ECRYPTFS_VERSION_MINOR 0x04
38#define ECRYPTFS_SUPPORTED_FILE_VERSION 0x01 41#define ECRYPTFS_SUPPORTED_FILE_VERSION 0x02
39/* These flags indicate which features are supported by the kernel 42/* These flags indicate which features are supported by the kernel
40 * module; userspace tools such as the mount helper read 43 * module; userspace tools such as the mount helper read
41 * ECRYPTFS_VERSIONING_MASK from a sysfs handle in order to determine 44 * ECRYPTFS_VERSIONING_MASK from a sysfs handle in order to determine
42 * how to behave. */ 45 * how to behave. */
43#define ECRYPTFS_VERSIONING_PASSPHRASE 0x00000001 46#define ECRYPTFS_VERSIONING_PASSPHRASE 0x00000001
44#define ECRYPTFS_VERSIONING_PUBKEY 0x00000002 47#define ECRYPTFS_VERSIONING_PUBKEY 0x00000002
45#define ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH 0x00000004 48#define ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH 0x00000004
46#define ECRYPTFS_VERSIONING_POLICY 0x00000008 49#define ECRYPTFS_VERSIONING_POLICY 0x00000008
50#define ECRYPTFS_VERSIONING_XATTR 0x00000010
47#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \ 51#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \
48 | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH) 52 | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \
49 53 | ECRYPTFS_VERSIONING_PUBKEY \
54 | ECRYPTFS_VERSIONING_XATTR)
50#define ECRYPTFS_MAX_PASSWORD_LENGTH 64 55#define ECRYPTFS_MAX_PASSWORD_LENGTH 64
51#define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH 56#define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH
52#define ECRYPTFS_SALT_SIZE 8 57#define ECRYPTFS_SALT_SIZE 8
@@ -60,10 +65,25 @@
60#define ECRYPTFS_MAX_KEY_BYTES 64 65#define ECRYPTFS_MAX_KEY_BYTES 64
61#define ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES 512 66#define ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES 512
62#define ECRYPTFS_DEFAULT_IV_BYTES 16 67#define ECRYPTFS_DEFAULT_IV_BYTES 16
63#define ECRYPTFS_FILE_VERSION 0x01 68#define ECRYPTFS_FILE_VERSION 0x02
64#define ECRYPTFS_DEFAULT_HEADER_EXTENT_SIZE 8192 69#define ECRYPTFS_DEFAULT_HEADER_EXTENT_SIZE 8192
65#define ECRYPTFS_DEFAULT_EXTENT_SIZE 4096 70#define ECRYPTFS_DEFAULT_EXTENT_SIZE 4096
66#define ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE 8192 71#define ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE 8192
72#define ECRYPTFS_DEFAULT_MSG_CTX_ELEMS 32
73#define ECRYPTFS_DEFAULT_SEND_TIMEOUT HZ
74#define ECRYPTFS_MAX_MSG_CTX_TTL (HZ*3)
75#define ECRYPTFS_NLMSG_HELO 100
76#define ECRYPTFS_NLMSG_QUIT 101
77#define ECRYPTFS_NLMSG_REQUEST 102
78#define ECRYPTFS_NLMSG_RESPONSE 103
79#define ECRYPTFS_MAX_PKI_NAME_BYTES 16
80#define ECRYPTFS_DEFAULT_NUM_USERS 4
81#define ECRYPTFS_MAX_NUM_USERS 32768
82#define ECRYPTFS_TRANSPORT_NETLINK 0
83#define ECRYPTFS_TRANSPORT_CONNECTOR 1
84#define ECRYPTFS_TRANSPORT_RELAYFS 2
85#define ECRYPTFS_DEFAULT_TRANSPORT ECRYPTFS_TRANSPORT_NETLINK
86#define ECRYPTFS_XATTR_NAME "user.ecryptfs"
67 87
68#define RFC2440_CIPHER_DES3_EDE 0x02 88#define RFC2440_CIPHER_DES3_EDE 0x02
69#define RFC2440_CIPHER_CAST_5 0x03 89#define RFC2440_CIPHER_CAST_5 0x03
@@ -74,9 +94,7 @@
74#define RFC2440_CIPHER_TWOFISH 0x0a 94#define RFC2440_CIPHER_TWOFISH 0x0a
75#define RFC2440_CIPHER_CAST_6 0x0b 95#define RFC2440_CIPHER_CAST_6 0x0b
76 96
77#define ECRYPTFS_SET_FLAG(flag_bit_vector, flag) (flag_bit_vector |= (flag)) 97#define RFC2440_CIPHER_RSA 0x01
78#define ECRYPTFS_CLEAR_FLAG(flag_bit_vector, flag) (flag_bit_vector &= ~(flag))
79#define ECRYPTFS_CHECK_FLAG(flag_bit_vector, flag) (flag_bit_vector & (flag))
80 98
81/** 99/**
82 * For convenience, we may need to pass around the encrypted session 100 * For convenience, we may need to pass around the encrypted session
@@ -114,6 +132,14 @@ struct ecryptfs_password {
114 132
115enum ecryptfs_token_types {ECRYPTFS_PASSWORD, ECRYPTFS_PRIVATE_KEY}; 133enum ecryptfs_token_types {ECRYPTFS_PASSWORD, ECRYPTFS_PRIVATE_KEY};
116 134
135struct ecryptfs_private_key {
136 u32 key_size;
137 u32 data_len;
138 u8 signature[ECRYPTFS_PASSWORD_SIG_SIZE + 1];
139 char pki_type[ECRYPTFS_MAX_PKI_NAME_BYTES + 1];
140 u8 data[];
141};
142
117/* May be a password or a private key */ 143/* May be a password or a private key */
118struct ecryptfs_auth_tok { 144struct ecryptfs_auth_tok {
119 u16 version; /* 8-bit major and 8-bit minor */ 145 u16 version; /* 8-bit major and 8-bit minor */
@@ -123,7 +149,7 @@ struct ecryptfs_auth_tok {
123 u8 reserved[32]; 149 u8 reserved[32];
124 union { 150 union {
125 struct ecryptfs_password password; 151 struct ecryptfs_password password;
126 /* Private key is in future eCryptfs releases */ 152 struct ecryptfs_private_key private_key;
127 } token; 153 } token;
128} __attribute__ ((packed)); 154} __attribute__ ((packed));
129 155
@@ -176,10 +202,14 @@ ecryptfs_get_key_payload_data(struct key *key)
176#define ECRYPTFS_FILE_SIZE_BYTES 8 202#define ECRYPTFS_FILE_SIZE_BYTES 8
177#define ECRYPTFS_DEFAULT_CIPHER "aes" 203#define ECRYPTFS_DEFAULT_CIPHER "aes"
178#define ECRYPTFS_DEFAULT_KEY_BYTES 16 204#define ECRYPTFS_DEFAULT_KEY_BYTES 16
179#define ECRYPTFS_DEFAULT_CHAINING_MODE CRYPTO_TFM_MODE_CBC
180#define ECRYPTFS_DEFAULT_HASH "md5" 205#define ECRYPTFS_DEFAULT_HASH "md5"
206#define ECRYPTFS_TAG_1_PACKET_TYPE 0x01
181#define ECRYPTFS_TAG_3_PACKET_TYPE 0x8C 207#define ECRYPTFS_TAG_3_PACKET_TYPE 0x8C
182#define ECRYPTFS_TAG_11_PACKET_TYPE 0xED 208#define ECRYPTFS_TAG_11_PACKET_TYPE 0xED
209#define ECRYPTFS_TAG_64_PACKET_TYPE 0x40
210#define ECRYPTFS_TAG_65_PACKET_TYPE 0x41
211#define ECRYPTFS_TAG_66_PACKET_TYPE 0x42
212#define ECRYPTFS_TAG_67_PACKET_TYPE 0x43
183#define MD5_DIGEST_SIZE 16 213#define MD5_DIGEST_SIZE 16
184 214
185/** 215/**
@@ -196,6 +226,8 @@ struct ecryptfs_crypt_stat {
196#define ECRYPTFS_ENABLE_HMAC 0x00000020 226#define ECRYPTFS_ENABLE_HMAC 0x00000020
197#define ECRYPTFS_ENCRYPT_IV_PAGES 0x00000040 227#define ECRYPTFS_ENCRYPT_IV_PAGES 0x00000040
198#define ECRYPTFS_KEY_VALID 0x00000080 228#define ECRYPTFS_KEY_VALID 0x00000080
229#define ECRYPTFS_METADATA_IN_XATTR 0x00000100
230#define ECRYPTFS_VIEW_AS_ENCRYPTED 0x00000200
199 u32 flags; 231 u32 flags;
200 unsigned int file_version; 232 unsigned int file_version;
201 size_t iv_bytes; 233 size_t iv_bytes;
@@ -242,6 +274,8 @@ struct ecryptfs_dentry_info {
242struct ecryptfs_mount_crypt_stat { 274struct ecryptfs_mount_crypt_stat {
243 /* Pointers to memory we do not own, do not free these */ 275 /* Pointers to memory we do not own, do not free these */
244#define ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED 0x00000001 276#define ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED 0x00000001
277#define ECRYPTFS_XATTR_METADATA_ENABLED 0x00000002
278#define ECRYPTFS_ENCRYPTED_VIEW_ENABLED 0x00000004
245 u32 flags; 279 u32 flags;
246 struct ecryptfs_auth_tok *global_auth_tok; 280 struct ecryptfs_auth_tok *global_auth_tok;
247 struct key *global_auth_tok_key; 281 struct key *global_auth_tok_key;
@@ -272,6 +306,33 @@ struct ecryptfs_auth_tok_list_item {
272 struct ecryptfs_auth_tok auth_tok; 306 struct ecryptfs_auth_tok auth_tok;
273}; 307};
274 308
309struct ecryptfs_message {
310 u32 index;
311 u32 data_len;
312 u8 data[];
313};
314
315struct ecryptfs_msg_ctx {
316#define ECRYPTFS_MSG_CTX_STATE_FREE 0x0001
317#define ECRYPTFS_MSG_CTX_STATE_PENDING 0x0002
318#define ECRYPTFS_MSG_CTX_STATE_DONE 0x0003
319 u32 state;
320 unsigned int index;
321 unsigned int counter;
322 struct ecryptfs_message *msg;
323 struct task_struct *task;
324 struct list_head node;
325 struct mutex mux;
326};
327
328extern unsigned int ecryptfs_transport;
329
330struct ecryptfs_daemon_id {
331 pid_t pid;
332 uid_t uid;
333 struct hlist_node id_chain;
334};
335
275static inline struct ecryptfs_file_info * 336static inline struct ecryptfs_file_info *
276ecryptfs_file_to_private(struct file *file) 337ecryptfs_file_to_private(struct file *file)
277{ 338{
@@ -385,13 +446,16 @@ void __ecryptfs_printk(const char *fmt, ...);
385 446
386extern const struct file_operations ecryptfs_main_fops; 447extern const struct file_operations ecryptfs_main_fops;
387extern const struct file_operations ecryptfs_dir_fops; 448extern const struct file_operations ecryptfs_dir_fops;
388extern struct inode_operations ecryptfs_main_iops; 449extern const struct inode_operations ecryptfs_main_iops;
389extern struct inode_operations ecryptfs_dir_iops; 450extern const struct inode_operations ecryptfs_dir_iops;
390extern struct inode_operations ecryptfs_symlink_iops; 451extern const struct inode_operations ecryptfs_symlink_iops;
391extern struct super_operations ecryptfs_sops; 452extern const struct super_operations ecryptfs_sops;
392extern struct dentry_operations ecryptfs_dops; 453extern struct dentry_operations ecryptfs_dops;
393extern struct address_space_operations ecryptfs_aops; 454extern struct address_space_operations ecryptfs_aops;
394extern int ecryptfs_verbosity; 455extern int ecryptfs_verbosity;
456extern unsigned int ecryptfs_message_buf_len;
457extern signed long ecryptfs_message_wait_timeout;
458extern unsigned int ecryptfs_number_of_users;
395 459
396extern struct kmem_cache *ecryptfs_auth_tok_list_item_cache; 460extern struct kmem_cache *ecryptfs_auth_tok_list_item_cache;
397extern struct kmem_cache *ecryptfs_file_info_cache; 461extern struct kmem_cache *ecryptfs_file_info_cache;
@@ -401,6 +465,7 @@ extern struct kmem_cache *ecryptfs_sb_info_cache;
401extern struct kmem_cache *ecryptfs_header_cache_0; 465extern struct kmem_cache *ecryptfs_header_cache_0;
402extern struct kmem_cache *ecryptfs_header_cache_1; 466extern struct kmem_cache *ecryptfs_header_cache_1;
403extern struct kmem_cache *ecryptfs_header_cache_2; 467extern struct kmem_cache *ecryptfs_header_cache_2;
468extern struct kmem_cache *ecryptfs_xattr_cache;
404extern struct kmem_cache *ecryptfs_lower_page_cache; 469extern struct kmem_cache *ecryptfs_lower_page_cache;
405 470
406int ecryptfs_interpose(struct dentry *hidden_dentry, 471int ecryptfs_interpose(struct dentry *hidden_dentry,
@@ -427,9 +492,13 @@ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat);
427int ecryptfs_crypto_api_algify_cipher_name(char **algified_name, 492int ecryptfs_crypto_api_algify_cipher_name(char **algified_name,
428 char *cipher_name, 493 char *cipher_name,
429 char *chaining_modifier); 494 char *chaining_modifier);
430int ecryptfs_write_inode_size_to_header(struct file *lower_file, 495#define ECRYPTFS_LOWER_I_MUTEX_NOT_HELD 0
431 struct inode *lower_inode, 496#define ECRYPTFS_LOWER_I_MUTEX_HELD 1
432 struct inode *inode); 497int ecryptfs_write_inode_size_to_metadata(struct file *lower_file,
498 struct inode *lower_inode,
499 struct inode *inode,
500 struct dentry *ecryptfs_dentry,
501 int lower_i_mutex_held);
433int ecryptfs_get_lower_page(struct page **lower_page, struct inode *lower_inode, 502int ecryptfs_get_lower_page(struct page **lower_page, struct inode *lower_inode,
434 struct file *lower_file, 503 struct file *lower_file,
435 unsigned long lower_page_index, int byte_offset, 504 unsigned long lower_page_index, int byte_offset,
@@ -442,26 +511,20 @@ int ecryptfs_copy_page_to_lower(struct page *page, struct inode *lower_inode,
442 struct file *lower_file); 511 struct file *lower_file);
443int ecryptfs_do_readpage(struct file *file, struct page *page, 512int ecryptfs_do_readpage(struct file *file, struct page *page,
444 pgoff_t lower_page_index); 513 pgoff_t lower_page_index);
445int ecryptfs_grab_and_map_lower_page(struct page **lower_page,
446 char **lower_virt,
447 struct inode *lower_inode,
448 unsigned long lower_page_index);
449int ecryptfs_writepage_and_release_lower_page(struct page *lower_page, 514int ecryptfs_writepage_and_release_lower_page(struct page *lower_page,
450 struct inode *lower_inode, 515 struct inode *lower_inode,
451 struct writeback_control *wbc); 516 struct writeback_control *wbc);
452int ecryptfs_encrypt_page(struct ecryptfs_page_crypt_context *ctx); 517int ecryptfs_encrypt_page(struct ecryptfs_page_crypt_context *ctx);
453int ecryptfs_decrypt_page(struct file *file, struct page *page); 518int ecryptfs_decrypt_page(struct file *file, struct page *page);
454int ecryptfs_write_headers(struct dentry *ecryptfs_dentry, 519int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry,
520 struct file *lower_file);
521int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry,
455 struct file *lower_file); 522 struct file *lower_file);
456int ecryptfs_write_headers_virt(char *page_virt,
457 struct ecryptfs_crypt_stat *crypt_stat,
458 struct dentry *ecryptfs_dentry);
459int ecryptfs_read_headers(struct dentry *ecryptfs_dentry,
460 struct file *lower_file);
461int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry); 523int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry);
462int contains_ecryptfs_marker(char *data); 524int ecryptfs_read_and_validate_header_region(char *data, struct dentry *dentry,
463int ecryptfs_read_header_region(char *data, struct dentry *dentry, 525 struct vfsmount *mnt);
464 struct vfsmount *mnt); 526int ecryptfs_read_and_validate_xattr_region(char *page_virt,
527 struct dentry *ecryptfs_dentry);
465u16 ecryptfs_code_for_cipher_string(struct ecryptfs_crypt_stat *crypt_stat); 528u16 ecryptfs_code_for_cipher_string(struct ecryptfs_crypt_stat *crypt_stat);
466int ecryptfs_cipher_code_to_string(char *str, u16 cipher_code); 529int ecryptfs_cipher_code_to_string(char *str, u16 cipher_code);
467void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat); 530void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat);
@@ -484,5 +547,37 @@ int ecryptfs_open_lower_file(struct file **lower_file,
484 struct dentry *lower_dentry, 547 struct dentry *lower_dentry,
485 struct vfsmount *lower_mnt, int flags); 548 struct vfsmount *lower_mnt, int flags);
486int ecryptfs_close_lower_file(struct file *lower_file); 549int ecryptfs_close_lower_file(struct file *lower_file);
550ssize_t ecryptfs_getxattr(struct dentry *dentry, const char *name, void *value,
551 size_t size);
552int
553ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
554 size_t size, int flags);
555int ecryptfs_read_xattr_region(char *page_virt, struct dentry *ecryptfs_dentry);
556int ecryptfs_process_helo(unsigned int transport, uid_t uid, pid_t pid);
557int ecryptfs_process_quit(uid_t uid, pid_t pid);
558int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t uid,
559 pid_t pid, u32 seq);
560int ecryptfs_send_message(unsigned int transport, char *data, int data_len,
561 struct ecryptfs_msg_ctx **msg_ctx);
562int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx,
563 struct ecryptfs_message **emsg);
564int ecryptfs_init_messaging(unsigned int transport);
565void ecryptfs_release_messaging(unsigned int transport);
566
567int ecryptfs_send_netlink(char *data, int data_len,
568 struct ecryptfs_msg_ctx *msg_ctx, u16 msg_type,
569 u16 msg_flags, pid_t daemon_pid);
570int ecryptfs_init_netlink(void);
571void ecryptfs_release_netlink(void);
572
573int ecryptfs_send_connector(char *data, int data_len,
574 struct ecryptfs_msg_ctx *msg_ctx, u16 msg_type,
575 u16 msg_flags, pid_t daemon_pid);
576int ecryptfs_init_connector(void);
577void ecryptfs_release_connector(void);
578void
579ecryptfs_write_header_metadata(char *virt,
580 struct ecryptfs_crypt_stat *crypt_stat,
581 size_t *written);
487 582
488#endif /* #ifndef ECRYPTFS_KERNEL_H */ 583#endif /* #ifndef ECRYPTFS_KERNEL_H */
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index c5a2e5298f15..bd969adf70d7 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * Copyright (C) 1997-2004 Erez Zadok 4 * Copyright (C) 1997-2004 Erez Zadok
5 * Copyright (C) 2001-2004 Stony Brook University 5 * Copyright (C) 2001-2004 Stony Brook University
6 * Copyright (C) 2004-2006 International Business Machines Corp. 6 * Copyright (C) 2004-2007 International Business Machines Corp.
7 * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com> 7 * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com>
8 * Michael C. Thompson <mcthomps@us.ibm.com> 8 * Michael C. Thompson <mcthomps@us.ibm.com>
9 * 9 *
@@ -250,8 +250,19 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
250 struct ecryptfs_file_info *file_info; 250 struct ecryptfs_file_info *file_info;
251 int lower_flags; 251 int lower_flags;
252 252
253 mount_crypt_stat = &ecryptfs_superblock_to_private(
254 ecryptfs_dentry->d_sb)->mount_crypt_stat;
255 if ((mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)
256 && ((file->f_flags & O_WRONLY) || (file->f_flags & O_RDWR)
257 || (file->f_flags & O_CREAT) || (file->f_flags & O_TRUNC)
258 || (file->f_flags & O_APPEND))) {
259 printk(KERN_WARNING "Mount has encrypted view enabled; "
260 "files may only be read\n");
261 rc = -EPERM;
262 goto out;
263 }
253 /* Released in ecryptfs_release or end of function if failure */ 264 /* Released in ecryptfs_release or end of function if failure */
254 file_info = kmem_cache_alloc(ecryptfs_file_info_cache, GFP_KERNEL); 265 file_info = kmem_cache_zalloc(ecryptfs_file_info_cache, GFP_KERNEL);
255 ecryptfs_set_file_private(file, file_info); 266 ecryptfs_set_file_private(file, file_info);
256 if (!file_info) { 267 if (!file_info) {
257 ecryptfs_printk(KERN_ERR, 268 ecryptfs_printk(KERN_ERR,
@@ -259,17 +270,14 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
259 rc = -ENOMEM; 270 rc = -ENOMEM;
260 goto out; 271 goto out;
261 } 272 }
262 memset(file_info, 0, sizeof(*file_info));
263 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); 273 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
264 crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat; 274 crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat;
265 mount_crypt_stat = &ecryptfs_superblock_to_private(
266 ecryptfs_dentry->d_sb)->mount_crypt_stat;
267 mutex_lock(&crypt_stat->cs_mutex); 275 mutex_lock(&crypt_stat->cs_mutex);
268 if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_POLICY_APPLIED)) { 276 if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)) {
269 ecryptfs_printk(KERN_DEBUG, "Setting flags for stat...\n"); 277 ecryptfs_printk(KERN_DEBUG, "Setting flags for stat...\n");
270 /* Policy code enabled in future release */ 278 /* Policy code enabled in future release */
271 ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_POLICY_APPLIED); 279 crypt_stat->flags |= ECRYPTFS_POLICY_APPLIED;
272 ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED); 280 crypt_stat->flags |= ECRYPTFS_ENCRYPTED;
273 } 281 }
274 mutex_unlock(&crypt_stat->cs_mutex); 282 mutex_unlock(&crypt_stat->cs_mutex);
275 lower_flags = file->f_flags; 283 lower_flags = file->f_flags;
@@ -289,31 +297,14 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
289 lower_inode = lower_dentry->d_inode; 297 lower_inode = lower_dentry->d_inode;
290 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) { 298 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
291 ecryptfs_printk(KERN_DEBUG, "This is a directory\n"); 299 ecryptfs_printk(KERN_DEBUG, "This is a directory\n");
292 ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED); 300 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
293 rc = 0; 301 rc = 0;
294 goto out; 302 goto out;
295 } 303 }
296 mutex_lock(&crypt_stat->cs_mutex); 304 mutex_lock(&crypt_stat->cs_mutex);
297 if (i_size_read(lower_inode) < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE) { 305 if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)
298 if (!(mount_crypt_stat->flags 306 || !(crypt_stat->flags & ECRYPTFS_KEY_VALID)) {
299 & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) { 307 rc = ecryptfs_read_metadata(ecryptfs_dentry, lower_file);
300 rc = -EIO;
301 printk(KERN_WARNING "Attempt to read file that is "
302 "not in a valid eCryptfs format, and plaintext "
303 "passthrough mode is not enabled; returning "
304 "-EIO\n");
305 mutex_unlock(&crypt_stat->cs_mutex);
306 goto out_puts;
307 }
308 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
309 rc = 0;
310 mutex_unlock(&crypt_stat->cs_mutex);
311 goto out;
312 } else if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags,
313 ECRYPTFS_POLICY_APPLIED)
314 || !ECRYPTFS_CHECK_FLAG(crypt_stat->flags,
315 ECRYPTFS_KEY_VALID)) {
316 rc = ecryptfs_read_headers(ecryptfs_dentry, lower_file);
317 if (rc) { 308 if (rc) {
318 ecryptfs_printk(KERN_DEBUG, 309 ecryptfs_printk(KERN_DEBUG,
319 "Valid headers not found\n"); 310 "Valid headers not found\n");
@@ -327,9 +318,8 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
327 mutex_unlock(&crypt_stat->cs_mutex); 318 mutex_unlock(&crypt_stat->cs_mutex);
328 goto out_puts; 319 goto out_puts;
329 } 320 }
330 ECRYPTFS_CLEAR_FLAG(crypt_stat->flags,
331 ECRYPTFS_ENCRYPTED);
332 rc = 0; 321 rc = 0;
322 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
333 mutex_unlock(&crypt_stat->cs_mutex); 323 mutex_unlock(&crypt_stat->cs_mutex);
334 goto out; 324 goto out;
335 } 325 }
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 11f5e5076aef..9fa7e0b27a96 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * Copyright (C) 1997-2004 Erez Zadok 4 * Copyright (C) 1997-2004 Erez Zadok
5 * Copyright (C) 2001-2004 Stony Brook University 5 * Copyright (C) 2001-2004 Stony Brook University
6 * Copyright (C) 2004-2006 International Business Machines Corp. 6 * Copyright (C) 2004-2007 International Business Machines Corp.
7 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com> 7 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
8 * Michael C. Thompsion <mcthomps@us.ibm.com> 8 * Michael C. Thompsion <mcthomps@us.ibm.com>
9 * 9 *
@@ -161,17 +161,17 @@ static int grow_file(struct dentry *ecryptfs_dentry, struct file *lower_file,
161 ecryptfs_set_file_lower(&fake_file, lower_file); 161 ecryptfs_set_file_lower(&fake_file, lower_file);
162 rc = ecryptfs_fill_zeros(&fake_file, 1); 162 rc = ecryptfs_fill_zeros(&fake_file, 1);
163 if (rc) { 163 if (rc) {
164 ECRYPTFS_SET_FLAG( 164 ecryptfs_inode_to_private(inode)->crypt_stat.flags |=
165 ecryptfs_inode_to_private(inode)->crypt_stat.flags, 165 ECRYPTFS_SECURITY_WARNING;
166 ECRYPTFS_SECURITY_WARNING);
167 ecryptfs_printk(KERN_WARNING, "Error attempting to fill zeros " 166 ecryptfs_printk(KERN_WARNING, "Error attempting to fill zeros "
168 "in file; rc = [%d]\n", rc); 167 "in file; rc = [%d]\n", rc);
169 goto out; 168 goto out;
170 } 169 }
171 i_size_write(inode, 0); 170 i_size_write(inode, 0);
172 ecryptfs_write_inode_size_to_header(lower_file, lower_inode, inode); 171 ecryptfs_write_inode_size_to_metadata(lower_file, lower_inode, inode,
173 ECRYPTFS_SET_FLAG(ecryptfs_inode_to_private(inode)->crypt_stat.flags, 172 ecryptfs_dentry,
174 ECRYPTFS_NEW_FILE); 173 ECRYPTFS_LOWER_I_MUTEX_NOT_HELD);
174 ecryptfs_inode_to_private(inode)->crypt_stat.flags |= ECRYPTFS_NEW_FILE;
175out: 175out:
176 return rc; 176 return rc;
177} 177}
@@ -199,7 +199,7 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
199 lower_dentry->d_name.name); 199 lower_dentry->d_name.name);
200 inode = ecryptfs_dentry->d_inode; 200 inode = ecryptfs_dentry->d_inode;
201 crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat; 201 crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat;
202 lower_flags = ((O_CREAT | O_WRONLY | O_TRUNC) & O_ACCMODE) | O_RDWR; 202 lower_flags = ((O_CREAT | O_TRUNC) & O_ACCMODE) | O_RDWR;
203#if BITS_PER_LONG != 32 203#if BITS_PER_LONG != 32
204 lower_flags |= O_LARGEFILE; 204 lower_flags |= O_LARGEFILE;
205#endif 205#endif
@@ -214,10 +214,10 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
214 lower_inode = lower_dentry->d_inode; 214 lower_inode = lower_dentry->d_inode;
215 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) { 215 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
216 ecryptfs_printk(KERN_DEBUG, "This is a directory\n"); 216 ecryptfs_printk(KERN_DEBUG, "This is a directory\n");
217 ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED); 217 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
218 goto out_fput; 218 goto out_fput;
219 } 219 }
220 ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_NEW_FILE); 220 crypt_stat->flags |= ECRYPTFS_NEW_FILE;
221 ecryptfs_printk(KERN_DEBUG, "Initializing crypto context\n"); 221 ecryptfs_printk(KERN_DEBUG, "Initializing crypto context\n");
222 rc = ecryptfs_new_file_context(ecryptfs_dentry); 222 rc = ecryptfs_new_file_context(ecryptfs_dentry);
223 if (rc) { 223 if (rc) {
@@ -225,7 +225,7 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
225 "context\n"); 225 "context\n");
226 goto out_fput; 226 goto out_fput;
227 } 227 }
228 rc = ecryptfs_write_headers(ecryptfs_dentry, lower_file); 228 rc = ecryptfs_write_metadata(ecryptfs_dentry, lower_file);
229 if (rc) { 229 if (rc) {
230 ecryptfs_printk(KERN_DEBUG, "Error writing headers\n"); 230 ecryptfs_printk(KERN_DEBUG, "Error writing headers\n");
231 goto out_fput; 231 goto out_fput;
@@ -287,6 +287,7 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
287 char *encoded_name; 287 char *encoded_name;
288 unsigned int encoded_namelen; 288 unsigned int encoded_namelen;
289 struct ecryptfs_crypt_stat *crypt_stat = NULL; 289 struct ecryptfs_crypt_stat *crypt_stat = NULL;
290 struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
290 char *page_virt = NULL; 291 char *page_virt = NULL;
291 struct inode *lower_inode; 292 struct inode *lower_inode;
292 u64 file_size; 293 u64 file_size;
@@ -361,34 +362,44 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
361 goto out; 362 goto out;
362 } 363 }
363 /* Released in this function */ 364 /* Released in this function */
364 page_virt = 365 page_virt = kmem_cache_zalloc(ecryptfs_header_cache_2,
365 (char *)kmem_cache_alloc(ecryptfs_header_cache_2, 366 GFP_USER);
366 GFP_USER);
367 if (!page_virt) { 367 if (!page_virt) {
368 rc = -ENOMEM; 368 rc = -ENOMEM;
369 ecryptfs_printk(KERN_ERR, 369 ecryptfs_printk(KERN_ERR,
370 "Cannot ecryptfs_kmalloc a page\n"); 370 "Cannot ecryptfs_kmalloc a page\n");
371 goto out_dput; 371 goto out_dput;
372 } 372 }
373 memset(page_virt, 0, PAGE_CACHE_SIZE);
374 rc = ecryptfs_read_header_region(page_virt, lower_dentry, nd->mnt);
375 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; 373 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
376 if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_POLICY_APPLIED)) 374 if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED))
377 ecryptfs_set_default_sizes(crypt_stat); 375 ecryptfs_set_default_sizes(crypt_stat);
376 rc = ecryptfs_read_and_validate_header_region(page_virt, lower_dentry,
377 nd->mnt);
378 if (rc) { 378 if (rc) {
379 rc = 0; 379 rc = ecryptfs_read_and_validate_xattr_region(page_virt, dentry);
380 ecryptfs_printk(KERN_WARNING, "Error reading header region;" 380 if (rc) {
381 " assuming unencrypted\n"); 381 printk(KERN_DEBUG "Valid metadata not found in header "
382 } else { 382 "region or xattr region; treating file as "
383 if (!contains_ecryptfs_marker(page_virt 383 "unencrypted\n");
384 + ECRYPTFS_FILE_SIZE_BYTES)) { 384 rc = 0;
385 kmem_cache_free(ecryptfs_header_cache_2, page_virt); 385 kmem_cache_free(ecryptfs_header_cache_2, page_virt);
386 goto out; 386 goto out;
387 } 387 }
388 crypt_stat->flags |= ECRYPTFS_METADATA_IN_XATTR;
389 }
390 mount_crypt_stat = &ecryptfs_superblock_to_private(
391 dentry->d_sb)->mount_crypt_stat;
392 if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) {
393 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
394 file_size = (crypt_stat->header_extent_size
395 + i_size_read(lower_dentry->d_inode));
396 else
397 file_size = i_size_read(lower_dentry->d_inode);
398 } else {
388 memcpy(&file_size, page_virt, sizeof(file_size)); 399 memcpy(&file_size, page_virt, sizeof(file_size));
389 file_size = be64_to_cpu(file_size); 400 file_size = be64_to_cpu(file_size);
390 i_size_write(dentry->d_inode, (loff_t)file_size);
391 } 401 }
402 i_size_write(dentry->d_inode, (loff_t)file_size);
392 kmem_cache_free(ecryptfs_header_cache_2, page_virt); 403 kmem_cache_free(ecryptfs_header_cache_2, page_virt);
393 goto out; 404 goto out;
394 405
@@ -782,20 +793,26 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
782 goto out_fput; 793 goto out_fput;
783 } 794 }
784 i_size_write(inode, new_length); 795 i_size_write(inode, new_length);
785 rc = ecryptfs_write_inode_size_to_header(lower_file, 796 rc = ecryptfs_write_inode_size_to_metadata(
786 lower_dentry->d_inode, 797 lower_file, lower_dentry->d_inode, inode, dentry,
787 inode); 798 ECRYPTFS_LOWER_I_MUTEX_NOT_HELD);
788 if (rc) { 799 if (rc) {
789 ecryptfs_printk(KERN_ERR, 800 printk(KERN_ERR "Problem with "
790 "Problem with ecryptfs_write" 801 "ecryptfs_write_inode_size_to_metadata; "
791 "_inode_size\n"); 802 "rc = [%d]\n", rc);
792 goto out_fput; 803 goto out_fput;
793 } 804 }
794 } else { /* new_length < i_size_read(inode) */ 805 } else { /* new_length < i_size_read(inode) */
795 vmtruncate(inode, new_length); 806 vmtruncate(inode, new_length);
796 ecryptfs_write_inode_size_to_header(lower_file, 807 rc = ecryptfs_write_inode_size_to_metadata(
797 lower_dentry->d_inode, 808 lower_file, lower_dentry->d_inode, inode, dentry,
798 inode); 809 ECRYPTFS_LOWER_I_MUTEX_NOT_HELD);
810 if (rc) {
811 printk(KERN_ERR "Problem with "
812 "ecryptfs_write_inode_size_to_metadata; "
813 "rc = [%d]\n", rc);
814 goto out_fput;
815 }
799 /* We are reducing the size of the ecryptfs file, and need to 816 /* We are reducing the size of the ecryptfs file, and need to
800 * know if we need to reduce the size of the lower file. */ 817 * know if we need to reduce the size of the lower file. */
801 lower_size_before_truncate = 818 lower_size_before_truncate =
@@ -882,7 +899,7 @@ out:
882 return rc; 899 return rc;
883} 900}
884 901
885static int 902int
886ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, 903ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
887 size_t size, int flags) 904 size_t size, int flags)
888{ 905{
@@ -902,7 +919,7 @@ out:
902 return rc; 919 return rc;
903} 920}
904 921
905static ssize_t 922ssize_t
906ecryptfs_getxattr(struct dentry *dentry, const char *name, void *value, 923ecryptfs_getxattr(struct dentry *dentry, const char *name, void *value,
907 size_t size) 924 size_t size)
908{ 925{
@@ -972,7 +989,7 @@ int ecryptfs_inode_set(struct inode *inode, void *lower_inode)
972 return 0; 989 return 0;
973} 990}
974 991
975struct inode_operations ecryptfs_symlink_iops = { 992const struct inode_operations ecryptfs_symlink_iops = {
976 .readlink = ecryptfs_readlink, 993 .readlink = ecryptfs_readlink,
977 .follow_link = ecryptfs_follow_link, 994 .follow_link = ecryptfs_follow_link,
978 .put_link = ecryptfs_put_link, 995 .put_link = ecryptfs_put_link,
@@ -984,7 +1001,7 @@ struct inode_operations ecryptfs_symlink_iops = {
984 .removexattr = ecryptfs_removexattr 1001 .removexattr = ecryptfs_removexattr
985}; 1002};
986 1003
987struct inode_operations ecryptfs_dir_iops = { 1004const struct inode_operations ecryptfs_dir_iops = {
988 .create = ecryptfs_create, 1005 .create = ecryptfs_create,
989 .lookup = ecryptfs_lookup, 1006 .lookup = ecryptfs_lookup,
990 .link = ecryptfs_link, 1007 .link = ecryptfs_link,
@@ -1002,7 +1019,7 @@ struct inode_operations ecryptfs_dir_iops = {
1002 .removexattr = ecryptfs_removexattr 1019 .removexattr = ecryptfs_removexattr
1003}; 1020};
1004 1021
1005struct inode_operations ecryptfs_main_iops = { 1022const struct inode_operations ecryptfs_main_iops = {
1006 .permission = ecryptfs_permission, 1023 .permission = ecryptfs_permission,
1007 .setattr = ecryptfs_setattr, 1024 .setattr = ecryptfs_setattr,
1008 .setxattr = ecryptfs_setxattr, 1025 .setxattr = ecryptfs_setxattr,
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 745c0f1bfbbd..c209f67e7a26 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -7,6 +7,7 @@
7 * Copyright (C) 2004-2006 International Business Machines Corp. 7 * Copyright (C) 2004-2006 International Business Machines Corp.
8 * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com> 8 * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com>
9 * Michael C. Thompson <mcthomps@us.ibm.com> 9 * Michael C. Thompson <mcthomps@us.ibm.com>
10 * Trevor S. Highland <trevor.highland@gmail.com>
10 * 11 *
11 * This program is free software; you can redistribute it and/or 12 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License as 13 * modify it under the terms of the GNU General Public License as
@@ -64,26 +65,6 @@ int process_request_key_err(long err_code)
64 return rc; 65 return rc;
65} 66}
66 67
67static void wipe_auth_tok_list(struct list_head *auth_tok_list_head)
68{
69 struct list_head *walker;
70 struct ecryptfs_auth_tok_list_item *auth_tok_list_item;
71
72 walker = auth_tok_list_head->next;
73 while (walker != auth_tok_list_head) {
74 auth_tok_list_item =
75 list_entry(walker, struct ecryptfs_auth_tok_list_item,
76 list);
77 walker = auth_tok_list_item->list.next;
78 memset(auth_tok_list_item, 0,
79 sizeof(struct ecryptfs_auth_tok_list_item));
80 kmem_cache_free(ecryptfs_auth_tok_list_item_cache,
81 auth_tok_list_item);
82 }
83}
84
85struct kmem_cache *ecryptfs_auth_tok_list_item_cache;
86
87/** 68/**
88 * parse_packet_length 69 * parse_packet_length
89 * @data: Pointer to memory containing length at offset 70 * @data: Pointer to memory containing length at offset
@@ -102,12 +83,12 @@ static int parse_packet_length(unsigned char *data, size_t *size,
102 (*size) = 0; 83 (*size) = 0;
103 if (data[0] < 192) { 84 if (data[0] < 192) {
104 /* One-byte length */ 85 /* One-byte length */
105 (*size) = data[0]; 86 (*size) = (unsigned char)data[0];
106 (*length_size) = 1; 87 (*length_size) = 1;
107 } else if (data[0] < 224) { 88 } else if (data[0] < 224) {
108 /* Two-byte length */ 89 /* Two-byte length */
109 (*size) = ((data[0] - 192) * 256); 90 (*size) = (((unsigned char)(data[0]) - 192) * 256);
110 (*size) += (data[1] + 192); 91 (*size) += ((unsigned char)(data[1]) + 192);
111 (*length_size) = 2; 92 (*length_size) = 2;
112 } else if (data[0] == 255) { 93 } else if (data[0] == 255) {
113 /* Five-byte length; we're not supposed to see this */ 94 /* Five-byte length; we're not supposed to see this */
@@ -154,6 +135,499 @@ static int write_packet_length(char *dest, size_t size,
154 return rc; 135 return rc;
155} 136}
156 137
138static int
139write_tag_64_packet(char *signature, struct ecryptfs_session_key *session_key,
140 char **packet, size_t *packet_len)
141{
142 size_t i = 0;
143 size_t data_len;
144 size_t packet_size_len;
145 char *message;
146 int rc;
147
148 /*
149 * ***** TAG 64 Packet Format *****
150 * | Content Type | 1 byte |
151 * | Key Identifier Size | 1 or 2 bytes |
152 * | Key Identifier | arbitrary |
153 * | Encrypted File Encryption Key Size | 1 or 2 bytes |
154 * | Encrypted File Encryption Key | arbitrary |
155 */
156 data_len = (5 + ECRYPTFS_SIG_SIZE_HEX
157 + session_key->encrypted_key_size);
158 *packet = kmalloc(data_len, GFP_KERNEL);
159 message = *packet;
160 if (!message) {
161 ecryptfs_printk(KERN_ERR, "Unable to allocate memory\n");
162 rc = -ENOMEM;
163 goto out;
164 }
165 message[i++] = ECRYPTFS_TAG_64_PACKET_TYPE;
166 rc = write_packet_length(&message[i], ECRYPTFS_SIG_SIZE_HEX,
167 &packet_size_len);
168 if (rc) {
169 ecryptfs_printk(KERN_ERR, "Error generating tag 64 packet "
170 "header; cannot generate packet length\n");
171 goto out;
172 }
173 i += packet_size_len;
174 memcpy(&message[i], signature, ECRYPTFS_SIG_SIZE_HEX);
175 i += ECRYPTFS_SIG_SIZE_HEX;
176 rc = write_packet_length(&message[i], session_key->encrypted_key_size,
177 &packet_size_len);
178 if (rc) {
179 ecryptfs_printk(KERN_ERR, "Error generating tag 64 packet "
180 "header; cannot generate packet length\n");
181 goto out;
182 }
183 i += packet_size_len;
184 memcpy(&message[i], session_key->encrypted_key,
185 session_key->encrypted_key_size);
186 i += session_key->encrypted_key_size;
187 *packet_len = i;
188out:
189 return rc;
190}
191
192static int
193parse_tag_65_packet(struct ecryptfs_session_key *session_key, u16 *cipher_code,
194 struct ecryptfs_message *msg)
195{
196 size_t i = 0;
197 char *data;
198 size_t data_len;
199 size_t m_size;
200 size_t message_len;
201 u16 checksum = 0;
202 u16 expected_checksum = 0;
203 int rc;
204
205 /*
206 * ***** TAG 65 Packet Format *****
207 * | Content Type | 1 byte |
208 * | Status Indicator | 1 byte |
209 * | File Encryption Key Size | 1 or 2 bytes |
210 * | File Encryption Key | arbitrary |
211 */
212 message_len = msg->data_len;
213 data = msg->data;
214 if (message_len < 4) {
215 rc = -EIO;
216 goto out;
217 }
218 if (data[i++] != ECRYPTFS_TAG_65_PACKET_TYPE) {
219 ecryptfs_printk(KERN_ERR, "Type should be ECRYPTFS_TAG_65\n");
220 rc = -EIO;
221 goto out;
222 }
223 if (data[i++]) {
224 ecryptfs_printk(KERN_ERR, "Status indicator has non-zero value "
225 "[%d]\n", data[i-1]);
226 rc = -EIO;
227 goto out;
228 }
229 rc = parse_packet_length(&data[i], &m_size, &data_len);
230 if (rc) {
231 ecryptfs_printk(KERN_WARNING, "Error parsing packet length; "
232 "rc = [%d]\n", rc);
233 goto out;
234 }
235 i += data_len;
236 if (message_len < (i + m_size)) {
237 ecryptfs_printk(KERN_ERR, "The received netlink message is "
238 "shorter than expected\n");
239 rc = -EIO;
240 goto out;
241 }
242 if (m_size < 3) {
243 ecryptfs_printk(KERN_ERR,
244 "The decrypted key is not long enough to "
245 "include a cipher code and checksum\n");
246 rc = -EIO;
247 goto out;
248 }
249 *cipher_code = data[i++];
250 /* The decrypted key includes 1 byte cipher code and 2 byte checksum */
251 session_key->decrypted_key_size = m_size - 3;
252 if (session_key->decrypted_key_size > ECRYPTFS_MAX_KEY_BYTES) {
253 ecryptfs_printk(KERN_ERR, "key_size [%d] larger than "
254 "the maximum key size [%d]\n",
255 session_key->decrypted_key_size,
256 ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES);
257 rc = -EIO;
258 goto out;
259 }
260 memcpy(session_key->decrypted_key, &data[i],
261 session_key->decrypted_key_size);
262 i += session_key->decrypted_key_size;
263 expected_checksum += (unsigned char)(data[i++]) << 8;
264 expected_checksum += (unsigned char)(data[i++]);
265 for (i = 0; i < session_key->decrypted_key_size; i++)
266 checksum += session_key->decrypted_key[i];
267 if (expected_checksum != checksum) {
268 ecryptfs_printk(KERN_ERR, "Invalid checksum for file "
269 "encryption key; expected [%x]; calculated "
270 "[%x]\n", expected_checksum, checksum);
271 rc = -EIO;
272 }
273out:
274 return rc;
275}
276
277
278static int
279write_tag_66_packet(char *signature, size_t cipher_code,
280 struct ecryptfs_crypt_stat *crypt_stat, char **packet,
281 size_t *packet_len)
282{
283 size_t i = 0;
284 size_t j;
285 size_t data_len;
286 size_t checksum = 0;
287 size_t packet_size_len;
288 char *message;
289 int rc;
290
291 /*
292 * ***** TAG 66 Packet Format *****
293 * | Content Type | 1 byte |
294 * | Key Identifier Size | 1 or 2 bytes |
295 * | Key Identifier | arbitrary |
296 * | File Encryption Key Size | 1 or 2 bytes |
297 * | File Encryption Key | arbitrary |
298 */
299 data_len = (5 + ECRYPTFS_SIG_SIZE_HEX + crypt_stat->key_size);
300 *packet = kmalloc(data_len, GFP_KERNEL);
301 message = *packet;
302 if (!message) {
303 ecryptfs_printk(KERN_ERR, "Unable to allocate memory\n");
304 rc = -ENOMEM;
305 goto out;
306 }
307 message[i++] = ECRYPTFS_TAG_66_PACKET_TYPE;
308 rc = write_packet_length(&message[i], ECRYPTFS_SIG_SIZE_HEX,
309 &packet_size_len);
310 if (rc) {
311 ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet "
312 "header; cannot generate packet length\n");
313 goto out;
314 }
315 i += packet_size_len;
316 memcpy(&message[i], signature, ECRYPTFS_SIG_SIZE_HEX);
317 i += ECRYPTFS_SIG_SIZE_HEX;
318 /* The encrypted key includes 1 byte cipher code and 2 byte checksum */
319 rc = write_packet_length(&message[i], crypt_stat->key_size + 3,
320 &packet_size_len);
321 if (rc) {
322 ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet "
323 "header; cannot generate packet length\n");
324 goto out;
325 }
326 i += packet_size_len;
327 message[i++] = cipher_code;
328 memcpy(&message[i], crypt_stat->key, crypt_stat->key_size);
329 i += crypt_stat->key_size;
330 for (j = 0; j < crypt_stat->key_size; j++)
331 checksum += crypt_stat->key[j];
332 message[i++] = (checksum / 256) % 256;
333 message[i++] = (checksum % 256);
334 *packet_len = i;
335out:
336 return rc;
337}
338
/**
 * parse_tag_67_packet - Parse an encrypted-key reply from the daemon
 * @key_rec: Receives the encrypted file encryption key and its size
 * @msg: Reply message received from the userspace daemon
 *
 * Returns zero on success; -EIO on any malformed packet.
 */
static int
parse_tag_67_packet(struct ecryptfs_key_record *key_rec,
		    struct ecryptfs_message *msg)
{
	size_t i = 0;
	char *data;
	size_t data_len;
	size_t message_len;
	int rc;

	/*
	 * ***** TAG 67 Packet Format *****
	 * | Content Type                       | 1 byte       |
	 * | Status Indicator                   | 1 byte       |
	 * | Encrypted File Encryption Key Size | 1 or 2 bytes |
	 * | Encrypted File Encryption Key      | arbitrary    |
	 */
	message_len = msg->data_len;
	data = msg->data;
	/* verify that everything through the encrypted FEK size is present */
	if (message_len < 4) {
		rc = -EIO;
		goto out;
	}
	if (data[i++] != ECRYPTFS_TAG_67_PACKET_TYPE) {
		ecryptfs_printk(KERN_ERR, "Type should be ECRYPTFS_TAG_67\n");
		rc = -EIO;
		goto out;
	}
	if (data[i++]) {
		ecryptfs_printk(KERN_ERR, "Status indicator has non zero value"
				" [%d]\n", data[i-1]);
		rc = -EIO;
		goto out;
	}
	rc = parse_packet_length(&data[i], &key_rec->enc_key_size, &data_len);
	if (rc) {
		ecryptfs_printk(KERN_WARNING, "Error parsing packet length; "
				"rc = [%d]\n", rc);
		goto out;
	}
	i += data_len;
	/* The message must be large enough to carry the claimed key size */
	if (message_len < (i + key_rec->enc_key_size)) {
		ecryptfs_printk(KERN_ERR, "message_len [%d]; max len is [%d]\n",
				message_len, (i + key_rec->enc_key_size));
		rc = -EIO;
		goto out;
	}
	/* Bound the copy to the destination buffer's capacity */
	if (key_rec->enc_key_size > ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES) {
		ecryptfs_printk(KERN_ERR, "Encrypted key_size [%d] larger than "
				"the maximum key size [%d]\n",
				key_rec->enc_key_size,
				ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES);
		rc = -EIO;
		goto out;
	}
	memcpy(key_rec->enc_key, &data[i], key_rec->enc_key_size);
out:
	return rc;
}
399
400/**
401 * decrypt_pki_encrypted_session_key - Decrypt the session key with
402 * the given auth_tok.
403 *
404 * Returns Zero on success; non-zero error otherwise.
405 */
406static int decrypt_pki_encrypted_session_key(
407 struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
408 struct ecryptfs_auth_tok *auth_tok,
409 struct ecryptfs_crypt_stat *crypt_stat)
410{
411 u16 cipher_code = 0;
412 struct ecryptfs_msg_ctx *msg_ctx;
413 struct ecryptfs_message *msg = NULL;
414 char *netlink_message;
415 size_t netlink_message_length;
416 int rc;
417
418 rc = write_tag_64_packet(mount_crypt_stat->global_auth_tok_sig,
419 &(auth_tok->session_key),
420 &netlink_message, &netlink_message_length);
421 if (rc) {
422 ecryptfs_printk(KERN_ERR, "Failed to write tag 64 packet");
423 goto out;
424 }
425 rc = ecryptfs_send_message(ecryptfs_transport, netlink_message,
426 netlink_message_length, &msg_ctx);
427 if (rc) {
428 ecryptfs_printk(KERN_ERR, "Error sending netlink message\n");
429 goto out;
430 }
431 rc = ecryptfs_wait_for_response(msg_ctx, &msg);
432 if (rc) {
433 ecryptfs_printk(KERN_ERR, "Failed to receive tag 65 packet "
434 "from the user space daemon\n");
435 rc = -EIO;
436 goto out;
437 }
438 rc = parse_tag_65_packet(&(auth_tok->session_key),
439 &cipher_code, msg);
440 if (rc) {
441 printk(KERN_ERR "Failed to parse tag 65 packet; rc = [%d]\n",
442 rc);
443 goto out;
444 }
445 auth_tok->session_key.flags |= ECRYPTFS_CONTAINS_DECRYPTED_KEY;
446 memcpy(crypt_stat->key, auth_tok->session_key.decrypted_key,
447 auth_tok->session_key.decrypted_key_size);
448 crypt_stat->key_size = auth_tok->session_key.decrypted_key_size;
449 rc = ecryptfs_cipher_code_to_string(crypt_stat->cipher, cipher_code);
450 if (rc) {
451 ecryptfs_printk(KERN_ERR, "Cipher code [%d] is invalid\n",
452 cipher_code)
453 goto out;
454 }
455 crypt_stat->flags |= ECRYPTFS_KEY_VALID;
456 if (ecryptfs_verbosity > 0) {
457 ecryptfs_printk(KERN_DEBUG, "Decrypted session key:\n");
458 ecryptfs_dump_hex(crypt_stat->key,
459 crypt_stat->key_size);
460 }
461out:
462 if (msg)
463 kfree(msg);
464 return rc;
465}
466
467static void wipe_auth_tok_list(struct list_head *auth_tok_list_head)
468{
469 struct list_head *walker;
470 struct ecryptfs_auth_tok_list_item *auth_tok_list_item;
471
472 walker = auth_tok_list_head->next;
473 while (walker != auth_tok_list_head) {
474 auth_tok_list_item =
475 list_entry(walker, struct ecryptfs_auth_tok_list_item,
476 list);
477 walker = auth_tok_list_item->list.next;
478 memset(auth_tok_list_item, 0,
479 sizeof(struct ecryptfs_auth_tok_list_item));
480 kmem_cache_free(ecryptfs_auth_tok_list_item_cache,
481 auth_tok_list_item);
482 }
483 auth_tok_list_head->next = NULL;
484}
485
486struct kmem_cache *ecryptfs_auth_tok_list_item_cache;
487
488
489/**
490 * parse_tag_1_packet
491 * @crypt_stat: The cryptographic context to modify based on packet
492 * contents.
493 * @data: The raw bytes of the packet.
494 * @auth_tok_list: eCryptfs parses packets into authentication tokens;
495 * a new authentication token will be placed at the end
496 * of this list for this packet.
497 * @new_auth_tok: Pointer to a pointer to memory that this function
498 * allocates; sets the memory address of the pointer to
499 * NULL on error. This object is added to the
500 * auth_tok_list.
501 * @packet_size: This function writes the size of the parsed packet
502 * into this memory location; zero on error.
503 *
504 * Returns zero on success; non-zero on error.
505 */
506static int
507parse_tag_1_packet(struct ecryptfs_crypt_stat *crypt_stat,
508 unsigned char *data, struct list_head *auth_tok_list,
509 struct ecryptfs_auth_tok **new_auth_tok,
510 size_t *packet_size, size_t max_packet_size)
511{
512 size_t body_size;
513 struct ecryptfs_auth_tok_list_item *auth_tok_list_item;
514 size_t length_size;
515 int rc = 0;
516
517 (*packet_size) = 0;
518 (*new_auth_tok) = NULL;
519
520 /* we check that:
521 * one byte for the Tag 1 ID flag
522 * two bytes for the body size
523 * do not exceed the maximum_packet_size
524 */
525 if (unlikely((*packet_size) + 3 > max_packet_size)) {
526 ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n");
527 rc = -EINVAL;
528 goto out;
529 }
530 /* check for Tag 1 identifier - one byte */
531 if (data[(*packet_size)++] != ECRYPTFS_TAG_1_PACKET_TYPE) {
532 ecryptfs_printk(KERN_ERR, "Enter w/ first byte != 0x%.2x\n",
533 ECRYPTFS_TAG_1_PACKET_TYPE);
534 rc = -EINVAL;
535 goto out;
536 }
537 /* Released: wipe_auth_tok_list called in ecryptfs_parse_packet_set or
538 * at end of function upon failure */
539 auth_tok_list_item =
540 kmem_cache_alloc(ecryptfs_auth_tok_list_item_cache,
541 GFP_KERNEL);
542 if (!auth_tok_list_item) {
543 ecryptfs_printk(KERN_ERR, "Unable to allocate memory\n");
544 rc = -ENOMEM;
545 goto out;
546 }
547 memset(auth_tok_list_item, 0,
548 sizeof(struct ecryptfs_auth_tok_list_item));
549 (*new_auth_tok) = &auth_tok_list_item->auth_tok;
550 /* check for body size - one to two bytes
551 *
552 * ***** TAG 1 Packet Format *****
553 * | version number | 1 byte |
554 * | key ID | 8 bytes |
555 * | public key algorithm | 1 byte |
556 * | encrypted session key | arbitrary |
557 */
558 rc = parse_packet_length(&data[(*packet_size)], &body_size,
559 &length_size);
560 if (rc) {
561 ecryptfs_printk(KERN_WARNING, "Error parsing packet length; "
562 "rc = [%d]\n", rc);
563 goto out_free;
564 }
565 if (unlikely(body_size < (0x02 + ECRYPTFS_SIG_SIZE))) {
566 ecryptfs_printk(KERN_WARNING, "Invalid body size ([%d])\n",
567 body_size);
568 rc = -EINVAL;
569 goto out_free;
570 }
571 (*packet_size) += length_size;
572 if (unlikely((*packet_size) + body_size > max_packet_size)) {
573 ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n");
574 rc = -EINVAL;
575 goto out_free;
576 }
577 /* Version 3 (from RFC2440) - one byte */
578 if (unlikely(data[(*packet_size)++] != 0x03)) {
579 ecryptfs_printk(KERN_DEBUG, "Unknown version number "
580 "[%d]\n", data[(*packet_size) - 1]);
581 rc = -EINVAL;
582 goto out_free;
583 }
584 /* Read Signature */
585 ecryptfs_to_hex((*new_auth_tok)->token.private_key.signature,
586 &data[(*packet_size)], ECRYPTFS_SIG_SIZE);
587 *packet_size += ECRYPTFS_SIG_SIZE;
588 /* This byte is skipped because the kernel does not need to
589 * know which public key encryption algorithm was used */
590 (*packet_size)++;
591 (*new_auth_tok)->session_key.encrypted_key_size =
592 body_size - (0x02 + ECRYPTFS_SIG_SIZE);
593 if ((*new_auth_tok)->session_key.encrypted_key_size
594 > ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES) {
595 ecryptfs_printk(KERN_ERR, "Tag 1 packet contains key larger "
596 "than ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES");
597 rc = -EINVAL;
598 goto out;
599 }
600 ecryptfs_printk(KERN_DEBUG, "Encrypted key size = [%d]\n",
601 (*new_auth_tok)->session_key.encrypted_key_size);
602 memcpy((*new_auth_tok)->session_key.encrypted_key,
603 &data[(*packet_size)], (body_size - 0x02 - ECRYPTFS_SIG_SIZE));
604 (*packet_size) += (*new_auth_tok)->session_key.encrypted_key_size;
605 (*new_auth_tok)->session_key.flags &=
606 ~ECRYPTFS_CONTAINS_DECRYPTED_KEY;
607 (*new_auth_tok)->session_key.flags |=
608 ECRYPTFS_CONTAINS_ENCRYPTED_KEY;
609 (*new_auth_tok)->token_type = ECRYPTFS_PRIVATE_KEY;
610 (*new_auth_tok)->flags |= ECRYPTFS_PRIVATE_KEY;
611 /* TODO: Why are we setting this flag here? Don't we want the
612 * userspace to decrypt the session key? */
613 (*new_auth_tok)->session_key.flags &=
614 ~(ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT);
615 (*new_auth_tok)->session_key.flags &=
616 ~(ECRYPTFS_USERSPACE_SHOULD_TRY_TO_ENCRYPT);
617 list_add(&auth_tok_list_item->list, auth_tok_list);
618 goto out;
619out_free:
620 (*new_auth_tok) = NULL;
621 memset(auth_tok_list_item, 0,
622 sizeof(struct ecryptfs_auth_tok_list_item));
623 kmem_cache_free(ecryptfs_auth_tok_list_item_cache,
624 auth_tok_list_item);
625out:
626 if (rc)
627 (*packet_size) = 0;
628 return rc;
629}
630
157/** 631/**
158 * parse_tag_3_packet 632 * parse_tag_3_packet
159 * @crypt_stat: The cryptographic context to modify based on packet 633 * @crypt_stat: The cryptographic context to modify based on packet
@@ -178,10 +652,10 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
178 struct ecryptfs_auth_tok **new_auth_tok, 652 struct ecryptfs_auth_tok **new_auth_tok,
179 size_t *packet_size, size_t max_packet_size) 653 size_t *packet_size, size_t max_packet_size)
180{ 654{
181 int rc = 0;
182 size_t body_size; 655 size_t body_size;
183 struct ecryptfs_auth_tok_list_item *auth_tok_list_item; 656 struct ecryptfs_auth_tok_list_item *auth_tok_list_item;
184 size_t length_size; 657 size_t length_size;
658 int rc = 0;
185 659
186 (*packet_size) = 0; 660 (*packet_size) = 0;
187 (*new_auth_tok) = NULL; 661 (*new_auth_tok) = NULL;
@@ -207,14 +681,12 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
207 /* Released: wipe_auth_tok_list called in ecryptfs_parse_packet_set or 681 /* Released: wipe_auth_tok_list called in ecryptfs_parse_packet_set or
208 * at end of function upon failure */ 682 * at end of function upon failure */
209 auth_tok_list_item = 683 auth_tok_list_item =
210 kmem_cache_alloc(ecryptfs_auth_tok_list_item_cache, GFP_KERNEL); 684 kmem_cache_zalloc(ecryptfs_auth_tok_list_item_cache, GFP_KERNEL);
211 if (!auth_tok_list_item) { 685 if (!auth_tok_list_item) {
212 ecryptfs_printk(KERN_ERR, "Unable to allocate memory\n"); 686 ecryptfs_printk(KERN_ERR, "Unable to allocate memory\n");
213 rc = -ENOMEM; 687 rc = -ENOMEM;
214 goto out; 688 goto out;
215 } 689 }
216 memset(auth_tok_list_item, 0,
217 sizeof(struct ecryptfs_auth_tok_list_item));
218 (*new_auth_tok) = &auth_tok_list_item->auth_tok; 690 (*new_auth_tok) = &auth_tok_list_item->auth_tok;
219 691
220 /* check for body size - one to two bytes */ 692 /* check for body size - one to two bytes */
@@ -321,10 +793,10 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
321 (*new_auth_tok)->token_type = ECRYPTFS_PASSWORD; 793 (*new_auth_tok)->token_type = ECRYPTFS_PASSWORD;
322 /* TODO: Parametarize; we might actually want userspace to 794 /* TODO: Parametarize; we might actually want userspace to
323 * decrypt the session key. */ 795 * decrypt the session key. */
324 ECRYPTFS_CLEAR_FLAG((*new_auth_tok)->session_key.flags, 796 (*new_auth_tok)->session_key.flags &=
325 ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT); 797 ~(ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT);
326 ECRYPTFS_CLEAR_FLAG((*new_auth_tok)->session_key.flags, 798 (*new_auth_tok)->session_key.flags &=
327 ECRYPTFS_USERSPACE_SHOULD_TRY_TO_ENCRYPT); 799 ~(ECRYPTFS_USERSPACE_SHOULD_TRY_TO_ENCRYPT);
328 list_add(&auth_tok_list_item->list, auth_tok_list); 800 list_add(&auth_tok_list_item->list, auth_tok_list);
329 goto out; 801 goto out;
330out_free: 802out_free:
@@ -360,9 +832,9 @@ parse_tag_11_packet(unsigned char *data, unsigned char *contents,
360 size_t max_contents_bytes, size_t *tag_11_contents_size, 832 size_t max_contents_bytes, size_t *tag_11_contents_size,
361 size_t *packet_size, size_t max_packet_size) 833 size_t *packet_size, size_t max_packet_size)
362{ 834{
363 int rc = 0;
364 size_t body_size; 835 size_t body_size;
365 size_t length_size; 836 size_t length_size;
837 int rc = 0;
366 838
367 (*packet_size) = 0; 839 (*packet_size) = 0;
368 (*tag_11_contents_size) = 0; 840 (*tag_11_contents_size) = 0;
@@ -461,7 +933,6 @@ static int decrypt_session_key(struct ecryptfs_auth_tok *auth_tok,
461 struct ecryptfs_password *password_s_ptr; 933 struct ecryptfs_password *password_s_ptr;
462 struct scatterlist src_sg[2], dst_sg[2]; 934 struct scatterlist src_sg[2], dst_sg[2];
463 struct mutex *tfm_mutex = NULL; 935 struct mutex *tfm_mutex = NULL;
464 /* TODO: Use virt_to_scatterlist for these */
465 char *encrypted_session_key; 936 char *encrypted_session_key;
466 char *session_key; 937 char *session_key;
467 struct blkcipher_desc desc = { 938 struct blkcipher_desc desc = {
@@ -470,8 +941,7 @@ static int decrypt_session_key(struct ecryptfs_auth_tok *auth_tok,
470 int rc = 0; 941 int rc = 0;
471 942
472 password_s_ptr = &auth_tok->token.password; 943 password_s_ptr = &auth_tok->token.password;
473 if (ECRYPTFS_CHECK_FLAG(password_s_ptr->flags, 944 if (password_s_ptr->flags & ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET)
474 ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET))
475 ecryptfs_printk(KERN_DEBUG, "Session key encryption key " 945 ecryptfs_printk(KERN_DEBUG, "Session key encryption key "
476 "set; skipping key generation\n"); 946 "set; skipping key generation\n");
477 ecryptfs_printk(KERN_DEBUG, "Session key encryption key (size [%d])" 947 ecryptfs_printk(KERN_DEBUG, "Session key encryption key (size [%d])"
@@ -553,7 +1023,7 @@ static int decrypt_session_key(struct ecryptfs_auth_tok *auth_tok,
553 auth_tok->session_key.flags |= ECRYPTFS_CONTAINS_DECRYPTED_KEY; 1023 auth_tok->session_key.flags |= ECRYPTFS_CONTAINS_DECRYPTED_KEY;
554 memcpy(crypt_stat->key, auth_tok->session_key.decrypted_key, 1024 memcpy(crypt_stat->key, auth_tok->session_key.decrypted_key,
555 auth_tok->session_key.decrypted_key_size); 1025 auth_tok->session_key.decrypted_key_size);
556 ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID); 1026 crypt_stat->flags |= ECRYPTFS_KEY_VALID;
557 ecryptfs_printk(KERN_DEBUG, "Decrypted session key:\n"); 1027 ecryptfs_printk(KERN_DEBUG, "Decrypted session key:\n");
558 if (ecryptfs_verbosity > 0) 1028 if (ecryptfs_verbosity > 0)
559 ecryptfs_dump_hex(crypt_stat->key, 1029 ecryptfs_dump_hex(crypt_stat->key,
@@ -589,7 +1059,6 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
589 struct dentry *ecryptfs_dentry) 1059 struct dentry *ecryptfs_dentry)
590{ 1060{
591 size_t i = 0; 1061 size_t i = 0;
592 int rc = 0;
593 size_t found_auth_tok = 0; 1062 size_t found_auth_tok = 0;
594 size_t next_packet_is_auth_tok_packet; 1063 size_t next_packet_is_auth_tok_packet;
595 char sig[ECRYPTFS_SIG_SIZE_HEX]; 1064 char sig[ECRYPTFS_SIG_SIZE_HEX];
@@ -605,6 +1074,7 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
605 unsigned char sig_tmp_space[ECRYPTFS_SIG_SIZE]; 1074 unsigned char sig_tmp_space[ECRYPTFS_SIG_SIZE];
606 size_t tag_11_contents_size; 1075 size_t tag_11_contents_size;
607 size_t tag_11_packet_size; 1076 size_t tag_11_packet_size;
1077 int rc = 0;
608 1078
609 INIT_LIST_HEAD(&auth_tok_list); 1079 INIT_LIST_HEAD(&auth_tok_list);
610 /* Parse the header to find as many packets as we can, these will be 1080 /* Parse the header to find as many packets as we can, these will be
@@ -656,8 +1126,21 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
656 sig_tmp_space, tag_11_contents_size); 1126 sig_tmp_space, tag_11_contents_size);
657 new_auth_tok->token.password.signature[ 1127 new_auth_tok->token.password.signature[
658 ECRYPTFS_PASSWORD_SIG_SIZE] = '\0'; 1128 ECRYPTFS_PASSWORD_SIG_SIZE] = '\0';
659 ECRYPTFS_SET_FLAG(crypt_stat->flags, 1129 crypt_stat->flags |= ECRYPTFS_ENCRYPTED;
660 ECRYPTFS_ENCRYPTED); 1130 break;
1131 case ECRYPTFS_TAG_1_PACKET_TYPE:
1132 rc = parse_tag_1_packet(crypt_stat,
1133 (unsigned char *)&src[i],
1134 &auth_tok_list, &new_auth_tok,
1135 &packet_size, max_packet_size);
1136 if (rc) {
1137 ecryptfs_printk(KERN_ERR, "Error parsing "
1138 "tag 1 packet\n");
1139 rc = -EIO;
1140 goto out_wipe_list;
1141 }
1142 i += packet_size;
1143 crypt_stat->flags |= ECRYPTFS_ENCRYPTED;
661 break; 1144 break;
662 case ECRYPTFS_TAG_11_PACKET_TYPE: 1145 case ECRYPTFS_TAG_11_PACKET_TYPE:
663 ecryptfs_printk(KERN_WARNING, "Invalid packet set " 1146 ecryptfs_printk(KERN_WARNING, "Invalid packet set "
@@ -706,31 +1189,46 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
706 goto leave_list; 1189 goto leave_list;
707 /* TODO: Transfer the common salt into the 1190 /* TODO: Transfer the common salt into the
708 * crypt_stat salt */ 1191 * crypt_stat salt */
1192 } else if ((candidate_auth_tok->token_type
1193 == ECRYPTFS_PRIVATE_KEY)
1194 && !strncmp(candidate_auth_tok->token.private_key.signature,
1195 sig, ECRYPTFS_SIG_SIZE_HEX)) {
1196 found_auth_tok = 1;
1197 goto leave_list;
709 } 1198 }
710 } 1199 }
711leave_list:
712 if (!found_auth_tok) { 1200 if (!found_auth_tok) {
713 ecryptfs_printk(KERN_ERR, "Could not find authentication " 1201 ecryptfs_printk(KERN_ERR, "Could not find authentication "
714 "token on temporary list for sig [%.*s]\n", 1202 "token on temporary list for sig [%.*s]\n",
715 ECRYPTFS_SIG_SIZE_HEX, sig); 1203 ECRYPTFS_SIG_SIZE_HEX, sig);
716 rc = -EIO; 1204 rc = -EIO;
717 goto out_wipe_list; 1205 goto out_wipe_list;
718 } else { 1206 }
1207leave_list:
1208 rc = -ENOTSUPP;
1209 if (candidate_auth_tok->token_type == ECRYPTFS_PRIVATE_KEY) {
1210 memcpy(&(candidate_auth_tok->token.private_key),
1211 &(chosen_auth_tok->token.private_key),
1212 sizeof(struct ecryptfs_private_key));
1213 rc = decrypt_pki_encrypted_session_key(mount_crypt_stat,
1214 candidate_auth_tok,
1215 crypt_stat);
1216 } else if (candidate_auth_tok->token_type == ECRYPTFS_PASSWORD) {
719 memcpy(&(candidate_auth_tok->token.password), 1217 memcpy(&(candidate_auth_tok->token.password),
720 &(chosen_auth_tok->token.password), 1218 &(chosen_auth_tok->token.password),
721 sizeof(struct ecryptfs_password)); 1219 sizeof(struct ecryptfs_password));
722 rc = decrypt_session_key(candidate_auth_tok, crypt_stat); 1220 rc = decrypt_session_key(candidate_auth_tok, crypt_stat);
723 if (rc) { 1221 }
724 ecryptfs_printk(KERN_ERR, "Error decrypting the " 1222 if (rc) {
725 "session key\n"); 1223 ecryptfs_printk(KERN_ERR, "Error decrypting the "
726 goto out_wipe_list; 1224 "session key; rc = [%d]\n", rc);
727 } 1225 goto out_wipe_list;
728 rc = ecryptfs_compute_root_iv(crypt_stat); 1226 }
729 if (rc) { 1227 rc = ecryptfs_compute_root_iv(crypt_stat);
730 ecryptfs_printk(KERN_ERR, "Error computing " 1228 if (rc) {
731 "the root IV\n"); 1229 ecryptfs_printk(KERN_ERR, "Error computing "
732 goto out_wipe_list; 1230 "the root IV\n");
733 } 1231 goto out_wipe_list;
734 } 1232 }
735 rc = ecryptfs_init_crypt_ctx(crypt_stat); 1233 rc = ecryptfs_init_crypt_ctx(crypt_stat);
736 if (rc) { 1234 if (rc) {
@@ -743,6 +1241,145 @@ out_wipe_list:
743out: 1241out:
744 return rc; 1242 return rc;
745} 1243}
1244static int
1245pki_encrypt_session_key(struct ecryptfs_auth_tok *auth_tok,
1246 struct ecryptfs_crypt_stat *crypt_stat,
1247 struct ecryptfs_key_record *key_rec)
1248{
1249 struct ecryptfs_msg_ctx *msg_ctx = NULL;
1250 char *netlink_payload;
1251 size_t netlink_payload_length;
1252 struct ecryptfs_message *msg;
1253 int rc;
1254
1255 rc = write_tag_66_packet(auth_tok->token.private_key.signature,
1256 ecryptfs_code_for_cipher_string(crypt_stat),
1257 crypt_stat, &netlink_payload,
1258 &netlink_payload_length);
1259 if (rc) {
1260 ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet\n");
1261 goto out;
1262 }
1263 rc = ecryptfs_send_message(ecryptfs_transport, netlink_payload,
1264 netlink_payload_length, &msg_ctx);
1265 if (rc) {
1266 ecryptfs_printk(KERN_ERR, "Error sending netlink message\n");
1267 goto out;
1268 }
1269 rc = ecryptfs_wait_for_response(msg_ctx, &msg);
1270 if (rc) {
1271 ecryptfs_printk(KERN_ERR, "Failed to receive tag 67 packet "
1272 "from the user space daemon\n");
1273 rc = -EIO;
1274 goto out;
1275 }
1276 rc = parse_tag_67_packet(key_rec, msg);
1277 if (rc)
1278 ecryptfs_printk(KERN_ERR, "Error parsing tag 67 packet\n");
1279 kfree(msg);
1280out:
1281 if (netlink_payload)
1282 kfree(netlink_payload);
1283 return rc;
1284}
1285/**
1286 * write_tag_1_packet - Write an RFC2440-compatible tag 1 (public key) packet
1287 * @dest: Buffer into which to write the packet
1288 * @max: Maximum number of bytes that can be writtn
1289 * @packet_size: This function will write the number of bytes that end
1290 * up constituting the packet; set to zero on error
1291 *
1292 * Returns zero on success; non-zero on error.
1293 */
1294static int
1295write_tag_1_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok,
1296 struct ecryptfs_crypt_stat *crypt_stat,
1297 struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
1298 struct ecryptfs_key_record *key_rec, size_t *packet_size)
1299{
1300 size_t i;
1301 size_t encrypted_session_key_valid = 0;
1302 size_t key_rec_size;
1303 size_t packet_size_length;
1304 int rc = 0;
1305
1306 (*packet_size) = 0;
1307 ecryptfs_from_hex(key_rec->sig, auth_tok->token.private_key.signature,
1308 ECRYPTFS_SIG_SIZE);
1309 encrypted_session_key_valid = 0;
1310 for (i = 0; i < crypt_stat->key_size; i++)
1311 encrypted_session_key_valid |=
1312 auth_tok->session_key.encrypted_key[i];
1313 if (encrypted_session_key_valid) {
1314 memcpy(key_rec->enc_key,
1315 auth_tok->session_key.encrypted_key,
1316 auth_tok->session_key.encrypted_key_size);
1317 goto encrypted_session_key_set;
1318 }
1319 if (auth_tok->session_key.encrypted_key_size == 0)
1320 auth_tok->session_key.encrypted_key_size =
1321 auth_tok->token.private_key.key_size;
1322 rc = pki_encrypt_session_key(auth_tok, crypt_stat, key_rec);
1323 if (rc) {
1324 ecryptfs_printk(KERN_ERR, "Failed to encrypt session key "
1325 "via a pki");
1326 goto out;
1327 }
1328 if (ecryptfs_verbosity > 0) {
1329 ecryptfs_printk(KERN_DEBUG, "Encrypted key:\n");
1330 ecryptfs_dump_hex(key_rec->enc_key, key_rec->enc_key_size);
1331 }
1332encrypted_session_key_set:
1333 /* Now we have a valid key_rec. Append it to the
1334 * key_rec set. */
1335 key_rec_size = (sizeof(struct ecryptfs_key_record)
1336 - ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES
1337 + (key_rec->enc_key_size));
1338 /* TODO: Include a packet size limit as a parameter to this
1339 * function once we have multi-packet headers (for versions
1340 * later than 0.1 */
1341 if (key_rec_size >= ECRYPTFS_MAX_KEYSET_SIZE) {
1342 ecryptfs_printk(KERN_ERR, "Keyset too large\n");
1343 rc = -EINVAL;
1344 goto out;
1345 }
1346 /* ***** TAG 1 Packet Format *****
1347 * | version number | 1 byte |
1348 * | key ID | 8 bytes |
1349 * | public key algorithm | 1 byte |
1350 * | encrypted session key | arbitrary |
1351 */
1352 if ((0x02 + ECRYPTFS_SIG_SIZE + key_rec->enc_key_size) >= max) {
1353 ecryptfs_printk(KERN_ERR,
1354 "Authentication token is too large\n");
1355 rc = -EINVAL;
1356 goto out;
1357 }
1358 dest[(*packet_size)++] = ECRYPTFS_TAG_1_PACKET_TYPE;
1359 /* This format is inspired by OpenPGP; see RFC 2440
1360 * packet tag 1 */
1361 rc = write_packet_length(&dest[(*packet_size)],
1362 (0x02 + ECRYPTFS_SIG_SIZE +
1363 key_rec->enc_key_size),
1364 &packet_size_length);
1365 if (rc) {
1366 ecryptfs_printk(KERN_ERR, "Error generating tag 1 packet "
1367 "header; cannot generate packet length\n");
1368 goto out;
1369 }
1370 (*packet_size) += packet_size_length;
1371 dest[(*packet_size)++] = 0x03; /* version 3 */
1372 memcpy(&dest[(*packet_size)], key_rec->sig, ECRYPTFS_SIG_SIZE);
1373 (*packet_size) += ECRYPTFS_SIG_SIZE;
1374 dest[(*packet_size)++] = RFC2440_CIPHER_RSA;
1375 memcpy(&dest[(*packet_size)], key_rec->enc_key,
1376 key_rec->enc_key_size);
1377 (*packet_size) += key_rec->enc_key_size;
1378out:
1379 if (rc)
1380 (*packet_size) = 0;
1381 return rc;
1382}
746 1383
747/** 1384/**
748 * write_tag_11_packet 1385 * write_tag_11_packet
@@ -758,8 +1395,8 @@ static int
758write_tag_11_packet(char *dest, int max, char *contents, size_t contents_length, 1395write_tag_11_packet(char *dest, int max, char *contents, size_t contents_length,
759 size_t *packet_length) 1396 size_t *packet_length)
760{ 1397{
761 int rc = 0;
762 size_t packet_size_length; 1398 size_t packet_size_length;
1399 int rc = 0;
763 1400
764 (*packet_length) = 0; 1401 (*packet_length) = 0;
765 if ((13 + contents_length) > max) { 1402 if ((13 + contents_length) > max) {
@@ -817,7 +1454,6 @@ write_tag_3_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok,
817 struct ecryptfs_key_record *key_rec, size_t *packet_size) 1454 struct ecryptfs_key_record *key_rec, size_t *packet_size)
818{ 1455{
819 size_t i; 1456 size_t i;
820 size_t signature_is_valid = 0;
821 size_t encrypted_session_key_valid = 0; 1457 size_t encrypted_session_key_valid = 0;
822 char session_key_encryption_key[ECRYPTFS_MAX_KEY_BYTES]; 1458 char session_key_encryption_key[ECRYPTFS_MAX_KEY_BYTES];
823 struct scatterlist dest_sg[2]; 1459 struct scatterlist dest_sg[2];
@@ -833,19 +1469,14 @@ write_tag_3_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok,
833 int rc = 0; 1469 int rc = 0;
834 1470
835 (*packet_size) = 0; 1471 (*packet_size) = 0;
836 /* Check for a valid signature on the auth_tok */ 1472 ecryptfs_from_hex(key_rec->sig, auth_tok->token.password.signature,
837 for (i = 0; i < ECRYPTFS_SIG_SIZE_HEX; i++)
838 signature_is_valid |= auth_tok->token.password.signature[i];
839 if (!signature_is_valid)
840 BUG();
841 ecryptfs_from_hex((*key_rec).sig, auth_tok->token.password.signature,
842 ECRYPTFS_SIG_SIZE); 1473 ECRYPTFS_SIG_SIZE);
843 encrypted_session_key_valid = 0; 1474 encrypted_session_key_valid = 0;
844 for (i = 0; i < crypt_stat->key_size; i++) 1475 for (i = 0; i < crypt_stat->key_size; i++)
845 encrypted_session_key_valid |= 1476 encrypted_session_key_valid |=
846 auth_tok->session_key.encrypted_key[i]; 1477 auth_tok->session_key.encrypted_key[i];
847 if (encrypted_session_key_valid) { 1478 if (encrypted_session_key_valid) {
848 memcpy((*key_rec).enc_key, 1479 memcpy(key_rec->enc_key,
849 auth_tok->session_key.encrypted_key, 1480 auth_tok->session_key.encrypted_key,
850 auth_tok->session_key.encrypted_key_size); 1481 auth_tok->session_key.encrypted_key_size);
851 goto encrypted_session_key_set; 1482 goto encrypted_session_key_set;
@@ -858,10 +1489,10 @@ write_tag_3_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok,
858 memset((crypt_stat->key + 24), 0, 8); 1489 memset((crypt_stat->key + 24), 0, 8);
859 auth_tok->session_key.encrypted_key_size = 32; 1490 auth_tok->session_key.encrypted_key_size = 32;
860 } 1491 }
861 (*key_rec).enc_key_size = 1492 key_rec->enc_key_size =
862 auth_tok->session_key.encrypted_key_size; 1493 auth_tok->session_key.encrypted_key_size;
863 if (ECRYPTFS_CHECK_FLAG(auth_tok->token.password.flags, 1494 if (auth_tok->token.password.flags &
864 ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET)) { 1495 ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET) {
865 ecryptfs_printk(KERN_DEBUG, "Using previously generated " 1496 ecryptfs_printk(KERN_DEBUG, "Using previously generated "
866 "session key encryption key of size [%d]\n", 1497 "session key encryption key of size [%d]\n",
867 auth_tok->token.password. 1498 auth_tok->token.password.
@@ -879,15 +1510,15 @@ write_tag_3_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok,
879 ecryptfs_dump_hex(session_key_encryption_key, 16); 1510 ecryptfs_dump_hex(session_key_encryption_key, 16);
880 } 1511 }
881 rc = virt_to_scatterlist(crypt_stat->key, 1512 rc = virt_to_scatterlist(crypt_stat->key,
882 (*key_rec).enc_key_size, src_sg, 2); 1513 key_rec->enc_key_size, src_sg, 2);
883 if (!rc) { 1514 if (!rc) {
884 ecryptfs_printk(KERN_ERR, "Error generating scatterlist " 1515 ecryptfs_printk(KERN_ERR, "Error generating scatterlist "
885 "for crypt_stat session key\n"); 1516 "for crypt_stat session key\n");
886 rc = -ENOMEM; 1517 rc = -ENOMEM;
887 goto out; 1518 goto out;
888 } 1519 }
889 rc = virt_to_scatterlist((*key_rec).enc_key, 1520 rc = virt_to_scatterlist(key_rec->enc_key,
890 (*key_rec).enc_key_size, dest_sg, 2); 1521 key_rec->enc_key_size, dest_sg, 2);
891 if (!rc) { 1522 if (!rc) {
892 ecryptfs_printk(KERN_ERR, "Error generating scatterlist " 1523 ecryptfs_printk(KERN_ERR, "Error generating scatterlist "
893 "for crypt_stat encrypted session key\n"); 1524 "for crypt_stat encrypted session key\n");
@@ -943,14 +1574,14 @@ write_tag_3_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok,
943 mutex_unlock(tfm_mutex); 1574 mutex_unlock(tfm_mutex);
944 ecryptfs_printk(KERN_DEBUG, "This should be the encrypted key:\n"); 1575 ecryptfs_printk(KERN_DEBUG, "This should be the encrypted key:\n");
945 if (ecryptfs_verbosity > 0) 1576 if (ecryptfs_verbosity > 0)
946 ecryptfs_dump_hex((*key_rec).enc_key, 1577 ecryptfs_dump_hex(key_rec->enc_key,
947 (*key_rec).enc_key_size); 1578 key_rec->enc_key_size);
948encrypted_session_key_set: 1579encrypted_session_key_set:
949 /* Now we have a valid key_rec. Append it to the 1580 /* Now we have a valid key_rec. Append it to the
950 * key_rec set. */ 1581 * key_rec set. */
951 key_rec_size = (sizeof(struct ecryptfs_key_record) 1582 key_rec_size = (sizeof(struct ecryptfs_key_record)
952 - ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES 1583 - ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES
953 + ((*key_rec).enc_key_size)); 1584 + (key_rec->enc_key_size));
954 /* TODO: Include a packet size limit as a parameter to this 1585 /* TODO: Include a packet size limit as a parameter to this
955 * function once we have multi-packet headers (for versions 1586 * function once we have multi-packet headers (for versions
956 * later than 0.1 */ 1587 * later than 0.1 */
@@ -962,7 +1593,7 @@ encrypted_session_key_set:
962 /* TODO: Packet size limit */ 1593 /* TODO: Packet size limit */
963 /* We have 5 bytes of surrounding packet data */ 1594 /* We have 5 bytes of surrounding packet data */
964 if ((0x05 + ECRYPTFS_SALT_SIZE 1595 if ((0x05 + ECRYPTFS_SALT_SIZE
965 + (*key_rec).enc_key_size) >= max) { 1596 + key_rec->enc_key_size) >= max) {
966 ecryptfs_printk(KERN_ERR, "Authentication token is too " 1597 ecryptfs_printk(KERN_ERR, "Authentication token is too "
967 "large\n"); 1598 "large\n");
968 rc = -EINVAL; 1599 rc = -EINVAL;
@@ -974,7 +1605,7 @@ encrypted_session_key_set:
974 /* ver+cipher+s2k+hash+salt+iter+enc_key */ 1605 /* ver+cipher+s2k+hash+salt+iter+enc_key */
975 rc = write_packet_length(&dest[(*packet_size)], 1606 rc = write_packet_length(&dest[(*packet_size)],
976 (0x05 + ECRYPTFS_SALT_SIZE 1607 (0x05 + ECRYPTFS_SALT_SIZE
977 + (*key_rec).enc_key_size), 1608 + key_rec->enc_key_size),
978 &packet_size_length); 1609 &packet_size_length);
979 if (rc) { 1610 if (rc) {
980 ecryptfs_printk(KERN_ERR, "Error generating tag 3 packet " 1611 ecryptfs_printk(KERN_ERR, "Error generating tag 3 packet "
@@ -997,9 +1628,9 @@ encrypted_session_key_set:
997 ECRYPTFS_SALT_SIZE); 1628 ECRYPTFS_SALT_SIZE);
998 (*packet_size) += ECRYPTFS_SALT_SIZE; /* salt */ 1629 (*packet_size) += ECRYPTFS_SALT_SIZE; /* salt */
999 dest[(*packet_size)++] = 0x60; /* hash iterations (65536) */ 1630 dest[(*packet_size)++] = 0x60; /* hash iterations (65536) */
1000 memcpy(&dest[(*packet_size)], (*key_rec).enc_key, 1631 memcpy(&dest[(*packet_size)], key_rec->enc_key,
1001 (*key_rec).enc_key_size); 1632 key_rec->enc_key_size);
1002 (*packet_size) += (*key_rec).enc_key_size; 1633 (*packet_size) += key_rec->enc_key_size;
1003out: 1634out:
1004 if (desc.tfm && !tfm_mutex) 1635 if (desc.tfm && !tfm_mutex)
1005 crypto_free_blkcipher(desc.tfm); 1636 crypto_free_blkcipher(desc.tfm);
@@ -1029,13 +1660,13 @@ ecryptfs_generate_key_packet_set(char *dest_base,
1029 struct dentry *ecryptfs_dentry, size_t *len, 1660 struct dentry *ecryptfs_dentry, size_t *len,
1030 size_t max) 1661 size_t max)
1031{ 1662{
1032 int rc = 0;
1033 struct ecryptfs_auth_tok *auth_tok; 1663 struct ecryptfs_auth_tok *auth_tok;
1034 struct ecryptfs_mount_crypt_stat *mount_crypt_stat = 1664 struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
1035 &ecryptfs_superblock_to_private( 1665 &ecryptfs_superblock_to_private(
1036 ecryptfs_dentry->d_sb)->mount_crypt_stat; 1666 ecryptfs_dentry->d_sb)->mount_crypt_stat;
1037 size_t written; 1667 size_t written;
1038 struct ecryptfs_key_record key_rec; 1668 struct ecryptfs_key_record key_rec;
1669 int rc = 0;
1039 1670
1040 (*len) = 0; 1671 (*len) = 0;
1041 if (mount_crypt_stat->global_auth_tok) { 1672 if (mount_crypt_stat->global_auth_tok) {
@@ -1062,20 +1693,23 @@ ecryptfs_generate_key_packet_set(char *dest_base,
1062 goto out; 1693 goto out;
1063 } 1694 }
1064 (*len) += written; 1695 (*len) += written;
1696 } else if (auth_tok->token_type == ECRYPTFS_PRIVATE_KEY) {
1697 rc = write_tag_1_packet(dest_base + (*len),
1698 max, auth_tok,
1699 crypt_stat,mount_crypt_stat,
1700 &key_rec, &written);
1701 if (rc) {
1702 ecryptfs_printk(KERN_WARNING, "Error "
1703 "writing tag 1 packet\n");
1704 goto out;
1705 }
1706 (*len) += written;
1065 } else { 1707 } else {
1066 ecryptfs_printk(KERN_WARNING, "Unsupported " 1708 ecryptfs_printk(KERN_WARNING, "Unsupported "
1067 "authentication token type\n"); 1709 "authentication token type\n");
1068 rc = -EINVAL; 1710 rc = -EINVAL;
1069 goto out; 1711 goto out;
1070 } 1712 }
1071 if (rc) {
1072 ecryptfs_printk(KERN_WARNING, "Error writing "
1073 "authentication token packet with sig "
1074 "= [%s]\n",
1075 mount_crypt_stat->global_auth_tok_sig);
1076 rc = -EIO;
1077 goto out;
1078 }
1079 } else 1713 } else
1080 BUG(); 1714 BUG();
1081 if (likely((max - (*len)) > 0)) { 1715 if (likely((max - (*len)) > 0)) {
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index d0541ae8faba..26fe405a5763 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -3,9 +3,10 @@
3 * 3 *
4 * Copyright (C) 1997-2003 Erez Zadok 4 * Copyright (C) 1997-2003 Erez Zadok
5 * Copyright (C) 2001-2003 Stony Brook University 5 * Copyright (C) 2001-2003 Stony Brook University
6 * Copyright (C) 2004-2006 International Business Machines Corp. 6 * Copyright (C) 2004-2007 International Business Machines Corp.
7 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com> 7 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
8 * Michael C. Thompson <mcthomps@us.ibm.com> 8 * Michael C. Thompson <mcthomps@us.ibm.com>
9 * Tyler Hicks <tyhicks@ou.edu>
9 * 10 *
10 * This program is free software; you can redistribute it and/or 11 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License as 12 * modify it under the terms of the GNU General Public License as
@@ -48,6 +49,43 @@ MODULE_PARM_DESC(ecryptfs_verbosity,
48 "Initial verbosity level (0 or 1; defaults to " 49 "Initial verbosity level (0 or 1; defaults to "
49 "0, which is Quiet)"); 50 "0, which is Quiet)");
50 51
52/**
53 * Module parameter that defines the number of netlink message buffer
54 * elements
55 */
56unsigned int ecryptfs_message_buf_len = ECRYPTFS_DEFAULT_MSG_CTX_ELEMS;
57
58module_param(ecryptfs_message_buf_len, uint, 0);
59MODULE_PARM_DESC(ecryptfs_message_buf_len,
60 "Number of message buffer elements");
61
62/**
63 * Module parameter that defines the maximum guaranteed amount of time to wait
64 * for a response through netlink. The actual sleep time will be, more than
65 * likely, a small amount greater than this specified value, but only less if
66 * the netlink message successfully arrives.
67 */
68signed long ecryptfs_message_wait_timeout = ECRYPTFS_MAX_MSG_CTX_TTL / HZ;
69
70module_param(ecryptfs_message_wait_timeout, long, 0);
71MODULE_PARM_DESC(ecryptfs_message_wait_timeout,
72 "Maximum number of seconds that an operation will "
73 "sleep while waiting for a message response from "
74 "userspace");
75
76/**
77 * Module parameter that is an estimate of the maximum number of users
78 * that will be concurrently using eCryptfs. Set this to the right
79 * value to balance performance and memory use.
80 */
81unsigned int ecryptfs_number_of_users = ECRYPTFS_DEFAULT_NUM_USERS;
82
83module_param(ecryptfs_number_of_users, uint, 0);
84MODULE_PARM_DESC(ecryptfs_number_of_users, "An estimate of the number of "
85 "concurrent users of eCryptfs");
86
87unsigned int ecryptfs_transport = ECRYPTFS_DEFAULT_TRANSPORT;
88
51void __ecryptfs_printk(const char *fmt, ...) 89void __ecryptfs_printk(const char *fmt, ...)
52{ 90{
53 va_list args; 91 va_list args;
@@ -124,7 +162,8 @@ out:
124enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig, ecryptfs_opt_debug, 162enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig, ecryptfs_opt_debug,
125 ecryptfs_opt_ecryptfs_debug, ecryptfs_opt_cipher, 163 ecryptfs_opt_ecryptfs_debug, ecryptfs_opt_cipher,
126 ecryptfs_opt_ecryptfs_cipher, ecryptfs_opt_ecryptfs_key_bytes, 164 ecryptfs_opt_ecryptfs_cipher, ecryptfs_opt_ecryptfs_key_bytes,
127 ecryptfs_opt_passthrough, ecryptfs_opt_err }; 165 ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata,
166 ecryptfs_opt_encrypted_view, ecryptfs_opt_err };
128 167
129static match_table_t tokens = { 168static match_table_t tokens = {
130 {ecryptfs_opt_sig, "sig=%s"}, 169 {ecryptfs_opt_sig, "sig=%s"},
@@ -135,6 +174,8 @@ static match_table_t tokens = {
135 {ecryptfs_opt_ecryptfs_cipher, "ecryptfs_cipher=%s"}, 174 {ecryptfs_opt_ecryptfs_cipher, "ecryptfs_cipher=%s"},
136 {ecryptfs_opt_ecryptfs_key_bytes, "ecryptfs_key_bytes=%u"}, 175 {ecryptfs_opt_ecryptfs_key_bytes, "ecryptfs_key_bytes=%u"},
137 {ecryptfs_opt_passthrough, "ecryptfs_passthrough"}, 176 {ecryptfs_opt_passthrough, "ecryptfs_passthrough"},
177 {ecryptfs_opt_xattr_metadata, "ecryptfs_xattr_metadata"},
178 {ecryptfs_opt_encrypted_view, "ecryptfs_encrypted_view"},
138 {ecryptfs_opt_err, NULL} 179 {ecryptfs_opt_err, NULL}
139}; 180};
140 181
@@ -275,6 +316,16 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
275 mount_crypt_stat->flags |= 316 mount_crypt_stat->flags |=
276 ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED; 317 ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED;
277 break; 318 break;
319 case ecryptfs_opt_xattr_metadata:
320 mount_crypt_stat->flags |=
321 ECRYPTFS_XATTR_METADATA_ENABLED;
322 break;
323 case ecryptfs_opt_encrypted_view:
324 mount_crypt_stat->flags |=
325 ECRYPTFS_XATTR_METADATA_ENABLED;
326 mount_crypt_stat->flags |=
327 ECRYPTFS_ENCRYPTED_VIEW_ENABLED;
328 break;
278 case ecryptfs_opt_err: 329 case ecryptfs_opt_err:
279 default: 330 default:
280 ecryptfs_printk(KERN_WARNING, 331 ecryptfs_printk(KERN_WARNING,
@@ -347,9 +398,10 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
347 rc = -EINVAL; 398 rc = -EINVAL;
348 goto out; 399 goto out;
349 } 400 }
350 if (auth_tok->token_type != ECRYPTFS_PASSWORD) { 401 if (auth_tok->token_type != ECRYPTFS_PASSWORD
402 && auth_tok->token_type != ECRYPTFS_PRIVATE_KEY) {
351 ecryptfs_printk(KERN_ERR, "Invalid auth_tok structure " 403 ecryptfs_printk(KERN_ERR, "Invalid auth_tok structure "
352 "returned from key\n"); 404 "returned from key query\n");
353 rc = -EINVAL; 405 rc = -EINVAL;
354 goto out; 406 goto out;
355 } 407 }
@@ -378,15 +430,13 @@ ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent)
378 430
379 /* Released in ecryptfs_put_super() */ 431 /* Released in ecryptfs_put_super() */
380 ecryptfs_set_superblock_private(sb, 432 ecryptfs_set_superblock_private(sb,
381 kmem_cache_alloc(ecryptfs_sb_info_cache, 433 kmem_cache_zalloc(ecryptfs_sb_info_cache,
382 GFP_KERNEL)); 434 GFP_KERNEL));
383 if (!ecryptfs_superblock_to_private(sb)) { 435 if (!ecryptfs_superblock_to_private(sb)) {
384 ecryptfs_printk(KERN_WARNING, "Out of memory\n"); 436 ecryptfs_printk(KERN_WARNING, "Out of memory\n");
385 rc = -ENOMEM; 437 rc = -ENOMEM;
386 goto out; 438 goto out;
387 } 439 }
388 memset(ecryptfs_superblock_to_private(sb), 0,
389 sizeof(struct ecryptfs_sb_info));
390 sb->s_op = &ecryptfs_sops; 440 sb->s_op = &ecryptfs_sops;
391 /* Released through deactivate_super(sb) from get_sb_nodev */ 441 /* Released through deactivate_super(sb) from get_sb_nodev */
392 sb->s_root = d_alloc(NULL, &(const struct qstr) { 442 sb->s_root = d_alloc(NULL, &(const struct qstr) {
@@ -402,7 +452,7 @@ ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent)
402 /* Released in d_release when dput(sb->s_root) is called */ 452 /* Released in d_release when dput(sb->s_root) is called */
403 /* through deactivate_super(sb) from get_sb_nodev() */ 453 /* through deactivate_super(sb) from get_sb_nodev() */
404 ecryptfs_set_dentry_private(sb->s_root, 454 ecryptfs_set_dentry_private(sb->s_root,
405 kmem_cache_alloc(ecryptfs_dentry_info_cache, 455 kmem_cache_zalloc(ecryptfs_dentry_info_cache,
406 GFP_KERNEL)); 456 GFP_KERNEL));
407 if (!ecryptfs_dentry_to_private(sb->s_root)) { 457 if (!ecryptfs_dentry_to_private(sb->s_root)) {
408 ecryptfs_printk(KERN_ERR, 458 ecryptfs_printk(KERN_ERR,
@@ -410,8 +460,6 @@ ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent)
410 rc = -ENOMEM; 460 rc = -ENOMEM;
411 goto out; 461 goto out;
412 } 462 }
413 memset(ecryptfs_dentry_to_private(sb->s_root), 0,
414 sizeof(struct ecryptfs_dentry_info));
415 rc = 0; 463 rc = 0;
416out: 464out:
417 /* Should be able to rely on deactivate_super called from 465 /* Should be able to rely on deactivate_super called from
@@ -594,6 +642,11 @@ static struct ecryptfs_cache_info {
594 .size = PAGE_CACHE_SIZE, 642 .size = PAGE_CACHE_SIZE,
595 }, 643 },
596 { 644 {
645 .cache = &ecryptfs_xattr_cache,
646 .name = "ecryptfs_xattr_cache",
647 .size = PAGE_CACHE_SIZE,
648 },
649 {
597 .cache = &ecryptfs_lower_page_cache, 650 .cache = &ecryptfs_lower_page_cache,
598 .name = "ecryptfs_lower_page_cache", 651 .name = "ecryptfs_lower_page_cache",
599 .size = PAGE_CACHE_SIZE, 652 .size = PAGE_CACHE_SIZE,
@@ -699,7 +752,8 @@ static struct ecryptfs_version_str_map_elem {
699 {ECRYPTFS_VERSIONING_PASSPHRASE, "passphrase"}, 752 {ECRYPTFS_VERSIONING_PASSPHRASE, "passphrase"},
700 {ECRYPTFS_VERSIONING_PUBKEY, "pubkey"}, 753 {ECRYPTFS_VERSIONING_PUBKEY, "pubkey"},
701 {ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH, "plaintext passthrough"}, 754 {ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH, "plaintext passthrough"},
702 {ECRYPTFS_VERSIONING_POLICY, "policy"} 755 {ECRYPTFS_VERSIONING_POLICY, "policy"},
756 {ECRYPTFS_VERSIONING_XATTR, "metadata in extended attribute"}
703}; 757};
704 758
705static ssize_t version_str_show(struct ecryptfs_obj *obj, char *buff) 759static ssize_t version_str_show(struct ecryptfs_obj *obj, char *buff)
@@ -798,6 +852,11 @@ static int __init ecryptfs_init(void)
798 ecryptfs_free_kmem_caches(); 852 ecryptfs_free_kmem_caches();
799 goto out; 853 goto out;
800 } 854 }
855 rc = ecryptfs_init_messaging(ecryptfs_transport);
856 if (rc) {
857 ecryptfs_printk(KERN_ERR, "Failure occured while attempting to "
858 "initialize the eCryptfs netlink socket\n");
859 }
801out: 860out:
802 return rc; 861 return rc;
803} 862}
@@ -809,6 +868,7 @@ static void __exit ecryptfs_exit(void)
809 sysfs_remove_file(&ecryptfs_subsys.kset.kobj, 868 sysfs_remove_file(&ecryptfs_subsys.kset.kobj,
810 &sysfs_attr_version_str.attr); 869 &sysfs_attr_version_str.attr);
811 subsystem_unregister(&ecryptfs_subsys); 870 subsystem_unregister(&ecryptfs_subsys);
871 ecryptfs_release_messaging(ecryptfs_transport);
812 unregister_filesystem(&ecryptfs_fs_type); 872 unregister_filesystem(&ecryptfs_fs_type);
813 ecryptfs_free_kmem_caches(); 873 ecryptfs_free_kmem_caches();
814} 874}
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
new file mode 100644
index 000000000000..47d7e7b611f7
--- /dev/null
+++ b/fs/ecryptfs/messaging.c
@@ -0,0 +1,515 @@
1/**
2 * eCryptfs: Linux filesystem encryption layer
3 *
4 * Copyright (C) 2004-2006 International Business Machines Corp.
5 * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com>
6 * Tyler Hicks <tyhicks@ou.edu>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License version
10 * 2 as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20 * 02111-1307, USA.
21 */
22
23#include "ecryptfs_kernel.h"
24
/* Pool of message contexts: idle contexts sit on the free list; contexts
 * with an outstanding request to userspace sit on the alloc list.  Both
 * lists are protected by ecryptfs_msg_ctx_lists_mux. */
static LIST_HEAD(ecryptfs_msg_ctx_free_list);
static LIST_HEAD(ecryptfs_msg_ctx_alloc_list);
static struct mutex ecryptfs_msg_ctx_lists_mux;

/* Maps a uid to its registered daemon (struct ecryptfs_daemon_id);
 * protected by ecryptfs_daemon_id_hash_mux. */
static struct hlist_head *ecryptfs_daemon_id_hash;
static struct mutex ecryptfs_daemon_id_hash_mux;
/* NOTE(review): despite the name, this value is passed to hash_long()
 * below, whose second argument is a *bit* count, not a bucket count --
 * confirm that every allocation/iteration site agrees on which one it
 * holds. */
static int ecryptfs_hash_buckets;
#define ecryptfs_uid_hash(uid) \
	hash_long((unsigned long)uid, ecryptfs_hash_buckets)

/* Monotonic sequence number stamped onto each outgoing request. */
static unsigned int ecryptfs_msg_counter;
/* Backing array for all message contexts; the list nodes above point
 * into this array, indexed by msg_ctx->index. */
static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr;
37
38/**
39 * ecryptfs_acquire_free_msg_ctx
40 * @msg_ctx: The context that was acquired from the free list
41 *
42 * Acquires a context element from the free list and locks the mutex
43 * on the context. Returns zero on success; non-zero on error or upon
44 * failure to acquire a free context element. Be sure to lock the
45 * list mutex before calling.
46 */
47static int ecryptfs_acquire_free_msg_ctx(struct ecryptfs_msg_ctx **msg_ctx)
48{
49 struct list_head *p;
50 int rc;
51
52 if (list_empty(&ecryptfs_msg_ctx_free_list)) {
53 ecryptfs_printk(KERN_WARNING, "The eCryptfs free "
54 "context list is empty. It may be helpful to "
55 "specify the ecryptfs_message_buf_len "
56 "parameter to be greater than the current "
57 "value of [%d]\n", ecryptfs_message_buf_len);
58 rc = -ENOMEM;
59 goto out;
60 }
61 list_for_each(p, &ecryptfs_msg_ctx_free_list) {
62 *msg_ctx = list_entry(p, struct ecryptfs_msg_ctx, node);
63 if (mutex_trylock(&(*msg_ctx)->mux)) {
64 (*msg_ctx)->task = current;
65 rc = 0;
66 goto out;
67 }
68 }
69 rc = -ENOMEM;
70out:
71 return rc;
72}
73
74/**
75 * ecryptfs_msg_ctx_free_to_alloc
76 * @msg_ctx: The context to move from the free list to the alloc list
77 *
78 * Be sure to lock the list mutex and the context mutex before
79 * calling.
80 */
81static void ecryptfs_msg_ctx_free_to_alloc(struct ecryptfs_msg_ctx *msg_ctx)
82{
83 list_move(&msg_ctx->node, &ecryptfs_msg_ctx_alloc_list);
84 msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_PENDING;
85 msg_ctx->counter = ++ecryptfs_msg_counter;
86}
87
88/**
89 * ecryptfs_msg_ctx_alloc_to_free
90 * @msg_ctx: The context to move from the alloc list to the free list
91 *
92 * Be sure to lock the list mutex and the context mutex before
93 * calling.
94 */
95static void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx)
96{
97 list_move(&(msg_ctx->node), &ecryptfs_msg_ctx_free_list);
98 if (msg_ctx->msg)
99 kfree(msg_ctx->msg);
100 msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_FREE;
101}
102
103/**
104 * ecryptfs_find_daemon_id
105 * @uid: The user id which maps to the desired daemon id
106 * @id: If return value is zero, points to the desired daemon id
107 * pointer
108 *
109 * Search the hash list for the given user id. Returns zero if the
110 * user id exists in the list; non-zero otherwise. The daemon id hash
111 * mutex should be held before calling this function.
112 */
113static int ecryptfs_find_daemon_id(uid_t uid, struct ecryptfs_daemon_id **id)
114{
115 struct hlist_node *elem;
116 int rc;
117
118 hlist_for_each_entry(*id, elem,
119 &ecryptfs_daemon_id_hash[ecryptfs_uid_hash(uid)],
120 id_chain) {
121 if ((*id)->uid == uid) {
122 rc = 0;
123 goto out;
124 }
125 }
126 rc = -EINVAL;
127out:
128 return rc;
129}
130
131static int ecryptfs_send_raw_message(unsigned int transport, u16 msg_type,
132 pid_t pid)
133{
134 int rc;
135
136 switch(transport) {
137 case ECRYPTFS_TRANSPORT_NETLINK:
138 rc = ecryptfs_send_netlink(NULL, 0, NULL, msg_type, 0, pid);
139 break;
140 case ECRYPTFS_TRANSPORT_CONNECTOR:
141 case ECRYPTFS_TRANSPORT_RELAYFS:
142 default:
143 rc = -ENOSYS;
144 }
145 return rc;
146}
147
148/**
149 * ecryptfs_process_helo
150 * @transport: The underlying transport (netlink, etc.)
151 * @uid: The user ID owner of the message
152 * @pid: The process ID for the userspace program that sent the
153 * message
154 *
155 * Adds the uid and pid values to the daemon id hash. If a uid
156 * already has a daemon pid registered, the daemon will be
157 * unregistered before the new daemon id is put into the hash list.
158 * Returns zero after adding a new daemon id to the hash list;
159 * non-zero otherwise.
160 */
161int ecryptfs_process_helo(unsigned int transport, uid_t uid, pid_t pid)
162{
163 struct ecryptfs_daemon_id *new_id;
164 struct ecryptfs_daemon_id *old_id;
165 int rc;
166
167 mutex_lock(&ecryptfs_daemon_id_hash_mux);
168 new_id = kmalloc(sizeof(*new_id), GFP_KERNEL);
169 if (!new_id) {
170 rc = -ENOMEM;
171 ecryptfs_printk(KERN_ERR, "Failed to allocate memory; unable "
172 "to register daemon [%d] for user\n", pid, uid);
173 goto unlock;
174 }
175 if (!ecryptfs_find_daemon_id(uid, &old_id)) {
176 printk(KERN_WARNING "Received request from user [%d] "
177 "to register daemon [%d]; unregistering daemon "
178 "[%d]\n", uid, pid, old_id->pid);
179 hlist_del(&old_id->id_chain);
180 rc = ecryptfs_send_raw_message(transport, ECRYPTFS_NLMSG_QUIT,
181 old_id->pid);
182 if (rc)
183 printk(KERN_WARNING "Failed to send QUIT "
184 "message to daemon [%d]; rc = [%d]\n",
185 old_id->pid, rc);
186 kfree(old_id);
187 }
188 new_id->uid = uid;
189 new_id->pid = pid;
190 hlist_add_head(&new_id->id_chain,
191 &ecryptfs_daemon_id_hash[ecryptfs_uid_hash(uid)]);
192 rc = 0;
193unlock:
194 mutex_unlock(&ecryptfs_daemon_id_hash_mux);
195 return rc;
196}
197
198/**
199 * ecryptfs_process_quit
200 * @uid: The user ID owner of the message
201 * @pid: The process ID for the userspace program that sent the
202 * message
203 *
204 * Deletes the corresponding daemon id for the given uid and pid, if
205 * it is the registered that is requesting the deletion. Returns zero
206 * after deleting the desired daemon id; non-zero otherwise.
207 */
208int ecryptfs_process_quit(uid_t uid, pid_t pid)
209{
210 struct ecryptfs_daemon_id *id;
211 int rc;
212
213 mutex_lock(&ecryptfs_daemon_id_hash_mux);
214 if (ecryptfs_find_daemon_id(uid, &id)) {
215 rc = -EINVAL;
216 ecryptfs_printk(KERN_ERR, "Received request from user [%d] to "
217 "unregister unrecognized daemon [%d]\n", uid,
218 pid);
219 goto unlock;
220 }
221 if (id->pid != pid) {
222 rc = -EINVAL;
223 ecryptfs_printk(KERN_WARNING, "Received request from user [%d] "
224 "with pid [%d] to unregister daemon [%d]\n",
225 uid, pid, id->pid);
226 goto unlock;
227 }
228 hlist_del(&id->id_chain);
229 kfree(id);
230 rc = 0;
231unlock:
232 mutex_unlock(&ecryptfs_daemon_id_hash_mux);
233 return rc;
234}
235
/**
 * ecryptfs_process_response
 * @msg: The ecryptfs message received; the caller should sanity check
 *       msg->data_len
 * @uid: The claimed uid of the sender; must match the euid of the
 *       task that originated the request
 * @pid: The process ID of the userspace application that sent the
 *       message
 * @seq: The sequence number of the message
 *
 * Processes a response message after sending an operation request to
 * userspace.  Returns zero upon delivery to desired context element;
 * non-zero upon delivery failure or error.
 */
int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t uid,
			      pid_t pid, u32 seq)
{
	struct ecryptfs_daemon_id *id;
	struct ecryptfs_msg_ctx *msg_ctx;
	int msg_size;
	int rc;

	/* msg->index comes from userspace and selects the context
	 * directly, so bound-check it first. */
	if (msg->index >= ecryptfs_message_buf_len) {
		rc = -EINVAL;
		ecryptfs_printk(KERN_ERR, "Attempt to reference "
				"context buffer at index [%d]; maximum "
				"allowable is [%d]\n", msg->index,
				(ecryptfs_message_buf_len - 1));
		goto out;
	}
	msg_ctx = &ecryptfs_msg_ctx_arr[msg->index];
	mutex_lock(&msg_ctx->mux);
	/* NOTE(review): ecryptfs_find_daemon_id() is documented as
	 * requiring ecryptfs_daemon_id_hash_mux to be held, but it is
	 * called here with only msg_ctx->mux held -- confirm. */
	if (ecryptfs_find_daemon_id(msg_ctx->task->euid, &id)) {
		rc = -EBADMSG;
		ecryptfs_printk(KERN_WARNING, "User [%d] received a "
				"message response from process [%d] but does "
				"not have a registered daemon\n",
				msg_ctx->task->euid, pid);
		/* Still wake the requester so it fails fast instead of
		 * sleeping until its timeout expires. */
		goto wake_up;
	}
	/* The sender's uid must match the euid of the requesting task. */
	if (msg_ctx->task->euid != uid) {
		rc = -EBADMSG;
		ecryptfs_printk(KERN_WARNING, "Received message from user "
				"[%d]; expected message from user [%d]\n",
				uid, msg_ctx->task->euid);
		goto unlock;
	}
	/* Only the registered daemon's pid may answer. */
	if (id->pid != pid) {
		rc = -EBADMSG;
		ecryptfs_printk(KERN_ERR, "User [%d] received a "
				"message response from an unrecognized "
				"process [%d]\n", msg_ctx->task->euid, pid);
		goto unlock;
	}
	if (msg_ctx->state != ECRYPTFS_MSG_CTX_STATE_PENDING) {
		rc = -EINVAL;
		ecryptfs_printk(KERN_WARNING, "Desired context element is not "
				"pending a response\n");
		goto unlock;
	} else if (msg_ctx->counter != seq) {
		rc = -EINVAL;
		ecryptfs_printk(KERN_WARNING, "Invalid message sequence; "
				"expected [%d]; received [%d]\n",
				msg_ctx->counter, seq);
		goto unlock;
	}
	/* Copy the response into the context so the sleeping requester
	 * can claim it in ecryptfs_wait_for_response(). */
	msg_size = sizeof(*msg) + msg->data_len;
	msg_ctx->msg = kmalloc(msg_size, GFP_KERNEL);
	if (!msg_ctx->msg) {
		rc = -ENOMEM;
		ecryptfs_printk(KERN_ERR, "Failed to allocate memory\n");
		goto unlock;
	}
	memcpy(msg_ctx->msg, msg, msg_size);
	msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_DONE;
	rc = 0;
wake_up:
	wake_up_process(msg_ctx->task);
unlock:
	mutex_unlock(&msg_ctx->mux);
out:
	return rc;
}
317
/**
 * ecryptfs_send_message
 * @transport: The transport over which to send the message (i.e.,
 *             netlink)
 * @data: The data to send
 * @data_len: The length of data
 * @msg_ctx: The message context allocated for the send
 *
 * Sends a request to the daemon registered for the current task's
 * euid.  On success, *msg_ctx is the pending context element to pass
 * to ecryptfs_wait_for_response().  Returns zero on success; non-zero
 * on error.
 */
int ecryptfs_send_message(unsigned int transport, char *data, int data_len,
			  struct ecryptfs_msg_ctx **msg_ctx)
{
	struct ecryptfs_daemon_id *id;
	int rc;

	mutex_lock(&ecryptfs_daemon_id_hash_mux);
	if (ecryptfs_find_daemon_id(current->euid, &id)) {
		mutex_unlock(&ecryptfs_daemon_id_hash_mux);
		rc = -ENOTCONN;
		ecryptfs_printk(KERN_ERR, "User [%d] does not have a daemon "
				"registered\n", current->euid);
		goto out;
	}
	mutex_unlock(&ecryptfs_daemon_id_hash_mux);
	/* NOTE(review): id is dereferenced below (id->pid) after the
	 * hash mutex has been dropped; a concurrent
	 * ecryptfs_process_quit() could kfree() it in the meantime.
	 * Looks like a use-after-free window -- confirm. */
	mutex_lock(&ecryptfs_msg_ctx_lists_mux);
	/* Claim a free context; its mutex is held on success. */
	rc = ecryptfs_acquire_free_msg_ctx(msg_ctx);
	if (rc) {
		mutex_unlock(&ecryptfs_msg_ctx_lists_mux);
		ecryptfs_printk(KERN_WARNING, "Could not claim a free "
				"context element\n");
		goto out;
	}
	/* Move it to the allocated list and stamp the sequence number. */
	ecryptfs_msg_ctx_free_to_alloc(*msg_ctx);
	mutex_unlock(&(*msg_ctx)->mux);
	mutex_unlock(&ecryptfs_msg_ctx_lists_mux);
	switch (transport) {
	case ECRYPTFS_TRANSPORT_NETLINK:
		rc = ecryptfs_send_netlink(data, data_len, *msg_ctx,
					   ECRYPTFS_NLMSG_REQUEST, 0, id->pid);
		break;
	case ECRYPTFS_TRANSPORT_CONNECTOR:
	case ECRYPTFS_TRANSPORT_RELAYFS:
	default:
		rc = -ENOSYS;	/* only netlink is implemented */
	}
	if (rc) {
		printk(KERN_ERR "Error attempting to send message to userspace "
		       "daemon; rc = [%d]\n", rc);
	}
out:
	return rc;
}
369
/**
 * ecryptfs_wait_for_response
 * @msg_ctx: The context that was assigned when sending a message
 * @msg: The incoming message from userspace; not set if rc != 0
 *
 * Sleeps until awaken by ecryptfs_receive_message or until the amount
 * of time exceeds ecryptfs_message_wait_timeout.  If zero is
 * returned, msg will point to a valid message from userspace; a
 * non-zero value is returned upon failure to receive a message or an
 * error occurs.  On success the context's ownership of the message
 * passes to the caller (presumably the caller must kfree() it --
 * confirm against the call sites).
 */
int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx,
			       struct ecryptfs_message **msg)
{
	signed long timeout = ecryptfs_message_wait_timeout * HZ;
	int rc = 0;

sleep:
	timeout = schedule_timeout_interruptible(timeout);
	mutex_lock(&ecryptfs_msg_ctx_lists_mux);
	mutex_lock(&msg_ctx->mux);
	if (msg_ctx->state != ECRYPTFS_MSG_CTX_STATE_DONE) {
		if (timeout) {
			/* Woken before the response arrived (signal or
			 * early wakeup); go back to sleep for the
			 * remaining time. */
			mutex_unlock(&msg_ctx->mux);
			mutex_unlock(&ecryptfs_msg_ctx_lists_mux);
			goto sleep;
		}
		rc = -ENOMSG;	/* timed out with no response */
	} else {
		/* Detach the response from the context so that freeing
		 * the context below does not discard it. */
		*msg = msg_ctx->msg;
		msg_ctx->msg = NULL;
	}
	/* Return the context to the free pool in either case. */
	ecryptfs_msg_ctx_alloc_to_free(msg_ctx);
	mutex_unlock(&msg_ctx->mux);
	mutex_unlock(&ecryptfs_msg_ctx_lists_mux);
	return rc;
}
407
408int ecryptfs_init_messaging(unsigned int transport)
409{
410 int i;
411 int rc = 0;
412
413 if (ecryptfs_number_of_users > ECRYPTFS_MAX_NUM_USERS) {
414 ecryptfs_number_of_users = ECRYPTFS_MAX_NUM_USERS;
415 ecryptfs_printk(KERN_WARNING, "Specified number of users is "
416 "too large, defaulting to [%d] users\n",
417 ecryptfs_number_of_users);
418 }
419 mutex_init(&ecryptfs_daemon_id_hash_mux);
420 mutex_lock(&ecryptfs_daemon_id_hash_mux);
421 ecryptfs_hash_buckets = 0;
422 while (ecryptfs_number_of_users >> ++ecryptfs_hash_buckets);
423 ecryptfs_daemon_id_hash = kmalloc(sizeof(struct hlist_head)
424 * ecryptfs_hash_buckets, GFP_KERNEL);
425 if (!ecryptfs_daemon_id_hash) {
426 rc = -ENOMEM;
427 ecryptfs_printk(KERN_ERR, "Failed to allocate memory\n");
428 goto out;
429 }
430 for (i = 0; i < ecryptfs_hash_buckets; i++)
431 INIT_HLIST_HEAD(&ecryptfs_daemon_id_hash[i]);
432 mutex_unlock(&ecryptfs_daemon_id_hash_mux);
433
434 ecryptfs_msg_ctx_arr = kmalloc((sizeof(struct ecryptfs_msg_ctx)
435 * ecryptfs_message_buf_len), GFP_KERNEL);
436 if (!ecryptfs_msg_ctx_arr) {
437 rc = -ENOMEM;
438 ecryptfs_printk(KERN_ERR, "Failed to allocate memory\n");
439 goto out;
440 }
441 mutex_init(&ecryptfs_msg_ctx_lists_mux);
442 mutex_lock(&ecryptfs_msg_ctx_lists_mux);
443 ecryptfs_msg_counter = 0;
444 for (i = 0; i < ecryptfs_message_buf_len; i++) {
445 INIT_LIST_HEAD(&ecryptfs_msg_ctx_arr[i].node);
446 mutex_init(&ecryptfs_msg_ctx_arr[i].mux);
447 mutex_lock(&ecryptfs_msg_ctx_arr[i].mux);
448 ecryptfs_msg_ctx_arr[i].index = i;
449 ecryptfs_msg_ctx_arr[i].state = ECRYPTFS_MSG_CTX_STATE_FREE;
450 ecryptfs_msg_ctx_arr[i].counter = 0;
451 ecryptfs_msg_ctx_arr[i].task = NULL;
452 ecryptfs_msg_ctx_arr[i].msg = NULL;
453 list_add_tail(&ecryptfs_msg_ctx_arr[i].node,
454 &ecryptfs_msg_ctx_free_list);
455 mutex_unlock(&ecryptfs_msg_ctx_arr[i].mux);
456 }
457 mutex_unlock(&ecryptfs_msg_ctx_lists_mux);
458 switch(transport) {
459 case ECRYPTFS_TRANSPORT_NETLINK:
460 rc = ecryptfs_init_netlink();
461 if (rc)
462 ecryptfs_release_messaging(transport);
463 break;
464 case ECRYPTFS_TRANSPORT_CONNECTOR:
465 case ECRYPTFS_TRANSPORT_RELAYFS:
466 default:
467 rc = -ENOSYS;
468 }
469out:
470 return rc;
471}
472
473void ecryptfs_release_messaging(unsigned int transport)
474{
475 if (ecryptfs_msg_ctx_arr) {
476 int i;
477
478 mutex_lock(&ecryptfs_msg_ctx_lists_mux);
479 for (i = 0; i < ecryptfs_message_buf_len; i++) {
480 mutex_lock(&ecryptfs_msg_ctx_arr[i].mux);
481 if (ecryptfs_msg_ctx_arr[i].msg)
482 kfree(ecryptfs_msg_ctx_arr[i].msg);
483 mutex_unlock(&ecryptfs_msg_ctx_arr[i].mux);
484 }
485 kfree(ecryptfs_msg_ctx_arr);
486 mutex_unlock(&ecryptfs_msg_ctx_lists_mux);
487 }
488 if (ecryptfs_daemon_id_hash) {
489 struct hlist_node *elem;
490 struct ecryptfs_daemon_id *id;
491 int i;
492
493 mutex_lock(&ecryptfs_daemon_id_hash_mux);
494 for (i = 0; i < ecryptfs_hash_buckets; i++) {
495 hlist_for_each_entry(id, elem,
496 &ecryptfs_daemon_id_hash[i],
497 id_chain) {
498 hlist_del(elem);
499 kfree(id);
500 }
501 }
502 kfree(ecryptfs_daemon_id_hash);
503 mutex_unlock(&ecryptfs_daemon_id_hash_mux);
504 }
505 switch(transport) {
506 case ECRYPTFS_TRANSPORT_NETLINK:
507 ecryptfs_release_netlink();
508 break;
509 case ECRYPTFS_TRANSPORT_CONNECTOR:
510 case ECRYPTFS_TRANSPORT_RELAYFS:
511 default:
512 break;
513 }
514 return;
515}
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 06843d24f239..3a6f65c3f14f 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -6,7 +6,7 @@
6 * 6 *
7 * Copyright (C) 1997-2003 Erez Zadok 7 * Copyright (C) 1997-2003 Erez Zadok
8 * Copyright (C) 2001-2003 Stony Brook University 8 * Copyright (C) 2001-2003 Stony Brook University
9 * Copyright (C) 2004-2006 International Business Machines Corp. 9 * Copyright (C) 2004-2007 International Business Machines Corp.
10 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com> 10 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
11 * 11 *
12 * This program is free software; you can redistribute it and/or 12 * This program is free software; you can redistribute it and/or
@@ -234,22 +234,13 @@ int ecryptfs_do_readpage(struct file *file, struct page *page,
234 goto out; 234 goto out;
235 } 235 }
236 wait_on_page_locked(lower_page); 236 wait_on_page_locked(lower_page);
237 page_data = (char *)kmap(page); 237 page_data = kmap_atomic(page, KM_USER0);
238 if (!page_data) { 238 lower_page_data = kmap_atomic(lower_page, KM_USER1);
239 rc = -ENOMEM;
240 ecryptfs_printk(KERN_ERR, "Error mapping page\n");
241 goto out;
242 }
243 lower_page_data = (char *)kmap(lower_page);
244 if (!lower_page_data) {
245 rc = -ENOMEM;
246 ecryptfs_printk(KERN_ERR, "Error mapping page\n");
247 kunmap(page);
248 goto out;
249 }
250 memcpy(page_data, lower_page_data, PAGE_CACHE_SIZE); 239 memcpy(page_data, lower_page_data, PAGE_CACHE_SIZE);
251 kunmap(lower_page); 240 kunmap_atomic(lower_page_data, KM_USER1);
252 kunmap(page); 241 flush_dcache_page(lower_page);
242 kunmap_atomic(page_data, KM_USER0);
243 flush_dcache_page(page);
253 rc = 0; 244 rc = 0;
254out: 245out:
255 if (likely(lower_page)) 246 if (likely(lower_page))
@@ -260,6 +251,33 @@ out:
260 ClearPageUptodate(page); 251 ClearPageUptodate(page);
261 return rc; 252 return rc;
262} 253}
/**
 * Header Extent:
 *   Octets 0-7:        Unencrypted file size (big-endian)
 *   Octets 8-15:       eCryptfs special marker
 *   Octets 16-19:      Flags
 *    Octet 16:         File format version number (between 0 and 255)
 *    Octets 17-18:     Reserved
 *    Octet 19:         Bit 1 (lsb): Reserved
 *                      Bit 2: Encrypted?
 *                      Bits 3-8: Reserved
 *   Octets 20-23:      Header extent size (big-endian)
 *   Octets 24-25:      Number of header extents at front of file
 *                      (big-endian)
 *   Octet  26:         Begin RFC 2440 authentication token packet set
 */
static void set_header_info(char *page_virt,
			    struct ecryptfs_crypt_stat *crypt_stat)
{
	/* Filled in by ecryptfs_write_header_metadata(); not consumed
	 * here. */
	size_t written;
	int save_num_header_extents_at_front =
		crypt_stat->num_header_extents_at_front;

	/* Temporarily claim exactly one header extent while writing the
	 * metadata -- presumably so the synthesized encrypted view
	 * advertises a single header page; confirm against
	 * ecryptfs_write_header_metadata(). */
	crypt_stat->num_header_extents_at_front = 1;
	/* Offset 20 = start of the "header extent size" field in the
	 * layout described above. */
	ecryptfs_write_header_metadata(page_virt + 20, crypt_stat, &written);
	crypt_stat->num_header_extents_at_front =
		save_num_header_extents_at_front;
}
263 281
264/** 282/**
265 * ecryptfs_readpage 283 * ecryptfs_readpage
@@ -279,8 +297,8 @@ static int ecryptfs_readpage(struct file *file, struct page *page)
279 crypt_stat = &ecryptfs_inode_to_private(file->f_path.dentry->d_inode) 297 crypt_stat = &ecryptfs_inode_to_private(file->f_path.dentry->d_inode)
280 ->crypt_stat; 298 ->crypt_stat;
281 if (!crypt_stat 299 if (!crypt_stat
282 || !ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED) 300 || !(crypt_stat->flags & ECRYPTFS_ENCRYPTED)
283 || ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_NEW_FILE)) { 301 || (crypt_stat->flags & ECRYPTFS_NEW_FILE)) {
284 ecryptfs_printk(KERN_DEBUG, 302 ecryptfs_printk(KERN_DEBUG,
285 "Passing through unencrypted page\n"); 303 "Passing through unencrypted page\n");
286 rc = ecryptfs_do_readpage(file, page, page->index); 304 rc = ecryptfs_do_readpage(file, page, page->index);
@@ -289,10 +307,51 @@ static int ecryptfs_readpage(struct file *file, struct page *page)
289 "[%d]\n", rc); 307 "[%d]\n", rc);
290 goto out; 308 goto out;
291 } 309 }
310 } else if (crypt_stat->flags & ECRYPTFS_VIEW_AS_ENCRYPTED) {
311 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) {
312 int num_pages_in_header_region =
313 (crypt_stat->header_extent_size
314 / PAGE_CACHE_SIZE);
315
316 if (page->index < num_pages_in_header_region) {
317 char *page_virt;
318
319 page_virt = kmap_atomic(page, KM_USER0);
320 memset(page_virt, 0, PAGE_CACHE_SIZE);
321 if (page->index == 0) {
322 rc = ecryptfs_read_xattr_region(
323 page_virt, file->f_path.dentry);
324 set_header_info(page_virt, crypt_stat);
325 }
326 kunmap_atomic(page_virt, KM_USER0);
327 flush_dcache_page(page);
328 if (rc) {
329 printk(KERN_ERR "Error reading xattr "
330 "region\n");
331 goto out;
332 }
333 } else {
334 rc = ecryptfs_do_readpage(
335 file, page,
336 (page->index
337 - num_pages_in_header_region));
338 if (rc) {
339 printk(KERN_ERR "Error reading page; "
340 "rc = [%d]\n", rc);
341 goto out;
342 }
343 }
344 } else {
345 rc = ecryptfs_do_readpage(file, page, page->index);
346 if (rc) {
347 printk(KERN_ERR "Error reading page; rc = "
348 "[%d]\n", rc);
349 goto out;
350 }
351 }
292 } else { 352 } else {
293 rc = ecryptfs_decrypt_page(file, page); 353 rc = ecryptfs_decrypt_page(file, page);
294 if (rc) { 354 if (rc) {
295
296 ecryptfs_printk(KERN_ERR, "Error decrypting page; " 355 ecryptfs_printk(KERN_ERR, "Error decrypting page; "
297 "rc = [%d]\n", rc); 356 "rc = [%d]\n", rc);
298 goto out; 357 goto out;
@@ -308,30 +367,27 @@ out:
308 return rc; 367 return rc;
309} 368}
310 369
370/**
371 * Called with lower inode mutex held.
372 */
311static int fill_zeros_to_end_of_page(struct page *page, unsigned int to) 373static int fill_zeros_to_end_of_page(struct page *page, unsigned int to)
312{ 374{
313 struct inode *inode = page->mapping->host; 375 struct inode *inode = page->mapping->host;
314 int end_byte_in_page; 376 int end_byte_in_page;
315 int rc = 0;
316 char *page_virt; 377 char *page_virt;
317 378
318 if ((i_size_read(inode) / PAGE_CACHE_SIZE) == page->index) { 379 if ((i_size_read(inode) / PAGE_CACHE_SIZE) != page->index)
319 end_byte_in_page = i_size_read(inode) % PAGE_CACHE_SIZE; 380 goto out;
320 if (to > end_byte_in_page) 381 end_byte_in_page = i_size_read(inode) % PAGE_CACHE_SIZE;
321 end_byte_in_page = to; 382 if (to > end_byte_in_page)
322 page_virt = kmap(page); 383 end_byte_in_page = to;
323 if (!page_virt) { 384 page_virt = kmap_atomic(page, KM_USER0);
324 rc = -ENOMEM; 385 memset((page_virt + end_byte_in_page), 0,
325 ecryptfs_printk(KERN_WARNING, 386 (PAGE_CACHE_SIZE - end_byte_in_page));
326 "Could not map page\n"); 387 kunmap_atomic(page_virt, KM_USER0);
327 goto out; 388 flush_dcache_page(page);
328 }
329 memset((page_virt + end_byte_in_page), 0,
330 (PAGE_CACHE_SIZE - end_byte_in_page));
331 kunmap(page);
332 }
333out: 389out:
334 return rc; 390 return 0;
335} 391}
336 392
337static int ecryptfs_prepare_write(struct file *file, struct page *page, 393static int ecryptfs_prepare_write(struct file *file, struct page *page,
@@ -339,7 +395,6 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
339{ 395{
340 int rc = 0; 396 int rc = 0;
341 397
342 kmap(page);
343 if (from == 0 && to == PAGE_CACHE_SIZE) 398 if (from == 0 && to == PAGE_CACHE_SIZE)
344 goto out; /* If we are writing a full page, it will be 399 goto out; /* If we are writing a full page, it will be
345 up to date. */ 400 up to date. */
@@ -349,30 +404,6 @@ out:
349 return rc; 404 return rc;
350} 405}
351 406
352int ecryptfs_grab_and_map_lower_page(struct page **lower_page,
353 char **lower_virt,
354 struct inode *lower_inode,
355 unsigned long lower_page_index)
356{
357 int rc = 0;
358
359 (*lower_page) = grab_cache_page(lower_inode->i_mapping,
360 lower_page_index);
361 if (!(*lower_page)) {
362 ecryptfs_printk(KERN_ERR, "grab_cache_page for "
363 "lower_page_index = [0x%.16x] failed\n",
364 lower_page_index);
365 rc = -EINVAL;
366 goto out;
367 }
368 if (lower_virt)
369 (*lower_virt) = kmap((*lower_page));
370 else
371 kmap((*lower_page));
372out:
373 return rc;
374}
375
376int ecryptfs_writepage_and_release_lower_page(struct page *lower_page, 407int ecryptfs_writepage_and_release_lower_page(struct page *lower_page,
377 struct inode *lower_inode, 408 struct inode *lower_inode,
378 struct writeback_control *wbc) 409 struct writeback_control *wbc)
@@ -391,11 +422,8 @@ out:
391 return rc; 422 return rc;
392} 423}
393 424
394static void ecryptfs_unmap_and_release_lower_page(struct page *lower_page) 425static void ecryptfs_release_lower_page(struct page *lower_page)
395{ 426{
396 kunmap(lower_page);
397 ecryptfs_printk(KERN_DEBUG, "Unlocking lower page with index = "
398 "[0x%.16x]\n", lower_page->index);
399 unlock_page(lower_page); 427 unlock_page(lower_page);
400 page_cache_release(lower_page); 428 page_cache_release(lower_page);
401} 429}
@@ -407,10 +435,9 @@ static void ecryptfs_unmap_and_release_lower_page(struct page *lower_page)
407 * 435 *
408 * Returns zero on success; non-zero on error. 436 * Returns zero on success; non-zero on error.
409 */ 437 */
410int 438static int ecryptfs_write_inode_size_to_header(struct file *lower_file,
411ecryptfs_write_inode_size_to_header(struct file *lower_file, 439 struct inode *lower_inode,
412 struct inode *lower_inode, 440 struct inode *inode)
413 struct inode *inode)
414{ 441{
415 int rc = 0; 442 int rc = 0;
416 struct page *header_page; 443 struct page *header_page;
@@ -418,11 +445,11 @@ ecryptfs_write_inode_size_to_header(struct file *lower_file,
418 const struct address_space_operations *lower_a_ops; 445 const struct address_space_operations *lower_a_ops;
419 u64 file_size; 446 u64 file_size;
420 447
421 rc = ecryptfs_grab_and_map_lower_page(&header_page, &header_virt, 448 header_page = grab_cache_page(lower_inode->i_mapping, 0);
422 lower_inode, 0); 449 if (!header_page) {
423 if (rc) { 450 ecryptfs_printk(KERN_ERR, "grab_cache_page for "
424 ecryptfs_printk(KERN_ERR, "grab_cache_page for header page " 451 "lower_page_index 0 failed\n");
425 "failed\n"); 452 rc = -EINVAL;
426 goto out; 453 goto out;
427 } 454 }
428 lower_a_ops = lower_inode->i_mapping->a_ops; 455 lower_a_ops = lower_inode->i_mapping->a_ops;
@@ -430,18 +457,95 @@ ecryptfs_write_inode_size_to_header(struct file *lower_file,
430 file_size = (u64)i_size_read(inode); 457 file_size = (u64)i_size_read(inode);
431 ecryptfs_printk(KERN_DEBUG, "Writing size: [0x%.16x]\n", file_size); 458 ecryptfs_printk(KERN_DEBUG, "Writing size: [0x%.16x]\n", file_size);
432 file_size = cpu_to_be64(file_size); 459 file_size = cpu_to_be64(file_size);
460 header_virt = kmap_atomic(header_page, KM_USER0);
433 memcpy(header_virt, &file_size, sizeof(u64)); 461 memcpy(header_virt, &file_size, sizeof(u64));
462 kunmap_atomic(header_virt, KM_USER0);
463 flush_dcache_page(header_page);
434 rc = lower_a_ops->commit_write(lower_file, header_page, 0, 8); 464 rc = lower_a_ops->commit_write(lower_file, header_page, 0, 8);
435 if (rc < 0) 465 if (rc < 0)
436 ecryptfs_printk(KERN_ERR, "Error commiting header page " 466 ecryptfs_printk(KERN_ERR, "Error commiting header page "
437 "write\n"); 467 "write\n");
438 ecryptfs_unmap_and_release_lower_page(header_page); 468 ecryptfs_release_lower_page(header_page);
439 lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME; 469 lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME;
440 mark_inode_dirty_sync(inode); 470 mark_inode_dirty_sync(inode);
441out: 471out:
442 return rc; 472 return rc;
443} 473}
444 474
475static int ecryptfs_write_inode_size_to_xattr(struct inode *lower_inode,
476 struct inode *inode,
477 struct dentry *ecryptfs_dentry,
478 int lower_i_mutex_held)
479{
480 ssize_t size;
481 void *xattr_virt;
482 struct dentry *lower_dentry;
483 u64 file_size;
484 int rc;
485
486 xattr_virt = kmem_cache_alloc(ecryptfs_xattr_cache, GFP_KERNEL);
487 if (!xattr_virt) {
488 printk(KERN_ERR "Out of memory whilst attempting to write "
489 "inode size to xattr\n");
490 rc = -ENOMEM;
491 goto out;
492 }
493 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
494 if (!lower_dentry->d_inode->i_op->getxattr) {
495 printk(KERN_WARNING
496 "No support for setting xattr in lower filesystem\n");
497 rc = -ENOSYS;
498 kmem_cache_free(ecryptfs_xattr_cache, xattr_virt);
499 goto out;
500 }
501 if (!lower_i_mutex_held)
502 mutex_lock(&lower_dentry->d_inode->i_mutex);
503 size = lower_dentry->d_inode->i_op->getxattr(lower_dentry,
504 ECRYPTFS_XATTR_NAME,
505 xattr_virt,
506 PAGE_CACHE_SIZE);
507 if (!lower_i_mutex_held)
508 mutex_unlock(&lower_dentry->d_inode->i_mutex);
509 if (size < 0)
510 size = 8;
511 file_size = (u64)i_size_read(inode);
512 file_size = cpu_to_be64(file_size);
513 memcpy(xattr_virt, &file_size, sizeof(u64));
514 if (!lower_i_mutex_held)
515 mutex_lock(&lower_dentry->d_inode->i_mutex);
516 rc = lower_dentry->d_inode->i_op->setxattr(lower_dentry,
517 ECRYPTFS_XATTR_NAME,
518 xattr_virt, size, 0);
519 if (!lower_i_mutex_held)
520 mutex_unlock(&lower_dentry->d_inode->i_mutex);
521 if (rc)
522 printk(KERN_ERR "Error whilst attempting to write inode size "
523 "to lower file xattr; rc = [%d]\n", rc);
524 kmem_cache_free(ecryptfs_xattr_cache, xattr_virt);
525out:
526 return rc;
527}
528
529int
530ecryptfs_write_inode_size_to_metadata(struct file *lower_file,
531 struct inode *lower_inode,
532 struct inode *inode,
533 struct dentry *ecryptfs_dentry,
534 int lower_i_mutex_held)
535{
536 struct ecryptfs_crypt_stat *crypt_stat;
537
538 crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat;
539 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
540 return ecryptfs_write_inode_size_to_xattr(lower_inode, inode,
541 ecryptfs_dentry,
542 lower_i_mutex_held);
543 else
544 return ecryptfs_write_inode_size_to_header(lower_file,
545 lower_inode,
546 inode);
547}
548
445int ecryptfs_get_lower_page(struct page **lower_page, struct inode *lower_inode, 549int ecryptfs_get_lower_page(struct page **lower_page, struct inode *lower_inode,
446 struct file *lower_file, 550 struct file *lower_file,
447 unsigned long lower_page_index, int byte_offset, 551 unsigned long lower_page_index, int byte_offset,
@@ -449,10 +553,10 @@ int ecryptfs_get_lower_page(struct page **lower_page, struct inode *lower_inode,
449{ 553{
450 int rc = 0; 554 int rc = 0;
451 555
452 rc = ecryptfs_grab_and_map_lower_page(lower_page, NULL, lower_inode, 556 *lower_page = grab_cache_page(lower_inode->i_mapping, lower_page_index);
453 lower_page_index); 557 if (!(*lower_page)) {
454 if (rc) { 558 rc = -EINVAL;
455 ecryptfs_printk(KERN_ERR, "Error attempting to grab and map " 559 ecryptfs_printk(KERN_ERR, "Error attempting to grab "
456 "lower page with index [0x%.16x]\n", 560 "lower page with index [0x%.16x]\n",
457 lower_page_index); 561 lower_page_index);
458 goto out; 562 goto out;
@@ -468,7 +572,7 @@ int ecryptfs_get_lower_page(struct page **lower_page, struct inode *lower_inode,
468 } 572 }
469out: 573out:
470 if (rc && (*lower_page)) { 574 if (rc && (*lower_page)) {
471 ecryptfs_unmap_and_release_lower_page(*lower_page); 575 ecryptfs_release_lower_page(*lower_page);
472 (*lower_page) = NULL; 576 (*lower_page) = NULL;
473 } 577 }
474 return rc; 578 return rc;
@@ -493,7 +597,7 @@ ecryptfs_commit_lower_page(struct page *lower_page, struct inode *lower_inode,
493 "Error committing write; rc = [%d]\n", rc); 597 "Error committing write; rc = [%d]\n", rc);
494 } else 598 } else
495 rc = 0; 599 rc = 0;
496 ecryptfs_unmap_and_release_lower_page(lower_page); 600 ecryptfs_release_lower_page(lower_page);
497 return rc; 601 return rc;
498} 602}
499 603
@@ -528,89 +632,7 @@ out:
528 return rc; 632 return rc;
529} 633}
530 634
531static int 635struct kmem_cache *ecryptfs_xattr_cache;
532process_new_file(struct ecryptfs_crypt_stat *crypt_stat,
533 struct file *file, struct inode *inode)
534{
535 struct page *header_page;
536 const struct address_space_operations *lower_a_ops;
537 struct inode *lower_inode;
538 struct file *lower_file;
539 char *header_virt;
540 int rc = 0;
541 int current_header_page = 0;
542 int header_pages;
543 int more_header_data_to_be_written = 1;
544
545 lower_inode = ecryptfs_inode_to_lower(inode);
546 lower_file = ecryptfs_file_to_lower(file);
547 lower_a_ops = lower_inode->i_mapping->a_ops;
548 header_pages = ((crypt_stat->header_extent_size
549 * crypt_stat->num_header_extents_at_front)
550 / PAGE_CACHE_SIZE);
551 BUG_ON(header_pages < 1);
552 while (current_header_page < header_pages) {
553 rc = ecryptfs_grab_and_map_lower_page(&header_page,
554 &header_virt,
555 lower_inode,
556 current_header_page);
557 if (rc) {
558 ecryptfs_printk(KERN_ERR, "grab_cache_page for "
559 "header page [%d] failed; rc = [%d]\n",
560 current_header_page, rc);
561 goto out;
562 }
563 rc = lower_a_ops->prepare_write(lower_file, header_page, 0,
564 PAGE_CACHE_SIZE);
565 if (rc) {
566 ecryptfs_printk(KERN_ERR, "Error preparing to write "
567 "header page out; rc = [%d]\n", rc);
568 goto out;
569 }
570 memset(header_virt, 0, PAGE_CACHE_SIZE);
571 if (more_header_data_to_be_written) {
572 rc = ecryptfs_write_headers_virt(header_virt,
573 crypt_stat,
574 file->f_dentry);
575 if (rc) {
576 ecryptfs_printk(KERN_WARNING, "Error "
577 "generating header; rc = "
578 "[%d]\n", rc);
579 rc = -EIO;
580 memset(header_virt, 0, PAGE_CACHE_SIZE);
581 ecryptfs_unmap_and_release_lower_page(
582 header_page);
583 goto out;
584 }
585 if (current_header_page == 0)
586 memset(header_virt, 0, 8);
587 more_header_data_to_be_written = 0;
588 }
589 rc = lower_a_ops->commit_write(lower_file, header_page, 0,
590 PAGE_CACHE_SIZE);
591 ecryptfs_unmap_and_release_lower_page(header_page);
592 if (rc < 0) {
593 ecryptfs_printk(KERN_ERR,
594 "Error commiting header page write; "
595 "rc = [%d]\n", rc);
596 break;
597 }
598 current_header_page++;
599 }
600 if (rc >= 0) {
601 rc = 0;
602 ecryptfs_printk(KERN_DEBUG, "lower_inode->i_blocks = "
603 "[0x%.16x]\n", lower_inode->i_blocks);
604 i_size_write(inode, 0);
605 lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME;
606 mark_inode_dirty_sync(inode);
607 }
608 ecryptfs_printk(KERN_DEBUG, "Clearing ECRYPTFS_NEW_FILE flag in "
609 "crypt_stat at memory location [%p]\n", crypt_stat);
610 ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, ECRYPTFS_NEW_FILE);
611out:
612 return rc;
613}
614 636
615/** 637/**
616 * ecryptfs_commit_write 638 * ecryptfs_commit_write
@@ -640,15 +662,10 @@ static int ecryptfs_commit_write(struct file *file, struct page *page,
640 mutex_lock(&lower_inode->i_mutex); 662 mutex_lock(&lower_inode->i_mutex);
641 crypt_stat = &ecryptfs_inode_to_private(file->f_path.dentry->d_inode) 663 crypt_stat = &ecryptfs_inode_to_private(file->f_path.dentry->d_inode)
642 ->crypt_stat; 664 ->crypt_stat;
643 if (ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_NEW_FILE)) { 665 if (crypt_stat->flags & ECRYPTFS_NEW_FILE) {
644 ecryptfs_printk(KERN_DEBUG, "ECRYPTFS_NEW_FILE flag set in " 666 ecryptfs_printk(KERN_DEBUG, "ECRYPTFS_NEW_FILE flag set in "
645 "crypt_stat at memory location [%p]\n", crypt_stat); 667 "crypt_stat at memory location [%p]\n", crypt_stat);
646 rc = process_new_file(crypt_stat, file, inode); 668 crypt_stat->flags &= ~(ECRYPTFS_NEW_FILE);
647 if (rc) {
648 ecryptfs_printk(KERN_ERR, "Error processing new "
649 "file; rc = [%d]\n", rc);
650 goto out;
651 }
652 } else 669 } else
653 ecryptfs_printk(KERN_DEBUG, "Not a new file\n"); 670 ecryptfs_printk(KERN_DEBUG, "Not a new file\n");
654 ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page" 671 ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page"
@@ -670,7 +687,6 @@ static int ecryptfs_commit_write(struct file *file, struct page *page,
670 "index [0x%.16x])\n", page->index); 687 "index [0x%.16x])\n", page->index);
671 goto out; 688 goto out;
672 } 689 }
673 rc = 0;
674 inode->i_blocks = lower_inode->i_blocks; 690 inode->i_blocks = lower_inode->i_blocks;
675 pos = (page->index << PAGE_CACHE_SHIFT) + to; 691 pos = (page->index << PAGE_CACHE_SHIFT) + to;
676 if (pos > i_size_read(inode)) { 692 if (pos > i_size_read(inode)) {
@@ -678,11 +694,15 @@ static int ecryptfs_commit_write(struct file *file, struct page *page,
678 ecryptfs_printk(KERN_DEBUG, "Expanded file size to " 694 ecryptfs_printk(KERN_DEBUG, "Expanded file size to "
679 "[0x%.16x]\n", i_size_read(inode)); 695 "[0x%.16x]\n", i_size_read(inode));
680 } 696 }
681 ecryptfs_write_inode_size_to_header(lower_file, lower_inode, inode); 697 rc = ecryptfs_write_inode_size_to_metadata(lower_file, lower_inode,
698 inode, file->f_dentry,
699 ECRYPTFS_LOWER_I_MUTEX_HELD);
700 if (rc)
701 printk(KERN_ERR "Error writing inode size to metadata; "
702 "rc = [%d]\n", rc);
682 lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME; 703 lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME;
683 mark_inode_dirty_sync(inode); 704 mark_inode_dirty_sync(inode);
684out: 705out:
685 kunmap(page); /* mapped in prior call (prepare_write) */
686 if (rc < 0) 706 if (rc < 0)
687 ClearPageUptodate(page); 707 ClearPageUptodate(page);
688 else 708 else
@@ -707,6 +727,7 @@ int write_zeros(struct file *file, pgoff_t index, int start, int num_zeros)
707{ 727{
708 int rc = 0; 728 int rc = 0;
709 struct page *tmp_page; 729 struct page *tmp_page;
730 char *tmp_page_virt;
710 731
711 tmp_page = ecryptfs_get1page(file, index); 732 tmp_page = ecryptfs_get1page(file, index);
712 if (IS_ERR(tmp_page)) { 733 if (IS_ERR(tmp_page)) {
@@ -715,28 +736,27 @@ int write_zeros(struct file *file, pgoff_t index, int start, int num_zeros)
715 rc = PTR_ERR(tmp_page); 736 rc = PTR_ERR(tmp_page);
716 goto out; 737 goto out;
717 } 738 }
718 kmap(tmp_page);
719 rc = ecryptfs_prepare_write(file, tmp_page, start, start + num_zeros); 739 rc = ecryptfs_prepare_write(file, tmp_page, start, start + num_zeros);
720 if (rc) { 740 if (rc) {
721 ecryptfs_printk(KERN_ERR, "Error preparing to write zero's " 741 ecryptfs_printk(KERN_ERR, "Error preparing to write zero's "
722 "to remainder of page at index [0x%.16x]\n", 742 "to remainder of page at index [0x%.16x]\n",
723 index); 743 index);
724 kunmap(tmp_page);
725 page_cache_release(tmp_page); 744 page_cache_release(tmp_page);
726 goto out; 745 goto out;
727 } 746 }
728 memset(((char *)page_address(tmp_page) + start), 0, num_zeros); 747 tmp_page_virt = kmap_atomic(tmp_page, KM_USER0);
748 memset(((char *)tmp_page_virt + start), 0, num_zeros);
749 kunmap_atomic(tmp_page_virt, KM_USER0);
750 flush_dcache_page(tmp_page);
729 rc = ecryptfs_commit_write(file, tmp_page, start, start + num_zeros); 751 rc = ecryptfs_commit_write(file, tmp_page, start, start + num_zeros);
730 if (rc < 0) { 752 if (rc < 0) {
731 ecryptfs_printk(KERN_ERR, "Error attempting to write zero's " 753 ecryptfs_printk(KERN_ERR, "Error attempting to write zero's "
732 "to remainder of page at index [0x%.16x]\n", 754 "to remainder of page at index [0x%.16x]\n",
733 index); 755 index);
734 kunmap(tmp_page);
735 page_cache_release(tmp_page); 756 page_cache_release(tmp_page);
736 goto out; 757 goto out;
737 } 758 }
738 rc = 0; 759 rc = 0;
739 kunmap(tmp_page);
740 page_cache_release(tmp_page); 760 page_cache_release(tmp_page);
741out: 761out:
742 return rc; 762 return rc;
diff --git a/fs/ecryptfs/netlink.c b/fs/ecryptfs/netlink.c
new file mode 100644
index 000000000000..e3aa2253c850
--- /dev/null
+++ b/fs/ecryptfs/netlink.c
@@ -0,0 +1,255 @@
1/**
2 * eCryptfs: Linux filesystem encryption layer
3 *
4 * Copyright (C) 2004-2006 International Business Machines Corp.
5 * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com>
6 * Tyler Hicks <tyhicks@ou.edu>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License version
10 * 2 as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20 * 02111-1307, USA.
21 */
22
23#include <net/sock.h>
24#include <linux/hash.h>
25#include <linux/random.h>
26#include "ecryptfs_kernel.h"
27
28static struct sock *ecryptfs_nl_sock;
29
30/**
31 * ecryptfs_send_netlink
32 * @data: The data to include as the payload
33 * @data_len: The byte count of the data
34 * @msg_ctx: The netlink context that will be used to handle the
35 * response message
36 * @msg_type: The type of netlink message to send
37 * @msg_flags: The flags to include in the netlink header
38 * @daemon_pid: The process id of the daemon to send the message to
39 *
40 * Sends the data to the specified daemon pid and uses the netlink
41 * context element to store the data needed for validation upon
42 * receiving the response. The data and the netlink context can be
43 * null if just sending a netlink header is sufficient. Returns zero
44 * upon sending the message; non-zero upon error.
45 */
46int ecryptfs_send_netlink(char *data, int data_len,
47 struct ecryptfs_msg_ctx *msg_ctx, u16 msg_type,
48 u16 msg_flags, pid_t daemon_pid)
49{
50 struct sk_buff *skb;
51 struct nlmsghdr *nlh;
52 struct ecryptfs_message *msg;
53 size_t payload_len;
54 int rc;
55
56 payload_len = ((data && data_len) ? (sizeof(*msg) + data_len) : 0);
57 skb = alloc_skb(NLMSG_SPACE(payload_len), GFP_KERNEL);
58 if (!skb) {
59 rc = -ENOMEM;
60 ecryptfs_printk(KERN_ERR, "Failed to allocate socket buffer\n");
61 goto out;
62 }
63 nlh = NLMSG_PUT(skb, daemon_pid, msg_ctx ? msg_ctx->counter : 0,
64 msg_type, payload_len);
65 nlh->nlmsg_flags = msg_flags;
66 if (msg_ctx && payload_len) {
67 msg = (struct ecryptfs_message *)NLMSG_DATA(nlh);
68 msg->index = msg_ctx->index;
69 msg->data_len = data_len;
70 memcpy(msg->data, data, data_len);
71 }
72 rc = netlink_unicast(ecryptfs_nl_sock, skb, daemon_pid, 0);
73 if (rc < 0) {
74 ecryptfs_printk(KERN_ERR, "Failed to send eCryptfs netlink "
75 "message; rc = [%d]\n", rc);
76 goto out;
77 }
78 rc = 0;
79 goto out;
80nlmsg_failure:
81 rc = -EMSGSIZE;
82 kfree_skb(skb);
83out:
84 return rc;
85}
86
87/**
 88 * ecryptfs_process_nl_response
89 * @skb: The socket buffer containing the netlink message of state
90 * RESPONSE
91 *
 92 * Processes a response message after sending an operation request to
93 * userspace. Attempts to assign the msg to a netlink context element
94 * at the index specified in the msg. The sk_buff and nlmsghdr must
95 * be validated before this function. Returns zero upon delivery to
96 * desired context element; non-zero upon delivery failure or error.
97 */
98static int ecryptfs_process_nl_response(struct sk_buff *skb)
99{
100 struct nlmsghdr *nlh = (struct nlmsghdr*)skb->data;
101 struct ecryptfs_message *msg = NLMSG_DATA(nlh);
102 int rc;
103
104 if (skb->len - NLMSG_HDRLEN - sizeof(*msg) != msg->data_len) {
105 rc = -EINVAL;
106 ecryptfs_printk(KERN_ERR, "Received netlink message with "
107 "incorrectly specified data length\n");
108 goto out;
109 }
110 rc = ecryptfs_process_response(msg, NETLINK_CREDS(skb)->uid,
111 NETLINK_CREDS(skb)->pid, nlh->nlmsg_seq);
112 if (rc)
113 printk(KERN_ERR
114 "Error processing response message; rc = [%d]\n", rc);
115out:
116 return rc;
117}
118
119/**
120 * ecryptfs_process_nl_helo
121 * @skb: The socket buffer containing the nlmsghdr in HELO state
122 *
123 * Gets uid and pid of the skb and adds the values to the daemon id
124 * hash. Returns zero after adding a new daemon id to the hash list;
125 * non-zero otherwise.
126 */
127static int ecryptfs_process_nl_helo(struct sk_buff *skb)
128{
129 int rc;
130
131 rc = ecryptfs_process_helo(ECRYPTFS_TRANSPORT_NETLINK,
132 NETLINK_CREDS(skb)->uid,
133 NETLINK_CREDS(skb)->pid);
134 if (rc)
135 printk(KERN_WARNING "Error processing HELO; rc = [%d]\n", rc);
136 return rc;
137}
138
139/**
140 * ecryptfs_process_nl_quit
141 * @skb: The socket buffer containing the nlmsghdr in QUIT state
142 *
143 * Gets uid and pid of the skb and deletes the corresponding daemon
144 * id, if it is the registered daemon that is requesting the
145 * deletion. Returns zero after deleting the desired daemon id;
146 * non-zero otherwise.
147 */
148static int ecryptfs_process_nl_quit(struct sk_buff *skb)
149{
150 int rc;
151
152 rc = ecryptfs_process_quit(NETLINK_CREDS(skb)->uid,
153 NETLINK_CREDS(skb)->pid);
154 if (rc)
155 printk(KERN_WARNING
156 "Error processing QUIT message; rc = [%d]\n", rc);
157 return rc;
158}
159
160/**
161 * ecryptfs_receive_nl_message
162 *
163 * Callback function called by netlink system when a message arrives.
164 * If the message looks to be valid, then an attempt is made to assign
165 * it to its desired netlink context element and wake up the process
166 * that is waiting for a response.
167 */
168static void ecryptfs_receive_nl_message(struct sock *sk, int len)
169{
170 struct sk_buff *skb;
171 struct nlmsghdr *nlh;
172 int rc = 0; /* skb_recv_datagram requires this */
173
174receive:
175 skb = skb_recv_datagram(sk, 0, 0, &rc);
176 if (rc == -EINTR)
177 goto receive;
178 else if (rc < 0) {
179 ecryptfs_printk(KERN_ERR, "Error occurred while "
180 "receiving eCryptfs netlink message; "
181 "rc = [%d]\n", rc);
182 return;
183 }
184 nlh = (struct nlmsghdr *)skb->data;
185 if (!NLMSG_OK(nlh, skb->len)) {
186 ecryptfs_printk(KERN_ERR, "Received corrupt netlink "
187 "message\n");
188 goto free;
189 }
190 switch (nlh->nlmsg_type) {
191 case ECRYPTFS_NLMSG_RESPONSE:
192 if (ecryptfs_process_nl_response(skb)) {
193 ecryptfs_printk(KERN_WARNING, "Failed to "
194 "deliver netlink response to "
195 "requesting operation\n");
196 }
197 break;
198 case ECRYPTFS_NLMSG_HELO:
199 if (ecryptfs_process_nl_helo(skb)) {
200 ecryptfs_printk(KERN_WARNING, "Failed to "
201 "fulfill HELO request\n");
202 }
203 break;
204 case ECRYPTFS_NLMSG_QUIT:
205 if (ecryptfs_process_nl_quit(skb)) {
206 ecryptfs_printk(KERN_WARNING, "Failed to "
207 "fulfill QUIT request\n");
208 }
209 break;
210 default:
211 ecryptfs_printk(KERN_WARNING, "Dropping netlink "
212 "message of unrecognized type [%d]\n",
213 nlh->nlmsg_type);
214 break;
215 }
216free:
217 kfree_skb(skb);
218}
219
220/**
221 * ecryptfs_init_netlink
222 *
223 * Initializes the daemon id hash list, netlink context array, and
224 * necessary locks. Returns zero upon success; non-zero upon error.
225 */
226int ecryptfs_init_netlink(void)
227{
228 int rc;
229
230 ecryptfs_nl_sock = netlink_kernel_create(NETLINK_ECRYPTFS, 0,
231 ecryptfs_receive_nl_message,
232 THIS_MODULE);
233 if (!ecryptfs_nl_sock) {
234 rc = -EIO;
235 ecryptfs_printk(KERN_ERR, "Failed to create netlink socket\n");
236 goto out;
237 }
238 ecryptfs_nl_sock->sk_sndtimeo = ECRYPTFS_DEFAULT_SEND_TIMEOUT;
239 rc = 0;
240out:
241 return rc;
242}
243
244/**
245 * ecryptfs_release_netlink
246 *
247 * Frees all memory used by the netlink context array and releases the
248 * netlink socket.
249 */
250void ecryptfs_release_netlink(void)
251{
252 if (ecryptfs_nl_sock && ecryptfs_nl_sock->sk_socket)
253 sock_release(ecryptfs_nl_sock->sk_socket);
254 ecryptfs_nl_sock = NULL;
255}
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index eaa5daaf106e..7b3f0cc09a6f 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -168,7 +168,7 @@ out:
168 return rc; 168 return rc;
169} 169}
170 170
171struct super_operations ecryptfs_sops = { 171const struct super_operations ecryptfs_sops = {
172 .alloc_inode = ecryptfs_alloc_inode, 172 .alloc_inode = ecryptfs_alloc_inode,
173 .destroy_inode = ecryptfs_destroy_inode, 173 .destroy_inode = ecryptfs_destroy_inode,
174 .drop_inode = generic_delete_inode, 174 .drop_inode = generic_delete_inode,
diff --git a/fs/efs/dir.c b/fs/efs/dir.c
index b46c488eefc8..dfb5cb400217 100644
--- a/fs/efs/dir.c
+++ b/fs/efs/dir.c
@@ -15,7 +15,7 @@ const struct file_operations efs_dir_operations = {
15 .readdir = efs_readdir, 15 .readdir = efs_readdir,
16}; 16};
17 17
18struct inode_operations efs_dir_inode_operations = { 18const struct inode_operations efs_dir_inode_operations = {
19 .lookup = efs_lookup, 19 .lookup = efs_lookup,
20}; 20};
21 21
diff --git a/fs/efs/super.c b/fs/efs/super.c
index dfebf21289f4..c2235e46edcd 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -105,7 +105,7 @@ static int efs_remount(struct super_block *sb, int *flags, char *data)
105 return 0; 105 return 0;
106} 106}
107 107
108static struct super_operations efs_superblock_operations = { 108static const struct super_operations efs_superblock_operations = {
109 .alloc_inode = efs_alloc_inode, 109 .alloc_inode = efs_alloc_inode,
110 .destroy_inode = efs_destroy_inode, 110 .destroy_inode = efs_destroy_inode,
111 .read_inode = efs_read_inode, 111 .read_inode = efs_read_inode,
diff --git a/fs/exec.c b/fs/exec.c
index 11fe93f7363c..7e36c6f6f538 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -405,12 +405,10 @@ int setup_arg_pages(struct linux_binprm *bprm,
405 bprm->loader += stack_base; 405 bprm->loader += stack_base;
406 bprm->exec += stack_base; 406 bprm->exec += stack_base;
407 407
408 mpnt = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); 408 mpnt = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
409 if (!mpnt) 409 if (!mpnt)
410 return -ENOMEM; 410 return -ENOMEM;
411 411
412 memset(mpnt, 0, sizeof(*mpnt));
413
414 down_write(&mm->mmap_sem); 412 down_write(&mm->mmap_sem);
415 { 413 {
416 mpnt->vm_mm = mm; 414 mpnt->vm_mm = mm;
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 0b02ba9642d2..e89bfc8cf957 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -368,6 +368,14 @@ struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir,
368 } 368 }
369 if (++n >= npages) 369 if (++n >= npages)
370 n = 0; 370 n = 0;
371 /* next page is past the blocks we've got */
372 if (unlikely(n > (dir->i_blocks >> (PAGE_CACHE_SHIFT - 9)))) {
373 ext2_error(dir->i_sb, __FUNCTION__,
374 "dir %lu size %lld exceeds block count %llu",
375 dir->i_ino, dir->i_size,
376 (unsigned long long)dir->i_blocks);
377 goto out;
378 }
371 } while (n != start); 379 } while (n != start);
372out: 380out:
373 return NULL; 381 return NULL;
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index c19ac153f56b..e2a0ea50af1d 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -158,7 +158,7 @@ extern void ext2_write_super (struct super_block *);
158extern const struct file_operations ext2_dir_operations; 158extern const struct file_operations ext2_dir_operations;
159 159
160/* file.c */ 160/* file.c */
161extern struct inode_operations ext2_file_inode_operations; 161extern const struct inode_operations ext2_file_inode_operations;
162extern const struct file_operations ext2_file_operations; 162extern const struct file_operations ext2_file_operations;
163extern const struct file_operations ext2_xip_file_operations; 163extern const struct file_operations ext2_xip_file_operations;
164 164
@@ -168,9 +168,9 @@ extern const struct address_space_operations ext2_aops_xip;
168extern const struct address_space_operations ext2_nobh_aops; 168extern const struct address_space_operations ext2_nobh_aops;
169 169
170/* namei.c */ 170/* namei.c */
171extern struct inode_operations ext2_dir_inode_operations; 171extern const struct inode_operations ext2_dir_inode_operations;
172extern struct inode_operations ext2_special_inode_operations; 172extern const struct inode_operations ext2_special_inode_operations;
173 173
174/* symlink.c */ 174/* symlink.c */
175extern struct inode_operations ext2_fast_symlink_inode_operations; 175extern const struct inode_operations ext2_fast_symlink_inode_operations;
176extern struct inode_operations ext2_symlink_inode_operations; 176extern const struct inode_operations ext2_symlink_inode_operations;
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 2dba473c524a..566d4e2d3852 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -75,7 +75,7 @@ const struct file_operations ext2_xip_file_operations = {
75}; 75};
76#endif 76#endif
77 77
78struct inode_operations ext2_file_inode_operations = { 78const struct inode_operations ext2_file_inode_operations = {
79 .truncate = ext2_truncate, 79 .truncate = ext2_truncate,
80#ifdef CONFIG_EXT2_FS_XATTR 80#ifdef CONFIG_EXT2_FS_XATTR
81 .setxattr = generic_setxattr, 81 .setxattr = generic_setxattr,
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index e1af5b4cf80c..e69beed839ac 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -373,7 +373,7 @@ out:
373 return err; 373 return err;
374} 374}
375 375
376struct inode_operations ext2_dir_inode_operations = { 376const struct inode_operations ext2_dir_inode_operations = {
377 .create = ext2_create, 377 .create = ext2_create,
378 .lookup = ext2_lookup, 378 .lookup = ext2_lookup,
379 .link = ext2_link, 379 .link = ext2_link,
@@ -393,7 +393,7 @@ struct inode_operations ext2_dir_inode_operations = {
393 .permission = ext2_permission, 393 .permission = ext2_permission,
394}; 394};
395 395
396struct inode_operations ext2_special_inode_operations = { 396const struct inode_operations ext2_special_inode_operations = {
397#ifdef CONFIG_EXT2_FS_XATTR 397#ifdef CONFIG_EXT2_FS_XATTR
398 .setxattr = generic_setxattr, 398 .setxattr = generic_setxattr,
399 .getxattr = generic_getxattr, 399 .getxattr = generic_getxattr,
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 6347c2dbdd81..a046a419d8af 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -231,7 +231,7 @@ static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, siz
231static ssize_t ext2_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off); 231static ssize_t ext2_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off);
232#endif 232#endif
233 233
234static struct super_operations ext2_sops = { 234static const struct super_operations ext2_sops = {
235 .alloc_inode = ext2_alloc_inode, 235 .alloc_inode = ext2_alloc_inode,
236 .destroy_inode = ext2_destroy_inode, 236 .destroy_inode = ext2_destroy_inode,
237 .read_inode = ext2_read_inode, 237 .read_inode = ext2_read_inode,
@@ -708,10 +708,14 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
708 set_opt(sbi->s_mount_opt, GRPID); 708 set_opt(sbi->s_mount_opt, GRPID);
709 if (def_mount_opts & EXT2_DEFM_UID16) 709 if (def_mount_opts & EXT2_DEFM_UID16)
710 set_opt(sbi->s_mount_opt, NO_UID32); 710 set_opt(sbi->s_mount_opt, NO_UID32);
711#ifdef CONFIG_EXT2_FS_XATTR
711 if (def_mount_opts & EXT2_DEFM_XATTR_USER) 712 if (def_mount_opts & EXT2_DEFM_XATTR_USER)
712 set_opt(sbi->s_mount_opt, XATTR_USER); 713 set_opt(sbi->s_mount_opt, XATTR_USER);
714#endif
715#ifdef CONFIG_EXT2_FS_POSIX_ACL
713 if (def_mount_opts & EXT2_DEFM_ACL) 716 if (def_mount_opts & EXT2_DEFM_ACL)
714 set_opt(sbi->s_mount_opt, POSIX_ACL); 717 set_opt(sbi->s_mount_opt, POSIX_ACL);
718#endif
715 719
716 if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_PANIC) 720 if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_PANIC)
717 set_opt(sbi->s_mount_opt, ERRORS_PANIC); 721 set_opt(sbi->s_mount_opt, ERRORS_PANIC);
diff --git a/fs/ext2/symlink.c b/fs/ext2/symlink.c
index 1e67d87cfa91..4e2426e22bbe 100644
--- a/fs/ext2/symlink.c
+++ b/fs/ext2/symlink.c
@@ -28,7 +28,7 @@ static void *ext2_follow_link(struct dentry *dentry, struct nameidata *nd)
28 return NULL; 28 return NULL;
29} 29}
30 30
31struct inode_operations ext2_symlink_inode_operations = { 31const struct inode_operations ext2_symlink_inode_operations = {
32 .readlink = generic_readlink, 32 .readlink = generic_readlink,
33 .follow_link = page_follow_link_light, 33 .follow_link = page_follow_link_light,
34 .put_link = page_put_link, 34 .put_link = page_put_link,
@@ -40,7 +40,7 @@ struct inode_operations ext2_symlink_inode_operations = {
40#endif 40#endif
41}; 41};
42 42
43struct inode_operations ext2_fast_symlink_inode_operations = { 43const struct inode_operations ext2_fast_symlink_inode_operations = {
44 .readlink = generic_readlink, 44 .readlink = generic_readlink,
45 .follow_link = ext2_follow_link, 45 .follow_link = ext2_follow_link,
46#ifdef CONFIG_EXT2_FS_XATTR 46#ifdef CONFIG_EXT2_FS_XATTR
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 881f6365c41a..1e6f13864536 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -125,7 +125,7 @@ const struct file_operations ext3_file_operations = {
125 .splice_write = generic_file_splice_write, 125 .splice_write = generic_file_splice_write,
126}; 126};
127 127
128struct inode_operations ext3_file_inode_operations = { 128const struct inode_operations ext3_file_inode_operations = {
129 .truncate = ext3_truncate, 129 .truncate = ext3_truncate,
130 .setattr = ext3_setattr, 130 .setattr = ext3_setattr,
131#ifdef CONFIG_EXT3_FS_XATTR 131#ifdef CONFIG_EXT3_FS_XATTR
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index beaf25f5112f..8a824f4ce5c6 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -947,7 +947,7 @@ out:
947static int ext3_get_block(struct inode *inode, sector_t iblock, 947static int ext3_get_block(struct inode *inode, sector_t iblock,
948 struct buffer_head *bh_result, int create) 948 struct buffer_head *bh_result, int create)
949{ 949{
950 handle_t *handle = journal_current_handle(); 950 handle_t *handle = ext3_journal_current_handle();
951 int ret = 0; 951 int ret = 0;
952 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; 952 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
953 953
@@ -1717,7 +1717,7 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
1717 /* 1717 /*
1718 * Reacquire the handle: ext3_get_block() can restart the transaction 1718 * Reacquire the handle: ext3_get_block() can restart the transaction
1719 */ 1719 */
1720 handle = journal_current_handle(); 1720 handle = ext3_journal_current_handle();
1721 1721
1722out_stop: 1722out_stop:
1723 if (handle) { 1723 if (handle) {
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 4df39c4315e1..49159f13cc1f 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1618,21 +1618,6 @@ static int ext3_delete_entry (handle_t *handle,
1618 return -ENOENT; 1618 return -ENOENT;
1619} 1619}
1620 1620
1621/*
1622 * ext3_mark_inode_dirty is somewhat expensive, so unlike ext2 we
1623 * do not perform it in these functions. We perform it at the call site,
1624 * if it is needed.
1625 */
1626static inline void ext3_inc_count(handle_t *handle, struct inode *inode)
1627{
1628 inc_nlink(inode);
1629}
1630
1631static inline void ext3_dec_count(handle_t *handle, struct inode *inode)
1632{
1633 drop_nlink(inode);
1634}
1635
1636static int ext3_add_nondir(handle_t *handle, 1621static int ext3_add_nondir(handle_t *handle,
1637 struct dentry *dentry, struct inode *inode) 1622 struct dentry *dentry, struct inode *inode)
1638{ 1623{
@@ -1642,7 +1627,7 @@ static int ext3_add_nondir(handle_t *handle,
1642 d_instantiate(dentry, inode); 1627 d_instantiate(dentry, inode);
1643 return 0; 1628 return 0;
1644 } 1629 }
1645 ext3_dec_count(handle, inode); 1630 drop_nlink(inode);
1646 iput(inode); 1631 iput(inode);
1647 return err; 1632 return err;
1648} 1633}
@@ -2163,7 +2148,7 @@ retry:
2163 err = __page_symlink(inode, symname, l, 2148 err = __page_symlink(inode, symname, l,
2164 mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); 2149 mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
2165 if (err) { 2150 if (err) {
2166 ext3_dec_count(handle, inode); 2151 drop_nlink(inode);
2167 ext3_mark_inode_dirty(handle, inode); 2152 ext3_mark_inode_dirty(handle, inode);
2168 iput (inode); 2153 iput (inode);
2169 goto out_stop; 2154 goto out_stop;
@@ -2191,6 +2176,12 @@ static int ext3_link (struct dentry * old_dentry,
2191 2176
2192 if (inode->i_nlink >= EXT3_LINK_MAX) 2177 if (inode->i_nlink >= EXT3_LINK_MAX)
2193 return -EMLINK; 2178 return -EMLINK;
2179 /*
2180 * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
2181 * otherwise has the potential to corrupt the orphan inode list.
2182 */
2183 if (inode->i_nlink == 0)
2184 return -ENOENT;
2194 2185
2195retry: 2186retry:
2196 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + 2187 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
@@ -2202,7 +2193,7 @@ retry:
2202 handle->h_sync = 1; 2193 handle->h_sync = 1;
2203 2194
2204 inode->i_ctime = CURRENT_TIME_SEC; 2195 inode->i_ctime = CURRENT_TIME_SEC;
2205 ext3_inc_count(handle, inode); 2196 inc_nlink(inode);
2206 atomic_inc(&inode->i_count); 2197 atomic_inc(&inode->i_count);
2207 2198
2208 err = ext3_add_nondir(handle, dentry, inode); 2199 err = ext3_add_nondir(handle, dentry, inode);
@@ -2374,7 +2365,7 @@ end_rename:
2374/* 2365/*
2375 * directories can handle most operations... 2366 * directories can handle most operations...
2376 */ 2367 */
2377struct inode_operations ext3_dir_inode_operations = { 2368const struct inode_operations ext3_dir_inode_operations = {
2378 .create = ext3_create, 2369 .create = ext3_create,
2379 .lookup = ext3_lookup, 2370 .lookup = ext3_lookup,
2380 .link = ext3_link, 2371 .link = ext3_link,
@@ -2394,7 +2385,7 @@ struct inode_operations ext3_dir_inode_operations = {
2394 .permission = ext3_permission, 2385 .permission = ext3_permission,
2395}; 2386};
2396 2387
2397struct inode_operations ext3_special_inode_operations = { 2388const struct inode_operations ext3_special_inode_operations = {
2398 .setattr = ext3_setattr, 2389 .setattr = ext3_setattr,
2399#ifdef CONFIG_EXT3_FS_XATTR 2390#ifdef CONFIG_EXT3_FS_XATTR
2400 .setxattr = generic_setxattr, 2391 .setxattr = generic_setxattr,
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index b34886734a44..4a4fcd6868c7 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -639,7 +639,7 @@ static struct quotactl_ops ext3_qctl_operations = {
639}; 639};
640#endif 640#endif
641 641
642static struct super_operations ext3_sops = { 642static const struct super_operations ext3_sops = {
643 .alloc_inode = ext3_alloc_inode, 643 .alloc_inode = ext3_alloc_inode,
644 .destroy_inode = ext3_destroy_inode, 644 .destroy_inode = ext3_destroy_inode,
645 .read_inode = ext3_read_inode, 645 .read_inode = ext3_read_inode,
@@ -1459,10 +1459,14 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1459 set_opt(sbi->s_mount_opt, GRPID); 1459 set_opt(sbi->s_mount_opt, GRPID);
1460 if (def_mount_opts & EXT3_DEFM_UID16) 1460 if (def_mount_opts & EXT3_DEFM_UID16)
1461 set_opt(sbi->s_mount_opt, NO_UID32); 1461 set_opt(sbi->s_mount_opt, NO_UID32);
1462#ifdef CONFIG_EXT3_FS_XATTR
1462 if (def_mount_opts & EXT3_DEFM_XATTR_USER) 1463 if (def_mount_opts & EXT3_DEFM_XATTR_USER)
1463 set_opt(sbi->s_mount_opt, XATTR_USER); 1464 set_opt(sbi->s_mount_opt, XATTR_USER);
1465#endif
1466#ifdef CONFIG_EXT3_FS_POSIX_ACL
1464 if (def_mount_opts & EXT3_DEFM_ACL) 1467 if (def_mount_opts & EXT3_DEFM_ACL)
1465 set_opt(sbi->s_mount_opt, POSIX_ACL); 1468 set_opt(sbi->s_mount_opt, POSIX_ACL);
1469#endif
1466 if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_DATA) 1470 if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_DATA)
1467 sbi->s_mount_opt |= EXT3_MOUNT_JOURNAL_DATA; 1471 sbi->s_mount_opt |= EXT3_MOUNT_JOURNAL_DATA;
1468 else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_ORDERED) 1472 else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_ORDERED)
@@ -2344,6 +2348,22 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
2344 err = -EROFS; 2348 err = -EROFS;
2345 goto restore_opts; 2349 goto restore_opts;
2346 } 2350 }
2351
2352 /*
2353 * If we have an unprocessed orphan list hanging
2354 * around from a previously readonly bdev mount,
2355 * require a full umount/remount for now.
2356 */
2357 if (es->s_last_orphan) {
2358 printk(KERN_WARNING "EXT3-fs: %s: couldn't "
2359 "remount RDWR because of unprocessed "
2360 "orphan inode list. Please "
2361 "umount/remount instead.\n",
2362 sb->s_id);
2363 err = -EINVAL;
2364 goto restore_opts;
2365 }
2366
2347 /* 2367 /*
2348 * Mounting a RDONLY partition read-write, so reread 2368 * Mounting a RDONLY partition read-write, so reread
2349 * and store the current valid flag. (It may have 2369 * and store the current valid flag. (It may have
diff --git a/fs/ext3/symlink.c b/fs/ext3/symlink.c
index 4f79122cde67..ff7b4ccd8983 100644
--- a/fs/ext3/symlink.c
+++ b/fs/ext3/symlink.c
@@ -30,7 +30,7 @@ static void * ext3_follow_link(struct dentry *dentry, struct nameidata *nd)
30 return NULL; 30 return NULL;
31} 31}
32 32
33struct inode_operations ext3_symlink_inode_operations = { 33const struct inode_operations ext3_symlink_inode_operations = {
34 .readlink = generic_readlink, 34 .readlink = generic_readlink,
35 .follow_link = page_follow_link_light, 35 .follow_link = page_follow_link_light,
36 .put_link = page_put_link, 36 .put_link = page_put_link,
@@ -42,7 +42,7 @@ struct inode_operations ext3_symlink_inode_operations = {
42#endif 42#endif
43}; 43};
44 44
45struct inode_operations ext3_fast_symlink_inode_operations = { 45const struct inode_operations ext3_fast_symlink_inode_operations = {
46 .readlink = generic_readlink, 46 .readlink = generic_readlink,
47 .follow_link = ext3_follow_link, 47 .follow_link = ext3_follow_link,
48#ifdef CONFIG_EXT3_FS_XATTR 48#ifdef CONFIG_EXT3_FS_XATTR
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 3bbc24b58785..3c6c1fd2be90 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -125,7 +125,7 @@ const struct file_operations ext4_file_operations = {
125 .splice_write = generic_file_splice_write, 125 .splice_write = generic_file_splice_write,
126}; 126};
127 127
128struct inode_operations ext4_file_inode_operations = { 128const struct inode_operations ext4_file_inode_operations = {
129 .truncate = ext4_truncate, 129 .truncate = ext4_truncate,
130 .setattr = ext4_setattr, 130 .setattr = ext4_setattr,
131#ifdef CONFIG_EXT4DEV_FS_XATTR 131#ifdef CONFIG_EXT4DEV_FS_XATTR
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index a127cc03c9fa..fbff4b9e122a 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -946,7 +946,7 @@ out:
946static int ext4_get_block(struct inode *inode, sector_t iblock, 946static int ext4_get_block(struct inode *inode, sector_t iblock,
947 struct buffer_head *bh_result, int create) 947 struct buffer_head *bh_result, int create)
948{ 948{
949 handle_t *handle = journal_current_handle(); 949 handle_t *handle = ext4_journal_current_handle();
950 int ret = 0; 950 int ret = 0;
951 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; 951 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
952 952
@@ -1716,7 +1716,7 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
1716 /* 1716 /*
1717 * Reacquire the handle: ext4_get_block() can restart the transaction 1717 * Reacquire the handle: ext4_get_block() can restart the transaction
1718 */ 1718 */
1719 handle = journal_current_handle(); 1719 handle = ext4_journal_current_handle();
1720 1720
1721out_stop: 1721out_stop:
1722 if (handle) { 1722 if (handle) {
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index e5a74a5ac261..e7e1d79a7d75 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1616,21 +1616,6 @@ static int ext4_delete_entry (handle_t *handle,
1616 return -ENOENT; 1616 return -ENOENT;
1617} 1617}
1618 1618
1619/*
1620 * ext4_mark_inode_dirty is somewhat expensive, so unlike ext2 we
1621 * do not perform it in these functions. We perform it at the call site,
1622 * if it is needed.
1623 */
1624static inline void ext4_inc_count(handle_t *handle, struct inode *inode)
1625{
1626 inc_nlink(inode);
1627}
1628
1629static inline void ext4_dec_count(handle_t *handle, struct inode *inode)
1630{
1631 drop_nlink(inode);
1632}
1633
1634static int ext4_add_nondir(handle_t *handle, 1619static int ext4_add_nondir(handle_t *handle,
1635 struct dentry *dentry, struct inode *inode) 1620 struct dentry *dentry, struct inode *inode)
1636{ 1621{
@@ -1640,7 +1625,7 @@ static int ext4_add_nondir(handle_t *handle,
1640 d_instantiate(dentry, inode); 1625 d_instantiate(dentry, inode);
1641 return 0; 1626 return 0;
1642 } 1627 }
1643 ext4_dec_count(handle, inode); 1628 drop_nlink(inode);
1644 iput(inode); 1629 iput(inode);
1645 return err; 1630 return err;
1646} 1631}
@@ -2161,7 +2146,7 @@ retry:
2161 err = __page_symlink(inode, symname, l, 2146 err = __page_symlink(inode, symname, l,
2162 mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); 2147 mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
2163 if (err) { 2148 if (err) {
2164 ext4_dec_count(handle, inode); 2149 drop_nlink(inode);
2165 ext4_mark_inode_dirty(handle, inode); 2150 ext4_mark_inode_dirty(handle, inode);
2166 iput (inode); 2151 iput (inode);
2167 goto out_stop; 2152 goto out_stop;
@@ -2189,6 +2174,12 @@ static int ext4_link (struct dentry * old_dentry,
2189 2174
2190 if (inode->i_nlink >= EXT4_LINK_MAX) 2175 if (inode->i_nlink >= EXT4_LINK_MAX)
2191 return -EMLINK; 2176 return -EMLINK;
2177 /*
2178 * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
2179 * otherwise has the potential to corrupt the orphan inode list.
2180 */
2181 if (inode->i_nlink == 0)
2182 return -ENOENT;
2192 2183
2193retry: 2184retry:
2194 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 2185 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
@@ -2200,7 +2191,7 @@ retry:
2200 handle->h_sync = 1; 2191 handle->h_sync = 1;
2201 2192
2202 inode->i_ctime = CURRENT_TIME_SEC; 2193 inode->i_ctime = CURRENT_TIME_SEC;
2203 ext4_inc_count(handle, inode); 2194 inc_nlink(inode);
2204 atomic_inc(&inode->i_count); 2195 atomic_inc(&inode->i_count);
2205 2196
2206 err = ext4_add_nondir(handle, dentry, inode); 2197 err = ext4_add_nondir(handle, dentry, inode);
@@ -2372,7 +2363,7 @@ end_rename:
2372/* 2363/*
2373 * directories can handle most operations... 2364 * directories can handle most operations...
2374 */ 2365 */
2375struct inode_operations ext4_dir_inode_operations = { 2366const struct inode_operations ext4_dir_inode_operations = {
2376 .create = ext4_create, 2367 .create = ext4_create,
2377 .lookup = ext4_lookup, 2368 .lookup = ext4_lookup,
2378 .link = ext4_link, 2369 .link = ext4_link,
@@ -2392,7 +2383,7 @@ struct inode_operations ext4_dir_inode_operations = {
2392 .permission = ext4_permission, 2383 .permission = ext4_permission,
2393}; 2384};
2394 2385
2395struct inode_operations ext4_special_inode_operations = { 2386const struct inode_operations ext4_special_inode_operations = {
2396 .setattr = ext4_setattr, 2387 .setattr = ext4_setattr,
2397#ifdef CONFIG_EXT4DEV_FS_XATTR 2388#ifdef CONFIG_EXT4DEV_FS_XATTR
2398 .setxattr = generic_setxattr, 2389 .setxattr = generic_setxattr,
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 486a641ca71b..61c4718e4a53 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -690,7 +690,7 @@ static struct quotactl_ops ext4_qctl_operations = {
690}; 690};
691#endif 691#endif
692 692
693static struct super_operations ext4_sops = { 693static const struct super_operations ext4_sops = {
694 .alloc_inode = ext4_alloc_inode, 694 .alloc_inode = ext4_alloc_inode,
695 .destroy_inode = ext4_destroy_inode, 695 .destroy_inode = ext4_destroy_inode,
696 .read_inode = ext4_read_inode, 696 .read_inode = ext4_read_inode,
@@ -1518,10 +1518,14 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
1518 set_opt(sbi->s_mount_opt, GRPID); 1518 set_opt(sbi->s_mount_opt, GRPID);
1519 if (def_mount_opts & EXT4_DEFM_UID16) 1519 if (def_mount_opts & EXT4_DEFM_UID16)
1520 set_opt(sbi->s_mount_opt, NO_UID32); 1520 set_opt(sbi->s_mount_opt, NO_UID32);
1521#ifdef CONFIG_EXT4DEV_FS_XATTR
1521 if (def_mount_opts & EXT4_DEFM_XATTR_USER) 1522 if (def_mount_opts & EXT4_DEFM_XATTR_USER)
1522 set_opt(sbi->s_mount_opt, XATTR_USER); 1523 set_opt(sbi->s_mount_opt, XATTR_USER);
1524#endif
1525#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
1523 if (def_mount_opts & EXT4_DEFM_ACL) 1526 if (def_mount_opts & EXT4_DEFM_ACL)
1524 set_opt(sbi->s_mount_opt, POSIX_ACL); 1527 set_opt(sbi->s_mount_opt, POSIX_ACL);
1528#endif
1525 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) 1529 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
1526 sbi->s_mount_opt |= EXT4_MOUNT_JOURNAL_DATA; 1530 sbi->s_mount_opt |= EXT4_MOUNT_JOURNAL_DATA;
1527 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) 1531 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
@@ -2419,6 +2423,22 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data)
2419 err = -EROFS; 2423 err = -EROFS;
2420 goto restore_opts; 2424 goto restore_opts;
2421 } 2425 }
2426
2427 /*
2428 * If we have an unprocessed orphan list hanging
2429 * around from a previously readonly bdev mount,
2430 * require a full umount/remount for now.
2431 */
2432 if (es->s_last_orphan) {
2433 printk(KERN_WARNING "EXT4-fs: %s: couldn't "
2434 "remount RDWR because of unprocessed "
2435 "orphan inode list. Please "
2436 "umount/remount instead.\n",
2437 sb->s_id);
2438 err = -EINVAL;
2439 goto restore_opts;
2440 }
2441
2422 /* 2442 /*
2423 * Mounting a RDONLY partition read-write, so reread 2443 * Mounting a RDONLY partition read-write, so reread
2424 * and store the current valid flag. (It may have 2444 * and store the current valid flag. (It may have
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index fcf527286d75..e6f9da4287c4 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -30,7 +30,7 @@ static void * ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
30 return NULL; 30 return NULL;
31} 31}
32 32
33struct inode_operations ext4_symlink_inode_operations = { 33const struct inode_operations ext4_symlink_inode_operations = {
34 .readlink = generic_readlink, 34 .readlink = generic_readlink,
35 .follow_link = page_follow_link_light, 35 .follow_link = page_follow_link_light,
36 .put_link = page_put_link, 36 .put_link = page_put_link,
@@ -42,7 +42,7 @@ struct inode_operations ext4_symlink_inode_operations = {
42#endif 42#endif
43}; 43};
44 44
45struct inode_operations ext4_fast_symlink_inode_operations = { 45const struct inode_operations ext4_fast_symlink_inode_operations = {
46 .readlink = generic_readlink, 46 .readlink = generic_readlink,
47 .follow_link = ext4_follow_link, 47 .follow_link = ext4_follow_link,
48#ifdef CONFIG_EXT4DEV_FS_XATTR 48#ifdef CONFIG_EXT4DEV_FS_XATTR
diff --git a/fs/fat/file.c b/fs/fat/file.c
index c1237b70c1fe..55d3c7461c5b 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -312,7 +312,7 @@ int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
312} 312}
313EXPORT_SYMBOL_GPL(fat_getattr); 313EXPORT_SYMBOL_GPL(fat_getattr);
314 314
315struct inode_operations fat_file_inode_operations = { 315const struct inode_operations fat_file_inode_operations = {
316 .truncate = fat_truncate, 316 .truncate = fat_truncate,
317 .setattr = fat_notify_change, 317 .setattr = fat_notify_change,
318 .getattr = fat_getattr, 318 .getattr = fat_getattr,
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index a9e4688582a2..761073544217 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -618,7 +618,7 @@ int fat_sync_inode(struct inode *inode)
618EXPORT_SYMBOL_GPL(fat_sync_inode); 618EXPORT_SYMBOL_GPL(fat_sync_inode);
619 619
620static int fat_show_options(struct seq_file *m, struct vfsmount *mnt); 620static int fat_show_options(struct seq_file *m, struct vfsmount *mnt);
621static struct super_operations fat_sops = { 621static const struct super_operations fat_sops = {
622 .alloc_inode = fat_alloc_inode, 622 .alloc_inode = fat_alloc_inode,
623 .destroy_inode = fat_destroy_inode, 623 .destroy_inode = fat_destroy_inode,
624 .write_inode = fat_write_inode, 624 .write_inode = fat_write_inode,
@@ -1151,7 +1151,7 @@ static int fat_read_root(struct inode *inode)
1151 * Read the super block of an MS-DOS FS. 1151 * Read the super block of an MS-DOS FS.
1152 */ 1152 */
1153int fat_fill_super(struct super_block *sb, void *data, int silent, 1153int fat_fill_super(struct super_block *sb, void *data, int silent,
1154 struct inode_operations *fs_dir_inode_ops, int isvfat) 1154 const struct inode_operations *fs_dir_inode_ops, int isvfat)
1155{ 1155{
1156 struct inode *root_inode = NULL; 1156 struct inode *root_inode = NULL;
1157 struct buffer_head *bh; 1157 struct buffer_head *bh;
diff --git a/fs/freevxfs/vxfs_extern.h b/fs/freevxfs/vxfs_extern.h
index 1cf1fe8466a2..91ccee8723f7 100644
--- a/fs/freevxfs/vxfs_extern.h
+++ b/fs/freevxfs/vxfs_extern.h
@@ -62,7 +62,7 @@ extern void vxfs_read_inode(struct inode *);
62extern void vxfs_clear_inode(struct inode *); 62extern void vxfs_clear_inode(struct inode *);
63 63
64/* vxfs_lookup.c */ 64/* vxfs_lookup.c */
65extern struct inode_operations vxfs_dir_inode_ops; 65extern const struct inode_operations vxfs_dir_inode_ops;
66extern const struct file_operations vxfs_dir_operations; 66extern const struct file_operations vxfs_dir_operations;
67 67
68/* vxfs_olt.c */ 68/* vxfs_olt.c */
diff --git a/fs/freevxfs/vxfs_immed.c b/fs/freevxfs/vxfs_immed.c
index 4e25f3fbed86..24b5a775ff96 100644
--- a/fs/freevxfs/vxfs_immed.c
+++ b/fs/freevxfs/vxfs_immed.c
@@ -48,7 +48,7 @@ static int vxfs_immed_readpage(struct file *, struct page *);
48 * Unliked all other operations we do not go through the pagecache, 48 * Unliked all other operations we do not go through the pagecache,
49 * but do all work directly on the inode. 49 * but do all work directly on the inode.
50 */ 50 */
51struct inode_operations vxfs_immed_symlink_iops = { 51const struct inode_operations vxfs_immed_symlink_iops = {
52 .readlink = generic_readlink, 52 .readlink = generic_readlink,
53 .follow_link = vxfs_immed_follow_link, 53 .follow_link = vxfs_immed_follow_link,
54}; 54};
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 0b7ae897cb78..098a915fd9a1 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -44,7 +44,7 @@
44extern const struct address_space_operations vxfs_aops; 44extern const struct address_space_operations vxfs_aops;
45extern const struct address_space_operations vxfs_immed_aops; 45extern const struct address_space_operations vxfs_immed_aops;
46 46
47extern struct inode_operations vxfs_immed_symlink_iops; 47extern const struct inode_operations vxfs_immed_symlink_iops;
48 48
49struct kmem_cache *vxfs_inode_cachep; 49struct kmem_cache *vxfs_inode_cachep;
50 50
diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c
index 3995d7fbedab..bf86e5444ea6 100644
--- a/fs/freevxfs/vxfs_lookup.c
+++ b/fs/freevxfs/vxfs_lookup.c
@@ -52,7 +52,7 @@
52static struct dentry * vxfs_lookup(struct inode *, struct dentry *, struct nameidata *); 52static struct dentry * vxfs_lookup(struct inode *, struct dentry *, struct nameidata *);
53static int vxfs_readdir(struct file *, void *, filldir_t); 53static int vxfs_readdir(struct file *, void *, filldir_t);
54 54
55struct inode_operations vxfs_dir_inode_ops = { 55const struct inode_operations vxfs_dir_inode_ops = {
56 .lookup = vxfs_lookup, 56 .lookup = vxfs_lookup,
57}; 57};
58 58
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index ac28b0835ffc..647d600f0bc8 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -59,7 +59,7 @@ static void vxfs_put_super(struct super_block *);
59static int vxfs_statfs(struct dentry *, struct kstatfs *); 59static int vxfs_statfs(struct dentry *, struct kstatfs *);
60static int vxfs_remount(struct super_block *, int *, char *); 60static int vxfs_remount(struct super_block *, int *, char *);
61 61
62static struct super_operations vxfs_super_ops = { 62static const struct super_operations vxfs_super_ops = {
63 .read_inode = vxfs_read_inode, 63 .read_inode = vxfs_read_inode,
64 .clear_inode = vxfs_clear_inode, 64 .clear_inode = vxfs_clear_inode,
65 .put_super = vxfs_put_super, 65 .put_super = vxfs_put_super,
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 1794305f9ed8..105d4a271e07 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -73,7 +73,7 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
73 struct fuse_conn *fc, 73 struct fuse_conn *fc,
74 const char *name, 74 const char *name,
75 int mode, int nlink, 75 int mode, int nlink,
76 struct inode_operations *iop, 76 const struct inode_operations *iop,
77 const struct file_operations *fop) 77 const struct file_operations *fop)
78{ 78{
79 struct dentry *dentry; 79 struct dentry *dentry;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 40080477ceb4..406bf61ed510 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1242,7 +1242,7 @@ static int fuse_removexattr(struct dentry *entry, const char *name)
1242 return err; 1242 return err;
1243} 1243}
1244 1244
1245static struct inode_operations fuse_dir_inode_operations = { 1245static const struct inode_operations fuse_dir_inode_operations = {
1246 .lookup = fuse_lookup, 1246 .lookup = fuse_lookup,
1247 .mkdir = fuse_mkdir, 1247 .mkdir = fuse_mkdir,
1248 .symlink = fuse_symlink, 1248 .symlink = fuse_symlink,
@@ -1270,7 +1270,7 @@ static const struct file_operations fuse_dir_operations = {
1270 .fsync = fuse_dir_fsync, 1270 .fsync = fuse_dir_fsync,
1271}; 1271};
1272 1272
1273static struct inode_operations fuse_common_inode_operations = { 1273static const struct inode_operations fuse_common_inode_operations = {
1274 .setattr = fuse_setattr, 1274 .setattr = fuse_setattr,
1275 .permission = fuse_permission, 1275 .permission = fuse_permission,
1276 .getattr = fuse_getattr, 1276 .getattr = fuse_getattr,
@@ -1280,7 +1280,7 @@ static struct inode_operations fuse_common_inode_operations = {
1280 .removexattr = fuse_removexattr, 1280 .removexattr = fuse_removexattr,
1281}; 1281};
1282 1282
1283static struct inode_operations fuse_symlink_inode_operations = { 1283static const struct inode_operations fuse_symlink_inode_operations = {
1284 .setattr = fuse_setattr, 1284 .setattr = fuse_setattr,
1285 .follow_link = fuse_follow_link, 1285 .follow_link = fuse_follow_link,
1286 .put_link = fuse_put_link, 1286 .put_link = fuse_put_link,
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index f63efe1337ec..2fd06927e851 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -69,7 +69,7 @@ void fuse_finish_open(struct inode *inode, struct file *file,
69 if (outarg->open_flags & FOPEN_DIRECT_IO) 69 if (outarg->open_flags & FOPEN_DIRECT_IO)
70 file->f_op = &fuse_direct_io_file_operations; 70 file->f_op = &fuse_direct_io_file_operations;
71 if (!(outarg->open_flags & FOPEN_KEEP_CACHE)) 71 if (!(outarg->open_flags & FOPEN_KEEP_CACHE))
72 invalidate_inode_pages(inode->i_mapping); 72 invalidate_mapping_pages(inode->i_mapping, 0, -1);
73 ff->fh = outarg->fh; 73 ff->fh = outarg->fh;
74 file->private_data = ff; 74 file->private_data = ff;
75} 75}
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 12450d2b320e..5ab8e50e7808 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -112,7 +112,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr)
112{ 112{
113 struct fuse_conn *fc = get_fuse_conn(inode); 113 struct fuse_conn *fc = get_fuse_conn(inode);
114 if (S_ISREG(inode->i_mode) && i_size_read(inode) != attr->size) 114 if (S_ISREG(inode->i_mode) && i_size_read(inode) != attr->size)
115 invalidate_inode_pages(inode->i_mapping); 115 invalidate_mapping_pages(inode->i_mapping, 0, -1);
116 116
117 inode->i_ino = attr->ino; 117 inode->i_ino = attr->ino;
118 inode->i_mode = (inode->i_mode & S_IFMT) + (attr->mode & 07777); 118 inode->i_mode = (inode->i_mode & S_IFMT) + (attr->mode & 07777);
@@ -446,7 +446,7 @@ static struct inode *get_root_inode(struct super_block *sb, unsigned mode)
446 return fuse_iget(sb, 1, 0, &attr); 446 return fuse_iget(sb, 1, 0, &attr);
447} 447}
448 448
449static struct super_operations fuse_super_operations = { 449static const struct super_operations fuse_super_operations = {
450 .alloc_inode = fuse_alloc_inode, 450 .alloc_inode = fuse_alloc_inode,
451 .destroy_inode = fuse_destroy_inode, 451 .destroy_inode = fuse_destroy_inode,
452 .read_inode = fuse_read_inode, 452 .read_inode = fuse_read_inode,
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index 6a2ffa2db14f..de8e64c03f73 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -4,44 +4,43 @@ config GFS2_FS
4 select FS_POSIX_ACL 4 select FS_POSIX_ACL
5 select CRC32 5 select CRC32
6 help 6 help
7 A cluster filesystem. 7 A cluster filesystem.
8 8
9 Allows a cluster of computers to simultaneously use a block device 9 Allows a cluster of computers to simultaneously use a block device
10 that is shared between them (with FC, iSCSI, NBD, etc...). GFS reads 10 that is shared between them (with FC, iSCSI, NBD, etc...). GFS reads
11 and writes to the block device like a local filesystem, but also uses 11 and writes to the block device like a local filesystem, but also uses
12 a lock module to allow the computers coordinate their I/O so 12 a lock module to allow the computers coordinate their I/O so
13 filesystem consistency is maintained. One of the nifty features of 13 filesystem consistency is maintained. One of the nifty features of
14 GFS is perfect consistency -- changes made to the filesystem on one 14 GFS is perfect consistency -- changes made to the filesystem on one
15 machine show up immediately on all other machines in the cluster. 15 machine show up immediately on all other machines in the cluster.
16 16
17 To use the GFS2 filesystem, you will need to enable one or more of 17 To use the GFS2 filesystem, you will need to enable one or more of
18 the below locking modules. Documentation and utilities for GFS2 can 18 the below locking modules. Documentation and utilities for GFS2 can
19 be found here: http://sources.redhat.com/cluster 19 be found here: http://sources.redhat.com/cluster
20 20
21config GFS2_FS_LOCKING_NOLOCK 21config GFS2_FS_LOCKING_NOLOCK
22 tristate "GFS2 \"nolock\" locking module" 22 tristate "GFS2 \"nolock\" locking module"
23 depends on GFS2_FS 23 depends on GFS2_FS
24 help 24 help
25 Single node locking module for GFS2. 25 Single node locking module for GFS2.
26 26
27 Use this module if you want to use GFS2 on a single node without 27 Use this module if you want to use GFS2 on a single node without
28 its clustering features. You can still take advantage of the 28 its clustering features. You can still take advantage of the
29 large file support, and upgrade to running a full cluster later on 29 large file support, and upgrade to running a full cluster later on
30 if required. 30 if required.
31 31
32 If you will only be using GFS2 in cluster mode, you do not need this 32 If you will only be using GFS2 in cluster mode, you do not need this
33 module. 33 module.
34 34
35config GFS2_FS_LOCKING_DLM 35config GFS2_FS_LOCKING_DLM
36 tristate "GFS2 DLM locking module" 36 tristate "GFS2 DLM locking module"
37 depends on GFS2_FS && NET && INET && (IPV6 || IPV6=n) 37 depends on GFS2_FS && SYSFS && NET && INET && (IPV6 || IPV6=n)
38 select IP_SCTP if DLM_SCTP 38 select IP_SCTP if DLM_SCTP
39 select CONFIGFS_FS 39 select CONFIGFS_FS
40 select DLM 40 select DLM
41 help 41 help
42 Multiple node locking module for GFS2 42 Multiple node locking module for GFS2
43
44 Most users of GFS2 will require this module. It provides the locking
45 interface between GFS2 and the DLM, which is required to use GFS2
46 in a cluster environment.
47 43
44 Most users of GFS2 will require this module. It provides the locking
45 interface between GFS2 and the DLM, which is required to use GFS2
46 in a cluster environment.
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 8240c1ff94f4..113f6c9110c7 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -773,7 +773,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
773 gfs2_free_data(ip, bstart, blen); 773 gfs2_free_data(ip, bstart, blen);
774 } 774 }
775 775
776 ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); 776 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
777 777
778 gfs2_dinode_out(ip, dibh->b_data); 778 gfs2_dinode_out(ip, dibh->b_data);
779 779
@@ -848,7 +848,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size)
848 } 848 }
849 849
850 ip->i_di.di_size = size; 850 ip->i_di.di_size = size;
851 ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); 851 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
852 852
853 error = gfs2_meta_inode_buffer(ip, &dibh); 853 error = gfs2_meta_inode_buffer(ip, &dibh);
854 if (error) 854 if (error)
@@ -963,7 +963,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
963 963
964 if (gfs2_is_stuffed(ip)) { 964 if (gfs2_is_stuffed(ip)) {
965 ip->i_di.di_size = size; 965 ip->i_di.di_size = size;
966 ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); 966 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
967 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 967 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
968 gfs2_dinode_out(ip, dibh->b_data); 968 gfs2_dinode_out(ip, dibh->b_data);
969 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size); 969 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size);
@@ -975,7 +975,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
975 975
976 if (!error) { 976 if (!error) {
977 ip->i_di.di_size = size; 977 ip->i_di.di_size = size;
978 ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); 978 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
979 ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG; 979 ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG;
980 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 980 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
981 gfs2_dinode_out(ip, dibh->b_data); 981 gfs2_dinode_out(ip, dibh->b_data);
@@ -1048,7 +1048,7 @@ static int trunc_end(struct gfs2_inode *ip)
1048 ip->i_num.no_addr; 1048 ip->i_num.no_addr;
1049 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 1049 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
1050 } 1050 }
1051 ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); 1051 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
1052 ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG; 1052 ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG;
1053 1053
1054 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1054 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 0fdcb7713cd9..c93ca8f361b5 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -131,7 +131,7 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
131 memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size); 131 memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size);
132 if (ip->i_di.di_size < offset + size) 132 if (ip->i_di.di_size < offset + size)
133 ip->i_di.di_size = offset + size; 133 ip->i_di.di_size = offset + size;
134 ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); 134 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
135 gfs2_dinode_out(ip, dibh->b_data); 135 gfs2_dinode_out(ip, dibh->b_data);
136 136
137 brelse(dibh); 137 brelse(dibh);
@@ -229,7 +229,7 @@ out:
229 229
230 if (ip->i_di.di_size < offset + copied) 230 if (ip->i_di.di_size < offset + copied)
231 ip->i_di.di_size = offset + copied; 231 ip->i_di.di_size = offset + copied;
232 ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); 232 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
233 233
234 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 234 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
235 gfs2_dinode_out(ip, dibh->b_data); 235 gfs2_dinode_out(ip, dibh->b_data);
@@ -1198,12 +1198,11 @@ static int compare_dents(const void *a, const void *b)
1198 */ 1198 */
1199 1199
1200static int do_filldir_main(struct gfs2_inode *dip, u64 *offset, 1200static int do_filldir_main(struct gfs2_inode *dip, u64 *offset,
1201 void *opaque, gfs2_filldir_t filldir, 1201 void *opaque, filldir_t filldir,
1202 const struct gfs2_dirent **darr, u32 entries, 1202 const struct gfs2_dirent **darr, u32 entries,
1203 int *copied) 1203 int *copied)
1204{ 1204{
1205 const struct gfs2_dirent *dent, *dent_next; 1205 const struct gfs2_dirent *dent, *dent_next;
1206 struct gfs2_inum_host inum;
1207 u64 off, off_next; 1206 u64 off, off_next;
1208 unsigned int x, y; 1207 unsigned int x, y;
1209 int run = 0; 1208 int run = 0;
@@ -1240,11 +1239,9 @@ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset,
1240 *offset = off; 1239 *offset = off;
1241 } 1240 }
1242 1241
1243 gfs2_inum_in(&inum, (char *)&dent->de_inum);
1244
1245 error = filldir(opaque, (const char *)(dent + 1), 1242 error = filldir(opaque, (const char *)(dent + 1),
1246 be16_to_cpu(dent->de_name_len), 1243 be16_to_cpu(dent->de_name_len),
1247 off, &inum, 1244 off, be64_to_cpu(dent->de_inum.no_addr),
1248 be16_to_cpu(dent->de_type)); 1245 be16_to_cpu(dent->de_type));
1249 if (error) 1246 if (error)
1250 return 1; 1247 return 1;
@@ -1262,8 +1259,8 @@ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset,
1262} 1259}
1263 1260
1264static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, 1261static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
1265 gfs2_filldir_t filldir, int *copied, 1262 filldir_t filldir, int *copied, unsigned *depth,
1266 unsigned *depth, u64 leaf_no) 1263 u64 leaf_no)
1267{ 1264{
1268 struct gfs2_inode *ip = GFS2_I(inode); 1265 struct gfs2_inode *ip = GFS2_I(inode);
1269 struct buffer_head *bh; 1266 struct buffer_head *bh;
@@ -1343,7 +1340,7 @@ out:
1343 */ 1340 */
1344 1341
1345static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, 1342static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
1346 gfs2_filldir_t filldir) 1343 filldir_t filldir)
1347{ 1344{
1348 struct gfs2_inode *dip = GFS2_I(inode); 1345 struct gfs2_inode *dip = GFS2_I(inode);
1349 struct gfs2_sbd *sdp = GFS2_SB(inode); 1346 struct gfs2_sbd *sdp = GFS2_SB(inode);
@@ -1402,7 +1399,7 @@ out:
1402} 1399}
1403 1400
1404int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, 1401int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
1405 gfs2_filldir_t filldir) 1402 filldir_t filldir)
1406{ 1403{
1407 struct gfs2_inode *dip = GFS2_I(inode); 1404 struct gfs2_inode *dip = GFS2_I(inode);
1408 struct dirent_gather g; 1405 struct dirent_gather g;
@@ -1568,7 +1565,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1568 break; 1565 break;
1569 gfs2_trans_add_bh(ip->i_gl, bh, 1); 1566 gfs2_trans_add_bh(ip->i_gl, bh, 1);
1570 ip->i_di.di_entries++; 1567 ip->i_di.di_entries++;
1571 ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); 1568 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
1572 gfs2_dinode_out(ip, bh->b_data); 1569 gfs2_dinode_out(ip, bh->b_data);
1573 brelse(bh); 1570 brelse(bh);
1574 error = 0; 1571 error = 0;
@@ -1654,7 +1651,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
1654 gfs2_consist_inode(dip); 1651 gfs2_consist_inode(dip);
1655 gfs2_trans_add_bh(dip->i_gl, bh, 1); 1652 gfs2_trans_add_bh(dip->i_gl, bh, 1);
1656 dip->i_di.di_entries--; 1653 dip->i_di.di_entries--;
1657 dip->i_inode.i_mtime.tv_sec = dip->i_inode.i_ctime.tv_sec = get_seconds(); 1654 dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC;
1658 gfs2_dinode_out(dip, bh->b_data); 1655 gfs2_dinode_out(dip, bh->b_data);
1659 brelse(bh); 1656 brelse(bh);
1660 mark_inode_dirty(&dip->i_inode); 1657 mark_inode_dirty(&dip->i_inode);
@@ -1702,7 +1699,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
1702 gfs2_trans_add_bh(dip->i_gl, bh, 1); 1699 gfs2_trans_add_bh(dip->i_gl, bh, 1);
1703 } 1700 }
1704 1701
1705 dip->i_inode.i_mtime.tv_sec = dip->i_inode.i_ctime.tv_sec = get_seconds(); 1702 dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC;
1706 gfs2_dinode_out(dip, bh->b_data); 1703 gfs2_dinode_out(dip, bh->b_data);
1707 brelse(bh); 1704 brelse(bh);
1708 return 0; 1705 return 0;
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
index b21b33668a5b..48fe89046bba 100644
--- a/fs/gfs2/dir.h
+++ b/fs/gfs2/dir.h
@@ -16,30 +16,13 @@ struct inode;
16struct gfs2_inode; 16struct gfs2_inode;
17struct gfs2_inum; 17struct gfs2_inum;
18 18
19/**
20 * gfs2_filldir_t - Report a directory entry to the caller of gfs2_dir_read()
21 * @opaque: opaque data used by the function
22 * @name: the name of the directory entry
23 * @length: the length of the name
24 * @offset: the entry's offset in the directory
25 * @inum: the inode number the entry points to
26 * @type: the type of inode the entry points to
27 *
28 * Returns: 0 on success, 1 if buffer full
29 */
30
31typedef int (*gfs2_filldir_t) (void *opaque,
32 const char *name, unsigned int length,
33 u64 offset,
34 struct gfs2_inum_host *inum, unsigned int type);
35
36int gfs2_dir_search(struct inode *dir, const struct qstr *filename, 19int gfs2_dir_search(struct inode *dir, const struct qstr *filename,
37 struct gfs2_inum_host *inum, unsigned int *type); 20 struct gfs2_inum_host *inum, unsigned int *type);
38int gfs2_dir_add(struct inode *inode, const struct qstr *filename, 21int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
39 const struct gfs2_inum_host *inum, unsigned int type); 22 const struct gfs2_inum_host *inum, unsigned int type);
40int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename); 23int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename);
41int gfs2_dir_read(struct inode *inode, u64 * offset, void *opaque, 24int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
42 gfs2_filldir_t filldir); 25 filldir_t filldir);
43int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, 26int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
44 struct gfs2_inum_host *new_inum, unsigned int new_type); 27 struct gfs2_inum_host *new_inum, unsigned int new_type);
45 28
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
index ebebbdcd7057..0c83c7f4dda8 100644
--- a/fs/gfs2/eattr.c
+++ b/fs/gfs2/eattr.c
@@ -301,7 +301,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
301 301
302 error = gfs2_meta_inode_buffer(ip, &dibh); 302 error = gfs2_meta_inode_buffer(ip, &dibh);
303 if (!error) { 303 if (!error) {
304 ip->i_inode.i_ctime.tv_sec = get_seconds(); 304 ip->i_inode.i_ctime = CURRENT_TIME_SEC;
305 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 305 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
306 gfs2_dinode_out(ip, dibh->b_data); 306 gfs2_dinode_out(ip, dibh->b_data);
307 brelse(dibh); 307 brelse(dibh);
@@ -718,7 +718,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
718 (er->er_mode & S_IFMT)); 718 (er->er_mode & S_IFMT));
719 ip->i_inode.i_mode = er->er_mode; 719 ip->i_inode.i_mode = er->er_mode;
720 } 720 }
721 ip->i_inode.i_ctime.tv_sec = get_seconds(); 721 ip->i_inode.i_ctime = CURRENT_TIME_SEC;
722 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 722 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
723 gfs2_dinode_out(ip, dibh->b_data); 723 gfs2_dinode_out(ip, dibh->b_data);
724 brelse(dibh); 724 brelse(dibh);
@@ -853,7 +853,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
853 (ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT)); 853 (ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT));
854 ip->i_inode.i_mode = er->er_mode; 854 ip->i_inode.i_mode = er->er_mode;
855 } 855 }
856 ip->i_inode.i_ctime.tv_sec = get_seconds(); 856 ip->i_inode.i_ctime = CURRENT_TIME_SEC;
857 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 857 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
858 gfs2_dinode_out(ip, dibh->b_data); 858 gfs2_dinode_out(ip, dibh->b_data);
859 brelse(dibh); 859 brelse(dibh);
@@ -1134,7 +1134,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
1134 1134
1135 error = gfs2_meta_inode_buffer(ip, &dibh); 1135 error = gfs2_meta_inode_buffer(ip, &dibh);
1136 if (!error) { 1136 if (!error) {
1137 ip->i_inode.i_ctime.tv_sec = get_seconds(); 1137 ip->i_inode.i_ctime = CURRENT_TIME_SEC;
1138 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1138 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1139 gfs2_dinode_out(ip, dibh->b_data); 1139 gfs2_dinode_out(ip, dibh->b_data);
1140 brelse(dibh); 1140 brelse(dibh);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 438146904b58..6618c1190252 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -19,6 +19,8 @@
19#include <linux/gfs2_ondisk.h> 19#include <linux/gfs2_ondisk.h>
20#include <linux/list.h> 20#include <linux/list.h>
21#include <linux/lm_interface.h> 21#include <linux/lm_interface.h>
22#include <linux/wait.h>
23#include <linux/rwsem.h>
22#include <asm/uaccess.h> 24#include <asm/uaccess.h>
23 25
24#include "gfs2.h" 26#include "gfs2.h"
@@ -33,11 +35,6 @@
33#include "super.h" 35#include "super.h"
34#include "util.h" 36#include "util.h"
35 37
36struct greedy {
37 struct gfs2_holder gr_gh;
38 struct delayed_work gr_work;
39};
40
41struct gfs2_gl_hash_bucket { 38struct gfs2_gl_hash_bucket {
42 struct hlist_head hb_list; 39 struct hlist_head hb_list;
43}; 40};
@@ -47,6 +44,9 @@ typedef void (*glock_examiner) (struct gfs2_glock * gl);
47static int gfs2_dump_lockstate(struct gfs2_sbd *sdp); 44static int gfs2_dump_lockstate(struct gfs2_sbd *sdp);
48static int dump_glock(struct gfs2_glock *gl); 45static int dump_glock(struct gfs2_glock *gl);
49static int dump_inode(struct gfs2_inode *ip); 46static int dump_inode(struct gfs2_inode *ip);
47static void gfs2_glock_xmote_th(struct gfs2_holder *gh);
48static void gfs2_glock_drop_th(struct gfs2_glock *gl);
49static DECLARE_RWSEM(gfs2_umount_flush_sem);
50 50
51#define GFS2_GL_HASH_SHIFT 15 51#define GFS2_GL_HASH_SHIFT 15
52#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT) 52#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT)
@@ -213,30 +213,6 @@ out:
213} 213}
214 214
215/** 215/**
216 * queue_empty - check to see if a glock's queue is empty
217 * @gl: the glock
218 * @head: the head of the queue to check
219 *
220 * This function protects the list in the event that a process already
221 * has a holder on the list and is adding a second holder for itself.
222 * The glmutex lock is what generally prevents processes from working
223 * on the same glock at once, but the special case of adding a second
224 * holder for yourself ("recursive" locking) doesn't involve locking
225 * glmutex, making the spin lock necessary.
226 *
227 * Returns: 1 if the queue is empty
228 */
229
230static inline int queue_empty(struct gfs2_glock *gl, struct list_head *head)
231{
232 int empty;
233 spin_lock(&gl->gl_spin);
234 empty = list_empty(head);
235 spin_unlock(&gl->gl_spin);
236 return empty;
237}
238
239/**
240 * search_bucket() - Find struct gfs2_glock by lock number 216 * search_bucket() - Find struct gfs2_glock by lock number
241 * @bucket: the bucket to search 217 * @bucket: the bucket to search
242 * @name: The lock name 218 * @name: The lock name
@@ -395,11 +371,6 @@ void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags,
395 gh->gh_flags = flags; 371 gh->gh_flags = flags;
396 gh->gh_error = 0; 372 gh->gh_error = 0;
397 gh->gh_iflags = 0; 373 gh->gh_iflags = 0;
398 init_completion(&gh->gh_wait);
399
400 if (gh->gh_state == LM_ST_EXCLUSIVE)
401 gh->gh_flags |= GL_LOCAL_EXCL;
402
403 gfs2_glock_hold(gl); 374 gfs2_glock_hold(gl);
404} 375}
405 376
@@ -417,9 +388,6 @@ void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder *
417{ 388{
418 gh->gh_state = state; 389 gh->gh_state = state;
419 gh->gh_flags = flags; 390 gh->gh_flags = flags;
420 if (gh->gh_state == LM_ST_EXCLUSIVE)
421 gh->gh_flags |= GL_LOCAL_EXCL;
422
423 gh->gh_iflags &= 1 << HIF_ALLOCED; 391 gh->gh_iflags &= 1 << HIF_ALLOCED;
424 gh->gh_ip = (unsigned long)__builtin_return_address(0); 392 gh->gh_ip = (unsigned long)__builtin_return_address(0);
425} 393}
@@ -479,6 +447,29 @@ static void gfs2_holder_put(struct gfs2_holder *gh)
479 kfree(gh); 447 kfree(gh);
480} 448}
481 449
450static void gfs2_holder_dispose_or_wake(struct gfs2_holder *gh)
451{
452 if (test_bit(HIF_DEALLOC, &gh->gh_iflags)) {
453 gfs2_holder_put(gh);
454 return;
455 }
456 clear_bit(HIF_WAIT, &gh->gh_iflags);
457 smp_mb();
458 wake_up_bit(&gh->gh_iflags, HIF_WAIT);
459}
460
461static int holder_wait(void *word)
462{
463 schedule();
464 return 0;
465}
466
467static void wait_on_holder(struct gfs2_holder *gh)
468{
469 might_sleep();
470 wait_on_bit(&gh->gh_iflags, HIF_WAIT, holder_wait, TASK_UNINTERRUPTIBLE);
471}
472
482/** 473/**
483 * rq_mutex - process a mutex request in the queue 474 * rq_mutex - process a mutex request in the queue
484 * @gh: the glock holder 475 * @gh: the glock holder
@@ -493,7 +484,9 @@ static int rq_mutex(struct gfs2_holder *gh)
493 list_del_init(&gh->gh_list); 484 list_del_init(&gh->gh_list);
494 /* gh->gh_error never examined. */ 485 /* gh->gh_error never examined. */
495 set_bit(GLF_LOCK, &gl->gl_flags); 486 set_bit(GLF_LOCK, &gl->gl_flags);
496 complete(&gh->gh_wait); 487 clear_bit(HIF_WAIT, &gh->gh_iflags);
488 smp_mb();
489 wake_up_bit(&gh->gh_iflags, HIF_WAIT);
497 490
498 return 1; 491 return 1;
499} 492}
@@ -511,7 +504,6 @@ static int rq_promote(struct gfs2_holder *gh)
511{ 504{
512 struct gfs2_glock *gl = gh->gh_gl; 505 struct gfs2_glock *gl = gh->gh_gl;
513 struct gfs2_sbd *sdp = gl->gl_sbd; 506 struct gfs2_sbd *sdp = gl->gl_sbd;
514 const struct gfs2_glock_operations *glops = gl->gl_ops;
515 507
516 if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) { 508 if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) {
517 if (list_empty(&gl->gl_holders)) { 509 if (list_empty(&gl->gl_holders)) {
@@ -526,7 +518,7 @@ static int rq_promote(struct gfs2_holder *gh)
526 gfs2_reclaim_glock(sdp); 518 gfs2_reclaim_glock(sdp);
527 } 519 }
528 520
529 glops->go_xmote_th(gl, gh->gh_state, gh->gh_flags); 521 gfs2_glock_xmote_th(gh);
530 spin_lock(&gl->gl_spin); 522 spin_lock(&gl->gl_spin);
531 } 523 }
532 return 1; 524 return 1;
@@ -537,11 +529,11 @@ static int rq_promote(struct gfs2_holder *gh)
537 set_bit(GLF_LOCK, &gl->gl_flags); 529 set_bit(GLF_LOCK, &gl->gl_flags);
538 } else { 530 } else {
539 struct gfs2_holder *next_gh; 531 struct gfs2_holder *next_gh;
540 if (gh->gh_flags & GL_LOCAL_EXCL) 532 if (gh->gh_state == LM_ST_EXCLUSIVE)
541 return 1; 533 return 1;
542 next_gh = list_entry(gl->gl_holders.next, struct gfs2_holder, 534 next_gh = list_entry(gl->gl_holders.next, struct gfs2_holder,
543 gh_list); 535 gh_list);
544 if (next_gh->gh_flags & GL_LOCAL_EXCL) 536 if (next_gh->gh_state == LM_ST_EXCLUSIVE)
545 return 1; 537 return 1;
546 } 538 }
547 539
@@ -549,7 +541,7 @@ static int rq_promote(struct gfs2_holder *gh)
549 gh->gh_error = 0; 541 gh->gh_error = 0;
550 set_bit(HIF_HOLDER, &gh->gh_iflags); 542 set_bit(HIF_HOLDER, &gh->gh_iflags);
551 543
552 complete(&gh->gh_wait); 544 gfs2_holder_dispose_or_wake(gh);
553 545
554 return 0; 546 return 0;
555} 547}
@@ -564,7 +556,6 @@ static int rq_promote(struct gfs2_holder *gh)
564static int rq_demote(struct gfs2_holder *gh) 556static int rq_demote(struct gfs2_holder *gh)
565{ 557{
566 struct gfs2_glock *gl = gh->gh_gl; 558 struct gfs2_glock *gl = gh->gh_gl;
567 const struct gfs2_glock_operations *glops = gl->gl_ops;
568 559
569 if (!list_empty(&gl->gl_holders)) 560 if (!list_empty(&gl->gl_holders))
570 return 1; 561 return 1;
@@ -573,10 +564,7 @@ static int rq_demote(struct gfs2_holder *gh)
573 list_del_init(&gh->gh_list); 564 list_del_init(&gh->gh_list);
574 gh->gh_error = 0; 565 gh->gh_error = 0;
575 spin_unlock(&gl->gl_spin); 566 spin_unlock(&gl->gl_spin);
576 if (test_bit(HIF_DEALLOC, &gh->gh_iflags)) 567 gfs2_holder_dispose_or_wake(gh);
577 gfs2_holder_put(gh);
578 else
579 complete(&gh->gh_wait);
580 spin_lock(&gl->gl_spin); 568 spin_lock(&gl->gl_spin);
581 } else { 569 } else {
582 gl->gl_req_gh = gh; 570 gl->gl_req_gh = gh;
@@ -585,9 +573,9 @@ static int rq_demote(struct gfs2_holder *gh)
585 573
586 if (gh->gh_state == LM_ST_UNLOCKED || 574 if (gh->gh_state == LM_ST_UNLOCKED ||
587 gl->gl_state != LM_ST_EXCLUSIVE) 575 gl->gl_state != LM_ST_EXCLUSIVE)
588 glops->go_drop_th(gl); 576 gfs2_glock_drop_th(gl);
589 else 577 else
590 glops->go_xmote_th(gl, gh->gh_state, gh->gh_flags); 578 gfs2_glock_xmote_th(gh);
591 579
592 spin_lock(&gl->gl_spin); 580 spin_lock(&gl->gl_spin);
593 } 581 }
@@ -596,30 +584,6 @@ static int rq_demote(struct gfs2_holder *gh)
596} 584}
597 585
598/** 586/**
599 * rq_greedy - process a queued request to drop greedy status
600 * @gh: the glock holder
601 *
602 * Returns: 1 if the queue is blocked
603 */
604
605static int rq_greedy(struct gfs2_holder *gh)
606{
607 struct gfs2_glock *gl = gh->gh_gl;
608
609 list_del_init(&gh->gh_list);
610 /* gh->gh_error never examined. */
611 clear_bit(GLF_GREEDY, &gl->gl_flags);
612 spin_unlock(&gl->gl_spin);
613
614 gfs2_holder_uninit(gh);
615 kfree(container_of(gh, struct greedy, gr_gh));
616
617 spin_lock(&gl->gl_spin);
618
619 return 0;
620}
621
622/**
623 * run_queue - process holder structures on a glock 587 * run_queue - process holder structures on a glock
624 * @gl: the glock 588 * @gl: the glock
625 * 589 *
@@ -649,8 +613,6 @@ static void run_queue(struct gfs2_glock *gl)
649 613
650 if (test_bit(HIF_DEMOTE, &gh->gh_iflags)) 614 if (test_bit(HIF_DEMOTE, &gh->gh_iflags))
651 blocked = rq_demote(gh); 615 blocked = rq_demote(gh);
652 else if (test_bit(HIF_GREEDY, &gh->gh_iflags))
653 blocked = rq_greedy(gh);
654 else 616 else
655 gfs2_assert_warn(gl->gl_sbd, 0); 617 gfs2_assert_warn(gl->gl_sbd, 0);
656 618
@@ -684,6 +646,8 @@ static void gfs2_glmutex_lock(struct gfs2_glock *gl)
684 646
685 gfs2_holder_init(gl, 0, 0, &gh); 647 gfs2_holder_init(gl, 0, 0, &gh);
686 set_bit(HIF_MUTEX, &gh.gh_iflags); 648 set_bit(HIF_MUTEX, &gh.gh_iflags);
649 if (test_and_set_bit(HIF_WAIT, &gh.gh_iflags))
650 BUG();
687 651
688 spin_lock(&gl->gl_spin); 652 spin_lock(&gl->gl_spin);
689 if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { 653 if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
@@ -691,11 +655,13 @@ static void gfs2_glmutex_lock(struct gfs2_glock *gl)
691 } else { 655 } else {
692 gl->gl_owner = current; 656 gl->gl_owner = current;
693 gl->gl_ip = (unsigned long)__builtin_return_address(0); 657 gl->gl_ip = (unsigned long)__builtin_return_address(0);
694 complete(&gh.gh_wait); 658 clear_bit(HIF_WAIT, &gh.gh_iflags);
659 smp_mb();
660 wake_up_bit(&gh.gh_iflags, HIF_WAIT);
695 } 661 }
696 spin_unlock(&gl->gl_spin); 662 spin_unlock(&gl->gl_spin);
697 663
698 wait_for_completion(&gh.gh_wait); 664 wait_on_holder(&gh);
699 gfs2_holder_uninit(&gh); 665 gfs2_holder_uninit(&gh);
700} 666}
701 667
@@ -774,6 +740,7 @@ restart:
774 return; 740 return;
775 set_bit(HIF_DEMOTE, &new_gh->gh_iflags); 741 set_bit(HIF_DEMOTE, &new_gh->gh_iflags);
776 set_bit(HIF_DEALLOC, &new_gh->gh_iflags); 742 set_bit(HIF_DEALLOC, &new_gh->gh_iflags);
743 set_bit(HIF_WAIT, &new_gh->gh_iflags);
777 744
778 goto restart; 745 goto restart;
779 } 746 }
@@ -825,7 +792,7 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
825 int op_done = 1; 792 int op_done = 1;
826 793
827 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); 794 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
828 gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders)); 795 gfs2_assert_warn(sdp, list_empty(&gl->gl_holders));
829 gfs2_assert_warn(sdp, !(ret & LM_OUT_ASYNC)); 796 gfs2_assert_warn(sdp, !(ret & LM_OUT_ASYNC));
830 797
831 state_change(gl, ret & LM_OUT_ST_MASK); 798 state_change(gl, ret & LM_OUT_ST_MASK);
@@ -908,12 +875,8 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
908 875
909 gfs2_glock_put(gl); 876 gfs2_glock_put(gl);
910 877
911 if (gh) { 878 if (gh)
912 if (test_bit(HIF_DEALLOC, &gh->gh_iflags)) 879 gfs2_holder_dispose_or_wake(gh);
913 gfs2_holder_put(gh);
914 else
915 complete(&gh->gh_wait);
916 }
917} 880}
918 881
919/** 882/**
@@ -924,23 +887,26 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
924 * 887 *
925 */ 888 */
926 889
927void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags) 890void gfs2_glock_xmote_th(struct gfs2_holder *gh)
928{ 891{
892 struct gfs2_glock *gl = gh->gh_gl;
929 struct gfs2_sbd *sdp = gl->gl_sbd; 893 struct gfs2_sbd *sdp = gl->gl_sbd;
894 int flags = gh->gh_flags;
895 unsigned state = gh->gh_state;
930 const struct gfs2_glock_operations *glops = gl->gl_ops; 896 const struct gfs2_glock_operations *glops = gl->gl_ops;
931 int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB | 897 int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB |
932 LM_FLAG_NOEXP | LM_FLAG_ANY | 898 LM_FLAG_NOEXP | LM_FLAG_ANY |
933 LM_FLAG_PRIORITY); 899 LM_FLAG_PRIORITY);
934 unsigned int lck_ret; 900 unsigned int lck_ret;
935 901
902 if (glops->go_xmote_th)
903 glops->go_xmote_th(gl);
904
936 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); 905 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
937 gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders)); 906 gfs2_assert_warn(sdp, list_empty(&gl->gl_holders));
938 gfs2_assert_warn(sdp, state != LM_ST_UNLOCKED); 907 gfs2_assert_warn(sdp, state != LM_ST_UNLOCKED);
939 gfs2_assert_warn(sdp, state != gl->gl_state); 908 gfs2_assert_warn(sdp, state != gl->gl_state);
940 909
941 if (gl->gl_state == LM_ST_EXCLUSIVE && glops->go_sync)
942 glops->go_sync(gl);
943
944 gfs2_glock_hold(gl); 910 gfs2_glock_hold(gl);
945 gl->gl_req_bh = xmote_bh; 911 gl->gl_req_bh = xmote_bh;
946 912
@@ -971,10 +937,8 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
971 const struct gfs2_glock_operations *glops = gl->gl_ops; 937 const struct gfs2_glock_operations *glops = gl->gl_ops;
972 struct gfs2_holder *gh = gl->gl_req_gh; 938 struct gfs2_holder *gh = gl->gl_req_gh;
973 939
974 clear_bit(GLF_PREFETCH, &gl->gl_flags);
975
976 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); 940 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
977 gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders)); 941 gfs2_assert_warn(sdp, list_empty(&gl->gl_holders));
978 gfs2_assert_warn(sdp, !ret); 942 gfs2_assert_warn(sdp, !ret);
979 943
980 state_change(gl, LM_ST_UNLOCKED); 944 state_change(gl, LM_ST_UNLOCKED);
@@ -1001,12 +965,8 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
1001 965
1002 gfs2_glock_put(gl); 966 gfs2_glock_put(gl);
1003 967
1004 if (gh) { 968 if (gh)
1005 if (test_bit(HIF_DEALLOC, &gh->gh_iflags)) 969 gfs2_holder_dispose_or_wake(gh);
1006 gfs2_holder_put(gh);
1007 else
1008 complete(&gh->gh_wait);
1009 }
1010} 970}
1011 971
1012/** 972/**
@@ -1015,19 +975,19 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
1015 * 975 *
1016 */ 976 */
1017 977
1018void gfs2_glock_drop_th(struct gfs2_glock *gl) 978static void gfs2_glock_drop_th(struct gfs2_glock *gl)
1019{ 979{
1020 struct gfs2_sbd *sdp = gl->gl_sbd; 980 struct gfs2_sbd *sdp = gl->gl_sbd;
1021 const struct gfs2_glock_operations *glops = gl->gl_ops; 981 const struct gfs2_glock_operations *glops = gl->gl_ops;
1022 unsigned int ret; 982 unsigned int ret;
1023 983
984 if (glops->go_drop_th)
985 glops->go_drop_th(gl);
986
1024 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); 987 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
1025 gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders)); 988 gfs2_assert_warn(sdp, list_empty(&gl->gl_holders));
1026 gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED); 989 gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED);
1027 990
1028 if (gl->gl_state == LM_ST_EXCLUSIVE && glops->go_sync)
1029 glops->go_sync(gl);
1030
1031 gfs2_glock_hold(gl); 991 gfs2_glock_hold(gl);
1032 gl->gl_req_bh = drop_bh; 992 gl->gl_req_bh = drop_bh;
1033 993
@@ -1107,8 +1067,7 @@ static int glock_wait_internal(struct gfs2_holder *gh)
1107 if (gh->gh_flags & LM_FLAG_PRIORITY) 1067 if (gh->gh_flags & LM_FLAG_PRIORITY)
1108 do_cancels(gh); 1068 do_cancels(gh);
1109 1069
1110 wait_for_completion(&gh->gh_wait); 1070 wait_on_holder(gh);
1111
1112 if (gh->gh_error) 1071 if (gh->gh_error)
1113 return gh->gh_error; 1072 return gh->gh_error;
1114 1073
@@ -1164,6 +1123,8 @@ static void add_to_queue(struct gfs2_holder *gh)
1164 struct gfs2_holder *existing; 1123 struct gfs2_holder *existing;
1165 1124
1166 BUG_ON(!gh->gh_owner); 1125 BUG_ON(!gh->gh_owner);
1126 if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags))
1127 BUG();
1167 1128
1168 existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner); 1129 existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner);
1169 if (existing) { 1130 if (existing) {
@@ -1227,8 +1188,6 @@ restart:
1227 } 1188 }
1228 } 1189 }
1229 1190
1230 clear_bit(GLF_PREFETCH, &gl->gl_flags);
1231
1232 return error; 1191 return error;
1233} 1192}
1234 1193
@@ -1321,98 +1280,6 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
1321} 1280}
1322 1281
1323/** 1282/**
1324 * gfs2_glock_prefetch - Try to prefetch a glock
1325 * @gl: the glock
1326 * @state: the state to prefetch in
1327 * @flags: flags passed to go_xmote_th()
1328 *
1329 */
1330
1331static void gfs2_glock_prefetch(struct gfs2_glock *gl, unsigned int state,
1332 int flags)
1333{
1334 const struct gfs2_glock_operations *glops = gl->gl_ops;
1335
1336 spin_lock(&gl->gl_spin);
1337
1338 if (test_bit(GLF_LOCK, &gl->gl_flags) || !list_empty(&gl->gl_holders) ||
1339 !list_empty(&gl->gl_waiters1) || !list_empty(&gl->gl_waiters2) ||
1340 !list_empty(&gl->gl_waiters3) ||
1341 relaxed_state_ok(gl->gl_state, state, flags)) {
1342 spin_unlock(&gl->gl_spin);
1343 return;
1344 }
1345
1346 set_bit(GLF_PREFETCH, &gl->gl_flags);
1347 set_bit(GLF_LOCK, &gl->gl_flags);
1348 spin_unlock(&gl->gl_spin);
1349
1350 glops->go_xmote_th(gl, state, flags);
1351}
1352
1353static void greedy_work(struct work_struct *work)
1354{
1355 struct greedy *gr = container_of(work, struct greedy, gr_work.work);
1356 struct gfs2_holder *gh = &gr->gr_gh;
1357 struct gfs2_glock *gl = gh->gh_gl;
1358 const struct gfs2_glock_operations *glops = gl->gl_ops;
1359
1360 clear_bit(GLF_SKIP_WAITERS2, &gl->gl_flags);
1361
1362 if (glops->go_greedy)
1363 glops->go_greedy(gl);
1364
1365 spin_lock(&gl->gl_spin);
1366
1367 if (list_empty(&gl->gl_waiters2)) {
1368 clear_bit(GLF_GREEDY, &gl->gl_flags);
1369 spin_unlock(&gl->gl_spin);
1370 gfs2_holder_uninit(gh);
1371 kfree(gr);
1372 } else {
1373 gfs2_glock_hold(gl);
1374 list_add_tail(&gh->gh_list, &gl->gl_waiters2);
1375 run_queue(gl);
1376 spin_unlock(&gl->gl_spin);
1377 gfs2_glock_put(gl);
1378 }
1379}
1380
1381/**
1382 * gfs2_glock_be_greedy -
1383 * @gl:
1384 * @time:
1385 *
1386 * Returns: 0 if go_greedy will be called, 1 otherwise
1387 */
1388
1389int gfs2_glock_be_greedy(struct gfs2_glock *gl, unsigned int time)
1390{
1391 struct greedy *gr;
1392 struct gfs2_holder *gh;
1393
1394 if (!time || gl->gl_sbd->sd_args.ar_localcaching ||
1395 test_and_set_bit(GLF_GREEDY, &gl->gl_flags))
1396 return 1;
1397
1398 gr = kmalloc(sizeof(struct greedy), GFP_KERNEL);
1399 if (!gr) {
1400 clear_bit(GLF_GREEDY, &gl->gl_flags);
1401 return 1;
1402 }
1403 gh = &gr->gr_gh;
1404
1405 gfs2_holder_init(gl, 0, 0, gh);
1406 set_bit(HIF_GREEDY, &gh->gh_iflags);
1407 INIT_DELAYED_WORK(&gr->gr_work, greedy_work);
1408
1409 set_bit(GLF_SKIP_WAITERS2, &gl->gl_flags);
1410 schedule_delayed_work(&gr->gr_work, time);
1411
1412 return 0;
1413}
1414
1415/**
1416 * gfs2_glock_dq_uninit - dequeue a holder from a glock and initialize it 1283 * gfs2_glock_dq_uninit - dequeue a holder from a glock and initialize it
1417 * @gh: the holder structure 1284 * @gh: the holder structure
1418 * 1285 *
@@ -1470,10 +1337,7 @@ static int glock_compare(const void *arg_a, const void *arg_b)
1470 return 1; 1337 return 1;
1471 if (a->ln_number < b->ln_number) 1338 if (a->ln_number < b->ln_number)
1472 return -1; 1339 return -1;
1473 if (gh_a->gh_state == LM_ST_SHARED && gh_b->gh_state == LM_ST_EXCLUSIVE) 1340 BUG_ON(gh_a->gh_gl->gl_ops->go_type == gh_b->gh_gl->gl_ops->go_type);
1474 return 1;
1475 if (!(gh_a->gh_flags & GL_LOCAL_EXCL) && (gh_b->gh_flags & GL_LOCAL_EXCL))
1476 return 1;
1477 return 0; 1341 return 0;
1478} 1342}
1479 1343
@@ -1618,34 +1482,6 @@ void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs)
1618} 1482}
1619 1483
1620/** 1484/**
1621 * gfs2_glock_prefetch_num - prefetch a glock based on lock number
1622 * @sdp: the filesystem
1623 * @number: the lock number
1624 * @glops: the glock operations for the type of glock
1625 * @state: the state to acquire the glock in
1626 * @flags: modifier flags for the aquisition
1627 *
1628 * Returns: errno
1629 */
1630
1631void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, u64 number,
1632 const struct gfs2_glock_operations *glops,
1633 unsigned int state, int flags)
1634{
1635 struct gfs2_glock *gl;
1636 int error;
1637
1638 if (atomic_read(&sdp->sd_reclaim_count) <
1639 gfs2_tune_get(sdp, gt_reclaim_limit)) {
1640 error = gfs2_glock_get(sdp, number, glops, CREATE, &gl);
1641 if (!error) {
1642 gfs2_glock_prefetch(gl, state, flags);
1643 gfs2_glock_put(gl);
1644 }
1645 }
1646}
1647
1648/**
1649 * gfs2_lvb_hold - attach a LVB from a glock 1485 * gfs2_lvb_hold - attach a LVB from a glock
1650 * @gl: The glock in question 1486 * @gl: The glock in question
1651 * 1487 *
@@ -1703,8 +1539,6 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
1703 if (!gl) 1539 if (!gl)
1704 return; 1540 return;
1705 1541
1706 if (gl->gl_ops->go_callback)
1707 gl->gl_ops->go_callback(gl, state);
1708 handle_callback(gl, state); 1542 handle_callback(gl, state);
1709 1543
1710 spin_lock(&gl->gl_spin); 1544 spin_lock(&gl->gl_spin);
@@ -1746,12 +1580,14 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data)
1746 struct lm_async_cb *async = data; 1580 struct lm_async_cb *async = data;
1747 struct gfs2_glock *gl; 1581 struct gfs2_glock *gl;
1748 1582
1583 down_read(&gfs2_umount_flush_sem);
1749 gl = gfs2_glock_find(sdp, &async->lc_name); 1584 gl = gfs2_glock_find(sdp, &async->lc_name);
1750 if (gfs2_assert_warn(sdp, gl)) 1585 if (gfs2_assert_warn(sdp, gl))
1751 return; 1586 return;
1752 if (!gfs2_assert_warn(sdp, gl->gl_req_bh)) 1587 if (!gfs2_assert_warn(sdp, gl->gl_req_bh))
1753 gl->gl_req_bh(gl, async->lc_ret); 1588 gl->gl_req_bh(gl, async->lc_ret);
1754 gfs2_glock_put(gl); 1589 gfs2_glock_put(gl);
1590 up_read(&gfs2_umount_flush_sem);
1755 return; 1591 return;
1756 } 1592 }
1757 1593
@@ -1781,15 +1617,11 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data)
1781 1617
1782static int demote_ok(struct gfs2_glock *gl) 1618static int demote_ok(struct gfs2_glock *gl)
1783{ 1619{
1784 struct gfs2_sbd *sdp = gl->gl_sbd;
1785 const struct gfs2_glock_operations *glops = gl->gl_ops; 1620 const struct gfs2_glock_operations *glops = gl->gl_ops;
1786 int demote = 1; 1621 int demote = 1;
1787 1622
1788 if (test_bit(GLF_STICKY, &gl->gl_flags)) 1623 if (test_bit(GLF_STICKY, &gl->gl_flags))
1789 demote = 0; 1624 demote = 0;
1790 else if (test_bit(GLF_PREFETCH, &gl->gl_flags))
1791 demote = time_after_eq(jiffies, gl->gl_stamp +
1792 gfs2_tune_get(sdp, gt_prefetch_secs) * HZ);
1793 else if (glops->go_demote_ok) 1625 else if (glops->go_demote_ok)
1794 demote = glops->go_demote_ok(gl); 1626 demote = glops->go_demote_ok(gl);
1795 1627
@@ -1845,7 +1677,7 @@ void gfs2_reclaim_glock(struct gfs2_sbd *sdp)
1845 atomic_inc(&sdp->sd_reclaimed); 1677 atomic_inc(&sdp->sd_reclaimed);
1846 1678
1847 if (gfs2_glmutex_trylock(gl)) { 1679 if (gfs2_glmutex_trylock(gl)) {
1848 if (queue_empty(gl, &gl->gl_holders) && 1680 if (list_empty(&gl->gl_holders) &&
1849 gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) 1681 gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl))
1850 handle_callback(gl, LM_ST_UNLOCKED); 1682 handle_callback(gl, LM_ST_UNLOCKED);
1851 gfs2_glmutex_unlock(gl); 1683 gfs2_glmutex_unlock(gl);
@@ -1909,7 +1741,7 @@ static void scan_glock(struct gfs2_glock *gl)
1909 return; 1741 return;
1910 1742
1911 if (gfs2_glmutex_trylock(gl)) { 1743 if (gfs2_glmutex_trylock(gl)) {
1912 if (queue_empty(gl, &gl->gl_holders) && 1744 if (list_empty(&gl->gl_holders) &&
1913 gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) 1745 gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl))
1914 goto out_schedule; 1746 goto out_schedule;
1915 gfs2_glmutex_unlock(gl); 1747 gfs2_glmutex_unlock(gl);
@@ -1958,7 +1790,7 @@ static void clear_glock(struct gfs2_glock *gl)
1958 } 1790 }
1959 1791
1960 if (gfs2_glmutex_trylock(gl)) { 1792 if (gfs2_glmutex_trylock(gl)) {
1961 if (queue_empty(gl, &gl->gl_holders) && 1793 if (list_empty(&gl->gl_holders) &&
1962 gl->gl_state != LM_ST_UNLOCKED) 1794 gl->gl_state != LM_ST_UNLOCKED)
1963 handle_callback(gl, LM_ST_UNLOCKED); 1795 handle_callback(gl, LM_ST_UNLOCKED);
1964 gfs2_glmutex_unlock(gl); 1796 gfs2_glmutex_unlock(gl);
@@ -2000,7 +1832,9 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait)
2000 t = jiffies; 1832 t = jiffies;
2001 } 1833 }
2002 1834
1835 down_write(&gfs2_umount_flush_sem);
2003 invalidate_inodes(sdp->sd_vfs); 1836 invalidate_inodes(sdp->sd_vfs);
1837 up_write(&gfs2_umount_flush_sem);
2004 msleep(10); 1838 msleep(10);
2005 } 1839 }
2006} 1840}
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index fb39108fc05c..f50e40ceca43 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -20,7 +20,6 @@
20#define LM_FLAG_ANY 0x00000008 20#define LM_FLAG_ANY 0x00000008
21#define LM_FLAG_PRIORITY 0x00000010 */ 21#define LM_FLAG_PRIORITY 0x00000010 */
22 22
23#define GL_LOCAL_EXCL 0x00000020
24#define GL_ASYNC 0x00000040 23#define GL_ASYNC 0x00000040
25#define GL_EXACT 0x00000080 24#define GL_EXACT 0x00000080
26#define GL_SKIP 0x00000100 25#define GL_SKIP 0x00000100
@@ -83,17 +82,11 @@ void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags,
83void gfs2_holder_reinit(unsigned int state, unsigned flags, 82void gfs2_holder_reinit(unsigned int state, unsigned flags,
84 struct gfs2_holder *gh); 83 struct gfs2_holder *gh);
85void gfs2_holder_uninit(struct gfs2_holder *gh); 84void gfs2_holder_uninit(struct gfs2_holder *gh);
86
87void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags);
88void gfs2_glock_drop_th(struct gfs2_glock *gl);
89
90int gfs2_glock_nq(struct gfs2_holder *gh); 85int gfs2_glock_nq(struct gfs2_holder *gh);
91int gfs2_glock_poll(struct gfs2_holder *gh); 86int gfs2_glock_poll(struct gfs2_holder *gh);
92int gfs2_glock_wait(struct gfs2_holder *gh); 87int gfs2_glock_wait(struct gfs2_holder *gh);
93void gfs2_glock_dq(struct gfs2_holder *gh); 88void gfs2_glock_dq(struct gfs2_holder *gh);
94 89
95int gfs2_glock_be_greedy(struct gfs2_glock *gl, unsigned int time);
96
97void gfs2_glock_dq_uninit(struct gfs2_holder *gh); 90void gfs2_glock_dq_uninit(struct gfs2_holder *gh);
98int gfs2_glock_nq_num(struct gfs2_sbd *sdp, 91int gfs2_glock_nq_num(struct gfs2_sbd *sdp,
99 u64 number, const struct gfs2_glock_operations *glops, 92 u64 number, const struct gfs2_glock_operations *glops,
@@ -103,10 +96,6 @@ int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs);
103void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); 96void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs);
104void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); 97void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs);
105 98
106void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, u64 number,
107 const struct gfs2_glock_operations *glops,
108 unsigned int state, int flags);
109
110/** 99/**
111 * gfs2_glock_nq_init - intialize a holder and enqueue it on a glock 100 * gfs2_glock_nq_init - intialize a holder and enqueue it on a glock
112 * @gl: the glock 101 * @gl: the glock
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index b068d10bcb6e..c4b0391b7aa2 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -117,12 +117,14 @@ static void gfs2_pte_inval(struct gfs2_glock *gl)
117 117
118static void meta_go_sync(struct gfs2_glock *gl) 118static void meta_go_sync(struct gfs2_glock *gl)
119{ 119{
120 if (gl->gl_state != LM_ST_EXCLUSIVE)
121 return;
122
120 if (test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) { 123 if (test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) {
121 gfs2_log_flush(gl->gl_sbd, gl); 124 gfs2_log_flush(gl->gl_sbd, gl);
122 gfs2_meta_sync(gl); 125 gfs2_meta_sync(gl);
123 gfs2_ail_empty_gl(gl); 126 gfs2_ail_empty_gl(gl);
124 } 127 }
125
126} 128}
127 129
128/** 130/**
@@ -142,6 +144,37 @@ static void meta_go_inval(struct gfs2_glock *gl, int flags)
142} 144}
143 145
144/** 146/**
147 * inode_go_sync - Sync the dirty data and/or metadata for an inode glock
148 * @gl: the glock protecting the inode
149 *
150 */
151
152static void inode_go_sync(struct gfs2_glock *gl)
153{
154 struct gfs2_inode *ip = gl->gl_object;
155
156 if (ip && !S_ISREG(ip->i_inode.i_mode))
157 ip = NULL;
158
159 if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
160 gfs2_log_flush(gl->gl_sbd, gl);
161 if (ip)
162 filemap_fdatawrite(ip->i_inode.i_mapping);
163 gfs2_meta_sync(gl);
164 if (ip) {
165 struct address_space *mapping = ip->i_inode.i_mapping;
166 int error = filemap_fdatawait(mapping);
167 if (error == -ENOSPC)
168 set_bit(AS_ENOSPC, &mapping->flags);
169 else if (error)
170 set_bit(AS_EIO, &mapping->flags);
171 }
172 clear_bit(GLF_DIRTY, &gl->gl_flags);
173 gfs2_ail_empty_gl(gl);
174 }
175}
176
177/**
145 * inode_go_xmote_th - promote/demote a glock 178 * inode_go_xmote_th - promote/demote a glock
146 * @gl: the glock 179 * @gl: the glock
147 * @state: the requested state 180 * @state: the requested state
@@ -149,12 +182,12 @@ static void meta_go_inval(struct gfs2_glock *gl, int flags)
149 * 182 *
150 */ 183 */
151 184
152static void inode_go_xmote_th(struct gfs2_glock *gl, unsigned int state, 185static void inode_go_xmote_th(struct gfs2_glock *gl)
153 int flags)
154{ 186{
155 if (gl->gl_state != LM_ST_UNLOCKED) 187 if (gl->gl_state != LM_ST_UNLOCKED)
156 gfs2_pte_inval(gl); 188 gfs2_pte_inval(gl);
157 gfs2_glock_xmote_th(gl, state, flags); 189 if (gl->gl_state == LM_ST_EXCLUSIVE)
190 inode_go_sync(gl);
158} 191}
159 192
160/** 193/**
@@ -189,38 +222,8 @@ static void inode_go_xmote_bh(struct gfs2_glock *gl)
189static void inode_go_drop_th(struct gfs2_glock *gl) 222static void inode_go_drop_th(struct gfs2_glock *gl)
190{ 223{
191 gfs2_pte_inval(gl); 224 gfs2_pte_inval(gl);
192 gfs2_glock_drop_th(gl); 225 if (gl->gl_state == LM_ST_EXCLUSIVE)
193} 226 inode_go_sync(gl);
194
195/**
196 * inode_go_sync - Sync the dirty data and/or metadata for an inode glock
197 * @gl: the glock protecting the inode
198 *
199 */
200
201static void inode_go_sync(struct gfs2_glock *gl)
202{
203 struct gfs2_inode *ip = gl->gl_object;
204
205 if (ip && !S_ISREG(ip->i_inode.i_mode))
206 ip = NULL;
207
208 if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
209 gfs2_log_flush(gl->gl_sbd, gl);
210 if (ip)
211 filemap_fdatawrite(ip->i_inode.i_mapping);
212 gfs2_meta_sync(gl);
213 if (ip) {
214 struct address_space *mapping = ip->i_inode.i_mapping;
215 int error = filemap_fdatawait(mapping);
216 if (error == -ENOSPC)
217 set_bit(AS_ENOSPC, &mapping->flags);
218 else if (error)
219 set_bit(AS_EIO, &mapping->flags);
220 }
221 clear_bit(GLF_DIRTY, &gl->gl_flags);
222 gfs2_ail_empty_gl(gl);
223 }
224} 227}
225 228
226/** 229/**
@@ -295,7 +298,7 @@ static int inode_go_lock(struct gfs2_holder *gh)
295 298
296 if ((ip->i_di.di_flags & GFS2_DIF_TRUNC_IN_PROG) && 299 if ((ip->i_di.di_flags & GFS2_DIF_TRUNC_IN_PROG) &&
297 (gl->gl_state == LM_ST_EXCLUSIVE) && 300 (gl->gl_state == LM_ST_EXCLUSIVE) &&
298 (gh->gh_flags & GL_LOCAL_EXCL)) 301 (gh->gh_state == LM_ST_EXCLUSIVE))
299 error = gfs2_truncatei_resume(ip); 302 error = gfs2_truncatei_resume(ip);
300 303
301 return error; 304 return error;
@@ -319,39 +322,6 @@ static void inode_go_unlock(struct gfs2_holder *gh)
319} 322}
320 323
321/** 324/**
322 * inode_greedy -
323 * @gl: the glock
324 *
325 */
326
327static void inode_greedy(struct gfs2_glock *gl)
328{
329 struct gfs2_sbd *sdp = gl->gl_sbd;
330 struct gfs2_inode *ip = gl->gl_object;
331 unsigned int quantum = gfs2_tune_get(sdp, gt_greedy_quantum);
332 unsigned int max = gfs2_tune_get(sdp, gt_greedy_max);
333 unsigned int new_time;
334
335 spin_lock(&ip->i_spin);
336
337 if (time_after(ip->i_last_pfault + quantum, jiffies)) {
338 new_time = ip->i_greedy + quantum;
339 if (new_time > max)
340 new_time = max;
341 } else {
342 new_time = ip->i_greedy - quantum;
343 if (!new_time || new_time > max)
344 new_time = 1;
345 }
346
347 ip->i_greedy = new_time;
348
349 spin_unlock(&ip->i_spin);
350
351 iput(&ip->i_inode);
352}
353
354/**
355 * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock 325 * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock
356 * @gl: the glock 326 * @gl: the glock
357 * 327 *
@@ -398,8 +368,7 @@ static void rgrp_go_unlock(struct gfs2_holder *gh)
398 * 368 *
399 */ 369 */
400 370
401static void trans_go_xmote_th(struct gfs2_glock *gl, unsigned int state, 371static void trans_go_xmote_th(struct gfs2_glock *gl)
402 int flags)
403{ 372{
404 struct gfs2_sbd *sdp = gl->gl_sbd; 373 struct gfs2_sbd *sdp = gl->gl_sbd;
405 374
@@ -408,8 +377,6 @@ static void trans_go_xmote_th(struct gfs2_glock *gl, unsigned int state,
408 gfs2_meta_syncfs(sdp); 377 gfs2_meta_syncfs(sdp);
409 gfs2_log_shutdown(sdp); 378 gfs2_log_shutdown(sdp);
410 } 379 }
411
412 gfs2_glock_xmote_th(gl, state, flags);
413} 380}
414 381
415/** 382/**
@@ -461,8 +428,6 @@ static void trans_go_drop_th(struct gfs2_glock *gl)
461 gfs2_meta_syncfs(sdp); 428 gfs2_meta_syncfs(sdp);
462 gfs2_log_shutdown(sdp); 429 gfs2_log_shutdown(sdp);
463 } 430 }
464
465 gfs2_glock_drop_th(gl);
466} 431}
467 432
468/** 433/**
@@ -478,8 +443,8 @@ static int quota_go_demote_ok(struct gfs2_glock *gl)
478} 443}
479 444
480const struct gfs2_glock_operations gfs2_meta_glops = { 445const struct gfs2_glock_operations gfs2_meta_glops = {
481 .go_xmote_th = gfs2_glock_xmote_th, 446 .go_xmote_th = meta_go_sync,
482 .go_drop_th = gfs2_glock_drop_th, 447 .go_drop_th = meta_go_sync,
483 .go_type = LM_TYPE_META, 448 .go_type = LM_TYPE_META,
484}; 449};
485 450
@@ -487,19 +452,14 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
487 .go_xmote_th = inode_go_xmote_th, 452 .go_xmote_th = inode_go_xmote_th,
488 .go_xmote_bh = inode_go_xmote_bh, 453 .go_xmote_bh = inode_go_xmote_bh,
489 .go_drop_th = inode_go_drop_th, 454 .go_drop_th = inode_go_drop_th,
490 .go_sync = inode_go_sync,
491 .go_inval = inode_go_inval, 455 .go_inval = inode_go_inval,
492 .go_demote_ok = inode_go_demote_ok, 456 .go_demote_ok = inode_go_demote_ok,
493 .go_lock = inode_go_lock, 457 .go_lock = inode_go_lock,
494 .go_unlock = inode_go_unlock, 458 .go_unlock = inode_go_unlock,
495 .go_greedy = inode_greedy,
496 .go_type = LM_TYPE_INODE, 459 .go_type = LM_TYPE_INODE,
497}; 460};
498 461
499const struct gfs2_glock_operations gfs2_rgrp_glops = { 462const struct gfs2_glock_operations gfs2_rgrp_glops = {
500 .go_xmote_th = gfs2_glock_xmote_th,
501 .go_drop_th = gfs2_glock_drop_th,
502 .go_sync = meta_go_sync,
503 .go_inval = meta_go_inval, 463 .go_inval = meta_go_inval,
504 .go_demote_ok = rgrp_go_demote_ok, 464 .go_demote_ok = rgrp_go_demote_ok,
505 .go_lock = rgrp_go_lock, 465 .go_lock = rgrp_go_lock,
@@ -515,33 +475,23 @@ const struct gfs2_glock_operations gfs2_trans_glops = {
515}; 475};
516 476
517const struct gfs2_glock_operations gfs2_iopen_glops = { 477const struct gfs2_glock_operations gfs2_iopen_glops = {
518 .go_xmote_th = gfs2_glock_xmote_th,
519 .go_drop_th = gfs2_glock_drop_th,
520 .go_type = LM_TYPE_IOPEN, 478 .go_type = LM_TYPE_IOPEN,
521}; 479};
522 480
523const struct gfs2_glock_operations gfs2_flock_glops = { 481const struct gfs2_glock_operations gfs2_flock_glops = {
524 .go_xmote_th = gfs2_glock_xmote_th,
525 .go_drop_th = gfs2_glock_drop_th,
526 .go_type = LM_TYPE_FLOCK, 482 .go_type = LM_TYPE_FLOCK,
527}; 483};
528 484
529const struct gfs2_glock_operations gfs2_nondisk_glops = { 485const struct gfs2_glock_operations gfs2_nondisk_glops = {
530 .go_xmote_th = gfs2_glock_xmote_th,
531 .go_drop_th = gfs2_glock_drop_th,
532 .go_type = LM_TYPE_NONDISK, 486 .go_type = LM_TYPE_NONDISK,
533}; 487};
534 488
535const struct gfs2_glock_operations gfs2_quota_glops = { 489const struct gfs2_glock_operations gfs2_quota_glops = {
536 .go_xmote_th = gfs2_glock_xmote_th,
537 .go_drop_th = gfs2_glock_drop_th,
538 .go_demote_ok = quota_go_demote_ok, 490 .go_demote_ok = quota_go_demote_ok,
539 .go_type = LM_TYPE_QUOTA, 491 .go_type = LM_TYPE_QUOTA,
540}; 492};
541 493
542const struct gfs2_glock_operations gfs2_journal_glops = { 494const struct gfs2_glock_operations gfs2_journal_glops = {
543 .go_xmote_th = gfs2_glock_xmote_th,
544 .go_drop_th = gfs2_glock_drop_th,
545 .go_type = LM_TYPE_JOURNAL, 495 .go_type = LM_TYPE_JOURNAL,
546}; 496};
547 497
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 734421edae85..12c80fd28db5 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -101,17 +101,14 @@ struct gfs2_bufdata {
101}; 101};
102 102
103struct gfs2_glock_operations { 103struct gfs2_glock_operations {
104 void (*go_xmote_th) (struct gfs2_glock *gl, unsigned int state, int flags); 104 void (*go_xmote_th) (struct gfs2_glock *gl);
105 void (*go_xmote_bh) (struct gfs2_glock *gl); 105 void (*go_xmote_bh) (struct gfs2_glock *gl);
106 void (*go_drop_th) (struct gfs2_glock *gl); 106 void (*go_drop_th) (struct gfs2_glock *gl);
107 void (*go_drop_bh) (struct gfs2_glock *gl); 107 void (*go_drop_bh) (struct gfs2_glock *gl);
108 void (*go_sync) (struct gfs2_glock *gl);
109 void (*go_inval) (struct gfs2_glock *gl, int flags); 108 void (*go_inval) (struct gfs2_glock *gl, int flags);
110 int (*go_demote_ok) (struct gfs2_glock *gl); 109 int (*go_demote_ok) (struct gfs2_glock *gl);
111 int (*go_lock) (struct gfs2_holder *gh); 110 int (*go_lock) (struct gfs2_holder *gh);
112 void (*go_unlock) (struct gfs2_holder *gh); 111 void (*go_unlock) (struct gfs2_holder *gh);
113 void (*go_callback) (struct gfs2_glock *gl, unsigned int state);
114 void (*go_greedy) (struct gfs2_glock *gl);
115 const int go_type; 112 const int go_type;
116}; 113};
117 114
@@ -120,7 +117,6 @@ enum {
120 HIF_MUTEX = 0, 117 HIF_MUTEX = 0,
121 HIF_PROMOTE = 1, 118 HIF_PROMOTE = 1,
122 HIF_DEMOTE = 2, 119 HIF_DEMOTE = 2,
123 HIF_GREEDY = 3,
124 120
125 /* States */ 121 /* States */
126 HIF_ALLOCED = 4, 122 HIF_ALLOCED = 4,
@@ -128,6 +124,7 @@ enum {
128 HIF_HOLDER = 6, 124 HIF_HOLDER = 6,
129 HIF_FIRST = 7, 125 HIF_FIRST = 7,
130 HIF_ABORTED = 9, 126 HIF_ABORTED = 9,
127 HIF_WAIT = 10,
131}; 128};
132 129
133struct gfs2_holder { 130struct gfs2_holder {
@@ -140,17 +137,14 @@ struct gfs2_holder {
140 137
141 int gh_error; 138 int gh_error;
142 unsigned long gh_iflags; 139 unsigned long gh_iflags;
143 struct completion gh_wait;
144 unsigned long gh_ip; 140 unsigned long gh_ip;
145}; 141};
146 142
147enum { 143enum {
148 GLF_LOCK = 1, 144 GLF_LOCK = 1,
149 GLF_STICKY = 2, 145 GLF_STICKY = 2,
150 GLF_PREFETCH = 3,
151 GLF_DIRTY = 5, 146 GLF_DIRTY = 5,
152 GLF_SKIP_WAITERS2 = 6, 147 GLF_SKIP_WAITERS2 = 6,
153 GLF_GREEDY = 7,
154}; 148};
155 149
156struct gfs2_glock { 150struct gfs2_glock {
@@ -167,7 +161,7 @@ struct gfs2_glock {
167 unsigned long gl_ip; 161 unsigned long gl_ip;
168 struct list_head gl_holders; 162 struct list_head gl_holders;
169 struct list_head gl_waiters1; /* HIF_MUTEX */ 163 struct list_head gl_waiters1; /* HIF_MUTEX */
170 struct list_head gl_waiters2; /* HIF_DEMOTE, HIF_GREEDY */ 164 struct list_head gl_waiters2; /* HIF_DEMOTE */
171 struct list_head gl_waiters3; /* HIF_PROMOTE */ 165 struct list_head gl_waiters3; /* HIF_PROMOTE */
172 166
173 const struct gfs2_glock_operations *gl_ops; 167 const struct gfs2_glock_operations *gl_ops;
@@ -236,7 +230,6 @@ struct gfs2_inode {
236 230
237 spinlock_t i_spin; 231 spinlock_t i_spin;
238 struct rw_semaphore i_rw_mutex; 232 struct rw_semaphore i_rw_mutex;
239 unsigned int i_greedy;
240 unsigned long i_last_pfault; 233 unsigned long i_last_pfault;
241 234
242 struct buffer_head *i_cache[GFS2_MAX_META_HEIGHT]; 235 struct buffer_head *i_cache[GFS2_MAX_META_HEIGHT];
@@ -418,17 +411,12 @@ struct gfs2_tune {
418 unsigned int gt_atime_quantum; /* Min secs between atime updates */ 411 unsigned int gt_atime_quantum; /* Min secs between atime updates */
419 unsigned int gt_new_files_jdata; 412 unsigned int gt_new_files_jdata;
420 unsigned int gt_new_files_directio; 413 unsigned int gt_new_files_directio;
421 unsigned int gt_max_atomic_write; /* Split big writes into this size */
422 unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ 414 unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */
423 unsigned int gt_lockdump_size; 415 unsigned int gt_lockdump_size;
424 unsigned int gt_stall_secs; /* Detects trouble! */ 416 unsigned int gt_stall_secs; /* Detects trouble! */
425 unsigned int gt_complain_secs; 417 unsigned int gt_complain_secs;
426 unsigned int gt_reclaim_limit; /* Max num of glocks in reclaim list */ 418 unsigned int gt_reclaim_limit; /* Max num of glocks in reclaim list */
427 unsigned int gt_entries_per_readdir; 419 unsigned int gt_entries_per_readdir;
428 unsigned int gt_prefetch_secs; /* Usage window for prefetched glocks */
429 unsigned int gt_greedy_default;
430 unsigned int gt_greedy_quantum;
431 unsigned int gt_greedy_max;
432 unsigned int gt_statfs_quantum; 420 unsigned int gt_statfs_quantum;
433 unsigned int gt_statfs_slow; 421 unsigned int gt_statfs_slow;
434}; 422};
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index d122074c45e1..0d6831a40565 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -287,10 +287,8 @@ out:
287 * 287 *
288 * Returns: errno 288 * Returns: errno
289 */ 289 */
290
291int gfs2_change_nlink(struct gfs2_inode *ip, int diff) 290int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
292{ 291{
293 struct gfs2_sbd *sdp = ip->i_inode.i_sb->s_fs_info;
294 struct buffer_head *dibh; 292 struct buffer_head *dibh;
295 u32 nlink; 293 u32 nlink;
296 int error; 294 int error;
@@ -315,42 +313,34 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
315 else 313 else
316 drop_nlink(&ip->i_inode); 314 drop_nlink(&ip->i_inode);
317 315
318 ip->i_inode.i_ctime.tv_sec = get_seconds(); 316 ip->i_inode.i_ctime = CURRENT_TIME_SEC;
319 317
320 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 318 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
321 gfs2_dinode_out(ip, dibh->b_data); 319 gfs2_dinode_out(ip, dibh->b_data);
322 brelse(dibh); 320 brelse(dibh);
323 mark_inode_dirty(&ip->i_inode); 321 mark_inode_dirty(&ip->i_inode);
324 322
325 if (ip->i_inode.i_nlink == 0) { 323 if (ip->i_inode.i_nlink == 0)
326 struct gfs2_rgrpd *rgd;
327 struct gfs2_holder ri_gh, rg_gh;
328
329 error = gfs2_rindex_hold(sdp, &ri_gh);
330 if (error)
331 goto out;
332 error = -EIO;
333 rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
334 if (!rgd)
335 goto out_norgrp;
336 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rg_gh);
337 if (error)
338 goto out_norgrp;
339
340 gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */ 324 gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */
341 gfs2_glock_dq_uninit(&rg_gh); 325
342out_norgrp:
343 gfs2_glock_dq_uninit(&ri_gh);
344 }
345out:
346 return error; 326 return error;
347} 327}
348 328
349struct inode *gfs2_lookup_simple(struct inode *dip, const char *name) 329struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
350{ 330{
351 struct qstr qstr; 331 struct qstr qstr;
332 struct inode *inode;
352 gfs2_str2qstr(&qstr, name); 333 gfs2_str2qstr(&qstr, name);
353 return gfs2_lookupi(dip, &qstr, 1, NULL); 334 inode = gfs2_lookupi(dip, &qstr, 1, NULL);
335 /* gfs2_lookupi has inconsistent callers: vfs
336 * related routines expect NULL for no entry found,
337 * gfs2_lookup_simple callers expect ENOENT
338 * and do not check for NULL.
339 */
340 if (inode == NULL)
341 return ERR_PTR(-ENOENT);
342 else
343 return inode;
354} 344}
355 345
356 346
@@ -361,8 +351,10 @@ struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
361 * @is_root: If 1, ignore the caller's permissions 351 * @is_root: If 1, ignore the caller's permissions
362 * @i_gh: An uninitialized holder for the new inode glock 352 * @i_gh: An uninitialized holder for the new inode glock
363 * 353 *
364 * There will always be a vnode (Linux VFS inode) for the d_gh inode unless 354 * This can be called via the VFS filldir function when NFS is doing
365 * @is_root is true. 355 * a readdirplus and the inode which its intending to stat isn't
356 * already in cache. In this case we must not take the directory glock
357 * again, since the readdir call will have already taken that lock.
366 * 358 *
367 * Returns: errno 359 * Returns: errno
368 */ 360 */
@@ -375,8 +367,9 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
375 struct gfs2_holder d_gh; 367 struct gfs2_holder d_gh;
376 struct gfs2_inum_host inum; 368 struct gfs2_inum_host inum;
377 unsigned int type; 369 unsigned int type;
378 int error = 0; 370 int error;
379 struct inode *inode = NULL; 371 struct inode *inode = NULL;
372 int unlock = 0;
380 373
381 if (!name->len || name->len > GFS2_FNAMESIZE) 374 if (!name->len || name->len > GFS2_FNAMESIZE)
382 return ERR_PTR(-ENAMETOOLONG); 375 return ERR_PTR(-ENAMETOOLONG);
@@ -388,9 +381,12 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
388 return dir; 381 return dir;
389 } 382 }
390 383
391 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); 384 if (gfs2_glock_is_locked_by_me(dip->i_gl) == 0) {
392 if (error) 385 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
393 return ERR_PTR(error); 386 if (error)
387 return ERR_PTR(error);
388 unlock = 1;
389 }
394 390
395 if (!is_root) { 391 if (!is_root) {
396 error = permission(dir, MAY_EXEC, NULL); 392 error = permission(dir, MAY_EXEC, NULL);
@@ -405,10 +401,11 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
405 inode = gfs2_inode_lookup(sb, &inum, type); 401 inode = gfs2_inode_lookup(sb, &inum, type);
406 402
407out: 403out:
408 gfs2_glock_dq_uninit(&d_gh); 404 if (unlock)
405 gfs2_glock_dq_uninit(&d_gh);
409 if (error == -ENOENT) 406 if (error == -ENOENT)
410 return NULL; 407 return NULL;
411 return inode; 408 return inode ? inode : ERR_PTR(error);
412} 409}
413 410
414static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino) 411static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino)
diff --git a/fs/gfs2/lm.c b/fs/gfs2/lm.c
index effe4a337c1d..e30673dd37e0 100644
--- a/fs/gfs2/lm.c
+++ b/fs/gfs2/lm.c
@@ -104,15 +104,9 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
104 vprintk(fmt, args); 104 vprintk(fmt, args);
105 va_end(args); 105 va_end(args);
106 106
107 fs_err(sdp, "about to withdraw from the cluster\n"); 107 fs_err(sdp, "about to withdraw this file system\n");
108 BUG_ON(sdp->sd_args.ar_debug); 108 BUG_ON(sdp->sd_args.ar_debug);
109 109
110
111 fs_err(sdp, "waiting for outstanding I/O\n");
112
113 /* FIXME: suspend dm device so oustanding bio's complete
114 and all further io requests fail */
115
116 fs_err(sdp, "telling LM to withdraw\n"); 110 fs_err(sdp, "telling LM to withdraw\n");
117 gfs2_withdraw_lockproto(&sdp->sd_lockstruct); 111 gfs2_withdraw_lockproto(&sdp->sd_lockstruct);
118 fs_err(sdp, "withdrawn\n"); 112 fs_err(sdp, "withdrawn\n");
diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h
index 33af707a4d3f..a87c7bf3c568 100644
--- a/fs/gfs2/locking/dlm/lock_dlm.h
+++ b/fs/gfs2/locking/dlm/lock_dlm.h
@@ -36,7 +36,7 @@
36 36
37#define GDLM_STRNAME_BYTES 24 37#define GDLM_STRNAME_BYTES 24
38#define GDLM_LVB_SIZE 32 38#define GDLM_LVB_SIZE 32
39#define GDLM_DROP_COUNT 50000 39#define GDLM_DROP_COUNT 200000
40#define GDLM_DROP_PERIOD 60 40#define GDLM_DROP_PERIOD 60
41#define GDLM_NAME_LEN 128 41#define GDLM_NAME_LEN 128
42 42
diff --git a/fs/gfs2/locking/dlm/main.c b/fs/gfs2/locking/dlm/main.c
index 2194b1d5b5ec..a0e7eda643ed 100644
--- a/fs/gfs2/locking/dlm/main.c
+++ b/fs/gfs2/locking/dlm/main.c
@@ -11,9 +11,6 @@
11 11
12#include "lock_dlm.h" 12#include "lock_dlm.h"
13 13
14extern int gdlm_drop_count;
15extern int gdlm_drop_period;
16
17extern struct lm_lockops gdlm_ops; 14extern struct lm_lockops gdlm_ops;
18 15
19static int __init init_lock_dlm(void) 16static int __init init_lock_dlm(void)
@@ -40,9 +37,6 @@ static int __init init_lock_dlm(void)
40 return error; 37 return error;
41 } 38 }
42 39
43 gdlm_drop_count = GDLM_DROP_COUNT;
44 gdlm_drop_period = GDLM_DROP_PERIOD;
45
46 printk(KERN_INFO 40 printk(KERN_INFO
47 "Lock_DLM (built %s %s) installed\n", __DATE__, __TIME__); 41 "Lock_DLM (built %s %s) installed\n", __DATE__, __TIME__);
48 return 0; 42 return 0;
diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c
index cdd1694e889b..1d8faa3da8af 100644
--- a/fs/gfs2/locking/dlm/mount.c
+++ b/fs/gfs2/locking/dlm/mount.c
@@ -9,8 +9,6 @@
9 9
10#include "lock_dlm.h" 10#include "lock_dlm.h"
11 11
12int gdlm_drop_count;
13int gdlm_drop_period;
14const struct lm_lockops gdlm_ops; 12const struct lm_lockops gdlm_ops;
15 13
16 14
@@ -24,8 +22,8 @@ static struct gdlm_ls *init_gdlm(lm_callback_t cb, struct gfs2_sbd *sdp,
24 if (!ls) 22 if (!ls)
25 return NULL; 23 return NULL;
26 24
27 ls->drop_locks_count = gdlm_drop_count; 25 ls->drop_locks_count = GDLM_DROP_COUNT;
28 ls->drop_locks_period = gdlm_drop_period; 26 ls->drop_locks_period = GDLM_DROP_PERIOD;
29 ls->fscb = cb; 27 ls->fscb = cb;
30 ls->sdp = sdp; 28 ls->sdp = sdp;
31 ls->fsflags = flags; 29 ls->fsflags = flags;
diff --git a/fs/gfs2/locking/dlm/plock.c b/fs/gfs2/locking/dlm/plock.c
index 3799f19b282f..1dd4215b83d0 100644
--- a/fs/gfs2/locking/dlm/plock.c
+++ b/fs/gfs2/locking/dlm/plock.c
@@ -264,7 +264,7 @@ static unsigned int dev_poll(struct file *file, poll_table *wait)
264 return 0; 264 return 0;
265} 265}
266 266
267static struct file_operations dev_fops = { 267static const struct file_operations dev_fops = {
268 .read = dev_read, 268 .read = dev_read,
269 .write = dev_write, 269 .write = dev_write,
270 .poll = dev_poll, 270 .poll = dev_poll,
diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c
index 29ae06f94944..4746b884662d 100644
--- a/fs/gfs2/locking/dlm/sysfs.c
+++ b/fs/gfs2/locking/dlm/sysfs.c
@@ -116,6 +116,17 @@ static ssize_t recover_status_show(struct gdlm_ls *ls, char *buf)
116 return sprintf(buf, "%d\n", ls->recover_jid_status); 116 return sprintf(buf, "%d\n", ls->recover_jid_status);
117} 117}
118 118
119static ssize_t drop_count_show(struct gdlm_ls *ls, char *buf)
120{
121 return sprintf(buf, "%d\n", ls->drop_locks_count);
122}
123
124static ssize_t drop_count_store(struct gdlm_ls *ls, const char *buf, size_t len)
125{
126 ls->drop_locks_count = simple_strtol(buf, NULL, 0);
127 return len;
128}
129
119struct gdlm_attr { 130struct gdlm_attr {
120 struct attribute attr; 131 struct attribute attr;
121 ssize_t (*show)(struct gdlm_ls *, char *); 132 ssize_t (*show)(struct gdlm_ls *, char *);
@@ -135,6 +146,7 @@ GDLM_ATTR(first_done, 0444, first_done_show, NULL);
135GDLM_ATTR(recover, 0644, recover_show, recover_store); 146GDLM_ATTR(recover, 0644, recover_show, recover_store);
136GDLM_ATTR(recover_done, 0444, recover_done_show, NULL); 147GDLM_ATTR(recover_done, 0444, recover_done_show, NULL);
137GDLM_ATTR(recover_status, 0444, recover_status_show, NULL); 148GDLM_ATTR(recover_status, 0444, recover_status_show, NULL);
149GDLM_ATTR(drop_count, 0644, drop_count_show, drop_count_store);
138 150
139static struct attribute *gdlm_attrs[] = { 151static struct attribute *gdlm_attrs[] = {
140 &gdlm_attr_proto_name.attr, 152 &gdlm_attr_proto_name.attr,
@@ -147,6 +159,7 @@ static struct attribute *gdlm_attrs[] = {
147 &gdlm_attr_recover.attr, 159 &gdlm_attr_recover.attr,
148 &gdlm_attr_recover_done.attr, 160 &gdlm_attr_recover_done.attr,
149 &gdlm_attr_recover_status.attr, 161 &gdlm_attr_recover_status.attr,
162 &gdlm_attr_drop_count.attr,
150 NULL, 163 NULL,
151}; 164};
152 165
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 4d7f94d8c7bd..16bb4b4561ae 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -69,13 +69,16 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
69 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); 69 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
70 struct gfs2_trans *tr; 70 struct gfs2_trans *tr;
71 71
72 if (!list_empty(&bd->bd_list_tr)) 72 gfs2_log_lock(sdp);
73 if (!list_empty(&bd->bd_list_tr)) {
74 gfs2_log_unlock(sdp);
73 return; 75 return;
74 76 }
75 tr = current->journal_info; 77 tr = current->journal_info;
76 tr->tr_touched = 1; 78 tr->tr_touched = 1;
77 tr->tr_num_buf++; 79 tr->tr_num_buf++;
78 list_add(&bd->bd_list_tr, &tr->tr_list_buf); 80 list_add(&bd->bd_list_tr, &tr->tr_list_buf);
81 gfs2_log_unlock(sdp);
79 82
80 if (!list_empty(&le->le_list)) 83 if (!list_empty(&le->le_list))
81 return; 84 return;
@@ -84,7 +87,6 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
84 87
85 gfs2_meta_check(sdp, bd->bd_bh); 88 gfs2_meta_check(sdp, bd->bd_bh);
86 gfs2_pin(sdp, bd->bd_bh); 89 gfs2_pin(sdp, bd->bd_bh);
87
88 gfs2_log_lock(sdp); 90 gfs2_log_lock(sdp);
89 sdp->sd_log_num_buf++; 91 sdp->sd_log_num_buf++;
90 list_add(&le->le_list, &sdp->sd_log_le_buf); 92 list_add(&le->le_list, &sdp->sd_log_le_buf);
@@ -98,11 +100,13 @@ static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
98 struct list_head *head = &tr->tr_list_buf; 100 struct list_head *head = &tr->tr_list_buf;
99 struct gfs2_bufdata *bd; 101 struct gfs2_bufdata *bd;
100 102
103 gfs2_log_lock(sdp);
101 while (!list_empty(head)) { 104 while (!list_empty(head)) {
102 bd = list_entry(head->next, struct gfs2_bufdata, bd_list_tr); 105 bd = list_entry(head->next, struct gfs2_bufdata, bd_list_tr);
103 list_del_init(&bd->bd_list_tr); 106 list_del_init(&bd->bd_list_tr);
104 tr->tr_num_buf--; 107 tr->tr_num_buf--;
105 } 108 }
109 gfs2_log_unlock(sdp);
106 gfs2_assert_warn(sdp, !tr->tr_num_buf); 110 gfs2_assert_warn(sdp, !tr->tr_num_buf);
107} 111}
108 112
@@ -462,13 +466,17 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
462 struct address_space *mapping = bd->bd_bh->b_page->mapping; 466 struct address_space *mapping = bd->bd_bh->b_page->mapping;
463 struct gfs2_inode *ip = GFS2_I(mapping->host); 467 struct gfs2_inode *ip = GFS2_I(mapping->host);
464 468
469 gfs2_log_lock(sdp);
465 tr->tr_touched = 1; 470 tr->tr_touched = 1;
466 if (list_empty(&bd->bd_list_tr) && 471 if (list_empty(&bd->bd_list_tr) &&
467 (ip->i_di.di_flags & GFS2_DIF_JDATA)) { 472 (ip->i_di.di_flags & GFS2_DIF_JDATA)) {
468 tr->tr_num_buf++; 473 tr->tr_num_buf++;
469 list_add(&bd->bd_list_tr, &tr->tr_list_buf); 474 list_add(&bd->bd_list_tr, &tr->tr_list_buf);
475 gfs2_log_unlock(sdp);
470 gfs2_pin(sdp, bd->bd_bh); 476 gfs2_pin(sdp, bd->bd_bh);
471 tr->tr_num_buf_new++; 477 tr->tr_num_buf_new++;
478 } else {
479 gfs2_log_unlock(sdp);
472 } 480 }
473 gfs2_trans_add_gl(bd->bd_gl); 481 gfs2_trans_add_gl(bd->bd_gl);
474 gfs2_log_lock(sdp); 482 gfs2_log_lock(sdp);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 0e34d9918973..e62d4f620c58 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -282,8 +282,7 @@ void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
282 return; 282 return;
283 } 283 }
284 284
285 bd = kmem_cache_alloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL), 285 bd = kmem_cache_zalloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL),
286 memset(bd, 0, sizeof(struct gfs2_bufdata));
287 bd->bd_bh = bh; 286 bd->bd_bh = bh;
288 bd->bd_gl = gl; 287 bd->bd_gl = gl;
289 288
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index d8d69a72a10d..56e33590b656 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -16,6 +16,7 @@
16#include <linux/pagevec.h> 16#include <linux/pagevec.h>
17#include <linux/mpage.h> 17#include <linux/mpage.h>
18#include <linux/fs.h> 18#include <linux/fs.h>
19#include <linux/writeback.h>
19#include <linux/gfs2_ondisk.h> 20#include <linux/gfs2_ondisk.h>
20#include <linux/lm_interface.h> 21#include <linux/lm_interface.h>
21 22
@@ -157,6 +158,32 @@ out_ignore:
157} 158}
158 159
159/** 160/**
161 * gfs2_writepages - Write a bunch of dirty pages back to disk
162 * @mapping: The mapping to write
163 * @wbc: Write-back control
164 *
165 * For journaled files and/or ordered writes this just falls back to the
166 * kernel's default writepages path for now. We will probably want to change
167 * that eventually (i.e. when we look at allocate on flush).
168 *
169 * For the data=writeback case though we can already ignore buffer heads
170 * and write whole extents at once. This is a big reduction in the
171 * number of I/O requests we send and the bmap calls we make in this case.
172 */
173static int gfs2_writepages(struct address_space *mapping,
174 struct writeback_control *wbc)
175{
176 struct inode *inode = mapping->host;
177 struct gfs2_inode *ip = GFS2_I(inode);
178 struct gfs2_sbd *sdp = GFS2_SB(inode);
179
180 if (sdp->sd_args.ar_data == GFS2_DATA_WRITEBACK && !gfs2_is_jdata(ip))
181 return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc);
182
183 return generic_writepages(mapping, wbc);
184}
185
186/**
160 * stuffed_readpage - Fill in a Linux page with stuffed file data 187 * stuffed_readpage - Fill in a Linux page with stuffed file data
161 * @ip: the inode 188 * @ip: the inode
162 * @page: the page 189 * @page: the page
@@ -256,7 +283,7 @@ out_unlock:
256 * the page lock and the glock) and return having done no I/O. Its 283 * the page lock and the glock) and return having done no I/O. Its
257 * obviously not something we'd want to do on too regular a basis. 284 * obviously not something we'd want to do on too regular a basis.
258 * Any I/O we ignore at this time will be done via readpage later. 285 * Any I/O we ignore at this time will be done via readpage later.
259 * 2. We have to handle stuffed files here too. 286 * 2. We don't handle stuffed files here we let readpage do the honours.
260 * 3. mpage_readpages() does most of the heavy lifting in the common case. 287 * 3. mpage_readpages() does most of the heavy lifting in the common case.
261 * 4. gfs2_get_block() is relied upon to set BH_Boundary in the right places. 288 * 4. gfs2_get_block() is relied upon to set BH_Boundary in the right places.
262 * 5. We use LM_FLAG_TRY_1CB here, effectively we then have lock-ahead as 289 * 5. We use LM_FLAG_TRY_1CB here, effectively we then have lock-ahead as
@@ -269,8 +296,7 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping,
269 struct gfs2_inode *ip = GFS2_I(inode); 296 struct gfs2_inode *ip = GFS2_I(inode);
270 struct gfs2_sbd *sdp = GFS2_SB(inode); 297 struct gfs2_sbd *sdp = GFS2_SB(inode);
271 struct gfs2_holder gh; 298 struct gfs2_holder gh;
272 unsigned page_idx; 299 int ret = 0;
273 int ret;
274 int do_unlock = 0; 300 int do_unlock = 0;
275 301
276 if (likely(file != &gfs2_internal_file_sentinel)) { 302 if (likely(file != &gfs2_internal_file_sentinel)) {
@@ -289,29 +315,8 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping,
289 goto out_unlock; 315 goto out_unlock;
290 } 316 }
291skip_lock: 317skip_lock:
292 if (gfs2_is_stuffed(ip)) { 318 if (!gfs2_is_stuffed(ip))
293 struct pagevec lru_pvec;
294 pagevec_init(&lru_pvec, 0);
295 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
296 struct page *page = list_entry(pages->prev, struct page, lru);
297 prefetchw(&page->flags);
298 list_del(&page->lru);
299 if (!add_to_page_cache(page, mapping,
300 page->index, GFP_KERNEL)) {
301 ret = stuffed_readpage(ip, page);
302 unlock_page(page);
303 if (!pagevec_add(&lru_pvec, page))
304 __pagevec_lru_add(&lru_pvec);
305 } else {
306 page_cache_release(page);
307 }
308 }
309 pagevec_lru_add(&lru_pvec);
310 ret = 0;
311 } else {
312 /* What we really want to do .... */
313 ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block); 319 ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block);
314 }
315 320
316 if (do_unlock) { 321 if (do_unlock) {
317 gfs2_glock_dq_m(1, &gh); 322 gfs2_glock_dq_m(1, &gh);
@@ -356,8 +361,10 @@ static int gfs2_prepare_write(struct file *file, struct page *page,
356 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|LM_FLAG_TRY_1CB, &ip->i_gh); 361 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|LM_FLAG_TRY_1CB, &ip->i_gh);
357 error = gfs2_glock_nq_atime(&ip->i_gh); 362 error = gfs2_glock_nq_atime(&ip->i_gh);
358 if (unlikely(error)) { 363 if (unlikely(error)) {
359 if (error == GLR_TRYFAILED) 364 if (error == GLR_TRYFAILED) {
365 unlock_page(page);
360 error = AOP_TRUNCATED_PAGE; 366 error = AOP_TRUNCATED_PAGE;
367 }
361 goto out_uninit; 368 goto out_uninit;
362 } 369 }
363 370
@@ -594,6 +601,36 @@ static void gfs2_invalidatepage(struct page *page, unsigned long offset)
594 return; 601 return;
595} 602}
596 603
604/**
605 * gfs2_ok_for_dio - check that dio is valid on this file
606 * @ip: The inode
607 * @rw: READ or WRITE
608 * @offset: The offset at which we are reading or writing
609 *
610 * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o)
611 * 1 (to accept the i/o request)
612 */
613static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset)
614{
615 /*
616 * Should we return an error here? I can't see that O_DIRECT for
617 * a journaled file makes any sense. For now we'll silently fall
618 * back to buffered I/O, likewise we do the same for stuffed
619 * files since they are (a) small and (b) unaligned.
620 */
621 if (gfs2_is_jdata(ip))
622 return 0;
623
624 if (gfs2_is_stuffed(ip))
625 return 0;
626
627 if (offset > i_size_read(&ip->i_inode))
628 return 0;
629 return 1;
630}
631
632
633
597static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, 634static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
598 const struct iovec *iov, loff_t offset, 635 const struct iovec *iov, loff_t offset,
599 unsigned long nr_segs) 636 unsigned long nr_segs)
@@ -604,42 +641,28 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
604 struct gfs2_holder gh; 641 struct gfs2_holder gh;
605 int rv; 642 int rv;
606 643
607 if (rw == READ)
608 mutex_lock(&inode->i_mutex);
609 /* 644 /*
610 * Shared lock, even if its a write, since we do no allocation 645 * Deferred lock, even if its a write, since we do no allocation
611 * on this path. All we need change is atime. 646 * on this path. All we need change is atime, and this lock mode
647 * ensures that other nodes have flushed their buffered read caches
648 * (i.e. their page cache entries for this inode). We do not,
649 * unfortunately have the option of only flushing a range like
650 * the VFS does.
612 */ 651 */
613 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); 652 gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, GL_ATIME, &gh);
614 rv = gfs2_glock_nq_atime(&gh); 653 rv = gfs2_glock_nq_atime(&gh);
615 if (rv) 654 if (rv)
616 goto out; 655 return rv;
617 656 rv = gfs2_ok_for_dio(ip, rw, offset);
618 if (offset > i_size_read(inode)) 657 if (rv != 1)
619 goto out; 658 goto out; /* dio not valid, fall back to buffered i/o */
620 659
621 /* 660 rv = blockdev_direct_IO_no_locking(rw, iocb, inode, inode->i_sb->s_bdev,
622 * Should we return an error here? I can't see that O_DIRECT for 661 iov, offset, nr_segs,
623 * a journaled file makes any sense. For now we'll silently fall 662 gfs2_get_block_direct, NULL);
624 * back to buffered I/O, likewise we do the same for stuffed
625 * files since they are (a) small and (b) unaligned.
626 */
627 if (gfs2_is_jdata(ip))
628 goto out;
629
630 if (gfs2_is_stuffed(ip))
631 goto out;
632
633 rv = blockdev_direct_IO_own_locking(rw, iocb, inode,
634 inode->i_sb->s_bdev,
635 iov, offset, nr_segs,
636 gfs2_get_block_direct, NULL);
637out: 663out:
638 gfs2_glock_dq_m(1, &gh); 664 gfs2_glock_dq_m(1, &gh);
639 gfs2_holder_uninit(&gh); 665 gfs2_holder_uninit(&gh);
640 if (rw == READ)
641 mutex_unlock(&inode->i_mutex);
642
643 return rv; 666 return rv;
644} 667}
645 668
@@ -763,6 +786,7 @@ out:
763 786
764const struct address_space_operations gfs2_file_aops = { 787const struct address_space_operations gfs2_file_aops = {
765 .writepage = gfs2_writepage, 788 .writepage = gfs2_writepage,
789 .writepages = gfs2_writepages,
766 .readpage = gfs2_readpage, 790 .readpage = gfs2_readpage,
767 .readpages = gfs2_readpages, 791 .readpages = gfs2_readpages,
768 .sync_page = block_sync_page, 792 .sync_page = block_sync_page,
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c
index d355899585d8..9187eb174b43 100644
--- a/fs/gfs2/ops_dentry.c
+++ b/fs/gfs2/ops_dentry.c
@@ -46,6 +46,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
46 struct gfs2_inum_host inum; 46 struct gfs2_inum_host inum;
47 unsigned int type; 47 unsigned int type;
48 int error; 48 int error;
49 int had_lock=0;
49 50
50 if (inode && is_bad_inode(inode)) 51 if (inode && is_bad_inode(inode))
51 goto invalid; 52 goto invalid;
@@ -53,9 +54,12 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
53 if (sdp->sd_args.ar_localcaching) 54 if (sdp->sd_args.ar_localcaching)
54 goto valid; 55 goto valid;
55 56
56 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); 57 had_lock = gfs2_glock_is_locked_by_me(dip->i_gl);
57 if (error) 58 if (!had_lock) {
58 goto fail; 59 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
60 if (error)
61 goto fail;
62 }
59 63
60 error = gfs2_dir_search(parent->d_inode, &dentry->d_name, &inum, &type); 64 error = gfs2_dir_search(parent->d_inode, &dentry->d_name, &inum, &type);
61 switch (error) { 65 switch (error) {
@@ -82,13 +86,15 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
82 } 86 }
83 87
84valid_gunlock: 88valid_gunlock:
85 gfs2_glock_dq_uninit(&d_gh); 89 if (!had_lock)
90 gfs2_glock_dq_uninit(&d_gh);
86valid: 91valid:
87 dput(parent); 92 dput(parent);
88 return 1; 93 return 1;
89 94
90invalid_gunlock: 95invalid_gunlock:
91 gfs2_glock_dq_uninit(&d_gh); 96 if (!had_lock)
97 gfs2_glock_dq_uninit(&d_gh);
92invalid: 98invalid:
93 if (inode && S_ISDIR(inode->i_mode)) { 99 if (inode && S_ISDIR(inode->i_mode)) {
94 if (have_submounts(dentry)) 100 if (have_submounts(dentry))
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
index b4e7b8775315..4855e8cca622 100644
--- a/fs/gfs2/ops_export.c
+++ b/fs/gfs2/ops_export.c
@@ -22,6 +22,7 @@
22#include "glock.h" 22#include "glock.h"
23#include "glops.h" 23#include "glops.h"
24#include "inode.h" 24#include "inode.h"
25#include "ops_dentry.h"
25#include "ops_export.h" 26#include "ops_export.h"
26#include "rgrp.h" 27#include "rgrp.h"
27#include "util.h" 28#include "util.h"
@@ -112,13 +113,12 @@ struct get_name_filldir {
112 char *name; 113 char *name;
113}; 114};
114 115
115static int get_name_filldir(void *opaque, const char *name, unsigned int length, 116static int get_name_filldir(void *opaque, const char *name, int length,
116 u64 offset, struct gfs2_inum_host *inum, 117 loff_t offset, u64 inum, unsigned int type)
117 unsigned int type)
118{ 118{
119 struct get_name_filldir *gnfd = (struct get_name_filldir *)opaque; 119 struct get_name_filldir *gnfd = opaque;
120 120
121 if (!gfs2_inum_equal(inum, &gnfd->inum)) 121 if (inum != gnfd->inum.no_addr)
122 return 0; 122 return 0;
123 123
124 memcpy(gnfd->name, name, length); 124 memcpy(gnfd->name, name, length);
@@ -189,6 +189,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child)
189 return ERR_PTR(-ENOMEM); 189 return ERR_PTR(-ENOMEM);
190 } 190 }
191 191
192 dentry->d_op = &gfs2_dops;
192 return dentry; 193 return dentry;
193} 194}
194 195
@@ -215,8 +216,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
215 } 216 }
216 217
217 error = gfs2_glock_nq_num(sdp, inum->no_addr, &gfs2_inode_glops, 218 error = gfs2_glock_nq_num(sdp, inum->no_addr, &gfs2_inode_glops,
218 LM_ST_SHARED, LM_FLAG_ANY | GL_LOCAL_EXCL, 219 LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
219 &i_gh);
220 if (error) 220 if (error)
221 return ERR_PTR(error); 221 return ERR_PTR(error);
222 222
@@ -269,6 +269,7 @@ out_inode:
269 return ERR_PTR(-ENOMEM); 269 return ERR_PTR(-ENOMEM);
270 } 270 }
271 271
272 dentry->d_op = &gfs2_dops;
272 return dentry; 273 return dentry;
273 274
274fail_rgd: 275fail_rgd:
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index faa07e4b97d0..c996aa739a05 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -43,15 +43,6 @@
43#include "util.h" 43#include "util.h"
44#include "eaops.h" 44#include "eaops.h"
45 45
46/* For regular, non-NFS */
47struct filldir_reg {
48 struct gfs2_sbd *fdr_sbd;
49 int fdr_prefetch;
50
51 filldir_t fdr_filldir;
52 void *fdr_opaque;
53};
54
55/* 46/*
56 * Most fields left uninitialised to catch anybody who tries to 47 * Most fields left uninitialised to catch anybody who tries to
57 * use them. f_flags set to prevent file_accessed() from touching 48 * use them. f_flags set to prevent file_accessed() from touching
@@ -128,41 +119,6 @@ static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin)
128} 119}
129 120
130/** 121/**
131 * filldir_func - Report a directory entry to the caller of gfs2_dir_read()
132 * @opaque: opaque data used by the function
133 * @name: the name of the directory entry
134 * @length: the length of the name
135 * @offset: the entry's offset in the directory
136 * @inum: the inode number the entry points to
137 * @type: the type of inode the entry points to
138 *
139 * Returns: 0 on success, 1 if buffer full
140 */
141
142static int filldir_func(void *opaque, const char *name, unsigned int length,
143 u64 offset, struct gfs2_inum_host *inum,
144 unsigned int type)
145{
146 struct filldir_reg *fdr = (struct filldir_reg *)opaque;
147 struct gfs2_sbd *sdp = fdr->fdr_sbd;
148 int error;
149
150 error = fdr->fdr_filldir(fdr->fdr_opaque, name, length, offset,
151 inum->no_addr, type);
152 if (error)
153 return 1;
154
155 if (fdr->fdr_prefetch && !(length == 1 && *name == '.')) {
156 gfs2_glock_prefetch_num(sdp, inum->no_addr, &gfs2_inode_glops,
157 LM_ST_SHARED, LM_FLAG_TRY | LM_FLAG_ANY);
158 gfs2_glock_prefetch_num(sdp, inum->no_addr, &gfs2_iopen_glops,
159 LM_ST_SHARED, LM_FLAG_TRY);
160 }
161
162 return 0;
163}
164
165/**
166 * gfs2_readdir - Read directory entries from a directory 122 * gfs2_readdir - Read directory entries from a directory
167 * @file: The directory to read from 123 * @file: The directory to read from
168 * @dirent: Buffer for dirents 124 * @dirent: Buffer for dirents
@@ -175,16 +131,10 @@ static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir)
175{ 131{
176 struct inode *dir = file->f_mapping->host; 132 struct inode *dir = file->f_mapping->host;
177 struct gfs2_inode *dip = GFS2_I(dir); 133 struct gfs2_inode *dip = GFS2_I(dir);
178 struct filldir_reg fdr;
179 struct gfs2_holder d_gh; 134 struct gfs2_holder d_gh;
180 u64 offset = file->f_pos; 135 u64 offset = file->f_pos;
181 int error; 136 int error;
182 137
183 fdr.fdr_sbd = GFS2_SB(dir);
184 fdr.fdr_prefetch = 1;
185 fdr.fdr_filldir = filldir;
186 fdr.fdr_opaque = dirent;
187
188 gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh); 138 gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh);
189 error = gfs2_glock_nq_atime(&d_gh); 139 error = gfs2_glock_nq_atime(&d_gh);
190 if (error) { 140 if (error) {
@@ -192,7 +142,7 @@ static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir)
192 return error; 142 return error;
193 } 143 }
194 144
195 error = gfs2_dir_read(dir, &offset, &fdr, filldir_func); 145 error = gfs2_dir_read(dir, &offset, dirent, filldir);
196 146
197 gfs2_glock_dq_uninit(&d_gh); 147 gfs2_glock_dq_uninit(&d_gh);
198 148
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 636dda4c7d38..60f47bf2e8e8 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -264,13 +264,23 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
264 struct gfs2_inode *dip = GFS2_I(dir); 264 struct gfs2_inode *dip = GFS2_I(dir);
265 struct gfs2_sbd *sdp = GFS2_SB(dir); 265 struct gfs2_sbd *sdp = GFS2_SB(dir);
266 struct gfs2_inode *ip = GFS2_I(dentry->d_inode); 266 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
267 struct gfs2_holder ghs[2]; 267 struct gfs2_holder ghs[3];
268 struct gfs2_rgrpd *rgd;
269 struct gfs2_holder ri_gh;
268 int error; 270 int error;
269 271
272 error = gfs2_rindex_hold(sdp, &ri_gh);
273 if (error)
274 return error;
275
270 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); 276 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
271 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); 277 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
272 278
273 error = gfs2_glock_nq_m(2, ghs); 279 rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
280 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
281
282
283 error = gfs2_glock_nq_m(3, ghs);
274 if (error) 284 if (error)
275 goto out; 285 goto out;
276 286
@@ -291,10 +301,12 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
291out_end_trans: 301out_end_trans:
292 gfs2_trans_end(sdp); 302 gfs2_trans_end(sdp);
293out_gunlock: 303out_gunlock:
294 gfs2_glock_dq_m(2, ghs); 304 gfs2_glock_dq_m(3, ghs);
295out: 305out:
296 gfs2_holder_uninit(ghs); 306 gfs2_holder_uninit(ghs);
297 gfs2_holder_uninit(ghs + 1); 307 gfs2_holder_uninit(ghs + 1);
308 gfs2_holder_uninit(ghs + 2);
309 gfs2_glock_dq_uninit(&ri_gh);
298 return error; 310 return error;
299} 311}
300 312
@@ -449,13 +461,22 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
449 struct gfs2_inode *dip = GFS2_I(dir); 461 struct gfs2_inode *dip = GFS2_I(dir);
450 struct gfs2_sbd *sdp = GFS2_SB(dir); 462 struct gfs2_sbd *sdp = GFS2_SB(dir);
451 struct gfs2_inode *ip = GFS2_I(dentry->d_inode); 463 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
452 struct gfs2_holder ghs[2]; 464 struct gfs2_holder ghs[3];
465 struct gfs2_rgrpd *rgd;
466 struct gfs2_holder ri_gh;
453 int error; 467 int error;
454 468
469
470 error = gfs2_rindex_hold(sdp, &ri_gh);
471 if (error)
472 return error;
455 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); 473 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
456 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); 474 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
457 475
458 error = gfs2_glock_nq_m(2, ghs); 476 rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
477 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
478
479 error = gfs2_glock_nq_m(3, ghs);
459 if (error) 480 if (error)
460 goto out; 481 goto out;
461 482
@@ -483,10 +504,12 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
483 gfs2_trans_end(sdp); 504 gfs2_trans_end(sdp);
484 505
485out_gunlock: 506out_gunlock:
486 gfs2_glock_dq_m(2, ghs); 507 gfs2_glock_dq_m(3, ghs);
487out: 508out:
488 gfs2_holder_uninit(ghs); 509 gfs2_holder_uninit(ghs);
489 gfs2_holder_uninit(ghs + 1); 510 gfs2_holder_uninit(ghs + 1);
511 gfs2_holder_uninit(ghs + 2);
512 gfs2_glock_dq_uninit(&ri_gh);
490 return error; 513 return error;
491} 514}
492 515
@@ -547,7 +570,8 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
547 struct gfs2_inode *ip = GFS2_I(odentry->d_inode); 570 struct gfs2_inode *ip = GFS2_I(odentry->d_inode);
548 struct gfs2_inode *nip = NULL; 571 struct gfs2_inode *nip = NULL;
549 struct gfs2_sbd *sdp = GFS2_SB(odir); 572 struct gfs2_sbd *sdp = GFS2_SB(odir);
550 struct gfs2_holder ghs[4], r_gh; 573 struct gfs2_holder ghs[5], r_gh;
574 struct gfs2_rgrpd *nrgd;
551 unsigned int num_gh; 575 unsigned int num_gh;
552 int dir_rename = 0; 576 int dir_rename = 0;
553 int alloc_required; 577 int alloc_required;
@@ -587,6 +611,13 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
587 if (nip) { 611 if (nip) {
588 gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh); 612 gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
589 num_gh++; 613 num_gh++;
614 /* grab the resource lock for unlink flag twiddling
615 * this is the case of the target file already existing
616 * so we unlink before doing the rename
617 */
618 nrgd = gfs2_blk2rgrpd(sdp, nip->i_num.no_addr);
619 if (nrgd)
620 gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
590 } 621 }
591 622
592 error = gfs2_glock_nq_m(num_gh, ghs); 623 error = gfs2_glock_nq_m(num_gh, ghs);
@@ -684,12 +715,12 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
684 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + 715 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
685 al->al_rgd->rd_ri.ri_length + 716 al->al_rgd->rd_ri.ri_length +
686 4 * RES_DINODE + 4 * RES_LEAF + 717 4 * RES_DINODE + 4 * RES_LEAF +
687 RES_STATFS + RES_QUOTA, 0); 718 RES_STATFS + RES_QUOTA + 4, 0);
688 if (error) 719 if (error)
689 goto out_ipreserv; 720 goto out_ipreserv;
690 } else { 721 } else {
691 error = gfs2_trans_begin(sdp, 4 * RES_DINODE + 722 error = gfs2_trans_begin(sdp, 4 * RES_DINODE +
692 5 * RES_LEAF, 0); 723 5 * RES_LEAF + 4, 0);
693 if (error) 724 if (error)
694 goto out_gunlock; 725 goto out_gunlock;
695 } 726 }
@@ -728,7 +759,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
728 error = gfs2_meta_inode_buffer(ip, &dibh); 759 error = gfs2_meta_inode_buffer(ip, &dibh);
729 if (error) 760 if (error)
730 goto out_end_trans; 761 goto out_end_trans;
731 ip->i_inode.i_ctime.tv_sec = get_seconds(); 762 ip->i_inode.i_ctime = CURRENT_TIME_SEC;
732 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 763 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
733 gfs2_dinode_out(ip, dibh->b_data); 764 gfs2_dinode_out(ip, dibh->b_data);
734 brelse(dibh); 765 brelse(dibh);
@@ -1018,7 +1049,7 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
1018 } 1049 }
1019 1050
1020 generic_fillattr(inode, stat); 1051 generic_fillattr(inode, stat);
1021 if (unlock); 1052 if (unlock)
1022 gfs2_glock_dq_uninit(&gh); 1053 gfs2_glock_dq_uninit(&gh);
1023 1054
1024 return 0; 1055 return 0;
@@ -1084,7 +1115,7 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name)
1084 return gfs2_ea_remove(GFS2_I(dentry->d_inode), &er); 1115 return gfs2_ea_remove(GFS2_I(dentry->d_inode), &er);
1085} 1116}
1086 1117
1087struct inode_operations gfs2_file_iops = { 1118const struct inode_operations gfs2_file_iops = {
1088 .permission = gfs2_permission, 1119 .permission = gfs2_permission,
1089 .setattr = gfs2_setattr, 1120 .setattr = gfs2_setattr,
1090 .getattr = gfs2_getattr, 1121 .getattr = gfs2_getattr,
@@ -1094,7 +1125,7 @@ struct inode_operations gfs2_file_iops = {
1094 .removexattr = gfs2_removexattr, 1125 .removexattr = gfs2_removexattr,
1095}; 1126};
1096 1127
1097struct inode_operations gfs2_dev_iops = { 1128const struct inode_operations gfs2_dev_iops = {
1098 .permission = gfs2_permission, 1129 .permission = gfs2_permission,
1099 .setattr = gfs2_setattr, 1130 .setattr = gfs2_setattr,
1100 .getattr = gfs2_getattr, 1131 .getattr = gfs2_getattr,
@@ -1104,7 +1135,7 @@ struct inode_operations gfs2_dev_iops = {
1104 .removexattr = gfs2_removexattr, 1135 .removexattr = gfs2_removexattr,
1105}; 1136};
1106 1137
1107struct inode_operations gfs2_dir_iops = { 1138const struct inode_operations gfs2_dir_iops = {
1108 .create = gfs2_create, 1139 .create = gfs2_create,
1109 .lookup = gfs2_lookup, 1140 .lookup = gfs2_lookup,
1110 .link = gfs2_link, 1141 .link = gfs2_link,
@@ -1123,7 +1154,7 @@ struct inode_operations gfs2_dir_iops = {
1123 .removexattr = gfs2_removexattr, 1154 .removexattr = gfs2_removexattr,
1124}; 1155};
1125 1156
1126struct inode_operations gfs2_symlink_iops = { 1157const struct inode_operations gfs2_symlink_iops = {
1127 .readlink = gfs2_readlink, 1158 .readlink = gfs2_readlink,
1128 .follow_link = gfs2_follow_link, 1159 .follow_link = gfs2_follow_link,
1129 .permission = gfs2_permission, 1160 .permission = gfs2_permission,
diff --git a/fs/gfs2/ops_inode.h b/fs/gfs2/ops_inode.h
index b15acb4fd34c..34f0caac1a03 100644
--- a/fs/gfs2/ops_inode.h
+++ b/fs/gfs2/ops_inode.h
@@ -12,9 +12,9 @@
12 12
13#include <linux/fs.h> 13#include <linux/fs.h>
14 14
15extern struct inode_operations gfs2_file_iops; 15extern const struct inode_operations gfs2_file_iops;
16extern struct inode_operations gfs2_dir_iops; 16extern const struct inode_operations gfs2_dir_iops;
17extern struct inode_operations gfs2_symlink_iops; 17extern const struct inode_operations gfs2_symlink_iops;
18extern struct inode_operations gfs2_dev_iops; 18extern const struct inode_operations gfs2_dev_iops;
19 19
20#endif /* __OPS_INODE_DOT_H__ */ 20#endif /* __OPS_INODE_DOT_H__ */
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index 7685b46f934b..b89999d3a767 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -173,6 +173,9 @@ static void gfs2_write_super_lockfs(struct super_block *sb)
173 struct gfs2_sbd *sdp = sb->s_fs_info; 173 struct gfs2_sbd *sdp = sb->s_fs_info;
174 int error; 174 int error;
175 175
176 if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
177 return;
178
176 for (;;) { 179 for (;;) {
177 error = gfs2_freeze_fs(sdp); 180 error = gfs2_freeze_fs(sdp);
178 if (!error) 181 if (!error)
@@ -426,6 +429,12 @@ static void gfs2_delete_inode(struct inode *inode)
426 } 429 }
427 430
428 error = gfs2_dinode_dealloc(ip); 431 error = gfs2_dinode_dealloc(ip);
432 /*
433 * Must do this before unlock to avoid trying to write back
434 * potentially dirty data now that inode no longer exists
435 * on disk.
436 */
437 truncate_inode_pages(&inode->i_data, 0);
429 438
430out_unlock: 439out_unlock:
431 gfs2_glock_dq(&ip->i_iopen_gh); 440 gfs2_glock_dq(&ip->i_iopen_gh);
@@ -443,14 +452,12 @@ out:
443 452
444static struct inode *gfs2_alloc_inode(struct super_block *sb) 453static struct inode *gfs2_alloc_inode(struct super_block *sb)
445{ 454{
446 struct gfs2_sbd *sdp = sb->s_fs_info;
447 struct gfs2_inode *ip; 455 struct gfs2_inode *ip;
448 456
449 ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL); 457 ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL);
450 if (ip) { 458 if (ip) {
451 ip->i_flags = 0; 459 ip->i_flags = 0;
452 ip->i_gl = NULL; 460 ip->i_gl = NULL;
453 ip->i_greedy = gfs2_tune_get(sdp, gt_greedy_default);
454 ip->i_last_pfault = jiffies; 461 ip->i_last_pfault = jiffies;
455 } 462 }
456 return &ip->i_inode; 463 return &ip->i_inode;
@@ -461,7 +468,7 @@ static void gfs2_destroy_inode(struct inode *inode)
461 kmem_cache_free(gfs2_inode_cachep, inode); 468 kmem_cache_free(gfs2_inode_cachep, inode);
462} 469}
463 470
464struct super_operations gfs2_super_ops = { 471const struct super_operations gfs2_super_ops = {
465 .alloc_inode = gfs2_alloc_inode, 472 .alloc_inode = gfs2_alloc_inode,
466 .destroy_inode = gfs2_destroy_inode, 473 .destroy_inode = gfs2_destroy_inode,
467 .write_inode = gfs2_write_inode, 474 .write_inode = gfs2_write_inode,
diff --git a/fs/gfs2/ops_super.h b/fs/gfs2/ops_super.h
index 9de73f042f78..442a274c6272 100644
--- a/fs/gfs2/ops_super.h
+++ b/fs/gfs2/ops_super.h
@@ -12,6 +12,6 @@
12 12
13#include <linux/fs.h> 13#include <linux/fs.h>
14 14
15extern struct super_operations gfs2_super_ops; 15extern const struct super_operations gfs2_super_ops;
16 16
17#endif /* __OPS_SUPER_DOT_H__ */ 17#endif /* __OPS_SUPER_DOT_H__ */
diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c
index 45a5f11fc39a..14b380fb0602 100644
--- a/fs/gfs2/ops_vm.c
+++ b/fs/gfs2/ops_vm.c
@@ -28,34 +28,13 @@
28#include "trans.h" 28#include "trans.h"
29#include "util.h" 29#include "util.h"
30 30
31static void pfault_be_greedy(struct gfs2_inode *ip)
32{
33 unsigned int time;
34
35 spin_lock(&ip->i_spin);
36 time = ip->i_greedy;
37 ip->i_last_pfault = jiffies;
38 spin_unlock(&ip->i_spin);
39
40 igrab(&ip->i_inode);
41 if (gfs2_glock_be_greedy(ip->i_gl, time))
42 iput(&ip->i_inode);
43}
44
45static struct page *gfs2_private_nopage(struct vm_area_struct *area, 31static struct page *gfs2_private_nopage(struct vm_area_struct *area,
46 unsigned long address, int *type) 32 unsigned long address, int *type)
47{ 33{
48 struct gfs2_inode *ip = GFS2_I(area->vm_file->f_mapping->host); 34 struct gfs2_inode *ip = GFS2_I(area->vm_file->f_mapping->host);
49 struct page *result;
50 35
51 set_bit(GIF_PAGED, &ip->i_flags); 36 set_bit(GIF_PAGED, &ip->i_flags);
52 37 return filemap_nopage(area, address, type);
53 result = filemap_nopage(area, address, type);
54
55 if (result && result != NOPAGE_OOM)
56 pfault_be_greedy(ip);
57
58 return result;
59} 38}
60 39
61static int alloc_page_backing(struct gfs2_inode *ip, struct page *page) 40static int alloc_page_backing(struct gfs2_inode *ip, struct page *page)
@@ -167,7 +146,6 @@ static struct page *gfs2_sharewrite_nopage(struct vm_area_struct *area,
167 set_page_dirty(result); 146 set_page_dirty(result);
168 } 147 }
169 148
170 pfault_be_greedy(ip);
171out: 149out:
172 gfs2_glock_dq_uninit(&i_gh); 150 gfs2_glock_dq_uninit(&i_gh);
173 151
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 43a24f2e5905..70f424fcf1cd 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -71,17 +71,12 @@ void gfs2_tune_init(struct gfs2_tune *gt)
71 gt->gt_atime_quantum = 3600; 71 gt->gt_atime_quantum = 3600;
72 gt->gt_new_files_jdata = 0; 72 gt->gt_new_files_jdata = 0;
73 gt->gt_new_files_directio = 0; 73 gt->gt_new_files_directio = 0;
74 gt->gt_max_atomic_write = 4 << 20;
75 gt->gt_max_readahead = 1 << 18; 74 gt->gt_max_readahead = 1 << 18;
76 gt->gt_lockdump_size = 131072; 75 gt->gt_lockdump_size = 131072;
77 gt->gt_stall_secs = 600; 76 gt->gt_stall_secs = 600;
78 gt->gt_complain_secs = 10; 77 gt->gt_complain_secs = 10;
79 gt->gt_reclaim_limit = 5000; 78 gt->gt_reclaim_limit = 5000;
80 gt->gt_entries_per_readdir = 32; 79 gt->gt_entries_per_readdir = 32;
81 gt->gt_prefetch_secs = 10;
82 gt->gt_greedy_default = HZ / 10;
83 gt->gt_greedy_quantum = HZ / 40;
84 gt->gt_greedy_max = HZ / 4;
85 gt->gt_statfs_quantum = 30; 80 gt->gt_statfs_quantum = 30;
86 gt->gt_statfs_slow = 0; 81 gt->gt_statfs_slow = 0;
87} 82}
@@ -359,8 +354,7 @@ int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
359 mutex_lock(&sdp->sd_jindex_mutex); 354 mutex_lock(&sdp->sd_jindex_mutex);
360 355
361 for (;;) { 356 for (;;) {
362 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 357 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, ji_gh);
363 GL_LOCAL_EXCL, ji_gh);
364 if (error) 358 if (error)
365 break; 359 break;
366 360
@@ -529,8 +523,7 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
529 struct gfs2_log_header_host head; 523 struct gfs2_log_header_host head;
530 int error; 524 int error;
531 525
532 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, 526 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &t_gh);
533 GL_LOCAL_EXCL, &t_gh);
534 if (error) 527 if (error)
535 return error; 528 return error;
536 529
@@ -583,9 +576,8 @@ int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
583 gfs2_quota_sync(sdp); 576 gfs2_quota_sync(sdp);
584 gfs2_statfs_sync(sdp); 577 gfs2_statfs_sync(sdp);
585 578
586 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, 579 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE,
587 GL_LOCAL_EXCL | GL_NOCACHE, 580 &t_gh);
588 &t_gh);
589 if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) 581 if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
590 return error; 582 return error;
591 583
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 983eaf1e06be..d01f9f0fda26 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -436,17 +436,12 @@ TUNE_ATTR(atime_quantum, 0);
436TUNE_ATTR(max_readahead, 0); 436TUNE_ATTR(max_readahead, 0);
437TUNE_ATTR(complain_secs, 0); 437TUNE_ATTR(complain_secs, 0);
438TUNE_ATTR(reclaim_limit, 0); 438TUNE_ATTR(reclaim_limit, 0);
439TUNE_ATTR(prefetch_secs, 0);
440TUNE_ATTR(statfs_slow, 0); 439TUNE_ATTR(statfs_slow, 0);
441TUNE_ATTR(new_files_jdata, 0); 440TUNE_ATTR(new_files_jdata, 0);
442TUNE_ATTR(new_files_directio, 0); 441TUNE_ATTR(new_files_directio, 0);
443TUNE_ATTR(quota_simul_sync, 1); 442TUNE_ATTR(quota_simul_sync, 1);
444TUNE_ATTR(quota_cache_secs, 1); 443TUNE_ATTR(quota_cache_secs, 1);
445TUNE_ATTR(max_atomic_write, 1);
446TUNE_ATTR(stall_secs, 1); 444TUNE_ATTR(stall_secs, 1);
447TUNE_ATTR(greedy_default, 1);
448TUNE_ATTR(greedy_quantum, 1);
449TUNE_ATTR(greedy_max, 1);
450TUNE_ATTR(statfs_quantum, 1); 445TUNE_ATTR(statfs_quantum, 1);
451TUNE_ATTR_DAEMON(scand_secs, scand_process); 446TUNE_ATTR_DAEMON(scand_secs, scand_process);
452TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process); 447TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process);
@@ -465,15 +460,10 @@ static struct attribute *tune_attrs[] = {
465 &tune_attr_max_readahead.attr, 460 &tune_attr_max_readahead.attr,
466 &tune_attr_complain_secs.attr, 461 &tune_attr_complain_secs.attr,
467 &tune_attr_reclaim_limit.attr, 462 &tune_attr_reclaim_limit.attr,
468 &tune_attr_prefetch_secs.attr,
469 &tune_attr_statfs_slow.attr, 463 &tune_attr_statfs_slow.attr,
470 &tune_attr_quota_simul_sync.attr, 464 &tune_attr_quota_simul_sync.attr,
471 &tune_attr_quota_cache_secs.attr, 465 &tune_attr_quota_cache_secs.attr,
472 &tune_attr_max_atomic_write.attr,
473 &tune_attr_stall_secs.attr, 466 &tune_attr_stall_secs.attr,
474 &tune_attr_greedy_default.attr,
475 &tune_attr_greedy_quantum.attr,
476 &tune_attr_greedy_max.attr,
477 &tune_attr_statfs_quantum.attr, 467 &tune_attr_statfs_quantum.attr,
478 &tune_attr_scand_secs.attr, 468 &tune_attr_scand_secs.attr,
479 &tune_attr_recoverd_secs.attr, 469 &tune_attr_recoverd_secs.attr,
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index e2e0358da335..7c69b98a2e45 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -320,7 +320,7 @@ const struct file_operations hfs_dir_operations = {
320 .release = hfs_dir_release, 320 .release = hfs_dir_release,
321}; 321};
322 322
323struct inode_operations hfs_dir_inode_operations = { 323const struct inode_operations hfs_dir_inode_operations = {
324 .create = hfs_create, 324 .create = hfs_create,
325 .lookup = hfs_lookup, 325 .lookup = hfs_lookup,
326 .unlink = hfs_unlink, 326 .unlink = hfs_unlink,
diff --git a/fs/hfs/hfs.h b/fs/hfs/hfs.h
index 88099ab1a180..1445e3a56ed4 100644
--- a/fs/hfs/hfs.h
+++ b/fs/hfs/hfs.h
@@ -83,8 +83,6 @@
83 83
84/*======== HFS structures as they appear on the disk ========*/ 84/*======== HFS structures as they appear on the disk ========*/
85 85
86#define __packed __attribute__ ((packed))
87
88/* Pascal-style string of up to 31 characters */ 86/* Pascal-style string of up to 31 characters */
89struct hfs_name { 87struct hfs_name {
90 u8 len; 88 u8 len;
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index 735332dfd1b8..147374b6f675 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -170,7 +170,7 @@ extern void hfs_cat_build_key(struct super_block *, btree_key *, u32, struct qst
170 170
171/* dir.c */ 171/* dir.c */
172extern const struct file_operations hfs_dir_operations; 172extern const struct file_operations hfs_dir_operations;
173extern struct inode_operations hfs_dir_inode_operations; 173extern const struct inode_operations hfs_dir_inode_operations;
174 174
175/* extent.c */ 175/* extent.c */
176extern int hfs_ext_keycmp(const btree_key *, const btree_key *); 176extern int hfs_ext_keycmp(const btree_key *, const btree_key *);
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 5cb7f8fee8d6..fafcba593871 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -18,7 +18,7 @@
18#include "btree.h" 18#include "btree.h"
19 19
20static const struct file_operations hfs_file_operations; 20static const struct file_operations hfs_file_operations;
21static struct inode_operations hfs_file_inode_operations; 21static const struct inode_operations hfs_file_inode_operations;
22 22
23/*================ Variable-like macros ================*/ 23/*================ Variable-like macros ================*/
24 24
@@ -612,7 +612,7 @@ static const struct file_operations hfs_file_operations = {
612 .release = hfs_file_release, 612 .release = hfs_file_release,
613}; 613};
614 614
615static struct inode_operations hfs_file_inode_operations = { 615static const struct inode_operations hfs_file_inode_operations = {
616 .lookup = hfs_file_lookup, 616 .lookup = hfs_file_lookup,
617 .truncate = hfs_file_truncate, 617 .truncate = hfs_file_truncate,
618 .setattr = hfs_inode_setattr, 618 .setattr = hfs_inode_setattr,
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index a36987966004..623f509f1d47 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -154,7 +154,7 @@ static void hfs_destroy_inode(struct inode *inode)
154 kmem_cache_free(hfs_inode_cachep, HFS_I(inode)); 154 kmem_cache_free(hfs_inode_cachep, HFS_I(inode));
155} 155}
156 156
157static struct super_operations hfs_super_operations = { 157static const struct super_operations hfs_super_operations = {
158 .alloc_inode = hfs_alloc_inode, 158 .alloc_inode = hfs_alloc_inode,
159 .destroy_inode = hfs_destroy_inode, 159 .destroy_inode = hfs_destroy_inode,
160 .write_inode = hfs_write_inode, 160 .write_inode = hfs_write_inode,
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index e886ac8460d3..78137007ccc1 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -471,7 +471,7 @@ static int hfsplus_rename(struct inode *old_dir, struct dentry *old_dentry,
471 return res; 471 return res;
472} 472}
473 473
474struct inode_operations hfsplus_dir_inode_operations = { 474const struct inode_operations hfsplus_dir_inode_operations = {
475 .lookup = hfsplus_lookup, 475 .lookup = hfsplus_lookup,
476 .create = hfsplus_create, 476 .create = hfsplus_create,
477 .link = hfsplus_link, 477 .link = hfsplus_link,
diff --git a/fs/hfsplus/hfsplus_raw.h b/fs/hfsplus/hfsplus_raw.h
index 49205531a500..fe99fe8db61a 100644
--- a/fs/hfsplus/hfsplus_raw.h
+++ b/fs/hfsplus/hfsplus_raw.h
@@ -15,8 +15,6 @@
15 15
16#include <linux/types.h> 16#include <linux/types.h>
17 17
18#define __packed __attribute__ ((packed))
19
20/* Some constants */ 18/* Some constants */
21#define HFSPLUS_SECTOR_SIZE 512 19#define HFSPLUS_SECTOR_SIZE 512
22#define HFSPLUS_SECTOR_SHIFT 9 20#define HFSPLUS_SECTOR_SHIFT 9
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 75e8c4d8aac3..642012ac3370 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -268,10 +268,10 @@ static int hfsplus_file_release(struct inode *inode, struct file *file)
268 return 0; 268 return 0;
269} 269}
270 270
271extern struct inode_operations hfsplus_dir_inode_operations; 271extern const struct inode_operations hfsplus_dir_inode_operations;
272extern struct file_operations hfsplus_dir_operations; 272extern struct file_operations hfsplus_dir_operations;
273 273
274static struct inode_operations hfsplus_file_inode_operations = { 274static const struct inode_operations hfsplus_file_inode_operations = {
275 .lookup = hfsplus_file_lookup, 275 .lookup = hfsplus_file_lookup,
276 .truncate = hfsplus_file_truncate, 276 .truncate = hfsplus_file_truncate,
277 .permission = hfsplus_permission, 277 .permission = hfsplus_permission,
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 0f513c6bf843..5a282f64c637 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -260,7 +260,7 @@ static int hfsplus_remount(struct super_block *sb, int *flags, char *data)
260 return 0; 260 return 0;
261} 261}
262 262
263static struct super_operations hfsplus_sops = { 263static const struct super_operations hfsplus_sops = {
264 .alloc_inode = hfsplus_alloc_inode, 264 .alloc_inode = hfsplus_alloc_inode,
265 .destroy_inode = hfsplus_destroy_inode, 265 .destroy_inode = hfsplus_destroy_inode,
266 .read_inode = hfsplus_read_inode, 266 .read_inode = hfsplus_read_inode,
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 69a376f35a68..e965eb11d76f 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -52,8 +52,8 @@ static int append = 0;
52 52
53#define HOSTFS_SUPER_MAGIC 0x00c0ffee 53#define HOSTFS_SUPER_MAGIC 0x00c0ffee
54 54
55static struct inode_operations hostfs_iops; 55static const struct inode_operations hostfs_iops;
56static struct inode_operations hostfs_dir_iops; 56static const struct inode_operations hostfs_dir_iops;
57static const struct address_space_operations hostfs_link_aops; 57static const struct address_space_operations hostfs_link_aops;
58 58
59#ifndef MODULE 59#ifndef MODULE
@@ -309,7 +309,7 @@ static void hostfs_read_inode(struct inode *inode)
309 read_inode(inode); 309 read_inode(inode);
310} 310}
311 311
312static struct super_operations hostfs_sbops = { 312static const struct super_operations hostfs_sbops = {
313 .alloc_inode = hostfs_alloc_inode, 313 .alloc_inode = hostfs_alloc_inode,
314 .drop_inode = generic_delete_inode, 314 .drop_inode = generic_delete_inode,
315 .delete_inode = hostfs_delete_inode, 315 .delete_inode = hostfs_delete_inode,
@@ -880,7 +880,7 @@ int hostfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
880 return(0); 880 return(0);
881} 881}
882 882
883static struct inode_operations hostfs_iops = { 883static const struct inode_operations hostfs_iops = {
884 .create = hostfs_create, 884 .create = hostfs_create,
885 .link = hostfs_link, 885 .link = hostfs_link,
886 .unlink = hostfs_unlink, 886 .unlink = hostfs_unlink,
@@ -894,7 +894,7 @@ static struct inode_operations hostfs_iops = {
894 .getattr = hostfs_getattr, 894 .getattr = hostfs_getattr,
895}; 895};
896 896
897static struct inode_operations hostfs_dir_iops = { 897static const struct inode_operations hostfs_dir_iops = {
898 .create = hostfs_create, 898 .create = hostfs_create,
899 .lookup = hostfs_lookup, 899 .lookup = hostfs_lookup,
900 .link = hostfs_link, 900 .link = hostfs_link,
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index fb4c8915010a..b4eafc0f1e54 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -132,7 +132,7 @@ const struct file_operations hpfs_file_ops =
132 .sendfile = generic_file_sendfile, 132 .sendfile = generic_file_sendfile,
133}; 133};
134 134
135struct inode_operations hpfs_file_iops = 135const struct inode_operations hpfs_file_iops =
136{ 136{
137 .truncate = hpfs_truncate, 137 .truncate = hpfs_truncate,
138 .setattr = hpfs_notify_change, 138 .setattr = hpfs_notify_change,
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index 1c07aa82d327..42ff60ccf2a9 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -266,7 +266,7 @@ void hpfs_set_ea(struct inode *, struct fnode *, char *, char *, int);
266 266
267int hpfs_file_fsync(struct file *, struct dentry *, int); 267int hpfs_file_fsync(struct file *, struct dentry *, int);
268extern const struct file_operations hpfs_file_ops; 268extern const struct file_operations hpfs_file_ops;
269extern struct inode_operations hpfs_file_iops; 269extern const struct inode_operations hpfs_file_iops;
270extern const struct address_space_operations hpfs_aops; 270extern const struct address_space_operations hpfs_aops;
271 271
272/* inode.c */ 272/* inode.c */
@@ -302,7 +302,7 @@ void hpfs_decide_conv(struct inode *, unsigned char *, unsigned);
302 302
303/* namei.c */ 303/* namei.c */
304 304
305extern struct inode_operations hpfs_dir_iops; 305extern const struct inode_operations hpfs_dir_iops;
306extern const struct address_space_operations hpfs_symlink_aops; 306extern const struct address_space_operations hpfs_symlink_aops;
307 307
308static inline struct hpfs_inode_info *hpfs_i(struct inode *inode) 308static inline struct hpfs_inode_info *hpfs_i(struct inode *inode)
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 2507e7393f3c..9953cf9a2f16 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -659,7 +659,7 @@ end1:
659 return err; 659 return err;
660} 660}
661 661
662struct inode_operations hpfs_dir_iops = 662const struct inode_operations hpfs_dir_iops =
663{ 663{
664 .create = hpfs_create, 664 .create = hpfs_create,
665 .lookup = hpfs_lookup, 665 .lookup = hpfs_lookup,
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index d4abc1a1d566..e0174e338526 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -426,7 +426,7 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
426 426
427/* Super operations */ 427/* Super operations */
428 428
429static struct super_operations hpfs_sops = 429static const struct super_operations hpfs_sops =
430{ 430{
431 .alloc_inode = hpfs_alloc_inode, 431 .alloc_inode = hpfs_alloc_inode,
432 .destroy_inode = hpfs_destroy_inode, 432 .destroy_inode = hpfs_destroy_inode,
diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c
index afd340a45da4..affb7412125e 100644
--- a/fs/hppfs/hppfs_kern.c
+++ b/fs/hppfs/hppfs_kern.c
@@ -43,7 +43,7 @@ static inline struct hppfs_inode_info *HPPFS_I(struct inode *inode)
43 43
44#define HPPFS_SUPER_MAGIC 0xb00000ee 44#define HPPFS_SUPER_MAGIC 0xb00000ee
45 45
46static struct super_operations hppfs_sbops; 46static const struct super_operations hppfs_sbops;
47 47
48static int is_pid(struct dentry *dentry) 48static int is_pid(struct dentry *dentry)
49{ 49{
@@ -212,7 +212,7 @@ static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry,
212 return(ERR_PTR(err)); 212 return(ERR_PTR(err));
213} 213}
214 214
215static struct inode_operations hppfs_file_iops = { 215static const struct inode_operations hppfs_file_iops = {
216}; 216};
217 217
218static ssize_t read_proc(struct file *file, char __user *buf, ssize_t count, 218static ssize_t read_proc(struct file *file, char __user *buf, ssize_t count,
@@ -649,7 +649,7 @@ static void hppfs_destroy_inode(struct inode *inode)
649 kfree(HPPFS_I(inode)); 649 kfree(HPPFS_I(inode));
650} 650}
651 651
652static struct super_operations hppfs_sbops = { 652static const struct super_operations hppfs_sbops = {
653 .alloc_inode = hppfs_alloc_inode, 653 .alloc_inode = hppfs_alloc_inode,
654 .destroy_inode = hppfs_destroy_inode, 654 .destroy_inode = hppfs_destroy_inode,
655 .read_inode = hppfs_read_inode, 655 .read_inode = hppfs_read_inode,
@@ -693,11 +693,11 @@ static void* hppfs_follow_link(struct dentry *dentry, struct nameidata *nd)
693 return ret; 693 return ret;
694} 694}
695 695
696static struct inode_operations hppfs_dir_iops = { 696static const struct inode_operations hppfs_dir_iops = {
697 .lookup = hppfs_lookup, 697 .lookup = hppfs_lookup,
698}; 698};
699 699
700static struct inode_operations hppfs_link_iops = { 700static const struct inode_operations hppfs_link_iops = {
701 .readlink = hppfs_readlink, 701 .readlink = hppfs_readlink,
702 .follow_link = hppfs_follow_link, 702 .follow_link = hppfs_follow_link,
703}; 703};
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 4f4cd132b571..8c718a3d413f 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -33,11 +33,11 @@
33/* some random number */ 33/* some random number */
34#define HUGETLBFS_MAGIC 0x958458f6 34#define HUGETLBFS_MAGIC 0x958458f6
35 35
36static struct super_operations hugetlbfs_ops; 36static const struct super_operations hugetlbfs_ops;
37static const struct address_space_operations hugetlbfs_aops; 37static const struct address_space_operations hugetlbfs_aops;
38const struct file_operations hugetlbfs_file_operations; 38const struct file_operations hugetlbfs_file_operations;
39static struct inode_operations hugetlbfs_dir_inode_operations; 39static const struct inode_operations hugetlbfs_dir_inode_operations;
40static struct inode_operations hugetlbfs_inode_operations; 40static const struct inode_operations hugetlbfs_inode_operations;
41 41
42static struct backing_dev_info hugetlbfs_backing_dev_info = { 42static struct backing_dev_info hugetlbfs_backing_dev_info = {
43 .ra_pages = 0, /* No readahead */ 43 .ra_pages = 0, /* No readahead */
@@ -449,10 +449,13 @@ static int hugetlbfs_symlink(struct inode *dir,
449} 449}
450 450
451/* 451/*
452 * For direct-IO reads into hugetlb pages 452 * mark the head page dirty
453 */ 453 */
454static int hugetlbfs_set_page_dirty(struct page *page) 454static int hugetlbfs_set_page_dirty(struct page *page)
455{ 455{
456 struct page *head = (struct page *)page_private(page);
457
458 SetPageDirty(head);
456 return 0; 459 return 0;
457} 460}
458 461
@@ -560,7 +563,7 @@ const struct file_operations hugetlbfs_file_operations = {
560 .get_unmapped_area = hugetlb_get_unmapped_area, 563 .get_unmapped_area = hugetlb_get_unmapped_area,
561}; 564};
562 565
563static struct inode_operations hugetlbfs_dir_inode_operations = { 566static const struct inode_operations hugetlbfs_dir_inode_operations = {
564 .create = hugetlbfs_create, 567 .create = hugetlbfs_create,
565 .lookup = simple_lookup, 568 .lookup = simple_lookup,
566 .link = simple_link, 569 .link = simple_link,
@@ -573,11 +576,11 @@ static struct inode_operations hugetlbfs_dir_inode_operations = {
573 .setattr = hugetlbfs_setattr, 576 .setattr = hugetlbfs_setattr,
574}; 577};
575 578
576static struct inode_operations hugetlbfs_inode_operations = { 579static const struct inode_operations hugetlbfs_inode_operations = {
577 .setattr = hugetlbfs_setattr, 580 .setattr = hugetlbfs_setattr,
578}; 581};
579 582
580static struct super_operations hugetlbfs_ops = { 583static const struct super_operations hugetlbfs_ops = {
581 .alloc_inode = hugetlbfs_alloc_inode, 584 .alloc_inode = hugetlbfs_alloc_inode,
582 .destroy_inode = hugetlbfs_destroy_inode, 585 .destroy_inode = hugetlbfs_destroy_inode,
583 .statfs = hugetlbfs_statfs, 586 .statfs = hugetlbfs_statfs,
diff --git a/fs/inode.c b/fs/inode.c
index bf21dc6d0dbd..5abb097ab1b0 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -414,7 +414,8 @@ static void prune_icache(int nr_to_scan)
414 __iget(inode); 414 __iget(inode);
415 spin_unlock(&inode_lock); 415 spin_unlock(&inode_lock);
416 if (remove_inode_buffers(inode)) 416 if (remove_inode_buffers(inode))
417 reap += invalidate_inode_pages(&inode->i_data); 417 reap += invalidate_mapping_pages(&inode->i_data,
418 0, -1);
418 iput(inode); 419 iput(inode);
419 spin_lock(&inode_lock); 420 spin_lock(&inode_lock);
420 421
@@ -709,7 +710,7 @@ EXPORT_SYMBOL(iunique);
709struct inode *igrab(struct inode *inode) 710struct inode *igrab(struct inode *inode)
710{ 711{
711 spin_lock(&inode_lock); 712 spin_lock(&inode_lock);
712 if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) 713 if (!(inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)))
713 __iget(inode); 714 __iget(inode);
714 else 715 else
715 /* 716 /*
@@ -999,7 +1000,7 @@ EXPORT_SYMBOL(remove_inode_hash);
999 */ 1000 */
1000void generic_delete_inode(struct inode *inode) 1001void generic_delete_inode(struct inode *inode)
1001{ 1002{
1002 struct super_operations *op = inode->i_sb->s_op; 1003 const struct super_operations *op = inode->i_sb->s_op;
1003 1004
1004 list_del_init(&inode->i_list); 1005 list_del_init(&inode->i_list);
1005 list_del_init(&inode->i_sb_list); 1006 list_del_init(&inode->i_sb_list);
@@ -1092,7 +1093,7 @@ EXPORT_SYMBOL_GPL(generic_drop_inode);
1092 */ 1093 */
1093static inline void iput_final(struct inode *inode) 1094static inline void iput_final(struct inode *inode)
1094{ 1095{
1095 struct super_operations *op = inode->i_sb->s_op; 1096 const struct super_operations *op = inode->i_sb->s_op;
1096 void (*drop)(struct inode *) = generic_drop_inode; 1097 void (*drop)(struct inode *) = generic_drop_inode;
1097 1098
1098 if (op && op->drop_inode) 1099 if (op && op->drop_inode)
@@ -1112,7 +1113,7 @@ static inline void iput_final(struct inode *inode)
1112void iput(struct inode *inode) 1113void iput(struct inode *inode)
1113{ 1114{
1114 if (inode) { 1115 if (inode) {
1115 struct super_operations *op = inode->i_sb->s_op; 1116 const struct super_operations *op = inode->i_sb->s_op;
1116 1117
1117 BUG_ON(inode->i_state == I_CLEAR); 1118 BUG_ON(inode->i_state == I_CLEAR);
1118 1119
@@ -1160,11 +1161,9 @@ void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
1160 struct inode *inode = dentry->d_inode; 1161 struct inode *inode = dentry->d_inode;
1161 struct timespec now; 1162 struct timespec now;
1162 1163
1163 if (IS_RDONLY(inode))
1164 return;
1165 if (inode->i_flags & S_NOATIME) 1164 if (inode->i_flags & S_NOATIME)
1166 return; 1165 return;
1167 if (inode->i_sb->s_flags & MS_NOATIME) 1166 if (IS_NOATIME(inode))
1168 return; 1167 return;
1169 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)) 1168 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
1170 return; 1169 return;
@@ -1252,33 +1251,6 @@ int inode_needs_sync(struct inode *inode)
1252 1251
1253EXPORT_SYMBOL(inode_needs_sync); 1252EXPORT_SYMBOL(inode_needs_sync);
1254 1253
1255/*
1256 * Quota functions that want to walk the inode lists..
1257 */
1258#ifdef CONFIG_QUOTA
1259
1260void remove_dquot_ref(struct super_block *sb, int type,
1261 struct list_head *tofree_head)
1262{
1263 struct inode *inode;
1264
1265 if (!sb->dq_op)
1266 return; /* nothing to do */
1267 spin_lock(&inode_lock); /* This lock is for inodes code */
1268
1269 /*
1270 * We don't have to lock against quota code - test IS_QUOTAINIT is
1271 * just for speedup...
1272 */
1273 list_for_each_entry(inode, &sb->s_inodes, i_sb_list)
1274 if (!IS_NOQUOTA(inode))
1275 remove_inode_dquot_ref(inode, type, tofree_head);
1276
1277 spin_unlock(&inode_lock);
1278}
1279
1280#endif
1281
1282int inode_wait(void *word) 1254int inode_wait(void *word)
1283{ 1255{
1284 schedule(); 1256 schedule();
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index 55f6da55b7c0..9f2224f65a18 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -455,8 +455,16 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
455 break; 455 break;
456 456
457 kevent = inotify_dev_get_event(dev); 457 kevent = inotify_dev_get_event(dev);
458 if (event_size + kevent->event.len > count) 458 if (event_size + kevent->event.len > count) {
459 if (ret == 0 && count > 0) {
460 /*
461 * could not get a single event because we
462 * didn't have enough buffer space.
463 */
464 ret = -EINVAL;
465 }
459 break; 466 break;
467 }
460 468
461 if (copy_to_user(buf, &kevent->event, event_size)) { 469 if (copy_to_user(buf, &kevent->event, event_size)) {
462 ret = -EFAULT; 470 ret = -EFAULT;
diff --git a/fs/ioprio.c b/fs/ioprio.c
index 89e8da112a75..10d2c211d18b 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -60,6 +60,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
60 int data = IOPRIO_PRIO_DATA(ioprio); 60 int data = IOPRIO_PRIO_DATA(ioprio);
61 struct task_struct *p, *g; 61 struct task_struct *p, *g;
62 struct user_struct *user; 62 struct user_struct *user;
63 struct pid *pgrp;
63 int ret; 64 int ret;
64 65
65 switch (class) { 66 switch (class) {
@@ -98,12 +99,14 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
98 break; 99 break;
99 case IOPRIO_WHO_PGRP: 100 case IOPRIO_WHO_PGRP:
100 if (!who) 101 if (!who)
101 who = process_group(current); 102 pgrp = task_pgrp(current);
102 do_each_task_pid(who, PIDTYPE_PGID, p) { 103 else
104 pgrp = find_pid(who);
105 do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
103 ret = set_task_ioprio(p, ioprio); 106 ret = set_task_ioprio(p, ioprio);
104 if (ret) 107 if (ret)
105 break; 108 break;
106 } while_each_task_pid(who, PIDTYPE_PGID, p); 109 } while_each_pid_task(pgrp, PIDTYPE_PGID, p);
107 break; 110 break;
108 case IOPRIO_WHO_USER: 111 case IOPRIO_WHO_USER:
109 if (!who) 112 if (!who)
@@ -167,6 +170,7 @@ asmlinkage long sys_ioprio_get(int which, int who)
167{ 170{
168 struct task_struct *g, *p; 171 struct task_struct *g, *p;
169 struct user_struct *user; 172 struct user_struct *user;
173 struct pid *pgrp;
170 int ret = -ESRCH; 174 int ret = -ESRCH;
171 int tmpio; 175 int tmpio;
172 176
@@ -182,8 +186,10 @@ asmlinkage long sys_ioprio_get(int which, int who)
182 break; 186 break;
183 case IOPRIO_WHO_PGRP: 187 case IOPRIO_WHO_PGRP:
184 if (!who) 188 if (!who)
185 who = process_group(current); 189 pgrp = task_pgrp(current);
186 do_each_task_pid(who, PIDTYPE_PGID, p) { 190 else
191 pgrp = find_pid(who);
192 do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
187 tmpio = get_task_ioprio(p); 193 tmpio = get_task_ioprio(p);
188 if (tmpio < 0) 194 if (tmpio < 0)
189 continue; 195 continue;
@@ -191,7 +197,7 @@ asmlinkage long sys_ioprio_get(int which, int who)
191 ret = tmpio; 197 ret = tmpio;
192 else 198 else
193 ret = ioprio_best(ret, tmpio); 199 ret = ioprio_best(ret, tmpio);
194 } while_each_task_pid(who, PIDTYPE_PGID, p); 200 } while_each_pid_task(pgrp, PIDTYPE_PGID, p);
195 break; 201 break;
196 case IOPRIO_WHO_USER: 202 case IOPRIO_WHO_USER:
197 if (!who) 203 if (!who)
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index 4af2548f97a9..0e94c31cad9b 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -24,7 +24,7 @@ const struct file_operations isofs_dir_operations =
24/* 24/*
25 * directories can handle most operations... 25 * directories can handle most operations...
26 */ 26 */
27struct inode_operations isofs_dir_inode_operations = 27const struct inode_operations isofs_dir_inode_operations =
28{ 28{
29 .lookup = isofs_lookup, 29 .lookup = isofs_lookup,
30}; 30};
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index ea55b6c469ec..64a96cdfe3a4 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -106,7 +106,7 @@ static int isofs_remount(struct super_block *sb, int *flags, char *data)
106 return 0; 106 return 0;
107} 107}
108 108
109static struct super_operations isofs_sops = { 109static const struct super_operations isofs_sops = {
110 .alloc_inode = isofs_alloc_inode, 110 .alloc_inode = isofs_alloc_inode,
111 .destroy_inode = isofs_destroy_inode, 111 .destroy_inode = isofs_destroy_inode,
112 .read_inode = isofs_read_inode, 112 .read_inode = isofs_read_inode,
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index e6308c8b5735..efe2872cd4e3 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -174,7 +174,7 @@ isofs_normalize_block_and_offset(struct iso_directory_record* de,
174 } 174 }
175} 175}
176 176
177extern struct inode_operations isofs_dir_inode_operations; 177extern const struct inode_operations isofs_dir_inode_operations;
178extern const struct file_operations isofs_dir_operations; 178extern const struct file_operations isofs_dir_operations;
179extern const struct address_space_operations isofs_symlink_aops; 179extern const struct address_space_operations isofs_symlink_aops;
180extern struct export_operations isofs_export_ops; 180extern struct export_operations isofs_export_ops;
diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c
index 43baa1afa021..9602b925da08 100644
--- a/fs/jffs/inode-v23.c
+++ b/fs/jffs/inode-v23.c
@@ -54,11 +54,11 @@
54 54
55static int jffs_remove(struct inode *dir, struct dentry *dentry, int type); 55static int jffs_remove(struct inode *dir, struct dentry *dentry, int type);
56 56
57static struct super_operations jffs_ops; 57static const struct super_operations jffs_ops;
58static const struct file_operations jffs_file_operations; 58static const struct file_operations jffs_file_operations;
59static struct inode_operations jffs_file_inode_operations; 59static const struct inode_operations jffs_file_inode_operations;
60static const struct file_operations jffs_dir_operations; 60static const struct file_operations jffs_dir_operations;
61static struct inode_operations jffs_dir_inode_operations; 61static const struct inode_operations jffs_dir_inode_operations;
62static const struct address_space_operations jffs_address_operations; 62static const struct address_space_operations jffs_address_operations;
63 63
64struct kmem_cache *node_cache = NULL; 64struct kmem_cache *node_cache = NULL;
@@ -296,7 +296,7 @@ jffs_setattr(struct dentry *dentry, struct iattr *iattr)
296 inode->i_blocks = (inode->i_size + 511) >> 9; 296 inode->i_blocks = (inode->i_size + 511) >> 9;
297 297
298 if (len) { 298 if (len) {
299 invalidate_inode_pages(inode->i_mapping); 299 invalidate_mapping_pages(inode->i_mapping, 0, -1);
300 } 300 }
301 inode->i_ctime = CURRENT_TIME_SEC; 301 inode->i_ctime = CURRENT_TIME_SEC;
302 inode->i_mtime = inode->i_ctime; 302 inode->i_mtime = inode->i_ctime;
@@ -1518,7 +1518,7 @@ jffs_file_write(struct file *filp, const char *buf, size_t count,
1518 } 1518 }
1519 inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; 1519 inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
1520 mark_inode_dirty(inode); 1520 mark_inode_dirty(inode);
1521 invalidate_inode_pages(inode->i_mapping); 1521 invalidate_mapping_pages(inode->i_mapping, 0, -1);
1522 1522
1523 out_isem: 1523 out_isem:
1524 return err; 1524 return err;
@@ -1642,7 +1642,7 @@ static const struct file_operations jffs_file_operations =
1642}; 1642};
1643 1643
1644 1644
1645static struct inode_operations jffs_file_inode_operations = 1645static const struct inode_operations jffs_file_inode_operations =
1646{ 1646{
1647 .lookup = jffs_lookup, /* lookup */ 1647 .lookup = jffs_lookup, /* lookup */
1648 .setattr = jffs_setattr, 1648 .setattr = jffs_setattr,
@@ -1655,7 +1655,7 @@ static const struct file_operations jffs_dir_operations =
1655}; 1655};
1656 1656
1657 1657
1658static struct inode_operations jffs_dir_inode_operations = 1658static const struct inode_operations jffs_dir_inode_operations =
1659{ 1659{
1660 .create = jffs_create, 1660 .create = jffs_create,
1661 .lookup = jffs_lookup, 1661 .lookup = jffs_lookup,
@@ -1774,7 +1774,7 @@ static int jffs_remount(struct super_block *sb, int *flags, char *data)
1774 return 0; 1774 return 0;
1775} 1775}
1776 1776
1777static struct super_operations jffs_ops = 1777static const struct super_operations jffs_ops =
1778{ 1778{
1779 .read_inode = jffs_read_inode, 1779 .read_inode = jffs_read_inode,
1780 .delete_inode = jffs_delete_inode, 1780 .delete_inode = jffs_delete_inode,
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index da6034d50718..cdbe2fe14e2d 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -46,7 +46,7 @@ const struct file_operations jffs2_dir_operations =
46}; 46};
47 47
48 48
49struct inode_operations jffs2_dir_inode_operations = 49const struct inode_operations jffs2_dir_inode_operations =
50{ 50{
51 .create = jffs2_create, 51 .create = jffs2_create,
52 .lookup = jffs2_lookup, 52 .lookup = jffs2_lookup,
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 242875f77cb3..e82eeaf7590d 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -54,7 +54,7 @@ const struct file_operations jffs2_file_operations =
54 54
55/* jffs2_file_inode_operations */ 55/* jffs2_file_inode_operations */
56 56
57struct inode_operations jffs2_file_inode_operations = 57const struct inode_operations jffs2_file_inode_operations =
58{ 58{
59 .permission = jffs2_permission, 59 .permission = jffs2_permission,
60 .setattr = jffs2_setattr, 60 .setattr = jffs2_setattr,
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 9f41fc01a371..e07a0edcdb4f 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -153,11 +153,11 @@ void jffs2_garbage_collect_trigger(struct jffs2_sb_info *c);
153 153
154/* dir.c */ 154/* dir.c */
155extern const struct file_operations jffs2_dir_operations; 155extern const struct file_operations jffs2_dir_operations;
156extern struct inode_operations jffs2_dir_inode_operations; 156extern const struct inode_operations jffs2_dir_inode_operations;
157 157
158/* file.c */ 158/* file.c */
159extern const struct file_operations jffs2_file_operations; 159extern const struct file_operations jffs2_file_operations;
160extern struct inode_operations jffs2_file_inode_operations; 160extern const struct inode_operations jffs2_file_inode_operations;
161extern const struct address_space_operations jffs2_file_address_operations; 161extern const struct address_space_operations jffs2_file_address_operations;
162int jffs2_fsync(struct file *, struct dentry *, int); 162int jffs2_fsync(struct file *, struct dentry *, int);
163int jffs2_do_readpage_unlock (struct inode *inode, struct page *pg); 163int jffs2_do_readpage_unlock (struct inode *inode, struct page *pg);
@@ -166,7 +166,7 @@ int jffs2_do_readpage_unlock (struct inode *inode, struct page *pg);
166int jffs2_ioctl(struct inode *, struct file *, unsigned int, unsigned long); 166int jffs2_ioctl(struct inode *, struct file *, unsigned int, unsigned long);
167 167
168/* symlink.c */ 168/* symlink.c */
169extern struct inode_operations jffs2_symlink_inode_operations; 169extern const struct inode_operations jffs2_symlink_inode_operations;
170 170
171/* fs.c */ 171/* fs.c */
172int jffs2_setattr (struct dentry *, struct iattr *); 172int jffs2_setattr (struct dentry *, struct iattr *);
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 08a0e6c49e61..cc7e8e71ad46 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -66,7 +66,7 @@ static int jffs2_sync_fs(struct super_block *sb, int wait)
66 return 0; 66 return 0;
67} 67}
68 68
69static struct super_operations jffs2_super_operations = 69static const struct super_operations jffs2_super_operations =
70{ 70{
71 .alloc_inode = jffs2_alloc_inode, 71 .alloc_inode = jffs2_alloc_inode,
72 .destroy_inode =jffs2_destroy_inode, 72 .destroy_inode =jffs2_destroy_inode,
diff --git a/fs/jffs2/symlink.c b/fs/jffs2/symlink.c
index b90d5aa3d969..7e4882c8a7ed 100644
--- a/fs/jffs2/symlink.c
+++ b/fs/jffs2/symlink.c
@@ -20,7 +20,7 @@
20 20
21static void *jffs2_follow_link(struct dentry *dentry, struct nameidata *nd); 21static void *jffs2_follow_link(struct dentry *dentry, struct nameidata *nd);
22 22
23struct inode_operations jffs2_symlink_inode_operations = 23const struct inode_operations jffs2_symlink_inode_operations =
24{ 24{
25 .readlink = generic_readlink, 25 .readlink = generic_readlink,
26 .follow_link = jffs2_follow_link, 26 .follow_link = jffs2_follow_link,
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index aa9132d04920..f7f8eff19b7b 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -88,7 +88,7 @@ static int jfs_release(struct inode *inode, struct file *file)
88 return 0; 88 return 0;
89} 89}
90 90
91struct inode_operations jfs_file_inode_operations = { 91const struct inode_operations jfs_file_inode_operations = {
92 .truncate = jfs_truncate, 92 .truncate = jfs_truncate,
93 .setxattr = jfs_setxattr, 93 .setxattr = jfs_setxattr,
94 .getxattr = jfs_getxattr, 94 .getxattr = jfs_getxattr,
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index f5719117edfe..e285022f006c 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -182,9 +182,9 @@ int jfs_get_block(struct inode *ip, sector_t lblock,
182 * Take appropriate lock on inode 182 * Take appropriate lock on inode
183 */ 183 */
184 if (create) 184 if (create)
185 IWRITE_LOCK(ip); 185 IWRITE_LOCK(ip, RDWRLOCK_NORMAL);
186 else 186 else
187 IREAD_LOCK(ip); 187 IREAD_LOCK(ip, RDWRLOCK_NORMAL);
188 188
189 if (((lblock64 << ip->i_sb->s_blocksize_bits) < ip->i_size) && 189 if (((lblock64 << ip->i_sb->s_blocksize_bits) < ip->i_size) &&
190 (!xtLookup(ip, lblock64, xlen, &xflag, &xaddr, &xlen, 0)) && 190 (!xtLookup(ip, lblock64, xlen, &xflag, &xaddr, &xlen, 0)) &&
@@ -359,7 +359,7 @@ void jfs_truncate(struct inode *ip)
359 359
360 nobh_truncate_page(ip->i_mapping, ip->i_size); 360 nobh_truncate_page(ip->i_mapping, ip->i_size);
361 361
362 IWRITE_LOCK(ip); 362 IWRITE_LOCK(ip, RDWRLOCK_NORMAL);
363 jfs_truncate_nolock(ip, ip->i_size); 363 jfs_truncate_nolock(ip, ip->i_size);
364 IWRITE_UNLOCK(ip); 364 IWRITE_UNLOCK(ip);
365} 365}
diff --git a/fs/jfs/jfs_debug.h b/fs/jfs/jfs_debug.h
index ddffbbd4d955..7378798f0b21 100644
--- a/fs/jfs/jfs_debug.h
+++ b/fs/jfs/jfs_debug.h
@@ -39,10 +39,6 @@ extern void jfs_proc_clean(void);
39/* 39/*
40 * assert with traditional printf/panic 40 * assert with traditional printf/panic
41 */ 41 */
42#ifdef CONFIG_KERNEL_ASSERTS
43/* kgdb stuff */
44#define assert(p) KERNEL_ASSERT(#p, p)
45#else
46#define assert(p) do { \ 42#define assert(p) do { \
47 if (!(p)) { \ 43 if (!(p)) { \
48 printk(KERN_CRIT "BUG at %s:%d assert(%s)\n", \ 44 printk(KERN_CRIT "BUG at %s:%d assert(%s)\n", \
@@ -50,7 +46,6 @@ extern void jfs_proc_clean(void);
50 BUG(); \ 46 BUG(); \
51 } \ 47 } \
52} while (0) 48} while (0)
53#endif
54 49
55/* 50/*
56 * debug ON 51 * debug ON
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index 23546c8fd48b..82b0544bd76d 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -337,7 +337,7 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks)
337 struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; 337 struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
338 struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; 338 struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;
339 339
340 IREAD_LOCK(ipbmap); 340 IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);
341 341
342 /* block to be freed better be within the mapsize. */ 342 /* block to be freed better be within the mapsize. */
343 if (unlikely((blkno == 0) || (blkno + nblocks > bmp->db_mapsize))) { 343 if (unlikely((blkno == 0) || (blkno + nblocks > bmp->db_mapsize))) {
@@ -733,7 +733,7 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
733 * allocation group size, try to allocate anywhere. 733 * allocation group size, try to allocate anywhere.
734 */ 734 */
735 if (l2nb > bmp->db_agl2size) { 735 if (l2nb > bmp->db_agl2size) {
736 IWRITE_LOCK(ipbmap); 736 IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP);
737 737
738 rc = dbAllocAny(bmp, nblocks, l2nb, results); 738 rc = dbAllocAny(bmp, nblocks, l2nb, results);
739 739
@@ -774,7 +774,7 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
774 * the hint using a tiered strategy. 774 * the hint using a tiered strategy.
775 */ 775 */
776 if (nblocks <= BPERDMAP) { 776 if (nblocks <= BPERDMAP) {
777 IREAD_LOCK(ipbmap); 777 IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);
778 778
779 /* get the buffer for the dmap containing the hint. 779 /* get the buffer for the dmap containing the hint.
780 */ 780 */
@@ -844,7 +844,7 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
844 /* try to satisfy the allocation request with blocks within 844 /* try to satisfy the allocation request with blocks within
845 * the same allocation group as the hint. 845 * the same allocation group as the hint.
846 */ 846 */
847 IWRITE_LOCK(ipbmap); 847 IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP);
848 if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) != -ENOSPC) 848 if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) != -ENOSPC)
849 goto write_unlock; 849 goto write_unlock;
850 850
@@ -856,7 +856,7 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
856 * Let dbNextAG recommend a preferred allocation group 856 * Let dbNextAG recommend a preferred allocation group
857 */ 857 */
858 agno = dbNextAG(ipbmap); 858 agno = dbNextAG(ipbmap);
859 IWRITE_LOCK(ipbmap); 859 IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP);
860 860
861 /* Try to allocate within this allocation group. if that fails, try to 861 /* Try to allocate within this allocation group. if that fails, try to
862 * allocate anywhere in the map. 862 * allocate anywhere in the map.
@@ -900,7 +900,7 @@ int dbAllocExact(struct inode *ip, s64 blkno, int nblocks)
900 s64 lblkno; 900 s64 lblkno;
901 struct metapage *mp; 901 struct metapage *mp;
902 902
903 IREAD_LOCK(ipbmap); 903 IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);
904 904
905 /* 905 /*
906 * validate extent request: 906 * validate extent request:
@@ -1050,7 +1050,7 @@ static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks)
1050 */ 1050 */
1051 extblkno = lastblkno + 1; 1051 extblkno = lastblkno + 1;
1052 1052
1053 IREAD_LOCK(ipbmap); 1053 IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);
1054 1054
1055 /* better be within the file system */ 1055 /* better be within the file system */
1056 bmp = sbi->bmap; 1056 bmp = sbi->bmap;
@@ -3116,7 +3116,7 @@ int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks)
3116 struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; 3116 struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
3117 struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; 3117 struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;
3118 3118
3119 IREAD_LOCK(ipbmap); 3119 IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);
3120 3120
3121 /* block to be allocated better be within the mapsize. */ 3121 /* block to be allocated better be within the mapsize. */
3122 ASSERT(nblocks <= bmp->db_mapsize - blkno); 3122 ASSERT(nblocks <= bmp->db_mapsize - blkno);
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 53f63b47a6d3..aa5124b643b1 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -331,7 +331,7 @@ int diRead(struct inode *ip)
331 331
332 /* read the iag */ 332 /* read the iag */
333 imap = JFS_IP(ipimap)->i_imap; 333 imap = JFS_IP(ipimap)->i_imap;
334 IREAD_LOCK(ipimap); 334 IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
335 rc = diIAGRead(imap, iagno, &mp); 335 rc = diIAGRead(imap, iagno, &mp);
336 IREAD_UNLOCK(ipimap); 336 IREAD_UNLOCK(ipimap);
337 if (rc) { 337 if (rc) {
@@ -920,7 +920,7 @@ int diFree(struct inode *ip)
920 /* Obtain read lock in imap inode. Don't release it until we have 920 /* Obtain read lock in imap inode. Don't release it until we have
921 * read all of the IAG's that we are going to. 921 * read all of the IAG's that we are going to.
922 */ 922 */
923 IREAD_LOCK(ipimap); 923 IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
924 924
925 /* read the iag. 925 /* read the iag.
926 */ 926 */
@@ -1415,7 +1415,7 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip)
1415 AG_LOCK(imap, agno); 1415 AG_LOCK(imap, agno);
1416 1416
1417 /* Get read lock on imap inode */ 1417 /* Get read lock on imap inode */
1418 IREAD_LOCK(ipimap); 1418 IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
1419 1419
1420 /* get the iag number and read the iag */ 1420 /* get the iag number and read the iag */
1421 iagno = INOTOIAG(inum); 1421 iagno = INOTOIAG(inum);
@@ -1808,7 +1808,7 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip)
1808 return -ENOSPC; 1808 return -ENOSPC;
1809 1809
1810 /* obtain read lock on imap inode */ 1810 /* obtain read lock on imap inode */
1811 IREAD_LOCK(imap->im_ipimap); 1811 IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP);
1812 1812
1813 /* read the iag at the head of the list. 1813 /* read the iag at the head of the list.
1814 */ 1814 */
@@ -1946,7 +1946,7 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip)
1946 } else { 1946 } else {
1947 /* read the iag. 1947 /* read the iag.
1948 */ 1948 */
1949 IREAD_LOCK(imap->im_ipimap); 1949 IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP);
1950 if ((rc = diIAGRead(imap, iagno, &mp))) { 1950 if ((rc = diIAGRead(imap, iagno, &mp))) {
1951 IREAD_UNLOCK(imap->im_ipimap); 1951 IREAD_UNLOCK(imap->im_ipimap);
1952 jfs_error(ip->i_sb, "diAllocExt: error reading iag"); 1952 jfs_error(ip->i_sb, "diAllocExt: error reading iag");
@@ -2509,7 +2509,7 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
2509 */ 2509 */
2510 2510
2511 /* acquire inode map lock */ 2511 /* acquire inode map lock */
2512 IWRITE_LOCK(ipimap); 2512 IWRITE_LOCK(ipimap, RDWRLOCK_IMAP);
2513 2513
2514 if (ipimap->i_size >> L2PSIZE != imap->im_nextiag + 1) { 2514 if (ipimap->i_size >> L2PSIZE != imap->im_nextiag + 1) {
2515 IWRITE_UNLOCK(ipimap); 2515 IWRITE_UNLOCK(ipimap);
@@ -2648,7 +2648,7 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
2648 } 2648 }
2649 2649
2650 /* obtain read lock on map */ 2650 /* obtain read lock on map */
2651 IREAD_LOCK(ipimap); 2651 IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
2652 2652
2653 /* read the iag */ 2653 /* read the iag */
2654 if ((rc = diIAGRead(imap, iagno, &mp))) { 2654 if ((rc = diIAGRead(imap, iagno, &mp))) {
@@ -2779,7 +2779,7 @@ diUpdatePMap(struct inode *ipimap,
2779 return -EIO; 2779 return -EIO;
2780 } 2780 }
2781 /* read the iag */ 2781 /* read the iag */
2782 IREAD_LOCK(ipimap); 2782 IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
2783 rc = diIAGRead(imap, iagno, &mp); 2783 rc = diIAGRead(imap, iagno, &mp);
2784 IREAD_UNLOCK(ipimap); 2784 IREAD_UNLOCK(ipimap);
2785 if (rc) 2785 if (rc)
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index 94005584445a..8f453eff3c83 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -109,9 +109,11 @@ struct jfs_inode_info {
109 109
110#define JFS_ACL_NOT_CACHED ((void *)-1) 110#define JFS_ACL_NOT_CACHED ((void *)-1)
111 111
112#define IREAD_LOCK(ip) down_read(&JFS_IP(ip)->rdwrlock) 112#define IREAD_LOCK(ip, subclass) \
113 down_read_nested(&JFS_IP(ip)->rdwrlock, subclass)
113#define IREAD_UNLOCK(ip) up_read(&JFS_IP(ip)->rdwrlock) 114#define IREAD_UNLOCK(ip) up_read(&JFS_IP(ip)->rdwrlock)
114#define IWRITE_LOCK(ip) down_write(&JFS_IP(ip)->rdwrlock) 115#define IWRITE_LOCK(ip, subclass) \
116 down_write_nested(&JFS_IP(ip)->rdwrlock, subclass)
115#define IWRITE_UNLOCK(ip) up_write(&JFS_IP(ip)->rdwrlock) 117#define IWRITE_UNLOCK(ip) up_write(&JFS_IP(ip)->rdwrlock)
116 118
117/* 119/*
@@ -127,6 +129,29 @@ enum cflags {
127 COMMIT_Synclist, /* metadata pages on group commit synclist */ 129 COMMIT_Synclist, /* metadata pages on group commit synclist */
128}; 130};
129 131
132/*
133 * commit_mutex nesting subclasses:
134 */
135enum commit_mutex_class
136{
137 COMMIT_MUTEX_PARENT,
138 COMMIT_MUTEX_CHILD,
139 COMMIT_MUTEX_SECOND_PARENT, /* Renaming */
140 COMMIT_MUTEX_VICTIM /* Inode being unlinked due to rename */
141};
142
143/*
144 * rdwrlock subclasses:
145 * The dmap inode may be locked while a normal inode or the imap inode are
146 * locked.
147 */
148enum rdwrlock_class
149{
150 RDWRLOCK_NORMAL,
151 RDWRLOCK_IMAP,
152 RDWRLOCK_DMAP
153};
154
130#define set_cflag(flag, ip) set_bit(flag, &(JFS_IP(ip)->cflag)) 155#define set_cflag(flag, ip) set_bit(flag, &(JFS_IP(ip)->cflag))
131#define clear_cflag(flag, ip) clear_bit(flag, &(JFS_IP(ip)->cflag)) 156#define clear_cflag(flag, ip) clear_bit(flag, &(JFS_IP(ip)->cflag))
132#define test_cflag(flag, ip) test_bit(flag, &(JFS_IP(ip)->cflag)) 157#define test_cflag(flag, ip) test_bit(flag, &(JFS_IP(ip)->cflag))
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index 0d06ccfaff0e..6802837f757e 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
@@ -35,10 +35,10 @@ extern void jfs_set_inode_flags(struct inode *);
35extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int); 35extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
36 36
37extern const struct address_space_operations jfs_aops; 37extern const struct address_space_operations jfs_aops;
38extern struct inode_operations jfs_dir_inode_operations; 38extern const struct inode_operations jfs_dir_inode_operations;
39extern const struct file_operations jfs_dir_operations; 39extern const struct file_operations jfs_dir_operations;
40extern struct inode_operations jfs_file_inode_operations; 40extern const struct inode_operations jfs_file_inode_operations;
41extern const struct file_operations jfs_file_operations; 41extern const struct file_operations jfs_file_operations;
42extern struct inode_operations jfs_symlink_inode_operations; 42extern const struct inode_operations jfs_symlink_inode_operations;
43extern struct dentry_operations jfs_ci_dentry_operations; 43extern struct dentry_operations jfs_ci_dentry_operations;
44#endif /* _H_JFS_INODE */ 44#endif /* _H_JFS_INODE */
diff --git a/fs/jfs/jfs_lock.h b/fs/jfs/jfs_lock.h
index 7d78e83d7c40..df48ece4b7a3 100644
--- a/fs/jfs/jfs_lock.h
+++ b/fs/jfs/jfs_lock.h
@@ -42,7 +42,7 @@ do { \
42 if (cond) \ 42 if (cond) \
43 break; \ 43 break; \
44 unlock_cmd; \ 44 unlock_cmd; \
45 schedule(); \ 45 io_schedule(); \
46 lock_cmd; \ 46 lock_cmd; \
47 } \ 47 } \
48 current->state = TASK_RUNNING; \ 48 current->state = TASK_RUNNING; \
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index ceaf03b94935..58deae007507 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -56,7 +56,7 @@ static inline void __lock_metapage(struct metapage *mp)
56 set_current_state(TASK_UNINTERRUPTIBLE); 56 set_current_state(TASK_UNINTERRUPTIBLE);
57 if (metapage_locked(mp)) { 57 if (metapage_locked(mp)) {
58 unlock_page(mp->page); 58 unlock_page(mp->page);
59 schedule(); 59 io_schedule();
60 lock_page(mp->page); 60 lock_page(mp->page);
61 } 61 }
62 } while (trylock_metapage(mp)); 62 } while (trylock_metapage(mp));
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index d558e51b0df8..6988a1082f58 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -135,7 +135,7 @@ static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event)
135 add_wait_queue(event, &wait); 135 add_wait_queue(event, &wait);
136 set_current_state(TASK_UNINTERRUPTIBLE); 136 set_current_state(TASK_UNINTERRUPTIBLE);
137 TXN_UNLOCK(); 137 TXN_UNLOCK();
138 schedule(); 138 io_schedule();
139 current->state = TASK_RUNNING; 139 current->state = TASK_RUNNING;
140 remove_wait_queue(event, &wait); 140 remove_wait_queue(event, &wait);
141} 141}
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c
index e98eb03e5310..acc97c46d8a4 100644
--- a/fs/jfs/jfs_xtree.c
+++ b/fs/jfs/jfs_xtree.c
@@ -757,6 +757,11 @@ static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp,
757 nsplit = 0; 757 nsplit = 0;
758 758
759 /* push (bn, index) of the parent page/entry */ 759 /* push (bn, index) of the parent page/entry */
760 if (BT_STACK_FULL(btstack)) {
761 jfs_error(ip->i_sb, "stack overrun in xtSearch!");
762 XT_PUTPAGE(mp);
763 return -EIO;
764 }
760 BT_PUSH(btstack, bn, index); 765 BT_PUSH(btstack, bn, index);
761 766
762 /* get the child page block number */ 767 /* get the child page block number */
@@ -3915,6 +3920,11 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
3915 */ 3920 */
3916 getChild: 3921 getChild:
3917 /* save current parent entry for the child page */ 3922 /* save current parent entry for the child page */
3923 if (BT_STACK_FULL(&btstack)) {
3924 jfs_error(ip->i_sb, "stack overrun in xtTruncate!");
3925 XT_PUTPAGE(mp);
3926 return -EIO;
3927 }
3918 BT_PUSH(&btstack, bn, index); 3928 BT_PUSH(&btstack, bn, index);
3919 3929
3920 /* get child page */ 3930 /* get child page */
@@ -4112,6 +4122,11 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
4112 */ 4122 */
4113 getChild: 4123 getChild:
4114 /* save current parent entry for the child page */ 4124 /* save current parent entry for the child page */
4125 if (BT_STACK_FULL(&btstack)) {
4126 jfs_error(ip->i_sb, "stack overrun in xtTruncate_pmap!");
4127 XT_PUTPAGE(mp);
4128 return -EIO;
4129 }
4115 BT_PUSH(&btstack, bn, index); 4130 BT_PUSH(&btstack, bn, index);
4116 4131
4117 /* get child page */ 4132 /* get child page */
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index a6a8c16c872c..41c204771262 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -104,8 +104,8 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
104 104
105 tid = txBegin(dip->i_sb, 0); 105 tid = txBegin(dip->i_sb, 0);
106 106
107 mutex_lock(&JFS_IP(dip)->commit_mutex); 107 mutex_lock_nested(&JFS_IP(dip)->commit_mutex, COMMIT_MUTEX_PARENT);
108 mutex_lock(&JFS_IP(ip)->commit_mutex); 108 mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD);
109 109
110 rc = jfs_init_acl(tid, ip, dip); 110 rc = jfs_init_acl(tid, ip, dip);
111 if (rc) 111 if (rc)
@@ -238,8 +238,8 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
238 238
239 tid = txBegin(dip->i_sb, 0); 239 tid = txBegin(dip->i_sb, 0);
240 240
241 mutex_lock(&JFS_IP(dip)->commit_mutex); 241 mutex_lock_nested(&JFS_IP(dip)->commit_mutex, COMMIT_MUTEX_PARENT);
242 mutex_lock(&JFS_IP(ip)->commit_mutex); 242 mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD);
243 243
244 rc = jfs_init_acl(tid, ip, dip); 244 rc = jfs_init_acl(tid, ip, dip);
245 if (rc) 245 if (rc)
@@ -365,8 +365,8 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry)
365 365
366 tid = txBegin(dip->i_sb, 0); 366 tid = txBegin(dip->i_sb, 0);
367 367
368 mutex_lock(&JFS_IP(dip)->commit_mutex); 368 mutex_lock_nested(&JFS_IP(dip)->commit_mutex, COMMIT_MUTEX_PARENT);
369 mutex_lock(&JFS_IP(ip)->commit_mutex); 369 mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD);
370 370
371 iplist[0] = dip; 371 iplist[0] = dip;
372 iplist[1] = ip; 372 iplist[1] = ip;
@@ -483,12 +483,12 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry)
483 if ((rc = get_UCSname(&dname, dentry))) 483 if ((rc = get_UCSname(&dname, dentry)))
484 goto out; 484 goto out;
485 485
486 IWRITE_LOCK(ip); 486 IWRITE_LOCK(ip, RDWRLOCK_NORMAL);
487 487
488 tid = txBegin(dip->i_sb, 0); 488 tid = txBegin(dip->i_sb, 0);
489 489
490 mutex_lock(&JFS_IP(dip)->commit_mutex); 490 mutex_lock_nested(&JFS_IP(dip)->commit_mutex, COMMIT_MUTEX_PARENT);
491 mutex_lock(&JFS_IP(ip)->commit_mutex); 491 mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD);
492 492
493 iplist[0] = dip; 493 iplist[0] = dip;
494 iplist[1] = ip; 494 iplist[1] = ip;
@@ -802,8 +802,8 @@ static int jfs_link(struct dentry *old_dentry,
802 802
803 tid = txBegin(ip->i_sb, 0); 803 tid = txBegin(ip->i_sb, 0);
804 804
805 mutex_lock(&JFS_IP(dir)->commit_mutex); 805 mutex_lock_nested(&JFS_IP(dir)->commit_mutex, COMMIT_MUTEX_PARENT);
806 mutex_lock(&JFS_IP(ip)->commit_mutex); 806 mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD);
807 807
808 /* 808 /*
809 * scan parent directory for entry/freespace 809 * scan parent directory for entry/freespace
@@ -913,8 +913,8 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
913 913
914 tid = txBegin(dip->i_sb, 0); 914 tid = txBegin(dip->i_sb, 0);
915 915
916 mutex_lock(&JFS_IP(dip)->commit_mutex); 916 mutex_lock_nested(&JFS_IP(dip)->commit_mutex, COMMIT_MUTEX_PARENT);
917 mutex_lock(&JFS_IP(ip)->commit_mutex); 917 mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD);
918 918
919 rc = jfs_init_security(tid, ip, dip); 919 rc = jfs_init_security(tid, ip, dip);
920 if (rc) 920 if (rc)
@@ -1127,7 +1127,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1127 goto out3; 1127 goto out3;
1128 } 1128 }
1129 } else if (new_ip) { 1129 } else if (new_ip) {
1130 IWRITE_LOCK(new_ip); 1130 IWRITE_LOCK(new_ip, RDWRLOCK_NORMAL);
1131 /* Init inode for quota operations. */ 1131 /* Init inode for quota operations. */
1132 DQUOT_INIT(new_ip); 1132 DQUOT_INIT(new_ip);
1133 } 1133 }
@@ -1137,13 +1137,21 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1137 */ 1137 */
1138 tid = txBegin(new_dir->i_sb, 0); 1138 tid = txBegin(new_dir->i_sb, 0);
1139 1139
1140 mutex_lock(&JFS_IP(new_dir)->commit_mutex); 1140 /*
1141 mutex_lock(&JFS_IP(old_ip)->commit_mutex); 1141 * How do we know the locking is safe from deadlocks?
1142 * The vfs does the hard part for us. Any time we are taking nested
1143 * commit_mutexes, the vfs already has i_mutex held on the parent.
1144 * Here, the vfs has already taken i_mutex on both old_dir and new_dir.
1145 */
1146 mutex_lock_nested(&JFS_IP(new_dir)->commit_mutex, COMMIT_MUTEX_PARENT);
1147 mutex_lock_nested(&JFS_IP(old_ip)->commit_mutex, COMMIT_MUTEX_CHILD);
1142 if (old_dir != new_dir) 1148 if (old_dir != new_dir)
1143 mutex_lock(&JFS_IP(old_dir)->commit_mutex); 1149 mutex_lock_nested(&JFS_IP(old_dir)->commit_mutex,
1150 COMMIT_MUTEX_SECOND_PARENT);
1144 1151
1145 if (new_ip) { 1152 if (new_ip) {
1146 mutex_lock(&JFS_IP(new_ip)->commit_mutex); 1153 mutex_lock_nested(&JFS_IP(new_ip)->commit_mutex,
1154 COMMIT_MUTEX_VICTIM);
1147 /* 1155 /*
1148 * Change existing directory entry to new inode number 1156 * Change existing directory entry to new inode number
1149 */ 1157 */
@@ -1357,8 +1365,8 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
1357 1365
1358 tid = txBegin(dir->i_sb, 0); 1366 tid = txBegin(dir->i_sb, 0);
1359 1367
1360 mutex_lock(&JFS_IP(dir)->commit_mutex); 1368 mutex_lock_nested(&JFS_IP(dir)->commit_mutex, COMMIT_MUTEX_PARENT);
1361 mutex_lock(&JFS_IP(ip)->commit_mutex); 1369 mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD);
1362 1370
1363 rc = jfs_init_acl(tid, ip, dir); 1371 rc = jfs_init_acl(tid, ip, dir);
1364 if (rc) 1372 if (rc)
@@ -1495,7 +1503,7 @@ struct dentry *jfs_get_parent(struct dentry *dentry)
1495 return parent; 1503 return parent;
1496} 1504}
1497 1505
1498struct inode_operations jfs_dir_inode_operations = { 1506const struct inode_operations jfs_dir_inode_operations = {
1499 .create = jfs_create, 1507 .create = jfs_create,
1500 .lookup = jfs_lookup, 1508 .lookup = jfs_lookup,
1501 .link = jfs_link, 1509 .link = jfs_link,
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 846ac8f34513..52d73d54a931 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -46,7 +46,7 @@ MODULE_LICENSE("GPL");
46 46
47static struct kmem_cache * jfs_inode_cachep; 47static struct kmem_cache * jfs_inode_cachep;
48 48
49static struct super_operations jfs_super_operations; 49static const struct super_operations jfs_super_operations;
50static struct export_operations jfs_export_operations; 50static struct export_operations jfs_export_operations;
51static struct file_system_type jfs_fs_type; 51static struct file_system_type jfs_fs_type;
52 52
@@ -716,7 +716,7 @@ out:
716 716
717#endif 717#endif
718 718
719static struct super_operations jfs_super_operations = { 719static const struct super_operations jfs_super_operations = {
720 .alloc_inode = jfs_alloc_inode, 720 .alloc_inode = jfs_alloc_inode,
721 .destroy_inode = jfs_destroy_inode, 721 .destroy_inode = jfs_destroy_inode,
722 .read_inode = jfs_read_inode, 722 .read_inode = jfs_read_inode,
diff --git a/fs/jfs/symlink.c b/fs/jfs/symlink.c
index cee43f36f51d..4af1a05aad0a 100644
--- a/fs/jfs/symlink.c
+++ b/fs/jfs/symlink.c
@@ -29,7 +29,7 @@ static void *jfs_follow_link(struct dentry *dentry, struct nameidata *nd)
29 return NULL; 29 return NULL;
30} 30}
31 31
32struct inode_operations jfs_symlink_inode_operations = { 32const struct inode_operations jfs_symlink_inode_operations = {
33 .readlink = generic_readlink, 33 .readlink = generic_readlink,
34 .follow_link = jfs_follow_link, 34 .follow_link = jfs_follow_link,
35 .setxattr = jfs_setxattr, 35 .setxattr = jfs_setxattr,
diff --git a/fs/libfs.c b/fs/libfs.c
index 503898d5c4a7..7d487047dbb8 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -186,7 +186,7 @@ const struct file_operations simple_dir_operations = {
186 .fsync = simple_sync_file, 186 .fsync = simple_sync_file,
187}; 187};
188 188
189struct inode_operations simple_dir_inode_operations = { 189const struct inode_operations simple_dir_inode_operations = {
190 .lookup = simple_lookup, 190 .lookup = simple_lookup,
191}; 191};
192 192
@@ -195,11 +195,11 @@ struct inode_operations simple_dir_inode_operations = {
195 * will never be mountable) 195 * will never be mountable)
196 */ 196 */
197int get_sb_pseudo(struct file_system_type *fs_type, char *name, 197int get_sb_pseudo(struct file_system_type *fs_type, char *name,
198 struct super_operations *ops, unsigned long magic, 198 const struct super_operations *ops, unsigned long magic,
199 struct vfsmount *mnt) 199 struct vfsmount *mnt)
200{ 200{
201 struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); 201 struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
202 static struct super_operations default_ops = {.statfs = simple_statfs}; 202 static const struct super_operations default_ops = {.statfs = simple_statfs};
203 struct dentry *dentry; 203 struct dentry *dentry;
204 struct inode *root; 204 struct inode *root;
205 struct qstr d_name = {.name = name, .len = strlen(name)}; 205 struct qstr d_name = {.name = name, .len = strlen(name)};
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 3d4610c2a266..22d403208973 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -192,7 +192,7 @@ struct nlm_host *
192nlmsvc_lookup_host(struct svc_rqst *rqstp, 192nlmsvc_lookup_host(struct svc_rqst *rqstp,
193 const char *hostname, int hostname_len) 193 const char *hostname, int hostname_len)
194{ 194{
195 return nlm_lookup_host(1, &rqstp->rq_addr, 195 return nlm_lookup_host(1, svc_addr_in(rqstp),
196 rqstp->rq_prot, rqstp->rq_vers, 196 rqstp->rq_prot, rqstp->rq_vers,
197 hostname, hostname_len); 197 hostname, hostname_len);
198} 198}
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 8ca18085e68d..80fcacc1acf9 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -141,6 +141,7 @@ lockd(struct svc_rqst *rqstp)
141 */ 141 */
142 while ((nlmsvc_users || !signalled()) && nlmsvc_pid == current->pid) { 142 while ((nlmsvc_users || !signalled()) && nlmsvc_pid == current->pid) {
143 long timeout = MAX_SCHEDULE_TIMEOUT; 143 long timeout = MAX_SCHEDULE_TIMEOUT;
144 char buf[RPC_MAX_ADDRBUFLEN];
144 145
145 if (signalled()) { 146 if (signalled()) {
146 flush_signals(current); 147 flush_signals(current);
@@ -175,11 +176,10 @@ lockd(struct svc_rqst *rqstp)
175 break; 176 break;
176 } 177 }
177 178
178 dprintk("lockd: request from %08x\n", 179 dprintk("lockd: request from %s\n",
179 (unsigned)ntohl(rqstp->rq_addr.sin_addr.s_addr)); 180 svc_print_addr(rqstp, buf, sizeof(buf)));
180 181
181 svc_process(rqstp); 182 svc_process(rqstp);
182
183 } 183 }
184 184
185 flush_signals(current); 185 flush_signals(current);
@@ -223,23 +223,29 @@ static int find_socket(struct svc_serv *serv, int proto)
223 return found; 223 return found;
224} 224}
225 225
226/*
227 * Make any sockets that are needed but not present.
228 * If nlm_udpport or nlm_tcpport were set as module
229 * options, make those sockets unconditionally
230 */
226static int make_socks(struct svc_serv *serv, int proto) 231static int make_socks(struct svc_serv *serv, int proto)
227{ 232{
228 /* Make any sockets that are needed but not present. 233 static int warned;
229 * If nlm_udpport or nlm_tcpport were set as module
230 * options, make those sockets unconditionally
231 */
232 static int warned;
233 int err = 0; 234 int err = 0;
235
234 if (proto == IPPROTO_UDP || nlm_udpport) 236 if (proto == IPPROTO_UDP || nlm_udpport)
235 if (!find_socket(serv, IPPROTO_UDP)) 237 if (!find_socket(serv, IPPROTO_UDP))
236 err = svc_makesock(serv, IPPROTO_UDP, nlm_udpport); 238 err = svc_makesock(serv, IPPROTO_UDP, nlm_udpport,
237 if (err == 0 && (proto == IPPROTO_TCP || nlm_tcpport)) 239 SVC_SOCK_DEFAULTS);
240 if (err >= 0 && (proto == IPPROTO_TCP || nlm_tcpport))
238 if (!find_socket(serv, IPPROTO_TCP)) 241 if (!find_socket(serv, IPPROTO_TCP))
239 err= svc_makesock(serv, IPPROTO_TCP, nlm_tcpport); 242 err = svc_makesock(serv, IPPROTO_TCP, nlm_tcpport,
240 if (!err) 243 SVC_SOCK_DEFAULTS);
244
245 if (err >= 0) {
241 warned = 0; 246 warned = 0;
242 else if (warned++ == 0) 247 err = 0;
248 } else if (warned++ == 0)
243 printk(KERN_WARNING 249 printk(KERN_WARNING
244 "lockd_up: makesock failed, error=%d\n", err); 250 "lockd_up: makesock failed, error=%d\n", err);
245 return err; 251 return err;
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index f67146a8199a..47a66aa5d55b 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -224,7 +224,7 @@ nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
224 resp->cookie = argp->cookie; 224 resp->cookie = argp->cookie;
225 225
226 dprintk("lockd: GRANTED called\n"); 226 dprintk("lockd: GRANTED called\n");
227 resp->status = nlmclnt_grant(&rqstp->rq_addr, &argp->lock); 227 resp->status = nlmclnt_grant(svc_addr_in(rqstp), &argp->lock);
228 dprintk("lockd: GRANTED status %d\n", ntohl(resp->status)); 228 dprintk("lockd: GRANTED status %d\n", ntohl(resp->status));
229 return rpc_success; 229 return rpc_success;
230} 230}
@@ -421,15 +421,16 @@ static __be32
421nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp, 421nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
422 void *resp) 422 void *resp)
423{ 423{
424 struct sockaddr_in saddr = rqstp->rq_addr; 424 struct sockaddr_in saddr;
425
426 memcpy(&saddr, svc_addr_in(rqstp), sizeof(saddr));
425 427
426 dprintk("lockd: SM_NOTIFY called\n"); 428 dprintk("lockd: SM_NOTIFY called\n");
427 if (saddr.sin_addr.s_addr != htonl(INADDR_LOOPBACK) 429 if (saddr.sin_addr.s_addr != htonl(INADDR_LOOPBACK)
428 || ntohs(saddr.sin_port) >= 1024) { 430 || ntohs(saddr.sin_port) >= 1024) {
429 printk(KERN_WARNING 431 char buf[RPC_MAX_ADDRBUFLEN];
430 "lockd: rejected NSM callback from %08x:%d\n", 432 printk(KERN_WARNING "lockd: rejected NSM callback from %s\n",
431 ntohl(rqstp->rq_addr.sin_addr.s_addr), 433 svc_print_addr(rqstp, buf, sizeof(buf)));
432 ntohs(rqstp->rq_addr.sin_port));
433 return rpc_system_err; 434 return rpc_system_err;
434 } 435 }
435 436
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 3707c3a23e93..31cb48425733 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -253,7 +253,7 @@ nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
253 resp->cookie = argp->cookie; 253 resp->cookie = argp->cookie;
254 254
255 dprintk("lockd: GRANTED called\n"); 255 dprintk("lockd: GRANTED called\n");
256 resp->status = nlmclnt_grant(&rqstp->rq_addr, &argp->lock); 256 resp->status = nlmclnt_grant(svc_addr_in(rqstp), &argp->lock);
257 dprintk("lockd: GRANTED status %d\n", ntohl(resp->status)); 257 dprintk("lockd: GRANTED status %d\n", ntohl(resp->status));
258 return rpc_success; 258 return rpc_success;
259} 259}
@@ -452,15 +452,16 @@ static __be32
452nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp, 452nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
453 void *resp) 453 void *resp)
454{ 454{
455 struct sockaddr_in saddr = rqstp->rq_addr; 455 struct sockaddr_in saddr;
456
457 memcpy(&saddr, svc_addr_in(rqstp), sizeof(saddr));
456 458
457 dprintk("lockd: SM_NOTIFY called\n"); 459 dprintk("lockd: SM_NOTIFY called\n");
458 if (saddr.sin_addr.s_addr != htonl(INADDR_LOOPBACK) 460 if (saddr.sin_addr.s_addr != htonl(INADDR_LOOPBACK)
459 || ntohs(saddr.sin_port) >= 1024) { 461 || ntohs(saddr.sin_port) >= 1024) {
460 printk(KERN_WARNING 462 char buf[RPC_MAX_ADDRBUFLEN];
461 "lockd: rejected NSM callback from %08x:%d\n", 463 printk(KERN_WARNING "lockd: rejected NSM callback from %s\n",
462 ntohl(rqstp->rq_addr.sin_addr.s_addr), 464 svc_print_addr(rqstp, buf, sizeof(buf)));
463 ntohs(rqstp->rq_addr.sin_port));
464 return rpc_system_err; 465 return rpc_system_err;
465 } 466 }
466 467
diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c
index df6b1075b549..c4a554df7b7e 100644
--- a/fs/minix/bitmap.c
+++ b/fs/minix/bitmap.c
@@ -26,14 +26,14 @@ static unsigned long count_free(struct buffer_head *map[], unsigned numblocks, _
26 for (i=0; i<numblocks-1; i++) { 26 for (i=0; i<numblocks-1; i++) {
27 if (!(bh=map[i])) 27 if (!(bh=map[i]))
28 return(0); 28 return(0);
29 for (j=0; j<BLOCK_SIZE; j++) 29 for (j=0; j<bh->b_size; j++)
30 sum += nibblemap[bh->b_data[j] & 0xf] 30 sum += nibblemap[bh->b_data[j] & 0xf]
31 + nibblemap[(bh->b_data[j]>>4) & 0xf]; 31 + nibblemap[(bh->b_data[j]>>4) & 0xf];
32 } 32 }
33 33
34 if (numblocks==0 || !(bh=map[numblocks-1])) 34 if (numblocks==0 || !(bh=map[numblocks-1]))
35 return(0); 35 return(0);
36 i = ((numbits-(numblocks-1)*BLOCK_SIZE*8)/16)*2; 36 i = ((numbits - (numblocks-1) * bh->b_size * 8) / 16) * 2;
37 for (j=0; j<i; j++) { 37 for (j=0; j<i; j++) {
38 sum += nibblemap[bh->b_data[j] & 0xf] 38 sum += nibblemap[bh->b_data[j] & 0xf]
39 + nibblemap[(bh->b_data[j]>>4) & 0xf]; 39 + nibblemap[(bh->b_data[j]>>4) & 0xf];
@@ -48,28 +48,29 @@ static unsigned long count_free(struct buffer_head *map[], unsigned numblocks, _
48 return(sum); 48 return(sum);
49} 49}
50 50
51void minix_free_block(struct inode * inode, int block) 51void minix_free_block(struct inode *inode, unsigned long block)
52{ 52{
53 struct super_block * sb = inode->i_sb; 53 struct super_block *sb = inode->i_sb;
54 struct minix_sb_info * sbi = minix_sb(sb); 54 struct minix_sb_info *sbi = minix_sb(sb);
55 struct buffer_head * bh; 55 struct buffer_head *bh;
56 unsigned int bit,zone; 56 int k = sb->s_blocksize_bits + 3;
57 unsigned long bit, zone;
57 58
58 if (block < sbi->s_firstdatazone || block >= sbi->s_nzones) { 59 if (block < sbi->s_firstdatazone || block >= sbi->s_nzones) {
59 printk("Trying to free block not in datazone\n"); 60 printk("Trying to free block not in datazone\n");
60 return; 61 return;
61 } 62 }
62 zone = block - sbi->s_firstdatazone + 1; 63 zone = block - sbi->s_firstdatazone + 1;
63 bit = zone & 8191; 64 bit = zone & ((1<<k) - 1);
64 zone >>= 13; 65 zone >>= k;
65 if (zone >= sbi->s_zmap_blocks) { 66 if (zone >= sbi->s_zmap_blocks) {
66 printk("minix_free_block: nonexistent bitmap buffer\n"); 67 printk("minix_free_block: nonexistent bitmap buffer\n");
67 return; 68 return;
68 } 69 }
69 bh = sbi->s_zmap[zone]; 70 bh = sbi->s_zmap[zone];
70 lock_kernel(); 71 lock_kernel();
71 if (!minix_test_and_clear_bit(bit,bh->b_data)) 72 if (!minix_test_and_clear_bit(bit, bh->b_data))
72 printk("free_block (%s:%d): bit already cleared\n", 73 printk("minix_free_block (%s:%lu): bit already cleared\n",
73 sb->s_id, block); 74 sb->s_id, block);
74 unlock_kernel(); 75 unlock_kernel();
75 mark_buffer_dirty(bh); 76 mark_buffer_dirty(bh);
@@ -79,6 +80,7 @@ void minix_free_block(struct inode * inode, int block)
79int minix_new_block(struct inode * inode) 80int minix_new_block(struct inode * inode)
80{ 81{
81 struct minix_sb_info *sbi = minix_sb(inode->i_sb); 82 struct minix_sb_info *sbi = minix_sb(inode->i_sb);
83 int bits_per_zone = 8 * inode->i_sb->s_blocksize;
82 int i; 84 int i;
83 85
84 for (i = 0; i < sbi->s_zmap_blocks; i++) { 86 for (i = 0; i < sbi->s_zmap_blocks; i++) {
@@ -86,11 +88,12 @@ int minix_new_block(struct inode * inode)
86 int j; 88 int j;
87 89
88 lock_kernel(); 90 lock_kernel();
89 if ((j = minix_find_first_zero_bit(bh->b_data, 8192)) < 8192) { 91 j = minix_find_first_zero_bit(bh->b_data, bits_per_zone);
90 minix_set_bit(j,bh->b_data); 92 if (j < bits_per_zone) {
93 minix_set_bit(j, bh->b_data);
91 unlock_kernel(); 94 unlock_kernel();
92 mark_buffer_dirty(bh); 95 mark_buffer_dirty(bh);
93 j += i*8192 + sbi->s_firstdatazone-1; 96 j += i * bits_per_zone + sbi->s_firstdatazone-1;
94 if (j < sbi->s_firstdatazone || j >= sbi->s_nzones) 97 if (j < sbi->s_firstdatazone || j >= sbi->s_nzones)
95 break; 98 break;
96 return j; 99 return j;
@@ -137,6 +140,7 @@ minix_V2_raw_inode(struct super_block *sb, ino_t ino, struct buffer_head **bh)
137 int block; 140 int block;
138 struct minix_sb_info *sbi = minix_sb(sb); 141 struct minix_sb_info *sbi = minix_sb(sb);
139 struct minix2_inode *p; 142 struct minix2_inode *p;
143 int minix2_inodes_per_block = sb->s_blocksize / sizeof(struct minix2_inode);
140 144
141 *bh = NULL; 145 *bh = NULL;
142 if (!ino || ino > sbi->s_ninodes) { 146 if (!ino || ino > sbi->s_ninodes) {
@@ -146,14 +150,14 @@ minix_V2_raw_inode(struct super_block *sb, ino_t ino, struct buffer_head **bh)
146 } 150 }
147 ino--; 151 ino--;
148 block = 2 + sbi->s_imap_blocks + sbi->s_zmap_blocks + 152 block = 2 + sbi->s_imap_blocks + sbi->s_zmap_blocks +
149 ino / MINIX2_INODES_PER_BLOCK; 153 ino / minix2_inodes_per_block;
150 *bh = sb_bread(sb, block); 154 *bh = sb_bread(sb, block);
151 if (!*bh) { 155 if (!*bh) {
152 printk("Unable to read inode block\n"); 156 printk("Unable to read inode block\n");
153 return NULL; 157 return NULL;
154 } 158 }
155 p = (void *)(*bh)->b_data; 159 p = (void *)(*bh)->b_data;
156 return p + ino % MINIX2_INODES_PER_BLOCK; 160 return p + ino % minix2_inodes_per_block;
157} 161}
158 162
159/* Clear the link count and mode of a deleted inode on disk. */ 163/* Clear the link count and mode of a deleted inode on disk. */
@@ -185,26 +189,30 @@ static void minix_clear_inode(struct inode *inode)
185 189
186void minix_free_inode(struct inode * inode) 190void minix_free_inode(struct inode * inode)
187{ 191{
192 struct super_block *sb = inode->i_sb;
188 struct minix_sb_info *sbi = minix_sb(inode->i_sb); 193 struct minix_sb_info *sbi = minix_sb(inode->i_sb);
189 struct buffer_head * bh; 194 struct buffer_head *bh;
190 unsigned long ino; 195 int k = sb->s_blocksize_bits + 3;
196 unsigned long ino, bit;
191 197
192 ino = inode->i_ino; 198 ino = inode->i_ino;
193 if (ino < 1 || ino > sbi->s_ninodes) { 199 if (ino < 1 || ino > sbi->s_ninodes) {
194 printk("minix_free_inode: inode 0 or nonexistent inode\n"); 200 printk("minix_free_inode: inode 0 or nonexistent inode\n");
195 goto out; 201 goto out;
196 } 202 }
197 if ((ino >> 13) >= sbi->s_imap_blocks) { 203 bit = ino & ((1<<k) - 1);
204 ino >>= k;
205 if (ino >= sbi->s_imap_blocks) {
198 printk("minix_free_inode: nonexistent imap in superblock\n"); 206 printk("minix_free_inode: nonexistent imap in superblock\n");
199 goto out; 207 goto out;
200 } 208 }
201 209
202 minix_clear_inode(inode); /* clear on-disk copy */ 210 minix_clear_inode(inode); /* clear on-disk copy */
203 211
204 bh = sbi->s_imap[ino >> 13]; 212 bh = sbi->s_imap[ino];
205 lock_kernel(); 213 lock_kernel();
206 if (!minix_test_and_clear_bit(ino & 8191, bh->b_data)) 214 if (!minix_test_and_clear_bit(bit, bh->b_data))
207 printk("minix_free_inode: bit %lu already cleared\n", ino); 215 printk("minix_free_inode: bit %lu already cleared\n", bit);
208 unlock_kernel(); 216 unlock_kernel();
209 mark_buffer_dirty(bh); 217 mark_buffer_dirty(bh);
210 out: 218 out:
@@ -217,35 +225,38 @@ struct inode * minix_new_inode(const struct inode * dir, int * error)
217 struct minix_sb_info *sbi = minix_sb(sb); 225 struct minix_sb_info *sbi = minix_sb(sb);
218 struct inode *inode = new_inode(sb); 226 struct inode *inode = new_inode(sb);
219 struct buffer_head * bh; 227 struct buffer_head * bh;
220 int i,j; 228 int bits_per_zone = 8 * sb->s_blocksize;
229 unsigned long j;
230 int i;
221 231
222 if (!inode) { 232 if (!inode) {
223 *error = -ENOMEM; 233 *error = -ENOMEM;
224 return NULL; 234 return NULL;
225 } 235 }
226 j = 8192; 236 j = bits_per_zone;
227 bh = NULL; 237 bh = NULL;
228 *error = -ENOSPC; 238 *error = -ENOSPC;
229 lock_kernel(); 239 lock_kernel();
230 for (i = 0; i < sbi->s_imap_blocks; i++) { 240 for (i = 0; i < sbi->s_imap_blocks; i++) {
231 bh = sbi->s_imap[i]; 241 bh = sbi->s_imap[i];
232 if ((j = minix_find_first_zero_bit(bh->b_data, 8192)) < 8192) 242 j = minix_find_first_zero_bit(bh->b_data, bits_per_zone);
243 if (j < bits_per_zone)
233 break; 244 break;
234 } 245 }
235 if (!bh || j >= 8192) { 246 if (!bh || j >= bits_per_zone) {
236 unlock_kernel(); 247 unlock_kernel();
237 iput(inode); 248 iput(inode);
238 return NULL; 249 return NULL;
239 } 250 }
240 if (minix_test_and_set_bit(j,bh->b_data)) { /* shouldn't happen */ 251 if (minix_test_and_set_bit(j, bh->b_data)) { /* shouldn't happen */
241 printk("new_inode: bit already set\n");
242 unlock_kernel(); 252 unlock_kernel();
253 printk("minix_new_inode: bit already set\n");
243 iput(inode); 254 iput(inode);
244 return NULL; 255 return NULL;
245 } 256 }
246 unlock_kernel(); 257 unlock_kernel();
247 mark_buffer_dirty(bh); 258 mark_buffer_dirty(bh);
248 j += i*8192; 259 j += i * bits_per_zone;
249 if (!j || j > sbi->s_ninodes) { 260 if (!j || j > sbi->s_ninodes) {
250 iput(inode); 261 iput(inode);
251 return NULL; 262 return NULL;
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index ab782c4086f5..cb4cb571fddf 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -4,6 +4,8 @@
4 * Copyright (C) 1991, 1992 Linus Torvalds 4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * 5 *
6 * minix directory handling functions 6 * minix directory handling functions
7 *
8 * Updated to filesystem version 3 by Daniel Aragones
7 */ 9 */
8 10
9#include "minix.h" 11#include "minix.h"
@@ -11,6 +13,7 @@
11#include <linux/smp_lock.h> 13#include <linux/smp_lock.h>
12 14
13typedef struct minix_dir_entry minix_dirent; 15typedef struct minix_dir_entry minix_dirent;
16typedef struct minix3_dir_entry minix3_dirent;
14 17
15static int minix_readdir(struct file *, void *, filldir_t); 18static int minix_readdir(struct file *, void *, filldir_t);
16 19
@@ -89,6 +92,8 @@ static int minix_readdir(struct file * filp, void * dirent, filldir_t filldir)
89 unsigned long npages = dir_pages(inode); 92 unsigned long npages = dir_pages(inode);
90 struct minix_sb_info *sbi = minix_sb(sb); 93 struct minix_sb_info *sbi = minix_sb(sb);
91 unsigned chunk_size = sbi->s_dirsize; 94 unsigned chunk_size = sbi->s_dirsize;
95 char *name;
96 __u32 inumber;
92 97
93 lock_kernel(); 98 lock_kernel();
94 99
@@ -105,16 +110,24 @@ static int minix_readdir(struct file * filp, void * dirent, filldir_t filldir)
105 kaddr = (char *)page_address(page); 110 kaddr = (char *)page_address(page);
106 p = kaddr+offset; 111 p = kaddr+offset;
107 limit = kaddr + minix_last_byte(inode, n) - chunk_size; 112 limit = kaddr + minix_last_byte(inode, n) - chunk_size;
108 for ( ; p <= limit ; p = minix_next_entry(p, sbi)) { 113 for ( ; p <= limit; p = minix_next_entry(p, sbi)) {
109 minix_dirent *de = (minix_dirent *)p; 114 if (sbi->s_version == MINIX_V3) {
110 if (de->inode) { 115 minix3_dirent *de3 = (minix3_dirent *)p;
116 name = de3->name;
117 inumber = de3->inode;
118 } else {
119 minix_dirent *de = (minix_dirent *)p;
120 name = de->name;
121 inumber = de->inode;
122 }
123 if (inumber) {
111 int over; 124 int over;
112 unsigned l = strnlen(de->name,sbi->s_namelen);
113 125
126 unsigned l = strnlen(name, sbi->s_namelen);
114 offset = p - kaddr; 127 offset = p - kaddr;
115 over = filldir(dirent, de->name, l, 128 over = filldir(dirent, name, l,
116 (n<<PAGE_CACHE_SHIFT) | offset, 129 (n << PAGE_CACHE_SHIFT) | offset,
117 de->inode, DT_UNKNOWN); 130 inumber, DT_UNKNOWN);
118 if (over) { 131 if (over) {
119 dir_put_page(page); 132 dir_put_page(page);
120 goto done; 133 goto done;
@@ -156,23 +169,34 @@ minix_dirent *minix_find_entry(struct dentry *dentry, struct page **res_page)
156 unsigned long n; 169 unsigned long n;
157 unsigned long npages = dir_pages(dir); 170 unsigned long npages = dir_pages(dir);
158 struct page *page = NULL; 171 struct page *page = NULL;
159 struct minix_dir_entry *de; 172 char *p;
160 173
174 char *namx;
175 __u32 inumber;
161 *res_page = NULL; 176 *res_page = NULL;
162 177
163 for (n = 0; n < npages; n++) { 178 for (n = 0; n < npages; n++) {
164 char *kaddr; 179 char *kaddr, *limit;
180
165 page = dir_get_page(dir, n); 181 page = dir_get_page(dir, n);
166 if (IS_ERR(page)) 182 if (IS_ERR(page))
167 continue; 183 continue;
168 184
169 kaddr = (char*)page_address(page); 185 kaddr = (char*)page_address(page);
170 de = (struct minix_dir_entry *) kaddr; 186 limit = kaddr + minix_last_byte(dir, n) - sbi->s_dirsize;
171 kaddr += minix_last_byte(dir, n) - sbi->s_dirsize; 187 for (p = kaddr; p <= limit; p = minix_next_entry(p, sbi)) {
172 for ( ; (char *) de <= kaddr ; de = minix_next_entry(de,sbi)) { 188 if (sbi->s_version == MINIX_V3) {
173 if (!de->inode) 189 minix3_dirent *de3 = (minix3_dirent *)p;
190 namx = de3->name;
191 inumber = de3->inode;
192 } else {
193 minix_dirent *de = (minix_dirent *)p;
194 namx = de->name;
195 inumber = de->inode;
196 }
197 if (!inumber)
174 continue; 198 continue;
175 if (namecompare(namelen,sbi->s_namelen,name,de->name)) 199 if (namecompare(namelen, sbi->s_namelen, name, namx))
176 goto found; 200 goto found;
177 } 201 }
178 dir_put_page(page); 202 dir_put_page(page);
@@ -181,7 +205,7 @@ minix_dirent *minix_find_entry(struct dentry *dentry, struct page **res_page)
181 205
182found: 206found:
183 *res_page = page; 207 *res_page = page;
184 return de; 208 return (minix_dirent *)p;
185} 209}
186 210
187int minix_add_link(struct dentry *dentry, struct inode *inode) 211int minix_add_link(struct dentry *dentry, struct inode *inode)
@@ -192,12 +216,15 @@ int minix_add_link(struct dentry *dentry, struct inode *inode)
192 struct super_block * sb = dir->i_sb; 216 struct super_block * sb = dir->i_sb;
193 struct minix_sb_info * sbi = minix_sb(sb); 217 struct minix_sb_info * sbi = minix_sb(sb);
194 struct page *page = NULL; 218 struct page *page = NULL;
195 struct minix_dir_entry * de;
196 unsigned long npages = dir_pages(dir); 219 unsigned long npages = dir_pages(dir);
197 unsigned long n; 220 unsigned long n;
198 char *kaddr; 221 char *kaddr, *p;
222 minix_dirent *de;
223 minix3_dirent *de3;
199 unsigned from, to; 224 unsigned from, to;
200 int err; 225 int err;
226 char *namx = NULL;
227 __u32 inumber;
201 228
202 /* 229 /*
203 * We take care of directory expansion in the same loop 230 * We take care of directory expansion in the same loop
@@ -205,7 +232,7 @@ int minix_add_link(struct dentry *dentry, struct inode *inode)
205 * to protect that region. 232 * to protect that region.
206 */ 233 */
207 for (n = 0; n <= npages; n++) { 234 for (n = 0; n <= npages; n++) {
208 char *dir_end; 235 char *limit, *dir_end;
209 236
210 page = dir_get_page(dir, n); 237 page = dir_get_page(dir, n);
211 err = PTR_ERR(page); 238 err = PTR_ERR(page);
@@ -214,20 +241,30 @@ int minix_add_link(struct dentry *dentry, struct inode *inode)
214 lock_page(page); 241 lock_page(page);
215 kaddr = (char*)page_address(page); 242 kaddr = (char*)page_address(page);
216 dir_end = kaddr + minix_last_byte(dir, n); 243 dir_end = kaddr + minix_last_byte(dir, n);
217 de = (minix_dirent *)kaddr; 244 limit = kaddr + PAGE_CACHE_SIZE - sbi->s_dirsize;
218 kaddr += PAGE_CACHE_SIZE - sbi->s_dirsize; 245 for (p = kaddr; p <= limit; p = minix_next_entry(p, sbi)) {
219 while ((char *)de <= kaddr) { 246 de = (minix_dirent *)p;
220 if ((char *)de == dir_end) { 247 de3 = (minix3_dirent *)p;
248 if (sbi->s_version == MINIX_V3) {
249 namx = de3->name;
250 inumber = de3->inode;
251 } else {
252 namx = de->name;
253 inumber = de->inode;
254 }
255 if (p == dir_end) {
221 /* We hit i_size */ 256 /* We hit i_size */
222 de->inode = 0; 257 if (sbi->s_version == MINIX_V3)
258 de3->inode = 0;
259 else
260 de->inode = 0;
223 goto got_it; 261 goto got_it;
224 } 262 }
225 if (!de->inode) 263 if (!inumber)
226 goto got_it; 264 goto got_it;
227 err = -EEXIST; 265 err = -EEXIST;
228 if (namecompare(namelen,sbi->s_namelen,name,de->name)) 266 if (namecompare(namelen, sbi->s_namelen, name, namx))
229 goto out_unlock; 267 goto out_unlock;
230 de = minix_next_entry(de, sbi);
231 } 268 }
232 unlock_page(page); 269 unlock_page(page);
233 dir_put_page(page); 270 dir_put_page(page);
@@ -236,14 +273,19 @@ int minix_add_link(struct dentry *dentry, struct inode *inode)
236 return -EINVAL; 273 return -EINVAL;
237 274
238got_it: 275got_it:
239 from = (char*)de - (char*)page_address(page); 276 from = p - (char*)page_address(page);
240 to = from + sbi->s_dirsize; 277 to = from + sbi->s_dirsize;
241 err = page->mapping->a_ops->prepare_write(NULL, page, from, to); 278 err = page->mapping->a_ops->prepare_write(NULL, page, from, to);
242 if (err) 279 if (err)
243 goto out_unlock; 280 goto out_unlock;
244 memcpy (de->name, name, namelen); 281 memcpy (namx, name, namelen);
245 memset (de->name + namelen, 0, sbi->s_dirsize - namelen - 2); 282 if (sbi->s_version == MINIX_V3) {
246 de->inode = inode->i_ino; 283 memset (namx + namelen, 0, sbi->s_dirsize - namelen - 4);
284 de3->inode = inode->i_ino;
285 } else {
286 memset (namx + namelen, 0, sbi->s_dirsize - namelen - 2);
287 de->inode = inode->i_ino;
288 }
247 err = dir_commit_chunk(page, from, to); 289 err = dir_commit_chunk(page, from, to);
248 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; 290 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
249 mark_inode_dirty(dir); 291 mark_inode_dirty(dir);
@@ -283,8 +325,7 @@ int minix_make_empty(struct inode *inode, struct inode *dir)
283{ 325{
284 struct address_space *mapping = inode->i_mapping; 326 struct address_space *mapping = inode->i_mapping;
285 struct page *page = grab_cache_page(mapping, 0); 327 struct page *page = grab_cache_page(mapping, 0);
286 struct minix_sb_info * sbi = minix_sb(inode->i_sb); 328 struct minix_sb_info *sbi = minix_sb(inode->i_sb);
287 struct minix_dir_entry * de;
288 char *kaddr; 329 char *kaddr;
289 int err; 330 int err;
290 331
@@ -299,12 +340,23 @@ int minix_make_empty(struct inode *inode, struct inode *dir)
299 kaddr = kmap_atomic(page, KM_USER0); 340 kaddr = kmap_atomic(page, KM_USER0);
300 memset(kaddr, 0, PAGE_CACHE_SIZE); 341 memset(kaddr, 0, PAGE_CACHE_SIZE);
301 342
302 de = (struct minix_dir_entry *)kaddr; 343 if (sbi->s_version == MINIX_V3) {
303 de->inode = inode->i_ino; 344 minix3_dirent *de3 = (minix3_dirent *)kaddr;
304 strcpy(de->name,"."); 345
305 de = minix_next_entry(de, sbi); 346 de3->inode = inode->i_ino;
306 de->inode = dir->i_ino; 347 strcpy(de3->name, ".");
307 strcpy(de->name,".."); 348 de3 = minix_next_entry(de3, sbi);
349 de3->inode = dir->i_ino;
350 strcpy(de3->name, "..");
351 } else {
352 minix_dirent *de = (minix_dirent *)kaddr;
353
354 de->inode = inode->i_ino;
355 strcpy(de->name, ".");
356 de = minix_next_entry(de, sbi);
357 de->inode = dir->i_ino;
358 strcpy(de->name, "..");
359 }
308 kunmap_atomic(kaddr, KM_USER0); 360 kunmap_atomic(kaddr, KM_USER0);
309 361
310 err = dir_commit_chunk(page, 0, 2 * sbi->s_dirsize); 362 err = dir_commit_chunk(page, 0, 2 * sbi->s_dirsize);
@@ -321,33 +373,41 @@ int minix_empty_dir(struct inode * inode)
321 struct page *page = NULL; 373 struct page *page = NULL;
322 unsigned long i, npages = dir_pages(inode); 374 unsigned long i, npages = dir_pages(inode);
323 struct minix_sb_info *sbi = minix_sb(inode->i_sb); 375 struct minix_sb_info *sbi = minix_sb(inode->i_sb);
376 char *name;
377 __u32 inumber;
324 378
325 for (i = 0; i < npages; i++) { 379 for (i = 0; i < npages; i++) {
326 char *kaddr; 380 char *p, *kaddr, *limit;
327 minix_dirent * de;
328 page = dir_get_page(inode, i);
329 381
382 page = dir_get_page(inode, i);
330 if (IS_ERR(page)) 383 if (IS_ERR(page))
331 continue; 384 continue;
332 385
333 kaddr = (char *)page_address(page); 386 kaddr = (char *)page_address(page);
334 de = (minix_dirent *)kaddr; 387 limit = kaddr + minix_last_byte(inode, i) - sbi->s_dirsize;
335 kaddr += minix_last_byte(inode, i) - sbi->s_dirsize; 388 for (p = kaddr; p <= limit; p = minix_next_entry(p, sbi)) {
389 if (sbi->s_version == MINIX_V3) {
390 minix3_dirent *de3 = (minix3_dirent *)p;
391 name = de3->name;
392 inumber = de3->inode;
393 } else {
394 minix_dirent *de = (minix_dirent *)p;
395 name = de->name;
396 inumber = de->inode;
397 }
336 398
337 while ((char *)de <= kaddr) { 399 if (inumber != 0) {
338 if (de->inode != 0) {
339 /* check for . and .. */ 400 /* check for . and .. */
340 if (de->name[0] != '.') 401 if (name[0] != '.')
341 goto not_empty; 402 goto not_empty;
342 if (!de->name[1]) { 403 if (!name[1]) {
343 if (de->inode != inode->i_ino) 404 if (inumber != inode->i_ino)
344 goto not_empty; 405 goto not_empty;
345 } else if (de->name[1] != '.') 406 } else if (name[1] != '.')
346 goto not_empty; 407 goto not_empty;
347 else if (de->name[2]) 408 else if (name[2])
348 goto not_empty; 409 goto not_empty;
349 } 410 }
350 de = minix_next_entry(de, sbi);
351 } 411 }
352 dir_put_page(page); 412 dir_put_page(page);
353 } 413 }
diff --git a/fs/minix/file.c b/fs/minix/file.c
index 40eac2e60d25..f92baa1d7570 100644
--- a/fs/minix/file.c
+++ b/fs/minix/file.c
@@ -26,7 +26,7 @@ const struct file_operations minix_file_operations = {
26 .sendfile = generic_file_sendfile, 26 .sendfile = generic_file_sendfile,
27}; 27};
28 28
29struct inode_operations minix_file_inode_operations = { 29const struct inode_operations minix_file_inode_operations = {
30 .truncate = minix_truncate, 30 .truncate = minix_truncate,
31 .getattr = minix_getattr, 31 .getattr = minix_getattr,
32}; 32};
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 629e09b38c5c..92e383af3709 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -7,6 +7,7 @@
7 * Minix V2 fs support. 7 * Minix V2 fs support.
8 * 8 *
9 * Modified for 680x0 by Andreas Schwab 9 * Modified for 680x0 by Andreas Schwab
10 * Updated to filesystem version 3 by Daniel Aragones
10 */ 11 */
11 12
12#include <linux/module.h> 13#include <linux/module.h>
@@ -36,7 +37,8 @@ static void minix_put_super(struct super_block *sb)
36 struct minix_sb_info *sbi = minix_sb(sb); 37 struct minix_sb_info *sbi = minix_sb(sb);
37 38
38 if (!(sb->s_flags & MS_RDONLY)) { 39 if (!(sb->s_flags & MS_RDONLY)) {
39 sbi->s_ms->s_state = sbi->s_mount_state; 40 if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */
41 sbi->s_ms->s_state = sbi->s_mount_state;
40 mark_buffer_dirty(sbi->s_sbh); 42 mark_buffer_dirty(sbi->s_sbh);
41 } 43 }
42 for (i = 0; i < sbi->s_imap_blocks; i++) 44 for (i = 0; i < sbi->s_imap_blocks; i++)
@@ -93,7 +95,7 @@ static void destroy_inodecache(void)
93 kmem_cache_destroy(minix_inode_cachep); 95 kmem_cache_destroy(minix_inode_cachep);
94} 96}
95 97
96static struct super_operations minix_sops = { 98static const struct super_operations minix_sops = {
97 .alloc_inode = minix_alloc_inode, 99 .alloc_inode = minix_alloc_inode,
98 .destroy_inode = minix_destroy_inode, 100 .destroy_inode = minix_destroy_inode,
99 .read_inode = minix_read_inode, 101 .read_inode = minix_read_inode,
@@ -117,12 +119,17 @@ static int minix_remount (struct super_block * sb, int * flags, char * data)
117 !(sbi->s_mount_state & MINIX_VALID_FS)) 119 !(sbi->s_mount_state & MINIX_VALID_FS))
118 return 0; 120 return 0;
119 /* Mounting a rw partition read-only. */ 121 /* Mounting a rw partition read-only. */
120 ms->s_state = sbi->s_mount_state; 122 if (sbi->s_version != MINIX_V3)
123 ms->s_state = sbi->s_mount_state;
121 mark_buffer_dirty(sbi->s_sbh); 124 mark_buffer_dirty(sbi->s_sbh);
122 } else { 125 } else {
123 /* Mount a partition which is read-only, read-write. */ 126 /* Mount a partition which is read-only, read-write. */
124 sbi->s_mount_state = ms->s_state; 127 if (sbi->s_version != MINIX_V3) {
125 ms->s_state &= ~MINIX_VALID_FS; 128 sbi->s_mount_state = ms->s_state;
129 ms->s_state &= ~MINIX_VALID_FS;
130 } else {
131 sbi->s_mount_state = MINIX_VALID_FS;
132 }
126 mark_buffer_dirty(sbi->s_sbh); 133 mark_buffer_dirty(sbi->s_sbh);
127 134
128 if (!(sbi->s_mount_state & MINIX_VALID_FS)) 135 if (!(sbi->s_mount_state & MINIX_VALID_FS))
@@ -140,7 +147,8 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
140 struct buffer_head *bh; 147 struct buffer_head *bh;
141 struct buffer_head **map; 148 struct buffer_head **map;
142 struct minix_super_block *ms; 149 struct minix_super_block *ms;
143 int i, block; 150 struct minix3_super_block *m3s = NULL;
151 unsigned long i, block;
144 struct inode *root_inode; 152 struct inode *root_inode;
145 struct minix_sb_info *sbi; 153 struct minix_sb_info *sbi;
146 154
@@ -192,6 +200,22 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
192 sbi->s_dirsize = 32; 200 sbi->s_dirsize = 32;
193 sbi->s_namelen = 30; 201 sbi->s_namelen = 30;
194 sbi->s_link_max = MINIX2_LINK_MAX; 202 sbi->s_link_max = MINIX2_LINK_MAX;
203 } else if ( *(__u16 *)(bh->b_data + 24) == MINIX3_SUPER_MAGIC) {
204 m3s = (struct minix3_super_block *) bh->b_data;
205 s->s_magic = m3s->s_magic;
206 sbi->s_imap_blocks = m3s->s_imap_blocks;
207 sbi->s_zmap_blocks = m3s->s_zmap_blocks;
208 sbi->s_firstdatazone = m3s->s_firstdatazone;
209 sbi->s_log_zone_size = m3s->s_log_zone_size;
210 sbi->s_max_size = m3s->s_max_size;
211 sbi->s_ninodes = m3s->s_ninodes;
212 sbi->s_nzones = m3s->s_zones;
213 sbi->s_dirsize = 64;
214 sbi->s_namelen = 60;
215 sbi->s_version = MINIX_V3;
216 sbi->s_link_max = MINIX2_LINK_MAX;
217 sbi->s_mount_state = MINIX_VALID_FS;
218 sb_set_blocksize(s, m3s->s_blocksize);
195 } else 219 } else
196 goto out_no_fs; 220 goto out_no_fs;
197 221
@@ -236,7 +260,8 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
236 s->s_root->d_op = &minix_dentry_operations; 260 s->s_root->d_op = &minix_dentry_operations;
237 261
238 if (!(s->s_flags & MS_RDONLY)) { 262 if (!(s->s_flags & MS_RDONLY)) {
239 ms->s_state &= ~MINIX_VALID_FS; 263 if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */
264 ms->s_state &= ~MINIX_VALID_FS;
240 mark_buffer_dirty(bh); 265 mark_buffer_dirty(bh);
241 } 266 }
242 if (!(sbi->s_mount_state & MINIX_VALID_FS)) 267 if (!(sbi->s_mount_state & MINIX_VALID_FS))
@@ -278,8 +303,8 @@ out_illegal_sb:
278 303
279out_no_fs: 304out_no_fs:
280 if (!silent) 305 if (!silent)
281 printk("VFS: Can't find a Minix or Minix V2 filesystem " 306 printk("VFS: Can't find a Minix filesystem V1 | V2 | V3 "
282 "on device %s\n", s->s_id); 307 "on device %s.\n", s->s_id);
283out_release: 308out_release:
284 brelse(bh); 309 brelse(bh);
285 goto out; 310 goto out;
@@ -344,7 +369,7 @@ static const struct address_space_operations minix_aops = {
344 .bmap = minix_bmap 369 .bmap = minix_bmap
345}; 370};
346 371
347static struct inode_operations minix_symlink_inode_operations = { 372static const struct inode_operations minix_symlink_inode_operations = {
348 .readlink = generic_readlink, 373 .readlink = generic_readlink,
349 .follow_link = page_follow_link_light, 374 .follow_link = page_follow_link_light,
350 .put_link = page_put_link, 375 .put_link = page_put_link,
@@ -537,12 +562,14 @@ int minix_sync_inode(struct inode * inode)
537 562
538int minix_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 563int minix_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
539{ 564{
565 struct inode *dir = dentry->d_parent->d_inode;
566 struct super_block *sb = dir->i_sb;
540 generic_fillattr(dentry->d_inode, stat); 567 generic_fillattr(dentry->d_inode, stat);
541 if (INODE_VERSION(dentry->d_inode) == MINIX_V1) 568 if (INODE_VERSION(dentry->d_inode) == MINIX_V1)
542 stat->blocks = (BLOCK_SIZE / 512) * V1_minix_blocks(stat->size); 569 stat->blocks = (BLOCK_SIZE / 512) * V1_minix_blocks(stat->size, sb);
543 else 570 else
544 stat->blocks = (BLOCK_SIZE / 512) * V2_minix_blocks(stat->size); 571 stat->blocks = (sb->s_blocksize / 512) * V2_minix_blocks(stat->size, sb);
545 stat->blksize = BLOCK_SIZE; 572 stat->blksize = sb->s_blocksize;
546 return 0; 573 return 0;
547} 574}
548 575
diff --git a/fs/minix/itree_common.c b/fs/minix/itree_common.c
index 429baf8de105..a731cabf1540 100644
--- a/fs/minix/itree_common.c
+++ b/fs/minix/itree_common.c
@@ -23,7 +23,7 @@ static inline int verify_chain(Indirect *from, Indirect *to)
23 23
24static inline block_t *block_end(struct buffer_head *bh) 24static inline block_t *block_end(struct buffer_head *bh)
25{ 25{
26 return (block_t *)((char*)bh->b_data + BLOCK_SIZE); 26 return (block_t *)((char*)bh->b_data + bh->b_size);
27} 27}
28 28
29static inline Indirect *get_branch(struct inode *inode, 29static inline Indirect *get_branch(struct inode *inode,
@@ -85,7 +85,7 @@ static int alloc_branch(struct inode *inode,
85 branch[n].key = cpu_to_block(nr); 85 branch[n].key = cpu_to_block(nr);
86 bh = sb_getblk(inode->i_sb, parent); 86 bh = sb_getblk(inode->i_sb, parent);
87 lock_buffer(bh); 87 lock_buffer(bh);
88 memset(bh->b_data, 0, BLOCK_SIZE); 88 memset(bh->b_data, 0, bh->b_size);
89 branch[n].bh = bh; 89 branch[n].bh = bh;
90 branch[n].p = (block_t*) bh->b_data + offsets[n]; 90 branch[n].p = (block_t*) bh->b_data + offsets[n];
91 *branch[n].p = branch[n].key; 91 *branch[n].p = branch[n].key;
@@ -292,6 +292,7 @@ static void free_branches(struct inode *inode, block_t *p, block_t *q, int depth
292 292
293static inline void truncate (struct inode * inode) 293static inline void truncate (struct inode * inode)
294{ 294{
295 struct super_block *sb = inode->i_sb;
295 block_t *idata = i_data(inode); 296 block_t *idata = i_data(inode);
296 int offsets[DEPTH]; 297 int offsets[DEPTH];
297 Indirect chain[DEPTH]; 298 Indirect chain[DEPTH];
@@ -301,7 +302,7 @@ static inline void truncate (struct inode * inode)
301 int first_whole; 302 int first_whole;
302 long iblock; 303 long iblock;
303 304
304 iblock = (inode->i_size + BLOCK_SIZE-1) >> 10; 305 iblock = (inode->i_size + sb->s_blocksize -1) >> sb->s_blocksize_bits;
305 block_truncate_page(inode->i_mapping, inode->i_size, get_block); 306 block_truncate_page(inode->i_mapping, inode->i_size, get_block);
306 307
307 n = block_to_path(inode, iblock, offsets); 308 n = block_to_path(inode, iblock, offsets);
@@ -346,15 +347,16 @@ do_indirects:
346 mark_inode_dirty(inode); 347 mark_inode_dirty(inode);
347} 348}
348 349
349static inline unsigned nblocks(loff_t size) 350static inline unsigned nblocks(loff_t size, struct super_block *sb)
350{ 351{
352 int k = sb->s_blocksize_bits - 10;
351 unsigned blocks, res, direct = DIRECT, i = DEPTH; 353 unsigned blocks, res, direct = DIRECT, i = DEPTH;
352 blocks = (size + BLOCK_SIZE - 1) >> BLOCK_SIZE_BITS; 354 blocks = (size + sb->s_blocksize - 1) >> (BLOCK_SIZE_BITS + k);
353 res = blocks; 355 res = blocks;
354 while (--i && blocks > direct) { 356 while (--i && blocks > direct) {
355 blocks -= direct; 357 blocks -= direct;
356 blocks += BLOCK_SIZE/sizeof(block_t) - 1; 358 blocks += sb->s_blocksize/sizeof(block_t) - 1;
357 blocks /= BLOCK_SIZE/sizeof(block_t); 359 blocks /= sb->s_blocksize/sizeof(block_t);
358 res += blocks; 360 res += blocks;
359 direct = 1; 361 direct = 1;
360 } 362 }
diff --git a/fs/minix/itree_v1.c b/fs/minix/itree_v1.c
index 656b1347a25b..1a5f3bf0bcec 100644
--- a/fs/minix/itree_v1.c
+++ b/fs/minix/itree_v1.c
@@ -55,7 +55,7 @@ void V1_minix_truncate(struct inode * inode)
55 truncate(inode); 55 truncate(inode);
56} 56}
57 57
58unsigned V1_minix_blocks(loff_t size) 58unsigned V1_minix_blocks(loff_t size, struct super_block *sb)
59{ 59{
60 return nblocks(size); 60 return nblocks(size, sb);
61} 61}
diff --git a/fs/minix/itree_v2.c b/fs/minix/itree_v2.c
index 9adcdc754e0f..ad8f0dec4ef4 100644
--- a/fs/minix/itree_v2.c
+++ b/fs/minix/itree_v2.c
@@ -23,10 +23,11 @@ static inline block_t *i_data(struct inode *inode)
23static int block_to_path(struct inode * inode, long block, int offsets[DEPTH]) 23static int block_to_path(struct inode * inode, long block, int offsets[DEPTH])
24{ 24{
25 int n = 0; 25 int n = 0;
26 struct super_block *sb = inode->i_sb;
26 27
27 if (block < 0) { 28 if (block < 0) {
28 printk("minix_bmap: block<0\n"); 29 printk("minix_bmap: block<0\n");
29 } else if (block >= (minix_sb(inode->i_sb)->s_max_size/BLOCK_SIZE)) { 30 } else if (block >= (minix_sb(inode->i_sb)->s_max_size/sb->s_blocksize)) {
30 printk("minix_bmap: block>big\n"); 31 printk("minix_bmap: block>big\n");
31 } else if (block < 7) { 32 } else if (block < 7) {
32 offsets[n++] = block; 33 offsets[n++] = block;
@@ -60,7 +61,7 @@ void V2_minix_truncate(struct inode * inode)
60 truncate(inode); 61 truncate(inode);
61} 62}
62 63
63unsigned V2_minix_blocks(loff_t size) 64unsigned V2_minix_blocks(loff_t size, struct super_block *sb)
64{ 65{
65 return nblocks(size); 66 return nblocks(size, sb);
66} 67}
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index c55b77cdcc8e..73ef84f8fb0b 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -7,11 +7,10 @@
7 * truncated. Else they will be disallowed (ENAMETOOLONG). 7 * truncated. Else they will be disallowed (ENAMETOOLONG).
8 */ 8 */
9#define NO_TRUNCATE 1 9#define NO_TRUNCATE 1
10
11#define INODE_VERSION(inode) minix_sb(inode->i_sb)->s_version 10#define INODE_VERSION(inode) minix_sb(inode->i_sb)->s_version
12
13#define MINIX_V1 0x0001 /* original minix fs */ 11#define MINIX_V1 0x0001 /* original minix fs */
14#define MINIX_V2 0x0002 /* minix V2 fs */ 12#define MINIX_V2 0x0002 /* minix V2 fs */
13#define MINIX_V3 0x0003 /* minix V3 fs */
15 14
16/* 15/*
17 * minix fs inode data in memory 16 * minix fs inode data in memory
@@ -52,12 +51,10 @@ extern struct inode * minix_new_inode(const struct inode * dir, int * error);
52extern void minix_free_inode(struct inode * inode); 51extern void minix_free_inode(struct inode * inode);
53extern unsigned long minix_count_free_inodes(struct minix_sb_info *sbi); 52extern unsigned long minix_count_free_inodes(struct minix_sb_info *sbi);
54extern int minix_new_block(struct inode * inode); 53extern int minix_new_block(struct inode * inode);
55extern void minix_free_block(struct inode * inode, int block); 54extern void minix_free_block(struct inode *inode, unsigned long block);
56extern unsigned long minix_count_free_blocks(struct minix_sb_info *sbi); 55extern unsigned long minix_count_free_blocks(struct minix_sb_info *sbi);
57
58extern int minix_getattr(struct vfsmount *, struct dentry *, struct kstat *); 56extern int minix_getattr(struct vfsmount *, struct dentry *, struct kstat *);
59 57
60extern void V2_minix_truncate(struct inode *);
61extern void V1_minix_truncate(struct inode *); 58extern void V1_minix_truncate(struct inode *);
62extern void V2_minix_truncate(struct inode *); 59extern void V2_minix_truncate(struct inode *);
63extern void minix_truncate(struct inode *); 60extern void minix_truncate(struct inode *);
@@ -65,8 +62,8 @@ extern int minix_sync_inode(struct inode *);
65extern void minix_set_inode(struct inode *, dev_t); 62extern void minix_set_inode(struct inode *, dev_t);
66extern int V1_minix_get_block(struct inode *, long, struct buffer_head *, int); 63extern int V1_minix_get_block(struct inode *, long, struct buffer_head *, int);
67extern int V2_minix_get_block(struct inode *, long, struct buffer_head *, int); 64extern int V2_minix_get_block(struct inode *, long, struct buffer_head *, int);
68extern unsigned V1_minix_blocks(loff_t); 65extern unsigned V1_minix_blocks(loff_t, struct super_block *);
69extern unsigned V2_minix_blocks(loff_t); 66extern unsigned V2_minix_blocks(loff_t, struct super_block *);
70 67
71extern struct minix_dir_entry *minix_find_entry(struct dentry*, struct page**); 68extern struct minix_dir_entry *minix_find_entry(struct dentry*, struct page**);
72extern int minix_add_link(struct dentry*, struct inode*); 69extern int minix_add_link(struct dentry*, struct inode*);
@@ -76,11 +73,10 @@ extern int minix_empty_dir(struct inode*);
76extern void minix_set_link(struct minix_dir_entry*, struct page*, struct inode*); 73extern void minix_set_link(struct minix_dir_entry*, struct page*, struct inode*);
77extern struct minix_dir_entry *minix_dotdot(struct inode*, struct page**); 74extern struct minix_dir_entry *minix_dotdot(struct inode*, struct page**);
78extern ino_t minix_inode_by_name(struct dentry*); 75extern ino_t minix_inode_by_name(struct dentry*);
79
80extern int minix_sync_file(struct file *, struct dentry *, int); 76extern int minix_sync_file(struct file *, struct dentry *, int);
81 77
82extern struct inode_operations minix_file_inode_operations; 78extern const struct inode_operations minix_file_inode_operations;
83extern struct inode_operations minix_dir_inode_operations; 79extern const struct inode_operations minix_dir_inode_operations;
84extern const struct file_operations minix_file_operations; 80extern const struct file_operations minix_file_operations;
85extern const struct file_operations minix_dir_operations; 81extern const struct file_operations minix_dir_operations;
86extern struct dentry_operations minix_dentry_operations; 82extern struct dentry_operations minix_dentry_operations;
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index 299bb66e3bde..f4aa7a939040 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -291,7 +291,7 @@ out:
291/* 291/*
292 * directories can handle most operations... 292 * directories can handle most operations...
293 */ 293 */
294struct inode_operations minix_dir_inode_operations = { 294const struct inode_operations minix_dir_inode_operations = {
295 .create = minix_create, 295 .create = minix_create,
296 .lookup = minix_lookup, 296 .lookup = minix_lookup,
297 .link = minix_link, 297 .link = minix_link,
diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c
index 452461955cbd..30f7d0ae2215 100644
--- a/fs/msdos/namei.c
+++ b/fs/msdos/namei.c
@@ -646,7 +646,7 @@ out:
646 return err; 646 return err;
647} 647}
648 648
649static struct inode_operations msdos_dir_inode_operations = { 649static const struct inode_operations msdos_dir_inode_operations = {
650 .create = msdos_create, 650 .create = msdos_create,
651 .lookup = msdos_lookup, 651 .lookup = msdos_lookup,
652 .unlink = msdos_unlink, 652 .unlink = msdos_unlink,
diff --git a/fs/namei.c b/fs/namei.c
index e4f108f08230..161e2225c757 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2744,7 +2744,7 @@ int page_symlink(struct inode *inode, const char *symname, int len)
2744 mapping_gfp_mask(inode->i_mapping)); 2744 mapping_gfp_mask(inode->i_mapping));
2745} 2745}
2746 2746
2747struct inode_operations page_symlink_inode_operations = { 2747const struct inode_operations page_symlink_inode_operations = {
2748 .readlink = generic_readlink, 2748 .readlink = generic_readlink,
2749 .follow_link = page_follow_link_light, 2749 .follow_link = page_follow_link_light,
2750 .put_link = page_put_link, 2750 .put_link = page_put_link,
diff --git a/fs/namespace.c b/fs/namespace.c
index 5ef336c1103c..fd999cab7b57 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -53,9 +53,8 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
53 53
54struct vfsmount *alloc_vfsmnt(const char *name) 54struct vfsmount *alloc_vfsmnt(const char *name)
55{ 55{
56 struct vfsmount *mnt = kmem_cache_alloc(mnt_cache, GFP_KERNEL); 56 struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
57 if (mnt) { 57 if (mnt) {
58 memset(mnt, 0, sizeof(struct vfsmount));
59 atomic_set(&mnt->mnt_count, 1); 58 atomic_set(&mnt->mnt_count, 1);
60 INIT_LIST_HEAD(&mnt->mnt_hash); 59 INIT_LIST_HEAD(&mnt->mnt_hash);
61 INIT_LIST_HEAD(&mnt->mnt_child); 60 INIT_LIST_HEAD(&mnt->mnt_child);
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 73747772c3bb..011ef0b6d2d4 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -58,7 +58,7 @@ const struct file_operations ncp_dir_operations =
58#endif 58#endif
59}; 59};
60 60
61struct inode_operations ncp_dir_inode_operations = 61const struct inode_operations ncp_dir_inode_operations =
62{ 62{
63 .create = ncp_create, 63 .create = ncp_create,
64 .lookup = ncp_lookup, 64 .lookup = ncp_lookup,
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index b91fea03b1c3..6b1f6d27099a 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -297,7 +297,7 @@ const struct file_operations ncp_file_operations =
297 .fsync = ncp_fsync, 297 .fsync = ncp_fsync,
298}; 298};
299 299
300struct inode_operations ncp_file_inode_operations = 300const struct inode_operations ncp_file_inode_operations =
301{ 301{
302 .setattr = ncp_notify_change, 302 .setattr = ncp_notify_change,
303}; 303};
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 67a90bf795d5..14939ddf74f1 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -90,7 +90,7 @@ static int ncp_remount(struct super_block *sb, int *flags, char* data)
90 return 0; 90 return 0;
91} 91}
92 92
93static struct super_operations ncp_sops = 93static const struct super_operations ncp_sops =
94{ 94{
95 .alloc_inode = ncp_alloc_inode, 95 .alloc_inode = ncp_alloc_inode,
96 .destroy_inode = ncp_destroy_inode, 96 .destroy_inode = ncp_destroy_inode,
@@ -229,7 +229,7 @@ static void ncp_set_attr(struct inode *inode, struct ncp_entry_info *nwinfo)
229} 229}
230 230
231#if defined(CONFIG_NCPFS_EXTRAS) || defined(CONFIG_NCPFS_NFS_NS) 231#if defined(CONFIG_NCPFS_EXTRAS) || defined(CONFIG_NCPFS_NFS_NS)
232static struct inode_operations ncp_symlink_inode_operations = { 232static const struct inode_operations ncp_symlink_inode_operations = {
233 .readlink = generic_readlink, 233 .readlink = generic_readlink,
234 .follow_link = page_follow_link_light, 234 .follow_link = page_follow_link_light,
235 .put_link = page_put_link, 235 .put_link = page_put_link,
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 7933e2e99dbc..75f309c8741a 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -71,6 +71,8 @@ static void nfs_callback_svc(struct svc_rqst *rqstp)
71 complete(&nfs_callback_info.started); 71 complete(&nfs_callback_info.started);
72 72
73 for(;;) { 73 for(;;) {
74 char buf[RPC_MAX_ADDRBUFLEN];
75
74 if (signalled()) { 76 if (signalled()) {
75 if (nfs_callback_info.users == 0) 77 if (nfs_callback_info.users == 0)
76 break; 78 break;
@@ -88,8 +90,8 @@ static void nfs_callback_svc(struct svc_rqst *rqstp)
88 __FUNCTION__, -err); 90 __FUNCTION__, -err);
89 break; 91 break;
90 } 92 }
91 dprintk("%s: request from %u.%u.%u.%u\n", __FUNCTION__, 93 dprintk("%s: request from %s\n", __FUNCTION__,
92 NIPQUAD(rqstp->rq_addr.sin_addr.s_addr)); 94 svc_print_addr(rqstp, buf, sizeof(buf)));
93 svc_process(rqstp); 95 svc_process(rqstp);
94 } 96 }
95 97
@@ -106,7 +108,6 @@ static void nfs_callback_svc(struct svc_rqst *rqstp)
106int nfs_callback_up(void) 108int nfs_callback_up(void)
107{ 109{
108 struct svc_serv *serv; 110 struct svc_serv *serv;
109 struct svc_sock *svsk;
110 int ret = 0; 111 int ret = 0;
111 112
112 lock_kernel(); 113 lock_kernel();
@@ -119,17 +120,14 @@ int nfs_callback_up(void)
119 ret = -ENOMEM; 120 ret = -ENOMEM;
120 if (!serv) 121 if (!serv)
121 goto out_err; 122 goto out_err;
122 /* FIXME: We don't want to register this socket with the portmapper */ 123
123 ret = svc_makesock(serv, IPPROTO_TCP, nfs_callback_set_tcpport); 124 ret = svc_makesock(serv, IPPROTO_TCP, nfs_callback_set_tcpport,
124 if (ret < 0) 125 SVC_SOCK_ANONYMOUS);
126 if (ret <= 0)
125 goto out_destroy; 127 goto out_destroy;
126 if (!list_empty(&serv->sv_permsocks)) { 128 nfs_callback_tcpport = ret;
127 svsk = list_entry(serv->sv_permsocks.next, 129 dprintk("Callback port = 0x%x\n", nfs_callback_tcpport);
128 struct svc_sock, sk_list); 130
129 nfs_callback_tcpport = ntohs(inet_sk(svsk->sk_sk)->sport);
130 dprintk ("Callback port = 0x%x\n", nfs_callback_tcpport);
131 } else
132 BUG();
133 ret = svc_create_thread(nfs_callback_svc, serv); 131 ret = svc_create_thread(nfs_callback_svc, serv);
134 if (ret < 0) 132 if (ret < 0)
135 goto out_destroy; 133 goto out_destroy;
@@ -140,6 +138,8 @@ out:
140 unlock_kernel(); 138 unlock_kernel();
141 return ret; 139 return ret;
142out_destroy: 140out_destroy:
141 dprintk("Couldn't create callback socket or server thread; err = %d\n",
142 ret);
143 svc_destroy(serv); 143 svc_destroy(serv);
144out_err: 144out_err:
145 nfs_callback_info.users--; 145 nfs_callback_info.users--;
@@ -166,15 +166,19 @@ void nfs_callback_down(void)
166 166
167static int nfs_callback_authenticate(struct svc_rqst *rqstp) 167static int nfs_callback_authenticate(struct svc_rqst *rqstp)
168{ 168{
169 struct sockaddr_in *addr = &rqstp->rq_addr; 169 struct sockaddr_in *addr = svc_addr_in(rqstp);
170 struct nfs_client *clp; 170 struct nfs_client *clp;
171 char buf[RPC_MAX_ADDRBUFLEN];
171 172
172 /* Don't talk to strangers */ 173 /* Don't talk to strangers */
173 clp = nfs_find_client(addr, 4); 174 clp = nfs_find_client(addr, 4);
174 if (clp == NULL) 175 if (clp == NULL)
175 return SVC_DROP; 176 return SVC_DROP;
176 dprintk("%s: %u.%u.%u.%u NFSv4 callback!\n", __FUNCTION__, NIPQUAD(addr->sin_addr)); 177
178 dprintk("%s: %s NFSv4 callback!\n", __FUNCTION__,
179 svc_print_addr(rqstp, buf, sizeof(buf)));
177 nfs_put_client(clp); 180 nfs_put_client(clp);
181
178 switch (rqstp->rq_authop->flavour) { 182 switch (rqstp->rq_authop->flavour) {
179 case RPC_AUTH_NULL: 183 case RPC_AUTH_NULL:
180 if (rqstp->rq_proc != CB_NULL) 184 if (rqstp->rq_proc != CB_NULL)
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index f8ea1f51f590..849a2029975d 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -176,7 +176,7 @@ static __be32 decode_getattr_args(struct svc_rqst *rqstp, struct xdr_stream *xdr
176 status = decode_fh(xdr, &args->fh); 176 status = decode_fh(xdr, &args->fh);
177 if (unlikely(status != 0)) 177 if (unlikely(status != 0))
178 goto out; 178 goto out;
179 args->addr = &rqstp->rq_addr; 179 args->addr = svc_addr_in(rqstp);
180 status = decode_bitmap(xdr, args->bitmap); 180 status = decode_bitmap(xdr, args->bitmap);
181out: 181out:
182 dprintk("%s: exit with status = %d\n", __FUNCTION__, status); 182 dprintk("%s: exit with status = %d\n", __FUNCTION__, status);
@@ -188,7 +188,7 @@ static __be32 decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr,
188 __be32 *p; 188 __be32 *p;
189 __be32 status; 189 __be32 status;
190 190
191 args->addr = &rqstp->rq_addr; 191 args->addr = svc_addr_in(rqstp);
192 status = decode_stateid(xdr, &args->stateid); 192 status = decode_stateid(xdr, &args->stateid);
193 if (unlikely(status != 0)) 193 if (unlikely(status != 0))
194 goto out; 194 goto out;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index c46e94fed9eb..2190e6c2792e 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1181,7 +1181,7 @@ static struct seq_operations nfs_server_list_ops = {
1181 .show = nfs_server_list_show, 1181 .show = nfs_server_list_show,
1182}; 1182};
1183 1183
1184static struct file_operations nfs_server_list_fops = { 1184static const struct file_operations nfs_server_list_fops = {
1185 .open = nfs_server_list_open, 1185 .open = nfs_server_list_open,
1186 .read = seq_read, 1186 .read = seq_read,
1187 .llseek = seq_lseek, 1187 .llseek = seq_lseek,
@@ -1201,7 +1201,7 @@ static struct seq_operations nfs_volume_list_ops = {
1201 .show = nfs_volume_list_show, 1201 .show = nfs_volume_list_show,
1202}; 1202};
1203 1203
1204static struct file_operations nfs_volume_list_fops = { 1204static const struct file_operations nfs_volume_list_fops = {
1205 .open = nfs_volume_list_open, 1205 .open = nfs_volume_list_open,
1206 .read = seq_read, 1206 .read = seq_read,
1207 .llseek = seq_lseek, 1207 .llseek = seq_lseek,
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 37c1dd642184..92d8ec859e22 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -65,7 +65,7 @@ const struct file_operations nfs_dir_operations = {
65 .fsync = nfs_fsync_dir, 65 .fsync = nfs_fsync_dir,
66}; 66};
67 67
68struct inode_operations nfs_dir_inode_operations = { 68const struct inode_operations nfs_dir_inode_operations = {
69 .create = nfs_create, 69 .create = nfs_create,
70 .lookup = nfs_lookup, 70 .lookup = nfs_lookup,
71 .link = nfs_link, 71 .link = nfs_link,
@@ -81,7 +81,7 @@ struct inode_operations nfs_dir_inode_operations = {
81}; 81};
82 82
83#ifdef CONFIG_NFS_V3 83#ifdef CONFIG_NFS_V3
84struct inode_operations nfs3_dir_inode_operations = { 84const struct inode_operations nfs3_dir_inode_operations = {
85 .create = nfs_create, 85 .create = nfs_create,
86 .lookup = nfs_lookup, 86 .lookup = nfs_lookup,
87 .link = nfs_link, 87 .link = nfs_link,
@@ -104,7 +104,7 @@ struct inode_operations nfs3_dir_inode_operations = {
104#ifdef CONFIG_NFS_V4 104#ifdef CONFIG_NFS_V4
105 105
106static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *); 106static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *);
107struct inode_operations nfs4_dir_inode_operations = { 107const struct inode_operations nfs4_dir_inode_operations = {
108 .create = nfs_create, 108 .create = nfs_create,
109 .lookup = nfs_atomic_lookup, 109 .lookup = nfs_atomic_lookup,
110 .link = nfs_link, 110 .link = nfs_link,
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 9e4a2b70995a..8e66b5a2d490 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -68,14 +68,14 @@ const struct file_operations nfs_file_operations = {
68 .check_flags = nfs_check_flags, 68 .check_flags = nfs_check_flags,
69}; 69};
70 70
71struct inode_operations nfs_file_inode_operations = { 71const struct inode_operations nfs_file_inode_operations = {
72 .permission = nfs_permission, 72 .permission = nfs_permission,
73 .getattr = nfs_getattr, 73 .getattr = nfs_getattr,
74 .setattr = nfs_setattr, 74 .setattr = nfs_setattr,
75}; 75};
76 76
77#ifdef CONFIG_NFS_V3 77#ifdef CONFIG_NFS_V3
78struct inode_operations nfs3_file_inode_operations = { 78const struct inode_operations nfs3_file_inode_operations = {
79 .permission = nfs_permission, 79 .permission = nfs_permission,
80 .getattr = nfs_getattr, 80 .getattr = nfs_getattr,
81 .setattr = nfs_setattr, 81 .setattr = nfs_setattr,
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 371b804e7cc8..7f86e65182e4 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -155,12 +155,12 @@ out_follow:
155 goto out; 155 goto out;
156} 156}
157 157
158struct inode_operations nfs_mountpoint_inode_operations = { 158const struct inode_operations nfs_mountpoint_inode_operations = {
159 .follow_link = nfs_follow_mountpoint, 159 .follow_link = nfs_follow_mountpoint,
160 .getattr = nfs_getattr, 160 .getattr = nfs_getattr,
161}; 161};
162 162
163struct inode_operations nfs_referral_inode_operations = { 163const struct inode_operations nfs_referral_inode_operations = {
164 .follow_link = nfs_follow_mountpoint, 164 .follow_link = nfs_follow_mountpoint,
165}; 165};
166 166
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index f2c88ffe41e0..cf3a17eb5c09 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -151,7 +151,7 @@ struct nfs4_state_recovery_ops {
151}; 151};
152 152
153extern struct dentry_operations nfs4_dentry_operations; 153extern struct dentry_operations nfs4_dentry_operations;
154extern struct inode_operations nfs4_dir_inode_operations; 154extern const struct inode_operations nfs4_dir_inode_operations;
155 155
156/* inode.c */ 156/* inode.c */
157extern ssize_t nfs4_getxattr(struct dentry *, const char *, void *, size_t); 157extern ssize_t nfs4_getxattr(struct dentry *, const char *, void *, size_t);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 1712d0360ee6..f52cf5c33c6c 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3584,7 +3584,7 @@ struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops = {
3584 .recover_lock = nfs4_lock_expired, 3584 .recover_lock = nfs4_lock_expired,
3585}; 3585};
3586 3586
3587static struct inode_operations nfs4_file_inode_operations = { 3587static const struct inode_operations nfs4_file_inode_operations = {
3588 .permission = nfs_permission, 3588 .permission = nfs_permission,
3589 .getattr = nfs_getattr, 3589 .getattr = nfs_getattr,
3590 .setattr = nfs_setattr, 3590 .setattr = nfs_setattr,
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 89da0a38c12c..bb516a2cfbaf 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -44,6 +44,7 @@
44#include <linux/vfs.h> 44#include <linux/vfs.h>
45#include <linux/inet.h> 45#include <linux/inet.h>
46#include <linux/nfs_xdr.h> 46#include <linux/nfs_xdr.h>
47#include <linux/magic.h>
47 48
48#include <asm/system.h> 49#include <asm/system.h>
49#include <asm/uaccess.h> 50#include <asm/uaccess.h>
@@ -81,7 +82,7 @@ struct file_system_type nfs_xdev_fs_type = {
81 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 82 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
82}; 83};
83 84
84static struct super_operations nfs_sops = { 85static const struct super_operations nfs_sops = {
85 .alloc_inode = nfs_alloc_inode, 86 .alloc_inode = nfs_alloc_inode,
86 .destroy_inode = nfs_destroy_inode, 87 .destroy_inode = nfs_destroy_inode,
87 .write_inode = nfs_write_inode, 88 .write_inode = nfs_write_inode,
@@ -125,7 +126,7 @@ struct file_system_type nfs4_referral_fs_type = {
125 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 126 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
126}; 127};
127 128
128static struct super_operations nfs4_sops = { 129static const struct super_operations nfs4_sops = {
129 .alloc_inode = nfs_alloc_inode, 130 .alloc_inode = nfs_alloc_inode,
130 .destroy_inode = nfs_destroy_inode, 131 .destroy_inode = nfs_destroy_inode,
131 .write_inode = nfs_write_inode, 132 .write_inode = nfs_write_inode,
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index 525c136c7d8c..f4a0548b9ce8 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -78,7 +78,7 @@ read_failed:
78/* 78/*
79 * symlinks can't do much... 79 * symlinks can't do much...
80 */ 80 */
81struct inode_operations nfs_symlink_inode_operations = { 81const struct inode_operations nfs_symlink_inode_operations = {
82 .readlink = generic_readlink, 82 .readlink = generic_readlink,
83 .follow_link = nfs_follow_link, 83 .follow_link = nfs_follow_link,
84 .put_link = page_put_link, 84 .put_link = page_put_link,
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 9de89df961f4..9e4067999209 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -714,7 +714,7 @@ __be32
714nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 714nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
715 struct nfsd4_setclientid *setclid) 715 struct nfsd4_setclientid *setclid)
716{ 716{
717 __be32 ip_addr = rqstp->rq_addr.sin_addr.s_addr; 717 struct sockaddr_in *sin = svc_addr_in(rqstp);
718 struct xdr_netobj clname = { 718 struct xdr_netobj clname = {
719 .len = setclid->se_namelen, 719 .len = setclid->se_namelen,
720 .data = setclid->se_name, 720 .data = setclid->se_name,
@@ -749,7 +749,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
749 */ 749 */
750 status = nfserr_clid_inuse; 750 status = nfserr_clid_inuse;
751 if (!cmp_creds(&conf->cl_cred, &rqstp->rq_cred) 751 if (!cmp_creds(&conf->cl_cred, &rqstp->rq_cred)
752 || conf->cl_addr != ip_addr) { 752 || conf->cl_addr != sin->sin_addr.s_addr) {
753 printk("NFSD: setclientid: string in use by client" 753 printk("NFSD: setclientid: string in use by client"
754 "(clientid %08x/%08x)\n", 754 "(clientid %08x/%08x)\n",
755 conf->cl_clientid.cl_boot, conf->cl_clientid.cl_id); 755 conf->cl_clientid.cl_boot, conf->cl_clientid.cl_id);
@@ -769,7 +769,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
769 if (new == NULL) 769 if (new == NULL)
770 goto out; 770 goto out;
771 copy_verf(new, &clverifier); 771 copy_verf(new, &clverifier);
772 new->cl_addr = ip_addr; 772 new->cl_addr = sin->sin_addr.s_addr;
773 copy_cred(&new->cl_cred,&rqstp->rq_cred); 773 copy_cred(&new->cl_cred,&rqstp->rq_cred);
774 gen_clid(new); 774 gen_clid(new);
775 gen_confirm(new); 775 gen_confirm(new);
@@ -801,7 +801,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
801 if (new == NULL) 801 if (new == NULL)
802 goto out; 802 goto out;
803 copy_verf(new,&conf->cl_verifier); 803 copy_verf(new,&conf->cl_verifier);
804 new->cl_addr = ip_addr; 804 new->cl_addr = sin->sin_addr.s_addr;
805 copy_cred(&new->cl_cred,&rqstp->rq_cred); 805 copy_cred(&new->cl_cred,&rqstp->rq_cred);
806 copy_clid(new, conf); 806 copy_clid(new, conf);
807 gen_confirm(new); 807 gen_confirm(new);
@@ -820,7 +820,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
820 if (new == NULL) 820 if (new == NULL)
821 goto out; 821 goto out;
822 copy_verf(new,&clverifier); 822 copy_verf(new,&clverifier);
823 new->cl_addr = ip_addr; 823 new->cl_addr = sin->sin_addr.s_addr;
824 copy_cred(&new->cl_cred,&rqstp->rq_cred); 824 copy_cred(&new->cl_cred,&rqstp->rq_cred);
825 gen_clid(new); 825 gen_clid(new);
826 gen_confirm(new); 826 gen_confirm(new);
@@ -847,7 +847,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
847 if (new == NULL) 847 if (new == NULL)
848 goto out; 848 goto out;
849 copy_verf(new,&clverifier); 849 copy_verf(new,&clverifier);
850 new->cl_addr = ip_addr; 850 new->cl_addr = sin->sin_addr.s_addr;
851 copy_cred(&new->cl_cred,&rqstp->rq_cred); 851 copy_cred(&new->cl_cred,&rqstp->rq_cred);
852 gen_clid(new); 852 gen_clid(new);
853 gen_confirm(new); 853 gen_confirm(new);
@@ -881,7 +881,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
881 struct nfsd4_compound_state *cstate, 881 struct nfsd4_compound_state *cstate,
882 struct nfsd4_setclientid_confirm *setclientid_confirm) 882 struct nfsd4_setclientid_confirm *setclientid_confirm)
883{ 883{
884 __be32 ip_addr = rqstp->rq_addr.sin_addr.s_addr; 884 struct sockaddr_in *sin = svc_addr_in(rqstp);
885 struct nfs4_client *conf, *unconf; 885 struct nfs4_client *conf, *unconf;
886 nfs4_verifier confirm = setclientid_confirm->sc_confirm; 886 nfs4_verifier confirm = setclientid_confirm->sc_confirm;
887 clientid_t * clid = &setclientid_confirm->sc_clientid; 887 clientid_t * clid = &setclientid_confirm->sc_clientid;
@@ -900,9 +900,9 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
900 unconf = find_unconfirmed_client(clid); 900 unconf = find_unconfirmed_client(clid);
901 901
902 status = nfserr_clid_inuse; 902 status = nfserr_clid_inuse;
903 if (conf && conf->cl_addr != ip_addr) 903 if (conf && conf->cl_addr != sin->sin_addr.s_addr)
904 goto out; 904 goto out;
905 if (unconf && unconf->cl_addr != ip_addr) 905 if (unconf && unconf->cl_addr != sin->sin_addr.s_addr)
906 goto out; 906 goto out;
907 907
908 if ((conf && unconf) && 908 if ((conf && unconf) &&
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index f90d70475854..578f2c9d56be 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -185,7 +185,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp, int type)
185 rp->c_state = RC_INPROG; 185 rp->c_state = RC_INPROG;
186 rp->c_xid = xid; 186 rp->c_xid = xid;
187 rp->c_proc = proc; 187 rp->c_proc = proc;
188 rp->c_addr = rqstp->rq_addr; 188 memcpy(&rp->c_addr, svc_addr_in(rqstp), sizeof(rp->c_addr));
189 rp->c_prot = proto; 189 rp->c_prot = proto;
190 rp->c_vers = vers; 190 rp->c_vers = vers;
191 rp->c_timestamp = jiffies; 191 rp->c_timestamp = jiffies;
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index c59d6fbb7a6b..a0b4282cb284 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -20,6 +20,7 @@
20#include <linux/mount.h> 20#include <linux/mount.h>
21#include <asm/pgtable.h> 21#include <asm/pgtable.h>
22 22
23#include <linux/sunrpc/clnt.h>
23#include <linux/sunrpc/svc.h> 24#include <linux/sunrpc/svc.h>
24#include <linux/nfsd/nfsd.h> 25#include <linux/nfsd/nfsd.h>
25 26
@@ -180,10 +181,10 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
180 /* Check if the request originated from a secure port. */ 181 /* Check if the request originated from a secure port. */
181 error = nfserr_perm; 182 error = nfserr_perm;
182 if (!rqstp->rq_secure && EX_SECURE(exp)) { 183 if (!rqstp->rq_secure && EX_SECURE(exp)) {
184 char buf[RPC_MAX_ADDRBUFLEN];
183 printk(KERN_WARNING 185 printk(KERN_WARNING
184 "nfsd: request from insecure port (%u.%u.%u.%u:%d)!\n", 186 "nfsd: request from insecure port %s!\n",
185 NIPQUAD(rqstp->rq_addr.sin_addr.s_addr), 187 svc_print_addr(rqstp, buf, sizeof(buf)));
186 ntohs(rqstp->rq_addr.sin_port));
187 goto out; 188 goto out;
188 } 189 }
189 190
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index ec983b777680..5cc2eec981b8 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -19,6 +19,7 @@
19#include <linux/unistd.h> 19#include <linux/unistd.h>
20#include <linux/slab.h> 20#include <linux/slab.h>
21 21
22#include <linux/sunrpc/clnt.h>
22#include <linux/sunrpc/svc.h> 23#include <linux/sunrpc/svc.h>
23#include <linux/nfsd/nfsd.h> 24#include <linux/nfsd/nfsd.h>
24#include <linux/nfsd/cache.h> 25#include <linux/nfsd/cache.h>
@@ -147,10 +148,10 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp,
147 */ 148 */
148 149
149 if (NFSSVC_MAXBLKSIZE_V2 < argp->count) { 150 if (NFSSVC_MAXBLKSIZE_V2 < argp->count) {
151 char buf[RPC_MAX_ADDRBUFLEN];
150 printk(KERN_NOTICE 152 printk(KERN_NOTICE
151 "oversized read request from %u.%u.%u.%u:%d (%d bytes)\n", 153 "oversized read request from %s (%d bytes)\n",
152 NIPQUAD(rqstp->rq_addr.sin_addr.s_addr), 154 svc_print_addr(rqstp, buf, sizeof(buf)),
153 ntohs(rqstp->rq_addr.sin_port),
154 argp->count); 155 argp->count);
155 argp->count = NFSSVC_MAXBLKSIZE_V2; 156 argp->count = NFSSVC_MAXBLKSIZE_V2;
156 } 157 }
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index fbf5d51947ea..d7759ce6ed94 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -235,7 +235,8 @@ static int nfsd_init_socks(int port)
235 235
236 error = lockd_up(IPPROTO_UDP); 236 error = lockd_up(IPPROTO_UDP);
237 if (error >= 0) { 237 if (error >= 0) {
238 error = svc_makesock(nfsd_serv, IPPROTO_UDP, port); 238 error = svc_makesock(nfsd_serv, IPPROTO_UDP, port,
239 SVC_SOCK_DEFAULTS);
239 if (error < 0) 240 if (error < 0)
240 lockd_down(); 241 lockd_down();
241 } 242 }
@@ -245,7 +246,8 @@ static int nfsd_init_socks(int port)
245#ifdef CONFIG_NFSD_TCP 246#ifdef CONFIG_NFSD_TCP
246 error = lockd_up(IPPROTO_TCP); 247 error = lockd_up(IPPROTO_TCP);
247 if (error >= 0) { 248 if (error >= 0) {
248 error = svc_makesock(nfsd_serv, IPPROTO_TCP, port); 249 error = svc_makesock(nfsd_serv, IPPROTO_TCP, port,
250 SVC_SOCK_DEFAULTS);
249 if (error < 0) 251 if (error < 0)
250 lockd_down(); 252 lockd_down();
251 } 253 }
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index c577d8e1bd95..7659cc192995 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1921,7 +1921,7 @@ s64 ntfs_attr_extend_allocation(ntfs_inode *ni, s64 new_alloc_size,
1921 u32 attr_len = 0; /* Silence stupid gcc warning. */ 1921 u32 attr_len = 0; /* Silence stupid gcc warning. */
1922 bool mp_rebuilt; 1922 bool mp_rebuilt;
1923 1923
1924#ifdef NTFS_DEBUG 1924#ifdef DEBUG
1925 read_lock_irqsave(&ni->size_lock, flags); 1925 read_lock_irqsave(&ni->size_lock, flags);
1926 allocated_size = ni->allocated_size; 1926 allocated_size = ni->allocated_size;
1927 read_unlock_irqrestore(&ni->size_lock, flags); 1927 read_unlock_irqrestore(&ni->size_lock, flags);
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 076c9420c257..d69c4595ccd0 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2328,7 +2328,7 @@ const struct file_operations ntfs_file_ops = {
2328 the data source. */ 2328 the data source. */
2329}; 2329};
2330 2330
2331struct inode_operations ntfs_file_inode_ops = { 2331const struct inode_operations ntfs_file_inode_ops = {
2332#ifdef NTFS_RW 2332#ifdef NTFS_RW
2333 .truncate = ntfs_truncate_vfs, 2333 .truncate = ntfs_truncate_vfs,
2334 .setattr = ntfs_setattr, 2334 .setattr = ntfs_setattr,
@@ -2337,4 +2337,4 @@ struct inode_operations ntfs_file_inode_ops = {
2337 2337
2338const struct file_operations ntfs_empty_file_ops = {}; 2338const struct file_operations ntfs_empty_file_ops = {};
2339 2339
2340struct inode_operations ntfs_empty_inode_ops = {}; 2340const struct inode_operations ntfs_empty_inode_ops = {};
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c
index eddb2247cec5..bff01a54675a 100644
--- a/fs/ntfs/namei.c
+++ b/fs/ntfs/namei.c
@@ -359,7 +359,7 @@ err_out:
359/** 359/**
360 * Inode operations for directories. 360 * Inode operations for directories.
361 */ 361 */
362struct inode_operations ntfs_dir_inode_ops = { 362const struct inode_operations ntfs_dir_inode_ops = {
363 .lookup = ntfs_lookup, /* VFS: Lookup directory. */ 363 .lookup = ntfs_lookup, /* VFS: Lookup directory. */
364}; 364};
365 365
diff --git a/fs/ntfs/ntfs.h b/fs/ntfs/ntfs.h
index a12847ae467d..d73f5a9ac341 100644
--- a/fs/ntfs/ntfs.h
+++ b/fs/ntfs/ntfs.h
@@ -61,13 +61,13 @@ extern const struct address_space_operations ntfs_aops;
61extern const struct address_space_operations ntfs_mst_aops; 61extern const struct address_space_operations ntfs_mst_aops;
62 62
63extern const struct file_operations ntfs_file_ops; 63extern const struct file_operations ntfs_file_ops;
64extern struct inode_operations ntfs_file_inode_ops; 64extern const struct inode_operations ntfs_file_inode_ops;
65 65
66extern const struct file_operations ntfs_dir_ops; 66extern const struct file_operations ntfs_dir_ops;
67extern struct inode_operations ntfs_dir_inode_ops; 67extern const struct inode_operations ntfs_dir_inode_ops;
68 68
69extern const struct file_operations ntfs_empty_file_ops; 69extern const struct file_operations ntfs_empty_file_ops;
70extern struct inode_operations ntfs_empty_inode_ops; 70extern const struct inode_operations ntfs_empty_inode_ops;
71 71
72extern struct export_operations ntfs_export_ops; 72extern struct export_operations ntfs_export_ops;
73 73
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index babf94d90def..1594c90b7164 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -2699,7 +2699,7 @@ static int ntfs_statfs(struct dentry *dentry, struct kstatfs *sfs)
2699/** 2699/**
2700 * The complete super operations. 2700 * The complete super operations.
2701 */ 2701 */
2702static struct super_operations ntfs_sops = { 2702static const struct super_operations ntfs_sops = {
2703 .alloc_inode = ntfs_alloc_big_inode, /* VFS: Allocate new inode. */ 2703 .alloc_inode = ntfs_alloc_big_inode, /* VFS: Allocate new inode. */
2704 .destroy_inode = ntfs_destroy_big_inode, /* VFS: Deallocate inode. */ 2704 .destroy_inode = ntfs_destroy_big_inode, /* VFS: Deallocate inode. */
2705#ifdef NTFS_RW 2705#ifdef NTFS_RW
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 277ca67a2ad6..5a9779bb9236 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -184,10 +184,9 @@ static void o2hb_disarm_write_timeout(struct o2hb_region *reg)
184 flush_scheduled_work(); 184 flush_scheduled_work();
185} 185}
186 186
187static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc, 187static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc)
188 unsigned int num_ios)
189{ 188{
190 atomic_set(&wc->wc_num_reqs, num_ios); 189 atomic_set(&wc->wc_num_reqs, 1);
191 init_completion(&wc->wc_io_complete); 190 init_completion(&wc->wc_io_complete);
192 wc->wc_error = 0; 191 wc->wc_error = 0;
193} 192}
@@ -212,6 +211,7 @@ static void o2hb_wait_on_io(struct o2hb_region *reg,
212 struct address_space *mapping = reg->hr_bdev->bd_inode->i_mapping; 211 struct address_space *mapping = reg->hr_bdev->bd_inode->i_mapping;
213 212
214 blk_run_address_space(mapping); 213 blk_run_address_space(mapping);
214 o2hb_bio_wait_dec(wc, 1);
215 215
216 wait_for_completion(&wc->wc_io_complete); 216 wait_for_completion(&wc->wc_io_complete);
217} 217}
@@ -231,6 +231,7 @@ static int o2hb_bio_end_io(struct bio *bio,
231 return 1; 231 return 1;
232 232
233 o2hb_bio_wait_dec(wc, 1); 233 o2hb_bio_wait_dec(wc, 1);
234 bio_put(bio);
234 return 0; 235 return 0;
235} 236}
236 237
@@ -238,23 +239,22 @@ static int o2hb_bio_end_io(struct bio *bio,
238 * start_slot. */ 239 * start_slot. */
239static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg, 240static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg,
240 struct o2hb_bio_wait_ctxt *wc, 241 struct o2hb_bio_wait_ctxt *wc,
241 unsigned int start_slot, 242 unsigned int *current_slot,
242 unsigned int num_slots) 243 unsigned int max_slots)
243{ 244{
244 int i, nr_vecs, len, first_page, last_page; 245 int len, current_page;
245 unsigned int vec_len, vec_start; 246 unsigned int vec_len, vec_start;
246 unsigned int bits = reg->hr_block_bits; 247 unsigned int bits = reg->hr_block_bits;
247 unsigned int spp = reg->hr_slots_per_page; 248 unsigned int spp = reg->hr_slots_per_page;
249 unsigned int cs = *current_slot;
248 struct bio *bio; 250 struct bio *bio;
249 struct page *page; 251 struct page *page;
250 252
251 nr_vecs = (num_slots + spp - 1) / spp;
252
253 /* Testing has shown this allocation to take long enough under 253 /* Testing has shown this allocation to take long enough under
254 * GFP_KERNEL that the local node can get fenced. It would be 254 * GFP_KERNEL that the local node can get fenced. It would be
255 * nicest if we could pre-allocate these bios and avoid this 255 * nicest if we could pre-allocate these bios and avoid this
256 * all together. */ 256 * all together. */
257 bio = bio_alloc(GFP_ATOMIC, nr_vecs); 257 bio = bio_alloc(GFP_ATOMIC, 16);
258 if (!bio) { 258 if (!bio) {
259 mlog(ML_ERROR, "Could not alloc slots BIO!\n"); 259 mlog(ML_ERROR, "Could not alloc slots BIO!\n");
260 bio = ERR_PTR(-ENOMEM); 260 bio = ERR_PTR(-ENOMEM);
@@ -262,137 +262,53 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg,
262 } 262 }
263 263
264 /* Must put everything in 512 byte sectors for the bio... */ 264 /* Must put everything in 512 byte sectors for the bio... */
265 bio->bi_sector = (reg->hr_start_block + start_slot) << (bits - 9); 265 bio->bi_sector = (reg->hr_start_block + cs) << (bits - 9);
266 bio->bi_bdev = reg->hr_bdev; 266 bio->bi_bdev = reg->hr_bdev;
267 bio->bi_private = wc; 267 bio->bi_private = wc;
268 bio->bi_end_io = o2hb_bio_end_io; 268 bio->bi_end_io = o2hb_bio_end_io;
269 269
270 first_page = start_slot / spp; 270 vec_start = (cs << bits) % PAGE_CACHE_SIZE;
271 last_page = first_page + nr_vecs; 271 while(cs < max_slots) {
272 vec_start = (start_slot << bits) % PAGE_CACHE_SIZE; 272 current_page = cs / spp;
273 for(i = first_page; i < last_page; i++) { 273 page = reg->hr_slot_data[current_page];
274 page = reg->hr_slot_data[i];
275 274
276 vec_len = PAGE_CACHE_SIZE; 275 vec_len = min(PAGE_CACHE_SIZE,
277 /* last page might be short */ 276 (max_slots-cs) * (PAGE_CACHE_SIZE/spp) );
278 if (((i + 1) * spp) > (start_slot + num_slots))
279 vec_len = ((num_slots + start_slot) % spp) << bits;
280 vec_len -= vec_start;
281 277
282 mlog(ML_HB_BIO, "page %d, vec_len = %u, vec_start = %u\n", 278 mlog(ML_HB_BIO, "page %d, vec_len = %u, vec_start = %u\n",
283 i, vec_len, vec_start); 279 current_page, vec_len, vec_start);
284 280
285 len = bio_add_page(bio, page, vec_len, vec_start); 281 len = bio_add_page(bio, page, vec_len, vec_start);
286 if (len != vec_len) { 282 if (len != vec_len) break;
287 bio_put(bio);
288 bio = ERR_PTR(-EIO);
289
290 mlog(ML_ERROR, "Error adding page to bio i = %d, "
291 "vec_len = %u, len = %d\n, start = %u\n",
292 i, vec_len, len, vec_start);
293 goto bail;
294 }
295 283
284 cs += vec_len / (PAGE_CACHE_SIZE/spp);
296 vec_start = 0; 285 vec_start = 0;
297 } 286 }
298 287
299bail: 288bail:
289 *current_slot = cs;
300 return bio; 290 return bio;
301} 291}
302 292
303/*
304 * Compute the maximum number of sectors the bdev can handle in one bio,
305 * as a power of two.
306 *
307 * Stolen from oracleasm, thanks Joel!
308 */
309static int compute_max_sectors(struct block_device *bdev)
310{
311 int max_pages, max_sectors, pow_two_sectors;
312
313 struct request_queue *q;
314
315 q = bdev_get_queue(bdev);
316 max_pages = q->max_sectors >> (PAGE_SHIFT - 9);
317 if (max_pages > BIO_MAX_PAGES)
318 max_pages = BIO_MAX_PAGES;
319 if (max_pages > q->max_phys_segments)
320 max_pages = q->max_phys_segments;
321 if (max_pages > q->max_hw_segments)
322 max_pages = q->max_hw_segments;
323 max_pages--; /* Handle I/Os that straddle a page */
324
325 if (max_pages) {
326 max_sectors = max_pages << (PAGE_SHIFT - 9);
327 } else {
328 /* If BIO contains 1 or less than 1 page. */
329 max_sectors = q->max_sectors;
330 }
331 /* Why is fls() 1-based???? */
332 pow_two_sectors = 1 << (fls(max_sectors) - 1);
333
334 return pow_two_sectors;
335}
336
337static inline void o2hb_compute_request_limits(struct o2hb_region *reg,
338 unsigned int num_slots,
339 unsigned int *num_bios,
340 unsigned int *slots_per_bio)
341{
342 unsigned int max_sectors, io_sectors;
343
344 max_sectors = compute_max_sectors(reg->hr_bdev);
345
346 io_sectors = num_slots << (reg->hr_block_bits - 9);
347
348 *num_bios = (io_sectors + max_sectors - 1) / max_sectors;
349 *slots_per_bio = max_sectors >> (reg->hr_block_bits - 9);
350
351 mlog(ML_HB_BIO, "My io size is %u sectors for %u slots. This "
352 "device can handle %u sectors of I/O\n", io_sectors, num_slots,
353 max_sectors);
354 mlog(ML_HB_BIO, "Will need %u bios holding %u slots each\n",
355 *num_bios, *slots_per_bio);
356}
357
358static int o2hb_read_slots(struct o2hb_region *reg, 293static int o2hb_read_slots(struct o2hb_region *reg,
359 unsigned int max_slots) 294 unsigned int max_slots)
360{ 295{
361 unsigned int num_bios, slots_per_bio, start_slot, num_slots; 296 unsigned int current_slot=0;
362 int i, status; 297 int status;
363 struct o2hb_bio_wait_ctxt wc; 298 struct o2hb_bio_wait_ctxt wc;
364 struct bio **bios;
365 struct bio *bio; 299 struct bio *bio;
366 300
367 o2hb_compute_request_limits(reg, max_slots, &num_bios, &slots_per_bio); 301 o2hb_bio_wait_init(&wc);
368 302
369 bios = kcalloc(num_bios, sizeof(struct bio *), GFP_KERNEL); 303 while(current_slot < max_slots) {
370 if (!bios) { 304 bio = o2hb_setup_one_bio(reg, &wc, &current_slot, max_slots);
371 status = -ENOMEM;
372 mlog_errno(status);
373 return status;
374 }
375
376 o2hb_bio_wait_init(&wc, num_bios);
377
378 num_slots = slots_per_bio;
379 for(i = 0; i < num_bios; i++) {
380 start_slot = i * slots_per_bio;
381
382 /* adjust num_slots at last bio */
383 if (max_slots < (start_slot + num_slots))
384 num_slots = max_slots - start_slot;
385
386 bio = o2hb_setup_one_bio(reg, &wc, start_slot, num_slots);
387 if (IS_ERR(bio)) { 305 if (IS_ERR(bio)) {
388 o2hb_bio_wait_dec(&wc, num_bios - i);
389
390 status = PTR_ERR(bio); 306 status = PTR_ERR(bio);
391 mlog_errno(status); 307 mlog_errno(status);
392 goto bail_and_wait; 308 goto bail_and_wait;
393 } 309 }
394 bios[i] = bio;
395 310
311 atomic_inc(&wc.wc_num_reqs);
396 submit_bio(READ, bio); 312 submit_bio(READ, bio);
397 } 313 }
398 314
@@ -403,38 +319,30 @@ bail_and_wait:
403 if (wc.wc_error && !status) 319 if (wc.wc_error && !status)
404 status = wc.wc_error; 320 status = wc.wc_error;
405 321
406 if (bios) {
407 for(i = 0; i < num_bios; i++)
408 if (bios[i])
409 bio_put(bios[i]);
410 kfree(bios);
411 }
412
413 return status; 322 return status;
414} 323}
415 324
416static int o2hb_issue_node_write(struct o2hb_region *reg, 325static int o2hb_issue_node_write(struct o2hb_region *reg,
417 struct bio **write_bio,
418 struct o2hb_bio_wait_ctxt *write_wc) 326 struct o2hb_bio_wait_ctxt *write_wc)
419{ 327{
420 int status; 328 int status;
421 unsigned int slot; 329 unsigned int slot;
422 struct bio *bio; 330 struct bio *bio;
423 331
424 o2hb_bio_wait_init(write_wc, 1); 332 o2hb_bio_wait_init(write_wc);
425 333
426 slot = o2nm_this_node(); 334 slot = o2nm_this_node();
427 335
428 bio = o2hb_setup_one_bio(reg, write_wc, slot, 1); 336 bio = o2hb_setup_one_bio(reg, write_wc, &slot, slot+1);
429 if (IS_ERR(bio)) { 337 if (IS_ERR(bio)) {
430 status = PTR_ERR(bio); 338 status = PTR_ERR(bio);
431 mlog_errno(status); 339 mlog_errno(status);
432 goto bail; 340 goto bail;
433 } 341 }
434 342
343 atomic_inc(&write_wc->wc_num_reqs);
435 submit_bio(WRITE, bio); 344 submit_bio(WRITE, bio);
436 345
437 *write_bio = bio;
438 status = 0; 346 status = 0;
439bail: 347bail:
440 return status; 348 return status;
@@ -826,7 +734,6 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
826{ 734{
827 int i, ret, highest_node, change = 0; 735 int i, ret, highest_node, change = 0;
828 unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)]; 736 unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)];
829 struct bio *write_bio;
830 struct o2hb_bio_wait_ctxt write_wc; 737 struct o2hb_bio_wait_ctxt write_wc;
831 738
832 ret = o2nm_configured_node_map(configured_nodes, 739 ret = o2nm_configured_node_map(configured_nodes,
@@ -864,7 +771,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
864 771
865 /* And fire off the write. Note that we don't wait on this I/O 772 /* And fire off the write. Note that we don't wait on this I/O
866 * until later. */ 773 * until later. */
867 ret = o2hb_issue_node_write(reg, &write_bio, &write_wc); 774 ret = o2hb_issue_node_write(reg, &write_wc);
868 if (ret < 0) { 775 if (ret < 0) {
869 mlog_errno(ret); 776 mlog_errno(ret);
870 return ret; 777 return ret;
@@ -882,7 +789,6 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
882 * people we find in our steady state have seen us. 789 * people we find in our steady state have seen us.
883 */ 790 */
884 o2hb_wait_on_io(reg, &write_wc); 791 o2hb_wait_on_io(reg, &write_wc);
885 bio_put(write_bio);
886 if (write_wc.wc_error) { 792 if (write_wc.wc_error) {
887 /* Do not re-arm the write timeout on I/O error - we 793 /* Do not re-arm the write timeout on I/O error - we
888 * can't be sure that the new block ever made it to 794 * can't be sure that the new block ever made it to
@@ -943,7 +849,6 @@ static int o2hb_thread(void *data)
943{ 849{
944 int i, ret; 850 int i, ret;
945 struct o2hb_region *reg = data; 851 struct o2hb_region *reg = data;
946 struct bio *write_bio;
947 struct o2hb_bio_wait_ctxt write_wc; 852 struct o2hb_bio_wait_ctxt write_wc;
948 struct timeval before_hb, after_hb; 853 struct timeval before_hb, after_hb;
949 unsigned int elapsed_msec; 854 unsigned int elapsed_msec;
@@ -993,10 +898,9 @@ static int o2hb_thread(void *data)
993 * 898 *
994 * XXX: Should we skip this on unclean_stop? */ 899 * XXX: Should we skip this on unclean_stop? */
995 o2hb_prepare_block(reg, 0); 900 o2hb_prepare_block(reg, 0);
996 ret = o2hb_issue_node_write(reg, &write_bio, &write_wc); 901 ret = o2hb_issue_node_write(reg, &write_wc);
997 if (ret == 0) { 902 if (ret == 0) {
998 o2hb_wait_on_io(reg, &write_wc); 903 o2hb_wait_on_io(reg, &write_wc);
999 bio_put(write_bio);
1000 } else { 904 } else {
1001 mlog_errno(ret); 905 mlog_errno(ret);
1002 } 906 }
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index ae4ff4a6636b..1718215fc018 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -556,6 +556,8 @@ static void o2net_register_callbacks(struct sock *sk,
556 sk->sk_data_ready = o2net_data_ready; 556 sk->sk_data_ready = o2net_data_ready;
557 sk->sk_state_change = o2net_state_change; 557 sk->sk_state_change = o2net_state_change;
558 558
559 mutex_init(&sc->sc_send_lock);
560
559 write_unlock_bh(&sk->sk_callback_lock); 561 write_unlock_bh(&sk->sk_callback_lock);
560} 562}
561 563
@@ -688,6 +690,7 @@ static void o2net_handler_put(struct o2net_msg_handler *nmh)
688 * be given to the handler if their payload is longer than the max. */ 690 * be given to the handler if their payload is longer than the max. */
689int o2net_register_handler(u32 msg_type, u32 key, u32 max_len, 691int o2net_register_handler(u32 msg_type, u32 key, u32 max_len,
690 o2net_msg_handler_func *func, void *data, 692 o2net_msg_handler_func *func, void *data,
693 o2net_post_msg_handler_func *post_func,
691 struct list_head *unreg_list) 694 struct list_head *unreg_list)
692{ 695{
693 struct o2net_msg_handler *nmh = NULL; 696 struct o2net_msg_handler *nmh = NULL;
@@ -722,6 +725,7 @@ int o2net_register_handler(u32 msg_type, u32 key, u32 max_len,
722 725
723 nmh->nh_func = func; 726 nmh->nh_func = func;
724 nmh->nh_func_data = data; 727 nmh->nh_func_data = data;
728 nmh->nh_post_func = post_func;
725 nmh->nh_msg_type = msg_type; 729 nmh->nh_msg_type = msg_type;
726 nmh->nh_max_len = max_len; 730 nmh->nh_max_len = max_len;
727 nmh->nh_key = key; 731 nmh->nh_key = key;
@@ -856,10 +860,12 @@ static void o2net_sendpage(struct o2net_sock_container *sc,
856 ssize_t ret; 860 ssize_t ret;
857 861
858 862
863 mutex_lock(&sc->sc_send_lock);
859 ret = sc->sc_sock->ops->sendpage(sc->sc_sock, 864 ret = sc->sc_sock->ops->sendpage(sc->sc_sock,
860 virt_to_page(kmalloced_virt), 865 virt_to_page(kmalloced_virt),
861 (long)kmalloced_virt & ~PAGE_MASK, 866 (long)kmalloced_virt & ~PAGE_MASK,
862 size, MSG_DONTWAIT); 867 size, MSG_DONTWAIT);
868 mutex_unlock(&sc->sc_send_lock);
863 if (ret != size) { 869 if (ret != size) {
864 mlog(ML_ERROR, "sendpage of size %zu to " SC_NODEF_FMT 870 mlog(ML_ERROR, "sendpage of size %zu to " SC_NODEF_FMT
865 " failed with %zd\n", size, SC_NODEF_ARGS(sc), ret); 871 " failed with %zd\n", size, SC_NODEF_ARGS(sc), ret);
@@ -974,8 +980,10 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
974 980
975 /* finally, convert the message header to network byte-order 981 /* finally, convert the message header to network byte-order
976 * and send */ 982 * and send */
983 mutex_lock(&sc->sc_send_lock);
977 ret = o2net_send_tcp_msg(sc->sc_sock, vec, veclen, 984 ret = o2net_send_tcp_msg(sc->sc_sock, vec, veclen,
978 sizeof(struct o2net_msg) + caller_bytes); 985 sizeof(struct o2net_msg) + caller_bytes);
986 mutex_unlock(&sc->sc_send_lock);
979 msglog(msg, "sending returned %d\n", ret); 987 msglog(msg, "sending returned %d\n", ret);
980 if (ret < 0) { 988 if (ret < 0) {
981 mlog(0, "error returned from o2net_send_tcp_msg=%d\n", ret); 989 mlog(0, "error returned from o2net_send_tcp_msg=%d\n", ret);
@@ -1049,6 +1057,7 @@ static int o2net_process_message(struct o2net_sock_container *sc,
1049 int ret = 0, handler_status; 1057 int ret = 0, handler_status;
1050 enum o2net_system_error syserr; 1058 enum o2net_system_error syserr;
1051 struct o2net_msg_handler *nmh = NULL; 1059 struct o2net_msg_handler *nmh = NULL;
1060 void *ret_data = NULL;
1052 1061
1053 msglog(hdr, "processing message\n"); 1062 msglog(hdr, "processing message\n");
1054 1063
@@ -1101,17 +1110,26 @@ static int o2net_process_message(struct o2net_sock_container *sc,
1101 sc->sc_msg_type = be16_to_cpu(hdr->msg_type); 1110 sc->sc_msg_type = be16_to_cpu(hdr->msg_type);
1102 handler_status = (nmh->nh_func)(hdr, sizeof(struct o2net_msg) + 1111 handler_status = (nmh->nh_func)(hdr, sizeof(struct o2net_msg) +
1103 be16_to_cpu(hdr->data_len), 1112 be16_to_cpu(hdr->data_len),
1104 nmh->nh_func_data); 1113 nmh->nh_func_data, &ret_data);
1105 do_gettimeofday(&sc->sc_tv_func_stop); 1114 do_gettimeofday(&sc->sc_tv_func_stop);
1106 1115
1107out_respond: 1116out_respond:
1108 /* this destroys the hdr, so don't use it after this */ 1117 /* this destroys the hdr, so don't use it after this */
1118 mutex_lock(&sc->sc_send_lock);
1109 ret = o2net_send_status_magic(sc->sc_sock, hdr, syserr, 1119 ret = o2net_send_status_magic(sc->sc_sock, hdr, syserr,
1110 handler_status); 1120 handler_status);
1121 mutex_unlock(&sc->sc_send_lock);
1111 hdr = NULL; 1122 hdr = NULL;
1112 mlog(0, "sending handler status %d, syserr %d returned %d\n", 1123 mlog(0, "sending handler status %d, syserr %d returned %d\n",
1113 handler_status, syserr, ret); 1124 handler_status, syserr, ret);
1114 1125
1126 if (nmh) {
1127 BUG_ON(ret_data != NULL && nmh->nh_post_func == NULL);
1128 if (nmh->nh_post_func)
1129 (nmh->nh_post_func)(handler_status, nmh->nh_func_data,
1130 ret_data);
1131 }
1132
1115out: 1133out:
1116 if (nmh) 1134 if (nmh)
1117 o2net_handler_put(nmh); 1135 o2net_handler_put(nmh);
@@ -1795,13 +1813,13 @@ out:
1795 ready(sk, bytes); 1813 ready(sk, bytes);
1796} 1814}
1797 1815
1798static int o2net_open_listening_sock(__be16 port) 1816static int o2net_open_listening_sock(__be32 addr, __be16 port)
1799{ 1817{
1800 struct socket *sock = NULL; 1818 struct socket *sock = NULL;
1801 int ret; 1819 int ret;
1802 struct sockaddr_in sin = { 1820 struct sockaddr_in sin = {
1803 .sin_family = PF_INET, 1821 .sin_family = PF_INET,
1804 .sin_addr = { .s_addr = (__force u32)htonl(INADDR_ANY) }, 1822 .sin_addr = { .s_addr = (__force u32)addr },
1805 .sin_port = (__force u16)port, 1823 .sin_port = (__force u16)port,
1806 }; 1824 };
1807 1825
@@ -1824,15 +1842,15 @@ static int o2net_open_listening_sock(__be16 port)
1824 sock->sk->sk_reuse = 1; 1842 sock->sk->sk_reuse = 1;
1825 ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin)); 1843 ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin));
1826 if (ret < 0) { 1844 if (ret < 0) {
1827 mlog(ML_ERROR, "unable to bind socket to port %d, ret=%d\n", 1845 mlog(ML_ERROR, "unable to bind socket at %u.%u.%u.%u:%u, "
1828 ntohs(port), ret); 1846 "ret=%d\n", NIPQUAD(addr), ntohs(port), ret);
1829 goto out; 1847 goto out;
1830 } 1848 }
1831 1849
1832 ret = sock->ops->listen(sock, 64); 1850 ret = sock->ops->listen(sock, 64);
1833 if (ret < 0) { 1851 if (ret < 0) {
1834 mlog(ML_ERROR, "unable to listen on port %d, ret=%d\n", 1852 mlog(ML_ERROR, "unable to listen on %u.%u.%u.%u:%u, ret=%d\n",
1835 ntohs(port), ret); 1853 NIPQUAD(addr), ntohs(port), ret);
1836 } 1854 }
1837 1855
1838out: 1856out:
@@ -1865,7 +1883,8 @@ int o2net_start_listening(struct o2nm_node *node)
1865 return -ENOMEM; /* ? */ 1883 return -ENOMEM; /* ? */
1866 } 1884 }
1867 1885
1868 ret = o2net_open_listening_sock(node->nd_ipv4_port); 1886 ret = o2net_open_listening_sock(node->nd_ipv4_address,
1887 node->nd_ipv4_port);
1869 if (ret) { 1888 if (ret) {
1870 destroy_workqueue(o2net_wq); 1889 destroy_workqueue(o2net_wq);
1871 o2net_wq = NULL; 1890 o2net_wq = NULL;
diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h
index 21a4e43df836..da880fc215f0 100644
--- a/fs/ocfs2/cluster/tcp.h
+++ b/fs/ocfs2/cluster/tcp.h
@@ -50,7 +50,10 @@ struct o2net_msg
50 __u8 buf[0]; 50 __u8 buf[0];
51}; 51};
52 52
53typedef int (o2net_msg_handler_func)(struct o2net_msg *msg, u32 len, void *data); 53typedef int (o2net_msg_handler_func)(struct o2net_msg *msg, u32 len, void *data,
54 void **ret_data);
55typedef void (o2net_post_msg_handler_func)(int status, void *data,
56 void *ret_data);
54 57
55#define O2NET_MAX_PAYLOAD_BYTES (4096 - sizeof(struct o2net_msg)) 58#define O2NET_MAX_PAYLOAD_BYTES (4096 - sizeof(struct o2net_msg))
56 59
@@ -99,6 +102,7 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *vec,
99 102
100int o2net_register_handler(u32 msg_type, u32 key, u32 max_len, 103int o2net_register_handler(u32 msg_type, u32 key, u32 max_len,
101 o2net_msg_handler_func *func, void *data, 104 o2net_msg_handler_func *func, void *data,
105 o2net_post_msg_handler_func *post_func,
102 struct list_head *unreg_list); 106 struct list_head *unreg_list);
103void o2net_unregister_handler_list(struct list_head *list); 107void o2net_unregister_handler_list(struct list_head *list);
104 108
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index b700dc9624d1..4dae5df5e467 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -38,6 +38,12 @@
38 * locking semantics of the file system using the protocol. It should 38 * locking semantics of the file system using the protocol. It should
39 * be somewhere else, I'm sure, but right now it isn't. 39 * be somewhere else, I'm sure, but right now it isn't.
40 * 40 *
41 * New in version 7:
42 * - DLM join domain includes the live nodemap
43 *
44 * New in version 6:
45 * - DLM lockres remote refcount fixes.
46 *
41 * New in version 5: 47 * New in version 5:
42 * - Network timeout checking protocol 48 * - Network timeout checking protocol
43 * 49 *
@@ -51,7 +57,7 @@
51 * - full 64 bit i_size in the metadata lock lvbs 57 * - full 64 bit i_size in the metadata lock lvbs
52 * - introduction of "rw" lock and pushing meta/data locking down 58 * - introduction of "rw" lock and pushing meta/data locking down
53 */ 59 */
54#define O2NET_PROTOCOL_VERSION 5ULL 60#define O2NET_PROTOCOL_VERSION 7ULL
55struct o2net_handshake { 61struct o2net_handshake {
56 __be64 protocol_version; 62 __be64 protocol_version;
57 __be64 connector_id; 63 __be64 connector_id;
@@ -149,6 +155,8 @@ struct o2net_sock_container {
149 struct timeval sc_tv_func_stop; 155 struct timeval sc_tv_func_stop;
150 u32 sc_msg_key; 156 u32 sc_msg_key;
151 u16 sc_msg_type; 157 u16 sc_msg_type;
158
159 struct mutex sc_send_lock;
152}; 160};
153 161
154struct o2net_msg_handler { 162struct o2net_msg_handler {
@@ -158,6 +166,8 @@ struct o2net_msg_handler {
158 u32 nh_key; 166 u32 nh_key;
159 o2net_msg_handler_func *nh_func; 167 o2net_msg_handler_func *nh_func;
160 o2net_msg_handler_func *nh_func_data; 168 o2net_msg_handler_func *nh_func_data;
169 o2net_post_msg_handler_func
170 *nh_post_func;
161 struct kref nh_kref; 171 struct kref nh_kref;
162 struct list_head nh_unregister_item; 172 struct list_head nh_unregister_item;
163}; 173};
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index 681046d51393..241cad342a48 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -263,7 +263,8 @@ void dlm_do_local_bast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
263 263
264 264
265 265
266int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data) 266int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
267 void **ret_data)
267{ 268{
268 int ret; 269 int ret;
269 unsigned int locklen; 270 unsigned int locklen;
@@ -311,8 +312,8 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data)
311 past->type != DLM_BAST) { 312 past->type != DLM_BAST) {
312 mlog(ML_ERROR, "Unknown ast type! %d, cookie=%u:%llu" 313 mlog(ML_ERROR, "Unknown ast type! %d, cookie=%u:%llu"
313 "name=%.*s\n", past->type, 314 "name=%.*s\n", past->type,
314 dlm_get_lock_cookie_node(cookie), 315 dlm_get_lock_cookie_node(be64_to_cpu(cookie)),
315 dlm_get_lock_cookie_seq(cookie), 316 dlm_get_lock_cookie_seq(be64_to_cpu(cookie)),
316 locklen, name); 317 locklen, name);
317 ret = DLM_IVLOCKID; 318 ret = DLM_IVLOCKID;
318 goto leave; 319 goto leave;
@@ -323,8 +324,8 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data)
323 mlog(0, "got %sast for unknown lockres! " 324 mlog(0, "got %sast for unknown lockres! "
324 "cookie=%u:%llu, name=%.*s, namelen=%u\n", 325 "cookie=%u:%llu, name=%.*s, namelen=%u\n",
325 past->type == DLM_AST ? "" : "b", 326 past->type == DLM_AST ? "" : "b",
326 dlm_get_lock_cookie_node(cookie), 327 dlm_get_lock_cookie_node(be64_to_cpu(cookie)),
327 dlm_get_lock_cookie_seq(cookie), 328 dlm_get_lock_cookie_seq(be64_to_cpu(cookie)),
328 locklen, name, locklen); 329 locklen, name, locklen);
329 ret = DLM_IVLOCKID; 330 ret = DLM_IVLOCKID;
330 goto leave; 331 goto leave;
@@ -369,7 +370,8 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data)
369 370
370 mlog(0, "got %sast for unknown lock! cookie=%u:%llu, " 371 mlog(0, "got %sast for unknown lock! cookie=%u:%llu, "
371 "name=%.*s, namelen=%u\n", past->type == DLM_AST ? "" : "b", 372 "name=%.*s, namelen=%u\n", past->type == DLM_AST ? "" : "b",
372 dlm_get_lock_cookie_node(cookie), dlm_get_lock_cookie_seq(cookie), 373 dlm_get_lock_cookie_node(be64_to_cpu(cookie)),
374 dlm_get_lock_cookie_seq(be64_to_cpu(cookie)),
373 locklen, name, locklen); 375 locklen, name, locklen);
374 376
375 ret = DLM_NORMAL; 377 ret = DLM_NORMAL;
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 6b6ff76538c5..e90b92f9ece1 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -180,6 +180,11 @@ struct dlm_assert_master_priv
180 unsigned ignore_higher:1; 180 unsigned ignore_higher:1;
181}; 181};
182 182
183struct dlm_deref_lockres_priv
184{
185 struct dlm_lock_resource *deref_res;
186 u8 deref_node;
187};
183 188
184struct dlm_work_item 189struct dlm_work_item
185{ 190{
@@ -191,6 +196,7 @@ struct dlm_work_item
191 struct dlm_request_all_locks_priv ral; 196 struct dlm_request_all_locks_priv ral;
192 struct dlm_mig_lockres_priv ml; 197 struct dlm_mig_lockres_priv ml;
193 struct dlm_assert_master_priv am; 198 struct dlm_assert_master_priv am;
199 struct dlm_deref_lockres_priv dl;
194 } u; 200 } u;
195}; 201};
196 202
@@ -222,6 +228,9 @@ static inline void __dlm_set_joining_node(struct dlm_ctxt *dlm,
222#define DLM_LOCK_RES_DIRTY 0x00000008 228#define DLM_LOCK_RES_DIRTY 0x00000008
223#define DLM_LOCK_RES_IN_PROGRESS 0x00000010 229#define DLM_LOCK_RES_IN_PROGRESS 0x00000010
224#define DLM_LOCK_RES_MIGRATING 0x00000020 230#define DLM_LOCK_RES_MIGRATING 0x00000020
231#define DLM_LOCK_RES_DROPPING_REF 0x00000040
232#define DLM_LOCK_RES_BLOCK_DIRTY 0x00001000
233#define DLM_LOCK_RES_SETREF_INPROG 0x00002000
225 234
226/* max milliseconds to wait to sync up a network failure with a node death */ 235/* max milliseconds to wait to sync up a network failure with a node death */
227#define DLM_NODE_DEATH_WAIT_MAX (5 * 1000) 236#define DLM_NODE_DEATH_WAIT_MAX (5 * 1000)
@@ -265,6 +274,8 @@ struct dlm_lock_resource
265 u8 owner; //node which owns the lock resource, or unknown 274 u8 owner; //node which owns the lock resource, or unknown
266 u16 state; 275 u16 state;
267 char lvb[DLM_LVB_LEN]; 276 char lvb[DLM_LVB_LEN];
277 unsigned int inflight_locks;
278 unsigned long refmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
268}; 279};
269 280
270struct dlm_migratable_lock 281struct dlm_migratable_lock
@@ -367,7 +378,7 @@ enum {
367 DLM_CONVERT_LOCK_MSG, /* 504 */ 378 DLM_CONVERT_LOCK_MSG, /* 504 */
368 DLM_PROXY_AST_MSG, /* 505 */ 379 DLM_PROXY_AST_MSG, /* 505 */
369 DLM_UNLOCK_LOCK_MSG, /* 506 */ 380 DLM_UNLOCK_LOCK_MSG, /* 506 */
370 DLM_UNUSED_MSG2, /* 507 */ 381 DLM_DEREF_LOCKRES_MSG, /* 507 */
371 DLM_MIGRATE_REQUEST_MSG, /* 508 */ 382 DLM_MIGRATE_REQUEST_MSG, /* 508 */
372 DLM_MIG_LOCKRES_MSG, /* 509 */ 383 DLM_MIG_LOCKRES_MSG, /* 509 */
373 DLM_QUERY_JOIN_MSG, /* 510 */ 384 DLM_QUERY_JOIN_MSG, /* 510 */
@@ -417,6 +428,9 @@ struct dlm_master_request
417 u8 name[O2NM_MAX_NAME_LEN]; 428 u8 name[O2NM_MAX_NAME_LEN];
418}; 429};
419 430
431#define DLM_ASSERT_RESPONSE_REASSERT 0x00000001
432#define DLM_ASSERT_RESPONSE_MASTERY_REF 0x00000002
433
420#define DLM_ASSERT_MASTER_MLE_CLEANUP 0x00000001 434#define DLM_ASSERT_MASTER_MLE_CLEANUP 0x00000001
421#define DLM_ASSERT_MASTER_REQUERY 0x00000002 435#define DLM_ASSERT_MASTER_REQUERY 0x00000002
422#define DLM_ASSERT_MASTER_FINISH_MIGRATION 0x00000004 436#define DLM_ASSERT_MASTER_FINISH_MIGRATION 0x00000004
@@ -430,6 +444,8 @@ struct dlm_assert_master
430 u8 name[O2NM_MAX_NAME_LEN]; 444 u8 name[O2NM_MAX_NAME_LEN];
431}; 445};
432 446
447#define DLM_MIGRATE_RESPONSE_MASTERY_REF 0x00000001
448
433struct dlm_migrate_request 449struct dlm_migrate_request
434{ 450{
435 u8 master; 451 u8 master;
@@ -609,12 +625,16 @@ struct dlm_begin_reco
609}; 625};
610 626
611 627
628#define BITS_PER_BYTE 8
629#define BITS_TO_BYTES(bits) (((bits)+BITS_PER_BYTE-1)/BITS_PER_BYTE)
630
612struct dlm_query_join_request 631struct dlm_query_join_request
613{ 632{
614 u8 node_idx; 633 u8 node_idx;
615 u8 pad1[2]; 634 u8 pad1[2];
616 u8 name_len; 635 u8 name_len;
617 u8 domain[O2NM_MAX_NAME_LEN]; 636 u8 domain[O2NM_MAX_NAME_LEN];
637 u8 node_map[BITS_TO_BYTES(O2NM_MAX_NODES)];
618}; 638};
619 639
620struct dlm_assert_joined 640struct dlm_assert_joined
@@ -648,6 +668,16 @@ struct dlm_finalize_reco
648 __be32 pad2; 668 __be32 pad2;
649}; 669};
650 670
671struct dlm_deref_lockres
672{
673 u32 pad1;
674 u16 pad2;
675 u8 node_idx;
676 u8 namelen;
677
678 u8 name[O2NM_MAX_NAME_LEN];
679};
680
651static inline enum dlm_status 681static inline enum dlm_status
652__dlm_lockres_state_to_status(struct dlm_lock_resource *res) 682__dlm_lockres_state_to_status(struct dlm_lock_resource *res)
653{ 683{
@@ -688,16 +718,20 @@ void dlm_lock_put(struct dlm_lock *lock);
688void dlm_lock_attach_lockres(struct dlm_lock *lock, 718void dlm_lock_attach_lockres(struct dlm_lock *lock,
689 struct dlm_lock_resource *res); 719 struct dlm_lock_resource *res);
690 720
691int dlm_create_lock_handler(struct o2net_msg *msg, u32 len, void *data); 721int dlm_create_lock_handler(struct o2net_msg *msg, u32 len, void *data,
692int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data); 722 void **ret_data);
693int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data); 723int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data,
724 void **ret_data);
725int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
726 void **ret_data);
694 727
695void dlm_revert_pending_convert(struct dlm_lock_resource *res, 728void dlm_revert_pending_convert(struct dlm_lock_resource *res,
696 struct dlm_lock *lock); 729 struct dlm_lock *lock);
697void dlm_revert_pending_lock(struct dlm_lock_resource *res, 730void dlm_revert_pending_lock(struct dlm_lock_resource *res,
698 struct dlm_lock *lock); 731 struct dlm_lock *lock);
699 732
700int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data); 733int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data,
734 void **ret_data);
701void dlm_commit_pending_cancel(struct dlm_lock_resource *res, 735void dlm_commit_pending_cancel(struct dlm_lock_resource *res,
702 struct dlm_lock *lock); 736 struct dlm_lock *lock);
703void dlm_commit_pending_unlock(struct dlm_lock_resource *res, 737void dlm_commit_pending_unlock(struct dlm_lock_resource *res,
@@ -721,8 +755,6 @@ void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
721 struct dlm_lock_resource *res); 755 struct dlm_lock_resource *res);
722void dlm_lockres_calc_usage(struct dlm_ctxt *dlm, 756void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
723 struct dlm_lock_resource *res); 757 struct dlm_lock_resource *res);
724void dlm_purge_lockres(struct dlm_ctxt *dlm,
725 struct dlm_lock_resource *lockres);
726static inline void dlm_lockres_get(struct dlm_lock_resource *res) 758static inline void dlm_lockres_get(struct dlm_lock_resource *res)
727{ 759{
728 /* This is called on every lookup, so it might be worth 760 /* This is called on every lookup, so it might be worth
@@ -733,6 +765,10 @@ void dlm_lockres_put(struct dlm_lock_resource *res);
733void __dlm_unhash_lockres(struct dlm_lock_resource *res); 765void __dlm_unhash_lockres(struct dlm_lock_resource *res);
734void __dlm_insert_lockres(struct dlm_ctxt *dlm, 766void __dlm_insert_lockres(struct dlm_ctxt *dlm,
735 struct dlm_lock_resource *res); 767 struct dlm_lock_resource *res);
768struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm,
769 const char *name,
770 unsigned int len,
771 unsigned int hash);
736struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm, 772struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm,
737 const char *name, 773 const char *name,
738 unsigned int len, 774 unsigned int len,
@@ -753,6 +789,47 @@ struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm,
753 const char *name, 789 const char *name,
754 unsigned int namelen); 790 unsigned int namelen);
755 791
792#define dlm_lockres_set_refmap_bit(bit,res) \
793 __dlm_lockres_set_refmap_bit(bit,res,__FILE__,__LINE__)
794#define dlm_lockres_clear_refmap_bit(bit,res) \
795 __dlm_lockres_clear_refmap_bit(bit,res,__FILE__,__LINE__)
796
797static inline void __dlm_lockres_set_refmap_bit(int bit,
798 struct dlm_lock_resource *res,
799 const char *file,
800 int line)
801{
802 //printk("%s:%d:%.*s: setting bit %d\n", file, line,
803 // res->lockname.len, res->lockname.name, bit);
804 set_bit(bit, res->refmap);
805}
806
807static inline void __dlm_lockres_clear_refmap_bit(int bit,
808 struct dlm_lock_resource *res,
809 const char *file,
810 int line)
811{
812 //printk("%s:%d:%.*s: clearing bit %d\n", file, line,
813 // res->lockname.len, res->lockname.name, bit);
814 clear_bit(bit, res->refmap);
815}
816
817void __dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm,
818 struct dlm_lock_resource *res,
819 const char *file,
820 int line);
821void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
822 struct dlm_lock_resource *res,
823 int new_lockres,
824 const char *file,
825 int line);
826#define dlm_lockres_drop_inflight_ref(d,r) \
827 __dlm_lockres_drop_inflight_ref(d,r,__FILE__,__LINE__)
828#define dlm_lockres_grab_inflight_ref(d,r) \
829 __dlm_lockres_grab_inflight_ref(d,r,0,__FILE__,__LINE__)
830#define dlm_lockres_grab_inflight_ref_new(d,r) \
831 __dlm_lockres_grab_inflight_ref(d,r,1,__FILE__,__LINE__)
832
756void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock); 833void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
757void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock); 834void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
758void dlm_do_local_ast(struct dlm_ctxt *dlm, 835void dlm_do_local_ast(struct dlm_ctxt *dlm,
@@ -801,10 +878,7 @@ int dlm_heartbeat_init(struct dlm_ctxt *dlm);
801void dlm_hb_node_down_cb(struct o2nm_node *node, int idx, void *data); 878void dlm_hb_node_down_cb(struct o2nm_node *node, int idx, void *data);
802void dlm_hb_node_up_cb(struct o2nm_node *node, int idx, void *data); 879void dlm_hb_node_up_cb(struct o2nm_node *node, int idx, void *data);
803 880
804int dlm_lockres_is_dirty(struct dlm_ctxt *dlm, struct dlm_lock_resource *res); 881int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res);
805int dlm_migrate_lockres(struct dlm_ctxt *dlm,
806 struct dlm_lock_resource *res,
807 u8 target);
808int dlm_finish_migration(struct dlm_ctxt *dlm, 882int dlm_finish_migration(struct dlm_ctxt *dlm,
809 struct dlm_lock_resource *res, 883 struct dlm_lock_resource *res,
810 u8 old_master); 884 u8 old_master);
@@ -812,15 +886,27 @@ void dlm_lockres_release_ast(struct dlm_ctxt *dlm,
812 struct dlm_lock_resource *res); 886 struct dlm_lock_resource *res);
813void __dlm_lockres_reserve_ast(struct dlm_lock_resource *res); 887void __dlm_lockres_reserve_ast(struct dlm_lock_resource *res);
814 888
815int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data); 889int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data,
816int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data); 890 void **ret_data);
817int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data); 891int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data,
818int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data); 892 void **ret_data);
819int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data); 893void dlm_assert_master_post_handler(int status, void *data, void *ret_data);
820int dlm_request_all_locks_handler(struct o2net_msg *msg, u32 len, void *data); 894int dlm_deref_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
821int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data); 895 void **ret_data);
822int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data); 896int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data,
823int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data); 897 void **ret_data);
898int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
899 void **ret_data);
900int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
901 void **ret_data);
902int dlm_request_all_locks_handler(struct o2net_msg *msg, u32 len, void *data,
903 void **ret_data);
904int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data,
905 void **ret_data);
906int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data,
907 void **ret_data);
908int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data,
909 void **ret_data);
824int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, 910int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
825 u8 nodenum, u8 *real_master); 911 u8 nodenum, u8 *real_master);
826 912
@@ -856,10 +942,12 @@ static inline void __dlm_wait_on_lockres(struct dlm_lock_resource *res)
856int dlm_init_mle_cache(void); 942int dlm_init_mle_cache(void);
857void dlm_destroy_mle_cache(void); 943void dlm_destroy_mle_cache(void);
858void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up); 944void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up);
945int dlm_drop_lockres_ref(struct dlm_ctxt *dlm,
946 struct dlm_lock_resource *res);
859void dlm_clean_master_list(struct dlm_ctxt *dlm, 947void dlm_clean_master_list(struct dlm_ctxt *dlm,
860 u8 dead_node); 948 u8 dead_node);
861int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock); 949int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock);
862 950int __dlm_lockres_has_locks(struct dlm_lock_resource *res);
863int __dlm_lockres_unused(struct dlm_lock_resource *res); 951int __dlm_lockres_unused(struct dlm_lock_resource *res);
864 952
865static inline const char * dlm_lock_mode_name(int mode) 953static inline const char * dlm_lock_mode_name(int mode)
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index c764dc8e40a2..ecb4d997221e 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -286,8 +286,8 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
286 __dlm_print_one_lock_resource(res); 286 __dlm_print_one_lock_resource(res);
287 mlog(ML_ERROR, "converting a remote lock that is already " 287 mlog(ML_ERROR, "converting a remote lock that is already "
288 "converting! (cookie=%u:%llu, conv=%d)\n", 288 "converting! (cookie=%u:%llu, conv=%d)\n",
289 dlm_get_lock_cookie_node(lock->ml.cookie), 289 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
290 dlm_get_lock_cookie_seq(lock->ml.cookie), 290 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
291 lock->ml.convert_type); 291 lock->ml.convert_type);
292 status = DLM_DENIED; 292 status = DLM_DENIED;
293 goto bail; 293 goto bail;
@@ -418,7 +418,8 @@ static enum dlm_status dlm_send_remote_convert_request(struct dlm_ctxt *dlm,
418 * returns: DLM_NORMAL, DLM_IVLOCKID, DLM_BADARGS, 418 * returns: DLM_NORMAL, DLM_IVLOCKID, DLM_BADARGS,
419 * status from __dlmconvert_master 419 * status from __dlmconvert_master
420 */ 420 */
421int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data) 421int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data,
422 void **ret_data)
422{ 423{
423 struct dlm_ctxt *dlm = data; 424 struct dlm_ctxt *dlm = data;
424 struct dlm_convert_lock *cnv = (struct dlm_convert_lock *)msg->buf; 425 struct dlm_convert_lock *cnv = (struct dlm_convert_lock *)msg->buf;
@@ -428,7 +429,7 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data)
428 struct dlm_lockstatus *lksb; 429 struct dlm_lockstatus *lksb;
429 enum dlm_status status = DLM_NORMAL; 430 enum dlm_status status = DLM_NORMAL;
430 u32 flags; 431 u32 flags;
431 int call_ast = 0, kick_thread = 0, ast_reserved = 0; 432 int call_ast = 0, kick_thread = 0, ast_reserved = 0, wake = 0;
432 433
433 if (!dlm_grab(dlm)) { 434 if (!dlm_grab(dlm)) {
434 dlm_error(DLM_REJECTED); 435 dlm_error(DLM_REJECTED);
@@ -479,25 +480,14 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data)
479 } 480 }
480 lock = NULL; 481 lock = NULL;
481 } 482 }
482 if (!lock) {
483 __dlm_print_one_lock_resource(res);
484 list_for_each(iter, &res->granted) {
485 lock = list_entry(iter, struct dlm_lock, list);
486 if (lock->ml.node == cnv->node_idx) {
487 mlog(ML_ERROR, "There is something here "
488 "for node %u, lock->ml.cookie=%llu, "
489 "cnv->cookie=%llu\n", cnv->node_idx,
490 (unsigned long long)lock->ml.cookie,
491 (unsigned long long)cnv->cookie);
492 break;
493 }
494 }
495 lock = NULL;
496 }
497 spin_unlock(&res->spinlock); 483 spin_unlock(&res->spinlock);
498 if (!lock) { 484 if (!lock) {
499 status = DLM_IVLOCKID; 485 status = DLM_IVLOCKID;
500 dlm_error(status); 486 mlog(ML_ERROR, "did not find lock to convert on grant queue! "
487 "cookie=%u:%llu\n",
488 dlm_get_lock_cookie_node(be64_to_cpu(cnv->cookie)),
489 dlm_get_lock_cookie_seq(be64_to_cpu(cnv->cookie)));
490 __dlm_print_one_lock_resource(res);
501 goto leave; 491 goto leave;
502 } 492 }
503 493
@@ -524,8 +514,11 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data)
524 cnv->requested_type, 514 cnv->requested_type,
525 &call_ast, &kick_thread); 515 &call_ast, &kick_thread);
526 res->state &= ~DLM_LOCK_RES_IN_PROGRESS; 516 res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
517 wake = 1;
527 } 518 }
528 spin_unlock(&res->spinlock); 519 spin_unlock(&res->spinlock);
520 if (wake)
521 wake_up(&res->wq);
529 522
530 if (status != DLM_NORMAL) { 523 if (status != DLM_NORMAL) {
531 if (status != DLM_NOTQUEUED) 524 if (status != DLM_NOTQUEUED)
@@ -534,12 +527,7 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data)
534 } 527 }
535 528
536leave: 529leave:
537 if (!lock) 530 if (lock)
538 mlog(ML_ERROR, "did not find lock to convert on grant queue! "
539 "cookie=%u:%llu\n",
540 dlm_get_lock_cookie_node(cnv->cookie),
541 dlm_get_lock_cookie_seq(cnv->cookie));
542 else
543 dlm_lock_put(lock); 531 dlm_lock_put(lock);
544 532
545 /* either queue the ast or release it, if reserved */ 533 /* either queue the ast or release it, if reserved */
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index 3f6c8d88f7af..64239b37e5d4 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -53,6 +53,23 @@ void dlm_print_one_lock_resource(struct dlm_lock_resource *res)
53 spin_unlock(&res->spinlock); 53 spin_unlock(&res->spinlock);
54} 54}
55 55
56static void dlm_print_lockres_refmap(struct dlm_lock_resource *res)
57{
58 int bit;
59 assert_spin_locked(&res->spinlock);
60
61 mlog(ML_NOTICE, " refmap nodes: [ ");
62 bit = 0;
63 while (1) {
64 bit = find_next_bit(res->refmap, O2NM_MAX_NODES, bit);
65 if (bit >= O2NM_MAX_NODES)
66 break;
67 printk("%u ", bit);
68 bit++;
69 }
70 printk("], inflight=%u\n", res->inflight_locks);
71}
72
56void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) 73void __dlm_print_one_lock_resource(struct dlm_lock_resource *res)
57{ 74{
58 struct list_head *iter2; 75 struct list_head *iter2;
@@ -65,6 +82,7 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res)
65 res->owner, res->state); 82 res->owner, res->state);
66 mlog(ML_NOTICE, " last used: %lu, on purge list: %s\n", 83 mlog(ML_NOTICE, " last used: %lu, on purge list: %s\n",
67 res->last_used, list_empty(&res->purge) ? "no" : "yes"); 84 res->last_used, list_empty(&res->purge) ? "no" : "yes");
85 dlm_print_lockres_refmap(res);
68 mlog(ML_NOTICE, " granted queue: \n"); 86 mlog(ML_NOTICE, " granted queue: \n");
69 list_for_each(iter2, &res->granted) { 87 list_for_each(iter2, &res->granted) {
70 lock = list_entry(iter2, struct dlm_lock, list); 88 lock = list_entry(iter2, struct dlm_lock, list);
@@ -72,8 +90,8 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res)
72 mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, " 90 mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, "
73 "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", 91 "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n",
74 lock->ml.type, lock->ml.convert_type, lock->ml.node, 92 lock->ml.type, lock->ml.convert_type, lock->ml.node,
75 dlm_get_lock_cookie_node(lock->ml.cookie), 93 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
76 dlm_get_lock_cookie_seq(lock->ml.cookie), 94 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
77 list_empty(&lock->ast_list) ? 'y' : 'n', 95 list_empty(&lock->ast_list) ? 'y' : 'n',
78 lock->ast_pending ? 'y' : 'n', 96 lock->ast_pending ? 'y' : 'n',
79 list_empty(&lock->bast_list) ? 'y' : 'n', 97 list_empty(&lock->bast_list) ? 'y' : 'n',
@@ -87,8 +105,8 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res)
87 mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, " 105 mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, "
88 "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", 106 "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n",
89 lock->ml.type, lock->ml.convert_type, lock->ml.node, 107 lock->ml.type, lock->ml.convert_type, lock->ml.node,
90 dlm_get_lock_cookie_node(lock->ml.cookie), 108 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
91 dlm_get_lock_cookie_seq(lock->ml.cookie), 109 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
92 list_empty(&lock->ast_list) ? 'y' : 'n', 110 list_empty(&lock->ast_list) ? 'y' : 'n',
93 lock->ast_pending ? 'y' : 'n', 111 lock->ast_pending ? 'y' : 'n',
94 list_empty(&lock->bast_list) ? 'y' : 'n', 112 list_empty(&lock->bast_list) ? 'y' : 'n',
@@ -102,8 +120,8 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res)
102 mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, " 120 mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, "
103 "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", 121 "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n",
104 lock->ml.type, lock->ml.convert_type, lock->ml.node, 122 lock->ml.type, lock->ml.convert_type, lock->ml.node,
105 dlm_get_lock_cookie_node(lock->ml.cookie), 123 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
106 dlm_get_lock_cookie_seq(lock->ml.cookie), 124 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
107 list_empty(&lock->ast_list) ? 'y' : 'n', 125 list_empty(&lock->ast_list) ? 'y' : 'n',
108 lock->ast_pending ? 'y' : 'n', 126 lock->ast_pending ? 'y' : 'n',
109 list_empty(&lock->bast_list) ? 'y' : 'n', 127 list_empty(&lock->bast_list) ? 'y' : 'n',
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index f0b25f2dd205..6087c4749fee 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -48,6 +48,36 @@
48#define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_DOMAIN) 48#define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_DOMAIN)
49#include "cluster/masklog.h" 49#include "cluster/masklog.h"
50 50
51/*
52 * ocfs2 node maps are array of long int, which limits to send them freely
53 * across the wire due to endianness issues. To workaround this, we convert
54 * long ints to byte arrays. Following 3 routines are helper functions to
55 * set/test/copy bits within those array of bytes
56 */
57static inline void byte_set_bit(u8 nr, u8 map[])
58{
59 map[nr >> 3] |= (1UL << (nr & 7));
60}
61
62static inline int byte_test_bit(u8 nr, u8 map[])
63{
64 return ((1UL << (nr & 7)) & (map[nr >> 3])) != 0;
65}
66
67static inline void byte_copymap(u8 dmap[], unsigned long smap[],
68 unsigned int sz)
69{
70 unsigned int nn;
71
72 if (!sz)
73 return;
74
75 memset(dmap, 0, ((sz + 7) >> 3));
76 for (nn = 0 ; nn < sz; nn++)
77 if (test_bit(nn, smap))
78 byte_set_bit(nn, dmap);
79}
80
51static void dlm_free_pagevec(void **vec, int pages) 81static void dlm_free_pagevec(void **vec, int pages)
52{ 82{
53 while (pages--) 83 while (pages--)
@@ -95,10 +125,14 @@ static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events);
95 125
96#define DLM_DOMAIN_BACKOFF_MS 200 126#define DLM_DOMAIN_BACKOFF_MS 200
97 127
98static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data); 128static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
99static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data); 129 void **ret_data);
100static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data); 130static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data,
101static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data); 131 void **ret_data);
132static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data,
133 void **ret_data);
134static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data,
135 void **ret_data);
102 136
103static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm); 137static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm);
104 138
@@ -125,10 +159,10 @@ void __dlm_insert_lockres(struct dlm_ctxt *dlm,
125 hlist_add_head(&res->hash_node, bucket); 159 hlist_add_head(&res->hash_node, bucket);
126} 160}
127 161
128struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm, 162struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm,
129 const char *name, 163 const char *name,
130 unsigned int len, 164 unsigned int len,
131 unsigned int hash) 165 unsigned int hash)
132{ 166{
133 struct hlist_head *bucket; 167 struct hlist_head *bucket;
134 struct hlist_node *list; 168 struct hlist_node *list;
@@ -154,6 +188,37 @@ struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm,
154 return NULL; 188 return NULL;
155} 189}
156 190
191/* intended to be called by functions which do not care about lock
192 * resources which are being purged (most net _handler functions).
193 * this will return NULL for any lock resource which is found but
194 * currently in the process of dropping its mastery reference.
195 * use __dlm_lookup_lockres_full when you need the lock resource
196 * regardless (e.g. dlm_get_lock_resource) */
197struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm,
198 const char *name,
199 unsigned int len,
200 unsigned int hash)
201{
202 struct dlm_lock_resource *res = NULL;
203
204 mlog_entry("%.*s\n", len, name);
205
206 assert_spin_locked(&dlm->spinlock);
207
208 res = __dlm_lookup_lockres_full(dlm, name, len, hash);
209 if (res) {
210 spin_lock(&res->spinlock);
211 if (res->state & DLM_LOCK_RES_DROPPING_REF) {
212 spin_unlock(&res->spinlock);
213 dlm_lockres_put(res);
214 return NULL;
215 }
216 spin_unlock(&res->spinlock);
217 }
218
219 return res;
220}
221
157struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm, 222struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm,
158 const char *name, 223 const char *name,
159 unsigned int len) 224 unsigned int len)
@@ -330,43 +395,60 @@ static void dlm_complete_dlm_shutdown(struct dlm_ctxt *dlm)
330 wake_up(&dlm_domain_events); 395 wake_up(&dlm_domain_events);
331} 396}
332 397
333static void dlm_migrate_all_locks(struct dlm_ctxt *dlm) 398static int dlm_migrate_all_locks(struct dlm_ctxt *dlm)
334{ 399{
335 int i; 400 int i, num, n, ret = 0;
336 struct dlm_lock_resource *res; 401 struct dlm_lock_resource *res;
402 struct hlist_node *iter;
403 struct hlist_head *bucket;
404 int dropped;
337 405
338 mlog(0, "Migrating locks from domain %s\n", dlm->name); 406 mlog(0, "Migrating locks from domain %s\n", dlm->name);
339restart: 407
408 num = 0;
340 spin_lock(&dlm->spinlock); 409 spin_lock(&dlm->spinlock);
341 for (i = 0; i < DLM_HASH_BUCKETS; i++) { 410 for (i = 0; i < DLM_HASH_BUCKETS; i++) {
342 while (!hlist_empty(dlm_lockres_hash(dlm, i))) { 411redo_bucket:
343 res = hlist_entry(dlm_lockres_hash(dlm, i)->first, 412 n = 0;
344 struct dlm_lock_resource, hash_node); 413 bucket = dlm_lockres_hash(dlm, i);
345 /* need reference when manually grabbing lockres */ 414 iter = bucket->first;
415 while (iter) {
416 n++;
417 res = hlist_entry(iter, struct dlm_lock_resource,
418 hash_node);
346 dlm_lockres_get(res); 419 dlm_lockres_get(res);
347 /* this should unhash the lockres 420 /* migrate, if necessary. this will drop the dlm
348 * and exit with dlm->spinlock */ 421 * spinlock and retake it if it does migration. */
349 mlog(0, "purging res=%p\n", res); 422 dropped = dlm_empty_lockres(dlm, res);
350 if (dlm_lockres_is_dirty(dlm, res)) { 423
351 /* HACK! this should absolutely go. 424 spin_lock(&res->spinlock);
352 * need to figure out why some empty 425 __dlm_lockres_calc_usage(dlm, res);
353 * lockreses are still marked dirty */ 426 iter = res->hash_node.next;
354 mlog(ML_ERROR, "lockres %.*s dirty!\n", 427 spin_unlock(&res->spinlock);
355 res->lockname.len, res->lockname.name); 428
356
357 spin_unlock(&dlm->spinlock);
358 dlm_kick_thread(dlm, res);
359 wait_event(dlm->ast_wq, !dlm_lockres_is_dirty(dlm, res));
360 dlm_lockres_put(res);
361 goto restart;
362 }
363 dlm_purge_lockres(dlm, res);
364 dlm_lockres_put(res); 429 dlm_lockres_put(res);
430
431 cond_resched_lock(&dlm->spinlock);
432
433 if (dropped)
434 goto redo_bucket;
365 } 435 }
436 num += n;
437 mlog(0, "%s: touched %d lockreses in bucket %d "
438 "(tot=%d)\n", dlm->name, n, i, num);
366 } 439 }
367 spin_unlock(&dlm->spinlock); 440 spin_unlock(&dlm->spinlock);
368 441 wake_up(&dlm->dlm_thread_wq);
442
443 /* let the dlm thread take care of purging, keep scanning until
444 * nothing remains in the hash */
445 if (num) {
446 mlog(0, "%s: %d lock resources in hash last pass\n",
447 dlm->name, num);
448 ret = -EAGAIN;
449 }
369 mlog(0, "DONE Migrating locks from domain %s\n", dlm->name); 450 mlog(0, "DONE Migrating locks from domain %s\n", dlm->name);
451 return ret;
370} 452}
371 453
372static int dlm_no_joining_node(struct dlm_ctxt *dlm) 454static int dlm_no_joining_node(struct dlm_ctxt *dlm)
@@ -418,7 +500,8 @@ static void __dlm_print_nodes(struct dlm_ctxt *dlm)
418 printk("\n"); 500 printk("\n");
419} 501}
420 502
421static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data) 503static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data,
504 void **ret_data)
422{ 505{
423 struct dlm_ctxt *dlm = data; 506 struct dlm_ctxt *dlm = data;
424 unsigned int node; 507 unsigned int node;
@@ -571,7 +654,9 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm)
571 /* We changed dlm state, notify the thread */ 654 /* We changed dlm state, notify the thread */
572 dlm_kick_thread(dlm, NULL); 655 dlm_kick_thread(dlm, NULL);
573 656
574 dlm_migrate_all_locks(dlm); 657 while (dlm_migrate_all_locks(dlm)) {
658 mlog(0, "%s: more migration to do\n", dlm->name);
659 }
575 dlm_mark_domain_leaving(dlm); 660 dlm_mark_domain_leaving(dlm);
576 dlm_leave_domain(dlm); 661 dlm_leave_domain(dlm);
577 dlm_complete_dlm_shutdown(dlm); 662 dlm_complete_dlm_shutdown(dlm);
@@ -580,11 +665,13 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm)
580} 665}
581EXPORT_SYMBOL_GPL(dlm_unregister_domain); 666EXPORT_SYMBOL_GPL(dlm_unregister_domain);
582 667
583static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data) 668static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
669 void **ret_data)
584{ 670{
585 struct dlm_query_join_request *query; 671 struct dlm_query_join_request *query;
586 enum dlm_query_join_response response; 672 enum dlm_query_join_response response;
587 struct dlm_ctxt *dlm = NULL; 673 struct dlm_ctxt *dlm = NULL;
674 u8 nodenum;
588 675
589 query = (struct dlm_query_join_request *) msg->buf; 676 query = (struct dlm_query_join_request *) msg->buf;
590 677
@@ -608,6 +695,28 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data)
608 695
609 spin_lock(&dlm_domain_lock); 696 spin_lock(&dlm_domain_lock);
610 dlm = __dlm_lookup_domain_full(query->domain, query->name_len); 697 dlm = __dlm_lookup_domain_full(query->domain, query->name_len);
698 if (!dlm)
699 goto unlock_respond;
700
701 /*
702 * There is a small window where the joining node may not see the
703 * node(s) that just left but still part of the cluster. DISALLOW
704 * join request if joining node has different node map.
705 */
706 nodenum=0;
707 while (nodenum < O2NM_MAX_NODES) {
708 if (test_bit(nodenum, dlm->domain_map)) {
709 if (!byte_test_bit(nodenum, query->node_map)) {
710 mlog(0, "disallow join as node %u does not "
711 "have node %u in its nodemap\n",
712 query->node_idx, nodenum);
713 response = JOIN_DISALLOW;
714 goto unlock_respond;
715 }
716 }
717 nodenum++;
718 }
719
611 /* Once the dlm ctxt is marked as leaving then we don't want 720 /* Once the dlm ctxt is marked as leaving then we don't want
612 * to be put in someone's domain map. 721 * to be put in someone's domain map.
613 * Also, explicitly disallow joining at certain troublesome 722 * Also, explicitly disallow joining at certain troublesome
@@ -626,15 +735,15 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data)
626 /* Disallow parallel joins. */ 735 /* Disallow parallel joins. */
627 response = JOIN_DISALLOW; 736 response = JOIN_DISALLOW;
628 } else if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) { 737 } else if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) {
629 mlog(ML_NOTICE, "node %u trying to join, but recovery " 738 mlog(0, "node %u trying to join, but recovery "
630 "is ongoing.\n", bit); 739 "is ongoing.\n", bit);
631 response = JOIN_DISALLOW; 740 response = JOIN_DISALLOW;
632 } else if (test_bit(bit, dlm->recovery_map)) { 741 } else if (test_bit(bit, dlm->recovery_map)) {
633 mlog(ML_NOTICE, "node %u trying to join, but it " 742 mlog(0, "node %u trying to join, but it "
634 "still needs recovery.\n", bit); 743 "still needs recovery.\n", bit);
635 response = JOIN_DISALLOW; 744 response = JOIN_DISALLOW;
636 } else if (test_bit(bit, dlm->domain_map)) { 745 } else if (test_bit(bit, dlm->domain_map)) {
637 mlog(ML_NOTICE, "node %u trying to join, but it " 746 mlog(0, "node %u trying to join, but it "
638 "is still in the domain! needs recovery?\n", 747 "is still in the domain! needs recovery?\n",
639 bit); 748 bit);
640 response = JOIN_DISALLOW; 749 response = JOIN_DISALLOW;
@@ -649,6 +758,7 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data)
649 758
650 spin_unlock(&dlm->spinlock); 759 spin_unlock(&dlm->spinlock);
651 } 760 }
761unlock_respond:
652 spin_unlock(&dlm_domain_lock); 762 spin_unlock(&dlm_domain_lock);
653 763
654respond: 764respond:
@@ -657,7 +767,8 @@ respond:
657 return response; 767 return response;
658} 768}
659 769
660static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data) 770static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data,
771 void **ret_data)
661{ 772{
662 struct dlm_assert_joined *assert; 773 struct dlm_assert_joined *assert;
663 struct dlm_ctxt *dlm = NULL; 774 struct dlm_ctxt *dlm = NULL;
@@ -694,7 +805,8 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data)
694 return 0; 805 return 0;
695} 806}
696 807
697static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data) 808static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data,
809 void **ret_data)
698{ 810{
699 struct dlm_cancel_join *cancel; 811 struct dlm_cancel_join *cancel;
700 struct dlm_ctxt *dlm = NULL; 812 struct dlm_ctxt *dlm = NULL;
@@ -796,6 +908,9 @@ static int dlm_request_join(struct dlm_ctxt *dlm,
796 join_msg.name_len = strlen(dlm->name); 908 join_msg.name_len = strlen(dlm->name);
797 memcpy(join_msg.domain, dlm->name, join_msg.name_len); 909 memcpy(join_msg.domain, dlm->name, join_msg.name_len);
798 910
911 /* copy live node map to join message */
912 byte_copymap(join_msg.node_map, dlm->live_nodes_map, O2NM_MAX_NODES);
913
799 status = o2net_send_message(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, &join_msg, 914 status = o2net_send_message(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, &join_msg,
800 sizeof(join_msg), node, &retval); 915 sizeof(join_msg), node, &retval);
801 if (status < 0 && status != -ENOPROTOOPT) { 916 if (status < 0 && status != -ENOPROTOOPT) {
@@ -1036,98 +1151,106 @@ static int dlm_register_domain_handlers(struct dlm_ctxt *dlm)
1036 status = o2net_register_handler(DLM_MASTER_REQUEST_MSG, dlm->key, 1151 status = o2net_register_handler(DLM_MASTER_REQUEST_MSG, dlm->key,
1037 sizeof(struct dlm_master_request), 1152 sizeof(struct dlm_master_request),
1038 dlm_master_request_handler, 1153 dlm_master_request_handler,
1039 dlm, &dlm->dlm_domain_handlers); 1154 dlm, NULL, &dlm->dlm_domain_handlers);
1040 if (status) 1155 if (status)
1041 goto bail; 1156 goto bail;
1042 1157
1043 status = o2net_register_handler(DLM_ASSERT_MASTER_MSG, dlm->key, 1158 status = o2net_register_handler(DLM_ASSERT_MASTER_MSG, dlm->key,
1044 sizeof(struct dlm_assert_master), 1159 sizeof(struct dlm_assert_master),
1045 dlm_assert_master_handler, 1160 dlm_assert_master_handler,
1046 dlm, &dlm->dlm_domain_handlers); 1161 dlm, dlm_assert_master_post_handler,
1162 &dlm->dlm_domain_handlers);
1047 if (status) 1163 if (status)
1048 goto bail; 1164 goto bail;
1049 1165
1050 status = o2net_register_handler(DLM_CREATE_LOCK_MSG, dlm->key, 1166 status = o2net_register_handler(DLM_CREATE_LOCK_MSG, dlm->key,
1051 sizeof(struct dlm_create_lock), 1167 sizeof(struct dlm_create_lock),
1052 dlm_create_lock_handler, 1168 dlm_create_lock_handler,
1053 dlm, &dlm->dlm_domain_handlers); 1169 dlm, NULL, &dlm->dlm_domain_handlers);
1054 if (status) 1170 if (status)
1055 goto bail; 1171 goto bail;
1056 1172
1057 status = o2net_register_handler(DLM_CONVERT_LOCK_MSG, dlm->key, 1173 status = o2net_register_handler(DLM_CONVERT_LOCK_MSG, dlm->key,
1058 DLM_CONVERT_LOCK_MAX_LEN, 1174 DLM_CONVERT_LOCK_MAX_LEN,
1059 dlm_convert_lock_handler, 1175 dlm_convert_lock_handler,
1060 dlm, &dlm->dlm_domain_handlers); 1176 dlm, NULL, &dlm->dlm_domain_handlers);
1061 if (status) 1177 if (status)
1062 goto bail; 1178 goto bail;
1063 1179
1064 status = o2net_register_handler(DLM_UNLOCK_LOCK_MSG, dlm->key, 1180 status = o2net_register_handler(DLM_UNLOCK_LOCK_MSG, dlm->key,
1065 DLM_UNLOCK_LOCK_MAX_LEN, 1181 DLM_UNLOCK_LOCK_MAX_LEN,
1066 dlm_unlock_lock_handler, 1182 dlm_unlock_lock_handler,
1067 dlm, &dlm->dlm_domain_handlers); 1183 dlm, NULL, &dlm->dlm_domain_handlers);
1068 if (status) 1184 if (status)
1069 goto bail; 1185 goto bail;
1070 1186
1071 status = o2net_register_handler(DLM_PROXY_AST_MSG, dlm->key, 1187 status = o2net_register_handler(DLM_PROXY_AST_MSG, dlm->key,
1072 DLM_PROXY_AST_MAX_LEN, 1188 DLM_PROXY_AST_MAX_LEN,
1073 dlm_proxy_ast_handler, 1189 dlm_proxy_ast_handler,
1074 dlm, &dlm->dlm_domain_handlers); 1190 dlm, NULL, &dlm->dlm_domain_handlers);
1075 if (status) 1191 if (status)
1076 goto bail; 1192 goto bail;
1077 1193
1078 status = o2net_register_handler(DLM_EXIT_DOMAIN_MSG, dlm->key, 1194 status = o2net_register_handler(DLM_EXIT_DOMAIN_MSG, dlm->key,
1079 sizeof(struct dlm_exit_domain), 1195 sizeof(struct dlm_exit_domain),
1080 dlm_exit_domain_handler, 1196 dlm_exit_domain_handler,
1081 dlm, &dlm->dlm_domain_handlers); 1197 dlm, NULL, &dlm->dlm_domain_handlers);
1198 if (status)
1199 goto bail;
1200
1201 status = o2net_register_handler(DLM_DEREF_LOCKRES_MSG, dlm->key,
1202 sizeof(struct dlm_deref_lockres),
1203 dlm_deref_lockres_handler,
1204 dlm, NULL, &dlm->dlm_domain_handlers);
1082 if (status) 1205 if (status)
1083 goto bail; 1206 goto bail;
1084 1207
1085 status = o2net_register_handler(DLM_MIGRATE_REQUEST_MSG, dlm->key, 1208 status = o2net_register_handler(DLM_MIGRATE_REQUEST_MSG, dlm->key,
1086 sizeof(struct dlm_migrate_request), 1209 sizeof(struct dlm_migrate_request),
1087 dlm_migrate_request_handler, 1210 dlm_migrate_request_handler,
1088 dlm, &dlm->dlm_domain_handlers); 1211 dlm, NULL, &dlm->dlm_domain_handlers);
1089 if (status) 1212 if (status)
1090 goto bail; 1213 goto bail;
1091 1214
1092 status = o2net_register_handler(DLM_MIG_LOCKRES_MSG, dlm->key, 1215 status = o2net_register_handler(DLM_MIG_LOCKRES_MSG, dlm->key,
1093 DLM_MIG_LOCKRES_MAX_LEN, 1216 DLM_MIG_LOCKRES_MAX_LEN,
1094 dlm_mig_lockres_handler, 1217 dlm_mig_lockres_handler,
1095 dlm, &dlm->dlm_domain_handlers); 1218 dlm, NULL, &dlm->dlm_domain_handlers);
1096 if (status) 1219 if (status)
1097 goto bail; 1220 goto bail;
1098 1221
1099 status = o2net_register_handler(DLM_MASTER_REQUERY_MSG, dlm->key, 1222 status = o2net_register_handler(DLM_MASTER_REQUERY_MSG, dlm->key,
1100 sizeof(struct dlm_master_requery), 1223 sizeof(struct dlm_master_requery),
1101 dlm_master_requery_handler, 1224 dlm_master_requery_handler,
1102 dlm, &dlm->dlm_domain_handlers); 1225 dlm, NULL, &dlm->dlm_domain_handlers);
1103 if (status) 1226 if (status)
1104 goto bail; 1227 goto bail;
1105 1228
1106 status = o2net_register_handler(DLM_LOCK_REQUEST_MSG, dlm->key, 1229 status = o2net_register_handler(DLM_LOCK_REQUEST_MSG, dlm->key,
1107 sizeof(struct dlm_lock_request), 1230 sizeof(struct dlm_lock_request),
1108 dlm_request_all_locks_handler, 1231 dlm_request_all_locks_handler,
1109 dlm, &dlm->dlm_domain_handlers); 1232 dlm, NULL, &dlm->dlm_domain_handlers);
1110 if (status) 1233 if (status)
1111 goto bail; 1234 goto bail;
1112 1235
1113 status = o2net_register_handler(DLM_RECO_DATA_DONE_MSG, dlm->key, 1236 status = o2net_register_handler(DLM_RECO_DATA_DONE_MSG, dlm->key,
1114 sizeof(struct dlm_reco_data_done), 1237 sizeof(struct dlm_reco_data_done),
1115 dlm_reco_data_done_handler, 1238 dlm_reco_data_done_handler,
1116 dlm, &dlm->dlm_domain_handlers); 1239 dlm, NULL, &dlm->dlm_domain_handlers);
1117 if (status) 1240 if (status)
1118 goto bail; 1241 goto bail;
1119 1242
1120 status = o2net_register_handler(DLM_BEGIN_RECO_MSG, dlm->key, 1243 status = o2net_register_handler(DLM_BEGIN_RECO_MSG, dlm->key,
1121 sizeof(struct dlm_begin_reco), 1244 sizeof(struct dlm_begin_reco),
1122 dlm_begin_reco_handler, 1245 dlm_begin_reco_handler,
1123 dlm, &dlm->dlm_domain_handlers); 1246 dlm, NULL, &dlm->dlm_domain_handlers);
1124 if (status) 1247 if (status)
1125 goto bail; 1248 goto bail;
1126 1249
1127 status = o2net_register_handler(DLM_FINALIZE_RECO_MSG, dlm->key, 1250 status = o2net_register_handler(DLM_FINALIZE_RECO_MSG, dlm->key,
1128 sizeof(struct dlm_finalize_reco), 1251 sizeof(struct dlm_finalize_reco),
1129 dlm_finalize_reco_handler, 1252 dlm_finalize_reco_handler,
1130 dlm, &dlm->dlm_domain_handlers); 1253 dlm, NULL, &dlm->dlm_domain_handlers);
1131 if (status) 1254 if (status)
1132 goto bail; 1255 goto bail;
1133 1256
@@ -1141,6 +1264,8 @@ bail:
1141static int dlm_join_domain(struct dlm_ctxt *dlm) 1264static int dlm_join_domain(struct dlm_ctxt *dlm)
1142{ 1265{
1143 int status; 1266 int status;
1267 unsigned int backoff;
1268 unsigned int total_backoff = 0;
1144 1269
1145 BUG_ON(!dlm); 1270 BUG_ON(!dlm);
1146 1271
@@ -1172,18 +1297,27 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
1172 } 1297 }
1173 1298
1174 do { 1299 do {
1175 unsigned int backoff;
1176 status = dlm_try_to_join_domain(dlm); 1300 status = dlm_try_to_join_domain(dlm);
1177 1301
1178 /* If we're racing another node to the join, then we 1302 /* If we're racing another node to the join, then we
1179 * need to back off temporarily and let them 1303 * need to back off temporarily and let them
1180 * complete. */ 1304 * complete. */
1305#define DLM_JOIN_TIMEOUT_MSECS 90000
1181 if (status == -EAGAIN) { 1306 if (status == -EAGAIN) {
1182 if (signal_pending(current)) { 1307 if (signal_pending(current)) {
1183 status = -ERESTARTSYS; 1308 status = -ERESTARTSYS;
1184 goto bail; 1309 goto bail;
1185 } 1310 }
1186 1311
1312 if (total_backoff >
1313 msecs_to_jiffies(DLM_JOIN_TIMEOUT_MSECS)) {
1314 status = -ERESTARTSYS;
1315 mlog(ML_NOTICE, "Timed out joining dlm domain "
1316 "%s after %u msecs\n", dlm->name,
1317 jiffies_to_msecs(total_backoff));
1318 goto bail;
1319 }
1320
1187 /* 1321 /*
1188 * <chip> After you! 1322 * <chip> After you!
1189 * <dale> No, after you! 1323 * <dale> No, after you!
@@ -1193,6 +1327,7 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
1193 */ 1327 */
1194 backoff = (unsigned int)(jiffies & 0x3); 1328 backoff = (unsigned int)(jiffies & 0x3);
1195 backoff *= DLM_DOMAIN_BACKOFF_MS; 1329 backoff *= DLM_DOMAIN_BACKOFF_MS;
1330 total_backoff += backoff;
1196 mlog(0, "backoff %d\n", backoff); 1331 mlog(0, "backoff %d\n", backoff);
1197 msleep(backoff); 1332 msleep(backoff);
1198 } 1333 }
@@ -1421,21 +1556,21 @@ static int dlm_register_net_handlers(void)
1421 status = o2net_register_handler(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, 1556 status = o2net_register_handler(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY,
1422 sizeof(struct dlm_query_join_request), 1557 sizeof(struct dlm_query_join_request),
1423 dlm_query_join_handler, 1558 dlm_query_join_handler,
1424 NULL, &dlm_join_handlers); 1559 NULL, NULL, &dlm_join_handlers);
1425 if (status) 1560 if (status)
1426 goto bail; 1561 goto bail;
1427 1562
1428 status = o2net_register_handler(DLM_ASSERT_JOINED_MSG, DLM_MOD_KEY, 1563 status = o2net_register_handler(DLM_ASSERT_JOINED_MSG, DLM_MOD_KEY,
1429 sizeof(struct dlm_assert_joined), 1564 sizeof(struct dlm_assert_joined),
1430 dlm_assert_joined_handler, 1565 dlm_assert_joined_handler,
1431 NULL, &dlm_join_handlers); 1566 NULL, NULL, &dlm_join_handlers);
1432 if (status) 1567 if (status)
1433 goto bail; 1568 goto bail;
1434 1569
1435 status = o2net_register_handler(DLM_CANCEL_JOIN_MSG, DLM_MOD_KEY, 1570 status = o2net_register_handler(DLM_CANCEL_JOIN_MSG, DLM_MOD_KEY,
1436 sizeof(struct dlm_cancel_join), 1571 sizeof(struct dlm_cancel_join),
1437 dlm_cancel_join_handler, 1572 dlm_cancel_join_handler,
1438 NULL, &dlm_join_handlers); 1573 NULL, NULL, &dlm_join_handlers);
1439 1574
1440bail: 1575bail:
1441 if (status < 0) 1576 if (status < 0)
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index b7f0ba97a1a2..de952eba29a9 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -61,11 +61,11 @@
61#define MLOG_MASK_PREFIX ML_DLMFS 61#define MLOG_MASK_PREFIX ML_DLMFS
62#include "cluster/masklog.h" 62#include "cluster/masklog.h"
63 63
64static struct super_operations dlmfs_ops; 64static const struct super_operations dlmfs_ops;
65static struct file_operations dlmfs_file_operations; 65static const struct file_operations dlmfs_file_operations;
66static struct inode_operations dlmfs_dir_inode_operations; 66static const struct inode_operations dlmfs_dir_inode_operations;
67static struct inode_operations dlmfs_root_inode_operations; 67static const struct inode_operations dlmfs_root_inode_operations;
68static struct inode_operations dlmfs_file_inode_operations; 68static const struct inode_operations dlmfs_file_inode_operations;
69static struct kmem_cache *dlmfs_inode_cache; 69static struct kmem_cache *dlmfs_inode_cache;
70 70
71struct workqueue_struct *user_dlm_worker; 71struct workqueue_struct *user_dlm_worker;
@@ -540,27 +540,27 @@ static int dlmfs_fill_super(struct super_block * sb,
540 return 0; 540 return 0;
541} 541}
542 542
543static struct file_operations dlmfs_file_operations = { 543static const struct file_operations dlmfs_file_operations = {
544 .open = dlmfs_file_open, 544 .open = dlmfs_file_open,
545 .release = dlmfs_file_release, 545 .release = dlmfs_file_release,
546 .read = dlmfs_file_read, 546 .read = dlmfs_file_read,
547 .write = dlmfs_file_write, 547 .write = dlmfs_file_write,
548}; 548};
549 549
550static struct inode_operations dlmfs_dir_inode_operations = { 550static const struct inode_operations dlmfs_dir_inode_operations = {
551 .create = dlmfs_create, 551 .create = dlmfs_create,
552 .lookup = simple_lookup, 552 .lookup = simple_lookup,
553 .unlink = dlmfs_unlink, 553 .unlink = dlmfs_unlink,
554}; 554};
555 555
556/* this way we can restrict mkdir to only the toplevel of the fs. */ 556/* this way we can restrict mkdir to only the toplevel of the fs. */
557static struct inode_operations dlmfs_root_inode_operations = { 557static const struct inode_operations dlmfs_root_inode_operations = {
558 .lookup = simple_lookup, 558 .lookup = simple_lookup,
559 .mkdir = dlmfs_mkdir, 559 .mkdir = dlmfs_mkdir,
560 .rmdir = simple_rmdir, 560 .rmdir = simple_rmdir,
561}; 561};
562 562
563static struct super_operations dlmfs_ops = { 563static const struct super_operations dlmfs_ops = {
564 .statfs = simple_statfs, 564 .statfs = simple_statfs,
565 .alloc_inode = dlmfs_alloc_inode, 565 .alloc_inode = dlmfs_alloc_inode,
566 .destroy_inode = dlmfs_destroy_inode, 566 .destroy_inode = dlmfs_destroy_inode,
@@ -568,7 +568,7 @@ static struct super_operations dlmfs_ops = {
568 .drop_inode = generic_delete_inode, 568 .drop_inode = generic_delete_inode,
569}; 569};
570 570
571static struct inode_operations dlmfs_file_inode_operations = { 571static const struct inode_operations dlmfs_file_inode_operations = {
572 .getattr = simple_getattr, 572 .getattr = simple_getattr,
573}; 573};
574 574
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index e5ca3db197f6..52578d907d9a 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -163,6 +163,10 @@ static enum dlm_status dlmlock_master(struct dlm_ctxt *dlm,
163 kick_thread = 1; 163 kick_thread = 1;
164 } 164 }
165 } 165 }
166 /* reduce the inflight count, this may result in the lockres
167 * being purged below during calc_usage */
168 if (lock->ml.node == dlm->node_num)
169 dlm_lockres_drop_inflight_ref(dlm, res);
166 170
167 spin_unlock(&res->spinlock); 171 spin_unlock(&res->spinlock);
168 wake_up(&res->wq); 172 wake_up(&res->wq);
@@ -437,7 +441,8 @@ struct dlm_lock * dlm_new_lock(int type, u8 node, u64 cookie,
437 * held on exit: none 441 * held on exit: none
438 * returns: DLM_NORMAL, DLM_SYSERR, DLM_IVLOCKID, DLM_NOTQUEUED 442 * returns: DLM_NORMAL, DLM_SYSERR, DLM_IVLOCKID, DLM_NOTQUEUED
439 */ 443 */
440int dlm_create_lock_handler(struct o2net_msg *msg, u32 len, void *data) 444int dlm_create_lock_handler(struct o2net_msg *msg, u32 len, void *data,
445 void **ret_data)
441{ 446{
442 struct dlm_ctxt *dlm = data; 447 struct dlm_ctxt *dlm = data;
443 struct dlm_create_lock *create = (struct dlm_create_lock *)msg->buf; 448 struct dlm_create_lock *create = (struct dlm_create_lock *)msg->buf;
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 0ad872055cb3..77e4e6169a0d 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -99,9 +99,10 @@ static void dlm_mle_node_up(struct dlm_ctxt *dlm,
99 int idx); 99 int idx);
100 100
101static void dlm_assert_master_worker(struct dlm_work_item *item, void *data); 101static void dlm_assert_master_worker(struct dlm_work_item *item, void *data);
102static int dlm_do_assert_master(struct dlm_ctxt *dlm, const char *lockname, 102static int dlm_do_assert_master(struct dlm_ctxt *dlm,
103 unsigned int namelen, void *nodemap, 103 struct dlm_lock_resource *res,
104 u32 flags); 104 void *nodemap, u32 flags);
105static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data);
105 106
106static inline int dlm_mle_equal(struct dlm_ctxt *dlm, 107static inline int dlm_mle_equal(struct dlm_ctxt *dlm,
107 struct dlm_master_list_entry *mle, 108 struct dlm_master_list_entry *mle,
@@ -237,7 +238,8 @@ static int dlm_find_mle(struct dlm_ctxt *dlm,
237 struct dlm_master_list_entry **mle, 238 struct dlm_master_list_entry **mle,
238 char *name, unsigned int namelen); 239 char *name, unsigned int namelen);
239 240
240static int dlm_do_master_request(struct dlm_master_list_entry *mle, int to); 241static int dlm_do_master_request(struct dlm_lock_resource *res,
242 struct dlm_master_list_entry *mle, int to);
241 243
242 244
243static int dlm_wait_for_lock_mastery(struct dlm_ctxt *dlm, 245static int dlm_wait_for_lock_mastery(struct dlm_ctxt *dlm,
@@ -687,6 +689,7 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,
687 INIT_LIST_HEAD(&res->purge); 689 INIT_LIST_HEAD(&res->purge);
688 atomic_set(&res->asts_reserved, 0); 690 atomic_set(&res->asts_reserved, 0);
689 res->migration_pending = 0; 691 res->migration_pending = 0;
692 res->inflight_locks = 0;
690 693
691 kref_init(&res->refs); 694 kref_init(&res->refs);
692 695
@@ -700,6 +703,7 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,
700 res->last_used = 0; 703 res->last_used = 0;
701 704
702 memset(res->lvb, 0, DLM_LVB_LEN); 705 memset(res->lvb, 0, DLM_LVB_LEN);
706 memset(res->refmap, 0, sizeof(res->refmap));
703} 707}
704 708
705struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm, 709struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm,
@@ -722,6 +726,42 @@ struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm,
722 return res; 726 return res;
723} 727}
724 728
729void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
730 struct dlm_lock_resource *res,
731 int new_lockres,
732 const char *file,
733 int line)
734{
735 if (!new_lockres)
736 assert_spin_locked(&res->spinlock);
737
738 if (!test_bit(dlm->node_num, res->refmap)) {
739 BUG_ON(res->inflight_locks != 0);
740 dlm_lockres_set_refmap_bit(dlm->node_num, res);
741 }
742 res->inflight_locks++;
743 mlog(0, "%s:%.*s: inflight++: now %u\n",
744 dlm->name, res->lockname.len, res->lockname.name,
745 res->inflight_locks);
746}
747
748void __dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm,
749 struct dlm_lock_resource *res,
750 const char *file,
751 int line)
752{
753 assert_spin_locked(&res->spinlock);
754
755 BUG_ON(res->inflight_locks == 0);
756 res->inflight_locks--;
757 mlog(0, "%s:%.*s: inflight--: now %u\n",
758 dlm->name, res->lockname.len, res->lockname.name,
759 res->inflight_locks);
760 if (res->inflight_locks == 0)
761 dlm_lockres_clear_refmap_bit(dlm->node_num, res);
762 wake_up(&res->wq);
763}
764
725/* 765/*
726 * lookup a lock resource by name. 766 * lookup a lock resource by name.
727 * may already exist in the hashtable. 767 * may already exist in the hashtable.
@@ -752,6 +792,7 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
752 unsigned int hash; 792 unsigned int hash;
753 int tries = 0; 793 int tries = 0;
754 int bit, wait_on_recovery = 0; 794 int bit, wait_on_recovery = 0;
795 int drop_inflight_if_nonlocal = 0;
755 796
756 BUG_ON(!lockid); 797 BUG_ON(!lockid);
757 798
@@ -761,9 +802,30 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
761 802
762lookup: 803lookup:
763 spin_lock(&dlm->spinlock); 804 spin_lock(&dlm->spinlock);
764 tmpres = __dlm_lookup_lockres(dlm, lockid, namelen, hash); 805 tmpres = __dlm_lookup_lockres_full(dlm, lockid, namelen, hash);
765 if (tmpres) { 806 if (tmpres) {
807 int dropping_ref = 0;
808
809 spin_lock(&tmpres->spinlock);
810 if (tmpres->owner == dlm->node_num) {
811 BUG_ON(tmpres->state & DLM_LOCK_RES_DROPPING_REF);
812 dlm_lockres_grab_inflight_ref(dlm, tmpres);
813 } else if (tmpres->state & DLM_LOCK_RES_DROPPING_REF)
814 dropping_ref = 1;
815 spin_unlock(&tmpres->spinlock);
766 spin_unlock(&dlm->spinlock); 816 spin_unlock(&dlm->spinlock);
817
818 /* wait until done messaging the master, drop our ref to allow
819 * the lockres to be purged, start over. */
820 if (dropping_ref) {
821 spin_lock(&tmpres->spinlock);
822 __dlm_wait_on_lockres_flags(tmpres, DLM_LOCK_RES_DROPPING_REF);
823 spin_unlock(&tmpres->spinlock);
824 dlm_lockres_put(tmpres);
825 tmpres = NULL;
826 goto lookup;
827 }
828
767 mlog(0, "found in hash!\n"); 829 mlog(0, "found in hash!\n");
768 if (res) 830 if (res)
769 dlm_lockres_put(res); 831 dlm_lockres_put(res);
@@ -793,6 +855,7 @@ lookup:
793 spin_lock(&res->spinlock); 855 spin_lock(&res->spinlock);
794 dlm_change_lockres_owner(dlm, res, dlm->node_num); 856 dlm_change_lockres_owner(dlm, res, dlm->node_num);
795 __dlm_insert_lockres(dlm, res); 857 __dlm_insert_lockres(dlm, res);
858 dlm_lockres_grab_inflight_ref(dlm, res);
796 spin_unlock(&res->spinlock); 859 spin_unlock(&res->spinlock);
797 spin_unlock(&dlm->spinlock); 860 spin_unlock(&dlm->spinlock);
798 /* lockres still marked IN_PROGRESS */ 861 /* lockres still marked IN_PROGRESS */
@@ -805,29 +868,40 @@ lookup:
805 /* if we found a block, wait for lock to be mastered by another node */ 868 /* if we found a block, wait for lock to be mastered by another node */
806 blocked = dlm_find_mle(dlm, &mle, (char *)lockid, namelen); 869 blocked = dlm_find_mle(dlm, &mle, (char *)lockid, namelen);
807 if (blocked) { 870 if (blocked) {
871 int mig;
808 if (mle->type == DLM_MLE_MASTER) { 872 if (mle->type == DLM_MLE_MASTER) {
809 mlog(ML_ERROR, "master entry for nonexistent lock!\n"); 873 mlog(ML_ERROR, "master entry for nonexistent lock!\n");
810 BUG(); 874 BUG();
811 } else if (mle->type == DLM_MLE_MIGRATION) { 875 }
812 /* migration is in progress! */ 876 mig = (mle->type == DLM_MLE_MIGRATION);
813 /* the good news is that we now know the 877 /* if there is a migration in progress, let the migration
814 * "current" master (mle->master). */ 878 * finish before continuing. we can wait for the absence
815 879 * of the MIGRATION mle: either the migrate finished or
880 * one of the nodes died and the mle was cleaned up.
881 * if there is a BLOCK here, but it already has a master
882 * set, we are too late. the master does not have a ref
883 * for us in the refmap. detach the mle and drop it.
884 * either way, go back to the top and start over. */
885 if (mig || mle->master != O2NM_MAX_NODES) {
886 BUG_ON(mig && mle->master == dlm->node_num);
887 /* we arrived too late. the master does not
888 * have a ref for us. retry. */
889 mlog(0, "%s:%.*s: late on %s\n",
890 dlm->name, namelen, lockid,
891 mig ? "MIGRATION" : "BLOCK");
816 spin_unlock(&dlm->master_lock); 892 spin_unlock(&dlm->master_lock);
817 assert_spin_locked(&dlm->spinlock);
818
819 /* set the lockres owner and hash it */
820 spin_lock(&res->spinlock);
821 dlm_set_lockres_owner(dlm, res, mle->master);
822 __dlm_insert_lockres(dlm, res);
823 spin_unlock(&res->spinlock);
824 spin_unlock(&dlm->spinlock); 893 spin_unlock(&dlm->spinlock);
825 894
826 /* master is known, detach */ 895 /* master is known, detach */
827 dlm_mle_detach_hb_events(dlm, mle); 896 if (!mig)
897 dlm_mle_detach_hb_events(dlm, mle);
828 dlm_put_mle(mle); 898 dlm_put_mle(mle);
829 mle = NULL; 899 mle = NULL;
830 goto wake_waiters; 900 /* this is lame, but we cant wait on either
901 * the mle or lockres waitqueue here */
902 if (mig)
903 msleep(100);
904 goto lookup;
831 } 905 }
832 } else { 906 } else {
833 /* go ahead and try to master lock on this node */ 907 /* go ahead and try to master lock on this node */
@@ -858,6 +932,13 @@ lookup:
858 932
859 /* finally add the lockres to its hash bucket */ 933 /* finally add the lockres to its hash bucket */
860 __dlm_insert_lockres(dlm, res); 934 __dlm_insert_lockres(dlm, res);
935 /* since this lockres is new it doesnt not require the spinlock */
936 dlm_lockres_grab_inflight_ref_new(dlm, res);
937
938 /* if this node does not become the master make sure to drop
939 * this inflight reference below */
940 drop_inflight_if_nonlocal = 1;
941
861 /* get an extra ref on the mle in case this is a BLOCK 942 /* get an extra ref on the mle in case this is a BLOCK
862 * if so, the creator of the BLOCK may try to put the last 943 * if so, the creator of the BLOCK may try to put the last
863 * ref at this time in the assert master handler, so we 944 * ref at this time in the assert master handler, so we
@@ -910,7 +991,7 @@ redo_request:
910 ret = -EINVAL; 991 ret = -EINVAL;
911 dlm_node_iter_init(mle->vote_map, &iter); 992 dlm_node_iter_init(mle->vote_map, &iter);
912 while ((nodenum = dlm_node_iter_next(&iter)) >= 0) { 993 while ((nodenum = dlm_node_iter_next(&iter)) >= 0) {
913 ret = dlm_do_master_request(mle, nodenum); 994 ret = dlm_do_master_request(res, mle, nodenum);
914 if (ret < 0) 995 if (ret < 0)
915 mlog_errno(ret); 996 mlog_errno(ret);
916 if (mle->master != O2NM_MAX_NODES) { 997 if (mle->master != O2NM_MAX_NODES) {
@@ -960,6 +1041,8 @@ wait:
960 1041
961wake_waiters: 1042wake_waiters:
962 spin_lock(&res->spinlock); 1043 spin_lock(&res->spinlock);
1044 if (res->owner != dlm->node_num && drop_inflight_if_nonlocal)
1045 dlm_lockres_drop_inflight_ref(dlm, res);
963 res->state &= ~DLM_LOCK_RES_IN_PROGRESS; 1046 res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
964 spin_unlock(&res->spinlock); 1047 spin_unlock(&res->spinlock);
965 wake_up(&res->wq); 1048 wake_up(&res->wq);
@@ -998,7 +1081,7 @@ recheck:
998 /* this will cause the master to re-assert across 1081 /* this will cause the master to re-assert across
999 * the whole cluster, freeing up mles */ 1082 * the whole cluster, freeing up mles */
1000 if (res->owner != dlm->node_num) { 1083 if (res->owner != dlm->node_num) {
1001 ret = dlm_do_master_request(mle, res->owner); 1084 ret = dlm_do_master_request(res, mle, res->owner);
1002 if (ret < 0) { 1085 if (ret < 0) {
1003 /* give recovery a chance to run */ 1086 /* give recovery a chance to run */
1004 mlog(ML_ERROR, "link to %u went down?: %d\n", res->owner, ret); 1087 mlog(ML_ERROR, "link to %u went down?: %d\n", res->owner, ret);
@@ -1062,6 +1145,8 @@ recheck:
1062 * now tell other nodes that I am 1145 * now tell other nodes that I am
1063 * mastering this. */ 1146 * mastering this. */
1064 mle->master = dlm->node_num; 1147 mle->master = dlm->node_num;
1148 /* ref was grabbed in get_lock_resource
1149 * will be dropped in dlmlock_master */
1065 assert = 1; 1150 assert = 1;
1066 sleep = 0; 1151 sleep = 0;
1067 } 1152 }
@@ -1087,7 +1172,8 @@ recheck:
1087 (atomic_read(&mle->woken) == 1), 1172 (atomic_read(&mle->woken) == 1),
1088 timeo); 1173 timeo);
1089 if (res->owner == O2NM_MAX_NODES) { 1174 if (res->owner == O2NM_MAX_NODES) {
1090 mlog(0, "waiting again\n"); 1175 mlog(0, "%s:%.*s: waiting again\n", dlm->name,
1176 res->lockname.len, res->lockname.name);
1091 goto recheck; 1177 goto recheck;
1092 } 1178 }
1093 mlog(0, "done waiting, master is %u\n", res->owner); 1179 mlog(0, "done waiting, master is %u\n", res->owner);
@@ -1100,8 +1186,7 @@ recheck:
1100 m = dlm->node_num; 1186 m = dlm->node_num;
1101 mlog(0, "about to master %.*s here, this=%u\n", 1187 mlog(0, "about to master %.*s here, this=%u\n",
1102 res->lockname.len, res->lockname.name, m); 1188 res->lockname.len, res->lockname.name, m);
1103 ret = dlm_do_assert_master(dlm, res->lockname.name, 1189 ret = dlm_do_assert_master(dlm, res, mle->vote_map, 0);
1104 res->lockname.len, mle->vote_map, 0);
1105 if (ret) { 1190 if (ret) {
1106 /* This is a failure in the network path, 1191 /* This is a failure in the network path,
1107 * not in the response to the assert_master 1192 * not in the response to the assert_master
@@ -1117,6 +1202,8 @@ recheck:
1117 1202
1118 /* set the lockres owner */ 1203 /* set the lockres owner */
1119 spin_lock(&res->spinlock); 1204 spin_lock(&res->spinlock);
1205 /* mastery reference obtained either during
1206 * assert_master_handler or in get_lock_resource */
1120 dlm_change_lockres_owner(dlm, res, m); 1207 dlm_change_lockres_owner(dlm, res, m);
1121 spin_unlock(&res->spinlock); 1208 spin_unlock(&res->spinlock);
1122 1209
@@ -1283,7 +1370,8 @@ static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm,
1283 * 1370 *
1284 */ 1371 */
1285 1372
1286static int dlm_do_master_request(struct dlm_master_list_entry *mle, int to) 1373static int dlm_do_master_request(struct dlm_lock_resource *res,
1374 struct dlm_master_list_entry *mle, int to)
1287{ 1375{
1288 struct dlm_ctxt *dlm = mle->dlm; 1376 struct dlm_ctxt *dlm = mle->dlm;
1289 struct dlm_master_request request; 1377 struct dlm_master_request request;
@@ -1339,6 +1427,9 @@ again:
1339 case DLM_MASTER_RESP_YES: 1427 case DLM_MASTER_RESP_YES:
1340 set_bit(to, mle->response_map); 1428 set_bit(to, mle->response_map);
1341 mlog(0, "node %u is the master, response=YES\n", to); 1429 mlog(0, "node %u is the master, response=YES\n", to);
1430 mlog(0, "%s:%.*s: master node %u now knows I have a "
1431 "reference\n", dlm->name, res->lockname.len,
1432 res->lockname.name, to);
1342 mle->master = to; 1433 mle->master = to;
1343 break; 1434 break;
1344 case DLM_MASTER_RESP_NO: 1435 case DLM_MASTER_RESP_NO:
@@ -1379,7 +1470,8 @@ out:
1379 * 1470 *
1380 * if possible, TRIM THIS DOWN!!! 1471 * if possible, TRIM THIS DOWN!!!
1381 */ 1472 */
1382int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data) 1473int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data,
1474 void **ret_data)
1383{ 1475{
1384 u8 response = DLM_MASTER_RESP_MAYBE; 1476 u8 response = DLM_MASTER_RESP_MAYBE;
1385 struct dlm_ctxt *dlm = data; 1477 struct dlm_ctxt *dlm = data;
@@ -1417,10 +1509,11 @@ way_up_top:
1417 1509
1418 /* take care of the easy cases up front */ 1510 /* take care of the easy cases up front */
1419 spin_lock(&res->spinlock); 1511 spin_lock(&res->spinlock);
1420 if (res->state & DLM_LOCK_RES_RECOVERING) { 1512 if (res->state & (DLM_LOCK_RES_RECOVERING|
1513 DLM_LOCK_RES_MIGRATING)) {
1421 spin_unlock(&res->spinlock); 1514 spin_unlock(&res->spinlock);
1422 mlog(0, "returning DLM_MASTER_RESP_ERROR since res is " 1515 mlog(0, "returning DLM_MASTER_RESP_ERROR since res is "
1423 "being recovered\n"); 1516 "being recovered/migrated\n");
1424 response = DLM_MASTER_RESP_ERROR; 1517 response = DLM_MASTER_RESP_ERROR;
1425 if (mle) 1518 if (mle)
1426 kmem_cache_free(dlm_mle_cache, mle); 1519 kmem_cache_free(dlm_mle_cache, mle);
@@ -1428,8 +1521,10 @@ way_up_top:
1428 } 1521 }
1429 1522
1430 if (res->owner == dlm->node_num) { 1523 if (res->owner == dlm->node_num) {
1524 mlog(0, "%s:%.*s: setting bit %u in refmap\n",
1525 dlm->name, namelen, name, request->node_idx);
1526 dlm_lockres_set_refmap_bit(request->node_idx, res);
1431 spin_unlock(&res->spinlock); 1527 spin_unlock(&res->spinlock);
1432 // mlog(0, "this node is the master\n");
1433 response = DLM_MASTER_RESP_YES; 1528 response = DLM_MASTER_RESP_YES;
1434 if (mle) 1529 if (mle)
1435 kmem_cache_free(dlm_mle_cache, mle); 1530 kmem_cache_free(dlm_mle_cache, mle);
@@ -1477,7 +1572,6 @@ way_up_top:
1477 mlog(0, "node %u is master, but trying to migrate to " 1572 mlog(0, "node %u is master, but trying to migrate to "
1478 "node %u.\n", tmpmle->master, tmpmle->new_master); 1573 "node %u.\n", tmpmle->master, tmpmle->new_master);
1479 if (tmpmle->master == dlm->node_num) { 1574 if (tmpmle->master == dlm->node_num) {
1480 response = DLM_MASTER_RESP_YES;
1481 mlog(ML_ERROR, "no owner on lockres, but this " 1575 mlog(ML_ERROR, "no owner on lockres, but this "
1482 "node is trying to migrate it to %u?!\n", 1576 "node is trying to migrate it to %u?!\n",
1483 tmpmle->new_master); 1577 tmpmle->new_master);
@@ -1494,6 +1588,10 @@ way_up_top:
1494 * go back and clean the mles on any 1588 * go back and clean the mles on any
1495 * other nodes */ 1589 * other nodes */
1496 dispatch_assert = 1; 1590 dispatch_assert = 1;
1591 dlm_lockres_set_refmap_bit(request->node_idx, res);
1592 mlog(0, "%s:%.*s: setting bit %u in refmap\n",
1593 dlm->name, namelen, name,
1594 request->node_idx);
1497 } else 1595 } else
1498 response = DLM_MASTER_RESP_NO; 1596 response = DLM_MASTER_RESP_NO;
1499 } else { 1597 } else {
@@ -1607,17 +1705,24 @@ send_response:
1607 * can periodically run all locks owned by this node 1705 * can periodically run all locks owned by this node
1608 * and re-assert across the cluster... 1706 * and re-assert across the cluster...
1609 */ 1707 */
1610static int dlm_do_assert_master(struct dlm_ctxt *dlm, const char *lockname, 1708int dlm_do_assert_master(struct dlm_ctxt *dlm,
1611 unsigned int namelen, void *nodemap, 1709 struct dlm_lock_resource *res,
1612 u32 flags) 1710 void *nodemap, u32 flags)
1613{ 1711{
1614 struct dlm_assert_master assert; 1712 struct dlm_assert_master assert;
1615 int to, tmpret; 1713 int to, tmpret;
1616 struct dlm_node_iter iter; 1714 struct dlm_node_iter iter;
1617 int ret = 0; 1715 int ret = 0;
1618 int reassert; 1716 int reassert;
1717 const char *lockname = res->lockname.name;
1718 unsigned int namelen = res->lockname.len;
1619 1719
1620 BUG_ON(namelen > O2NM_MAX_NAME_LEN); 1720 BUG_ON(namelen > O2NM_MAX_NAME_LEN);
1721
1722 spin_lock(&res->spinlock);
1723 res->state |= DLM_LOCK_RES_SETREF_INPROG;
1724 spin_unlock(&res->spinlock);
1725
1621again: 1726again:
1622 reassert = 0; 1727 reassert = 0;
1623 1728
@@ -1647,6 +1752,7 @@ again:
1647 mlog(0, "link to %d went down!\n", to); 1752 mlog(0, "link to %d went down!\n", to);
1648 /* any nonzero status return will do */ 1753 /* any nonzero status return will do */
1649 ret = tmpret; 1754 ret = tmpret;
1755 r = 0;
1650 } else if (r < 0) { 1756 } else if (r < 0) {
1651 /* ok, something horribly messed. kill thyself. */ 1757 /* ok, something horribly messed. kill thyself. */
1652 mlog(ML_ERROR,"during assert master of %.*s to %u, " 1758 mlog(ML_ERROR,"during assert master of %.*s to %u, "
@@ -1661,17 +1767,39 @@ again:
1661 spin_unlock(&dlm->master_lock); 1767 spin_unlock(&dlm->master_lock);
1662 spin_unlock(&dlm->spinlock); 1768 spin_unlock(&dlm->spinlock);
1663 BUG(); 1769 BUG();
1664 } else if (r == EAGAIN) { 1770 }
1771
1772 if (r & DLM_ASSERT_RESPONSE_REASSERT &&
1773 !(r & DLM_ASSERT_RESPONSE_MASTERY_REF)) {
1774 mlog(ML_ERROR, "%.*s: very strange, "
1775 "master MLE but no lockres on %u\n",
1776 namelen, lockname, to);
1777 }
1778
1779 if (r & DLM_ASSERT_RESPONSE_REASSERT) {
1665 mlog(0, "%.*s: node %u create mles on other " 1780 mlog(0, "%.*s: node %u create mles on other "
1666 "nodes and requests a re-assert\n", 1781 "nodes and requests a re-assert\n",
1667 namelen, lockname, to); 1782 namelen, lockname, to);
1668 reassert = 1; 1783 reassert = 1;
1669 } 1784 }
1785 if (r & DLM_ASSERT_RESPONSE_MASTERY_REF) {
1786 mlog(0, "%.*s: node %u has a reference to this "
1787 "lockres, set the bit in the refmap\n",
1788 namelen, lockname, to);
1789 spin_lock(&res->spinlock);
1790 dlm_lockres_set_refmap_bit(to, res);
1791 spin_unlock(&res->spinlock);
1792 }
1670 } 1793 }
1671 1794
1672 if (reassert) 1795 if (reassert)
1673 goto again; 1796 goto again;
1674 1797
1798 spin_lock(&res->spinlock);
1799 res->state &= ~DLM_LOCK_RES_SETREF_INPROG;
1800 spin_unlock(&res->spinlock);
1801 wake_up(&res->wq);
1802
1675 return ret; 1803 return ret;
1676} 1804}
1677 1805
@@ -1684,7 +1812,8 @@ again:
1684 * 1812 *
1685 * if possible, TRIM THIS DOWN!!! 1813 * if possible, TRIM THIS DOWN!!!
1686 */ 1814 */
1687int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data) 1815int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data,
1816 void **ret_data)
1688{ 1817{
1689 struct dlm_ctxt *dlm = data; 1818 struct dlm_ctxt *dlm = data;
1690 struct dlm_master_list_entry *mle = NULL; 1819 struct dlm_master_list_entry *mle = NULL;
@@ -1693,7 +1822,7 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data)
1693 char *name; 1822 char *name;
1694 unsigned int namelen, hash; 1823 unsigned int namelen, hash;
1695 u32 flags; 1824 u32 flags;
1696 int master_request = 0; 1825 int master_request = 0, have_lockres_ref = 0;
1697 int ret = 0; 1826 int ret = 0;
1698 1827
1699 if (!dlm_grab(dlm)) 1828 if (!dlm_grab(dlm))
@@ -1851,6 +1980,7 @@ ok:
1851 spin_unlock(&mle->spinlock); 1980 spin_unlock(&mle->spinlock);
1852 1981
1853 if (res) { 1982 if (res) {
1983 int wake = 0;
1854 spin_lock(&res->spinlock); 1984 spin_lock(&res->spinlock);
1855 if (mle->type == DLM_MLE_MIGRATION) { 1985 if (mle->type == DLM_MLE_MIGRATION) {
1856 mlog(0, "finishing off migration of lockres %.*s, " 1986 mlog(0, "finishing off migration of lockres %.*s, "
@@ -1858,12 +1988,16 @@ ok:
1858 res->lockname.len, res->lockname.name, 1988 res->lockname.len, res->lockname.name,
1859 dlm->node_num, mle->new_master); 1989 dlm->node_num, mle->new_master);
1860 res->state &= ~DLM_LOCK_RES_MIGRATING; 1990 res->state &= ~DLM_LOCK_RES_MIGRATING;
1991 wake = 1;
1861 dlm_change_lockres_owner(dlm, res, mle->new_master); 1992 dlm_change_lockres_owner(dlm, res, mle->new_master);
1862 BUG_ON(res->state & DLM_LOCK_RES_DIRTY); 1993 BUG_ON(res->state & DLM_LOCK_RES_DIRTY);
1863 } else { 1994 } else {
1864 dlm_change_lockres_owner(dlm, res, mle->master); 1995 dlm_change_lockres_owner(dlm, res, mle->master);
1865 } 1996 }
1866 spin_unlock(&res->spinlock); 1997 spin_unlock(&res->spinlock);
1998 have_lockres_ref = 1;
1999 if (wake)
2000 wake_up(&res->wq);
1867 } 2001 }
1868 2002
1869 /* master is known, detach if not already detached. 2003 /* master is known, detach if not already detached.
@@ -1913,12 +2047,28 @@ ok:
1913 2047
1914done: 2048done:
1915 ret = 0; 2049 ret = 0;
1916 if (res) 2050 if (res) {
1917 dlm_lockres_put(res); 2051 spin_lock(&res->spinlock);
2052 res->state |= DLM_LOCK_RES_SETREF_INPROG;
2053 spin_unlock(&res->spinlock);
2054 *ret_data = (void *)res;
2055 }
1918 dlm_put(dlm); 2056 dlm_put(dlm);
1919 if (master_request) { 2057 if (master_request) {
1920 mlog(0, "need to tell master to reassert\n"); 2058 mlog(0, "need to tell master to reassert\n");
1921 ret = EAGAIN; // positive. negative would shoot down the node. 2059 /* positive. negative would shoot down the node. */
2060 ret |= DLM_ASSERT_RESPONSE_REASSERT;
2061 if (!have_lockres_ref) {
2062 mlog(ML_ERROR, "strange, got assert from %u, MASTER "
2063 "mle present here for %s:%.*s, but no lockres!\n",
2064 assert->node_idx, dlm->name, namelen, name);
2065 }
2066 }
2067 if (have_lockres_ref) {
2068 /* let the master know we have a reference to the lockres */
2069 ret |= DLM_ASSERT_RESPONSE_MASTERY_REF;
2070 mlog(0, "%s:%.*s: got assert from %u, need a ref\n",
2071 dlm->name, namelen, name, assert->node_idx);
1922 } 2072 }
1923 return ret; 2073 return ret;
1924 2074
@@ -1929,11 +2079,25 @@ kill:
1929 __dlm_print_one_lock_resource(res); 2079 __dlm_print_one_lock_resource(res);
1930 spin_unlock(&res->spinlock); 2080 spin_unlock(&res->spinlock);
1931 spin_unlock(&dlm->spinlock); 2081 spin_unlock(&dlm->spinlock);
1932 dlm_lockres_put(res); 2082 *ret_data = (void *)res;
1933 dlm_put(dlm); 2083 dlm_put(dlm);
1934 return -EINVAL; 2084 return -EINVAL;
1935} 2085}
1936 2086
2087void dlm_assert_master_post_handler(int status, void *data, void *ret_data)
2088{
2089 struct dlm_lock_resource *res = (struct dlm_lock_resource *)ret_data;
2090
2091 if (ret_data) {
2092 spin_lock(&res->spinlock);
2093 res->state &= ~DLM_LOCK_RES_SETREF_INPROG;
2094 spin_unlock(&res->spinlock);
2095 wake_up(&res->wq);
2096 dlm_lockres_put(res);
2097 }
2098 return;
2099}
2100
1937int dlm_dispatch_assert_master(struct dlm_ctxt *dlm, 2101int dlm_dispatch_assert_master(struct dlm_ctxt *dlm,
1938 struct dlm_lock_resource *res, 2102 struct dlm_lock_resource *res,
1939 int ignore_higher, u8 request_from, u32 flags) 2103 int ignore_higher, u8 request_from, u32 flags)
@@ -2023,9 +2187,7 @@ static void dlm_assert_master_worker(struct dlm_work_item *item, void *data)
2023 * even if one or more nodes die */ 2187 * even if one or more nodes die */
2024 mlog(0, "worker about to master %.*s here, this=%u\n", 2188 mlog(0, "worker about to master %.*s here, this=%u\n",
2025 res->lockname.len, res->lockname.name, dlm->node_num); 2189 res->lockname.len, res->lockname.name, dlm->node_num);
2026 ret = dlm_do_assert_master(dlm, res->lockname.name, 2190 ret = dlm_do_assert_master(dlm, res, nodemap, flags);
2027 res->lockname.len,
2028 nodemap, flags);
2029 if (ret < 0) { 2191 if (ret < 0) {
2030 /* no need to restart, we are done */ 2192 /* no need to restart, we are done */
2031 if (!dlm_is_host_down(ret)) 2193 if (!dlm_is_host_down(ret))
@@ -2097,14 +2259,180 @@ static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm,
2097 return ret; 2259 return ret;
2098} 2260}
2099 2261
2262/*
2263 * DLM_DEREF_LOCKRES_MSG
2264 */
2265
2266int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
2267{
2268 struct dlm_deref_lockres deref;
2269 int ret = 0, r;
2270 const char *lockname;
2271 unsigned int namelen;
2272
2273 lockname = res->lockname.name;
2274 namelen = res->lockname.len;
2275 BUG_ON(namelen > O2NM_MAX_NAME_LEN);
2276
2277 mlog(0, "%s:%.*s: sending deref to %d\n",
2278 dlm->name, namelen, lockname, res->owner);
2279 memset(&deref, 0, sizeof(deref));
2280 deref.node_idx = dlm->node_num;
2281 deref.namelen = namelen;
2282 memcpy(deref.name, lockname, namelen);
2283
2284 ret = o2net_send_message(DLM_DEREF_LOCKRES_MSG, dlm->key,
2285 &deref, sizeof(deref), res->owner, &r);
2286 if (ret < 0)
2287 mlog_errno(ret);
2288 else if (r < 0) {
2289 /* BAD. other node says I did not have a ref. */
2290 mlog(ML_ERROR,"while dropping ref on %s:%.*s "
2291 "(master=%u) got %d.\n", dlm->name, namelen,
2292 lockname, res->owner, r);
2293 dlm_print_one_lock_resource(res);
2294 BUG();
2295 }
2296 return ret;
2297}
2298
2299int dlm_deref_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
2300 void **ret_data)
2301{
2302 struct dlm_ctxt *dlm = data;
2303 struct dlm_deref_lockres *deref = (struct dlm_deref_lockres *)msg->buf;
2304 struct dlm_lock_resource *res = NULL;
2305 char *name;
2306 unsigned int namelen;
2307 int ret = -EINVAL;
2308 u8 node;
2309 unsigned int hash;
2310 struct dlm_work_item *item;
2311 int cleared = 0;
2312 int dispatch = 0;
2313
2314 if (!dlm_grab(dlm))
2315 return 0;
2316
2317 name = deref->name;
2318 namelen = deref->namelen;
2319 node = deref->node_idx;
2320
2321 if (namelen > DLM_LOCKID_NAME_MAX) {
2322 mlog(ML_ERROR, "Invalid name length!");
2323 goto done;
2324 }
2325 if (deref->node_idx >= O2NM_MAX_NODES) {
2326 mlog(ML_ERROR, "Invalid node number: %u\n", node);
2327 goto done;
2328 }
2329
2330 hash = dlm_lockid_hash(name, namelen);
2331
2332 spin_lock(&dlm->spinlock);
2333 res = __dlm_lookup_lockres_full(dlm, name, namelen, hash);
2334 if (!res) {
2335 spin_unlock(&dlm->spinlock);
2336 mlog(ML_ERROR, "%s:%.*s: bad lockres name\n",
2337 dlm->name, namelen, name);
2338 goto done;
2339 }
2340 spin_unlock(&dlm->spinlock);
2341
2342 spin_lock(&res->spinlock);
2343 if (res->state & DLM_LOCK_RES_SETREF_INPROG)
2344 dispatch = 1;
2345 else {
2346 BUG_ON(res->state & DLM_LOCK_RES_DROPPING_REF);
2347 if (test_bit(node, res->refmap)) {
2348 dlm_lockres_clear_refmap_bit(node, res);
2349 cleared = 1;
2350 }
2351 }
2352 spin_unlock(&res->spinlock);
2353
2354 if (!dispatch) {
2355 if (cleared)
2356 dlm_lockres_calc_usage(dlm, res);
2357 else {
2358 mlog(ML_ERROR, "%s:%.*s: node %u trying to drop ref "
2359 "but it is already dropped!\n", dlm->name,
2360 res->lockname.len, res->lockname.name, node);
2361 __dlm_print_one_lock_resource(res);
2362 }
2363 ret = 0;
2364 goto done;
2365 }
2366
2367 item = kzalloc(sizeof(*item), GFP_NOFS);
2368 if (!item) {
2369 ret = -ENOMEM;
2370 mlog_errno(ret);
2371 goto done;
2372 }
2373
2374 dlm_init_work_item(dlm, item, dlm_deref_lockres_worker, NULL);
2375 item->u.dl.deref_res = res;
2376 item->u.dl.deref_node = node;
2377
2378 spin_lock(&dlm->work_lock);
2379 list_add_tail(&item->list, &dlm->work_list);
2380 spin_unlock(&dlm->work_lock);
2381
2382 queue_work(dlm->dlm_worker, &dlm->dispatched_work);
2383 return 0;
2384
2385done:
2386 if (res)
2387 dlm_lockres_put(res);
2388 dlm_put(dlm);
2389
2390 return ret;
2391}
2392
2393static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data)
2394{
2395 struct dlm_ctxt *dlm;
2396 struct dlm_lock_resource *res;
2397 u8 node;
2398 u8 cleared = 0;
2399
2400 dlm = item->dlm;
2401 res = item->u.dl.deref_res;
2402 node = item->u.dl.deref_node;
2403
2404 spin_lock(&res->spinlock);
2405 BUG_ON(res->state & DLM_LOCK_RES_DROPPING_REF);
2406 if (test_bit(node, res->refmap)) {
2407 __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG);
2408 dlm_lockres_clear_refmap_bit(node, res);
2409 cleared = 1;
2410 }
2411 spin_unlock(&res->spinlock);
2412
2413 if (cleared) {
2414 mlog(0, "%s:%.*s node %u ref dropped in dispatch\n",
2415 dlm->name, res->lockname.len, res->lockname.name, node);
2416 dlm_lockres_calc_usage(dlm, res);
2417 } else {
2418 mlog(ML_ERROR, "%s:%.*s: node %u trying to drop ref "
2419 "but it is already dropped!\n", dlm->name,
2420 res->lockname.len, res->lockname.name, node);
2421 __dlm_print_one_lock_resource(res);
2422 }
2423
2424 dlm_lockres_put(res);
2425}
2426
2100 2427
2101/* 2428/*
2102 * DLM_MIGRATE_LOCKRES 2429 * DLM_MIGRATE_LOCKRES
2103 */ 2430 */
2104 2431
2105 2432
2106int dlm_migrate_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, 2433static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
2107 u8 target) 2434 struct dlm_lock_resource *res,
2435 u8 target)
2108{ 2436{
2109 struct dlm_master_list_entry *mle = NULL; 2437 struct dlm_master_list_entry *mle = NULL;
2110 struct dlm_master_list_entry *oldmle = NULL; 2438 struct dlm_master_list_entry *oldmle = NULL;
@@ -2116,7 +2444,7 @@ int dlm_migrate_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
2116 struct list_head *queue, *iter; 2444 struct list_head *queue, *iter;
2117 int i; 2445 int i;
2118 struct dlm_lock *lock; 2446 struct dlm_lock *lock;
2119 int empty = 1; 2447 int empty = 1, wake = 0;
2120 2448
2121 if (!dlm_grab(dlm)) 2449 if (!dlm_grab(dlm))
2122 return -EINVAL; 2450 return -EINVAL;
@@ -2241,6 +2569,7 @@ int dlm_migrate_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
2241 res->lockname.name, target); 2569 res->lockname.name, target);
2242 spin_lock(&res->spinlock); 2570 spin_lock(&res->spinlock);
2243 res->state &= ~DLM_LOCK_RES_MIGRATING; 2571 res->state &= ~DLM_LOCK_RES_MIGRATING;
2572 wake = 1;
2244 spin_unlock(&res->spinlock); 2573 spin_unlock(&res->spinlock);
2245 ret = -EINVAL; 2574 ret = -EINVAL;
2246 } 2575 }
@@ -2268,6 +2597,9 @@ fail:
2268 * the lockres 2597 * the lockres
2269 */ 2598 */
2270 2599
2600 /* now that remote nodes are spinning on the MIGRATING flag,
2601 * ensure that all assert_master work is flushed. */
2602 flush_workqueue(dlm->dlm_worker);
2271 2603
2272 /* get an extra reference on the mle. 2604 /* get an extra reference on the mle.
2273 * otherwise the assert_master from the new 2605 * otherwise the assert_master from the new
@@ -2296,6 +2628,7 @@ fail:
2296 dlm_put_mle_inuse(mle); 2628 dlm_put_mle_inuse(mle);
2297 spin_lock(&res->spinlock); 2629 spin_lock(&res->spinlock);
2298 res->state &= ~DLM_LOCK_RES_MIGRATING; 2630 res->state &= ~DLM_LOCK_RES_MIGRATING;
2631 wake = 1;
2299 spin_unlock(&res->spinlock); 2632 spin_unlock(&res->spinlock);
2300 goto leave; 2633 goto leave;
2301 } 2634 }
@@ -2322,7 +2655,8 @@ fail:
2322 res->owner == target) 2655 res->owner == target)
2323 break; 2656 break;
2324 2657
2325 mlog(0, "timed out during migration\n"); 2658 mlog(0, "%s:%.*s: timed out during migration\n",
2659 dlm->name, res->lockname.len, res->lockname.name);
2326 /* avoid hang during shutdown when migrating lockres 2660 /* avoid hang during shutdown when migrating lockres
2327 * to a node which also goes down */ 2661 * to a node which also goes down */
2328 if (dlm_is_node_dead(dlm, target)) { 2662 if (dlm_is_node_dead(dlm, target)) {
@@ -2330,20 +2664,20 @@ fail:
2330 "target %u is no longer up, restarting\n", 2664 "target %u is no longer up, restarting\n",
2331 dlm->name, res->lockname.len, 2665 dlm->name, res->lockname.len,
2332 res->lockname.name, target); 2666 res->lockname.name, target);
2333 ret = -ERESTARTSYS; 2667 ret = -EINVAL;
2668 /* migration failed, detach and clean up mle */
2669 dlm_mle_detach_hb_events(dlm, mle);
2670 dlm_put_mle(mle);
2671 dlm_put_mle_inuse(mle);
2672 spin_lock(&res->spinlock);
2673 res->state &= ~DLM_LOCK_RES_MIGRATING;
2674 wake = 1;
2675 spin_unlock(&res->spinlock);
2676 goto leave;
2334 } 2677 }
2335 } 2678 } else
2336 if (ret == -ERESTARTSYS) { 2679 mlog(0, "%s:%.*s: caught signal during migration\n",
2337 /* migration failed, detach and clean up mle */ 2680 dlm->name, res->lockname.len, res->lockname.name);
2338 dlm_mle_detach_hb_events(dlm, mle);
2339 dlm_put_mle(mle);
2340 dlm_put_mle_inuse(mle);
2341 spin_lock(&res->spinlock);
2342 res->state &= ~DLM_LOCK_RES_MIGRATING;
2343 spin_unlock(&res->spinlock);
2344 goto leave;
2345 }
2346 /* TODO: if node died: stop, clean up, return error */
2347 } 2681 }
2348 2682
2349 /* all done, set the owner, clear the flag */ 2683 /* all done, set the owner, clear the flag */
@@ -2366,6 +2700,11 @@ leave:
2366 if (ret < 0) 2700 if (ret < 0)
2367 dlm_kick_thread(dlm, res); 2701 dlm_kick_thread(dlm, res);
2368 2702
2703 /* wake up waiters if the MIGRATING flag got set
2704 * but migration failed */
2705 if (wake)
2706 wake_up(&res->wq);
2707
2369 /* TODO: cleanup */ 2708 /* TODO: cleanup */
2370 if (mres) 2709 if (mres)
2371 free_page((unsigned long)mres); 2710 free_page((unsigned long)mres);
@@ -2376,6 +2715,53 @@ leave:
2376 return ret; 2715 return ret;
2377} 2716}
2378 2717
2718#define DLM_MIGRATION_RETRY_MS 100
2719
2720/* Should be called only after beginning the domain leave process.
2721 * There should not be any remaining locks on nonlocal lock resources,
2722 * and there should be no local locks left on locally mastered resources.
2723 *
2724 * Called with the dlm spinlock held, may drop it to do migration, but
2725 * will re-acquire before exit.
2726 *
2727 * Returns: 1 if dlm->spinlock was dropped/retaken, 0 if never dropped */
2728int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
2729{
2730 int ret;
2731 int lock_dropped = 0;
2732
2733 if (res->owner != dlm->node_num) {
2734 if (!__dlm_lockres_unused(res)) {
2735 mlog(ML_ERROR, "%s:%.*s: this node is not master, "
2736 "trying to free this but locks remain\n",
2737 dlm->name, res->lockname.len, res->lockname.name);
2738 }
2739 goto leave;
2740 }
2741
2742 /* Wheee! Migrate lockres here! Will sleep so drop spinlock. */
2743 spin_unlock(&dlm->spinlock);
2744 lock_dropped = 1;
2745 while (1) {
2746 ret = dlm_migrate_lockres(dlm, res, O2NM_MAX_NODES);
2747 if (ret >= 0)
2748 break;
2749 if (ret == -ENOTEMPTY) {
2750 mlog(ML_ERROR, "lockres %.*s still has local locks!\n",
2751 res->lockname.len, res->lockname.name);
2752 BUG();
2753 }
2754
2755 mlog(0, "lockres %.*s: migrate failed, "
2756 "retrying\n", res->lockname.len,
2757 res->lockname.name);
2758 msleep(DLM_MIGRATION_RETRY_MS);
2759 }
2760 spin_lock(&dlm->spinlock);
2761leave:
2762 return lock_dropped;
2763}
2764
2379int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock) 2765int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock)
2380{ 2766{
2381 int ret; 2767 int ret;
@@ -2405,7 +2791,8 @@ static int dlm_migration_can_proceed(struct dlm_ctxt *dlm,
2405 return can_proceed; 2791 return can_proceed;
2406} 2792}
2407 2793
2408int dlm_lockres_is_dirty(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) 2794static int dlm_lockres_is_dirty(struct dlm_ctxt *dlm,
2795 struct dlm_lock_resource *res)
2409{ 2796{
2410 int ret; 2797 int ret;
2411 spin_lock(&res->spinlock); 2798 spin_lock(&res->spinlock);
@@ -2434,8 +2821,15 @@ static int dlm_mark_lockres_migrating(struct dlm_ctxt *dlm,
2434 __dlm_lockres_reserve_ast(res); 2821 __dlm_lockres_reserve_ast(res);
2435 spin_unlock(&res->spinlock); 2822 spin_unlock(&res->spinlock);
2436 2823
2437 /* now flush all the pending asts.. hang out for a bit */ 2824 /* now flush all the pending asts */
2438 dlm_kick_thread(dlm, res); 2825 dlm_kick_thread(dlm, res);
2826 /* before waiting on DIRTY, block processes which may
2827 * try to dirty the lockres before MIGRATING is set */
2828 spin_lock(&res->spinlock);
2829 BUG_ON(res->state & DLM_LOCK_RES_BLOCK_DIRTY);
2830 res->state |= DLM_LOCK_RES_BLOCK_DIRTY;
2831 spin_unlock(&res->spinlock);
2832 /* now wait on any pending asts and the DIRTY state */
2439 wait_event(dlm->ast_wq, !dlm_lockres_is_dirty(dlm, res)); 2833 wait_event(dlm->ast_wq, !dlm_lockres_is_dirty(dlm, res));
2440 dlm_lockres_release_ast(dlm, res); 2834 dlm_lockres_release_ast(dlm, res);
2441 2835
@@ -2461,6 +2855,13 @@ again:
2461 mlog(0, "trying again...\n"); 2855 mlog(0, "trying again...\n");
2462 goto again; 2856 goto again;
2463 } 2857 }
2858 /* now that we are sure the MIGRATING state is there, drop
2859 * the unneded state which blocked threads trying to DIRTY */
2860 spin_lock(&res->spinlock);
2861 BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY));
2862 BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING));
2863 res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY;
2864 spin_unlock(&res->spinlock);
2464 2865
2465 /* did the target go down or die? */ 2866 /* did the target go down or die? */
2466 spin_lock(&dlm->spinlock); 2867 spin_lock(&dlm->spinlock);
@@ -2490,7 +2891,7 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm,
2490{ 2891{
2491 struct list_head *iter, *iter2; 2892 struct list_head *iter, *iter2;
2492 struct list_head *queue = &res->granted; 2893 struct list_head *queue = &res->granted;
2493 int i; 2894 int i, bit;
2494 struct dlm_lock *lock; 2895 struct dlm_lock *lock;
2495 2896
2496 assert_spin_locked(&res->spinlock); 2897 assert_spin_locked(&res->spinlock);
@@ -2508,12 +2909,28 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm,
2508 BUG_ON(!list_empty(&lock->bast_list)); 2909 BUG_ON(!list_empty(&lock->bast_list));
2509 BUG_ON(lock->ast_pending); 2910 BUG_ON(lock->ast_pending);
2510 BUG_ON(lock->bast_pending); 2911 BUG_ON(lock->bast_pending);
2912 dlm_lockres_clear_refmap_bit(lock->ml.node, res);
2511 list_del_init(&lock->list); 2913 list_del_init(&lock->list);
2512 dlm_lock_put(lock); 2914 dlm_lock_put(lock);
2513 } 2915 }
2514 } 2916 }
2515 queue++; 2917 queue++;
2516 } 2918 }
2919 bit = 0;
2920 while (1) {
2921 bit = find_next_bit(res->refmap, O2NM_MAX_NODES, bit);
2922 if (bit >= O2NM_MAX_NODES)
2923 break;
2924 /* do not clear the local node reference, if there is a
2925 * process holding this, let it drop the ref itself */
2926 if (bit != dlm->node_num) {
2927 mlog(0, "%s:%.*s: node %u had a ref to this "
2928 "migrating lockres, clearing\n", dlm->name,
2929 res->lockname.len, res->lockname.name, bit);
2930 dlm_lockres_clear_refmap_bit(bit, res);
2931 }
2932 bit++;
2933 }
2517} 2934}
2518 2935
2519/* for now this is not too intelligent. we will 2936/* for now this is not too intelligent. we will
@@ -2601,6 +3018,16 @@ static int dlm_do_migrate_request(struct dlm_ctxt *dlm,
2601 mlog(0, "migrate request (node %u) returned %d!\n", 3018 mlog(0, "migrate request (node %u) returned %d!\n",
2602 nodenum, status); 3019 nodenum, status);
2603 ret = status; 3020 ret = status;
3021 } else if (status == DLM_MIGRATE_RESPONSE_MASTERY_REF) {
3022 /* during the migration request we short-circuited
3023 * the mastery of the lockres. make sure we have
3024 * a mastery ref for nodenum */
3025 mlog(0, "%s:%.*s: need ref for node %u\n",
3026 dlm->name, res->lockname.len, res->lockname.name,
3027 nodenum);
3028 spin_lock(&res->spinlock);
3029 dlm_lockres_set_refmap_bit(nodenum, res);
3030 spin_unlock(&res->spinlock);
2604 } 3031 }
2605 } 3032 }
2606 3033
@@ -2619,7 +3046,8 @@ static int dlm_do_migrate_request(struct dlm_ctxt *dlm,
2619 * we will have no mle in the list to start with. now we can add an mle for 3046 * we will have no mle in the list to start with. now we can add an mle for
2620 * the migration and this should be the only one found for those scanning the 3047 * the migration and this should be the only one found for those scanning the
2621 * list. */ 3048 * list. */
2622int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data) 3049int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data,
3050 void **ret_data)
2623{ 3051{
2624 struct dlm_ctxt *dlm = data; 3052 struct dlm_ctxt *dlm = data;
2625 struct dlm_lock_resource *res = NULL; 3053 struct dlm_lock_resource *res = NULL;
@@ -2745,7 +3173,13 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm,
2745 /* remove it from the list so that only one 3173 /* remove it from the list so that only one
2746 * mle will be found */ 3174 * mle will be found */
2747 list_del_init(&tmp->list); 3175 list_del_init(&tmp->list);
2748 __dlm_mle_detach_hb_events(dlm, mle); 3176 /* this was obviously WRONG. mle is uninited here. should be tmp. */
3177 __dlm_mle_detach_hb_events(dlm, tmp);
3178 ret = DLM_MIGRATE_RESPONSE_MASTERY_REF;
3179 mlog(0, "%s:%.*s: master=%u, newmaster=%u, "
3180 "telling master to get ref for cleared out mle "
3181 "during migration\n", dlm->name, namelen, name,
3182 master, new_master);
2749 } 3183 }
2750 spin_unlock(&tmp->spinlock); 3184 spin_unlock(&tmp->spinlock);
2751 } 3185 }
@@ -2753,6 +3187,8 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm,
2753 /* now add a migration mle to the tail of the list */ 3187 /* now add a migration mle to the tail of the list */
2754 dlm_init_mle(mle, DLM_MLE_MIGRATION, dlm, res, name, namelen); 3188 dlm_init_mle(mle, DLM_MLE_MIGRATION, dlm, res, name, namelen);
2755 mle->new_master = new_master; 3189 mle->new_master = new_master;
3190 /* the new master will be sending an assert master for this.
3191 * at that point we will get the refmap reference */
2756 mle->master = master; 3192 mle->master = master;
2757 /* do this for consistency with other mle types */ 3193 /* do this for consistency with other mle types */
2758 set_bit(new_master, mle->maybe_map); 3194 set_bit(new_master, mle->maybe_map);
@@ -2902,6 +3338,13 @@ int dlm_finish_migration(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
2902 clear_bit(dlm->node_num, iter.node_map); 3338 clear_bit(dlm->node_num, iter.node_map);
2903 spin_unlock(&dlm->spinlock); 3339 spin_unlock(&dlm->spinlock);
2904 3340
3341 /* ownership of the lockres is changing. account for the
3342 * mastery reference here since old_master will briefly have
3343 * a reference after the migration completes */
3344 spin_lock(&res->spinlock);
3345 dlm_lockres_set_refmap_bit(old_master, res);
3346 spin_unlock(&res->spinlock);
3347
2905 mlog(0, "now time to do a migrate request to other nodes\n"); 3348 mlog(0, "now time to do a migrate request to other nodes\n");
2906 ret = dlm_do_migrate_request(dlm, res, old_master, 3349 ret = dlm_do_migrate_request(dlm, res, old_master,
2907 dlm->node_num, &iter); 3350 dlm->node_num, &iter);
@@ -2914,8 +3357,7 @@ int dlm_finish_migration(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
2914 res->lockname.len, res->lockname.name); 3357 res->lockname.len, res->lockname.name);
2915 /* this call now finishes out the nodemap 3358 /* this call now finishes out the nodemap
2916 * even if one or more nodes die */ 3359 * even if one or more nodes die */
2917 ret = dlm_do_assert_master(dlm, res->lockname.name, 3360 ret = dlm_do_assert_master(dlm, res, iter.node_map,
2918 res->lockname.len, iter.node_map,
2919 DLM_ASSERT_MASTER_FINISH_MIGRATION); 3361 DLM_ASSERT_MASTER_FINISH_MIGRATION);
2920 if (ret < 0) { 3362 if (ret < 0) {
2921 /* no longer need to retry. all living nodes contacted. */ 3363 /* no longer need to retry. all living nodes contacted. */
@@ -2927,8 +3369,7 @@ int dlm_finish_migration(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
2927 set_bit(old_master, iter.node_map); 3369 set_bit(old_master, iter.node_map);
2928 mlog(0, "doing assert master of %.*s back to %u\n", 3370 mlog(0, "doing assert master of %.*s back to %u\n",
2929 res->lockname.len, res->lockname.name, old_master); 3371 res->lockname.len, res->lockname.name, old_master);
2930 ret = dlm_do_assert_master(dlm, res->lockname.name, 3372 ret = dlm_do_assert_master(dlm, res, iter.node_map,
2931 res->lockname.len, iter.node_map,
2932 DLM_ASSERT_MASTER_FINISH_MIGRATION); 3373 DLM_ASSERT_MASTER_FINISH_MIGRATION);
2933 if (ret < 0) { 3374 if (ret < 0) {
2934 mlog(0, "assert master to original master failed " 3375 mlog(0, "assert master to original master failed "
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 367a11e9e2ed..6d4a83d50152 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -163,9 +163,6 @@ void dlm_dispatch_work(struct work_struct *work)
163 dlm_workfunc_t *workfunc; 163 dlm_workfunc_t *workfunc;
164 int tot=0; 164 int tot=0;
165 165
166 if (!dlm_joined(dlm))
167 return;
168
169 spin_lock(&dlm->work_lock); 166 spin_lock(&dlm->work_lock);
170 list_splice_init(&dlm->work_list, &tmp_list); 167 list_splice_init(&dlm->work_list, &tmp_list);
171 spin_unlock(&dlm->work_lock); 168 spin_unlock(&dlm->work_lock);
@@ -821,7 +818,8 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from,
821 818
822} 819}
823 820
824int dlm_request_all_locks_handler(struct o2net_msg *msg, u32 len, void *data) 821int dlm_request_all_locks_handler(struct o2net_msg *msg, u32 len, void *data,
822 void **ret_data)
825{ 823{
826 struct dlm_ctxt *dlm = data; 824 struct dlm_ctxt *dlm = data;
827 struct dlm_lock_request *lr = (struct dlm_lock_request *)msg->buf; 825 struct dlm_lock_request *lr = (struct dlm_lock_request *)msg->buf;
@@ -978,7 +976,8 @@ static int dlm_send_all_done_msg(struct dlm_ctxt *dlm, u8 dead_node, u8 send_to)
978} 976}
979 977
980 978
981int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data) 979int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data,
980 void **ret_data)
982{ 981{
983 struct dlm_ctxt *dlm = data; 982 struct dlm_ctxt *dlm = data;
984 struct dlm_reco_data_done *done = (struct dlm_reco_data_done *)msg->buf; 983 struct dlm_reco_data_done *done = (struct dlm_reco_data_done *)msg->buf;
@@ -1129,6 +1128,11 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
1129 if (total_locks == mres_total_locks) 1128 if (total_locks == mres_total_locks)
1130 mres->flags |= DLM_MRES_ALL_DONE; 1129 mres->flags |= DLM_MRES_ALL_DONE;
1131 1130
1131 mlog(0, "%s:%.*s: sending mig lockres (%s) to %u\n",
1132 dlm->name, res->lockname.len, res->lockname.name,
1133 orig_flags & DLM_MRES_MIGRATION ? "migrate" : "recovery",
1134 send_to);
1135
1132 /* send it */ 1136 /* send it */
1133 ret = o2net_send_message(DLM_MIG_LOCKRES_MSG, dlm->key, mres, 1137 ret = o2net_send_message(DLM_MIG_LOCKRES_MSG, dlm->key, mres,
1134 sz, send_to, &status); 1138 sz, send_to, &status);
@@ -1213,6 +1217,34 @@ static int dlm_add_lock_to_array(struct dlm_lock *lock,
1213 return 0; 1217 return 0;
1214} 1218}
1215 1219
1220static void dlm_add_dummy_lock(struct dlm_ctxt *dlm,
1221 struct dlm_migratable_lockres *mres)
1222{
1223 struct dlm_lock dummy;
1224 memset(&dummy, 0, sizeof(dummy));
1225 dummy.ml.cookie = 0;
1226 dummy.ml.type = LKM_IVMODE;
1227 dummy.ml.convert_type = LKM_IVMODE;
1228 dummy.ml.highest_blocked = LKM_IVMODE;
1229 dummy.lksb = NULL;
1230 dummy.ml.node = dlm->node_num;
1231 dlm_add_lock_to_array(&dummy, mres, DLM_BLOCKED_LIST);
1232}
1233
1234static inline int dlm_is_dummy_lock(struct dlm_ctxt *dlm,
1235 struct dlm_migratable_lock *ml,
1236 u8 *nodenum)
1237{
1238 if (unlikely(ml->cookie == 0 &&
1239 ml->type == LKM_IVMODE &&
1240 ml->convert_type == LKM_IVMODE &&
1241 ml->highest_blocked == LKM_IVMODE &&
1242 ml->list == DLM_BLOCKED_LIST)) {
1243 *nodenum = ml->node;
1244 return 1;
1245 }
1246 return 0;
1247}
1216 1248
1217int dlm_send_one_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, 1249int dlm_send_one_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
1218 struct dlm_migratable_lockres *mres, 1250 struct dlm_migratable_lockres *mres,
@@ -1260,6 +1292,14 @@ int dlm_send_one_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
1260 goto error; 1292 goto error;
1261 } 1293 }
1262 } 1294 }
1295 if (total_locks == 0) {
1296 /* send a dummy lock to indicate a mastery reference only */
1297 mlog(0, "%s:%.*s: sending dummy lock to %u, %s\n",
1298 dlm->name, res->lockname.len, res->lockname.name,
1299 send_to, flags & DLM_MRES_RECOVERY ? "recovery" :
1300 "migration");
1301 dlm_add_dummy_lock(dlm, mres);
1302 }
1263 /* flush any remaining locks */ 1303 /* flush any remaining locks */
1264 ret = dlm_send_mig_lockres_msg(dlm, mres, send_to, res, total_locks); 1304 ret = dlm_send_mig_lockres_msg(dlm, mres, send_to, res, total_locks);
1265 if (ret < 0) 1305 if (ret < 0)
@@ -1293,7 +1333,8 @@ error:
1293 * do we spin? returning an error only delays the problem really 1333 * do we spin? returning an error only delays the problem really
1294 */ 1334 */
1295 1335
1296int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data) 1336int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
1337 void **ret_data)
1297{ 1338{
1298 struct dlm_ctxt *dlm = data; 1339 struct dlm_ctxt *dlm = data;
1299 struct dlm_migratable_lockres *mres = 1340 struct dlm_migratable_lockres *mres =
@@ -1382,17 +1423,21 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data)
1382 spin_lock(&res->spinlock); 1423 spin_lock(&res->spinlock);
1383 res->state &= ~DLM_LOCK_RES_IN_PROGRESS; 1424 res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
1384 spin_unlock(&res->spinlock); 1425 spin_unlock(&res->spinlock);
1426 wake_up(&res->wq);
1385 1427
1386 /* add an extra ref for just-allocated lockres 1428 /* add an extra ref for just-allocated lockres
1387 * otherwise the lockres will be purged immediately */ 1429 * otherwise the lockres will be purged immediately */
1388 dlm_lockres_get(res); 1430 dlm_lockres_get(res);
1389
1390 } 1431 }
1391 1432
1392 /* at this point we have allocated everything we need, 1433 /* at this point we have allocated everything we need,
1393 * and we have a hashed lockres with an extra ref and 1434 * and we have a hashed lockres with an extra ref and
1394 * the proper res->state flags. */ 1435 * the proper res->state flags. */
1395 ret = 0; 1436 ret = 0;
1437 spin_lock(&res->spinlock);
1438 /* drop this either when master requery finds a different master
1439 * or when a lock is added by the recovery worker */
1440 dlm_lockres_grab_inflight_ref(dlm, res);
1396 if (mres->master == DLM_LOCK_RES_OWNER_UNKNOWN) { 1441 if (mres->master == DLM_LOCK_RES_OWNER_UNKNOWN) {
1397 /* migration cannot have an unknown master */ 1442 /* migration cannot have an unknown master */
1398 BUG_ON(!(mres->flags & DLM_MRES_RECOVERY)); 1443 BUG_ON(!(mres->flags & DLM_MRES_RECOVERY));
@@ -1400,10 +1445,11 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data)
1400 "unknown owner.. will need to requery: " 1445 "unknown owner.. will need to requery: "
1401 "%.*s\n", mres->lockname_len, mres->lockname); 1446 "%.*s\n", mres->lockname_len, mres->lockname);
1402 } else { 1447 } else {
1403 spin_lock(&res->spinlock); 1448 /* take a reference now to pin the lockres, drop it
1449 * when locks are added in the worker */
1404 dlm_change_lockres_owner(dlm, res, dlm->node_num); 1450 dlm_change_lockres_owner(dlm, res, dlm->node_num);
1405 spin_unlock(&res->spinlock);
1406 } 1451 }
1452 spin_unlock(&res->spinlock);
1407 1453
1408 /* queue up work for dlm_mig_lockres_worker */ 1454 /* queue up work for dlm_mig_lockres_worker */
1409 dlm_grab(dlm); /* get an extra ref for the work item */ 1455 dlm_grab(dlm); /* get an extra ref for the work item */
@@ -1459,6 +1505,9 @@ again:
1459 "this node will take it.\n", 1505 "this node will take it.\n",
1460 res->lockname.len, res->lockname.name); 1506 res->lockname.len, res->lockname.name);
1461 } else { 1507 } else {
1508 spin_lock(&res->spinlock);
1509 dlm_lockres_drop_inflight_ref(dlm, res);
1510 spin_unlock(&res->spinlock);
1462 mlog(0, "master needs to respond to sender " 1511 mlog(0, "master needs to respond to sender "
1463 "that node %u still owns %.*s\n", 1512 "that node %u still owns %.*s\n",
1464 real_master, res->lockname.len, 1513 real_master, res->lockname.len,
@@ -1578,7 +1627,8 @@ int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
1578/* this function cannot error, so unless the sending 1627/* this function cannot error, so unless the sending
1579 * or receiving of the message failed, the owner can 1628 * or receiving of the message failed, the owner can
1580 * be trusted */ 1629 * be trusted */
1581int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data) 1630int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
1631 void **ret_data)
1582{ 1632{
1583 struct dlm_ctxt *dlm = data; 1633 struct dlm_ctxt *dlm = data;
1584 struct dlm_master_requery *req = (struct dlm_master_requery *)msg->buf; 1634 struct dlm_master_requery *req = (struct dlm_master_requery *)msg->buf;
@@ -1660,21 +1710,38 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
1660{ 1710{
1661 struct dlm_migratable_lock *ml; 1711 struct dlm_migratable_lock *ml;
1662 struct list_head *queue; 1712 struct list_head *queue;
1713 struct list_head *tmpq = NULL;
1663 struct dlm_lock *newlock = NULL; 1714 struct dlm_lock *newlock = NULL;
1664 struct dlm_lockstatus *lksb = NULL; 1715 struct dlm_lockstatus *lksb = NULL;
1665 int ret = 0; 1716 int ret = 0;
1666 int i, bad; 1717 int i, j, bad;
1667 struct list_head *iter; 1718 struct list_head *iter;
1668 struct dlm_lock *lock = NULL; 1719 struct dlm_lock *lock = NULL;
1720 u8 from = O2NM_MAX_NODES;
1721 unsigned int added = 0;
1669 1722
1670 mlog(0, "running %d locks for this lockres\n", mres->num_locks); 1723 mlog(0, "running %d locks for this lockres\n", mres->num_locks);
1671 for (i=0; i<mres->num_locks; i++) { 1724 for (i=0; i<mres->num_locks; i++) {
1672 ml = &(mres->ml[i]); 1725 ml = &(mres->ml[i]);
1726
1727 if (dlm_is_dummy_lock(dlm, ml, &from)) {
1728 /* placeholder, just need to set the refmap bit */
1729 BUG_ON(mres->num_locks != 1);
1730 mlog(0, "%s:%.*s: dummy lock for %u\n",
1731 dlm->name, mres->lockname_len, mres->lockname,
1732 from);
1733 spin_lock(&res->spinlock);
1734 dlm_lockres_set_refmap_bit(from, res);
1735 spin_unlock(&res->spinlock);
1736 added++;
1737 break;
1738 }
1673 BUG_ON(ml->highest_blocked != LKM_IVMODE); 1739 BUG_ON(ml->highest_blocked != LKM_IVMODE);
1674 newlock = NULL; 1740 newlock = NULL;
1675 lksb = NULL; 1741 lksb = NULL;
1676 1742
1677 queue = dlm_list_num_to_pointer(res, ml->list); 1743 queue = dlm_list_num_to_pointer(res, ml->list);
1744 tmpq = NULL;
1678 1745
1679 /* if the lock is for the local node it needs to 1746 /* if the lock is for the local node it needs to
1680 * be moved to the proper location within the queue. 1747 * be moved to the proper location within the queue.
@@ -1684,11 +1751,16 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
1684 BUG_ON(!(mres->flags & DLM_MRES_MIGRATION)); 1751 BUG_ON(!(mres->flags & DLM_MRES_MIGRATION));
1685 1752
1686 spin_lock(&res->spinlock); 1753 spin_lock(&res->spinlock);
1687 list_for_each(iter, queue) { 1754 for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) {
1688 lock = list_entry (iter, struct dlm_lock, list); 1755 tmpq = dlm_list_idx_to_ptr(res, j);
1689 if (lock->ml.cookie != ml->cookie) 1756 list_for_each(iter, tmpq) {
1690 lock = NULL; 1757 lock = list_entry (iter, struct dlm_lock, list);
1691 else 1758 if (lock->ml.cookie != ml->cookie)
1759 lock = NULL;
1760 else
1761 break;
1762 }
1763 if (lock)
1692 break; 1764 break;
1693 } 1765 }
1694 1766
@@ -1698,12 +1770,20 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
1698 u64 c = ml->cookie; 1770 u64 c = ml->cookie;
1699 mlog(ML_ERROR, "could not find local lock " 1771 mlog(ML_ERROR, "could not find local lock "
1700 "with cookie %u:%llu!\n", 1772 "with cookie %u:%llu!\n",
1701 dlm_get_lock_cookie_node(c), 1773 dlm_get_lock_cookie_node(be64_to_cpu(c)),
1702 dlm_get_lock_cookie_seq(c)); 1774 dlm_get_lock_cookie_seq(be64_to_cpu(c)));
1775 __dlm_print_one_lock_resource(res);
1703 BUG(); 1776 BUG();
1704 } 1777 }
1705 BUG_ON(lock->ml.node != ml->node); 1778 BUG_ON(lock->ml.node != ml->node);
1706 1779
1780 if (tmpq != queue) {
1781 mlog(0, "lock was on %u instead of %u for %.*s\n",
1782 j, ml->list, res->lockname.len, res->lockname.name);
1783 spin_unlock(&res->spinlock);
1784 continue;
1785 }
1786
1707 /* see NOTE above about why we do not update 1787 /* see NOTE above about why we do not update
1708 * to match the master here */ 1788 * to match the master here */
1709 1789
@@ -1711,6 +1791,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
1711 /* do not alter lock refcount. switching lists. */ 1791 /* do not alter lock refcount. switching lists. */
1712 list_move_tail(&lock->list, queue); 1792 list_move_tail(&lock->list, queue);
1713 spin_unlock(&res->spinlock); 1793 spin_unlock(&res->spinlock);
1794 added++;
1714 1795
1715 mlog(0, "just reordered a local lock!\n"); 1796 mlog(0, "just reordered a local lock!\n");
1716 continue; 1797 continue;
@@ -1799,14 +1880,14 @@ skip_lvb:
1799 mlog(ML_ERROR, "%s:%.*s: %u:%llu: lock already " 1880 mlog(ML_ERROR, "%s:%.*s: %u:%llu: lock already "
1800 "exists on this lockres!\n", dlm->name, 1881 "exists on this lockres!\n", dlm->name,
1801 res->lockname.len, res->lockname.name, 1882 res->lockname.len, res->lockname.name,
1802 dlm_get_lock_cookie_node(c), 1883 dlm_get_lock_cookie_node(be64_to_cpu(c)),
1803 dlm_get_lock_cookie_seq(c)); 1884 dlm_get_lock_cookie_seq(be64_to_cpu(c)));
1804 1885
1805 mlog(ML_NOTICE, "sent lock: type=%d, conv=%d, " 1886 mlog(ML_NOTICE, "sent lock: type=%d, conv=%d, "
1806 "node=%u, cookie=%u:%llu, queue=%d\n", 1887 "node=%u, cookie=%u:%llu, queue=%d\n",
1807 ml->type, ml->convert_type, ml->node, 1888 ml->type, ml->convert_type, ml->node,
1808 dlm_get_lock_cookie_node(ml->cookie), 1889 dlm_get_lock_cookie_node(be64_to_cpu(ml->cookie)),
1809 dlm_get_lock_cookie_seq(ml->cookie), 1890 dlm_get_lock_cookie_seq(be64_to_cpu(ml->cookie)),
1810 ml->list); 1891 ml->list);
1811 1892
1812 __dlm_print_one_lock_resource(res); 1893 __dlm_print_one_lock_resource(res);
@@ -1817,12 +1898,22 @@ skip_lvb:
1817 if (!bad) { 1898 if (!bad) {
1818 dlm_lock_get(newlock); 1899 dlm_lock_get(newlock);
1819 list_add_tail(&newlock->list, queue); 1900 list_add_tail(&newlock->list, queue);
1901 mlog(0, "%s:%.*s: added lock for node %u, "
1902 "setting refmap bit\n", dlm->name,
1903 res->lockname.len, res->lockname.name, ml->node);
1904 dlm_lockres_set_refmap_bit(ml->node, res);
1905 added++;
1820 } 1906 }
1821 spin_unlock(&res->spinlock); 1907 spin_unlock(&res->spinlock);
1822 } 1908 }
1823 mlog(0, "done running all the locks\n"); 1909 mlog(0, "done running all the locks\n");
1824 1910
1825leave: 1911leave:
1912 /* balance the ref taken when the work was queued */
1913 spin_lock(&res->spinlock);
1914 dlm_lockres_drop_inflight_ref(dlm, res);
1915 spin_unlock(&res->spinlock);
1916
1826 if (ret < 0) { 1917 if (ret < 0) {
1827 mlog_errno(ret); 1918 mlog_errno(ret);
1828 if (newlock) 1919 if (newlock)
@@ -1935,9 +2026,11 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
1935 if (res->owner == dead_node) { 2026 if (res->owner == dead_node) {
1936 list_del_init(&res->recovering); 2027 list_del_init(&res->recovering);
1937 spin_lock(&res->spinlock); 2028 spin_lock(&res->spinlock);
2029 /* new_master has our reference from
2030 * the lock state sent during recovery */
1938 dlm_change_lockres_owner(dlm, res, new_master); 2031 dlm_change_lockres_owner(dlm, res, new_master);
1939 res->state &= ~DLM_LOCK_RES_RECOVERING; 2032 res->state &= ~DLM_LOCK_RES_RECOVERING;
1940 if (!__dlm_lockres_unused(res)) 2033 if (__dlm_lockres_has_locks(res))
1941 __dlm_dirty_lockres(dlm, res); 2034 __dlm_dirty_lockres(dlm, res);
1942 spin_unlock(&res->spinlock); 2035 spin_unlock(&res->spinlock);
1943 wake_up(&res->wq); 2036 wake_up(&res->wq);
@@ -1977,9 +2070,11 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
1977 dlm_lockres_put(res); 2070 dlm_lockres_put(res);
1978 } 2071 }
1979 spin_lock(&res->spinlock); 2072 spin_lock(&res->spinlock);
2073 /* new_master has our reference from
2074 * the lock state sent during recovery */
1980 dlm_change_lockres_owner(dlm, res, new_master); 2075 dlm_change_lockres_owner(dlm, res, new_master);
1981 res->state &= ~DLM_LOCK_RES_RECOVERING; 2076 res->state &= ~DLM_LOCK_RES_RECOVERING;
1982 if (!__dlm_lockres_unused(res)) 2077 if (__dlm_lockres_has_locks(res))
1983 __dlm_dirty_lockres(dlm, res); 2078 __dlm_dirty_lockres(dlm, res);
1984 spin_unlock(&res->spinlock); 2079 spin_unlock(&res->spinlock);
1985 wake_up(&res->wq); 2080 wake_up(&res->wq);
@@ -2048,6 +2143,7 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
2048{ 2143{
2049 struct list_head *iter, *tmpiter; 2144 struct list_head *iter, *tmpiter;
2050 struct dlm_lock *lock; 2145 struct dlm_lock *lock;
2146 unsigned int freed = 0;
2051 2147
2052 /* this node is the lockres master: 2148 /* this node is the lockres master:
2053 * 1) remove any stale locks for the dead node 2149 * 1) remove any stale locks for the dead node
@@ -2062,6 +2158,7 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
2062 if (lock->ml.node == dead_node) { 2158 if (lock->ml.node == dead_node) {
2063 list_del_init(&lock->list); 2159 list_del_init(&lock->list);
2064 dlm_lock_put(lock); 2160 dlm_lock_put(lock);
2161 freed++;
2065 } 2162 }
2066 } 2163 }
2067 list_for_each_safe(iter, tmpiter, &res->converting) { 2164 list_for_each_safe(iter, tmpiter, &res->converting) {
@@ -2069,6 +2166,7 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
2069 if (lock->ml.node == dead_node) { 2166 if (lock->ml.node == dead_node) {
2070 list_del_init(&lock->list); 2167 list_del_init(&lock->list);
2071 dlm_lock_put(lock); 2168 dlm_lock_put(lock);
2169 freed++;
2072 } 2170 }
2073 } 2171 }
2074 list_for_each_safe(iter, tmpiter, &res->blocked) { 2172 list_for_each_safe(iter, tmpiter, &res->blocked) {
@@ -2076,9 +2174,23 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
2076 if (lock->ml.node == dead_node) { 2174 if (lock->ml.node == dead_node) {
2077 list_del_init(&lock->list); 2175 list_del_init(&lock->list);
2078 dlm_lock_put(lock); 2176 dlm_lock_put(lock);
2177 freed++;
2079 } 2178 }
2080 } 2179 }
2081 2180
2181 if (freed) {
2182 mlog(0, "%s:%.*s: freed %u locks for dead node %u, "
2183 "dropping ref from lockres\n", dlm->name,
2184 res->lockname.len, res->lockname.name, freed, dead_node);
2185 BUG_ON(!test_bit(dead_node, res->refmap));
2186 dlm_lockres_clear_refmap_bit(dead_node, res);
2187 } else if (test_bit(dead_node, res->refmap)) {
2188 mlog(0, "%s:%.*s: dead node %u had a ref, but had "
2189 "no locks and had not purged before dying\n", dlm->name,
2190 res->lockname.len, res->lockname.name, dead_node);
2191 dlm_lockres_clear_refmap_bit(dead_node, res);
2192 }
2193
2082 /* do not kick thread yet */ 2194 /* do not kick thread yet */
2083 __dlm_dirty_lockres(dlm, res); 2195 __dlm_dirty_lockres(dlm, res);
2084} 2196}
@@ -2141,9 +2253,21 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
2141 spin_lock(&res->spinlock); 2253 spin_lock(&res->spinlock);
2142 /* zero the lvb if necessary */ 2254 /* zero the lvb if necessary */
2143 dlm_revalidate_lvb(dlm, res, dead_node); 2255 dlm_revalidate_lvb(dlm, res, dead_node);
2144 if (res->owner == dead_node) 2256 if (res->owner == dead_node) {
2257 if (res->state & DLM_LOCK_RES_DROPPING_REF)
2258 mlog(0, "%s:%.*s: owned by "
2259 "dead node %u, this node was "
2260 "dropping its ref when it died. "
2261 "continue, dropping the flag.\n",
2262 dlm->name, res->lockname.len,
2263 res->lockname.name, dead_node);
2264
2265 /* the wake_up for this will happen when the
2266 * RECOVERING flag is dropped later */
2267 res->state &= ~DLM_LOCK_RES_DROPPING_REF;
2268
2145 dlm_move_lockres_to_recovery_list(dlm, res); 2269 dlm_move_lockres_to_recovery_list(dlm, res);
2146 else if (res->owner == dlm->node_num) { 2270 } else if (res->owner == dlm->node_num) {
2147 dlm_free_dead_locks(dlm, res, dead_node); 2271 dlm_free_dead_locks(dlm, res, dead_node);
2148 __dlm_lockres_calc_usage(dlm, res); 2272 __dlm_lockres_calc_usage(dlm, res);
2149 } 2273 }
@@ -2480,7 +2604,8 @@ retry:
2480 return ret; 2604 return ret;
2481} 2605}
2482 2606
2483int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data) 2607int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data,
2608 void **ret_data)
2484{ 2609{
2485 struct dlm_ctxt *dlm = data; 2610 struct dlm_ctxt *dlm = data;
2486 struct dlm_begin_reco *br = (struct dlm_begin_reco *)msg->buf; 2611 struct dlm_begin_reco *br = (struct dlm_begin_reco *)msg->buf;
@@ -2608,7 +2733,8 @@ stage2:
2608 return ret; 2733 return ret;
2609} 2734}
2610 2735
2611int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data) 2736int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data,
2737 void **ret_data)
2612{ 2738{
2613 struct dlm_ctxt *dlm = data; 2739 struct dlm_ctxt *dlm = data;
2614 struct dlm_finalize_reco *fr = (struct dlm_finalize_reco *)msg->buf; 2740 struct dlm_finalize_reco *fr = (struct dlm_finalize_reco *)msg->buf;
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 0c822f3ffb05..8ffa0916eb86 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -54,9 +54,6 @@
54#include "cluster/masklog.h" 54#include "cluster/masklog.h"
55 55
56static int dlm_thread(void *data); 56static int dlm_thread(void *data);
57static void dlm_purge_lockres_now(struct dlm_ctxt *dlm,
58 struct dlm_lock_resource *lockres);
59
60static void dlm_flush_asts(struct dlm_ctxt *dlm); 57static void dlm_flush_asts(struct dlm_ctxt *dlm);
61 58
62#define dlm_lock_is_remote(dlm, lock) ((lock)->ml.node != (dlm)->node_num) 59#define dlm_lock_is_remote(dlm, lock) ((lock)->ml.node != (dlm)->node_num)
@@ -82,14 +79,33 @@ repeat:
82 current->state = TASK_RUNNING; 79 current->state = TASK_RUNNING;
83} 80}
84 81
85 82int __dlm_lockres_has_locks(struct dlm_lock_resource *res)
86int __dlm_lockres_unused(struct dlm_lock_resource *res)
87{ 83{
88 if (list_empty(&res->granted) && 84 if (list_empty(&res->granted) &&
89 list_empty(&res->converting) && 85 list_empty(&res->converting) &&
90 list_empty(&res->blocked) && 86 list_empty(&res->blocked))
91 list_empty(&res->dirty)) 87 return 0;
92 return 1; 88 return 1;
89}
90
91/* "unused": the lockres has no locks, is not on the dirty list,
92 * has no inflight locks (in the gap between mastery and acquiring
93 * the first lock), and has no bits in its refmap.
94 * truly ready to be freed. */
95int __dlm_lockres_unused(struct dlm_lock_resource *res)
96{
97 if (!__dlm_lockres_has_locks(res) &&
98 (list_empty(&res->dirty) && !(res->state & DLM_LOCK_RES_DIRTY))) {
99 /* try not to scan the bitmap unless the first two
100 * conditions are already true */
101 int bit = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
102 if (bit >= O2NM_MAX_NODES) {
103 /* since the bit for dlm->node_num is not
104 * set, inflight_locks better be zero */
105 BUG_ON(res->inflight_locks != 0);
106 return 1;
107 }
108 }
93 return 0; 109 return 0;
94} 110}
95 111
@@ -106,46 +122,21 @@ void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
106 assert_spin_locked(&res->spinlock); 122 assert_spin_locked(&res->spinlock);
107 123
108 if (__dlm_lockres_unused(res)){ 124 if (__dlm_lockres_unused(res)){
109 /* For now, just keep any resource we master */
110 if (res->owner == dlm->node_num)
111 {
112 if (!list_empty(&res->purge)) {
113 mlog(0, "we master %s:%.*s, but it is on "
114 "the purge list. Removing\n",
115 dlm->name, res->lockname.len,
116 res->lockname.name);
117 list_del_init(&res->purge);
118 dlm->purge_count--;
119 }
120 return;
121 }
122
123 if (list_empty(&res->purge)) { 125 if (list_empty(&res->purge)) {
124 mlog(0, "putting lockres %.*s from purge list\n", 126 mlog(0, "putting lockres %.*s:%p onto purge list\n",
125 res->lockname.len, res->lockname.name); 127 res->lockname.len, res->lockname.name, res);
126 128
127 res->last_used = jiffies; 129 res->last_used = jiffies;
130 dlm_lockres_get(res);
128 list_add_tail(&res->purge, &dlm->purge_list); 131 list_add_tail(&res->purge, &dlm->purge_list);
129 dlm->purge_count++; 132 dlm->purge_count++;
130
131 /* if this node is not the owner, there is
132 * no way to keep track of who the owner could be.
133 * unhash it to avoid serious problems. */
134 if (res->owner != dlm->node_num) {
135 mlog(0, "%s:%.*s: doing immediate "
136 "purge of lockres owned by %u\n",
137 dlm->name, res->lockname.len,
138 res->lockname.name, res->owner);
139
140 dlm_purge_lockres_now(dlm, res);
141 }
142 } 133 }
143 } else if (!list_empty(&res->purge)) { 134 } else if (!list_empty(&res->purge)) {
144 mlog(0, "removing lockres %.*s from purge list, " 135 mlog(0, "removing lockres %.*s:%p from purge list, owner=%u\n",
145 "owner=%u\n", res->lockname.len, res->lockname.name, 136 res->lockname.len, res->lockname.name, res, res->owner);
146 res->owner);
147 137
148 list_del_init(&res->purge); 138 list_del_init(&res->purge);
139 dlm_lockres_put(res);
149 dlm->purge_count--; 140 dlm->purge_count--;
150 } 141 }
151} 142}
@@ -163,68 +154,65 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
163 spin_unlock(&dlm->spinlock); 154 spin_unlock(&dlm->spinlock);
164} 155}
165 156
166/* TODO: Eventual API: Called with the dlm spinlock held, may drop it 157static int dlm_purge_lockres(struct dlm_ctxt *dlm,
167 * to do migration, but will re-acquire before exit. */ 158 struct dlm_lock_resource *res)
168void dlm_purge_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *lockres)
169{ 159{
170 int master; 160 int master;
171 int ret; 161 int ret = 0;
172
173 spin_lock(&lockres->spinlock);
174 master = lockres->owner == dlm->node_num;
175 spin_unlock(&lockres->spinlock);
176 162
177 mlog(0, "purging lockres %.*s, master = %d\n", lockres->lockname.len, 163 spin_lock(&res->spinlock);
178 lockres->lockname.name, master); 164 if (!__dlm_lockres_unused(res)) {
179 165 spin_unlock(&res->spinlock);
180 /* Non master is the easy case -- no migration required, just 166 mlog(0, "%s:%.*s: tried to purge but not unused\n",
181 * quit. */ 167 dlm->name, res->lockname.len, res->lockname.name);
168 return -ENOTEMPTY;
169 }
170 master = (res->owner == dlm->node_num);
182 if (!master) 171 if (!master)
183 goto finish; 172 res->state |= DLM_LOCK_RES_DROPPING_REF;
184 173 spin_unlock(&res->spinlock);
185 /* Wheee! Migrate lockres here! */
186 spin_unlock(&dlm->spinlock);
187again:
188 174
189 ret = dlm_migrate_lockres(dlm, lockres, O2NM_MAX_NODES); 175 mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len,
190 if (ret == -ENOTEMPTY) { 176 res->lockname.name, master);
191 mlog(ML_ERROR, "lockres %.*s still has local locks!\n",
192 lockres->lockname.len, lockres->lockname.name);
193 177
194 BUG(); 178 if (!master) {
195 } else if (ret < 0) { 179 spin_lock(&res->spinlock);
196 mlog(ML_NOTICE, "lockres %.*s: migrate failed, retrying\n", 180 /* This ensures that clear refmap is sent after the set */
197 lockres->lockname.len, lockres->lockname.name); 181 __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG);
198 msleep(100); 182 spin_unlock(&res->spinlock);
199 goto again; 183 /* drop spinlock to do messaging, retake below */
184 spin_unlock(&dlm->spinlock);
185 /* clear our bit from the master's refmap, ignore errors */
186 ret = dlm_drop_lockres_ref(dlm, res);
187 if (ret < 0) {
188 mlog_errno(ret);
189 if (!dlm_is_host_down(ret))
190 BUG();
191 }
192 mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n",
193 dlm->name, res->lockname.len, res->lockname.name, ret);
194 spin_lock(&dlm->spinlock);
200 } 195 }
201 196
202 spin_lock(&dlm->spinlock); 197 if (!list_empty(&res->purge)) {
203 198 mlog(0, "removing lockres %.*s:%p from purgelist, "
204finish: 199 "master = %d\n", res->lockname.len, res->lockname.name,
205 if (!list_empty(&lockres->purge)) { 200 res, master);
206 list_del_init(&lockres->purge); 201 list_del_init(&res->purge);
202 dlm_lockres_put(res);
207 dlm->purge_count--; 203 dlm->purge_count--;
208 } 204 }
209 __dlm_unhash_lockres(lockres); 205 __dlm_unhash_lockres(res);
210}
211
212/* make an unused lockres go away immediately.
213 * as soon as the dlm spinlock is dropped, this lockres
214 * will not be found. kfree still happens on last put. */
215static void dlm_purge_lockres_now(struct dlm_ctxt *dlm,
216 struct dlm_lock_resource *lockres)
217{
218 assert_spin_locked(&dlm->spinlock);
219 assert_spin_locked(&lockres->spinlock);
220 206
221 BUG_ON(!__dlm_lockres_unused(lockres)); 207 /* lockres is not in the hash now. drop the flag and wake up
222 208 * any processes waiting in dlm_get_lock_resource. */
223 if (!list_empty(&lockres->purge)) { 209 if (!master) {
224 list_del_init(&lockres->purge); 210 spin_lock(&res->spinlock);
225 dlm->purge_count--; 211 res->state &= ~DLM_LOCK_RES_DROPPING_REF;
212 spin_unlock(&res->spinlock);
213 wake_up(&res->wq);
226 } 214 }
227 __dlm_unhash_lockres(lockres); 215 return 0;
228} 216}
229 217
230static void dlm_run_purge_list(struct dlm_ctxt *dlm, 218static void dlm_run_purge_list(struct dlm_ctxt *dlm,
@@ -268,13 +256,17 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
268 break; 256 break;
269 } 257 }
270 258
259 mlog(0, "removing lockres %.*s:%p from purgelist\n",
260 lockres->lockname.len, lockres->lockname.name, lockres);
271 list_del_init(&lockres->purge); 261 list_del_init(&lockres->purge);
262 dlm_lockres_put(lockres);
272 dlm->purge_count--; 263 dlm->purge_count--;
273 264
274 /* This may drop and reacquire the dlm spinlock if it 265 /* This may drop and reacquire the dlm spinlock if it
275 * has to do migration. */ 266 * has to do migration. */
276 mlog(0, "calling dlm_purge_lockres!\n"); 267 mlog(0, "calling dlm_purge_lockres!\n");
277 dlm_purge_lockres(dlm, lockres); 268 if (dlm_purge_lockres(dlm, lockres))
269 BUG();
278 mlog(0, "DONE calling dlm_purge_lockres!\n"); 270 mlog(0, "DONE calling dlm_purge_lockres!\n");
279 271
280 /* Avoid adding any scheduling latencies */ 272 /* Avoid adding any scheduling latencies */
@@ -467,12 +459,17 @@ void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
467 assert_spin_locked(&res->spinlock); 459 assert_spin_locked(&res->spinlock);
468 460
469 /* don't shuffle secondary queues */ 461 /* don't shuffle secondary queues */
470 if ((res->owner == dlm->node_num) && 462 if ((res->owner == dlm->node_num)) {
471 !(res->state & DLM_LOCK_RES_DIRTY)) { 463 if (res->state & (DLM_LOCK_RES_MIGRATING |
472 /* ref for dirty_list */ 464 DLM_LOCK_RES_BLOCK_DIRTY))
473 dlm_lockres_get(res); 465 return;
474 list_add_tail(&res->dirty, &dlm->dirty_list); 466
475 res->state |= DLM_LOCK_RES_DIRTY; 467 if (list_empty(&res->dirty)) {
468 /* ref for dirty_list */
469 dlm_lockres_get(res);
470 list_add_tail(&res->dirty, &dlm->dirty_list);
471 res->state |= DLM_LOCK_RES_DIRTY;
472 }
476 } 473 }
477} 474}
478 475
@@ -651,7 +648,7 @@ static int dlm_thread(void *data)
651 dlm_lockres_get(res); 648 dlm_lockres_get(res);
652 649
653 spin_lock(&res->spinlock); 650 spin_lock(&res->spinlock);
654 res->state &= ~DLM_LOCK_RES_DIRTY; 651 /* We clear the DLM_LOCK_RES_DIRTY state once we shuffle lists below */
655 list_del_init(&res->dirty); 652 list_del_init(&res->dirty);
656 spin_unlock(&res->spinlock); 653 spin_unlock(&res->spinlock);
657 spin_unlock(&dlm->spinlock); 654 spin_unlock(&dlm->spinlock);
@@ -675,10 +672,11 @@ static int dlm_thread(void *data)
675 /* it is now ok to move lockreses in these states 672 /* it is now ok to move lockreses in these states
676 * to the dirty list, assuming that they will only be 673 * to the dirty list, assuming that they will only be
677 * dirty for a short while. */ 674 * dirty for a short while. */
675 BUG_ON(res->state & DLM_LOCK_RES_MIGRATING);
678 if (res->state & (DLM_LOCK_RES_IN_PROGRESS | 676 if (res->state & (DLM_LOCK_RES_IN_PROGRESS |
679 DLM_LOCK_RES_MIGRATING |
680 DLM_LOCK_RES_RECOVERING)) { 677 DLM_LOCK_RES_RECOVERING)) {
681 /* move it to the tail and keep going */ 678 /* move it to the tail and keep going */
679 res->state &= ~DLM_LOCK_RES_DIRTY;
682 spin_unlock(&res->spinlock); 680 spin_unlock(&res->spinlock);
683 mlog(0, "delaying list shuffling for in-" 681 mlog(0, "delaying list shuffling for in-"
684 "progress lockres %.*s, state=%d\n", 682 "progress lockres %.*s, state=%d\n",
@@ -699,6 +697,7 @@ static int dlm_thread(void *data)
699 697
700 /* called while holding lockres lock */ 698 /* called while holding lockres lock */
701 dlm_shuffle_lists(dlm, res); 699 dlm_shuffle_lists(dlm, res);
700 res->state &= ~DLM_LOCK_RES_DIRTY;
702 spin_unlock(&res->spinlock); 701 spin_unlock(&res->spinlock);
703 702
704 dlm_lockres_calc_usage(dlm, res); 703 dlm_lockres_calc_usage(dlm, res);
@@ -709,11 +708,8 @@ in_progress:
709 /* if the lock was in-progress, stick 708 /* if the lock was in-progress, stick
710 * it on the back of the list */ 709 * it on the back of the list */
711 if (delay) { 710 if (delay) {
712 /* ref for dirty_list */
713 dlm_lockres_get(res);
714 spin_lock(&res->spinlock); 711 spin_lock(&res->spinlock);
715 list_add_tail(&res->dirty, &dlm->dirty_list); 712 __dlm_dirty_lockres(dlm, res);
716 res->state |= DLM_LOCK_RES_DIRTY;
717 spin_unlock(&res->spinlock); 713 spin_unlock(&res->spinlock);
718 } 714 }
719 dlm_lockres_put(res); 715 dlm_lockres_put(res);
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index 37be4b2e0d4a..86ca085ef324 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -147,6 +147,10 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
147 goto leave; 147 goto leave;
148 } 148 }
149 149
150 if (res->state & DLM_LOCK_RES_MIGRATING) {
151 status = DLM_MIGRATING;
152 goto leave;
153 }
150 154
151 /* see above for what the spec says about 155 /* see above for what the spec says about
152 * LKM_CANCEL and the lock queue state */ 156 * LKM_CANCEL and the lock queue state */
@@ -244,8 +248,8 @@ leave:
244 /* this should always be coupled with list removal */ 248 /* this should always be coupled with list removal */
245 BUG_ON(!(actions & DLM_UNLOCK_REMOVE_LOCK)); 249 BUG_ON(!(actions & DLM_UNLOCK_REMOVE_LOCK));
246 mlog(0, "lock %u:%llu should be gone now! refs=%d\n", 250 mlog(0, "lock %u:%llu should be gone now! refs=%d\n",
247 dlm_get_lock_cookie_node(lock->ml.cookie), 251 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
248 dlm_get_lock_cookie_seq(lock->ml.cookie), 252 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
249 atomic_read(&lock->lock_refs.refcount)-1); 253 atomic_read(&lock->lock_refs.refcount)-1);
250 dlm_lock_put(lock); 254 dlm_lock_put(lock);
251 } 255 }
@@ -379,7 +383,8 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
379 * returns: DLM_NORMAL, DLM_BADARGS, DLM_IVLOCKID, 383 * returns: DLM_NORMAL, DLM_BADARGS, DLM_IVLOCKID,
380 * return value from dlmunlock_master 384 * return value from dlmunlock_master
381 */ 385 */
382int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data) 386int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data,
387 void **ret_data)
383{ 388{
384 struct dlm_ctxt *dlm = data; 389 struct dlm_ctxt *dlm = data;
385 struct dlm_unlock_lock *unlock = (struct dlm_unlock_lock *)msg->buf; 390 struct dlm_unlock_lock *unlock = (struct dlm_unlock_lock *)msg->buf;
@@ -502,8 +507,8 @@ not_found:
502 if (!found) 507 if (!found)
503 mlog(ML_ERROR, "failed to find lock to unlock! " 508 mlog(ML_ERROR, "failed to find lock to unlock! "
504 "cookie=%u:%llu\n", 509 "cookie=%u:%llu\n",
505 dlm_get_lock_cookie_node(unlock->cookie), 510 dlm_get_lock_cookie_node(be64_to_cpu(unlock->cookie)),
506 dlm_get_lock_cookie_seq(unlock->cookie)); 511 dlm_get_lock_cookie_seq(be64_to_cpu(unlock->cookie)));
507 else 512 else
508 dlm_lock_put(lock); 513 dlm_lock_put(lock);
509 514
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 10953a508f2f..f2cd3bf9efb2 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1365,13 +1365,13 @@ bail:
1365 return ret; 1365 return ret;
1366} 1366}
1367 1367
1368struct inode_operations ocfs2_file_iops = { 1368const struct inode_operations ocfs2_file_iops = {
1369 .setattr = ocfs2_setattr, 1369 .setattr = ocfs2_setattr,
1370 .getattr = ocfs2_getattr, 1370 .getattr = ocfs2_getattr,
1371 .permission = ocfs2_permission, 1371 .permission = ocfs2_permission,
1372}; 1372};
1373 1373
1374struct inode_operations ocfs2_special_file_iops = { 1374const struct inode_operations ocfs2_special_file_iops = {
1375 .setattr = ocfs2_setattr, 1375 .setattr = ocfs2_setattr,
1376 .getattr = ocfs2_getattr, 1376 .getattr = ocfs2_getattr,
1377 .permission = ocfs2_permission, 1377 .permission = ocfs2_permission,
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 601a453f18a8..cc973f01f6ce 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -28,8 +28,8 @@
28 28
29extern const struct file_operations ocfs2_fops; 29extern const struct file_operations ocfs2_fops;
30extern const struct file_operations ocfs2_dops; 30extern const struct file_operations ocfs2_dops;
31extern struct inode_operations ocfs2_file_iops; 31extern const struct inode_operations ocfs2_file_iops;
32extern struct inode_operations ocfs2_special_file_iops; 32extern const struct inode_operations ocfs2_special_file_iops;
33struct ocfs2_alloc_context; 33struct ocfs2_alloc_context;
34 34
35enum ocfs2_alloc_restarted { 35enum ocfs2_alloc_restarted {
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index e1216364d191..d026b4f27757 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -306,8 +306,8 @@ int ocfs2_journal_dirty_data(handle_t *handle,
306 * for the dinode, one for the new block. */ 306 * for the dinode, one for the new block. */
307#define OCFS2_SIMPLE_DIR_EXTEND_CREDITS (2) 307#define OCFS2_SIMPLE_DIR_EXTEND_CREDITS (2)
308 308
309/* file update (nlink, etc) + dir entry block */ 309/* file update (nlink, etc) + directory mtime/ctime + dir entry block */
310#define OCFS2_LINK_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) 310#define OCFS2_LINK_CREDITS (2*OCFS2_INODE_UPDATE_CREDITS + 1)
311 311
312/* inode + dir inode (if we unlink a dir), + dir entry block + orphan 312/* inode + dir inode (if we unlink a dir), + dir entry block + orphan
313 * dir inode link */ 313 * dir inode link */
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index f3d7803b4b46..f7fa52bb3f6b 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -2306,7 +2306,7 @@ leave:
2306 return status; 2306 return status;
2307} 2307}
2308 2308
2309struct inode_operations ocfs2_dir_iops = { 2309const struct inode_operations ocfs2_dir_iops = {
2310 .create = ocfs2_create, 2310 .create = ocfs2_create,
2311 .lookup = ocfs2_lookup, 2311 .lookup = ocfs2_lookup,
2312 .link = ocfs2_link, 2312 .link = ocfs2_link,
diff --git a/fs/ocfs2/namei.h b/fs/ocfs2/namei.h
index 8425944fcccd..0975c7b7212b 100644
--- a/fs/ocfs2/namei.h
+++ b/fs/ocfs2/namei.h
@@ -26,7 +26,7 @@
26#ifndef OCFS2_NAMEI_H 26#ifndef OCFS2_NAMEI_H
27#define OCFS2_NAMEI_H 27#define OCFS2_NAMEI_H
28 28
29extern struct inode_operations ocfs2_dir_iops; 29extern const struct inode_operations ocfs2_dir_iops;
30 30
31struct dentry *ocfs2_get_parent(struct dentry *child); 31struct dentry *ocfs2_get_parent(struct dentry *child);
32 32
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 6e300a88a47e..6534f92424dd 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -116,7 +116,7 @@ static void ocfs2_destroy_inode(struct inode *inode);
116 116
117static unsigned long long ocfs2_max_file_offset(unsigned int blockshift); 117static unsigned long long ocfs2_max_file_offset(unsigned int blockshift);
118 118
119static struct super_operations ocfs2_sops = { 119static const struct super_operations ocfs2_sops = {
120 .statfs = ocfs2_statfs, 120 .statfs = ocfs2_statfs,
121 .alloc_inode = ocfs2_alloc_inode, 121 .alloc_inode = ocfs2_alloc_inode,
122 .destroy_inode = ocfs2_destroy_inode, 122 .destroy_inode = ocfs2_destroy_inode,
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index 03b0191534d5..40dc1a51f4a9 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -170,12 +170,12 @@ bail:
170 return ERR_PTR(status); 170 return ERR_PTR(status);
171} 171}
172 172
173struct inode_operations ocfs2_symlink_inode_operations = { 173const struct inode_operations ocfs2_symlink_inode_operations = {
174 .readlink = page_readlink, 174 .readlink = page_readlink,
175 .follow_link = ocfs2_follow_link, 175 .follow_link = ocfs2_follow_link,
176 .getattr = ocfs2_getattr, 176 .getattr = ocfs2_getattr,
177}; 177};
178struct inode_operations ocfs2_fast_symlink_inode_operations = { 178const struct inode_operations ocfs2_fast_symlink_inode_operations = {
179 .readlink = ocfs2_readlink, 179 .readlink = ocfs2_readlink,
180 .follow_link = ocfs2_follow_link, 180 .follow_link = ocfs2_follow_link,
181 .getattr = ocfs2_getattr, 181 .getattr = ocfs2_getattr,
diff --git a/fs/ocfs2/symlink.h b/fs/ocfs2/symlink.h
index 1ea9e4d9e9eb..65a6c9c6ad51 100644
--- a/fs/ocfs2/symlink.h
+++ b/fs/ocfs2/symlink.h
@@ -26,8 +26,8 @@
26#ifndef OCFS2_SYMLINK_H 26#ifndef OCFS2_SYMLINK_H
27#define OCFS2_SYMLINK_H 27#define OCFS2_SYMLINK_H
28 28
29extern struct inode_operations ocfs2_symlink_inode_operations; 29extern const struct inode_operations ocfs2_symlink_inode_operations;
30extern struct inode_operations ocfs2_fast_symlink_inode_operations; 30extern const struct inode_operations ocfs2_fast_symlink_inode_operations;
31 31
32/* 32/*
33 * Test whether an inode is a fast symlink. 33 * Test whether an inode is a fast symlink.
diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c
index 0afd8b9af70f..f30e63b9910c 100644
--- a/fs/ocfs2/vote.c
+++ b/fs/ocfs2/vote.c
@@ -887,7 +887,7 @@ static inline int ocfs2_translate_response(int response)
887 887
888static int ocfs2_handle_response_message(struct o2net_msg *msg, 888static int ocfs2_handle_response_message(struct o2net_msg *msg,
889 u32 len, 889 u32 len,
890 void *data) 890 void *data, void **ret_data)
891{ 891{
892 unsigned int response_id, node_num; 892 unsigned int response_id, node_num;
893 int response_status; 893 int response_status;
@@ -943,7 +943,7 @@ bail:
943 943
944static int ocfs2_handle_vote_message(struct o2net_msg *msg, 944static int ocfs2_handle_vote_message(struct o2net_msg *msg,
945 u32 len, 945 u32 len,
946 void *data) 946 void *data, void **ret_data)
947{ 947{
948 int status; 948 int status;
949 struct ocfs2_super *osb = data; 949 struct ocfs2_super *osb = data;
@@ -1007,7 +1007,7 @@ int ocfs2_register_net_handlers(struct ocfs2_super *osb)
1007 osb->net_key, 1007 osb->net_key,
1008 sizeof(struct ocfs2_response_msg), 1008 sizeof(struct ocfs2_response_msg),
1009 ocfs2_handle_response_message, 1009 ocfs2_handle_response_message,
1010 osb, &osb->osb_net_handlers); 1010 osb, NULL, &osb->osb_net_handlers);
1011 if (status) { 1011 if (status) {
1012 mlog_errno(status); 1012 mlog_errno(status);
1013 goto bail; 1013 goto bail;
@@ -1017,7 +1017,7 @@ int ocfs2_register_net_handlers(struct ocfs2_super *osb)
1017 osb->net_key, 1017 osb->net_key,
1018 sizeof(struct ocfs2_vote_msg), 1018 sizeof(struct ocfs2_vote_msg),
1019 ocfs2_handle_vote_message, 1019 ocfs2_handle_vote_message,
1020 osb, &osb->osb_net_handlers); 1020 osb, NULL, &osb->osb_net_handlers);
1021 if (status) { 1021 if (status) {
1022 mlog_errno(status); 1022 mlog_errno(status);
1023 goto bail; 1023 goto bail;
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 99c0bc37ba09..bde1c164417d 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -169,7 +169,7 @@ static const struct file_operations openprom_operations = {
169 169
170static struct dentry *openpromfs_lookup(struct inode *, struct dentry *, struct nameidata *); 170static struct dentry *openpromfs_lookup(struct inode *, struct dentry *, struct nameidata *);
171 171
172static struct inode_operations openprom_inode_operations = { 172static const struct inode_operations openprom_inode_operations = {
173 .lookup = openpromfs_lookup, 173 .lookup = openpromfs_lookup,
174}; 174};
175 175
@@ -364,7 +364,7 @@ static int openprom_remount(struct super_block *sb, int *flags, char *data)
364 return 0; 364 return 0;
365} 365}
366 366
367static struct super_operations openprom_sops = { 367static const struct super_operations openprom_sops = {
368 .alloc_inode = openprom_alloc_inode, 368 .alloc_inode = openprom_alloc_inode,
369 .destroy_inode = openprom_destroy_inode, 369 .destroy_inode = openprom_destroy_inode,
370 .read_inode = openprom_read_inode, 370 .read_inode = openprom_read_inode,
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 3d73d94d93a7..ac32a2e8540c 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -365,7 +365,7 @@ void delete_partition(struct gendisk *disk, int part)
365 kobject_put(&p->kobj); 365 kobject_put(&p->kobj);
366} 366}
367 367
368void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len) 368void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags)
369{ 369{
370 struct hd_struct *p; 370 struct hd_struct *p;
371 371
@@ -390,6 +390,15 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len)
390 if (!disk->part_uevent_suppress) 390 if (!disk->part_uevent_suppress)
391 kobject_uevent(&p->kobj, KOBJ_ADD); 391 kobject_uevent(&p->kobj, KOBJ_ADD);
392 sysfs_create_link(&p->kobj, &block_subsys.kset.kobj, "subsystem"); 392 sysfs_create_link(&p->kobj, &block_subsys.kset.kobj, "subsystem");
393 if (flags & ADDPART_FLAG_WHOLEDISK) {
394 static struct attribute addpartattr = {
395 .name = "whole_disk",
396 .mode = S_IRUSR | S_IRGRP | S_IROTH,
397 .owner = THIS_MODULE,
398 };
399
400 sysfs_create_file(&p->kobj, &addpartattr);
401 }
393 partition_sysfs_add_subdir(p); 402 partition_sysfs_add_subdir(p);
394 disk->part[part-1] = p; 403 disk->part[part-1] = p;
395} 404}
@@ -543,9 +552,9 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
543 printk(" %s: p%d exceeds device capacity\n", 552 printk(" %s: p%d exceeds device capacity\n",
544 disk->disk_name, p); 553 disk->disk_name, p);
545 } 554 }
546 add_partition(disk, p, from, size); 555 add_partition(disk, p, from, size, state->parts[p].flags);
547#ifdef CONFIG_BLK_DEV_MD 556#ifdef CONFIG_BLK_DEV_MD
548 if (state->parts[p].flags) 557 if (state->parts[p].flags & ADDPART_FLAG_RAID)
549 md_autodetect_dev(bdev->bd_dev+p); 558 md_autodetect_dev(bdev->bd_dev+p);
550#endif 559#endif
551 } 560 }
diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c
index 8c7af1777819..4ccec4cd1367 100644
--- a/fs/partitions/msdos.c
+++ b/fs/partitions/msdos.c
@@ -63,15 +63,25 @@ msdos_magic_present(unsigned char *p)
63#define AIX_LABEL_MAGIC4 0xC1 63#define AIX_LABEL_MAGIC4 0xC1
64static int aix_magic_present(unsigned char *p, struct block_device *bdev) 64static int aix_magic_present(unsigned char *p, struct block_device *bdev)
65{ 65{
66 struct partition *pt = (struct partition *) (p + 0x1be);
66 Sector sect; 67 Sector sect;
67 unsigned char *d; 68 unsigned char *d;
68 int ret = 0; 69 int slot, ret = 0;
69 70
70 if (p[0] != AIX_LABEL_MAGIC1 && 71 if (!(p[0] == AIX_LABEL_MAGIC1 &&
71 p[1] != AIX_LABEL_MAGIC2 && 72 p[1] == AIX_LABEL_MAGIC2 &&
72 p[2] != AIX_LABEL_MAGIC3 && 73 p[2] == AIX_LABEL_MAGIC3 &&
73 p[3] != AIX_LABEL_MAGIC4) 74 p[3] == AIX_LABEL_MAGIC4))
74 return 0; 75 return 0;
76 /* Assume the partition table is valid if Linux partitions exists */
77 for (slot = 1; slot <= 4; slot++, pt++) {
78 if (pt->sys_ind == LINUX_SWAP_PARTITION ||
79 pt->sys_ind == LINUX_RAID_PARTITION ||
80 pt->sys_ind == LINUX_DATA_PARTITION ||
81 pt->sys_ind == LINUX_LVM_PARTITION ||
82 is_extended_partition(pt))
83 return 0;
84 }
75 d = read_dev_sector(bdev, 7, &sect); 85 d = read_dev_sector(bdev, 7, &sect);
76 if (d) { 86 if (d) {
77 if (d[0] == '_' && d[1] == 'L' && d[2] == 'V' && d[3] == 'M') 87 if (d[0] == '_' && d[1] == 'L' && d[2] == 'V' && d[3] == 'M')
@@ -155,7 +165,7 @@ parse_extended(struct parsed_partitions *state, struct block_device *bdev,
155 165
156 put_partition(state, state->next, next, size); 166 put_partition(state, state->next, next, size);
157 if (SYS_IND(p) == LINUX_RAID_PARTITION) 167 if (SYS_IND(p) == LINUX_RAID_PARTITION)
158 state->parts[state->next].flags = 1; 168 state->parts[state->next].flags = ADDPART_FLAG_RAID;
159 loopct = 0; 169 loopct = 0;
160 if (++state->next == state->limit) 170 if (++state->next == state->limit)
161 goto done; 171 goto done;
diff --git a/fs/partitions/sgi.c b/fs/partitions/sgi.c
index 6fa4ff895104..ed5ac83fe83a 100644
--- a/fs/partitions/sgi.c
+++ b/fs/partitions/sgi.c
@@ -72,7 +72,7 @@ int sgi_partition(struct parsed_partitions *state, struct block_device *bdev)
72 if (blocks) { 72 if (blocks) {
73 put_partition(state, slot, start, blocks); 73 put_partition(state, slot, start, blocks);
74 if (be32_to_cpu(p->type) == LINUX_RAID_PARTITION) 74 if (be32_to_cpu(p->type) == LINUX_RAID_PARTITION)
75 state->parts[slot].flags = 1; 75 state->parts[slot].flags = ADDPART_FLAG_RAID;
76 } 76 }
77 slot++; 77 slot++;
78 } 78 }
diff --git a/fs/partitions/sun.c b/fs/partitions/sun.c
index 0a5927c806ca..123f8b46c8ba 100644
--- a/fs/partitions/sun.c
+++ b/fs/partitions/sun.c
@@ -80,8 +80,11 @@ int sun_partition(struct parsed_partitions *state, struct block_device *bdev)
80 num_sectors = be32_to_cpu(p->num_sectors); 80 num_sectors = be32_to_cpu(p->num_sectors);
81 if (num_sectors) { 81 if (num_sectors) {
82 put_partition(state, slot, st_sector, num_sectors); 82 put_partition(state, slot, st_sector, num_sectors);
83 state->parts[slot].flags = 0;
83 if (label->infos[i].id == LINUX_RAID_PARTITION) 84 if (label->infos[i].id == LINUX_RAID_PARTITION)
84 state->parts[slot].flags = 1; 85 state->parts[slot].flags |= ADDPART_FLAG_RAID;
86 if (label->infos[i].id == SUN_WHOLE_DISK)
87 state->parts[slot].flags |= ADDPART_FLAG_WHOLEDISK;
85 } 88 }
86 slot++; 89 slot++;
87 } 90 }
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 70e4fab117b1..07c9cdbcdcac 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -351,7 +351,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
351 struct signal_struct *sig = task->signal; 351 struct signal_struct *sig = task->signal;
352 352
353 if (sig->tty) { 353 if (sig->tty) {
354 tty_pgrp = sig->tty->pgrp; 354 tty_pgrp = pid_nr(sig->tty->pgrp);
355 tty_nr = new_encode_dev(tty_devnum(sig->tty)); 355 tty_nr = new_encode_dev(tty_devnum(sig->tty));
356 } 356 }
357 357
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 1a979ea3b379..4f5745af8c19 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -93,8 +93,8 @@ struct pid_entry {
93 int len; 93 int len;
94 char *name; 94 char *name;
95 mode_t mode; 95 mode_t mode;
96 struct inode_operations *iop; 96 const struct inode_operations *iop;
97 struct file_operations *fop; 97 const struct file_operations *fop;
98 union proc_op op; 98 union proc_op op;
99}; 99};
100 100
@@ -352,7 +352,7 @@ static int proc_setattr(struct dentry *dentry, struct iattr *attr)
352 return error; 352 return error;
353} 353}
354 354
355static struct inode_operations proc_def_inode_operations = { 355static const struct inode_operations proc_def_inode_operations = {
356 .setattr = proc_setattr, 356 .setattr = proc_setattr,
357}; 357};
358 358
@@ -424,7 +424,7 @@ static unsigned mounts_poll(struct file *file, poll_table *wait)
424 return res; 424 return res;
425} 425}
426 426
427static struct file_operations proc_mounts_operations = { 427static const struct file_operations proc_mounts_operations = {
428 .open = mounts_open, 428 .open = mounts_open,
429 .read = seq_read, 429 .read = seq_read,
430 .llseek = seq_lseek, 430 .llseek = seq_lseek,
@@ -462,7 +462,7 @@ static int mountstats_open(struct inode *inode, struct file *file)
462 return ret; 462 return ret;
463} 463}
464 464
465static struct file_operations proc_mountstats_operations = { 465static const struct file_operations proc_mountstats_operations = {
466 .open = mountstats_open, 466 .open = mountstats_open,
467 .read = seq_read, 467 .read = seq_read,
468 .llseek = seq_lseek, 468 .llseek = seq_lseek,
@@ -501,7 +501,7 @@ out_no_task:
501 return length; 501 return length;
502} 502}
503 503
504static struct file_operations proc_info_file_operations = { 504static const struct file_operations proc_info_file_operations = {
505 .read = proc_info_read, 505 .read = proc_info_read,
506}; 506};
507 507
@@ -646,7 +646,7 @@ static loff_t mem_lseek(struct file * file, loff_t offset, int orig)
646 return file->f_pos; 646 return file->f_pos;
647} 647}
648 648
649static struct file_operations proc_mem_operations = { 649static const struct file_operations proc_mem_operations = {
650 .llseek = mem_lseek, 650 .llseek = mem_lseek,
651 .read = mem_read, 651 .read = mem_read,
652 .write = mem_write, 652 .write = mem_write,
@@ -710,7 +710,7 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
710 return end - buffer; 710 return end - buffer;
711} 711}
712 712
713static struct file_operations proc_oom_adjust_operations = { 713static const struct file_operations proc_oom_adjust_operations = {
714 .read = oom_adjust_read, 714 .read = oom_adjust_read,
715 .write = oom_adjust_write, 715 .write = oom_adjust_write,
716}; 716};
@@ -777,7 +777,7 @@ out_free_page:
777 return length; 777 return length;
778} 778}
779 779
780static struct file_operations proc_loginuid_operations = { 780static const struct file_operations proc_loginuid_operations = {
781 .read = proc_loginuid_read, 781 .read = proc_loginuid_read,
782 .write = proc_loginuid_write, 782 .write = proc_loginuid_write,
783}; 783};
@@ -849,7 +849,7 @@ out_no_task:
849 return result; 849 return result;
850} 850}
851 851
852static struct file_operations proc_seccomp_operations = { 852static const struct file_operations proc_seccomp_operations = {
853 .read = seccomp_read, 853 .read = seccomp_read,
854 .write = seccomp_write, 854 .write = seccomp_write,
855}; 855};
@@ -908,7 +908,7 @@ static ssize_t proc_fault_inject_write(struct file * file,
908 return end - buffer; 908 return end - buffer;
909} 909}
910 910
911static struct file_operations proc_fault_inject_operations = { 911static const struct file_operations proc_fault_inject_operations = {
912 .read = proc_fault_inject_read, 912 .read = proc_fault_inject_read,
913 .write = proc_fault_inject_write, 913 .write = proc_fault_inject_write,
914}; 914};
@@ -980,7 +980,7 @@ out:
980 return error; 980 return error;
981} 981}
982 982
983static struct inode_operations proc_pid_link_inode_operations = { 983static const struct inode_operations proc_pid_link_inode_operations = {
984 .readlink = proc_pid_readlink, 984 .readlink = proc_pid_readlink,
985 .follow_link = proc_pid_follow_link, 985 .follow_link = proc_pid_follow_link,
986 .setattr = proc_setattr, 986 .setattr = proc_setattr,
@@ -1408,7 +1408,7 @@ out_no_task:
1408 return retval; 1408 return retval;
1409} 1409}
1410 1410
1411static struct file_operations proc_fd_operations = { 1411static const struct file_operations proc_fd_operations = {
1412 .read = generic_read_dir, 1412 .read = generic_read_dir,
1413 .readdir = proc_readfd, 1413 .readdir = proc_readfd,
1414}; 1414};
@@ -1416,7 +1416,7 @@ static struct file_operations proc_fd_operations = {
1416/* 1416/*
1417 * proc directories can do almost nothing.. 1417 * proc directories can do almost nothing..
1418 */ 1418 */
1419static struct inode_operations proc_fd_inode_operations = { 1419static const struct inode_operations proc_fd_inode_operations = {
1420 .lookup = proc_lookupfd, 1420 .lookup = proc_lookupfd,
1421 .setattr = proc_setattr, 1421 .setattr = proc_setattr,
1422}; 1422};
@@ -1623,7 +1623,7 @@ out_no_task:
1623 return length; 1623 return length;
1624} 1624}
1625 1625
1626static struct file_operations proc_pid_attr_operations = { 1626static const struct file_operations proc_pid_attr_operations = {
1627 .read = proc_pid_attr_read, 1627 .read = proc_pid_attr_read,
1628 .write = proc_pid_attr_write, 1628 .write = proc_pid_attr_write,
1629}; 1629};
@@ -1644,7 +1644,7 @@ static int proc_attr_dir_readdir(struct file * filp,
1644 attr_dir_stuff,ARRAY_SIZE(attr_dir_stuff)); 1644 attr_dir_stuff,ARRAY_SIZE(attr_dir_stuff));
1645} 1645}
1646 1646
1647static struct file_operations proc_attr_dir_operations = { 1647static const struct file_operations proc_attr_dir_operations = {
1648 .read = generic_read_dir, 1648 .read = generic_read_dir,
1649 .readdir = proc_attr_dir_readdir, 1649 .readdir = proc_attr_dir_readdir,
1650}; 1650};
@@ -1656,7 +1656,7 @@ static struct dentry *proc_attr_dir_lookup(struct inode *dir,
1656 attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff)); 1656 attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff));
1657} 1657}
1658 1658
1659static struct inode_operations proc_attr_dir_inode_operations = { 1659static const struct inode_operations proc_attr_dir_inode_operations = {
1660 .lookup = proc_attr_dir_lookup, 1660 .lookup = proc_attr_dir_lookup,
1661 .getattr = pid_getattr, 1661 .getattr = pid_getattr,
1662 .setattr = proc_setattr, 1662 .setattr = proc_setattr,
@@ -1682,7 +1682,7 @@ static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
1682 return ERR_PTR(vfs_follow_link(nd,tmp)); 1682 return ERR_PTR(vfs_follow_link(nd,tmp));
1683} 1683}
1684 1684
1685static struct inode_operations proc_self_inode_operations = { 1685static const struct inode_operations proc_self_inode_operations = {
1686 .readlink = proc_self_readlink, 1686 .readlink = proc_self_readlink,
1687 .follow_link = proc_self_follow_link, 1687 .follow_link = proc_self_follow_link,
1688}; 1688};
@@ -1810,17 +1810,21 @@ static int proc_base_fill_cache(struct file *filp, void *dirent, filldir_t filld
1810static int proc_pid_io_accounting(struct task_struct *task, char *buffer) 1810static int proc_pid_io_accounting(struct task_struct *task, char *buffer)
1811{ 1811{
1812 return sprintf(buffer, 1812 return sprintf(buffer,
1813#ifdef CONFIG_TASK_XACCT
1813 "rchar: %llu\n" 1814 "rchar: %llu\n"
1814 "wchar: %llu\n" 1815 "wchar: %llu\n"
1815 "syscr: %llu\n" 1816 "syscr: %llu\n"
1816 "syscw: %llu\n" 1817 "syscw: %llu\n"
1818#endif
1817 "read_bytes: %llu\n" 1819 "read_bytes: %llu\n"
1818 "write_bytes: %llu\n" 1820 "write_bytes: %llu\n"
1819 "cancelled_write_bytes: %llu\n", 1821 "cancelled_write_bytes: %llu\n",
1822#ifdef CONFIG_TASK_XACCT
1820 (unsigned long long)task->rchar, 1823 (unsigned long long)task->rchar,
1821 (unsigned long long)task->wchar, 1824 (unsigned long long)task->wchar,
1822 (unsigned long long)task->syscr, 1825 (unsigned long long)task->syscr,
1823 (unsigned long long)task->syscw, 1826 (unsigned long long)task->syscw,
1827#endif
1824 (unsigned long long)task->ioac.read_bytes, 1828 (unsigned long long)task->ioac.read_bytes,
1825 (unsigned long long)task->ioac.write_bytes, 1829 (unsigned long long)task->ioac.write_bytes,
1826 (unsigned long long)task->ioac.cancelled_write_bytes); 1830 (unsigned long long)task->ioac.cancelled_write_bytes);
@@ -1830,8 +1834,8 @@ static int proc_pid_io_accounting(struct task_struct *task, char *buffer)
1830/* 1834/*
1831 * Thread groups 1835 * Thread groups
1832 */ 1836 */
1833static struct file_operations proc_task_operations; 1837static const struct file_operations proc_task_operations;
1834static struct inode_operations proc_task_inode_operations; 1838static const struct inode_operations proc_task_inode_operations;
1835 1839
1836static struct pid_entry tgid_base_stuff[] = { 1840static struct pid_entry tgid_base_stuff[] = {
1837 DIR("task", S_IRUGO|S_IXUGO, task), 1841 DIR("task", S_IRUGO|S_IXUGO, task),
@@ -1890,7 +1894,7 @@ static int proc_tgid_base_readdir(struct file * filp,
1890 tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff)); 1894 tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff));
1891} 1895}
1892 1896
1893static struct file_operations proc_tgid_base_operations = { 1897static const struct file_operations proc_tgid_base_operations = {
1894 .read = generic_read_dir, 1898 .read = generic_read_dir,
1895 .readdir = proc_tgid_base_readdir, 1899 .readdir = proc_tgid_base_readdir,
1896}; 1900};
@@ -1900,7 +1904,7 @@ static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *de
1900 tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); 1904 tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
1901} 1905}
1902 1906
1903static struct inode_operations proc_tgid_base_inode_operations = { 1907static const struct inode_operations proc_tgid_base_inode_operations = {
1904 .lookup = proc_tgid_base_lookup, 1908 .lookup = proc_tgid_base_lookup,
1905 .getattr = pid_getattr, 1909 .getattr = pid_getattr,
1906 .setattr = proc_setattr, 1910 .setattr = proc_setattr,
@@ -2173,12 +2177,12 @@ static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *den
2173 tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); 2177 tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
2174} 2178}
2175 2179
2176static struct file_operations proc_tid_base_operations = { 2180static const struct file_operations proc_tid_base_operations = {
2177 .read = generic_read_dir, 2181 .read = generic_read_dir,
2178 .readdir = proc_tid_base_readdir, 2182 .readdir = proc_tid_base_readdir,
2179}; 2183};
2180 2184
2181static struct inode_operations proc_tid_base_inode_operations = { 2185static const struct inode_operations proc_tid_base_inode_operations = {
2182 .lookup = proc_tid_base_lookup, 2186 .lookup = proc_tid_base_lookup,
2183 .getattr = pid_getattr, 2187 .getattr = pid_getattr,
2184 .setattr = proc_setattr, 2188 .setattr = proc_setattr,
@@ -2404,13 +2408,13 @@ static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct
2404 return 0; 2408 return 0;
2405} 2409}
2406 2410
2407static struct inode_operations proc_task_inode_operations = { 2411static const struct inode_operations proc_task_inode_operations = {
2408 .lookup = proc_task_lookup, 2412 .lookup = proc_task_lookup,
2409 .getattr = proc_task_getattr, 2413 .getattr = proc_task_getattr,
2410 .setattr = proc_setattr, 2414 .setattr = proc_setattr,
2411}; 2415};
2412 2416
2413static struct file_operations proc_task_operations = { 2417static const struct file_operations proc_task_operations = {
2414 .read = generic_read_dir, 2418 .read = generic_read_dir,
2415 .readdir = proc_task_readdir, 2419 .readdir = proc_task_readdir,
2416}; 2420};
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 853cb877d5f3..0cdc00d9d97e 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -39,7 +39,7 @@ int proc_match(int len, const char *name, struct proc_dir_entry *de)
39 return !memcmp(name, de->name, len); 39 return !memcmp(name, de->name, len);
40} 40}
41 41
42static struct file_operations proc_file_operations = { 42static const struct file_operations proc_file_operations = {
43 .llseek = proc_file_lseek, 43 .llseek = proc_file_lseek,
44 .read = proc_file_read, 44 .read = proc_file_read,
45 .write = proc_file_write, 45 .write = proc_file_write,
@@ -265,7 +265,7 @@ static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry,
265 return 0; 265 return 0;
266} 266}
267 267
268static struct inode_operations proc_file_inode_operations = { 268static const struct inode_operations proc_file_inode_operations = {
269 .setattr = proc_notify_change, 269 .setattr = proc_notify_change,
270}; 270};
271 271
@@ -357,7 +357,7 @@ static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd)
357 return NULL; 357 return NULL;
358} 358}
359 359
360static struct inode_operations proc_link_inode_operations = { 360static const struct inode_operations proc_link_inode_operations = {
361 .readlink = generic_readlink, 361 .readlink = generic_readlink,
362 .follow_link = proc_follow_link, 362 .follow_link = proc_follow_link,
363}; 363};
@@ -497,7 +497,7 @@ out: unlock_kernel();
497 * use the in-memory "struct proc_dir_entry" tree to parse 497 * use the in-memory "struct proc_dir_entry" tree to parse
498 * the /proc directory. 498 * the /proc directory.
499 */ 499 */
500static struct file_operations proc_dir_operations = { 500static const struct file_operations proc_dir_operations = {
501 .read = generic_read_dir, 501 .read = generic_read_dir,
502 .readdir = proc_readdir, 502 .readdir = proc_readdir,
503}; 503};
@@ -505,7 +505,7 @@ static struct file_operations proc_dir_operations = {
505/* 505/*
506 * proc directories can do almost nothing.. 506 * proc directories can do almost nothing..
507 */ 507 */
508static struct inode_operations proc_dir_inode_operations = { 508static const struct inode_operations proc_dir_inode_operations = {
509 .lookup = proc_lookup, 509 .lookup = proc_lookup,
510 .getattr = proc_getattr, 510 .getattr = proc_getattr,
511 .setattr = proc_notify_change, 511 .setattr = proc_notify_change,
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index e26945ba685b..f6722be37dde 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -132,7 +132,7 @@ static int proc_remount(struct super_block *sb, int *flags, char *data)
132 return 0; 132 return 0;
133} 133}
134 134
135static struct super_operations proc_sops = { 135static const struct super_operations proc_sops = {
136 .alloc_inode = proc_alloc_inode, 136 .alloc_inode = proc_alloc_inode,
137 .destroy_inode = proc_destroy_inode, 137 .destroy_inode = proc_destroy_inode,
138 .read_inode = proc_read_inode, 138 .read_inode = proc_read_inode,
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 987c773dbb20..277dcd66ebe2 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -38,13 +38,13 @@ extern int proc_tgid_stat(struct task_struct *, char *);
38extern int proc_pid_status(struct task_struct *, char *); 38extern int proc_pid_status(struct task_struct *, char *);
39extern int proc_pid_statm(struct task_struct *, char *); 39extern int proc_pid_statm(struct task_struct *, char *);
40 40
41extern struct file_operations proc_maps_operations; 41extern const struct file_operations proc_maps_operations;
42extern struct file_operations proc_numa_maps_operations; 42extern const struct file_operations proc_numa_maps_operations;
43extern struct file_operations proc_smaps_operations; 43extern const struct file_operations proc_smaps_operations;
44 44
45extern struct file_operations proc_maps_operations; 45extern const struct file_operations proc_maps_operations;
46extern struct file_operations proc_numa_maps_operations; 46extern const struct file_operations proc_numa_maps_operations;
47extern struct file_operations proc_smaps_operations; 47extern const struct file_operations proc_smaps_operations;
48 48
49 49
50void free_proc_entry(struct proc_dir_entry *de); 50void free_proc_entry(struct proc_dir_entry *de);
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index 5ec67257e5f9..22f789de3909 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -128,7 +128,7 @@ static int proc_nommu_vma_list_open(struct inode *inode, struct file *file)
128 return seq_open(file, &proc_nommu_vma_list_seqop); 128 return seq_open(file, &proc_nommu_vma_list_seqop);
129} 129}
130 130
131static struct file_operations proc_nommu_vma_list_operations = { 131static const struct file_operations proc_nommu_vma_list_operations = {
132 .open = proc_nommu_vma_list_open, 132 .open = proc_nommu_vma_list_open,
133 .read = seq_read, 133 .read = seq_read,
134 .llseek = seq_lseek, 134 .llseek = seq_lseek,
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index b37ce33f67ea..e2c4c0a5c90d 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -121,16 +121,11 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
121{ 121{
122 struct sysinfo i; 122 struct sysinfo i;
123 int len; 123 int len;
124 unsigned long inactive;
125 unsigned long active;
126 unsigned long free;
127 unsigned long committed; 124 unsigned long committed;
128 unsigned long allowed; 125 unsigned long allowed;
129 struct vmalloc_info vmi; 126 struct vmalloc_info vmi;
130 long cached; 127 long cached;
131 128
132 get_zone_counts(&active, &inactive, &free);
133
134/* 129/*
135 * display in kilobytes. 130 * display in kilobytes.
136 */ 131 */
@@ -187,8 +182,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
187 K(i.bufferram), 182 K(i.bufferram),
188 K(cached), 183 K(cached),
189 K(total_swapcache_pages), 184 K(total_swapcache_pages),
190 K(active), 185 K(global_page_state(NR_ACTIVE)),
191 K(inactive), 186 K(global_page_state(NR_INACTIVE)),
192#ifdef CONFIG_HIGHMEM 187#ifdef CONFIG_HIGHMEM
193 K(i.totalhigh), 188 K(i.totalhigh),
194 K(i.freehigh), 189 K(i.freehigh),
@@ -228,7 +223,7 @@ static int fragmentation_open(struct inode *inode, struct file *file)
228 return seq_open(file, &fragmentation_op); 223 return seq_open(file, &fragmentation_op);
229} 224}
230 225
231static struct file_operations fragmentation_file_operations = { 226static const struct file_operations fragmentation_file_operations = {
232 .open = fragmentation_open, 227 .open = fragmentation_open,
233 .read = seq_read, 228 .read = seq_read,
234 .llseek = seq_lseek, 229 .llseek = seq_lseek,
@@ -241,7 +236,7 @@ static int zoneinfo_open(struct inode *inode, struct file *file)
241 return seq_open(file, &zoneinfo_op); 236 return seq_open(file, &zoneinfo_op);
242} 237}
243 238
244static struct file_operations proc_zoneinfo_file_operations = { 239static const struct file_operations proc_zoneinfo_file_operations = {
245 .open = zoneinfo_open, 240 .open = zoneinfo_open,
246 .read = seq_read, 241 .read = seq_read,
247 .llseek = seq_lseek, 242 .llseek = seq_lseek,
@@ -266,7 +261,7 @@ static int cpuinfo_open(struct inode *inode, struct file *file)
266 return seq_open(file, &cpuinfo_op); 261 return seq_open(file, &cpuinfo_op);
267} 262}
268 263
269static struct file_operations proc_cpuinfo_operations = { 264static const struct file_operations proc_cpuinfo_operations = {
270 .open = cpuinfo_open, 265 .open = cpuinfo_open,
271 .read = seq_read, 266 .read = seq_read,
272 .llseek = seq_lseek, 267 .llseek = seq_lseek,
@@ -325,7 +320,7 @@ static int devinfo_open(struct inode *inode, struct file *filp)
325 return seq_open(filp, &devinfo_ops); 320 return seq_open(filp, &devinfo_ops);
326} 321}
327 322
328static struct file_operations proc_devinfo_operations = { 323static const struct file_operations proc_devinfo_operations = {
329 .open = devinfo_open, 324 .open = devinfo_open,
330 .read = seq_read, 325 .read = seq_read,
331 .llseek = seq_lseek, 326 .llseek = seq_lseek,
@@ -337,7 +332,7 @@ static int vmstat_open(struct inode *inode, struct file *file)
337{ 332{
338 return seq_open(file, &vmstat_op); 333 return seq_open(file, &vmstat_op);
339} 334}
340static struct file_operations proc_vmstat_file_operations = { 335static const struct file_operations proc_vmstat_file_operations = {
341 .open = vmstat_open, 336 .open = vmstat_open,
342 .read = seq_read, 337 .read = seq_read,
343 .llseek = seq_lseek, 338 .llseek = seq_lseek,
@@ -368,7 +363,7 @@ static int partitions_open(struct inode *inode, struct file *file)
368{ 363{
369 return seq_open(file, &partitions_op); 364 return seq_open(file, &partitions_op);
370} 365}
371static struct file_operations proc_partitions_operations = { 366static const struct file_operations proc_partitions_operations = {
372 .open = partitions_open, 367 .open = partitions_open,
373 .read = seq_read, 368 .read = seq_read,
374 .llseek = seq_lseek, 369 .llseek = seq_lseek,
@@ -380,7 +375,7 @@ static int diskstats_open(struct inode *inode, struct file *file)
380{ 375{
381 return seq_open(file, &diskstats_op); 376 return seq_open(file, &diskstats_op);
382} 377}
383static struct file_operations proc_diskstats_operations = { 378static const struct file_operations proc_diskstats_operations = {
384 .open = diskstats_open, 379 .open = diskstats_open,
385 .read = seq_read, 380 .read = seq_read,
386 .llseek = seq_lseek, 381 .llseek = seq_lseek,
@@ -394,7 +389,7 @@ static int modules_open(struct inode *inode, struct file *file)
394{ 389{
395 return seq_open(file, &modules_op); 390 return seq_open(file, &modules_op);
396} 391}
397static struct file_operations proc_modules_operations = { 392static const struct file_operations proc_modules_operations = {
398 .open = modules_open, 393 .open = modules_open,
399 .read = seq_read, 394 .read = seq_read,
400 .llseek = seq_lseek, 395 .llseek = seq_lseek,
@@ -409,7 +404,7 @@ static int slabinfo_open(struct inode *inode, struct file *file)
409{ 404{
410 return seq_open(file, &slabinfo_op); 405 return seq_open(file, &slabinfo_op);
411} 406}
412static struct file_operations proc_slabinfo_operations = { 407static const struct file_operations proc_slabinfo_operations = {
413 .open = slabinfo_open, 408 .open = slabinfo_open,
414 .read = seq_read, 409 .read = seq_read,
415 .write = slabinfo_write, 410 .write = slabinfo_write,
@@ -443,7 +438,7 @@ static int slabstats_release(struct inode *inode, struct file *file)
443 return seq_release(inode, file); 438 return seq_release(inode, file);
444} 439}
445 440
446static struct file_operations proc_slabstats_operations = { 441static const struct file_operations proc_slabstats_operations = {
447 .open = slabstats_open, 442 .open = slabstats_open,
448 .read = seq_read, 443 .read = seq_read,
449 .llseek = seq_lseek, 444 .llseek = seq_lseek,
@@ -556,7 +551,7 @@ static int stat_open(struct inode *inode, struct file *file)
556 kfree(buf); 551 kfree(buf);
557 return res; 552 return res;
558} 553}
559static struct file_operations proc_stat_operations = { 554static const struct file_operations proc_stat_operations = {
560 .open = stat_open, 555 .open = stat_open,
561 .read = seq_read, 556 .read = seq_read,
562 .llseek = seq_lseek, 557 .llseek = seq_lseek,
@@ -598,7 +593,7 @@ static int interrupts_open(struct inode *inode, struct file *filp)
598 return seq_open(filp, &int_seq_ops); 593 return seq_open(filp, &int_seq_ops);
599} 594}
600 595
601static struct file_operations proc_interrupts_operations = { 596static const struct file_operations proc_interrupts_operations = {
602 .open = interrupts_open, 597 .open = interrupts_open,
603 .read = seq_read, 598 .read = seq_read,
604 .llseek = seq_lseek, 599 .llseek = seq_lseek,
@@ -655,7 +650,7 @@ static ssize_t write_sysrq_trigger(struct file *file, const char __user *buf,
655 return count; 650 return count;
656} 651}
657 652
658static struct file_operations proc_sysrq_trigger_operations = { 653static const struct file_operations proc_sysrq_trigger_operations = {
659 .write = write_sysrq_trigger, 654 .write = write_sysrq_trigger,
660}; 655};
661#endif 656#endif
@@ -672,7 +667,6 @@ void create_seq_entry(char *name, mode_t mode, const struct file_operations *f)
672 667
673void __init proc_misc_init(void) 668void __init proc_misc_init(void)
674{ 669{
675 struct proc_dir_entry *entry;
676 static struct { 670 static struct {
677 char *name; 671 char *name;
678 int (*read_proc)(char*,char**,off_t,int,int*,void*); 672 int (*read_proc)(char*,char**,off_t,int,int*,void*);
@@ -700,9 +694,12 @@ void __init proc_misc_init(void)
700 694
701 /* And now for trickier ones */ 695 /* And now for trickier ones */
702#ifdef CONFIG_PRINTK 696#ifdef CONFIG_PRINTK
703 entry = create_proc_entry("kmsg", S_IRUSR, &proc_root); 697 {
704 if (entry) 698 struct proc_dir_entry *entry;
705 entry->proc_fops = &proc_kmsg_operations; 699 entry = create_proc_entry("kmsg", S_IRUSR, &proc_root);
700 if (entry)
701 entry->proc_fops = &proc_kmsg_operations;
702 }
706#endif 703#endif
707 create_seq_entry("devices", 0, &proc_devinfo_operations); 704 create_seq_entry("devices", 0, &proc_devinfo_operations);
708 create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations); 705 create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations);
@@ -743,8 +740,11 @@ void __init proc_misc_init(void)
743 proc_vmcore->proc_fops = &proc_vmcore_operations; 740 proc_vmcore->proc_fops = &proc_vmcore_operations;
744#endif 741#endif
745#ifdef CONFIG_MAGIC_SYSRQ 742#ifdef CONFIG_MAGIC_SYSRQ
746 entry = create_proc_entry("sysrq-trigger", S_IWUSR, NULL); 743 {
747 if (entry) 744 struct proc_dir_entry *entry;
748 entry->proc_fops = &proc_sysrq_trigger_operations; 745 entry = create_proc_entry("sysrq-trigger", S_IWUSR, NULL);
746 if (entry)
747 entry->proc_fops = &proc_sysrq_trigger_operations;
748 }
749#endif 749#endif
750} 750}
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index 15c4455b09eb..c1bbfbeb035e 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -138,7 +138,7 @@ static int tty_drivers_open(struct inode *inode, struct file *file)
138 return seq_open(file, &tty_drivers_op); 138 return seq_open(file, &tty_drivers_op);
139} 139}
140 140
141static struct file_operations proc_tty_drivers_operations = { 141static const struct file_operations proc_tty_drivers_operations = {
142 .open = tty_drivers_open, 142 .open = tty_drivers_open,
143 .read = seq_read, 143 .read = seq_read,
144 .llseek = seq_lseek, 144 .llseek = seq_lseek,
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 64d242b6dcfa..af154458b540 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -136,7 +136,7 @@ static int proc_root_readdir(struct file * filp,
136 * <pid> directories. Thus we don't use the generic 136 * <pid> directories. Thus we don't use the generic
137 * directory handling functions for that.. 137 * directory handling functions for that..
138 */ 138 */
139static struct file_operations proc_root_operations = { 139static const struct file_operations proc_root_operations = {
140 .read = generic_read_dir, 140 .read = generic_read_dir,
141 .readdir = proc_root_readdir, 141 .readdir = proc_root_readdir,
142}; 142};
@@ -144,7 +144,7 @@ static struct file_operations proc_root_operations = {
144/* 144/*
145 * proc root can do almost nothing.. 145 * proc root can do almost nothing..
146 */ 146 */
147static struct inode_operations proc_root_inode_operations = { 147static const struct inode_operations proc_root_inode_operations = {
148 .lookup = proc_root_lookup, 148 .lookup = proc_root_lookup,
149 .getattr = proc_root_getattr, 149 .getattr = proc_root_getattr,
150}; 150};
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 55ade0d15621..7445980c8022 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -434,7 +434,7 @@ static int maps_open(struct inode *inode, struct file *file)
434 return do_maps_open(inode, file, &proc_pid_maps_op); 434 return do_maps_open(inode, file, &proc_pid_maps_op);
435} 435}
436 436
437struct file_operations proc_maps_operations = { 437const struct file_operations proc_maps_operations = {
438 .open = maps_open, 438 .open = maps_open,
439 .read = seq_read, 439 .read = seq_read,
440 .llseek = seq_lseek, 440 .llseek = seq_lseek,
@@ -456,7 +456,7 @@ static int numa_maps_open(struct inode *inode, struct file *file)
456 return do_maps_open(inode, file, &proc_pid_numa_maps_op); 456 return do_maps_open(inode, file, &proc_pid_numa_maps_op);
457} 457}
458 458
459struct file_operations proc_numa_maps_operations = { 459const struct file_operations proc_numa_maps_operations = {
460 .open = numa_maps_open, 460 .open = numa_maps_open,
461 .read = seq_read, 461 .read = seq_read,
462 .llseek = seq_lseek, 462 .llseek = seq_lseek,
@@ -469,7 +469,7 @@ static int smaps_open(struct inode *inode, struct file *file)
469 return do_maps_open(inode, file, &proc_pid_smaps_op); 469 return do_maps_open(inode, file, &proc_pid_smaps_op);
470} 470}
471 471
472struct file_operations proc_smaps_operations = { 472const struct file_operations proc_smaps_operations = {
473 .open = smaps_open, 473 .open = smaps_open,
474 .read = seq_read, 474 .read = seq_read,
475 .llseek = seq_lseek, 475 .llseek = seq_lseek,
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index fcc5caf93f55..7cddf6b8635a 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -220,7 +220,7 @@ static int maps_open(struct inode *inode, struct file *file)
220 return ret; 220 return ret;
221} 221}
222 222
223struct file_operations proc_maps_operations = { 223const struct file_operations proc_maps_operations = {
224 .open = maps_open, 224 .open = maps_open,
225 .read = seq_read, 225 .read = seq_read,
226 .llseek = seq_lseek, 226 .llseek = seq_lseek,
diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c
index c94db1db7a71..ea9ffefb48ad 100644
--- a/fs/qnx4/dir.c
+++ b/fs/qnx4/dir.c
@@ -87,7 +87,7 @@ const struct file_operations qnx4_dir_operations =
87 .fsync = file_fsync, 87 .fsync = file_fsync,
88}; 88};
89 89
90struct inode_operations qnx4_dir_inode_operations = 90const struct inode_operations qnx4_dir_inode_operations =
91{ 91{
92 .lookup = qnx4_lookup, 92 .lookup = qnx4_lookup,
93#ifdef CONFIG_QNX4FS_RW 93#ifdef CONFIG_QNX4FS_RW
diff --git a/fs/qnx4/file.c b/fs/qnx4/file.c
index 467e5ac7280e..44649981bbc8 100644
--- a/fs/qnx4/file.c
+++ b/fs/qnx4/file.c
@@ -33,7 +33,7 @@ const struct file_operations qnx4_file_operations =
33#endif 33#endif
34}; 34};
35 35
36struct inode_operations qnx4_file_inode_operations = 36const struct inode_operations qnx4_file_inode_operations =
37{ 37{
38#ifdef CONFIG_QNX4FS_RW 38#ifdef CONFIG_QNX4FS_RW
39 .truncate = qnx4_truncate, 39 .truncate = qnx4_truncate,
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index c047dc654d5c..83bc8e7824cd 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -30,7 +30,7 @@
30#define QNX4_VERSION 4 30#define QNX4_VERSION 4
31#define QNX4_BMNAME ".bitmap" 31#define QNX4_BMNAME ".bitmap"
32 32
33static struct super_operations qnx4_sops; 33static const struct super_operations qnx4_sops;
34 34
35#ifdef CONFIG_QNX4FS_RW 35#ifdef CONFIG_QNX4FS_RW
36 36
@@ -129,7 +129,7 @@ static void qnx4_read_inode(struct inode *);
129static int qnx4_remount(struct super_block *sb, int *flags, char *data); 129static int qnx4_remount(struct super_block *sb, int *flags, char *data);
130static int qnx4_statfs(struct dentry *, struct kstatfs *); 130static int qnx4_statfs(struct dentry *, struct kstatfs *);
131 131
132static struct super_operations qnx4_sops = 132static const struct super_operations qnx4_sops =
133{ 133{
134 .alloc_inode = qnx4_alloc_inode, 134 .alloc_inode = qnx4_alloc_inode,
135 .destroy_inode = qnx4_destroy_inode, 135 .destroy_inode = qnx4_destroy_inode,
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 54ebbc84207f..2f14774a124f 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -31,7 +31,7 @@ const struct address_space_operations ramfs_aops = {
31 .readpage = simple_readpage, 31 .readpage = simple_readpage,
32 .prepare_write = simple_prepare_write, 32 .prepare_write = simple_prepare_write,
33 .commit_write = simple_commit_write, 33 .commit_write = simple_commit_write,
34 .set_page_dirty = __set_page_dirty_nobuffers, 34 .set_page_dirty = __set_page_dirty_no_writeback,
35}; 35};
36 36
37const struct file_operations ramfs_file_operations = { 37const struct file_operations ramfs_file_operations = {
@@ -45,6 +45,6 @@ const struct file_operations ramfs_file_operations = {
45 .llseek = generic_file_llseek, 45 .llseek = generic_file_llseek,
46}; 46};
47 47
48struct inode_operations ramfs_file_inode_operations = { 48const struct inode_operations ramfs_file_inode_operations = {
49 .getattr = simple_getattr, 49 .getattr = simple_getattr,
50}; 50};
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index e9d6c4733282..d3fd7c6732d2 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -32,7 +32,7 @@ const struct address_space_operations ramfs_aops = {
32 .readpage = simple_readpage, 32 .readpage = simple_readpage,
33 .prepare_write = simple_prepare_write, 33 .prepare_write = simple_prepare_write,
34 .commit_write = simple_commit_write, 34 .commit_write = simple_commit_write,
35 .set_page_dirty = __set_page_dirty_nobuffers, 35 .set_page_dirty = __set_page_dirty_no_writeback,
36}; 36};
37 37
38const struct file_operations ramfs_file_operations = { 38const struct file_operations ramfs_file_operations = {
@@ -47,7 +47,7 @@ const struct file_operations ramfs_file_operations = {
47 .llseek = generic_file_llseek, 47 .llseek = generic_file_llseek,
48}; 48};
49 49
50struct inode_operations ramfs_file_inode_operations = { 50const struct inode_operations ramfs_file_inode_operations = {
51 .setattr = ramfs_nommu_setattr, 51 .setattr = ramfs_nommu_setattr,
52 .getattr = simple_getattr, 52 .getattr = simple_getattr,
53}; 53};
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 2faf4cdf61b0..ff1f7639707b 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -40,8 +40,8 @@
40/* some random number */ 40/* some random number */
41#define RAMFS_MAGIC 0x858458f6 41#define RAMFS_MAGIC 0x858458f6
42 42
43static struct super_operations ramfs_ops; 43static const struct super_operations ramfs_ops;
44static struct inode_operations ramfs_dir_inode_operations; 44static const struct inode_operations ramfs_dir_inode_operations;
45 45
46static struct backing_dev_info ramfs_backing_dev_info = { 46static struct backing_dev_info ramfs_backing_dev_info = {
47 .ra_pages = 0, /* No readahead */ 47 .ra_pages = 0, /* No readahead */
@@ -143,7 +143,7 @@ static int ramfs_symlink(struct inode * dir, struct dentry *dentry, const char *
143 return error; 143 return error;
144} 144}
145 145
146static struct inode_operations ramfs_dir_inode_operations = { 146static const struct inode_operations ramfs_dir_inode_operations = {
147 .create = ramfs_create, 147 .create = ramfs_create,
148 .lookup = simple_lookup, 148 .lookup = simple_lookup,
149 .link = simple_link, 149 .link = simple_link,
@@ -155,7 +155,7 @@ static struct inode_operations ramfs_dir_inode_operations = {
155 .rename = simple_rename, 155 .rename = simple_rename,
156}; 156};
157 157
158static struct super_operations ramfs_ops = { 158static const struct super_operations ramfs_ops = {
159 .statfs = simple_statfs, 159 .statfs = simple_statfs,
160 .drop_inode = generic_delete_inode, 160 .drop_inode = generic_delete_inode,
161}; 161};
diff --git a/fs/ramfs/internal.h b/fs/ramfs/internal.h
index c2bb58e74653..af7cc074a476 100644
--- a/fs/ramfs/internal.h
+++ b/fs/ramfs/internal.h
@@ -12,4 +12,4 @@
12 12
13extern const struct address_space_operations ramfs_aops; 13extern const struct address_space_operations ramfs_aops;
14extern const struct file_operations ramfs_file_operations; 14extern const struct file_operations ramfs_file_operations;
15extern struct inode_operations ramfs_file_inode_operations; 15extern const struct inode_operations ramfs_file_inode_operations;
diff --git a/fs/read_write.c b/fs/read_write.c
index 707ac21700d3..1f8dc373ede7 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -197,13 +197,13 @@ int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count
197 struct inode *inode; 197 struct inode *inode;
198 loff_t pos; 198 loff_t pos;
199 199
200 inode = file->f_path.dentry->d_inode;
200 if (unlikely((ssize_t) count < 0)) 201 if (unlikely((ssize_t) count < 0))
201 goto Einval; 202 goto Einval;
202 pos = *ppos; 203 pos = *ppos;
203 if (unlikely((pos < 0) || (loff_t) (pos + count) < 0)) 204 if (unlikely((pos < 0) || (loff_t) (pos + count) < 0))
204 goto Einval; 205 goto Einval;
205 206
206 inode = file->f_path.dentry->d_inode;
207 if (unlikely(inode->i_flock && MANDATORY_LOCK(inode))) { 207 if (unlikely(inode->i_flock && MANDATORY_LOCK(inode))) {
208 int retval = locks_mandatory_area( 208 int retval = locks_mandatory_area(
209 read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, 209 read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
@@ -274,9 +274,9 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
274 ret = do_sync_read(file, buf, count, pos); 274 ret = do_sync_read(file, buf, count, pos);
275 if (ret > 0) { 275 if (ret > 0) {
276 fsnotify_access(file->f_path.dentry); 276 fsnotify_access(file->f_path.dentry);
277 current->rchar += ret; 277 add_rchar(current, ret);
278 } 278 }
279 current->syscr++; 279 inc_syscr(current);
280 } 280 }
281 } 281 }
282 282
@@ -332,9 +332,9 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
332 ret = do_sync_write(file, buf, count, pos); 332 ret = do_sync_write(file, buf, count, pos);
333 if (ret > 0) { 333 if (ret > 0) {
334 fsnotify_modify(file->f_path.dentry); 334 fsnotify_modify(file->f_path.dentry);
335 current->wchar += ret; 335 add_wchar(current, ret);
336 } 336 }
337 current->syscw++; 337 inc_syscw(current);
338 } 338 }
339 } 339 }
340 340
@@ -675,8 +675,8 @@ sys_readv(unsigned long fd, const struct iovec __user *vec, unsigned long vlen)
675 } 675 }
676 676
677 if (ret > 0) 677 if (ret > 0)
678 current->rchar += ret; 678 add_rchar(current, ret);
679 current->syscr++; 679 inc_syscr(current);
680 return ret; 680 return ret;
681} 681}
682 682
@@ -696,8 +696,8 @@ sys_writev(unsigned long fd, const struct iovec __user *vec, unsigned long vlen)
696 } 696 }
697 697
698 if (ret > 0) 698 if (ret > 0)
699 current->wchar += ret; 699 add_wchar(current, ret);
700 current->syscw++; 700 inc_syscw(current);
701 return ret; 701 return ret;
702} 702}
703 703
@@ -779,12 +779,12 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
779 retval = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file); 779 retval = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file);
780 780
781 if (retval > 0) { 781 if (retval > 0) {
782 current->rchar += retval; 782 add_rchar(current, retval);
783 current->wchar += retval; 783 add_wchar(current, retval);
784 } 784 }
785 current->syscr++;
786 current->syscw++;
787 785
786 inc_syscr(current);
787 inc_syscw(current);
788 if (*ppos > max) 788 if (*ppos > max)
789 retval = -EOVERFLOW; 789 retval = -EOVERFLOW;
790 790
diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c
index fba304e64de8..f85c5cf4934c 100644
--- a/fs/reiserfs/do_balan.c
+++ b/fs/reiserfs/do_balan.c
@@ -19,6 +19,7 @@
19#include <linux/time.h> 19#include <linux/time.h>
20#include <linux/reiserfs_fs.h> 20#include <linux/reiserfs_fs.h>
21#include <linux/buffer_head.h> 21#include <linux/buffer_head.h>
22#include <linux/kernel.h>
22 23
23#ifdef CONFIG_REISERFS_CHECK 24#ifdef CONFIG_REISERFS_CHECK
24 25
@@ -1756,7 +1757,7 @@ static void store_thrown(struct tree_balance *tb, struct buffer_head *bh)
1756 if (buffer_dirty(bh)) 1757 if (buffer_dirty(bh))
1757 reiserfs_warning(tb->tb_sb, 1758 reiserfs_warning(tb->tb_sb,
1758 "store_thrown deals with dirty buffer"); 1759 "store_thrown deals with dirty buffer");
1759 for (i = 0; i < sizeof(tb->thrown) / sizeof(tb->thrown[0]); i++) 1760 for (i = 0; i < ARRAY_SIZE(tb->thrown); i++)
1760 if (!tb->thrown[i]) { 1761 if (!tb->thrown[i]) {
1761 tb->thrown[i] = bh; 1762 tb->thrown[i] = bh;
1762 get_bh(bh); /* free_thrown puts this */ 1763 get_bh(bh); /* free_thrown puts this */
@@ -1769,7 +1770,7 @@ static void free_thrown(struct tree_balance *tb)
1769{ 1770{
1770 int i; 1771 int i;
1771 b_blocknr_t blocknr; 1772 b_blocknr_t blocknr;
1772 for (i = 0; i < sizeof(tb->thrown) / sizeof(tb->thrown[0]); i++) { 1773 for (i = 0; i < ARRAY_SIZE(tb->thrown); i++) {
1773 if (tb->thrown[i]) { 1774 if (tb->thrown[i]) {
1774 blocknr = tb->thrown[i]->b_blocknr; 1775 blocknr = tb->thrown[i]->b_blocknr;
1775 if (buffer_dirty(tb->thrown[i])) 1776 if (buffer_dirty(tb->thrown[i]))
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 5109f1d5e7ff..abfada2f52db 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -1556,7 +1556,7 @@ const struct file_operations reiserfs_file_operations = {
1556 .splice_write = generic_file_splice_write, 1556 .splice_write = generic_file_splice_write,
1557}; 1557};
1558 1558
1559struct inode_operations reiserfs_file_inode_operations = { 1559const struct inode_operations reiserfs_file_inode_operations = {
1560 .truncate = reiserfs_vfs_truncate_file, 1560 .truncate = reiserfs_vfs_truncate_file,
1561 .setattr = reiserfs_setattr, 1561 .setattr = reiserfs_setattr,
1562 .setxattr = reiserfs_setxattr, 1562 .setxattr = reiserfs_setxattr,
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 23f5cd5bbf56..a2161840bc7c 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -1525,7 +1525,7 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1525/* 1525/*
1526 * directories can handle most operations... 1526 * directories can handle most operations...
1527 */ 1527 */
1528struct inode_operations reiserfs_dir_inode_operations = { 1528const struct inode_operations reiserfs_dir_inode_operations = {
1529 //&reiserfs_dir_operations, /* default_file_ops */ 1529 //&reiserfs_dir_operations, /* default_file_ops */
1530 .create = reiserfs_create, 1530 .create = reiserfs_create,
1531 .lookup = reiserfs_lookup, 1531 .lookup = reiserfs_lookup,
@@ -1548,7 +1548,7 @@ struct inode_operations reiserfs_dir_inode_operations = {
1548 * symlink operations.. same as page_symlink_inode_operations, with xattr 1548 * symlink operations.. same as page_symlink_inode_operations, with xattr
1549 * stuff added 1549 * stuff added
1550 */ 1550 */
1551struct inode_operations reiserfs_symlink_inode_operations = { 1551const struct inode_operations reiserfs_symlink_inode_operations = {
1552 .readlink = generic_readlink, 1552 .readlink = generic_readlink,
1553 .follow_link = page_follow_link_light, 1553 .follow_link = page_follow_link_light,
1554 .put_link = page_put_link, 1554 .put_link = page_put_link,
@@ -1564,7 +1564,7 @@ struct inode_operations reiserfs_symlink_inode_operations = {
1564/* 1564/*
1565 * special file operations.. just xattr/acl stuff 1565 * special file operations.. just xattr/acl stuff
1566 */ 1566 */
1567struct inode_operations reiserfs_special_inode_operations = { 1567const struct inode_operations reiserfs_special_inode_operations = {
1568 .setattr = reiserfs_setattr, 1568 .setattr = reiserfs_setattr,
1569 .setxattr = reiserfs_setxattr, 1569 .setxattr = reiserfs_setxattr,
1570 .getxattr = reiserfs_getxattr, 1570 .getxattr = reiserfs_getxattr,
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 58ad4551a7c1..f13a7f164dc6 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -593,7 +593,7 @@ static ssize_t reiserfs_quota_read(struct super_block *, int, char *, size_t,
593 loff_t); 593 loff_t);
594#endif 594#endif
595 595
596static struct super_operations reiserfs_sops = { 596static const struct super_operations reiserfs_sops = {
597 .alloc_inode = reiserfs_alloc_inode, 597 .alloc_inode = reiserfs_alloc_inode,
598 .destroy_inode = reiserfs_destroy_inode, 598 .destroy_inode = reiserfs_destroy_inode,
599 .write_inode = reiserfs_write_inode, 599 .write_inode = reiserfs_write_inode,
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index d3e243a6f609..fd601014813e 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -110,7 +110,7 @@ romfs_checksum(void *data, int size)
110 return sum; 110 return sum;
111} 111}
112 112
113static struct super_operations romfs_ops; 113static const struct super_operations romfs_ops;
114 114
115static int romfs_fill_super(struct super_block *s, void *data, int silent) 115static int romfs_fill_super(struct super_block *s, void *data, int silent)
116{ 116{
@@ -468,7 +468,7 @@ static const struct file_operations romfs_dir_operations = {
468 .readdir = romfs_readdir, 468 .readdir = romfs_readdir,
469}; 469};
470 470
471static struct inode_operations romfs_dir_inode_operations = { 471static const struct inode_operations romfs_dir_inode_operations = {
472 .lookup = romfs_lookup, 472 .lookup = romfs_lookup,
473}; 473};
474 474
@@ -598,7 +598,7 @@ static int romfs_remount(struct super_block *sb, int *flags, char *data)
598 return 0; 598 return 0;
599} 599}
600 600
601static struct super_operations romfs_ops = { 601static const struct super_operations romfs_ops = {
602 .alloc_inode = romfs_alloc_inode, 602 .alloc_inode = romfs_alloc_inode,
603 .destroy_inode = romfs_destroy_inode, 603 .destroy_inode = romfs_destroy_inode,
604 .read_inode = romfs_read_inode, 604 .read_inode = romfs_read_inode,
diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c
index b1e58d1ac9ca..50136b1a3eca 100644
--- a/fs/smbfs/dir.c
+++ b/fs/smbfs/dir.c
@@ -42,7 +42,7 @@ const struct file_operations smb_dir_operations =
42 .open = smb_dir_open, 42 .open = smb_dir_open,
43}; 43};
44 44
45struct inode_operations smb_dir_inode_operations = 45const struct inode_operations smb_dir_inode_operations =
46{ 46{
47 .create = smb_create, 47 .create = smb_create,
48 .lookup = smb_lookup, 48 .lookup = smb_lookup,
@@ -54,7 +54,7 @@ struct inode_operations smb_dir_inode_operations =
54 .setattr = smb_notify_change, 54 .setattr = smb_notify_change,
55}; 55};
56 56
57struct inode_operations smb_dir_inode_operations_unix = 57const struct inode_operations smb_dir_inode_operations_unix =
58{ 58{
59 .create = smb_create, 59 .create = smb_create,
60 .lookup = smb_lookup, 60 .lookup = smb_lookup,
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index e50533a79517..f161797160c4 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -418,7 +418,7 @@ const struct file_operations smb_file_operations =
418 .sendfile = smb_file_sendfile, 418 .sendfile = smb_file_sendfile,
419}; 419};
420 420
421struct inode_operations smb_file_inode_operations = 421const struct inode_operations smb_file_inode_operations =
422{ 422{
423 .permission = smb_file_permission, 423 .permission = smb_file_permission,
424 .getattr = smb_getattr, 424 .getattr = smb_getattr,
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 84dfe3f3482e..5faba4f1c9ab 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -98,7 +98,7 @@ static int smb_remount(struct super_block *sb, int *flags, char *data)
98 return 0; 98 return 0;
99} 99}
100 100
101static struct super_operations smb_sops = 101static const struct super_operations smb_sops =
102{ 102{
103 .alloc_inode = smb_alloc_inode, 103 .alloc_inode = smb_alloc_inode,
104 .destroy_inode = smb_destroy_inode, 104 .destroy_inode = smb_destroy_inode,
diff --git a/fs/smbfs/proto.h b/fs/smbfs/proto.h
index 34fb462b2379..03f456c1b7d4 100644
--- a/fs/smbfs/proto.h
+++ b/fs/smbfs/proto.h
@@ -36,8 +36,8 @@ extern int smb_proc_link(struct smb_sb_info *server, struct dentry *dentry, stru
36extern void smb_install_null_ops(struct smb_ops *ops); 36extern void smb_install_null_ops(struct smb_ops *ops);
37/* dir.c */ 37/* dir.c */
38extern const struct file_operations smb_dir_operations; 38extern const struct file_operations smb_dir_operations;
39extern struct inode_operations smb_dir_inode_operations; 39extern const struct inode_operations smb_dir_inode_operations;
40extern struct inode_operations smb_dir_inode_operations_unix; 40extern const struct inode_operations smb_dir_inode_operations_unix;
41extern void smb_new_dentry(struct dentry *dentry); 41extern void smb_new_dentry(struct dentry *dentry);
42extern void smb_renew_times(struct dentry *dentry); 42extern void smb_renew_times(struct dentry *dentry);
43/* cache.c */ 43/* cache.c */
@@ -65,7 +65,7 @@ extern int smb_notify_change(struct dentry *dentry, struct iattr *attr);
65/* file.c */ 65/* file.c */
66extern const struct address_space_operations smb_file_aops; 66extern const struct address_space_operations smb_file_aops;
67extern const struct file_operations smb_file_operations; 67extern const struct file_operations smb_file_operations;
68extern struct inode_operations smb_file_inode_operations; 68extern const struct inode_operations smb_file_inode_operations;
69/* ioctl.c */ 69/* ioctl.c */
70extern int smb_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg); 70extern int smb_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
71/* smbiod.c */ 71/* smbiod.c */
@@ -84,4 +84,4 @@ extern int smb_request_send_server(struct smb_sb_info *server);
84extern int smb_request_recv(struct smb_sb_info *server); 84extern int smb_request_recv(struct smb_sb_info *server);
85/* symlink.c */ 85/* symlink.c */
86extern int smb_symlink(struct inode *inode, struct dentry *dentry, const char *oldname); 86extern int smb_symlink(struct inode *inode, struct dentry *dentry, const char *oldname);
87extern struct inode_operations smb_link_inode_operations; 87extern const struct inode_operations smb_link_inode_operations;
diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c
index a4bcae8a9aff..42261dbdf60f 100644
--- a/fs/smbfs/request.c
+++ b/fs/smbfs/request.c
@@ -61,7 +61,7 @@ static struct smb_request *smb_do_alloc_request(struct smb_sb_info *server,
61 struct smb_request *req; 61 struct smb_request *req;
62 unsigned char *buf = NULL; 62 unsigned char *buf = NULL;
63 63
64 req = kmem_cache_alloc(req_cachep, GFP_KERNEL); 64 req = kmem_cache_zalloc(req_cachep, GFP_KERNEL);
65 VERBOSE("allocating request: %p\n", req); 65 VERBOSE("allocating request: %p\n", req);
66 if (!req) 66 if (!req)
67 goto out; 67 goto out;
@@ -74,7 +74,6 @@ static struct smb_request *smb_do_alloc_request(struct smb_sb_info *server,
74 } 74 }
75 } 75 }
76 76
77 memset(req, 0, sizeof(struct smb_request));
78 req->rq_buffer = buf; 77 req->rq_buffer = buf;
79 req->rq_bufsize = bufsize; 78 req->rq_bufsize = bufsize;
80 req->rq_server = server; 79 req->rq_server = server;
diff --git a/fs/smbfs/symlink.c b/fs/smbfs/symlink.c
index cdc53c4fb381..e4bf3456d07e 100644
--- a/fs/smbfs/symlink.c
+++ b/fs/smbfs/symlink.c
@@ -62,7 +62,7 @@ static void smb_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
62 __putname(s); 62 __putname(s);
63} 63}
64 64
65struct inode_operations smb_link_inode_operations = 65const struct inode_operations smb_link_inode_operations =
66{ 66{
67 .readlink = generic_readlink, 67 .readlink = generic_readlink,
68 .follow_link = smb_follow_link, 68 .follow_link = smb_follow_link,
diff --git a/fs/super.c b/fs/super.c
index 3e7458c2bb76..60b1e50cbf53 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -285,7 +285,7 @@ int fsync_super(struct super_block *sb)
285 */ 285 */
286void generic_shutdown_super(struct super_block *sb) 286void generic_shutdown_super(struct super_block *sb)
287{ 287{
288 struct super_operations *sop = sb->s_op; 288 const struct super_operations *sop = sb->s_op;
289 289
290 if (sb->s_root) { 290 if (sb->s_root) {
291 shrink_dcache_for_umount(sb); 291 shrink_dcache_for_umount(sb);
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index e8f540d38d48..d3b9f5f07db1 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -16,6 +16,7 @@
16#include <linux/slab.h> 16#include <linux/slab.h>
17 17
18#include <asm/uaccess.h> 18#include <asm/uaccess.h>
19#include <asm/semaphore.h>
19 20
20#include "sysfs.h" 21#include "sysfs.h"
21 22
@@ -146,7 +147,7 @@ static int open(struct inode * inode, struct file * file)
146 Error: 147 Error:
147 module_put(attr->attr.owner); 148 module_put(attr->attr.owner);
148 Done: 149 Done:
149 if (error && kobj) 150 if (error)
150 kobject_put(kobj); 151 kobject_put(kobj);
151 return error; 152 return error;
152} 153}
@@ -157,8 +158,7 @@ static int release(struct inode * inode, struct file * file)
157 struct bin_attribute * attr = to_bin_attr(file->f_path.dentry); 158 struct bin_attribute * attr = to_bin_attr(file->f_path.dentry);
158 u8 * buffer = file->private_data; 159 u8 * buffer = file->private_data;
159 160
160 if (kobj) 161 kobject_put(kobj);
161 kobject_put(kobj);
162 module_put(attr->attr.owner); 162 module_put(attr->attr.owner);
163 kfree(buffer); 163 kfree(buffer);
164 return 0; 164 return 0;
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 511edef8b321..8813990304fe 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -9,6 +9,7 @@
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/kobject.h> 10#include <linux/kobject.h>
11#include <linux/namei.h> 11#include <linux/namei.h>
12#include <asm/semaphore.h>
12#include "sysfs.h" 13#include "sysfs.h"
13 14
14DECLARE_RWSEM(sysfs_rename_sem); 15DECLARE_RWSEM(sysfs_rename_sem);
@@ -32,25 +33,39 @@ static struct dentry_operations sysfs_dentry_ops = {
32/* 33/*
33 * Allocates a new sysfs_dirent and links it to the parent sysfs_dirent 34 * Allocates a new sysfs_dirent and links it to the parent sysfs_dirent
34 */ 35 */
35static struct sysfs_dirent * sysfs_new_dirent(struct sysfs_dirent * parent_sd, 36static struct sysfs_dirent * __sysfs_new_dirent(void * element)
36 void * element)
37{ 37{
38 struct sysfs_dirent * sd; 38 struct sysfs_dirent * sd;
39 39
40 sd = kmem_cache_alloc(sysfs_dir_cachep, GFP_KERNEL); 40 sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL);
41 if (!sd) 41 if (!sd)
42 return NULL; 42 return NULL;
43 43
44 memset(sd, 0, sizeof(*sd));
45 atomic_set(&sd->s_count, 1); 44 atomic_set(&sd->s_count, 1);
46 atomic_set(&sd->s_event, 1); 45 atomic_set(&sd->s_event, 1);
47 INIT_LIST_HEAD(&sd->s_children); 46 INIT_LIST_HEAD(&sd->s_children);
48 list_add(&sd->s_sibling, &parent_sd->s_children); 47 INIT_LIST_HEAD(&sd->s_sibling);
49 sd->s_element = element; 48 sd->s_element = element;
50 49
51 return sd; 50 return sd;
52} 51}
53 52
53static void __sysfs_list_dirent(struct sysfs_dirent *parent_sd,
54 struct sysfs_dirent *sd)
55{
56 if (sd)
57 list_add(&sd->s_sibling, &parent_sd->s_children);
58}
59
60static struct sysfs_dirent * sysfs_new_dirent(struct sysfs_dirent *parent_sd,
61 void * element)
62{
63 struct sysfs_dirent *sd;
64 sd = __sysfs_new_dirent(element);
65 __sysfs_list_dirent(parent_sd, sd);
66 return sd;
67}
68
54/* 69/*
55 * 70 *
56 * Return -EEXIST if there is already a sysfs element with the same name for 71 * Return -EEXIST if there is already a sysfs element with the same name for
@@ -77,14 +92,14 @@ int sysfs_dirent_exist(struct sysfs_dirent *parent_sd,
77} 92}
78 93
79 94
80int sysfs_make_dirent(struct sysfs_dirent * parent_sd, struct dentry * dentry, 95static struct sysfs_dirent *
81 void * element, umode_t mode, int type) 96__sysfs_make_dirent(struct dentry *dentry, void *element, mode_t mode, int type)
82{ 97{
83 struct sysfs_dirent * sd; 98 struct sysfs_dirent * sd;
84 99
85 sd = sysfs_new_dirent(parent_sd, element); 100 sd = __sysfs_new_dirent(element);
86 if (!sd) 101 if (!sd)
87 return -ENOMEM; 102 goto out;
88 103
89 sd->s_mode = mode; 104 sd->s_mode = mode;
90 sd->s_type = type; 105 sd->s_type = type;
@@ -94,7 +109,19 @@ int sysfs_make_dirent(struct sysfs_dirent * parent_sd, struct dentry * dentry,
94 dentry->d_op = &sysfs_dentry_ops; 109 dentry->d_op = &sysfs_dentry_ops;
95 } 110 }
96 111
97 return 0; 112out:
113 return sd;
114}
115
116int sysfs_make_dirent(struct sysfs_dirent * parent_sd, struct dentry * dentry,
117 void * element, umode_t mode, int type)
118{
119 struct sysfs_dirent *sd;
120
121 sd = __sysfs_make_dirent(dentry, element, mode, type);
122 __sysfs_list_dirent(parent_sd, sd);
123
124 return sd ? 0 : -ENOMEM;
98} 125}
99 126
100static int init_dir(struct inode * inode) 127static int init_dir(struct inode * inode)
@@ -165,11 +192,11 @@ int sysfs_create_subdir(struct kobject * k, const char * n, struct dentry ** d)
165 192
166/** 193/**
167 * sysfs_create_dir - create a directory for an object. 194 * sysfs_create_dir - create a directory for an object.
168 * @parent: parent parent object.
169 * @kobj: object we're creating directory for. 195 * @kobj: object we're creating directory for.
196 * @shadow_parent: parent parent object.
170 */ 197 */
171 198
172int sysfs_create_dir(struct kobject * kobj) 199int sysfs_create_dir(struct kobject * kobj, struct dentry *shadow_parent)
173{ 200{
174 struct dentry * dentry = NULL; 201 struct dentry * dentry = NULL;
175 struct dentry * parent; 202 struct dentry * parent;
@@ -177,7 +204,9 @@ int sysfs_create_dir(struct kobject * kobj)
177 204
178 BUG_ON(!kobj); 205 BUG_ON(!kobj);
179 206
180 if (kobj->parent) 207 if (shadow_parent)
208 parent = shadow_parent;
209 else if (kobj->parent)
181 parent = kobj->parent->dentry; 210 parent = kobj->parent->dentry;
182 else if (sysfs_mount && sysfs_mount->mnt_sb) 211 else if (sysfs_mount && sysfs_mount->mnt_sb)
183 parent = sysfs_mount->mnt_sb->s_root; 212 parent = sysfs_mount->mnt_sb->s_root;
@@ -267,7 +296,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
267 return ERR_PTR(err); 296 return ERR_PTR(err);
268} 297}
269 298
270struct inode_operations sysfs_dir_inode_operations = { 299const struct inode_operations sysfs_dir_inode_operations = {
271 .lookup = sysfs_lookup, 300 .lookup = sysfs_lookup,
272 .setattr = sysfs_setattr, 301 .setattr = sysfs_setattr,
273}; 302};
@@ -298,21 +327,12 @@ void sysfs_remove_subdir(struct dentry * d)
298} 327}
299 328
300 329
301/** 330static void __sysfs_remove_dir(struct dentry *dentry)
302 * sysfs_remove_dir - remove an object's directory.
303 * @kobj: object.
304 *
305 * The only thing special about this is that we remove any files in
306 * the directory before we remove the directory, and we've inlined
307 * what used to be sysfs_rmdir() below, instead of calling separately.
308 */
309
310void sysfs_remove_dir(struct kobject * kobj)
311{ 331{
312 struct dentry * dentry = dget(kobj->dentry);
313 struct sysfs_dirent * parent_sd; 332 struct sysfs_dirent * parent_sd;
314 struct sysfs_dirent * sd, * tmp; 333 struct sysfs_dirent * sd, * tmp;
315 334
335 dget(dentry);
316 if (!dentry) 336 if (!dentry)
317 return; 337 return;
318 338
@@ -333,32 +353,60 @@ void sysfs_remove_dir(struct kobject * kobj)
333 * Drop reference from dget() on entrance. 353 * Drop reference from dget() on entrance.
334 */ 354 */
335 dput(dentry); 355 dput(dentry);
356}
357
358/**
359 * sysfs_remove_dir - remove an object's directory.
360 * @kobj: object.
361 *
362 * The only thing special about this is that we remove any files in
363 * the directory before we remove the directory, and we've inlined
364 * what used to be sysfs_rmdir() below, instead of calling separately.
365 */
366
367void sysfs_remove_dir(struct kobject * kobj)
368{
369 __sysfs_remove_dir(kobj->dentry);
336 kobj->dentry = NULL; 370 kobj->dentry = NULL;
337} 371}
338 372
339int sysfs_rename_dir(struct kobject * kobj, const char *new_name) 373int sysfs_rename_dir(struct kobject * kobj, struct dentry *new_parent,
374 const char *new_name)
340{ 375{
341 int error = 0; 376 int error = 0;
342 struct dentry * new_dentry, * parent; 377 struct dentry * new_dentry;
343
344 if (!strcmp(kobject_name(kobj), new_name))
345 return -EINVAL;
346 378
347 if (!kobj->parent) 379 if (!new_parent)
348 return -EINVAL; 380 return -EFAULT;
349 381
350 down_write(&sysfs_rename_sem); 382 down_write(&sysfs_rename_sem);
351 parent = kobj->parent->dentry; 383 mutex_lock(&new_parent->d_inode->i_mutex);
352
353 mutex_lock(&parent->d_inode->i_mutex);
354 384
355 new_dentry = lookup_one_len(new_name, parent, strlen(new_name)); 385 new_dentry = lookup_one_len(new_name, new_parent, strlen(new_name));
356 if (!IS_ERR(new_dentry)) { 386 if (!IS_ERR(new_dentry)) {
357 if (!new_dentry->d_inode) { 387 /* By allowing two different directories with the
388 * same d_parent we allow this routine to move
389 * between different shadows of the same directory
390 */
391 if (kobj->dentry->d_parent->d_inode != new_parent->d_inode)
392 return -EINVAL;
393 else if (new_dentry->d_parent->d_inode != new_parent->d_inode)
394 error = -EINVAL;
395 else if (new_dentry == kobj->dentry)
396 error = -EINVAL;
397 else if (!new_dentry->d_inode) {
358 error = kobject_set_name(kobj, "%s", new_name); 398 error = kobject_set_name(kobj, "%s", new_name);
359 if (!error) { 399 if (!error) {
400 struct sysfs_dirent *sd, *parent_sd;
401
360 d_add(new_dentry, NULL); 402 d_add(new_dentry, NULL);
361 d_move(kobj->dentry, new_dentry); 403 d_move(kobj->dentry, new_dentry);
404
405 sd = kobj->dentry->d_fsdata;
406 parent_sd = new_parent->d_fsdata;
407
408 list_del_init(&sd->s_sibling);
409 list_add(&sd->s_sibling, &parent_sd->s_children);
362 } 410 }
363 else 411 else
364 d_drop(new_dentry); 412 d_drop(new_dentry);
@@ -366,7 +414,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
366 error = -EEXIST; 414 error = -EEXIST;
367 dput(new_dentry); 415 dput(new_dentry);
368 } 416 }
369 mutex_unlock(&parent->d_inode->i_mutex); 417 mutex_unlock(&new_parent->d_inode->i_mutex);
370 up_write(&sysfs_rename_sem); 418 up_write(&sysfs_rename_sem);
371 419
372 return error; 420 return error;
@@ -378,12 +426,10 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent)
378 struct sysfs_dirent *new_parent_sd, *sd; 426 struct sysfs_dirent *new_parent_sd, *sd;
379 int error; 427 int error;
380 428
381 if (!new_parent)
382 return -EINVAL;
383
384 old_parent_dentry = kobj->parent ? 429 old_parent_dentry = kobj->parent ?
385 kobj->parent->dentry : sysfs_mount->mnt_sb->s_root; 430 kobj->parent->dentry : sysfs_mount->mnt_sb->s_root;
386 new_parent_dentry = new_parent->dentry; 431 new_parent_dentry = new_parent ?
432 new_parent->dentry : sysfs_mount->mnt_sb->s_root;
387 433
388again: 434again:
389 mutex_lock(&old_parent_dentry->d_inode->i_mutex); 435 mutex_lock(&old_parent_dentry->d_inode->i_mutex);
@@ -547,6 +593,95 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
547 return offset; 593 return offset;
548} 594}
549 595
596
597/**
598 * sysfs_make_shadowed_dir - Setup so a directory can be shadowed
599 * @kobj: object we're creating shadow of.
600 */
601
602int sysfs_make_shadowed_dir(struct kobject *kobj,
603 void * (*follow_link)(struct dentry *, struct nameidata *))
604{
605 struct inode *inode;
606 struct inode_operations *i_op;
607
608 inode = kobj->dentry->d_inode;
609 if (inode->i_op != &sysfs_dir_inode_operations)
610 return -EINVAL;
611
612 i_op = kmalloc(sizeof(*i_op), GFP_KERNEL);
613 if (!i_op)
614 return -ENOMEM;
615
616 memcpy(i_op, &sysfs_dir_inode_operations, sizeof(*i_op));
617 i_op->follow_link = follow_link;
618
619 /* Locking of inode->i_op?
620 * Since setting i_op is a single word write and they
621 * are atomic we should be ok here.
622 */
623 inode->i_op = i_op;
624 return 0;
625}
626
627/**
628 * sysfs_create_shadow_dir - create a shadow directory for an object.
629 * @kobj: object we're creating directory for.
630 *
631 * sysfs_make_shadowed_dir must already have been called on this
632 * directory.
633 */
634
635struct dentry *sysfs_create_shadow_dir(struct kobject *kobj)
636{
637 struct sysfs_dirent *sd;
638 struct dentry *parent, *dir, *shadow;
639 struct inode *inode;
640
641 dir = kobj->dentry;
642 inode = dir->d_inode;
643 parent = dir->d_parent;
644 shadow = ERR_PTR(-EINVAL);
645 if (!sysfs_is_shadowed_inode(inode))
646 goto out;
647
648 shadow = d_alloc(parent, &dir->d_name);
649 if (!shadow)
650 goto nomem;
651
652 sd = __sysfs_make_dirent(shadow, kobj, inode->i_mode, SYSFS_DIR);
653 if (!sd)
654 goto nomem;
655
656 d_instantiate(shadow, igrab(inode));
657 inc_nlink(inode);
658 inc_nlink(parent->d_inode);
659 shadow->d_op = &sysfs_dentry_ops;
660
661 dget(shadow); /* Extra count - pin the dentry in core */
662
663out:
664 return shadow;
665nomem:
666 dput(shadow);
667 shadow = ERR_PTR(-ENOMEM);
668 goto out;
669}
670
671/**
672 * sysfs_remove_shadow_dir - remove an object's directory.
673 * @shadow: dentry of shadow directory
674 *
675 * The only thing special about this is that we remove any files in
676 * the directory before we remove the directory, and we've inlined
677 * what used to be sysfs_rmdir() below, instead of calling separately.
678 */
679
680void sysfs_remove_shadow_dir(struct dentry *shadow)
681{
682 __sysfs_remove_dir(shadow);
683}
684
550const struct file_operations sysfs_dir_operations = { 685const struct file_operations sysfs_dir_operations = {
551 .open = sysfs_dir_open, 686 .open = sysfs_dir_open,
552 .release = sysfs_dir_close, 687 .release = sysfs_dir_close,
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 9cfe53e1e00d..c0e117649a4d 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -7,6 +7,7 @@
7#include <linux/kobject.h> 7#include <linux/kobject.h>
8#include <linux/namei.h> 8#include <linux/namei.h>
9#include <linux/poll.h> 9#include <linux/poll.h>
10#include <linux/list.h>
10#include <asm/uaccess.h> 11#include <asm/uaccess.h>
11#include <asm/semaphore.h> 12#include <asm/semaphore.h>
12 13
@@ -50,17 +51,29 @@ static struct sysfs_ops subsys_sysfs_ops = {
50 .store = subsys_attr_store, 51 .store = subsys_attr_store,
51}; 52};
52 53
54/**
55 * add_to_collection - add buffer to a collection
56 * @buffer: buffer to be added
57 * @node inode of set to add to
58 */
53 59
54struct sysfs_buffer { 60static inline void
55 size_t count; 61add_to_collection(struct sysfs_buffer *buffer, struct inode *node)
56 loff_t pos; 62{
57 char * page; 63 struct sysfs_buffer_collection *set = node->i_private;
58 struct sysfs_ops * ops;
59 struct semaphore sem;
60 int needs_read_fill;
61 int event;
62};
63 64
65 mutex_lock(&node->i_mutex);
66 list_add(&buffer->associates, &set->associates);
67 mutex_unlock(&node->i_mutex);
68}
69
70static inline void
71remove_from_collection(struct sysfs_buffer *buffer, struct inode *node)
72{
73 mutex_lock(&node->i_mutex);
74 list_del(&buffer->associates);
75 mutex_unlock(&node->i_mutex);
76}
64 77
65/** 78/**
66 * fill_read_buffer - allocate and fill buffer from object. 79 * fill_read_buffer - allocate and fill buffer from object.
@@ -70,7 +83,8 @@ struct sysfs_buffer {
70 * Allocate @buffer->page, if it hasn't been already, then call the 83 * Allocate @buffer->page, if it hasn't been already, then call the
71 * kobject's show() method to fill the buffer with this attribute's 84 * kobject's show() method to fill the buffer with this attribute's
72 * data. 85 * data.
73 * This is called only once, on the file's first read. 86 * This is called only once, on the file's first read unless an error
87 * is returned.
74 */ 88 */
75static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer) 89static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer)
76{ 90{
@@ -88,12 +102,13 @@ static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer
88 102
89 buffer->event = atomic_read(&sd->s_event); 103 buffer->event = atomic_read(&sd->s_event);
90 count = ops->show(kobj,attr,buffer->page); 104 count = ops->show(kobj,attr,buffer->page);
91 buffer->needs_read_fill = 0;
92 BUG_ON(count > (ssize_t)PAGE_SIZE); 105 BUG_ON(count > (ssize_t)PAGE_SIZE);
93 if (count >= 0) 106 if (count >= 0) {
107 buffer->needs_read_fill = 0;
94 buffer->count = count; 108 buffer->count = count;
95 else 109 } else {
96 ret = count; 110 ret = count;
111 }
97 return ret; 112 return ret;
98} 113}
99 114
@@ -153,6 +168,10 @@ sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos)
153 ssize_t retval = 0; 168 ssize_t retval = 0;
154 169
155 down(&buffer->sem); 170 down(&buffer->sem);
171 if (buffer->orphaned) {
172 retval = -ENODEV;
173 goto out;
174 }
156 if (buffer->needs_read_fill) { 175 if (buffer->needs_read_fill) {
157 if ((retval = fill_read_buffer(file->f_path.dentry,buffer))) 176 if ((retval = fill_read_buffer(file->f_path.dentry,buffer)))
158 goto out; 177 goto out;
@@ -165,7 +184,6 @@ out:
165 return retval; 184 return retval;
166} 185}
167 186
168
169/** 187/**
170 * fill_write_buffer - copy buffer from userspace. 188 * fill_write_buffer - copy buffer from userspace.
171 * @buffer: data buffer for file. 189 * @buffer: data buffer for file.
@@ -243,19 +261,25 @@ sysfs_write_file(struct file *file, const char __user *buf, size_t count, loff_t
243 ssize_t len; 261 ssize_t len;
244 262
245 down(&buffer->sem); 263 down(&buffer->sem);
264 if (buffer->orphaned) {
265 len = -ENODEV;
266 goto out;
267 }
246 len = fill_write_buffer(buffer, buf, count); 268 len = fill_write_buffer(buffer, buf, count);
247 if (len > 0) 269 if (len > 0)
248 len = flush_write_buffer(file->f_path.dentry, buffer, len); 270 len = flush_write_buffer(file->f_path.dentry, buffer, len);
249 if (len > 0) 271 if (len > 0)
250 *ppos += len; 272 *ppos += len;
273out:
251 up(&buffer->sem); 274 up(&buffer->sem);
252 return len; 275 return len;
253} 276}
254 277
255static int check_perm(struct inode * inode, struct file * file) 278static int sysfs_open_file(struct inode *inode, struct file *file)
256{ 279{
257 struct kobject *kobj = sysfs_get_kobject(file->f_path.dentry->d_parent); 280 struct kobject *kobj = sysfs_get_kobject(file->f_path.dentry->d_parent);
258 struct attribute * attr = to_attr(file->f_path.dentry); 281 struct attribute * attr = to_attr(file->f_path.dentry);
282 struct sysfs_buffer_collection *set;
259 struct sysfs_buffer * buffer; 283 struct sysfs_buffer * buffer;
260 struct sysfs_ops * ops = NULL; 284 struct sysfs_ops * ops = NULL;
261 int error = 0; 285 int error = 0;
@@ -285,6 +309,18 @@ static int check_perm(struct inode * inode, struct file * file)
285 if (!ops) 309 if (!ops)
286 goto Eaccess; 310 goto Eaccess;
287 311
312 /* make sure we have a collection to add our buffers to */
313 mutex_lock(&inode->i_mutex);
314 if (!(set = inode->i_private)) {
315 if (!(set = inode->i_private = kmalloc(sizeof(struct sysfs_buffer_collection), GFP_KERNEL))) {
316 error = -ENOMEM;
317 goto Done;
318 } else {
319 INIT_LIST_HEAD(&set->associates);
320 }
321 }
322 mutex_unlock(&inode->i_mutex);
323
288 /* File needs write support. 324 /* File needs write support.
289 * The inode's perms must say it's ok, 325 * The inode's perms must say it's ok,
290 * and we must have a store method. 326 * and we must have a store method.
@@ -310,9 +346,11 @@ static int check_perm(struct inode * inode, struct file * file)
310 */ 346 */
311 buffer = kzalloc(sizeof(struct sysfs_buffer), GFP_KERNEL); 347 buffer = kzalloc(sizeof(struct sysfs_buffer), GFP_KERNEL);
312 if (buffer) { 348 if (buffer) {
349 INIT_LIST_HEAD(&buffer->associates);
313 init_MUTEX(&buffer->sem); 350 init_MUTEX(&buffer->sem);
314 buffer->needs_read_fill = 1; 351 buffer->needs_read_fill = 1;
315 buffer->ops = ops; 352 buffer->ops = ops;
353 add_to_collection(buffer, inode);
316 file->private_data = buffer; 354 file->private_data = buffer;
317 } else 355 } else
318 error = -ENOMEM; 356 error = -ENOMEM;
@@ -325,16 +363,11 @@ static int check_perm(struct inode * inode, struct file * file)
325 error = -EACCES; 363 error = -EACCES;
326 module_put(attr->owner); 364 module_put(attr->owner);
327 Done: 365 Done:
328 if (error && kobj) 366 if (error)
329 kobject_put(kobj); 367 kobject_put(kobj);
330 return error; 368 return error;
331} 369}
332 370
333static int sysfs_open_file(struct inode * inode, struct file * filp)
334{
335 return check_perm(inode,filp);
336}
337
338static int sysfs_release(struct inode * inode, struct file * filp) 371static int sysfs_release(struct inode * inode, struct file * filp)
339{ 372{
340 struct kobject * kobj = to_kobj(filp->f_path.dentry->d_parent); 373 struct kobject * kobj = to_kobj(filp->f_path.dentry->d_parent);
@@ -342,8 +375,9 @@ static int sysfs_release(struct inode * inode, struct file * filp)
342 struct module * owner = attr->owner; 375 struct module * owner = attr->owner;
343 struct sysfs_buffer * buffer = filp->private_data; 376 struct sysfs_buffer * buffer = filp->private_data;
344 377
345 if (kobj) 378 if (buffer)
346 kobject_put(kobj); 379 remove_from_collection(buffer, inode);
380 kobject_put(kobj);
347 /* After this point, attr should not be accessed. */ 381 /* After this point, attr should not be accessed. */
348 module_put(owner); 382 module_put(owner);
349 383
@@ -548,7 +582,7 @@ EXPORT_SYMBOL_GPL(sysfs_chmod_file);
548 582
549void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr) 583void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr)
550{ 584{
551 sysfs_hash_and_remove(kobj->dentry,attr->name); 585 sysfs_hash_and_remove(kobj->dentry, attr->name);
552} 586}
553 587
554 588
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 122145b0895c..b20951c93761 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -13,6 +13,8 @@
13#include <linux/dcache.h> 13#include <linux/dcache.h>
14#include <linux/namei.h> 14#include <linux/namei.h>
15#include <linux/err.h> 15#include <linux/err.h>
16#include <linux/fs.h>
17#include <asm/semaphore.h>
16#include "sysfs.h" 18#include "sysfs.h"
17 19
18 20
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index e79e38d52c00..dd1344b007f5 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -13,6 +13,7 @@
13#include <linux/backing-dev.h> 13#include <linux/backing-dev.h>
14#include <linux/capability.h> 14#include <linux/capability.h>
15#include <linux/errno.h> 15#include <linux/errno.h>
16#include <asm/semaphore.h>
16#include "sysfs.h" 17#include "sysfs.h"
17 18
18extern struct super_block * sysfs_sb; 19extern struct super_block * sysfs_sb;
@@ -28,10 +29,20 @@ static struct backing_dev_info sysfs_backing_dev_info = {
28 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, 29 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
29}; 30};
30 31
31static struct inode_operations sysfs_inode_operations ={ 32static const struct inode_operations sysfs_inode_operations ={
32 .setattr = sysfs_setattr, 33 .setattr = sysfs_setattr,
33}; 34};
34 35
36void sysfs_delete_inode(struct inode *inode)
37{
38 /* Free the shadowed directory inode operations */
39 if (sysfs_is_shadowed_inode(inode)) {
40 kfree(inode->i_op);
41 inode->i_op = NULL;
42 }
43 return generic_delete_inode(inode);
44}
45
35int sysfs_setattr(struct dentry * dentry, struct iattr * iattr) 46int sysfs_setattr(struct dentry * dentry, struct iattr * iattr)
36{ 47{
37 struct inode * inode = dentry->d_inode; 48 struct inode * inode = dentry->d_inode;
@@ -209,6 +220,22 @@ const unsigned char * sysfs_get_name(struct sysfs_dirent *sd)
209 return NULL; 220 return NULL;
210} 221}
211 222
223static inline void orphan_all_buffers(struct inode *node)
224{
225 struct sysfs_buffer_collection *set = node->i_private;
226 struct sysfs_buffer *buf;
227
228 mutex_lock_nested(&node->i_mutex, I_MUTEX_CHILD);
229 if (node->i_private) {
230 list_for_each_entry(buf, &set->associates, associates) {
231 down(&buf->sem);
232 buf->orphaned = 1;
233 up(&buf->sem);
234 }
235 }
236 mutex_unlock(&node->i_mutex);
237}
238
212 239
213/* 240/*
214 * Unhashes the dentry corresponding to given sysfs_dirent 241 * Unhashes the dentry corresponding to given sysfs_dirent
@@ -217,16 +244,23 @@ const unsigned char * sysfs_get_name(struct sysfs_dirent *sd)
217void sysfs_drop_dentry(struct sysfs_dirent * sd, struct dentry * parent) 244void sysfs_drop_dentry(struct sysfs_dirent * sd, struct dentry * parent)
218{ 245{
219 struct dentry * dentry = sd->s_dentry; 246 struct dentry * dentry = sd->s_dentry;
247 struct inode *inode;
220 248
221 if (dentry) { 249 if (dentry) {
222 spin_lock(&dcache_lock); 250 spin_lock(&dcache_lock);
223 spin_lock(&dentry->d_lock); 251 spin_lock(&dentry->d_lock);
224 if (!(d_unhashed(dentry) && dentry->d_inode)) { 252 if (!(d_unhashed(dentry) && dentry->d_inode)) {
253 inode = dentry->d_inode;
254 spin_lock(&inode->i_lock);
255 __iget(inode);
256 spin_unlock(&inode->i_lock);
225 dget_locked(dentry); 257 dget_locked(dentry);
226 __d_drop(dentry); 258 __d_drop(dentry);
227 spin_unlock(&dentry->d_lock); 259 spin_unlock(&dentry->d_lock);
228 spin_unlock(&dcache_lock); 260 spin_unlock(&dcache_lock);
229 simple_unlink(parent->d_inode, dentry); 261 simple_unlink(parent->d_inode, dentry);
262 orphan_all_buffers(inode);
263 iput(inode);
230 } else { 264 } else {
231 spin_unlock(&dentry->d_lock); 265 spin_unlock(&dentry->d_lock);
232 spin_unlock(&dcache_lock); 266 spin_unlock(&dcache_lock);
@@ -248,7 +282,7 @@ int sysfs_hash_and_remove(struct dentry * dir, const char * name)
248 return -ENOENT; 282 return -ENOENT;
249 283
250 parent_sd = dir->d_fsdata; 284 parent_sd = dir->d_fsdata;
251 mutex_lock(&dir->d_inode->i_mutex); 285 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
252 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { 286 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
253 if (!sd->s_element) 287 if (!sd->s_element)
254 continue; 288 continue;
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index e503f858fba8..23a48a38e6af 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -8,6 +8,7 @@
8#include <linux/mount.h> 8#include <linux/mount.h>
9#include <linux/pagemap.h> 9#include <linux/pagemap.h>
10#include <linux/init.h> 10#include <linux/init.h>
11#include <asm/semaphore.h>
11 12
12#include "sysfs.h" 13#include "sysfs.h"
13 14
@@ -18,9 +19,12 @@ struct vfsmount *sysfs_mount;
18struct super_block * sysfs_sb = NULL; 19struct super_block * sysfs_sb = NULL;
19struct kmem_cache *sysfs_dir_cachep; 20struct kmem_cache *sysfs_dir_cachep;
20 21
21static struct super_operations sysfs_ops = { 22static void sysfs_clear_inode(struct inode *inode);
23
24static const struct super_operations sysfs_ops = {
22 .statfs = simple_statfs, 25 .statfs = simple_statfs,
23 .drop_inode = generic_delete_inode, 26 .drop_inode = sysfs_delete_inode,
27 .clear_inode = sysfs_clear_inode,
24}; 28};
25 29
26static struct sysfs_dirent sysfs_root = { 30static struct sysfs_dirent sysfs_root = {
@@ -31,6 +35,11 @@ static struct sysfs_dirent sysfs_root = {
31 .s_iattr = NULL, 35 .s_iattr = NULL,
32}; 36};
33 37
38static void sysfs_clear_inode(struct inode *inode)
39{
40 kfree(inode->i_private);
41}
42
34static int sysfs_fill_super(struct super_block *sb, void *data, int silent) 43static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
35{ 44{
36 struct inode *inode; 45 struct inode *inode;
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index f50e3cc2ded8..7b9c5bfde920 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -7,6 +7,7 @@
7#include <linux/module.h> 7#include <linux/module.h>
8#include <linux/kobject.h> 8#include <linux/kobject.h>
9#include <linux/namei.h> 9#include <linux/namei.h>
10#include <asm/semaphore.h>
10 11
11#include "sysfs.h" 12#include "sysfs.h"
12 13
@@ -180,7 +181,7 @@ static void sysfs_put_link(struct dentry *dentry, struct nameidata *nd, void *co
180 free_page((unsigned long)page); 181 free_page((unsigned long)page);
181} 182}
182 183
183struct inode_operations sysfs_symlink_inode_operations = { 184const struct inode_operations sysfs_symlink_inode_operations = {
184 .readlink = generic_readlink, 185 .readlink = generic_readlink,
185 .follow_link = sysfs_follow_link, 186 .follow_link = sysfs_follow_link,
186 .put_link = sysfs_put_link, 187 .put_link = sysfs_put_link,
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index bd7cec295dab..d976b0005549 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -2,6 +2,7 @@
2extern struct vfsmount * sysfs_mount; 2extern struct vfsmount * sysfs_mount;
3extern struct kmem_cache *sysfs_dir_cachep; 3extern struct kmem_cache *sysfs_dir_cachep;
4 4
5extern void sysfs_delete_inode(struct inode *inode);
5extern struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent *); 6extern struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent *);
6extern int sysfs_create(struct dentry *, int mode, int (*init)(struct inode *)); 7extern int sysfs_create(struct dentry *, int mode, int (*init)(struct inode *));
7 8
@@ -25,14 +26,30 @@ extern struct super_block * sysfs_sb;
25extern const struct file_operations sysfs_dir_operations; 26extern const struct file_operations sysfs_dir_operations;
26extern const struct file_operations sysfs_file_operations; 27extern const struct file_operations sysfs_file_operations;
27extern const struct file_operations bin_fops; 28extern const struct file_operations bin_fops;
28extern struct inode_operations sysfs_dir_inode_operations; 29extern const struct inode_operations sysfs_dir_inode_operations;
29extern struct inode_operations sysfs_symlink_inode_operations; 30extern const struct inode_operations sysfs_symlink_inode_operations;
30 31
31struct sysfs_symlink { 32struct sysfs_symlink {
32 char * link_name; 33 char * link_name;
33 struct kobject * target_kobj; 34 struct kobject * target_kobj;
34}; 35};
35 36
37struct sysfs_buffer {
38 struct list_head associates;
39 size_t count;
40 loff_t pos;
41 char * page;
42 struct sysfs_ops * ops;
43 struct semaphore sem;
44 int orphaned;
45 int needs_read_fill;
46 int event;
47};
48
49struct sysfs_buffer_collection {
50 struct list_head associates;
51};
52
36static inline struct kobject * to_kobj(struct dentry * dentry) 53static inline struct kobject * to_kobj(struct dentry * dentry)
37{ 54{
38 struct sysfs_dirent * sd = dentry->d_fsdata; 55 struct sysfs_dirent * sd = dentry->d_fsdata;
@@ -96,3 +113,7 @@ static inline void sysfs_put(struct sysfs_dirent * sd)
96 release_sysfs_dirent(sd); 113 release_sysfs_dirent(sd);
97} 114}
98 115
116static inline int sysfs_is_shadowed_inode(struct inode *inode)
117{
118 return S_ISDIR(inode->i_mode) && inode->i_op->follow_link;
119}
diff --git a/fs/sysv/file.c b/fs/sysv/file.c
index 47a4b728f15b..0732ddb9020b 100644
--- a/fs/sysv/file.c
+++ b/fs/sysv/file.c
@@ -30,7 +30,7 @@ const struct file_operations sysv_file_operations = {
30 .sendfile = generic_file_sendfile, 30 .sendfile = generic_file_sendfile,
31}; 31};
32 32
33struct inode_operations sysv_file_inode_operations = { 33const struct inode_operations sysv_file_inode_operations = {
34 .truncate = sysv_truncate, 34 .truncate = sysv_truncate,
35 .getattr = sysv_getattr, 35 .getattr = sysv_getattr,
36}; 36};
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index ead9864567e3..9311cac186fe 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -142,7 +142,7 @@ static inline void write3byte(struct sysv_sb_info *sbi,
142 } 142 }
143} 143}
144 144
145static struct inode_operations sysv_symlink_inode_operations = { 145static const struct inode_operations sysv_symlink_inode_operations = {
146 .readlink = generic_readlink, 146 .readlink = generic_readlink,
147 .follow_link = page_follow_link_light, 147 .follow_link = page_follow_link_light,
148 .put_link = page_put_link, 148 .put_link = page_put_link,
@@ -327,7 +327,7 @@ static void init_once(void *p, struct kmem_cache *cachep, unsigned long flags)
327 inode_init_once(&si->vfs_inode); 327 inode_init_once(&si->vfs_inode);
328} 328}
329 329
330struct super_operations sysv_sops = { 330const struct super_operations sysv_sops = {
331 .alloc_inode = sysv_alloc_inode, 331 .alloc_inode = sysv_alloc_inode,
332 .destroy_inode = sysv_destroy_inode, 332 .destroy_inode = sysv_destroy_inode,
333 .read_inode = sysv_read_inode, 333 .read_inode = sysv_read_inode,
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index f7c08db8e34c..4e48abbd2b5d 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -292,7 +292,7 @@ out:
292/* 292/*
293 * directories can handle most operations... 293 * directories can handle most operations...
294 */ 294 */
295struct inode_operations sysv_dir_inode_operations = { 295const struct inode_operations sysv_dir_inode_operations = {
296 .create = sysv_create, 296 .create = sysv_create,
297 .lookup = sysv_lookup, 297 .lookup = sysv_lookup,
298 .link = sysv_link, 298 .link = sysv_link,
diff --git a/fs/sysv/symlink.c b/fs/sysv/symlink.c
index b85ce61d635c..00d2f8a43e4e 100644
--- a/fs/sysv/symlink.c
+++ b/fs/sysv/symlink.c
@@ -14,7 +14,7 @@ static void *sysv_follow_link(struct dentry *dentry, struct nameidata *nd)
14 return NULL; 14 return NULL;
15} 15}
16 16
17struct inode_operations sysv_fast_symlink_inode_operations = { 17const struct inode_operations sysv_fast_symlink_inode_operations = {
18 .readlink = generic_readlink, 18 .readlink = generic_readlink,
19 .follow_link = sysv_follow_link, 19 .follow_link = sysv_follow_link,
20}; 20};
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index dcb18b2171fe..5b4fedf17cc4 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -159,13 +159,13 @@ extern struct sysv_dir_entry *sysv_dotdot(struct inode *, struct page **);
159extern ino_t sysv_inode_by_name(struct dentry *); 159extern ino_t sysv_inode_by_name(struct dentry *);
160 160
161 161
162extern struct inode_operations sysv_file_inode_operations; 162extern const struct inode_operations sysv_file_inode_operations;
163extern struct inode_operations sysv_dir_inode_operations; 163extern const struct inode_operations sysv_dir_inode_operations;
164extern struct inode_operations sysv_fast_symlink_inode_operations; 164extern const struct inode_operations sysv_fast_symlink_inode_operations;
165extern const struct file_operations sysv_file_operations; 165extern const struct file_operations sysv_file_operations;
166extern const struct file_operations sysv_dir_operations; 166extern const struct file_operations sysv_dir_operations;
167extern const struct address_space_operations sysv_aops; 167extern const struct address_space_operations sysv_aops;
168extern struct super_operations sysv_sops; 168extern const struct super_operations sysv_sops;
169extern struct dentry_operations sysv_dentry_operations; 169extern struct dentry_operations sysv_dentry_operations;
170 170
171 171
diff --git a/fs/udf/file.c b/fs/udf/file.c
index d81f2db7b0e3..40d5047defea 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -263,6 +263,6 @@ const struct file_operations udf_file_operations = {
263 .sendfile = generic_file_sendfile, 263 .sendfile = generic_file_sendfile,
264}; 264};
265 265
266struct inode_operations udf_file_inode_operations = { 266const struct inode_operations udf_file_inode_operations = {
267 .truncate = udf_truncate, 267 .truncate = udf_truncate,
268}; 268};
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 73163325e5ec..fe361cd19a98 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -1308,7 +1308,7 @@ end_rename:
1308 return retval; 1308 return retval;
1309} 1309}
1310 1310
1311struct inode_operations udf_dir_inode_operations = { 1311const struct inode_operations udf_dir_inode_operations = {
1312 .lookup = udf_lookup, 1312 .lookup = udf_lookup,
1313 .create = udf_create, 1313 .create = udf_create,
1314 .link = udf_link, 1314 .link = udf_link,
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 1dbc2955f02e..8672b88f7ff2 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -160,7 +160,7 @@ static void destroy_inodecache(void)
160} 160}
161 161
162/* Superblock operations */ 162/* Superblock operations */
163static struct super_operations udf_sb_ops = { 163static const struct super_operations udf_sb_ops = {
164 .alloc_inode = udf_alloc_inode, 164 .alloc_inode = udf_alloc_inode,
165 .destroy_inode = udf_destroy_inode, 165 .destroy_inode = udf_destroy_inode,
166 .write_inode = udf_write_inode, 166 .write_inode = udf_write_inode,
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 1033b7cf2939..ee1dece1f6f5 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -42,9 +42,9 @@ struct task_struct;
42struct buffer_head; 42struct buffer_head;
43struct super_block; 43struct super_block;
44 44
45extern struct inode_operations udf_dir_inode_operations; 45extern const struct inode_operations udf_dir_inode_operations;
46extern const struct file_operations udf_dir_operations; 46extern const struct file_operations udf_dir_operations;
47extern struct inode_operations udf_file_inode_operations; 47extern const struct inode_operations udf_file_inode_operations;
48extern const struct file_operations udf_file_operations; 48extern const struct file_operations udf_file_operations;
49extern const struct address_space_operations udf_aops; 49extern const struct address_space_operations udf_aops;
50extern const struct address_space_operations udf_adinicb_aops; 50extern const struct address_space_operations udf_adinicb_aops;
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index 638f4c585e89..0e97a4f79c31 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -4,6 +4,8 @@
4 * Copyright (C) 1998 4 * Copyright (C) 1998
5 * Daniel Pirkl <daniel.pirkl@email.cz> 5 * Daniel Pirkl <daniel.pirkl@email.cz>
6 * Charles University, Faculty of Mathematics and Physics 6 * Charles University, Faculty of Mathematics and Physics
7 *
8 * UFS2 write support Evgeniy Dushistov <dushistov@mail.ru>, 2007
7 */ 9 */
8 10
9#include <linux/fs.h> 11#include <linux/fs.h>
@@ -21,38 +23,42 @@
21#include "swab.h" 23#include "swab.h"
22#include "util.h" 24#include "util.h"
23 25
24static unsigned ufs_add_fragments (struct inode *, unsigned, unsigned, unsigned, int *); 26#define INVBLOCK ((u64)-1L)
25static unsigned ufs_alloc_fragments (struct inode *, unsigned, unsigned, unsigned, int *); 27
26static unsigned ufs_alloccg_block (struct inode *, struct ufs_cg_private_info *, unsigned, int *); 28static u64 ufs_add_fragments(struct inode *, u64, unsigned, unsigned, int *);
27static unsigned ufs_bitmap_search (struct super_block *, struct ufs_cg_private_info *, unsigned, unsigned); 29static u64 ufs_alloc_fragments(struct inode *, unsigned, u64, unsigned, int *);
30static u64 ufs_alloccg_block(struct inode *, struct ufs_cg_private_info *, u64, int *);
31static u64 ufs_bitmap_search (struct super_block *, struct ufs_cg_private_info *, u64, unsigned);
28static unsigned char ufs_fragtable_8fpb[], ufs_fragtable_other[]; 32static unsigned char ufs_fragtable_8fpb[], ufs_fragtable_other[];
29static void ufs_clusteracct(struct super_block *, struct ufs_cg_private_info *, unsigned, int); 33static void ufs_clusteracct(struct super_block *, struct ufs_cg_private_info *, unsigned, int);
30 34
31/* 35/*
32 * Free 'count' fragments from fragment number 'fragment' 36 * Free 'count' fragments from fragment number 'fragment'
33 */ 37 */
34void ufs_free_fragments(struct inode *inode, unsigned fragment, unsigned count) 38void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count)
35{ 39{
36 struct super_block * sb; 40 struct super_block * sb;
37 struct ufs_sb_private_info * uspi; 41 struct ufs_sb_private_info * uspi;
38 struct ufs_super_block_first * usb1; 42 struct ufs_super_block_first * usb1;
39 struct ufs_cg_private_info * ucpi; 43 struct ufs_cg_private_info * ucpi;
40 struct ufs_cylinder_group * ucg; 44 struct ufs_cylinder_group * ucg;
41 unsigned cgno, bit, end_bit, bbase, blkmap, i, blkno, cylno; 45 unsigned cgno, bit, end_bit, bbase, blkmap, i;
46 u64 blkno;
42 47
43 sb = inode->i_sb; 48 sb = inode->i_sb;
44 uspi = UFS_SB(sb)->s_uspi; 49 uspi = UFS_SB(sb)->s_uspi;
45 usb1 = ubh_get_usb_first(uspi); 50 usb1 = ubh_get_usb_first(uspi);
46 51
47 UFSD("ENTER, fragment %u, count %u\n", fragment, count); 52 UFSD("ENTER, fragment %llu, count %u\n",
53 (unsigned long long)fragment, count);
48 54
49 if (ufs_fragnum(fragment) + count > uspi->s_fpg) 55 if (ufs_fragnum(fragment) + count > uspi->s_fpg)
50 ufs_error (sb, "ufs_free_fragments", "internal error"); 56 ufs_error (sb, "ufs_free_fragments", "internal error");
51 57
52 lock_super(sb); 58 lock_super(sb);
53 59
54 cgno = ufs_dtog(fragment); 60 cgno = ufs_dtog(uspi, fragment);
55 bit = ufs_dtogd(fragment); 61 bit = ufs_dtogd(uspi, fragment);
56 if (cgno >= uspi->s_ncg) { 62 if (cgno >= uspi->s_ncg) {
57 ufs_panic (sb, "ufs_free_fragments", "freeing blocks are outside device"); 63 ufs_panic (sb, "ufs_free_fragments", "freeing blocks are outside device");
58 goto failed; 64 goto failed;
@@ -101,9 +107,13 @@ void ufs_free_fragments(struct inode *inode, unsigned fragment, unsigned count)
101 fs32_add(sb, &ucg->cg_cs.cs_nbfree, 1); 107 fs32_add(sb, &ucg->cg_cs.cs_nbfree, 1);
102 uspi->cs_total.cs_nbfree++; 108 uspi->cs_total.cs_nbfree++;
103 fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nbfree, 1); 109 fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nbfree, 1);
104 cylno = ufs_cbtocylno (bbase); 110 if (uspi->fs_magic != UFS2_MAGIC) {
105 fs16_add(sb, &ubh_cg_blks(ucpi, cylno, ufs_cbtorpos(bbase)), 1); 111 unsigned cylno = ufs_cbtocylno (bbase);
106 fs32_add(sb, &ubh_cg_blktot(ucpi, cylno), 1); 112
113 fs16_add(sb, &ubh_cg_blks(ucpi, cylno,
114 ufs_cbtorpos(bbase)), 1);
115 fs32_add(sb, &ubh_cg_blktot(ucpi, cylno), 1);
116 }
107 } 117 }
108 118
109 ubh_mark_buffer_dirty (USPI_UBH(uspi)); 119 ubh_mark_buffer_dirty (USPI_UBH(uspi));
@@ -127,24 +137,27 @@ failed:
127/* 137/*
128 * Free 'count' fragments from fragment number 'fragment' (free whole blocks) 138 * Free 'count' fragments from fragment number 'fragment' (free whole blocks)
129 */ 139 */
130void ufs_free_blocks(struct inode *inode, unsigned fragment, unsigned count) 140void ufs_free_blocks(struct inode *inode, u64 fragment, unsigned count)
131{ 141{
132 struct super_block * sb; 142 struct super_block * sb;
133 struct ufs_sb_private_info * uspi; 143 struct ufs_sb_private_info * uspi;
134 struct ufs_super_block_first * usb1; 144 struct ufs_super_block_first * usb1;
135 struct ufs_cg_private_info * ucpi; 145 struct ufs_cg_private_info * ucpi;
136 struct ufs_cylinder_group * ucg; 146 struct ufs_cylinder_group * ucg;
137 unsigned overflow, cgno, bit, end_bit, blkno, i, cylno; 147 unsigned overflow, cgno, bit, end_bit, i;
148 u64 blkno;
138 149
139 sb = inode->i_sb; 150 sb = inode->i_sb;
140 uspi = UFS_SB(sb)->s_uspi; 151 uspi = UFS_SB(sb)->s_uspi;
141 usb1 = ubh_get_usb_first(uspi); 152 usb1 = ubh_get_usb_first(uspi);
142 153
143 UFSD("ENTER, fragment %u, count %u\n", fragment, count); 154 UFSD("ENTER, fragment %llu, count %u\n",
155 (unsigned long long)fragment, count);
144 156
145 if ((fragment & uspi->s_fpbmask) || (count & uspi->s_fpbmask)) { 157 if ((fragment & uspi->s_fpbmask) || (count & uspi->s_fpbmask)) {
146 ufs_error (sb, "ufs_free_blocks", "internal error, " 158 ufs_error (sb, "ufs_free_blocks", "internal error, "
147 "fragment %u, count %u\n", fragment, count); 159 "fragment %llu, count %u\n",
160 (unsigned long long)fragment, count);
148 goto failed; 161 goto failed;
149 } 162 }
150 163
@@ -152,8 +165,8 @@ void ufs_free_blocks(struct inode *inode, unsigned fragment, unsigned count)
152 165
153do_more: 166do_more:
154 overflow = 0; 167 overflow = 0;
155 cgno = ufs_dtog (fragment); 168 cgno = ufs_dtog(uspi, fragment);
156 bit = ufs_dtogd (fragment); 169 bit = ufs_dtogd(uspi, fragment);
157 if (cgno >= uspi->s_ncg) { 170 if (cgno >= uspi->s_ncg) {
158 ufs_panic (sb, "ufs_free_blocks", "freeing blocks are outside device"); 171 ufs_panic (sb, "ufs_free_blocks", "freeing blocks are outside device");
159 goto failed_unlock; 172 goto failed_unlock;
@@ -187,9 +200,14 @@ do_more:
187 fs32_add(sb, &ucg->cg_cs.cs_nbfree, 1); 200 fs32_add(sb, &ucg->cg_cs.cs_nbfree, 1);
188 uspi->cs_total.cs_nbfree++; 201 uspi->cs_total.cs_nbfree++;
189 fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nbfree, 1); 202 fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nbfree, 1);
190 cylno = ufs_cbtocylno(i); 203
191 fs16_add(sb, &ubh_cg_blks(ucpi, cylno, ufs_cbtorpos(i)), 1); 204 if (uspi->fs_magic != UFS2_MAGIC) {
192 fs32_add(sb, &ubh_cg_blktot(ucpi, cylno), 1); 205 unsigned cylno = ufs_cbtocylno(i);
206
207 fs16_add(sb, &ubh_cg_blks(ucpi, cylno,
208 ufs_cbtorpos(i)), 1);
209 fs32_add(sb, &ubh_cg_blktot(ucpi, cylno), 1);
210 }
193 } 211 }
194 212
195 ubh_mark_buffer_dirty (USPI_UBH(uspi)); 213 ubh_mark_buffer_dirty (USPI_UBH(uspi));
@@ -308,15 +326,19 @@ static void ufs_clear_frags(struct inode *inode, sector_t beg, unsigned int n,
308 } 326 }
309} 327}
310 328
311unsigned ufs_new_fragments(struct inode * inode, __fs32 * p, unsigned fragment, 329u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
312 unsigned goal, unsigned count, int * err, struct page *locked_page) 330 u64 goal, unsigned count, int *err,
331 struct page *locked_page)
313{ 332{
314 struct super_block * sb; 333 struct super_block * sb;
315 struct ufs_sb_private_info * uspi; 334 struct ufs_sb_private_info * uspi;
316 struct ufs_super_block_first * usb1; 335 struct ufs_super_block_first * usb1;
317 unsigned cgno, oldcount, newcount, tmp, request, result; 336 unsigned cgno, oldcount, newcount;
337 u64 tmp, request, result;
318 338
319 UFSD("ENTER, ino %lu, fragment %u, goal %u, count %u\n", inode->i_ino, fragment, goal, count); 339 UFSD("ENTER, ino %lu, fragment %llu, goal %llu, count %u\n",
340 inode->i_ino, (unsigned long long)fragment,
341 (unsigned long long)goal, count);
320 342
321 sb = inode->i_sb; 343 sb = inode->i_sb;
322 uspi = UFS_SB(sb)->s_uspi; 344 uspi = UFS_SB(sb)->s_uspi;
@@ -324,11 +346,12 @@ unsigned ufs_new_fragments(struct inode * inode, __fs32 * p, unsigned fragment,
324 *err = -ENOSPC; 346 *err = -ENOSPC;
325 347
326 lock_super (sb); 348 lock_super (sb);
327 349 tmp = ufs_data_ptr_to_cpu(sb, p);
328 tmp = fs32_to_cpu(sb, *p); 350
329 if (count + ufs_fragnum(fragment) > uspi->s_fpb) { 351 if (count + ufs_fragnum(fragment) > uspi->s_fpb) {
330 ufs_warning (sb, "ufs_new_fragments", "internal warning" 352 ufs_warning(sb, "ufs_new_fragments", "internal warning"
331 " fragment %u, count %u", fragment, count); 353 " fragment %llu, count %u",
354 (unsigned long long)fragment, count);
332 count = uspi->s_fpb - ufs_fragnum(fragment); 355 count = uspi->s_fpb - ufs_fragnum(fragment);
333 } 356 }
334 oldcount = ufs_fragnum (fragment); 357 oldcount = ufs_fragnum (fragment);
@@ -339,10 +362,12 @@ unsigned ufs_new_fragments(struct inode * inode, __fs32 * p, unsigned fragment,
339 */ 362 */
340 if (oldcount) { 363 if (oldcount) {
341 if (!tmp) { 364 if (!tmp) {
342 ufs_error (sb, "ufs_new_fragments", "internal error, " 365 ufs_error(sb, "ufs_new_fragments", "internal error, "
343 "fragment %u, tmp %u\n", fragment, tmp); 366 "fragment %llu, tmp %llu\n",
344 unlock_super (sb); 367 (unsigned long long)fragment,
345 return (unsigned)-1; 368 (unsigned long long)tmp);
369 unlock_super(sb);
370 return INVBLOCK;
346 } 371 }
347 if (fragment < UFS_I(inode)->i_lastfrag) { 372 if (fragment < UFS_I(inode)->i_lastfrag) {
348 UFSD("EXIT (ALREADY ALLOCATED)\n"); 373 UFSD("EXIT (ALREADY ALLOCATED)\n");
@@ -372,7 +397,7 @@ unsigned ufs_new_fragments(struct inode * inode, __fs32 * p, unsigned fragment,
372 if (goal == 0) 397 if (goal == 0)
373 cgno = ufs_inotocg (inode->i_ino); 398 cgno = ufs_inotocg (inode->i_ino);
374 else 399 else
375 cgno = ufs_dtog (goal); 400 cgno = ufs_dtog(uspi, goal);
376 401
377 /* 402 /*
378 * allocate new fragment 403 * allocate new fragment
@@ -380,14 +405,16 @@ unsigned ufs_new_fragments(struct inode * inode, __fs32 * p, unsigned fragment,
380 if (oldcount == 0) { 405 if (oldcount == 0) {
381 result = ufs_alloc_fragments (inode, cgno, goal, count, err); 406 result = ufs_alloc_fragments (inode, cgno, goal, count, err);
382 if (result) { 407 if (result) {
383 *p = cpu_to_fs32(sb, result); 408 ufs_cpu_to_data_ptr(sb, p, result);
384 *err = 0; 409 *err = 0;
385 UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count); 410 UFS_I(inode)->i_lastfrag =
386 ufs_clear_frags(inode, result + oldcount, newcount - oldcount, 411 max_t(u32, UFS_I(inode)->i_lastfrag,
387 locked_page != NULL); 412 fragment + count);
413 ufs_clear_frags(inode, result + oldcount,
414 newcount - oldcount, locked_page != NULL);
388 } 415 }
389 unlock_super(sb); 416 unlock_super(sb);
390 UFSD("EXIT, result %u\n", result); 417 UFSD("EXIT, result %llu\n", (unsigned long long)result);
391 return result; 418 return result;
392 } 419 }
393 420
@@ -401,7 +428,7 @@ unsigned ufs_new_fragments(struct inode * inode, __fs32 * p, unsigned fragment,
401 ufs_clear_frags(inode, result + oldcount, newcount - oldcount, 428 ufs_clear_frags(inode, result + oldcount, newcount - oldcount,
402 locked_page != NULL); 429 locked_page != NULL);
403 unlock_super(sb); 430 unlock_super(sb);
404 UFSD("EXIT, result %u\n", result); 431 UFSD("EXIT, result %llu\n", (unsigned long long)result);
405 return result; 432 return result;
406 } 433 }
407 434
@@ -433,15 +460,14 @@ unsigned ufs_new_fragments(struct inode * inode, __fs32 * p, unsigned fragment,
433 locked_page != NULL); 460 locked_page != NULL);
434 ufs_change_blocknr(inode, fragment - oldcount, oldcount, tmp, 461 ufs_change_blocknr(inode, fragment - oldcount, oldcount, tmp,
435 result, locked_page); 462 result, locked_page);
436 463 ufs_cpu_to_data_ptr(sb, p, result);
437 *p = cpu_to_fs32(sb, result);
438 *err = 0; 464 *err = 0;
439 UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count); 465 UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count);
440 unlock_super(sb); 466 unlock_super(sb);
441 if (newcount < request) 467 if (newcount < request)
442 ufs_free_fragments (inode, result + newcount, request - newcount); 468 ufs_free_fragments (inode, result + newcount, request - newcount);
443 ufs_free_fragments (inode, tmp, oldcount); 469 ufs_free_fragments (inode, tmp, oldcount);
444 UFSD("EXIT, result %u\n", result); 470 UFSD("EXIT, result %llu\n", (unsigned long long)result);
445 return result; 471 return result;
446 } 472 }
447 473
@@ -450,9 +476,8 @@ unsigned ufs_new_fragments(struct inode * inode, __fs32 * p, unsigned fragment,
450 return 0; 476 return 0;
451} 477}
452 478
453static unsigned 479static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
454ufs_add_fragments (struct inode * inode, unsigned fragment, 480 unsigned oldcount, unsigned newcount, int *err)
455 unsigned oldcount, unsigned newcount, int * err)
456{ 481{
457 struct super_block * sb; 482 struct super_block * sb;
458 struct ufs_sb_private_info * uspi; 483 struct ufs_sb_private_info * uspi;
@@ -461,14 +486,15 @@ ufs_add_fragments (struct inode * inode, unsigned fragment,
461 struct ufs_cylinder_group * ucg; 486 struct ufs_cylinder_group * ucg;
462 unsigned cgno, fragno, fragoff, count, fragsize, i; 487 unsigned cgno, fragno, fragoff, count, fragsize, i;
463 488
464 UFSD("ENTER, fragment %u, oldcount %u, newcount %u\n", fragment, oldcount, newcount); 489 UFSD("ENTER, fragment %llu, oldcount %u, newcount %u\n",
490 (unsigned long long)fragment, oldcount, newcount);
465 491
466 sb = inode->i_sb; 492 sb = inode->i_sb;
467 uspi = UFS_SB(sb)->s_uspi; 493 uspi = UFS_SB(sb)->s_uspi;
468 usb1 = ubh_get_usb_first (uspi); 494 usb1 = ubh_get_usb_first (uspi);
469 count = newcount - oldcount; 495 count = newcount - oldcount;
470 496
471 cgno = ufs_dtog(fragment); 497 cgno = ufs_dtog(uspi, fragment);
472 if (fs32_to_cpu(sb, UFS_SB(sb)->fs_cs(cgno).cs_nffree) < count) 498 if (fs32_to_cpu(sb, UFS_SB(sb)->fs_cs(cgno).cs_nffree) < count)
473 return 0; 499 return 0;
474 if ((ufs_fragnum (fragment) + newcount) > uspi->s_fpb) 500 if ((ufs_fragnum (fragment) + newcount) > uspi->s_fpb)
@@ -483,7 +509,7 @@ ufs_add_fragments (struct inode * inode, unsigned fragment,
483 return 0; 509 return 0;
484 } 510 }
485 511
486 fragno = ufs_dtogd (fragment); 512 fragno = ufs_dtogd(uspi, fragment);
487 fragoff = ufs_fragnum (fragno); 513 fragoff = ufs_fragnum (fragno);
488 for (i = oldcount; i < newcount; i++) 514 for (i = oldcount; i < newcount; i++)
489 if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_freeoff, fragno + i)) 515 if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_freeoff, fragno + i))
@@ -521,7 +547,7 @@ ufs_add_fragments (struct inode * inode, unsigned fragment,
521 } 547 }
522 sb->s_dirt = 1; 548 sb->s_dirt = 1;
523 549
524 UFSD("EXIT, fragment %u\n", fragment); 550 UFSD("EXIT, fragment %llu\n", (unsigned long long)fragment);
525 551
526 return fragment; 552 return fragment;
527} 553}
@@ -534,17 +560,19 @@ ufs_add_fragments (struct inode * inode, unsigned fragment,
534 if (fs32_to_cpu(sb, ucg->cg_frsum[k])) \ 560 if (fs32_to_cpu(sb, ucg->cg_frsum[k])) \
535 goto cg_found; 561 goto cg_found;
536 562
537static unsigned ufs_alloc_fragments (struct inode * inode, unsigned cgno, 563static u64 ufs_alloc_fragments(struct inode *inode, unsigned cgno,
538 unsigned goal, unsigned count, int * err) 564 u64 goal, unsigned count, int *err)
539{ 565{
540 struct super_block * sb; 566 struct super_block * sb;
541 struct ufs_sb_private_info * uspi; 567 struct ufs_sb_private_info * uspi;
542 struct ufs_super_block_first * usb1; 568 struct ufs_super_block_first * usb1;
543 struct ufs_cg_private_info * ucpi; 569 struct ufs_cg_private_info * ucpi;
544 struct ufs_cylinder_group * ucg; 570 struct ufs_cylinder_group * ucg;
545 unsigned oldcg, i, j, k, result, allocsize; 571 unsigned oldcg, i, j, k, allocsize;
572 u64 result;
546 573
547 UFSD("ENTER, ino %lu, cgno %u, goal %u, count %u\n", inode->i_ino, cgno, goal, count); 574 UFSD("ENTER, ino %lu, cgno %u, goal %llu, count %u\n",
575 inode->i_ino, cgno, (unsigned long long)goal, count);
548 576
549 sb = inode->i_sb; 577 sb = inode->i_sb;
550 uspi = UFS_SB(sb)->s_uspi; 578 uspi = UFS_SB(sb)->s_uspi;
@@ -593,7 +621,7 @@ cg_found:
593 621
594 if (count == uspi->s_fpb) { 622 if (count == uspi->s_fpb) {
595 result = ufs_alloccg_block (inode, ucpi, goal, err); 623 result = ufs_alloccg_block (inode, ucpi, goal, err);
596 if (result == (unsigned)-1) 624 if (result == INVBLOCK)
597 return 0; 625 return 0;
598 goto succed; 626 goto succed;
599 } 627 }
@@ -604,9 +632,9 @@ cg_found:
604 632
605 if (allocsize == uspi->s_fpb) { 633 if (allocsize == uspi->s_fpb) {
606 result = ufs_alloccg_block (inode, ucpi, goal, err); 634 result = ufs_alloccg_block (inode, ucpi, goal, err);
607 if (result == (unsigned)-1) 635 if (result == INVBLOCK)
608 return 0; 636 return 0;
609 goal = ufs_dtogd (result); 637 goal = ufs_dtogd(uspi, result);
610 for (i = count; i < uspi->s_fpb; i++) 638 for (i = count; i < uspi->s_fpb; i++)
611 ubh_setbit (UCPI_UBH(ucpi), ucpi->c_freeoff, goal + i); 639 ubh_setbit (UCPI_UBH(ucpi), ucpi->c_freeoff, goal + i);
612 i = uspi->s_fpb - count; 640 i = uspi->s_fpb - count;
@@ -620,7 +648,7 @@ cg_found:
620 } 648 }
621 649
622 result = ufs_bitmap_search (sb, ucpi, goal, allocsize); 650 result = ufs_bitmap_search (sb, ucpi, goal, allocsize);
623 if (result == (unsigned)-1) 651 if (result == INVBLOCK)
624 return 0; 652 return 0;
625 if(DQUOT_ALLOC_BLOCK(inode, count)) { 653 if(DQUOT_ALLOC_BLOCK(inode, count)) {
626 *err = -EDQUOT; 654 *err = -EDQUOT;
@@ -647,20 +675,21 @@ succed:
647 sb->s_dirt = 1; 675 sb->s_dirt = 1;
648 676
649 result += cgno * uspi->s_fpg; 677 result += cgno * uspi->s_fpg;
650 UFSD("EXIT3, result %u\n", result); 678 UFSD("EXIT3, result %llu\n", (unsigned long long)result);
651 return result; 679 return result;
652} 680}
653 681
654static unsigned ufs_alloccg_block (struct inode * inode, 682static u64 ufs_alloccg_block(struct inode *inode,
655 struct ufs_cg_private_info * ucpi, unsigned goal, int * err) 683 struct ufs_cg_private_info *ucpi,
684 u64 goal, int *err)
656{ 685{
657 struct super_block * sb; 686 struct super_block * sb;
658 struct ufs_sb_private_info * uspi; 687 struct ufs_sb_private_info * uspi;
659 struct ufs_super_block_first * usb1; 688 struct ufs_super_block_first * usb1;
660 struct ufs_cylinder_group * ucg; 689 struct ufs_cylinder_group * ucg;
661 unsigned result, cylno, blkno; 690 u64 result, blkno;
662 691
663 UFSD("ENTER, goal %u\n", goal); 692 UFSD("ENTER, goal %llu\n", (unsigned long long)goal);
664 693
665 sb = inode->i_sb; 694 sb = inode->i_sb;
666 uspi = UFS_SB(sb)->s_uspi; 695 uspi = UFS_SB(sb)->s_uspi;
@@ -672,7 +701,7 @@ static unsigned ufs_alloccg_block (struct inode * inode,
672 goto norot; 701 goto norot;
673 } 702 }
674 goal = ufs_blknum (goal); 703 goal = ufs_blknum (goal);
675 goal = ufs_dtogd (goal); 704 goal = ufs_dtogd(uspi, goal);
676 705
677 /* 706 /*
678 * If the requested block is available, use it. 707 * If the requested block is available, use it.
@@ -684,8 +713,8 @@ static unsigned ufs_alloccg_block (struct inode * inode,
684 713
685norot: 714norot:
686 result = ufs_bitmap_search (sb, ucpi, goal, uspi->s_fpb); 715 result = ufs_bitmap_search (sb, ucpi, goal, uspi->s_fpb);
687 if (result == (unsigned)-1) 716 if (result == INVBLOCK)
688 return (unsigned)-1; 717 return INVBLOCK;
689 ucpi->c_rotor = result; 718 ucpi->c_rotor = result;
690gotit: 719gotit:
691 blkno = ufs_fragstoblks(result); 720 blkno = ufs_fragstoblks(result);
@@ -694,17 +723,22 @@ gotit:
694 ufs_clusteracct (sb, ucpi, blkno, -1); 723 ufs_clusteracct (sb, ucpi, blkno, -1);
695 if(DQUOT_ALLOC_BLOCK(inode, uspi->s_fpb)) { 724 if(DQUOT_ALLOC_BLOCK(inode, uspi->s_fpb)) {
696 *err = -EDQUOT; 725 *err = -EDQUOT;
697 return (unsigned)-1; 726 return INVBLOCK;
698 } 727 }
699 728
700 fs32_sub(sb, &ucg->cg_cs.cs_nbfree, 1); 729 fs32_sub(sb, &ucg->cg_cs.cs_nbfree, 1);
701 uspi->cs_total.cs_nbfree--; 730 uspi->cs_total.cs_nbfree--;
702 fs32_sub(sb, &UFS_SB(sb)->fs_cs(ucpi->c_cgx).cs_nbfree, 1); 731 fs32_sub(sb, &UFS_SB(sb)->fs_cs(ucpi->c_cgx).cs_nbfree, 1);
703 cylno = ufs_cbtocylno(result); 732
704 fs16_sub(sb, &ubh_cg_blks(ucpi, cylno, ufs_cbtorpos(result)), 1); 733 if (uspi->fs_magic != UFS2_MAGIC) {
705 fs32_sub(sb, &ubh_cg_blktot(ucpi, cylno), 1); 734 unsigned cylno = ufs_cbtocylno((unsigned)result);
735
736 fs16_sub(sb, &ubh_cg_blks(ucpi, cylno,
737 ufs_cbtorpos((unsigned)result)), 1);
738 fs32_sub(sb, &ubh_cg_blktot(ucpi, cylno), 1);
739 }
706 740
707 UFSD("EXIT, result %u\n", result); 741 UFSD("EXIT, result %llu\n", (unsigned long long)result);
708 742
709 return result; 743 return result;
710} 744}
@@ -744,9 +778,9 @@ static unsigned ubh_scanc(struct ufs_sb_private_info *uspi,
744 * @goal: near which block we want find new one 778 * @goal: near which block we want find new one
745 * @count: specified size 779 * @count: specified size
746 */ 780 */
747static unsigned ufs_bitmap_search(struct super_block *sb, 781static u64 ufs_bitmap_search(struct super_block *sb,
748 struct ufs_cg_private_info *ucpi, 782 struct ufs_cg_private_info *ucpi,
749 unsigned goal, unsigned count) 783 u64 goal, unsigned count)
750{ 784{
751 /* 785 /*
752 * Bit patterns for identifying fragments in the block map 786 * Bit patterns for identifying fragments in the block map
@@ -761,16 +795,18 @@ static unsigned ufs_bitmap_search(struct super_block *sb,
761 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; 795 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
762 struct ufs_super_block_first *usb1; 796 struct ufs_super_block_first *usb1;
763 struct ufs_cylinder_group *ucg; 797 struct ufs_cylinder_group *ucg;
764 unsigned start, length, loc, result; 798 unsigned start, length, loc;
765 unsigned pos, want, blockmap, mask, end; 799 unsigned pos, want, blockmap, mask, end;
800 u64 result;
766 801
767 UFSD("ENTER, cg %u, goal %u, count %u\n", ucpi->c_cgx, goal, count); 802 UFSD("ENTER, cg %u, goal %llu, count %u\n", ucpi->c_cgx,
803 (unsigned long long)goal, count);
768 804
769 usb1 = ubh_get_usb_first (uspi); 805 usb1 = ubh_get_usb_first (uspi);
770 ucg = ubh_get_ucg(UCPI_UBH(ucpi)); 806 ucg = ubh_get_ucg(UCPI_UBH(ucpi));
771 807
772 if (goal) 808 if (goal)
773 start = ufs_dtogd(goal) >> 3; 809 start = ufs_dtogd(uspi, goal) >> 3;
774 else 810 else
775 start = ucpi->c_frotor >> 3; 811 start = ucpi->c_frotor >> 3;
776 812
@@ -790,7 +826,7 @@ static unsigned ufs_bitmap_search(struct super_block *sb,
790 " length %u, count %u, freeoff %u\n", 826 " length %u, count %u, freeoff %u\n",
791 ucpi->c_cgx, start, length, count, 827 ucpi->c_cgx, start, length, count,
792 ucpi->c_freeoff); 828 ucpi->c_freeoff);
793 return (unsigned)-1; 829 return INVBLOCK;
794 } 830 }
795 start = 0; 831 start = 0;
796 } 832 }
@@ -808,7 +844,8 @@ static unsigned ufs_bitmap_search(struct super_block *sb,
808 want = want_arr[count]; 844 want = want_arr[count];
809 for (pos = 0; pos <= uspi->s_fpb - count; pos++) { 845 for (pos = 0; pos <= uspi->s_fpb - count; pos++) {
810 if ((blockmap & mask) == want) { 846 if ((blockmap & mask) == want) {
811 UFSD("EXIT, result %u\n", result); 847 UFSD("EXIT, result %llu\n",
848 (unsigned long long)result);
812 return result + pos; 849 return result + pos;
813 } 850 }
814 mask <<= 1; 851 mask <<= 1;
@@ -819,7 +856,7 @@ static unsigned ufs_bitmap_search(struct super_block *sb,
819 ufs_error(sb, "ufs_bitmap_search", "block not in map on cg %u\n", 856 ufs_error(sb, "ufs_bitmap_search", "block not in map on cg %u\n",
820 ucpi->c_cgx); 857 ucpi->c_cgx);
821 UFSD("EXIT (FAILED)\n"); 858 UFSD("EXIT (FAILED)\n");
822 return (unsigned)-1; 859 return INVBLOCK;
823} 860}
824 861
825static void ufs_clusteracct(struct super_block * sb, 862static void ufs_clusteracct(struct super_block * sb,
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index 433b6f68403a..a6c0ca9f48bf 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -106,12 +106,13 @@ static void ufs_check_page(struct page *page)
106 char *kaddr = page_address(page); 106 char *kaddr = page_address(page);
107 unsigned offs, rec_len; 107 unsigned offs, rec_len;
108 unsigned limit = PAGE_CACHE_SIZE; 108 unsigned limit = PAGE_CACHE_SIZE;
109 const unsigned chunk_mask = UFS_SB(sb)->s_uspi->s_dirblksize - 1;
109 struct ufs_dir_entry *p; 110 struct ufs_dir_entry *p;
110 char *error; 111 char *error;
111 112
112 if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) { 113 if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) {
113 limit = dir->i_size & ~PAGE_CACHE_MASK; 114 limit = dir->i_size & ~PAGE_CACHE_MASK;
114 if (limit & (UFS_SECTOR_SIZE - 1)) 115 if (limit & chunk_mask)
115 goto Ebadsize; 116 goto Ebadsize;
116 if (!limit) 117 if (!limit)
117 goto out; 118 goto out;
@@ -126,7 +127,7 @@ static void ufs_check_page(struct page *page)
126 goto Ealign; 127 goto Ealign;
127 if (rec_len < UFS_DIR_REC_LEN(ufs_get_de_namlen(sb, p))) 128 if (rec_len < UFS_DIR_REC_LEN(ufs_get_de_namlen(sb, p)))
128 goto Enamelen; 129 goto Enamelen;
129 if (((offs + rec_len - 1) ^ offs) & ~(UFS_SECTOR_SIZE-1)) 130 if (((offs + rec_len - 1) ^ offs) & ~chunk_mask)
130 goto Espan; 131 goto Espan;
131 if (fs32_to_cpu(sb, p->d_ino) > (UFS_SB(sb)->s_uspi->s_ipg * 132 if (fs32_to_cpu(sb, p->d_ino) > (UFS_SB(sb)->s_uspi->s_ipg *
132 UFS_SB(sb)->s_uspi->s_ncg)) 133 UFS_SB(sb)->s_uspi->s_ncg))
@@ -310,6 +311,7 @@ int ufs_add_link(struct dentry *dentry, struct inode *inode)
310 int namelen = dentry->d_name.len; 311 int namelen = dentry->d_name.len;
311 struct super_block *sb = dir->i_sb; 312 struct super_block *sb = dir->i_sb;
312 unsigned reclen = UFS_DIR_REC_LEN(namelen); 313 unsigned reclen = UFS_DIR_REC_LEN(namelen);
314 const unsigned int chunk_size = UFS_SB(sb)->s_uspi->s_dirblksize;
313 unsigned short rec_len, name_len; 315 unsigned short rec_len, name_len;
314 struct page *page = NULL; 316 struct page *page = NULL;
315 struct ufs_dir_entry *de; 317 struct ufs_dir_entry *de;
@@ -342,8 +344,8 @@ int ufs_add_link(struct dentry *dentry, struct inode *inode)
342 if ((char *)de == dir_end) { 344 if ((char *)de == dir_end) {
343 /* We hit i_size */ 345 /* We hit i_size */
344 name_len = 0; 346 name_len = 0;
345 rec_len = UFS_SECTOR_SIZE; 347 rec_len = chunk_size;
346 de->d_reclen = cpu_to_fs16(sb, UFS_SECTOR_SIZE); 348 de->d_reclen = cpu_to_fs16(sb, chunk_size);
347 de->d_ino = 0; 349 de->d_ino = 0;
348 goto got_it; 350 goto got_it;
349 } 351 }
@@ -431,7 +433,7 @@ ufs_readdir(struct file *filp, void *dirent, filldir_t filldir)
431 unsigned int offset = pos & ~PAGE_CACHE_MASK; 433 unsigned int offset = pos & ~PAGE_CACHE_MASK;
432 unsigned long n = pos >> PAGE_CACHE_SHIFT; 434 unsigned long n = pos >> PAGE_CACHE_SHIFT;
433 unsigned long npages = ufs_dir_pages(inode); 435 unsigned long npages = ufs_dir_pages(inode);
434 unsigned chunk_mask = ~(UFS_SECTOR_SIZE - 1); 436 unsigned chunk_mask = ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1);
435 int need_revalidate = filp->f_version != inode->i_version; 437 int need_revalidate = filp->f_version != inode->i_version;
436 unsigned flags = UFS_SB(sb)->s_flags; 438 unsigned flags = UFS_SB(sb)->s_flags;
437 439
@@ -511,7 +513,7 @@ int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir,
511 struct super_block *sb = inode->i_sb; 513 struct super_block *sb = inode->i_sb;
512 struct address_space *mapping = page->mapping; 514 struct address_space *mapping = page->mapping;
513 char *kaddr = page_address(page); 515 char *kaddr = page_address(page);
514 unsigned from = ((char*)dir - kaddr) & ~(UFS_SECTOR_SIZE - 1); 516 unsigned from = ((char*)dir - kaddr) & ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1);
515 unsigned to = ((char*)dir - kaddr) + fs16_to_cpu(sb, dir->d_reclen); 517 unsigned to = ((char*)dir - kaddr) + fs16_to_cpu(sb, dir->d_reclen);
516 struct ufs_dir_entry *pde = NULL; 518 struct ufs_dir_entry *pde = NULL;
517 struct ufs_dir_entry *de = (struct ufs_dir_entry *) (kaddr + from); 519 struct ufs_dir_entry *de = (struct ufs_dir_entry *) (kaddr + from);
@@ -556,6 +558,7 @@ int ufs_make_empty(struct inode * inode, struct inode *dir)
556 struct super_block * sb = dir->i_sb; 558 struct super_block * sb = dir->i_sb;
557 struct address_space *mapping = inode->i_mapping; 559 struct address_space *mapping = inode->i_mapping;
558 struct page *page = grab_cache_page(mapping, 0); 560 struct page *page = grab_cache_page(mapping, 0);
561 const unsigned int chunk_size = UFS_SB(sb)->s_uspi->s_dirblksize;
559 struct ufs_dir_entry * de; 562 struct ufs_dir_entry * de;
560 char *base; 563 char *base;
561 int err; 564 int err;
@@ -563,7 +566,7 @@ int ufs_make_empty(struct inode * inode, struct inode *dir)
563 if (!page) 566 if (!page)
564 return -ENOMEM; 567 return -ENOMEM;
565 kmap(page); 568 kmap(page);
566 err = mapping->a_ops->prepare_write(NULL, page, 0, UFS_SECTOR_SIZE); 569 err = mapping->a_ops->prepare_write(NULL, page, 0, chunk_size);
567 if (err) { 570 if (err) {
568 unlock_page(page); 571 unlock_page(page);
569 goto fail; 572 goto fail;
@@ -584,11 +587,11 @@ int ufs_make_empty(struct inode * inode, struct inode *dir)
584 ((char *)de + fs16_to_cpu(sb, de->d_reclen)); 587 ((char *)de + fs16_to_cpu(sb, de->d_reclen));
585 de->d_ino = cpu_to_fs32(sb, dir->i_ino); 588 de->d_ino = cpu_to_fs32(sb, dir->i_ino);
586 ufs_set_de_type(sb, de, dir->i_mode); 589 ufs_set_de_type(sb, de, dir->i_mode);
587 de->d_reclen = cpu_to_fs16(sb, UFS_SECTOR_SIZE - UFS_DIR_REC_LEN(1)); 590 de->d_reclen = cpu_to_fs16(sb, chunk_size - UFS_DIR_REC_LEN(1));
588 ufs_set_de_namlen(sb, de, 2); 591 ufs_set_de_namlen(sb, de, 2);
589 strcpy (de->d_name, ".."); 592 strcpy (de->d_name, "..");
590 593
591 err = ufs_commit_chunk(page, 0, UFS_SECTOR_SIZE); 594 err = ufs_commit_chunk(page, 0, chunk_size);
592fail: 595fail:
593 kunmap(page); 596 kunmap(page);
594 page_cache_release(page); 597 page_cache_release(page);
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 2ad1259c6eca..b868878009b6 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -18,6 +18,9 @@
18 * Stephen Tweedie (sct@dcs.ed.ac.uk), 1993 18 * Stephen Tweedie (sct@dcs.ed.ac.uk), 1993
19 * Big-endian to little-endian byte-swapping/bitmaps by 19 * Big-endian to little-endian byte-swapping/bitmaps by
20 * David S. Miller (davem@caip.rutgers.edu), 1995 20 * David S. Miller (davem@caip.rutgers.edu), 1995
21 *
22 * UFS2 write support added by
23 * Evgeniy Dushistov <dushistov@mail.ru>, 2007
21 */ 24 */
22 25
23#include <linux/fs.h> 26#include <linux/fs.h>
@@ -126,6 +129,47 @@ void ufs_free_inode (struct inode * inode)
126} 129}
127 130
128/* 131/*
132 * Nullify new chunk of inodes,
133 * BSD people also set ui_gen field of inode
134 * during nullification, but we not care about
135 * that because of linux ufs do not support NFS
136 */
137static void ufs2_init_inodes_chunk(struct super_block *sb,
138 struct ufs_cg_private_info *ucpi,
139 struct ufs_cylinder_group *ucg)
140{
141 struct buffer_head *bh;
142 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
143 sector_t beg = uspi->s_sbbase +
144 ufs_inotofsba(ucpi->c_cgx * uspi->s_ipg +
145 fs32_to_cpu(sb, ucg->cg_u.cg_u2.cg_initediblk));
146 sector_t end = beg + uspi->s_fpb;
147
148 UFSD("ENTER cgno %d\n", ucpi->c_cgx);
149
150 for (; beg < end; ++beg) {
151 bh = sb_getblk(sb, beg);
152 lock_buffer(bh);
153 memset(bh->b_data, 0, sb->s_blocksize);
154 set_buffer_uptodate(bh);
155 mark_buffer_dirty(bh);
156 unlock_buffer(bh);
157 if (sb->s_flags & MS_SYNCHRONOUS)
158 sync_dirty_buffer(bh);
159 brelse(bh);
160 }
161
162 fs32_add(sb, &ucg->cg_u.cg_u2.cg_initediblk, uspi->s_inopb);
163 ubh_mark_buffer_dirty(UCPI_UBH(ucpi));
164 if (sb->s_flags & MS_SYNCHRONOUS) {
165 ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi));
166 ubh_wait_on_buffer(UCPI_UBH(ucpi));
167 }
168
169 UFSD("EXIT\n");
170}
171
172/*
129 * There are two policies for allocating an inode. If the new inode is 173 * There are two policies for allocating an inode. If the new inode is
130 * a directory, then a forward search is made for a block group with both 174 * a directory, then a forward search is made for a block group with both
131 * free space and a low directory-to-inode ratio; if that fails, then of 175 * free space and a low directory-to-inode ratio; if that fails, then of
@@ -146,6 +190,7 @@ struct inode * ufs_new_inode(struct inode * dir, int mode)
146 struct inode * inode; 190 struct inode * inode;
147 unsigned cg, bit, i, j, start; 191 unsigned cg, bit, i, j, start;
148 struct ufs_inode_info *ufsi; 192 struct ufs_inode_info *ufsi;
193 int err = -ENOSPC;
149 194
150 UFSD("ENTER\n"); 195 UFSD("ENTER\n");
151 196
@@ -198,13 +243,15 @@ struct inode * ufs_new_inode(struct inode * dir, int mode)
198 goto cg_found; 243 goto cg_found;
199 } 244 }
200 } 245 }
201 246
202 goto failed; 247 goto failed;
203 248
204cg_found: 249cg_found:
205 ucpi = ufs_load_cylinder (sb, cg); 250 ucpi = ufs_load_cylinder (sb, cg);
206 if (!ucpi) 251 if (!ucpi) {
252 err = -EIO;
207 goto failed; 253 goto failed;
254 }
208 ucg = ubh_get_ucg(UCPI_UBH(ucpi)); 255 ucg = ubh_get_ucg(UCPI_UBH(ucpi));
209 if (!ufs_cg_chkmagic(sb, ucg)) 256 if (!ufs_cg_chkmagic(sb, ucg))
210 ufs_panic (sb, "ufs_new_inode", "internal error, bad cg magic number"); 257 ufs_panic (sb, "ufs_new_inode", "internal error, bad cg magic number");
@@ -216,6 +263,7 @@ cg_found:
216 if (!(bit < start)) { 263 if (!(bit < start)) {
217 ufs_error (sb, "ufs_new_inode", 264 ufs_error (sb, "ufs_new_inode",
218 "cylinder group %u corrupted - error in inode bitmap\n", cg); 265 "cylinder group %u corrupted - error in inode bitmap\n", cg);
266 err = -EIO;
219 goto failed; 267 goto failed;
220 } 268 }
221 } 269 }
@@ -224,9 +272,18 @@ cg_found:
224 ubh_setbit (UCPI_UBH(ucpi), ucpi->c_iusedoff, bit); 272 ubh_setbit (UCPI_UBH(ucpi), ucpi->c_iusedoff, bit);
225 else { 273 else {
226 ufs_panic (sb, "ufs_new_inode", "internal error"); 274 ufs_panic (sb, "ufs_new_inode", "internal error");
275 err = -EIO;
227 goto failed; 276 goto failed;
228 } 277 }
229 278
279 if (uspi->fs_magic == UFS2_MAGIC) {
280 u32 initediblk = fs32_to_cpu(sb, ucg->cg_u.cg_u2.cg_initediblk);
281
282 if (bit + uspi->s_inopb > initediblk &&
283 initediblk < fs32_to_cpu(sb, ucg->cg_u.cg_u2.cg_niblk))
284 ufs2_init_inodes_chunk(sb, ucpi, ucg);
285 }
286
230 fs32_sub(sb, &ucg->cg_cs.cs_nifree, 1); 287 fs32_sub(sb, &ucg->cg_cs.cs_nifree, 1);
231 uspi->cs_total.cs_nifree--; 288 uspi->cs_total.cs_nifree--;
232 fs32_sub(sb, &sbi->fs_cs(cg).cs_nifree, 1); 289 fs32_sub(sb, &sbi->fs_cs(cg).cs_nifree, 1);
@@ -236,7 +293,6 @@ cg_found:
236 uspi->cs_total.cs_ndir++; 293 uspi->cs_total.cs_ndir++;
237 fs32_add(sb, &sbi->fs_cs(cg).cs_ndir, 1); 294 fs32_add(sb, &sbi->fs_cs(cg).cs_ndir, 1);
238 } 295 }
239
240 ubh_mark_buffer_dirty (USPI_UBH(uspi)); 296 ubh_mark_buffer_dirty (USPI_UBH(uspi));
241 ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); 297 ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
242 if (sb->s_flags & MS_SYNCHRONOUS) { 298 if (sb->s_flags & MS_SYNCHRONOUS) {
@@ -245,6 +301,7 @@ cg_found:
245 } 301 }
246 sb->s_dirt = 1; 302 sb->s_dirt = 1;
247 303
304 inode->i_ino = cg * uspi->s_ipg + bit;
248 inode->i_mode = mode; 305 inode->i_mode = mode;
249 inode->i_uid = current->fsuid; 306 inode->i_uid = current->fsuid;
250 if (dir->i_mode & S_ISGID) { 307 if (dir->i_mode & S_ISGID) {
@@ -254,39 +311,72 @@ cg_found:
254 } else 311 } else
255 inode->i_gid = current->fsgid; 312 inode->i_gid = current->fsgid;
256 313
257 inode->i_ino = cg * uspi->s_ipg + bit;
258 inode->i_blocks = 0; 314 inode->i_blocks = 0;
315 inode->i_generation = 0;
259 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 316 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
260 ufsi->i_flags = UFS_I(dir)->i_flags; 317 ufsi->i_flags = UFS_I(dir)->i_flags;
261 ufsi->i_lastfrag = 0; 318 ufsi->i_lastfrag = 0;
262 ufsi->i_gen = 0;
263 ufsi->i_shadow = 0; 319 ufsi->i_shadow = 0;
264 ufsi->i_osync = 0; 320 ufsi->i_osync = 0;
265 ufsi->i_oeftflag = 0; 321 ufsi->i_oeftflag = 0;
266 ufsi->i_dir_start_lookup = 0; 322 ufsi->i_dir_start_lookup = 0;
267 memset(&ufsi->i_u1, 0, sizeof(ufsi->i_u1)); 323 memset(&ufsi->i_u1, 0, sizeof(ufsi->i_u1));
268
269 insert_inode_hash(inode); 324 insert_inode_hash(inode);
270 mark_inode_dirty(inode); 325 mark_inode_dirty(inode);
271 326
327 if (uspi->fs_magic == UFS2_MAGIC) {
328 struct buffer_head *bh;
329 struct ufs2_inode *ufs2_inode;
330
331 /*
332 * setup birth date, we do it here because of there is no sense
333 * to hold it in struct ufs_inode_info, and lose 64 bit
334 */
335 bh = sb_bread(sb, uspi->s_sbbase + ufs_inotofsba(inode->i_ino));
336 if (!bh) {
337 ufs_warning(sb, "ufs_read_inode",
338 "unable to read inode %lu\n",
339 inode->i_ino);
340 err = -EIO;
341 goto fail_remove_inode;
342 }
343 lock_buffer(bh);
344 ufs2_inode = (struct ufs2_inode *)bh->b_data;
345 ufs2_inode += ufs_inotofsbo(inode->i_ino);
346 ufs2_inode->ui_birthtime.tv_sec =
347 cpu_to_fs32(sb, CURRENT_TIME_SEC.tv_sec);
348 ufs2_inode->ui_birthtime.tv_usec = 0;
349 mark_buffer_dirty(bh);
350 unlock_buffer(bh);
351 if (sb->s_flags & MS_SYNCHRONOUS)
352 sync_dirty_buffer(bh);
353 brelse(bh);
354 }
355
272 unlock_super (sb); 356 unlock_super (sb);
273 357
274 if (DQUOT_ALLOC_INODE(inode)) { 358 if (DQUOT_ALLOC_INODE(inode)) {
275 DQUOT_DROP(inode); 359 DQUOT_DROP(inode);
276 inode->i_flags |= S_NOQUOTA; 360 err = -EDQUOT;
277 inode->i_nlink = 0; 361 goto fail_without_unlock;
278 iput(inode);
279 return ERR_PTR(-EDQUOT);
280 } 362 }
281 363
282 UFSD("allocating inode %lu\n", inode->i_ino); 364 UFSD("allocating inode %lu\n", inode->i_ino);
283 UFSD("EXIT\n"); 365 UFSD("EXIT\n");
284 return inode; 366 return inode;
285 367
368fail_remove_inode:
369 unlock_super(sb);
370fail_without_unlock:
371 inode->i_flags |= S_NOQUOTA;
372 inode->i_nlink = 0;
373 iput(inode);
374 UFSD("EXIT (FAILED): err %d\n", err);
375 return ERR_PTR(err);
286failed: 376failed:
287 unlock_super (sb); 377 unlock_super (sb);
288 make_bad_inode(inode); 378 make_bad_inode(inode);
289 iput (inode); 379 iput (inode);
290 UFSD("EXIT (FAILED)\n"); 380 UFSD("EXIT (FAILED): err %d\n", err);
291 return ERR_PTR(-ENOSPC); 381 return ERR_PTR(err);
292} 382}
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 4295ca91cf85..fb34ad03e224 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -170,7 +170,7 @@ out:
170 * @locked_page - for ufs_new_fragments() 170 * @locked_page - for ufs_new_fragments()
171 */ 171 */
172static struct buffer_head * 172static struct buffer_head *
173ufs_inode_getfrag(struct inode *inode, unsigned int fragment, 173ufs_inode_getfrag(struct inode *inode, u64 fragment,
174 sector_t new_fragment, unsigned int required, int *err, 174 sector_t new_fragment, unsigned int required, int *err,
175 long *phys, int *new, struct page *locked_page) 175 long *phys, int *new, struct page *locked_page)
176{ 176{
@@ -178,12 +178,12 @@ ufs_inode_getfrag(struct inode *inode, unsigned int fragment,
178 struct super_block *sb = inode->i_sb; 178 struct super_block *sb = inode->i_sb;
179 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; 179 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
180 struct buffer_head * result; 180 struct buffer_head * result;
181 unsigned block, blockoff, lastfrag, lastblock, lastblockoff; 181 unsigned blockoff, lastblockoff;
182 unsigned tmp, goal; 182 u64 tmp, goal, lastfrag, block, lastblock;
183 __fs32 * p, * p2; 183 void *p, *p2;
184 184
185 UFSD("ENTER, ino %lu, fragment %u, new_fragment %llu, required %u, " 185 UFSD("ENTER, ino %lu, fragment %llu, new_fragment %llu, required %u, "
186 "metadata %d\n", inode->i_ino, fragment, 186 "metadata %d\n", inode->i_ino, (unsigned long long)fragment,
187 (unsigned long long)new_fragment, required, !phys); 187 (unsigned long long)new_fragment, required, !phys);
188 188
189 /* TODO : to be done for write support 189 /* TODO : to be done for write support
@@ -193,17 +193,20 @@ ufs_inode_getfrag(struct inode *inode, unsigned int fragment,
193 193
194 block = ufs_fragstoblks (fragment); 194 block = ufs_fragstoblks (fragment);
195 blockoff = ufs_fragnum (fragment); 195 blockoff = ufs_fragnum (fragment);
196 p = ufsi->i_u1.i_data + block; 196 p = ufs_get_direct_data_ptr(uspi, ufsi, block);
197
197 goal = 0; 198 goal = 0;
198 199
199repeat: 200repeat:
200 tmp = fs32_to_cpu(sb, *p); 201 tmp = ufs_data_ptr_to_cpu(sb, p);
202
201 lastfrag = ufsi->i_lastfrag; 203 lastfrag = ufsi->i_lastfrag;
202 if (tmp && fragment < lastfrag) { 204 if (tmp && fragment < lastfrag) {
203 if (!phys) { 205 if (!phys) {
204 result = sb_getblk(sb, uspi->s_sbbase + tmp + blockoff); 206 result = sb_getblk(sb, uspi->s_sbbase + tmp + blockoff);
205 if (tmp == fs32_to_cpu(sb, *p)) { 207 if (tmp == ufs_data_ptr_to_cpu(sb, p)) {
206 UFSD("EXIT, result %u\n", tmp + blockoff); 208 UFSD("EXIT, result %llu\n",
209 (unsigned long long)tmp + blockoff);
207 return result; 210 return result;
208 } 211 }
209 brelse (result); 212 brelse (result);
@@ -224,10 +227,11 @@ repeat:
224 * We must reallocate last allocated block 227 * We must reallocate last allocated block
225 */ 228 */
226 if (lastblockoff) { 229 if (lastblockoff) {
227 p2 = ufsi->i_u1.i_data + lastblock; 230 p2 = ufs_get_direct_data_ptr(uspi, ufsi, lastblock);
228 tmp = ufs_new_fragments (inode, p2, lastfrag, 231 tmp = ufs_new_fragments(inode, p2, lastfrag,
229 fs32_to_cpu(sb, *p2), uspi->s_fpb - lastblockoff, 232 ufs_data_ptr_to_cpu(sb, p2),
230 err, locked_page); 233 uspi->s_fpb - lastblockoff,
234 err, locked_page);
231 if (!tmp) { 235 if (!tmp) {
232 if (lastfrag != ufsi->i_lastfrag) 236 if (lastfrag != ufsi->i_lastfrag)
233 goto repeat; 237 goto repeat;
@@ -237,27 +241,31 @@ repeat:
237 lastfrag = ufsi->i_lastfrag; 241 lastfrag = ufsi->i_lastfrag;
238 242
239 } 243 }
240 tmp = fs32_to_cpu(sb, ufsi->i_u1.i_data[lastblock]); 244 tmp = ufs_data_ptr_to_cpu(sb,
245 ufs_get_direct_data_ptr(uspi, ufsi,
246 lastblock));
241 if (tmp) 247 if (tmp)
242 goal = tmp + uspi->s_fpb; 248 goal = tmp + uspi->s_fpb;
243 tmp = ufs_new_fragments (inode, p, fragment - blockoff, 249 tmp = ufs_new_fragments (inode, p, fragment - blockoff,
244 goal, required + blockoff, 250 goal, required + blockoff,
245 err, 251 err,
246 phys != NULL ? locked_page : NULL); 252 phys != NULL ? locked_page : NULL);
247 } 253 } else if (lastblock == block) {
248 /* 254 /*
249 * We will extend last allocated block 255 * We will extend last allocated block
250 */ 256 */
251 else if (lastblock == block) { 257 tmp = ufs_new_fragments(inode, p, fragment -
252 tmp = ufs_new_fragments(inode, p, fragment - (blockoff - lastblockoff), 258 (blockoff - lastblockoff),
253 fs32_to_cpu(sb, *p), required + (blockoff - lastblockoff), 259 ufs_data_ptr_to_cpu(sb, p),
260 required + (blockoff - lastblockoff),
254 err, phys != NULL ? locked_page : NULL); 261 err, phys != NULL ? locked_page : NULL);
255 } else /* (lastblock > block) */ { 262 } else /* (lastblock > block) */ {
256 /* 263 /*
257 * We will allocate new block before last allocated block 264 * We will allocate new block before last allocated block
258 */ 265 */
259 if (block) { 266 if (block) {
260 tmp = fs32_to_cpu(sb, ufsi->i_u1.i_data[block-1]); 267 tmp = ufs_data_ptr_to_cpu(sb,
268 ufs_get_direct_data_ptr(uspi, ufsi, block - 1));
261 if (tmp) 269 if (tmp)
262 goal = tmp + uspi->s_fpb; 270 goal = tmp + uspi->s_fpb;
263 } 271 }
@@ -266,7 +274,7 @@ repeat:
266 phys != NULL ? locked_page : NULL); 274 phys != NULL ? locked_page : NULL);
267 } 275 }
268 if (!tmp) { 276 if (!tmp) {
269 if ((!blockoff && *p) || 277 if ((!blockoff && ufs_data_ptr_to_cpu(sb, p)) ||
270 (blockoff && lastfrag != ufsi->i_lastfrag)) 278 (blockoff && lastfrag != ufsi->i_lastfrag))
271 goto repeat; 279 goto repeat;
272 *err = -ENOSPC; 280 *err = -ENOSPC;
@@ -286,7 +294,7 @@ repeat:
286 if (IS_SYNC(inode)) 294 if (IS_SYNC(inode))
287 ufs_sync_inode (inode); 295 ufs_sync_inode (inode);
288 mark_inode_dirty(inode); 296 mark_inode_dirty(inode);
289 UFSD("EXIT, result %u\n", tmp + blockoff); 297 UFSD("EXIT, result %llu\n", (unsigned long long)tmp + blockoff);
290 return result; 298 return result;
291 299
292 /* This part : To be implemented .... 300 /* This part : To be implemented ....
@@ -320,20 +328,22 @@ repeat2:
320 */ 328 */
321static struct buffer_head * 329static struct buffer_head *
322ufs_inode_getblock(struct inode *inode, struct buffer_head *bh, 330ufs_inode_getblock(struct inode *inode, struct buffer_head *bh,
323 unsigned int fragment, sector_t new_fragment, int *err, 331 u64 fragment, sector_t new_fragment, int *err,
324 long *phys, int *new, struct page *locked_page) 332 long *phys, int *new, struct page *locked_page)
325{ 333{
326 struct super_block *sb = inode->i_sb; 334 struct super_block *sb = inode->i_sb;
327 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; 335 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
328 struct buffer_head * result; 336 struct buffer_head * result;
329 unsigned tmp, goal, block, blockoff; 337 unsigned blockoff;
330 __fs32 * p; 338 u64 tmp, goal, block;
339 void *p;
331 340
332 block = ufs_fragstoblks (fragment); 341 block = ufs_fragstoblks (fragment);
333 blockoff = ufs_fragnum (fragment); 342 blockoff = ufs_fragnum (fragment);
334 343
335 UFSD("ENTER, ino %lu, fragment %u, new_fragment %llu, metadata %d\n", 344 UFSD("ENTER, ino %lu, fragment %llu, new_fragment %llu, metadata %d\n",
336 inode->i_ino, fragment, (unsigned long long)new_fragment, !phys); 345 inode->i_ino, (unsigned long long)fragment,
346 (unsigned long long)new_fragment, !phys);
337 347
338 result = NULL; 348 result = NULL;
339 if (!bh) 349 if (!bh)
@@ -344,14 +354,16 @@ ufs_inode_getblock(struct inode *inode, struct buffer_head *bh,
344 if (!buffer_uptodate(bh)) 354 if (!buffer_uptodate(bh))
345 goto out; 355 goto out;
346 } 356 }
347 357 if (uspi->fs_magic == UFS2_MAGIC)
348 p = (__fs32 *) bh->b_data + block; 358 p = (__fs64 *)bh->b_data + block;
359 else
360 p = (__fs32 *)bh->b_data + block;
349repeat: 361repeat:
350 tmp = fs32_to_cpu(sb, *p); 362 tmp = ufs_data_ptr_to_cpu(sb, p);
351 if (tmp) { 363 if (tmp) {
352 if (!phys) { 364 if (!phys) {
353 result = sb_getblk(sb, uspi->s_sbbase + tmp + blockoff); 365 result = sb_getblk(sb, uspi->s_sbbase + tmp + blockoff);
354 if (tmp == fs32_to_cpu(sb, *p)) 366 if (tmp == ufs_data_ptr_to_cpu(sb, p))
355 goto out; 367 goto out;
356 brelse (result); 368 brelse (result);
357 goto repeat; 369 goto repeat;
@@ -361,14 +373,16 @@ repeat:
361 } 373 }
362 } 374 }
363 375
364 if (block && (tmp = fs32_to_cpu(sb, ((__fs32*)bh->b_data)[block-1]))) 376 if (block && (uspi->fs_magic == UFS2_MAGIC ?
377 (tmp = fs64_to_cpu(sb, ((__fs64 *)bh->b_data)[block-1])) :
378 (tmp = fs32_to_cpu(sb, ((__fs32 *)bh->b_data)[block-1]))))
365 goal = tmp + uspi->s_fpb; 379 goal = tmp + uspi->s_fpb;
366 else 380 else
367 goal = bh->b_blocknr + uspi->s_fpb; 381 goal = bh->b_blocknr + uspi->s_fpb;
368 tmp = ufs_new_fragments(inode, p, ufs_blknum(new_fragment), goal, 382 tmp = ufs_new_fragments(inode, p, ufs_blknum(new_fragment), goal,
369 uspi->s_fpb, err, locked_page); 383 uspi->s_fpb, err, locked_page);
370 if (!tmp) { 384 if (!tmp) {
371 if (fs32_to_cpu(sb, *p)) 385 if (ufs_data_ptr_to_cpu(sb, p))
372 goto repeat; 386 goto repeat;
373 goto out; 387 goto out;
374 } 388 }
@@ -386,7 +400,7 @@ repeat:
386 sync_dirty_buffer(bh); 400 sync_dirty_buffer(bh);
387 inode->i_ctime = CURRENT_TIME_SEC; 401 inode->i_ctime = CURRENT_TIME_SEC;
388 mark_inode_dirty(inode); 402 mark_inode_dirty(inode);
389 UFSD("result %u\n", tmp + blockoff); 403 UFSD("result %llu\n", (unsigned long long)tmp + blockoff);
390out: 404out:
391 brelse (bh); 405 brelse (bh);
392 UFSD("EXIT\n"); 406 UFSD("EXIT\n");
@@ -616,8 +630,8 @@ static void ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode)
616 inode->i_atime.tv_nsec = 0; 630 inode->i_atime.tv_nsec = 0;
617 inode->i_ctime.tv_nsec = 0; 631 inode->i_ctime.tv_nsec = 0;
618 inode->i_blocks = fs32_to_cpu(sb, ufs_inode->ui_blocks); 632 inode->i_blocks = fs32_to_cpu(sb, ufs_inode->ui_blocks);
633 inode->i_generation = fs32_to_cpu(sb, ufs_inode->ui_gen);
619 ufsi->i_flags = fs32_to_cpu(sb, ufs_inode->ui_flags); 634 ufsi->i_flags = fs32_to_cpu(sb, ufs_inode->ui_flags);
620 ufsi->i_gen = fs32_to_cpu(sb, ufs_inode->ui_gen);
621 ufsi->i_shadow = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_shadow); 635 ufsi->i_shadow = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_shadow);
622 ufsi->i_oeftflag = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_oeftflag); 636 ufsi->i_oeftflag = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_oeftflag);
623 637
@@ -661,8 +675,8 @@ static void ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode)
661 inode->i_atime.tv_nsec = 0; 675 inode->i_atime.tv_nsec = 0;
662 inode->i_ctime.tv_nsec = 0; 676 inode->i_ctime.tv_nsec = 0;
663 inode->i_blocks = fs64_to_cpu(sb, ufs2_inode->ui_blocks); 677 inode->i_blocks = fs64_to_cpu(sb, ufs2_inode->ui_blocks);
678 inode->i_generation = fs32_to_cpu(sb, ufs2_inode->ui_gen);
664 ufsi->i_flags = fs32_to_cpu(sb, ufs2_inode->ui_flags); 679 ufsi->i_flags = fs32_to_cpu(sb, ufs2_inode->ui_flags);
665 ufsi->i_gen = fs32_to_cpu(sb, ufs2_inode->ui_gen);
666 /* 680 /*
667 ufsi->i_shadow = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_shadow); 681 ufsi->i_shadow = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_shadow);
668 ufsi->i_oeftflag = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_oeftflag); 682 ufsi->i_oeftflag = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_oeftflag);
@@ -731,34 +745,11 @@ bad_inode:
731 make_bad_inode(inode); 745 make_bad_inode(inode);
732} 746}
733 747
734static int ufs_update_inode(struct inode * inode, int do_sync) 748static void ufs1_update_inode(struct inode *inode, struct ufs_inode *ufs_inode)
735{ 749{
736 struct ufs_inode_info *ufsi = UFS_I(inode); 750 struct super_block *sb = inode->i_sb;
737 struct super_block * sb; 751 struct ufs_inode_info *ufsi = UFS_I(inode);
738 struct ufs_sb_private_info * uspi; 752 unsigned i;
739 struct buffer_head * bh;
740 struct ufs_inode * ufs_inode;
741 unsigned i;
742 unsigned flags;
743
744 UFSD("ENTER, ino %lu\n", inode->i_ino);
745
746 sb = inode->i_sb;
747 uspi = UFS_SB(sb)->s_uspi;
748 flags = UFS_SB(sb)->s_flags;
749
750 if (inode->i_ino < UFS_ROOTINO ||
751 inode->i_ino > (uspi->s_ncg * uspi->s_ipg)) {
752 ufs_warning (sb, "ufs_read_inode", "bad inode number (%lu)\n", inode->i_ino);
753 return -1;
754 }
755
756 bh = sb_bread(sb, ufs_inotofsba(inode->i_ino));
757 if (!bh) {
758 ufs_warning (sb, "ufs_read_inode", "unable to read inode %lu\n", inode->i_ino);
759 return -1;
760 }
761 ufs_inode = (struct ufs_inode *) (bh->b_data + ufs_inotofsbo(inode->i_ino) * sizeof(struct ufs_inode));
762 753
763 ufs_inode->ui_mode = cpu_to_fs16(sb, inode->i_mode); 754 ufs_inode->ui_mode = cpu_to_fs16(sb, inode->i_mode);
764 ufs_inode->ui_nlink = cpu_to_fs16(sb, inode->i_nlink); 755 ufs_inode->ui_nlink = cpu_to_fs16(sb, inode->i_nlink);
@@ -775,9 +766,9 @@ static int ufs_update_inode(struct inode * inode, int do_sync)
775 ufs_inode->ui_mtime.tv_usec = 0; 766 ufs_inode->ui_mtime.tv_usec = 0;
776 ufs_inode->ui_blocks = cpu_to_fs32(sb, inode->i_blocks); 767 ufs_inode->ui_blocks = cpu_to_fs32(sb, inode->i_blocks);
777 ufs_inode->ui_flags = cpu_to_fs32(sb, ufsi->i_flags); 768 ufs_inode->ui_flags = cpu_to_fs32(sb, ufsi->i_flags);
778 ufs_inode->ui_gen = cpu_to_fs32(sb, ufsi->i_gen); 769 ufs_inode->ui_gen = cpu_to_fs32(sb, inode->i_generation);
779 770
780 if ((flags & UFS_UID_MASK) == UFS_UID_EFT) { 771 if ((UFS_SB(sb)->s_flags & UFS_UID_MASK) == UFS_UID_EFT) {
781 ufs_inode->ui_u3.ui_sun.ui_shadow = cpu_to_fs32(sb, ufsi->i_shadow); 772 ufs_inode->ui_u3.ui_sun.ui_shadow = cpu_to_fs32(sb, ufsi->i_shadow);
782 ufs_inode->ui_u3.ui_sun.ui_oeftflag = cpu_to_fs32(sb, ufsi->i_oeftflag); 773 ufs_inode->ui_u3.ui_sun.ui_oeftflag = cpu_to_fs32(sb, ufsi->i_oeftflag);
783 } 774 }
@@ -796,6 +787,78 @@ static int ufs_update_inode(struct inode * inode, int do_sync)
796 787
797 if (!inode->i_nlink) 788 if (!inode->i_nlink)
798 memset (ufs_inode, 0, sizeof(struct ufs_inode)); 789 memset (ufs_inode, 0, sizeof(struct ufs_inode));
790}
791
792static void ufs2_update_inode(struct inode *inode, struct ufs2_inode *ufs_inode)
793{
794 struct super_block *sb = inode->i_sb;
795 struct ufs_inode_info *ufsi = UFS_I(inode);
796 unsigned i;
797
798 UFSD("ENTER\n");
799 ufs_inode->ui_mode = cpu_to_fs16(sb, inode->i_mode);
800 ufs_inode->ui_nlink = cpu_to_fs16(sb, inode->i_nlink);
801
802 ufs_inode->ui_uid = cpu_to_fs32(sb, inode->i_uid);
803 ufs_inode->ui_gid = cpu_to_fs32(sb, inode->i_gid);
804
805 ufs_inode->ui_size = cpu_to_fs64(sb, inode->i_size);
806 ufs_inode->ui_atime.tv_sec = cpu_to_fs32(sb, inode->i_atime.tv_sec);
807 ufs_inode->ui_atime.tv_usec = 0;
808 ufs_inode->ui_ctime.tv_sec = cpu_to_fs32(sb, inode->i_ctime.tv_sec);
809 ufs_inode->ui_ctime.tv_usec = 0;
810 ufs_inode->ui_mtime.tv_sec = cpu_to_fs32(sb, inode->i_mtime.tv_sec);
811 ufs_inode->ui_mtime.tv_usec = 0;
812
813 ufs_inode->ui_blocks = cpu_to_fs64(sb, inode->i_blocks);
814 ufs_inode->ui_flags = cpu_to_fs32(sb, ufsi->i_flags);
815 ufs_inode->ui_gen = cpu_to_fs32(sb, inode->i_generation);
816
817 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
818 /* ufs_inode->ui_u2.ui_addr.ui_db[0] = cpu_to_fs32(sb, inode->i_rdev); */
819 ufs_inode->ui_u2.ui_addr.ui_db[0] = ufsi->i_u1.u2_i_data[0];
820 } else if (inode->i_blocks) {
821 for (i = 0; i < (UFS_NDADDR + UFS_NINDIR); i++)
822 ufs_inode->ui_u2.ui_addr.ui_db[i] = ufsi->i_u1.u2_i_data[i];
823 } else {
824 for (i = 0; i < (UFS_NDADDR + UFS_NINDIR) * 4; i++)
825 ufs_inode->ui_u2.ui_symlink[i] = ufsi->i_u1.i_symlink[i];
826 }
827
828 if (!inode->i_nlink)
829 memset (ufs_inode, 0, sizeof(struct ufs2_inode));
830 UFSD("EXIT\n");
831}
832
833static int ufs_update_inode(struct inode * inode, int do_sync)
834{
835 struct super_block *sb = inode->i_sb;
836 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
837 struct buffer_head * bh;
838
839 UFSD("ENTER, ino %lu\n", inode->i_ino);
840
841 if (inode->i_ino < UFS_ROOTINO ||
842 inode->i_ino > (uspi->s_ncg * uspi->s_ipg)) {
843 ufs_warning (sb, "ufs_read_inode", "bad inode number (%lu)\n", inode->i_ino);
844 return -1;
845 }
846
847 bh = sb_bread(sb, ufs_inotofsba(inode->i_ino));
848 if (!bh) {
849 ufs_warning (sb, "ufs_read_inode", "unable to read inode %lu\n", inode->i_ino);
850 return -1;
851 }
852 if (uspi->fs_magic == UFS2_MAGIC) {
853 struct ufs2_inode *ufs2_inode = (struct ufs2_inode *)bh->b_data;
854
855 ufs2_update_inode(inode,
856 ufs2_inode + ufs_inotofsbo(inode->i_ino));
857 } else {
858 struct ufs_inode *ufs_inode = (struct ufs_inode *) bh->b_data;
859
860 ufs1_update_inode(inode, ufs_inode + ufs_inotofsbo(inode->i_ino));
861 }
799 862
800 mark_buffer_dirty(bh); 863 mark_buffer_dirty(bh);
801 if (do_sync) 864 if (do_sync)
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index e84c0ecf0730..a059ccd064ea 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -355,7 +355,7 @@ out:
355 return err; 355 return err;
356} 356}
357 357
358struct inode_operations ufs_dir_inode_operations = { 358const struct inode_operations ufs_dir_inode_operations = {
359 .create = ufs_create, 359 .create = ufs_create,
360 .lookup = ufs_lookup, 360 .lookup = ufs_lookup,
361 .link = ufs_link, 361 .link = ufs_link,
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 8a8e9382ec09..b5a6461ec66b 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -61,6 +61,8 @@
61 * UFS2 (of FreeBSD 5.x) support added by 61 * UFS2 (of FreeBSD 5.x) support added by
62 * Niraj Kumar <niraj17@iitbombay.org>, Jan 2004 62 * Niraj Kumar <niraj17@iitbombay.org>, Jan 2004
63 * 63 *
64 * UFS2 write support added by
65 * Evgeniy Dushistov <dushistov@mail.ru>, 2007
64 */ 66 */
65 67
66 68
@@ -93,14 +95,16 @@
93/* 95/*
94 * Print contents of ufs_super_block, useful for debugging 96 * Print contents of ufs_super_block, useful for debugging
95 */ 97 */
96static void ufs_print_super_stuff(struct super_block *sb, unsigned flags, 98static void ufs_print_super_stuff(struct super_block *sb,
97 struct ufs_super_block_first *usb1, 99 struct ufs_super_block_first *usb1,
98 struct ufs_super_block_second *usb2, 100 struct ufs_super_block_second *usb2,
99 struct ufs_super_block_third *usb3) 101 struct ufs_super_block_third *usb3)
100{ 102{
103 u32 magic = fs32_to_cpu(sb, usb3->fs_magic);
104
101 printk("ufs_print_super_stuff\n"); 105 printk("ufs_print_super_stuff\n");
102 printk(" magic: 0x%x\n", fs32_to_cpu(sb, usb3->fs_magic)); 106 printk(" magic: 0x%x\n", magic);
103 if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) { 107 if (fs32_to_cpu(sb, usb3->fs_magic) == UFS2_MAGIC) {
104 printk(" fs_size: %llu\n", (unsigned long long) 108 printk(" fs_size: %llu\n", (unsigned long long)
105 fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_size)); 109 fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_size));
106 printk(" fs_dsize: %llu\n", (unsigned long long) 110 printk(" fs_dsize: %llu\n", (unsigned long long)
@@ -117,6 +121,12 @@ static void ufs_print_super_stuff(struct super_block *sb, unsigned flags,
117 printk(" cs_nbfree(No of free blocks): %llu\n", 121 printk(" cs_nbfree(No of free blocks): %llu\n",
118 (unsigned long long) 122 (unsigned long long)
119 fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_nbfree)); 123 fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_nbfree));
124 printk(KERN_INFO" cs_nifree(Num of free inodes): %llu\n",
125 (unsigned long long)
126 fs64_to_cpu(sb, usb3->fs_un1.fs_u2.cs_nifree));
127 printk(KERN_INFO" cs_nffree(Num of free frags): %llu\n",
128 (unsigned long long)
129 fs64_to_cpu(sb, usb3->fs_un1.fs_u2.cs_nffree));
120 } else { 130 } else {
121 printk(" sblkno: %u\n", fs32_to_cpu(sb, usb1->fs_sblkno)); 131 printk(" sblkno: %u\n", fs32_to_cpu(sb, usb1->fs_sblkno));
122 printk(" cblkno: %u\n", fs32_to_cpu(sb, usb1->fs_cblkno)); 132 printk(" cblkno: %u\n", fs32_to_cpu(sb, usb1->fs_cblkno));
@@ -199,11 +209,11 @@ static void ufs_print_cylinder_stuff(struct super_block *sb,
199 printk("\n"); 209 printk("\n");
200} 210}
201#else 211#else
202# define ufs_print_super_stuff(sb, flags, usb1, usb2, usb3) /**/ 212# define ufs_print_super_stuff(sb, usb1, usb2, usb3) /**/
203# define ufs_print_cylinder_stuff(sb, cg) /**/ 213# define ufs_print_cylinder_stuff(sb, cg) /**/
204#endif /* CONFIG_UFS_DEBUG */ 214#endif /* CONFIG_UFS_DEBUG */
205 215
206static struct super_operations ufs_super_ops; 216static const struct super_operations ufs_super_ops;
207 217
208static char error_buf[1024]; 218static char error_buf[1024];
209 219
@@ -422,7 +432,6 @@ static int ufs_read_cylinder_structures(struct super_block *sb)
422{ 432{
423 struct ufs_sb_info *sbi = UFS_SB(sb); 433 struct ufs_sb_info *sbi = UFS_SB(sb);
424 struct ufs_sb_private_info *uspi = sbi->s_uspi; 434 struct ufs_sb_private_info *uspi = sbi->s_uspi;
425 unsigned flags = sbi->s_flags;
426 struct ufs_buffer_head * ubh; 435 struct ufs_buffer_head * ubh;
427 unsigned char * base, * space; 436 unsigned char * base, * space;
428 unsigned size, blks, i; 437 unsigned size, blks, i;
@@ -446,11 +455,7 @@ static int ufs_read_cylinder_structures(struct super_block *sb)
446 if (i + uspi->s_fpb > blks) 455 if (i + uspi->s_fpb > blks)
447 size = (blks - i) * uspi->s_fsize; 456 size = (blks - i) * uspi->s_fsize;
448 457
449 if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) 458 ubh = ubh_bread(sb, uspi->s_csaddr + i, size);
450 ubh = ubh_bread(sb,
451 fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_csaddr) + i, size);
452 else
453 ubh = ubh_bread(sb, uspi->s_csaddr + i, size);
454 459
455 if (!ubh) 460 if (!ubh)
456 goto failed; 461 goto failed;
@@ -545,6 +550,7 @@ static void ufs_put_cstotal(struct super_block *sb)
545 cpu_to_fs32(sb, uspi->cs_total.cs_nffree); 550 cpu_to_fs32(sb, uspi->cs_total.cs_nffree);
546 } 551 }
547 ubh_mark_buffer_dirty(USPI_UBH(uspi)); 552 ubh_mark_buffer_dirty(USPI_UBH(uspi));
553 ufs_print_super_stuff(sb, usb1, usb2, usb3);
548 UFSD("EXIT\n"); 554 UFSD("EXIT\n");
549} 555}
550 556
@@ -572,7 +578,9 @@ static void ufs_put_super_internal(struct super_block *sb)
572 size = uspi->s_bsize; 578 size = uspi->s_bsize;
573 if (i + uspi->s_fpb > blks) 579 if (i + uspi->s_fpb > blks)
574 size = (blks - i) * uspi->s_fsize; 580 size = (blks - i) * uspi->s_fsize;
581
575 ubh = ubh_bread(sb, uspi->s_csaddr + i, size); 582 ubh = ubh_bread(sb, uspi->s_csaddr + i, size);
583
576 ubh_memcpyubh (ubh, space, size); 584 ubh_memcpyubh (ubh, space, size);
577 space += size; 585 space += size;
578 ubh_mark_buffer_uptodate (ubh, 1); 586 ubh_mark_buffer_uptodate (ubh, 1);
@@ -649,7 +657,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
649 kmalloc (sizeof(struct ufs_sb_private_info), GFP_KERNEL); 657 kmalloc (sizeof(struct ufs_sb_private_info), GFP_KERNEL);
650 if (!uspi) 658 if (!uspi)
651 goto failed; 659 goto failed;
652 660 uspi->s_dirblksize = UFS_SECTOR_SIZE;
653 super_block_offset=UFS_SBLOCK; 661 super_block_offset=UFS_SBLOCK;
654 662
655 /* Keep 2Gig file limit. Some UFS variants need to override 663 /* Keep 2Gig file limit. Some UFS variants need to override
@@ -674,10 +682,6 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
674 uspi->s_sbsize = super_block_size = 1536; 682 uspi->s_sbsize = super_block_size = 1536;
675 uspi->s_sbbase = 0; 683 uspi->s_sbbase = 0;
676 flags |= UFS_TYPE_UFS2 | UFS_DE_44BSD | UFS_UID_44BSD | UFS_ST_44BSD | UFS_CG_44BSD; 684 flags |= UFS_TYPE_UFS2 | UFS_DE_44BSD | UFS_UID_44BSD | UFS_ST_44BSD | UFS_CG_44BSD;
677 if (!(sb->s_flags & MS_RDONLY)) {
678 printk(KERN_INFO "ufstype=ufs2 is supported read-only\n");
679 sb->s_flags |= MS_RDONLY;
680 }
681 break; 685 break;
682 686
683 case UFS_MOUNT_UFSTYPE_SUN: 687 case UFS_MOUNT_UFSTYPE_SUN:
@@ -718,6 +722,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
718 break; 722 break;
719 723
720 case UFS_MOUNT_UFSTYPE_NEXTSTEP: 724 case UFS_MOUNT_UFSTYPE_NEXTSTEP:
725 /*TODO: check may be we need set special dir block size?*/
721 UFSD("ufstype=nextstep\n"); 726 UFSD("ufstype=nextstep\n");
722 uspi->s_fsize = block_size = 1024; 727 uspi->s_fsize = block_size = 1024;
723 uspi->s_fmask = ~(1024 - 1); 728 uspi->s_fmask = ~(1024 - 1);
@@ -733,6 +738,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
733 break; 738 break;
734 739
735 case UFS_MOUNT_UFSTYPE_NEXTSTEP_CD: 740 case UFS_MOUNT_UFSTYPE_NEXTSTEP_CD:
741 /*TODO: check may be we need set special dir block size?*/
736 UFSD("ufstype=nextstep-cd\n"); 742 UFSD("ufstype=nextstep-cd\n");
737 uspi->s_fsize = block_size = 2048; 743 uspi->s_fsize = block_size = 2048;
738 uspi->s_fmask = ~(2048 - 1); 744 uspi->s_fmask = ~(2048 - 1);
@@ -754,6 +760,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
754 uspi->s_fshift = 10; 760 uspi->s_fshift = 10;
755 uspi->s_sbsize = super_block_size = 2048; 761 uspi->s_sbsize = super_block_size = 2048;
756 uspi->s_sbbase = 0; 762 uspi->s_sbbase = 0;
763 uspi->s_dirblksize = 1024;
757 flags |= UFS_DE_44BSD | UFS_UID_44BSD | UFS_ST_44BSD | UFS_CG_44BSD; 764 flags |= UFS_DE_44BSD | UFS_UID_44BSD | UFS_ST_44BSD | UFS_CG_44BSD;
758 if (!(sb->s_flags & MS_RDONLY)) { 765 if (!(sb->s_flags & MS_RDONLY)) {
759 if (!silent) 766 if (!silent)
@@ -887,7 +894,7 @@ magic_found:
887 } 894 }
888 895
889 896
890 ufs_print_super_stuff(sb, flags, usb1, usb2, usb3); 897 ufs_print_super_stuff(sb, usb1, usb2, usb3);
891 898
892 /* 899 /*
893 * Check, if file system was correctly unmounted. 900 * Check, if file system was correctly unmounted.
@@ -970,7 +977,12 @@ magic_found:
970 uspi->s_npsect = ufs_get_fs_npsect(sb, usb1, usb3); 977 uspi->s_npsect = ufs_get_fs_npsect(sb, usb1, usb3);
971 uspi->s_interleave = fs32_to_cpu(sb, usb1->fs_interleave); 978 uspi->s_interleave = fs32_to_cpu(sb, usb1->fs_interleave);
972 uspi->s_trackskew = fs32_to_cpu(sb, usb1->fs_trackskew); 979 uspi->s_trackskew = fs32_to_cpu(sb, usb1->fs_trackskew);
973 uspi->s_csaddr = fs32_to_cpu(sb, usb1->fs_csaddr); 980
981 if (uspi->fs_magic == UFS2_MAGIC)
982 uspi->s_csaddr = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_csaddr);
983 else
984 uspi->s_csaddr = fs32_to_cpu(sb, usb1->fs_csaddr);
985
974 uspi->s_cssize = fs32_to_cpu(sb, usb1->fs_cssize); 986 uspi->s_cssize = fs32_to_cpu(sb, usb1->fs_cssize);
975 uspi->s_cgsize = fs32_to_cpu(sb, usb1->fs_cgsize); 987 uspi->s_cgsize = fs32_to_cpu(sb, usb1->fs_cgsize);
976 uspi->s_ntrak = fs32_to_cpu(sb, usb1->fs_ntrak); 988 uspi->s_ntrak = fs32_to_cpu(sb, usb1->fs_ntrak);
@@ -1057,7 +1069,6 @@ static void ufs_write_super(struct super_block *sb)
1057 unsigned flags; 1069 unsigned flags;
1058 1070
1059 lock_kernel(); 1071 lock_kernel();
1060
1061 UFSD("ENTER\n"); 1072 UFSD("ENTER\n");
1062 flags = UFS_SB(sb)->s_flags; 1073 flags = UFS_SB(sb)->s_flags;
1063 uspi = UFS_SB(sb)->s_uspi; 1074 uspi = UFS_SB(sb)->s_uspi;
@@ -1153,7 +1164,8 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
1153#else 1164#else
1154 if (ufstype != UFS_MOUNT_UFSTYPE_SUN && 1165 if (ufstype != UFS_MOUNT_UFSTYPE_SUN &&
1155 ufstype != UFS_MOUNT_UFSTYPE_44BSD && 1166 ufstype != UFS_MOUNT_UFSTYPE_44BSD &&
1156 ufstype != UFS_MOUNT_UFSTYPE_SUNx86) { 1167 ufstype != UFS_MOUNT_UFSTYPE_SUNx86 &&
1168 ufstype != UFS_MOUNT_UFSTYPE_UFS2) {
1157 printk("this ufstype is read-only supported\n"); 1169 printk("this ufstype is read-only supported\n");
1158 return -EINVAL; 1170 return -EINVAL;
1159 } 1171 }
@@ -1252,7 +1264,7 @@ static ssize_t ufs_quota_read(struct super_block *, int, char *,size_t, loff_t);
1252static ssize_t ufs_quota_write(struct super_block *, int, const char *, size_t, loff_t); 1264static ssize_t ufs_quota_write(struct super_block *, int, const char *, size_t, loff_t);
1253#endif 1265#endif
1254 1266
1255static struct super_operations ufs_super_ops = { 1267static const struct super_operations ufs_super_ops = {
1256 .alloc_inode = ufs_alloc_inode, 1268 .alloc_inode = ufs_alloc_inode,
1257 .destroy_inode = ufs_destroy_inode, 1269 .destroy_inode = ufs_destroy_inode,
1258 .read_inode = ufs_read_inode, 1270 .read_inode = ufs_read_inode,
diff --git a/fs/ufs/symlink.c b/fs/ufs/symlink.c
index 337512ed5781..d8549f807e80 100644
--- a/fs/ufs/symlink.c
+++ b/fs/ufs/symlink.c
@@ -36,7 +36,7 @@ static void *ufs_follow_link(struct dentry *dentry, struct nameidata *nd)
36 return NULL; 36 return NULL;
37} 37}
38 38
39struct inode_operations ufs_fast_symlink_inode_operations = { 39const struct inode_operations ufs_fast_symlink_inode_operations = {
40 .readlink = generic_readlink, 40 .readlink = generic_readlink,
41 .follow_link = ufs_follow_link, 41 .follow_link = ufs_follow_link,
42}; 42};
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index 0437b0a6fe97..749581fa7729 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -30,8 +30,8 @@
30 */ 30 */
31 31
32/* 32/*
33 * Modified to avoid infinite loop on 2006 by 33 * Adoptation to use page cache and UFS2 write support by
34 * Evgeniy Dushistov <dushistov@mail.ru> 34 * Evgeniy Dushistov <dushistov@mail.ru>, 2006-2007
35 */ 35 */
36 36
37#include <linux/errno.h> 37#include <linux/errno.h>
@@ -63,13 +63,13 @@
63#define DIRECT_FRAGMENT ((inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift) 63#define DIRECT_FRAGMENT ((inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift)
64 64
65 65
66static int ufs_trunc_direct (struct inode * inode) 66static int ufs_trunc_direct(struct inode *inode)
67{ 67{
68 struct ufs_inode_info *ufsi = UFS_I(inode); 68 struct ufs_inode_info *ufsi = UFS_I(inode);
69 struct super_block * sb; 69 struct super_block * sb;
70 struct ufs_sb_private_info * uspi; 70 struct ufs_sb_private_info * uspi;
71 __fs32 * p; 71 void *p;
72 unsigned frag1, frag2, frag3, frag4, block1, block2; 72 u64 frag1, frag2, frag3, frag4, block1, block2;
73 unsigned frag_to_free, free_count; 73 unsigned frag_to_free, free_count;
74 unsigned i, tmp; 74 unsigned i, tmp;
75 int retry; 75 int retry;
@@ -91,13 +91,16 @@ static int ufs_trunc_direct (struct inode * inode)
91 if (frag2 > frag3) { 91 if (frag2 > frag3) {
92 frag2 = frag4; 92 frag2 = frag4;
93 frag3 = frag4 = 0; 93 frag3 = frag4 = 0;
94 } 94 } else if (frag2 < frag3) {
95 else if (frag2 < frag3) {
96 block1 = ufs_fragstoblks (frag2); 95 block1 = ufs_fragstoblks (frag2);
97 block2 = ufs_fragstoblks (frag3); 96 block2 = ufs_fragstoblks (frag3);
98 } 97 }
99 98
100 UFSD("frag1 %u, frag2 %u, block1 %u, block2 %u, frag3 %u, frag4 %u\n", frag1, frag2, block1, block2, frag3, frag4); 99 UFSD("frag1 %llu, frag2 %llu, block1 %llu, block2 %llu, frag3 %llu,"
100 " frag4 %llu\n",
101 (unsigned long long)frag1, (unsigned long long)frag2,
102 (unsigned long long)block1, (unsigned long long)block2,
103 (unsigned long long)frag3, (unsigned long long)frag4);
101 104
102 if (frag1 >= frag2) 105 if (frag1 >= frag2)
103 goto next1; 106 goto next1;
@@ -105,8 +108,8 @@ static int ufs_trunc_direct (struct inode * inode)
105 /* 108 /*
106 * Free first free fragments 109 * Free first free fragments
107 */ 110 */
108 p = ufsi->i_u1.i_data + ufs_fragstoblks (frag1); 111 p = ufs_get_direct_data_ptr(uspi, ufsi, ufs_fragstoblks(frag1));
109 tmp = fs32_to_cpu(sb, *p); 112 tmp = ufs_data_ptr_to_cpu(sb, p);
110 if (!tmp ) 113 if (!tmp )
111 ufs_panic (sb, "ufs_trunc_direct", "internal error"); 114 ufs_panic (sb, "ufs_trunc_direct", "internal error");
112 frag2 -= frag1; 115 frag2 -= frag1;
@@ -121,12 +124,11 @@ next1:
121 * Free whole blocks 124 * Free whole blocks
122 */ 125 */
123 for (i = block1 ; i < block2; i++) { 126 for (i = block1 ; i < block2; i++) {
124 p = ufsi->i_u1.i_data + i; 127 p = ufs_get_direct_data_ptr(uspi, ufsi, i);
125 tmp = fs32_to_cpu(sb, *p); 128 tmp = ufs_data_ptr_to_cpu(sb, p);
126 if (!tmp) 129 if (!tmp)
127 continue; 130 continue;
128 131 ufs_data_ptr_clear(uspi, p);
129 *p = 0;
130 132
131 if (free_count == 0) { 133 if (free_count == 0) {
132 frag_to_free = tmp; 134 frag_to_free = tmp;
@@ -150,13 +152,12 @@ next1:
150 /* 152 /*
151 * Free last free fragments 153 * Free last free fragments
152 */ 154 */
153 p = ufsi->i_u1.i_data + ufs_fragstoblks (frag3); 155 p = ufs_get_direct_data_ptr(uspi, ufsi, ufs_fragstoblks(frag3));
154 tmp = fs32_to_cpu(sb, *p); 156 tmp = ufs_data_ptr_to_cpu(sb, p);
155 if (!tmp ) 157 if (!tmp )
156 ufs_panic(sb, "ufs_truncate_direct", "internal error"); 158 ufs_panic(sb, "ufs_truncate_direct", "internal error");
157 frag4 = ufs_fragnum (frag4); 159 frag4 = ufs_fragnum (frag4);
158 160 ufs_data_ptr_clear(uspi, p);
159 *p = 0;
160 161
161 ufs_free_fragments (inode, tmp, frag4); 162 ufs_free_fragments (inode, tmp, frag4);
162 mark_inode_dirty(inode); 163 mark_inode_dirty(inode);
@@ -167,17 +168,20 @@ next1:
167} 168}
168 169
169 170
170static int ufs_trunc_indirect (struct inode * inode, unsigned offset, __fs32 *p) 171static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p)
171{ 172{
172 struct super_block * sb; 173 struct super_block * sb;
173 struct ufs_sb_private_info * uspi; 174 struct ufs_sb_private_info * uspi;
174 struct ufs_buffer_head * ind_ubh; 175 struct ufs_buffer_head * ind_ubh;
175 __fs32 * ind; 176 void *ind;
176 unsigned indirect_block, i, tmp; 177 u64 tmp, indirect_block, i, frag_to_free;
177 unsigned frag_to_free, free_count; 178 unsigned free_count;
178 int retry; 179 int retry;
179 180
180 UFSD("ENTER\n"); 181 UFSD("ENTER: ino %lu, offset %llu, p: %p\n",
182 inode->i_ino, (unsigned long long)offset, p);
183
184 BUG_ON(!p);
181 185
182 sb = inode->i_sb; 186 sb = inode->i_sb;
183 uspi = UFS_SB(sb)->s_uspi; 187 uspi = UFS_SB(sb)->s_uspi;
@@ -186,27 +190,27 @@ static int ufs_trunc_indirect (struct inode * inode, unsigned offset, __fs32 *p)
186 free_count = 0; 190 free_count = 0;
187 retry = 0; 191 retry = 0;
188 192
189 tmp = fs32_to_cpu(sb, *p); 193 tmp = ufs_data_ptr_to_cpu(sb, p);
190 if (!tmp) 194 if (!tmp)
191 return 0; 195 return 0;
192 ind_ubh = ubh_bread(sb, tmp, uspi->s_bsize); 196 ind_ubh = ubh_bread(sb, tmp, uspi->s_bsize);
193 if (tmp != fs32_to_cpu(sb, *p)) { 197 if (tmp != ufs_data_ptr_to_cpu(sb, p)) {
194 ubh_brelse (ind_ubh); 198 ubh_brelse (ind_ubh);
195 return 1; 199 return 1;
196 } 200 }
197 if (!ind_ubh) { 201 if (!ind_ubh) {
198 *p = 0; 202 ufs_data_ptr_clear(uspi, p);
199 return 0; 203 return 0;
200 } 204 }
201 205
202 indirect_block = (DIRECT_BLOCK > offset) ? (DIRECT_BLOCK - offset) : 0; 206 indirect_block = (DIRECT_BLOCK > offset) ? (DIRECT_BLOCK - offset) : 0;
203 for (i = indirect_block; i < uspi->s_apb; i++) { 207 for (i = indirect_block; i < uspi->s_apb; i++) {
204 ind = ubh_get_addr32 (ind_ubh, i); 208 ind = ubh_get_data_ptr(uspi, ind_ubh, i);
205 tmp = fs32_to_cpu(sb, *ind); 209 tmp = ufs_data_ptr_to_cpu(sb, ind);
206 if (!tmp) 210 if (!tmp)
207 continue; 211 continue;
208 212
209 *ind = 0; 213 ufs_data_ptr_clear(uspi, ind);
210 ubh_mark_buffer_dirty(ind_ubh); 214 ubh_mark_buffer_dirty(ind_ubh);
211 if (free_count == 0) { 215 if (free_count == 0) {
212 frag_to_free = tmp; 216 frag_to_free = tmp;
@@ -226,11 +230,12 @@ static int ufs_trunc_indirect (struct inode * inode, unsigned offset, __fs32 *p)
226 ufs_free_blocks (inode, frag_to_free, free_count); 230 ufs_free_blocks (inode, frag_to_free, free_count);
227 } 231 }
228 for (i = 0; i < uspi->s_apb; i++) 232 for (i = 0; i < uspi->s_apb; i++)
229 if (*ubh_get_addr32(ind_ubh,i)) 233 if (!ufs_is_data_ptr_zero(uspi,
234 ubh_get_data_ptr(uspi, ind_ubh, i)))
230 break; 235 break;
231 if (i >= uspi->s_apb) { 236 if (i >= uspi->s_apb) {
232 tmp = fs32_to_cpu(sb, *p); 237 tmp = ufs_data_ptr_to_cpu(sb, p);
233 *p = 0; 238 ufs_data_ptr_clear(uspi, p);
234 239
235 ufs_free_blocks (inode, tmp, uspi->s_fpb); 240 ufs_free_blocks (inode, tmp, uspi->s_fpb);
236 mark_inode_dirty(inode); 241 mark_inode_dirty(inode);
@@ -248,13 +253,13 @@ static int ufs_trunc_indirect (struct inode * inode, unsigned offset, __fs32 *p)
248 return retry; 253 return retry;
249} 254}
250 255
251static int ufs_trunc_dindirect (struct inode *inode, unsigned offset, __fs32 *p) 256static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p)
252{ 257{
253 struct super_block * sb; 258 struct super_block * sb;
254 struct ufs_sb_private_info * uspi; 259 struct ufs_sb_private_info * uspi;
255 struct ufs_buffer_head * dind_bh; 260 struct ufs_buffer_head *dind_bh;
256 unsigned i, tmp, dindirect_block; 261 u64 i, tmp, dindirect_block;
257 __fs32 * dind; 262 void *dind;
258 int retry = 0; 263 int retry = 0;
259 264
260 UFSD("ENTER\n"); 265 UFSD("ENTER\n");
@@ -266,22 +271,22 @@ static int ufs_trunc_dindirect (struct inode *inode, unsigned offset, __fs32 *p)
266 ? ((DIRECT_BLOCK - offset) >> uspi->s_apbshift) : 0; 271 ? ((DIRECT_BLOCK - offset) >> uspi->s_apbshift) : 0;
267 retry = 0; 272 retry = 0;
268 273
269 tmp = fs32_to_cpu(sb, *p); 274 tmp = ufs_data_ptr_to_cpu(sb, p);
270 if (!tmp) 275 if (!tmp)
271 return 0; 276 return 0;
272 dind_bh = ubh_bread(sb, tmp, uspi->s_bsize); 277 dind_bh = ubh_bread(sb, tmp, uspi->s_bsize);
273 if (tmp != fs32_to_cpu(sb, *p)) { 278 if (tmp != ufs_data_ptr_to_cpu(sb, p)) {
274 ubh_brelse (dind_bh); 279 ubh_brelse (dind_bh);
275 return 1; 280 return 1;
276 } 281 }
277 if (!dind_bh) { 282 if (!dind_bh) {
278 *p = 0; 283 ufs_data_ptr_clear(uspi, p);
279 return 0; 284 return 0;
280 } 285 }
281 286
282 for (i = dindirect_block ; i < uspi->s_apb ; i++) { 287 for (i = dindirect_block ; i < uspi->s_apb ; i++) {
283 dind = ubh_get_addr32 (dind_bh, i); 288 dind = ubh_get_data_ptr(uspi, dind_bh, i);
284 tmp = fs32_to_cpu(sb, *dind); 289 tmp = ufs_data_ptr_to_cpu(sb, dind);
285 if (!tmp) 290 if (!tmp)
286 continue; 291 continue;
287 retry |= ufs_trunc_indirect (inode, offset + (i << uspi->s_apbshift), dind); 292 retry |= ufs_trunc_indirect (inode, offset + (i << uspi->s_apbshift), dind);
@@ -289,11 +294,12 @@ static int ufs_trunc_dindirect (struct inode *inode, unsigned offset, __fs32 *p)
289 } 294 }
290 295
291 for (i = 0; i < uspi->s_apb; i++) 296 for (i = 0; i < uspi->s_apb; i++)
292 if (*ubh_get_addr32 (dind_bh, i)) 297 if (!ufs_is_data_ptr_zero(uspi,
298 ubh_get_data_ptr(uspi, dind_bh, i)))
293 break; 299 break;
294 if (i >= uspi->s_apb) { 300 if (i >= uspi->s_apb) {
295 tmp = fs32_to_cpu(sb, *p); 301 tmp = ufs_data_ptr_to_cpu(sb, p);
296 *p = 0; 302 ufs_data_ptr_clear(uspi, p);
297 303
298 ufs_free_blocks(inode, tmp, uspi->s_fpb); 304 ufs_free_blocks(inode, tmp, uspi->s_fpb);
299 mark_inode_dirty(inode); 305 mark_inode_dirty(inode);
@@ -311,34 +317,33 @@ static int ufs_trunc_dindirect (struct inode *inode, unsigned offset, __fs32 *p)
311 return retry; 317 return retry;
312} 318}
313 319
314static int ufs_trunc_tindirect (struct inode * inode) 320static int ufs_trunc_tindirect(struct inode *inode)
315{ 321{
322 struct super_block *sb = inode->i_sb;
323 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
316 struct ufs_inode_info *ufsi = UFS_I(inode); 324 struct ufs_inode_info *ufsi = UFS_I(inode);
317 struct super_block * sb;
318 struct ufs_sb_private_info * uspi;
319 struct ufs_buffer_head * tind_bh; 325 struct ufs_buffer_head * tind_bh;
320 unsigned tindirect_block, tmp, i; 326 u64 tindirect_block, tmp, i;
321 __fs32 * tind, * p; 327 void *tind, *p;
322 int retry; 328 int retry;
323 329
324 UFSD("ENTER\n"); 330 UFSD("ENTER\n");
325 331
326 sb = inode->i_sb;
327 uspi = UFS_SB(sb)->s_uspi;
328 retry = 0; 332 retry = 0;
329 333
330 tindirect_block = (DIRECT_BLOCK > (UFS_NDADDR + uspi->s_apb + uspi->s_2apb)) 334 tindirect_block = (DIRECT_BLOCK > (UFS_NDADDR + uspi->s_apb + uspi->s_2apb))
331 ? ((DIRECT_BLOCK - UFS_NDADDR - uspi->s_apb - uspi->s_2apb) >> uspi->s_2apbshift) : 0; 335 ? ((DIRECT_BLOCK - UFS_NDADDR - uspi->s_apb - uspi->s_2apb) >> uspi->s_2apbshift) : 0;
332 p = ufsi->i_u1.i_data + UFS_TIND_BLOCK; 336
333 if (!(tmp = fs32_to_cpu(sb, *p))) 337 p = ufs_get_direct_data_ptr(uspi, ufsi, UFS_TIND_BLOCK);
338 if (!(tmp = ufs_data_ptr_to_cpu(sb, p)))
334 return 0; 339 return 0;
335 tind_bh = ubh_bread (sb, tmp, uspi->s_bsize); 340 tind_bh = ubh_bread (sb, tmp, uspi->s_bsize);
336 if (tmp != fs32_to_cpu(sb, *p)) { 341 if (tmp != ufs_data_ptr_to_cpu(sb, p)) {
337 ubh_brelse (tind_bh); 342 ubh_brelse (tind_bh);
338 return 1; 343 return 1;
339 } 344 }
340 if (!tind_bh) { 345 if (!tind_bh) {
341 *p = 0; 346 ufs_data_ptr_clear(uspi, p);
342 return 0; 347 return 0;
343 } 348 }
344 349
@@ -349,11 +354,12 @@ static int ufs_trunc_tindirect (struct inode * inode)
349 ubh_mark_buffer_dirty(tind_bh); 354 ubh_mark_buffer_dirty(tind_bh);
350 } 355 }
351 for (i = 0; i < uspi->s_apb; i++) 356 for (i = 0; i < uspi->s_apb; i++)
352 if (*ubh_get_addr32 (tind_bh, i)) 357 if (!ufs_is_data_ptr_zero(uspi,
358 ubh_get_data_ptr(uspi, tind_bh, i)))
353 break; 359 break;
354 if (i >= uspi->s_apb) { 360 if (i >= uspi->s_apb) {
355 tmp = fs32_to_cpu(sb, *p); 361 tmp = ufs_data_ptr_to_cpu(sb, p);
356 *p = 0; 362 ufs_data_ptr_clear(uspi, p);
357 363
358 ufs_free_blocks(inode, tmp, uspi->s_fpb); 364 ufs_free_blocks(inode, tmp, uspi->s_fpb);
359 mark_inode_dirty(inode); 365 mark_inode_dirty(inode);
@@ -375,7 +381,8 @@ static int ufs_alloc_lastblock(struct inode *inode)
375 int err = 0; 381 int err = 0;
376 struct address_space *mapping = inode->i_mapping; 382 struct address_space *mapping = inode->i_mapping;
377 struct ufs_sb_private_info *uspi = UFS_SB(inode->i_sb)->s_uspi; 383 struct ufs_sb_private_info *uspi = UFS_SB(inode->i_sb)->s_uspi;
378 unsigned lastfrag, i, end; 384 unsigned i, end;
385 sector_t lastfrag;
379 struct page *lastpage; 386 struct page *lastpage;
380 struct buffer_head *bh; 387 struct buffer_head *bh;
381 388
@@ -430,7 +437,9 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size)
430 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; 437 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
431 int retry, err = 0; 438 int retry, err = 0;
432 439
433 UFSD("ENTER\n"); 440 UFSD("ENTER: ino %lu, i_size: %llu, old_i_size: %llu\n",
441 inode->i_ino, (unsigned long long)i_size_read(inode),
442 (unsigned long long)old_i_size);
434 443
435 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 444 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
436 S_ISLNK(inode->i_mode))) 445 S_ISLNK(inode->i_mode)))
@@ -450,10 +459,12 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size)
450 lock_kernel(); 459 lock_kernel();
451 while (1) { 460 while (1) {
452 retry = ufs_trunc_direct(inode); 461 retry = ufs_trunc_direct(inode);
453 retry |= ufs_trunc_indirect (inode, UFS_IND_BLOCK, 462 retry |= ufs_trunc_indirect(inode, UFS_IND_BLOCK,
454 (__fs32 *) &ufsi->i_u1.i_data[UFS_IND_BLOCK]); 463 ufs_get_direct_data_ptr(uspi, ufsi,
455 retry |= ufs_trunc_dindirect (inode, UFS_IND_BLOCK + uspi->s_apb, 464 UFS_IND_BLOCK));
456 (__fs32 *) &ufsi->i_u1.i_data[UFS_DIND_BLOCK]); 465 retry |= ufs_trunc_dindirect(inode, UFS_IND_BLOCK + uspi->s_apb,
466 ufs_get_direct_data_ptr(uspi, ufsi,
467 UFS_DIND_BLOCK));
457 retry |= ufs_trunc_tindirect (inode); 468 retry |= ufs_trunc_tindirect (inode);
458 if (!retry) 469 if (!retry)
459 break; 470 break;
@@ -502,6 +513,6 @@ static int ufs_setattr(struct dentry *dentry, struct iattr *attr)
502 return inode_setattr(inode, attr); 513 return inode_setattr(inode, attr);
503} 514}
504 515
505struct inode_operations ufs_file_inode_operations = { 516const struct inode_operations ufs_file_inode_operations = {
506 .setattr = ufs_setattr, 517 .setattr = ufs_setattr,
507}; 518};
diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index 7dd12bb1d62b..06d344839c42 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h
@@ -305,8 +305,22 @@ static inline void *get_usb_offset(struct ufs_sb_private_info *uspi,
305 (((__fs32*)((ubh)->bh[(begin) >> (uspi->s_fshift-2)]->b_data)) + \ 305 (((__fs32*)((ubh)->bh[(begin) >> (uspi->s_fshift-2)]->b_data)) + \
306 ((begin) & ((uspi->s_fsize>>2) - 1))) 306 ((begin) & ((uspi->s_fsize>>2) - 1)))
307 307
308#define ubh_get_addr64(ubh,begin) \
309 (((__fs64*)((ubh)->bh[(begin) >> (uspi->s_fshift-3)]->b_data)) + \
310 ((begin) & ((uspi->s_fsize>>3) - 1)))
311
308#define ubh_get_addr ubh_get_addr8 312#define ubh_get_addr ubh_get_addr8
309 313
314static inline void *ubh_get_data_ptr(struct ufs_sb_private_info *uspi,
315 struct ufs_buffer_head *ubh,
316 u64 blk)
317{
318 if (uspi->fs_magic == UFS2_MAGIC)
319 return ubh_get_addr64(ubh, blk);
320 else
321 return ubh_get_addr32(ubh, blk);
322}
323
310#define ubh_blkmap(ubh,begin,bit) \ 324#define ubh_blkmap(ubh,begin,bit) \
311 ((*ubh_get_addr(ubh, (begin) + ((bit) >> 3)) >> ((bit) & 7)) & (0xff >> (UFS_MAXFRAG - uspi->s_fpb))) 325 ((*ubh_get_addr(ubh, (begin) + ((bit) >> 3)) >> ((bit) & 7)) & (0xff >> (UFS_MAXFRAG - uspi->s_fpb)))
312 326
@@ -507,3 +521,46 @@ static inline void ufs_fragacct (struct super_block * sb, unsigned blockmap,
507 if (fragsize > 0 && fragsize < uspi->s_fpb) 521 if (fragsize > 0 && fragsize < uspi->s_fpb)
508 fs32_add(sb, &fraglist[fragsize], cnt); 522 fs32_add(sb, &fraglist[fragsize], cnt);
509} 523}
524
525static inline void *ufs_get_direct_data_ptr(struct ufs_sb_private_info *uspi,
526 struct ufs_inode_info *ufsi,
527 unsigned blk)
528{
529 BUG_ON(blk > UFS_TIND_BLOCK);
530 return uspi->fs_magic == UFS2_MAGIC ?
531 (void *)&ufsi->i_u1.u2_i_data[blk] :
532 (void *)&ufsi->i_u1.i_data[blk];
533}
534
535static inline u64 ufs_data_ptr_to_cpu(struct super_block *sb, void *p)
536{
537 return UFS_SB(sb)->s_uspi->fs_magic == UFS2_MAGIC ?
538 fs64_to_cpu(sb, *(__fs64 *)p) :
539 fs32_to_cpu(sb, *(__fs32 *)p);
540}
541
542static inline void ufs_cpu_to_data_ptr(struct super_block *sb, void *p, u64 val)
543{
544 if (UFS_SB(sb)->s_uspi->fs_magic == UFS2_MAGIC)
545 *(__fs64 *)p = cpu_to_fs64(sb, val);
546 else
547 *(__fs32 *)p = cpu_to_fs32(sb, val);
548}
549
550static inline void ufs_data_ptr_clear(struct ufs_sb_private_info *uspi,
551 void *p)
552{
553 if (uspi->fs_magic == UFS2_MAGIC)
554 *(__fs64 *)p = 0;
555 else
556 *(__fs32 *)p = 0;
557}
558
559static inline int ufs_is_data_ptr_zero(struct ufs_sb_private_info *uspi,
560 void *p)
561{
562 if (uspi->fs_magic == UFS2_MAGIC)
563 return *(__fs64 *)p == 0;
564 else
565 return *(__fs32 *)p == 0;
566}
diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c
index 0afd745a37cd..c28add2fbe95 100644
--- a/fs/vfat/namei.c
+++ b/fs/vfat/namei.c
@@ -996,7 +996,7 @@ error_inode:
996 goto out; 996 goto out;
997} 997}
998 998
999static struct inode_operations vfat_dir_inode_operations = { 999static const struct inode_operations vfat_dir_inode_operations = {
1000 .create = vfat_create, 1000 .create = vfat_create,
1001 .lookup = vfat_lookup, 1001 .lookup = vfat_lookup,
1002 .unlink = vfat_unlink, 1002 .unlink = vfat_unlink,
diff --git a/fs/xfs/linux-2.6/mrlock.h b/fs/xfs/linux-2.6/mrlock.h
index 32e1ce0f04c9..af168a1a98c1 100644
--- a/fs/xfs/linux-2.6/mrlock.h
+++ b/fs/xfs/linux-2.6/mrlock.h
@@ -31,15 +31,13 @@ typedef struct {
31 do { (mrp)->mr_writer = 0; init_rwsem(&(mrp)->mr_lock); } while (0) 31 do { (mrp)->mr_writer = 0; init_rwsem(&(mrp)->mr_lock); } while (0)
32#define mrlock_init(mrp, t,n,s) mrinit(mrp, n) 32#define mrlock_init(mrp, t,n,s) mrinit(mrp, n)
33#define mrfree(mrp) do { } while (0) 33#define mrfree(mrp) do { } while (0)
34#define mraccess(mrp) mraccessf(mrp, 0)
35#define mrupdate(mrp) mrupdatef(mrp, 0)
36 34
37static inline void mraccessf(mrlock_t *mrp, int flags) 35static inline void mraccess(mrlock_t *mrp)
38{ 36{
39 down_read(&mrp->mr_lock); 37 down_read(&mrp->mr_lock);
40} 38}
41 39
42static inline void mrupdatef(mrlock_t *mrp, int flags) 40static inline void mrupdate(mrlock_t *mrp)
43{ 41{
44 down_write(&mrp->mr_lock); 42 down_write(&mrp->mr_lock);
45 mrp->mr_writer = 1; 43 mrp->mr_writer = 1;
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 7b54461695e2..143ffc851c9d 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -56,8 +56,6 @@ xfs_count_page_state(
56 do { 56 do {
57 if (buffer_uptodate(bh) && !buffer_mapped(bh)) 57 if (buffer_uptodate(bh) && !buffer_mapped(bh))
58 (*unmapped) = 1; 58 (*unmapped) = 1;
59 else if (buffer_unwritten(bh) && !buffer_delay(bh))
60 clear_buffer_unwritten(bh);
61 else if (buffer_unwritten(bh)) 59 else if (buffer_unwritten(bh))
62 (*unwritten) = 1; 60 (*unwritten) = 1;
63 else if (buffer_delay(bh)) 61 else if (buffer_delay(bh))
@@ -249,7 +247,7 @@ xfs_map_blocks(
249 return -error; 247 return -error;
250} 248}
251 249
252STATIC inline int 250STATIC_INLINE int
253xfs_iomap_valid( 251xfs_iomap_valid(
254 xfs_iomap_t *iomapp, 252 xfs_iomap_t *iomapp,
255 loff_t offset) 253 loff_t offset)
@@ -1272,7 +1270,6 @@ __xfs_get_blocks(
1272 if (direct) 1270 if (direct)
1273 bh_result->b_private = inode; 1271 bh_result->b_private = inode;
1274 set_buffer_unwritten(bh_result); 1272 set_buffer_unwritten(bh_result);
1275 set_buffer_delay(bh_result);
1276 } 1273 }
1277 } 1274 }
1278 1275
@@ -1283,13 +1280,18 @@ __xfs_get_blocks(
1283 bh_result->b_bdev = iomap.iomap_target->bt_bdev; 1280 bh_result->b_bdev = iomap.iomap_target->bt_bdev;
1284 1281
1285 /* 1282 /*
1286 * If we previously allocated a block out beyond eof and we are 1283 * If we previously allocated a block out beyond eof and we are now
1287 * now coming back to use it then we will need to flag it as new 1284 * coming back to use it then we will need to flag it as new even if it
1288 * even if it has a disk address. 1285 * has a disk address.
1286 *
1287 * With sub-block writes into unwritten extents we also need to mark
1288 * the buffer as new so that the unwritten parts of the buffer gets
1289 * correctly zeroed.
1289 */ 1290 */
1290 if (create && 1291 if (create &&
1291 ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) || 1292 ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
1292 (offset >= i_size_read(inode)) || (iomap.iomap_flags & IOMAP_NEW))) 1293 (offset >= i_size_read(inode)) ||
1294 (iomap.iomap_flags & (IOMAP_NEW|IOMAP_UNWRITTEN))))
1293 set_buffer_new(bh_result); 1295 set_buffer_new(bh_result);
1294 1296
1295 if (iomap.iomap_flags & IOMAP_DELAY) { 1297 if (iomap.iomap_flags & IOMAP_DELAY) {
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 4fb01ffdfd1a..e2bea6a661f0 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -34,13 +34,13 @@
34#include <linux/backing-dev.h> 34#include <linux/backing-dev.h>
35#include <linux/freezer.h> 35#include <linux/freezer.h>
36 36
37STATIC kmem_zone_t *xfs_buf_zone; 37static kmem_zone_t *xfs_buf_zone;
38STATIC kmem_shaker_t xfs_buf_shake; 38static kmem_shaker_t xfs_buf_shake;
39STATIC int xfsbufd(void *); 39STATIC int xfsbufd(void *);
40STATIC int xfsbufd_wakeup(int, gfp_t); 40STATIC int xfsbufd_wakeup(int, gfp_t);
41STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int); 41STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
42 42
43STATIC struct workqueue_struct *xfslogd_workqueue; 43static struct workqueue_struct *xfslogd_workqueue;
44struct workqueue_struct *xfsdatad_workqueue; 44struct workqueue_struct *xfsdatad_workqueue;
45 45
46#ifdef XFS_BUF_TRACE 46#ifdef XFS_BUF_TRACE
@@ -139,7 +139,7 @@ page_region_mask(
139 return mask; 139 return mask;
140} 140}
141 141
142STATIC inline void 142STATIC_INLINE void
143set_page_region( 143set_page_region(
144 struct page *page, 144 struct page *page,
145 size_t offset, 145 size_t offset,
@@ -151,7 +151,7 @@ set_page_region(
151 SetPageUptodate(page); 151 SetPageUptodate(page);
152} 152}
153 153
154STATIC inline int 154STATIC_INLINE int
155test_page_region( 155test_page_region(
156 struct page *page, 156 struct page *page,
157 size_t offset, 157 size_t offset,
@@ -171,9 +171,9 @@ typedef struct a_list {
171 struct a_list *next; 171 struct a_list *next;
172} a_list_t; 172} a_list_t;
173 173
174STATIC a_list_t *as_free_head; 174static a_list_t *as_free_head;
175STATIC int as_list_len; 175static int as_list_len;
176STATIC DEFINE_SPINLOCK(as_lock); 176static DEFINE_SPINLOCK(as_lock);
177 177
178/* 178/*
179 * Try to batch vunmaps because they are costly. 179 * Try to batch vunmaps because they are costly.
@@ -1085,7 +1085,7 @@ xfs_buf_iostart(
1085 return status; 1085 return status;
1086} 1086}
1087 1087
1088STATIC __inline__ int 1088STATIC_INLINE int
1089_xfs_buf_iolocked( 1089_xfs_buf_iolocked(
1090 xfs_buf_t *bp) 1090 xfs_buf_t *bp)
1091{ 1091{
@@ -1095,7 +1095,7 @@ _xfs_buf_iolocked(
1095 return 0; 1095 return 0;
1096} 1096}
1097 1097
1098STATIC __inline__ void 1098STATIC_INLINE void
1099_xfs_buf_ioend( 1099_xfs_buf_ioend(
1100 xfs_buf_t *bp, 1100 xfs_buf_t *bp,
1101 int schedule) 1101 int schedule)
@@ -1426,8 +1426,8 @@ xfs_free_bufhash(
1426/* 1426/*
1427 * buftarg list for delwrite queue processing 1427 * buftarg list for delwrite queue processing
1428 */ 1428 */
1429STATIC LIST_HEAD(xfs_buftarg_list); 1429LIST_HEAD(xfs_buftarg_list);
1430STATIC DEFINE_SPINLOCK(xfs_buftarg_lock); 1430static DEFINE_SPINLOCK(xfs_buftarg_lock);
1431 1431
1432STATIC void 1432STATIC void
1433xfs_register_buftarg( 1433xfs_register_buftarg(
@@ -1679,21 +1679,60 @@ xfsbufd_wakeup(
1679 return 0; 1679 return 0;
1680} 1680}
1681 1681
1682/*
1683 * Move as many buffers as specified to the supplied list
1684 * idicating if we skipped any buffers to prevent deadlocks.
1685 */
1686STATIC int
1687xfs_buf_delwri_split(
1688 xfs_buftarg_t *target,
1689 struct list_head *list,
1690 unsigned long age)
1691{
1692 xfs_buf_t *bp, *n;
1693 struct list_head *dwq = &target->bt_delwrite_queue;
1694 spinlock_t *dwlk = &target->bt_delwrite_lock;
1695 int skipped = 0;
1696 int force;
1697
1698 force = test_and_clear_bit(XBT_FORCE_FLUSH, &target->bt_flags);
1699 INIT_LIST_HEAD(list);
1700 spin_lock(dwlk);
1701 list_for_each_entry_safe(bp, n, dwq, b_list) {
1702 XB_TRACE(bp, "walkq1", (long)xfs_buf_ispin(bp));
1703 ASSERT(bp->b_flags & XBF_DELWRI);
1704
1705 if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) {
1706 if (!force &&
1707 time_before(jiffies, bp->b_queuetime + age)) {
1708 xfs_buf_unlock(bp);
1709 break;
1710 }
1711
1712 bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q|
1713 _XBF_RUN_QUEUES);
1714 bp->b_flags |= XBF_WRITE;
1715 list_move_tail(&bp->b_list, list);
1716 } else
1717 skipped++;
1718 }
1719 spin_unlock(dwlk);
1720
1721 return skipped;
1722
1723}
1724
1682STATIC int 1725STATIC int
1683xfsbufd( 1726xfsbufd(
1684 void *data) 1727 void *data)
1685{ 1728{
1686 struct list_head tmp; 1729 struct list_head tmp;
1687 unsigned long age; 1730 xfs_buftarg_t *target = (xfs_buftarg_t *)data;
1688 xfs_buftarg_t *target = (xfs_buftarg_t *)data; 1731 int count;
1689 xfs_buf_t *bp, *n; 1732 xfs_buf_t *bp;
1690 struct list_head *dwq = &target->bt_delwrite_queue;
1691 spinlock_t *dwlk = &target->bt_delwrite_lock;
1692 int count;
1693 1733
1694 current->flags |= PF_MEMALLOC; 1734 current->flags |= PF_MEMALLOC;
1695 1735
1696 INIT_LIST_HEAD(&tmp);
1697 do { 1736 do {
1698 if (unlikely(freezing(current))) { 1737 if (unlikely(freezing(current))) {
1699 set_bit(XBT_FORCE_SLEEP, &target->bt_flags); 1738 set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
@@ -1705,37 +1744,17 @@ xfsbufd(
1705 schedule_timeout_interruptible( 1744 schedule_timeout_interruptible(
1706 xfs_buf_timer_centisecs * msecs_to_jiffies(10)); 1745 xfs_buf_timer_centisecs * msecs_to_jiffies(10));
1707 1746
1708 count = 0; 1747 xfs_buf_delwri_split(target, &tmp,
1709 age = xfs_buf_age_centisecs * msecs_to_jiffies(10); 1748 xfs_buf_age_centisecs * msecs_to_jiffies(10));
1710 spin_lock(dwlk);
1711 list_for_each_entry_safe(bp, n, dwq, b_list) {
1712 XB_TRACE(bp, "walkq1", (long)xfs_buf_ispin(bp));
1713 ASSERT(bp->b_flags & XBF_DELWRI);
1714
1715 if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) {
1716 if (!test_bit(XBT_FORCE_FLUSH,
1717 &target->bt_flags) &&
1718 time_before(jiffies,
1719 bp->b_queuetime + age)) {
1720 xfs_buf_unlock(bp);
1721 break;
1722 }
1723
1724 bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q|
1725 _XBF_RUN_QUEUES);
1726 bp->b_flags |= XBF_WRITE;
1727 list_move_tail(&bp->b_list, &tmp);
1728 count++;
1729 }
1730 }
1731 spin_unlock(dwlk);
1732 1749
1750 count = 0;
1733 while (!list_empty(&tmp)) { 1751 while (!list_empty(&tmp)) {
1734 bp = list_entry(tmp.next, xfs_buf_t, b_list); 1752 bp = list_entry(tmp.next, xfs_buf_t, b_list);
1735 ASSERT(target == bp->b_target); 1753 ASSERT(target == bp->b_target);
1736 1754
1737 list_del_init(&bp->b_list); 1755 list_del_init(&bp->b_list);
1738 xfs_buf_iostrategy(bp); 1756 xfs_buf_iostrategy(bp);
1757 count++;
1739 } 1758 }
1740 1759
1741 if (as_list_len > 0) 1760 if (as_list_len > 0)
@@ -1743,7 +1762,6 @@ xfsbufd(
1743 if (count) 1762 if (count)
1744 blk_run_address_space(target->bt_mapping); 1763 blk_run_address_space(target->bt_mapping);
1745 1764
1746 clear_bit(XBT_FORCE_FLUSH, &target->bt_flags);
1747 } while (!kthread_should_stop()); 1765 } while (!kthread_should_stop());
1748 1766
1749 return 0; 1767 return 0;
@@ -1756,40 +1774,24 @@ xfsbufd(
1756 */ 1774 */
1757int 1775int
1758xfs_flush_buftarg( 1776xfs_flush_buftarg(
1759 xfs_buftarg_t *target, 1777 xfs_buftarg_t *target,
1760 int wait) 1778 int wait)
1761{ 1779{
1762 struct list_head tmp; 1780 struct list_head tmp;
1763 xfs_buf_t *bp, *n; 1781 xfs_buf_t *bp, *n;
1764 int pincount = 0; 1782 int pincount = 0;
1765 struct list_head *dwq = &target->bt_delwrite_queue;
1766 spinlock_t *dwlk = &target->bt_delwrite_lock;
1767 1783
1768 xfs_buf_runall_queues(xfsdatad_workqueue); 1784 xfs_buf_runall_queues(xfsdatad_workqueue);
1769 xfs_buf_runall_queues(xfslogd_workqueue); 1785 xfs_buf_runall_queues(xfslogd_workqueue);
1770 1786
1771 INIT_LIST_HEAD(&tmp); 1787 set_bit(XBT_FORCE_FLUSH, &target->bt_flags);
1772 spin_lock(dwlk); 1788 pincount = xfs_buf_delwri_split(target, &tmp, 0);
1773 list_for_each_entry_safe(bp, n, dwq, b_list) {
1774 ASSERT(bp->b_target == target);
1775 ASSERT(bp->b_flags & (XBF_DELWRI | _XBF_DELWRI_Q));
1776 XB_TRACE(bp, "walkq2", (long)xfs_buf_ispin(bp));
1777 if (xfs_buf_ispin(bp)) {
1778 pincount++;
1779 continue;
1780 }
1781
1782 list_move_tail(&bp->b_list, &tmp);
1783 }
1784 spin_unlock(dwlk);
1785 1789
1786 /* 1790 /*
1787 * Dropped the delayed write list lock, now walk the temporary list 1791 * Dropped the delayed write list lock, now walk the temporary list
1788 */ 1792 */
1789 list_for_each_entry_safe(bp, n, &tmp, b_list) { 1793 list_for_each_entry_safe(bp, n, &tmp, b_list) {
1790 xfs_buf_lock(bp); 1794 ASSERT(target == bp->b_target);
1791 bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q|_XBF_RUN_QUEUES);
1792 bp->b_flags |= XBF_WRITE;
1793 if (wait) 1795 if (wait)
1794 bp->b_flags &= ~XBF_ASYNC; 1796 bp->b_flags &= ~XBF_ASYNC;
1795 else 1797 else
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 9dd235cb0107..9e8ef8fef39f 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -69,8 +69,8 @@ typedef enum {
69} xfs_buf_flags_t; 69} xfs_buf_flags_t;
70 70
71typedef enum { 71typedef enum {
72 XBT_FORCE_SLEEP = (0 << 1), 72 XBT_FORCE_SLEEP = 0,
73 XBT_FORCE_FLUSH = (1 << 1), 73 XBT_FORCE_FLUSH = 1,
74} xfs_buftarg_flags_t; 74} xfs_buftarg_flags_t;
75 75
76typedef struct xfs_bufhash { 76typedef struct xfs_bufhash {
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 5fb75d9151f2..e3a5fedac1ba 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -24,7 +24,7 @@
24#include "xfs_mount.h" 24#include "xfs_mount.h"
25#include "xfs_export.h" 25#include "xfs_export.h"
26 26
27STATIC struct dentry dotdot = { .d_name.name = "..", .d_name.len = 2, }; 27static struct dentry dotdot = { .d_name.name = "..", .d_name.len = 2, };
28 28
29/* 29/*
30 * XFS encodes and decodes the fileid portion of NFS filehandles 30 * XFS encodes and decodes the fileid portion of NFS filehandles
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index d26f5cd2ba70..cb51dc961355 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -46,7 +46,7 @@ static struct vm_operations_struct xfs_file_vm_ops;
46static struct vm_operations_struct xfs_dmapi_file_vm_ops; 46static struct vm_operations_struct xfs_dmapi_file_vm_ops;
47#endif 47#endif
48 48
49STATIC inline ssize_t 49STATIC_INLINE ssize_t
50__xfs_file_read( 50__xfs_file_read(
51 struct kiocb *iocb, 51 struct kiocb *iocb,
52 const struct iovec *iov, 52 const struct iovec *iov,
@@ -84,7 +84,7 @@ xfs_file_aio_read_invis(
84 return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos); 84 return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
85} 85}
86 86
87STATIC inline ssize_t 87STATIC_INLINE ssize_t
88__xfs_file_write( 88__xfs_file_write(
89 struct kiocb *iocb, 89 struct kiocb *iocb,
90 const struct iovec *iov, 90 const struct iovec *iov,
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index f011c9cd0d62..ff5c41ff8d40 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -41,8 +41,6 @@
41#include "xfs_error.h" 41#include "xfs_error.h"
42#include "xfs_rw.h" 42#include "xfs_rw.h"
43#include "xfs_acl.h" 43#include "xfs_acl.h"
44#include "xfs_cap.h"
45#include "xfs_mac.h"
46#include "xfs_attr.h" 44#include "xfs_attr.h"
47#include "xfs_bmap.h" 45#include "xfs_bmap.h"
48#include "xfs_buf_item.h" 46#include "xfs_buf_item.h"
@@ -355,7 +353,6 @@ STATIC int
355xfs_readlink_by_handle( 353xfs_readlink_by_handle(
356 xfs_mount_t *mp, 354 xfs_mount_t *mp,
357 void __user *arg, 355 void __user *arg,
358 struct file *parfilp,
359 struct inode *parinode) 356 struct inode *parinode)
360{ 357{
361 int error; 358 int error;
@@ -388,7 +385,7 @@ xfs_readlink_by_handle(
388 aiov.iov_len = olen; 385 aiov.iov_len = olen;
389 aiov.iov_base = hreq.ohandle; 386 aiov.iov_base = hreq.ohandle;
390 387
391 auio.uio_iov = &aiov; 388 auio.uio_iov = (struct kvec *)&aiov;
392 auio.uio_iovcnt = 1; 389 auio.uio_iovcnt = 1;
393 auio.uio_offset = 0; 390 auio.uio_offset = 0;
394 auio.uio_segflg = UIO_USERSPACE; 391 auio.uio_segflg = UIO_USERSPACE;
@@ -406,7 +403,6 @@ STATIC int
406xfs_fssetdm_by_handle( 403xfs_fssetdm_by_handle(
407 xfs_mount_t *mp, 404 xfs_mount_t *mp,
408 void __user *arg, 405 void __user *arg,
409 struct file *parfilp,
410 struct inode *parinode) 406 struct inode *parinode)
411{ 407{
412 int error; 408 int error;
@@ -448,7 +444,6 @@ STATIC int
448xfs_attrlist_by_handle( 444xfs_attrlist_by_handle(
449 xfs_mount_t *mp, 445 xfs_mount_t *mp,
450 void __user *arg, 446 void __user *arg,
451 struct file *parfilp,
452 struct inode *parinode) 447 struct inode *parinode)
453{ 448{
454 int error; 449 int error;
@@ -569,7 +564,6 @@ STATIC int
569xfs_attrmulti_by_handle( 564xfs_attrmulti_by_handle(
570 xfs_mount_t *mp, 565 xfs_mount_t *mp,
571 void __user *arg, 566 void __user *arg,
572 struct file *parfilp,
573 struct inode *parinode) 567 struct inode *parinode)
574{ 568{
575 int error; 569 int error;
@@ -689,7 +683,6 @@ xfs_ioc_xattr(
689STATIC int 683STATIC int
690xfs_ioc_getbmap( 684xfs_ioc_getbmap(
691 bhv_desc_t *bdp, 685 bhv_desc_t *bdp,
692 struct file *filp,
693 int flags, 686 int flags,
694 unsigned int cmd, 687 unsigned int cmd,
695 void __user *arg); 688 void __user *arg);
@@ -788,7 +781,7 @@ xfs_ioctl(
788 781
789 case XFS_IOC_GETBMAP: 782 case XFS_IOC_GETBMAP:
790 case XFS_IOC_GETBMAPA: 783 case XFS_IOC_GETBMAPA:
791 return xfs_ioc_getbmap(bdp, filp, ioflags, cmd, arg); 784 return xfs_ioc_getbmap(bdp, ioflags, cmd, arg);
792 785
793 case XFS_IOC_GETBMAPX: 786 case XFS_IOC_GETBMAPX:
794 return xfs_ioc_getbmapx(bdp, arg); 787 return xfs_ioc_getbmapx(bdp, arg);
@@ -802,16 +795,16 @@ xfs_ioctl(
802 return xfs_open_by_handle(mp, arg, filp, inode); 795 return xfs_open_by_handle(mp, arg, filp, inode);
803 796
804 case XFS_IOC_FSSETDM_BY_HANDLE: 797 case XFS_IOC_FSSETDM_BY_HANDLE:
805 return xfs_fssetdm_by_handle(mp, arg, filp, inode); 798 return xfs_fssetdm_by_handle(mp, arg, inode);
806 799
807 case XFS_IOC_READLINK_BY_HANDLE: 800 case XFS_IOC_READLINK_BY_HANDLE:
808 return xfs_readlink_by_handle(mp, arg, filp, inode); 801 return xfs_readlink_by_handle(mp, arg, inode);
809 802
810 case XFS_IOC_ATTRLIST_BY_HANDLE: 803 case XFS_IOC_ATTRLIST_BY_HANDLE:
811 return xfs_attrlist_by_handle(mp, arg, filp, inode); 804 return xfs_attrlist_by_handle(mp, arg, inode);
812 805
813 case XFS_IOC_ATTRMULTI_BY_HANDLE: 806 case XFS_IOC_ATTRMULTI_BY_HANDLE:
814 return xfs_attrmulti_by_handle(mp, arg, filp, inode); 807 return xfs_attrmulti_by_handle(mp, arg, inode);
815 808
816 case XFS_IOC_SWAPEXT: { 809 case XFS_IOC_SWAPEXT: {
817 error = xfs_swapext((struct xfs_swapext __user *)arg); 810 error = xfs_swapext((struct xfs_swapext __user *)arg);
@@ -1095,11 +1088,6 @@ xfs_ioc_fsgeometry(
1095/* 1088/*
1096 * Linux extended inode flags interface. 1089 * Linux extended inode flags interface.
1097 */ 1090 */
1098#define LINUX_XFLAG_SYNC 0x00000008 /* Synchronous updates */
1099#define LINUX_XFLAG_IMMUTABLE 0x00000010 /* Immutable file */
1100#define LINUX_XFLAG_APPEND 0x00000020 /* writes to file may only append */
1101#define LINUX_XFLAG_NODUMP 0x00000040 /* do not dump file */
1102#define LINUX_XFLAG_NOATIME 0x00000080 /* do not update atime */
1103 1091
1104STATIC unsigned int 1092STATIC unsigned int
1105xfs_merge_ioc_xflags( 1093xfs_merge_ioc_xflags(
@@ -1108,23 +1096,23 @@ xfs_merge_ioc_xflags(
1108{ 1096{
1109 unsigned int xflags = start; 1097 unsigned int xflags = start;
1110 1098
1111 if (flags & LINUX_XFLAG_IMMUTABLE) 1099 if (flags & FS_IMMUTABLE_FL)
1112 xflags |= XFS_XFLAG_IMMUTABLE; 1100 xflags |= XFS_XFLAG_IMMUTABLE;
1113 else 1101 else
1114 xflags &= ~XFS_XFLAG_IMMUTABLE; 1102 xflags &= ~XFS_XFLAG_IMMUTABLE;
1115 if (flags & LINUX_XFLAG_APPEND) 1103 if (flags & FS_APPEND_FL)
1116 xflags |= XFS_XFLAG_APPEND; 1104 xflags |= XFS_XFLAG_APPEND;
1117 else 1105 else
1118 xflags &= ~XFS_XFLAG_APPEND; 1106 xflags &= ~XFS_XFLAG_APPEND;
1119 if (flags & LINUX_XFLAG_SYNC) 1107 if (flags & FS_SYNC_FL)
1120 xflags |= XFS_XFLAG_SYNC; 1108 xflags |= XFS_XFLAG_SYNC;
1121 else 1109 else
1122 xflags &= ~XFS_XFLAG_SYNC; 1110 xflags &= ~XFS_XFLAG_SYNC;
1123 if (flags & LINUX_XFLAG_NOATIME) 1111 if (flags & FS_NOATIME_FL)
1124 xflags |= XFS_XFLAG_NOATIME; 1112 xflags |= XFS_XFLAG_NOATIME;
1125 else 1113 else
1126 xflags &= ~XFS_XFLAG_NOATIME; 1114 xflags &= ~XFS_XFLAG_NOATIME;
1127 if (flags & LINUX_XFLAG_NODUMP) 1115 if (flags & FS_NODUMP_FL)
1128 xflags |= XFS_XFLAG_NODUMP; 1116 xflags |= XFS_XFLAG_NODUMP;
1129 else 1117 else
1130 xflags &= ~XFS_XFLAG_NODUMP; 1118 xflags &= ~XFS_XFLAG_NODUMP;
@@ -1139,15 +1127,15 @@ xfs_di2lxflags(
1139 unsigned int flags = 0; 1127 unsigned int flags = 0;
1140 1128
1141 if (di_flags & XFS_DIFLAG_IMMUTABLE) 1129 if (di_flags & XFS_DIFLAG_IMMUTABLE)
1142 flags |= LINUX_XFLAG_IMMUTABLE; 1130 flags |= FS_IMMUTABLE_FL;
1143 if (di_flags & XFS_DIFLAG_APPEND) 1131 if (di_flags & XFS_DIFLAG_APPEND)
1144 flags |= LINUX_XFLAG_APPEND; 1132 flags |= FS_APPEND_FL;
1145 if (di_flags & XFS_DIFLAG_SYNC) 1133 if (di_flags & XFS_DIFLAG_SYNC)
1146 flags |= LINUX_XFLAG_SYNC; 1134 flags |= FS_SYNC_FL;
1147 if (di_flags & XFS_DIFLAG_NOATIME) 1135 if (di_flags & XFS_DIFLAG_NOATIME)
1148 flags |= LINUX_XFLAG_NOATIME; 1136 flags |= FS_NOATIME_FL;
1149 if (di_flags & XFS_DIFLAG_NODUMP) 1137 if (di_flags & XFS_DIFLAG_NODUMP)
1150 flags |= LINUX_XFLAG_NODUMP; 1138 flags |= FS_NODUMP_FL;
1151 return flags; 1139 return flags;
1152} 1140}
1153 1141
@@ -1247,9 +1235,9 @@ xfs_ioc_xattr(
1247 break; 1235 break;
1248 } 1236 }
1249 1237
1250 if (flags & ~(LINUX_XFLAG_IMMUTABLE | LINUX_XFLAG_APPEND | \ 1238 if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
1251 LINUX_XFLAG_NOATIME | LINUX_XFLAG_NODUMP | \ 1239 FS_NOATIME_FL | FS_NODUMP_FL | \
1252 LINUX_XFLAG_SYNC)) { 1240 FS_SYNC_FL)) {
1253 error = -EOPNOTSUPP; 1241 error = -EOPNOTSUPP;
1254 break; 1242 break;
1255 } 1243 }
@@ -1281,7 +1269,6 @@ xfs_ioc_xattr(
1281STATIC int 1269STATIC int
1282xfs_ioc_getbmap( 1270xfs_ioc_getbmap(
1283 bhv_desc_t *bdp, 1271 bhv_desc_t *bdp,
1284 struct file *filp,
1285 int ioflags, 1272 int ioflags,
1286 unsigned int cmd, 1273 unsigned int cmd,
1287 void __user *arg) 1274 void __user *arg)
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 3ba814ae3bba..0b5fa124bef2 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -43,8 +43,6 @@
43#include "xfs_itable.h" 43#include "xfs_itable.h"
44#include "xfs_rw.h" 44#include "xfs_rw.h"
45#include "xfs_acl.h" 45#include "xfs_acl.h"
46#include "xfs_cap.h"
47#include "xfs_mac.h"
48#include "xfs_attr.h" 46#include "xfs_attr.h"
49#include "xfs_buf_item.h" 47#include "xfs_buf_item.h"
50#include "xfs_utils.h" 48#include "xfs_utils.h"
@@ -250,13 +248,13 @@ xfs_init_security(
250 * 248 *
251 * XXX(hch): nfsd is broken, better fix it instead. 249 * XXX(hch): nfsd is broken, better fix it instead.
252 */ 250 */
253STATIC inline int 251STATIC_INLINE int
254xfs_has_fs_struct(struct task_struct *task) 252xfs_has_fs_struct(struct task_struct *task)
255{ 253{
256 return (task->fs != init_task.fs); 254 return (task->fs != init_task.fs);
257} 255}
258 256
259STATIC inline void 257STATIC void
260xfs_cleanup_inode( 258xfs_cleanup_inode(
261 bhv_vnode_t *dvp, 259 bhv_vnode_t *dvp,
262 bhv_vnode_t *vp, 260 bhv_vnode_t *vp,
@@ -815,7 +813,7 @@ xfs_vn_removexattr(
815} 813}
816 814
817 815
818struct inode_operations xfs_inode_operations = { 816const struct inode_operations xfs_inode_operations = {
819 .permission = xfs_vn_permission, 817 .permission = xfs_vn_permission,
820 .truncate = xfs_vn_truncate, 818 .truncate = xfs_vn_truncate,
821 .getattr = xfs_vn_getattr, 819 .getattr = xfs_vn_getattr,
@@ -826,7 +824,7 @@ struct inode_operations xfs_inode_operations = {
826 .removexattr = xfs_vn_removexattr, 824 .removexattr = xfs_vn_removexattr,
827}; 825};
828 826
829struct inode_operations xfs_dir_inode_operations = { 827const struct inode_operations xfs_dir_inode_operations = {
830 .create = xfs_vn_create, 828 .create = xfs_vn_create,
831 .lookup = xfs_vn_lookup, 829 .lookup = xfs_vn_lookup,
832 .link = xfs_vn_link, 830 .link = xfs_vn_link,
@@ -845,7 +843,7 @@ struct inode_operations xfs_dir_inode_operations = {
845 .removexattr = xfs_vn_removexattr, 843 .removexattr = xfs_vn_removexattr,
846}; 844};
847 845
848struct inode_operations xfs_symlink_inode_operations = { 846const struct inode_operations xfs_symlink_inode_operations = {
849 .readlink = generic_readlink, 847 .readlink = generic_readlink,
850 .follow_link = xfs_vn_follow_link, 848 .follow_link = xfs_vn_follow_link,
851 .put_link = xfs_vn_put_link, 849 .put_link = xfs_vn_put_link,
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h
index ad6173da5678..95a69398fce0 100644
--- a/fs/xfs/linux-2.6/xfs_iops.h
+++ b/fs/xfs/linux-2.6/xfs_iops.h
@@ -18,9 +18,9 @@
18#ifndef __XFS_IOPS_H__ 18#ifndef __XFS_IOPS_H__
19#define __XFS_IOPS_H__ 19#define __XFS_IOPS_H__
20 20
21extern struct inode_operations xfs_inode_operations; 21extern const struct inode_operations xfs_inode_operations;
22extern struct inode_operations xfs_dir_inode_operations; 22extern const struct inode_operations xfs_dir_inode_operations;
23extern struct inode_operations xfs_symlink_inode_operations; 23extern const struct inode_operations xfs_symlink_inode_operations;
24 24
25extern const struct file_operations xfs_file_operations; 25extern const struct file_operations xfs_file_operations;
26extern const struct file_operations xfs_dir_file_operations; 26extern const struct file_operations xfs_dir_file_operations;
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 2b0e0018738a..715adad7dd4d 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -109,16 +109,6 @@
109#undef HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ 109#undef HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
110#endif 110#endif
111 111
112/*
113 * State flag for unwritten extent buffers.
114 *
115 * We need to be able to distinguish between these and delayed
116 * allocate buffers within XFS. The generic IO path code does
117 * not need to distinguish - we use the BH_Delay flag for both
118 * delalloc and these ondisk-uninitialised buffers.
119 */
120BUFFER_FNS(PrivateStart, unwritten);
121
122#define restricted_chown xfs_params.restrict_chown.val 112#define restricted_chown xfs_params.restrict_chown.val
123#define irix_sgid_inherit xfs_params.sgid_inherit.val 113#define irix_sgid_inherit xfs_params.sgid_inherit.val
124#define irix_symlink_mode xfs_params.symlink_mode.val 114#define irix_symlink_mode xfs_params.symlink_mode.val
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 65e79b471d49..ff8d64eba9f8 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -43,8 +43,6 @@
43#include "xfs_itable.h" 43#include "xfs_itable.h"
44#include "xfs_rw.h" 44#include "xfs_rw.h"
45#include "xfs_acl.h" 45#include "xfs_acl.h"
46#include "xfs_cap.h"
47#include "xfs_mac.h"
48#include "xfs_attr.h" 46#include "xfs_attr.h"
49#include "xfs_inode_item.h" 47#include "xfs_inode_item.h"
50#include "xfs_buf_item.h" 48#include "xfs_buf_item.h"
@@ -134,13 +132,11 @@ STATIC int
134xfs_iozero( 132xfs_iozero(
135 struct inode *ip, /* inode */ 133 struct inode *ip, /* inode */
136 loff_t pos, /* offset in file */ 134 loff_t pos, /* offset in file */
137 size_t count, /* size of data to zero */ 135 size_t count) /* size of data to zero */
138 loff_t end_size) /* max file size to set */
139{ 136{
140 unsigned bytes; 137 unsigned bytes;
141 struct page *page; 138 struct page *page;
142 struct address_space *mapping; 139 struct address_space *mapping;
143 char *kaddr;
144 int status; 140 int status;
145 141
146 mapping = ip->i_mapping; 142 mapping = ip->i_mapping;
@@ -158,26 +154,21 @@ xfs_iozero(
158 if (!page) 154 if (!page)
159 break; 155 break;
160 156
161 kaddr = kmap(page);
162 status = mapping->a_ops->prepare_write(NULL, page, offset, 157 status = mapping->a_ops->prepare_write(NULL, page, offset,
163 offset + bytes); 158 offset + bytes);
164 if (status) { 159 if (status)
165 goto unlock; 160 goto unlock;
166 }
167 161
168 memset((void *) (kaddr + offset), 0, bytes); 162 memclear_highpage_flush(page, offset, bytes);
169 flush_dcache_page(page); 163
170 status = mapping->a_ops->commit_write(NULL, page, offset, 164 status = mapping->a_ops->commit_write(NULL, page, offset,
171 offset + bytes); 165 offset + bytes);
172 if (!status) { 166 if (!status) {
173 pos += bytes; 167 pos += bytes;
174 count -= bytes; 168 count -= bytes;
175 if (pos > i_size_read(ip))
176 i_size_write(ip, pos < end_size ? pos : end_size);
177 } 169 }
178 170
179unlock: 171unlock:
180 kunmap(page);
181 unlock_page(page); 172 unlock_page(page);
182 page_cache_release(page); 173 page_cache_release(page);
183 if (status) 174 if (status)
@@ -449,8 +440,8 @@ STATIC int /* error (positive) */
449xfs_zero_last_block( 440xfs_zero_last_block(
450 struct inode *ip, 441 struct inode *ip,
451 xfs_iocore_t *io, 442 xfs_iocore_t *io,
452 xfs_fsize_t isize, 443 xfs_fsize_t offset,
453 xfs_fsize_t end_size) 444 xfs_fsize_t isize)
454{ 445{
455 xfs_fileoff_t last_fsb; 446 xfs_fileoff_t last_fsb;
456 xfs_mount_t *mp = io->io_mount; 447 xfs_mount_t *mp = io->io_mount;
@@ -459,7 +450,6 @@ xfs_zero_last_block(
459 int zero_len; 450 int zero_len;
460 int error = 0; 451 int error = 0;
461 xfs_bmbt_irec_t imap; 452 xfs_bmbt_irec_t imap;
462 loff_t loff;
463 453
464 ASSERT(ismrlocked(io->io_lock, MR_UPDATE) != 0); 454 ASSERT(ismrlocked(io->io_lock, MR_UPDATE) != 0);
465 455
@@ -494,9 +484,10 @@ xfs_zero_last_block(
494 */ 484 */
495 XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD); 485 XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD);
496 486
497 loff = XFS_FSB_TO_B(mp, last_fsb);
498 zero_len = mp->m_sb.sb_blocksize - zero_offset; 487 zero_len = mp->m_sb.sb_blocksize - zero_offset;
499 error = xfs_iozero(ip, loff + zero_offset, zero_len, end_size); 488 if (isize + zero_len > offset)
489 zero_len = offset - isize;
490 error = xfs_iozero(ip, isize, zero_len);
500 491
501 XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); 492 XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
502 ASSERT(error >= 0); 493 ASSERT(error >= 0);
@@ -519,14 +510,15 @@ xfs_zero_eof(
519 bhv_vnode_t *vp, 510 bhv_vnode_t *vp,
520 xfs_iocore_t *io, 511 xfs_iocore_t *io,
521 xfs_off_t offset, /* starting I/O offset */ 512 xfs_off_t offset, /* starting I/O offset */
522 xfs_fsize_t isize, /* current inode size */ 513 xfs_fsize_t isize) /* current inode size */
523 xfs_fsize_t end_size) /* terminal inode size */
524{ 514{
525 struct inode *ip = vn_to_inode(vp); 515 struct inode *ip = vn_to_inode(vp);
526 xfs_fileoff_t start_zero_fsb; 516 xfs_fileoff_t start_zero_fsb;
527 xfs_fileoff_t end_zero_fsb; 517 xfs_fileoff_t end_zero_fsb;
528 xfs_fileoff_t zero_count_fsb; 518 xfs_fileoff_t zero_count_fsb;
529 xfs_fileoff_t last_fsb; 519 xfs_fileoff_t last_fsb;
520 xfs_fileoff_t zero_off;
521 xfs_fsize_t zero_len;
530 xfs_mount_t *mp = io->io_mount; 522 xfs_mount_t *mp = io->io_mount;
531 int nimaps; 523 int nimaps;
532 int error = 0; 524 int error = 0;
@@ -540,7 +532,7 @@ xfs_zero_eof(
540 * First handle zeroing the block on which isize resides. 532 * First handle zeroing the block on which isize resides.
541 * We only zero a part of that block so it is handled specially. 533 * We only zero a part of that block so it is handled specially.
542 */ 534 */
543 error = xfs_zero_last_block(ip, io, isize, end_size); 535 error = xfs_zero_last_block(ip, io, offset, isize);
544 if (error) { 536 if (error) {
545 ASSERT(ismrlocked(io->io_lock, MR_UPDATE)); 537 ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
546 ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); 538 ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
@@ -601,10 +593,13 @@ xfs_zero_eof(
601 */ 593 */
602 XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); 594 XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
603 595
604 error = xfs_iozero(ip, 596 zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
605 XFS_FSB_TO_B(mp, start_zero_fsb), 597 zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
606 XFS_FSB_TO_B(mp, imap.br_blockcount), 598
607 end_size); 599 if ((zero_off + zero_len) > offset)
600 zero_len = offset - zero_off;
601
602 error = xfs_iozero(ip, zero_off, zero_len);
608 if (error) { 603 if (error) {
609 goto out_lock; 604 goto out_lock;
610 } 605 }
@@ -783,8 +778,7 @@ start:
783 */ 778 */
784 779
785 if (pos > isize) { 780 if (pos > isize) {
786 error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, pos, 781 error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, pos, isize);
787 isize, pos + count);
788 if (error) { 782 if (error) {
789 xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); 783 xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
790 goto out_unlock_mutex; 784 goto out_unlock_mutex;
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
index c77e62efb742..7ac51b1d2161 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -83,7 +83,7 @@ extern int xfs_bdstrat_cb(struct xfs_buf *);
83extern int xfs_dev_is_read_only(struct xfs_mount *, char *); 83extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
84 84
85extern int xfs_zero_eof(struct bhv_vnode *, struct xfs_iocore *, xfs_off_t, 85extern int xfs_zero_eof(struct bhv_vnode *, struct xfs_iocore *, xfs_off_t,
86 xfs_fsize_t, xfs_fsize_t); 86 xfs_fsize_t);
87extern ssize_t xfs_read(struct bhv_desc *, struct kiocb *, 87extern ssize_t xfs_read(struct bhv_desc *, struct kiocb *,
88 const struct iovec *, unsigned int, 88 const struct iovec *, unsigned int,
89 loff_t *, int, struct cred *); 89 loff_t *, int, struct cred *);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index b93265b7c79c..1a4103ca593c 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -43,8 +43,6 @@
43#include "xfs_itable.h" 43#include "xfs_itable.h"
44#include "xfs_rw.h" 44#include "xfs_rw.h"
45#include "xfs_acl.h" 45#include "xfs_acl.h"
46#include "xfs_cap.h"
47#include "xfs_mac.h"
48#include "xfs_attr.h" 46#include "xfs_attr.h"
49#include "xfs_buf_item.h" 47#include "xfs_buf_item.h"
50#include "xfs_utils.h" 48#include "xfs_utils.h"
@@ -58,10 +56,10 @@
58#include <linux/kthread.h> 56#include <linux/kthread.h>
59#include <linux/freezer.h> 57#include <linux/freezer.h>
60 58
61STATIC struct quotactl_ops xfs_quotactl_operations; 59static struct quotactl_ops xfs_quotactl_operations;
62STATIC struct super_operations xfs_super_operations; 60static struct super_operations xfs_super_operations;
63STATIC kmem_zone_t *xfs_vnode_zone; 61static kmem_zone_t *xfs_vnode_zone;
64STATIC kmem_zone_t *xfs_ioend_zone; 62static kmem_zone_t *xfs_ioend_zone;
65mempool_t *xfs_ioend_pool; 63mempool_t *xfs_ioend_pool;
66 64
67STATIC struct xfs_mount_args * 65STATIC struct xfs_mount_args *
@@ -121,7 +119,7 @@ xfs_max_file_offset(
121 return (((__uint64_t)pagefactor) << bitshift) - 1; 119 return (((__uint64_t)pagefactor) << bitshift) - 1;
122} 120}
123 121
124STATIC __inline__ void 122STATIC_INLINE void
125xfs_set_inodeops( 123xfs_set_inodeops(
126 struct inode *inode) 124 struct inode *inode)
127{ 125{
@@ -147,7 +145,7 @@ xfs_set_inodeops(
147 } 145 }
148} 146}
149 147
150STATIC __inline__ void 148STATIC_INLINE void
151xfs_revalidate_inode( 149xfs_revalidate_inode(
152 xfs_mount_t *mp, 150 xfs_mount_t *mp,
153 bhv_vnode_t *vp, 151 bhv_vnode_t *vp,
@@ -553,7 +551,6 @@ vfs_sync_worker(
553 error = bhv_vfs_sync(vfsp, SYNC_FSDATA | SYNC_BDFLUSH | \ 551 error = bhv_vfs_sync(vfsp, SYNC_FSDATA | SYNC_BDFLUSH | \
554 SYNC_ATTR | SYNC_REFCACHE, NULL); 552 SYNC_ATTR | SYNC_REFCACHE, NULL);
555 vfsp->vfs_sync_seq++; 553 vfsp->vfs_sync_seq++;
556 wmb();
557 wake_up(&vfsp->vfs_wait_single_sync_task); 554 wake_up(&vfsp->vfs_wait_single_sync_task);
558} 555}
559 556
@@ -659,9 +656,17 @@ xfs_fs_sync_super(
659 int error; 656 int error;
660 int flags; 657 int flags;
661 658
662 if (unlikely(sb->s_frozen == SB_FREEZE_WRITE)) 659 if (unlikely(sb->s_frozen == SB_FREEZE_WRITE)) {
663 flags = SYNC_QUIESCE; 660 /*
664 else 661 * First stage of freeze - no more writers will make progress
662 * now we are here, so we flush delwri and delalloc buffers
663 * here, then wait for all I/O to complete. Data is frozen at
664 * that point. Metadata is not frozen, transactions can still
665 * occur here so don't bother flushing the buftarg (i.e
666 * SYNC_QUIESCE) because it'll just get dirty again.
667 */
668 flags = SYNC_FSDATA | SYNC_DELWRI | SYNC_WAIT | SYNC_IOWAIT;
669 } else
665 flags = SYNC_FSDATA | (wait ? SYNC_WAIT : 0); 670 flags = SYNC_FSDATA | (wait ? SYNC_WAIT : 0);
666 671
667 error = bhv_vfs_sync(vfsp, flags, NULL); 672 error = bhv_vfs_sync(vfsp, flags, NULL);
@@ -873,7 +878,7 @@ xfs_fs_get_sb(
873 mnt); 878 mnt);
874} 879}
875 880
876STATIC struct super_operations xfs_super_operations = { 881static struct super_operations xfs_super_operations = {
877 .alloc_inode = xfs_fs_alloc_inode, 882 .alloc_inode = xfs_fs_alloc_inode,
878 .destroy_inode = xfs_fs_destroy_inode, 883 .destroy_inode = xfs_fs_destroy_inode,
879 .write_inode = xfs_fs_write_inode, 884 .write_inode = xfs_fs_write_inode,
@@ -887,7 +892,7 @@ STATIC struct super_operations xfs_super_operations = {
887 .show_options = xfs_fs_show_options, 892 .show_options = xfs_fs_show_options,
888}; 893};
889 894
890STATIC struct quotactl_ops xfs_quotactl_operations = { 895static struct quotactl_ops xfs_quotactl_operations = {
891 .quota_sync = xfs_fs_quotasync, 896 .quota_sync = xfs_fs_quotasync,
892 .get_xstate = xfs_fs_getxstate, 897 .get_xstate = xfs_fs_getxstate,
893 .set_xstate = xfs_fs_setxstate, 898 .set_xstate = xfs_fs_setxstate,
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
index af246532fbfb..5c46c35a97a5 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -54,102 +54,204 @@ xfs_stats_clear_proc_handler(
54} 54}
55#endif /* CONFIG_PROC_FS */ 55#endif /* CONFIG_PROC_FS */
56 56
57STATIC ctl_table xfs_table[] = { 57static ctl_table xfs_table[] = {
58 {XFS_RESTRICT_CHOWN, "restrict_chown", &xfs_params.restrict_chown.val, 58 {
59 sizeof(int), 0644, NULL, &proc_dointvec_minmax, 59 .ctl_name = XFS_RESTRICT_CHOWN,
60 &sysctl_intvec, NULL, 60 .procname = "restrict_chown",
61 &xfs_params.restrict_chown.min, &xfs_params.restrict_chown.max}, 61 .data = &xfs_params.restrict_chown.val,
62 62 .maxlen = sizeof(int),
63 {XFS_SGID_INHERIT, "irix_sgid_inherit", &xfs_params.sgid_inherit.val, 63 .mode = 0644,
64 sizeof(int), 0644, NULL, &proc_dointvec_minmax, 64 .proc_handler = &proc_dointvec_minmax,
65 &sysctl_intvec, NULL, 65 .strategy = &sysctl_intvec,
66 &xfs_params.sgid_inherit.min, &xfs_params.sgid_inherit.max}, 66 .extra1 = &xfs_params.restrict_chown.min,
67 67 .extra2 = &xfs_params.restrict_chown.max
68 {XFS_SYMLINK_MODE, "irix_symlink_mode", &xfs_params.symlink_mode.val, 68 },
69 sizeof(int), 0644, NULL, &proc_dointvec_minmax, 69 {
70 &sysctl_intvec, NULL, 70 .ctl_name = XFS_SGID_INHERIT,
71 &xfs_params.symlink_mode.min, &xfs_params.symlink_mode.max}, 71 .procname = "irix_sgid_inherit",
72 72 .data = &xfs_params.sgid_inherit.val,
73 {XFS_PANIC_MASK, "panic_mask", &xfs_params.panic_mask.val, 73 .maxlen = sizeof(int),
74 sizeof(int), 0644, NULL, &proc_dointvec_minmax, 74 .mode = 0644,
75 &sysctl_intvec, NULL, 75 .proc_handler = &proc_dointvec_minmax,
76 &xfs_params.panic_mask.min, &xfs_params.panic_mask.max}, 76 .strategy = &sysctl_intvec,
77 77 .extra1 = &xfs_params.sgid_inherit.min,
78 {XFS_ERRLEVEL, "error_level", &xfs_params.error_level.val, 78 .extra2 = &xfs_params.sgid_inherit.max
79 sizeof(int), 0644, NULL, &proc_dointvec_minmax, 79 },
80 &sysctl_intvec, NULL, 80 {
81 &xfs_params.error_level.min, &xfs_params.error_level.max}, 81 .ctl_name = XFS_SYMLINK_MODE,
82 82 .procname = "irix_symlink_mode",
83 {XFS_SYNCD_TIMER, "xfssyncd_centisecs", &xfs_params.syncd_timer.val, 83 .data = &xfs_params.symlink_mode.val,
84 sizeof(int), 0644, NULL, &proc_dointvec_minmax, 84 .maxlen = sizeof(int),
85 &sysctl_intvec, NULL, 85 .mode = 0644,
86 &xfs_params.syncd_timer.min, &xfs_params.syncd_timer.max}, 86 .proc_handler = &proc_dointvec_minmax,
87 87 .strategy = &sysctl_intvec,
88 {XFS_INHERIT_SYNC, "inherit_sync", &xfs_params.inherit_sync.val, 88 .extra1 = &xfs_params.symlink_mode.min,
89 sizeof(int), 0644, NULL, &proc_dointvec_minmax, 89 .extra2 = &xfs_params.symlink_mode.max
90 &sysctl_intvec, NULL, 90 },
91 &xfs_params.inherit_sync.min, &xfs_params.inherit_sync.max}, 91 {
92 92 .ctl_name = XFS_PANIC_MASK,
93 {XFS_INHERIT_NODUMP, "inherit_nodump", &xfs_params.inherit_nodump.val, 93 .procname = "panic_mask",
94 sizeof(int), 0644, NULL, &proc_dointvec_minmax, 94 .data = &xfs_params.panic_mask.val,
95 &sysctl_intvec, NULL, 95 .maxlen = sizeof(int),
96 &xfs_params.inherit_nodump.min, &xfs_params.inherit_nodump.max}, 96 .mode = 0644,
97 97 .proc_handler = &proc_dointvec_minmax,
98 {XFS_INHERIT_NOATIME, "inherit_noatime", &xfs_params.inherit_noatim.val, 98 .strategy = &sysctl_intvec,
99 sizeof(int), 0644, NULL, &proc_dointvec_minmax, 99 .extra1 = &xfs_params.panic_mask.min,
100 &sysctl_intvec, NULL, 100 .extra2 = &xfs_params.panic_mask.max
101 &xfs_params.inherit_noatim.min, &xfs_params.inherit_noatim.max}, 101 },
102
103 {XFS_BUF_TIMER, "xfsbufd_centisecs", &xfs_params.xfs_buf_timer.val,
104 sizeof(int), 0644, NULL, &proc_dointvec_minmax,
105 &sysctl_intvec, NULL,
106 &xfs_params.xfs_buf_timer.min, &xfs_params.xfs_buf_timer.max},
107
108 {XFS_BUF_AGE, "age_buffer_centisecs", &xfs_params.xfs_buf_age.val,
109 sizeof(int), 0644, NULL, &proc_dointvec_minmax,
110 &sysctl_intvec, NULL,
111 &xfs_params.xfs_buf_age.min, &xfs_params.xfs_buf_age.max},
112
113 {XFS_INHERIT_NOSYM, "inherit_nosymlinks", &xfs_params.inherit_nosym.val,
114 sizeof(int), 0644, NULL, &proc_dointvec_minmax,
115 &sysctl_intvec, NULL,
116 &xfs_params.inherit_nosym.min, &xfs_params.inherit_nosym.max},
117
118 {XFS_ROTORSTEP, "rotorstep", &xfs_params.rotorstep.val,
119 sizeof(int), 0644, NULL, &proc_dointvec_minmax,
120 &sysctl_intvec, NULL,
121 &xfs_params.rotorstep.min, &xfs_params.rotorstep.max},
122
123 {XFS_INHERIT_NODFRG, "inherit_nodefrag", &xfs_params.inherit_nodfrg.val,
124 sizeof(int), 0644, NULL, &proc_dointvec_minmax,
125 &sysctl_intvec, NULL,
126 &xfs_params.inherit_nodfrg.min, &xfs_params.inherit_nodfrg.max},
127 102
103 {
104 .ctl_name = XFS_ERRLEVEL,
105 .procname = "error_level",
106 .data = &xfs_params.error_level.val,
107 .maxlen = sizeof(int),
108 .mode = 0644,
109 .proc_handler = &proc_dointvec_minmax,
110 .strategy = &sysctl_intvec,
111 .extra1 = &xfs_params.error_level.min,
112 .extra2 = &xfs_params.error_level.max
113 },
114 {
115 .ctl_name = XFS_SYNCD_TIMER,
116 .procname = "xfssyncd_centisecs",
117 .data = &xfs_params.syncd_timer.val,
118 .maxlen = sizeof(int),
119 .mode = 0644,
120 .proc_handler = &proc_dointvec_minmax,
121 .strategy = &sysctl_intvec,
122 .extra1 = &xfs_params.syncd_timer.min,
123 .extra2 = &xfs_params.syncd_timer.max
124 },
125 {
126 .ctl_name = XFS_INHERIT_SYNC,
127 .procname = "inherit_sync",
128 .data = &xfs_params.inherit_sync.val,
129 .maxlen = sizeof(int),
130 .mode = 0644,
131 .proc_handler = &proc_dointvec_minmax,
132 .strategy = &sysctl_intvec,
133 .extra1 = &xfs_params.inherit_sync.min,
134 .extra2 = &xfs_params.inherit_sync.max
135 },
136 {
137 .ctl_name = XFS_INHERIT_NODUMP,
138 .procname = "inherit_nodump",
139 .data = &xfs_params.inherit_nodump.val,
140 .maxlen = sizeof(int),
141 .mode = 0644,
142 .proc_handler = &proc_dointvec_minmax,
143 .strategy = &sysctl_intvec,
144 .extra1 = &xfs_params.inherit_nodump.min,
145 .extra2 = &xfs_params.inherit_nodump.max
146 },
147 {
148 .ctl_name = XFS_INHERIT_NOATIME,
149 .procname = "inherit_noatime",
150 .data = &xfs_params.inherit_noatim.val,
151 .maxlen = sizeof(int),
152 .mode = 0644,
153 .proc_handler = &proc_dointvec_minmax,
154 .strategy = &sysctl_intvec,
155 .extra1 = &xfs_params.inherit_noatim.min,
156 .extra2 = &xfs_params.inherit_noatim.max
157 },
158 {
159 .ctl_name = XFS_BUF_TIMER,
160 .procname = "xfsbufd_centisecs",
161 .data = &xfs_params.xfs_buf_timer.val,
162 .maxlen = sizeof(int),
163 .mode = 0644,
164 .proc_handler = &proc_dointvec_minmax,
165 .strategy = &sysctl_intvec,
166 .extra1 = &xfs_params.xfs_buf_timer.min,
167 .extra2 = &xfs_params.xfs_buf_timer.max
168 },
169 {
170 .ctl_name = XFS_BUF_AGE,
171 .procname = "age_buffer_centisecs",
172 .data = &xfs_params.xfs_buf_age.val,
173 .maxlen = sizeof(int),
174 .mode = 0644,
175 .proc_handler = &proc_dointvec_minmax,
176 .strategy = &sysctl_intvec,
177 .extra1 = &xfs_params.xfs_buf_age.min,
178 .extra2 = &xfs_params.xfs_buf_age.max
179 },
180 {
181 .ctl_name = XFS_INHERIT_NOSYM,
182 .procname = "inherit_nosymlinks",
183 .data = &xfs_params.inherit_nosym.val,
184 .maxlen = sizeof(int),
185 .mode = 0644,
186 .proc_handler = &proc_dointvec_minmax,
187 .strategy = &sysctl_intvec,
188 .extra1 = &xfs_params.inherit_nosym.min,
189 .extra2 = &xfs_params.inherit_nosym.max
190 },
191 {
192 .ctl_name = XFS_ROTORSTEP,
193 .procname = "rotorstep",
194 .data = &xfs_params.rotorstep.val,
195 .maxlen = sizeof(int),
196 .mode = 0644,
197 .proc_handler = &proc_dointvec_minmax,
198 .strategy = &sysctl_intvec,
199 .extra1 = &xfs_params.rotorstep.min,
200 .extra2 = &xfs_params.rotorstep.max
201 },
202 {
203 .ctl_name = XFS_INHERIT_NODFRG,
204 .procname = "inherit_nodefrag",
205 .data = &xfs_params.inherit_nodfrg.val,
206 .maxlen = sizeof(int),
207 .mode = 0644,
208 .proc_handler = &proc_dointvec_minmax,
209 .strategy = &sysctl_intvec,
210 .extra1 = &xfs_params.inherit_nodfrg.min,
211 .extra2 = &xfs_params.inherit_nodfrg.max
212 },
128 /* please keep this the last entry */ 213 /* please keep this the last entry */
129#ifdef CONFIG_PROC_FS 214#ifdef CONFIG_PROC_FS
130 {XFS_STATS_CLEAR, "stats_clear", &xfs_params.stats_clear.val, 215 {
131 sizeof(int), 0644, NULL, &xfs_stats_clear_proc_handler, 216 .ctl_name = XFS_STATS_CLEAR,
132 &sysctl_intvec, NULL, 217 .procname = "stats_clear",
133 &xfs_params.stats_clear.min, &xfs_params.stats_clear.max}, 218 .data = &xfs_params.stats_clear.val,
219 .maxlen = sizeof(int),
220 .mode = 0644,
221 .proc_handler = &xfs_stats_clear_proc_handler,
222 .strategy = &sysctl_intvec,
223 .extra1 = &xfs_params.stats_clear.min,
224 .extra2 = &xfs_params.stats_clear.max
225 },
134#endif /* CONFIG_PROC_FS */ 226#endif /* CONFIG_PROC_FS */
135 227
136 {0} 228 {}
137}; 229};
138 230
139STATIC ctl_table xfs_dir_table[] = { 231static ctl_table xfs_dir_table[] = {
140 {FS_XFS, "xfs", NULL, 0, 0555, xfs_table}, 232 {
141 {0} 233 .ctl_name = FS_XFS,
234 .procname = "xfs",
235 .mode = 0555,
236 .child = xfs_table
237 },
238 {}
142}; 239};
143 240
144STATIC ctl_table xfs_root_table[] = { 241static ctl_table xfs_root_table[] = {
145 {CTL_FS, "fs", NULL, 0, 0555, xfs_dir_table}, 242 {
146 {0} 243 .ctl_name = CTL_FS,
244 .procname = "fs",
245 .mode = 0555,
246 .child = xfs_dir_table
247 },
248 {}
147}; 249};
148 250
149void 251void
150xfs_sysctl_register(void) 252xfs_sysctl_register(void)
151{ 253{
152 xfs_table_header = register_sysctl_table(xfs_root_table, 1); 254 xfs_table_header = register_sysctl_table(xfs_root_table, 0);
153} 255}
154 256
155void 257void
diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h
index da255bdf5260..e2c2ce98ab5b 100644
--- a/fs/xfs/linux-2.6/xfs_vfs.h
+++ b/fs/xfs/linux-2.6/xfs_vfs.h
@@ -91,7 +91,7 @@ typedef enum {
91#define SYNC_FSDATA 0x0020 /* flush fs data (e.g. superblocks) */ 91#define SYNC_FSDATA 0x0020 /* flush fs data (e.g. superblocks) */
92#define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */ 92#define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */
93#define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */ 93#define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */
94#define SYNC_QUIESCE 0x0100 /* quiesce fileystem for a snapshot */ 94#define SYNC_IOWAIT 0x0100 /* wait for all I/O to complete */
95 95
96#define SHUTDOWN_META_IO_ERROR 0x0001 /* write attempt to metadata failed */ 96#define SHUTDOWN_META_IO_ERROR 0x0001 /* write attempt to metadata failed */
97#define SHUTDOWN_LOG_IO_ERROR 0x0002 /* write attempt to the log failed */ 97#define SHUTDOWN_LOG_IO_ERROR 0x0002 /* write attempt to the log failed */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index 553fa731ade5..ada24baf88de 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -26,7 +26,7 @@ DEFINE_SPINLOCK(vnumber_lock);
26 */ 26 */
27#define NVSYNC 37 27#define NVSYNC 37
28#define vptosync(v) (&vsync[((unsigned long)v) % NVSYNC]) 28#define vptosync(v) (&vsync[((unsigned long)v) % NVSYNC])
29STATIC wait_queue_head_t vsync[NVSYNC]; 29static wait_queue_head_t vsync[NVSYNC];
30 30
31void 31void
32vn_init(void) 32vn_init(void)
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 515f5fdea57a..b76118cf4897 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -489,14 +489,14 @@ static inline struct bhv_vnode *vn_grab(struct bhv_vnode *vp)
489#define VN_LOCK(vp) mutex_spinlock(&(vp)->v_lock) 489#define VN_LOCK(vp) mutex_spinlock(&(vp)->v_lock)
490#define VN_UNLOCK(vp, s) mutex_spinunlock(&(vp)->v_lock, s) 490#define VN_UNLOCK(vp, s) mutex_spinunlock(&(vp)->v_lock, s)
491 491
492static __inline__ void vn_flagset(struct bhv_vnode *vp, uint flag) 492STATIC_INLINE void vn_flagset(struct bhv_vnode *vp, uint flag)
493{ 493{
494 spin_lock(&vp->v_lock); 494 spin_lock(&vp->v_lock);
495 vp->v_flag |= flag; 495 vp->v_flag |= flag;
496 spin_unlock(&vp->v_lock); 496 spin_unlock(&vp->v_lock);
497} 497}
498 498
499static __inline__ uint vn_flagclr(struct bhv_vnode *vp, uint flag) 499STATIC_INLINE uint vn_flagclr(struct bhv_vnode *vp, uint flag)
500{ 500{
501 uint cleared; 501 uint cleared;
502 502
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 3aa771531856..4adaf13aac6f 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -43,8 +43,6 @@
43#include "xfs_itable.h" 43#include "xfs_itable.h"
44#include "xfs_rw.h" 44#include "xfs_rw.h"
45#include "xfs_acl.h" 45#include "xfs_acl.h"
46#include "xfs_cap.h"
47#include "xfs_mac.h"
48#include "xfs_attr.h" 46#include "xfs_attr.h"
49#include "xfs_buf_item.h" 47#include "xfs_buf_item.h"
50#include "xfs_trans_space.h" 48#include "xfs_trans_space.h"
@@ -484,7 +482,7 @@ xfs_qm_dqalloc(
484 482
485 xfs_trans_bhold(tp, bp); 483 xfs_trans_bhold(tp, bp);
486 484
487 if ((error = xfs_bmap_finish(tpp, &flist, firstblock, &committed))) { 485 if ((error = xfs_bmap_finish(tpp, &flist, &committed))) {
488 goto error1; 486 goto error1;
489 } 487 }
490 488
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 33ad5af386e0..ddb61fe22a5c 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -43,8 +43,6 @@
43#include "xfs_itable.h" 43#include "xfs_itable.h"
44#include "xfs_rw.h" 44#include "xfs_rw.h"
45#include "xfs_acl.h" 45#include "xfs_acl.h"
46#include "xfs_cap.h"
47#include "xfs_mac.h"
48#include "xfs_attr.h" 46#include "xfs_attr.h"
49#include "xfs_buf_item.h" 47#include "xfs_buf_item.h"
50#include "xfs_trans_priv.h" 48#include "xfs_trans_priv.h"
@@ -399,7 +397,7 @@ xfs_qm_dquot_logitem_committing(
399/* 397/*
400 * This is the ops vector for dquots 398 * This is the ops vector for dquots
401 */ 399 */
402STATIC struct xfs_item_ops xfs_dquot_item_ops = { 400static struct xfs_item_ops xfs_dquot_item_ops = {
403 .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_size, 401 .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_size,
404 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 402 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
405 xfs_qm_dquot_logitem_format, 403 xfs_qm_dquot_logitem_format,
@@ -606,7 +604,7 @@ xfs_qm_qoffend_logitem_committing(xfs_qoff_logitem_t *qip, xfs_lsn_t commit_lsn)
606 return; 604 return;
607} 605}
608 606
609STATIC struct xfs_item_ops xfs_qm_qoffend_logitem_ops = { 607static struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
610 .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_size, 608 .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_size,
611 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 609 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
612 xfs_qm_qoff_logitem_format, 610 xfs_qm_qoff_logitem_format,
@@ -628,7 +626,7 @@ STATIC struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
628/* 626/*
629 * This is the ops vector shared by all quotaoff-start log items. 627 * This is the ops vector shared by all quotaoff-start log items.
630 */ 628 */
631STATIC struct xfs_item_ops xfs_qm_qoff_logitem_ops = { 629static struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
632 .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_size, 630 .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_size,
633 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 631 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
634 xfs_qm_qoff_logitem_format, 632 xfs_qm_qoff_logitem_format,
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 7c6a3a50379e..1de2acdc7f70 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -44,8 +44,6 @@
44#include "xfs_bmap.h" 44#include "xfs_bmap.h"
45#include "xfs_rw.h" 45#include "xfs_rw.h"
46#include "xfs_acl.h" 46#include "xfs_acl.h"
47#include "xfs_cap.h"
48#include "xfs_mac.h"
49#include "xfs_attr.h" 47#include "xfs_attr.h"
50#include "xfs_buf_item.h" 48#include "xfs_buf_item.h"
51#include "xfs_trans_space.h" 49#include "xfs_trans_space.h"
@@ -64,10 +62,10 @@ uint ndquot;
64 62
65kmem_zone_t *qm_dqzone; 63kmem_zone_t *qm_dqzone;
66kmem_zone_t *qm_dqtrxzone; 64kmem_zone_t *qm_dqtrxzone;
67STATIC kmem_shaker_t xfs_qm_shaker; 65static kmem_shaker_t xfs_qm_shaker;
68 66
69STATIC cred_t xfs_zerocr; 67static cred_t xfs_zerocr;
70STATIC xfs_inode_t xfs_zeroino; 68static xfs_inode_t xfs_zeroino;
71 69
72STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int); 70STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int);
73STATIC void xfs_qm_list_destroy(xfs_dqlist_t *); 71STATIC void xfs_qm_list_destroy(xfs_dqlist_t *);
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index db8872be8c87..d2cdb8a2aad6 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -44,8 +44,6 @@
44#include "xfs_error.h" 44#include "xfs_error.h"
45#include "xfs_rw.h" 45#include "xfs_rw.h"
46#include "xfs_acl.h" 46#include "xfs_acl.h"
47#include "xfs_cap.h"
48#include "xfs_mac.h"
49#include "xfs_attr.h" 47#include "xfs_attr.h"
50#include "xfs_buf_item.h" 48#include "xfs_buf_item.h"
51#include "xfs_qm.h" 49#include "xfs_qm.h"
@@ -384,7 +382,7 @@ xfs_qm_dqrele_null(
384} 382}
385 383
386 384
387STATIC struct xfs_qmops xfs_qmcore_xfs = { 385static struct xfs_qmops xfs_qmcore_xfs = {
388 .xfs_qminit = xfs_qm_newmount, 386 .xfs_qminit = xfs_qm_newmount,
389 .xfs_qmdone = xfs_qm_unmount_quotadestroy, 387 .xfs_qmdone = xfs_qm_unmount_quotadestroy,
390 .xfs_qmmount = xfs_qm_endmount, 388 .xfs_qmmount = xfs_qm_endmount,
diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c
index 6f858fb81a36..709f5f545cf5 100644
--- a/fs/xfs/quota/xfs_qm_stats.c
+++ b/fs/xfs/quota/xfs_qm_stats.c
@@ -43,8 +43,6 @@
43#include "xfs_error.h" 43#include "xfs_error.h"
44#include "xfs_rw.h" 44#include "xfs_rw.h"
45#include "xfs_acl.h" 45#include "xfs_acl.h"
46#include "xfs_cap.h"
47#include "xfs_mac.h"
48#include "xfs_attr.h" 46#include "xfs_attr.h"
49#include "xfs_buf_item.h" 47#include "xfs_buf_item.h"
50#include "xfs_qm.h" 48#include "xfs_qm.h"
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index ed620c4d1594..716f562aa8b2 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -46,8 +46,6 @@
46#include "xfs_error.h" 46#include "xfs_error.h"
47#include "xfs_rw.h" 47#include "xfs_rw.h"
48#include "xfs_acl.h" 48#include "xfs_acl.h"
49#include "xfs_cap.h"
50#include "xfs_mac.h"
51#include "xfs_attr.h" 49#include "xfs_attr.h"
52#include "xfs_buf_item.h" 50#include "xfs_buf_item.h"
53#include "xfs_utils.h" 51#include "xfs_utils.h"
@@ -134,7 +132,7 @@ xfs_qm_quotactl(
134 break; 132 break;
135 133
136 case Q_XQUOTASYNC: 134 case Q_XQUOTASYNC:
137 return (xfs_sync_inodes(mp, SYNC_DELWRI, 0, NULL)); 135 return (xfs_sync_inodes(mp, SYNC_DELWRI, NULL));
138 136
139 default: 137 default:
140 break; 138 break;
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index 0242e9666e8e..d7491e7b1f3b 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -43,8 +43,6 @@
43#include "xfs_error.h" 43#include "xfs_error.h"
44#include "xfs_rw.h" 44#include "xfs_rw.h"
45#include "xfs_acl.h" 45#include "xfs_acl.h"
46#include "xfs_cap.h"
47#include "xfs_mac.h"
48#include "xfs_attr.h" 46#include "xfs_attr.h"
49#include "xfs_buf_item.h" 47#include "xfs_buf_item.h"
50#include "xfs_trans_priv.h" 48#include "xfs_trans_priv.h"
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
index 4363512d2f90..08bbd3cb87ae 100644
--- a/fs/xfs/support/debug.c
+++ b/fs/xfs/support/debug.c
@@ -19,7 +19,7 @@
19#include "debug.h" 19#include "debug.h"
20#include "spin.h" 20#include "spin.h"
21 21
22static char message[256]; /* keep it off the stack */ 22static char message[1024]; /* keep it off the stack */
23static DEFINE_SPINLOCK(xfs_err_lock); 23static DEFINE_SPINLOCK(xfs_err_lock);
24 24
25/* Translate from CE_FOO to KERN_FOO, err_level(CE_FOO) == KERN_FOO */ 25/* Translate from CE_FOO to KERN_FOO, err_level(CE_FOO) == KERN_FOO */
@@ -44,13 +44,14 @@ cmn_err(register int level, char *fmt, ...)
44 spin_lock_irqsave(&xfs_err_lock,flags); 44 spin_lock_irqsave(&xfs_err_lock,flags);
45 va_start(ap, fmt); 45 va_start(ap, fmt);
46 if (*fmt == '!') fp++; 46 if (*fmt == '!') fp++;
47 len = vsprintf(message, fp, ap); 47 len = vsnprintf(message, sizeof(message), fp, ap);
48 if (level != CE_DEBUG && message[len-1] != '\n') 48 if (len >= sizeof(message))
49 strcat(message, "\n"); 49 len = sizeof(message) - 1;
50 printk("%s%s", err_level[level], message); 50 if (message[len-1] == '\n')
51 message[len-1] = 0;
52 printk("%s%s\n", err_level[level], message);
51 va_end(ap); 53 va_end(ap);
52 spin_unlock_irqrestore(&xfs_err_lock,flags); 54 spin_unlock_irqrestore(&xfs_err_lock,flags);
53
54 BUG_ON(level == CE_PANIC); 55 BUG_ON(level == CE_PANIC);
55} 56}
56 57
@@ -64,11 +65,13 @@ icmn_err(register int level, char *fmt, va_list ap)
64 if(level > XFS_MAX_ERR_LEVEL) 65 if(level > XFS_MAX_ERR_LEVEL)
65 level = XFS_MAX_ERR_LEVEL; 66 level = XFS_MAX_ERR_LEVEL;
66 spin_lock_irqsave(&xfs_err_lock,flags); 67 spin_lock_irqsave(&xfs_err_lock,flags);
67 len = vsprintf(message, fmt, ap); 68 len = vsnprintf(message, sizeof(message), fmt, ap);
68 if (level != CE_DEBUG && message[len-1] != '\n') 69 if (len >= sizeof(message))
69 strcat(message, "\n"); 70 len = sizeof(message) - 1;
71 if (message[len-1] == '\n')
72 message[len-1] = 0;
73 printk("%s%s\n", err_level[level], message);
70 spin_unlock_irqrestore(&xfs_err_lock,flags); 74 spin_unlock_irqrestore(&xfs_err_lock,flags);
71 printk("%s%s", err_level[level], message);
72 BUG_ON(level == CE_PANIC); 75 BUG_ON(level == CE_PANIC);
73} 76}
74 77
diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h
index 4f54dca662a8..2a70cc605ae3 100644
--- a/fs/xfs/support/debug.h
+++ b/fs/xfs/support/debug.h
@@ -38,13 +38,37 @@ extern void assfail(char *expr, char *f, int l);
38 38
39#ifndef DEBUG 39#ifndef DEBUG
40# define ASSERT(expr) ((void)0) 40# define ASSERT(expr) ((void)0)
41#else 41
42#ifndef STATIC
43# define STATIC static noinline
44#endif
45
46#ifndef STATIC_INLINE
47# define STATIC_INLINE static inline
48#endif
49
50#else /* DEBUG */
51
42# define ASSERT(expr) ASSERT_ALWAYS(expr) 52# define ASSERT(expr) ASSERT_ALWAYS(expr)
43extern unsigned long random(void); 53extern unsigned long random(void);
44#endif
45 54
46#ifndef STATIC 55#ifndef STATIC
47# define STATIC static 56# define STATIC noinline
48#endif 57#endif
49 58
59/*
60 * We stop inlining of inline functions in debug mode.
61 * Unfortunately, this means static inline in header files
62 * get multiple definitions, so they need to remain static.
63 * This then gives tonnes of warnings about unused but defined
64 * functions, so we need to add the unused attribute to prevent
65 * these spurious warnings.
66 */
67#ifndef STATIC_INLINE
68# define STATIC_INLINE static __attribute__ ((unused)) noinline
69#endif
70
71#endif /* DEBUG */
72
73
50#endif /* __XFS_SUPPORT_DEBUG_H__ */ 74#endif /* __XFS_SUPPORT_DEBUG_H__ */
diff --git a/fs/xfs/support/move.h b/fs/xfs/support/move.h
index 977879c24ff5..324e413deadd 100644
--- a/fs/xfs/support/move.h
+++ b/fs/xfs/support/move.h
@@ -55,7 +55,7 @@ enum uio_seg {
55}; 55};
56 56
57struct uio { 57struct uio {
58 struct iovec *uio_iov; /* pointer to array of iovecs */ 58 struct kvec *uio_iov; /* pointer to array of iovecs */
59 int uio_iovcnt; /* number of iovecs in array */ 59 int uio_iovcnt; /* number of iovecs in array */
60 xfs_off_t uio_offset; /* offset in file this uio corresponds to */ 60 xfs_off_t uio_offset; /* offset in file this uio corresponds to */
61 int uio_resid; /* residual i/o count */ 61 int uio_resid; /* residual i/o count */
@@ -63,7 +63,7 @@ struct uio {
63}; 63};
64 64
65typedef struct uio uio_t; 65typedef struct uio uio_t;
66typedef struct iovec iovec_t; 66typedef struct kvec iovec_t;
67 67
68extern int xfs_uio_read (caddr_t, size_t, uio_t *); 68extern int xfs_uio_read (caddr_t, size_t, uio_t *);
69 69
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 4b0cb474be4c..4ca4beb7bb54 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -31,7 +31,6 @@
31#include "xfs_inode.h" 31#include "xfs_inode.h"
32#include "xfs_btree.h" 32#include "xfs_btree.h"
33#include "xfs_acl.h" 33#include "xfs_acl.h"
34#include "xfs_mac.h"
35#include "xfs_attr.h" 34#include "xfs_attr.h"
36 35
37#include <linux/capability.h> 36#include <linux/capability.h>
diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h
index bce81c7a4fdc..5bd1a2c8bd07 100644
--- a/fs/xfs/xfs_alloc_btree.h
+++ b/fs/xfs/xfs_alloc_btree.h
@@ -58,7 +58,6 @@ typedef struct xfs_btree_sblock xfs_alloc_block_t;
58/* 58/*
59 * Real block structures have a size equal to the disk block size. 59 * Real block structures have a size equal to the disk block size.
60 */ 60 */
61#define XFS_ALLOC_BLOCK_SIZE(lev,cur) (1 << (cur)->bc_blocklog)
62#define XFS_ALLOC_BLOCK_MAXRECS(lev,cur) ((cur)->bc_mp->m_alloc_mxr[lev != 0]) 61#define XFS_ALLOC_BLOCK_MAXRECS(lev,cur) ((cur)->bc_mp->m_alloc_mxr[lev != 0])
63#define XFS_ALLOC_BLOCK_MINRECS(lev,cur) ((cur)->bc_mp->m_alloc_mnr[lev != 0]) 62#define XFS_ALLOC_BLOCK_MINRECS(lev,cur) ((cur)->bc_mp->m_alloc_mnr[lev != 0])
64 63
@@ -87,16 +86,13 @@ typedef struct xfs_btree_sblock xfs_alloc_block_t;
87 * Record, key, and pointer address macros for btree blocks. 86 * Record, key, and pointer address macros for btree blocks.
88 */ 87 */
89#define XFS_ALLOC_REC_ADDR(bb,i,cur) \ 88#define XFS_ALLOC_REC_ADDR(bb,i,cur) \
90 XFS_BTREE_REC_ADDR(XFS_ALLOC_BLOCK_SIZE(0,cur), xfs_alloc, \ 89 XFS_BTREE_REC_ADDR(xfs_alloc, bb, i)
91 bb, i, XFS_ALLOC_BLOCK_MAXRECS(0, cur))
92 90
93#define XFS_ALLOC_KEY_ADDR(bb,i,cur) \ 91#define XFS_ALLOC_KEY_ADDR(bb,i,cur) \
94 XFS_BTREE_KEY_ADDR(XFS_ALLOC_BLOCK_SIZE(1,cur), xfs_alloc, \ 92 XFS_BTREE_KEY_ADDR(xfs_alloc, bb, i)
95 bb, i, XFS_ALLOC_BLOCK_MAXRECS(1, cur))
96 93
97#define XFS_ALLOC_PTR_ADDR(bb,i,cur) \ 94#define XFS_ALLOC_PTR_ADDR(bb,i,cur) \
98 XFS_BTREE_PTR_ADDR(XFS_ALLOC_BLOCK_SIZE(1,cur), xfs_alloc, \ 95 XFS_BTREE_PTR_ADDR(xfs_alloc, bb, i, XFS_ALLOC_BLOCK_MAXRECS(1, cur))
99 bb, i, XFS_ALLOC_BLOCK_MAXRECS(1, cur))
100 96
101/* 97/*
102 * Decrement cursor by one record at the level. 98 * Decrement cursor by one record at the level.
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 9ada7bdbae52..9d358ffce4e5 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -57,9 +57,9 @@
57 */ 57 */
58 58
59#define ATTR_SYSCOUNT 2 59#define ATTR_SYSCOUNT 2
60STATIC struct attrnames posix_acl_access; 60static struct attrnames posix_acl_access;
61STATIC struct attrnames posix_acl_default; 61static struct attrnames posix_acl_default;
62STATIC struct attrnames *attr_system_names[ATTR_SYSCOUNT]; 62static struct attrnames *attr_system_names[ATTR_SYSCOUNT];
63 63
64/*======================================================================== 64/*========================================================================
65 * Function prototypes for the kernel. 65 * Function prototypes for the kernel.
@@ -199,18 +199,14 @@ xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen,
199 return (error); 199 return (error);
200 200
201 /* 201 /*
202 * Determine space new attribute will use, and if it would be
203 * "local" or "remote" (note: local != inline).
204 */
205 size = xfs_attr_leaf_newentsize(namelen, valuelen,
206 mp->m_sb.sb_blocksize, &local);
207
208 /*
209 * If the inode doesn't have an attribute fork, add one. 202 * If the inode doesn't have an attribute fork, add one.
210 * (inode must not be locked when we call this routine) 203 * (inode must not be locked when we call this routine)
211 */ 204 */
212 if (XFS_IFORK_Q(dp) == 0) { 205 if (XFS_IFORK_Q(dp) == 0) {
213 if ((error = xfs_bmap_add_attrfork(dp, size, rsvd))) 206 int sf_size = sizeof(xfs_attr_sf_hdr_t) +
207 XFS_ATTR_SF_ENTSIZE_BYNAME(namelen, valuelen);
208
209 if ((error = xfs_bmap_add_attrfork(dp, sf_size, rsvd)))
214 return(error); 210 return(error);
215 } 211 }
216 212
@@ -231,6 +227,13 @@ xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen,
231 args.addname = 1; 227 args.addname = 1;
232 args.oknoent = 1; 228 args.oknoent = 1;
233 229
230 /*
231 * Determine space new attribute will use, and if it would be
232 * "local" or "remote" (note: local != inline).
233 */
234 size = xfs_attr_leaf_newentsize(namelen, valuelen,
235 mp->m_sb.sb_blocksize, &local);
236
234 nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK); 237 nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
235 if (local) { 238 if (local) {
236 if (size > (mp->m_sb.sb_blocksize >> 1)) { 239 if (size > (mp->m_sb.sb_blocksize >> 1)) {
@@ -346,7 +349,7 @@ xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen,
346 error = xfs_attr_shortform_to_leaf(&args); 349 error = xfs_attr_shortform_to_leaf(&args);
347 if (!error) { 350 if (!error) {
348 error = xfs_bmap_finish(&args.trans, args.flist, 351 error = xfs_bmap_finish(&args.trans, args.flist,
349 *args.firstblock, &committed); 352 &committed);
350 } 353 }
351 if (error) { 354 if (error) {
352 ASSERT(committed); 355 ASSERT(committed);
@@ -973,7 +976,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
973 error = xfs_attr_leaf_to_node(args); 976 error = xfs_attr_leaf_to_node(args);
974 if (!error) { 977 if (!error) {
975 error = xfs_bmap_finish(&args->trans, args->flist, 978 error = xfs_bmap_finish(&args->trans, args->flist,
976 *args->firstblock, &committed); 979 &committed);
977 } 980 }
978 if (error) { 981 if (error) {
979 ASSERT(committed); 982 ASSERT(committed);
@@ -1074,7 +1077,6 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
1074 if (!error) { 1077 if (!error) {
1075 error = xfs_bmap_finish(&args->trans, 1078 error = xfs_bmap_finish(&args->trans,
1076 args->flist, 1079 args->flist,
1077 *args->firstblock,
1078 &committed); 1080 &committed);
1079 } 1081 }
1080 if (error) { 1082 if (error) {
@@ -1152,7 +1154,7 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
1152 /* bp is gone due to xfs_da_shrink_inode */ 1154 /* bp is gone due to xfs_da_shrink_inode */
1153 if (!error) { 1155 if (!error) {
1154 error = xfs_bmap_finish(&args->trans, args->flist, 1156 error = xfs_bmap_finish(&args->trans, args->flist,
1155 *args->firstblock, &committed); 1157 &committed);
1156 } 1158 }
1157 if (error) { 1159 if (error) {
1158 ASSERT(committed); 1160 ASSERT(committed);
@@ -1307,7 +1309,6 @@ restart:
1307 if (!error) { 1309 if (!error) {
1308 error = xfs_bmap_finish(&args->trans, 1310 error = xfs_bmap_finish(&args->trans,
1309 args->flist, 1311 args->flist,
1310 *args->firstblock,
1311 &committed); 1312 &committed);
1312 } 1313 }
1313 if (error) { 1314 if (error) {
@@ -1347,7 +1348,7 @@ restart:
1347 error = xfs_da_split(state); 1348 error = xfs_da_split(state);
1348 if (!error) { 1349 if (!error) {
1349 error = xfs_bmap_finish(&args->trans, args->flist, 1350 error = xfs_bmap_finish(&args->trans, args->flist,
1350 *args->firstblock, &committed); 1351 &committed);
1351 } 1352 }
1352 if (error) { 1353 if (error) {
1353 ASSERT(committed); 1354 ASSERT(committed);
@@ -1459,7 +1460,6 @@ restart:
1459 if (!error) { 1460 if (!error) {
1460 error = xfs_bmap_finish(&args->trans, 1461 error = xfs_bmap_finish(&args->trans,
1461 args->flist, 1462 args->flist,
1462 *args->firstblock,
1463 &committed); 1463 &committed);
1464 } 1464 }
1465 if (error) { 1465 if (error) {
@@ -1594,7 +1594,7 @@ xfs_attr_node_removename(xfs_da_args_t *args)
1594 error = xfs_da_join(state); 1594 error = xfs_da_join(state);
1595 if (!error) { 1595 if (!error) {
1596 error = xfs_bmap_finish(&args->trans, args->flist, 1596 error = xfs_bmap_finish(&args->trans, args->flist,
1597 *args->firstblock, &committed); 1597 &committed);
1598 } 1598 }
1599 if (error) { 1599 if (error) {
1600 ASSERT(committed); 1600 ASSERT(committed);
@@ -1646,7 +1646,6 @@ xfs_attr_node_removename(xfs_da_args_t *args)
1646 if (!error) { 1646 if (!error) {
1647 error = xfs_bmap_finish(&args->trans, 1647 error = xfs_bmap_finish(&args->trans,
1648 args->flist, 1648 args->flist,
1649 *args->firstblock,
1650 &committed); 1649 &committed);
1651 } 1650 }
1652 if (error) { 1651 if (error) {
@@ -2090,7 +2089,7 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
2090 args->flist, NULL); 2089 args->flist, NULL);
2091 if (!error) { 2090 if (!error) {
2092 error = xfs_bmap_finish(&args->trans, args->flist, 2091 error = xfs_bmap_finish(&args->trans, args->flist,
2093 *args->firstblock, &committed); 2092 &committed);
2094 } 2093 }
2095 if (error) { 2094 if (error) {
2096 ASSERT(committed); 2095 ASSERT(committed);
@@ -2246,7 +2245,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
2246 NULL, &done); 2245 NULL, &done);
2247 if (!error) { 2246 if (!error) {
2248 error = xfs_bmap_finish(&args->trans, args->flist, 2247 error = xfs_bmap_finish(&args->trans, args->flist,
2249 *args->firstblock, &committed); 2248 &committed);
2250 } 2249 }
2251 if (error) { 2250 if (error) {
2252 ASSERT(committed); 2251 ASSERT(committed);
@@ -2477,7 +2476,7 @@ posix_acl_default_exists(
2477 return xfs_acl_vhasacl_default(vp); 2476 return xfs_acl_vhasacl_default(vp);
2478} 2477}
2479 2478
2480STATIC struct attrnames posix_acl_access = { 2479static struct attrnames posix_acl_access = {
2481 .attr_name = "posix_acl_access", 2480 .attr_name = "posix_acl_access",
2482 .attr_namelen = sizeof("posix_acl_access") - 1, 2481 .attr_namelen = sizeof("posix_acl_access") - 1,
2483 .attr_get = posix_acl_access_get, 2482 .attr_get = posix_acl_access_get,
@@ -2486,7 +2485,7 @@ STATIC struct attrnames posix_acl_access = {
2486 .attr_exists = posix_acl_access_exists, 2485 .attr_exists = posix_acl_access_exists,
2487}; 2486};
2488 2487
2489STATIC struct attrnames posix_acl_default = { 2488static struct attrnames posix_acl_default = {
2490 .attr_name = "posix_acl_default", 2489 .attr_name = "posix_acl_default",
2491 .attr_namelen = sizeof("posix_acl_default") - 1, 2490 .attr_namelen = sizeof("posix_acl_default") - 1,
2492 .attr_get = posix_acl_default_get, 2491 .attr_get = posix_acl_default_get,
@@ -2495,7 +2494,7 @@ STATIC struct attrnames posix_acl_default = {
2495 .attr_exists = posix_acl_default_exists, 2494 .attr_exists = posix_acl_default_exists,
2496}; 2495};
2497 2496
2498STATIC struct attrnames *attr_system_names[] = 2497static struct attrnames *attr_system_names[] =
2499 { &posix_acl_access, &posix_acl_default }; 2498 { &posix_acl_access, &posix_acl_default };
2500 2499
2501 2500
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 9719bbef122c..8eab73e8340a 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -94,7 +94,7 @@ STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index);
94 * Namespace helper routines 94 * Namespace helper routines
95 *========================================================================*/ 95 *========================================================================*/
96 96
97STATIC inline attrnames_t * 97STATIC_INLINE attrnames_t *
98xfs_attr_flags_namesp(int flags) 98xfs_attr_flags_namesp(int flags)
99{ 99{
100 return ((flags & XFS_ATTR_SECURE) ? &attr_secure: 100 return ((flags & XFS_ATTR_SECURE) ? &attr_secure:
@@ -105,7 +105,7 @@ xfs_attr_flags_namesp(int flags)
105 * If namespace bits don't match return 0. 105 * If namespace bits don't match return 0.
106 * If all match then return 1. 106 * If all match then return 1.
107 */ 107 */
108STATIC inline int 108STATIC_INLINE int
109xfs_attr_namesp_match(int arg_flags, int ondisk_flags) 109xfs_attr_namesp_match(int arg_flags, int ondisk_flags)
110{ 110{
111 return XFS_ATTR_NSP_ONDISK(ondisk_flags) == XFS_ATTR_NSP_ARGS_TO_ONDISK(arg_flags); 111 return XFS_ATTR_NSP_ONDISK(ondisk_flags) == XFS_ATTR_NSP_ARGS_TO_ONDISK(arg_flags);
@@ -116,7 +116,7 @@ xfs_attr_namesp_match(int arg_flags, int ondisk_flags)
116 * then return 0. 116 * then return 0.
117 * If all match or are overridable then return 1. 117 * If all match or are overridable then return 1.
118 */ 118 */
119STATIC inline int 119STATIC_INLINE int
120xfs_attr_namesp_match_overrides(int arg_flags, int ondisk_flags) 120xfs_attr_namesp_match_overrides(int arg_flags, int ondisk_flags)
121{ 121{
122 if (((arg_flags & ATTR_SECURE) == 0) != 122 if (((arg_flags & ATTR_SECURE) == 0) !=
@@ -150,6 +150,7 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
150 int offset; 150 int offset;
151 int minforkoff; /* lower limit on valid forkoff locations */ 151 int minforkoff; /* lower limit on valid forkoff locations */
152 int maxforkoff; /* upper limit on valid forkoff locations */ 152 int maxforkoff; /* upper limit on valid forkoff locations */
153 int dsize;
153 xfs_mount_t *mp = dp->i_mount; 154 xfs_mount_t *mp = dp->i_mount;
154 155
155 offset = (XFS_LITINO(mp) - bytes) >> 3; /* rounded down */ 156 offset = (XFS_LITINO(mp) - bytes) >> 3; /* rounded down */
@@ -169,8 +170,43 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
169 return 0; 170 return 0;
170 } 171 }
171 172
172 /* data fork btree root can have at least this many key/ptr pairs */ 173 dsize = dp->i_df.if_bytes;
173 minforkoff = MAX(dp->i_df.if_bytes, XFS_BMDR_SPACE_CALC(MINDBTPTRS)); 174
175 switch (dp->i_d.di_format) {
176 case XFS_DINODE_FMT_EXTENTS:
177 /*
178 * If there is no attr fork and the data fork is extents,
179 * determine if creating the default attr fork will result
180 * in the extents form migrating to btree. If so, the
181 * minimum offset only needs to be the space required for
182 * the btree root.
183 */
184 if (!dp->i_d.di_forkoff && dp->i_df.if_bytes > mp->m_attroffset)
185 dsize = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
186 break;
187
188 case XFS_DINODE_FMT_BTREE:
189 /*
190 * If have data btree then keep forkoff if we have one,
191 * otherwise we are adding a new attr, so then we set
192 * minforkoff to where the btree root can finish so we have
193 * plenty of room for attrs
194 */
195 if (dp->i_d.di_forkoff) {
196 if (offset < dp->i_d.di_forkoff)
197 return 0;
198 else
199 return dp->i_d.di_forkoff;
200 } else
201 dsize = XFS_BMAP_BROOT_SPACE(dp->i_df.if_broot);
202 break;
203 }
204
205 /*
206 * A data fork btree root must have space for at least
207 * MINDBTPTRS key/ptr pairs if the data fork is small or empty.
208 */
209 minforkoff = MAX(dsize, XFS_BMDR_SPACE_CALC(MINDBTPTRS));
174 minforkoff = roundup(minforkoff, 8) >> 3; 210 minforkoff = roundup(minforkoff, 8) >> 3;
175 211
176 /* attr fork btree root can have at least this many key/ptr pairs */ 212 /* attr fork btree root can have at least this many key/ptr pairs */
@@ -336,7 +372,8 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
336 */ 372 */
337 totsize -= size; 373 totsize -= size;
338 if (totsize == sizeof(xfs_attr_sf_hdr_t) && !args->addname && 374 if (totsize == sizeof(xfs_attr_sf_hdr_t) && !args->addname &&
339 (mp->m_flags & XFS_MOUNT_ATTR2)) { 375 (mp->m_flags & XFS_MOUNT_ATTR2) &&
376 (dp->i_d.di_format != XFS_DINODE_FMT_BTREE)) {
340 /* 377 /*
341 * Last attribute now removed, revert to original 378 * Last attribute now removed, revert to original
342 * inode format making all literal area available 379 * inode format making all literal area available
@@ -355,7 +392,8 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
355 dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize); 392 dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize);
356 ASSERT(dp->i_d.di_forkoff); 393 ASSERT(dp->i_d.di_forkoff);
357 ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) || args->addname || 394 ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) || args->addname ||
358 !(mp->m_flags & XFS_MOUNT_ATTR2)); 395 !(mp->m_flags & XFS_MOUNT_ATTR2) ||
396 dp->i_d.di_format == XFS_DINODE_FMT_BTREE);
359 dp->i_afp->if_ext_max = 397 dp->i_afp->if_ext_max =
360 XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t); 398 XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
361 dp->i_df.if_ext_max = 399 dp->i_df.if_ext_max =
@@ -748,6 +786,7 @@ xfs_attr_shortform_allfit(xfs_dabuf_t *bp, xfs_inode_t *dp)
748 + be16_to_cpu(name_loc->valuelen); 786 + be16_to_cpu(name_loc->valuelen);
749 } 787 }
750 if ((dp->i_mount->m_flags & XFS_MOUNT_ATTR2) && 788 if ((dp->i_mount->m_flags & XFS_MOUNT_ATTR2) &&
789 (dp->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
751 (bytes == sizeof(struct xfs_attr_sf_hdr))) 790 (bytes == sizeof(struct xfs_attr_sf_hdr)))
752 return(-1); 791 return(-1);
753 return(xfs_attr_shortform_bytesfit(dp, bytes)); 792 return(xfs_attr_shortform_bytesfit(dp, bytes));
@@ -786,6 +825,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
786 825
787 if (forkoff == -1) { 826 if (forkoff == -1) {
788 ASSERT(dp->i_mount->m_flags & XFS_MOUNT_ATTR2); 827 ASSERT(dp->i_mount->m_flags & XFS_MOUNT_ATTR2);
828 ASSERT(dp->i_d.di_format != XFS_DINODE_FMT_BTREE);
789 829
790 /* 830 /*
791 * Last attribute was removed, revert to original 831 * Last attribute was removed, revert to original
diff --git a/fs/xfs/xfs_bit.c b/fs/xfs/xfs_bit.c
index 43be6a7e47c6..1afe07f67e3b 100644
--- a/fs/xfs/xfs_bit.c
+++ b/fs/xfs/xfs_bit.c
@@ -29,7 +29,7 @@
29/* 29/*
30 * Index of high bit number in byte, -1 for none set, 0..7 otherwise. 30 * Index of high bit number in byte, -1 for none set, 0..7 otherwise.
31 */ 31 */
32STATIC const char xfs_highbit[256] = { 32static const char xfs_highbit[256] = {
33 -1, 0, 1, 1, 2, 2, 2, 2, /* 00 .. 07 */ 33 -1, 0, 1, 1, 2, 2, 2, 2, /* 00 .. 07 */
34 3, 3, 3, 3, 3, 3, 3, 3, /* 08 .. 0f */ 34 3, 3, 3, 3, 3, 3, 3, 3, /* 08 .. 0f */
35 4, 4, 4, 4, 4, 4, 4, 4, /* 10 .. 17 */ 35 4, 4, 4, 4, 4, 4, 4, 4, /* 10 .. 17 */
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 498ad50d1f45..87795188cedf 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -185,16 +185,6 @@ xfs_bmap_btree_to_extents(
185 int *logflagsp, /* inode logging flags */ 185 int *logflagsp, /* inode logging flags */
186 int whichfork); /* data or attr fork */ 186 int whichfork); /* data or attr fork */
187 187
188#ifdef DEBUG
189/*
190 * Check that the extents list for the inode ip is in the right order.
191 */
192STATIC void
193xfs_bmap_check_extents(
194 xfs_inode_t *ip, /* incore inode pointer */
195 int whichfork); /* data or attr fork */
196#endif
197
198/* 188/*
199 * Called by xfs_bmapi to update file extent records and the btree 189 * Called by xfs_bmapi to update file extent records and the btree
200 * after removing space (or undoing a delayed allocation). 190 * after removing space (or undoing a delayed allocation).
@@ -410,7 +400,6 @@ xfs_bmap_count_leaves(
410STATIC int 400STATIC int
411xfs_bmap_disk_count_leaves( 401xfs_bmap_disk_count_leaves(
412 xfs_ifork_t *ifp, 402 xfs_ifork_t *ifp,
413 xfs_mount_t *mp,
414 xfs_extnum_t idx, 403 xfs_extnum_t idx,
415 xfs_bmbt_block_t *block, 404 xfs_bmbt_block_t *block,
416 int numrecs, 405 int numrecs,
@@ -684,7 +673,7 @@ xfs_bmap_add_extent(
684 ASSERT(nblks <= da_old); 673 ASSERT(nblks <= da_old);
685 if (nblks < da_old) 674 if (nblks < da_old)
686 xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS, 675 xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS,
687 (int)(da_old - nblks), rsvd); 676 (int64_t)(da_old - nblks), rsvd);
688 } 677 }
689 /* 678 /*
690 * Clear out the allocated field, done with it now in any case. 679 * Clear out the allocated field, done with it now in any case.
@@ -1209,7 +1198,7 @@ xfs_bmap_add_extent_delay_real(
1209 diff = (int)(temp + temp2 - STARTBLOCKVAL(PREV.br_startblock) - 1198 diff = (int)(temp + temp2 - STARTBLOCKVAL(PREV.br_startblock) -
1210 (cur ? cur->bc_private.b.allocated : 0)); 1199 (cur ? cur->bc_private.b.allocated : 0));
1211 if (diff > 0 && 1200 if (diff > 0 &&
1212 xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS, -diff, rsvd)) { 1201 xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS, -((int64_t)diff), rsvd)) {
1213 /* 1202 /*
1214 * Ick gross gag me with a spoon. 1203 * Ick gross gag me with a spoon.
1215 */ 1204 */
@@ -1220,7 +1209,7 @@ xfs_bmap_add_extent_delay_real(
1220 diff--; 1209 diff--;
1221 if (!diff || 1210 if (!diff ||
1222 !xfs_mod_incore_sb(ip->i_mount, 1211 !xfs_mod_incore_sb(ip->i_mount,
1223 XFS_SBS_FDBLOCKS, -diff, rsvd)) 1212 XFS_SBS_FDBLOCKS, -((int64_t)diff), rsvd))
1224 break; 1213 break;
1225 } 1214 }
1226 if (temp2) { 1215 if (temp2) {
@@ -1228,7 +1217,7 @@ xfs_bmap_add_extent_delay_real(
1228 diff--; 1217 diff--;
1229 if (!diff || 1218 if (!diff ||
1230 !xfs_mod_incore_sb(ip->i_mount, 1219 !xfs_mod_incore_sb(ip->i_mount,
1231 XFS_SBS_FDBLOCKS, -diff, rsvd)) 1220 XFS_SBS_FDBLOCKS, -((int64_t)diff), rsvd))
1232 break; 1221 break;
1233 } 1222 }
1234 } 1223 }
@@ -2015,7 +2004,7 @@ xfs_bmap_add_extent_hole_delay(
2015 if (oldlen != newlen) { 2004 if (oldlen != newlen) {
2016 ASSERT(oldlen > newlen); 2005 ASSERT(oldlen > newlen);
2017 xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS, 2006 xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS,
2018 (int)(oldlen - newlen), rsvd); 2007 (int64_t)(oldlen - newlen), rsvd);
2019 /* 2008 /*
2020 * Nothing to do for disk quota accounting here. 2009 * Nothing to do for disk quota accounting here.
2021 */ 2010 */
@@ -3359,7 +3348,7 @@ xfs_bmap_del_extent(
3359 */ 3348 */
3360 ASSERT(da_old >= da_new); 3349 ASSERT(da_old >= da_new);
3361 if (da_old > da_new) 3350 if (da_old > da_new)
3362 xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, (int)(da_old - da_new), 3351 xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, (int64_t)(da_old - da_new),
3363 rsvd); 3352 rsvd);
3364 if (delta) { 3353 if (delta) {
3365 /* DELTA: report the original extent. */ 3354 /* DELTA: report the original extent. */
@@ -3543,6 +3532,7 @@ xfs_bmap_forkoff_reset(
3543 if (whichfork == XFS_ATTR_FORK && 3532 if (whichfork == XFS_ATTR_FORK &&
3544 (ip->i_d.di_format != XFS_DINODE_FMT_DEV) && 3533 (ip->i_d.di_format != XFS_DINODE_FMT_DEV) &&
3545 (ip->i_d.di_format != XFS_DINODE_FMT_UUID) && 3534 (ip->i_d.di_format != XFS_DINODE_FMT_UUID) &&
3535 (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
3546 ((mp->m_attroffset >> 3) > ip->i_d.di_forkoff)) { 3536 ((mp->m_attroffset >> 3) > ip->i_d.di_forkoff)) {
3547 ip->i_d.di_forkoff = mp->m_attroffset >> 3; 3537 ip->i_d.di_forkoff = mp->m_attroffset >> 3;
3548 ip->i_df.if_ext_max = XFS_IFORK_DSIZE(ip) / 3538 ip->i_df.if_ext_max = XFS_IFORK_DSIZE(ip) /
@@ -4079,7 +4069,7 @@ xfs_bmap_add_attrfork(
4079 } else 4069 } else
4080 XFS_SB_UNLOCK(mp, s); 4070 XFS_SB_UNLOCK(mp, s);
4081 } 4071 }
4082 if ((error = xfs_bmap_finish(&tp, &flist, firstblock, &committed))) 4072 if ((error = xfs_bmap_finish(&tp, &flist, &committed)))
4083 goto error2; 4073 goto error2;
4084 error = xfs_trans_commit(tp, XFS_TRANS_PERM_LOG_RES, NULL); 4074 error = xfs_trans_commit(tp, XFS_TRANS_PERM_LOG_RES, NULL);
4085 ASSERT(ip->i_df.if_ext_max == 4075 ASSERT(ip->i_df.if_ext_max ==
@@ -4212,7 +4202,6 @@ int /* error */
4212xfs_bmap_finish( 4202xfs_bmap_finish(
4213 xfs_trans_t **tp, /* transaction pointer addr */ 4203 xfs_trans_t **tp, /* transaction pointer addr */
4214 xfs_bmap_free_t *flist, /* i/o: list extents to free */ 4204 xfs_bmap_free_t *flist, /* i/o: list extents to free */
4215 xfs_fsblock_t firstblock, /* controlled ag for allocs */
4216 int *committed) /* xact committed or not */ 4205 int *committed) /* xact committed or not */
4217{ 4206{
4218 xfs_efd_log_item_t *efd; /* extent free data */ 4207 xfs_efd_log_item_t *efd; /* extent free data */
@@ -4533,8 +4522,7 @@ xfs_bmap_read_extents(
4533 error0); 4522 error0);
4534 if (level == 0) 4523 if (level == 0)
4535 break; 4524 break;
4536 pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block, 4525 pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]);
4537 1, mp->m_bmap_dmxr[1]);
4538 bno = be64_to_cpu(*pp); 4526 bno = be64_to_cpu(*pp);
4539 XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0); 4527 XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
4540 xfs_trans_brelse(tp, bp); 4528 xfs_trans_brelse(tp, bp);
@@ -4577,8 +4565,7 @@ xfs_bmap_read_extents(
4577 /* 4565 /*
4578 * Copy records into the extent records. 4566 * Copy records into the extent records.
4579 */ 4567 */
4580 frp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, 4568 frp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, 1);
4581 block, 1, mp->m_bmap_dmxr[0]);
4582 start = i; 4569 start = i;
4583 for (j = 0; j < num_recs; j++, i++, frp++) { 4570 for (j = 0; j < num_recs; j++, i++, frp++) {
4584 trp = xfs_iext_get_ext(ifp, i); 4571 trp = xfs_iext_get_ext(ifp, i);
@@ -4929,28 +4916,28 @@ xfs_bmapi(
4929 if (rt) { 4916 if (rt) {
4930 error = xfs_mod_incore_sb(mp, 4917 error = xfs_mod_incore_sb(mp,
4931 XFS_SBS_FREXTENTS, 4918 XFS_SBS_FREXTENTS,
4932 -(extsz), (flags & 4919 -((int64_t)extsz), (flags &
4933 XFS_BMAPI_RSVBLOCKS)); 4920 XFS_BMAPI_RSVBLOCKS));
4934 } else { 4921 } else {
4935 error = xfs_mod_incore_sb(mp, 4922 error = xfs_mod_incore_sb(mp,
4936 XFS_SBS_FDBLOCKS, 4923 XFS_SBS_FDBLOCKS,
4937 -(alen), (flags & 4924 -((int64_t)alen), (flags &
4938 XFS_BMAPI_RSVBLOCKS)); 4925 XFS_BMAPI_RSVBLOCKS));
4939 } 4926 }
4940 if (!error) { 4927 if (!error) {
4941 error = xfs_mod_incore_sb(mp, 4928 error = xfs_mod_incore_sb(mp,
4942 XFS_SBS_FDBLOCKS, 4929 XFS_SBS_FDBLOCKS,
4943 -(indlen), (flags & 4930 -((int64_t)indlen), (flags &
4944 XFS_BMAPI_RSVBLOCKS)); 4931 XFS_BMAPI_RSVBLOCKS));
4945 if (error && rt) 4932 if (error && rt)
4946 xfs_mod_incore_sb(mp, 4933 xfs_mod_incore_sb(mp,
4947 XFS_SBS_FREXTENTS, 4934 XFS_SBS_FREXTENTS,
4948 extsz, (flags & 4935 (int64_t)extsz, (flags &
4949 XFS_BMAPI_RSVBLOCKS)); 4936 XFS_BMAPI_RSVBLOCKS));
4950 else if (error) 4937 else if (error)
4951 xfs_mod_incore_sb(mp, 4938 xfs_mod_incore_sb(mp,
4952 XFS_SBS_FDBLOCKS, 4939 XFS_SBS_FDBLOCKS,
4953 alen, (flags & 4940 (int64_t)alen, (flags &
4954 XFS_BMAPI_RSVBLOCKS)); 4941 XFS_BMAPI_RSVBLOCKS));
4955 } 4942 }
4956 4943
@@ -5616,13 +5603,13 @@ xfs_bunmapi(
5616 rtexts = XFS_FSB_TO_B(mp, del.br_blockcount); 5603 rtexts = XFS_FSB_TO_B(mp, del.br_blockcount);
5617 do_div(rtexts, mp->m_sb.sb_rextsize); 5604 do_div(rtexts, mp->m_sb.sb_rextsize);
5618 xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, 5605 xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
5619 (int)rtexts, rsvd); 5606 (int64_t)rtexts, rsvd);
5620 (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, 5607 (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp,
5621 NULL, ip, -((long)del.br_blockcount), 0, 5608 NULL, ip, -((long)del.br_blockcount), 0,
5622 XFS_QMOPT_RES_RTBLKS); 5609 XFS_QMOPT_RES_RTBLKS);
5623 } else { 5610 } else {
5624 xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, 5611 xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS,
5625 (int)del.br_blockcount, rsvd); 5612 (int64_t)del.br_blockcount, rsvd);
5626 (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, 5613 (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp,
5627 NULL, ip, -((long)del.br_blockcount), 0, 5614 NULL, ip, -((long)del.br_blockcount), 0,
5628 XFS_QMOPT_RES_REGBLKS); 5615 XFS_QMOPT_RES_REGBLKS);
@@ -6048,32 +6035,6 @@ xfs_bmap_eof(
6048} 6035}
6049 6036
6050#ifdef DEBUG 6037#ifdef DEBUG
6051/*
6052 * Check that the extents list for the inode ip is in the right order.
6053 */
6054STATIC void
6055xfs_bmap_check_extents(
6056 xfs_inode_t *ip, /* incore inode pointer */
6057 int whichfork) /* data or attr fork */
6058{
6059 xfs_bmbt_rec_t *ep; /* current extent entry */
6060 xfs_extnum_t idx; /* extent record index */
6061 xfs_ifork_t *ifp; /* inode fork pointer */
6062 xfs_extnum_t nextents; /* number of extents in list */
6063 xfs_bmbt_rec_t *nextp; /* next extent entry */
6064
6065 ifp = XFS_IFORK_PTR(ip, whichfork);
6066 ASSERT(ifp->if_flags & XFS_IFEXTENTS);
6067 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
6068 ep = xfs_iext_get_ext(ifp, 0);
6069 for (idx = 0; idx < nextents - 1; idx++) {
6070 nextp = xfs_iext_get_ext(ifp, idx + 1);
6071 xfs_btree_check_rec(XFS_BTNUM_BMAP, (void *)ep,
6072 (void *)(nextp));
6073 ep = nextp;
6074 }
6075}
6076
6077STATIC 6038STATIC
6078xfs_buf_t * 6039xfs_buf_t *
6079xfs_bmap_get_bp( 6040xfs_bmap_get_bp(
@@ -6156,8 +6117,7 @@ xfs_check_block(
6156 if (root) { 6117 if (root) {
6157 keyp = XFS_BMAP_BROOT_KEY_ADDR(block, i, sz); 6118 keyp = XFS_BMAP_BROOT_KEY_ADDR(block, i, sz);
6158 } else { 6119 } else {
6159 keyp = XFS_BTREE_KEY_ADDR(mp->m_sb.sb_blocksize, 6120 keyp = XFS_BTREE_KEY_ADDR(xfs_bmbt, block, i);
6160 xfs_bmbt, block, i, dmxr);
6161 } 6121 }
6162 6122
6163 if (prevp) { 6123 if (prevp) {
@@ -6172,15 +6132,14 @@ xfs_check_block(
6172 if (root) { 6132 if (root) {
6173 pp = XFS_BMAP_BROOT_PTR_ADDR(block, i, sz); 6133 pp = XFS_BMAP_BROOT_PTR_ADDR(block, i, sz);
6174 } else { 6134 } else {
6175 pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, 6135 pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, i, dmxr);
6176 xfs_bmbt, block, i, dmxr);
6177 } 6136 }
6178 for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) { 6137 for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
6179 if (root) { 6138 if (root) {
6180 thispa = XFS_BMAP_BROOT_PTR_ADDR(block, j, sz); 6139 thispa = XFS_BMAP_BROOT_PTR_ADDR(block, j, sz);
6181 } else { 6140 } else {
6182 thispa = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, 6141 thispa = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, j,
6183 xfs_bmbt, block, j, dmxr); 6142 dmxr);
6184 } 6143 }
6185 if (*thispa == *pp) { 6144 if (*thispa == *pp) {
6186 cmn_err(CE_WARN, "%s: thispa(%d) == pp(%d) %Ld", 6145 cmn_err(CE_WARN, "%s: thispa(%d) == pp(%d) %Ld",
@@ -6267,8 +6226,7 @@ xfs_bmap_check_leaf_extents(
6267 */ 6226 */
6268 6227
6269 xfs_check_block(block, mp, 0, 0); 6228 xfs_check_block(block, mp, 0, 0);
6270 pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block, 6229 pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]);
6271 1, mp->m_bmap_dmxr[1]);
6272 bno = be64_to_cpu(*pp); 6230 bno = be64_to_cpu(*pp);
6273 XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0); 6231 XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
6274 if (bp_release) { 6232 if (bp_release) {
@@ -6305,11 +6263,9 @@ xfs_bmap_check_leaf_extents(
6305 * conform with the first entry in this one. 6263 * conform with the first entry in this one.
6306 */ 6264 */
6307 6265
6308 ep = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, 6266 ep = XFS_BTREE_REC_ADDR(xfs_bmbt, block, 1);
6309 block, 1, mp->m_bmap_dmxr[0]);
6310 for (j = 1; j < num_recs; j++) { 6267 for (j = 1; j < num_recs; j++) {
6311 nextp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, 6268 nextp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, j + 1);
6312 block, j + 1, mp->m_bmap_dmxr[0]);
6313 if (lastp) { 6269 if (lastp) {
6314 xfs_btree_check_rec(XFS_BTNUM_BMAP, 6270 xfs_btree_check_rec(XFS_BTNUM_BMAP,
6315 (void *)lastp, (void *)ep); 6271 (void *)lastp, (void *)ep);
@@ -6454,8 +6410,7 @@ xfs_bmap_count_tree(
6454 } 6410 }
6455 6411
6456 /* Dive to the next level */ 6412 /* Dive to the next level */
6457 pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, 6413 pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]);
6458 xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]);
6459 bno = be64_to_cpu(*pp); 6414 bno = be64_to_cpu(*pp);
6460 if (unlikely((error = 6415 if (unlikely((error =
6461 xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) { 6416 xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) {
@@ -6470,7 +6425,7 @@ xfs_bmap_count_tree(
6470 for (;;) { 6425 for (;;) {
6471 nextbno = be64_to_cpu(block->bb_rightsib); 6426 nextbno = be64_to_cpu(block->bb_rightsib);
6472 numrecs = be16_to_cpu(block->bb_numrecs); 6427 numrecs = be16_to_cpu(block->bb_numrecs);
6473 if (unlikely(xfs_bmap_disk_count_leaves(ifp, mp, 6428 if (unlikely(xfs_bmap_disk_count_leaves(ifp,
6474 0, block, numrecs, count) < 0)) { 6429 0, block, numrecs, count) < 0)) {
6475 xfs_trans_brelse(tp, bp); 6430 xfs_trans_brelse(tp, bp);
6476 XFS_ERROR_REPORT("xfs_bmap_count_tree(2)", 6431 XFS_ERROR_REPORT("xfs_bmap_count_tree(2)",
@@ -6518,7 +6473,6 @@ xfs_bmap_count_leaves(
6518int 6473int
6519xfs_bmap_disk_count_leaves( 6474xfs_bmap_disk_count_leaves(
6520 xfs_ifork_t *ifp, 6475 xfs_ifork_t *ifp,
6521 xfs_mount_t *mp,
6522 xfs_extnum_t idx, 6476 xfs_extnum_t idx,
6523 xfs_bmbt_block_t *block, 6477 xfs_bmbt_block_t *block,
6524 int numrecs, 6478 int numrecs,
@@ -6528,8 +6482,7 @@ xfs_bmap_disk_count_leaves(
6528 xfs_bmbt_rec_t *frp; 6482 xfs_bmbt_rec_t *frp;
6529 6483
6530 for (b = 1; b <= numrecs; b++) { 6484 for (b = 1; b <= numrecs; b++) {
6531 frp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, 6485 frp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, idx + b);
6532 xfs_bmbt, block, idx + b, mp->m_bmap_dmxr[0]);
6533 *count += xfs_bmbt_disk_get_blockcount(frp); 6486 *count += xfs_bmbt_disk_get_blockcount(frp);
6534 } 6487 }
6535 return 0; 6488 return 0;
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 80e93409b78d..4f24c7e39b31 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -202,7 +202,6 @@ int /* error */
202xfs_bmap_finish( 202xfs_bmap_finish(
203 struct xfs_trans **tp, /* transaction pointer addr */ 203 struct xfs_trans **tp, /* transaction pointer addr */
204 xfs_bmap_free_t *flist, /* i/o: list extents to free */ 204 xfs_bmap_free_t *flist, /* i/o: list extents to free */
205 xfs_fsblock_t firstblock, /* controlled a.g. for allocs */
206 int *committed); /* xact committed or not */ 205 int *committed); /* xact committed or not */
207 206
208/* 207/*
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index a7b835bf870a..0bf192fea3eb 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -678,47 +678,6 @@ error0:
678 return error; 678 return error;
679} 679}
680 680
681#ifdef DEBUG
682/*
683 * Get the data from the pointed-to record.
684 */
685int
686xfs_bmbt_get_rec(
687 xfs_btree_cur_t *cur,
688 xfs_fileoff_t *off,
689 xfs_fsblock_t *bno,
690 xfs_filblks_t *len,
691 xfs_exntst_t *state,
692 int *stat)
693{
694 xfs_bmbt_block_t *block;
695 xfs_buf_t *bp;
696#ifdef DEBUG
697 int error;
698#endif
699 int ptr;
700 xfs_bmbt_rec_t *rp;
701
702 block = xfs_bmbt_get_block(cur, 0, &bp);
703 ptr = cur->bc_ptrs[0];
704#ifdef DEBUG
705 if ((error = xfs_btree_check_lblock(cur, block, 0, bp)))
706 return error;
707#endif
708 if (ptr > be16_to_cpu(block->bb_numrecs) || ptr <= 0) {
709 *stat = 0;
710 return 0;
711 }
712 rp = XFS_BMAP_REC_IADDR(block, ptr, cur);
713 *off = xfs_bmbt_disk_get_startoff(rp);
714 *bno = xfs_bmbt_disk_get_startblock(rp);
715 *len = xfs_bmbt_disk_get_blockcount(rp);
716 *state = xfs_bmbt_disk_get_state(rp);
717 *stat = 1;
718 return 0;
719}
720#endif
721
722/* 681/*
723 * Insert one record/level. Return information to the caller 682 * Insert one record/level. Return information to the caller
724 * allowing the next level up to proceed if necessary. 683 * allowing the next level up to proceed if necessary.
@@ -1731,9 +1690,9 @@ xfs_bmdr_to_bmbt(
1731 rblock->bb_leftsib = cpu_to_be64(NULLDFSBNO); 1690 rblock->bb_leftsib = cpu_to_be64(NULLDFSBNO);
1732 rblock->bb_rightsib = cpu_to_be64(NULLDFSBNO); 1691 rblock->bb_rightsib = cpu_to_be64(NULLDFSBNO);
1733 dmxr = (int)XFS_BTREE_BLOCK_MAXRECS(dblocklen, xfs_bmdr, 0); 1692 dmxr = (int)XFS_BTREE_BLOCK_MAXRECS(dblocklen, xfs_bmdr, 0);
1734 fkp = XFS_BTREE_KEY_ADDR(dblocklen, xfs_bmdr, dblock, 1, dmxr); 1693 fkp = XFS_BTREE_KEY_ADDR(xfs_bmdr, dblock, 1);
1735 tkp = XFS_BMAP_BROOT_KEY_ADDR(rblock, 1, rblocklen); 1694 tkp = XFS_BMAP_BROOT_KEY_ADDR(rblock, 1, rblocklen);
1736 fpp = XFS_BTREE_PTR_ADDR(dblocklen, xfs_bmdr, dblock, 1, dmxr); 1695 fpp = XFS_BTREE_PTR_ADDR(xfs_bmdr, dblock, 1, dmxr);
1737 tpp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, rblocklen); 1696 tpp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, rblocklen);
1738 dmxr = be16_to_cpu(dblock->bb_numrecs); 1697 dmxr = be16_to_cpu(dblock->bb_numrecs);
1739 memcpy(tkp, fkp, sizeof(*fkp) * dmxr); 1698 memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
@@ -1862,7 +1821,7 @@ xfs_bmbt_delete(
1862 * xfs_bmbt_get_startblock, xfs_bmbt_get_blockcount and xfs_bmbt_get_state. 1821 * xfs_bmbt_get_startblock, xfs_bmbt_get_blockcount and xfs_bmbt_get_state.
1863 */ 1822 */
1864 1823
1865STATIC __inline__ void 1824STATIC_INLINE void
1866__xfs_bmbt_get_all( 1825__xfs_bmbt_get_all(
1867 __uint64_t l0, 1826 __uint64_t l0,
1868 __uint64_t l1, 1827 __uint64_t l1,
@@ -2016,30 +1975,6 @@ xfs_bmbt_disk_get_blockcount(
2016} 1975}
2017 1976
2018/* 1977/*
2019 * Extract the startblock field from an on disk bmap extent record.
2020 */
2021xfs_fsblock_t
2022xfs_bmbt_disk_get_startblock(
2023 xfs_bmbt_rec_t *r)
2024{
2025#if XFS_BIG_BLKNOS
2026 return (((xfs_fsblock_t)INT_GET(r->l0, ARCH_CONVERT) & XFS_MASK64LO(9)) << 43) |
2027 (((xfs_fsblock_t)INT_GET(r->l1, ARCH_CONVERT)) >> 21);
2028#else
2029#ifdef DEBUG
2030 xfs_dfsbno_t b;
2031
2032 b = (((xfs_dfsbno_t)INT_GET(r->l0, ARCH_CONVERT) & XFS_MASK64LO(9)) << 43) |
2033 (((xfs_dfsbno_t)INT_GET(r->l1, ARCH_CONVERT)) >> 21);
2034 ASSERT((b >> 32) == 0 || ISNULLDSTARTBLOCK(b));
2035 return (xfs_fsblock_t)b;
2036#else /* !DEBUG */
2037 return (xfs_fsblock_t)(((xfs_dfsbno_t)INT_GET(r->l1, ARCH_CONVERT)) >> 21);
2038#endif /* DEBUG */
2039#endif /* XFS_BIG_BLKNOS */
2040}
2041
2042/*
2043 * Extract the startoff field from a disk format bmap extent record. 1978 * Extract the startoff field from a disk format bmap extent record.
2044 */ 1979 */
2045xfs_fileoff_t 1980xfs_fileoff_t
@@ -2049,17 +1984,6 @@ xfs_bmbt_disk_get_startoff(
2049 return ((xfs_fileoff_t)INT_GET(r->l0, ARCH_CONVERT) & 1984 return ((xfs_fileoff_t)INT_GET(r->l0, ARCH_CONVERT) &
2050 XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN)) >> 9; 1985 XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
2051} 1986}
2052
2053xfs_exntst_t
2054xfs_bmbt_disk_get_state(
2055 xfs_bmbt_rec_t *r)
2056{
2057 int ext_flag;
2058
2059 ext_flag = (int)((INT_GET(r->l0, ARCH_CONVERT)) >> (64 - BMBT_EXNTFLAG_BITLEN));
2060 return xfs_extent_state(xfs_bmbt_disk_get_blockcount(r),
2061 ext_flag);
2062}
2063#endif /* XFS_NATIVE_HOST */ 1987#endif /* XFS_NATIVE_HOST */
2064 1988
2065 1989
@@ -2684,9 +2608,9 @@ xfs_bmbt_to_bmdr(
2684 dblock->bb_numrecs = rblock->bb_numrecs; 2608 dblock->bb_numrecs = rblock->bb_numrecs;
2685 dmxr = (int)XFS_BTREE_BLOCK_MAXRECS(dblocklen, xfs_bmdr, 0); 2609 dmxr = (int)XFS_BTREE_BLOCK_MAXRECS(dblocklen, xfs_bmdr, 0);
2686 fkp = XFS_BMAP_BROOT_KEY_ADDR(rblock, 1, rblocklen); 2610 fkp = XFS_BMAP_BROOT_KEY_ADDR(rblock, 1, rblocklen);
2687 tkp = XFS_BTREE_KEY_ADDR(dblocklen, xfs_bmdr, dblock, 1, dmxr); 2611 tkp = XFS_BTREE_KEY_ADDR(xfs_bmdr, dblock, 1);
2688 fpp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, rblocklen); 2612 fpp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, rblocklen);
2689 tpp = XFS_BTREE_PTR_ADDR(dblocklen, xfs_bmdr, dblock, 1, dmxr); 2613 tpp = XFS_BTREE_PTR_ADDR(xfs_bmdr, dblock, 1, dmxr);
2690 dmxr = be16_to_cpu(dblock->bb_numrecs); 2614 dmxr = be16_to_cpu(dblock->bb_numrecs);
2691 memcpy(tkp, fkp, sizeof(*fkp) * dmxr); 2615 memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
2692 memcpy(tpp, fpp, sizeof(*fpp) * dmxr); 2616 memcpy(tpp, fpp, sizeof(*fpp) * dmxr);
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index 49539de9525b..a77b1b753d0c 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -175,19 +175,11 @@ typedef struct xfs_btree_lblock xfs_bmbt_block_t;
175 175
176#define XFS_BUF_TO_BMBT_BLOCK(bp) ((xfs_bmbt_block_t *)XFS_BUF_PTR(bp)) 176#define XFS_BUF_TO_BMBT_BLOCK(bp) ((xfs_bmbt_block_t *)XFS_BUF_PTR(bp))
177 177
178#define XFS_BMAP_IBLOCK_SIZE(lev,cur) (1 << (cur)->bc_blocklog)
179#define XFS_BMAP_RBLOCK_DSIZE(lev,cur) ((cur)->bc_private.b.forksize) 178#define XFS_BMAP_RBLOCK_DSIZE(lev,cur) ((cur)->bc_private.b.forksize)
180#define XFS_BMAP_RBLOCK_ISIZE(lev,cur) \ 179#define XFS_BMAP_RBLOCK_ISIZE(lev,cur) \
181 ((int)XFS_IFORK_PTR((cur)->bc_private.b.ip, \ 180 ((int)XFS_IFORK_PTR((cur)->bc_private.b.ip, \
182 (cur)->bc_private.b.whichfork)->if_broot_bytes) 181 (cur)->bc_private.b.whichfork)->if_broot_bytes)
183 182
184#define XFS_BMAP_BLOCK_DSIZE(lev,cur) \
185 (((lev) == (cur)->bc_nlevels - 1 ? \
186 XFS_BMAP_RBLOCK_DSIZE(lev,cur) : XFS_BMAP_IBLOCK_SIZE(lev,cur)))
187#define XFS_BMAP_BLOCK_ISIZE(lev,cur) \
188 (((lev) == (cur)->bc_nlevels - 1 ? \
189 XFS_BMAP_RBLOCK_ISIZE(lev,cur) : XFS_BMAP_IBLOCK_SIZE(lev,cur)))
190
191#define XFS_BMAP_BLOCK_DMAXRECS(lev,cur) \ 183#define XFS_BMAP_BLOCK_DMAXRECS(lev,cur) \
192 (((lev) == (cur)->bc_nlevels - 1 ? \ 184 (((lev) == (cur)->bc_nlevels - 1 ? \
193 XFS_BTREE_BLOCK_MAXRECS(XFS_BMAP_RBLOCK_DSIZE(lev,cur), \ 185 XFS_BTREE_BLOCK_MAXRECS(XFS_BMAP_RBLOCK_DSIZE(lev,cur), \
@@ -210,37 +202,21 @@ typedef struct xfs_btree_lblock xfs_bmbt_block_t;
210 xfs_bmbt, (lev) == 0) : \ 202 xfs_bmbt, (lev) == 0) : \
211 ((cur)->bc_mp->m_bmap_dmnr[(lev) != 0]))) 203 ((cur)->bc_mp->m_bmap_dmnr[(lev) != 0])))
212 204
213#define XFS_BMAP_REC_DADDR(bb,i,cur) \ 205#define XFS_BMAP_REC_DADDR(bb,i,cur) (XFS_BTREE_REC_ADDR(xfs_bmbt, bb, i))
214 (XFS_BTREE_REC_ADDR(XFS_BMAP_BLOCK_DSIZE( \ 206
215 be16_to_cpu((bb)->bb_level), cur), \ 207#define XFS_BMAP_REC_IADDR(bb,i,cur) (XFS_BTREE_REC_ADDR(xfs_bmbt, bb, i))
216 xfs_bmbt, bb, i, XFS_BMAP_BLOCK_DMAXRECS( \
217 be16_to_cpu((bb)->bb_level), cur)))
218#define XFS_BMAP_REC_IADDR(bb,i,cur) \
219 (XFS_BTREE_REC_ADDR(XFS_BMAP_BLOCK_ISIZE( \
220 be16_to_cpu((bb)->bb_level), cur), \
221 xfs_bmbt, bb, i, XFS_BMAP_BLOCK_IMAXRECS( \
222 be16_to_cpu((bb)->bb_level), cur)))
223 208
224#define XFS_BMAP_KEY_DADDR(bb,i,cur) \ 209#define XFS_BMAP_KEY_DADDR(bb,i,cur) \
225 (XFS_BTREE_KEY_ADDR(XFS_BMAP_BLOCK_DSIZE( \ 210 (XFS_BTREE_KEY_ADDR(xfs_bmbt, bb, i))
226 be16_to_cpu((bb)->bb_level), cur), \ 211
227 xfs_bmbt, bb, i, XFS_BMAP_BLOCK_DMAXRECS( \
228 be16_to_cpu((bb)->bb_level), cur)))
229#define XFS_BMAP_KEY_IADDR(bb,i,cur) \ 212#define XFS_BMAP_KEY_IADDR(bb,i,cur) \
230 (XFS_BTREE_KEY_ADDR(XFS_BMAP_BLOCK_ISIZE( \ 213 (XFS_BTREE_KEY_ADDR(xfs_bmbt, bb, i))
231 be16_to_cpu((bb)->bb_level), cur), \
232 xfs_bmbt, bb, i, XFS_BMAP_BLOCK_IMAXRECS( \
233 be16_to_cpu((bb)->bb_level), cur)))
234 214
235#define XFS_BMAP_PTR_DADDR(bb,i,cur) \ 215#define XFS_BMAP_PTR_DADDR(bb,i,cur) \
236 (XFS_BTREE_PTR_ADDR(XFS_BMAP_BLOCK_DSIZE( \ 216 (XFS_BTREE_PTR_ADDR(xfs_bmbt, bb, i, XFS_BMAP_BLOCK_DMAXRECS( \
237 be16_to_cpu((bb)->bb_level), cur), \
238 xfs_bmbt, bb, i, XFS_BMAP_BLOCK_DMAXRECS( \
239 be16_to_cpu((bb)->bb_level), cur))) 217 be16_to_cpu((bb)->bb_level), cur)))
240#define XFS_BMAP_PTR_IADDR(bb,i,cur) \ 218#define XFS_BMAP_PTR_IADDR(bb,i,cur) \
241 (XFS_BTREE_PTR_ADDR(XFS_BMAP_BLOCK_ISIZE( \ 219 (XFS_BTREE_PTR_ADDR(xfs_bmbt, bb, i, XFS_BMAP_BLOCK_IMAXRECS( \
242 be16_to_cpu((bb)->bb_level), cur), \
243 xfs_bmbt, bb, i, XFS_BMAP_BLOCK_IMAXRECS( \
244 be16_to_cpu((bb)->bb_level), cur))) 220 be16_to_cpu((bb)->bb_level), cur)))
245 221
246/* 222/*
@@ -248,11 +224,11 @@ typedef struct xfs_btree_lblock xfs_bmbt_block_t;
248 * we don't have a cursor. 224 * we don't have a cursor.
249 */ 225 */
250#define XFS_BMAP_BROOT_REC_ADDR(bb,i,sz) \ 226#define XFS_BMAP_BROOT_REC_ADDR(bb,i,sz) \
251 (XFS_BTREE_REC_ADDR(sz,xfs_bmbt,bb,i,XFS_BMAP_BROOT_MAXRECS(sz))) 227 (XFS_BTREE_REC_ADDR(xfs_bmbt,bb,i))
252#define XFS_BMAP_BROOT_KEY_ADDR(bb,i,sz) \ 228#define XFS_BMAP_BROOT_KEY_ADDR(bb,i,sz) \
253 (XFS_BTREE_KEY_ADDR(sz,xfs_bmbt,bb,i,XFS_BMAP_BROOT_MAXRECS(sz))) 229 (XFS_BTREE_KEY_ADDR(xfs_bmbt,bb,i))
254#define XFS_BMAP_BROOT_PTR_ADDR(bb,i,sz) \ 230#define XFS_BMAP_BROOT_PTR_ADDR(bb,i,sz) \
255 (XFS_BTREE_PTR_ADDR(sz,xfs_bmbt,bb,i,XFS_BMAP_BROOT_MAXRECS(sz))) 231 (XFS_BTREE_PTR_ADDR(xfs_bmbt,bb,i,XFS_BMAP_BROOT_MAXRECS(sz)))
256 232
257#define XFS_BMAP_BROOT_NUMRECS(bb) be16_to_cpu((bb)->bb_numrecs) 233#define XFS_BMAP_BROOT_NUMRECS(bb) be16_to_cpu((bb)->bb_numrecs)
258#define XFS_BMAP_BROOT_MAXRECS(sz) XFS_BTREE_BLOCK_MAXRECS(sz,xfs_bmbt,0) 234#define XFS_BMAP_BROOT_MAXRECS(sz) XFS_BTREE_BLOCK_MAXRECS(sz,xfs_bmbt,0)
@@ -315,15 +291,11 @@ extern xfs_exntst_t xfs_bmbt_get_state(xfs_bmbt_rec_t *r);
315 291
316#ifndef XFS_NATIVE_HOST 292#ifndef XFS_NATIVE_HOST
317extern void xfs_bmbt_disk_get_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s); 293extern void xfs_bmbt_disk_get_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s);
318extern xfs_exntst_t xfs_bmbt_disk_get_state(xfs_bmbt_rec_t *r);
319extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r); 294extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r);
320extern xfs_fsblock_t xfs_bmbt_disk_get_startblock(xfs_bmbt_rec_t *r);
321extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r); 295extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r);
322#else 296#else
323#define xfs_bmbt_disk_get_all(r, s) xfs_bmbt_get_all(r, s) 297#define xfs_bmbt_disk_get_all(r, s) xfs_bmbt_get_all(r, s)
324#define xfs_bmbt_disk_get_state(r) xfs_bmbt_get_state(r)
325#define xfs_bmbt_disk_get_blockcount(r) xfs_bmbt_get_blockcount(r) 298#define xfs_bmbt_disk_get_blockcount(r) xfs_bmbt_get_blockcount(r)
326#define xfs_bmbt_disk_get_startblock(r) xfs_bmbt_get_blockcount(r)
327#define xfs_bmbt_disk_get_startoff(r) xfs_bmbt_get_startoff(r) 299#define xfs_bmbt_disk_get_startoff(r) xfs_bmbt_get_startoff(r)
328#endif /* XFS_NATIVE_HOST */ 300#endif /* XFS_NATIVE_HOST */
329 301
@@ -364,15 +336,6 @@ extern void xfs_bmbt_to_bmdr(xfs_bmbt_block_t *, int, xfs_bmdr_block_t *, int);
364extern int xfs_bmbt_update(struct xfs_btree_cur *, xfs_fileoff_t, 336extern int xfs_bmbt_update(struct xfs_btree_cur *, xfs_fileoff_t,
365 xfs_fsblock_t, xfs_filblks_t, xfs_exntst_t); 337 xfs_fsblock_t, xfs_filblks_t, xfs_exntst_t);
366 338
367#ifdef DEBUG
368/*
369 * Get the data from the pointed-to record.
370 */
371extern int xfs_bmbt_get_rec(struct xfs_btree_cur *, xfs_fileoff_t *,
372 xfs_fsblock_t *, xfs_filblks_t *,
373 xfs_exntst_t *, int *);
374#endif
375
376#endif /* __KERNEL__ */ 339#endif /* __KERNEL__ */
377 340
378#endif /* __XFS_BMAP_BTREE_H__ */ 341#endif /* __XFS_BMAP_BTREE_H__ */
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 892b06c54263..4e27d55a1e73 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -122,13 +122,13 @@ extern const __uint32_t xfs_magics[];
122 * Given block size, type prefix, block pointer, and index of requested entry 122 * Given block size, type prefix, block pointer, and index of requested entry
123 * (first entry numbered 1). 123 * (first entry numbered 1).
124 */ 124 */
125#define XFS_BTREE_REC_ADDR(bsz,t,bb,i,mxr) \ 125#define XFS_BTREE_REC_ADDR(t,bb,i) \
126 ((t ## _rec_t *)((char *)(bb) + sizeof(t ## _block_t) + \ 126 ((t ## _rec_t *)((char *)(bb) + sizeof(t ## _block_t) + \
127 ((i) - 1) * sizeof(t ## _rec_t))) 127 ((i) - 1) * sizeof(t ## _rec_t)))
128#define XFS_BTREE_KEY_ADDR(bsz,t,bb,i,mxr) \ 128#define XFS_BTREE_KEY_ADDR(t,bb,i) \
129 ((t ## _key_t *)((char *)(bb) + sizeof(t ## _block_t) + \ 129 ((t ## _key_t *)((char *)(bb) + sizeof(t ## _block_t) + \
130 ((i) - 1) * sizeof(t ## _key_t))) 130 ((i) - 1) * sizeof(t ## _key_t)))
131#define XFS_BTREE_PTR_ADDR(bsz,t,bb,i,mxr) \ 131#define XFS_BTREE_PTR_ADDR(t,bb,i,mxr) \
132 ((t ## _ptr_t *)((char *)(bb) + sizeof(t ## _block_t) + \ 132 ((t ## _ptr_t *)((char *)(bb) + sizeof(t ## _block_t) + \
133 (mxr) * sizeof(t ## _key_t) + ((i) - 1) * sizeof(t ## _ptr_t))) 133 (mxr) * sizeof(t ## _key_t) + ((i) - 1) * sizeof(t ## _ptr_t)))
134 134
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 7a55c248ea70..6c1bddc04e31 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -660,7 +660,7 @@ xfs_buf_item_committing(xfs_buf_log_item_t *bip, xfs_lsn_t commit_lsn)
660/* 660/*
661 * This is the ops vector shared by all buf log items. 661 * This is the ops vector shared by all buf log items.
662 */ 662 */
663STATIC struct xfs_item_ops xfs_buf_item_ops = { 663static struct xfs_item_ops xfs_buf_item_ops = {
664 .iop_size = (uint(*)(xfs_log_item_t*))xfs_buf_item_size, 664 .iop_size = (uint(*)(xfs_log_item_t*))xfs_buf_item_size,
665 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 665 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
666 xfs_buf_item_format, 666 xfs_buf_item_format,
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index 07c708c2b529..d7e136143066 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -21,23 +21,7 @@
21/* 21/*
22 * This is the structure used to lay out a buf log item in the 22 * This is the structure used to lay out a buf log item in the
23 * log. The data map describes which 128 byte chunks of the buffer 23 * log. The data map describes which 128 byte chunks of the buffer
24 * have been logged. This structure works only on buffers that 24 * have been logged.
25 * reside up to the first TB in the filesystem. These buffers are
26 * generated only by pre-6.2 systems and are known as XFS_LI_6_1_BUF.
27 */
28typedef struct xfs_buf_log_format_v1 {
29 unsigned short blf_type; /* buf log item type indicator */
30 unsigned short blf_size; /* size of this item */
31 __int32_t blf_blkno; /* starting blkno of this buf */
32 ushort blf_flags; /* misc state */
33 ushort blf_len; /* number of blocks in this buf */
34 unsigned int blf_map_size; /* size of data bitmap in words */
35 unsigned int blf_data_map[1];/* variable size bitmap of */
36 /* regions of buffer in this item */
37} xfs_buf_log_format_v1_t;
38
39/*
40 * This is a form of the above structure with a 64 bit blkno field.
41 * For 6.2 and beyond, this is XFS_LI_BUF. We use this to log everything. 25 * For 6.2 and beyond, this is XFS_LI_BUF. We use this to log everything.
42 */ 26 */
43typedef struct xfs_buf_log_format_t { 27typedef struct xfs_buf_log_format_t {
diff --git a/fs/xfs/xfs_cap.h b/fs/xfs/xfs_cap.h
deleted file mode 100644
index 7a0e482dd436..000000000000
--- a/fs/xfs/xfs_cap.h
+++ /dev/null
@@ -1,70 +0,0 @@
1/*
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_CAP_H__
19#define __XFS_CAP_H__
20
21/*
22 * Capabilities
23 */
24typedef __uint64_t xfs_cap_value_t;
25
26typedef struct xfs_cap_set {
27 xfs_cap_value_t cap_effective; /* use in capability checks */
28 xfs_cap_value_t cap_permitted; /* combined with file attrs */
29 xfs_cap_value_t cap_inheritable;/* pass through exec */
30} xfs_cap_set_t;
31
32/* On-disk XFS extended attribute names */
33#define SGI_CAP_FILE "SGI_CAP_FILE"
34#define SGI_CAP_FILE_SIZE (sizeof(SGI_CAP_FILE)-1)
35#define SGI_CAP_LINUX "SGI_CAP_LINUX"
36#define SGI_CAP_LINUX_SIZE (sizeof(SGI_CAP_LINUX)-1)
37
38/*
39 * For Linux, we take the bitfields directly from capability.h
40 * and no longer attempt to keep this attribute ondisk compatible
41 * with IRIX. Since this attribute is only set on executables,
42 * it just doesn't make much sense to try. We do use a different
43 * named attribute though, to avoid confusion.
44 */
45
46#ifdef __KERNEL__
47
48#ifdef CONFIG_FS_POSIX_CAP
49
50#include <linux/posix_cap_xattr.h>
51
52struct bhv_vnode;
53
54extern int xfs_cap_vhascap(struct bhv_vnode *);
55extern int xfs_cap_vset(struct bhv_vnode *, void *, size_t);
56extern int xfs_cap_vget(struct bhv_vnode *, void *, size_t);
57extern int xfs_cap_vremove(struct bhv_vnode *);
58
59#define _CAP_EXISTS xfs_cap_vhascap
60
61#else
62#define xfs_cap_vset(v,p,sz) (-EOPNOTSUPP)
63#define xfs_cap_vget(v,p,sz) (-EOPNOTSUPP)
64#define xfs_cap_vremove(v) (-EOPNOTSUPP)
65#define _CAP_EXISTS (NULL)
66#endif
67
68#endif /* __KERNEL__ */
69
70#endif /* __XFS_CAP_H__ */
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index a68bc1f1a313..aea37df4aa62 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -1090,8 +1090,7 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
1090 if (blk->magic == XFS_DA_NODE_MAGIC) { 1090 if (blk->magic == XFS_DA_NODE_MAGIC) {
1091 node = blk->bp->data; 1091 node = blk->bp->data;
1092 max = be16_to_cpu(node->hdr.count); 1092 max = be16_to_cpu(node->hdr.count);
1093 btreehashval = node->btree[max-1].hashval; 1093 blk->hashval = be32_to_cpu(node->btree[max-1].hashval);
1094 blk->hashval = be32_to_cpu(btreehashval);
1095 1094
1096 /* 1095 /*
1097 * Binary search. (note: small blocks will skip loop) 1096 * Binary search. (note: small blocks will skip loop)
@@ -2166,21 +2165,6 @@ xfs_da_reada_buf(
2166 return rval; 2165 return rval;
2167} 2166}
2168 2167
2169/*
2170 * Calculate the number of bits needed to hold i different values.
2171 */
2172uint
2173xfs_da_log2_roundup(uint i)
2174{
2175 uint rval;
2176
2177 for (rval = 0; rval < NBBY * sizeof(i); rval++) {
2178 if ((1 << rval) >= i)
2179 break;
2180 }
2181 return(rval);
2182}
2183
2184kmem_zone_t *xfs_da_state_zone; /* anchor for state struct zone */ 2168kmem_zone_t *xfs_da_state_zone; /* anchor for state struct zone */
2185kmem_zone_t *xfs_dabuf_zone; /* dabuf zone */ 2169kmem_zone_t *xfs_dabuf_zone; /* dabuf zone */
2186 2170
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index 4ab865ec8b82..44dabf02f2a3 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -249,7 +249,6 @@ int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
249 xfs_dabuf_t *dead_buf); 249 xfs_dabuf_t *dead_buf);
250 250
251uint xfs_da_hashname(const uchar_t *name_string, int name_length); 251uint xfs_da_hashname(const uchar_t *name_string, int name_length);
252uint xfs_da_log2_roundup(uint i);
253xfs_da_state_t *xfs_da_state_alloc(void); 252xfs_da_state_t *xfs_da_state_alloc(void);
254void xfs_da_state_free(xfs_da_state_t *state); 253void xfs_da_state_free(xfs_da_state_t *state);
255 254
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 50d0faea371d..b847e6a7a3f0 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -41,7 +41,6 @@
41#include "xfs_itable.h" 41#include "xfs_itable.h"
42#include "xfs_dfrag.h" 42#include "xfs_dfrag.h"
43#include "xfs_error.h" 43#include "xfs_error.h"
44#include "xfs_mac.h"
45#include "xfs_rw.h" 44#include "xfs_rw.h"
46 45
47/* 46/*
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index b95681b03d81..b1af54464f00 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -132,32 +132,6 @@ xfs_errortag_add(int error_tag, xfs_mount_t *mp)
132} 132}
133 133
134int 134int
135xfs_errortag_clear(int error_tag, xfs_mount_t *mp)
136{
137 int i;
138 int64_t fsid;
139
140 memcpy(&fsid, mp->m_fixedfsid, sizeof(xfs_fsid_t));
141
142 for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) {
143 if (xfs_etest_fsid[i] == fsid && xfs_etest[i] == error_tag) {
144 xfs_etest[i] = 0;
145 xfs_etest_fsid[i] = 0LL;
146 kmem_free(xfs_etest_fsname[i],
147 strlen(xfs_etest_fsname[i]) + 1);
148 xfs_etest_fsname[i] = NULL;
149 cmn_err(CE_WARN, "Cleared XFS error tag #%d",
150 error_tag);
151 return 0;
152 }
153 }
154
155 cmn_err(CE_WARN, "XFS error tag %d not on", error_tag);
156
157 return 1;
158}
159
160int
161xfs_errortag_clearall_umount(int64_t fsid, char *fsname, int loud) 135xfs_errortag_clearall_umount(int64_t fsid, char *fsname, int loud)
162{ 136{
163 int i; 137 int i;
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 0893e16b7d83..5599ada456a1 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -144,7 +144,6 @@ extern void xfs_error_test_init(void);
144#endif /* __ANSI_CPP__ */ 144#endif /* __ANSI_CPP__ */
145 145
146extern int xfs_errortag_add(int error_tag, xfs_mount_t *mp); 146extern int xfs_errortag_add(int error_tag, xfs_mount_t *mp);
147extern int xfs_errortag_clear(int error_tag, xfs_mount_t *mp);
148extern int xfs_errortag_clearall(xfs_mount_t *mp); 147extern int xfs_errortag_clearall(xfs_mount_t *mp);
149extern int xfs_errortag_clearall_umount(int64_t fsid, char *fsname, int loud); 148extern int xfs_errortag_clearall_umount(int64_t fsid, char *fsname, int loud);
150#else 149#else
@@ -180,6 +179,6 @@ extern void xfs_fs_cmn_err(int level, struct xfs_mount *mp, char *fmt, ...);
180 xfs_fs_cmn_err(level, mp, fmt " Unmount and run xfs_repair.", ## args) 179 xfs_fs_cmn_err(level, mp, fmt " Unmount and run xfs_repair.", ## args)
181 180
182#define xfs_fs_mount_cmn_err(f, fmt, args...) \ 181#define xfs_fs_mount_cmn_err(f, fmt, args...) \
183 ((f & XFS_MFSI_QUIET)? cmn_err(CE_WARN, "XFS: " fmt, ## args) : (void)0) 182 ((f & XFS_MFSI_QUIET)? (void)0 : cmn_err(CE_WARN, "XFS: " fmt, ## args))
184 183
185#endif /* __XFS_ERROR_H__ */ 184#endif /* __XFS_ERROR_H__ */
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 6dba78199faf..3b14427ee123 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -227,7 +227,7 @@ xfs_efi_item_committing(xfs_efi_log_item_t *efip, xfs_lsn_t lsn)
227/* 227/*
228 * This is the ops vector shared by all efi log items. 228 * This is the ops vector shared by all efi log items.
229 */ 229 */
230STATIC struct xfs_item_ops xfs_efi_item_ops = { 230static struct xfs_item_ops xfs_efi_item_ops = {
231 .iop_size = (uint(*)(xfs_log_item_t*))xfs_efi_item_size, 231 .iop_size = (uint(*)(xfs_log_item_t*))xfs_efi_item_size,
232 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 232 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
233 xfs_efi_item_format, 233 xfs_efi_item_format,
@@ -525,7 +525,7 @@ xfs_efd_item_committing(xfs_efd_log_item_t *efip, xfs_lsn_t lsn)
525/* 525/*
526 * This is the ops vector shared by all efd log items. 526 * This is the ops vector shared by all efd log items.
527 */ 527 */
528STATIC struct xfs_item_ops xfs_efd_item_ops = { 528static struct xfs_item_ops xfs_efd_item_ops = {
529 .iop_size = (uint(*)(xfs_log_item_t*))xfs_efd_item_size, 529 .iop_size = (uint(*)(xfs_log_item_t*))xfs_efd_item_size,
530 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 530 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
531 xfs_efd_item_format, 531 xfs_efd_item_format,
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index c064e72ada9e..32c37c1c47ab 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -250,8 +250,7 @@ xfs_growfs_data_private(
250 block->bb_numrecs = cpu_to_be16(1); 250 block->bb_numrecs = cpu_to_be16(1);
251 block->bb_leftsib = cpu_to_be32(NULLAGBLOCK); 251 block->bb_leftsib = cpu_to_be32(NULLAGBLOCK);
252 block->bb_rightsib = cpu_to_be32(NULLAGBLOCK); 252 block->bb_rightsib = cpu_to_be32(NULLAGBLOCK);
253 arec = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, 253 arec = XFS_BTREE_REC_ADDR(xfs_alloc, block, 1);
254 block, 1, mp->m_alloc_mxr[0]);
255 arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp)); 254 arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp));
256 arec->ar_blockcount = cpu_to_be32( 255 arec->ar_blockcount = cpu_to_be32(
257 agsize - be32_to_cpu(arec->ar_startblock)); 256 agsize - be32_to_cpu(arec->ar_startblock));
@@ -272,8 +271,7 @@ xfs_growfs_data_private(
272 block->bb_numrecs = cpu_to_be16(1); 271 block->bb_numrecs = cpu_to_be16(1);
273 block->bb_leftsib = cpu_to_be32(NULLAGBLOCK); 272 block->bb_leftsib = cpu_to_be32(NULLAGBLOCK);
274 block->bb_rightsib = cpu_to_be32(NULLAGBLOCK); 273 block->bb_rightsib = cpu_to_be32(NULLAGBLOCK);
275 arec = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, 274 arec = XFS_BTREE_REC_ADDR(xfs_alloc, block, 1);
276 block, 1, mp->m_alloc_mxr[0]);
277 arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp)); 275 arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp));
278 arec->ar_blockcount = cpu_to_be32( 276 arec->ar_blockcount = cpu_to_be32(
279 agsize - be32_to_cpu(arec->ar_startblock)); 277 agsize - be32_to_cpu(arec->ar_startblock));
@@ -460,7 +458,7 @@ xfs_fs_counts(
460{ 458{
461 unsigned long s; 459 unsigned long s;
462 460
463 xfs_icsb_sync_counters_lazy(mp); 461 xfs_icsb_sync_counters_flags(mp, XFS_ICSB_LAZY_COUNT);
464 s = XFS_SB_LOCK(mp); 462 s = XFS_SB_LOCK(mp);
465 cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); 463 cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
466 cnt->freertx = mp->m_sb.sb_frextents; 464 cnt->freertx = mp->m_sb.sb_frextents;
@@ -491,7 +489,7 @@ xfs_reserve_blocks(
491 __uint64_t *inval, 489 __uint64_t *inval,
492 xfs_fsop_resblks_t *outval) 490 xfs_fsop_resblks_t *outval)
493{ 491{
494 __int64_t lcounter, delta; 492 __int64_t lcounter, delta, fdblks_delta;
495 __uint64_t request; 493 __uint64_t request;
496 unsigned long s; 494 unsigned long s;
497 495
@@ -504,17 +502,35 @@ xfs_reserve_blocks(
504 } 502 }
505 503
506 request = *inval; 504 request = *inval;
505
506 /*
507 * With per-cpu counters, this becomes an interesting
508 * problem. we needto work out if we are freeing or allocation
509 * blocks first, then we can do the modification as necessary.
510 *
511 * We do this under the XFS_SB_LOCK so that if we are near
512 * ENOSPC, we will hold out any changes while we work out
513 * what to do. This means that the amount of free space can
514 * change while we do this, so we need to retry if we end up
515 * trying to reserve more space than is available.
516 *
517 * We also use the xfs_mod_incore_sb() interface so that we
518 * don't have to care about whether per cpu counter are
519 * enabled, disabled or even compiled in....
520 */
521retry:
507 s = XFS_SB_LOCK(mp); 522 s = XFS_SB_LOCK(mp);
523 xfs_icsb_sync_counters_flags(mp, XFS_ICSB_SB_LOCKED);
508 524
509 /* 525 /*
510 * If our previous reservation was larger than the current value, 526 * If our previous reservation was larger than the current value,
511 * then move any unused blocks back to the free pool. 527 * then move any unused blocks back to the free pool.
512 */ 528 */
513 529 fdblks_delta = 0;
514 if (mp->m_resblks > request) { 530 if (mp->m_resblks > request) {
515 lcounter = mp->m_resblks_avail - request; 531 lcounter = mp->m_resblks_avail - request;
516 if (lcounter > 0) { /* release unused blocks */ 532 if (lcounter > 0) { /* release unused blocks */
517 mp->m_sb.sb_fdblocks += lcounter; 533 fdblks_delta = lcounter;
518 mp->m_resblks_avail -= lcounter; 534 mp->m_resblks_avail -= lcounter;
519 } 535 }
520 mp->m_resblks = request; 536 mp->m_resblks = request;
@@ -522,24 +538,50 @@ xfs_reserve_blocks(
522 __int64_t free; 538 __int64_t free;
523 539
524 free = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); 540 free = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
541 if (!free)
542 goto out; /* ENOSPC and fdblks_delta = 0 */
543
525 delta = request - mp->m_resblks; 544 delta = request - mp->m_resblks;
526 lcounter = free - delta; 545 lcounter = free - delta;
527 if (lcounter < 0) { 546 if (lcounter < 0) {
528 /* We can't satisfy the request, just get what we can */ 547 /* We can't satisfy the request, just get what we can */
529 mp->m_resblks += free; 548 mp->m_resblks += free;
530 mp->m_resblks_avail += free; 549 mp->m_resblks_avail += free;
550 fdblks_delta = -free;
531 mp->m_sb.sb_fdblocks = XFS_ALLOC_SET_ASIDE(mp); 551 mp->m_sb.sb_fdblocks = XFS_ALLOC_SET_ASIDE(mp);
532 } else { 552 } else {
553 fdblks_delta = -delta;
533 mp->m_sb.sb_fdblocks = 554 mp->m_sb.sb_fdblocks =
534 lcounter + XFS_ALLOC_SET_ASIDE(mp); 555 lcounter + XFS_ALLOC_SET_ASIDE(mp);
535 mp->m_resblks = request; 556 mp->m_resblks = request;
536 mp->m_resblks_avail += delta; 557 mp->m_resblks_avail += delta;
537 } 558 }
538 } 559 }
539 560out:
540 outval->resblks = mp->m_resblks; 561 outval->resblks = mp->m_resblks;
541 outval->resblks_avail = mp->m_resblks_avail; 562 outval->resblks_avail = mp->m_resblks_avail;
542 XFS_SB_UNLOCK(mp, s); 563 XFS_SB_UNLOCK(mp, s);
564
565 if (fdblks_delta) {
566 /*
567 * If we are putting blocks back here, m_resblks_avail is
568 * already at it's max so this will put it in the free pool.
569 *
570 * If we need space, we'll either succeed in getting it
571 * from the free block count or we'll get an enospc. If
572 * we get a ENOSPC, it means things changed while we were
573 * calculating fdblks_delta and so we should try again to
574 * see if there is anything left to reserve.
575 *
576 * Don't set the reserved flag here - we don't want to reserve
577 * the extra reserve blocks from the reserve.....
578 */
579 int error;
580 error = xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, fdblks_delta, 0);
581 if (error == ENOSPC)
582 goto retry;
583 }
584
543 return 0; 585 return 0;
544} 586}
545 587
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index a446e5a115c6..b5feb3e77116 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -342,7 +342,7 @@ xfs_ialloc_ag_alloc(
342 return 0; 342 return 0;
343} 343}
344 344
345STATIC __inline xfs_agnumber_t 345STATIC_INLINE xfs_agnumber_t
346xfs_ialloc_next_ag( 346xfs_ialloc_next_ag(
347 xfs_mount_t *mp) 347 xfs_mount_t *mp)
348{ 348{
diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h
index 2c0e49893ff7..bf8e9aff272e 100644
--- a/fs/xfs/xfs_ialloc_btree.h
+++ b/fs/xfs/xfs_ialloc_btree.h
@@ -89,7 +89,6 @@ typedef struct xfs_btree_sblock xfs_inobt_block_t;
89/* 89/*
90 * Real block structures have a size equal to the disk block size. 90 * Real block structures have a size equal to the disk block size.
91 */ 91 */
92#define XFS_INOBT_BLOCK_SIZE(lev,cur) (1 << (cur)->bc_blocklog)
93#define XFS_INOBT_BLOCK_MAXRECS(lev,cur) ((cur)->bc_mp->m_inobt_mxr[lev != 0]) 92#define XFS_INOBT_BLOCK_MAXRECS(lev,cur) ((cur)->bc_mp->m_inobt_mxr[lev != 0])
94#define XFS_INOBT_BLOCK_MINRECS(lev,cur) ((cur)->bc_mp->m_inobt_mnr[lev != 0]) 93#define XFS_INOBT_BLOCK_MINRECS(lev,cur) ((cur)->bc_mp->m_inobt_mnr[lev != 0])
95#define XFS_INOBT_IS_LAST_REC(cur) \ 94#define XFS_INOBT_IS_LAST_REC(cur) \
@@ -110,14 +109,13 @@ typedef struct xfs_btree_sblock xfs_inobt_block_t;
110 * Record, key, and pointer address macros for btree blocks. 109 * Record, key, and pointer address macros for btree blocks.
111 */ 110 */
112#define XFS_INOBT_REC_ADDR(bb,i,cur) \ 111#define XFS_INOBT_REC_ADDR(bb,i,cur) \
113 (XFS_BTREE_REC_ADDR(XFS_INOBT_BLOCK_SIZE(0,cur), xfs_inobt, bb, \ 112 (XFS_BTREE_REC_ADDR(xfs_inobt, bb, i))
114 i, XFS_INOBT_BLOCK_MAXRECS(0, cur))) 113
115#define XFS_INOBT_KEY_ADDR(bb,i,cur) \ 114#define XFS_INOBT_KEY_ADDR(bb,i,cur) \
116 (XFS_BTREE_KEY_ADDR(XFS_INOBT_BLOCK_SIZE(1,cur), xfs_inobt, bb, \ 115 (XFS_BTREE_KEY_ADDR(xfs_inobt, bb, i))
117 i, XFS_INOBT_BLOCK_MAXRECS(1, cur)))
118 116
119#define XFS_INOBT_PTR_ADDR(bb,i,cur) \ 117#define XFS_INOBT_PTR_ADDR(bb,i,cur) \
120 (XFS_BTREE_PTR_ADDR(XFS_INOBT_BLOCK_SIZE(1,cur), xfs_inobt, bb, \ 118 (XFS_BTREE_PTR_ADDR(xfs_inobt, bb, \
121 i, XFS_INOBT_BLOCK_MAXRECS(1, cur))) 119 i, XFS_INOBT_BLOCK_MAXRECS(1, cur)))
122 120
123/* 121/*
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 44dfac521285..3da9829c19d5 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -47,7 +47,6 @@
47#include "xfs_utils.h" 47#include "xfs_utils.h"
48#include "xfs_dir2_trace.h" 48#include "xfs_dir2_trace.h"
49#include "xfs_quota.h" 49#include "xfs_quota.h"
50#include "xfs_mac.h"
51#include "xfs_acl.h" 50#include "xfs_acl.h"
52 51
53 52
@@ -1699,8 +1698,7 @@ xfs_itruncate_finish(
1699 * Duplicate the transaction that has the permanent 1698 * Duplicate the transaction that has the permanent
1700 * reservation and commit the old transaction. 1699 * reservation and commit the old transaction.
1701 */ 1700 */
1702 error = xfs_bmap_finish(tp, &free_list, first_block, 1701 error = xfs_bmap_finish(tp, &free_list, &committed);
1703 &committed);
1704 ntp = *tp; 1702 ntp = *tp;
1705 if (error) { 1703 if (error) {
1706 /* 1704 /*
@@ -1810,7 +1808,7 @@ xfs_igrow_start(
1810 * and any blocks between the old and new file sizes. 1808 * and any blocks between the old and new file sizes.
1811 */ 1809 */
1812 error = xfs_zero_eof(XFS_ITOV(ip), &ip->i_iocore, new_size, 1810 error = xfs_zero_eof(XFS_ITOV(ip), &ip->i_iocore, new_size,
1813 ip->i_d.di_size, new_size); 1811 ip->i_d.di_size);
1814 return error; 1812 return error;
1815} 1813}
1816 1814
@@ -2125,7 +2123,7 @@ xfs_iunlink_remove(
2125 return 0; 2123 return 0;
2126} 2124}
2127 2125
2128static __inline__ int xfs_inode_clean(xfs_inode_t *ip) 2126STATIC_INLINE int xfs_inode_clean(xfs_inode_t *ip)
2129{ 2127{
2130 return (((ip->i_itemp == NULL) || 2128 return (((ip->i_itemp == NULL) ||
2131 !(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) && 2129 !(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) &&
@@ -2707,10 +2705,24 @@ xfs_idestroy(
2707 ktrace_free(ip->i_dir_trace); 2705 ktrace_free(ip->i_dir_trace);
2708#endif 2706#endif
2709 if (ip->i_itemp) { 2707 if (ip->i_itemp) {
2710 /* XXXdpd should be able to assert this but shutdown 2708 /*
2711 * is leaving the AIL behind. */ 2709 * Only if we are shutting down the fs will we see an
2712 ASSERT(((ip->i_itemp->ili_item.li_flags & XFS_LI_IN_AIL) == 0) || 2710 * inode still in the AIL. If it is there, we should remove
2713 XFS_FORCED_SHUTDOWN(ip->i_mount)); 2711 * it to prevent a use-after-free from occurring.
2712 */
2713 xfs_mount_t *mp = ip->i_mount;
2714 xfs_log_item_t *lip = &ip->i_itemp->ili_item;
2715 int s;
2716
2717 ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) ||
2718 XFS_FORCED_SHUTDOWN(ip->i_mount));
2719 if (lip->li_flags & XFS_LI_IN_AIL) {
2720 AIL_LOCK(mp, s);
2721 if (lip->li_flags & XFS_LI_IN_AIL)
2722 xfs_trans_delete_ail(mp, lip, s);
2723 else
2724 AIL_UNLOCK(mp, s);
2725 }
2714 xfs_inode_item_destroy(ip); 2726 xfs_inode_item_destroy(ip);
2715 } 2727 }
2716 kmem_zone_free(xfs_inode_zone, ip); 2728 kmem_zone_free(xfs_inode_zone, ip);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index a7a92251eb56..565d470a6b4a 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -887,7 +887,7 @@ xfs_inode_item_committing(
887/* 887/*
888 * This is the ops vector shared by all buf log items. 888 * This is the ops vector shared by all buf log items.
889 */ 889 */
890STATIC struct xfs_item_ops xfs_inode_item_ops = { 890static struct xfs_item_ops xfs_inode_item_ops = {
891 .iop_size = (uint(*)(xfs_log_item_t*))xfs_inode_item_size, 891 .iop_size = (uint(*)(xfs_log_item_t*))xfs_inode_item_size,
892 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 892 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
893 xfs_inode_item_format, 893 xfs_inode_item_format,
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 19655124da78..cc6a7b5a9912 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -43,8 +43,6 @@
43#include "xfs_itable.h" 43#include "xfs_itable.h"
44#include "xfs_rw.h" 44#include "xfs_rw.h"
45#include "xfs_acl.h" 45#include "xfs_acl.h"
46#include "xfs_cap.h"
47#include "xfs_mac.h"
48#include "xfs_attr.h" 46#include "xfs_attr.h"
49#include "xfs_buf_item.h" 47#include "xfs_buf_item.h"
50#include "xfs_trans_space.h" 48#include "xfs_trans_space.h"
@@ -542,7 +540,7 @@ xfs_iomap_write_direct(
542 /* 540 /*
543 * Complete the transaction 541 * Complete the transaction
544 */ 542 */
545 error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed); 543 error = xfs_bmap_finish(&tp, &free_list, &committed);
546 if (error) 544 if (error)
547 goto error0; 545 goto error0;
548 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 546 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
@@ -838,8 +836,7 @@ xfs_iomap_write_allocate(
838 if (error) 836 if (error)
839 goto trans_cancel; 837 goto trans_cancel;
840 838
841 error = xfs_bmap_finish(&tp, &free_list, 839 error = xfs_bmap_finish(&tp, &free_list, &committed);
842 first_block, &committed);
843 if (error) 840 if (error)
844 goto trans_cancel; 841 goto trans_cancel;
845 842
@@ -947,8 +944,7 @@ xfs_iomap_write_unwritten(
947 if (error) 944 if (error)
948 goto error_on_bmapi_transaction; 945 goto error_on_bmapi_transaction;
949 946
950 error = xfs_bmap_finish(&(tp), &(free_list), 947 error = xfs_bmap_finish(&(tp), &(free_list), &committed);
951 firstfsb, &committed);
952 if (error) 948 if (error)
953 goto error_on_bmapi_transaction; 949 goto error_on_bmapi_transaction;
954 950
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 3cb678e3a132..ca74d3f5910e 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1514,7 +1514,6 @@ xlog_recover_reorder_trans(
1514{ 1514{
1515 xlog_recover_item_t *first_item, *itemq, *itemq_next; 1515 xlog_recover_item_t *first_item, *itemq, *itemq_next;
1516 xfs_buf_log_format_t *buf_f; 1516 xfs_buf_log_format_t *buf_f;
1517 xfs_buf_log_format_v1_t *obuf_f;
1518 ushort flags = 0; 1517 ushort flags = 0;
1519 1518
1520 first_item = itemq = trans->r_itemq; 1519 first_item = itemq = trans->r_itemq;
@@ -1522,29 +1521,16 @@ xlog_recover_reorder_trans(
1522 do { 1521 do {
1523 itemq_next = itemq->ri_next; 1522 itemq_next = itemq->ri_next;
1524 buf_f = (xfs_buf_log_format_t *)itemq->ri_buf[0].i_addr; 1523 buf_f = (xfs_buf_log_format_t *)itemq->ri_buf[0].i_addr;
1525 switch (ITEM_TYPE(itemq)) {
1526 case XFS_LI_BUF:
1527 flags = buf_f->blf_flags;
1528 break;
1529 case XFS_LI_6_1_BUF:
1530 case XFS_LI_5_3_BUF:
1531 obuf_f = (xfs_buf_log_format_v1_t*)buf_f;
1532 flags = obuf_f->blf_flags;
1533 break;
1534 }
1535 1524
1536 switch (ITEM_TYPE(itemq)) { 1525 switch (ITEM_TYPE(itemq)) {
1537 case XFS_LI_BUF: 1526 case XFS_LI_BUF:
1538 case XFS_LI_6_1_BUF: 1527 flags = buf_f->blf_flags;
1539 case XFS_LI_5_3_BUF:
1540 if (!(flags & XFS_BLI_CANCEL)) { 1528 if (!(flags & XFS_BLI_CANCEL)) {
1541 xlog_recover_insert_item_frontq(&trans->r_itemq, 1529 xlog_recover_insert_item_frontq(&trans->r_itemq,
1542 itemq); 1530 itemq);
1543 break; 1531 break;
1544 } 1532 }
1545 case XFS_LI_INODE: 1533 case XFS_LI_INODE:
1546 case XFS_LI_6_1_INODE:
1547 case XFS_LI_5_3_INODE:
1548 case XFS_LI_DQUOT: 1534 case XFS_LI_DQUOT:
1549 case XFS_LI_QUOTAOFF: 1535 case XFS_LI_QUOTAOFF:
1550 case XFS_LI_EFD: 1536 case XFS_LI_EFD:
@@ -1583,7 +1569,6 @@ xlog_recover_do_buffer_pass1(
1583 xfs_buf_cancel_t *nextp; 1569 xfs_buf_cancel_t *nextp;
1584 xfs_buf_cancel_t *prevp; 1570 xfs_buf_cancel_t *prevp;
1585 xfs_buf_cancel_t **bucket; 1571 xfs_buf_cancel_t **bucket;
1586 xfs_buf_log_format_v1_t *obuf_f;
1587 xfs_daddr_t blkno = 0; 1572 xfs_daddr_t blkno = 0;
1588 uint len = 0; 1573 uint len = 0;
1589 ushort flags = 0; 1574 ushort flags = 0;
@@ -1594,13 +1579,6 @@ xlog_recover_do_buffer_pass1(
1594 len = buf_f->blf_len; 1579 len = buf_f->blf_len;
1595 flags = buf_f->blf_flags; 1580 flags = buf_f->blf_flags;
1596 break; 1581 break;
1597 case XFS_LI_6_1_BUF:
1598 case XFS_LI_5_3_BUF:
1599 obuf_f = (xfs_buf_log_format_v1_t*)buf_f;
1600 blkno = (xfs_daddr_t) obuf_f->blf_blkno;
1601 len = obuf_f->blf_len;
1602 flags = obuf_f->blf_flags;
1603 break;
1604 } 1582 }
1605 1583
1606 /* 1584 /*
@@ -1746,7 +1724,6 @@ xlog_recover_do_buffer_pass2(
1746 xlog_t *log, 1724 xlog_t *log,
1747 xfs_buf_log_format_t *buf_f) 1725 xfs_buf_log_format_t *buf_f)
1748{ 1726{
1749 xfs_buf_log_format_v1_t *obuf_f;
1750 xfs_daddr_t blkno = 0; 1727 xfs_daddr_t blkno = 0;
1751 ushort flags = 0; 1728 ushort flags = 0;
1752 uint len = 0; 1729 uint len = 0;
@@ -1757,13 +1734,6 @@ xlog_recover_do_buffer_pass2(
1757 flags = buf_f->blf_flags; 1734 flags = buf_f->blf_flags;
1758 len = buf_f->blf_len; 1735 len = buf_f->blf_len;
1759 break; 1736 break;
1760 case XFS_LI_6_1_BUF:
1761 case XFS_LI_5_3_BUF:
1762 obuf_f = (xfs_buf_log_format_v1_t*)buf_f;
1763 blkno = (xfs_daddr_t) obuf_f->blf_blkno;
1764 flags = obuf_f->blf_flags;
1765 len = (xfs_daddr_t) obuf_f->blf_len;
1766 break;
1767 } 1737 }
1768 1738
1769 return xlog_check_buffer_cancelled(log, blkno, len, flags); 1739 return xlog_check_buffer_cancelled(log, blkno, len, flags);
@@ -1799,7 +1769,6 @@ xlog_recover_do_inode_buffer(
1799 int inodes_per_buf; 1769 int inodes_per_buf;
1800 xfs_agino_t *logged_nextp; 1770 xfs_agino_t *logged_nextp;
1801 xfs_agino_t *buffer_nextp; 1771 xfs_agino_t *buffer_nextp;
1802 xfs_buf_log_format_v1_t *obuf_f;
1803 unsigned int *data_map = NULL; 1772 unsigned int *data_map = NULL;
1804 unsigned int map_size = 0; 1773 unsigned int map_size = 0;
1805 1774
@@ -1808,12 +1777,6 @@ xlog_recover_do_inode_buffer(
1808 data_map = buf_f->blf_data_map; 1777 data_map = buf_f->blf_data_map;
1809 map_size = buf_f->blf_map_size; 1778 map_size = buf_f->blf_map_size;
1810 break; 1779 break;
1811 case XFS_LI_6_1_BUF:
1812 case XFS_LI_5_3_BUF:
1813 obuf_f = (xfs_buf_log_format_v1_t*)buf_f;
1814 data_map = obuf_f->blf_data_map;
1815 map_size = obuf_f->blf_map_size;
1816 break;
1817 } 1780 }
1818 /* 1781 /*
1819 * Set the variables corresponding to the current region to 1782 * Set the variables corresponding to the current region to
@@ -1912,7 +1875,6 @@ xlog_recover_do_reg_buffer(
1912 int i; 1875 int i;
1913 int bit; 1876 int bit;
1914 int nbits; 1877 int nbits;
1915 xfs_buf_log_format_v1_t *obuf_f;
1916 unsigned int *data_map = NULL; 1878 unsigned int *data_map = NULL;
1917 unsigned int map_size = 0; 1879 unsigned int map_size = 0;
1918 int error; 1880 int error;
@@ -1922,12 +1884,6 @@ xlog_recover_do_reg_buffer(
1922 data_map = buf_f->blf_data_map; 1884 data_map = buf_f->blf_data_map;
1923 map_size = buf_f->blf_map_size; 1885 map_size = buf_f->blf_map_size;
1924 break; 1886 break;
1925 case XFS_LI_6_1_BUF:
1926 case XFS_LI_5_3_BUF:
1927 obuf_f = (xfs_buf_log_format_v1_t*)buf_f;
1928 data_map = obuf_f->blf_data_map;
1929 map_size = obuf_f->blf_map_size;
1930 break;
1931 } 1887 }
1932 bit = 0; 1888 bit = 0;
1933 i = 1; /* 0 is the buf format structure */ 1889 i = 1; /* 0 is the buf format structure */
@@ -2160,7 +2116,6 @@ xlog_recover_do_buffer_trans(
2160 int pass) 2116 int pass)
2161{ 2117{
2162 xfs_buf_log_format_t *buf_f; 2118 xfs_buf_log_format_t *buf_f;
2163 xfs_buf_log_format_v1_t *obuf_f;
2164 xfs_mount_t *mp; 2119 xfs_mount_t *mp;
2165 xfs_buf_t *bp; 2120 xfs_buf_t *bp;
2166 int error; 2121 int error;
@@ -2197,13 +2152,6 @@ xlog_recover_do_buffer_trans(
2197 len = buf_f->blf_len; 2152 len = buf_f->blf_len;
2198 flags = buf_f->blf_flags; 2153 flags = buf_f->blf_flags;
2199 break; 2154 break;
2200 case XFS_LI_6_1_BUF:
2201 case XFS_LI_5_3_BUF:
2202 obuf_f = (xfs_buf_log_format_v1_t*)buf_f;
2203 blkno = obuf_f->blf_blkno;
2204 len = obuf_f->blf_len;
2205 flags = obuf_f->blf_flags;
2206 break;
2207 default: 2155 default:
2208 xfs_fs_cmn_err(CE_ALERT, log->l_mp, 2156 xfs_fs_cmn_err(CE_ALERT, log->l_mp,
2209 "xfs_log_recover: unknown buffer type 0x%x, logdev %s", 2157 "xfs_log_recover: unknown buffer type 0x%x, logdev %s",
@@ -2830,9 +2778,7 @@ xlog_recover_do_trans(
2830 * where xfs_daddr_t is 32-bits but mount will warn us 2778 * where xfs_daddr_t is 32-bits but mount will warn us
2831 * off a > 1 TB filesystem before we get here. 2779 * off a > 1 TB filesystem before we get here.
2832 */ 2780 */
2833 if ((ITEM_TYPE(item) == XFS_LI_BUF) || 2781 if ((ITEM_TYPE(item) == XFS_LI_BUF)) {
2834 (ITEM_TYPE(item) == XFS_LI_6_1_BUF) ||
2835 (ITEM_TYPE(item) == XFS_LI_5_3_BUF)) {
2836 if ((error = xlog_recover_do_buffer_trans(log, item, 2782 if ((error = xlog_recover_do_buffer_trans(log, item,
2837 pass))) 2783 pass)))
2838 break; 2784 break;
@@ -3902,6 +3848,9 @@ xlog_do_recover(
3902 ASSERT(XFS_SB_GOOD_VERSION(sbp)); 3848 ASSERT(XFS_SB_GOOD_VERSION(sbp));
3903 xfs_buf_relse(bp); 3849 xfs_buf_relse(bp);
3904 3850
3851 /* We've re-read the superblock so re-initialize per-cpu counters */
3852 xfs_icsb_reinit_counters(log->l_mp);
3853
3905 xlog_recover_check_summary(log); 3854 xlog_recover_check_summary(log);
3906 3855
3907 /* Normal transactions can now occur */ 3856 /* Normal transactions can now occur */
diff --git a/fs/xfs/xfs_mac.h b/fs/xfs/xfs_mac.h
deleted file mode 100644
index 18e0e98e03d0..000000000000
--- a/fs/xfs/xfs_mac.h
+++ /dev/null
@@ -1,106 +0,0 @@
1/*
2 * Copyright (c) 2001-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_MAC_H__
19#define __XFS_MAC_H__
20
21/*
22 * Mandatory Access Control
23 *
24 * Layout of a composite MAC label:
25 * ml_list contains the list of categories (MSEN) followed by the list of
26 * divisions (MINT). This is actually a header for the data structure which
27 * will have an ml_list with more than one element.
28 *
29 * -------------------------------
30 * | ml_msen_type | ml_mint_type |
31 * -------------------------------
32 * | ml_level | ml_grade |
33 * -------------------------------
34 * | ml_catcount |
35 * -------------------------------
36 * | ml_divcount |
37 * -------------------------------
38 * | category 1 |
39 * | . . . |
40 * | category N | (where N = ml_catcount)
41 * -------------------------------
42 * | division 1 |
43 * | . . . |
44 * | division M | (where M = ml_divcount)
45 * -------------------------------
46 */
47#define XFS_MAC_MAX_SETS 250
48typedef struct xfs_mac_label {
49 __uint8_t ml_msen_type; /* MSEN label type */
50 __uint8_t ml_mint_type; /* MINT label type */
51 __uint8_t ml_level; /* Hierarchical level */
52 __uint8_t ml_grade; /* Hierarchical grade */
53 __uint16_t ml_catcount; /* Category count */
54 __uint16_t ml_divcount; /* Division count */
55 /* Category set, then Division set */
56 __uint16_t ml_list[XFS_MAC_MAX_SETS];
57} xfs_mac_label_t;
58
59/* MSEN label type names. Choose an upper case ASCII character. */
60#define XFS_MSEN_ADMIN_LABEL 'A' /* Admin: low<admin != tcsec<high */
61#define XFS_MSEN_EQUAL_LABEL 'E' /* Wildcard - always equal */
62#define XFS_MSEN_HIGH_LABEL 'H' /* System High - always dominates */
63#define XFS_MSEN_MLD_HIGH_LABEL 'I' /* System High, multi-level dir */
64#define XFS_MSEN_LOW_LABEL 'L' /* System Low - always dominated */
65#define XFS_MSEN_MLD_LABEL 'M' /* TCSEC label on a multi-level dir */
66#define XFS_MSEN_MLD_LOW_LABEL 'N' /* System Low, multi-level dir */
67#define XFS_MSEN_TCSEC_LABEL 'T' /* TCSEC label */
68#define XFS_MSEN_UNKNOWN_LABEL 'U' /* unknown label */
69
70/* MINT label type names. Choose a lower case ASCII character. */
71#define XFS_MINT_BIBA_LABEL 'b' /* Dual of a TCSEC label */
72#define XFS_MINT_EQUAL_LABEL 'e' /* Wildcard - always equal */
73#define XFS_MINT_HIGH_LABEL 'h' /* High Grade - always dominates */
74#define XFS_MINT_LOW_LABEL 'l' /* Low Grade - always dominated */
75
76/* On-disk XFS extended attribute names */
77#define SGI_MAC_FILE "SGI_MAC_FILE"
78#define SGI_MAC_FILE_SIZE (sizeof(SGI_MAC_FILE)-1)
79
80
81#ifdef __KERNEL__
82
83#ifdef CONFIG_FS_POSIX_MAC
84
85/* NOT YET IMPLEMENTED */
86
87#define MACEXEC 00100
88#define MACWRITE 00200
89#define MACREAD 00400
90
91struct xfs_inode;
92extern int xfs_mac_iaccess(struct xfs_inode *, mode_t, cred_t *);
93
94#define _MAC_XFS_IACCESS(i,m,c) (xfs_mac_iaccess(i,m,c))
95#define _MAC_VACCESS(v,c,m) (xfs_mac_vaccess(v,c,m))
96#define _MAC_EXISTS xfs_mac_vhaslabel
97
98#else
99#define _MAC_XFS_IACCESS(i,m,c) (0)
100#define _MAC_VACCESS(v,c,m) (0)
101#define _MAC_EXISTS (NULL)
102#endif
103
104#endif /* __KERNEL__ */
105
106#endif /* __XFS_MAC_H__ */
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 9dfae18d995f..3bed0cf0d8af 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -52,21 +52,19 @@ STATIC void xfs_unmountfs_wait(xfs_mount_t *);
52 52
53#ifdef HAVE_PERCPU_SB 53#ifdef HAVE_PERCPU_SB
54STATIC void xfs_icsb_destroy_counters(xfs_mount_t *); 54STATIC void xfs_icsb_destroy_counters(xfs_mount_t *);
55STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t, int); 55STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
56 int, int);
56STATIC void xfs_icsb_sync_counters(xfs_mount_t *); 57STATIC void xfs_icsb_sync_counters(xfs_mount_t *);
57STATIC int xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t, 58STATIC int xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t,
58 int, int); 59 int64_t, int);
59STATIC int xfs_icsb_modify_counters_locked(xfs_mount_t *, xfs_sb_field_t,
60 int, int);
61STATIC int xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t); 60STATIC int xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
62 61
63#else 62#else
64 63
65#define xfs_icsb_destroy_counters(mp) do { } while (0) 64#define xfs_icsb_destroy_counters(mp) do { } while (0)
66#define xfs_icsb_balance_counter(mp, a, b) do { } while (0) 65#define xfs_icsb_balance_counter(mp, a, b, c) do { } while (0)
67#define xfs_icsb_sync_counters(mp) do { } while (0) 66#define xfs_icsb_sync_counters(mp) do { } while (0)
68#define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0) 67#define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0)
69#define xfs_icsb_modify_counters_locked(mp, a, b, c) do { } while (0)
70 68
71#endif 69#endif
72 70
@@ -545,9 +543,8 @@ xfs_readsb(xfs_mount_t *mp, int flags)
545 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 543 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
546 } 544 }
547 545
548 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0); 546 /* Initialize per-cpu counters */
549 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0); 547 xfs_icsb_reinit_counters(mp);
550 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
551 548
552 mp->m_sb_bp = bp; 549 mp->m_sb_bp = bp;
553 xfs_buf_relse(bp); 550 xfs_buf_relse(bp);
@@ -1254,8 +1251,11 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
1254 * The SB_LOCK must be held when this routine is called. 1251 * The SB_LOCK must be held when this routine is called.
1255 */ 1252 */
1256int 1253int
1257xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field, 1254xfs_mod_incore_sb_unlocked(
1258 int delta, int rsvd) 1255 xfs_mount_t *mp,
1256 xfs_sb_field_t field,
1257 int64_t delta,
1258 int rsvd)
1259{ 1259{
1260 int scounter; /* short counter for 32 bit fields */ 1260 int scounter; /* short counter for 32 bit fields */
1261 long long lcounter; /* long counter for 64 bit fields */ 1261 long long lcounter; /* long counter for 64 bit fields */
@@ -1287,7 +1287,6 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
1287 mp->m_sb.sb_ifree = lcounter; 1287 mp->m_sb.sb_ifree = lcounter;
1288 return 0; 1288 return 0;
1289 case XFS_SBS_FDBLOCKS: 1289 case XFS_SBS_FDBLOCKS:
1290
1291 lcounter = (long long) 1290 lcounter = (long long)
1292 mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); 1291 mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
1293 res_used = (long long)(mp->m_resblks - mp->m_resblks_avail); 1292 res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
@@ -1418,7 +1417,11 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
1418 * routine to do the work. 1417 * routine to do the work.
1419 */ 1418 */
1420int 1419int
1421xfs_mod_incore_sb(xfs_mount_t *mp, xfs_sb_field_t field, int delta, int rsvd) 1420xfs_mod_incore_sb(
1421 xfs_mount_t *mp,
1422 xfs_sb_field_t field,
1423 int64_t delta,
1424 int rsvd)
1422{ 1425{
1423 unsigned long s; 1426 unsigned long s;
1424 int status; 1427 int status;
@@ -1485,9 +1488,11 @@ xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd)
1485 case XFS_SBS_IFREE: 1488 case XFS_SBS_IFREE:
1486 case XFS_SBS_FDBLOCKS: 1489 case XFS_SBS_FDBLOCKS:
1487 if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) { 1490 if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
1488 status = xfs_icsb_modify_counters_locked(mp, 1491 XFS_SB_UNLOCK(mp, s);
1492 status = xfs_icsb_modify_counters(mp,
1489 msbp->msb_field, 1493 msbp->msb_field,
1490 msbp->msb_delta, rsvd); 1494 msbp->msb_delta, rsvd);
1495 s = XFS_SB_LOCK(mp);
1491 break; 1496 break;
1492 } 1497 }
1493 /* FALLTHROUGH */ 1498 /* FALLTHROUGH */
@@ -1521,11 +1526,12 @@ xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd)
1521 case XFS_SBS_IFREE: 1526 case XFS_SBS_IFREE:
1522 case XFS_SBS_FDBLOCKS: 1527 case XFS_SBS_FDBLOCKS:
1523 if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) { 1528 if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
1524 status = 1529 XFS_SB_UNLOCK(mp, s);
1525 xfs_icsb_modify_counters_locked(mp, 1530 status = xfs_icsb_modify_counters(mp,
1526 msbp->msb_field, 1531 msbp->msb_field,
1527 -(msbp->msb_delta), 1532 -(msbp->msb_delta),
1528 rsvd); 1533 rsvd);
1534 s = XFS_SB_LOCK(mp);
1529 break; 1535 break;
1530 } 1536 }
1531 /* FALLTHROUGH */ 1537 /* FALLTHROUGH */
@@ -1733,14 +1739,17 @@ xfs_icsb_cpu_notify(
1733 memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); 1739 memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
1734 break; 1740 break;
1735 case CPU_ONLINE: 1741 case CPU_ONLINE:
1736 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0); 1742 xfs_icsb_lock(mp);
1737 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0); 1743 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0, 0);
1738 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0); 1744 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0, 0);
1745 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0, 0);
1746 xfs_icsb_unlock(mp);
1739 break; 1747 break;
1740 case CPU_DEAD: 1748 case CPU_DEAD:
1741 /* Disable all the counters, then fold the dead cpu's 1749 /* Disable all the counters, then fold the dead cpu's
1742 * count into the total on the global superblock and 1750 * count into the total on the global superblock and
1743 * re-enable the counters. */ 1751 * re-enable the counters. */
1752 xfs_icsb_lock(mp);
1744 s = XFS_SB_LOCK(mp); 1753 s = XFS_SB_LOCK(mp);
1745 xfs_icsb_disable_counter(mp, XFS_SBS_ICOUNT); 1754 xfs_icsb_disable_counter(mp, XFS_SBS_ICOUNT);
1746 xfs_icsb_disable_counter(mp, XFS_SBS_IFREE); 1755 xfs_icsb_disable_counter(mp, XFS_SBS_IFREE);
@@ -1752,10 +1761,14 @@ xfs_icsb_cpu_notify(
1752 1761
1753 memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); 1762 memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
1754 1763
1755 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, XFS_ICSB_SB_LOCKED); 1764 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT,
1756 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, XFS_ICSB_SB_LOCKED); 1765 XFS_ICSB_SB_LOCKED, 0);
1757 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, XFS_ICSB_SB_LOCKED); 1766 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE,
1767 XFS_ICSB_SB_LOCKED, 0);
1768 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS,
1769 XFS_ICSB_SB_LOCKED, 0);
1758 XFS_SB_UNLOCK(mp, s); 1770 XFS_SB_UNLOCK(mp, s);
1771 xfs_icsb_unlock(mp);
1759 break; 1772 break;
1760 } 1773 }
1761 1774
@@ -1784,6 +1797,9 @@ xfs_icsb_init_counters(
1784 cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); 1797 cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
1785 memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); 1798 memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
1786 } 1799 }
1800
1801 mutex_init(&mp->m_icsb_mutex);
1802
1787 /* 1803 /*
1788 * start with all counters disabled so that the 1804 * start with all counters disabled so that the
1789 * initial balance kicks us off correctly 1805 * initial balance kicks us off correctly
@@ -1792,6 +1808,22 @@ xfs_icsb_init_counters(
1792 return 0; 1808 return 0;
1793} 1809}
1794 1810
1811void
1812xfs_icsb_reinit_counters(
1813 xfs_mount_t *mp)
1814{
1815 xfs_icsb_lock(mp);
1816 /*
1817 * start with all counters disabled so that the
1818 * initial balance kicks us off correctly
1819 */
1820 mp->m_icsb_counters = -1;
1821 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0, 0);
1822 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0, 0);
1823 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0, 0);
1824 xfs_icsb_unlock(mp);
1825}
1826
1795STATIC void 1827STATIC void
1796xfs_icsb_destroy_counters( 1828xfs_icsb_destroy_counters(
1797 xfs_mount_t *mp) 1829 xfs_mount_t *mp)
@@ -1800,9 +1832,10 @@ xfs_icsb_destroy_counters(
1800 unregister_hotcpu_notifier(&mp->m_icsb_notifier); 1832 unregister_hotcpu_notifier(&mp->m_icsb_notifier);
1801 free_percpu(mp->m_sb_cnts); 1833 free_percpu(mp->m_sb_cnts);
1802 } 1834 }
1835 mutex_destroy(&mp->m_icsb_mutex);
1803} 1836}
1804 1837
1805STATIC inline void 1838STATIC_INLINE void
1806xfs_icsb_lock_cntr( 1839xfs_icsb_lock_cntr(
1807 xfs_icsb_cnts_t *icsbp) 1840 xfs_icsb_cnts_t *icsbp)
1808{ 1841{
@@ -1811,7 +1844,7 @@ xfs_icsb_lock_cntr(
1811 } 1844 }
1812} 1845}
1813 1846
1814STATIC inline void 1847STATIC_INLINE void
1815xfs_icsb_unlock_cntr( 1848xfs_icsb_unlock_cntr(
1816 xfs_icsb_cnts_t *icsbp) 1849 xfs_icsb_cnts_t *icsbp)
1817{ 1850{
@@ -1819,7 +1852,7 @@ xfs_icsb_unlock_cntr(
1819} 1852}
1820 1853
1821 1854
1822STATIC inline void 1855STATIC_INLINE void
1823xfs_icsb_lock_all_counters( 1856xfs_icsb_lock_all_counters(
1824 xfs_mount_t *mp) 1857 xfs_mount_t *mp)
1825{ 1858{
@@ -1832,7 +1865,7 @@ xfs_icsb_lock_all_counters(
1832 } 1865 }
1833} 1866}
1834 1867
1835STATIC inline void 1868STATIC_INLINE void
1836xfs_icsb_unlock_all_counters( 1869xfs_icsb_unlock_all_counters(
1837 xfs_mount_t *mp) 1870 xfs_mount_t *mp)
1838{ 1871{
@@ -1888,6 +1921,17 @@ xfs_icsb_disable_counter(
1888 1921
1889 ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS)); 1922 ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
1890 1923
1924 /*
1925 * If we are already disabled, then there is nothing to do
1926 * here. We check before locking all the counters to avoid
1927 * the expensive lock operation when being called in the
1928 * slow path and the counter is already disabled. This is
1929 * safe because the only time we set or clear this state is under
1930 * the m_icsb_mutex.
1931 */
1932 if (xfs_icsb_counter_disabled(mp, field))
1933 return 0;
1934
1891 xfs_icsb_lock_all_counters(mp); 1935 xfs_icsb_lock_all_counters(mp);
1892 if (!test_and_set_bit(field, &mp->m_icsb_counters)) { 1936 if (!test_and_set_bit(field, &mp->m_icsb_counters)) {
1893 /* drain back to superblock */ 1937 /* drain back to superblock */
@@ -1948,8 +1992,8 @@ xfs_icsb_enable_counter(
1948 xfs_icsb_unlock_all_counters(mp); 1992 xfs_icsb_unlock_all_counters(mp);
1949} 1993}
1950 1994
1951STATIC void 1995void
1952xfs_icsb_sync_counters_int( 1996xfs_icsb_sync_counters_flags(
1953 xfs_mount_t *mp, 1997 xfs_mount_t *mp,
1954 int flags) 1998 int flags)
1955{ 1999{
@@ -1981,40 +2025,39 @@ STATIC void
1981xfs_icsb_sync_counters( 2025xfs_icsb_sync_counters(
1982 xfs_mount_t *mp) 2026 xfs_mount_t *mp)
1983{ 2027{
1984 xfs_icsb_sync_counters_int(mp, 0); 2028 xfs_icsb_sync_counters_flags(mp, 0);
1985}
1986
1987/*
1988 * lazy addition used for things like df, background sb syncs, etc
1989 */
1990void
1991xfs_icsb_sync_counters_lazy(
1992 xfs_mount_t *mp)
1993{
1994 xfs_icsb_sync_counters_int(mp, XFS_ICSB_LAZY_COUNT);
1995} 2029}
1996 2030
1997/* 2031/*
1998 * Balance and enable/disable counters as necessary. 2032 * Balance and enable/disable counters as necessary.
1999 * 2033 *
2000 * Thresholds for re-enabling counters are somewhat magic. 2034 * Thresholds for re-enabling counters are somewhat magic. inode counts are
2001 * inode counts are chosen to be the same number as single 2035 * chosen to be the same number as single on disk allocation chunk per CPU, and
2002 * on disk allocation chunk per CPU, and free blocks is 2036 * free blocks is something far enough zero that we aren't going thrash when we
2003 * something far enough zero that we aren't going thrash 2037 * get near ENOSPC. We also need to supply a minimum we require per cpu to
2004 * when we get near ENOSPC. 2038 * prevent looping endlessly when xfs_alloc_space asks for more than will
2039 * be distributed to a single CPU but each CPU has enough blocks to be
2040 * reenabled.
2041 *
2042 * Note that we can be called when counters are already disabled.
2043 * xfs_icsb_disable_counter() optimises the counter locking in this case to
2044 * prevent locking every per-cpu counter needlessly.
2005 */ 2045 */
2006#define XFS_ICSB_INO_CNTR_REENABLE 64 2046
2047#define XFS_ICSB_INO_CNTR_REENABLE (uint64_t)64
2007#define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \ 2048#define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \
2008 (512 + XFS_ALLOC_SET_ASIDE(mp)) 2049 (uint64_t)(512 + XFS_ALLOC_SET_ASIDE(mp))
2009STATIC void 2050STATIC void
2010xfs_icsb_balance_counter( 2051xfs_icsb_balance_counter(
2011 xfs_mount_t *mp, 2052 xfs_mount_t *mp,
2012 xfs_sb_field_t field, 2053 xfs_sb_field_t field,
2013 int flags) 2054 int flags,
2055 int min_per_cpu)
2014{ 2056{
2015 uint64_t count, resid; 2057 uint64_t count, resid;
2016 int weight = num_online_cpus(); 2058 int weight = num_online_cpus();
2017 int s; 2059 int s;
2060 uint64_t min = (uint64_t)min_per_cpu;
2018 2061
2019 if (!(flags & XFS_ICSB_SB_LOCKED)) 2062 if (!(flags & XFS_ICSB_SB_LOCKED))
2020 s = XFS_SB_LOCK(mp); 2063 s = XFS_SB_LOCK(mp);
@@ -2027,19 +2070,19 @@ xfs_icsb_balance_counter(
2027 case XFS_SBS_ICOUNT: 2070 case XFS_SBS_ICOUNT:
2028 count = mp->m_sb.sb_icount; 2071 count = mp->m_sb.sb_icount;
2029 resid = do_div(count, weight); 2072 resid = do_div(count, weight);
2030 if (count < XFS_ICSB_INO_CNTR_REENABLE) 2073 if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
2031 goto out; 2074 goto out;
2032 break; 2075 break;
2033 case XFS_SBS_IFREE: 2076 case XFS_SBS_IFREE:
2034 count = mp->m_sb.sb_ifree; 2077 count = mp->m_sb.sb_ifree;
2035 resid = do_div(count, weight); 2078 resid = do_div(count, weight);
2036 if (count < XFS_ICSB_INO_CNTR_REENABLE) 2079 if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
2037 goto out; 2080 goto out;
2038 break; 2081 break;
2039 case XFS_SBS_FDBLOCKS: 2082 case XFS_SBS_FDBLOCKS:
2040 count = mp->m_sb.sb_fdblocks; 2083 count = mp->m_sb.sb_fdblocks;
2041 resid = do_div(count, weight); 2084 resid = do_div(count, weight);
2042 if (count < XFS_ICSB_FDBLK_CNTR_REENABLE(mp)) 2085 if (count < max(min, XFS_ICSB_FDBLK_CNTR_REENABLE(mp)))
2043 goto out; 2086 goto out;
2044 break; 2087 break;
2045 default: 2088 default:
@@ -2054,32 +2097,39 @@ out:
2054 XFS_SB_UNLOCK(mp, s); 2097 XFS_SB_UNLOCK(mp, s);
2055} 2098}
2056 2099
2057STATIC int 2100int
2058xfs_icsb_modify_counters_int( 2101xfs_icsb_modify_counters(
2059 xfs_mount_t *mp, 2102 xfs_mount_t *mp,
2060 xfs_sb_field_t field, 2103 xfs_sb_field_t field,
2061 int delta, 2104 int64_t delta,
2062 int rsvd, 2105 int rsvd)
2063 int flags)
2064{ 2106{
2065 xfs_icsb_cnts_t *icsbp; 2107 xfs_icsb_cnts_t *icsbp;
2066 long long lcounter; /* long counter for 64 bit fields */ 2108 long long lcounter; /* long counter for 64 bit fields */
2067 int cpu, s, locked = 0; 2109 int cpu, ret = 0, s;
2068 int ret = 0, balance_done = 0;
2069 2110
2111 might_sleep();
2070again: 2112again:
2071 cpu = get_cpu(); 2113 cpu = get_cpu();
2072 icsbp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, cpu), 2114 icsbp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, cpu);
2073 xfs_icsb_lock_cntr(icsbp); 2115
2116 /*
2117 * if the counter is disabled, go to slow path
2118 */
2074 if (unlikely(xfs_icsb_counter_disabled(mp, field))) 2119 if (unlikely(xfs_icsb_counter_disabled(mp, field)))
2075 goto slow_path; 2120 goto slow_path;
2121 xfs_icsb_lock_cntr(icsbp);
2122 if (unlikely(xfs_icsb_counter_disabled(mp, field))) {
2123 xfs_icsb_unlock_cntr(icsbp);
2124 goto slow_path;
2125 }
2076 2126
2077 switch (field) { 2127 switch (field) {
2078 case XFS_SBS_ICOUNT: 2128 case XFS_SBS_ICOUNT:
2079 lcounter = icsbp->icsb_icount; 2129 lcounter = icsbp->icsb_icount;
2080 lcounter += delta; 2130 lcounter += delta;
2081 if (unlikely(lcounter < 0)) 2131 if (unlikely(lcounter < 0))
2082 goto slow_path; 2132 goto balance_counter;
2083 icsbp->icsb_icount = lcounter; 2133 icsbp->icsb_icount = lcounter;
2084 break; 2134 break;
2085 2135
@@ -2087,7 +2137,7 @@ again:
2087 lcounter = icsbp->icsb_ifree; 2137 lcounter = icsbp->icsb_ifree;
2088 lcounter += delta; 2138 lcounter += delta;
2089 if (unlikely(lcounter < 0)) 2139 if (unlikely(lcounter < 0))
2090 goto slow_path; 2140 goto balance_counter;
2091 icsbp->icsb_ifree = lcounter; 2141 icsbp->icsb_ifree = lcounter;
2092 break; 2142 break;
2093 2143
@@ -2097,7 +2147,7 @@ again:
2097 lcounter = icsbp->icsb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); 2147 lcounter = icsbp->icsb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
2098 lcounter += delta; 2148 lcounter += delta;
2099 if (unlikely(lcounter < 0)) 2149 if (unlikely(lcounter < 0))
2100 goto slow_path; 2150 goto balance_counter;
2101 icsbp->icsb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp); 2151 icsbp->icsb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
2102 break; 2152 break;
2103 default: 2153 default:
@@ -2106,72 +2156,78 @@ again:
2106 } 2156 }
2107 xfs_icsb_unlock_cntr(icsbp); 2157 xfs_icsb_unlock_cntr(icsbp);
2108 put_cpu(); 2158 put_cpu();
2109 if (locked)
2110 XFS_SB_UNLOCK(mp, s);
2111 return 0; 2159 return 0;
2112 2160
2113 /*
2114 * The slow path needs to be run with the SBLOCK
2115 * held so that we prevent other threads from
2116 * attempting to run this path at the same time.
2117 * this provides exclusion for the balancing code,
2118 * and exclusive fallback if the balance does not
2119 * provide enough resources to continue in an unlocked
2120 * manner.
2121 */
2122slow_path: 2161slow_path:
2123 xfs_icsb_unlock_cntr(icsbp);
2124 put_cpu(); 2162 put_cpu();
2125 2163
2126 /* need to hold superblock incase we need 2164 /*
2127 * to disable a counter */ 2165 * serialise with a mutex so we don't burn lots of cpu on
2128 if (!(flags & XFS_ICSB_SB_LOCKED)) { 2166 * the superblock lock. We still need to hold the superblock
2129 s = XFS_SB_LOCK(mp); 2167 * lock, however, when we modify the global structures.
2130 locked = 1; 2168 */
2131 flags |= XFS_ICSB_SB_LOCKED; 2169 xfs_icsb_lock(mp);
2132 } 2170
2133 if (!balance_done) { 2171 /*
2134 xfs_icsb_balance_counter(mp, field, flags); 2172 * Now running atomically.
2135 balance_done = 1; 2173 *
2174 * If the counter is enabled, someone has beaten us to rebalancing.
2175 * Drop the lock and try again in the fast path....
2176 */
2177 if (!(xfs_icsb_counter_disabled(mp, field))) {
2178 xfs_icsb_unlock(mp);
2136 goto again; 2179 goto again;
2137 } else {
2138 /*
2139 * we might not have enough on this local
2140 * cpu to allocate for a bulk request.
2141 * We need to drain this field from all CPUs
2142 * and disable the counter fastpath
2143 */
2144 xfs_icsb_disable_counter(mp, field);
2145 } 2180 }
2146 2181
2182 /*
2183 * The counter is currently disabled. Because we are
2184 * running atomically here, we know a rebalance cannot
2185 * be in progress. Hence we can go straight to operating
2186 * on the global superblock. We do not call xfs_mod_incore_sb()
2187 * here even though we need to get the SB_LOCK. Doing so
2188 * will cause us to re-enter this function and deadlock.
2189 * Hence we get the SB_LOCK ourselves and then call
2190 * xfs_mod_incore_sb_unlocked() as the unlocked path operates
2191 * directly on the global counters.
2192 */
2193 s = XFS_SB_LOCK(mp);
2147 ret = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); 2194 ret = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
2195 XFS_SB_UNLOCK(mp, s);
2148 2196
2149 if (locked) 2197 /*
2150 XFS_SB_UNLOCK(mp, s); 2198 * Now that we've modified the global superblock, we
2199 * may be able to re-enable the distributed counters
2200 * (e.g. lots of space just got freed). After that
2201 * we are done.
2202 */
2203 if (ret != ENOSPC)
2204 xfs_icsb_balance_counter(mp, field, 0, 0);
2205 xfs_icsb_unlock(mp);
2151 return ret; 2206 return ret;
2152}
2153 2207
2154STATIC int 2208balance_counter:
2155xfs_icsb_modify_counters( 2209 xfs_icsb_unlock_cntr(icsbp);
2156 xfs_mount_t *mp, 2210 put_cpu();
2157 xfs_sb_field_t field,
2158 int delta,
2159 int rsvd)
2160{
2161 return xfs_icsb_modify_counters_int(mp, field, delta, rsvd, 0);
2162}
2163 2211
2164/* 2212 /*
2165 * Called when superblock is already locked 2213 * We may have multiple threads here if multiple per-cpu
2166 */ 2214 * counters run dry at the same time. This will mean we can
2167STATIC int 2215 * do more balances than strictly necessary but it is not
2168xfs_icsb_modify_counters_locked( 2216 * the common slowpath case.
2169 xfs_mount_t *mp, 2217 */
2170 xfs_sb_field_t field, 2218 xfs_icsb_lock(mp);
2171 int delta, 2219
2172 int rsvd) 2220 /*
2173{ 2221 * running atomically.
2174 return xfs_icsb_modify_counters_int(mp, field, delta, 2222 *
2175 rsvd, XFS_ICSB_SB_LOCKED); 2223 * This will leave the counter in the correct state for future
2224 * accesses. After the rebalance, we simply try again and our retry
2225 * will either succeed through the fast path or slow path without
2226 * another balance operation being required.
2227 */
2228 xfs_icsb_balance_counter(mp, field, 0, delta);
2229 xfs_icsb_unlock(mp);
2230 goto again;
2176} 2231}
2232
2177#endif 2233#endif
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index e5f396ff9a3d..82304b94646d 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -18,6 +18,7 @@
18#ifndef __XFS_MOUNT_H__ 18#ifndef __XFS_MOUNT_H__
19#define __XFS_MOUNT_H__ 19#define __XFS_MOUNT_H__
20 20
21
21typedef struct xfs_trans_reservations { 22typedef struct xfs_trans_reservations {
22 uint tr_write; /* extent alloc trans */ 23 uint tr_write; /* extent alloc trans */
23 uint tr_itruncate; /* truncate trans */ 24 uint tr_itruncate; /* truncate trans */
@@ -306,11 +307,13 @@ typedef struct xfs_icsb_cnts {
306#define XFS_ICSB_LAZY_COUNT (1 << 1) /* accuracy not needed */ 307#define XFS_ICSB_LAZY_COUNT (1 << 1) /* accuracy not needed */
307 308
308extern int xfs_icsb_init_counters(struct xfs_mount *); 309extern int xfs_icsb_init_counters(struct xfs_mount *);
309extern void xfs_icsb_sync_counters_lazy(struct xfs_mount *); 310extern void xfs_icsb_reinit_counters(struct xfs_mount *);
311extern void xfs_icsb_sync_counters_flags(struct xfs_mount *, int);
310 312
311#else 313#else
312#define xfs_icsb_init_counters(mp) (0) 314#define xfs_icsb_init_counters(mp) (0)
313#define xfs_icsb_sync_counters_lazy(mp) do { } while (0) 315#define xfs_icsb_reinit_counters(mp) do { } while (0)
316#define xfs_icsb_sync_counters_flags(mp, flags) do { } while (0)
314#endif 317#endif
315 318
316typedef struct xfs_mount { 319typedef struct xfs_mount {
@@ -419,6 +422,7 @@ typedef struct xfs_mount {
419 xfs_icsb_cnts_t *m_sb_cnts; /* per-cpu superblock counters */ 422 xfs_icsb_cnts_t *m_sb_cnts; /* per-cpu superblock counters */
420 unsigned long m_icsb_counters; /* disabled per-cpu counters */ 423 unsigned long m_icsb_counters; /* disabled per-cpu counters */
421 struct notifier_block m_icsb_notifier; /* hotplug cpu notifier */ 424 struct notifier_block m_icsb_notifier; /* hotplug cpu notifier */
425 struct mutex m_icsb_mutex; /* balancer sync lock */
422#endif 426#endif
423} xfs_mount_t; 427} xfs_mount_t;
424 428
@@ -563,11 +567,32 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
563} 567}
564 568
565/* 569/*
570 * Per-cpu superblock locking functions
571 */
572#ifdef HAVE_PERCPU_SB
573STATIC_INLINE void
574xfs_icsb_lock(xfs_mount_t *mp)
575{
576 mutex_lock(&mp->m_icsb_mutex);
577}
578
579STATIC_INLINE void
580xfs_icsb_unlock(xfs_mount_t *mp)
581{
582 mutex_unlock(&mp->m_icsb_mutex);
583}
584#else
585#define xfs_icsb_lock(mp)
586#define xfs_icsb_unlock(mp)
587#endif
588
589/*
566 * This structure is for use by the xfs_mod_incore_sb_batch() routine. 590 * This structure is for use by the xfs_mod_incore_sb_batch() routine.
591 * xfs_growfs can specify a few fields which are more than int limit
567 */ 592 */
568typedef struct xfs_mod_sb { 593typedef struct xfs_mod_sb {
569 xfs_sb_field_t msb_field; /* Field to modify, see below */ 594 xfs_sb_field_t msb_field; /* Field to modify, see below */
570 int msb_delta; /* Change to make to specified field */ 595 int64_t msb_delta; /* Change to make to specified field */
571} xfs_mod_sb_t; 596} xfs_mod_sb_t;
572 597
573#define XFS_MOUNT_ILOCK(mp) mutex_lock(&((mp)->m_ilock)) 598#define XFS_MOUNT_ILOCK(mp) mutex_lock(&((mp)->m_ilock))
@@ -585,17 +610,17 @@ extern int xfs_unmountfs(xfs_mount_t *, struct cred *);
585extern void xfs_unmountfs_close(xfs_mount_t *, struct cred *); 610extern void xfs_unmountfs_close(xfs_mount_t *, struct cred *);
586extern int xfs_unmountfs_writesb(xfs_mount_t *); 611extern int xfs_unmountfs_writesb(xfs_mount_t *);
587extern int xfs_unmount_flush(xfs_mount_t *, int); 612extern int xfs_unmount_flush(xfs_mount_t *, int);
588extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int, int); 613extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int);
589extern int xfs_mod_incore_sb_unlocked(xfs_mount_t *, xfs_sb_field_t, 614extern int xfs_mod_incore_sb_unlocked(xfs_mount_t *, xfs_sb_field_t,
590 int, int); 615 int64_t, int);
591extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *, 616extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *,
592 uint, int); 617 uint, int);
593extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int); 618extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
594extern int xfs_readsb(xfs_mount_t *, int); 619extern int xfs_readsb(xfs_mount_t *, int);
595extern void xfs_freesb(xfs_mount_t *); 620extern void xfs_freesb(xfs_mount_t *);
596extern void xfs_do_force_shutdown(bhv_desc_t *, int, char *, int); 621extern void xfs_do_force_shutdown(bhv_desc_t *, int, char *, int);
597extern int xfs_syncsub(xfs_mount_t *, int, int, int *); 622extern int xfs_syncsub(xfs_mount_t *, int, int *);
598extern int xfs_sync_inodes(xfs_mount_t *, int, int, int *); 623extern int xfs_sync_inodes(xfs_mount_t *, int, int *);
599extern xfs_agnumber_t xfs_initialize_perag(struct bhv_vfs *, xfs_mount_t *, 624extern xfs_agnumber_t xfs_initialize_perag(struct bhv_vfs *, xfs_mount_t *,
600 xfs_agnumber_t); 625 xfs_agnumber_t);
601extern void xfs_xlatesb(void *, struct xfs_sb *, int, __int64_t); 626extern void xfs_xlatesb(void *, struct xfs_sb *, int, __int64_t);
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index d98171deaa1c..4c6573d784cd 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -565,7 +565,7 @@ xfs_rename(
565 IHOLD(target_ip); 565 IHOLD(target_ip);
566 IHOLD(src_ip); 566 IHOLD(src_ip);
567 567
568 error = xfs_bmap_finish(&tp, &free_list, first_block, &committed); 568 error = xfs_bmap_finish(&tp, &free_list, &committed);
569 if (error) { 569 if (error) {
570 xfs_bmap_cancel(&free_list); 570 xfs_bmap_cancel(&free_list);
571 xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | 571 xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES |
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 880c73271c05..6fff19dc3cf9 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -147,7 +147,7 @@ xfs_growfs_rt_alloc(
147 /* 147 /*
148 * Free any blocks freed up in the transaction, then commit. 148 * Free any blocks freed up in the transaction, then commit.
149 */ 149 */
150 error = xfs_bmap_finish(&tp, &flist, firstblock, &committed); 150 error = xfs_bmap_finish(&tp, &flist, &committed);
151 if (error) 151 if (error)
152 goto error_exit; 152 goto error_exit;
153 xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 153 xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
@@ -913,57 +913,6 @@ xfs_rtcheck_alloc_range(
913} 913}
914#endif 914#endif
915 915
916#ifdef DEBUG
917/*
918 * Check whether the given block in the bitmap has the given value.
919 */
920STATIC int /* 1 for matches, 0 for not */
921xfs_rtcheck_bit(
922 xfs_mount_t *mp, /* file system mount structure */
923 xfs_trans_t *tp, /* transaction pointer */
924 xfs_rtblock_t start, /* bit (block) to check */
925 int val) /* 1 for free, 0 for allocated */
926{
927 int bit; /* bit number in the word */
928 xfs_rtblock_t block; /* bitmap block number */
929 xfs_buf_t *bp; /* buf for the block */
930 xfs_rtword_t *bufp; /* pointer into the buffer */
931 /* REFERENCED */
932 int error; /* error value */
933 xfs_rtword_t wdiff; /* difference between bit & expected */
934 int word; /* word number in the buffer */
935 xfs_rtword_t wval; /* word value from buffer */
936
937 block = XFS_BITTOBLOCK(mp, start);
938 error = xfs_rtbuf_get(mp, tp, block, 0, &bp);
939 bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
940 word = XFS_BITTOWORD(mp, start);
941 bit = (int)(start & (XFS_NBWORD - 1));
942 wval = bufp[word];
943 xfs_trans_brelse(tp, bp);
944 wdiff = (wval ^ -val) & ((xfs_rtword_t)1 << bit);
945 return !wdiff;
946}
947#endif /* DEBUG */
948
949#if 0
950/*
951 * Check that the given extent (block range) is free already.
952 */
953STATIC int /* error */
954xfs_rtcheck_free_range(
955 xfs_mount_t *mp, /* file system mount point */
956 xfs_trans_t *tp, /* transaction pointer */
957 xfs_rtblock_t bno, /* starting block number of extent */
958 xfs_extlen_t len, /* length of extent */
959 int *stat) /* out: 1 for free, 0 for not */
960{
961 xfs_rtblock_t new; /* dummy for xfs_rtcheck_range */
962
963 return xfs_rtcheck_range(mp, tp, bno, len, 1, &new, stat);
964}
965#endif
966
967/* 916/*
968 * Check that the given range is either all allocated (val = 0) or 917 * Check that the given range is either all allocated (val = 0) or
969 * all free (val = 1). 918 * all free (val = 1).
@@ -2382,60 +2331,3 @@ xfs_rtpick_extent(
2382 *pick = b; 2331 *pick = b;
2383 return 0; 2332 return 0;
2384} 2333}
2385
2386#ifdef DEBUG
2387/*
2388 * Debug code: print out the value of a range in the bitmap.
2389 */
2390void
2391xfs_rtprint_range(
2392 xfs_mount_t *mp, /* file system mount structure */
2393 xfs_trans_t *tp, /* transaction pointer */
2394 xfs_rtblock_t start, /* starting block to print */
2395 xfs_extlen_t len) /* length to print */
2396{
2397 xfs_extlen_t i; /* block number in the extent */
2398
2399 cmn_err(CE_DEBUG, "%Ld: ", (long long)start);
2400 for (i = 0; i < len; i++)
2401 cmn_err(CE_DEBUG, "%d", xfs_rtcheck_bit(mp, tp, start + i, 1));
2402 cmn_err(CE_DEBUG, "\n");
2403}
2404
2405/*
2406 * Debug code: print the summary file.
2407 */
2408void
2409xfs_rtprint_summary(
2410 xfs_mount_t *mp, /* file system mount structure */
2411 xfs_trans_t *tp) /* transaction pointer */
2412{
2413 xfs_suminfo_t c; /* summary data */
2414 xfs_rtblock_t i; /* bitmap block number */
2415 int l; /* summary information level */
2416 int p; /* flag for printed anything */
2417 xfs_fsblock_t sb; /* summary block number */
2418 xfs_buf_t *sumbp; /* summary block buffer */
2419
2420 sumbp = NULL;
2421 for (l = 0; l < mp->m_rsumlevels; l++) {
2422 for (p = 0, i = 0; i < mp->m_sb.sb_rbmblocks; i++) {
2423 (void)xfs_rtget_summary(mp, tp, l, i, &sumbp, &sb, &c);
2424 if (c) {
2425 if (!p) {
2426 cmn_err(CE_DEBUG, "%Ld-%Ld:", 1LL << l,
2427 XFS_RTMIN((1LL << l) +
2428 ((1LL << l) - 1LL),
2429 mp->m_sb.sb_rextents));
2430 p = 1;
2431 }
2432 cmn_err(CE_DEBUG, " %Ld:%d", (long long)i, c);
2433 }
2434 }
2435 if (p)
2436 cmn_err(CE_DEBUG, "\n");
2437 }
2438 if (sumbp)
2439 xfs_trans_brelse(tp, sumbp);
2440}
2441#endif /* DEBUG */
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index 0e0b4d2ec202..799c1f871263 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -134,24 +134,6 @@ xfs_rtpick_extent(
134 xfs_rtblock_t *pick); /* result rt extent */ 134 xfs_rtblock_t *pick); /* result rt extent */
135 135
136/* 136/*
137 * Debug code: print out the value of a range in the bitmap.
138 */
139void
140xfs_rtprint_range(
141 struct xfs_mount *mp, /* file system mount structure */
142 struct xfs_trans *tp, /* transaction pointer */
143 xfs_rtblock_t start, /* starting block to print */
144 xfs_extlen_t len); /* length to print */
145
146/*
147 * Debug code: print the summary file.
148 */
149void
150xfs_rtprint_summary(
151 struct xfs_mount *mp, /* file system mount structure */
152 struct xfs_trans *tp); /* transaction pointer */
153
154/*
155 * Grow the realtime area of the filesystem. 137 * Grow the realtime area of the filesystem.
156 */ 138 */
157int 139int
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index defb2febaaf5..1ea7c0ca6ae0 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -42,7 +42,6 @@
42#include "xfs_attr.h" 42#include "xfs_attr.h"
43#include "xfs_bmap.h" 43#include "xfs_bmap.h"
44#include "xfs_acl.h" 44#include "xfs_acl.h"
45#include "xfs_mac.h"
46#include "xfs_error.h" 45#include "xfs_error.h"
47#include "xfs_buf_item.h" 46#include "xfs_buf_item.h"
48#include "xfs_rw.h" 47#include "xfs_rw.h"
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index ee2721e0de4d..301ff9445b6f 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -339,7 +339,7 @@ xfs_trans_reserve(
339 */ 339 */
340 if (blocks > 0) { 340 if (blocks > 0) {
341 error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FDBLOCKS, 341 error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FDBLOCKS,
342 -blocks, rsvd); 342 -((int64_t)blocks), rsvd);
343 if (error != 0) { 343 if (error != 0) {
344 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); 344 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
345 return (XFS_ERROR(ENOSPC)); 345 return (XFS_ERROR(ENOSPC));
@@ -380,7 +380,7 @@ xfs_trans_reserve(
380 */ 380 */
381 if (rtextents > 0) { 381 if (rtextents > 0) {
382 error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FREXTENTS, 382 error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FREXTENTS,
383 -rtextents, rsvd); 383 -((int64_t)rtextents), rsvd);
384 if (error) { 384 if (error) {
385 error = XFS_ERROR(ENOSPC); 385 error = XFS_ERROR(ENOSPC);
386 goto undo_log; 386 goto undo_log;
@@ -410,7 +410,7 @@ undo_log:
410undo_blocks: 410undo_blocks:
411 if (blocks > 0) { 411 if (blocks > 0) {
412 (void) xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FDBLOCKS, 412 (void) xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FDBLOCKS,
413 blocks, rsvd); 413 (int64_t)blocks, rsvd);
414 tp->t_blk_res = 0; 414 tp->t_blk_res = 0;
415 } 415 }
416 416
@@ -432,7 +432,7 @@ void
432xfs_trans_mod_sb( 432xfs_trans_mod_sb(
433 xfs_trans_t *tp, 433 xfs_trans_t *tp,
434 uint field, 434 uint field,
435 long delta) 435 int64_t delta)
436{ 436{
437 437
438 switch (field) { 438 switch (field) {
@@ -663,62 +663,62 @@ xfs_trans_unreserve_and_mod_sb(
663 if (tp->t_flags & XFS_TRANS_SB_DIRTY) { 663 if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
664 if (tp->t_icount_delta != 0) { 664 if (tp->t_icount_delta != 0) {
665 msbp->msb_field = XFS_SBS_ICOUNT; 665 msbp->msb_field = XFS_SBS_ICOUNT;
666 msbp->msb_delta = (int)tp->t_icount_delta; 666 msbp->msb_delta = tp->t_icount_delta;
667 msbp++; 667 msbp++;
668 } 668 }
669 if (tp->t_ifree_delta != 0) { 669 if (tp->t_ifree_delta != 0) {
670 msbp->msb_field = XFS_SBS_IFREE; 670 msbp->msb_field = XFS_SBS_IFREE;
671 msbp->msb_delta = (int)tp->t_ifree_delta; 671 msbp->msb_delta = tp->t_ifree_delta;
672 msbp++; 672 msbp++;
673 } 673 }
674 if (tp->t_fdblocks_delta != 0) { 674 if (tp->t_fdblocks_delta != 0) {
675 msbp->msb_field = XFS_SBS_FDBLOCKS; 675 msbp->msb_field = XFS_SBS_FDBLOCKS;
676 msbp->msb_delta = (int)tp->t_fdblocks_delta; 676 msbp->msb_delta = tp->t_fdblocks_delta;
677 msbp++; 677 msbp++;
678 } 678 }
679 if (tp->t_frextents_delta != 0) { 679 if (tp->t_frextents_delta != 0) {
680 msbp->msb_field = XFS_SBS_FREXTENTS; 680 msbp->msb_field = XFS_SBS_FREXTENTS;
681 msbp->msb_delta = (int)tp->t_frextents_delta; 681 msbp->msb_delta = tp->t_frextents_delta;
682 msbp++; 682 msbp++;
683 } 683 }
684 if (tp->t_dblocks_delta != 0) { 684 if (tp->t_dblocks_delta != 0) {
685 msbp->msb_field = XFS_SBS_DBLOCKS; 685 msbp->msb_field = XFS_SBS_DBLOCKS;
686 msbp->msb_delta = (int)tp->t_dblocks_delta; 686 msbp->msb_delta = tp->t_dblocks_delta;
687 msbp++; 687 msbp++;
688 } 688 }
689 if (tp->t_agcount_delta != 0) { 689 if (tp->t_agcount_delta != 0) {
690 msbp->msb_field = XFS_SBS_AGCOUNT; 690 msbp->msb_field = XFS_SBS_AGCOUNT;
691 msbp->msb_delta = (int)tp->t_agcount_delta; 691 msbp->msb_delta = tp->t_agcount_delta;
692 msbp++; 692 msbp++;
693 } 693 }
694 if (tp->t_imaxpct_delta != 0) { 694 if (tp->t_imaxpct_delta != 0) {
695 msbp->msb_field = XFS_SBS_IMAX_PCT; 695 msbp->msb_field = XFS_SBS_IMAX_PCT;
696 msbp->msb_delta = (int)tp->t_imaxpct_delta; 696 msbp->msb_delta = tp->t_imaxpct_delta;
697 msbp++; 697 msbp++;
698 } 698 }
699 if (tp->t_rextsize_delta != 0) { 699 if (tp->t_rextsize_delta != 0) {
700 msbp->msb_field = XFS_SBS_REXTSIZE; 700 msbp->msb_field = XFS_SBS_REXTSIZE;
701 msbp->msb_delta = (int)tp->t_rextsize_delta; 701 msbp->msb_delta = tp->t_rextsize_delta;
702 msbp++; 702 msbp++;
703 } 703 }
704 if (tp->t_rbmblocks_delta != 0) { 704 if (tp->t_rbmblocks_delta != 0) {
705 msbp->msb_field = XFS_SBS_RBMBLOCKS; 705 msbp->msb_field = XFS_SBS_RBMBLOCKS;
706 msbp->msb_delta = (int)tp->t_rbmblocks_delta; 706 msbp->msb_delta = tp->t_rbmblocks_delta;
707 msbp++; 707 msbp++;
708 } 708 }
709 if (tp->t_rblocks_delta != 0) { 709 if (tp->t_rblocks_delta != 0) {
710 msbp->msb_field = XFS_SBS_RBLOCKS; 710 msbp->msb_field = XFS_SBS_RBLOCKS;
711 msbp->msb_delta = (int)tp->t_rblocks_delta; 711 msbp->msb_delta = tp->t_rblocks_delta;
712 msbp++; 712 msbp++;
713 } 713 }
714 if (tp->t_rextents_delta != 0) { 714 if (tp->t_rextents_delta != 0) {
715 msbp->msb_field = XFS_SBS_REXTENTS; 715 msbp->msb_field = XFS_SBS_REXTENTS;
716 msbp->msb_delta = (int)tp->t_rextents_delta; 716 msbp->msb_delta = tp->t_rextents_delta;
717 msbp++; 717 msbp++;
718 } 718 }
719 if (tp->t_rextslog_delta != 0) { 719 if (tp->t_rextslog_delta != 0) {
720 msbp->msb_field = XFS_SBS_REXTSLOG; 720 msbp->msb_field = XFS_SBS_REXTSLOG;
721 msbp->msb_delta = (int)tp->t_rextslog_delta; 721 msbp->msb_delta = tp->t_rextslog_delta;
722 msbp++; 722 msbp++;
723 } 723 }
724 } 724 }
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index c68e00105d23..f1d7ab236726 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -39,13 +39,9 @@ typedef struct xfs_trans_header {
39/* 39/*
40 * Log item types. 40 * Log item types.
41 */ 41 */
42#define XFS_LI_5_3_BUF 0x1234 /* v1 bufs, 1-block inode buffers */
43#define XFS_LI_5_3_INODE 0x1235 /* 1-block inode buffers */
44#define XFS_LI_EFI 0x1236 42#define XFS_LI_EFI 0x1236
45#define XFS_LI_EFD 0x1237 43#define XFS_LI_EFD 0x1237
46#define XFS_LI_IUNLINK 0x1238 44#define XFS_LI_IUNLINK 0x1238
47#define XFS_LI_6_1_INODE 0x1239 /* 4K non-aligned inode bufs */
48#define XFS_LI_6_1_BUF 0x123a /* v1, 4K inode buffers */
49#define XFS_LI_INODE 0x123b /* aligned ino chunks, var-size ibufs */ 45#define XFS_LI_INODE 0x123b /* aligned ino chunks, var-size ibufs */
50#define XFS_LI_BUF 0x123c /* v2 bufs, variable sized inode bufs */ 46#define XFS_LI_BUF 0x123c /* v2 bufs, variable sized inode bufs */
51#define XFS_LI_DQUOT 0x123d 47#define XFS_LI_DQUOT 0x123d
@@ -354,25 +350,25 @@ typedef struct xfs_trans {
354 xfs_trans_callback_t t_callback; /* transaction callback */ 350 xfs_trans_callback_t t_callback; /* transaction callback */
355 void *t_callarg; /* callback arg */ 351 void *t_callarg; /* callback arg */
356 unsigned int t_flags; /* misc flags */ 352 unsigned int t_flags; /* misc flags */
357 long t_icount_delta; /* superblock icount change */ 353 int64_t t_icount_delta; /* superblock icount change */
358 long t_ifree_delta; /* superblock ifree change */ 354 int64_t t_ifree_delta; /* superblock ifree change */
359 long t_fdblocks_delta; /* superblock fdblocks chg */ 355 int64_t t_fdblocks_delta; /* superblock fdblocks chg */
360 long t_res_fdblocks_delta; /* on-disk only chg */ 356 int64_t t_res_fdblocks_delta; /* on-disk only chg */
361 long t_frextents_delta;/* superblock freextents chg*/ 357 int64_t t_frextents_delta;/* superblock freextents chg*/
362 long t_res_frextents_delta; /* on-disk only chg */ 358 int64_t t_res_frextents_delta; /* on-disk only chg */
363#ifdef DEBUG 359#ifdef DEBUG
364 long t_ag_freeblks_delta; /* debugging counter */ 360 int64_t t_ag_freeblks_delta; /* debugging counter */
365 long t_ag_flist_delta; /* debugging counter */ 361 int64_t t_ag_flist_delta; /* debugging counter */
366 long t_ag_btree_delta; /* debugging counter */ 362 int64_t t_ag_btree_delta; /* debugging counter */
367#endif 363#endif
368 long t_dblocks_delta;/* superblock dblocks change */ 364 int64_t t_dblocks_delta;/* superblock dblocks change */
369 long t_agcount_delta;/* superblock agcount change */ 365 int64_t t_agcount_delta;/* superblock agcount change */
370 long t_imaxpct_delta;/* superblock imaxpct change */ 366 int64_t t_imaxpct_delta;/* superblock imaxpct change */
371 long t_rextsize_delta;/* superblock rextsize chg */ 367 int64_t t_rextsize_delta;/* superblock rextsize chg */
372 long t_rbmblocks_delta;/* superblock rbmblocks chg */ 368 int64_t t_rbmblocks_delta;/* superblock rbmblocks chg */
373 long t_rblocks_delta;/* superblock rblocks change */ 369 int64_t t_rblocks_delta;/* superblock rblocks change */
374 long t_rextents_delta;/* superblocks rextents chg */ 370 int64_t t_rextents_delta;/* superblocks rextents chg */
375 long t_rextslog_delta;/* superblocks rextslog chg */ 371 int64_t t_rextslog_delta;/* superblocks rextslog chg */
376 unsigned int t_items_free; /* log item descs free */ 372 unsigned int t_items_free; /* log item descs free */
377 xfs_log_item_chunk_t t_items; /* first log item desc chunk */ 373 xfs_log_item_chunk_t t_items; /* first log item desc chunk */
378 xfs_trans_header_t t_header; /* header for in-log trans */ 374 xfs_trans_header_t t_header; /* header for in-log trans */
@@ -936,9 +932,9 @@ typedef struct xfs_trans {
936#define xfs_trans_set_sync(tp) ((tp)->t_flags |= XFS_TRANS_SYNC) 932#define xfs_trans_set_sync(tp) ((tp)->t_flags |= XFS_TRANS_SYNC)
937 933
938#ifdef DEBUG 934#ifdef DEBUG
939#define xfs_trans_agblocks_delta(tp, d) ((tp)->t_ag_freeblks_delta += (long)d) 935#define xfs_trans_agblocks_delta(tp, d) ((tp)->t_ag_freeblks_delta += (int64_t)d)
940#define xfs_trans_agflist_delta(tp, d) ((tp)->t_ag_flist_delta += (long)d) 936#define xfs_trans_agflist_delta(tp, d) ((tp)->t_ag_flist_delta += (int64_t)d)
941#define xfs_trans_agbtree_delta(tp, d) ((tp)->t_ag_btree_delta += (long)d) 937#define xfs_trans_agbtree_delta(tp, d) ((tp)->t_ag_btree_delta += (int64_t)d)
942#else 938#else
943#define xfs_trans_agblocks_delta(tp, d) 939#define xfs_trans_agblocks_delta(tp, d)
944#define xfs_trans_agflist_delta(tp, d) 940#define xfs_trans_agflist_delta(tp, d)
@@ -954,7 +950,7 @@ xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint);
954xfs_trans_t *xfs_trans_dup(xfs_trans_t *); 950xfs_trans_t *xfs_trans_dup(xfs_trans_t *);
955int xfs_trans_reserve(xfs_trans_t *, uint, uint, uint, 951int xfs_trans_reserve(xfs_trans_t *, uint, uint, uint,
956 uint, uint); 952 uint, uint);
957void xfs_trans_mod_sb(xfs_trans_t *, uint, long); 953void xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t);
958struct xfs_buf *xfs_trans_get_buf(xfs_trans_t *, struct xfs_buftarg *, xfs_daddr_t, 954struct xfs_buf *xfs_trans_get_buf(xfs_trans_t *, struct xfs_buftarg *, xfs_daddr_t,
959 int, uint); 955 int, uint);
960int xfs_trans_read_buf(struct xfs_mount *, xfs_trans_t *, 956int xfs_trans_read_buf(struct xfs_mount *, xfs_trans_t *,
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index fc39b166d403..ceb4f6e99960 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -90,7 +90,7 @@ xfs_trans_push_ail(
90 int flush_log; 90 int flush_log;
91 SPLDECL(s); 91 SPLDECL(s);
92 92
93#define XFS_TRANS_PUSH_AIL_RESTARTS 10 93#define XFS_TRANS_PUSH_AIL_RESTARTS 1000
94 94
95 AIL_LOCK(mp,s); 95 AIL_LOCK(mp,s);
96 lip = xfs_trans_first_ail(mp, &gen); 96 lip = xfs_trans_first_ail(mp, &gen);
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 62336a4cc5a4..29f72f613782 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -640,7 +640,7 @@ xfs_quiesce_fs(
640 * we can write the unmount record. 640 * we can write the unmount record.
641 */ 641 */
642 do { 642 do {
643 xfs_syncsub(mp, SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT, 0, NULL); 643 xfs_syncsub(mp, SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT, NULL);
644 pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); 644 pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
645 if (!pincount) { 645 if (!pincount) {
646 delay(50); 646 delay(50);
@@ -806,7 +806,7 @@ xfs_statvfs(
806 806
807 statp->f_type = XFS_SB_MAGIC; 807 statp->f_type = XFS_SB_MAGIC;
808 808
809 xfs_icsb_sync_counters_lazy(mp); 809 xfs_icsb_sync_counters_flags(mp, XFS_ICSB_LAZY_COUNT);
810 s = XFS_SB_LOCK(mp); 810 s = XFS_SB_LOCK(mp);
811 statp->f_bsize = sbp->sb_blocksize; 811 statp->f_bsize = sbp->sb_blocksize;
812 lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0; 812 lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
@@ -872,6 +872,10 @@ xfs_statvfs(
872 * this by simply making sure the log gets flushed 872 * this by simply making sure the log gets flushed
873 * if SYNC_BDFLUSH is set, and by actually writing it 873 * if SYNC_BDFLUSH is set, and by actually writing it
874 * out otherwise. 874 * out otherwise.
875 * SYNC_IOWAIT - The caller wants us to wait for all data I/O to complete
876 * before we return (including direct I/O). Forms the drain
877 * side of the write barrier needed to safely quiesce the
878 * filesystem.
875 * 879 *
876 */ 880 */
877/*ARGSUSED*/ 881/*ARGSUSED*/
@@ -883,27 +887,20 @@ xfs_sync(
883{ 887{
884 xfs_mount_t *mp = XFS_BHVTOM(bdp); 888 xfs_mount_t *mp = XFS_BHVTOM(bdp);
885 889
886 if (unlikely(flags == SYNC_QUIESCE)) 890 return xfs_syncsub(mp, flags, NULL);
887 return xfs_quiesce_fs(mp);
888 else
889 return xfs_syncsub(mp, flags, 0, NULL);
890} 891}
891 892
892/* 893/*
893 * xfs sync routine for internal use 894 * xfs sync routine for internal use
894 * 895 *
895 * This routine supports all of the flags defined for the generic vfs_sync 896 * This routine supports all of the flags defined for the generic vfs_sync
896 * interface as explained above under xfs_sync. In the interests of not 897 * interface as explained above under xfs_sync.
897 * changing interfaces within the 6.5 family, additional internally-
898 * required functions are specified within a separate xflags parameter,
899 * only available by calling this routine.
900 * 898 *
901 */ 899 */
902int 900int
903xfs_sync_inodes( 901xfs_sync_inodes(
904 xfs_mount_t *mp, 902 xfs_mount_t *mp,
905 int flags, 903 int flags,
906 int xflags,
907 int *bypassed) 904 int *bypassed)
908{ 905{
909 xfs_inode_t *ip = NULL; 906 xfs_inode_t *ip = NULL;
@@ -1176,6 +1173,13 @@ xfs_sync_inodes(
1176 } 1173 }
1177 1174
1178 } 1175 }
1176 /*
1177 * When freezing, we need to wait ensure all I/O (including direct
1178 * I/O) is complete to ensure no further data modification can take
1179 * place after this point
1180 */
1181 if (flags & SYNC_IOWAIT)
1182 vn_iowait(vp);
1179 1183
1180 if (flags & SYNC_BDFLUSH) { 1184 if (flags & SYNC_BDFLUSH) {
1181 if ((flags & SYNC_ATTR) && 1185 if ((flags & SYNC_ATTR) &&
@@ -1412,17 +1416,13 @@ xfs_sync_inodes(
1412 * xfs sync routine for internal use 1416 * xfs sync routine for internal use
1413 * 1417 *
1414 * This routine supports all of the flags defined for the generic vfs_sync 1418 * This routine supports all of the flags defined for the generic vfs_sync
1415 * interface as explained above under xfs_sync. In the interests of not 1419 * interface as explained above under xfs_sync.
1416 * changing interfaces within the 6.5 family, additional internally-
1417 * required functions are specified within a separate xflags parameter,
1418 * only available by calling this routine.
1419 * 1420 *
1420 */ 1421 */
1421int 1422int
1422xfs_syncsub( 1423xfs_syncsub(
1423 xfs_mount_t *mp, 1424 xfs_mount_t *mp,
1424 int flags, 1425 int flags,
1425 int xflags,
1426 int *bypassed) 1426 int *bypassed)
1427{ 1427{
1428 int error = 0; 1428 int error = 0;
@@ -1444,7 +1444,7 @@ xfs_syncsub(
1444 if (flags & SYNC_BDFLUSH) 1444 if (flags & SYNC_BDFLUSH)
1445 xfs_finish_reclaim_all(mp, 1); 1445 xfs_finish_reclaim_all(mp, 1);
1446 else 1446 else
1447 error = xfs_sync_inodes(mp, flags, xflags, bypassed); 1447 error = xfs_sync_inodes(mp, flags, bypassed);
1448 } 1448 }
1449 1449
1450 /* 1450 /*
@@ -1958,15 +1958,26 @@ xfs_showargs(
1958 return 0; 1958 return 0;
1959} 1959}
1960 1960
1961/*
1962 * Second stage of a freeze. The data is already frozen, now we have to take
1963 * care of the metadata. New transactions are already blocked, so we need to
1964 * wait for any remaining transactions to drain out before proceding.
1965 */
1961STATIC void 1966STATIC void
1962xfs_freeze( 1967xfs_freeze(
1963 bhv_desc_t *bdp) 1968 bhv_desc_t *bdp)
1964{ 1969{
1965 xfs_mount_t *mp = XFS_BHVTOM(bdp); 1970 xfs_mount_t *mp = XFS_BHVTOM(bdp);
1966 1971
1972 /* wait for all modifications to complete */
1967 while (atomic_read(&mp->m_active_trans) > 0) 1973 while (atomic_read(&mp->m_active_trans) > 0)
1968 delay(100); 1974 delay(100);
1969 1975
1976 /* flush inodes and push all remaining buffers out to disk */
1977 xfs_quiesce_fs(mp);
1978
1979 ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0);
1980
1970 /* Push the superblock and write an unmount record */ 1981 /* Push the superblock and write an unmount record */
1971 xfs_log_unmount_write(mp); 1982 xfs_log_unmount_write(mp);
1972 xfs_unmountfs_writesb(mp); 1983 xfs_unmountfs_writesb(mp);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index bda774a04b8f..52c41714ec54 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -51,7 +51,6 @@
51#include "xfs_refcache.h" 51#include "xfs_refcache.h"
52#include "xfs_trans_space.h" 52#include "xfs_trans_space.h"
53#include "xfs_log_priv.h" 53#include "xfs_log_priv.h"
54#include "xfs_mac.h"
55 54
56STATIC int 55STATIC int
57xfs_open( 56xfs_open(
@@ -1381,7 +1380,7 @@ xfs_inactive_symlink_rmt(
1381 /* 1380 /*
1382 * Commit the first transaction. This logs the EFI and the inode. 1381 * Commit the first transaction. This logs the EFI and the inode.
1383 */ 1382 */
1384 if ((error = xfs_bmap_finish(&tp, &free_list, first_block, &committed))) 1383 if ((error = xfs_bmap_finish(&tp, &free_list, &committed)))
1385 goto error1; 1384 goto error1;
1386 /* 1385 /*
1387 * The transaction must have been committed, since there were 1386 * The transaction must have been committed, since there were
@@ -1790,8 +1789,7 @@ xfs_inactive(
1790 * Just ignore errors at this point. There is 1789 * Just ignore errors at this point. There is
1791 * nothing we can do except to try to keep going. 1790 * nothing we can do except to try to keep going.
1792 */ 1791 */
1793 (void) xfs_bmap_finish(&tp, &free_list, first_block, 1792 (void) xfs_bmap_finish(&tp, &free_list, &committed);
1794 &committed);
1795 (void) xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 1793 (void) xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
1796 } 1794 }
1797 /* 1795 /*
@@ -2022,7 +2020,7 @@ xfs_create(
2022 IHOLD(ip); 2020 IHOLD(ip);
2023 vp = XFS_ITOV(ip); 2021 vp = XFS_ITOV(ip);
2024 2022
2025 error = xfs_bmap_finish(&tp, &free_list, first_block, &committed); 2023 error = xfs_bmap_finish(&tp, &free_list, &committed);
2026 if (error) { 2024 if (error) {
2027 xfs_bmap_cancel(&free_list); 2025 xfs_bmap_cancel(&free_list);
2028 goto abort_rele; 2026 goto abort_rele;
@@ -2507,7 +2505,7 @@ xfs_remove(
2507 xfs_trans_set_sync(tp); 2505 xfs_trans_set_sync(tp);
2508 } 2506 }
2509 2507
2510 error = xfs_bmap_finish(&tp, &free_list, first_block, &committed); 2508 error = xfs_bmap_finish(&tp, &free_list, &committed);
2511 if (error) { 2509 if (error) {
2512 REMOVE_DEBUG_TRACE(__LINE__); 2510 REMOVE_DEBUG_TRACE(__LINE__);
2513 goto error_rele; 2511 goto error_rele;
@@ -2715,7 +2713,7 @@ xfs_link(
2715 xfs_trans_set_sync(tp); 2713 xfs_trans_set_sync(tp);
2716 } 2714 }
2717 2715
2718 error = xfs_bmap_finish (&tp, &free_list, first_block, &committed); 2716 error = xfs_bmap_finish (&tp, &free_list, &committed);
2719 if (error) { 2717 if (error) {
2720 xfs_bmap_cancel(&free_list); 2718 xfs_bmap_cancel(&free_list);
2721 goto abort_return; 2719 goto abort_return;
@@ -2932,7 +2930,7 @@ xfs_mkdir(
2932 xfs_trans_set_sync(tp); 2930 xfs_trans_set_sync(tp);
2933 } 2931 }
2934 2932
2935 error = xfs_bmap_finish(&tp, &free_list, first_block, &committed); 2933 error = xfs_bmap_finish(&tp, &free_list, &committed);
2936 if (error) { 2934 if (error) {
2937 IRELE(cdp); 2935 IRELE(cdp);
2938 goto error2; 2936 goto error2;
@@ -3183,7 +3181,7 @@ xfs_rmdir(
3183 xfs_trans_set_sync(tp); 3181 xfs_trans_set_sync(tp);
3184 } 3182 }
3185 3183
3186 error = xfs_bmap_finish (&tp, &free_list, first_block, &committed); 3184 error = xfs_bmap_finish (&tp, &free_list, &committed);
3187 if (error) { 3185 if (error) {
3188 xfs_bmap_cancel(&free_list); 3186 xfs_bmap_cancel(&free_list);
3189 xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | 3187 xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES |
@@ -3533,7 +3531,7 @@ xfs_symlink(
3533 */ 3531 */
3534 IHOLD(ip); 3532 IHOLD(ip);
3535 3533
3536 error = xfs_bmap_finish(&tp, &free_list, first_block, &committed); 3534 error = xfs_bmap_finish(&tp, &free_list, &committed);
3537 if (error) { 3535 if (error) {
3538 goto error2; 3536 goto error2;
3539 } 3537 }
@@ -4145,7 +4143,7 @@ retry:
4145 /* 4143 /*
4146 * Complete the transaction 4144 * Complete the transaction
4147 */ 4145 */
4148 error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed); 4146 error = xfs_bmap_finish(&tp, &free_list, &committed);
4149 if (error) { 4147 if (error) {
4150 goto error0; 4148 goto error0;
4151 } 4149 }
@@ -4452,7 +4450,7 @@ xfs_free_file_space(
4452 /* 4450 /*
4453 * complete the transaction 4451 * complete the transaction
4454 */ 4452 */
4455 error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed); 4453 error = xfs_bmap_finish(&tp, &free_list, &committed);
4456 if (error) { 4454 if (error) {
4457 goto error0; 4455 goto error0;
4458 } 4456 }