aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/Kconfig5
-rw-r--r--fs/9p/Makefile1
-rw-r--r--fs/9p/acl.c9
-rw-r--r--fs/9p/acl.h2
-rw-r--r--fs/9p/v9fs.h42
-rw-r--r--fs/9p/vfs_dentry.c4
-rw-r--r--fs/9p/vfs_inode.c880
-rw-r--r--fs/9p/vfs_inode_dotl.c824
-rw-r--r--fs/9p/xattr.c2
-rw-r--r--fs/adfs/dir.c13
-rw-r--r--fs/adfs/super.c11
-rw-r--r--fs/affs/amigaffs.c4
-rw-r--r--fs/affs/namei.c68
-rw-r--r--fs/affs/super.c11
-rw-r--r--fs/afs/dir.c10
-rw-r--r--fs/afs/internal.h2
-rw-r--r--fs/afs/security.c7
-rw-r--r--fs/afs/super.c10
-rw-r--r--fs/aio.c29
-rw-r--r--fs/anon_inodes.c6
-rw-r--r--fs/autofs4/autofs_i.h21
-rw-r--r--fs/autofs4/expire.c141
-rw-r--r--fs/autofs4/inode.c2
-rw-r--r--fs/autofs4/root.c91
-rw-r--r--fs/autofs4/waitq.c23
-rw-r--r--fs/bad_inode.c5
-rw-r--r--fs/befs/endian.h16
-rw-r--r--fs/befs/linuxvfs.c12
-rw-r--r--fs/bfs/inode.c9
-rw-r--r--fs/binfmt_elf.c23
-rw-r--r--fs/block_dev.c9
-rw-r--r--fs/btrfs/acl.c21
-rw-r--r--fs/btrfs/ctree.h2
-rw-r--r--fs/btrfs/export.c4
-rw-r--r--fs/btrfs/inode.c17
-rw-r--r--fs/buffer.c37
-rw-r--r--fs/ceph/dir.c28
-rw-r--r--fs/ceph/inode.c38
-rw-r--r--fs/ceph/mds_client.c2
-rw-r--r--fs/ceph/super.h2
-rw-r--r--fs/char_dev.c13
-rw-r--r--fs/cifs/cache.c16
-rw-r--r--fs/cifs/cifs_debug.c22
-rw-r--r--fs/cifs/cifs_spnego.c10
-rw-r--r--fs/cifs/cifsencrypt.c6
-rw-r--r--fs/cifs/cifsfs.c33
-rw-r--r--fs/cifs/cifsglob.h9
-rw-r--r--fs/cifs/cifssmb.c5
-rw-r--r--fs/cifs/connect.c462
-rw-r--r--fs/cifs/dir.c83
-rw-r--r--fs/cifs/file.c233
-rw-r--r--fs/cifs/inode.c20
-rw-r--r--fs/cifs/link.c4
-rw-r--r--fs/cifs/readdir.c7
-rw-r--r--fs/cifs/sess.c135
-rw-r--r--fs/cifs/transport.c2
-rw-r--r--fs/coda/cache.c4
-rw-r--r--fs/coda/dir.c20
-rw-r--r--fs/coda/inode.c9
-rw-r--r--fs/coda/pioctl.c6
-rw-r--r--fs/compat_ioctl.c3
-rw-r--r--fs/configfs/configfs_internal.h4
-rw-r--r--fs/configfs/dir.c24
-rw-r--r--fs/configfs/inode.c8
-rw-r--r--fs/cramfs/inode.c110
-rw-r--r--fs/dcache.c1375
-rw-r--r--fs/dlm/lowcomms.c63
-rw-r--r--fs/ecryptfs/dentry.c9
-rw-r--r--fs/ecryptfs/inode.c12
-rw-r--r--fs/ecryptfs/main.c4
-rw-r--r--fs/ecryptfs/super.c12
-rw-r--r--fs/efs/super.c9
-rw-r--r--fs/eventpoll.c20
-rw-r--r--fs/exofs/super.c9
-rw-r--r--fs/exportfs/expfs.c14
-rw-r--r--fs/ext2/acl.c11
-rw-r--r--fs/ext2/acl.h2
-rw-r--r--fs/ext2/dir.c19
-rw-r--r--fs/ext2/namei.c2
-rw-r--r--fs/ext2/super.c34
-rw-r--r--fs/ext2/xattr.c10
-rw-r--r--fs/ext3/acl.c11
-rw-r--r--fs/ext3/acl.h2
-rw-r--r--fs/ext3/balloc.c266
-rw-r--r--fs/ext3/dir.c15
-rw-r--r--fs/ext3/inode.c6
-rw-r--r--fs/ext3/ioctl.c22
-rw-r--r--fs/ext3/namei.c138
-rw-r--r--fs/ext3/resize.c65
-rw-r--r--fs/ext3/super.c73
-rw-r--r--fs/ext3/xattr.c2
-rw-r--r--fs/ext4/acl.c11
-rw-r--r--fs/ext4/acl.h2
-rw-r--r--fs/ext4/balloc.c3
-rw-r--r--fs/ext4/dir.c56
-rw-r--r--fs/ext4/ext4.h93
-rw-r--r--fs/ext4/ext4_extents.h8
-rw-r--r--fs/ext4/ext4_jbd2.h2
-rw-r--r--fs/ext4/extents.c88
-rw-r--r--fs/ext4/file.c22
-rw-r--r--fs/ext4/fsync.c4
-rw-r--r--fs/ext4/ialloc.c2
-rw-r--r--fs/ext4/inode.c75
-rw-r--r--fs/ext4/mballoc.c55
-rw-r--r--fs/ext4/migrate.c2
-rw-r--r--fs/ext4/namei.c69
-rw-r--r--fs/ext4/page-io.c7
-rw-r--r--fs/ext4/resize.c69
-rw-r--r--fs/ext4/super.c297
-rw-r--r--fs/ext4/xattr.c28
-rw-r--r--fs/fat/inode.c13
-rw-r--r--fs/fat/namei_msdos.c23
-rw-r--r--fs/fat/namei_vfat.c55
-rw-r--r--fs/filesystems.c3
-rw-r--r--fs/freevxfs/vxfs_inode.c9
-rw-r--r--fs/fs_struct.c36
-rw-r--r--fs/fuse/dev.c156
-rw-r--r--fs/fuse/dir.c71
-rw-r--r--fs/fuse/file.c66
-rw-r--r--fs/fuse/fuse_i.h27
-rw-r--r--fs/fuse/inode.c43
-rw-r--r--fs/generic_acl.c20
-rw-r--r--fs/gfs2/acl.c5
-rw-r--r--fs/gfs2/acl.h2
-rw-r--r--fs/gfs2/bmap.c11
-rw-r--r--fs/gfs2/dentry.c22
-rw-r--r--fs/gfs2/export.c4
-rw-r--r--fs/gfs2/file.c2
-rw-r--r--fs/gfs2/glock.c71
-rw-r--r--fs/gfs2/glock.h28
-rw-r--r--fs/gfs2/glops.c1
-rw-r--r--fs/gfs2/incore.h13
-rw-r--r--fs/gfs2/inode.c13
-rw-r--r--fs/gfs2/inode.h2
-rw-r--r--fs/gfs2/lock_dlm.c15
-rw-r--r--fs/gfs2/ops_fstype.c2
-rw-r--r--fs/gfs2/ops_inode.c38
-rw-r--r--fs/gfs2/quota.c13
-rw-r--r--fs/gfs2/rgrp.c57
-rw-r--r--fs/gfs2/rgrp.h1
-rw-r--r--fs/gfs2/super.c9
-rw-r--r--fs/gfs2/xattr.c23
-rw-r--r--fs/hfs/dir.c2
-rw-r--r--fs/hfs/hfs_fs.h8
-rw-r--r--fs/hfs/string.c17
-rw-r--r--fs/hfs/super.c11
-rw-r--r--fs/hfs/sysdep.c7
-rw-r--r--fs/hfsplus/bfind.c6
-rw-r--r--fs/hfsplus/bitmap.c3
-rw-r--r--fs/hfsplus/bnode.c70
-rw-r--r--fs/hfsplus/brec.c28
-rw-r--r--fs/hfsplus/btree.c33
-rw-r--r--fs/hfsplus/catalog.c85
-rw-r--r--fs/hfsplus/dir.c39
-rw-r--r--fs/hfsplus/extents.c96
-rw-r--r--fs/hfsplus/hfsplus_fs.h130
-rw-r--r--fs/hfsplus/hfsplus_raw.h3
-rw-r--r--fs/hfsplus/inode.c89
-rw-r--r--fs/hfsplus/ioctl.c6
-rw-r--r--fs/hfsplus/options.c44
-rw-r--r--fs/hfsplus/part_tbl.c129
-rw-r--r--fs/hfsplus/super.c142
-rw-r--r--fs/hfsplus/unicode.c56
-rw-r--r--fs/hfsplus/wrapper.c178
-rw-r--r--fs/hostfs/hostfs_kern.c44
-rw-r--r--fs/hpfs/dentry.c27
-rw-r--r--fs/hpfs/namei.c2
-rw-r--r--fs/hpfs/super.c9
-rw-r--r--fs/hppfs/hppfs.c9
-rw-r--r--fs/hugetlbfs/inode.c9
-rw-r--r--fs/inode.c50
-rw-r--r--fs/internal.h1
-rw-r--r--fs/isofs/inode.c131
-rw-r--r--fs/isofs/namei.c5
-rw-r--r--fs/jbd2/journal.c34
-rw-r--r--fs/jbd2/recovery.c2
-rw-r--r--fs/jbd2/transaction.c6
-rw-r--r--fs/jffs2/acl.c5
-rw-r--r--fs/jffs2/acl.h2
-rw-r--r--fs/jffs2/super.c9
-rw-r--r--fs/jfs/acl.c8
-rw-r--r--fs/jfs/jfs_acl.h2
-rw-r--r--fs/jfs/namei.c63
-rw-r--r--fs/jfs/super.c12
-rw-r--r--fs/libfs.c63
-rw-r--r--fs/lockd/Makefile6
-rw-r--r--fs/lockd/clnt4xdr.c605
-rw-r--r--fs/lockd/clntlock.c4
-rw-r--r--fs/lockd/clntproc.c18
-rw-r--r--fs/lockd/clntxdr.c627
-rw-r--r--fs/lockd/host.c409
-rw-r--r--fs/lockd/mon.c110
-rw-r--r--fs/lockd/svc4proc.c20
-rw-r--r--fs/lockd/svclock.c34
-rw-r--r--fs/lockd/svcproc.c28
-rw-r--r--fs/lockd/xdr.c287
-rw-r--r--fs/lockd/xdr4.c255
-rw-r--r--fs/locks.c2
-rw-r--r--fs/logfs/dir.c6
-rw-r--r--fs/logfs/inode.c9
-rw-r--r--fs/logfs/journal.c2
-rw-r--r--fs/logfs/readwrite.c3
-rw-r--r--fs/mbcache.c12
-rw-r--r--fs/minix/inode.c9
-rw-r--r--fs/minix/namei.c2
-rw-r--r--fs/namei.c857
-rw-r--r--fs/namespace.c291
-rw-r--r--fs/ncpfs/dir.c88
-rw-r--r--fs/ncpfs/inode.c19
-rw-r--r--fs/ncpfs/ncplib_kernel.h16
-rw-r--r--fs/nfs/callback.c83
-rw-r--r--fs/nfs/callback.h59
-rw-r--r--fs/nfs/callback_proc.c326
-rw-r--r--fs/nfs/callback_xdr.c143
-rw-r--r--fs/nfs/client.c302
-rw-r--r--fs/nfs/delegation.c362
-rw-r--r--fs/nfs/delegation.h1
-rw-r--r--fs/nfs/dir.c104
-rw-r--r--fs/nfs/getroot.c10
-rw-r--r--fs/nfs/idmap.c2
-rw-r--r--fs/nfs/inode.c12
-rw-r--r--fs/nfs/internal.h19
-rw-r--r--fs/nfs/mount_clnt.c83
-rw-r--r--fs/nfs/namespace.c17
-rw-r--r--fs/nfs/nfs2xdr.c1294
-rw-r--r--fs/nfs/nfs3xdr.c2817
-rw-r--r--fs/nfs/nfs4_fs.h13
-rw-r--r--fs/nfs/nfs4filelayout.c6
-rw-r--r--fs/nfs/nfs4proc.c188
-rw-r--r--fs/nfs/nfs4renewd.c11
-rw-r--r--fs/nfs/nfs4state.c293
-rw-r--r--fs/nfs/nfs4xdr.c1426
-rw-r--r--fs/nfs/pagelist.c7
-rw-r--r--fs/nfs/pnfs.c524
-rw-r--r--fs/nfs/pnfs.h76
-rw-r--r--fs/nfs/proc.c5
-rw-r--r--fs/nfs/super.c18
-rw-r--r--fs/nfs/unlink.c4
-rw-r--r--fs/nfsd/nfs4callback.c690
-rw-r--r--fs/nfsd/nfs4state.c2
-rw-r--r--fs/nfsd/vfs.c5
-rw-r--r--fs/nilfs2/bmap.c47
-rw-r--r--fs/nilfs2/btnode.c3
-rw-r--r--fs/nilfs2/dir.c3
-rw-r--r--fs/nilfs2/file.c1
-rw-r--r--fs/nilfs2/ifile.c11
-rw-r--r--fs/nilfs2/inode.c190
-rw-r--r--fs/nilfs2/ioctl.c12
-rw-r--r--fs/nilfs2/mdt.c32
-rw-r--r--fs/nilfs2/namei.c1
-rw-r--r--fs/nilfs2/nilfs.h15
-rw-r--r--fs/nilfs2/page.c86
-rw-r--r--fs/nilfs2/page.h3
-rw-r--r--fs/nilfs2/recovery.c2
-rw-r--r--fs/nilfs2/sb.h8
-rw-r--r--fs/nilfs2/segment.c43
-rw-r--r--fs/nilfs2/super.c42
-rw-r--r--fs/nilfs2/the_nilfs.c6
-rw-r--r--fs/nilfs2/the_nilfs.h3
-rw-r--r--fs/notify/fsnotify.c8
-rw-r--r--fs/ntfs/Makefile2
-rw-r--r--fs/ntfs/file.c35
-rw-r--r--fs/ntfs/inode.c9
-rw-r--r--fs/ntfs/super.c6
-rw-r--r--fs/ocfs2/Kconfig2
-rw-r--r--fs/ocfs2/acl.c8
-rw-r--r--fs/ocfs2/acl.h2
-rw-r--r--fs/ocfs2/alloc.c77
-rw-r--r--fs/ocfs2/alloc.h4
-rw-r--r--fs/ocfs2/aops.c66
-rw-r--r--fs/ocfs2/aops.h23
-rw-r--r--fs/ocfs2/cluster/heartbeat.c249
-rw-r--r--fs/ocfs2/cluster/masklog.c3
-rw-r--r--fs/ocfs2/cluster/masklog.h15
-rw-r--r--fs/ocfs2/cluster/netdebug.c286
-rw-r--r--fs/ocfs2/cluster/quorum.c4
-rw-r--r--fs/ocfs2/cluster/tcp.c145
-rw-r--r--fs/ocfs2/cluster/tcp_internal.h33
-rw-r--r--fs/ocfs2/dcache.c20
-rw-r--r--fs/ocfs2/dir.c4
-rw-r--r--fs/ocfs2/dlm/dlmast.c76
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h86
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c200
-rw-r--r--fs/ocfs2/dlm/dlmdebug.h5
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c10
-rw-r--r--fs/ocfs2/dlm/dlmlock.c3
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c40
-rw-r--r--fs/ocfs2/dlm/dlmthread.c132
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c9
-rw-r--r--fs/ocfs2/export.c4
-rw-r--r--fs/ocfs2/file.c22
-rw-r--r--fs/ocfs2/file.h2
-rw-r--r--fs/ocfs2/namei.c15
-rw-r--r--fs/ocfs2/ocfs2.h5
-rw-r--r--fs/ocfs2/ocfs2_fs.h2
-rw-r--r--fs/ocfs2/super.c9
-rw-r--r--fs/openpromfs/inode.c9
-rw-r--r--fs/pipe.c22
-rw-r--r--fs/pnode.c4
-rw-r--r--fs/proc/Makefile1
-rw-r--r--fs/proc/array.c28
-rw-r--r--fs/proc/base.c177
-rw-r--r--fs/proc/consoles.c114
-rw-r--r--fs/proc/devices.c4
-rw-r--r--fs/proc/generic.c21
-rw-r--r--fs/proc/inode.c16
-rw-r--r--fs/proc/internal.h5
-rw-r--r--fs/proc/kcore.c2
-rw-r--r--fs/proc/page.c2
-rw-r--r--fs/proc/proc_sysctl.c31
-rw-r--r--fs/proc/proc_tty.c26
-rw-r--r--fs/proc/softirqs.c6
-rw-r--r--fs/proc/stat.c2
-rw-r--r--fs/proc/task_mmu.c5
-rw-r--r--fs/proc/task_nommu.c7
-rw-r--r--fs/proc/vmcore.c2
-rw-r--r--fs/qnx4/inode.c9
-rw-r--r--fs/quota/dquot.c18
-rw-r--r--fs/quota/quota_tree.c9
-rw-r--r--fs/reiserfs/prints.c4
-rw-r--r--fs/reiserfs/super.c9
-rw-r--r--fs/reiserfs/xattr.c18
-rw-r--r--fs/romfs/super.c9
-rw-r--r--fs/select.c2
-rw-r--r--fs/squashfs/super.c9
-rw-r--r--fs/super.c5
-rw-r--r--fs/sysfs/dir.c10
-rw-r--r--fs/sysfs/group.c10
-rw-r--r--fs/sysfs/inode.c12
-rw-r--r--fs/sysfs/sysfs.h3
-rw-r--r--fs/sysv/inode.c9
-rw-r--r--fs/sysv/namei.c5
-rw-r--r--fs/sysv/super.c2
-rw-r--r--fs/ubifs/super.c10
-rw-r--r--fs/udf/Kconfig1
-rw-r--r--fs/udf/balloc.c3
-rw-r--r--fs/udf/dir.c5
-rw-r--r--fs/udf/file.c11
-rw-r--r--fs/udf/ialloc.c21
-rw-r--r--fs/udf/inode.c51
-rw-r--r--fs/udf/namei.c107
-rw-r--r--fs/udf/partition.c27
-rw-r--r--fs/udf/super.c76
-rw-r--r--fs/udf/symlink.c12
-rw-r--r--fs/udf/udf_i.h13
-rw-r--r--fs/udf/udf_sb.h22
-rw-r--r--fs/udf/udfdecl.h4
-rw-r--r--fs/ufs/super.c9
-rw-r--r--fs/xfs/linux-2.6/sv.h59
-rw-r--r--fs/xfs/linux-2.6/xfs_acl.c11
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c425
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.h16
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c235
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h22
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c12
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c22
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c92
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h59
-rw-r--r--fs/xfs/quota/xfs_dquot.c1
-rw-r--r--fs/xfs/xfs_acl.h2
-rw-r--r--fs/xfs/xfs_ag.h2
-rw-r--r--fs/xfs/xfs_alloc.c351
-rw-r--r--fs/xfs/xfs_attr_leaf.c4
-rw-r--r--fs/xfs/xfs_btree.c9
-rw-r--r--fs/xfs/xfs_buf_item.c32
-rw-r--r--fs/xfs/xfs_buf_item.h11
-rw-r--r--fs/xfs/xfs_extfree_item.c97
-rw-r--r--fs/xfs/xfs_extfree_item.h11
-rw-r--r--fs/xfs/xfs_fsops.c1
-rw-r--r--fs/xfs/xfs_iget.c90
-rw-r--r--fs/xfs/xfs_inode.c54
-rw-r--r--fs/xfs/xfs_inode.h15
-rw-r--r--fs/xfs/xfs_inode_item.c90
-rw-r--r--fs/xfs/xfs_iomap.c233
-rw-r--r--fs/xfs/xfs_iomap.h27
-rw-r--r--fs/xfs/xfs_log.c739
-rw-r--r--fs/xfs/xfs_log_cil.c17
-rw-r--r--fs/xfs/xfs_log_priv.h127
-rw-r--r--fs/xfs/xfs_log_recover.c620
-rw-r--r--fs/xfs/xfs_mount.c23
-rw-r--r--fs/xfs/xfs_mount.h14
-rw-r--r--fs/xfs/xfs_mru_cache.c2
-rw-r--r--fs/xfs/xfs_trans.c79
-rw-r--r--fs/xfs/xfs_trans.h2
-rw-r--r--fs/xfs/xfs_trans_ail.c232
-rw-r--r--fs/xfs/xfs_trans_extfree.c8
-rw-r--r--fs/xfs/xfs_trans_priv.h35
-rw-r--r--fs/xfs/xfs_vnodeops.c61
389 files changed, 18157 insertions, 10853 deletions
diff --git a/fs/9p/Kconfig b/fs/9p/Kconfig
index 7e051147679..814ac4e213a 100644
--- a/fs/9p/Kconfig
+++ b/fs/9p/Kconfig
@@ -9,6 +9,8 @@ config 9P_FS
9 9
10 If unsure, say N. 10 If unsure, say N.
11 11
12if 9P_FS
13
12config 9P_FSCACHE 14config 9P_FSCACHE
13 bool "Enable 9P client caching support (EXPERIMENTAL)" 15 bool "Enable 9P client caching support (EXPERIMENTAL)"
14 depends on EXPERIMENTAL 16 depends on EXPERIMENTAL
@@ -20,7 +22,6 @@ config 9P_FSCACHE
20 22
21config 9P_FS_POSIX_ACL 23config 9P_FS_POSIX_ACL
22 bool "9P POSIX Access Control Lists" 24 bool "9P POSIX Access Control Lists"
23 depends on 9P_FS
24 select FS_POSIX_ACL 25 select FS_POSIX_ACL
25 help 26 help
26 POSIX Access Control Lists (ACLs) support permissions for users and 27 POSIX Access Control Lists (ACLs) support permissions for users and
@@ -30,3 +31,5 @@ config 9P_FS_POSIX_ACL
30 Linux website <http://acl.bestbits.at/>. 31 Linux website <http://acl.bestbits.at/>.
31 32
32 If you don't know what Access Control Lists are, say N 33 If you don't know what Access Control Lists are, say N
34
35endif
diff --git a/fs/9p/Makefile b/fs/9p/Makefile
index f8ba37effd1..ab8c1278063 100644
--- a/fs/9p/Makefile
+++ b/fs/9p/Makefile
@@ -3,6 +3,7 @@ obj-$(CONFIG_9P_FS) := 9p.o
39p-objs := \ 39p-objs := \
4 vfs_super.o \ 4 vfs_super.o \
5 vfs_inode.o \ 5 vfs_inode.o \
6 vfs_inode_dotl.o \
6 vfs_addr.o \ 7 vfs_addr.o \
7 vfs_file.o \ 8 vfs_file.o \
8 vfs_dir.o \ 9 vfs_dir.o \
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 12d602351db..02a2cf61631 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -28,7 +28,7 @@ static struct posix_acl *__v9fs_get_acl(struct p9_fid *fid, char *name)
28{ 28{
29 ssize_t size; 29 ssize_t size;
30 void *value = NULL; 30 void *value = NULL;
31 struct posix_acl *acl = NULL;; 31 struct posix_acl *acl = NULL;
32 32
33 size = v9fs_fid_xattr_get(fid, name, NULL, 0); 33 size = v9fs_fid_xattr_get(fid, name, NULL, 0);
34 if (size > 0) { 34 if (size > 0) {
@@ -91,11 +91,14 @@ static struct posix_acl *v9fs_get_cached_acl(struct inode *inode, int type)
91 return acl; 91 return acl;
92} 92}
93 93
94int v9fs_check_acl(struct inode *inode, int mask) 94int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags)
95{ 95{
96 struct posix_acl *acl; 96 struct posix_acl *acl;
97 struct v9fs_session_info *v9ses; 97 struct v9fs_session_info *v9ses;
98 98
99 if (flags & IPERM_FLAG_RCU)
100 return -ECHILD;
101
99 v9ses = v9fs_inode2v9ses(inode); 102 v9ses = v9fs_inode2v9ses(inode);
100 if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) { 103 if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) {
101 /* 104 /*
@@ -362,7 +365,7 @@ static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name,
362 case ACL_TYPE_DEFAULT: 365 case ACL_TYPE_DEFAULT:
363 name = POSIX_ACL_XATTR_DEFAULT; 366 name = POSIX_ACL_XATTR_DEFAULT;
364 if (!S_ISDIR(inode->i_mode)) { 367 if (!S_ISDIR(inode->i_mode)) {
365 retval = -EINVAL; 368 retval = acl ? -EINVAL : 0;
366 goto err_out; 369 goto err_out;
367 } 370 }
368 break; 371 break;
diff --git a/fs/9p/acl.h b/fs/9p/acl.h
index 59e18c2e8c7..7ef3ac9f6d9 100644
--- a/fs/9p/acl.h
+++ b/fs/9p/acl.h
@@ -16,7 +16,7 @@
16 16
17#ifdef CONFIG_9P_FS_POSIX_ACL 17#ifdef CONFIG_9P_FS_POSIX_ACL
18extern int v9fs_get_acl(struct inode *, struct p9_fid *); 18extern int v9fs_get_acl(struct inode *, struct p9_fid *);
19extern int v9fs_check_acl(struct inode *inode, int mask); 19extern int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags);
20extern int v9fs_acl_chmod(struct dentry *); 20extern int v9fs_acl_chmod(struct dentry *);
21extern int v9fs_set_create_acl(struct dentry *, 21extern int v9fs_set_create_acl(struct dentry *,
22 struct posix_acl *, struct posix_acl *); 22 struct posix_acl *, struct posix_acl *);
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index cb6396855e2..c4b5d8864f0 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -113,9 +113,27 @@ struct v9fs_session_info {
113 113
114struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *, 114struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *,
115 char *); 115 char *);
116void v9fs_session_close(struct v9fs_session_info *v9ses); 116extern void v9fs_session_close(struct v9fs_session_info *v9ses);
117void v9fs_session_cancel(struct v9fs_session_info *v9ses); 117extern void v9fs_session_cancel(struct v9fs_session_info *v9ses);
118void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses); 118extern void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses);
119extern struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
120 struct nameidata *nameidata);
121extern int v9fs_vfs_unlink(struct inode *i, struct dentry *d);
122extern int v9fs_vfs_rmdir(struct inode *i, struct dentry *d);
123extern int v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
124 struct inode *new_dir, struct dentry *new_dentry);
125extern void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd,
126 void *p);
127extern struct inode *v9fs_inode(struct v9fs_session_info *v9ses,
128 struct p9_fid *fid,
129 struct super_block *sb);
130
131extern const struct inode_operations v9fs_dir_inode_operations_dotl;
132extern const struct inode_operations v9fs_file_inode_operations_dotl;
133extern const struct inode_operations v9fs_symlink_inode_operations_dotl;
134extern struct inode *v9fs_inode_dotl(struct v9fs_session_info *v9ses,
135 struct p9_fid *fid,
136 struct super_block *sb);
119 137
120/* other default globals */ 138/* other default globals */
121#define V9FS_PORT 564 139#define V9FS_PORT 564
@@ -138,3 +156,21 @@ static inline int v9fs_proto_dotl(struct v9fs_session_info *v9ses)
138{ 156{
139 return v9ses->flags & V9FS_PROTO_2000L; 157 return v9ses->flags & V9FS_PROTO_2000L;
140} 158}
159
160/**
161 * v9fs_inode_from_fid - Helper routine to populate an inode by
162 * issuing a attribute request
163 * @v9ses: session information
164 * @fid: fid to issue attribute request for
165 * @sb: superblock on which to create inode
166 *
167 */
168static inline struct inode *
169v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
170 struct super_block *sb)
171{
172 if (v9fs_proto_dotl(v9ses))
173 return v9fs_inode_dotl(v9ses, fid, sb);
174 else
175 return v9fs_inode(v9ses, fid, sb);
176}
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index cbf4e50f393..466d2a4fc5c 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -51,7 +51,7 @@
51 * 51 *
52 */ 52 */
53 53
54static int v9fs_dentry_delete(struct dentry *dentry) 54static int v9fs_dentry_delete(const struct dentry *dentry)
55{ 55{
56 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name, 56 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name,
57 dentry); 57 dentry);
@@ -68,7 +68,7 @@ static int v9fs_dentry_delete(struct dentry *dentry)
68 * 68 *
69 */ 69 */
70 70
71static int v9fs_cached_dentry_delete(struct dentry *dentry) 71static int v9fs_cached_dentry_delete(const struct dentry *dentry)
72{ 72{
73 struct inode *inode = dentry->d_inode; 73 struct inode *inode = dentry->d_inode;
74 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name, 74 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name,
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 34bf71b5654..5076eeb9550 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -49,15 +49,8 @@
49 49
50static const struct inode_operations v9fs_dir_inode_operations; 50static const struct inode_operations v9fs_dir_inode_operations;
51static const struct inode_operations v9fs_dir_inode_operations_dotu; 51static const struct inode_operations v9fs_dir_inode_operations_dotu;
52static const struct inode_operations v9fs_dir_inode_operations_dotl;
53static const struct inode_operations v9fs_file_inode_operations; 52static const struct inode_operations v9fs_file_inode_operations;
54static const struct inode_operations v9fs_file_inode_operations_dotl;
55static const struct inode_operations v9fs_symlink_inode_operations; 53static const struct inode_operations v9fs_symlink_inode_operations;
56static const struct inode_operations v9fs_symlink_inode_operations_dotl;
57
58static int
59v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
60 dev_t rdev);
61 54
62/** 55/**
63 * unixmode2p9mode - convert unix mode bits to plan 9 56 * unixmode2p9mode - convert unix mode bits to plan 9
@@ -237,46 +230,18 @@ struct inode *v9fs_alloc_inode(struct super_block *sb)
237 * 230 *
238 */ 231 */
239 232
240void v9fs_destroy_inode(struct inode *inode) 233static void v9fs_i_callback(struct rcu_head *head)
241{ 234{
235 struct inode *inode = container_of(head, struct inode, i_rcu);
236 INIT_LIST_HEAD(&inode->i_dentry);
242 kmem_cache_free(vcookie_cache, v9fs_inode2cookie(inode)); 237 kmem_cache_free(vcookie_cache, v9fs_inode2cookie(inode));
243} 238}
244#endif
245 239
246/** 240void v9fs_destroy_inode(struct inode *inode)
247 * v9fs_get_fsgid_for_create - Helper function to get the gid for creating a
248 * new file system object. This checks the S_ISGID to determine the owning
249 * group of the new file system object.
250 */
251
252static gid_t v9fs_get_fsgid_for_create(struct inode *dir_inode)
253{
254 BUG_ON(dir_inode == NULL);
255
256 if (dir_inode->i_mode & S_ISGID) {
257 /* set_gid bit is set.*/
258 return dir_inode->i_gid;
259 }
260 return current_fsgid();
261}
262
263/**
264 * v9fs_dentry_from_dir_inode - helper function to get the dentry from
265 * dir inode.
266 *
267 */
268
269static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode)
270{ 241{
271 struct dentry *dentry; 242 call_rcu(&inode->i_rcu, v9fs_i_callback);
272
273 spin_lock(&dcache_lock);
274 /* Directory should have only one entry. */
275 BUG_ON(S_ISDIR(inode->i_mode) && !list_is_singular(&inode->i_dentry));
276 dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias);
277 spin_unlock(&dcache_lock);
278 return dentry;
279} 243}
244#endif
280 245
281/** 246/**
282 * v9fs_get_inode - helper function to setup an inode 247 * v9fs_get_inode - helper function to setup an inode
@@ -447,7 +412,7 @@ void v9fs_evict_inode(struct inode *inode)
447#endif 412#endif
448} 413}
449 414
450static struct inode * 415struct inode *
451v9fs_inode(struct v9fs_session_info *v9ses, struct p9_fid *fid, 416v9fs_inode(struct v9fs_session_info *v9ses, struct p9_fid *fid,
452 struct super_block *sb) 417 struct super_block *sb)
453{ 418{
@@ -482,60 +447,6 @@ error:
482 return ERR_PTR(err); 447 return ERR_PTR(err);
483} 448}
484 449
485static struct inode *
486v9fs_inode_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid,
487 struct super_block *sb)
488{
489 struct inode *ret = NULL;
490 int err;
491 struct p9_stat_dotl *st;
492
493 st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
494 if (IS_ERR(st))
495 return ERR_CAST(st);
496
497 ret = v9fs_get_inode(sb, st->st_mode);
498 if (IS_ERR(ret)) {
499 err = PTR_ERR(ret);
500 goto error;
501 }
502
503 v9fs_stat2inode_dotl(st, ret);
504 ret->i_ino = v9fs_qid2ino(&st->qid);
505#ifdef CONFIG_9P_FSCACHE
506 v9fs_vcookie_set_qid(ret, &st->qid);
507 v9fs_cache_inode_get_cookie(ret);
508#endif
509 err = v9fs_get_acl(ret, fid);
510 if (err) {
511 iput(ret);
512 goto error;
513 }
514 kfree(st);
515 return ret;
516error:
517 kfree(st);
518 return ERR_PTR(err);
519}
520
521/**
522 * v9fs_inode_from_fid - Helper routine to populate an inode by
523 * issuing a attribute request
524 * @v9ses: session information
525 * @fid: fid to issue attribute request for
526 * @sb: superblock on which to create inode
527 *
528 */
529static inline struct inode *
530v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
531 struct super_block *sb)
532{
533 if (v9fs_proto_dotl(v9ses))
534 return v9fs_inode_dotl(v9ses, fid, sb);
535 else
536 return v9fs_inode(v9ses, fid, sb);
537}
538
539/** 450/**
540 * v9fs_remove - helper function to remove files and directories 451 * v9fs_remove - helper function to remove files and directories
541 * @dir: directory inode that is being deleted 452 * @dir: directory inode that is being deleted
@@ -626,12 +537,6 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
626 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err); 537 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
627 goto error; 538 goto error;
628 } 539 }
629
630 if (v9ses->cache)
631 dentry->d_op = &v9fs_cached_dentry_operations;
632 else
633 dentry->d_op = &v9fs_dentry_operations;
634
635 d_instantiate(dentry, inode); 540 d_instantiate(dentry, inode);
636 err = v9fs_fid_add(dentry, fid); 541 err = v9fs_fid_add(dentry, fid);
637 if (err < 0) 542 if (err < 0)
@@ -650,144 +555,6 @@ error:
650} 555}
651 556
652/** 557/**
653 * v9fs_vfs_create_dotl - VFS hook to create files for 9P2000.L protocol.
654 * @dir: directory inode that is being created
655 * @dentry: dentry that is being deleted
656 * @mode: create permissions
657 * @nd: path information
658 *
659 */
660
661static int
662v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
663 struct nameidata *nd)
664{
665 int err = 0;
666 char *name = NULL;
667 gid_t gid;
668 int flags;
669 mode_t mode;
670 struct v9fs_session_info *v9ses;
671 struct p9_fid *fid = NULL;
672 struct p9_fid *dfid, *ofid;
673 struct file *filp;
674 struct p9_qid qid;
675 struct inode *inode;
676 struct posix_acl *pacl = NULL, *dacl = NULL;
677
678 v9ses = v9fs_inode2v9ses(dir);
679 if (nd && nd->flags & LOOKUP_OPEN)
680 flags = nd->intent.open.flags - 1;
681 else {
682 /*
683 * create call without LOOKUP_OPEN is due
684 * to mknod of regular files. So use mknod
685 * operation.
686 */
687 return v9fs_vfs_mknod_dotl(dir, dentry, omode, 0);
688 }
689
690 name = (char *) dentry->d_name.name;
691 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_create_dotl: name:%s flags:0x%x "
692 "mode:0x%x\n", name, flags, omode);
693
694 dfid = v9fs_fid_lookup(dentry->d_parent);
695 if (IS_ERR(dfid)) {
696 err = PTR_ERR(dfid);
697 P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
698 return err;
699 }
700
701 /* clone a fid to use for creation */
702 ofid = p9_client_walk(dfid, 0, NULL, 1);
703 if (IS_ERR(ofid)) {
704 err = PTR_ERR(ofid);
705 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
706 return err;
707 }
708
709 gid = v9fs_get_fsgid_for_create(dir);
710
711 mode = omode;
712 /* Update mode based on ACL value */
713 err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
714 if (err) {
715 P9_DPRINTK(P9_DEBUG_VFS,
716 "Failed to get acl values in creat %d\n", err);
717 goto error;
718 }
719 err = p9_client_create_dotl(ofid, name, flags, mode, gid, &qid);
720 if (err < 0) {
721 P9_DPRINTK(P9_DEBUG_VFS,
722 "p9_client_open_dotl failed in creat %d\n",
723 err);
724 goto error;
725 }
726 /* instantiate inode and assign the unopened fid to the dentry */
727 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE ||
728 (nd && nd->flags & LOOKUP_OPEN)) {
729 fid = p9_client_walk(dfid, 1, &name, 1);
730 if (IS_ERR(fid)) {
731 err = PTR_ERR(fid);
732 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
733 err);
734 fid = NULL;
735 goto error;
736 }
737
738 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
739 if (IS_ERR(inode)) {
740 err = PTR_ERR(inode);
741 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
742 err);
743 goto error;
744 }
745 dentry->d_op = &v9fs_cached_dentry_operations;
746 d_instantiate(dentry, inode);
747 err = v9fs_fid_add(dentry, fid);
748 if (err < 0)
749 goto error;
750 /* The fid would get clunked via a dput */
751 fid = NULL;
752 } else {
753 /*
754 * Not in cached mode. No need to populate
755 * inode with stat. We need to get an inode
756 * so that we can set the acl with dentry
757 */
758 inode = v9fs_get_inode(dir->i_sb, mode);
759 if (IS_ERR(inode)) {
760 err = PTR_ERR(inode);
761 goto error;
762 }
763 dentry->d_op = &v9fs_dentry_operations;
764 d_instantiate(dentry, inode);
765 }
766 /* Now set the ACL based on the default value */
767 v9fs_set_create_acl(dentry, dacl, pacl);
768
769 /* if we are opening a file, assign the open fid to the file */
770 if (nd && nd->flags & LOOKUP_OPEN) {
771 filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
772 if (IS_ERR(filp)) {
773 p9_client_clunk(ofid);
774 return PTR_ERR(filp);
775 }
776 filp->private_data = ofid;
777 } else
778 p9_client_clunk(ofid);
779
780 return 0;
781
782error:
783 if (ofid)
784 p9_client_clunk(ofid);
785 if (fid)
786 p9_client_clunk(fid);
787 return err;
788}
789
790/**
791 * v9fs_vfs_create - VFS hook to create files 558 * v9fs_vfs_create - VFS hook to create files
792 * @dir: directory inode that is being created 559 * @dir: directory inode that is being created
793 * @dentry: dentry that is being deleted 560 * @dentry: dentry that is being deleted
@@ -877,107 +644,6 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
877 return err; 644 return err;
878} 645}
879 646
880
881/**
882 * v9fs_vfs_mkdir_dotl - VFS mkdir hook to create a directory
883 * @dir: inode that is being unlinked
884 * @dentry: dentry that is being unlinked
885 * @mode: mode for new directory
886 *
887 */
888
889static int v9fs_vfs_mkdir_dotl(struct inode *dir,
890 struct dentry *dentry, int omode)
891{
892 int err;
893 struct v9fs_session_info *v9ses;
894 struct p9_fid *fid = NULL, *dfid = NULL;
895 gid_t gid;
896 char *name;
897 mode_t mode;
898 struct inode *inode;
899 struct p9_qid qid;
900 struct dentry *dir_dentry;
901 struct posix_acl *dacl = NULL, *pacl = NULL;
902
903 P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
904 err = 0;
905 v9ses = v9fs_inode2v9ses(dir);
906
907 omode |= S_IFDIR;
908 if (dir->i_mode & S_ISGID)
909 omode |= S_ISGID;
910
911 dir_dentry = v9fs_dentry_from_dir_inode(dir);
912 dfid = v9fs_fid_lookup(dir_dentry);
913 if (IS_ERR(dfid)) {
914 err = PTR_ERR(dfid);
915 P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
916 dfid = NULL;
917 goto error;
918 }
919
920 gid = v9fs_get_fsgid_for_create(dir);
921 mode = omode;
922 /* Update mode based on ACL value */
923 err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
924 if (err) {
925 P9_DPRINTK(P9_DEBUG_VFS,
926 "Failed to get acl values in mkdir %d\n", err);
927 goto error;
928 }
929 name = (char *) dentry->d_name.name;
930 err = p9_client_mkdir_dotl(dfid, name, mode, gid, &qid);
931 if (err < 0)
932 goto error;
933
934 /* instantiate inode and assign the unopened fid to the dentry */
935 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
936 fid = p9_client_walk(dfid, 1, &name, 1);
937 if (IS_ERR(fid)) {
938 err = PTR_ERR(fid);
939 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
940 err);
941 fid = NULL;
942 goto error;
943 }
944
945 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
946 if (IS_ERR(inode)) {
947 err = PTR_ERR(inode);
948 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
949 err);
950 goto error;
951 }
952 dentry->d_op = &v9fs_cached_dentry_operations;
953 d_instantiate(dentry, inode);
954 err = v9fs_fid_add(dentry, fid);
955 if (err < 0)
956 goto error;
957 fid = NULL;
958 } else {
959 /*
960 * Not in cached mode. No need to populate
961 * inode with stat. We need to get an inode
962 * so that we can set the acl with dentry
963 */
964 inode = v9fs_get_inode(dir->i_sb, mode);
965 if (IS_ERR(inode)) {
966 err = PTR_ERR(inode);
967 goto error;
968 }
969 dentry->d_op = &v9fs_dentry_operations;
970 d_instantiate(dentry, inode);
971 }
972 /* Now set the ACL based on the default value */
973 v9fs_set_create_acl(dentry, dacl, pacl);
974
975error:
976 if (fid)
977 p9_client_clunk(fid);
978 return err;
979}
980
981/** 647/**
982 * v9fs_vfs_lookup - VFS lookup hook to "walk" to a new inode 648 * v9fs_vfs_lookup - VFS lookup hook to "walk" to a new inode
983 * @dir: inode that is being walked from 649 * @dir: inode that is being walked from
@@ -986,7 +652,7 @@ error:
986 * 652 *
987 */ 653 */
988 654
989static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, 655struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
990 struct nameidata *nameidata) 656 struct nameidata *nameidata)
991{ 657{
992 struct super_block *sb; 658 struct super_block *sb;
@@ -1034,9 +700,9 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
1034 700
1035inst_out: 701inst_out:
1036 if (v9ses->cache) 702 if (v9ses->cache)
1037 dentry->d_op = &v9fs_cached_dentry_operations; 703 d_set_d_op(dentry, &v9fs_cached_dentry_operations);
1038 else 704 else
1039 dentry->d_op = &v9fs_dentry_operations; 705 d_set_d_op(dentry, &v9fs_dentry_operations);
1040 706
1041 d_add(dentry, inode); 707 d_add(dentry, inode);
1042 return NULL; 708 return NULL;
@@ -1056,7 +722,7 @@ error:
1056 * 722 *
1057 */ 723 */
1058 724
1059static int v9fs_vfs_unlink(struct inode *i, struct dentry *d) 725int v9fs_vfs_unlink(struct inode *i, struct dentry *d)
1060{ 726{
1061 return v9fs_remove(i, d, 0); 727 return v9fs_remove(i, d, 0);
1062} 728}
@@ -1068,7 +734,7 @@ static int v9fs_vfs_unlink(struct inode *i, struct dentry *d)
1068 * 734 *
1069 */ 735 */
1070 736
1071static int v9fs_vfs_rmdir(struct inode *i, struct dentry *d) 737int v9fs_vfs_rmdir(struct inode *i, struct dentry *d)
1072{ 738{
1073 return v9fs_remove(i, d, 1); 739 return v9fs_remove(i, d, 1);
1074} 740}
@@ -1082,7 +748,7 @@ static int v9fs_vfs_rmdir(struct inode *i, struct dentry *d)
1082 * 748 *
1083 */ 749 */
1084 750
1085static int 751int
1086v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, 752v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1087 struct inode *new_dir, struct dentry *new_dentry) 753 struct inode *new_dir, struct dentry *new_dentry)
1088{ 754{
@@ -1189,42 +855,6 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
1189 return 0; 855 return 0;
1190} 856}
1191 857
1192static int
1193v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry,
1194 struct kstat *stat)
1195{
1196 int err;
1197 struct v9fs_session_info *v9ses;
1198 struct p9_fid *fid;
1199 struct p9_stat_dotl *st;
1200
1201 P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
1202 err = -EPERM;
1203 v9ses = v9fs_inode2v9ses(dentry->d_inode);
1204 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
1205 return simple_getattr(mnt, dentry, stat);
1206
1207 fid = v9fs_fid_lookup(dentry);
1208 if (IS_ERR(fid))
1209 return PTR_ERR(fid);
1210
1211 /* Ask for all the fields in stat structure. Server will return
1212 * whatever it supports
1213 */
1214
1215 st = p9_client_getattr_dotl(fid, P9_STATS_ALL);
1216 if (IS_ERR(st))
1217 return PTR_ERR(st);
1218
1219 v9fs_stat2inode_dotl(st, dentry->d_inode);
1220 generic_fillattr(dentry->d_inode, stat);
1221 /* Change block size to what the server returned */
1222 stat->blksize = st->st_blksize;
1223
1224 kfree(st);
1225 return 0;
1226}
1227
1228/** 858/**
1229 * v9fs_vfs_setattr - set file metadata 859 * v9fs_vfs_setattr - set file metadata
1230 * @dentry: file whose metadata to set 860 * @dentry: file whose metadata to set
@@ -1284,64 +914,6 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
1284} 914}
1285 915
1286/** 916/**
1287 * v9fs_vfs_setattr_dotl - set file metadata
1288 * @dentry: file whose metadata to set
1289 * @iattr: metadata assignment structure
1290 *
1291 */
1292
1293int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
1294{
1295 int retval;
1296 struct v9fs_session_info *v9ses;
1297 struct p9_fid *fid;
1298 struct p9_iattr_dotl p9attr;
1299
1300 P9_DPRINTK(P9_DEBUG_VFS, "\n");
1301
1302 retval = inode_change_ok(dentry->d_inode, iattr);
1303 if (retval)
1304 return retval;
1305
1306 p9attr.valid = iattr->ia_valid;
1307 p9attr.mode = iattr->ia_mode;
1308 p9attr.uid = iattr->ia_uid;
1309 p9attr.gid = iattr->ia_gid;
1310 p9attr.size = iattr->ia_size;
1311 p9attr.atime_sec = iattr->ia_atime.tv_sec;
1312 p9attr.atime_nsec = iattr->ia_atime.tv_nsec;
1313 p9attr.mtime_sec = iattr->ia_mtime.tv_sec;
1314 p9attr.mtime_nsec = iattr->ia_mtime.tv_nsec;
1315
1316 retval = -EPERM;
1317 v9ses = v9fs_inode2v9ses(dentry->d_inode);
1318 fid = v9fs_fid_lookup(dentry);
1319 if (IS_ERR(fid))
1320 return PTR_ERR(fid);
1321
1322 retval = p9_client_setattr(fid, &p9attr);
1323 if (retval < 0)
1324 return retval;
1325
1326 if ((iattr->ia_valid & ATTR_SIZE) &&
1327 iattr->ia_size != i_size_read(dentry->d_inode)) {
1328 retval = vmtruncate(dentry->d_inode, iattr->ia_size);
1329 if (retval)
1330 return retval;
1331 }
1332
1333 setattr_copy(dentry->d_inode, iattr);
1334 mark_inode_dirty(dentry->d_inode);
1335 if (iattr->ia_valid & ATTR_MODE) {
1336 /* We also want to update ACL when we update mode bits */
1337 retval = v9fs_acl_chmod(dentry);
1338 if (retval < 0)
1339 return retval;
1340 }
1341 return 0;
1342}
1343
1344/**
1345 * v9fs_stat2inode - populate an inode structure with mistat info 917 * v9fs_stat2inode - populate an inode structure with mistat info
1346 * @stat: Plan 9 metadata (mistat) structure 918 * @stat: Plan 9 metadata (mistat) structure
1347 * @inode: inode to populate 919 * @inode: inode to populate
@@ -1419,77 +991,6 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
1419} 991}
1420 992
1421/** 993/**
1422 * v9fs_stat2inode_dotl - populate an inode structure with stat info
1423 * @stat: stat structure
1424 * @inode: inode to populate
1425 * @sb: superblock of filesystem
1426 *
1427 */
1428
1429void
1430v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
1431{
1432
1433 if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) {
1434 inode->i_atime.tv_sec = stat->st_atime_sec;
1435 inode->i_atime.tv_nsec = stat->st_atime_nsec;
1436 inode->i_mtime.tv_sec = stat->st_mtime_sec;
1437 inode->i_mtime.tv_nsec = stat->st_mtime_nsec;
1438 inode->i_ctime.tv_sec = stat->st_ctime_sec;
1439 inode->i_ctime.tv_nsec = stat->st_ctime_nsec;
1440 inode->i_uid = stat->st_uid;
1441 inode->i_gid = stat->st_gid;
1442 inode->i_nlink = stat->st_nlink;
1443 inode->i_mode = stat->st_mode;
1444 inode->i_rdev = new_decode_dev(stat->st_rdev);
1445
1446 if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode)))
1447 init_special_inode(inode, inode->i_mode, inode->i_rdev);
1448
1449 i_size_write(inode, stat->st_size);
1450 inode->i_blocks = stat->st_blocks;
1451 } else {
1452 if (stat->st_result_mask & P9_STATS_ATIME) {
1453 inode->i_atime.tv_sec = stat->st_atime_sec;
1454 inode->i_atime.tv_nsec = stat->st_atime_nsec;
1455 }
1456 if (stat->st_result_mask & P9_STATS_MTIME) {
1457 inode->i_mtime.tv_sec = stat->st_mtime_sec;
1458 inode->i_mtime.tv_nsec = stat->st_mtime_nsec;
1459 }
1460 if (stat->st_result_mask & P9_STATS_CTIME) {
1461 inode->i_ctime.tv_sec = stat->st_ctime_sec;
1462 inode->i_ctime.tv_nsec = stat->st_ctime_nsec;
1463 }
1464 if (stat->st_result_mask & P9_STATS_UID)
1465 inode->i_uid = stat->st_uid;
1466 if (stat->st_result_mask & P9_STATS_GID)
1467 inode->i_gid = stat->st_gid;
1468 if (stat->st_result_mask & P9_STATS_NLINK)
1469 inode->i_nlink = stat->st_nlink;
1470 if (stat->st_result_mask & P9_STATS_MODE) {
1471 inode->i_mode = stat->st_mode;
1472 if ((S_ISBLK(inode->i_mode)) ||
1473 (S_ISCHR(inode->i_mode)))
1474 init_special_inode(inode, inode->i_mode,
1475 inode->i_rdev);
1476 }
1477 if (stat->st_result_mask & P9_STATS_RDEV)
1478 inode->i_rdev = new_decode_dev(stat->st_rdev);
1479 if (stat->st_result_mask & P9_STATS_SIZE)
1480 i_size_write(inode, stat->st_size);
1481 if (stat->st_result_mask & P9_STATS_BLOCKS)
1482 inode->i_blocks = stat->st_blocks;
1483 }
1484 if (stat->st_result_mask & P9_STATS_GEN)
1485 inode->i_generation = stat->st_gen;
1486
1487 /* Currently we don't support P9_STATS_BTIME and P9_STATS_DATA_VERSION
1488 * because the inode structure does not have fields for them.
1489 */
1490}
1491
1492/**
1493 * v9fs_qid2ino - convert qid into inode number 994 * v9fs_qid2ino - convert qid into inode number
1494 * @qid: qid to hash 995 * @qid: qid to hash
1495 * 996 *
@@ -1595,7 +1096,7 @@ static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd)
1595 * 1096 *
1596 */ 1097 */
1597 1098
1598static void 1099void
1599v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p) 1100v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
1600{ 1101{
1601 char *s = nd_get_link(nd); 1102 char *s = nd_get_link(nd);
@@ -1639,94 +1140,6 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
1639} 1140}
1640 1141
1641/** 1142/**
1642 * v9fs_vfs_symlink_dotl - helper function to create symlinks
1643 * @dir: directory inode containing symlink
1644 * @dentry: dentry for symlink
1645 * @symname: symlink data
1646 *
1647 * See Also: 9P2000.L RFC for more information
1648 *
1649 */
1650
1651static int
1652v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
1653 const char *symname)
1654{
1655 struct v9fs_session_info *v9ses;
1656 struct p9_fid *dfid;
1657 struct p9_fid *fid = NULL;
1658 struct inode *inode;
1659 struct p9_qid qid;
1660 char *name;
1661 int err;
1662 gid_t gid;
1663
1664 name = (char *) dentry->d_name.name;
1665 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_symlink_dotl : %lu,%s,%s\n",
1666 dir->i_ino, name, symname);
1667 v9ses = v9fs_inode2v9ses(dir);
1668
1669 dfid = v9fs_fid_lookup(dentry->d_parent);
1670 if (IS_ERR(dfid)) {
1671 err = PTR_ERR(dfid);
1672 P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
1673 return err;
1674 }
1675
1676 gid = v9fs_get_fsgid_for_create(dir);
1677
1678 /* Server doesn't alter fid on TSYMLINK. Hence no need to clone it. */
1679 err = p9_client_symlink(dfid, name, (char *)symname, gid, &qid);
1680
1681 if (err < 0) {
1682 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_symlink failed %d\n", err);
1683 goto error;
1684 }
1685
1686 if (v9ses->cache) {
1687 /* Now walk from the parent so we can get an unopened fid. */
1688 fid = p9_client_walk(dfid, 1, &name, 1);
1689 if (IS_ERR(fid)) {
1690 err = PTR_ERR(fid);
1691 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
1692 err);
1693 fid = NULL;
1694 goto error;
1695 }
1696
1697 /* instantiate inode and assign the unopened fid to dentry */
1698 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
1699 if (IS_ERR(inode)) {
1700 err = PTR_ERR(inode);
1701 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
1702 err);
1703 goto error;
1704 }
1705 dentry->d_op = &v9fs_cached_dentry_operations;
1706 d_instantiate(dentry, inode);
1707 err = v9fs_fid_add(dentry, fid);
1708 if (err < 0)
1709 goto error;
1710 fid = NULL;
1711 } else {
1712 /* Not in cached mode. No need to populate inode with stat */
1713 inode = v9fs_get_inode(dir->i_sb, S_IFLNK);
1714 if (IS_ERR(inode)) {
1715 err = PTR_ERR(inode);
1716 goto error;
1717 }
1718 dentry->d_op = &v9fs_dentry_operations;
1719 d_instantiate(dentry, inode);
1720 }
1721
1722error:
1723 if (fid)
1724 p9_client_clunk(fid);
1725
1726 return err;
1727}
1728
1729/**
1730 * v9fs_vfs_symlink - helper function to create symlinks 1143 * v9fs_vfs_symlink - helper function to create symlinks
1731 * @dir: directory inode containing symlink 1144 * @dir: directory inode containing symlink
1732 * @dentry: dentry for symlink 1145 * @dentry: dentry for symlink
@@ -1785,77 +1198,6 @@ clunk_fid:
1785} 1198}
1786 1199
1787/** 1200/**
1788 * v9fs_vfs_link_dotl - create a hardlink for dotl
1789 * @old_dentry: dentry for file to link to
1790 * @dir: inode destination for new link
1791 * @dentry: dentry for link
1792 *
1793 */
1794
1795static int
1796v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
1797 struct dentry *dentry)
1798{
1799 int err;
1800 struct p9_fid *dfid, *oldfid;
1801 char *name;
1802 struct v9fs_session_info *v9ses;
1803 struct dentry *dir_dentry;
1804
1805 P9_DPRINTK(P9_DEBUG_VFS, "dir ino: %lu, old_name: %s, new_name: %s\n",
1806 dir->i_ino, old_dentry->d_name.name,
1807 dentry->d_name.name);
1808
1809 v9ses = v9fs_inode2v9ses(dir);
1810 dir_dentry = v9fs_dentry_from_dir_inode(dir);
1811 dfid = v9fs_fid_lookup(dir_dentry);
1812 if (IS_ERR(dfid))
1813 return PTR_ERR(dfid);
1814
1815 oldfid = v9fs_fid_lookup(old_dentry);
1816 if (IS_ERR(oldfid))
1817 return PTR_ERR(oldfid);
1818
1819 name = (char *) dentry->d_name.name;
1820
1821 err = p9_client_link(dfid, oldfid, (char *)dentry->d_name.name);
1822
1823 if (err < 0) {
1824 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_link failed %d\n", err);
1825 return err;
1826 }
1827
1828 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
1829 /* Get the latest stat info from server. */
1830 struct p9_fid *fid;
1831 struct p9_stat_dotl *st;
1832
1833 fid = v9fs_fid_lookup(old_dentry);
1834 if (IS_ERR(fid))
1835 return PTR_ERR(fid);
1836
1837 st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
1838 if (IS_ERR(st))
1839 return PTR_ERR(st);
1840
1841 v9fs_stat2inode_dotl(st, old_dentry->d_inode);
1842
1843 kfree(st);
1844 } else {
1845 /* Caching disabled. No need to get upto date stat info.
1846 * This dentry will be released immediately. So, just hold the
1847 * inode
1848 */
1849 ihold(old_dentry->d_inode);
1850 }
1851
1852 dentry->d_op = old_dentry->d_op;
1853 d_instantiate(dentry, old_dentry->d_inode);
1854
1855 return err;
1856}
1857
1858/**
1859 * v9fs_vfs_mknod - create a special file 1201 * v9fs_vfs_mknod - create a special file
1860 * @dir: inode destination for new link 1202 * @dir: inode destination for new link
1861 * @dentry: dentry for file 1203 * @dentry: dentry for file
@@ -1900,160 +1242,6 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
1900 return retval; 1242 return retval;
1901} 1243}
1902 1244
1903/**
1904 * v9fs_vfs_mknod_dotl - create a special file
1905 * @dir: inode destination for new link
1906 * @dentry: dentry for file
1907 * @mode: mode for creation
1908 * @rdev: device associated with special file
1909 *
1910 */
1911static int
1912v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
1913 dev_t rdev)
1914{
1915 int err;
1916 char *name;
1917 mode_t mode;
1918 struct v9fs_session_info *v9ses;
1919 struct p9_fid *fid = NULL, *dfid = NULL;
1920 struct inode *inode;
1921 gid_t gid;
1922 struct p9_qid qid;
1923 struct dentry *dir_dentry;
1924 struct posix_acl *dacl = NULL, *pacl = NULL;
1925
1926 P9_DPRINTK(P9_DEBUG_VFS,
1927 " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
1928 dentry->d_name.name, omode, MAJOR(rdev), MINOR(rdev));
1929
1930 if (!new_valid_dev(rdev))
1931 return -EINVAL;
1932
1933 v9ses = v9fs_inode2v9ses(dir);
1934 dir_dentry = v9fs_dentry_from_dir_inode(dir);
1935 dfid = v9fs_fid_lookup(dir_dentry);
1936 if (IS_ERR(dfid)) {
1937 err = PTR_ERR(dfid);
1938 P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
1939 dfid = NULL;
1940 goto error;
1941 }
1942
1943 gid = v9fs_get_fsgid_for_create(dir);
1944 mode = omode;
1945 /* Update mode based on ACL value */
1946 err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
1947 if (err) {
1948 P9_DPRINTK(P9_DEBUG_VFS,
1949 "Failed to get acl values in mknod %d\n", err);
1950 goto error;
1951 }
1952 name = (char *) dentry->d_name.name;
1953
1954 err = p9_client_mknod_dotl(dfid, name, mode, rdev, gid, &qid);
1955 if (err < 0)
1956 goto error;
1957
1958 /* instantiate inode and assign the unopened fid to the dentry */
1959 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
1960 fid = p9_client_walk(dfid, 1, &name, 1);
1961 if (IS_ERR(fid)) {
1962 err = PTR_ERR(fid);
1963 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
1964 err);
1965 fid = NULL;
1966 goto error;
1967 }
1968
1969 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
1970 if (IS_ERR(inode)) {
1971 err = PTR_ERR(inode);
1972 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
1973 err);
1974 goto error;
1975 }
1976 dentry->d_op = &v9fs_cached_dentry_operations;
1977 d_instantiate(dentry, inode);
1978 err = v9fs_fid_add(dentry, fid);
1979 if (err < 0)
1980 goto error;
1981 fid = NULL;
1982 } else {
1983 /*
1984 * Not in cached mode. No need to populate inode with stat.
1985 * socket syscall returns a fd, so we need instantiate
1986 */
1987 inode = v9fs_get_inode(dir->i_sb, mode);
1988 if (IS_ERR(inode)) {
1989 err = PTR_ERR(inode);
1990 goto error;
1991 }
1992 dentry->d_op = &v9fs_dentry_operations;
1993 d_instantiate(dentry, inode);
1994 }
1995 /* Now set the ACL based on the default value */
1996 v9fs_set_create_acl(dentry, dacl, pacl);
1997error:
1998 if (fid)
1999 p9_client_clunk(fid);
2000 return err;
2001}
2002
2003static int
2004v9fs_vfs_readlink_dotl(struct dentry *dentry, char *buffer, int buflen)
2005{
2006 int retval;
2007 struct p9_fid *fid;
2008 char *target = NULL;
2009
2010 P9_DPRINTK(P9_DEBUG_VFS, " %s\n", dentry->d_name.name);
2011 retval = -EPERM;
2012 fid = v9fs_fid_lookup(dentry);
2013 if (IS_ERR(fid))
2014 return PTR_ERR(fid);
2015
2016 retval = p9_client_readlink(fid, &target);
2017 if (retval < 0)
2018 return retval;
2019
2020 strncpy(buffer, target, buflen);
2021 P9_DPRINTK(P9_DEBUG_VFS, "%s -> %s\n", dentry->d_name.name, buffer);
2022
2023 retval = strnlen(buffer, buflen);
2024 return retval;
2025}
2026
2027/**
2028 * v9fs_vfs_follow_link_dotl - follow a symlink path
2029 * @dentry: dentry for symlink
2030 * @nd: nameidata
2031 *
2032 */
2033
2034static void *
2035v9fs_vfs_follow_link_dotl(struct dentry *dentry, struct nameidata *nd)
2036{
2037 int len = 0;
2038 char *link = __getname();
2039
2040 P9_DPRINTK(P9_DEBUG_VFS, "%s n", dentry->d_name.name);
2041
2042 if (!link)
2043 link = ERR_PTR(-ENOMEM);
2044 else {
2045 len = v9fs_vfs_readlink_dotl(dentry, link, PATH_MAX);
2046 if (len < 0) {
2047 __putname(link);
2048 link = ERR_PTR(len);
2049 } else
2050 link[min(len, PATH_MAX-1)] = 0;
2051 }
2052 nd_set_link(nd, link);
2053
2054 return NULL;
2055}
2056
2057static const struct inode_operations v9fs_dir_inode_operations_dotu = { 1245static const struct inode_operations v9fs_dir_inode_operations_dotu = {
2058 .create = v9fs_vfs_create, 1246 .create = v9fs_vfs_create,
2059 .lookup = v9fs_vfs_lookup, 1247 .lookup = v9fs_vfs_lookup,
@@ -2068,25 +1256,6 @@ static const struct inode_operations v9fs_dir_inode_operations_dotu = {
2068 .setattr = v9fs_vfs_setattr, 1256 .setattr = v9fs_vfs_setattr,
2069}; 1257};
2070 1258
2071static const struct inode_operations v9fs_dir_inode_operations_dotl = {
2072 .create = v9fs_vfs_create_dotl,
2073 .lookup = v9fs_vfs_lookup,
2074 .link = v9fs_vfs_link_dotl,
2075 .symlink = v9fs_vfs_symlink_dotl,
2076 .unlink = v9fs_vfs_unlink,
2077 .mkdir = v9fs_vfs_mkdir_dotl,
2078 .rmdir = v9fs_vfs_rmdir,
2079 .mknod = v9fs_vfs_mknod_dotl,
2080 .rename = v9fs_vfs_rename,
2081 .getattr = v9fs_vfs_getattr_dotl,
2082 .setattr = v9fs_vfs_setattr_dotl,
2083 .setxattr = generic_setxattr,
2084 .getxattr = generic_getxattr,
2085 .removexattr = generic_removexattr,
2086 .listxattr = v9fs_listxattr,
2087 .check_acl = v9fs_check_acl,
2088};
2089
2090static const struct inode_operations v9fs_dir_inode_operations = { 1259static const struct inode_operations v9fs_dir_inode_operations = {
2091 .create = v9fs_vfs_create, 1260 .create = v9fs_vfs_create,
2092 .lookup = v9fs_vfs_lookup, 1261 .lookup = v9fs_vfs_lookup,
@@ -2104,16 +1273,6 @@ static const struct inode_operations v9fs_file_inode_operations = {
2104 .setattr = v9fs_vfs_setattr, 1273 .setattr = v9fs_vfs_setattr,
2105}; 1274};
2106 1275
2107static const struct inode_operations v9fs_file_inode_operations_dotl = {
2108 .getattr = v9fs_vfs_getattr_dotl,
2109 .setattr = v9fs_vfs_setattr_dotl,
2110 .setxattr = generic_setxattr,
2111 .getxattr = generic_getxattr,
2112 .removexattr = generic_removexattr,
2113 .listxattr = v9fs_listxattr,
2114 .check_acl = v9fs_check_acl,
2115};
2116
2117static const struct inode_operations v9fs_symlink_inode_operations = { 1276static const struct inode_operations v9fs_symlink_inode_operations = {
2118 .readlink = generic_readlink, 1277 .readlink = generic_readlink,
2119 .follow_link = v9fs_vfs_follow_link, 1278 .follow_link = v9fs_vfs_follow_link,
@@ -2122,14 +1281,3 @@ static const struct inode_operations v9fs_symlink_inode_operations = {
2122 .setattr = v9fs_vfs_setattr, 1281 .setattr = v9fs_vfs_setattr,
2123}; 1282};
2124 1283
2125static const struct inode_operations v9fs_symlink_inode_operations_dotl = {
2126 .readlink = v9fs_vfs_readlink_dotl,
2127 .follow_link = v9fs_vfs_follow_link_dotl,
2128 .put_link = v9fs_vfs_put_link,
2129 .getattr = v9fs_vfs_getattr_dotl,
2130 .setattr = v9fs_vfs_setattr_dotl,
2131 .setxattr = generic_setxattr,
2132 .getxattr = generic_getxattr,
2133 .removexattr = generic_removexattr,
2134 .listxattr = v9fs_listxattr,
2135};
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
new file mode 100644
index 00000000000..fe3ffa9aace
--- /dev/null
+++ b/fs/9p/vfs_inode_dotl.c
@@ -0,0 +1,824 @@
1/*
2 * linux/fs/9p/vfs_inode_dotl.c
3 *
4 * This file contains vfs inode ops for the 9P2000.L protocol.
5 *
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2
11 * as published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to:
20 * Free Software Foundation
21 * 51 Franklin Street, Fifth Floor
22 * Boston, MA 02111-1301 USA
23 *
24 */
25
26#include <linux/module.h>
27#include <linux/errno.h>
28#include <linux/fs.h>
29#include <linux/file.h>
30#include <linux/pagemap.h>
31#include <linux/stat.h>
32#include <linux/string.h>
33#include <linux/inet.h>
34#include <linux/namei.h>
35#include <linux/idr.h>
36#include <linux/sched.h>
37#include <linux/slab.h>
38#include <linux/xattr.h>
39#include <linux/posix_acl.h>
40#include <net/9p/9p.h>
41#include <net/9p/client.h>
42
43#include "v9fs.h"
44#include "v9fs_vfs.h"
45#include "fid.h"
46#include "cache.h"
47#include "xattr.h"
48#include "acl.h"
49
50static int
51v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
52 dev_t rdev);
53
54/**
55 * v9fs_get_fsgid_for_create - Helper function to get the gid for creating a
56 * new file system object. This checks the S_ISGID to determine the owning
57 * group of the new file system object.
58 */
59
60static gid_t v9fs_get_fsgid_for_create(struct inode *dir_inode)
61{
62 BUG_ON(dir_inode == NULL);
63
64 if (dir_inode->i_mode & S_ISGID) {
65 /* set_gid bit is set.*/
66 return dir_inode->i_gid;
67 }
68 return current_fsgid();
69}
70
71/**
72 * v9fs_dentry_from_dir_inode - helper function to get the dentry from
73 * dir inode.
74 *
75 */
76
77static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode)
78{
79 struct dentry *dentry;
80
81 spin_lock(&inode->i_lock);
82 /* Directory should have only one entry. */
83 BUG_ON(S_ISDIR(inode->i_mode) && !list_is_singular(&inode->i_dentry));
84 dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias);
85 spin_unlock(&inode->i_lock);
86 return dentry;
87}
88
89struct inode *
90v9fs_inode_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid,
91 struct super_block *sb)
92{
93 struct inode *ret = NULL;
94 int err;
95 struct p9_stat_dotl *st;
96
97 st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
98 if (IS_ERR(st))
99 return ERR_CAST(st);
100
101 ret = v9fs_get_inode(sb, st->st_mode);
102 if (IS_ERR(ret)) {
103 err = PTR_ERR(ret);
104 goto error;
105 }
106
107 v9fs_stat2inode_dotl(st, ret);
108 ret->i_ino = v9fs_qid2ino(&st->qid);
109#ifdef CONFIG_9P_FSCACHE
110 v9fs_vcookie_set_qid(ret, &st->qid);
111 v9fs_cache_inode_get_cookie(ret);
112#endif
113 err = v9fs_get_acl(ret, fid);
114 if (err) {
115 iput(ret);
116 goto error;
117 }
118 kfree(st);
119 return ret;
120error:
121 kfree(st);
122 return ERR_PTR(err);
123}
124
125/**
126 * v9fs_vfs_create_dotl - VFS hook to create files for 9P2000.L protocol.
127 * @dir: directory inode that is being created
128 * @dentry: dentry that is being deleted
129 * @mode: create permissions
130 * @nd: path information
131 *
132 */
133
134static int
135v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
136 struct nameidata *nd)
137{
138 int err = 0;
139 char *name = NULL;
140 gid_t gid;
141 int flags;
142 mode_t mode;
143 struct v9fs_session_info *v9ses;
144 struct p9_fid *fid = NULL;
145 struct p9_fid *dfid, *ofid;
146 struct file *filp;
147 struct p9_qid qid;
148 struct inode *inode;
149 struct posix_acl *pacl = NULL, *dacl = NULL;
150
151 v9ses = v9fs_inode2v9ses(dir);
152 if (nd && nd->flags & LOOKUP_OPEN)
153 flags = nd->intent.open.flags - 1;
154 else {
155 /*
156 * create call without LOOKUP_OPEN is due
157 * to mknod of regular files. So use mknod
158 * operation.
159 */
160 return v9fs_vfs_mknod_dotl(dir, dentry, omode, 0);
161 }
162
163 name = (char *) dentry->d_name.name;
164 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_create_dotl: name:%s flags:0x%x "
165 "mode:0x%x\n", name, flags, omode);
166
167 dfid = v9fs_fid_lookup(dentry->d_parent);
168 if (IS_ERR(dfid)) {
169 err = PTR_ERR(dfid);
170 P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
171 return err;
172 }
173
174 /* clone a fid to use for creation */
175 ofid = p9_client_walk(dfid, 0, NULL, 1);
176 if (IS_ERR(ofid)) {
177 err = PTR_ERR(ofid);
178 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
179 return err;
180 }
181
182 gid = v9fs_get_fsgid_for_create(dir);
183
184 mode = omode;
185 /* Update mode based on ACL value */
186 err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
187 if (err) {
188 P9_DPRINTK(P9_DEBUG_VFS,
189 "Failed to get acl values in creat %d\n", err);
190 goto error;
191 }
192 err = p9_client_create_dotl(ofid, name, flags, mode, gid, &qid);
193 if (err < 0) {
194 P9_DPRINTK(P9_DEBUG_VFS,
195 "p9_client_open_dotl failed in creat %d\n",
196 err);
197 goto error;
198 }
199
200 /* instantiate inode and assign the unopened fid to the dentry */
201 fid = p9_client_walk(dfid, 1, &name, 1);
202 if (IS_ERR(fid)) {
203 err = PTR_ERR(fid);
204 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
205 fid = NULL;
206 goto error;
207 }
208 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
209 if (IS_ERR(inode)) {
210 err = PTR_ERR(inode);
211 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
212 goto error;
213 }
214 d_instantiate(dentry, inode);
215 err = v9fs_fid_add(dentry, fid);
216 if (err < 0)
217 goto error;
218
219 /* Now set the ACL based on the default value */
220 v9fs_set_create_acl(dentry, dacl, pacl);
221
222 /* Since we are opening a file, assign the open fid to the file */
223 filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
224 if (IS_ERR(filp)) {
225 p9_client_clunk(ofid);
226 return PTR_ERR(filp);
227 }
228 filp->private_data = ofid;
229 return 0;
230
231error:
232 if (ofid)
233 p9_client_clunk(ofid);
234 if (fid)
235 p9_client_clunk(fid);
236 return err;
237}
238
239/**
240 * v9fs_vfs_mkdir_dotl - VFS mkdir hook to create a directory
241 * @dir: inode that is being unlinked
242 * @dentry: dentry that is being unlinked
243 * @mode: mode for new directory
244 *
245 */
246
247static int v9fs_vfs_mkdir_dotl(struct inode *dir,
248 struct dentry *dentry, int omode)
249{
250 int err;
251 struct v9fs_session_info *v9ses;
252 struct p9_fid *fid = NULL, *dfid = NULL;
253 gid_t gid;
254 char *name;
255 mode_t mode;
256 struct inode *inode;
257 struct p9_qid qid;
258 struct dentry *dir_dentry;
259 struct posix_acl *dacl = NULL, *pacl = NULL;
260
261 P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
262 err = 0;
263 v9ses = v9fs_inode2v9ses(dir);
264
265 omode |= S_IFDIR;
266 if (dir->i_mode & S_ISGID)
267 omode |= S_ISGID;
268
269 dir_dentry = v9fs_dentry_from_dir_inode(dir);
270 dfid = v9fs_fid_lookup(dir_dentry);
271 if (IS_ERR(dfid)) {
272 err = PTR_ERR(dfid);
273 P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
274 dfid = NULL;
275 goto error;
276 }
277
278 gid = v9fs_get_fsgid_for_create(dir);
279 mode = omode;
280 /* Update mode based on ACL value */
281 err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
282 if (err) {
283 P9_DPRINTK(P9_DEBUG_VFS,
284 "Failed to get acl values in mkdir %d\n", err);
285 goto error;
286 }
287 name = (char *) dentry->d_name.name;
288 err = p9_client_mkdir_dotl(dfid, name, mode, gid, &qid);
289 if (err < 0)
290 goto error;
291
292 /* instantiate inode and assign the unopened fid to the dentry */
293 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
294 fid = p9_client_walk(dfid, 1, &name, 1);
295 if (IS_ERR(fid)) {
296 err = PTR_ERR(fid);
297 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
298 err);
299 fid = NULL;
300 goto error;
301 }
302
303 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
304 if (IS_ERR(inode)) {
305 err = PTR_ERR(inode);
306 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
307 err);
308 goto error;
309 }
310 d_instantiate(dentry, inode);
311 err = v9fs_fid_add(dentry, fid);
312 if (err < 0)
313 goto error;
314 fid = NULL;
315 } else {
316 /*
317 * Not in cached mode. No need to populate
318 * inode with stat. We need to get an inode
319 * so that we can set the acl with dentry
320 */
321 inode = v9fs_get_inode(dir->i_sb, mode);
322 if (IS_ERR(inode)) {
323 err = PTR_ERR(inode);
324 goto error;
325 }
326 d_instantiate(dentry, inode);
327 }
328 /* Now set the ACL based on the default value */
329 v9fs_set_create_acl(dentry, dacl, pacl);
330
331error:
332 if (fid)
333 p9_client_clunk(fid);
334 return err;
335}
336
337static int
338v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry,
339 struct kstat *stat)
340{
341 int err;
342 struct v9fs_session_info *v9ses;
343 struct p9_fid *fid;
344 struct p9_stat_dotl *st;
345
346 P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
347 err = -EPERM;
348 v9ses = v9fs_inode2v9ses(dentry->d_inode);
349 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
350 return simple_getattr(mnt, dentry, stat);
351
352 fid = v9fs_fid_lookup(dentry);
353 if (IS_ERR(fid))
354 return PTR_ERR(fid);
355
356 /* Ask for all the fields in stat structure. Server will return
357 * whatever it supports
358 */
359
360 st = p9_client_getattr_dotl(fid, P9_STATS_ALL);
361 if (IS_ERR(st))
362 return PTR_ERR(st);
363
364 v9fs_stat2inode_dotl(st, dentry->d_inode);
365 generic_fillattr(dentry->d_inode, stat);
366 /* Change block size to what the server returned */
367 stat->blksize = st->st_blksize;
368
369 kfree(st);
370 return 0;
371}
372
373/**
374 * v9fs_vfs_setattr_dotl - set file metadata
375 * @dentry: file whose metadata to set
376 * @iattr: metadata assignment structure
377 *
378 */
379
380int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
381{
382 int retval;
383 struct v9fs_session_info *v9ses;
384 struct p9_fid *fid;
385 struct p9_iattr_dotl p9attr;
386
387 P9_DPRINTK(P9_DEBUG_VFS, "\n");
388
389 retval = inode_change_ok(dentry->d_inode, iattr);
390 if (retval)
391 return retval;
392
393 p9attr.valid = iattr->ia_valid;
394 p9attr.mode = iattr->ia_mode;
395 p9attr.uid = iattr->ia_uid;
396 p9attr.gid = iattr->ia_gid;
397 p9attr.size = iattr->ia_size;
398 p9attr.atime_sec = iattr->ia_atime.tv_sec;
399 p9attr.atime_nsec = iattr->ia_atime.tv_nsec;
400 p9attr.mtime_sec = iattr->ia_mtime.tv_sec;
401 p9attr.mtime_nsec = iattr->ia_mtime.tv_nsec;
402
403 retval = -EPERM;
404 v9ses = v9fs_inode2v9ses(dentry->d_inode);
405 fid = v9fs_fid_lookup(dentry);
406 if (IS_ERR(fid))
407 return PTR_ERR(fid);
408
409 retval = p9_client_setattr(fid, &p9attr);
410 if (retval < 0)
411 return retval;
412
413 if ((iattr->ia_valid & ATTR_SIZE) &&
414 iattr->ia_size != i_size_read(dentry->d_inode)) {
415 retval = vmtruncate(dentry->d_inode, iattr->ia_size);
416 if (retval)
417 return retval;
418 }
419
420 setattr_copy(dentry->d_inode, iattr);
421 mark_inode_dirty(dentry->d_inode);
422 if (iattr->ia_valid & ATTR_MODE) {
423 /* We also want to update ACL when we update mode bits */
424 retval = v9fs_acl_chmod(dentry);
425 if (retval < 0)
426 return retval;
427 }
428 return 0;
429}
430
431/**
432 * v9fs_stat2inode_dotl - populate an inode structure with stat info
433 * @stat: stat structure
434 * @inode: inode to populate
435 * @sb: superblock of filesystem
436 *
437 */
438
439void
440v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
441{
442
443 if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) {
444 inode->i_atime.tv_sec = stat->st_atime_sec;
445 inode->i_atime.tv_nsec = stat->st_atime_nsec;
446 inode->i_mtime.tv_sec = stat->st_mtime_sec;
447 inode->i_mtime.tv_nsec = stat->st_mtime_nsec;
448 inode->i_ctime.tv_sec = stat->st_ctime_sec;
449 inode->i_ctime.tv_nsec = stat->st_ctime_nsec;
450 inode->i_uid = stat->st_uid;
451 inode->i_gid = stat->st_gid;
452 inode->i_nlink = stat->st_nlink;
453 inode->i_mode = stat->st_mode;
454 inode->i_rdev = new_decode_dev(stat->st_rdev);
455
456 if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode)))
457 init_special_inode(inode, inode->i_mode, inode->i_rdev);
458
459 i_size_write(inode, stat->st_size);
460 inode->i_blocks = stat->st_blocks;
461 } else {
462 if (stat->st_result_mask & P9_STATS_ATIME) {
463 inode->i_atime.tv_sec = stat->st_atime_sec;
464 inode->i_atime.tv_nsec = stat->st_atime_nsec;
465 }
466 if (stat->st_result_mask & P9_STATS_MTIME) {
467 inode->i_mtime.tv_sec = stat->st_mtime_sec;
468 inode->i_mtime.tv_nsec = stat->st_mtime_nsec;
469 }
470 if (stat->st_result_mask & P9_STATS_CTIME) {
471 inode->i_ctime.tv_sec = stat->st_ctime_sec;
472 inode->i_ctime.tv_nsec = stat->st_ctime_nsec;
473 }
474 if (stat->st_result_mask & P9_STATS_UID)
475 inode->i_uid = stat->st_uid;
476 if (stat->st_result_mask & P9_STATS_GID)
477 inode->i_gid = stat->st_gid;
478 if (stat->st_result_mask & P9_STATS_NLINK)
479 inode->i_nlink = stat->st_nlink;
480 if (stat->st_result_mask & P9_STATS_MODE) {
481 inode->i_mode = stat->st_mode;
482 if ((S_ISBLK(inode->i_mode)) ||
483 (S_ISCHR(inode->i_mode)))
484 init_special_inode(inode, inode->i_mode,
485 inode->i_rdev);
486 }
487 if (stat->st_result_mask & P9_STATS_RDEV)
488 inode->i_rdev = new_decode_dev(stat->st_rdev);
489 if (stat->st_result_mask & P9_STATS_SIZE)
490 i_size_write(inode, stat->st_size);
491 if (stat->st_result_mask & P9_STATS_BLOCKS)
492 inode->i_blocks = stat->st_blocks;
493 }
494 if (stat->st_result_mask & P9_STATS_GEN)
495 inode->i_generation = stat->st_gen;
496
497 /* Currently we don't support P9_STATS_BTIME and P9_STATS_DATA_VERSION
498 * because the inode structure does not have fields for them.
499 */
500}
501
502static int
503v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
504 const char *symname)
505{
506 struct v9fs_session_info *v9ses;
507 struct p9_fid *dfid;
508 struct p9_fid *fid = NULL;
509 struct inode *inode;
510 struct p9_qid qid;
511 char *name;
512 int err;
513 gid_t gid;
514
515 name = (char *) dentry->d_name.name;
516 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_symlink_dotl : %lu,%s,%s\n",
517 dir->i_ino, name, symname);
518 v9ses = v9fs_inode2v9ses(dir);
519
520 dfid = v9fs_fid_lookup(dentry->d_parent);
521 if (IS_ERR(dfid)) {
522 err = PTR_ERR(dfid);
523 P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
524 return err;
525 }
526
527 gid = v9fs_get_fsgid_for_create(dir);
528
529 /* Server doesn't alter fid on TSYMLINK. Hence no need to clone it. */
530 err = p9_client_symlink(dfid, name, (char *)symname, gid, &qid);
531
532 if (err < 0) {
533 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_symlink failed %d\n", err);
534 goto error;
535 }
536
537 if (v9ses->cache) {
538 /* Now walk from the parent so we can get an unopened fid. */
539 fid = p9_client_walk(dfid, 1, &name, 1);
540 if (IS_ERR(fid)) {
541 err = PTR_ERR(fid);
542 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
543 err);
544 fid = NULL;
545 goto error;
546 }
547
548 /* instantiate inode and assign the unopened fid to dentry */
549 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
550 if (IS_ERR(inode)) {
551 err = PTR_ERR(inode);
552 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
553 err);
554 goto error;
555 }
556 d_instantiate(dentry, inode);
557 err = v9fs_fid_add(dentry, fid);
558 if (err < 0)
559 goto error;
560 fid = NULL;
561 } else {
562 /* Not in cached mode. No need to populate inode with stat */
563 inode = v9fs_get_inode(dir->i_sb, S_IFLNK);
564 if (IS_ERR(inode)) {
565 err = PTR_ERR(inode);
566 goto error;
567 }
568 d_instantiate(dentry, inode);
569 }
570
571error:
572 if (fid)
573 p9_client_clunk(fid);
574
575 return err;
576}
577
578/**
579 * v9fs_vfs_link_dotl - create a hardlink for dotl
580 * @old_dentry: dentry for file to link to
581 * @dir: inode destination for new link
582 * @dentry: dentry for link
583 *
584 */
585
586static int
587v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
588 struct dentry *dentry)
589{
590 int err;
591 struct p9_fid *dfid, *oldfid;
592 char *name;
593 struct v9fs_session_info *v9ses;
594 struct dentry *dir_dentry;
595
596 P9_DPRINTK(P9_DEBUG_VFS, "dir ino: %lu, old_name: %s, new_name: %s\n",
597 dir->i_ino, old_dentry->d_name.name,
598 dentry->d_name.name);
599
600 v9ses = v9fs_inode2v9ses(dir);
601 dir_dentry = v9fs_dentry_from_dir_inode(dir);
602 dfid = v9fs_fid_lookup(dir_dentry);
603 if (IS_ERR(dfid))
604 return PTR_ERR(dfid);
605
606 oldfid = v9fs_fid_lookup(old_dentry);
607 if (IS_ERR(oldfid))
608 return PTR_ERR(oldfid);
609
610 name = (char *) dentry->d_name.name;
611
612 err = p9_client_link(dfid, oldfid, (char *)dentry->d_name.name);
613
614 if (err < 0) {
615 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_link failed %d\n", err);
616 return err;
617 }
618
619 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
620 /* Get the latest stat info from server. */
621 struct p9_fid *fid;
622 struct p9_stat_dotl *st;
623
624 fid = v9fs_fid_lookup(old_dentry);
625 if (IS_ERR(fid))
626 return PTR_ERR(fid);
627
628 st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
629 if (IS_ERR(st))
630 return PTR_ERR(st);
631
632 v9fs_stat2inode_dotl(st, old_dentry->d_inode);
633
634 kfree(st);
635 } else {
636 /* Caching disabled. No need to get upto date stat info.
637 * This dentry will be released immediately. So, just hold the
638 * inode
639 */
640 ihold(old_dentry->d_inode);
641 }
642 d_instantiate(dentry, old_dentry->d_inode);
643
644 return err;
645}
646
647/**
648 * v9fs_vfs_mknod_dotl - create a special file
649 * @dir: inode destination for new link
650 * @dentry: dentry for file
651 * @mode: mode for creation
652 * @rdev: device associated with special file
653 *
654 */
655static int
656v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
657 dev_t rdev)
658{
659 int err;
660 char *name;
661 mode_t mode;
662 struct v9fs_session_info *v9ses;
663 struct p9_fid *fid = NULL, *dfid = NULL;
664 struct inode *inode;
665 gid_t gid;
666 struct p9_qid qid;
667 struct dentry *dir_dentry;
668 struct posix_acl *dacl = NULL, *pacl = NULL;
669
670 P9_DPRINTK(P9_DEBUG_VFS,
671 " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
672 dentry->d_name.name, omode, MAJOR(rdev), MINOR(rdev));
673
674 if (!new_valid_dev(rdev))
675 return -EINVAL;
676
677 v9ses = v9fs_inode2v9ses(dir);
678 dir_dentry = v9fs_dentry_from_dir_inode(dir);
679 dfid = v9fs_fid_lookup(dir_dentry);
680 if (IS_ERR(dfid)) {
681 err = PTR_ERR(dfid);
682 P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
683 dfid = NULL;
684 goto error;
685 }
686
687 gid = v9fs_get_fsgid_for_create(dir);
688 mode = omode;
689 /* Update mode based on ACL value */
690 err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
691 if (err) {
692 P9_DPRINTK(P9_DEBUG_VFS,
693 "Failed to get acl values in mknod %d\n", err);
694 goto error;
695 }
696 name = (char *) dentry->d_name.name;
697
698 err = p9_client_mknod_dotl(dfid, name, mode, rdev, gid, &qid);
699 if (err < 0)
700 goto error;
701
702 /* instantiate inode and assign the unopened fid to the dentry */
703 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
704 fid = p9_client_walk(dfid, 1, &name, 1);
705 if (IS_ERR(fid)) {
706 err = PTR_ERR(fid);
707 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
708 err);
709 fid = NULL;
710 goto error;
711 }
712
713 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
714 if (IS_ERR(inode)) {
715 err = PTR_ERR(inode);
716 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
717 err);
718 goto error;
719 }
720 d_instantiate(dentry, inode);
721 err = v9fs_fid_add(dentry, fid);
722 if (err < 0)
723 goto error;
724 fid = NULL;
725 } else {
726 /*
727 * Not in cached mode. No need to populate inode with stat.
728 * socket syscall returns a fd, so we need instantiate
729 */
730 inode = v9fs_get_inode(dir->i_sb, mode);
731 if (IS_ERR(inode)) {
732 err = PTR_ERR(inode);
733 goto error;
734 }
735 d_instantiate(dentry, inode);
736 }
737 /* Now set the ACL based on the default value */
738 v9fs_set_create_acl(dentry, dacl, pacl);
739error:
740 if (fid)
741 p9_client_clunk(fid);
742 return err;
743}
744
745/**
746 * v9fs_vfs_follow_link_dotl - follow a symlink path
747 * @dentry: dentry for symlink
748 * @nd: nameidata
749 *
750 */
751
752static void *
753v9fs_vfs_follow_link_dotl(struct dentry *dentry, struct nameidata *nd)
754{
755 int retval;
756 struct p9_fid *fid;
757 char *link = __getname();
758 char *target;
759
760 P9_DPRINTK(P9_DEBUG_VFS, "%s\n", dentry->d_name.name);
761
762 if (!link) {
763 link = ERR_PTR(-ENOMEM);
764 goto ndset;
765 }
766 fid = v9fs_fid_lookup(dentry);
767 if (IS_ERR(fid)) {
768 __putname(link);
769 link = ERR_PTR(PTR_ERR(fid));
770 goto ndset;
771 }
772 retval = p9_client_readlink(fid, &target);
773 if (!retval) {
774 strcpy(link, target);
775 kfree(target);
776 goto ndset;
777 }
778 __putname(link);
779 link = ERR_PTR(retval);
780ndset:
781 nd_set_link(nd, link);
782 return NULL;
783}
784
785const struct inode_operations v9fs_dir_inode_operations_dotl = {
786 .create = v9fs_vfs_create_dotl,
787 .lookup = v9fs_vfs_lookup,
788 .link = v9fs_vfs_link_dotl,
789 .symlink = v9fs_vfs_symlink_dotl,
790 .unlink = v9fs_vfs_unlink,
791 .mkdir = v9fs_vfs_mkdir_dotl,
792 .rmdir = v9fs_vfs_rmdir,
793 .mknod = v9fs_vfs_mknod_dotl,
794 .rename = v9fs_vfs_rename,
795 .getattr = v9fs_vfs_getattr_dotl,
796 .setattr = v9fs_vfs_setattr_dotl,
797 .setxattr = generic_setxattr,
798 .getxattr = generic_getxattr,
799 .removexattr = generic_removexattr,
800 .listxattr = v9fs_listxattr,
801 .check_acl = v9fs_check_acl,
802};
803
804const struct inode_operations v9fs_file_inode_operations_dotl = {
805 .getattr = v9fs_vfs_getattr_dotl,
806 .setattr = v9fs_vfs_setattr_dotl,
807 .setxattr = generic_setxattr,
808 .getxattr = generic_getxattr,
809 .removexattr = generic_removexattr,
810 .listxattr = v9fs_listxattr,
811 .check_acl = v9fs_check_acl,
812};
813
814const struct inode_operations v9fs_symlink_inode_operations_dotl = {
815 .readlink = generic_readlink,
816 .follow_link = v9fs_vfs_follow_link_dotl,
817 .put_link = v9fs_vfs_put_link,
818 .getattr = v9fs_vfs_getattr_dotl,
819 .setattr = v9fs_vfs_setattr_dotl,
820 .setxattr = generic_setxattr,
821 .getxattr = generic_getxattr,
822 .removexattr = generic_removexattr,
823 .listxattr = v9fs_listxattr,
824};
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
index 43ec7df8433..d288773871b 100644
--- a/fs/9p/xattr.c
+++ b/fs/9p/xattr.c
@@ -133,7 +133,7 @@ int v9fs_xattr_set(struct dentry *dentry, const char *name,
133 "p9_client_xattrcreate failed %d\n", retval); 133 "p9_client_xattrcreate failed %d\n", retval);
134 goto error; 134 goto error;
135 } 135 }
136 msize = fid->clnt->msize;; 136 msize = fid->clnt->msize;
137 while (value_len) { 137 while (value_len) {
138 if (value_len > (msize - P9_IOHDRSZ)) 138 if (value_len > (msize - P9_IOHDRSZ))
139 write_count = msize - P9_IOHDRSZ; 139 write_count = msize - P9_IOHDRSZ;
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index f4287e4de74..bf7693c384f 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -201,7 +201,8 @@ const struct file_operations adfs_dir_operations = {
201}; 201};
202 202
203static int 203static int
204adfs_hash(struct dentry *parent, struct qstr *qstr) 204adfs_hash(const struct dentry *parent, const struct inode *inode,
205 struct qstr *qstr)
205{ 206{
206 const unsigned int name_len = ADFS_SB(parent->d_sb)->s_namelen; 207 const unsigned int name_len = ADFS_SB(parent->d_sb)->s_namelen;
207 const unsigned char *name; 208 const unsigned char *name;
@@ -237,17 +238,19 @@ adfs_hash(struct dentry *parent, struct qstr *qstr)
237 * requirements of the underlying filesystem. 238 * requirements of the underlying filesystem.
238 */ 239 */
239static int 240static int
240adfs_compare(struct dentry *parent, struct qstr *entry, struct qstr *name) 241adfs_compare(const struct dentry *parent, const struct inode *pinode,
242 const struct dentry *dentry, const struct inode *inode,
243 unsigned int len, const char *str, const struct qstr *name)
241{ 244{
242 int i; 245 int i;
243 246
244 if (entry->len != name->len) 247 if (len != name->len)
245 return 1; 248 return 1;
246 249
247 for (i = 0; i < name->len; i++) { 250 for (i = 0; i < name->len; i++) {
248 char a, b; 251 char a, b;
249 252
250 a = entry->name[i]; 253 a = str[i];
251 b = name->name[i]; 254 b = name->name[i];
252 255
253 if (a >= 'A' && a <= 'Z') 256 if (a >= 'A' && a <= 'Z')
@@ -273,7 +276,7 @@ adfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
273 struct object_info obj; 276 struct object_info obj;
274 int error; 277 int error;
275 278
276 dentry->d_op = &adfs_dentry_operations; 279 d_set_d_op(dentry, &adfs_dentry_operations);
277 lock_kernel(); 280 lock_kernel();
278 error = adfs_dir_lookup_byname(dir, &dentry->d_name, &obj); 281 error = adfs_dir_lookup_byname(dir, &dentry->d_name, &obj);
279 if (error == 0) { 282 if (error == 0) {
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 959dbff2d42..a4041b52fbc 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -240,11 +240,18 @@ static struct inode *adfs_alloc_inode(struct super_block *sb)
240 return &ei->vfs_inode; 240 return &ei->vfs_inode;
241} 241}
242 242
243static void adfs_destroy_inode(struct inode *inode) 243static void adfs_i_callback(struct rcu_head *head)
244{ 244{
245 struct inode *inode = container_of(head, struct inode, i_rcu);
246 INIT_LIST_HEAD(&inode->i_dentry);
245 kmem_cache_free(adfs_inode_cachep, ADFS_I(inode)); 247 kmem_cache_free(adfs_inode_cachep, ADFS_I(inode));
246} 248}
247 249
250static void adfs_destroy_inode(struct inode *inode)
251{
252 call_rcu(&inode->i_rcu, adfs_i_callback);
253}
254
248static void init_once(void *foo) 255static void init_once(void *foo)
249{ 256{
250 struct adfs_inode_info *ei = (struct adfs_inode_info *) foo; 257 struct adfs_inode_info *ei = (struct adfs_inode_info *) foo;
@@ -477,7 +484,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
477 adfs_error(sb, "get root inode failed\n"); 484 adfs_error(sb, "get root inode failed\n");
478 goto error; 485 goto error;
479 } else 486 } else
480 sb->s_root->d_op = &adfs_dentry_operations; 487 d_set_d_op(sb->s_root, &adfs_dentry_operations);
481 unlock_kernel(); 488 unlock_kernel();
482 return 0; 489 return 0;
483 490
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index 7d0f0a30f7a..3a4557e8325 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -128,7 +128,7 @@ affs_fix_dcache(struct dentry *dentry, u32 entry_ino)
128 void *data = dentry->d_fsdata; 128 void *data = dentry->d_fsdata;
129 struct list_head *head, *next; 129 struct list_head *head, *next;
130 130
131 spin_lock(&dcache_lock); 131 spin_lock(&inode->i_lock);
132 head = &inode->i_dentry; 132 head = &inode->i_dentry;
133 next = head->next; 133 next = head->next;
134 while (next != head) { 134 while (next != head) {
@@ -139,7 +139,7 @@ affs_fix_dcache(struct dentry *dentry, u32 entry_ino)
139 } 139 }
140 next = next->next; 140 next = next->next;
141 } 141 }
142 spin_unlock(&dcache_lock); 142 spin_unlock(&inode->i_lock);
143} 143}
144 144
145 145
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 914d1c0bc07..944a4042fb6 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -13,11 +13,19 @@
13typedef int (*toupper_t)(int); 13typedef int (*toupper_t)(int);
14 14
15static int affs_toupper(int ch); 15static int affs_toupper(int ch);
16static int affs_hash_dentry(struct dentry *, struct qstr *); 16static int affs_hash_dentry(const struct dentry *,
17static int affs_compare_dentry(struct dentry *, struct qstr *, struct qstr *); 17 const struct inode *, struct qstr *);
18static int affs_compare_dentry(const struct dentry *parent,
19 const struct inode *pinode,
20 const struct dentry *dentry, const struct inode *inode,
21 unsigned int len, const char *str, const struct qstr *name);
18static int affs_intl_toupper(int ch); 22static int affs_intl_toupper(int ch);
19static int affs_intl_hash_dentry(struct dentry *, struct qstr *); 23static int affs_intl_hash_dentry(const struct dentry *,
20static int affs_intl_compare_dentry(struct dentry *, struct qstr *, struct qstr *); 24 const struct inode *, struct qstr *);
25static int affs_intl_compare_dentry(const struct dentry *parent,
26 const struct inode *pinode,
27 const struct dentry *dentry, const struct inode *inode,
28 unsigned int len, const char *str, const struct qstr *name);
21 29
22const struct dentry_operations affs_dentry_operations = { 30const struct dentry_operations affs_dentry_operations = {
23 .d_hash = affs_hash_dentry, 31 .d_hash = affs_hash_dentry,
@@ -58,13 +66,13 @@ affs_get_toupper(struct super_block *sb)
58 * Note: the dentry argument is the parent dentry. 66 * Note: the dentry argument is the parent dentry.
59 */ 67 */
60static inline int 68static inline int
61__affs_hash_dentry(struct dentry *dentry, struct qstr *qstr, toupper_t toupper) 69__affs_hash_dentry(struct qstr *qstr, toupper_t toupper)
62{ 70{
63 const u8 *name = qstr->name; 71 const u8 *name = qstr->name;
64 unsigned long hash; 72 unsigned long hash;
65 int i; 73 int i;
66 74
67 i = affs_check_name(qstr->name,qstr->len); 75 i = affs_check_name(qstr->name, qstr->len);
68 if (i) 76 if (i)
69 return i; 77 return i;
70 78
@@ -78,39 +86,41 @@ __affs_hash_dentry(struct dentry *dentry, struct qstr *qstr, toupper_t toupper)
78} 86}
79 87
80static int 88static int
81affs_hash_dentry(struct dentry *dentry, struct qstr *qstr) 89affs_hash_dentry(const struct dentry *dentry, const struct inode *inode,
90 struct qstr *qstr)
82{ 91{
83 return __affs_hash_dentry(dentry, qstr, affs_toupper); 92 return __affs_hash_dentry(qstr, affs_toupper);
84} 93}
85static int 94static int
86affs_intl_hash_dentry(struct dentry *dentry, struct qstr *qstr) 95affs_intl_hash_dentry(const struct dentry *dentry, const struct inode *inode,
96 struct qstr *qstr)
87{ 97{
88 return __affs_hash_dentry(dentry, qstr, affs_intl_toupper); 98 return __affs_hash_dentry(qstr, affs_intl_toupper);
89} 99}
90 100
91static inline int 101static inline int __affs_compare_dentry(unsigned int len,
92__affs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b, toupper_t toupper) 102 const char *str, const struct qstr *name, toupper_t toupper)
93{ 103{
94 const u8 *aname = a->name; 104 const u8 *aname = str;
95 const u8 *bname = b->name; 105 const u8 *bname = name->name;
96 int len;
97 106
98 /* 'a' is the qstr of an already existing dentry, so the name 107 /*
99 * must be valid. 'b' must be validated first. 108 * 'str' is the name of an already existing dentry, so the name
109 * must be valid. 'name' must be validated first.
100 */ 110 */
101 111
102 if (affs_check_name(b->name,b->len)) 112 if (affs_check_name(name->name, name->len))
103 return 1; 113 return 1;
104 114
105 /* If the names are longer than the allowed 30 chars, 115 /*
116 * If the names are longer than the allowed 30 chars,
106 * the excess is ignored, so their length may differ. 117 * the excess is ignored, so their length may differ.
107 */ 118 */
108 len = a->len;
109 if (len >= 30) { 119 if (len >= 30) {
110 if (b->len < 30) 120 if (name->len < 30)
111 return 1; 121 return 1;
112 len = 30; 122 len = 30;
113 } else if (len != b->len) 123 } else if (len != name->len)
114 return 1; 124 return 1;
115 125
116 for (; len > 0; len--) 126 for (; len > 0; len--)
@@ -121,14 +131,18 @@ __affs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b, tou
121} 131}
122 132
123static int 133static int
124affs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b) 134affs_compare_dentry(const struct dentry *parent, const struct inode *pinode,
135 const struct dentry *dentry, const struct inode *inode,
136 unsigned int len, const char *str, const struct qstr *name)
125{ 137{
126 return __affs_compare_dentry(dentry, a, b, affs_toupper); 138 return __affs_compare_dentry(len, str, name, affs_toupper);
127} 139}
128static int 140static int
129affs_intl_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b) 141affs_intl_compare_dentry(const struct dentry *parent,const struct inode *pinode,
142 const struct dentry *dentry, const struct inode *inode,
143 unsigned int len, const char *str, const struct qstr *name)
130{ 144{
131 return __affs_compare_dentry(dentry, a, b, affs_intl_toupper); 145 return __affs_compare_dentry(len, str, name, affs_intl_toupper);
132} 146}
133 147
134/* 148/*
@@ -226,7 +240,7 @@ affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
226 if (IS_ERR(inode)) 240 if (IS_ERR(inode))
227 return ERR_CAST(inode); 241 return ERR_CAST(inode);
228 } 242 }
229 dentry->d_op = AFFS_SB(sb)->s_flags & SF_INTL ? &affs_intl_dentry_operations : &affs_dentry_operations; 243 d_set_d_op(dentry, AFFS_SB(sb)->s_flags & SF_INTL ? &affs_intl_dentry_operations : &affs_dentry_operations);
230 d_add(dentry, inode); 244 d_add(dentry, inode);
231 return NULL; 245 return NULL;
232} 246}
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 0cf7f4384cb..d39081bbe7c 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -95,11 +95,18 @@ static struct inode *affs_alloc_inode(struct super_block *sb)
95 return &i->vfs_inode; 95 return &i->vfs_inode;
96} 96}
97 97
98static void affs_destroy_inode(struct inode *inode) 98static void affs_i_callback(struct rcu_head *head)
99{ 99{
100 struct inode *inode = container_of(head, struct inode, i_rcu);
101 INIT_LIST_HEAD(&inode->i_dentry);
100 kmem_cache_free(affs_inode_cachep, AFFS_I(inode)); 102 kmem_cache_free(affs_inode_cachep, AFFS_I(inode));
101} 103}
102 104
105static void affs_destroy_inode(struct inode *inode)
106{
107 call_rcu(&inode->i_rcu, affs_i_callback);
108}
109
103static void init_once(void *foo) 110static void init_once(void *foo)
104{ 111{
105 struct affs_inode_info *ei = (struct affs_inode_info *) foo; 112 struct affs_inode_info *ei = (struct affs_inode_info *) foo;
@@ -475,7 +482,7 @@ got_root:
475 printk(KERN_ERR "AFFS: Get root inode failed\n"); 482 printk(KERN_ERR "AFFS: Get root inode failed\n");
476 goto out_error; 483 goto out_error;
477 } 484 }
478 sb->s_root->d_op = &affs_dentry_operations; 485 d_set_d_op(sb->s_root, &affs_dentry_operations);
479 486
480 pr_debug("AFFS: s_flags=%lX\n",sb->s_flags); 487 pr_debug("AFFS: s_flags=%lX\n",sb->s_flags);
481 return 0; 488 return 0;
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 5439e1bc9a8..34a3263d60a 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -13,6 +13,7 @@
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/fs.h> 15#include <linux/fs.h>
16#include <linux/namei.h>
16#include <linux/pagemap.h> 17#include <linux/pagemap.h>
17#include <linux/ctype.h> 18#include <linux/ctype.h>
18#include <linux/sched.h> 19#include <linux/sched.h>
@@ -23,7 +24,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
23static int afs_dir_open(struct inode *inode, struct file *file); 24static int afs_dir_open(struct inode *inode, struct file *file);
24static int afs_readdir(struct file *file, void *dirent, filldir_t filldir); 25static int afs_readdir(struct file *file, void *dirent, filldir_t filldir);
25static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd); 26static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd);
26static int afs_d_delete(struct dentry *dentry); 27static int afs_d_delete(const struct dentry *dentry);
27static void afs_d_release(struct dentry *dentry); 28static void afs_d_release(struct dentry *dentry);
28static int afs_lookup_filldir(void *_cookie, const char *name, int nlen, 29static int afs_lookup_filldir(void *_cookie, const char *name, int nlen,
29 loff_t fpos, u64 ino, unsigned dtype); 30 loff_t fpos, u64 ino, unsigned dtype);
@@ -581,7 +582,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
581 } 582 }
582 583
583success: 584success:
584 dentry->d_op = &afs_fs_dentry_operations; 585 d_set_d_op(dentry, &afs_fs_dentry_operations);
585 586
586 d_add(dentry, inode); 587 d_add(dentry, inode);
587 _leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%llu }", 588 _leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%llu }",
@@ -607,6 +608,9 @@ static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
607 void *dir_version; 608 void *dir_version;
608 int ret; 609 int ret;
609 610
611 if (nd->flags & LOOKUP_RCU)
612 return -ECHILD;
613
610 vnode = AFS_FS_I(dentry->d_inode); 614 vnode = AFS_FS_I(dentry->d_inode);
611 615
612 if (dentry->d_inode) 616 if (dentry->d_inode)
@@ -730,7 +734,7 @@ out_bad:
730 * - called from dput() when d_count is going to 0. 734 * - called from dput() when d_count is going to 0.
731 * - return 1 to request dentry be unhashed, 0 otherwise 735 * - return 1 to request dentry be unhashed, 0 otherwise
732 */ 736 */
733static int afs_d_delete(struct dentry *dentry) 737static int afs_d_delete(const struct dentry *dentry)
734{ 738{
735 _enter("%s", dentry->d_name.name); 739 _enter("%s", dentry->d_name.name);
736 740
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index cca8eef736f..6d4bc1c8ff6 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -624,7 +624,7 @@ extern void afs_clear_permits(struct afs_vnode *);
624extern void afs_cache_permit(struct afs_vnode *, struct key *, long); 624extern void afs_cache_permit(struct afs_vnode *, struct key *, long);
625extern void afs_zap_permits(struct rcu_head *); 625extern void afs_zap_permits(struct rcu_head *);
626extern struct key *afs_request_key(struct afs_cell *); 626extern struct key *afs_request_key(struct afs_cell *);
627extern int afs_permission(struct inode *, int); 627extern int afs_permission(struct inode *, int, unsigned int);
628 628
629/* 629/*
630 * server.c 630 * server.c
diff --git a/fs/afs/security.c b/fs/afs/security.c
index bb4ed144d0e..f44b9d35537 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -285,13 +285,16 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
285 * - AFS ACLs are attached to directories only, and a file is controlled by its 285 * - AFS ACLs are attached to directories only, and a file is controlled by its
286 * parent directory's ACL 286 * parent directory's ACL
287 */ 287 */
288int afs_permission(struct inode *inode, int mask) 288int afs_permission(struct inode *inode, int mask, unsigned int flags)
289{ 289{
290 struct afs_vnode *vnode = AFS_FS_I(inode); 290 struct afs_vnode *vnode = AFS_FS_I(inode);
291 afs_access_t uninitialized_var(access); 291 afs_access_t uninitialized_var(access);
292 struct key *key; 292 struct key *key;
293 int ret; 293 int ret;
294 294
295 if (flags & IPERM_FLAG_RCU)
296 return -ECHILD;
297
295 _enter("{{%x:%u},%lx},%x,", 298 _enter("{{%x:%u},%lx},%x,",
296 vnode->fid.vid, vnode->fid.vnode, vnode->flags, mask); 299 vnode->fid.vid, vnode->fid.vnode, vnode->flags, mask);
297 300
@@ -347,7 +350,7 @@ int afs_permission(struct inode *inode, int mask)
347 } 350 }
348 351
349 key_put(key); 352 key_put(key);
350 ret = generic_permission(inode, mask, NULL); 353 ret = generic_permission(inode, mask, flags, NULL);
351 _leave(" = %d", ret); 354 _leave(" = %d", ret);
352 return ret; 355 return ret;
353 356
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 27201cffece..f901a9d7c11 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -498,6 +498,14 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
498 return &vnode->vfs_inode; 498 return &vnode->vfs_inode;
499} 499}
500 500
501static void afs_i_callback(struct rcu_head *head)
502{
503 struct inode *inode = container_of(head, struct inode, i_rcu);
504 struct afs_vnode *vnode = AFS_FS_I(inode);
505 INIT_LIST_HEAD(&inode->i_dentry);
506 kmem_cache_free(afs_inode_cachep, vnode);
507}
508
501/* 509/*
502 * destroy an AFS inode struct 510 * destroy an AFS inode struct
503 */ 511 */
@@ -511,7 +519,7 @@ static void afs_destroy_inode(struct inode *inode)
511 519
512 ASSERTCMP(vnode->server, ==, NULL); 520 ASSERTCMP(vnode->server, ==, NULL);
513 521
514 kmem_cache_free(afs_inode_cachep, vnode); 522 call_rcu(&inode->i_rcu, afs_i_callback);
515 atomic_dec(&afs_count_active_inodes); 523 atomic_dec(&afs_count_active_inodes);
516} 524}
517 525
diff --git a/fs/aio.c b/fs/aio.c
index 8c8f6c5b6d7..5e00f15c54a 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -798,29 +798,12 @@ static void aio_queue_work(struct kioctx * ctx)
798 queue_delayed_work(aio_wq, &ctx->wq, timeout); 798 queue_delayed_work(aio_wq, &ctx->wq, timeout);
799} 799}
800 800
801
802/*
803 * aio_run_iocbs:
804 * Process all pending retries queued on the ioctx
805 * run list.
806 * Assumes it is operating within the aio issuer's mm
807 * context.
808 */
809static inline void aio_run_iocbs(struct kioctx *ctx)
810{
811 int requeue;
812
813 spin_lock_irq(&ctx->ctx_lock);
814
815 requeue = __aio_run_iocbs(ctx);
816 spin_unlock_irq(&ctx->ctx_lock);
817 if (requeue)
818 aio_queue_work(ctx);
819}
820
821/* 801/*
822 * just like aio_run_iocbs, but keeps running them until 802 * aio_run_all_iocbs:
823 * the list stays empty 803 * Process all pending retries queued on the ioctx
804 * run list, and keep running them until the list
805 * stays empty.
806 * Assumes it is operating within the aio issuer's mm context.
824 */ 807 */
825static inline void aio_run_all_iocbs(struct kioctx *ctx) 808static inline void aio_run_all_iocbs(struct kioctx *ctx)
826{ 809{
@@ -1839,7 +1822,7 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
1839 long ret = -EINVAL; 1822 long ret = -EINVAL;
1840 1823
1841 if (likely(ioctx)) { 1824 if (likely(ioctx)) {
1842 if (likely(min_nr <= nr && min_nr >= 0 && nr >= 0)) 1825 if (likely(min_nr <= nr && min_nr >= 0))
1843 ret = read_events(ioctx, min_nr, nr, events, timeout); 1826 ret = read_events(ioctx, min_nr, nr, events, timeout);
1844 put_ioctx(ioctx); 1827 put_ioctx(ioctx);
1845 } 1828 }
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 73097336ea2..98edb657b84 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -102,7 +102,7 @@ struct file *anon_inode_getfile(const char *name,
102 this.name = name; 102 this.name = name;
103 this.len = strlen(name); 103 this.len = strlen(name);
104 this.hash = 0; 104 this.hash = 0;
105 path.dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this); 105 path.dentry = d_alloc_pseudo(anon_inode_mnt->mnt_sb, &this);
106 if (!path.dentry) 106 if (!path.dentry)
107 goto err_module; 107 goto err_module;
108 108
@@ -113,7 +113,7 @@ struct file *anon_inode_getfile(const char *name,
113 */ 113 */
114 ihold(anon_inode_inode); 114 ihold(anon_inode_inode);
115 115
116 path.dentry->d_op = &anon_inodefs_dentry_operations; 116 d_set_d_op(path.dentry, &anon_inodefs_dentry_operations);
117 d_instantiate(path.dentry, anon_inode_inode); 117 d_instantiate(path.dentry, anon_inode_inode);
118 118
119 error = -ENFILE; 119 error = -ENFILE;
@@ -232,7 +232,7 @@ static int __init anon_inode_init(void)
232 return 0; 232 return 0;
233 233
234err_mntput: 234err_mntput:
235 mntput(anon_inode_mnt); 235 mntput_long(anon_inode_mnt);
236err_unregister_filesystem: 236err_unregister_filesystem:
237 unregister_filesystem(&anon_inode_fs_type); 237 unregister_filesystem(&anon_inode_fs_type);
238err_exit: 238err_exit:
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 3d283abf67d..0fffe1c24ce 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -16,6 +16,7 @@
16#include <linux/auto_fs4.h> 16#include <linux/auto_fs4.h>
17#include <linux/auto_dev-ioctl.h> 17#include <linux/auto_dev-ioctl.h>
18#include <linux/mutex.h> 18#include <linux/mutex.h>
19#include <linux/spinlock.h>
19#include <linux/list.h> 20#include <linux/list.h>
20 21
21/* This is the range of ioctl() numbers we claim as ours */ 22/* This is the range of ioctl() numbers we claim as ours */
@@ -60,6 +61,8 @@ do { \
60 current->pid, __func__, ##args); \ 61 current->pid, __func__, ##args); \
61} while (0) 62} while (0)
62 63
64extern spinlock_t autofs4_lock;
65
63/* Unified info structure. This is pointed to by both the dentry and 66/* Unified info structure. This is pointed to by both the dentry and
64 inode structures. Each file in the filesystem has an instance of this 67 inode structures. Each file in the filesystem has an instance of this
65 structure. It holds a reference to the dentry, so dentries are never 68 structure. It holds a reference to the dentry, so dentries are never
@@ -254,17 +257,15 @@ static inline int simple_positive(struct dentry *dentry)
254 return dentry->d_inode && !d_unhashed(dentry); 257 return dentry->d_inode && !d_unhashed(dentry);
255} 258}
256 259
257static inline int __simple_empty(struct dentry *dentry) 260static inline void __autofs4_add_expiring(struct dentry *dentry)
258{ 261{
259 struct dentry *child; 262 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
260 int ret = 0; 263 struct autofs_info *ino = autofs4_dentry_ino(dentry);
261 264 if (ino) {
262 list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) 265 if (list_empty(&ino->expiring))
263 if (simple_positive(child)) 266 list_add(&ino->expiring, &sbi->expiring_list);
264 goto out; 267 }
265 ret = 1; 268 return;
266out:
267 return ret;
268} 269}
269 270
270static inline void autofs4_add_expiring(struct dentry *dentry) 271static inline void autofs4_add_expiring(struct dentry *dentry)
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index a796c9417fb..cc1d0136590 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -91,24 +91,64 @@ done:
91} 91}
92 92
93/* 93/*
94 * Calculate next entry in top down tree traversal. 94 * Calculate and dget next entry in top down tree traversal.
95 * From next_mnt in namespace.c - elegant.
96 */ 95 */
97static struct dentry *next_dentry(struct dentry *p, struct dentry *root) 96static struct dentry *get_next_positive_dentry(struct dentry *prev,
97 struct dentry *root)
98{ 98{
99 struct list_head *next = p->d_subdirs.next; 99 struct list_head *next;
100 struct dentry *p, *ret;
101
102 if (prev == NULL)
103 return dget(root);
100 104
105 spin_lock(&autofs4_lock);
106relock:
107 p = prev;
108 spin_lock(&p->d_lock);
109again:
110 next = p->d_subdirs.next;
101 if (next == &p->d_subdirs) { 111 if (next == &p->d_subdirs) {
102 while (1) { 112 while (1) {
103 if (p == root) 113 struct dentry *parent;
114
115 if (p == root) {
116 spin_unlock(&p->d_lock);
117 spin_unlock(&autofs4_lock);
118 dput(prev);
104 return NULL; 119 return NULL;
120 }
121
122 parent = p->d_parent;
123 if (!spin_trylock(&parent->d_lock)) {
124 spin_unlock(&p->d_lock);
125 cpu_relax();
126 goto relock;
127 }
128 spin_unlock(&p->d_lock);
105 next = p->d_u.d_child.next; 129 next = p->d_u.d_child.next;
106 if (next != &p->d_parent->d_subdirs) 130 p = parent;
131 if (next != &parent->d_subdirs)
107 break; 132 break;
108 p = p->d_parent;
109 } 133 }
110 } 134 }
111 return list_entry(next, struct dentry, d_u.d_child); 135 ret = list_entry(next, struct dentry, d_u.d_child);
136
137 spin_lock_nested(&ret->d_lock, DENTRY_D_LOCK_NESTED);
138 /* Negative dentry - try next */
139 if (!simple_positive(ret)) {
140 spin_unlock(&ret->d_lock);
141 p = ret;
142 goto again;
143 }
144 dget_dlock(ret);
145 spin_unlock(&ret->d_lock);
146 spin_unlock(&p->d_lock);
147 spin_unlock(&autofs4_lock);
148
149 dput(prev);
150
151 return ret;
112} 152}
113 153
114/* 154/*
@@ -158,18 +198,11 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
158 if (!simple_positive(top)) 198 if (!simple_positive(top))
159 return 1; 199 return 1;
160 200
161 spin_lock(&dcache_lock); 201 p = NULL;
162 for (p = top; p; p = next_dentry(p, top)) { 202 while ((p = get_next_positive_dentry(p, top))) {
163 /* Negative dentry - give up */
164 if (!simple_positive(p))
165 continue;
166
167 DPRINTK("dentry %p %.*s", 203 DPRINTK("dentry %p %.*s",
168 p, (int) p->d_name.len, p->d_name.name); 204 p, (int) p->d_name.len, p->d_name.name);
169 205
170 p = dget(p);
171 spin_unlock(&dcache_lock);
172
173 /* 206 /*
174 * Is someone visiting anywhere in the subtree ? 207 * Is someone visiting anywhere in the subtree ?
175 * If there's no mount we need to check the usage 208 * If there's no mount we need to check the usage
@@ -198,16 +231,13 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
198 else 231 else
199 ino_count++; 232 ino_count++;
200 233
201 if (atomic_read(&p->d_count) > ino_count) { 234 if (p->d_count > ino_count) {
202 top_ino->last_used = jiffies; 235 top_ino->last_used = jiffies;
203 dput(p); 236 dput(p);
204 return 1; 237 return 1;
205 } 238 }
206 } 239 }
207 dput(p);
208 spin_lock(&dcache_lock);
209 } 240 }
210 spin_unlock(&dcache_lock);
211 241
212 /* Timeout of a tree mount is ultimately determined by its top dentry */ 242 /* Timeout of a tree mount is ultimately determined by its top dentry */
213 if (!autofs4_can_expire(top, timeout, do_now)) 243 if (!autofs4_can_expire(top, timeout, do_now))
@@ -226,32 +256,21 @@ static struct dentry *autofs4_check_leaves(struct vfsmount *mnt,
226 DPRINTK("parent %p %.*s", 256 DPRINTK("parent %p %.*s",
227 parent, (int)parent->d_name.len, parent->d_name.name); 257 parent, (int)parent->d_name.len, parent->d_name.name);
228 258
229 spin_lock(&dcache_lock); 259 p = NULL;
230 for (p = parent; p; p = next_dentry(p, parent)) { 260 while ((p = get_next_positive_dentry(p, parent))) {
231 /* Negative dentry - give up */
232 if (!simple_positive(p))
233 continue;
234
235 DPRINTK("dentry %p %.*s", 261 DPRINTK("dentry %p %.*s",
236 p, (int) p->d_name.len, p->d_name.name); 262 p, (int) p->d_name.len, p->d_name.name);
237 263
238 p = dget(p);
239 spin_unlock(&dcache_lock);
240
241 if (d_mountpoint(p)) { 264 if (d_mountpoint(p)) {
242 /* Can we umount this guy */ 265 /* Can we umount this guy */
243 if (autofs4_mount_busy(mnt, p)) 266 if (autofs4_mount_busy(mnt, p))
244 goto cont; 267 continue;
245 268
246 /* Can we expire this guy */ 269 /* Can we expire this guy */
247 if (autofs4_can_expire(p, timeout, do_now)) 270 if (autofs4_can_expire(p, timeout, do_now))
248 return p; 271 return p;
249 } 272 }
250cont:
251 dput(p);
252 spin_lock(&dcache_lock);
253 } 273 }
254 spin_unlock(&dcache_lock);
255 return NULL; 274 return NULL;
256} 275}
257 276
@@ -276,7 +295,9 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
276 struct autofs_info *ino = autofs4_dentry_ino(root); 295 struct autofs_info *ino = autofs4_dentry_ino(root);
277 if (d_mountpoint(root)) { 296 if (d_mountpoint(root)) {
278 ino->flags |= AUTOFS_INF_MOUNTPOINT; 297 ino->flags |= AUTOFS_INF_MOUNTPOINT;
279 root->d_mounted--; 298 spin_lock(&root->d_lock);
299 root->d_flags &= ~DCACHE_MOUNTED;
300 spin_unlock(&root->d_lock);
280 } 301 }
281 ino->flags |= AUTOFS_INF_EXPIRING; 302 ino->flags |= AUTOFS_INF_EXPIRING;
282 init_completion(&ino->expire_complete); 303 init_completion(&ino->expire_complete);
@@ -302,8 +323,8 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
302{ 323{
303 unsigned long timeout; 324 unsigned long timeout;
304 struct dentry *root = sb->s_root; 325 struct dentry *root = sb->s_root;
326 struct dentry *dentry;
305 struct dentry *expired = NULL; 327 struct dentry *expired = NULL;
306 struct list_head *next;
307 int do_now = how & AUTOFS_EXP_IMMEDIATE; 328 int do_now = how & AUTOFS_EXP_IMMEDIATE;
308 int exp_leaves = how & AUTOFS_EXP_LEAVES; 329 int exp_leaves = how & AUTOFS_EXP_LEAVES;
309 struct autofs_info *ino; 330 struct autofs_info *ino;
@@ -315,23 +336,8 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
315 now = jiffies; 336 now = jiffies;
316 timeout = sbi->exp_timeout; 337 timeout = sbi->exp_timeout;
317 338
318 spin_lock(&dcache_lock); 339 dentry = NULL;
319 next = root->d_subdirs.next; 340 while ((dentry = get_next_positive_dentry(dentry, root))) {
320
321 /* On exit from the loop expire is set to a dgot dentry
322 * to expire or it's NULL */
323 while ( next != &root->d_subdirs ) {
324 struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
325
326 /* Negative dentry - give up */
327 if (!simple_positive(dentry)) {
328 next = next->next;
329 continue;
330 }
331
332 dentry = dget(dentry);
333 spin_unlock(&dcache_lock);
334
335 spin_lock(&sbi->fs_lock); 341 spin_lock(&sbi->fs_lock);
336 ino = autofs4_dentry_ino(dentry); 342 ino = autofs4_dentry_ino(dentry);
337 343
@@ -347,7 +353,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
347 353
348 /* Path walk currently on this dentry? */ 354 /* Path walk currently on this dentry? */
349 ino_count = atomic_read(&ino->count) + 2; 355 ino_count = atomic_read(&ino->count) + 2;
350 if (atomic_read(&dentry->d_count) > ino_count) 356 if (dentry->d_count > ino_count)
351 goto next; 357 goto next;
352 358
353 /* Can we umount this guy */ 359 /* Can we umount this guy */
@@ -369,7 +375,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
369 if (!exp_leaves) { 375 if (!exp_leaves) {
370 /* Path walk currently on this dentry? */ 376 /* Path walk currently on this dentry? */
371 ino_count = atomic_read(&ino->count) + 1; 377 ino_count = atomic_read(&ino->count) + 1;
372 if (atomic_read(&dentry->d_count) > ino_count) 378 if (dentry->d_count > ino_count)
373 goto next; 379 goto next;
374 380
375 if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) { 381 if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) {
@@ -383,7 +389,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
383 } else { 389 } else {
384 /* Path walk currently on this dentry? */ 390 /* Path walk currently on this dentry? */
385 ino_count = atomic_read(&ino->count) + 1; 391 ino_count = atomic_read(&ino->count) + 1;
386 if (atomic_read(&dentry->d_count) > ino_count) 392 if (dentry->d_count > ino_count)
387 goto next; 393 goto next;
388 394
389 expired = autofs4_check_leaves(mnt, dentry, timeout, do_now); 395 expired = autofs4_check_leaves(mnt, dentry, timeout, do_now);
@@ -394,11 +400,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
394 } 400 }
395next: 401next:
396 spin_unlock(&sbi->fs_lock); 402 spin_unlock(&sbi->fs_lock);
397 dput(dentry);
398 spin_lock(&dcache_lock);
399 next = next->next;
400 } 403 }
401 spin_unlock(&dcache_lock);
402 return NULL; 404 return NULL;
403 405
404found: 406found:
@@ -408,9 +410,13 @@ found:
408 ino->flags |= AUTOFS_INF_EXPIRING; 410 ino->flags |= AUTOFS_INF_EXPIRING;
409 init_completion(&ino->expire_complete); 411 init_completion(&ino->expire_complete);
410 spin_unlock(&sbi->fs_lock); 412 spin_unlock(&sbi->fs_lock);
411 spin_lock(&dcache_lock); 413 spin_lock(&autofs4_lock);
414 spin_lock(&expired->d_parent->d_lock);
415 spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED);
412 list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child); 416 list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
413 spin_unlock(&dcache_lock); 417 spin_unlock(&expired->d_lock);
418 spin_unlock(&expired->d_parent->d_lock);
419 spin_unlock(&autofs4_lock);
414 return expired; 420 return expired;
415} 421}
416 422
@@ -499,7 +505,14 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
499 505
500 spin_lock(&sbi->fs_lock); 506 spin_lock(&sbi->fs_lock);
501 if (ino->flags & AUTOFS_INF_MOUNTPOINT) { 507 if (ino->flags & AUTOFS_INF_MOUNTPOINT) {
502 sb->s_root->d_mounted++; 508 spin_lock(&sb->s_root->d_lock);
509 /*
510 * If we haven't been expired away, then reset
511 * mounted status.
512 */
513 if (mnt->mnt_parent != mnt)
514 sb->s_root->d_flags |= DCACHE_MOUNTED;
515 spin_unlock(&sb->s_root->d_lock);
503 ino->flags &= ~AUTOFS_INF_MOUNTPOINT; 516 ino->flags &= ~AUTOFS_INF_MOUNTPOINT;
504 } 517 }
505 ino->flags &= ~AUTOFS_INF_EXPIRING; 518 ino->flags &= ~AUTOFS_INF_EXPIRING;
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index ac87e49fa70..a7bdb9dcac8 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -309,7 +309,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
309 goto fail_iput; 309 goto fail_iput;
310 pipe = NULL; 310 pipe = NULL;
311 311
312 root->d_op = &autofs4_sb_dentry_operations; 312 d_set_d_op(root, &autofs4_sb_dentry_operations);
313 root->d_fsdata = ino; 313 root->d_fsdata = ino;
314 314
315 /* Can this call block? */ 315 /* Can this call block? */
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index d34896cfb19..651e4ef563b 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -23,6 +23,8 @@
23 23
24#include "autofs_i.h" 24#include "autofs_i.h"
25 25
26DEFINE_SPINLOCK(autofs4_lock);
27
26static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *); 28static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *);
27static int autofs4_dir_unlink(struct inode *,struct dentry *); 29static int autofs4_dir_unlink(struct inode *,struct dentry *);
28static int autofs4_dir_rmdir(struct inode *,struct dentry *); 30static int autofs4_dir_rmdir(struct inode *,struct dentry *);
@@ -142,12 +144,15 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
142 * autofs file system so just let the libfs routines handle 144 * autofs file system so just let the libfs routines handle
143 * it. 145 * it.
144 */ 146 */
145 spin_lock(&dcache_lock); 147 spin_lock(&autofs4_lock);
148 spin_lock(&dentry->d_lock);
146 if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { 149 if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
147 spin_unlock(&dcache_lock); 150 spin_unlock(&dentry->d_lock);
151 spin_unlock(&autofs4_lock);
148 return -ENOENT; 152 return -ENOENT;
149 } 153 }
150 spin_unlock(&dcache_lock); 154 spin_unlock(&dentry->d_lock);
155 spin_unlock(&autofs4_lock);
151 156
152out: 157out:
153 return dcache_dir_open(inode, file); 158 return dcache_dir_open(inode, file);
@@ -252,9 +257,11 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
252 /* We trigger a mount for almost all flags */ 257 /* We trigger a mount for almost all flags */
253 lookup_type = autofs4_need_mount(nd->flags); 258 lookup_type = autofs4_need_mount(nd->flags);
254 spin_lock(&sbi->fs_lock); 259 spin_lock(&sbi->fs_lock);
255 spin_lock(&dcache_lock); 260 spin_lock(&autofs4_lock);
261 spin_lock(&dentry->d_lock);
256 if (!(lookup_type || ino->flags & AUTOFS_INF_PENDING)) { 262 if (!(lookup_type || ino->flags & AUTOFS_INF_PENDING)) {
257 spin_unlock(&dcache_lock); 263 spin_unlock(&dentry->d_lock);
264 spin_unlock(&autofs4_lock);
258 spin_unlock(&sbi->fs_lock); 265 spin_unlock(&sbi->fs_lock);
259 goto follow; 266 goto follow;
260 } 267 }
@@ -266,7 +273,8 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
266 */ 273 */
267 if (ino->flags & AUTOFS_INF_PENDING || 274 if (ino->flags & AUTOFS_INF_PENDING ||
268 (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs))) { 275 (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs))) {
269 spin_unlock(&dcache_lock); 276 spin_unlock(&dentry->d_lock);
277 spin_unlock(&autofs4_lock);
270 spin_unlock(&sbi->fs_lock); 278 spin_unlock(&sbi->fs_lock);
271 279
272 status = try_to_fill_dentry(dentry, nd->flags); 280 status = try_to_fill_dentry(dentry, nd->flags);
@@ -275,7 +283,8 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
275 283
276 goto follow; 284 goto follow;
277 } 285 }
278 spin_unlock(&dcache_lock); 286 spin_unlock(&dentry->d_lock);
287 spin_unlock(&autofs4_lock);
279 spin_unlock(&sbi->fs_lock); 288 spin_unlock(&sbi->fs_lock);
280follow: 289follow:
281 /* 290 /*
@@ -306,12 +315,19 @@ out_error:
306 */ 315 */
307static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) 316static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
308{ 317{
309 struct inode *dir = dentry->d_parent->d_inode; 318 struct inode *dir;
310 struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); 319 struct autofs_sb_info *sbi;
311 int oz_mode = autofs4_oz_mode(sbi); 320 int oz_mode;
312 int flags = nd ? nd->flags : 0; 321 int flags = nd ? nd->flags : 0;
313 int status = 1; 322 int status = 1;
314 323
324 if (flags & LOOKUP_RCU)
325 return -ECHILD;
326
327 dir = dentry->d_parent->d_inode;
328 sbi = autofs4_sbi(dir->i_sb);
329 oz_mode = autofs4_oz_mode(sbi);
330
315 /* Pending dentry */ 331 /* Pending dentry */
316 spin_lock(&sbi->fs_lock); 332 spin_lock(&sbi->fs_lock);
317 if (autofs4_ispending(dentry)) { 333 if (autofs4_ispending(dentry)) {
@@ -346,12 +362,14 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
346 return 0; 362 return 0;
347 363
348 /* Check for a non-mountpoint directory with no contents */ 364 /* Check for a non-mountpoint directory with no contents */
349 spin_lock(&dcache_lock); 365 spin_lock(&autofs4_lock);
366 spin_lock(&dentry->d_lock);
350 if (S_ISDIR(dentry->d_inode->i_mode) && 367 if (S_ISDIR(dentry->d_inode->i_mode) &&
351 !d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { 368 !d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
352 DPRINTK("dentry=%p %.*s, emptydir", 369 DPRINTK("dentry=%p %.*s, emptydir",
353 dentry, dentry->d_name.len, dentry->d_name.name); 370 dentry, dentry->d_name.len, dentry->d_name.name);
354 spin_unlock(&dcache_lock); 371 spin_unlock(&dentry->d_lock);
372 spin_unlock(&autofs4_lock);
355 373
356 /* The daemon never causes a mount to trigger */ 374 /* The daemon never causes a mount to trigger */
357 if (oz_mode) 375 if (oz_mode)
@@ -367,7 +385,8 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
367 385
368 return status; 386 return status;
369 } 387 }
370 spin_unlock(&dcache_lock); 388 spin_unlock(&dentry->d_lock);
389 spin_unlock(&autofs4_lock);
371 390
372 return 1; 391 return 1;
373} 392}
@@ -422,7 +441,7 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
422 const unsigned char *str = name->name; 441 const unsigned char *str = name->name;
423 struct list_head *p, *head; 442 struct list_head *p, *head;
424 443
425 spin_lock(&dcache_lock); 444 spin_lock(&autofs4_lock);
426 spin_lock(&sbi->lookup_lock); 445 spin_lock(&sbi->lookup_lock);
427 head = &sbi->active_list; 446 head = &sbi->active_list;
428 list_for_each(p, head) { 447 list_for_each(p, head) {
@@ -436,7 +455,7 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
436 spin_lock(&active->d_lock); 455 spin_lock(&active->d_lock);
437 456
438 /* Already gone? */ 457 /* Already gone? */
439 if (atomic_read(&active->d_count) == 0) 458 if (active->d_count == 0)
440 goto next; 459 goto next;
441 460
442 qstr = &active->d_name; 461 qstr = &active->d_name;
@@ -452,17 +471,17 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
452 goto next; 471 goto next;
453 472
454 if (d_unhashed(active)) { 473 if (d_unhashed(active)) {
455 dget(active); 474 dget_dlock(active);
456 spin_unlock(&active->d_lock); 475 spin_unlock(&active->d_lock);
457 spin_unlock(&sbi->lookup_lock); 476 spin_unlock(&sbi->lookup_lock);
458 spin_unlock(&dcache_lock); 477 spin_unlock(&autofs4_lock);
459 return active; 478 return active;
460 } 479 }
461next: 480next:
462 spin_unlock(&active->d_lock); 481 spin_unlock(&active->d_lock);
463 } 482 }
464 spin_unlock(&sbi->lookup_lock); 483 spin_unlock(&sbi->lookup_lock);
465 spin_unlock(&dcache_lock); 484 spin_unlock(&autofs4_lock);
466 485
467 return NULL; 486 return NULL;
468} 487}
@@ -477,7 +496,7 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
477 const unsigned char *str = name->name; 496 const unsigned char *str = name->name;
478 struct list_head *p, *head; 497 struct list_head *p, *head;
479 498
480 spin_lock(&dcache_lock); 499 spin_lock(&autofs4_lock);
481 spin_lock(&sbi->lookup_lock); 500 spin_lock(&sbi->lookup_lock);
482 head = &sbi->expiring_list; 501 head = &sbi->expiring_list;
483 list_for_each(p, head) { 502 list_for_each(p, head) {
@@ -507,17 +526,17 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
507 goto next; 526 goto next;
508 527
509 if (d_unhashed(expiring)) { 528 if (d_unhashed(expiring)) {
510 dget(expiring); 529 dget_dlock(expiring);
511 spin_unlock(&expiring->d_lock); 530 spin_unlock(&expiring->d_lock);
512 spin_unlock(&sbi->lookup_lock); 531 spin_unlock(&sbi->lookup_lock);
513 spin_unlock(&dcache_lock); 532 spin_unlock(&autofs4_lock);
514 return expiring; 533 return expiring;
515 } 534 }
516next: 535next:
517 spin_unlock(&expiring->d_lock); 536 spin_unlock(&expiring->d_lock);
518 } 537 }
519 spin_unlock(&sbi->lookup_lock); 538 spin_unlock(&sbi->lookup_lock);
520 spin_unlock(&dcache_lock); 539 spin_unlock(&autofs4_lock);
521 540
522 return NULL; 541 return NULL;
523} 542}
@@ -559,7 +578,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
559 * we check for the hashed dentry and return the newly 578 * we check for the hashed dentry and return the newly
560 * hashed dentry. 579 * hashed dentry.
561 */ 580 */
562 dentry->d_op = &autofs4_root_dentry_operations; 581 d_set_d_op(dentry, &autofs4_root_dentry_operations);
563 582
564 /* 583 /*
565 * And we need to ensure that the same dentry is used for 584 * And we need to ensure that the same dentry is used for
@@ -698,9 +717,9 @@ static int autofs4_dir_symlink(struct inode *dir,
698 d_add(dentry, inode); 717 d_add(dentry, inode);
699 718
700 if (dir == dir->i_sb->s_root->d_inode) 719 if (dir == dir->i_sb->s_root->d_inode)
701 dentry->d_op = &autofs4_root_dentry_operations; 720 d_set_d_op(dentry, &autofs4_root_dentry_operations);
702 else 721 else
703 dentry->d_op = &autofs4_dentry_operations; 722 d_set_d_op(dentry, &autofs4_dentry_operations);
704 723
705 dentry->d_fsdata = ino; 724 dentry->d_fsdata = ino;
706 ino->dentry = dget(dentry); 725 ino->dentry = dget(dentry);
@@ -753,12 +772,12 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
753 772
754 dir->i_mtime = CURRENT_TIME; 773 dir->i_mtime = CURRENT_TIME;
755 774
756 spin_lock(&dcache_lock); 775 spin_lock(&autofs4_lock);
757 autofs4_add_expiring(dentry); 776 autofs4_add_expiring(dentry);
758 spin_lock(&dentry->d_lock); 777 spin_lock(&dentry->d_lock);
759 __d_drop(dentry); 778 __d_drop(dentry);
760 spin_unlock(&dentry->d_lock); 779 spin_unlock(&dentry->d_lock);
761 spin_unlock(&dcache_lock); 780 spin_unlock(&autofs4_lock);
762 781
763 return 0; 782 return 0;
764} 783}
@@ -775,16 +794,20 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
775 if (!autofs4_oz_mode(sbi)) 794 if (!autofs4_oz_mode(sbi))
776 return -EACCES; 795 return -EACCES;
777 796
778 spin_lock(&dcache_lock); 797 spin_lock(&autofs4_lock);
798 spin_lock(&sbi->lookup_lock);
799 spin_lock(&dentry->d_lock);
779 if (!list_empty(&dentry->d_subdirs)) { 800 if (!list_empty(&dentry->d_subdirs)) {
780 spin_unlock(&dcache_lock); 801 spin_unlock(&dentry->d_lock);
802 spin_unlock(&sbi->lookup_lock);
803 spin_unlock(&autofs4_lock);
781 return -ENOTEMPTY; 804 return -ENOTEMPTY;
782 } 805 }
783 autofs4_add_expiring(dentry); 806 __autofs4_add_expiring(dentry);
784 spin_lock(&dentry->d_lock); 807 spin_unlock(&sbi->lookup_lock);
785 __d_drop(dentry); 808 __d_drop(dentry);
786 spin_unlock(&dentry->d_lock); 809 spin_unlock(&dentry->d_lock);
787 spin_unlock(&dcache_lock); 810 spin_unlock(&autofs4_lock);
788 811
789 if (atomic_dec_and_test(&ino->count)) { 812 if (atomic_dec_and_test(&ino->count)) {
790 p_ino = autofs4_dentry_ino(dentry->d_parent); 813 p_ino = autofs4_dentry_ino(dentry->d_parent);
@@ -829,9 +852,9 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode)
829 d_add(dentry, inode); 852 d_add(dentry, inode);
830 853
831 if (dir == dir->i_sb->s_root->d_inode) 854 if (dir == dir->i_sb->s_root->d_inode)
832 dentry->d_op = &autofs4_root_dentry_operations; 855 d_set_d_op(dentry, &autofs4_root_dentry_operations);
833 else 856 else
834 dentry->d_op = &autofs4_dentry_operations; 857 d_set_d_op(dentry, &autofs4_dentry_operations);
835 858
836 dentry->d_fsdata = ino; 859 dentry->d_fsdata = ino;
837 ino->dentry = dget(dentry); 860 ino->dentry = dget(dentry);
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 2341375386f..c5f8459c905 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -186,16 +186,26 @@ static int autofs4_getpath(struct autofs_sb_info *sbi,
186{ 186{
187 struct dentry *root = sbi->sb->s_root; 187 struct dentry *root = sbi->sb->s_root;
188 struct dentry *tmp; 188 struct dentry *tmp;
189 char *buf = *name; 189 char *buf;
190 char *p; 190 char *p;
191 int len = 0; 191 int len;
192 unsigned seq;
192 193
193 spin_lock(&dcache_lock); 194rename_retry:
195 buf = *name;
196 len = 0;
197
198 seq = read_seqbegin(&rename_lock);
199 rcu_read_lock();
200 spin_lock(&autofs4_lock);
194 for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent) 201 for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent)
195 len += tmp->d_name.len + 1; 202 len += tmp->d_name.len + 1;
196 203
197 if (!len || --len > NAME_MAX) { 204 if (!len || --len > NAME_MAX) {
198 spin_unlock(&dcache_lock); 205 spin_unlock(&autofs4_lock);
206 rcu_read_unlock();
207 if (read_seqretry(&rename_lock, seq))
208 goto rename_retry;
199 return 0; 209 return 0;
200 } 210 }
201 211
@@ -208,7 +218,10 @@ static int autofs4_getpath(struct autofs_sb_info *sbi,
208 p -= tmp->d_name.len; 218 p -= tmp->d_name.len;
209 strncpy(p, tmp->d_name.name, tmp->d_name.len); 219 strncpy(p, tmp->d_name.name, tmp->d_name.len);
210 } 220 }
211 spin_unlock(&dcache_lock); 221 spin_unlock(&autofs4_lock);
222 rcu_read_unlock();
223 if (read_seqretry(&rename_lock, seq))
224 goto rename_retry;
212 225
213 return len; 226 return len;
214} 227}
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index f024d8aadde..9ad2369d9e3 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -229,8 +229,11 @@ static int bad_inode_readlink(struct dentry *dentry, char __user *buffer,
229 return -EIO; 229 return -EIO;
230} 230}
231 231
232static int bad_inode_permission(struct inode *inode, int mask) 232static int bad_inode_permission(struct inode *inode, int mask, unsigned int flags)
233{ 233{
234 if (flags & IPERM_FLAG_RCU)
235 return -ECHILD;
236
234 return -EIO; 237 return -EIO;
235} 238}
236 239
diff --git a/fs/befs/endian.h b/fs/befs/endian.h
index 6cb84d896d0..27223878ba9 100644
--- a/fs/befs/endian.h
+++ b/fs/befs/endian.h
@@ -102,22 +102,22 @@ cpu_to_fsrun(const struct super_block *sb, befs_block_run n)
102} 102}
103 103
104static inline befs_data_stream 104static inline befs_data_stream
105fsds_to_cpu(const struct super_block *sb, befs_disk_data_stream n) 105fsds_to_cpu(const struct super_block *sb, const befs_disk_data_stream *n)
106{ 106{
107 befs_data_stream data; 107 befs_data_stream data;
108 int i; 108 int i;
109 109
110 for (i = 0; i < BEFS_NUM_DIRECT_BLOCKS; ++i) 110 for (i = 0; i < BEFS_NUM_DIRECT_BLOCKS; ++i)
111 data.direct[i] = fsrun_to_cpu(sb, n.direct[i]); 111 data.direct[i] = fsrun_to_cpu(sb, n->direct[i]);
112 112
113 data.max_direct_range = fs64_to_cpu(sb, n.max_direct_range); 113 data.max_direct_range = fs64_to_cpu(sb, n->max_direct_range);
114 data.indirect = fsrun_to_cpu(sb, n.indirect); 114 data.indirect = fsrun_to_cpu(sb, n->indirect);
115 data.max_indirect_range = fs64_to_cpu(sb, n.max_indirect_range); 115 data.max_indirect_range = fs64_to_cpu(sb, n->max_indirect_range);
116 data.double_indirect = fsrun_to_cpu(sb, n.double_indirect); 116 data.double_indirect = fsrun_to_cpu(sb, n->double_indirect);
117 data.max_double_indirect_range = fs64_to_cpu(sb, 117 data.max_double_indirect_range = fs64_to_cpu(sb,
118 n. 118 n->
119 max_double_indirect_range); 119 max_double_indirect_range);
120 data.size = fs64_to_cpu(sb, n.size); 120 data.size = fs64_to_cpu(sb, n->size);
121 121
122 return data; 122 return data;
123} 123}
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index aa4e7c7ae3c..b1d0c794747 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -284,12 +284,18 @@ befs_alloc_inode(struct super_block *sb)
284 return &bi->vfs_inode; 284 return &bi->vfs_inode;
285} 285}
286 286
287static void 287static void befs_i_callback(struct rcu_head *head)
288befs_destroy_inode(struct inode *inode)
289{ 288{
289 struct inode *inode = container_of(head, struct inode, i_rcu);
290 INIT_LIST_HEAD(&inode->i_dentry);
290 kmem_cache_free(befs_inode_cachep, BEFS_I(inode)); 291 kmem_cache_free(befs_inode_cachep, BEFS_I(inode));
291} 292}
292 293
294static void befs_destroy_inode(struct inode *inode)
295{
296 call_rcu(&inode->i_rcu, befs_i_callback);
297}
298
293static void init_once(void *foo) 299static void init_once(void *foo)
294{ 300{
295 struct befs_inode_info *bi = (struct befs_inode_info *) foo; 301 struct befs_inode_info *bi = (struct befs_inode_info *) foo;
@@ -384,7 +390,7 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
384 int num_blks; 390 int num_blks;
385 391
386 befs_ino->i_data.ds = 392 befs_ino->i_data.ds =
387 fsds_to_cpu(sb, raw_inode->data.datastream); 393 fsds_to_cpu(sb, &raw_inode->data.datastream);
388 394
389 num_blks = befs_count_blocks(sb, &befs_ino->i_data.ds); 395 num_blks = befs_count_blocks(sb, &befs_ino->i_data.ds);
390 inode->i_blocks = 396 inode->i_blocks =
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 76db6d7d49b..a8e37f81d09 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -248,11 +248,18 @@ static struct inode *bfs_alloc_inode(struct super_block *sb)
248 return &bi->vfs_inode; 248 return &bi->vfs_inode;
249} 249}
250 250
251static void bfs_destroy_inode(struct inode *inode) 251static void bfs_i_callback(struct rcu_head *head)
252{ 252{
253 struct inode *inode = container_of(head, struct inode, i_rcu);
254 INIT_LIST_HEAD(&inode->i_dentry);
253 kmem_cache_free(bfs_inode_cachep, BFS_I(inode)); 255 kmem_cache_free(bfs_inode_cachep, BFS_I(inode));
254} 256}
255 257
258static void bfs_destroy_inode(struct inode *inode)
259{
260 call_rcu(&inode->i_rcu, bfs_i_callback);
261}
262
256static void init_once(void *foo) 263static void init_once(void *foo)
257{ 264{
258 struct bfs_inode_info *bi = foo; 265 struct bfs_inode_info *bi = foo;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 6884e198e0c..d5b640ba6cb 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -66,12 +66,11 @@ static int elf_core_dump(struct coredump_params *cprm);
66#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1)) 66#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
67 67
68static struct linux_binfmt elf_format = { 68static struct linux_binfmt elf_format = {
69 .module = THIS_MODULE, 69 .module = THIS_MODULE,
70 .load_binary = load_elf_binary, 70 .load_binary = load_elf_binary,
71 .load_shlib = load_elf_library, 71 .load_shlib = load_elf_library,
72 .core_dump = elf_core_dump, 72 .core_dump = elf_core_dump,
73 .min_coredump = ELF_EXEC_PAGESIZE, 73 .min_coredump = ELF_EXEC_PAGESIZE,
74 .hasvdso = 1
75}; 74};
76 75
77#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE) 76#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
@@ -316,8 +315,6 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
316 return 0; 315 return 0;
317} 316}
318 317
319#ifndef elf_map
320
321static unsigned long elf_map(struct file *filep, unsigned long addr, 318static unsigned long elf_map(struct file *filep, unsigned long addr,
322 struct elf_phdr *eppnt, int prot, int type, 319 struct elf_phdr *eppnt, int prot, int type,
323 unsigned long total_size) 320 unsigned long total_size)
@@ -354,8 +351,6 @@ static unsigned long elf_map(struct file *filep, unsigned long addr,
354 return(map_addr); 351 return(map_addr);
355} 352}
356 353
357#endif /* !elf_map */
358
359static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr) 354static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
360{ 355{
361 int i, first_idx = -1, last_idx = -1; 356 int i, first_idx = -1, last_idx = -1;
@@ -421,7 +416,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
421 goto out; 416 goto out;
422 417
423 retval = kernel_read(interpreter, interp_elf_ex->e_phoff, 418 retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
424 (char *)elf_phdata,size); 419 (char *)elf_phdata, size);
425 error = -EIO; 420 error = -EIO;
426 if (retval != size) { 421 if (retval != size) {
427 if (retval < 0) 422 if (retval < 0)
@@ -601,7 +596,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
601 goto out; 596 goto out;
602 if (!elf_check_arch(&loc->elf_ex)) 597 if (!elf_check_arch(&loc->elf_ex))
603 goto out; 598 goto out;
604 if (!bprm->file->f_op||!bprm->file->f_op->mmap) 599 if (!bprm->file->f_op || !bprm->file->f_op->mmap)
605 goto out; 600 goto out;
606 601
607 /* Now read in all of the header information */ 602 /* Now read in all of the header information */
@@ -761,8 +756,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
761 /* There was a PT_LOAD segment with p_memsz > p_filesz 756 /* There was a PT_LOAD segment with p_memsz > p_filesz
762 before this one. Map anonymous pages, if needed, 757 before this one. Map anonymous pages, if needed,
763 and clear the area. */ 758 and clear the area. */
764 retval = set_brk (elf_bss + load_bias, 759 retval = set_brk(elf_bss + load_bias,
765 elf_brk + load_bias); 760 elf_brk + load_bias);
766 if (retval) { 761 if (retval) {
767 send_sig(SIGKILL, current, 0); 762 send_sig(SIGKILL, current, 0);
768 goto out_free_dentry; 763 goto out_free_dentry;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 4230252fd68..771f2352701 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -409,13 +409,20 @@ static struct inode *bdev_alloc_inode(struct super_block *sb)
409 return &ei->vfs_inode; 409 return &ei->vfs_inode;
410} 410}
411 411
412static void bdev_destroy_inode(struct inode *inode) 412static void bdev_i_callback(struct rcu_head *head)
413{ 413{
414 struct inode *inode = container_of(head, struct inode, i_rcu);
414 struct bdev_inode *bdi = BDEV_I(inode); 415 struct bdev_inode *bdi = BDEV_I(inode);
415 416
417 INIT_LIST_HEAD(&inode->i_dentry);
416 kmem_cache_free(bdev_cachep, bdi); 418 kmem_cache_free(bdev_cachep, bdi);
417} 419}
418 420
421static void bdev_destroy_inode(struct inode *inode)
422{
423 call_rcu(&inode->i_rcu, bdev_i_callback);
424}
425
419static void init_once(void *foo) 426static void init_once(void *foo)
420{ 427{
421 struct bdev_inode *ei = (struct bdev_inode *) foo; 428 struct bdev_inode *ei = (struct bdev_inode *) foo;
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 2222d161c7b..6ae2c8cac9d 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -185,18 +185,23 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
185 return ret; 185 return ret;
186} 186}
187 187
188int btrfs_check_acl(struct inode *inode, int mask) 188int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags)
189{ 189{
190 struct posix_acl *acl;
191 int error = -EAGAIN; 190 int error = -EAGAIN;
192 191
193 acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS); 192 if (flags & IPERM_FLAG_RCU) {
193 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
194 error = -ECHILD;
194 195
195 if (IS_ERR(acl)) 196 } else {
196 return PTR_ERR(acl); 197 struct posix_acl *acl;
197 if (acl) { 198 acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
198 error = posix_acl_permission(inode, acl, mask); 199 if (IS_ERR(acl))
199 posix_acl_release(acl); 200 return PTR_ERR(acl);
201 if (acl) {
202 error = posix_acl_permission(inode, acl, mask);
203 posix_acl_release(acl);
204 }
200 } 205 }
201 206
202 return error; 207 return error;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index af52f6d7a4d..a142d204b52 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2544,7 +2544,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait);
2544 2544
2545/* acl.c */ 2545/* acl.c */
2546#ifdef CONFIG_BTRFS_FS_POSIX_ACL 2546#ifdef CONFIG_BTRFS_FS_POSIX_ACL
2547int btrfs_check_acl(struct inode *inode, int mask); 2547int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags);
2548#else 2548#else
2549#define btrfs_check_acl NULL 2549#define btrfs_check_acl NULL
2550#endif 2550#endif
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 659f532d26a..0ccf9a8afcd 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -110,7 +110,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
110 110
111 dentry = d_obtain_alias(inode); 111 dentry = d_obtain_alias(inode);
112 if (!IS_ERR(dentry)) 112 if (!IS_ERR(dentry))
113 dentry->d_op = &btrfs_dentry_operations; 113 d_set_d_op(dentry, &btrfs_dentry_operations);
114 return dentry; 114 return dentry;
115fail: 115fail:
116 srcu_read_unlock(&fs_info->subvol_srcu, index); 116 srcu_read_unlock(&fs_info->subvol_srcu, index);
@@ -225,7 +225,7 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
225 key.offset = 0; 225 key.offset = 0;
226 dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL)); 226 dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL));
227 if (!IS_ERR(dentry)) 227 if (!IS_ERR(dentry))
228 dentry->d_op = &btrfs_dentry_operations; 228 d_set_d_op(dentry, &btrfs_dentry_operations);
229 return dentry; 229 return dentry;
230fail: 230fail:
231 btrfs_free_path(path); 231 btrfs_free_path(path);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 72f31ecb5c9..a0ff46a4789 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4084,7 +4084,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
4084 int index; 4084 int index;
4085 int ret; 4085 int ret;
4086 4086
4087 dentry->d_op = &btrfs_dentry_operations; 4087 d_set_d_op(dentry, &btrfs_dentry_operations);
4088 4088
4089 if (dentry->d_name.len > BTRFS_NAME_LEN) 4089 if (dentry->d_name.len > BTRFS_NAME_LEN)
4090 return ERR_PTR(-ENAMETOOLONG); 4090 return ERR_PTR(-ENAMETOOLONG);
@@ -4127,7 +4127,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
4127 return inode; 4127 return inode;
4128} 4128}
4129 4129
4130static int btrfs_dentry_delete(struct dentry *dentry) 4130static int btrfs_dentry_delete(const struct dentry *dentry)
4131{ 4131{
4132 struct btrfs_root *root; 4132 struct btrfs_root *root;
4133 4133
@@ -6495,6 +6495,13 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
6495 return inode; 6495 return inode;
6496} 6496}
6497 6497
6498static void btrfs_i_callback(struct rcu_head *head)
6499{
6500 struct inode *inode = container_of(head, struct inode, i_rcu);
6501 INIT_LIST_HEAD(&inode->i_dentry);
6502 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
6503}
6504
6498void btrfs_destroy_inode(struct inode *inode) 6505void btrfs_destroy_inode(struct inode *inode)
6499{ 6506{
6500 struct btrfs_ordered_extent *ordered; 6507 struct btrfs_ordered_extent *ordered;
@@ -6564,7 +6571,7 @@ void btrfs_destroy_inode(struct inode *inode)
6564 inode_tree_del(inode); 6571 inode_tree_del(inode);
6565 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); 6572 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
6566free: 6573free:
6567 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); 6574 call_rcu(&inode->i_rcu, btrfs_i_callback);
6568} 6575}
6569 6576
6570int btrfs_drop_inode(struct inode *inode) 6577int btrfs_drop_inode(struct inode *inode)
@@ -7204,11 +7211,11 @@ static int btrfs_set_page_dirty(struct page *page)
7204 return __set_page_dirty_nobuffers(page); 7211 return __set_page_dirty_nobuffers(page);
7205} 7212}
7206 7213
7207static int btrfs_permission(struct inode *inode, int mask) 7214static int btrfs_permission(struct inode *inode, int mask, unsigned int flags)
7208{ 7215{
7209 if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) 7216 if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE))
7210 return -EACCES; 7217 return -EACCES;
7211 return generic_permission(inode, mask, btrfs_check_acl); 7218 return generic_permission(inode, mask, flags, btrfs_check_acl);
7212} 7219}
7213 7220
7214static const struct inode_operations btrfs_dir_inode_operations = { 7221static const struct inode_operations btrfs_dir_inode_operations = {
diff --git a/fs/buffer.c b/fs/buffer.c
index 5930e382959..2219a76e2ca 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1270,12 +1270,10 @@ static inline void check_irqs_on(void)
1270static void bh_lru_install(struct buffer_head *bh) 1270static void bh_lru_install(struct buffer_head *bh)
1271{ 1271{
1272 struct buffer_head *evictee = NULL; 1272 struct buffer_head *evictee = NULL;
1273 struct bh_lru *lru;
1274 1273
1275 check_irqs_on(); 1274 check_irqs_on();
1276 bh_lru_lock(); 1275 bh_lru_lock();
1277 lru = &__get_cpu_var(bh_lrus); 1276 if (__this_cpu_read(bh_lrus.bhs[0]) != bh) {
1278 if (lru->bhs[0] != bh) {
1279 struct buffer_head *bhs[BH_LRU_SIZE]; 1277 struct buffer_head *bhs[BH_LRU_SIZE];
1280 int in; 1278 int in;
1281 int out = 0; 1279 int out = 0;
@@ -1283,7 +1281,8 @@ static void bh_lru_install(struct buffer_head *bh)
1283 get_bh(bh); 1281 get_bh(bh);
1284 bhs[out++] = bh; 1282 bhs[out++] = bh;
1285 for (in = 0; in < BH_LRU_SIZE; in++) { 1283 for (in = 0; in < BH_LRU_SIZE; in++) {
1286 struct buffer_head *bh2 = lru->bhs[in]; 1284 struct buffer_head *bh2 =
1285 __this_cpu_read(bh_lrus.bhs[in]);
1287 1286
1288 if (bh2 == bh) { 1287 if (bh2 == bh) {
1289 __brelse(bh2); 1288 __brelse(bh2);
@@ -1298,7 +1297,7 @@ static void bh_lru_install(struct buffer_head *bh)
1298 } 1297 }
1299 while (out < BH_LRU_SIZE) 1298 while (out < BH_LRU_SIZE)
1300 bhs[out++] = NULL; 1299 bhs[out++] = NULL;
1301 memcpy(lru->bhs, bhs, sizeof(bhs)); 1300 memcpy(__this_cpu_ptr(&bh_lrus.bhs), bhs, sizeof(bhs));
1302 } 1301 }
1303 bh_lru_unlock(); 1302 bh_lru_unlock();
1304 1303
@@ -1313,23 +1312,22 @@ static struct buffer_head *
1313lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size) 1312lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
1314{ 1313{
1315 struct buffer_head *ret = NULL; 1314 struct buffer_head *ret = NULL;
1316 struct bh_lru *lru;
1317 unsigned int i; 1315 unsigned int i;
1318 1316
1319 check_irqs_on(); 1317 check_irqs_on();
1320 bh_lru_lock(); 1318 bh_lru_lock();
1321 lru = &__get_cpu_var(bh_lrus);
1322 for (i = 0; i < BH_LRU_SIZE; i++) { 1319 for (i = 0; i < BH_LRU_SIZE; i++) {
1323 struct buffer_head *bh = lru->bhs[i]; 1320 struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);
1324 1321
1325 if (bh && bh->b_bdev == bdev && 1322 if (bh && bh->b_bdev == bdev &&
1326 bh->b_blocknr == block && bh->b_size == size) { 1323 bh->b_blocknr == block && bh->b_size == size) {
1327 if (i) { 1324 if (i) {
1328 while (i) { 1325 while (i) {
1329 lru->bhs[i] = lru->bhs[i - 1]; 1326 __this_cpu_write(bh_lrus.bhs[i],
1327 __this_cpu_read(bh_lrus.bhs[i - 1]));
1330 i--; 1328 i--;
1331 } 1329 }
1332 lru->bhs[0] = bh; 1330 __this_cpu_write(bh_lrus.bhs[0], bh);
1333 } 1331 }
1334 get_bh(bh); 1332 get_bh(bh);
1335 ret = bh; 1333 ret = bh;
@@ -3203,22 +3201,23 @@ static void recalc_bh_state(void)
3203 int i; 3201 int i;
3204 int tot = 0; 3202 int tot = 0;
3205 3203
3206 if (__get_cpu_var(bh_accounting).ratelimit++ < 4096) 3204 if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096)
3207 return; 3205 return;
3208 __get_cpu_var(bh_accounting).ratelimit = 0; 3206 __this_cpu_write(bh_accounting.ratelimit, 0);
3209 for_each_online_cpu(i) 3207 for_each_online_cpu(i)
3210 tot += per_cpu(bh_accounting, i).nr; 3208 tot += per_cpu(bh_accounting, i).nr;
3211 buffer_heads_over_limit = (tot > max_buffer_heads); 3209 buffer_heads_over_limit = (tot > max_buffer_heads);
3212} 3210}
3213 3211
3214struct buffer_head *alloc_buffer_head(gfp_t gfp_flags) 3212struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
3215{ 3213{
3216 struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags); 3214 struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
3217 if (ret) { 3215 if (ret) {
3218 INIT_LIST_HEAD(&ret->b_assoc_buffers); 3216 INIT_LIST_HEAD(&ret->b_assoc_buffers);
3219 get_cpu_var(bh_accounting).nr++; 3217 preempt_disable();
3218 __this_cpu_inc(bh_accounting.nr);
3220 recalc_bh_state(); 3219 recalc_bh_state();
3221 put_cpu_var(bh_accounting); 3220 preempt_enable();
3222 } 3221 }
3223 return ret; 3222 return ret;
3224} 3223}
@@ -3228,9 +3227,10 @@ void free_buffer_head(struct buffer_head *bh)
3228{ 3227{
3229 BUG_ON(!list_empty(&bh->b_assoc_buffers)); 3228 BUG_ON(!list_empty(&bh->b_assoc_buffers));
3230 kmem_cache_free(bh_cachep, bh); 3229 kmem_cache_free(bh_cachep, bh);
3231 get_cpu_var(bh_accounting).nr--; 3230 preempt_disable();
3231 __this_cpu_dec(bh_accounting.nr);
3232 recalc_bh_state(); 3232 recalc_bh_state();
3233 put_cpu_var(bh_accounting); 3233 preempt_enable();
3234} 3234}
3235EXPORT_SYMBOL(free_buffer_head); 3235EXPORT_SYMBOL(free_buffer_head);
3236 3236
@@ -3243,9 +3243,8 @@ static void buffer_exit_cpu(int cpu)
3243 brelse(b->bhs[i]); 3243 brelse(b->bhs[i]);
3244 b->bhs[i] = NULL; 3244 b->bhs[i] = NULL;
3245 } 3245 }
3246 get_cpu_var(bh_accounting).nr += per_cpu(bh_accounting, cpu).nr; 3246 this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
3247 per_cpu(bh_accounting, cpu).nr = 0; 3247 per_cpu(bh_accounting, cpu).nr = 0;
3248 put_cpu_var(bh_accounting);
3249} 3248}
3250 3249
3251static int buffer_cpu_notify(struct notifier_block *self, 3250static int buffer_cpu_notify(struct notifier_block *self,
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index d902948a90d..fa7ca04ee81 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -42,11 +42,11 @@ int ceph_init_dentry(struct dentry *dentry)
42 42
43 if (dentry->d_parent == NULL || /* nfs fh_to_dentry */ 43 if (dentry->d_parent == NULL || /* nfs fh_to_dentry */
44 ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP) 44 ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
45 dentry->d_op = &ceph_dentry_ops; 45 d_set_d_op(dentry, &ceph_dentry_ops);
46 else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR) 46 else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR)
47 dentry->d_op = &ceph_snapdir_dentry_ops; 47 d_set_d_op(dentry, &ceph_snapdir_dentry_ops);
48 else 48 else
49 dentry->d_op = &ceph_snap_dentry_ops; 49 d_set_d_op(dentry, &ceph_snap_dentry_ops);
50 50
51 di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO); 51 di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO);
52 if (!di) 52 if (!di)
@@ -112,7 +112,7 @@ static int __dcache_readdir(struct file *filp,
112 dout("__dcache_readdir %p at %llu (last %p)\n", dir, filp->f_pos, 112 dout("__dcache_readdir %p at %llu (last %p)\n", dir, filp->f_pos,
113 last); 113 last);
114 114
115 spin_lock(&dcache_lock); 115 spin_lock(&parent->d_lock);
116 116
117 /* start at beginning? */ 117 /* start at beginning? */
118 if (filp->f_pos == 2 || last == NULL || 118 if (filp->f_pos == 2 || last == NULL ||
@@ -136,6 +136,7 @@ more:
136 fi->at_end = 1; 136 fi->at_end = 1;
137 goto out_unlock; 137 goto out_unlock;
138 } 138 }
139 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
139 if (!d_unhashed(dentry) && dentry->d_inode && 140 if (!d_unhashed(dentry) && dentry->d_inode &&
140 ceph_snap(dentry->d_inode) != CEPH_SNAPDIR && 141 ceph_snap(dentry->d_inode) != CEPH_SNAPDIR &&
141 ceph_ino(dentry->d_inode) != CEPH_INO_CEPH && 142 ceph_ino(dentry->d_inode) != CEPH_INO_CEPH &&
@@ -145,13 +146,15 @@ more:
145 dentry->d_name.len, dentry->d_name.name, di->offset, 146 dentry->d_name.len, dentry->d_name.name, di->offset,
146 filp->f_pos, d_unhashed(dentry) ? " unhashed" : "", 147 filp->f_pos, d_unhashed(dentry) ? " unhashed" : "",
147 !dentry->d_inode ? " null" : ""); 148 !dentry->d_inode ? " null" : "");
149 spin_unlock(&dentry->d_lock);
148 p = p->prev; 150 p = p->prev;
149 dentry = list_entry(p, struct dentry, d_u.d_child); 151 dentry = list_entry(p, struct dentry, d_u.d_child);
150 di = ceph_dentry(dentry); 152 di = ceph_dentry(dentry);
151 } 153 }
152 154
153 atomic_inc(&dentry->d_count); 155 dget_dlock(dentry);
154 spin_unlock(&dcache_lock); 156 spin_unlock(&dentry->d_lock);
157 spin_unlock(&parent->d_lock);
155 158
156 dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos, 159 dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos,
157 dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); 160 dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
@@ -177,19 +180,19 @@ more:
177 180
178 filp->f_pos++; 181 filp->f_pos++;
179 182
180 /* make sure a dentry wasn't dropped while we didn't have dcache_lock */ 183 /* make sure a dentry wasn't dropped while we didn't have parent lock */
181 if (!ceph_i_test(dir, CEPH_I_COMPLETE)) { 184 if (!ceph_i_test(dir, CEPH_I_COMPLETE)) {
182 dout(" lost I_COMPLETE on %p; falling back to mds\n", dir); 185 dout(" lost I_COMPLETE on %p; falling back to mds\n", dir);
183 err = -EAGAIN; 186 err = -EAGAIN;
184 goto out; 187 goto out;
185 } 188 }
186 189
187 spin_lock(&dcache_lock); 190 spin_lock(&parent->d_lock);
188 p = p->prev; /* advance to next dentry */ 191 p = p->prev; /* advance to next dentry */
189 goto more; 192 goto more;
190 193
191out_unlock: 194out_unlock:
192 spin_unlock(&dcache_lock); 195 spin_unlock(&parent->d_lock);
193out: 196out:
194 if (last) 197 if (last)
195 dput(last); 198 dput(last);
@@ -987,7 +990,12 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
987 */ 990 */
988static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd) 991static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd)
989{ 992{
990 struct inode *dir = dentry->d_parent->d_inode; 993 struct inode *dir;
994
995 if (nd->flags & LOOKUP_RCU)
996 return -ECHILD;
997
998 dir = dentry->d_parent->d_inode;
991 999
992 dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry, 1000 dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry,
993 dentry->d_name.len, dentry->d_name.name, dentry->d_inode, 1001 dentry->d_name.len, dentry->d_name.name, dentry->d_inode,
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index bf1286588f2..e61de4f7b99 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -368,6 +368,15 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
368 return &ci->vfs_inode; 368 return &ci->vfs_inode;
369} 369}
370 370
371static void ceph_i_callback(struct rcu_head *head)
372{
373 struct inode *inode = container_of(head, struct inode, i_rcu);
374 struct ceph_inode_info *ci = ceph_inode(inode);
375
376 INIT_LIST_HEAD(&inode->i_dentry);
377 kmem_cache_free(ceph_inode_cachep, ci);
378}
379
371void ceph_destroy_inode(struct inode *inode) 380void ceph_destroy_inode(struct inode *inode)
372{ 381{
373 struct ceph_inode_info *ci = ceph_inode(inode); 382 struct ceph_inode_info *ci = ceph_inode(inode);
@@ -407,7 +416,7 @@ void ceph_destroy_inode(struct inode *inode)
407 if (ci->i_xattrs.prealloc_blob) 416 if (ci->i_xattrs.prealloc_blob)
408 ceph_buffer_put(ci->i_xattrs.prealloc_blob); 417 ceph_buffer_put(ci->i_xattrs.prealloc_blob);
409 418
410 kmem_cache_free(ceph_inode_cachep, ci); 419 call_rcu(&inode->i_rcu, ceph_i_callback);
411} 420}
412 421
413 422
@@ -841,13 +850,13 @@ static void ceph_set_dentry_offset(struct dentry *dn)
841 di->offset = ceph_inode(inode)->i_max_offset++; 850 di->offset = ceph_inode(inode)->i_max_offset++;
842 spin_unlock(&inode->i_lock); 851 spin_unlock(&inode->i_lock);
843 852
844 spin_lock(&dcache_lock); 853 spin_lock(&dir->d_lock);
845 spin_lock(&dn->d_lock); 854 spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
846 list_move(&dn->d_u.d_child, &dir->d_subdirs); 855 list_move(&dn->d_u.d_child, &dir->d_subdirs);
847 dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset, 856 dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset,
848 dn->d_u.d_child.prev, dn->d_u.d_child.next); 857 dn->d_u.d_child.prev, dn->d_u.d_child.next);
849 spin_unlock(&dn->d_lock); 858 spin_unlock(&dn->d_lock);
850 spin_unlock(&dcache_lock); 859 spin_unlock(&dir->d_lock);
851} 860}
852 861
853/* 862/*
@@ -879,8 +888,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
879 } else if (realdn) { 888 } else if (realdn) {
880 dout("dn %p (%d) spliced with %p (%d) " 889 dout("dn %p (%d) spliced with %p (%d) "
881 "inode %p ino %llx.%llx\n", 890 "inode %p ino %llx.%llx\n",
882 dn, atomic_read(&dn->d_count), 891 dn, dn->d_count,
883 realdn, atomic_read(&realdn->d_count), 892 realdn, realdn->d_count,
884 realdn->d_inode, ceph_vinop(realdn->d_inode)); 893 realdn->d_inode, ceph_vinop(realdn->d_inode));
885 dput(dn); 894 dput(dn);
886 dn = realdn; 895 dn = realdn;
@@ -1231,11 +1240,11 @@ retry_lookup:
1231 goto retry_lookup; 1240 goto retry_lookup;
1232 } else { 1241 } else {
1233 /* reorder parent's d_subdirs */ 1242 /* reorder parent's d_subdirs */
1234 spin_lock(&dcache_lock); 1243 spin_lock(&parent->d_lock);
1235 spin_lock(&dn->d_lock); 1244 spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
1236 list_move(&dn->d_u.d_child, &parent->d_subdirs); 1245 list_move(&dn->d_u.d_child, &parent->d_subdirs);
1237 spin_unlock(&dn->d_lock); 1246 spin_unlock(&dn->d_lock);
1238 spin_unlock(&dcache_lock); 1247 spin_unlock(&parent->d_lock);
1239 } 1248 }
1240 1249
1241 di = dn->d_fsdata; 1250 di = dn->d_fsdata;
@@ -1772,12 +1781,17 @@ int ceph_do_getattr(struct inode *inode, int mask)
1772 * Check inode permissions. We verify we have a valid value for 1781 * Check inode permissions. We verify we have a valid value for
1773 * the AUTH cap, then call the generic handler. 1782 * the AUTH cap, then call the generic handler.
1774 */ 1783 */
1775int ceph_permission(struct inode *inode, int mask) 1784int ceph_permission(struct inode *inode, int mask, unsigned int flags)
1776{ 1785{
1777 int err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED); 1786 int err;
1787
1788 if (flags & IPERM_FLAG_RCU)
1789 return -ECHILD;
1790
1791 err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED);
1778 1792
1779 if (!err) 1793 if (!err)
1780 err = generic_permission(inode, mask, NULL); 1794 err = generic_permission(inode, mask, flags, NULL);
1781 return err; 1795 return err;
1782} 1796}
1783 1797
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 38800eaa81d..a50fca1e03b 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1486,7 +1486,7 @@ retry:
1486 *base = ceph_ino(temp->d_inode); 1486 *base = ceph_ino(temp->d_inode);
1487 *plen = len; 1487 *plen = len;
1488 dout("build_path on %p %d built %llx '%.*s'\n", 1488 dout("build_path on %p %d built %llx '%.*s'\n",
1489 dentry, atomic_read(&dentry->d_count), *base, len, path); 1489 dentry, dentry->d_count, *base, len, path);
1490 return path; 1490 return path;
1491} 1491}
1492 1492
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 7f01728a465..4553d8829ed 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -665,7 +665,7 @@ extern void ceph_queue_invalidate(struct inode *inode);
665extern void ceph_queue_writeback(struct inode *inode); 665extern void ceph_queue_writeback(struct inode *inode);
666 666
667extern int ceph_do_getattr(struct inode *inode, int mask); 667extern int ceph_do_getattr(struct inode *inode, int mask);
668extern int ceph_permission(struct inode *inode, int mask); 668extern int ceph_permission(struct inode *inode, int mask, unsigned int flags);
669extern int ceph_setattr(struct dentry *dentry, struct iattr *attr); 669extern int ceph_setattr(struct dentry *dentry, struct iattr *attr);
670extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry, 670extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
671 struct kstat *stat); 671 struct kstat *stat);
diff --git a/fs/char_dev.c b/fs/char_dev.c
index e5b9df993b9..6e99b9ddd4e 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -417,18 +417,6 @@ static int chrdev_open(struct inode *inode, struct file *filp)
417 return ret; 417 return ret;
418} 418}
419 419
420int cdev_index(struct inode *inode)
421{
422 int idx;
423 struct kobject *kobj;
424
425 kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx);
426 if (!kobj)
427 return -1;
428 kobject_put(kobj);
429 return idx;
430}
431
432void cd_forget(struct inode *inode) 420void cd_forget(struct inode *inode)
433{ 421{
434 spin_lock(&cdev_lock); 422 spin_lock(&cdev_lock);
@@ -582,7 +570,6 @@ EXPORT_SYMBOL(cdev_init);
582EXPORT_SYMBOL(cdev_alloc); 570EXPORT_SYMBOL(cdev_alloc);
583EXPORT_SYMBOL(cdev_del); 571EXPORT_SYMBOL(cdev_del);
584EXPORT_SYMBOL(cdev_add); 572EXPORT_SYMBOL(cdev_add);
585EXPORT_SYMBOL(cdev_index);
586EXPORT_SYMBOL(__register_chrdev); 573EXPORT_SYMBOL(__register_chrdev);
587EXPORT_SYMBOL(__unregister_chrdev); 574EXPORT_SYMBOL(__unregister_chrdev);
588EXPORT_SYMBOL(directly_mappable_cdev_bdi); 575EXPORT_SYMBOL(directly_mappable_cdev_bdi);
diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c
index 224d7bbd1fc..e654dfd092c 100644
--- a/fs/cifs/cache.c
+++ b/fs/cifs/cache.c
@@ -64,7 +64,9 @@ static uint16_t cifs_server_get_key(const void *cookie_netfs_data,
64 void *buffer, uint16_t maxbuf) 64 void *buffer, uint16_t maxbuf)
65{ 65{
66 const struct TCP_Server_Info *server = cookie_netfs_data; 66 const struct TCP_Server_Info *server = cookie_netfs_data;
67 const struct sockaddr *sa = (struct sockaddr *) &server->addr.sockAddr; 67 const struct sockaddr *sa = (struct sockaddr *) &server->dstaddr;
68 const struct sockaddr_in *addr = (struct sockaddr_in *) sa;
69 const struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *) sa;
68 struct cifs_server_key *key = buffer; 70 struct cifs_server_key *key = buffer;
69 uint16_t key_len = sizeof(struct cifs_server_key); 71 uint16_t key_len = sizeof(struct cifs_server_key);
70 72
@@ -76,16 +78,16 @@ static uint16_t cifs_server_get_key(const void *cookie_netfs_data,
76 */ 78 */
77 switch (sa->sa_family) { 79 switch (sa->sa_family) {
78 case AF_INET: 80 case AF_INET:
79 key->family = server->addr.sockAddr.sin_family; 81 key->family = sa->sa_family;
80 key->port = server->addr.sockAddr.sin_port; 82 key->port = addr->sin_port;
81 key->addr[0].ipv4_addr = server->addr.sockAddr.sin_addr; 83 key->addr[0].ipv4_addr = addr->sin_addr;
82 key_len += sizeof(key->addr[0].ipv4_addr); 84 key_len += sizeof(key->addr[0].ipv4_addr);
83 break; 85 break;
84 86
85 case AF_INET6: 87 case AF_INET6:
86 key->family = server->addr.sockAddr6.sin6_family; 88 key->family = sa->sa_family;
87 key->port = server->addr.sockAddr6.sin6_port; 89 key->port = addr6->sin6_port;
88 key->addr[0].ipv6_addr = server->addr.sockAddr6.sin6_addr; 90 key->addr[0].ipv6_addr = addr6->sin6_addr;
89 key_len += sizeof(key->addr[0].ipv6_addr); 91 key_len += sizeof(key->addr[0].ipv6_addr);
90 break; 92 break;
91 93
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 103ab8b605b..ede98300a8c 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -119,29 +119,27 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
119 "Display Internal CIFS Data Structures for Debugging\n" 119 "Display Internal CIFS Data Structures for Debugging\n"
120 "---------------------------------------------------\n"); 120 "---------------------------------------------------\n");
121 seq_printf(m, "CIFS Version %s\n", CIFS_VERSION); 121 seq_printf(m, "CIFS Version %s\n", CIFS_VERSION);
122 seq_printf(m, "Features: "); 122 seq_printf(m, "Features:");
123#ifdef CONFIG_CIFS_DFS_UPCALL 123#ifdef CONFIG_CIFS_DFS_UPCALL
124 seq_printf(m, "dfs"); 124 seq_printf(m, " dfs");
125 seq_putc(m, ' ');
126#endif 125#endif
127#ifdef CONFIG_CIFS_FSCACHE 126#ifdef CONFIG_CIFS_FSCACHE
128 seq_printf(m, "fscache"); 127 seq_printf(m, " fscache");
129 seq_putc(m, ' ');
130#endif 128#endif
131#ifdef CONFIG_CIFS_WEAK_PW_HASH 129#ifdef CONFIG_CIFS_WEAK_PW_HASH
132 seq_printf(m, "lanman"); 130 seq_printf(m, " lanman");
133 seq_putc(m, ' ');
134#endif 131#endif
135#ifdef CONFIG_CIFS_POSIX 132#ifdef CONFIG_CIFS_POSIX
136 seq_printf(m, "posix"); 133 seq_printf(m, " posix");
137 seq_putc(m, ' ');
138#endif 134#endif
139#ifdef CONFIG_CIFS_UPCALL 135#ifdef CONFIG_CIFS_UPCALL
140 seq_printf(m, "spnego"); 136 seq_printf(m, " spnego");
141 seq_putc(m, ' ');
142#endif 137#endif
143#ifdef CONFIG_CIFS_XATTR 138#ifdef CONFIG_CIFS_XATTR
144 seq_printf(m, "xattr"); 139 seq_printf(m, " xattr");
140#endif
141#ifdef CONFIG_CIFS_ACL
142 seq_printf(m, " acl");
145#endif 143#endif
146 seq_putc(m, '\n'); 144 seq_putc(m, '\n');
147 seq_printf(m, "Active VFS Requests: %d\n", GlobalTotalActiveXid); 145 seq_printf(m, "Active VFS Requests: %d\n", GlobalTotalActiveXid);
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 87044906cd1..4dfba828316 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -98,6 +98,8 @@ struct key *
98cifs_get_spnego_key(struct cifsSesInfo *sesInfo) 98cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
99{ 99{
100 struct TCP_Server_Info *server = sesInfo->server; 100 struct TCP_Server_Info *server = sesInfo->server;
101 struct sockaddr_in *sa = (struct sockaddr_in *) &server->dstaddr;
102 struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *) &server->dstaddr;
101 char *description, *dp; 103 char *description, *dp;
102 size_t desc_len; 104 size_t desc_len;
103 struct key *spnego_key; 105 struct key *spnego_key;
@@ -127,10 +129,10 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
127 dp = description + strlen(description); 129 dp = description + strlen(description);
128 130
129 /* add the server address */ 131 /* add the server address */
130 if (server->addr.sockAddr.sin_family == AF_INET) 132 if (server->dstaddr.ss_family == AF_INET)
131 sprintf(dp, "ip4=%pI4", &server->addr.sockAddr.sin_addr); 133 sprintf(dp, "ip4=%pI4", &sa->sin_addr);
132 else if (server->addr.sockAddr.sin_family == AF_INET6) 134 else if (server->dstaddr.ss_family == AF_INET6)
133 sprintf(dp, "ip6=%pI6", &server->addr.sockAddr6.sin6_addr); 135 sprintf(dp, "ip6=%pI6", &sa6->sin6_addr);
134 else 136 else
135 goto out; 137 goto out;
136 138
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index f856732161a..66f3d50d067 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -72,6 +72,7 @@ static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu,
72 return 0; 72 return 0;
73} 73}
74 74
75/* must be called with server->srv_mutex held */
75int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server, 76int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
76 __u32 *pexpected_response_sequence_number) 77 __u32 *pexpected_response_sequence_number)
77{ 78{
@@ -84,14 +85,12 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
84 if ((cifs_pdu->Flags2 & SMBFLG2_SECURITY_SIGNATURE) == 0) 85 if ((cifs_pdu->Flags2 & SMBFLG2_SECURITY_SIGNATURE) == 0)
85 return rc; 86 return rc;
86 87
87 spin_lock(&GlobalMid_Lock);
88 cifs_pdu->Signature.Sequence.SequenceNumber = 88 cifs_pdu->Signature.Sequence.SequenceNumber =
89 cpu_to_le32(server->sequence_number); 89 cpu_to_le32(server->sequence_number);
90 cifs_pdu->Signature.Sequence.Reserved = 0; 90 cifs_pdu->Signature.Sequence.Reserved = 0;
91 91
92 *pexpected_response_sequence_number = server->sequence_number++; 92 *pexpected_response_sequence_number = server->sequence_number++;
93 server->sequence_number++; 93 server->sequence_number++;
94 spin_unlock(&GlobalMid_Lock);
95 94
96 rc = cifs_calculate_signature(cifs_pdu, server, smb_signature); 95 rc = cifs_calculate_signature(cifs_pdu, server, smb_signature);
97 if (rc) 96 if (rc)
@@ -149,6 +148,7 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
149 return rc; 148 return rc;
150} 149}
151 150
151/* must be called with server->srv_mutex held */
152int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, 152int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
153 __u32 *pexpected_response_sequence_number) 153 __u32 *pexpected_response_sequence_number)
154{ 154{
@@ -162,14 +162,12 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
162 if ((cifs_pdu->Flags2 & SMBFLG2_SECURITY_SIGNATURE) == 0) 162 if ((cifs_pdu->Flags2 & SMBFLG2_SECURITY_SIGNATURE) == 0)
163 return rc; 163 return rc;
164 164
165 spin_lock(&GlobalMid_Lock);
166 cifs_pdu->Signature.Sequence.SequenceNumber = 165 cifs_pdu->Signature.Sequence.SequenceNumber =
167 cpu_to_le32(server->sequence_number); 166 cpu_to_le32(server->sequence_number);
168 cifs_pdu->Signature.Sequence.Reserved = 0; 167 cifs_pdu->Signature.Sequence.Reserved = 0;
169 168
170 *pexpected_response_sequence_number = server->sequence_number++; 169 *pexpected_response_sequence_number = server->sequence_number++;
171 server->sequence_number++; 170 server->sequence_number++;
172 spin_unlock(&GlobalMid_Lock);
173 171
174 rc = cifs_calc_signature2(iov, n_vec, server, smb_signature); 172 rc = cifs_calc_signature2(iov, n_vec, server, smb_signature);
175 if (rc) 173 if (rc)
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 3936aa7f2c2..5e7075d5f13 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -283,10 +283,13 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
283 return 0; 283 return 0;
284} 284}
285 285
286static int cifs_permission(struct inode *inode, int mask) 286static int cifs_permission(struct inode *inode, int mask, unsigned int flags)
287{ 287{
288 struct cifs_sb_info *cifs_sb; 288 struct cifs_sb_info *cifs_sb;
289 289
290 if (flags & IPERM_FLAG_RCU)
291 return -ECHILD;
292
290 cifs_sb = CIFS_SB(inode->i_sb); 293 cifs_sb = CIFS_SB(inode->i_sb);
291 294
292 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) { 295 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) {
@@ -298,7 +301,7 @@ static int cifs_permission(struct inode *inode, int mask)
298 on the client (above and beyond ACL on servers) for 301 on the client (above and beyond ACL on servers) for
299 servers which do not support setting and viewing mode bits, 302 servers which do not support setting and viewing mode bits,
300 so allowing client to check permissions is useful */ 303 so allowing client to check permissions is useful */
301 return generic_permission(inode, mask, NULL); 304 return generic_permission(inode, mask, flags, NULL);
302} 305}
303 306
304static struct kmem_cache *cifs_inode_cachep; 307static struct kmem_cache *cifs_inode_cachep;
@@ -326,6 +329,8 @@ cifs_alloc_inode(struct super_block *sb)
326 cifs_inode->invalid_mapping = false; 329 cifs_inode->invalid_mapping = false;
327 cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */ 330 cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */
328 cifs_inode->server_eof = 0; 331 cifs_inode->server_eof = 0;
332 cifs_inode->uniqueid = 0;
333 cifs_inode->createtime = 0;
329 334
330 /* Can not set i_flags here - they get immediately overwritten 335 /* Can not set i_flags here - they get immediately overwritten
331 to zero by the VFS */ 336 to zero by the VFS */
@@ -334,10 +339,17 @@ cifs_alloc_inode(struct super_block *sb)
334 return &cifs_inode->vfs_inode; 339 return &cifs_inode->vfs_inode;
335} 340}
336 341
342static void cifs_i_callback(struct rcu_head *head)
343{
344 struct inode *inode = container_of(head, struct inode, i_rcu);
345 INIT_LIST_HEAD(&inode->i_dentry);
346 kmem_cache_free(cifs_inode_cachep, CIFS_I(inode));
347}
348
337static void 349static void
338cifs_destroy_inode(struct inode *inode) 350cifs_destroy_inode(struct inode *inode)
339{ 351{
340 kmem_cache_free(cifs_inode_cachep, CIFS_I(inode)); 352 call_rcu(&inode->i_rcu, cifs_i_callback);
341} 353}
342 354
343static void 355static void
@@ -351,18 +363,19 @@ cifs_evict_inode(struct inode *inode)
351static void 363static void
352cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server) 364cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server)
353{ 365{
366 struct sockaddr_in *sa = (struct sockaddr_in *) &server->dstaddr;
367 struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *) &server->dstaddr;
368
354 seq_printf(s, ",addr="); 369 seq_printf(s, ",addr=");
355 370
356 switch (server->addr.sockAddr.sin_family) { 371 switch (server->dstaddr.ss_family) {
357 case AF_INET: 372 case AF_INET:
358 seq_printf(s, "%pI4", &server->addr.sockAddr.sin_addr.s_addr); 373 seq_printf(s, "%pI4", &sa->sin_addr.s_addr);
359 break; 374 break;
360 case AF_INET6: 375 case AF_INET6:
361 seq_printf(s, "%pI6", 376 seq_printf(s, "%pI6", &sa6->sin6_addr.s6_addr);
362 &server->addr.sockAddr6.sin6_addr.s6_addr); 377 if (sa6->sin6_scope_id)
363 if (server->addr.sockAddr6.sin6_scope_id) 378 seq_printf(s, "%%%u", sa6->sin6_scope_id);
364 seq_printf(s, "%%%u",
365 server->addr.sockAddr6.sin6_scope_id);
366 break; 379 break;
367 default: 380 default:
368 seq_printf(s, "(unknown)"); 381 seq_printf(s, "(unknown)");
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 7136c0c3e2f..606ca8bb710 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -163,10 +163,7 @@ struct TCP_Server_Info {
163 char server_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; 163 char server_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
164 char *hostname; /* hostname portion of UNC string */ 164 char *hostname; /* hostname portion of UNC string */
165 struct socket *ssocket; 165 struct socket *ssocket;
166 union { 166 struct sockaddr_storage dstaddr;
167 struct sockaddr_in sockAddr;
168 struct sockaddr_in6 sockAddr6;
169 } addr;
170 struct sockaddr_storage srcaddr; /* locally bind to this IP */ 167 struct sockaddr_storage srcaddr; /* locally bind to this IP */
171 wait_queue_head_t response_q; 168 wait_queue_head_t response_q;
172 wait_queue_head_t request_q; /* if more than maxmpx to srvr must block*/ 169 wait_queue_head_t request_q; /* if more than maxmpx to srvr must block*/
@@ -210,7 +207,7 @@ struct TCP_Server_Info {
210 char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlm, ntlmv2 etc */ 207 char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlm, ntlmv2 etc */
211 /* 16th byte of RFC1001 workstation name is always null */ 208 /* 16th byte of RFC1001 workstation name is always null */
212 char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; 209 char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
213 __u32 sequence_number; /* needed for CIFS PDU signature */ 210 __u32 sequence_number; /* for signing, protected by srv_mutex */
214 struct session_key session_key; 211 struct session_key session_key;
215 unsigned long lstrp; /* when we got last response from this server */ 212 unsigned long lstrp; /* when we got last response from this server */
216 u16 dialect; /* dialect index that server chose */ 213 u16 dialect; /* dialect index that server chose */
@@ -456,6 +453,7 @@ struct cifsInodeInfo {
456 bool invalid_mapping:1; /* pagecache is invalid */ 453 bool invalid_mapping:1; /* pagecache is invalid */
457 u64 server_eof; /* current file size on server */ 454 u64 server_eof; /* current file size on server */
458 u64 uniqueid; /* server inode number */ 455 u64 uniqueid; /* server inode number */
456 u64 createtime; /* creation time on server */
459#ifdef CONFIG_CIFS_FSCACHE 457#ifdef CONFIG_CIFS_FSCACHE
460 struct fscache_cookie *fscache; 458 struct fscache_cookie *fscache;
461#endif 459#endif
@@ -576,6 +574,7 @@ struct cifs_fattr {
576 u64 cf_uniqueid; 574 u64 cf_uniqueid;
577 u64 cf_eof; 575 u64 cf_eof;
578 u64 cf_bytes; 576 u64 cf_bytes;
577 u64 cf_createtime;
579 uid_t cf_uid; 578 uid_t cf_uid;
580 gid_t cf_gid; 579 gid_t cf_gid;
581 umode_t cf_mode; 580 umode_t cf_mode;
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 67acfb3acad..2f6795e524d 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -401,15 +401,12 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
401 else if ((secFlags & CIFSSEC_AUTH_MASK) == CIFSSEC_MAY_KRB5) { 401 else if ((secFlags & CIFSSEC_AUTH_MASK) == CIFSSEC_MAY_KRB5) {
402 cFYI(1, "Kerberos only mechanism, enable extended security"); 402 cFYI(1, "Kerberos only mechanism, enable extended security");
403 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; 403 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
404 } 404 } else if ((secFlags & CIFSSEC_MUST_NTLMSSP) == CIFSSEC_MUST_NTLMSSP)
405#ifdef CONFIG_CIFS_EXPERIMENTAL
406 else if ((secFlags & CIFSSEC_MUST_NTLMSSP) == CIFSSEC_MUST_NTLMSSP)
407 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; 405 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
408 else if ((secFlags & CIFSSEC_AUTH_MASK) == CIFSSEC_MAY_NTLMSSP) { 406 else if ((secFlags & CIFSSEC_AUTH_MASK) == CIFSSEC_MAY_NTLMSSP) {
409 cFYI(1, "NTLMSSP only mechanism, enable extended security"); 407 cFYI(1, "NTLMSSP only mechanism, enable extended security");
410 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; 408 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
411 } 409 }
412#endif
413 410
414 count = 0; 411 count = 0;
415 for (i = 0; i < CIFS_NUM_PROT; i++) { 412 for (i = 0; i < CIFS_NUM_PROT; i++) {
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index cc1a8604a79..a65d311d163 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -64,8 +64,8 @@ struct smb_vol {
64 char *UNC; 64 char *UNC;
65 char *UNCip; 65 char *UNCip;
66 char *iocharset; /* local code page for mapping to and from Unicode */ 66 char *iocharset; /* local code page for mapping to and from Unicode */
67 char source_rfc1001_name[16]; /* netbios name of client */ 67 char source_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* clnt nb name */
68 char target_rfc1001_name[16]; /* netbios name of server for Win9x/ME */ 68 char target_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* srvr nb name */
69 uid_t cred_uid; 69 uid_t cred_uid;
70 uid_t linux_uid; 70 uid_t linux_uid;
71 gid_t linux_gid; 71 gid_t linux_gid;
@@ -115,8 +115,8 @@ struct smb_vol {
115#define TLINK_ERROR_EXPIRE (1 * HZ) 115#define TLINK_ERROR_EXPIRE (1 * HZ)
116#define TLINK_IDLE_EXPIRE (600 * HZ) 116#define TLINK_IDLE_EXPIRE (600 * HZ)
117 117
118static int ipv4_connect(struct TCP_Server_Info *server); 118static int ip_connect(struct TCP_Server_Info *server);
119static int ipv6_connect(struct TCP_Server_Info *server); 119static int generic_ip_connect(struct TCP_Server_Info *server);
120static void tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink); 120static void tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink);
121static void cifs_prune_tlinks(struct work_struct *work); 121static void cifs_prune_tlinks(struct work_struct *work);
122 122
@@ -200,10 +200,9 @@ cifs_reconnect(struct TCP_Server_Info *server)
200 while ((server->tcpStatus != CifsExiting) && 200 while ((server->tcpStatus != CifsExiting) &&
201 (server->tcpStatus != CifsGood)) { 201 (server->tcpStatus != CifsGood)) {
202 try_to_freeze(); 202 try_to_freeze();
203 if (server->addr.sockAddr6.sin6_family == AF_INET6) 203
204 rc = ipv6_connect(server); 204 /* we should try only the port we connected to before */
205 else 205 rc = generic_ip_connect(server);
206 rc = ipv4_connect(server);
207 if (rc) { 206 if (rc) {
208 cFYI(1, "reconnect error %d", rc); 207 cFYI(1, "reconnect error %d", rc);
209 msleep(3000); 208 msleep(3000);
@@ -477,7 +476,7 @@ incomplete_rcv:
477 * initialize frame) 476 * initialize frame)
478 */ 477 */
479 cifs_set_port((struct sockaddr *) 478 cifs_set_port((struct sockaddr *)
480 &server->addr.sockAddr, CIFS_PORT); 479 &server->dstaddr, CIFS_PORT);
481 cifs_reconnect(server); 480 cifs_reconnect(server);
482 csocket = server->ssocket; 481 csocket = server->ssocket;
483 wake_up(&server->response_q); 482 wake_up(&server->response_q);
@@ -817,11 +816,11 @@ cifs_parse_mount_options(char *options, const char *devname,
817 * informational, only used for servers that do not support 816 * informational, only used for servers that do not support
818 * port 445 and it can be overridden at mount time 817 * port 445 and it can be overridden at mount time
819 */ 818 */
820 memset(vol->source_rfc1001_name, 0x20, 15); 819 memset(vol->source_rfc1001_name, 0x20, RFC1001_NAME_LEN);
821 for (i = 0; i < strnlen(nodename, 15); i++) 820 for (i = 0; i < strnlen(nodename, RFC1001_NAME_LEN); i++)
822 vol->source_rfc1001_name[i] = toupper(nodename[i]); 821 vol->source_rfc1001_name[i] = toupper(nodename[i]);
823 822
824 vol->source_rfc1001_name[15] = 0; 823 vol->source_rfc1001_name[RFC1001_NAME_LEN] = 0;
825 /* null target name indicates to use *SMBSERVR default called name 824 /* null target name indicates to use *SMBSERVR default called name
826 if we end up sending RFC1001 session initialize */ 825 if we end up sending RFC1001 session initialize */
827 vol->target_rfc1001_name[0] = 0; 826 vol->target_rfc1001_name[0] = 0;
@@ -985,13 +984,11 @@ cifs_parse_mount_options(char *options, const char *devname,
985 return 1; 984 return 1;
986 } else if (strnicmp(value, "krb5", 4) == 0) { 985 } else if (strnicmp(value, "krb5", 4) == 0) {
987 vol->secFlg |= CIFSSEC_MAY_KRB5; 986 vol->secFlg |= CIFSSEC_MAY_KRB5;
988#ifdef CONFIG_CIFS_EXPERIMENTAL
989 } else if (strnicmp(value, "ntlmsspi", 8) == 0) { 987 } else if (strnicmp(value, "ntlmsspi", 8) == 0) {
990 vol->secFlg |= CIFSSEC_MAY_NTLMSSP | 988 vol->secFlg |= CIFSSEC_MAY_NTLMSSP |
991 CIFSSEC_MUST_SIGN; 989 CIFSSEC_MUST_SIGN;
992 } else if (strnicmp(value, "ntlmssp", 7) == 0) { 990 } else if (strnicmp(value, "ntlmssp", 7) == 0) {
993 vol->secFlg |= CIFSSEC_MAY_NTLMSSP; 991 vol->secFlg |= CIFSSEC_MAY_NTLMSSP;
994#endif
995 } else if (strnicmp(value, "ntlmv2i", 7) == 0) { 992 } else if (strnicmp(value, "ntlmv2i", 7) == 0) {
996 vol->secFlg |= CIFSSEC_MAY_NTLMV2 | 993 vol->secFlg |= CIFSSEC_MAY_NTLMV2 |
997 CIFSSEC_MUST_SIGN; 994 CIFSSEC_MUST_SIGN;
@@ -1168,22 +1165,22 @@ cifs_parse_mount_options(char *options, const char *devname,
1168 if (!value || !*value || (*value == ' ')) { 1165 if (!value || !*value || (*value == ' ')) {
1169 cFYI(1, "invalid (empty) netbiosname"); 1166 cFYI(1, "invalid (empty) netbiosname");
1170 } else { 1167 } else {
1171 memset(vol->source_rfc1001_name, 0x20, 15); 1168 memset(vol->source_rfc1001_name, 0x20,
1172 for (i = 0; i < 15; i++) { 1169 RFC1001_NAME_LEN);
1173 /* BB are there cases in which a comma can be 1170 /*
1174 valid in this workstation netbios name (and need 1171 * FIXME: are there cases in which a comma can
1175 special handling)? */ 1172 * be valid in workstation netbios name (and
1176 1173 * need special handling)?
1177 /* We do not uppercase netbiosname for user */ 1174 */
1175 for (i = 0; i < RFC1001_NAME_LEN; i++) {
1176 /* don't ucase netbiosname for user */
1178 if (value[i] == 0) 1177 if (value[i] == 0)
1179 break; 1178 break;
1180 else 1179 vol->source_rfc1001_name[i] = value[i];
1181 vol->source_rfc1001_name[i] =
1182 value[i];
1183 } 1180 }
1184 /* The string has 16th byte zero still from 1181 /* The string has 16th byte zero still from
1185 set at top of the function */ 1182 set at top of the function */
1186 if ((i == 15) && (value[i] != 0)) 1183 if (i == RFC1001_NAME_LEN && value[i] != 0)
1187 printk(KERN_WARNING "CIFS: netbiosname" 1184 printk(KERN_WARNING "CIFS: netbiosname"
1188 " longer than 15 truncated.\n"); 1185 " longer than 15 truncated.\n");
1189 } 1186 }
@@ -1193,7 +1190,8 @@ cifs_parse_mount_options(char *options, const char *devname,
1193 cFYI(1, "empty server netbiosname specified"); 1190 cFYI(1, "empty server netbiosname specified");
1194 } else { 1191 } else {
1195 /* last byte, type, is 0x20 for servr type */ 1192 /* last byte, type, is 0x20 for servr type */
1196 memset(vol->target_rfc1001_name, 0x20, 16); 1193 memset(vol->target_rfc1001_name, 0x20,
1194 RFC1001_NAME_LEN_WITH_NULL);
1197 1195
1198 for (i = 0; i < 15; i++) { 1196 for (i = 0; i < 15; i++) {
1199 /* BB are there cases in which a comma can be 1197 /* BB are there cases in which a comma can be
@@ -1210,7 +1208,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1210 } 1208 }
1211 /* The string has 16th byte zero still from 1209 /* The string has 16th byte zero still from
1212 set at top of the function */ 1210 set at top of the function */
1213 if ((i == 15) && (value[i] != 0)) 1211 if (i == RFC1001_NAME_LEN && value[i] != 0)
1214 printk(KERN_WARNING "CIFS: server net" 1212 printk(KERN_WARNING "CIFS: server net"
1215 "biosname longer than 15 truncated.\n"); 1213 "biosname longer than 15 truncated.\n");
1216 } 1214 }
@@ -1341,10 +1339,8 @@ cifs_parse_mount_options(char *options, const char *devname,
1341 vol->no_psx_acl = 0; 1339 vol->no_psx_acl = 0;
1342 } else if (strnicmp(data, "noacl", 5) == 0) { 1340 } else if (strnicmp(data, "noacl", 5) == 0) {
1343 vol->no_psx_acl = 1; 1341 vol->no_psx_acl = 1;
1344#ifdef CONFIG_CIFS_EXPERIMENTAL
1345 } else if (strnicmp(data, "locallease", 6) == 0) { 1342 } else if (strnicmp(data, "locallease", 6) == 0) {
1346 vol->local_lease = 1; 1343 vol->local_lease = 1;
1347#endif
1348 } else if (strnicmp(data, "sign", 4) == 0) { 1344 } else if (strnicmp(data, "sign", 4) == 0) {
1349 vol->secFlg |= CIFSSEC_MUST_SIGN; 1345 vol->secFlg |= CIFSSEC_MUST_SIGN;
1350 } else if (strnicmp(data, "seal", 4) == 0) { 1346 } else if (strnicmp(data, "seal", 4) == 0) {
@@ -1454,35 +1450,71 @@ srcip_matches(struct sockaddr *srcaddr, struct sockaddr *rhs)
1454 } 1450 }
1455} 1451}
1456 1452
1453/*
1454 * If no port is specified in addr structure, we try to match with 445 port
1455 * and if it fails - with 139 ports. It should be called only if address
1456 * families of server and addr are equal.
1457 */
1458static bool
1459match_port(struct TCP_Server_Info *server, struct sockaddr *addr)
1460{
1461 unsigned short int port, *sport;
1462
1463 switch (addr->sa_family) {
1464 case AF_INET:
1465 sport = &((struct sockaddr_in *) &server->dstaddr)->sin_port;
1466 port = ((struct sockaddr_in *) addr)->sin_port;
1467 break;
1468 case AF_INET6:
1469 sport = &((struct sockaddr_in6 *) &server->dstaddr)->sin6_port;
1470 port = ((struct sockaddr_in6 *) addr)->sin6_port;
1471 break;
1472 default:
1473 WARN_ON(1);
1474 return false;
1475 }
1476
1477 if (!port) {
1478 port = htons(CIFS_PORT);
1479 if (port == *sport)
1480 return true;
1481
1482 port = htons(RFC1001_PORT);
1483 }
1484
1485 return port == *sport;
1486}
1457 1487
1458static bool 1488static bool
1459match_address(struct TCP_Server_Info *server, struct sockaddr *addr, 1489match_address(struct TCP_Server_Info *server, struct sockaddr *addr,
1460 struct sockaddr *srcaddr) 1490 struct sockaddr *srcaddr)
1461{ 1491{
1462 struct sockaddr_in *addr4 = (struct sockaddr_in *)addr;
1463 struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)addr;
1464
1465 switch (addr->sa_family) { 1492 switch (addr->sa_family) {
1466 case AF_INET: 1493 case AF_INET: {
1467 if (addr4->sin_addr.s_addr != 1494 struct sockaddr_in *addr4 = (struct sockaddr_in *)addr;
1468 server->addr.sockAddr.sin_addr.s_addr) 1495 struct sockaddr_in *srv_addr4 =
1469 return false; 1496 (struct sockaddr_in *)&server->dstaddr;
1470 if (addr4->sin_port && 1497
1471 addr4->sin_port != server->addr.sockAddr.sin_port) 1498 if (addr4->sin_addr.s_addr != srv_addr4->sin_addr.s_addr)
1472 return false; 1499 return false;
1473 break; 1500 break;
1474 case AF_INET6: 1501 }
1502 case AF_INET6: {
1503 struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)addr;
1504 struct sockaddr_in6 *srv_addr6 =
1505 (struct sockaddr_in6 *)&server->dstaddr;
1506
1475 if (!ipv6_addr_equal(&addr6->sin6_addr, 1507 if (!ipv6_addr_equal(&addr6->sin6_addr,
1476 &server->addr.sockAddr6.sin6_addr)) 1508 &srv_addr6->sin6_addr))
1477 return false; 1509 return false;
1478 if (addr6->sin6_scope_id != 1510 if (addr6->sin6_scope_id != srv_addr6->sin6_scope_id)
1479 server->addr.sockAddr6.sin6_scope_id)
1480 return false;
1481 if (addr6->sin6_port &&
1482 addr6->sin6_port != server->addr.sockAddr6.sin6_port)
1483 return false; 1511 return false;
1484 break; 1512 break;
1485 } 1513 }
1514 default:
1515 WARN_ON(1);
1516 return false; /* don't expect to be here */
1517 }
1486 1518
1487 if (!srcip_matches(srcaddr, (struct sockaddr *)&server->srcaddr)) 1519 if (!srcip_matches(srcaddr, (struct sockaddr *)&server->srcaddr))
1488 return false; 1520 return false;
@@ -1549,6 +1581,9 @@ cifs_find_tcp_session(struct sockaddr *addr, struct smb_vol *vol)
1549 (struct sockaddr *)&vol->srcaddr)) 1581 (struct sockaddr *)&vol->srcaddr))
1550 continue; 1582 continue;
1551 1583
1584 if (!match_port(server, addr))
1585 continue;
1586
1552 if (!match_security(server, vol)) 1587 if (!match_security(server, vol))
1553 continue; 1588 continue;
1554 1589
@@ -1681,14 +1716,13 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1681 cFYI(1, "attempting ipv6 connect"); 1716 cFYI(1, "attempting ipv6 connect");
1682 /* BB should we allow ipv6 on port 139? */ 1717 /* BB should we allow ipv6 on port 139? */
1683 /* other OS never observed in Wild doing 139 with v6 */ 1718 /* other OS never observed in Wild doing 139 with v6 */
1684 memcpy(&tcp_ses->addr.sockAddr6, sin_server6, 1719 memcpy(&tcp_ses->dstaddr, sin_server6,
1685 sizeof(struct sockaddr_in6)); 1720 sizeof(struct sockaddr_in6));
1686 rc = ipv6_connect(tcp_ses); 1721 } else
1687 } else { 1722 memcpy(&tcp_ses->dstaddr, sin_server,
1688 memcpy(&tcp_ses->addr.sockAddr, sin_server, 1723 sizeof(struct sockaddr_in));
1689 sizeof(struct sockaddr_in)); 1724
1690 rc = ipv4_connect(tcp_ses); 1725 rc = ip_connect(tcp_ses);
1691 }
1692 if (rc < 0) { 1726 if (rc < 0) {
1693 cERROR(1, "Error connecting to socket. Aborting operation"); 1727 cERROR(1, "Error connecting to socket. Aborting operation");
1694 goto out_err_crypto_release; 1728 goto out_err_crypto_release;
@@ -1793,6 +1827,8 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
1793{ 1827{
1794 int rc = -ENOMEM, xid; 1828 int rc = -ENOMEM, xid;
1795 struct cifsSesInfo *ses; 1829 struct cifsSesInfo *ses;
1830 struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr;
1831 struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&server->dstaddr;
1796 1832
1797 xid = GetXid(); 1833 xid = GetXid();
1798 1834
@@ -1836,12 +1872,10 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
1836 1872
1837 /* new SMB session uses our server ref */ 1873 /* new SMB session uses our server ref */
1838 ses->server = server; 1874 ses->server = server;
1839 if (server->addr.sockAddr6.sin6_family == AF_INET6) 1875 if (server->dstaddr.ss_family == AF_INET6)
1840 sprintf(ses->serverName, "%pI6", 1876 sprintf(ses->serverName, "%pI6", &addr6->sin6_addr);
1841 &server->addr.sockAddr6.sin6_addr);
1842 else 1877 else
1843 sprintf(ses->serverName, "%pI4", 1878 sprintf(ses->serverName, "%pI4", &addr->sin_addr);
1844 &server->addr.sockAddr.sin_addr.s_addr);
1845 1879
1846 if (volume_info->username) 1880 if (volume_info->username)
1847 strncpy(ses->userName, volume_info->username, 1881 strncpy(ses->userName, volume_info->username,
@@ -2136,19 +2170,106 @@ bind_socket(struct TCP_Server_Info *server)
2136} 2170}
2137 2171
2138static int 2172static int
2139ipv4_connect(struct TCP_Server_Info *server) 2173ip_rfc1001_connect(struct TCP_Server_Info *server)
2174{
2175 int rc = 0;
2176 /*
2177 * some servers require RFC1001 sessinit before sending
2178 * negprot - BB check reconnection in case where second
2179 * sessinit is sent but no second negprot
2180 */
2181 struct rfc1002_session_packet *ses_init_buf;
2182 struct smb_hdr *smb_buf;
2183 ses_init_buf = kzalloc(sizeof(struct rfc1002_session_packet),
2184 GFP_KERNEL);
2185 if (ses_init_buf) {
2186 ses_init_buf->trailer.session_req.called_len = 32;
2187
2188 if (server->server_RFC1001_name &&
2189 server->server_RFC1001_name[0] != 0)
2190 rfc1002mangle(ses_init_buf->trailer.
2191 session_req.called_name,
2192 server->server_RFC1001_name,
2193 RFC1001_NAME_LEN_WITH_NULL);
2194 else
2195 rfc1002mangle(ses_init_buf->trailer.
2196 session_req.called_name,
2197 DEFAULT_CIFS_CALLED_NAME,
2198 RFC1001_NAME_LEN_WITH_NULL);
2199
2200 ses_init_buf->trailer.session_req.calling_len = 32;
2201
2202 /*
2203 * calling name ends in null (byte 16) from old smb
2204 * convention.
2205 */
2206 if (server->workstation_RFC1001_name &&
2207 server->workstation_RFC1001_name[0] != 0)
2208 rfc1002mangle(ses_init_buf->trailer.
2209 session_req.calling_name,
2210 server->workstation_RFC1001_name,
2211 RFC1001_NAME_LEN_WITH_NULL);
2212 else
2213 rfc1002mangle(ses_init_buf->trailer.
2214 session_req.calling_name,
2215 "LINUX_CIFS_CLNT",
2216 RFC1001_NAME_LEN_WITH_NULL);
2217
2218 ses_init_buf->trailer.session_req.scope1 = 0;
2219 ses_init_buf->trailer.session_req.scope2 = 0;
2220 smb_buf = (struct smb_hdr *)ses_init_buf;
2221
2222 /* sizeof RFC1002_SESSION_REQUEST with no scope */
2223 smb_buf->smb_buf_length = 0x81000044;
2224 rc = smb_send(server, smb_buf, 0x44);
2225 kfree(ses_init_buf);
2226 /*
2227 * RFC1001 layer in at least one server
2228 * requires very short break before negprot
2229 * presumably because not expecting negprot
2230 * to follow so fast. This is a simple
2231 * solution that works without
2232 * complicating the code and causes no
2233 * significant slowing down on mount
2234 * for everyone else
2235 */
2236 usleep_range(1000, 2000);
2237 }
2238 /*
2239 * else the negprot may still work without this
2240 * even though malloc failed
2241 */
2242
2243 return rc;
2244}
2245
2246static int
2247generic_ip_connect(struct TCP_Server_Info *server)
2140{ 2248{
2141 int rc = 0; 2249 int rc = 0;
2142 int val; 2250 unsigned short int sport;
2143 bool connected = false; 2251 int slen, sfamily;
2144 __be16 orig_port = 0;
2145 struct socket *socket = server->ssocket; 2252 struct socket *socket = server->ssocket;
2253 struct sockaddr *saddr;
2254
2255 saddr = (struct sockaddr *) &server->dstaddr;
2256
2257 if (server->dstaddr.ss_family == AF_INET6) {
2258 sport = ((struct sockaddr_in6 *) saddr)->sin6_port;
2259 slen = sizeof(struct sockaddr_in6);
2260 sfamily = AF_INET6;
2261 } else {
2262 sport = ((struct sockaddr_in *) saddr)->sin_port;
2263 slen = sizeof(struct sockaddr_in);
2264 sfamily = AF_INET;
2265 }
2146 2266
2147 if (socket == NULL) { 2267 if (socket == NULL) {
2148 rc = sock_create_kern(PF_INET, SOCK_STREAM, 2268 rc = sock_create_kern(sfamily, SOCK_STREAM,
2149 IPPROTO_TCP, &socket); 2269 IPPROTO_TCP, &socket);
2150 if (rc < 0) { 2270 if (rc < 0) {
2151 cERROR(1, "Error %d creating socket", rc); 2271 cERROR(1, "Error %d creating socket", rc);
2272 server->ssocket = NULL;
2152 return rc; 2273 return rc;
2153 } 2274 }
2154 2275
@@ -2156,63 +2277,28 @@ ipv4_connect(struct TCP_Server_Info *server)
2156 cFYI(1, "Socket created"); 2277 cFYI(1, "Socket created");
2157 server->ssocket = socket; 2278 server->ssocket = socket;
2158 socket->sk->sk_allocation = GFP_NOFS; 2279 socket->sk->sk_allocation = GFP_NOFS;
2159 cifs_reclassify_socket4(socket); 2280 if (sfamily == AF_INET6)
2281 cifs_reclassify_socket6(socket);
2282 else
2283 cifs_reclassify_socket4(socket);
2160 } 2284 }
2161 2285
2162 rc = bind_socket(server); 2286 rc = bind_socket(server);
2163 if (rc < 0) 2287 if (rc < 0)
2164 return rc; 2288 return rc;
2165 2289
2166 /* user overrode default port */ 2290 rc = socket->ops->connect(socket, saddr, slen, 0);
2167 if (server->addr.sockAddr.sin_port) { 2291 if (rc < 0) {
2168 rc = socket->ops->connect(socket, (struct sockaddr *) 2292 cFYI(1, "Error %d connecting to server", rc);
2169 &server->addr.sockAddr,
2170 sizeof(struct sockaddr_in), 0);
2171 if (rc >= 0)
2172 connected = true;
2173 }
2174
2175 if (!connected) {
2176 /* save original port so we can retry user specified port
2177 later if fall back ports fail this time */
2178 orig_port = server->addr.sockAddr.sin_port;
2179
2180 /* do not retry on the same port we just failed on */
2181 if (server->addr.sockAddr.sin_port != htons(CIFS_PORT)) {
2182 server->addr.sockAddr.sin_port = htons(CIFS_PORT);
2183 rc = socket->ops->connect(socket,
2184 (struct sockaddr *)
2185 &server->addr.sockAddr,
2186 sizeof(struct sockaddr_in), 0);
2187 if (rc >= 0)
2188 connected = true;
2189 }
2190 }
2191 if (!connected) {
2192 server->addr.sockAddr.sin_port = htons(RFC1001_PORT);
2193 rc = socket->ops->connect(socket, (struct sockaddr *)
2194 &server->addr.sockAddr,
2195 sizeof(struct sockaddr_in), 0);
2196 if (rc >= 0)
2197 connected = true;
2198 }
2199
2200 /* give up here - unless we want to retry on different
2201 protocol families some day */
2202 if (!connected) {
2203 if (orig_port)
2204 server->addr.sockAddr.sin_port = orig_port;
2205 cFYI(1, "Error %d connecting to server via ipv4", rc);
2206 sock_release(socket); 2293 sock_release(socket);
2207 server->ssocket = NULL; 2294 server->ssocket = NULL;
2208 return rc; 2295 return rc;
2209 } 2296 }
2210 2297
2211
2212 /* 2298 /*
2213 * Eventually check for other socket options to change from 2299 * Eventually check for other socket options to change from
2214 * the default. sock_setsockopt not used because it expects 2300 * the default. sock_setsockopt not used because it expects
2215 * user space buffer 2301 * user space buffer
2216 */ 2302 */
2217 socket->sk->sk_rcvtimeo = 7 * HZ; 2303 socket->sk->sk_rcvtimeo = 7 * HZ;
2218 socket->sk->sk_sndtimeo = 5 * HZ; 2304 socket->sk->sk_sndtimeo = 5 * HZ;
@@ -2226,7 +2312,7 @@ ipv4_connect(struct TCP_Server_Info *server)
2226 } 2312 }
2227 2313
2228 if (server->tcp_nodelay) { 2314 if (server->tcp_nodelay) {
2229 val = 1; 2315 int val = 1;
2230 rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY, 2316 rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY,
2231 (char *)&val, sizeof(val)); 2317 (char *)&val, sizeof(val));
2232 if (rc) 2318 if (rc)
@@ -2237,161 +2323,39 @@ ipv4_connect(struct TCP_Server_Info *server)
2237 socket->sk->sk_sndbuf, 2323 socket->sk->sk_sndbuf,
2238 socket->sk->sk_rcvbuf, socket->sk->sk_rcvtimeo); 2324 socket->sk->sk_rcvbuf, socket->sk->sk_rcvtimeo);
2239 2325
2240 /* send RFC1001 sessinit */ 2326 if (sport == htons(RFC1001_PORT))
2241 if (server->addr.sockAddr.sin_port == htons(RFC1001_PORT)) { 2327 rc = ip_rfc1001_connect(server);
2242 /* some servers require RFC1001 sessinit before sending
2243 negprot - BB check reconnection in case where second
2244 sessinit is sent but no second negprot */
2245 struct rfc1002_session_packet *ses_init_buf;
2246 struct smb_hdr *smb_buf;
2247 ses_init_buf = kzalloc(sizeof(struct rfc1002_session_packet),
2248 GFP_KERNEL);
2249 if (ses_init_buf) {
2250 ses_init_buf->trailer.session_req.called_len = 32;
2251 if (server->server_RFC1001_name &&
2252 server->server_RFC1001_name[0] != 0)
2253 rfc1002mangle(ses_init_buf->trailer.
2254 session_req.called_name,
2255 server->server_RFC1001_name,
2256 RFC1001_NAME_LEN_WITH_NULL);
2257 else
2258 rfc1002mangle(ses_init_buf->trailer.
2259 session_req.called_name,
2260 DEFAULT_CIFS_CALLED_NAME,
2261 RFC1001_NAME_LEN_WITH_NULL);
2262
2263 ses_init_buf->trailer.session_req.calling_len = 32;
2264
2265 /* calling name ends in null (byte 16) from old smb
2266 convention. */
2267 if (server->workstation_RFC1001_name &&
2268 server->workstation_RFC1001_name[0] != 0)
2269 rfc1002mangle(ses_init_buf->trailer.
2270 session_req.calling_name,
2271 server->workstation_RFC1001_name,
2272 RFC1001_NAME_LEN_WITH_NULL);
2273 else
2274 rfc1002mangle(ses_init_buf->trailer.
2275 session_req.calling_name,
2276 "LINUX_CIFS_CLNT",
2277 RFC1001_NAME_LEN_WITH_NULL);
2278
2279 ses_init_buf->trailer.session_req.scope1 = 0;
2280 ses_init_buf->trailer.session_req.scope2 = 0;
2281 smb_buf = (struct smb_hdr *)ses_init_buf;
2282 /* sizeof RFC1002_SESSION_REQUEST with no scope */
2283 smb_buf->smb_buf_length = 0x81000044;
2284 rc = smb_send(server, smb_buf, 0x44);
2285 kfree(ses_init_buf);
2286 msleep(1); /* RFC1001 layer in at least one server
2287 requires very short break before negprot
2288 presumably because not expecting negprot
2289 to follow so fast. This is a simple
2290 solution that works without
2291 complicating the code and causes no
2292 significant slowing down on mount
2293 for everyone else */
2294 }
2295 /* else the negprot may still work without this
2296 even though malloc failed */
2297
2298 }
2299 2328
2300 return rc; 2329 return rc;
2301} 2330}
2302 2331
2303static int 2332static int
2304ipv6_connect(struct TCP_Server_Info *server) 2333ip_connect(struct TCP_Server_Info *server)
2305{ 2334{
2306 int rc = 0; 2335 unsigned short int *sport;
2307 int val; 2336 struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&server->dstaddr;
2308 bool connected = false; 2337 struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr;
2309 __be16 orig_port = 0;
2310 struct socket *socket = server->ssocket;
2311 2338
2312 if (socket == NULL) { 2339 if (server->dstaddr.ss_family == AF_INET6)
2313 rc = sock_create_kern(PF_INET6, SOCK_STREAM, 2340 sport = &addr6->sin6_port;
2314 IPPROTO_TCP, &socket); 2341 else
2315 if (rc < 0) { 2342 sport = &addr->sin_port;
2316 cERROR(1, "Error %d creating ipv6 socket", rc);
2317 socket = NULL;
2318 return rc;
2319 }
2320 2343
2321 /* BB other socket options to set KEEPALIVE, NODELAY? */ 2344 if (*sport == 0) {
2322 cFYI(1, "ipv6 Socket created"); 2345 int rc;
2323 server->ssocket = socket;
2324 socket->sk->sk_allocation = GFP_NOFS;
2325 cifs_reclassify_socket6(socket);
2326 }
2327 2346
2328 rc = bind_socket(server); 2347 /* try with 445 port at first */
2329 if (rc < 0) 2348 *sport = htons(CIFS_PORT);
2330 return rc;
2331 2349
2332 /* user overrode default port */ 2350 rc = generic_ip_connect(server);
2333 if (server->addr.sockAddr6.sin6_port) {
2334 rc = socket->ops->connect(socket,
2335 (struct sockaddr *) &server->addr.sockAddr6,
2336 sizeof(struct sockaddr_in6), 0);
2337 if (rc >= 0)
2338 connected = true;
2339 }
2340
2341 if (!connected) {
2342 /* save original port so we can retry user specified port
2343 later if fall back ports fail this time */
2344
2345 orig_port = server->addr.sockAddr6.sin6_port;
2346 /* do not retry on the same port we just failed on */
2347 if (server->addr.sockAddr6.sin6_port != htons(CIFS_PORT)) {
2348 server->addr.sockAddr6.sin6_port = htons(CIFS_PORT);
2349 rc = socket->ops->connect(socket, (struct sockaddr *)
2350 &server->addr.sockAddr6,
2351 sizeof(struct sockaddr_in6), 0);
2352 if (rc >= 0)
2353 connected = true;
2354 }
2355 }
2356 if (!connected) {
2357 server->addr.sockAddr6.sin6_port = htons(RFC1001_PORT);
2358 rc = socket->ops->connect(socket, (struct sockaddr *)
2359 &server->addr.sockAddr6,
2360 sizeof(struct sockaddr_in6), 0);
2361 if (rc >= 0) 2351 if (rc >= 0)
2362 connected = true; 2352 return rc;
2363 }
2364
2365 /* give up here - unless we want to retry on different
2366 protocol families some day */
2367 if (!connected) {
2368 if (orig_port)
2369 server->addr.sockAddr6.sin6_port = orig_port;
2370 cFYI(1, "Error %d connecting to server via ipv6", rc);
2371 sock_release(socket);
2372 server->ssocket = NULL;
2373 return rc;
2374 }
2375
2376 /*
2377 * Eventually check for other socket options to change from
2378 * the default. sock_setsockopt not used because it expects
2379 * user space buffer
2380 */
2381 socket->sk->sk_rcvtimeo = 7 * HZ;
2382 socket->sk->sk_sndtimeo = 5 * HZ;
2383 2353
2384 if (server->tcp_nodelay) { 2354 /* if it failed, try with 139 port */
2385 val = 1; 2355 *sport = htons(RFC1001_PORT);
2386 rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY,
2387 (char *)&val, sizeof(val));
2388 if (rc)
2389 cFYI(1, "set TCP_NODELAY socket option error %d", rc);
2390 } 2356 }
2391 2357
2392 server->ssocket = socket; 2358 return generic_ip_connect(server);
2393
2394 return rc;
2395} 2359}
2396 2360
2397void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon, 2361void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon,
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 3840eddbfb7..2e773825835 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -135,9 +135,9 @@ static void setup_cifs_dentry(struct cifsTconInfo *tcon,
135 struct inode *newinode) 135 struct inode *newinode)
136{ 136{
137 if (tcon->nocase) 137 if (tcon->nocase)
138 direntry->d_op = &cifs_ci_dentry_ops; 138 d_set_d_op(direntry, &cifs_ci_dentry_ops);
139 else 139 else
140 direntry->d_op = &cifs_dentry_ops; 140 d_set_d_op(direntry, &cifs_dentry_ops);
141 d_instantiate(direntry, newinode); 141 d_instantiate(direntry, newinode);
142} 142}
143 143
@@ -293,10 +293,8 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
293 args.uid = NO_CHANGE_64; 293 args.uid = NO_CHANGE_64;
294 args.gid = NO_CHANGE_64; 294 args.gid = NO_CHANGE_64;
295 } 295 }
296 CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args, 296 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fileHandle,
297 cifs_sb->local_nls, 297 current->tgid);
298 cifs_sb->mnt_cifs_flags &
299 CIFS_MOUNT_MAP_SPECIAL_CHR);
300 } else { 298 } else {
301 /* BB implement mode setting via Windows security 299 /* BB implement mode setting via Windows security
302 descriptors e.g. */ 300 descriptors e.g. */
@@ -421,9 +419,9 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
421 rc = cifs_get_inode_info_unix(&newinode, full_path, 419 rc = cifs_get_inode_info_unix(&newinode, full_path,
422 inode->i_sb, xid); 420 inode->i_sb, xid);
423 if (pTcon->nocase) 421 if (pTcon->nocase)
424 direntry->d_op = &cifs_ci_dentry_ops; 422 d_set_d_op(direntry, &cifs_ci_dentry_ops);
425 else 423 else
426 direntry->d_op = &cifs_dentry_ops; 424 d_set_d_op(direntry, &cifs_dentry_ops);
427 425
428 if (rc == 0) 426 if (rc == 0)
429 d_instantiate(direntry, newinode); 427 d_instantiate(direntry, newinode);
@@ -604,9 +602,9 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
604 602
605 if ((rc == 0) && (newInode != NULL)) { 603 if ((rc == 0) && (newInode != NULL)) {
606 if (pTcon->nocase) 604 if (pTcon->nocase)
607 direntry->d_op = &cifs_ci_dentry_ops; 605 d_set_d_op(direntry, &cifs_ci_dentry_ops);
608 else 606 else
609 direntry->d_op = &cifs_dentry_ops; 607 d_set_d_op(direntry, &cifs_dentry_ops);
610 d_add(direntry, newInode); 608 d_add(direntry, newInode);
611 if (posix_open) { 609 if (posix_open) {
612 filp = lookup_instantiate_filp(nd, direntry, 610 filp = lookup_instantiate_filp(nd, direntry,
@@ -634,9 +632,9 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
634 rc = 0; 632 rc = 0;
635 direntry->d_time = jiffies; 633 direntry->d_time = jiffies;
636 if (pTcon->nocase) 634 if (pTcon->nocase)
637 direntry->d_op = &cifs_ci_dentry_ops; 635 d_set_d_op(direntry, &cifs_ci_dentry_ops);
638 else 636 else
639 direntry->d_op = &cifs_dentry_ops; 637 d_set_d_op(direntry, &cifs_dentry_ops);
640 d_add(direntry, NULL); 638 d_add(direntry, NULL);
641 /* if it was once a directory (but how can we tell?) we could do 639 /* if it was once a directory (but how can we tell?) we could do
642 shrink_dcache_parent(direntry); */ 640 shrink_dcache_parent(direntry); */
@@ -656,22 +654,37 @@ lookup_out:
656static int 654static int
657cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd) 655cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd)
658{ 656{
659 int isValid = 1; 657 if (nd->flags & LOOKUP_RCU)
658 return -ECHILD;
660 659
661 if (direntry->d_inode) { 660 if (direntry->d_inode) {
662 if (cifs_revalidate_dentry(direntry)) 661 if (cifs_revalidate_dentry(direntry))
663 return 0; 662 return 0;
664 } else { 663 else
665 cFYI(1, "neg dentry 0x%p name = %s", 664 return 1;
666 direntry, direntry->d_name.name); 665 }
667 if (time_after(jiffies, direntry->d_time + HZ) || 666
668 !lookupCacheEnabled) { 667 /*
669 d_drop(direntry); 668 * This may be nfsd (or something), anyway, we can't see the
670 isValid = 0; 669 * intent of this. So, since this can be for creation, drop it.
671 } 670 */
671 if (!nd)
672 return 0;
673
674 /*
675 * Drop the negative dentry, in order to make sure to use the
676 * case sensitive name which is specified by user if this is
677 * for creation.
678 */
679 if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) {
680 if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
681 return 0;
672 } 682 }
673 683
674 return isValid; 684 if (time_after(jiffies, direntry->d_time + HZ) || !lookupCacheEnabled)
685 return 0;
686
687 return 1;
675} 688}
676 689
677/* static int cifs_d_delete(struct dentry *direntry) 690/* static int cifs_d_delete(struct dentry *direntry)
@@ -688,9 +701,10 @@ const struct dentry_operations cifs_dentry_ops = {
688/* d_delete: cifs_d_delete, */ /* not needed except for debugging */ 701/* d_delete: cifs_d_delete, */ /* not needed except for debugging */
689}; 702};
690 703
691static int cifs_ci_hash(struct dentry *dentry, struct qstr *q) 704static int cifs_ci_hash(const struct dentry *dentry, const struct inode *inode,
705 struct qstr *q)
692{ 706{
693 struct nls_table *codepage = CIFS_SB(dentry->d_inode->i_sb)->local_nls; 707 struct nls_table *codepage = CIFS_SB(dentry->d_sb)->local_nls;
694 unsigned long hash; 708 unsigned long hash;
695 int i; 709 int i;
696 710
@@ -703,21 +717,16 @@ static int cifs_ci_hash(struct dentry *dentry, struct qstr *q)
703 return 0; 717 return 0;
704} 718}
705 719
706static int cifs_ci_compare(struct dentry *dentry, struct qstr *a, 720static int cifs_ci_compare(const struct dentry *parent,
707 struct qstr *b) 721 const struct inode *pinode,
722 const struct dentry *dentry, const struct inode *inode,
723 unsigned int len, const char *str, const struct qstr *name)
708{ 724{
709 struct nls_table *codepage = CIFS_SB(dentry->d_inode->i_sb)->local_nls; 725 struct nls_table *codepage = CIFS_SB(pinode->i_sb)->local_nls;
710 726
711 if ((a->len == b->len) && 727 if ((name->len == len) &&
712 (nls_strnicmp(codepage, a->name, b->name, a->len) == 0)) { 728 (nls_strnicmp(codepage, name->name, str, len) == 0))
713 /*
714 * To preserve case, don't let an existing negative dentry's
715 * case take precedence. If a is not a negative dentry, this
716 * should have no side effects
717 */
718 memcpy((void *)a->name, b->name, a->len);
719 return 0; 729 return 0;
720 }
721 return 1; 730 return 1;
722} 731}
723 732
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 5a28660ca2b..d843631c028 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -104,53 +104,6 @@ static inline int cifs_get_disposition(unsigned int flags)
104 return FILE_OPEN; 104 return FILE_OPEN;
105} 105}
106 106
107static inline int cifs_open_inode_helper(struct inode *inode,
108 struct cifsTconInfo *pTcon, __u32 oplock, FILE_ALL_INFO *buf,
109 char *full_path, int xid)
110{
111 struct cifsInodeInfo *pCifsInode = CIFS_I(inode);
112 struct timespec temp;
113 int rc;
114
115 if (pCifsInode->clientCanCacheRead) {
116 /* we have the inode open somewhere else
117 no need to discard cache data */
118 goto client_can_cache;
119 }
120
121 /* BB need same check in cifs_create too? */
122 /* if not oplocked, invalidate inode pages if mtime or file
123 size changed */
124 temp = cifs_NTtimeToUnix(buf->LastWriteTime);
125 if (timespec_equal(&inode->i_mtime, &temp) &&
126 (inode->i_size ==
127 (loff_t)le64_to_cpu(buf->EndOfFile))) {
128 cFYI(1, "inode unchanged on server");
129 } else {
130 if (inode->i_mapping) {
131 /* BB no need to lock inode until after invalidate
132 since namei code should already have it locked? */
133 rc = filemap_write_and_wait(inode->i_mapping);
134 mapping_set_error(inode->i_mapping, rc);
135 }
136 cFYI(1, "invalidating remote inode since open detected it "
137 "changed");
138 invalidate_remote_inode(inode);
139 }
140
141client_can_cache:
142 if (pTcon->unix_ext)
143 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
144 xid);
145 else
146 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
147 xid, NULL);
148
149 cifs_set_oplock_level(pCifsInode, oplock);
150
151 return rc;
152}
153
154int cifs_posix_open(char *full_path, struct inode **pinode, 107int cifs_posix_open(char *full_path, struct inode **pinode,
155 struct super_block *sb, int mode, unsigned int f_flags, 108 struct super_block *sb, int mode, unsigned int f_flags,
156 __u32 *poplock, __u16 *pnetfid, int xid) 109 __u32 *poplock, __u16 *pnetfid, int xid)
@@ -213,6 +166,76 @@ posix_open_ret:
213 return rc; 166 return rc;
214} 167}
215 168
169static int
170cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
171 struct cifsTconInfo *tcon, unsigned int f_flags, __u32 *poplock,
172 __u16 *pnetfid, int xid)
173{
174 int rc;
175 int desiredAccess;
176 int disposition;
177 FILE_ALL_INFO *buf;
178
179 desiredAccess = cifs_convert_flags(f_flags);
180
181/*********************************************************************
182 * open flag mapping table:
183 *
184 * POSIX Flag CIFS Disposition
185 * ---------- ----------------
186 * O_CREAT FILE_OPEN_IF
187 * O_CREAT | O_EXCL FILE_CREATE
188 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
189 * O_TRUNC FILE_OVERWRITE
190 * none of the above FILE_OPEN
191 *
192 * Note that there is not a direct match between disposition
193 * FILE_SUPERSEDE (ie create whether or not file exists although
194 * O_CREAT | O_TRUNC is similar but truncates the existing
195 * file rather than creating a new file as FILE_SUPERSEDE does
196 * (which uses the attributes / metadata passed in on open call)
197 *?
198 *? O_SYNC is a reasonable match to CIFS writethrough flag
199 *? and the read write flags match reasonably. O_LARGEFILE
200 *? is irrelevant because largefile support is always used
201 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
202 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
203 *********************************************************************/
204
205 disposition = cifs_get_disposition(f_flags);
206
207 /* BB pass O_SYNC flag through on file attributes .. BB */
208
209 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
210 if (!buf)
211 return -ENOMEM;
212
213 if (tcon->ses->capabilities & CAP_NT_SMBS)
214 rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
215 desiredAccess, CREATE_NOT_DIR, pnetfid, poplock, buf,
216 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
217 & CIFS_MOUNT_MAP_SPECIAL_CHR);
218 else
219 rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
220 desiredAccess, CREATE_NOT_DIR, pnetfid, poplock, buf,
221 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
222 & CIFS_MOUNT_MAP_SPECIAL_CHR);
223
224 if (rc)
225 goto out;
226
227 if (tcon->unix_ext)
228 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
229 xid);
230 else
231 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
232 xid, pnetfid);
233
234out:
235 kfree(buf);
236 return rc;
237}
238
216struct cifsFileInfo * 239struct cifsFileInfo *
217cifs_new_fileinfo(__u16 fileHandle, struct file *file, 240cifs_new_fileinfo(__u16 fileHandle, struct file *file,
218 struct tcon_link *tlink, __u32 oplock) 241 struct tcon_link *tlink, __u32 oplock)
@@ -317,10 +340,8 @@ int cifs_open(struct inode *inode, struct file *file)
317 struct cifsFileInfo *pCifsFile = NULL; 340 struct cifsFileInfo *pCifsFile = NULL;
318 struct cifsInodeInfo *pCifsInode; 341 struct cifsInodeInfo *pCifsInode;
319 char *full_path = NULL; 342 char *full_path = NULL;
320 int desiredAccess; 343 bool posix_open_ok = false;
321 int disposition;
322 __u16 netfid; 344 __u16 netfid;
323 FILE_ALL_INFO *buf = NULL;
324 345
325 xid = GetXid(); 346 xid = GetXid();
326 347
@@ -358,17 +379,7 @@ int cifs_open(struct inode *inode, struct file *file)
358 file->f_flags, &oplock, &netfid, xid); 379 file->f_flags, &oplock, &netfid, xid);
359 if (rc == 0) { 380 if (rc == 0) {
360 cFYI(1, "posix open succeeded"); 381 cFYI(1, "posix open succeeded");
361 382 posix_open_ok = true;
362 pCifsFile = cifs_new_fileinfo(netfid, file, tlink,
363 oplock);
364 if (pCifsFile == NULL) {
365 CIFSSMBClose(xid, tcon, netfid);
366 rc = -ENOMEM;
367 }
368
369 cifs_fscache_set_inode_cookie(inode, file);
370
371 goto out;
372 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { 383 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
373 if (tcon->ses->serverNOS) 384 if (tcon->ses->serverNOS)
374 cERROR(1, "server %s of type %s returned" 385 cERROR(1, "server %s of type %s returned"
@@ -385,103 +396,39 @@ int cifs_open(struct inode *inode, struct file *file)
385 or DFS errors */ 396 or DFS errors */
386 } 397 }
387 398
388 desiredAccess = cifs_convert_flags(file->f_flags); 399 if (!posix_open_ok) {
389 400 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
390/********************************************************************* 401 file->f_flags, &oplock, &netfid, xid);
391 * open flag mapping table: 402 if (rc)
392 * 403 goto out;
393 * POSIX Flag CIFS Disposition
394 * ---------- ----------------
395 * O_CREAT FILE_OPEN_IF
396 * O_CREAT | O_EXCL FILE_CREATE
397 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
398 * O_TRUNC FILE_OVERWRITE
399 * none of the above FILE_OPEN
400 *
401 * Note that there is not a direct match between disposition
402 * FILE_SUPERSEDE (ie create whether or not file exists although
403 * O_CREAT | O_TRUNC is similar but truncates the existing
404 * file rather than creating a new file as FILE_SUPERSEDE does
405 * (which uses the attributes / metadata passed in on open call)
406 *?
407 *? O_SYNC is a reasonable match to CIFS writethrough flag
408 *? and the read write flags match reasonably. O_LARGEFILE
409 *? is irrelevant because largefile support is always used
410 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
411 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
412 *********************************************************************/
413
414 disposition = cifs_get_disposition(file->f_flags);
415
416 /* BB pass O_SYNC flag through on file attributes .. BB */
417
418 /* Also refresh inode by passing in file_info buf returned by SMBOpen
419 and calling get_inode_info with returned buf (at least helps
420 non-Unix server case) */
421
422 /* BB we can not do this if this is the second open of a file
423 and the first handle has writebehind data, we might be
424 able to simply do a filemap_fdatawrite/filemap_fdatawait first */
425 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
426 if (!buf) {
427 rc = -ENOMEM;
428 goto out;
429 }
430
431 if (tcon->ses->capabilities & CAP_NT_SMBS)
432 rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
433 desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
434 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
435 & CIFS_MOUNT_MAP_SPECIAL_CHR);
436 else
437 rc = -EIO; /* no NT SMB support fall into legacy open below */
438
439 if (rc == -EIO) {
440 /* Old server, try legacy style OpenX */
441 rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
442 desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
443 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
444 & CIFS_MOUNT_MAP_SPECIAL_CHR);
445 }
446 if (rc) {
447 cFYI(1, "cifs_open returned 0x%x", rc);
448 goto out;
449 } 404 }
450 405
451 rc = cifs_open_inode_helper(inode, tcon, oplock, buf, full_path, xid);
452 if (rc != 0)
453 goto out;
454
455 pCifsFile = cifs_new_fileinfo(netfid, file, tlink, oplock); 406 pCifsFile = cifs_new_fileinfo(netfid, file, tlink, oplock);
456 if (pCifsFile == NULL) { 407 if (pCifsFile == NULL) {
408 CIFSSMBClose(xid, tcon, netfid);
457 rc = -ENOMEM; 409 rc = -ENOMEM;
458 goto out; 410 goto out;
459 } 411 }
460 412
461 cifs_fscache_set_inode_cookie(inode, file); 413 cifs_fscache_set_inode_cookie(inode, file);
462 414
463 if (oplock & CIFS_CREATE_ACTION) { 415 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
464 /* time to set mode which we can not set earlier due to 416 /* time to set mode which we can not set earlier due to
465 problems creating new read-only files */ 417 problems creating new read-only files */
466 if (tcon->unix_ext) { 418 struct cifs_unix_set_info_args args = {
467 struct cifs_unix_set_info_args args = { 419 .mode = inode->i_mode,
468 .mode = inode->i_mode, 420 .uid = NO_CHANGE_64,
469 .uid = NO_CHANGE_64, 421 .gid = NO_CHANGE_64,
470 .gid = NO_CHANGE_64, 422 .ctime = NO_CHANGE_64,
471 .ctime = NO_CHANGE_64, 423 .atime = NO_CHANGE_64,
472 .atime = NO_CHANGE_64, 424 .mtime = NO_CHANGE_64,
473 .mtime = NO_CHANGE_64, 425 .device = 0,
474 .device = 0, 426 };
475 }; 427 CIFSSMBUnixSetFileInfo(xid, tcon, &args, netfid,
476 CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args, 428 pCifsFile->pid);
477 cifs_sb->local_nls,
478 cifs_sb->mnt_cifs_flags &
479 CIFS_MOUNT_MAP_SPECIAL_CHR);
480 }
481 } 429 }
482 430
483out: 431out:
484 kfree(buf);
485 kfree(full_path); 432 kfree(full_path);
486 FreeXid(xid); 433 FreeXid(xid);
487 cifs_put_tlink(tlink); 434 cifs_put_tlink(tlink);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 589f3e3f6e0..0c7e36910e3 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -518,6 +518,7 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
518 518
519 fattr->cf_eof = le64_to_cpu(info->EndOfFile); 519 fattr->cf_eof = le64_to_cpu(info->EndOfFile);
520 fattr->cf_bytes = le64_to_cpu(info->AllocationSize); 520 fattr->cf_bytes = le64_to_cpu(info->AllocationSize);
521 fattr->cf_createtime = le64_to_cpu(info->CreationTime);
521 522
522 if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { 523 if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
523 fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode; 524 fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode;
@@ -779,6 +780,10 @@ cifs_find_inode(struct inode *inode, void *opaque)
779 if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid) 780 if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid)
780 return 0; 781 return 0;
781 782
783 /* use createtime like an i_generation field */
784 if (CIFS_I(inode)->createtime != fattr->cf_createtime)
785 return 0;
786
782 /* don't match inode of different type */ 787 /* don't match inode of different type */
783 if ((inode->i_mode & S_IFMT) != (fattr->cf_mode & S_IFMT)) 788 if ((inode->i_mode & S_IFMT) != (fattr->cf_mode & S_IFMT))
784 return 0; 789 return 0;
@@ -796,6 +801,7 @@ cifs_init_inode(struct inode *inode, void *opaque)
796 struct cifs_fattr *fattr = (struct cifs_fattr *) opaque; 801 struct cifs_fattr *fattr = (struct cifs_fattr *) opaque;
797 802
798 CIFS_I(inode)->uniqueid = fattr->cf_uniqueid; 803 CIFS_I(inode)->uniqueid = fattr->cf_uniqueid;
804 CIFS_I(inode)->createtime = fattr->cf_createtime;
799 return 0; 805 return 0;
800} 806}
801 807
@@ -809,14 +815,14 @@ inode_has_hashed_dentries(struct inode *inode)
809{ 815{
810 struct dentry *dentry; 816 struct dentry *dentry;
811 817
812 spin_lock(&dcache_lock); 818 spin_lock(&inode->i_lock);
813 list_for_each_entry(dentry, &inode->i_dentry, d_alias) { 819 list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
814 if (!d_unhashed(dentry) || IS_ROOT(dentry)) { 820 if (!d_unhashed(dentry) || IS_ROOT(dentry)) {
815 spin_unlock(&dcache_lock); 821 spin_unlock(&inode->i_lock);
816 return true; 822 return true;
817 } 823 }
818 } 824 }
819 spin_unlock(&dcache_lock); 825 spin_unlock(&inode->i_lock);
820 return false; 826 return false;
821} 827}
822 828
@@ -1319,9 +1325,9 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
1319 to set uid/gid */ 1325 to set uid/gid */
1320 inc_nlink(inode); 1326 inc_nlink(inode);
1321 if (pTcon->nocase) 1327 if (pTcon->nocase)
1322 direntry->d_op = &cifs_ci_dentry_ops; 1328 d_set_d_op(direntry, &cifs_ci_dentry_ops);
1323 else 1329 else
1324 direntry->d_op = &cifs_dentry_ops; 1330 d_set_d_op(direntry, &cifs_dentry_ops);
1325 1331
1326 cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb); 1332 cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb);
1327 cifs_fill_uniqueid(inode->i_sb, &fattr); 1333 cifs_fill_uniqueid(inode->i_sb, &fattr);
@@ -1363,9 +1369,9 @@ mkdir_get_info:
1363 inode->i_sb, xid, NULL); 1369 inode->i_sb, xid, NULL);
1364 1370
1365 if (pTcon->nocase) 1371 if (pTcon->nocase)
1366 direntry->d_op = &cifs_ci_dentry_ops; 1372 d_set_d_op(direntry, &cifs_ci_dentry_ops);
1367 else 1373 else
1368 direntry->d_op = &cifs_dentry_ops; 1374 d_set_d_op(direntry, &cifs_dentry_ops);
1369 d_instantiate(direntry, newinode); 1375 d_instantiate(direntry, newinode);
1370 /* setting nlink not necessary except in cases where we 1376 /* setting nlink not necessary except in cases where we
1371 * failed to get it from the server or was set bogus */ 1377 * failed to get it from the server or was set bogus */
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 85cdbf831e7..fe2f6a93c49 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -525,9 +525,9 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
525 rc); 525 rc);
526 } else { 526 } else {
527 if (pTcon->nocase) 527 if (pTcon->nocase)
528 direntry->d_op = &cifs_ci_dentry_ops; 528 d_set_d_op(direntry, &cifs_ci_dentry_ops);
529 else 529 else
530 direntry->d_op = &cifs_dentry_ops; 530 d_set_d_op(direntry, &cifs_dentry_ops);
531 d_instantiate(direntry, newinode); 531 d_instantiate(direntry, newinode);
532 } 532 }
533 } 533 }
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index a73eb9f4bda..76b1b37c9e6 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -79,7 +79,7 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name,
79 cFYI(1, "For %s", name->name); 79 cFYI(1, "For %s", name->name);
80 80
81 if (parent->d_op && parent->d_op->d_hash) 81 if (parent->d_op && parent->d_op->d_hash)
82 parent->d_op->d_hash(parent, name); 82 parent->d_op->d_hash(parent, parent->d_inode, name);
83 else 83 else
84 name->hash = full_name_hash(name->name, name->len); 84 name->hash = full_name_hash(name->name, name->len);
85 85
@@ -103,9 +103,9 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name,
103 } 103 }
104 104
105 if (cifs_sb_master_tcon(CIFS_SB(sb))->nocase) 105 if (cifs_sb_master_tcon(CIFS_SB(sb))->nocase)
106 dentry->d_op = &cifs_ci_dentry_ops; 106 d_set_d_op(dentry, &cifs_ci_dentry_ops);
107 else 107 else
108 dentry->d_op = &cifs_dentry_ops; 108 d_set_d_op(dentry, &cifs_dentry_ops);
109 109
110 alias = d_materialise_unique(dentry, inode); 110 alias = d_materialise_unique(dentry, inode);
111 if (alias != NULL) { 111 if (alias != NULL) {
@@ -160,6 +160,7 @@ cifs_dir_info_to_fattr(struct cifs_fattr *fattr, FILE_DIRECTORY_INFO *info,
160 fattr->cf_cifsattrs = le32_to_cpu(info->ExtFileAttributes); 160 fattr->cf_cifsattrs = le32_to_cpu(info->ExtFileAttributes);
161 fattr->cf_eof = le64_to_cpu(info->EndOfFile); 161 fattr->cf_eof = le64_to_cpu(info->EndOfFile);
162 fattr->cf_bytes = le64_to_cpu(info->AllocationSize); 162 fattr->cf_bytes = le64_to_cpu(info->AllocationSize);
163 fattr->cf_createtime = le64_to_cpu(info->CreationTime);
163 fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime); 164 fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime);
164 fattr->cf_ctime = cifs_NTtimeToUnix(info->ChangeTime); 165 fattr->cf_ctime = cifs_NTtimeToUnix(info->ChangeTime);
165 fattr->cf_mtime = cifs_NTtimeToUnix(info->LastWriteTime); 166 fattr->cf_mtime = cifs_NTtimeToUnix(info->LastWriteTime);
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 7b01d3f6eed..eb746486e49 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -420,7 +420,6 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
420 return 0; 420 return 0;
421} 421}
422 422
423#ifdef CONFIG_CIFS_EXPERIMENTAL
424/* BB Move to ntlmssp.c eventually */ 423/* BB Move to ntlmssp.c eventually */
425 424
426/* We do not malloc the blob, it is passed in pbuffer, because 425/* We do not malloc the blob, it is passed in pbuffer, because
@@ -431,13 +430,14 @@ static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
431 NEGOTIATE_MESSAGE *sec_blob = (NEGOTIATE_MESSAGE *)pbuffer; 430 NEGOTIATE_MESSAGE *sec_blob = (NEGOTIATE_MESSAGE *)pbuffer;
432 __u32 flags; 431 __u32 flags;
433 432
433 memset(pbuffer, 0, sizeof(NEGOTIATE_MESSAGE));
434 memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8); 434 memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8);
435 sec_blob->MessageType = NtLmNegotiate; 435 sec_blob->MessageType = NtLmNegotiate;
436 436
437 /* BB is NTLMV2 session security format easier to use here? */ 437 /* BB is NTLMV2 session security format easier to use here? */
438 flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET | 438 flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET |
439 NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | 439 NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
440 NTLMSSP_NEGOTIATE_NTLM; 440 NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC;
441 if (ses->server->secMode & 441 if (ses->server->secMode &
442 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { 442 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
443 flags |= NTLMSSP_NEGOTIATE_SIGN; 443 flags |= NTLMSSP_NEGOTIATE_SIGN;
@@ -446,7 +446,7 @@ static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
446 NTLMSSP_NEGOTIATE_EXTENDED_SEC; 446 NTLMSSP_NEGOTIATE_EXTENDED_SEC;
447 } 447 }
448 448
449 sec_blob->NegotiateFlags |= cpu_to_le32(flags); 449 sec_blob->NegotiateFlags = cpu_to_le32(flags);
450 450
451 sec_blob->WorkstationName.BufferOffset = 0; 451 sec_blob->WorkstationName.BufferOffset = 0;
452 sec_blob->WorkstationName.Length = 0; 452 sec_blob->WorkstationName.Length = 0;
@@ -477,7 +477,7 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
477 flags = NTLMSSP_NEGOTIATE_56 | 477 flags = NTLMSSP_NEGOTIATE_56 |
478 NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_TARGET_INFO | 478 NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_TARGET_INFO |
479 NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | 479 NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
480 NTLMSSP_NEGOTIATE_NTLM; 480 NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC;
481 if (ses->server->secMode & 481 if (ses->server->secMode &
482 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) 482 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
483 flags |= NTLMSSP_NEGOTIATE_SIGN; 483 flags |= NTLMSSP_NEGOTIATE_SIGN;
@@ -485,7 +485,7 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
485 flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN; 485 flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN;
486 486
487 tmp = pbuffer + sizeof(AUTHENTICATE_MESSAGE); 487 tmp = pbuffer + sizeof(AUTHENTICATE_MESSAGE);
488 sec_blob->NegotiateFlags |= cpu_to_le32(flags); 488 sec_blob->NegotiateFlags = cpu_to_le32(flags);
489 489
490 sec_blob->LmChallengeResponse.BufferOffset = 490 sec_blob->LmChallengeResponse.BufferOffset =
491 cpu_to_le32(sizeof(AUTHENTICATE_MESSAGE)); 491 cpu_to_le32(sizeof(AUTHENTICATE_MESSAGE));
@@ -544,8 +544,9 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
544 sec_blob->WorkstationName.MaximumLength = 0; 544 sec_blob->WorkstationName.MaximumLength = 0;
545 tmp += 2; 545 tmp += 2;
546 546
547 if ((ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_KEY_XCH) && 547 if (((ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_KEY_XCH) ||
548 !calc_seckey(ses)) { 548 (ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_EXTENDED_SEC))
549 && !calc_seckey(ses)) {
549 memcpy(tmp, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE); 550 memcpy(tmp, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE);
550 sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer); 551 sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
551 sec_blob->SessionKey.Length = cpu_to_le16(CIFS_CPHTXT_SIZE); 552 sec_blob->SessionKey.Length = cpu_to_le16(CIFS_CPHTXT_SIZE);
@@ -563,17 +564,6 @@ setup_ntlmv2_ret:
563 return rc; 564 return rc;
564} 565}
565 566
566
567static void setup_ntlmssp_neg_req(SESSION_SETUP_ANDX *pSMB,
568 struct cifsSesInfo *ses)
569{
570 build_ntlmssp_negotiate_blob(&pSMB->req.SecurityBlob[0], ses);
571 pSMB->req.SecurityBlobLength = cpu_to_le16(sizeof(NEGOTIATE_MESSAGE));
572
573 return;
574}
575#endif
576
577int 567int
578CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, 568CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses,
579 const struct nls_table *nls_cp) 569 const struct nls_table *nls_cp)
@@ -814,71 +804,70 @@ ssetup_ntlmssp_authenticate:
814 rc = -ENOSYS; 804 rc = -ENOSYS;
815 goto ssetup_exit; 805 goto ssetup_exit;
816#endif /* CONFIG_CIFS_UPCALL */ 806#endif /* CONFIG_CIFS_UPCALL */
817 } else { 807 } else if (type == RawNTLMSSP) {
818#ifdef CONFIG_CIFS_EXPERIMENTAL 808 if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) {
819 if (type == RawNTLMSSP) { 809 cERROR(1, "NTLMSSP requires Unicode support");
820 if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) { 810 rc = -ENOSYS;
821 cERROR(1, "NTLMSSP requires Unicode support"); 811 goto ssetup_exit;
822 rc = -ENOSYS; 812 }
813
814 cFYI(1, "ntlmssp session setup phase %d", phase);
815 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
816 capabilities |= CAP_EXTENDED_SECURITY;
817 pSMB->req.Capabilities |= cpu_to_le32(capabilities);
818 switch(phase) {
819 case NtLmNegotiate:
820 build_ntlmssp_negotiate_blob(
821 pSMB->req.SecurityBlob, ses);
822 iov[1].iov_len = sizeof(NEGOTIATE_MESSAGE);
823 iov[1].iov_base = pSMB->req.SecurityBlob;
824 pSMB->req.SecurityBlobLength =
825 cpu_to_le16(sizeof(NEGOTIATE_MESSAGE));
826 break;
827 case NtLmAuthenticate:
828 /*
829 * 5 is an empirical value, large enough to hold
830 * authenticate message plus max 10 of av paris,
831 * domain, user, workstation names, flags, etc.
832 */
833 ntlmsspblob = kzalloc(
834 5*sizeof(struct _AUTHENTICATE_MESSAGE),
835 GFP_KERNEL);
836 if (!ntlmsspblob) {
837 cERROR(1, "Can't allocate NTLMSSP blob");
838 rc = -ENOMEM;
823 goto ssetup_exit; 839 goto ssetup_exit;
824 } 840 }
825 841
826 cFYI(1, "ntlmssp session setup phase %d", phase); 842 rc = build_ntlmssp_auth_blob(ntlmsspblob,
827 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC; 843 &blob_len, ses, nls_cp);
828 capabilities |= CAP_EXTENDED_SECURITY; 844 if (rc)
829 pSMB->req.Capabilities |= cpu_to_le32(capabilities);
830 if (phase == NtLmNegotiate) {
831 setup_ntlmssp_neg_req(pSMB, ses);
832 iov[1].iov_len = sizeof(NEGOTIATE_MESSAGE);
833 iov[1].iov_base = &pSMB->req.SecurityBlob[0];
834 } else if (phase == NtLmAuthenticate) {
835 /* 5 is an empirical value, large enought to
836 * hold authenticate message, max 10 of
837 * av paris, doamin,user,workstation mames,
838 * flags etc..
839 */
840 ntlmsspblob = kmalloc(
841 5*sizeof(struct _AUTHENTICATE_MESSAGE),
842 GFP_KERNEL);
843 if (!ntlmsspblob) {
844 cERROR(1, "Can't allocate NTLMSSP");
845 rc = -ENOMEM;
846 goto ssetup_exit;
847 }
848
849 rc = build_ntlmssp_auth_blob(ntlmsspblob,
850 &blob_len, ses, nls_cp);
851 if (rc)
852 goto ssetup_exit;
853 iov[1].iov_len = blob_len;
854 iov[1].iov_base = ntlmsspblob;
855 pSMB->req.SecurityBlobLength =
856 cpu_to_le16(blob_len);
857 /* Make sure that we tell the server that we
858 are using the uid that it just gave us back
859 on the response (challenge) */
860 smb_buf->Uid = ses->Suid;
861 } else {
862 cERROR(1, "invalid phase %d", phase);
863 rc = -ENOSYS;
864 goto ssetup_exit; 845 goto ssetup_exit;
865 } 846 iov[1].iov_len = blob_len;
866 /* unicode strings must be word aligned */ 847 iov[1].iov_base = ntlmsspblob;
867 if ((iov[0].iov_len + iov[1].iov_len) % 2) { 848 pSMB->req.SecurityBlobLength = cpu_to_le16(blob_len);
868 *bcc_ptr = 0; 849 /*
869 bcc_ptr++; 850 * Make sure that we tell the server that we are using
870 } 851 * the uid that it just gave us back on the response
871 unicode_oslm_strings(&bcc_ptr, nls_cp); 852 * (challenge)
872 } else { 853 */
873 cERROR(1, "secType %d not supported!", type); 854 smb_buf->Uid = ses->Suid;
855 break;
856 default:
857 cERROR(1, "invalid phase %d", phase);
874 rc = -ENOSYS; 858 rc = -ENOSYS;
875 goto ssetup_exit; 859 goto ssetup_exit;
876 } 860 }
877#else 861 /* unicode strings must be word aligned */
862 if ((iov[0].iov_len + iov[1].iov_len) % 2) {
863 *bcc_ptr = 0;
864 bcc_ptr++;
865 }
866 unicode_oslm_strings(&bcc_ptr, nls_cp);
867 } else {
878 cERROR(1, "secType %d not supported!", type); 868 cERROR(1, "secType %d not supported!", type);
879 rc = -ENOSYS; 869 rc = -ENOSYS;
880 goto ssetup_exit; 870 goto ssetup_exit;
881#endif
882 } 871 }
883 872
884 iov[2].iov_base = str_area; 873 iov[2].iov_base = str_area;
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index e0588cdf4cc..59ca81b1691 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -119,7 +119,7 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
119 if (ssocket == NULL) 119 if (ssocket == NULL)
120 return -ENOTSOCK; /* BB eventually add reconnect code here */ 120 return -ENOTSOCK; /* BB eventually add reconnect code here */
121 121
122 smb_msg.msg_name = (struct sockaddr *) &server->addr.sockAddr; 122 smb_msg.msg_name = (struct sockaddr *) &server->dstaddr;
123 smb_msg.msg_namelen = sizeof(struct sockaddr); 123 smb_msg.msg_namelen = sizeof(struct sockaddr);
124 smb_msg.msg_control = NULL; 124 smb_msg.msg_control = NULL;
125 smb_msg.msg_controllen = 0; 125 smb_msg.msg_controllen = 0;
diff --git a/fs/coda/cache.c b/fs/coda/cache.c
index 9060f08e70c..5525e1c660f 100644
--- a/fs/coda/cache.c
+++ b/fs/coda/cache.c
@@ -93,7 +93,7 @@ static void coda_flag_children(struct dentry *parent, int flag)
93 struct list_head *child; 93 struct list_head *child;
94 struct dentry *de; 94 struct dentry *de;
95 95
96 spin_lock(&dcache_lock); 96 spin_lock(&parent->d_lock);
97 list_for_each(child, &parent->d_subdirs) 97 list_for_each(child, &parent->d_subdirs)
98 { 98 {
99 de = list_entry(child, struct dentry, d_u.d_child); 99 de = list_entry(child, struct dentry, d_u.d_child);
@@ -102,7 +102,7 @@ static void coda_flag_children(struct dentry *parent, int flag)
102 continue; 102 continue;
103 coda_flag_inode(de->d_inode, flag); 103 coda_flag_inode(de->d_inode, flag);
104 } 104 }
105 spin_unlock(&dcache_lock); 105 spin_unlock(&parent->d_lock);
106 return; 106 return;
107} 107}
108 108
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 5d8b3553960..29badd91360 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -18,6 +18,7 @@
18#include <linux/errno.h> 18#include <linux/errno.h>
19#include <linux/string.h> 19#include <linux/string.h>
20#include <linux/spinlock.h> 20#include <linux/spinlock.h>
21#include <linux/namei.h>
21 22
22#include <asm/uaccess.h> 23#include <asm/uaccess.h>
23 24
@@ -47,7 +48,7 @@ static int coda_readdir(struct file *file, void *buf, filldir_t filldir);
47 48
48/* dentry ops */ 49/* dentry ops */
49static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd); 50static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd);
50static int coda_dentry_delete(struct dentry *); 51static int coda_dentry_delete(const struct dentry *);
51 52
52/* support routines */ 53/* support routines */
53static int coda_venus_readdir(struct file *coda_file, void *buf, 54static int coda_venus_readdir(struct file *coda_file, void *buf,
@@ -125,7 +126,7 @@ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struc
125 return ERR_PTR(error); 126 return ERR_PTR(error);
126 127
127exit: 128exit:
128 entry->d_op = &coda_dentry_operations; 129 d_set_d_op(entry, &coda_dentry_operations);
129 130
130 if (inode && (type & CODA_NOCACHE)) 131 if (inode && (type & CODA_NOCACHE))
131 coda_flag_inode(inode, C_VATTR | C_PURGE); 132 coda_flag_inode(inode, C_VATTR | C_PURGE);
@@ -134,10 +135,13 @@ exit:
134} 135}
135 136
136 137
137int coda_permission(struct inode *inode, int mask) 138int coda_permission(struct inode *inode, int mask, unsigned int flags)
138{ 139{
139 int error; 140 int error;
140 141
142 if (flags & IPERM_FLAG_RCU)
143 return -ECHILD;
144
141 mask &= MAY_READ | MAY_WRITE | MAY_EXEC; 145 mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
142 146
143 if (!mask) 147 if (!mask)
@@ -541,9 +545,13 @@ out:
541/* called when a cache lookup succeeds */ 545/* called when a cache lookup succeeds */
542static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd) 546static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd)
543{ 547{
544 struct inode *inode = de->d_inode; 548 struct inode *inode;
545 struct coda_inode_info *cii; 549 struct coda_inode_info *cii;
546 550
551 if (nd->flags & LOOKUP_RCU)
552 return -ECHILD;
553
554 inode = de->d_inode;
547 if (!inode || coda_isroot(inode)) 555 if (!inode || coda_isroot(inode))
548 goto out; 556 goto out;
549 if (is_bad_inode(inode)) 557 if (is_bad_inode(inode))
@@ -559,7 +567,7 @@ static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd)
559 if (cii->c_flags & C_FLUSH) 567 if (cii->c_flags & C_FLUSH)
560 coda_flag_inode_children(inode, C_FLUSH); 568 coda_flag_inode_children(inode, C_FLUSH);
561 569
562 if (atomic_read(&de->d_count) > 1) 570 if (de->d_count > 1)
563 /* pretend it's valid, but don't change the flags */ 571 /* pretend it's valid, but don't change the flags */
564 goto out; 572 goto out;
565 573
@@ -577,7 +585,7 @@ out:
577 * This is the callback from dput() when d_count is going to 0. 585 * This is the callback from dput() when d_count is going to 0.
578 * We use this to unhash dentries with bad inodes. 586 * We use this to unhash dentries with bad inodes.
579 */ 587 */
580static int coda_dentry_delete(struct dentry * dentry) 588static int coda_dentry_delete(const struct dentry * dentry)
581{ 589{
582 int flags; 590 int flags;
583 591
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 1a49c1708a5..f065a5d31a1 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -56,11 +56,18 @@ static struct inode *coda_alloc_inode(struct super_block *sb)
56 return &ei->vfs_inode; 56 return &ei->vfs_inode;
57} 57}
58 58
59static void coda_destroy_inode(struct inode *inode) 59static void coda_i_callback(struct rcu_head *head)
60{ 60{
61 struct inode *inode = container_of(head, struct inode, i_rcu);
62 INIT_LIST_HEAD(&inode->i_dentry);
61 kmem_cache_free(coda_inode_cachep, ITOC(inode)); 63 kmem_cache_free(coda_inode_cachep, ITOC(inode));
62} 64}
63 65
66static void coda_destroy_inode(struct inode *inode)
67{
68 call_rcu(&inode->i_rcu, coda_i_callback);
69}
70
64static void init_once(void *foo) 71static void init_once(void *foo)
65{ 72{
66 struct coda_inode_info *ei = (struct coda_inode_info *) foo; 73 struct coda_inode_info *ei = (struct coda_inode_info *) foo;
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index 2fd89b5c5c7..741f0bd0391 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -24,7 +24,7 @@
24#include <linux/coda_psdev.h> 24#include <linux/coda_psdev.h>
25 25
26/* pioctl ops */ 26/* pioctl ops */
27static int coda_ioctl_permission(struct inode *inode, int mask); 27static int coda_ioctl_permission(struct inode *inode, int mask, unsigned int flags);
28static long coda_pioctl(struct file *filp, unsigned int cmd, 28static long coda_pioctl(struct file *filp, unsigned int cmd,
29 unsigned long user_data); 29 unsigned long user_data);
30 30
@@ -41,8 +41,10 @@ const struct file_operations coda_ioctl_operations = {
41}; 41};
42 42
43/* the coda pioctl inode ops */ 43/* the coda pioctl inode ops */
44static int coda_ioctl_permission(struct inode *inode, int mask) 44static int coda_ioctl_permission(struct inode *inode, int mask, unsigned int flags)
45{ 45{
46 if (flags & IPERM_FLAG_RCU)
47 return -ECHILD;
46 return (mask & MAY_EXEC) ? -EACCES : 0; 48 return (mask & MAY_EXEC) ? -EACCES : 0;
47} 49}
48 50
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index a60579b007b..61abb638b4b 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -42,7 +42,7 @@
42#include <linux/tty.h> 42#include <linux/tty.h>
43#include <linux/vt_kern.h> 43#include <linux/vt_kern.h>
44#include <linux/fb.h> 44#include <linux/fb.h>
45#include <linux/videodev.h> 45#include <linux/videodev2.h>
46#include <linux/netdevice.h> 46#include <linux/netdevice.h>
47#include <linux/raw.h> 47#include <linux/raw.h>
48#include <linux/blkdev.h> 48#include <linux/blkdev.h>
@@ -836,6 +836,7 @@ COMPATIBLE_IOCTL(TCSETSW)
836COMPATIBLE_IOCTL(TCSETSF) 836COMPATIBLE_IOCTL(TCSETSF)
837COMPATIBLE_IOCTL(TIOCLINUX) 837COMPATIBLE_IOCTL(TIOCLINUX)
838COMPATIBLE_IOCTL(TIOCSBRK) 838COMPATIBLE_IOCTL(TIOCSBRK)
839COMPATIBLE_IOCTL(TIOCGDEV)
839COMPATIBLE_IOCTL(TIOCCBRK) 840COMPATIBLE_IOCTL(TIOCCBRK)
840COMPATIBLE_IOCTL(TIOCGSID) 841COMPATIBLE_IOCTL(TIOCGSID)
841COMPATIBLE_IOCTL(TIOCGICOUNT) 842COMPATIBLE_IOCTL(TIOCGICOUNT)
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index da6061a6df4..026cf68553a 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -120,7 +120,7 @@ static inline struct config_item *configfs_get_config_item(struct dentry *dentry
120{ 120{
121 struct config_item * item = NULL; 121 struct config_item * item = NULL;
122 122
123 spin_lock(&dcache_lock); 123 spin_lock(&dentry->d_lock);
124 if (!d_unhashed(dentry)) { 124 if (!d_unhashed(dentry)) {
125 struct configfs_dirent * sd = dentry->d_fsdata; 125 struct configfs_dirent * sd = dentry->d_fsdata;
126 if (sd->s_type & CONFIGFS_ITEM_LINK) { 126 if (sd->s_type & CONFIGFS_ITEM_LINK) {
@@ -129,7 +129,7 @@ static inline struct config_item *configfs_get_config_item(struct dentry *dentry
129 } else 129 } else
130 item = config_item_get(sd->s_element); 130 item = config_item_get(sd->s_element);
131 } 131 }
132 spin_unlock(&dcache_lock); 132 spin_unlock(&dentry->d_lock);
133 133
134 return item; 134 return item;
135} 135}
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 0b502f80c69..36637a8c1ed 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -67,7 +67,7 @@ static void configfs_d_iput(struct dentry * dentry,
67 * We _must_ delete our dentries on last dput, as the chain-to-parent 67 * We _must_ delete our dentries on last dput, as the chain-to-parent
68 * behavior is required to clear the parents of default_groups. 68 * behavior is required to clear the parents of default_groups.
69 */ 69 */
70static int configfs_d_delete(struct dentry *dentry) 70static int configfs_d_delete(const struct dentry *dentry)
71{ 71{
72 return 1; 72 return 1;
73} 73}
@@ -232,10 +232,8 @@ int configfs_make_dirent(struct configfs_dirent * parent_sd,
232 232
233 sd->s_mode = mode; 233 sd->s_mode = mode;
234 sd->s_dentry = dentry; 234 sd->s_dentry = dentry;
235 if (dentry) { 235 if (dentry)
236 dentry->d_fsdata = configfs_get(sd); 236 dentry->d_fsdata = configfs_get(sd);
237 dentry->d_op = &configfs_dentry_ops;
238 }
239 237
240 return 0; 238 return 0;
241} 239}
@@ -278,7 +276,6 @@ static int create_dir(struct config_item * k, struct dentry * p,
278 error = configfs_create(d, mode, init_dir); 276 error = configfs_create(d, mode, init_dir);
279 if (!error) { 277 if (!error) {
280 inc_nlink(p->d_inode); 278 inc_nlink(p->d_inode);
281 (d)->d_op = &configfs_dentry_ops;
282 } else { 279 } else {
283 struct configfs_dirent *sd = d->d_fsdata; 280 struct configfs_dirent *sd = d->d_fsdata;
284 if (sd) { 281 if (sd) {
@@ -371,9 +368,7 @@ int configfs_create_link(struct configfs_symlink *sl,
371 CONFIGFS_ITEM_LINK); 368 CONFIGFS_ITEM_LINK);
372 if (!err) { 369 if (!err) {
373 err = configfs_create(dentry, mode, init_symlink); 370 err = configfs_create(dentry, mode, init_symlink);
374 if (!err) 371 if (err) {
375 dentry->d_op = &configfs_dentry_ops;
376 else {
377 struct configfs_dirent *sd = dentry->d_fsdata; 372 struct configfs_dirent *sd = dentry->d_fsdata;
378 if (sd) { 373 if (sd) {
379 spin_lock(&configfs_dirent_lock); 374 spin_lock(&configfs_dirent_lock);
@@ -399,8 +394,7 @@ static void remove_dir(struct dentry * d)
399 if (d->d_inode) 394 if (d->d_inode)
400 simple_rmdir(parent->d_inode,d); 395 simple_rmdir(parent->d_inode,d);
401 396
402 pr_debug(" o %s removing done (%d)\n",d->d_name.name, 397 pr_debug(" o %s removing done (%d)\n",d->d_name.name, d->d_count);
403 atomic_read(&d->d_count));
404 398
405 dput(parent); 399 dput(parent);
406} 400}
@@ -448,7 +442,7 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den
448 return error; 442 return error;
449 } 443 }
450 444
451 dentry->d_op = &configfs_dentry_ops; 445 d_set_d_op(dentry, &configfs_dentry_ops);
452 d_rehash(dentry); 446 d_rehash(dentry);
453 447
454 return 0; 448 return 0;
@@ -493,7 +487,11 @@ static struct dentry * configfs_lookup(struct inode *dir,
493 * If it doesn't exist and it isn't a NOT_PINNED item, 487 * If it doesn't exist and it isn't a NOT_PINNED item,
494 * it must be negative. 488 * it must be negative.
495 */ 489 */
496 return simple_lookup(dir, dentry, nd); 490 if (dentry->d_name.len > NAME_MAX)
491 return ERR_PTR(-ENAMETOOLONG);
492 d_set_d_op(dentry, &configfs_dentry_ops);
493 d_add(dentry, NULL);
494 return NULL;
497 } 495 }
498 496
499out: 497out:
@@ -685,6 +683,7 @@ static int create_default_group(struct config_group *parent_group,
685 ret = -ENOMEM; 683 ret = -ENOMEM;
686 child = d_alloc(parent, &name); 684 child = d_alloc(parent, &name);
687 if (child) { 685 if (child) {
686 d_set_d_op(child, &configfs_dentry_ops);
688 d_add(child, NULL); 687 d_add(child, NULL);
689 688
690 ret = configfs_attach_group(&parent_group->cg_item, 689 ret = configfs_attach_group(&parent_group->cg_item,
@@ -1682,6 +1681,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
1682 err = -ENOMEM; 1681 err = -ENOMEM;
1683 dentry = d_alloc(configfs_sb->s_root, &name); 1682 dentry = d_alloc(configfs_sb->s_root, &name);
1684 if (dentry) { 1683 if (dentry) {
1684 d_set_d_op(dentry, &configfs_dentry_ops);
1685 d_add(dentry, NULL); 1685 d_add(dentry, NULL);
1686 1686
1687 err = configfs_attach_group(sd->s_element, &group->cg_item, 1687 err = configfs_attach_group(sd->s_element, &group->cg_item,
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 253476d78ed..c83f4768eea 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -250,18 +250,14 @@ void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent)
250 struct dentry * dentry = sd->s_dentry; 250 struct dentry * dentry = sd->s_dentry;
251 251
252 if (dentry) { 252 if (dentry) {
253 spin_lock(&dcache_lock);
254 spin_lock(&dentry->d_lock); 253 spin_lock(&dentry->d_lock);
255 if (!(d_unhashed(dentry) && dentry->d_inode)) { 254 if (!(d_unhashed(dentry) && dentry->d_inode)) {
256 dget_locked(dentry); 255 dget_dlock(dentry);
257 __d_drop(dentry); 256 __d_drop(dentry);
258 spin_unlock(&dentry->d_lock); 257 spin_unlock(&dentry->d_lock);
259 spin_unlock(&dcache_lock);
260 simple_unlink(parent->d_inode, dentry); 258 simple_unlink(parent->d_inode, dentry);
261 } else { 259 } else
262 spin_unlock(&dentry->d_lock); 260 spin_unlock(&dentry->d_lock);
263 spin_unlock(&dcache_lock);
264 }
265 } 261 }
266} 262}
267 263
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 32fd5fe9ca0..e141939080f 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -34,57 +34,81 @@ static const struct address_space_operations cramfs_aops;
34static DEFINE_MUTEX(read_mutex); 34static DEFINE_MUTEX(read_mutex);
35 35
36 36
37/* These two macros may change in future, to provide better st_ino 37/* These macros may change in future, to provide better st_ino semantics. */
38 semantics. */
39#define CRAMINO(x) (((x)->offset && (x)->size)?(x)->offset<<2:1)
40#define OFFSET(x) ((x)->i_ino) 38#define OFFSET(x) ((x)->i_ino)
41 39
42static void setup_inode(struct inode *inode, struct cramfs_inode * cramfs_inode) 40static unsigned long cramino(struct cramfs_inode *cino, unsigned int offset)
43{ 41{
42 if (!cino->offset)
43 return offset + 1;
44 if (!cino->size)
45 return offset + 1;
46
47 /*
48 * The file mode test fixes buggy mkcramfs implementations where
49 * cramfs_inode->offset is set to a non zero value for entries
50 * which did not contain data, like devices node and fifos.
51 */
52 switch (cino->mode & S_IFMT) {
53 case S_IFREG:
54 case S_IFDIR:
55 case S_IFLNK:
56 return cino->offset << 2;
57 default:
58 break;
59 }
60 return offset + 1;
61}
62
63static struct inode *get_cramfs_inode(struct super_block *sb,
64 struct cramfs_inode *cramfs_inode, unsigned int offset)
65{
66 struct inode *inode;
44 static struct timespec zerotime; 67 static struct timespec zerotime;
68
69 inode = iget_locked(sb, cramino(cramfs_inode, offset));
70 if (!inode)
71 return ERR_PTR(-ENOMEM);
72 if (!(inode->i_state & I_NEW))
73 return inode;
74
75 switch (cramfs_inode->mode & S_IFMT) {
76 case S_IFREG:
77 inode->i_fop = &generic_ro_fops;
78 inode->i_data.a_ops = &cramfs_aops;
79 break;
80 case S_IFDIR:
81 inode->i_op = &cramfs_dir_inode_operations;
82 inode->i_fop = &cramfs_directory_operations;
83 break;
84 case S_IFLNK:
85 inode->i_op = &page_symlink_inode_operations;
86 inode->i_data.a_ops = &cramfs_aops;
87 break;
88 default:
89 init_special_inode(inode, cramfs_inode->mode,
90 old_decode_dev(cramfs_inode->size));
91 }
92
45 inode->i_mode = cramfs_inode->mode; 93 inode->i_mode = cramfs_inode->mode;
46 inode->i_uid = cramfs_inode->uid; 94 inode->i_uid = cramfs_inode->uid;
47 inode->i_size = cramfs_inode->size;
48 inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1;
49 inode->i_gid = cramfs_inode->gid; 95 inode->i_gid = cramfs_inode->gid;
96
97 /* if the lower 2 bits are zero, the inode contains data */
98 if (!(inode->i_ino & 3)) {
99 inode->i_size = cramfs_inode->size;
100 inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1;
101 }
102
50 /* Struct copy intentional */ 103 /* Struct copy intentional */
51 inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime; 104 inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime;
52 /* inode->i_nlink is left 1 - arguably wrong for directories, 105 /* inode->i_nlink is left 1 - arguably wrong for directories,
53 but it's the best we can do without reading the directory 106 but it's the best we can do without reading the directory
54 contents. 1 yields the right result in GNU find, even 107 contents. 1 yields the right result in GNU find, even
55 without -noleaf option. */ 108 without -noleaf option. */
56 if (S_ISREG(inode->i_mode)) {
57 inode->i_fop = &generic_ro_fops;
58 inode->i_data.a_ops = &cramfs_aops;
59 } else if (S_ISDIR(inode->i_mode)) {
60 inode->i_op = &cramfs_dir_inode_operations;
61 inode->i_fop = &cramfs_directory_operations;
62 } else if (S_ISLNK(inode->i_mode)) {
63 inode->i_op = &page_symlink_inode_operations;
64 inode->i_data.a_ops = &cramfs_aops;
65 } else {
66 init_special_inode(inode, inode->i_mode,
67 old_decode_dev(cramfs_inode->size));
68 }
69}
70 109
71static struct inode *get_cramfs_inode(struct super_block *sb, 110 unlock_new_inode(inode);
72 struct cramfs_inode * cramfs_inode) 111
73{
74 struct inode *inode;
75 if (CRAMINO(cramfs_inode) == 1) {
76 inode = new_inode(sb);
77 if (inode) {
78 inode->i_ino = 1;
79 setup_inode(inode, cramfs_inode);
80 }
81 } else {
82 inode = iget_locked(sb, CRAMINO(cramfs_inode));
83 if (inode && (inode->i_state & I_NEW)) {
84 setup_inode(inode, cramfs_inode);
85 unlock_new_inode(inode);
86 }
87 }
88 return inode; 112 return inode;
89} 113}
90 114
@@ -265,6 +289,9 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
265 printk(KERN_ERR "cramfs: root is not a directory\n"); 289 printk(KERN_ERR "cramfs: root is not a directory\n");
266 goto out; 290 goto out;
267 } 291 }
292 /* correct strange, hard-coded permissions of mkcramfs */
293 super.root.mode |= (S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
294
268 root_offset = super.root.offset << 2; 295 root_offset = super.root.offset << 2;
269 if (super.flags & CRAMFS_FLAG_FSID_VERSION_2) { 296 if (super.flags & CRAMFS_FLAG_FSID_VERSION_2) {
270 sbi->size=super.size; 297 sbi->size=super.size;
@@ -289,7 +316,7 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
289 316
290 /* Set it all up.. */ 317 /* Set it all up.. */
291 sb->s_op = &cramfs_ops; 318 sb->s_op = &cramfs_ops;
292 root = get_cramfs_inode(sb, &super.root); 319 root = get_cramfs_inode(sb, &super.root, 0);
293 if (!root) 320 if (!root)
294 goto out; 321 goto out;
295 sb->s_root = d_alloc_root(root); 322 sb->s_root = d_alloc_root(root);
@@ -365,7 +392,7 @@ static int cramfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
365 */ 392 */
366 namelen = de->namelen << 2; 393 namelen = de->namelen << 2;
367 memcpy(buf, name, namelen); 394 memcpy(buf, name, namelen);
368 ino = CRAMINO(de); 395 ino = cramino(de, OFFSET(inode) + offset);
369 mode = de->mode; 396 mode = de->mode;
370 mutex_unlock(&read_mutex); 397 mutex_unlock(&read_mutex);
371 nextoffset = offset + sizeof(*de) + namelen; 398 nextoffset = offset + sizeof(*de) + namelen;
@@ -404,8 +431,9 @@ static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, s
404 struct cramfs_inode *de; 431 struct cramfs_inode *de;
405 char *name; 432 char *name;
406 int namelen, retval; 433 int namelen, retval;
434 int dir_off = OFFSET(dir) + offset;
407 435
408 de = cramfs_read(dir->i_sb, OFFSET(dir) + offset, sizeof(*de)+CRAMFS_MAXPATHLEN); 436 de = cramfs_read(dir->i_sb, dir_off, sizeof(*de)+CRAMFS_MAXPATHLEN);
409 name = (char *)(de+1); 437 name = (char *)(de+1);
410 438
411 /* Try to take advantage of sorted directories */ 439 /* Try to take advantage of sorted directories */
@@ -436,7 +464,7 @@ static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, s
436 if (!retval) { 464 if (!retval) {
437 struct cramfs_inode entry = *de; 465 struct cramfs_inode entry = *de;
438 mutex_unlock(&read_mutex); 466 mutex_unlock(&read_mutex);
439 d_add(dentry, get_cramfs_inode(dir->i_sb, &entry)); 467 d_add(dentry, get_cramfs_inode(dir->i_sb, &entry, dir_off));
440 return NULL; 468 return NULL;
441 } 469 }
442 /* else (retval < 0) */ 470 /* else (retval < 0) */
diff --git a/fs/dcache.c b/fs/dcache.c
index 23702a9d4e6..5699d4c027c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -33,20 +33,58 @@
33#include <linux/bootmem.h> 33#include <linux/bootmem.h>
34#include <linux/fs_struct.h> 34#include <linux/fs_struct.h>
35#include <linux/hardirq.h> 35#include <linux/hardirq.h>
36#include <linux/bit_spinlock.h>
37#include <linux/rculist_bl.h>
36#include "internal.h" 38#include "internal.h"
37 39
40/*
41 * Usage:
42 * dcache->d_inode->i_lock protects:
43 * - i_dentry, d_alias, d_inode of aliases
44 * dcache_hash_bucket lock protects:
45 * - the dcache hash table
46 * s_anon bl list spinlock protects:
47 * - the s_anon list (see __d_drop)
48 * dcache_lru_lock protects:
49 * - the dcache lru lists and counters
50 * d_lock protects:
51 * - d_flags
52 * - d_name
53 * - d_lru
54 * - d_count
55 * - d_unhashed()
56 * - d_parent and d_subdirs
57 * - childrens' d_child and d_parent
58 * - d_alias, d_inode
59 *
60 * Ordering:
61 * dentry->d_inode->i_lock
62 * dentry->d_lock
63 * dcache_lru_lock
64 * dcache_hash_bucket lock
65 * s_anon lock
66 *
67 * If there is an ancestor relationship:
68 * dentry->d_parent->...->d_parent->d_lock
69 * ...
70 * dentry->d_parent->d_lock
71 * dentry->d_lock
72 *
73 * If no ancestor relationship:
74 * if (dentry1 < dentry2)
75 * dentry1->d_lock
76 * dentry2->d_lock
77 */
38int sysctl_vfs_cache_pressure __read_mostly = 100; 78int sysctl_vfs_cache_pressure __read_mostly = 100;
39EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); 79EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
40 80
41 __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock); 81static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
42__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); 82__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
43 83
44EXPORT_SYMBOL(dcache_lock); 84EXPORT_SYMBOL(rename_lock);
45 85
46static struct kmem_cache *dentry_cache __read_mostly; 86static struct kmem_cache *dentry_cache __read_mostly;
47 87
48#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))
49
50/* 88/*
51 * This is the single most critical data structure when it comes 89 * This is the single most critical data structure when it comes
52 * to the dcache: the hashtable for lookups. Somebody should try 90 * to the dcache: the hashtable for lookups. Somebody should try
@@ -60,22 +98,51 @@ static struct kmem_cache *dentry_cache __read_mostly;
60 98
61static unsigned int d_hash_mask __read_mostly; 99static unsigned int d_hash_mask __read_mostly;
62static unsigned int d_hash_shift __read_mostly; 100static unsigned int d_hash_shift __read_mostly;
63static struct hlist_head *dentry_hashtable __read_mostly; 101
102struct dcache_hash_bucket {
103 struct hlist_bl_head head;
104};
105static struct dcache_hash_bucket *dentry_hashtable __read_mostly;
106
107static inline struct dcache_hash_bucket *d_hash(struct dentry *parent,
108 unsigned long hash)
109{
110 hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
111 hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
112 return dentry_hashtable + (hash & D_HASHMASK);
113}
114
115static inline void spin_lock_bucket(struct dcache_hash_bucket *b)
116{
117 bit_spin_lock(0, (unsigned long *)&b->head.first);
118}
119
120static inline void spin_unlock_bucket(struct dcache_hash_bucket *b)
121{
122 __bit_spin_unlock(0, (unsigned long *)&b->head.first);
123}
64 124
65/* Statistics gathering. */ 125/* Statistics gathering. */
66struct dentry_stat_t dentry_stat = { 126struct dentry_stat_t dentry_stat = {
67 .age_limit = 45, 127 .age_limit = 45,
68}; 128};
69 129
70static struct percpu_counter nr_dentry __cacheline_aligned_in_smp; 130static DEFINE_PER_CPU(unsigned int, nr_dentry);
71static struct percpu_counter nr_dentry_unused __cacheline_aligned_in_smp;
72 131
73#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) 132#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
133static int get_nr_dentry(void)
134{
135 int i;
136 int sum = 0;
137 for_each_possible_cpu(i)
138 sum += per_cpu(nr_dentry, i);
139 return sum < 0 ? 0 : sum;
140}
141
74int proc_nr_dentry(ctl_table *table, int write, void __user *buffer, 142int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
75 size_t *lenp, loff_t *ppos) 143 size_t *lenp, loff_t *ppos)
76{ 144{
77 dentry_stat.nr_dentry = percpu_counter_sum_positive(&nr_dentry); 145 dentry_stat.nr_dentry = get_nr_dentry();
78 dentry_stat.nr_unused = percpu_counter_sum_positive(&nr_dentry_unused);
79 return proc_dointvec(table, write, buffer, lenp, ppos); 146 return proc_dointvec(table, write, buffer, lenp, ppos);
80} 147}
81#endif 148#endif
@@ -91,35 +158,50 @@ static void __d_free(struct rcu_head *head)
91} 158}
92 159
93/* 160/*
94 * no dcache_lock, please. 161 * no locks, please.
95 */ 162 */
96static void d_free(struct dentry *dentry) 163static void d_free(struct dentry *dentry)
97{ 164{
98 percpu_counter_dec(&nr_dentry); 165 BUG_ON(dentry->d_count);
166 this_cpu_dec(nr_dentry);
99 if (dentry->d_op && dentry->d_op->d_release) 167 if (dentry->d_op && dentry->d_op->d_release)
100 dentry->d_op->d_release(dentry); 168 dentry->d_op->d_release(dentry);
101 169
102 /* if dentry was never inserted into hash, immediate free is OK */ 170 /* if dentry was never inserted into hash, immediate free is OK */
103 if (hlist_unhashed(&dentry->d_hash)) 171 if (hlist_bl_unhashed(&dentry->d_hash))
104 __d_free(&dentry->d_u.d_rcu); 172 __d_free(&dentry->d_u.d_rcu);
105 else 173 else
106 call_rcu(&dentry->d_u.d_rcu, __d_free); 174 call_rcu(&dentry->d_u.d_rcu, __d_free);
107} 175}
108 176
177/**
178 * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups
179 * After this call, in-progress rcu-walk path lookup will fail. This
180 * should be called after unhashing, and after changing d_inode (if
181 * the dentry has not already been unhashed).
182 */
183static inline void dentry_rcuwalk_barrier(struct dentry *dentry)
184{
185 assert_spin_locked(&dentry->d_lock);
186 /* Go through a barrier */
187 write_seqcount_barrier(&dentry->d_seq);
188}
189
109/* 190/*
110 * Release the dentry's inode, using the filesystem 191 * Release the dentry's inode, using the filesystem
111 * d_iput() operation if defined. 192 * d_iput() operation if defined. Dentry has no refcount
193 * and is unhashed.
112 */ 194 */
113static void dentry_iput(struct dentry * dentry) 195static void dentry_iput(struct dentry * dentry)
114 __releases(dentry->d_lock) 196 __releases(dentry->d_lock)
115 __releases(dcache_lock) 197 __releases(dentry->d_inode->i_lock)
116{ 198{
117 struct inode *inode = dentry->d_inode; 199 struct inode *inode = dentry->d_inode;
118 if (inode) { 200 if (inode) {
119 dentry->d_inode = NULL; 201 dentry->d_inode = NULL;
120 list_del_init(&dentry->d_alias); 202 list_del_init(&dentry->d_alias);
121 spin_unlock(&dentry->d_lock); 203 spin_unlock(&dentry->d_lock);
122 spin_unlock(&dcache_lock); 204 spin_unlock(&inode->i_lock);
123 if (!inode->i_nlink) 205 if (!inode->i_nlink)
124 fsnotify_inoderemove(inode); 206 fsnotify_inoderemove(inode);
125 if (dentry->d_op && dentry->d_op->d_iput) 207 if (dentry->d_op && dentry->d_op->d_iput)
@@ -128,40 +210,72 @@ static void dentry_iput(struct dentry * dentry)
128 iput(inode); 210 iput(inode);
129 } else { 211 } else {
130 spin_unlock(&dentry->d_lock); 212 spin_unlock(&dentry->d_lock);
131 spin_unlock(&dcache_lock);
132 } 213 }
133} 214}
134 215
135/* 216/*
136 * dentry_lru_(add|del|move_tail) must be called with dcache_lock held. 217 * Release the dentry's inode, using the filesystem
218 * d_iput() operation if defined. dentry remains in-use.
219 */
220static void dentry_unlink_inode(struct dentry * dentry)
221 __releases(dentry->d_lock)
222 __releases(dentry->d_inode->i_lock)
223{
224 struct inode *inode = dentry->d_inode;
225 dentry->d_inode = NULL;
226 list_del_init(&dentry->d_alias);
227 dentry_rcuwalk_barrier(dentry);
228 spin_unlock(&dentry->d_lock);
229 spin_unlock(&inode->i_lock);
230 if (!inode->i_nlink)
231 fsnotify_inoderemove(inode);
232 if (dentry->d_op && dentry->d_op->d_iput)
233 dentry->d_op->d_iput(dentry, inode);
234 else
235 iput(inode);
236}
237
238/*
239 * dentry_lru_(add|del|move_tail) must be called with d_lock held.
137 */ 240 */
138static void dentry_lru_add(struct dentry *dentry) 241static void dentry_lru_add(struct dentry *dentry)
139{ 242{
140 if (list_empty(&dentry->d_lru)) { 243 if (list_empty(&dentry->d_lru)) {
244 spin_lock(&dcache_lru_lock);
141 list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); 245 list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
142 dentry->d_sb->s_nr_dentry_unused++; 246 dentry->d_sb->s_nr_dentry_unused++;
143 percpu_counter_inc(&nr_dentry_unused); 247 dentry_stat.nr_unused++;
248 spin_unlock(&dcache_lru_lock);
144 } 249 }
145} 250}
146 251
252static void __dentry_lru_del(struct dentry *dentry)
253{
254 list_del_init(&dentry->d_lru);
255 dentry->d_sb->s_nr_dentry_unused--;
256 dentry_stat.nr_unused--;
257}
258
147static void dentry_lru_del(struct dentry *dentry) 259static void dentry_lru_del(struct dentry *dentry)
148{ 260{
149 if (!list_empty(&dentry->d_lru)) { 261 if (!list_empty(&dentry->d_lru)) {
150 list_del_init(&dentry->d_lru); 262 spin_lock(&dcache_lru_lock);
151 dentry->d_sb->s_nr_dentry_unused--; 263 __dentry_lru_del(dentry);
152 percpu_counter_dec(&nr_dentry_unused); 264 spin_unlock(&dcache_lru_lock);
153 } 265 }
154} 266}
155 267
156static void dentry_lru_move_tail(struct dentry *dentry) 268static void dentry_lru_move_tail(struct dentry *dentry)
157{ 269{
270 spin_lock(&dcache_lru_lock);
158 if (list_empty(&dentry->d_lru)) { 271 if (list_empty(&dentry->d_lru)) {
159 list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); 272 list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
160 dentry->d_sb->s_nr_dentry_unused++; 273 dentry->d_sb->s_nr_dentry_unused++;
161 percpu_counter_inc(&nr_dentry_unused); 274 dentry_stat.nr_unused++;
162 } else { 275 } else {
163 list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); 276 list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
164 } 277 }
278 spin_unlock(&dcache_lru_lock);
165} 279}
166 280
167/** 281/**
@@ -171,22 +285,115 @@ static void dentry_lru_move_tail(struct dentry *dentry)
171 * The dentry must already be unhashed and removed from the LRU. 285 * The dentry must already be unhashed and removed from the LRU.
172 * 286 *
173 * If this is the root of the dentry tree, return NULL. 287 * If this is the root of the dentry tree, return NULL.
288 *
289 * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by
290 * d_kill.
174 */ 291 */
175static struct dentry *d_kill(struct dentry *dentry) 292static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
176 __releases(dentry->d_lock) 293 __releases(dentry->d_lock)
177 __releases(dcache_lock) 294 __releases(parent->d_lock)
295 __releases(dentry->d_inode->i_lock)
178{ 296{
179 struct dentry *parent; 297 dentry->d_parent = NULL;
180
181 list_del(&dentry->d_u.d_child); 298 list_del(&dentry->d_u.d_child);
182 /*drops the locks, at that point nobody can reach this dentry */ 299 if (parent)
300 spin_unlock(&parent->d_lock);
183 dentry_iput(dentry); 301 dentry_iput(dentry);
302 /*
303 * dentry_iput drops the locks, at which point nobody (except
304 * transient RCU lookups) can reach this dentry.
305 */
306 d_free(dentry);
307 return parent;
308}
309
310/**
311 * d_drop - drop a dentry
312 * @dentry: dentry to drop
313 *
314 * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
315 * be found through a VFS lookup any more. Note that this is different from
316 * deleting the dentry - d_delete will try to mark the dentry negative if
317 * possible, giving a successful _negative_ lookup, while d_drop will
318 * just make the cache lookup fail.
319 *
320 * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
321 * reason (NFS timeouts or autofs deletes).
322 *
323 * __d_drop requires dentry->d_lock.
324 */
325void __d_drop(struct dentry *dentry)
326{
327 if (!(dentry->d_flags & DCACHE_UNHASHED)) {
328 if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) {
329 bit_spin_lock(0,
330 (unsigned long *)&dentry->d_sb->s_anon.first);
331 dentry->d_flags |= DCACHE_UNHASHED;
332 hlist_bl_del_init(&dentry->d_hash);
333 __bit_spin_unlock(0,
334 (unsigned long *)&dentry->d_sb->s_anon.first);
335 } else {
336 struct dcache_hash_bucket *b;
337 b = d_hash(dentry->d_parent, dentry->d_name.hash);
338 spin_lock_bucket(b);
339 /*
340 * We may not actually need to put DCACHE_UNHASHED
341 * manipulations under the hash lock, but follow
342 * the principle of least surprise.
343 */
344 dentry->d_flags |= DCACHE_UNHASHED;
345 hlist_bl_del_rcu(&dentry->d_hash);
346 spin_unlock_bucket(b);
347 dentry_rcuwalk_barrier(dentry);
348 }
349 }
350}
351EXPORT_SYMBOL(__d_drop);
352
353void d_drop(struct dentry *dentry)
354{
355 spin_lock(&dentry->d_lock);
356 __d_drop(dentry);
357 spin_unlock(&dentry->d_lock);
358}
359EXPORT_SYMBOL(d_drop);
360
361/*
362 * Finish off a dentry we've decided to kill.
363 * dentry->d_lock must be held, returns with it unlocked.
364 * If ref is non-zero, then decrement the refcount too.
365 * Returns dentry requiring refcount drop, or NULL if we're done.
366 */
367static inline struct dentry *dentry_kill(struct dentry *dentry, int ref)
368 __releases(dentry->d_lock)
369{
370 struct inode *inode;
371 struct dentry *parent;
372
373 inode = dentry->d_inode;
374 if (inode && !spin_trylock(&inode->i_lock)) {
375relock:
376 spin_unlock(&dentry->d_lock);
377 cpu_relax();
378 return dentry; /* try again with same dentry */
379 }
184 if (IS_ROOT(dentry)) 380 if (IS_ROOT(dentry))
185 parent = NULL; 381 parent = NULL;
186 else 382 else
187 parent = dentry->d_parent; 383 parent = dentry->d_parent;
188 d_free(dentry); 384 if (parent && !spin_trylock(&parent->d_lock)) {
189 return parent; 385 if (inode)
386 spin_unlock(&inode->i_lock);
387 goto relock;
388 }
389
390 if (ref)
391 dentry->d_count--;
392 /* if dentry was on the d_lru list delete it from there */
393 dentry_lru_del(dentry);
394 /* if it was on the hash then remove it */
395 __d_drop(dentry);
396 return d_kill(dentry, parent);
190} 397}
191 398
192/* 399/*
@@ -214,34 +421,26 @@ static struct dentry *d_kill(struct dentry *dentry)
214 * call the dentry unlink method as well as removing it from the queues and 421 * call the dentry unlink method as well as removing it from the queues and
215 * releasing its resources. If the parent dentries were scheduled for release 422 * releasing its resources. If the parent dentries were scheduled for release
216 * they too may now get deleted. 423 * they too may now get deleted.
217 *
218 * no dcache lock, please.
219 */ 424 */
220
221void dput(struct dentry *dentry) 425void dput(struct dentry *dentry)
222{ 426{
223 if (!dentry) 427 if (!dentry)
224 return; 428 return;
225 429
226repeat: 430repeat:
227 if (atomic_read(&dentry->d_count) == 1) 431 if (dentry->d_count == 1)
228 might_sleep(); 432 might_sleep();
229 if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
230 return;
231
232 spin_lock(&dentry->d_lock); 433 spin_lock(&dentry->d_lock);
233 if (atomic_read(&dentry->d_count)) { 434 BUG_ON(!dentry->d_count);
435 if (dentry->d_count > 1) {
436 dentry->d_count--;
234 spin_unlock(&dentry->d_lock); 437 spin_unlock(&dentry->d_lock);
235 spin_unlock(&dcache_lock);
236 return; 438 return;
237 } 439 }
238 440
239 /* 441 if (dentry->d_flags & DCACHE_OP_DELETE) {
240 * AV: ->d_delete() is _NOT_ allowed to block now.
241 */
242 if (dentry->d_op && dentry->d_op->d_delete) {
243 if (dentry->d_op->d_delete(dentry)) 442 if (dentry->d_op->d_delete(dentry))
244 goto unhash_it; 443 goto kill_it;
245 } 444 }
246 445
247 /* Unreachable? Get rid of it */ 446 /* Unreachable? Get rid of it */
@@ -252,16 +451,12 @@ repeat:
252 dentry->d_flags |= DCACHE_REFERENCED; 451 dentry->d_flags |= DCACHE_REFERENCED;
253 dentry_lru_add(dentry); 452 dentry_lru_add(dentry);
254 453
255 spin_unlock(&dentry->d_lock); 454 dentry->d_count--;
256 spin_unlock(&dcache_lock); 455 spin_unlock(&dentry->d_lock);
257 return; 456 return;
258 457
259unhash_it:
260 __d_drop(dentry);
261kill_it: 458kill_it:
262 /* if dentry was on the d_lru list delete it from there */ 459 dentry = dentry_kill(dentry, 1);
263 dentry_lru_del(dentry);
264 dentry = d_kill(dentry);
265 if (dentry) 460 if (dentry)
266 goto repeat; 461 goto repeat;
267} 462}
@@ -284,9 +479,9 @@ int d_invalidate(struct dentry * dentry)
284 /* 479 /*
285 * If it's already been dropped, return OK. 480 * If it's already been dropped, return OK.
286 */ 481 */
287 spin_lock(&dcache_lock); 482 spin_lock(&dentry->d_lock);
288 if (d_unhashed(dentry)) { 483 if (d_unhashed(dentry)) {
289 spin_unlock(&dcache_lock); 484 spin_unlock(&dentry->d_lock);
290 return 0; 485 return 0;
291 } 486 }
292 /* 487 /*
@@ -294,9 +489,9 @@ int d_invalidate(struct dentry * dentry)
294 * to get rid of unused child entries. 489 * to get rid of unused child entries.
295 */ 490 */
296 if (!list_empty(&dentry->d_subdirs)) { 491 if (!list_empty(&dentry->d_subdirs)) {
297 spin_unlock(&dcache_lock); 492 spin_unlock(&dentry->d_lock);
298 shrink_dcache_parent(dentry); 493 shrink_dcache_parent(dentry);
299 spin_lock(&dcache_lock); 494 spin_lock(&dentry->d_lock);
300 } 495 }
301 496
302 /* 497 /*
@@ -309,35 +504,61 @@ int d_invalidate(struct dentry * dentry)
309 * we might still populate it if it was a 504 * we might still populate it if it was a
310 * working directory or similar). 505 * working directory or similar).
311 */ 506 */
312 spin_lock(&dentry->d_lock); 507 if (dentry->d_count > 1) {
313 if (atomic_read(&dentry->d_count) > 1) {
314 if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) { 508 if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
315 spin_unlock(&dentry->d_lock); 509 spin_unlock(&dentry->d_lock);
316 spin_unlock(&dcache_lock);
317 return -EBUSY; 510 return -EBUSY;
318 } 511 }
319 } 512 }
320 513
321 __d_drop(dentry); 514 __d_drop(dentry);
322 spin_unlock(&dentry->d_lock); 515 spin_unlock(&dentry->d_lock);
323 spin_unlock(&dcache_lock);
324 return 0; 516 return 0;
325} 517}
326EXPORT_SYMBOL(d_invalidate); 518EXPORT_SYMBOL(d_invalidate);
327 519
328/* This should be called _only_ with dcache_lock held */ 520/* This must be called with d_lock held */
329static inline struct dentry * __dget_locked(struct dentry *dentry) 521static inline void __dget_dlock(struct dentry *dentry)
330{ 522{
331 atomic_inc(&dentry->d_count); 523 dentry->d_count++;
332 dentry_lru_del(dentry);
333 return dentry;
334} 524}
335 525
336struct dentry * dget_locked(struct dentry *dentry) 526static inline void __dget(struct dentry *dentry)
337{ 527{
338 return __dget_locked(dentry); 528 spin_lock(&dentry->d_lock);
529 __dget_dlock(dentry);
530 spin_unlock(&dentry->d_lock);
531}
532
533struct dentry *dget_parent(struct dentry *dentry)
534{
535 struct dentry *ret;
536
537repeat:
538 /*
539 * Don't need rcu_dereference because we re-check it was correct under
540 * the lock.
541 */
542 rcu_read_lock();
543 ret = dentry->d_parent;
544 if (!ret) {
545 rcu_read_unlock();
546 goto out;
547 }
548 spin_lock(&ret->d_lock);
549 if (unlikely(ret != dentry->d_parent)) {
550 spin_unlock(&ret->d_lock);
551 rcu_read_unlock();
552 goto repeat;
553 }
554 rcu_read_unlock();
555 BUG_ON(!ret->d_count);
556 ret->d_count++;
557 spin_unlock(&ret->d_lock);
558out:
559 return ret;
339} 560}
340EXPORT_SYMBOL(dget_locked); 561EXPORT_SYMBOL(dget_parent);
341 562
342/** 563/**
343 * d_find_alias - grab a hashed alias of inode 564 * d_find_alias - grab a hashed alias of inode
@@ -355,42 +576,51 @@ EXPORT_SYMBOL(dget_locked);
355 * any other hashed alias over that one unless @want_discon is set, 576 * any other hashed alias over that one unless @want_discon is set,
356 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias. 577 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
357 */ 578 */
358 579static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
359static struct dentry * __d_find_alias(struct inode *inode, int want_discon)
360{ 580{
361 struct list_head *head, *next, *tmp; 581 struct dentry *alias, *discon_alias;
362 struct dentry *alias, *discon_alias=NULL;
363 582
364 head = &inode->i_dentry; 583again:
365 next = inode->i_dentry.next; 584 discon_alias = NULL;
366 while (next != head) { 585 list_for_each_entry(alias, &inode->i_dentry, d_alias) {
367 tmp = next; 586 spin_lock(&alias->d_lock);
368 next = tmp->next;
369 prefetch(next);
370 alias = list_entry(tmp, struct dentry, d_alias);
371 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { 587 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
372 if (IS_ROOT(alias) && 588 if (IS_ROOT(alias) &&
373 (alias->d_flags & DCACHE_DISCONNECTED)) 589 (alias->d_flags & DCACHE_DISCONNECTED)) {
374 discon_alias = alias; 590 discon_alias = alias;
375 else if (!want_discon) { 591 } else if (!want_discon) {
376 __dget_locked(alias); 592 __dget_dlock(alias);
593 spin_unlock(&alias->d_lock);
594 return alias;
595 }
596 }
597 spin_unlock(&alias->d_lock);
598 }
599 if (discon_alias) {
600 alias = discon_alias;
601 spin_lock(&alias->d_lock);
602 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
603 if (IS_ROOT(alias) &&
604 (alias->d_flags & DCACHE_DISCONNECTED)) {
605 __dget_dlock(alias);
606 spin_unlock(&alias->d_lock);
377 return alias; 607 return alias;
378 } 608 }
379 } 609 }
610 spin_unlock(&alias->d_lock);
611 goto again;
380 } 612 }
381 if (discon_alias) 613 return NULL;
382 __dget_locked(discon_alias);
383 return discon_alias;
384} 614}
385 615
386struct dentry * d_find_alias(struct inode *inode) 616struct dentry *d_find_alias(struct inode *inode)
387{ 617{
388 struct dentry *de = NULL; 618 struct dentry *de = NULL;
389 619
390 if (!list_empty(&inode->i_dentry)) { 620 if (!list_empty(&inode->i_dentry)) {
391 spin_lock(&dcache_lock); 621 spin_lock(&inode->i_lock);
392 de = __d_find_alias(inode, 0); 622 de = __d_find_alias(inode, 0);
393 spin_unlock(&dcache_lock); 623 spin_unlock(&inode->i_lock);
394 } 624 }
395 return de; 625 return de;
396} 626}
@@ -404,54 +634,61 @@ void d_prune_aliases(struct inode *inode)
404{ 634{
405 struct dentry *dentry; 635 struct dentry *dentry;
406restart: 636restart:
407 spin_lock(&dcache_lock); 637 spin_lock(&inode->i_lock);
408 list_for_each_entry(dentry, &inode->i_dentry, d_alias) { 638 list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
409 spin_lock(&dentry->d_lock); 639 spin_lock(&dentry->d_lock);
410 if (!atomic_read(&dentry->d_count)) { 640 if (!dentry->d_count) {
411 __dget_locked(dentry); 641 __dget_dlock(dentry);
412 __d_drop(dentry); 642 __d_drop(dentry);
413 spin_unlock(&dentry->d_lock); 643 spin_unlock(&dentry->d_lock);
414 spin_unlock(&dcache_lock); 644 spin_unlock(&inode->i_lock);
415 dput(dentry); 645 dput(dentry);
416 goto restart; 646 goto restart;
417 } 647 }
418 spin_unlock(&dentry->d_lock); 648 spin_unlock(&dentry->d_lock);
419 } 649 }
420 spin_unlock(&dcache_lock); 650 spin_unlock(&inode->i_lock);
421} 651}
422EXPORT_SYMBOL(d_prune_aliases); 652EXPORT_SYMBOL(d_prune_aliases);
423 653
424/* 654/*
425 * Throw away a dentry - free the inode, dput the parent. This requires that 655 * Try to throw away a dentry - free the inode, dput the parent.
426 * the LRU list has already been removed. 656 * Requires dentry->d_lock is held, and dentry->d_count == 0.
657 * Releases dentry->d_lock.
427 * 658 *
428 * Try to prune ancestors as well. This is necessary to prevent 659 * This may fail if locks cannot be acquired no problem, just try again.
429 * quadratic behavior of shrink_dcache_parent(), but is also expected
430 * to be beneficial in reducing dentry cache fragmentation.
431 */ 660 */
432static void prune_one_dentry(struct dentry * dentry) 661static void try_prune_one_dentry(struct dentry *dentry)
433 __releases(dentry->d_lock) 662 __releases(dentry->d_lock)
434 __releases(dcache_lock)
435 __acquires(dcache_lock)
436{ 663{
437 __d_drop(dentry); 664 struct dentry *parent;
438 dentry = d_kill(dentry);
439 665
666 parent = dentry_kill(dentry, 0);
440 /* 667 /*
441 * Prune ancestors. Locking is simpler than in dput(), 668 * If dentry_kill returns NULL, we have nothing more to do.
442 * because dcache_lock needs to be taken anyway. 669 * if it returns the same dentry, trylocks failed. In either
670 * case, just loop again.
671 *
672 * Otherwise, we need to prune ancestors too. This is necessary
673 * to prevent quadratic behavior of shrink_dcache_parent(), but
674 * is also expected to be beneficial in reducing dentry cache
675 * fragmentation.
443 */ 676 */
444 spin_lock(&dcache_lock); 677 if (!parent)
678 return;
679 if (parent == dentry)
680 return;
681
682 /* Prune ancestors. */
683 dentry = parent;
445 while (dentry) { 684 while (dentry) {
446 if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock)) 685 spin_lock(&dentry->d_lock);
686 if (dentry->d_count > 1) {
687 dentry->d_count--;
688 spin_unlock(&dentry->d_lock);
447 return; 689 return;
448 690 }
449 if (dentry->d_op && dentry->d_op->d_delete) 691 dentry = dentry_kill(dentry, 1);
450 dentry->d_op->d_delete(dentry);
451 dentry_lru_del(dentry);
452 __d_drop(dentry);
453 dentry = d_kill(dentry);
454 spin_lock(&dcache_lock);
455 } 692 }
456} 693}
457 694
@@ -459,24 +696,35 @@ static void shrink_dentry_list(struct list_head *list)
459{ 696{
460 struct dentry *dentry; 697 struct dentry *dentry;
461 698
462 while (!list_empty(list)) { 699 rcu_read_lock();
463 dentry = list_entry(list->prev, struct dentry, d_lru); 700 for (;;) {
464 dentry_lru_del(dentry); 701 dentry = list_entry_rcu(list->prev, struct dentry, d_lru);
702 if (&dentry->d_lru == list)
703 break; /* empty */
704 spin_lock(&dentry->d_lock);
705 if (dentry != list_entry(list->prev, struct dentry, d_lru)) {
706 spin_unlock(&dentry->d_lock);
707 continue;
708 }
465 709
466 /* 710 /*
467 * We found an inuse dentry which was not removed from 711 * We found an inuse dentry which was not removed from
468 * the LRU because of laziness during lookup. Do not free 712 * the LRU because of laziness during lookup. Do not free
469 * it - just keep it off the LRU list. 713 * it - just keep it off the LRU list.
470 */ 714 */
471 spin_lock(&dentry->d_lock); 715 if (dentry->d_count) {
472 if (atomic_read(&dentry->d_count)) { 716 dentry_lru_del(dentry);
473 spin_unlock(&dentry->d_lock); 717 spin_unlock(&dentry->d_lock);
474 continue; 718 continue;
475 } 719 }
476 prune_one_dentry(dentry); 720
477 /* dentry->d_lock was dropped in prune_one_dentry() */ 721 rcu_read_unlock();
478 cond_resched_lock(&dcache_lock); 722
723 try_prune_one_dentry(dentry);
724
725 rcu_read_lock();
479 } 726 }
727 rcu_read_unlock();
480} 728}
481 729
482/** 730/**
@@ -495,42 +743,44 @@ static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
495 LIST_HEAD(tmp); 743 LIST_HEAD(tmp);
496 int cnt = *count; 744 int cnt = *count;
497 745
498 spin_lock(&dcache_lock); 746relock:
747 spin_lock(&dcache_lru_lock);
499 while (!list_empty(&sb->s_dentry_lru)) { 748 while (!list_empty(&sb->s_dentry_lru)) {
500 dentry = list_entry(sb->s_dentry_lru.prev, 749 dentry = list_entry(sb->s_dentry_lru.prev,
501 struct dentry, d_lru); 750 struct dentry, d_lru);
502 BUG_ON(dentry->d_sb != sb); 751 BUG_ON(dentry->d_sb != sb);
503 752
753 if (!spin_trylock(&dentry->d_lock)) {
754 spin_unlock(&dcache_lru_lock);
755 cpu_relax();
756 goto relock;
757 }
758
504 /* 759 /*
505 * If we are honouring the DCACHE_REFERENCED flag and the 760 * If we are honouring the DCACHE_REFERENCED flag and the
506 * dentry has this flag set, don't free it. Clear the flag 761 * dentry has this flag set, don't free it. Clear the flag
507 * and put it back on the LRU. 762 * and put it back on the LRU.
508 */ 763 */
509 if (flags & DCACHE_REFERENCED) { 764 if (flags & DCACHE_REFERENCED &&
510 spin_lock(&dentry->d_lock); 765 dentry->d_flags & DCACHE_REFERENCED) {
511 if (dentry->d_flags & DCACHE_REFERENCED) { 766 dentry->d_flags &= ~DCACHE_REFERENCED;
512 dentry->d_flags &= ~DCACHE_REFERENCED; 767 list_move(&dentry->d_lru, &referenced);
513 list_move(&dentry->d_lru, &referenced);
514 spin_unlock(&dentry->d_lock);
515 cond_resched_lock(&dcache_lock);
516 continue;
517 }
518 spin_unlock(&dentry->d_lock); 768 spin_unlock(&dentry->d_lock);
769 } else {
770 list_move_tail(&dentry->d_lru, &tmp);
771 spin_unlock(&dentry->d_lock);
772 if (!--cnt)
773 break;
519 } 774 }
520 775 cond_resched_lock(&dcache_lru_lock);
521 list_move_tail(&dentry->d_lru, &tmp);
522 if (!--cnt)
523 break;
524 cond_resched_lock(&dcache_lock);
525 } 776 }
526
527 *count = cnt;
528 shrink_dentry_list(&tmp);
529
530 if (!list_empty(&referenced)) 777 if (!list_empty(&referenced))
531 list_splice(&referenced, &sb->s_dentry_lru); 778 list_splice(&referenced, &sb->s_dentry_lru);
532 spin_unlock(&dcache_lock); 779 spin_unlock(&dcache_lru_lock);
533 780
781 shrink_dentry_list(&tmp);
782
783 *count = cnt;
534} 784}
535 785
536/** 786/**
@@ -546,13 +796,12 @@ static void prune_dcache(int count)
546{ 796{
547 struct super_block *sb, *p = NULL; 797 struct super_block *sb, *p = NULL;
548 int w_count; 798 int w_count;
549 int unused = percpu_counter_sum_positive(&nr_dentry_unused); 799 int unused = dentry_stat.nr_unused;
550 int prune_ratio; 800 int prune_ratio;
551 int pruned; 801 int pruned;
552 802
553 if (unused == 0 || count == 0) 803 if (unused == 0 || count == 0)
554 return; 804 return;
555 spin_lock(&dcache_lock);
556 if (count >= unused) 805 if (count >= unused)
557 prune_ratio = 1; 806 prune_ratio = 1;
558 else 807 else
@@ -589,11 +838,9 @@ static void prune_dcache(int count)
589 if (down_read_trylock(&sb->s_umount)) { 838 if (down_read_trylock(&sb->s_umount)) {
590 if ((sb->s_root != NULL) && 839 if ((sb->s_root != NULL) &&
591 (!list_empty(&sb->s_dentry_lru))) { 840 (!list_empty(&sb->s_dentry_lru))) {
592 spin_unlock(&dcache_lock);
593 __shrink_dcache_sb(sb, &w_count, 841 __shrink_dcache_sb(sb, &w_count,
594 DCACHE_REFERENCED); 842 DCACHE_REFERENCED);
595 pruned -= w_count; 843 pruned -= w_count;
596 spin_lock(&dcache_lock);
597 } 844 }
598 up_read(&sb->s_umount); 845 up_read(&sb->s_umount);
599 } 846 }
@@ -609,7 +856,6 @@ static void prune_dcache(int count)
609 if (p) 856 if (p)
610 __put_super(p); 857 __put_super(p);
611 spin_unlock(&sb_lock); 858 spin_unlock(&sb_lock);
612 spin_unlock(&dcache_lock);
613} 859}
614 860
615/** 861/**
@@ -623,12 +869,14 @@ void shrink_dcache_sb(struct super_block *sb)
623{ 869{
624 LIST_HEAD(tmp); 870 LIST_HEAD(tmp);
625 871
626 spin_lock(&dcache_lock); 872 spin_lock(&dcache_lru_lock);
627 while (!list_empty(&sb->s_dentry_lru)) { 873 while (!list_empty(&sb->s_dentry_lru)) {
628 list_splice_init(&sb->s_dentry_lru, &tmp); 874 list_splice_init(&sb->s_dentry_lru, &tmp);
875 spin_unlock(&dcache_lru_lock);
629 shrink_dentry_list(&tmp); 876 shrink_dentry_list(&tmp);
877 spin_lock(&dcache_lru_lock);
630 } 878 }
631 spin_unlock(&dcache_lock); 879 spin_unlock(&dcache_lru_lock);
632} 880}
633EXPORT_SYMBOL(shrink_dcache_sb); 881EXPORT_SYMBOL(shrink_dcache_sb);
634 882
@@ -645,10 +893,10 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
645 BUG_ON(!IS_ROOT(dentry)); 893 BUG_ON(!IS_ROOT(dentry));
646 894
647 /* detach this root from the system */ 895 /* detach this root from the system */
648 spin_lock(&dcache_lock); 896 spin_lock(&dentry->d_lock);
649 dentry_lru_del(dentry); 897 dentry_lru_del(dentry);
650 __d_drop(dentry); 898 __d_drop(dentry);
651 spin_unlock(&dcache_lock); 899 spin_unlock(&dentry->d_lock);
652 900
653 for (;;) { 901 for (;;) {
654 /* descend to the first leaf in the current subtree */ 902 /* descend to the first leaf in the current subtree */
@@ -657,14 +905,16 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
657 905
658 /* this is a branch with children - detach all of them 906 /* this is a branch with children - detach all of them
659 * from the system in one go */ 907 * from the system in one go */
660 spin_lock(&dcache_lock); 908 spin_lock(&dentry->d_lock);
661 list_for_each_entry(loop, &dentry->d_subdirs, 909 list_for_each_entry(loop, &dentry->d_subdirs,
662 d_u.d_child) { 910 d_u.d_child) {
911 spin_lock_nested(&loop->d_lock,
912 DENTRY_D_LOCK_NESTED);
663 dentry_lru_del(loop); 913 dentry_lru_del(loop);
664 __d_drop(loop); 914 __d_drop(loop);
665 cond_resched_lock(&dcache_lock); 915 spin_unlock(&loop->d_lock);
666 } 916 }
667 spin_unlock(&dcache_lock); 917 spin_unlock(&dentry->d_lock);
668 918
669 /* move to the first child */ 919 /* move to the first child */
670 dentry = list_entry(dentry->d_subdirs.next, 920 dentry = list_entry(dentry->d_subdirs.next,
@@ -676,7 +926,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
676 do { 926 do {
677 struct inode *inode; 927 struct inode *inode;
678 928
679 if (atomic_read(&dentry->d_count) != 0) { 929 if (dentry->d_count != 0) {
680 printk(KERN_ERR 930 printk(KERN_ERR
681 "BUG: Dentry %p{i=%lx,n=%s}" 931 "BUG: Dentry %p{i=%lx,n=%s}"
682 " still in use (%d)" 932 " still in use (%d)"
@@ -685,20 +935,23 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
685 dentry->d_inode ? 935 dentry->d_inode ?
686 dentry->d_inode->i_ino : 0UL, 936 dentry->d_inode->i_ino : 0UL,
687 dentry->d_name.name, 937 dentry->d_name.name,
688 atomic_read(&dentry->d_count), 938 dentry->d_count,
689 dentry->d_sb->s_type->name, 939 dentry->d_sb->s_type->name,
690 dentry->d_sb->s_id); 940 dentry->d_sb->s_id);
691 BUG(); 941 BUG();
692 } 942 }
693 943
694 if (IS_ROOT(dentry)) 944 if (IS_ROOT(dentry)) {
695 parent = NULL; 945 parent = NULL;
696 else { 946 list_del(&dentry->d_u.d_child);
947 } else {
697 parent = dentry->d_parent; 948 parent = dentry->d_parent;
698 atomic_dec(&parent->d_count); 949 spin_lock(&parent->d_lock);
950 parent->d_count--;
951 list_del(&dentry->d_u.d_child);
952 spin_unlock(&parent->d_lock);
699 } 953 }
700 954
701 list_del(&dentry->d_u.d_child);
702 detached++; 955 detached++;
703 956
704 inode = dentry->d_inode; 957 inode = dentry->d_inode;
@@ -728,8 +981,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
728 981
729/* 982/*
730 * destroy the dentries attached to a superblock on unmounting 983 * destroy the dentries attached to a superblock on unmounting
731 * - we don't need to use dentry->d_lock, and only need dcache_lock when 984 * - we don't need to use dentry->d_lock because:
732 * removing the dentry from the system lists and hashes because:
733 * - the superblock is detached from all mountings and open files, so the 985 * - the superblock is detached from all mountings and open files, so the
734 * dentry trees will not be rearranged by the VFS 986 * dentry trees will not be rearranged by the VFS
735 * - s_umount is write-locked, so the memory pressure shrinker will ignore 987 * - s_umount is write-locked, so the memory pressure shrinker will ignore
@@ -746,11 +998,13 @@ void shrink_dcache_for_umount(struct super_block *sb)
746 998
747 dentry = sb->s_root; 999 dentry = sb->s_root;
748 sb->s_root = NULL; 1000 sb->s_root = NULL;
749 atomic_dec(&dentry->d_count); 1001 spin_lock(&dentry->d_lock);
1002 dentry->d_count--;
1003 spin_unlock(&dentry->d_lock);
750 shrink_dcache_for_umount_subtree(dentry); 1004 shrink_dcache_for_umount_subtree(dentry);
751 1005
752 while (!hlist_empty(&sb->s_anon)) { 1006 while (!hlist_bl_empty(&sb->s_anon)) {
753 dentry = hlist_entry(sb->s_anon.first, struct dentry, d_hash); 1007 dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
754 shrink_dcache_for_umount_subtree(dentry); 1008 shrink_dcache_for_umount_subtree(dentry);
755 } 1009 }
756} 1010}
@@ -768,15 +1022,20 @@ void shrink_dcache_for_umount(struct super_block *sb)
768 * Return true if the parent or its subdirectories contain 1022 * Return true if the parent or its subdirectories contain
769 * a mount point 1023 * a mount point
770 */ 1024 */
771
772int have_submounts(struct dentry *parent) 1025int have_submounts(struct dentry *parent)
773{ 1026{
774 struct dentry *this_parent = parent; 1027 struct dentry *this_parent;
775 struct list_head *next; 1028 struct list_head *next;
1029 unsigned seq;
1030 int locked = 0;
1031
1032 seq = read_seqbegin(&rename_lock);
1033again:
1034 this_parent = parent;
776 1035
777 spin_lock(&dcache_lock);
778 if (d_mountpoint(parent)) 1036 if (d_mountpoint(parent))
779 goto positive; 1037 goto positive;
1038 spin_lock(&this_parent->d_lock);
780repeat: 1039repeat:
781 next = this_parent->d_subdirs.next; 1040 next = this_parent->d_subdirs.next;
782resume: 1041resume:
@@ -784,27 +1043,65 @@ resume:
784 struct list_head *tmp = next; 1043 struct list_head *tmp = next;
785 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); 1044 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
786 next = tmp->next; 1045 next = tmp->next;
1046
1047 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
787 /* Have we found a mount point ? */ 1048 /* Have we found a mount point ? */
788 if (d_mountpoint(dentry)) 1049 if (d_mountpoint(dentry)) {
1050 spin_unlock(&dentry->d_lock);
1051 spin_unlock(&this_parent->d_lock);
789 goto positive; 1052 goto positive;
1053 }
790 if (!list_empty(&dentry->d_subdirs)) { 1054 if (!list_empty(&dentry->d_subdirs)) {
1055 spin_unlock(&this_parent->d_lock);
1056 spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
791 this_parent = dentry; 1057 this_parent = dentry;
1058 spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
792 goto repeat; 1059 goto repeat;
793 } 1060 }
1061 spin_unlock(&dentry->d_lock);
794 } 1062 }
795 /* 1063 /*
796 * All done at this level ... ascend and resume the search. 1064 * All done at this level ... ascend and resume the search.
797 */ 1065 */
798 if (this_parent != parent) { 1066 if (this_parent != parent) {
799 next = this_parent->d_u.d_child.next; 1067 struct dentry *tmp;
800 this_parent = this_parent->d_parent; 1068 struct dentry *child;
1069
1070 tmp = this_parent->d_parent;
1071 rcu_read_lock();
1072 spin_unlock(&this_parent->d_lock);
1073 child = this_parent;
1074 this_parent = tmp;
1075 spin_lock(&this_parent->d_lock);
1076 /* might go back up the wrong parent if we have had a rename
1077 * or deletion */
1078 if (this_parent != child->d_parent ||
1079 (!locked && read_seqretry(&rename_lock, seq))) {
1080 spin_unlock(&this_parent->d_lock);
1081 rcu_read_unlock();
1082 goto rename_retry;
1083 }
1084 rcu_read_unlock();
1085 next = child->d_u.d_child.next;
801 goto resume; 1086 goto resume;
802 } 1087 }
803 spin_unlock(&dcache_lock); 1088 spin_unlock(&this_parent->d_lock);
1089 if (!locked && read_seqretry(&rename_lock, seq))
1090 goto rename_retry;
1091 if (locked)
1092 write_sequnlock(&rename_lock);
804 return 0; /* No mount points found in tree */ 1093 return 0; /* No mount points found in tree */
805positive: 1094positive:
806 spin_unlock(&dcache_lock); 1095 if (!locked && read_seqretry(&rename_lock, seq))
1096 goto rename_retry;
1097 if (locked)
1098 write_sequnlock(&rename_lock);
807 return 1; 1099 return 1;
1100
1101rename_retry:
1102 locked = 1;
1103 write_seqlock(&rename_lock);
1104 goto again;
808} 1105}
809EXPORT_SYMBOL(have_submounts); 1106EXPORT_SYMBOL(have_submounts);
810 1107
@@ -824,11 +1121,16 @@ EXPORT_SYMBOL(have_submounts);
824 */ 1121 */
825static int select_parent(struct dentry * parent) 1122static int select_parent(struct dentry * parent)
826{ 1123{
827 struct dentry *this_parent = parent; 1124 struct dentry *this_parent;
828 struct list_head *next; 1125 struct list_head *next;
1126 unsigned seq;
829 int found = 0; 1127 int found = 0;
1128 int locked = 0;
830 1129
831 spin_lock(&dcache_lock); 1130 seq = read_seqbegin(&rename_lock);
1131again:
1132 this_parent = parent;
1133 spin_lock(&this_parent->d_lock);
832repeat: 1134repeat:
833 next = this_parent->d_subdirs.next; 1135 next = this_parent->d_subdirs.next;
834resume: 1136resume:
@@ -837,11 +1139,13 @@ resume:
837 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); 1139 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
838 next = tmp->next; 1140 next = tmp->next;
839 1141
1142 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1143
840 /* 1144 /*
841 * move only zero ref count dentries to the end 1145 * move only zero ref count dentries to the end
842 * of the unused list for prune_dcache 1146 * of the unused list for prune_dcache
843 */ 1147 */
844 if (!atomic_read(&dentry->d_count)) { 1148 if (!dentry->d_count) {
845 dentry_lru_move_tail(dentry); 1149 dentry_lru_move_tail(dentry);
846 found++; 1150 found++;
847 } else { 1151 } else {
@@ -853,28 +1157,63 @@ resume:
853 * ensures forward progress). We'll be coming back to find 1157 * ensures forward progress). We'll be coming back to find
854 * the rest. 1158 * the rest.
855 */ 1159 */
856 if (found && need_resched()) 1160 if (found && need_resched()) {
1161 spin_unlock(&dentry->d_lock);
857 goto out; 1162 goto out;
1163 }
858 1164
859 /* 1165 /*
860 * Descend a level if the d_subdirs list is non-empty. 1166 * Descend a level if the d_subdirs list is non-empty.
861 */ 1167 */
862 if (!list_empty(&dentry->d_subdirs)) { 1168 if (!list_empty(&dentry->d_subdirs)) {
1169 spin_unlock(&this_parent->d_lock);
1170 spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
863 this_parent = dentry; 1171 this_parent = dentry;
1172 spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
864 goto repeat; 1173 goto repeat;
865 } 1174 }
1175
1176 spin_unlock(&dentry->d_lock);
866 } 1177 }
867 /* 1178 /*
868 * All done at this level ... ascend and resume the search. 1179 * All done at this level ... ascend and resume the search.
869 */ 1180 */
870 if (this_parent != parent) { 1181 if (this_parent != parent) {
871 next = this_parent->d_u.d_child.next; 1182 struct dentry *tmp;
872 this_parent = this_parent->d_parent; 1183 struct dentry *child;
1184
1185 tmp = this_parent->d_parent;
1186 rcu_read_lock();
1187 spin_unlock(&this_parent->d_lock);
1188 child = this_parent;
1189 this_parent = tmp;
1190 spin_lock(&this_parent->d_lock);
1191 /* might go back up the wrong parent if we have had a rename
1192 * or deletion */
1193 if (this_parent != child->d_parent ||
1194 (!locked && read_seqretry(&rename_lock, seq))) {
1195 spin_unlock(&this_parent->d_lock);
1196 rcu_read_unlock();
1197 goto rename_retry;
1198 }
1199 rcu_read_unlock();
1200 next = child->d_u.d_child.next;
873 goto resume; 1201 goto resume;
874 } 1202 }
875out: 1203out:
876 spin_unlock(&dcache_lock); 1204 spin_unlock(&this_parent->d_lock);
1205 if (!locked && read_seqretry(&rename_lock, seq))
1206 goto rename_retry;
1207 if (locked)
1208 write_sequnlock(&rename_lock);
877 return found; 1209 return found;
1210
1211rename_retry:
1212 if (found)
1213 return found;
1214 locked = 1;
1215 write_seqlock(&rename_lock);
1216 goto again;
878} 1217}
879 1218
880/** 1219/**
@@ -908,16 +1247,13 @@ EXPORT_SYMBOL(shrink_dcache_parent);
908 */ 1247 */
909static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) 1248static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
910{ 1249{
911 int nr_unused;
912
913 if (nr) { 1250 if (nr) {
914 if (!(gfp_mask & __GFP_FS)) 1251 if (!(gfp_mask & __GFP_FS))
915 return -1; 1252 return -1;
916 prune_dcache(nr); 1253 prune_dcache(nr);
917 } 1254 }
918 1255
919 nr_unused = percpu_counter_sum_positive(&nr_dentry_unused); 1256 return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
920 return (nr_unused / 100) * sysctl_vfs_cache_pressure;
921} 1257}
922 1258
923static struct shrinker dcache_shrinker = { 1259static struct shrinker dcache_shrinker = {
@@ -960,38 +1296,52 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
960 memcpy(dname, name->name, name->len); 1296 memcpy(dname, name->name, name->len);
961 dname[name->len] = 0; 1297 dname[name->len] = 0;
962 1298
963 atomic_set(&dentry->d_count, 1); 1299 dentry->d_count = 1;
964 dentry->d_flags = DCACHE_UNHASHED; 1300 dentry->d_flags = DCACHE_UNHASHED;
965 spin_lock_init(&dentry->d_lock); 1301 spin_lock_init(&dentry->d_lock);
1302 seqcount_init(&dentry->d_seq);
966 dentry->d_inode = NULL; 1303 dentry->d_inode = NULL;
967 dentry->d_parent = NULL; 1304 dentry->d_parent = NULL;
968 dentry->d_sb = NULL; 1305 dentry->d_sb = NULL;
969 dentry->d_op = NULL; 1306 dentry->d_op = NULL;
970 dentry->d_fsdata = NULL; 1307 dentry->d_fsdata = NULL;
971 dentry->d_mounted = 0; 1308 INIT_HLIST_BL_NODE(&dentry->d_hash);
972 INIT_HLIST_NODE(&dentry->d_hash);
973 INIT_LIST_HEAD(&dentry->d_lru); 1309 INIT_LIST_HEAD(&dentry->d_lru);
974 INIT_LIST_HEAD(&dentry->d_subdirs); 1310 INIT_LIST_HEAD(&dentry->d_subdirs);
975 INIT_LIST_HEAD(&dentry->d_alias); 1311 INIT_LIST_HEAD(&dentry->d_alias);
1312 INIT_LIST_HEAD(&dentry->d_u.d_child);
976 1313
977 if (parent) { 1314 if (parent) {
978 dentry->d_parent = dget(parent); 1315 spin_lock(&parent->d_lock);
1316 /*
1317 * don't need child lock because it is not subject
1318 * to concurrency here
1319 */
1320 __dget_dlock(parent);
1321 dentry->d_parent = parent;
979 dentry->d_sb = parent->d_sb; 1322 dentry->d_sb = parent->d_sb;
980 } else {
981 INIT_LIST_HEAD(&dentry->d_u.d_child);
982 }
983
984 spin_lock(&dcache_lock);
985 if (parent)
986 list_add(&dentry->d_u.d_child, &parent->d_subdirs); 1323 list_add(&dentry->d_u.d_child, &parent->d_subdirs);
987 spin_unlock(&dcache_lock); 1324 spin_unlock(&parent->d_lock);
1325 }
988 1326
989 percpu_counter_inc(&nr_dentry); 1327 this_cpu_inc(nr_dentry);
990 1328
991 return dentry; 1329 return dentry;
992} 1330}
993EXPORT_SYMBOL(d_alloc); 1331EXPORT_SYMBOL(d_alloc);
994 1332
1333struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
1334{
1335 struct dentry *dentry = d_alloc(NULL, name);
1336 if (dentry) {
1337 dentry->d_sb = sb;
1338 dentry->d_parent = dentry;
1339 dentry->d_flags |= DCACHE_DISCONNECTED;
1340 }
1341 return dentry;
1342}
1343EXPORT_SYMBOL(d_alloc_pseudo);
1344
995struct dentry *d_alloc_name(struct dentry *parent, const char *name) 1345struct dentry *d_alloc_name(struct dentry *parent, const char *name)
996{ 1346{
997 struct qstr q; 1347 struct qstr q;
@@ -1003,12 +1353,36 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name)
1003} 1353}
1004EXPORT_SYMBOL(d_alloc_name); 1354EXPORT_SYMBOL(d_alloc_name);
1005 1355
1006/* the caller must hold dcache_lock */ 1356void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
1357{
1358 BUG_ON(dentry->d_op);
1359 BUG_ON(dentry->d_flags & (DCACHE_OP_HASH |
1360 DCACHE_OP_COMPARE |
1361 DCACHE_OP_REVALIDATE |
1362 DCACHE_OP_DELETE ));
1363 dentry->d_op = op;
1364 if (!op)
1365 return;
1366 if (op->d_hash)
1367 dentry->d_flags |= DCACHE_OP_HASH;
1368 if (op->d_compare)
1369 dentry->d_flags |= DCACHE_OP_COMPARE;
1370 if (op->d_revalidate)
1371 dentry->d_flags |= DCACHE_OP_REVALIDATE;
1372 if (op->d_delete)
1373 dentry->d_flags |= DCACHE_OP_DELETE;
1374
1375}
1376EXPORT_SYMBOL(d_set_d_op);
1377
1007static void __d_instantiate(struct dentry *dentry, struct inode *inode) 1378static void __d_instantiate(struct dentry *dentry, struct inode *inode)
1008{ 1379{
1380 spin_lock(&dentry->d_lock);
1009 if (inode) 1381 if (inode)
1010 list_add(&dentry->d_alias, &inode->i_dentry); 1382 list_add(&dentry->d_alias, &inode->i_dentry);
1011 dentry->d_inode = inode; 1383 dentry->d_inode = inode;
1384 dentry_rcuwalk_barrier(dentry);
1385 spin_unlock(&dentry->d_lock);
1012 fsnotify_d_instantiate(dentry, inode); 1386 fsnotify_d_instantiate(dentry, inode);
1013} 1387}
1014 1388
@@ -1030,9 +1404,11 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
1030void d_instantiate(struct dentry *entry, struct inode * inode) 1404void d_instantiate(struct dentry *entry, struct inode * inode)
1031{ 1405{
1032 BUG_ON(!list_empty(&entry->d_alias)); 1406 BUG_ON(!list_empty(&entry->d_alias));
1033 spin_lock(&dcache_lock); 1407 if (inode)
1408 spin_lock(&inode->i_lock);
1034 __d_instantiate(entry, inode); 1409 __d_instantiate(entry, inode);
1035 spin_unlock(&dcache_lock); 1410 if (inode)
1411 spin_unlock(&inode->i_lock);
1036 security_d_instantiate(entry, inode); 1412 security_d_instantiate(entry, inode);
1037} 1413}
1038EXPORT_SYMBOL(d_instantiate); 1414EXPORT_SYMBOL(d_instantiate);
@@ -1069,15 +1445,18 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
1069 list_for_each_entry(alias, &inode->i_dentry, d_alias) { 1445 list_for_each_entry(alias, &inode->i_dentry, d_alias) {
1070 struct qstr *qstr = &alias->d_name; 1446 struct qstr *qstr = &alias->d_name;
1071 1447
1448 /*
1449 * Don't need alias->d_lock here, because aliases with
1450 * d_parent == entry->d_parent are not subject to name or
1451 * parent changes, because the parent inode i_mutex is held.
1452 */
1072 if (qstr->hash != hash) 1453 if (qstr->hash != hash)
1073 continue; 1454 continue;
1074 if (alias->d_parent != entry->d_parent) 1455 if (alias->d_parent != entry->d_parent)
1075 continue; 1456 continue;
1076 if (qstr->len != len) 1457 if (dentry_cmp(qstr->name, qstr->len, name, len))
1077 continue; 1458 continue;
1078 if (memcmp(qstr->name, name, len)) 1459 __dget(alias);
1079 continue;
1080 dget_locked(alias);
1081 return alias; 1460 return alias;
1082 } 1461 }
1083 1462
@@ -1091,9 +1470,11 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
1091 1470
1092 BUG_ON(!list_empty(&entry->d_alias)); 1471 BUG_ON(!list_empty(&entry->d_alias));
1093 1472
1094 spin_lock(&dcache_lock); 1473 if (inode)
1474 spin_lock(&inode->i_lock);
1095 result = __d_instantiate_unique(entry, inode); 1475 result = __d_instantiate_unique(entry, inode);
1096 spin_unlock(&dcache_lock); 1476 if (inode)
1477 spin_unlock(&inode->i_lock);
1097 1478
1098 if (!result) { 1479 if (!result) {
1099 security_d_instantiate(entry, inode); 1480 security_d_instantiate(entry, inode);
@@ -1134,14 +1515,6 @@ struct dentry * d_alloc_root(struct inode * root_inode)
1134} 1515}
1135EXPORT_SYMBOL(d_alloc_root); 1516EXPORT_SYMBOL(d_alloc_root);
1136 1517
1137static inline struct hlist_head *d_hash(struct dentry *parent,
1138 unsigned long hash)
1139{
1140 hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
1141 hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
1142 return dentry_hashtable + (hash & D_HASHMASK);
1143}
1144
1145/** 1518/**
1146 * d_obtain_alias - find or allocate a dentry for a given inode 1519 * d_obtain_alias - find or allocate a dentry for a given inode
1147 * @inode: inode to allocate the dentry for 1520 * @inode: inode to allocate the dentry for
@@ -1182,10 +1555,11 @@ struct dentry *d_obtain_alias(struct inode *inode)
1182 } 1555 }
1183 tmp->d_parent = tmp; /* make sure dput doesn't croak */ 1556 tmp->d_parent = tmp; /* make sure dput doesn't croak */
1184 1557
1185 spin_lock(&dcache_lock); 1558
1559 spin_lock(&inode->i_lock);
1186 res = __d_find_alias(inode, 0); 1560 res = __d_find_alias(inode, 0);
1187 if (res) { 1561 if (res) {
1188 spin_unlock(&dcache_lock); 1562 spin_unlock(&inode->i_lock);
1189 dput(tmp); 1563 dput(tmp);
1190 goto out_iput; 1564 goto out_iput;
1191 } 1565 }
@@ -1195,12 +1569,14 @@ struct dentry *d_obtain_alias(struct inode *inode)
1195 tmp->d_sb = inode->i_sb; 1569 tmp->d_sb = inode->i_sb;
1196 tmp->d_inode = inode; 1570 tmp->d_inode = inode;
1197 tmp->d_flags |= DCACHE_DISCONNECTED; 1571 tmp->d_flags |= DCACHE_DISCONNECTED;
1198 tmp->d_flags &= ~DCACHE_UNHASHED;
1199 list_add(&tmp->d_alias, &inode->i_dentry); 1572 list_add(&tmp->d_alias, &inode->i_dentry);
1200 hlist_add_head(&tmp->d_hash, &inode->i_sb->s_anon); 1573 bit_spin_lock(0, (unsigned long *)&tmp->d_sb->s_anon.first);
1574 tmp->d_flags &= ~DCACHE_UNHASHED;
1575 hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
1576 __bit_spin_unlock(0, (unsigned long *)&tmp->d_sb->s_anon.first);
1201 spin_unlock(&tmp->d_lock); 1577 spin_unlock(&tmp->d_lock);
1578 spin_unlock(&inode->i_lock);
1202 1579
1203 spin_unlock(&dcache_lock);
1204 return tmp; 1580 return tmp;
1205 1581
1206 out_iput: 1582 out_iput:
@@ -1230,18 +1606,18 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
1230 struct dentry *new = NULL; 1606 struct dentry *new = NULL;
1231 1607
1232 if (inode && S_ISDIR(inode->i_mode)) { 1608 if (inode && S_ISDIR(inode->i_mode)) {
1233 spin_lock(&dcache_lock); 1609 spin_lock(&inode->i_lock);
1234 new = __d_find_alias(inode, 1); 1610 new = __d_find_alias(inode, 1);
1235 if (new) { 1611 if (new) {
1236 BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); 1612 BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
1237 spin_unlock(&dcache_lock); 1613 spin_unlock(&inode->i_lock);
1238 security_d_instantiate(new, inode); 1614 security_d_instantiate(new, inode);
1239 d_move(new, dentry); 1615 d_move(new, dentry);
1240 iput(inode); 1616 iput(inode);
1241 } else { 1617 } else {
1242 /* already taking dcache_lock, so d_add() by hand */ 1618 /* already taking inode->i_lock, so d_add() by hand */
1243 __d_instantiate(dentry, inode); 1619 __d_instantiate(dentry, inode);
1244 spin_unlock(&dcache_lock); 1620 spin_unlock(&inode->i_lock);
1245 security_d_instantiate(dentry, inode); 1621 security_d_instantiate(dentry, inode);
1246 d_rehash(dentry); 1622 d_rehash(dentry);
1247 } 1623 }
@@ -1314,10 +1690,10 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
1314 * Negative dentry: instantiate it unless the inode is a directory and 1690 * Negative dentry: instantiate it unless the inode is a directory and
1315 * already has a dentry. 1691 * already has a dentry.
1316 */ 1692 */
1317 spin_lock(&dcache_lock); 1693 spin_lock(&inode->i_lock);
1318 if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) { 1694 if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) {
1319 __d_instantiate(found, inode); 1695 __d_instantiate(found, inode);
1320 spin_unlock(&dcache_lock); 1696 spin_unlock(&inode->i_lock);
1321 security_d_instantiate(found, inode); 1697 security_d_instantiate(found, inode);
1322 return found; 1698 return found;
1323 } 1699 }
@@ -1327,8 +1703,8 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
1327 * reference to it, move it in place and use it. 1703 * reference to it, move it in place and use it.
1328 */ 1704 */
1329 new = list_entry(inode->i_dentry.next, struct dentry, d_alias); 1705 new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
1330 dget_locked(new); 1706 __dget(new);
1331 spin_unlock(&dcache_lock); 1707 spin_unlock(&inode->i_lock);
1332 security_d_instantiate(found, inode); 1708 security_d_instantiate(found, inode);
1333 d_move(new, found); 1709 d_move(new, found);
1334 iput(inode); 1710 iput(inode);
@@ -1342,6 +1718,112 @@ err_out:
1342EXPORT_SYMBOL(d_add_ci); 1718EXPORT_SYMBOL(d_add_ci);
1343 1719
1344/** 1720/**
1721 * __d_lookup_rcu - search for a dentry (racy, store-free)
1722 * @parent: parent dentry
1723 * @name: qstr of name we wish to find
1724 * @seq: returns d_seq value at the point where the dentry was found
1725 * @inode: returns dentry->d_inode when the inode was found valid.
1726 * Returns: dentry, or NULL
1727 *
1728 * __d_lookup_rcu is the dcache lookup function for rcu-walk name
1729 * resolution (store-free path walking) design described in
1730 * Documentation/filesystems/path-lookup.txt.
1731 *
1732 * This is not to be used outside core vfs.
1733 *
1734 * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock
1735 * held, and rcu_read_lock held. The returned dentry must not be stored into
1736 * without taking d_lock and checking d_seq sequence count against @seq
1737 * returned here.
1738 *
1739 * A refcount may be taken on the found dentry with the __d_rcu_to_refcount
1740 * function.
1741 *
1742 * Alternatively, __d_lookup_rcu may be called again to look up the child of
1743 * the returned dentry, so long as its parent's seqlock is checked after the
1744 * child is looked up. Thus, an interlocking stepping of sequence lock checks
1745 * is formed, giving integrity down the path walk.
1746 */
1747struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
1748 unsigned *seq, struct inode **inode)
1749{
1750 unsigned int len = name->len;
1751 unsigned int hash = name->hash;
1752 const unsigned char *str = name->name;
1753 struct dcache_hash_bucket *b = d_hash(parent, hash);
1754 struct hlist_bl_node *node;
1755 struct dentry *dentry;
1756
1757 /*
1758 * Note: There is significant duplication with __d_lookup which is
1759 * required to prevent single threaded performance regressions
1760 * especially on architectures where smp_rmb (in seqcounts) are costly.
1761 * Keep the two functions in sync.
1762 */
1763
1764 /*
1765 * The hash list is protected using RCU.
1766 *
1767 * Carefully use d_seq when comparing a candidate dentry, to avoid
1768 * races with d_move().
1769 *
1770 * It is possible that concurrent renames can mess up our list
1771 * walk here and result in missing our dentry, resulting in the
1772 * false-negative result. d_lookup() protects against concurrent
1773 * renames using rename_lock seqlock.
1774 *
1775 * See Documentation/vfs/dcache-locking.txt for more details.
1776 */
1777 hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) {
1778 struct inode *i;
1779 const char *tname;
1780 int tlen;
1781
1782 if (dentry->d_name.hash != hash)
1783 continue;
1784
1785seqretry:
1786 *seq = read_seqcount_begin(&dentry->d_seq);
1787 if (dentry->d_parent != parent)
1788 continue;
1789 if (d_unhashed(dentry))
1790 continue;
1791 tlen = dentry->d_name.len;
1792 tname = dentry->d_name.name;
1793 i = dentry->d_inode;
1794 prefetch(tname);
1795 if (i)
1796 prefetch(i);
1797 /*
1798 * This seqcount check is required to ensure name and
1799 * len are loaded atomically, so as not to walk off the
1800 * edge of memory when walking. If we could load this
1801 * atomically some other way, we could drop this check.
1802 */
1803 if (read_seqcount_retry(&dentry->d_seq, *seq))
1804 goto seqretry;
1805 if (parent->d_flags & DCACHE_OP_COMPARE) {
1806 if (parent->d_op->d_compare(parent, *inode,
1807 dentry, i,
1808 tlen, tname, name))
1809 continue;
1810 } else {
1811 if (dentry_cmp(tname, tlen, str, len))
1812 continue;
1813 }
1814 /*
1815 * No extra seqcount check is required after the name
1816 * compare. The caller must perform a seqcount check in
1817 * order to do anything useful with the returned dentry
1818 * anyway.
1819 */
1820 *inode = i;
1821 return dentry;
1822 }
1823 return NULL;
1824}
1825
1826/**
1345 * d_lookup - search for a dentry 1827 * d_lookup - search for a dentry
1346 * @parent: parent dentry 1828 * @parent: parent dentry
1347 * @name: qstr of name we wish to find 1829 * @name: qstr of name we wish to find
@@ -1352,10 +1834,10 @@ EXPORT_SYMBOL(d_add_ci);
1352 * dentry is returned. The caller must use dput to free the entry when it has 1834 * dentry is returned. The caller must use dput to free the entry when it has
1353 * finished using it. %NULL is returned if the dentry does not exist. 1835 * finished using it. %NULL is returned if the dentry does not exist.
1354 */ 1836 */
1355struct dentry * d_lookup(struct dentry * parent, struct qstr * name) 1837struct dentry *d_lookup(struct dentry *parent, struct qstr *name)
1356{ 1838{
1357 struct dentry * dentry = NULL; 1839 struct dentry *dentry;
1358 unsigned long seq; 1840 unsigned seq;
1359 1841
1360 do { 1842 do {
1361 seq = read_seqbegin(&rename_lock); 1843 seq = read_seqbegin(&rename_lock);
@@ -1367,7 +1849,7 @@ struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
1367} 1849}
1368EXPORT_SYMBOL(d_lookup); 1850EXPORT_SYMBOL(d_lookup);
1369 1851
1370/* 1852/**
1371 * __d_lookup - search for a dentry (racy) 1853 * __d_lookup - search for a dentry (racy)
1372 * @parent: parent dentry 1854 * @parent: parent dentry
1373 * @name: qstr of name we wish to find 1855 * @name: qstr of name we wish to find
@@ -1382,17 +1864,24 @@ EXPORT_SYMBOL(d_lookup);
1382 * 1864 *
1383 * __d_lookup callers must be commented. 1865 * __d_lookup callers must be commented.
1384 */ 1866 */
1385struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) 1867struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
1386{ 1868{
1387 unsigned int len = name->len; 1869 unsigned int len = name->len;
1388 unsigned int hash = name->hash; 1870 unsigned int hash = name->hash;
1389 const unsigned char *str = name->name; 1871 const unsigned char *str = name->name;
1390 struct hlist_head *head = d_hash(parent,hash); 1872 struct dcache_hash_bucket *b = d_hash(parent, hash);
1873 struct hlist_bl_node *node;
1391 struct dentry *found = NULL; 1874 struct dentry *found = NULL;
1392 struct hlist_node *node;
1393 struct dentry *dentry; 1875 struct dentry *dentry;
1394 1876
1395 /* 1877 /*
1878 * Note: There is significant duplication with __d_lookup_rcu which is
1879 * required to prevent single threaded performance regressions
1880 * especially on architectures where smp_rmb (in seqcounts) are costly.
1881 * Keep the two functions in sync.
1882 */
1883
1884 /*
1396 * The hash list is protected using RCU. 1885 * The hash list is protected using RCU.
1397 * 1886 *
1398 * Take d_lock when comparing a candidate dentry, to avoid races 1887 * Take d_lock when comparing a candidate dentry, to avoid races
@@ -1407,25 +1896,16 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
1407 */ 1896 */
1408 rcu_read_lock(); 1897 rcu_read_lock();
1409 1898
1410 hlist_for_each_entry_rcu(dentry, node, head, d_hash) { 1899 hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) {
1411 struct qstr *qstr; 1900 const char *tname;
1901 int tlen;
1412 1902
1413 if (dentry->d_name.hash != hash) 1903 if (dentry->d_name.hash != hash)
1414 continue; 1904 continue;
1415 if (dentry->d_parent != parent)
1416 continue;
1417 1905
1418 spin_lock(&dentry->d_lock); 1906 spin_lock(&dentry->d_lock);
1419
1420 /*
1421 * Recheck the dentry after taking the lock - d_move may have
1422 * changed things. Don't bother checking the hash because
1423 * we're about to compare the whole name anyway.
1424 */
1425 if (dentry->d_parent != parent) 1907 if (dentry->d_parent != parent)
1426 goto next; 1908 goto next;
1427
1428 /* non-existing due to RCU? */
1429 if (d_unhashed(dentry)) 1909 if (d_unhashed(dentry))
1430 goto next; 1910 goto next;
1431 1911
@@ -1433,18 +1913,19 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
1433 * It is safe to compare names since d_move() cannot 1913 * It is safe to compare names since d_move() cannot
1434 * change the qstr (protected by d_lock). 1914 * change the qstr (protected by d_lock).
1435 */ 1915 */
1436 qstr = &dentry->d_name; 1916 tlen = dentry->d_name.len;
1437 if (parent->d_op && parent->d_op->d_compare) { 1917 tname = dentry->d_name.name;
1438 if (parent->d_op->d_compare(parent, qstr, name)) 1918 if (parent->d_flags & DCACHE_OP_COMPARE) {
1919 if (parent->d_op->d_compare(parent, parent->d_inode,
1920 dentry, dentry->d_inode,
1921 tlen, tname, name))
1439 goto next; 1922 goto next;
1440 } else { 1923 } else {
1441 if (qstr->len != len) 1924 if (dentry_cmp(tname, tlen, str, len))
1442 goto next;
1443 if (memcmp(qstr->name, str, len))
1444 goto next; 1925 goto next;
1445 } 1926 }
1446 1927
1447 atomic_inc(&dentry->d_count); 1928 dentry->d_count++;
1448 found = dentry; 1929 found = dentry;
1449 spin_unlock(&dentry->d_lock); 1930 spin_unlock(&dentry->d_lock);
1450 break; 1931 break;
@@ -1473,8 +1954,8 @@ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
1473 * routine may choose to leave the hash value unchanged. 1954 * routine may choose to leave the hash value unchanged.
1474 */ 1955 */
1475 name->hash = full_name_hash(name->name, name->len); 1956 name->hash = full_name_hash(name->name, name->len);
1476 if (dir->d_op && dir->d_op->d_hash) { 1957 if (dir->d_flags & DCACHE_OP_HASH) {
1477 if (dir->d_op->d_hash(dir, name) < 0) 1958 if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0)
1478 goto out; 1959 goto out;
1479 } 1960 }
1480 dentry = d_lookup(dir, name); 1961 dentry = d_lookup(dir, name);
@@ -1483,34 +1964,32 @@ out:
1483} 1964}
1484 1965
1485/** 1966/**
1486 * d_validate - verify dentry provided from insecure source 1967 * d_validate - verify dentry provided from insecure source (deprecated)
1487 * @dentry: The dentry alleged to be valid child of @dparent 1968 * @dentry: The dentry alleged to be valid child of @dparent
1488 * @dparent: The parent dentry (known to be valid) 1969 * @dparent: The parent dentry (known to be valid)
1489 * 1970 *
1490 * An insecure source has sent us a dentry, here we verify it and dget() it. 1971 * An insecure source has sent us a dentry, here we verify it and dget() it.
1491 * This is used by ncpfs in its readdir implementation. 1972 * This is used by ncpfs in its readdir implementation.
1492 * Zero is returned if the dentry is invalid. 1973 * Zero is returned if the dentry is invalid.
1974 *
1975 * This function is slow for big directories, and deprecated, do not use it.
1493 */ 1976 */
1494int d_validate(struct dentry *dentry, struct dentry *parent) 1977int d_validate(struct dentry *dentry, struct dentry *dparent)
1495{ 1978{
1496 struct hlist_head *head = d_hash(parent, dentry->d_name.hash); 1979 struct dentry *child;
1497 struct hlist_node *node;
1498 struct dentry *d;
1499
1500 /* Check whether the ptr might be valid at all.. */
1501 if (!kmem_ptr_validate(dentry_cache, dentry))
1502 return 0;
1503 if (dentry->d_parent != parent)
1504 return 0;
1505 1980
1506 rcu_read_lock(); 1981 spin_lock(&dparent->d_lock);
1507 hlist_for_each_entry_rcu(d, node, head, d_hash) { 1982 list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
1508 if (d == dentry) { 1983 if (dentry == child) {
1509 dget(dentry); 1984 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1985 __dget_dlock(dentry);
1986 spin_unlock(&dentry->d_lock);
1987 spin_unlock(&dparent->d_lock);
1510 return 1; 1988 return 1;
1511 } 1989 }
1512 } 1990 }
1513 rcu_read_unlock(); 1991 spin_unlock(&dparent->d_lock);
1992
1514 return 0; 1993 return 0;
1515} 1994}
1516EXPORT_SYMBOL(d_validate); 1995EXPORT_SYMBOL(d_validate);
@@ -1538,16 +2017,23 @@ EXPORT_SYMBOL(d_validate);
1538 2017
1539void d_delete(struct dentry * dentry) 2018void d_delete(struct dentry * dentry)
1540{ 2019{
2020 struct inode *inode;
1541 int isdir = 0; 2021 int isdir = 0;
1542 /* 2022 /*
1543 * Are we the only user? 2023 * Are we the only user?
1544 */ 2024 */
1545 spin_lock(&dcache_lock); 2025again:
1546 spin_lock(&dentry->d_lock); 2026 spin_lock(&dentry->d_lock);
1547 isdir = S_ISDIR(dentry->d_inode->i_mode); 2027 inode = dentry->d_inode;
1548 if (atomic_read(&dentry->d_count) == 1) { 2028 isdir = S_ISDIR(inode->i_mode);
2029 if (dentry->d_count == 1) {
2030 if (inode && !spin_trylock(&inode->i_lock)) {
2031 spin_unlock(&dentry->d_lock);
2032 cpu_relax();
2033 goto again;
2034 }
1549 dentry->d_flags &= ~DCACHE_CANT_MOUNT; 2035 dentry->d_flags &= ~DCACHE_CANT_MOUNT;
1550 dentry_iput(dentry); 2036 dentry_unlink_inode(dentry);
1551 fsnotify_nameremove(dentry, isdir); 2037 fsnotify_nameremove(dentry, isdir);
1552 return; 2038 return;
1553 } 2039 }
@@ -1556,17 +2042,18 @@ void d_delete(struct dentry * dentry)
1556 __d_drop(dentry); 2042 __d_drop(dentry);
1557 2043
1558 spin_unlock(&dentry->d_lock); 2044 spin_unlock(&dentry->d_lock);
1559 spin_unlock(&dcache_lock);
1560 2045
1561 fsnotify_nameremove(dentry, isdir); 2046 fsnotify_nameremove(dentry, isdir);
1562} 2047}
1563EXPORT_SYMBOL(d_delete); 2048EXPORT_SYMBOL(d_delete);
1564 2049
1565static void __d_rehash(struct dentry * entry, struct hlist_head *list) 2050static void __d_rehash(struct dentry * entry, struct dcache_hash_bucket *b)
1566{ 2051{
1567 2052 BUG_ON(!d_unhashed(entry));
2053 spin_lock_bucket(b);
1568 entry->d_flags &= ~DCACHE_UNHASHED; 2054 entry->d_flags &= ~DCACHE_UNHASHED;
1569 hlist_add_head_rcu(&entry->d_hash, list); 2055 hlist_bl_add_head_rcu(&entry->d_hash, &b->head);
2056 spin_unlock_bucket(b);
1570} 2057}
1571 2058
1572static void _d_rehash(struct dentry * entry) 2059static void _d_rehash(struct dentry * entry)
@@ -1583,25 +2070,39 @@ static void _d_rehash(struct dentry * entry)
1583 2070
1584void d_rehash(struct dentry * entry) 2071void d_rehash(struct dentry * entry)
1585{ 2072{
1586 spin_lock(&dcache_lock);
1587 spin_lock(&entry->d_lock); 2073 spin_lock(&entry->d_lock);
1588 _d_rehash(entry); 2074 _d_rehash(entry);
1589 spin_unlock(&entry->d_lock); 2075 spin_unlock(&entry->d_lock);
1590 spin_unlock(&dcache_lock);
1591} 2076}
1592EXPORT_SYMBOL(d_rehash); 2077EXPORT_SYMBOL(d_rehash);
1593 2078
1594/* 2079/**
1595 * When switching names, the actual string doesn't strictly have to 2080 * dentry_update_name_case - update case insensitive dentry with a new name
1596 * be preserved in the target - because we're dropping the target 2081 * @dentry: dentry to be updated
1597 * anyway. As such, we can just do a simple memcpy() to copy over 2082 * @name: new name
1598 * the new name before we switch.
1599 * 2083 *
1600 * Note that we have to be a lot more careful about getting the hash 2084 * Update a case insensitive dentry with new case of name.
1601 * switched - we have to switch the hash value properly even if it 2085 *
1602 * then no longer matches the actual (corrupted) string of the target. 2086 * dentry must have been returned by d_lookup with name @name. Old and new
1603 * The hash value has to match the hash queue that the dentry is on.. 2087 * name lengths must match (ie. no d_compare which allows mismatched name
2088 * lengths).
2089 *
2090 * Parent inode i_mutex must be held over d_lookup and into this call (to
2091 * keep renames and concurrent inserts, and readdir(2) away).
1604 */ 2092 */
2093void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
2094{
2095 BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
2096 BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
2097
2098 spin_lock(&dentry->d_lock);
2099 write_seqcount_begin(&dentry->d_seq);
2100 memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
2101 write_seqcount_end(&dentry->d_seq);
2102 spin_unlock(&dentry->d_lock);
2103}
2104EXPORT_SYMBOL(dentry_update_name_case);
2105
1605static void switch_names(struct dentry *dentry, struct dentry *target) 2106static void switch_names(struct dentry *dentry, struct dentry *target)
1606{ 2107{
1607 if (dname_external(target)) { 2108 if (dname_external(target)) {
@@ -1643,54 +2144,84 @@ static void switch_names(struct dentry *dentry, struct dentry *target)
1643 swap(dentry->d_name.len, target->d_name.len); 2144 swap(dentry->d_name.len, target->d_name.len);
1644} 2145}
1645 2146
2147static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
2148{
2149 /*
2150 * XXXX: do we really need to take target->d_lock?
2151 */
2152 if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent)
2153 spin_lock(&target->d_parent->d_lock);
2154 else {
2155 if (d_ancestor(dentry->d_parent, target->d_parent)) {
2156 spin_lock(&dentry->d_parent->d_lock);
2157 spin_lock_nested(&target->d_parent->d_lock,
2158 DENTRY_D_LOCK_NESTED);
2159 } else {
2160 spin_lock(&target->d_parent->d_lock);
2161 spin_lock_nested(&dentry->d_parent->d_lock,
2162 DENTRY_D_LOCK_NESTED);
2163 }
2164 }
2165 if (target < dentry) {
2166 spin_lock_nested(&target->d_lock, 2);
2167 spin_lock_nested(&dentry->d_lock, 3);
2168 } else {
2169 spin_lock_nested(&dentry->d_lock, 2);
2170 spin_lock_nested(&target->d_lock, 3);
2171 }
2172}
2173
2174static void dentry_unlock_parents_for_move(struct dentry *dentry,
2175 struct dentry *target)
2176{
2177 if (target->d_parent != dentry->d_parent)
2178 spin_unlock(&dentry->d_parent->d_lock);
2179 if (target->d_parent != target)
2180 spin_unlock(&target->d_parent->d_lock);
2181}
2182
1646/* 2183/*
1647 * We cannibalize "target" when moving dentry on top of it, 2184 * When switching names, the actual string doesn't strictly have to
1648 * because it's going to be thrown away anyway. We could be more 2185 * be preserved in the target - because we're dropping the target
1649 * polite about it, though. 2186 * anyway. As such, we can just do a simple memcpy() to copy over
1650 * 2187 * the new name before we switch.
1651 * This forceful removal will result in ugly /proc output if 2188 *
1652 * somebody holds a file open that got deleted due to a rename. 2189 * Note that we have to be a lot more careful about getting the hash
1653 * We could be nicer about the deleted file, and let it show 2190 * switched - we have to switch the hash value properly even if it
1654 * up under the name it had before it was deleted rather than 2191 * then no longer matches the actual (corrupted) string of the target.
1655 * under the original name of the file that was moved on top of it. 2192 * The hash value has to match the hash queue that the dentry is on..
1656 */ 2193 */
1657
1658/* 2194/*
1659 * d_move_locked - move a dentry 2195 * d_move - move a dentry
1660 * @dentry: entry to move 2196 * @dentry: entry to move
1661 * @target: new dentry 2197 * @target: new dentry
1662 * 2198 *
1663 * Update the dcache to reflect the move of a file name. Negative 2199 * Update the dcache to reflect the move of a file name. Negative
1664 * dcache entries should not be moved in this way. 2200 * dcache entries should not be moved in this way.
1665 */ 2201 */
1666static void d_move_locked(struct dentry * dentry, struct dentry * target) 2202void d_move(struct dentry * dentry, struct dentry * target)
1667{ 2203{
1668 struct hlist_head *list;
1669
1670 if (!dentry->d_inode) 2204 if (!dentry->d_inode)
1671 printk(KERN_WARNING "VFS: moving negative dcache entry\n"); 2205 printk(KERN_WARNING "VFS: moving negative dcache entry\n");
1672 2206
2207 BUG_ON(d_ancestor(dentry, target));
2208 BUG_ON(d_ancestor(target, dentry));
2209
1673 write_seqlock(&rename_lock); 2210 write_seqlock(&rename_lock);
1674 /*
1675 * XXXX: do we really need to take target->d_lock?
1676 */
1677 if (target < dentry) {
1678 spin_lock(&target->d_lock);
1679 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1680 } else {
1681 spin_lock(&dentry->d_lock);
1682 spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED);
1683 }
1684 2211
1685 /* Move the dentry to the target hash queue, if on different bucket */ 2212 dentry_lock_for_move(dentry, target);
1686 if (d_unhashed(dentry))
1687 goto already_unhashed;
1688 2213
1689 hlist_del_rcu(&dentry->d_hash); 2214 write_seqcount_begin(&dentry->d_seq);
2215 write_seqcount_begin(&target->d_seq);
1690 2216
1691already_unhashed: 2217 /* __d_drop does write_seqcount_barrier, but they're OK to nest. */
1692 list = d_hash(target->d_parent, target->d_name.hash); 2218
1693 __d_rehash(dentry, list); 2219 /*
2220 * Move the dentry to the target hash queue. Don't bother checking
2221 * for the same hash queue because of how unlikely it is.
2222 */
2223 __d_drop(dentry);
2224 __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));
1694 2225
1695 /* Unhash the target: dput() will then get rid of it */ 2226 /* Unhash the target: dput() will then get rid of it */
1696 __d_drop(target); 2227 __d_drop(target);
@@ -1715,27 +2246,16 @@ already_unhashed:
1715 } 2246 }
1716 2247
1717 list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); 2248 list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
2249
2250 write_seqcount_end(&target->d_seq);
2251 write_seqcount_end(&dentry->d_seq);
2252
2253 dentry_unlock_parents_for_move(dentry, target);
1718 spin_unlock(&target->d_lock); 2254 spin_unlock(&target->d_lock);
1719 fsnotify_d_move(dentry); 2255 fsnotify_d_move(dentry);
1720 spin_unlock(&dentry->d_lock); 2256 spin_unlock(&dentry->d_lock);
1721 write_sequnlock(&rename_lock); 2257 write_sequnlock(&rename_lock);
1722} 2258}
1723
1724/**
1725 * d_move - move a dentry
1726 * @dentry: entry to move
1727 * @target: new dentry
1728 *
1729 * Update the dcache to reflect the move of a file name. Negative
1730 * dcache entries should not be moved in this way.
1731 */
1732
1733void d_move(struct dentry * dentry, struct dentry * target)
1734{
1735 spin_lock(&dcache_lock);
1736 d_move_locked(dentry, target);
1737 spin_unlock(&dcache_lock);
1738}
1739EXPORT_SYMBOL(d_move); 2259EXPORT_SYMBOL(d_move);
1740 2260
1741/** 2261/**
@@ -1761,13 +2281,13 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
1761 * This helper attempts to cope with remotely renamed directories 2281 * This helper attempts to cope with remotely renamed directories
1762 * 2282 *
1763 * It assumes that the caller is already holding 2283 * It assumes that the caller is already holding
1764 * dentry->d_parent->d_inode->i_mutex and the dcache_lock 2284 * dentry->d_parent->d_inode->i_mutex and the inode->i_lock
1765 * 2285 *
1766 * Note: If ever the locking in lock_rename() changes, then please 2286 * Note: If ever the locking in lock_rename() changes, then please
1767 * remember to update this too... 2287 * remember to update this too...
1768 */ 2288 */
1769static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias) 2289static struct dentry *__d_unalias(struct inode *inode,
1770 __releases(dcache_lock) 2290 struct dentry *dentry, struct dentry *alias)
1771{ 2291{
1772 struct mutex *m1 = NULL, *m2 = NULL; 2292 struct mutex *m1 = NULL, *m2 = NULL;
1773 struct dentry *ret; 2293 struct dentry *ret;
@@ -1790,10 +2310,10 @@ static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
1790 goto out_err; 2310 goto out_err;
1791 m2 = &alias->d_parent->d_inode->i_mutex; 2311 m2 = &alias->d_parent->d_inode->i_mutex;
1792out_unalias: 2312out_unalias:
1793 d_move_locked(alias, dentry); 2313 d_move(alias, dentry);
1794 ret = alias; 2314 ret = alias;
1795out_err: 2315out_err:
1796 spin_unlock(&dcache_lock); 2316 spin_unlock(&inode->i_lock);
1797 if (m2) 2317 if (m2)
1798 mutex_unlock(m2); 2318 mutex_unlock(m2);
1799 if (m1) 2319 if (m1)
@@ -1804,17 +2324,23 @@ out_err:
1804/* 2324/*
1805 * Prepare an anonymous dentry for life in the superblock's dentry tree as a 2325 * Prepare an anonymous dentry for life in the superblock's dentry tree as a
1806 * named dentry in place of the dentry to be replaced. 2326 * named dentry in place of the dentry to be replaced.
2327 * returns with anon->d_lock held!
1807 */ 2328 */
1808static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) 2329static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
1809{ 2330{
1810 struct dentry *dparent, *aparent; 2331 struct dentry *dparent, *aparent;
1811 2332
1812 switch_names(dentry, anon); 2333 dentry_lock_for_move(anon, dentry);
1813 swap(dentry->d_name.hash, anon->d_name.hash); 2334
2335 write_seqcount_begin(&dentry->d_seq);
2336 write_seqcount_begin(&anon->d_seq);
1814 2337
1815 dparent = dentry->d_parent; 2338 dparent = dentry->d_parent;
1816 aparent = anon->d_parent; 2339 aparent = anon->d_parent;
1817 2340
2341 switch_names(dentry, anon);
2342 swap(dentry->d_name.hash, anon->d_name.hash);
2343
1818 dentry->d_parent = (aparent == anon) ? dentry : aparent; 2344 dentry->d_parent = (aparent == anon) ? dentry : aparent;
1819 list_del(&dentry->d_u.d_child); 2345 list_del(&dentry->d_u.d_child);
1820 if (!IS_ROOT(dentry)) 2346 if (!IS_ROOT(dentry))
@@ -1829,6 +2355,13 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
1829 else 2355 else
1830 INIT_LIST_HEAD(&anon->d_u.d_child); 2356 INIT_LIST_HEAD(&anon->d_u.d_child);
1831 2357
2358 write_seqcount_end(&dentry->d_seq);
2359 write_seqcount_end(&anon->d_seq);
2360
2361 dentry_unlock_parents_for_move(anon, dentry);
2362 spin_unlock(&dentry->d_lock);
2363
2364 /* anon->d_lock still locked, returns locked */
1832 anon->d_flags &= ~DCACHE_DISCONNECTED; 2365 anon->d_flags &= ~DCACHE_DISCONNECTED;
1833} 2366}
1834 2367
@@ -1846,14 +2379,15 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
1846 2379
1847 BUG_ON(!d_unhashed(dentry)); 2380 BUG_ON(!d_unhashed(dentry));
1848 2381
1849 spin_lock(&dcache_lock);
1850
1851 if (!inode) { 2382 if (!inode) {
1852 actual = dentry; 2383 actual = dentry;
1853 __d_instantiate(dentry, NULL); 2384 __d_instantiate(dentry, NULL);
1854 goto found_lock; 2385 d_rehash(actual);
2386 goto out_nolock;
1855 } 2387 }
1856 2388
2389 spin_lock(&inode->i_lock);
2390
1857 if (S_ISDIR(inode->i_mode)) { 2391 if (S_ISDIR(inode->i_mode)) {
1858 struct dentry *alias; 2392 struct dentry *alias;
1859 2393
@@ -1864,13 +2398,12 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
1864 /* Is this an anonymous mountpoint that we could splice 2398 /* Is this an anonymous mountpoint that we could splice
1865 * into our tree? */ 2399 * into our tree? */
1866 if (IS_ROOT(alias)) { 2400 if (IS_ROOT(alias)) {
1867 spin_lock(&alias->d_lock);
1868 __d_materialise_dentry(dentry, alias); 2401 __d_materialise_dentry(dentry, alias);
1869 __d_drop(alias); 2402 __d_drop(alias);
1870 goto found; 2403 goto found;
1871 } 2404 }
1872 /* Nope, but we must(!) avoid directory aliasing */ 2405 /* Nope, but we must(!) avoid directory aliasing */
1873 actual = __d_unalias(dentry, alias); 2406 actual = __d_unalias(inode, dentry, alias);
1874 if (IS_ERR(actual)) 2407 if (IS_ERR(actual))
1875 dput(alias); 2408 dput(alias);
1876 goto out_nolock; 2409 goto out_nolock;
@@ -1881,15 +2414,14 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
1881 actual = __d_instantiate_unique(dentry, inode); 2414 actual = __d_instantiate_unique(dentry, inode);
1882 if (!actual) 2415 if (!actual)
1883 actual = dentry; 2416 actual = dentry;
1884 else if (unlikely(!d_unhashed(actual))) 2417 else
1885 goto shouldnt_be_hashed; 2418 BUG_ON(!d_unhashed(actual));
1886 2419
1887found_lock:
1888 spin_lock(&actual->d_lock); 2420 spin_lock(&actual->d_lock);
1889found: 2421found:
1890 _d_rehash(actual); 2422 _d_rehash(actual);
1891 spin_unlock(&actual->d_lock); 2423 spin_unlock(&actual->d_lock);
1892 spin_unlock(&dcache_lock); 2424 spin_unlock(&inode->i_lock);
1893out_nolock: 2425out_nolock:
1894 if (actual == dentry) { 2426 if (actual == dentry) {
1895 security_d_instantiate(dentry, inode); 2427 security_d_instantiate(dentry, inode);
@@ -1898,10 +2430,6 @@ out_nolock:
1898 2430
1899 iput(inode); 2431 iput(inode);
1900 return actual; 2432 return actual;
1901
1902shouldnt_be_hashed:
1903 spin_unlock(&dcache_lock);
1904 BUG();
1905} 2433}
1906EXPORT_SYMBOL_GPL(d_materialise_unique); 2434EXPORT_SYMBOL_GPL(d_materialise_unique);
1907 2435
@@ -1928,7 +2456,7 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name)
1928 * @buffer: pointer to the end of the buffer 2456 * @buffer: pointer to the end of the buffer
1929 * @buflen: pointer to buffer length 2457 * @buflen: pointer to buffer length
1930 * 2458 *
1931 * Caller holds the dcache_lock. 2459 * Caller holds the rename_lock.
1932 * 2460 *
1933 * If path is not reachable from the supplied root, then the value of 2461 * If path is not reachable from the supplied root, then the value of
1934 * root is changed (without modifying refcounts). 2462 * root is changed (without modifying refcounts).
@@ -1956,7 +2484,9 @@ static int prepend_path(const struct path *path, struct path *root,
1956 } 2484 }
1957 parent = dentry->d_parent; 2485 parent = dentry->d_parent;
1958 prefetch(parent); 2486 prefetch(parent);
2487 spin_lock(&dentry->d_lock);
1959 error = prepend_name(buffer, buflen, &dentry->d_name); 2488 error = prepend_name(buffer, buflen, &dentry->d_name);
2489 spin_unlock(&dentry->d_lock);
1960 if (!error) 2490 if (!error)
1961 error = prepend(buffer, buflen, "/", 1); 2491 error = prepend(buffer, buflen, "/", 1);
1962 if (error) 2492 if (error)
@@ -2012,9 +2542,9 @@ char *__d_path(const struct path *path, struct path *root,
2012 int error; 2542 int error;
2013 2543
2014 prepend(&res, &buflen, "\0", 1); 2544 prepend(&res, &buflen, "\0", 1);
2015 spin_lock(&dcache_lock); 2545 write_seqlock(&rename_lock);
2016 error = prepend_path(path, root, &res, &buflen); 2546 error = prepend_path(path, root, &res, &buflen);
2017 spin_unlock(&dcache_lock); 2547 write_sequnlock(&rename_lock);
2018 2548
2019 if (error) 2549 if (error)
2020 return ERR_PTR(error); 2550 return ERR_PTR(error);
@@ -2076,12 +2606,12 @@ char *d_path(const struct path *path, char *buf, int buflen)
2076 return path->dentry->d_op->d_dname(path->dentry, buf, buflen); 2606 return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2077 2607
2078 get_fs_root(current->fs, &root); 2608 get_fs_root(current->fs, &root);
2079 spin_lock(&dcache_lock); 2609 write_seqlock(&rename_lock);
2080 tmp = root; 2610 tmp = root;
2081 error = path_with_deleted(path, &tmp, &res, &buflen); 2611 error = path_with_deleted(path, &tmp, &res, &buflen);
2082 if (error) 2612 if (error)
2083 res = ERR_PTR(error); 2613 res = ERR_PTR(error);
2084 spin_unlock(&dcache_lock); 2614 write_sequnlock(&rename_lock);
2085 path_put(&root); 2615 path_put(&root);
2086 return res; 2616 return res;
2087} 2617}
@@ -2107,12 +2637,12 @@ char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
2107 return path->dentry->d_op->d_dname(path->dentry, buf, buflen); 2637 return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2108 2638
2109 get_fs_root(current->fs, &root); 2639 get_fs_root(current->fs, &root);
2110 spin_lock(&dcache_lock); 2640 write_seqlock(&rename_lock);
2111 tmp = root; 2641 tmp = root;
2112 error = path_with_deleted(path, &tmp, &res, &buflen); 2642 error = path_with_deleted(path, &tmp, &res, &buflen);
2113 if (!error && !path_equal(&tmp, &root)) 2643 if (!error && !path_equal(&tmp, &root))
2114 error = prepend_unreachable(&res, &buflen); 2644 error = prepend_unreachable(&res, &buflen);
2115 spin_unlock(&dcache_lock); 2645 write_sequnlock(&rename_lock);
2116 path_put(&root); 2646 path_put(&root);
2117 if (error) 2647 if (error)
2118 res = ERR_PTR(error); 2648 res = ERR_PTR(error);
@@ -2144,7 +2674,7 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
2144/* 2674/*
2145 * Write full pathname from the root of the filesystem into the buffer. 2675 * Write full pathname from the root of the filesystem into the buffer.
2146 */ 2676 */
2147char *__dentry_path(struct dentry *dentry, char *buf, int buflen) 2677static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
2148{ 2678{
2149 char *end = buf + buflen; 2679 char *end = buf + buflen;
2150 char *retval; 2680 char *retval;
@@ -2158,10 +2688,13 @@ char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
2158 2688
2159 while (!IS_ROOT(dentry)) { 2689 while (!IS_ROOT(dentry)) {
2160 struct dentry *parent = dentry->d_parent; 2690 struct dentry *parent = dentry->d_parent;
2691 int error;
2161 2692
2162 prefetch(parent); 2693 prefetch(parent);
2163 if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) || 2694 spin_lock(&dentry->d_lock);
2164 (prepend(&end, &buflen, "/", 1) != 0)) 2695 error = prepend_name(&end, &buflen, &dentry->d_name);
2696 spin_unlock(&dentry->d_lock);
2697 if (error != 0 || prepend(&end, &buflen, "/", 1) != 0)
2165 goto Elong; 2698 goto Elong;
2166 2699
2167 retval = end; 2700 retval = end;
@@ -2171,14 +2704,25 @@ char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
2171Elong: 2704Elong:
2172 return ERR_PTR(-ENAMETOOLONG); 2705 return ERR_PTR(-ENAMETOOLONG);
2173} 2706}
2174EXPORT_SYMBOL(__dentry_path); 2707
2708char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
2709{
2710 char *retval;
2711
2712 write_seqlock(&rename_lock);
2713 retval = __dentry_path(dentry, buf, buflen);
2714 write_sequnlock(&rename_lock);
2715
2716 return retval;
2717}
2718EXPORT_SYMBOL(dentry_path_raw);
2175 2719
2176char *dentry_path(struct dentry *dentry, char *buf, int buflen) 2720char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2177{ 2721{
2178 char *p = NULL; 2722 char *p = NULL;
2179 char *retval; 2723 char *retval;
2180 2724
2181 spin_lock(&dcache_lock); 2725 write_seqlock(&rename_lock);
2182 if (d_unlinked(dentry)) { 2726 if (d_unlinked(dentry)) {
2183 p = buf + buflen; 2727 p = buf + buflen;
2184 if (prepend(&p, &buflen, "//deleted", 10) != 0) 2728 if (prepend(&p, &buflen, "//deleted", 10) != 0)
@@ -2186,12 +2730,11 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2186 buflen++; 2730 buflen++;
2187 } 2731 }
2188 retval = __dentry_path(dentry, buf, buflen); 2732 retval = __dentry_path(dentry, buf, buflen);
2189 spin_unlock(&dcache_lock); 2733 write_sequnlock(&rename_lock);
2190 if (!IS_ERR(retval) && p) 2734 if (!IS_ERR(retval) && p)
2191 *p = '/'; /* restore '/' overriden with '\0' */ 2735 *p = '/'; /* restore '/' overriden with '\0' */
2192 return retval; 2736 return retval;
2193Elong: 2737Elong:
2194 spin_unlock(&dcache_lock);
2195 return ERR_PTR(-ENAMETOOLONG); 2738 return ERR_PTR(-ENAMETOOLONG);
2196} 2739}
2197 2740
@@ -2225,7 +2768,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2225 get_fs_root_and_pwd(current->fs, &root, &pwd); 2768 get_fs_root_and_pwd(current->fs, &root, &pwd);
2226 2769
2227 error = -ENOENT; 2770 error = -ENOENT;
2228 spin_lock(&dcache_lock); 2771 write_seqlock(&rename_lock);
2229 if (!d_unlinked(pwd.dentry)) { 2772 if (!d_unlinked(pwd.dentry)) {
2230 unsigned long len; 2773 unsigned long len;
2231 struct path tmp = root; 2774 struct path tmp = root;
@@ -2234,7 +2777,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2234 2777
2235 prepend(&cwd, &buflen, "\0", 1); 2778 prepend(&cwd, &buflen, "\0", 1);
2236 error = prepend_path(&pwd, &tmp, &cwd, &buflen); 2779 error = prepend_path(&pwd, &tmp, &cwd, &buflen);
2237 spin_unlock(&dcache_lock); 2780 write_sequnlock(&rename_lock);
2238 2781
2239 if (error) 2782 if (error)
2240 goto out; 2783 goto out;
@@ -2253,8 +2796,9 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2253 if (copy_to_user(buf, cwd, len)) 2796 if (copy_to_user(buf, cwd, len))
2254 error = -EFAULT; 2797 error = -EFAULT;
2255 } 2798 }
2256 } else 2799 } else {
2257 spin_unlock(&dcache_lock); 2800 write_sequnlock(&rename_lock);
2801 }
2258 2802
2259out: 2803out:
2260 path_put(&pwd); 2804 path_put(&pwd);
@@ -2282,25 +2826,25 @@ out:
2282int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) 2826int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
2283{ 2827{
2284 int result; 2828 int result;
2285 unsigned long seq; 2829 unsigned seq;
2286 2830
2287 if (new_dentry == old_dentry) 2831 if (new_dentry == old_dentry)
2288 return 1; 2832 return 1;
2289 2833
2290 /*
2291 * Need rcu_readlock to protect against the d_parent trashing
2292 * due to d_move
2293 */
2294 rcu_read_lock();
2295 do { 2834 do {
2296 /* for restarting inner loop in case of seq retry */ 2835 /* for restarting inner loop in case of seq retry */
2297 seq = read_seqbegin(&rename_lock); 2836 seq = read_seqbegin(&rename_lock);
2837 /*
2838 * Need rcu_readlock to protect against the d_parent trashing
2839 * due to d_move
2840 */
2841 rcu_read_lock();
2298 if (d_ancestor(old_dentry, new_dentry)) 2842 if (d_ancestor(old_dentry, new_dentry))
2299 result = 1; 2843 result = 1;
2300 else 2844 else
2301 result = 0; 2845 result = 0;
2846 rcu_read_unlock();
2302 } while (read_seqretry(&rename_lock, seq)); 2847 } while (read_seqretry(&rename_lock, seq));
2303 rcu_read_unlock();
2304 2848
2305 return result; 2849 return result;
2306} 2850}
@@ -2332,10 +2876,15 @@ EXPORT_SYMBOL(path_is_under);
2332 2876
2333void d_genocide(struct dentry *root) 2877void d_genocide(struct dentry *root)
2334{ 2878{
2335 struct dentry *this_parent = root; 2879 struct dentry *this_parent;
2336 struct list_head *next; 2880 struct list_head *next;
2881 unsigned seq;
2882 int locked = 0;
2337 2883
2338 spin_lock(&dcache_lock); 2884 seq = read_seqbegin(&rename_lock);
2885again:
2886 this_parent = root;
2887 spin_lock(&this_parent->d_lock);
2339repeat: 2888repeat:
2340 next = this_parent->d_subdirs.next; 2889 next = this_parent->d_subdirs.next;
2341resume: 2890resume:
@@ -2343,21 +2892,62 @@ resume:
2343 struct list_head *tmp = next; 2892 struct list_head *tmp = next;
2344 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); 2893 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
2345 next = tmp->next; 2894 next = tmp->next;
2346 if (d_unhashed(dentry)||!dentry->d_inode) 2895
2896 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
2897 if (d_unhashed(dentry) || !dentry->d_inode) {
2898 spin_unlock(&dentry->d_lock);
2347 continue; 2899 continue;
2900 }
2348 if (!list_empty(&dentry->d_subdirs)) { 2901 if (!list_empty(&dentry->d_subdirs)) {
2902 spin_unlock(&this_parent->d_lock);
2903 spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
2349 this_parent = dentry; 2904 this_parent = dentry;
2905 spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
2350 goto repeat; 2906 goto repeat;
2351 } 2907 }
2352 atomic_dec(&dentry->d_count); 2908 if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
2909 dentry->d_flags |= DCACHE_GENOCIDE;
2910 dentry->d_count--;
2911 }
2912 spin_unlock(&dentry->d_lock);
2353 } 2913 }
2354 if (this_parent != root) { 2914 if (this_parent != root) {
2355 next = this_parent->d_u.d_child.next; 2915 struct dentry *tmp;
2356 atomic_dec(&this_parent->d_count); 2916 struct dentry *child;
2357 this_parent = this_parent->d_parent; 2917
2918 tmp = this_parent->d_parent;
2919 if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
2920 this_parent->d_flags |= DCACHE_GENOCIDE;
2921 this_parent->d_count--;
2922 }
2923 rcu_read_lock();
2924 spin_unlock(&this_parent->d_lock);
2925 child = this_parent;
2926 this_parent = tmp;
2927 spin_lock(&this_parent->d_lock);
2928 /* might go back up the wrong parent if we have had a rename
2929 * or deletion */
2930 if (this_parent != child->d_parent ||
2931 (!locked && read_seqretry(&rename_lock, seq))) {
2932 spin_unlock(&this_parent->d_lock);
2933 rcu_read_unlock();
2934 goto rename_retry;
2935 }
2936 rcu_read_unlock();
2937 next = child->d_u.d_child.next;
2358 goto resume; 2938 goto resume;
2359 } 2939 }
2360 spin_unlock(&dcache_lock); 2940 spin_unlock(&this_parent->d_lock);
2941 if (!locked && read_seqretry(&rename_lock, seq))
2942 goto rename_retry;
2943 if (locked)
2944 write_sequnlock(&rename_lock);
2945 return;
2946
2947rename_retry:
2948 locked = 1;
2949 write_seqlock(&rename_lock);
2950 goto again;
2361} 2951}
2362 2952
2363/** 2953/**
@@ -2411,7 +3001,7 @@ static void __init dcache_init_early(void)
2411 3001
2412 dentry_hashtable = 3002 dentry_hashtable =
2413 alloc_large_system_hash("Dentry cache", 3003 alloc_large_system_hash("Dentry cache",
2414 sizeof(struct hlist_head), 3004 sizeof(struct dcache_hash_bucket),
2415 dhash_entries, 3005 dhash_entries,
2416 13, 3006 13,
2417 HASH_EARLY, 3007 HASH_EARLY,
@@ -2420,16 +3010,13 @@ static void __init dcache_init_early(void)
2420 0); 3010 0);
2421 3011
2422 for (loop = 0; loop < (1 << d_hash_shift); loop++) 3012 for (loop = 0; loop < (1 << d_hash_shift); loop++)
2423 INIT_HLIST_HEAD(&dentry_hashtable[loop]); 3013 INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head);
2424} 3014}
2425 3015
2426static void __init dcache_init(void) 3016static void __init dcache_init(void)
2427{ 3017{
2428 int loop; 3018 int loop;
2429 3019
2430 percpu_counter_init(&nr_dentry, 0);
2431 percpu_counter_init(&nr_dentry_unused, 0);
2432
2433 /* 3020 /*
2434 * A constructor could be added for stable state like the lists, 3021 * A constructor could be added for stable state like the lists,
2435 * but it is probably not worth it because of the cache nature 3022 * but it is probably not worth it because of the cache nature
@@ -2446,7 +3033,7 @@ static void __init dcache_init(void)
2446 3033
2447 dentry_hashtable = 3034 dentry_hashtable =
2448 alloc_large_system_hash("Dentry cache", 3035 alloc_large_system_hash("Dentry cache",
2449 sizeof(struct hlist_head), 3036 sizeof(struct dcache_hash_bucket),
2450 dhash_entries, 3037 dhash_entries,
2451 13, 3038 13,
2452 0, 3039 0,
@@ -2455,7 +3042,7 @@ static void __init dcache_init(void)
2455 0); 3042 0);
2456 3043
2457 for (loop = 0; loop < (1 << d_hash_shift); loop++) 3044 for (loop = 0; loop < (1 << d_hash_shift); loop++)
2458 INIT_HLIST_HEAD(&dentry_hashtable[loop]); 3045 INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head);
2459} 3046}
2460 3047
2461/* SLAB cache for __getname() consumers */ 3048/* SLAB cache for __getname() consumers */
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 37a34c2c622..9c64ae9e4c1 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -63,6 +63,9 @@
63#define NEEDED_RMEM (4*1024*1024) 63#define NEEDED_RMEM (4*1024*1024)
64#define CONN_HASH_SIZE 32 64#define CONN_HASH_SIZE 32
65 65
66/* Number of messages to send before rescheduling */
67#define MAX_SEND_MSG_COUNT 25
68
66struct cbuf { 69struct cbuf {
67 unsigned int base; 70 unsigned int base;
68 unsigned int len; 71 unsigned int len;
@@ -108,6 +111,7 @@ struct connection {
108#define CF_INIT_PENDING 4 111#define CF_INIT_PENDING 4
109#define CF_IS_OTHERCON 5 112#define CF_IS_OTHERCON 5
110#define CF_CLOSE 6 113#define CF_CLOSE 6
114#define CF_APP_LIMITED 7
111 struct list_head writequeue; /* List of outgoing writequeue_entries */ 115 struct list_head writequeue; /* List of outgoing writequeue_entries */
112 spinlock_t writequeue_lock; 116 spinlock_t writequeue_lock;
113 int (*rx_action) (struct connection *); /* What to do when active */ 117 int (*rx_action) (struct connection *); /* What to do when active */
@@ -295,7 +299,17 @@ static void lowcomms_write_space(struct sock *sk)
295{ 299{
296 struct connection *con = sock2con(sk); 300 struct connection *con = sock2con(sk);
297 301
298 if (con && !test_and_set_bit(CF_WRITE_PENDING, &con->flags)) 302 if (!con)
303 return;
304
305 clear_bit(SOCK_NOSPACE, &con->sock->flags);
306
307 if (test_and_clear_bit(CF_APP_LIMITED, &con->flags)) {
308 con->sock->sk->sk_write_pending--;
309 clear_bit(SOCK_ASYNC_NOSPACE, &con->sock->flags);
310 }
311
312 if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags))
299 queue_work(send_workqueue, &con->swork); 313 queue_work(send_workqueue, &con->swork);
300} 314}
301 315
@@ -915,6 +929,7 @@ static void tcp_connect_to_sock(struct connection *con)
915 struct sockaddr_storage saddr, src_addr; 929 struct sockaddr_storage saddr, src_addr;
916 int addr_len; 930 int addr_len;
917 struct socket *sock = NULL; 931 struct socket *sock = NULL;
932 int one = 1;
918 933
919 if (con->nodeid == 0) { 934 if (con->nodeid == 0) {
920 log_print("attempt to connect sock 0 foiled"); 935 log_print("attempt to connect sock 0 foiled");
@@ -960,6 +975,11 @@ static void tcp_connect_to_sock(struct connection *con)
960 make_sockaddr(&saddr, dlm_config.ci_tcp_port, &addr_len); 975 make_sockaddr(&saddr, dlm_config.ci_tcp_port, &addr_len);
961 976
962 log_print("connecting to %d", con->nodeid); 977 log_print("connecting to %d", con->nodeid);
978
979 /* Turn off Nagle's algorithm */
980 kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one,
981 sizeof(one));
982
963 result = 983 result =
964 sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len, 984 sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len,
965 O_NONBLOCK); 985 O_NONBLOCK);
@@ -1011,6 +1031,10 @@ static struct socket *tcp_create_listen_sock(struct connection *con,
1011 goto create_out; 1031 goto create_out;
1012 } 1032 }
1013 1033
1034 /* Turn off Nagle's algorithm */
1035 kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one,
1036 sizeof(one));
1037
1014 result = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, 1038 result = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
1015 (char *)&one, sizeof(one)); 1039 (char *)&one, sizeof(one));
1016 1040
@@ -1297,6 +1321,7 @@ static void send_to_sock(struct connection *con)
1297 const int msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL; 1321 const int msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL;
1298 struct writequeue_entry *e; 1322 struct writequeue_entry *e;
1299 int len, offset; 1323 int len, offset;
1324 int count = 0;
1300 1325
1301 mutex_lock(&con->sock_mutex); 1326 mutex_lock(&con->sock_mutex);
1302 if (con->sock == NULL) 1327 if (con->sock == NULL)
@@ -1319,14 +1344,27 @@ static void send_to_sock(struct connection *con)
1319 ret = kernel_sendpage(con->sock, e->page, offset, len, 1344 ret = kernel_sendpage(con->sock, e->page, offset, len,
1320 msg_flags); 1345 msg_flags);
1321 if (ret == -EAGAIN || ret == 0) { 1346 if (ret == -EAGAIN || ret == 0) {
1347 if (ret == -EAGAIN &&
1348 test_bit(SOCK_ASYNC_NOSPACE, &con->sock->flags) &&
1349 !test_and_set_bit(CF_APP_LIMITED, &con->flags)) {
1350 /* Notify TCP that we're limited by the
1351 * application window size.
1352 */
1353 set_bit(SOCK_NOSPACE, &con->sock->flags);
1354 con->sock->sk->sk_write_pending++;
1355 }
1322 cond_resched(); 1356 cond_resched();
1323 goto out; 1357 goto out;
1324 } 1358 }
1325 if (ret <= 0) 1359 if (ret <= 0)
1326 goto send_error; 1360 goto send_error;
1327 } 1361 }
1328 /* Don't starve people filling buffers */ 1362
1363 /* Don't starve people filling buffers */
1364 if (++count >= MAX_SEND_MSG_COUNT) {
1329 cond_resched(); 1365 cond_resched();
1366 count = 0;
1367 }
1330 1368
1331 spin_lock(&con->writequeue_lock); 1369 spin_lock(&con->writequeue_lock);
1332 e->offset += ret; 1370 e->offset += ret;
@@ -1430,20 +1468,19 @@ static void work_stop(void)
1430 1468
1431static int work_start(void) 1469static int work_start(void)
1432{ 1470{
1433 int error; 1471 recv_workqueue = alloc_workqueue("dlm_recv", WQ_MEM_RECLAIM |
1434 recv_workqueue = create_workqueue("dlm_recv"); 1472 WQ_HIGHPRI | WQ_FREEZEABLE, 0);
1435 error = IS_ERR(recv_workqueue); 1473 if (!recv_workqueue) {
1436 if (error) { 1474 log_print("can't start dlm_recv");
1437 log_print("can't start dlm_recv %d", error); 1475 return -ENOMEM;
1438 return error;
1439 } 1476 }
1440 1477
1441 send_workqueue = create_singlethread_workqueue("dlm_send"); 1478 send_workqueue = alloc_workqueue("dlm_send", WQ_MEM_RECLAIM |
1442 error = IS_ERR(send_workqueue); 1479 WQ_HIGHPRI | WQ_FREEZEABLE, 0);
1443 if (error) { 1480 if (!send_workqueue) {
1444 log_print("can't start dlm_send %d", error); 1481 log_print("can't start dlm_send");
1445 destroy_workqueue(recv_workqueue); 1482 destroy_workqueue(recv_workqueue);
1446 return error; 1483 return -ENOMEM;
1447 } 1484 }
1448 1485
1449 return 0; 1486 return 0;
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 906e803f7f7..6fc4f319b55 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -44,12 +44,17 @@
44 */ 44 */
45static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd) 45static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
46{ 46{
47 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); 47 struct dentry *lower_dentry;
48 struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); 48 struct vfsmount *lower_mnt;
49 struct dentry *dentry_save; 49 struct dentry *dentry_save;
50 struct vfsmount *vfsmount_save; 50 struct vfsmount *vfsmount_save;
51 int rc = 1; 51 int rc = 1;
52 52
53 if (nd->flags & LOOKUP_RCU)
54 return -ECHILD;
55
56 lower_dentry = ecryptfs_dentry_to_lower(dentry);
57 lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
53 if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate) 58 if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate)
54 goto out; 59 goto out;
55 dentry_save = nd->path.dentry; 60 dentry_save = nd->path.dentry;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 9d1a22d6276..337352a9475 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -260,7 +260,7 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
260 ecryptfs_dentry->d_parent)); 260 ecryptfs_dentry->d_parent));
261 lower_inode = lower_dentry->d_inode; 261 lower_inode = lower_dentry->d_inode;
262 fsstack_copy_attr_atime(ecryptfs_dir_inode, lower_dir_dentry->d_inode); 262 fsstack_copy_attr_atime(ecryptfs_dir_inode, lower_dir_dentry->d_inode);
263 BUG_ON(!atomic_read(&lower_dentry->d_count)); 263 BUG_ON(!lower_dentry->d_count);
264 ecryptfs_set_dentry_private(ecryptfs_dentry, 264 ecryptfs_set_dentry_private(ecryptfs_dentry,
265 kmem_cache_alloc(ecryptfs_dentry_info_cache, 265 kmem_cache_alloc(ecryptfs_dentry_info_cache,
266 GFP_KERNEL)); 266 GFP_KERNEL));
@@ -441,7 +441,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
441 struct qstr lower_name; 441 struct qstr lower_name;
442 int rc = 0; 442 int rc = 0;
443 443
444 ecryptfs_dentry->d_op = &ecryptfs_dops; 444 d_set_d_op(ecryptfs_dentry, &ecryptfs_dops);
445 if ((ecryptfs_dentry->d_name.len == 1 445 if ((ecryptfs_dentry->d_name.len == 1
446 && !strcmp(ecryptfs_dentry->d_name.name, ".")) 446 && !strcmp(ecryptfs_dentry->d_name.name, "."))
447 || (ecryptfs_dentry->d_name.len == 2 447 || (ecryptfs_dentry->d_name.len == 2
@@ -454,7 +454,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
454 lower_name.hash = ecryptfs_dentry->d_name.hash; 454 lower_name.hash = ecryptfs_dentry->d_name.hash;
455 if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) { 455 if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) {
456 rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry, 456 rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry,
457 &lower_name); 457 lower_dir_dentry->d_inode, &lower_name);
458 if (rc < 0) 458 if (rc < 0)
459 goto out_d_drop; 459 goto out_d_drop;
460 } 460 }
@@ -489,7 +489,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
489 lower_name.hash = full_name_hash(lower_name.name, lower_name.len); 489 lower_name.hash = full_name_hash(lower_name.name, lower_name.len);
490 if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) { 490 if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) {
491 rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry, 491 rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry,
492 &lower_name); 492 lower_dir_dentry->d_inode, &lower_name);
493 if (rc < 0) 493 if (rc < 0)
494 goto out_d_drop; 494 goto out_d_drop;
495 } 495 }
@@ -980,8 +980,10 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
980} 980}
981 981
982static int 982static int
983ecryptfs_permission(struct inode *inode, int mask) 983ecryptfs_permission(struct inode *inode, int mask, unsigned int flags)
984{ 984{
985 if (flags & IPERM_FLAG_RCU)
986 return -ECHILD;
985 return inode_permission(ecryptfs_inode_to_lower(inode), mask); 987 return inode_permission(ecryptfs_inode_to_lower(inode), mask);
986} 988}
987 989
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index a9dbd62518e..35103867537 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -189,7 +189,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
189 if (special_file(lower_inode->i_mode)) 189 if (special_file(lower_inode->i_mode))
190 init_special_inode(inode, lower_inode->i_mode, 190 init_special_inode(inode, lower_inode->i_mode,
191 lower_inode->i_rdev); 191 lower_inode->i_rdev);
192 dentry->d_op = &ecryptfs_dops; 192 d_set_d_op(dentry, &ecryptfs_dops);
193 fsstack_copy_attr_all(inode, lower_inode); 193 fsstack_copy_attr_all(inode, lower_inode);
194 /* This size will be overwritten for real files w/ headers and 194 /* This size will be overwritten for real files w/ headers and
195 * other metadata */ 195 * other metadata */
@@ -594,7 +594,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
594 deactivate_locked_super(s); 594 deactivate_locked_super(s);
595 goto out; 595 goto out;
596 } 596 }
597 s->s_root->d_op = &ecryptfs_dops; 597 d_set_d_op(s->s_root, &ecryptfs_dops);
598 s->s_root->d_sb = s; 598 s->s_root->d_sb = s;
599 s->s_root->d_parent = s->s_root; 599 s->s_root->d_parent = s->s_root;
600 600
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index 2720178b771..3042fe123a3 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -62,6 +62,16 @@ out:
62 return inode; 62 return inode;
63} 63}
64 64
65static void ecryptfs_i_callback(struct rcu_head *head)
66{
67 struct inode *inode = container_of(head, struct inode, i_rcu);
68 struct ecryptfs_inode_info *inode_info;
69 inode_info = ecryptfs_inode_to_private(inode);
70
71 INIT_LIST_HEAD(&inode->i_dentry);
72 kmem_cache_free(ecryptfs_inode_info_cache, inode_info);
73}
74
65/** 75/**
66 * ecryptfs_destroy_inode 76 * ecryptfs_destroy_inode
67 * @inode: The ecryptfs inode 77 * @inode: The ecryptfs inode
@@ -88,7 +98,7 @@ static void ecryptfs_destroy_inode(struct inode *inode)
88 } 98 }
89 } 99 }
90 ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat); 100 ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat);
91 kmem_cache_free(ecryptfs_inode_info_cache, inode_info); 101 call_rcu(&inode->i_rcu, ecryptfs_i_callback);
92} 102}
93 103
94/** 104/**
diff --git a/fs/efs/super.c b/fs/efs/super.c
index 5073a07652c..0f31acb0131 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -65,11 +65,18 @@ static struct inode *efs_alloc_inode(struct super_block *sb)
65 return &ei->vfs_inode; 65 return &ei->vfs_inode;
66} 66}
67 67
68static void efs_destroy_inode(struct inode *inode) 68static void efs_i_callback(struct rcu_head *head)
69{ 69{
70 struct inode *inode = container_of(head, struct inode, i_rcu);
71 INIT_LIST_HEAD(&inode->i_dentry);
70 kmem_cache_free(efs_inode_cachep, INODE_INFO(inode)); 72 kmem_cache_free(efs_inode_cachep, INODE_INFO(inode));
71} 73}
72 74
75static void efs_destroy_inode(struct inode *inode)
76{
77 call_rcu(&inode->i_rcu, efs_i_callback);
78}
79
73static void init_once(void *foo) 80static void init_once(void *foo)
74{ 81{
75 struct efs_inode_info *ei = (struct efs_inode_info *) foo; 82 struct efs_inode_info *ei = (struct efs_inode_info *) foo;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 8cf07242067..cc8a9b7d606 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -217,7 +217,7 @@ struct ep_send_events_data {
217 * Configuration options available inside /proc/sys/fs/epoll/ 217 * Configuration options available inside /proc/sys/fs/epoll/
218 */ 218 */
219/* Maximum number of epoll watched descriptors, per user */ 219/* Maximum number of epoll watched descriptors, per user */
220static int max_user_watches __read_mostly; 220static long max_user_watches __read_mostly;
221 221
222/* 222/*
223 * This mutex is used to serialize ep_free() and eventpoll_release_file(). 223 * This mutex is used to serialize ep_free() and eventpoll_release_file().
@@ -240,16 +240,18 @@ static struct kmem_cache *pwq_cache __read_mostly;
240 240
241#include <linux/sysctl.h> 241#include <linux/sysctl.h>
242 242
243static int zero; 243static long zero;
244static long long_max = LONG_MAX;
244 245
245ctl_table epoll_table[] = { 246ctl_table epoll_table[] = {
246 { 247 {
247 .procname = "max_user_watches", 248 .procname = "max_user_watches",
248 .data = &max_user_watches, 249 .data = &max_user_watches,
249 .maxlen = sizeof(int), 250 .maxlen = sizeof(max_user_watches),
250 .mode = 0644, 251 .mode = 0644,
251 .proc_handler = proc_dointvec_minmax, 252 .proc_handler = proc_doulongvec_minmax,
252 .extra1 = &zero, 253 .extra1 = &zero,
254 .extra2 = &long_max,
253 }, 255 },
254 { } 256 { }
255}; 257};
@@ -561,7 +563,7 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
561 /* At this point it is safe to free the eventpoll item */ 563 /* At this point it is safe to free the eventpoll item */
562 kmem_cache_free(epi_cache, epi); 564 kmem_cache_free(epi_cache, epi);
563 565
564 atomic_dec(&ep->user->epoll_watches); 566 atomic_long_dec(&ep->user->epoll_watches);
565 567
566 return 0; 568 return 0;
567} 569}
@@ -898,11 +900,12 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
898{ 900{
899 int error, revents, pwake = 0; 901 int error, revents, pwake = 0;
900 unsigned long flags; 902 unsigned long flags;
903 long user_watches;
901 struct epitem *epi; 904 struct epitem *epi;
902 struct ep_pqueue epq; 905 struct ep_pqueue epq;
903 906
904 if (unlikely(atomic_read(&ep->user->epoll_watches) >= 907 user_watches = atomic_long_read(&ep->user->epoll_watches);
905 max_user_watches)) 908 if (unlikely(user_watches >= max_user_watches))
906 return -ENOSPC; 909 return -ENOSPC;
907 if (!(epi = kmem_cache_alloc(epi_cache, GFP_KERNEL))) 910 if (!(epi = kmem_cache_alloc(epi_cache, GFP_KERNEL)))
908 return -ENOMEM; 911 return -ENOMEM;
@@ -966,7 +969,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
966 969
967 spin_unlock_irqrestore(&ep->lock, flags); 970 spin_unlock_irqrestore(&ep->lock, flags);
968 971
969 atomic_inc(&ep->user->epoll_watches); 972 atomic_long_inc(&ep->user->epoll_watches);
970 973
971 /* We have to call this outside the lock */ 974 /* We have to call this outside the lock */
972 if (pwake) 975 if (pwake)
@@ -1426,6 +1429,7 @@ static int __init eventpoll_init(void)
1426 */ 1429 */
1427 max_user_watches = (((si.totalram - si.totalhigh) / 25) << PAGE_SHIFT) / 1430 max_user_watches = (((si.totalram - si.totalhigh) / 25) << PAGE_SHIFT) /
1428 EP_ITEM_COST; 1431 EP_ITEM_COST;
1432 BUG_ON(max_user_watches < 0);
1429 1433
1430 /* Initialize the structure used to perform safe poll wait head wake ups */ 1434 /* Initialize the structure used to perform safe poll wait head wake ups */
1431 ep_nested_calls_init(&poll_safewake_ncalls); 1435 ep_nested_calls_init(&poll_safewake_ncalls);
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 79c3ae6e045..8c6c4669b38 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -150,12 +150,19 @@ static struct inode *exofs_alloc_inode(struct super_block *sb)
150 return &oi->vfs_inode; 150 return &oi->vfs_inode;
151} 151}
152 152
153static void exofs_i_callback(struct rcu_head *head)
154{
155 struct inode *inode = container_of(head, struct inode, i_rcu);
156 INIT_LIST_HEAD(&inode->i_dentry);
157 kmem_cache_free(exofs_inode_cachep, exofs_i(inode));
158}
159
153/* 160/*
154 * Remove an inode from the cache 161 * Remove an inode from the cache
155 */ 162 */
156static void exofs_destroy_inode(struct inode *inode) 163static void exofs_destroy_inode(struct inode *inode)
157{ 164{
158 kmem_cache_free(exofs_inode_cachep, exofs_i(inode)); 165 call_rcu(&inode->i_rcu, exofs_i_callback);
159} 166}
160 167
161/* 168/*
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 51b304056f1..4b6825740dd 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -43,24 +43,26 @@ find_acceptable_alias(struct dentry *result,
43 void *context) 43 void *context)
44{ 44{
45 struct dentry *dentry, *toput = NULL; 45 struct dentry *dentry, *toput = NULL;
46 struct inode *inode;
46 47
47 if (acceptable(context, result)) 48 if (acceptable(context, result))
48 return result; 49 return result;
49 50
50 spin_lock(&dcache_lock); 51 inode = result->d_inode;
51 list_for_each_entry(dentry, &result->d_inode->i_dentry, d_alias) { 52 spin_lock(&inode->i_lock);
52 dget_locked(dentry); 53 list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
53 spin_unlock(&dcache_lock); 54 dget(dentry);
55 spin_unlock(&inode->i_lock);
54 if (toput) 56 if (toput)
55 dput(toput); 57 dput(toput);
56 if (dentry != result && acceptable(context, dentry)) { 58 if (dentry != result && acceptable(context, dentry)) {
57 dput(result); 59 dput(result);
58 return dentry; 60 return dentry;
59 } 61 }
60 spin_lock(&dcache_lock); 62 spin_lock(&inode->i_lock);
61 toput = dentry; 63 toput = dentry;
62 } 64 }
63 spin_unlock(&dcache_lock); 65 spin_unlock(&inode->i_lock);
64 66
65 if (toput) 67 if (toput)
66 dput(toput); 68 dput(toput);
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index 2bcc0431bad..7b4180554a6 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -232,10 +232,17 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
232} 232}
233 233
234int 234int
235ext2_check_acl(struct inode *inode, int mask) 235ext2_check_acl(struct inode *inode, int mask, unsigned int flags)
236{ 236{
237 struct posix_acl *acl = ext2_get_acl(inode, ACL_TYPE_ACCESS); 237 struct posix_acl *acl;
238
239 if (flags & IPERM_FLAG_RCU) {
240 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
241 return -ECHILD;
242 return -EAGAIN;
243 }
238 244
245 acl = ext2_get_acl(inode, ACL_TYPE_ACCESS);
239 if (IS_ERR(acl)) 246 if (IS_ERR(acl))
240 return PTR_ERR(acl); 247 return PTR_ERR(acl);
241 if (acl) { 248 if (acl) {
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
index 3ff6cbb9ac4..c939b7b1209 100644
--- a/fs/ext2/acl.h
+++ b/fs/ext2/acl.h
@@ -54,7 +54,7 @@ static inline int ext2_acl_count(size_t size)
54#ifdef CONFIG_EXT2_FS_POSIX_ACL 54#ifdef CONFIG_EXT2_FS_POSIX_ACL
55 55
56/* acl.c */ 56/* acl.c */
57extern int ext2_check_acl (struct inode *, int); 57extern int ext2_check_acl (struct inode *, int, unsigned int);
58extern int ext2_acl_chmod (struct inode *); 58extern int ext2_acl_chmod (struct inode *);
59extern int ext2_init_acl (struct inode *, struct inode *); 59extern int ext2_init_acl (struct inode *, struct inode *);
60 60
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 2709b34206a..47cda410b54 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -28,21 +28,30 @@
28 28
29typedef struct ext2_dir_entry_2 ext2_dirent; 29typedef struct ext2_dir_entry_2 ext2_dirent;
30 30
31/*
32 * Tests against MAX_REC_LEN etc were put in place for 64k block
33 * sizes; if that is not possible on this arch, we can skip
34 * those tests and speed things up.
35 */
31static inline unsigned ext2_rec_len_from_disk(__le16 dlen) 36static inline unsigned ext2_rec_len_from_disk(__le16 dlen)
32{ 37{
33 unsigned len = le16_to_cpu(dlen); 38 unsigned len = le16_to_cpu(dlen);
34 39
40#if (PAGE_CACHE_SIZE >= 65536)
35 if (len == EXT2_MAX_REC_LEN) 41 if (len == EXT2_MAX_REC_LEN)
36 return 1 << 16; 42 return 1 << 16;
43#endif
37 return len; 44 return len;
38} 45}
39 46
40static inline __le16 ext2_rec_len_to_disk(unsigned len) 47static inline __le16 ext2_rec_len_to_disk(unsigned len)
41{ 48{
49#if (PAGE_CACHE_SIZE >= 65536)
42 if (len == (1 << 16)) 50 if (len == (1 << 16))
43 return cpu_to_le16(EXT2_MAX_REC_LEN); 51 return cpu_to_le16(EXT2_MAX_REC_LEN);
44 else 52 else
45 BUG_ON(len > (1 << 16)); 53 BUG_ON(len > (1 << 16));
54#endif
46 return cpu_to_le16(len); 55 return cpu_to_le16(len);
47} 56}
48 57
@@ -129,15 +138,15 @@ static void ext2_check_page(struct page *page, int quiet)
129 p = (ext2_dirent *)(kaddr + offs); 138 p = (ext2_dirent *)(kaddr + offs);
130 rec_len = ext2_rec_len_from_disk(p->rec_len); 139 rec_len = ext2_rec_len_from_disk(p->rec_len);
131 140
132 if (rec_len < EXT2_DIR_REC_LEN(1)) 141 if (unlikely(rec_len < EXT2_DIR_REC_LEN(1)))
133 goto Eshort; 142 goto Eshort;
134 if (rec_len & 3) 143 if (unlikely(rec_len & 3))
135 goto Ealign; 144 goto Ealign;
136 if (rec_len < EXT2_DIR_REC_LEN(p->name_len)) 145 if (unlikely(rec_len < EXT2_DIR_REC_LEN(p->name_len)))
137 goto Enamelen; 146 goto Enamelen;
138 if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1)) 147 if (unlikely(((offs + rec_len - 1) ^ offs) & ~(chunk_size-1)))
139 goto Espan; 148 goto Espan;
140 if (le32_to_cpu(p->inode) > max_inumber) 149 if (unlikely(le32_to_cpu(p->inode) > max_inumber))
141 goto Einumber; 150 goto Einumber;
142 } 151 }
143 if (offs != limit) 152 if (offs != limit)
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index f8aecd2e329..2e1d8341d82 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -67,7 +67,7 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str
67 inode = NULL; 67 inode = NULL;
68 if (ino) { 68 if (ino) {
69 inode = ext2_iget(dir->i_sb, ino); 69 inode = ext2_iget(dir->i_sb, ino);
70 if (unlikely(IS_ERR(inode))) { 70 if (IS_ERR(inode)) {
71 if (PTR_ERR(inode) == -ESTALE) { 71 if (PTR_ERR(inode) == -ESTALE) {
72 ext2_error(dir->i_sb, __func__, 72 ext2_error(dir->i_sb, __func__,
73 "deleted inode referenced: %lu", 73 "deleted inode referenced: %lu",
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index d89e0b6a2d7..7731695e65d 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -43,9 +43,10 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data);
43static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf); 43static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf);
44static int ext2_sync_fs(struct super_block *sb, int wait); 44static int ext2_sync_fs(struct super_block *sb, int wait);
45 45
46void ext2_error (struct super_block * sb, const char * function, 46void ext2_error(struct super_block *sb, const char *function,
47 const char * fmt, ...) 47 const char *fmt, ...)
48{ 48{
49 struct va_format vaf;
49 va_list args; 50 va_list args;
50 struct ext2_sb_info *sbi = EXT2_SB(sb); 51 struct ext2_sb_info *sbi = EXT2_SB(sb);
51 struct ext2_super_block *es = sbi->s_es; 52 struct ext2_super_block *es = sbi->s_es;
@@ -59,9 +60,13 @@ void ext2_error (struct super_block * sb, const char * function,
59 } 60 }
60 61
61 va_start(args, fmt); 62 va_start(args, fmt);
62 printk(KERN_CRIT "EXT2-fs (%s): error: %s: ", sb->s_id, function); 63
63 vprintk(fmt, args); 64 vaf.fmt = fmt;
64 printk("\n"); 65 vaf.va = &args;
66
67 printk(KERN_CRIT "EXT2-fs (%s): error: %s: %pV\n",
68 sb->s_id, function, &vaf);
69
65 va_end(args); 70 va_end(args);
66 71
67 if (test_opt(sb, ERRORS_PANIC)) 72 if (test_opt(sb, ERRORS_PANIC))
@@ -76,12 +81,16 @@ void ext2_error (struct super_block * sb, const char * function,
76void ext2_msg(struct super_block *sb, const char *prefix, 81void ext2_msg(struct super_block *sb, const char *prefix,
77 const char *fmt, ...) 82 const char *fmt, ...)
78{ 83{
84 struct va_format vaf;
79 va_list args; 85 va_list args;
80 86
81 va_start(args, fmt); 87 va_start(args, fmt);
82 printk("%sEXT2-fs (%s): ", prefix, sb->s_id); 88
83 vprintk(fmt, args); 89 vaf.fmt = fmt;
84 printk("\n"); 90 vaf.va = &args;
91
92 printk("%sEXT2-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
93
85 va_end(args); 94 va_end(args);
86} 95}
87 96
@@ -161,11 +170,18 @@ static struct inode *ext2_alloc_inode(struct super_block *sb)
161 return &ei->vfs_inode; 170 return &ei->vfs_inode;
162} 171}
163 172
164static void ext2_destroy_inode(struct inode *inode) 173static void ext2_i_callback(struct rcu_head *head)
165{ 174{
175 struct inode *inode = container_of(head, struct inode, i_rcu);
176 INIT_LIST_HEAD(&inode->i_dentry);
166 kmem_cache_free(ext2_inode_cachep, EXT2_I(inode)); 177 kmem_cache_free(ext2_inode_cachep, EXT2_I(inode));
167} 178}
168 179
180static void ext2_destroy_inode(struct inode *inode)
181{
182 call_rcu(&inode->i_rcu, ext2_i_callback);
183}
184
169static void init_once(void *foo) 185static void init_once(void *foo)
170{ 186{
171 struct ext2_inode_info *ei = (struct ext2_inode_info *) foo; 187 struct ext2_inode_info *ei = (struct ext2_inode_info *) foo;
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index f84700be327..c2e4dce984d 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -199,14 +199,6 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_get",
199 goto found; 199 goto found;
200 entry = next; 200 entry = next;
201 } 201 }
202 /* Check the remaining name entries */
203 while (!IS_LAST_ENTRY(entry)) {
204 struct ext2_xattr_entry *next =
205 EXT2_XATTR_NEXT(entry);
206 if ((char *)next >= end)
207 goto bad_block;
208 entry = next;
209 }
210 if (ext2_xattr_cache_insert(bh)) 202 if (ext2_xattr_cache_insert(bh))
211 ea_idebug(inode, "cache insert failed"); 203 ea_idebug(inode, "cache insert failed");
212 error = -ENODATA; 204 error = -ENODATA;
@@ -355,7 +347,7 @@ static void ext2_xattr_update_super_block(struct super_block *sb)
355/* 347/*
356 * ext2_xattr_set() 348 * ext2_xattr_set()
357 * 349 *
358 * Create, replace or remove an extended attribute for this inode. Buffer 350 * Create, replace or remove an extended attribute for this inode. Value
359 * is NULL to remove an existing extended attribute, and non-NULL to 351 * is NULL to remove an existing extended attribute, and non-NULL to
360 * either replace an existing extended attribute, or create a new extended 352 * either replace an existing extended attribute, or create a new extended
361 * attribute. The flags XATTR_REPLACE and XATTR_CREATE 353 * attribute. The flags XATTR_REPLACE and XATTR_CREATE
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 8a11fe21218..e4fa49e6c53 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -240,10 +240,17 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
240} 240}
241 241
242int 242int
243ext3_check_acl(struct inode *inode, int mask) 243ext3_check_acl(struct inode *inode, int mask, unsigned int flags)
244{ 244{
245 struct posix_acl *acl = ext3_get_acl(inode, ACL_TYPE_ACCESS); 245 struct posix_acl *acl;
246
247 if (flags & IPERM_FLAG_RCU) {
248 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
249 return -ECHILD;
250 return -EAGAIN;
251 }
246 252
253 acl = ext3_get_acl(inode, ACL_TYPE_ACCESS);
247 if (IS_ERR(acl)) 254 if (IS_ERR(acl))
248 return PTR_ERR(acl); 255 return PTR_ERR(acl);
249 if (acl) { 256 if (acl) {
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
index 597334626de..5faf8048e90 100644
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@@ -54,7 +54,7 @@ static inline int ext3_acl_count(size_t size)
54#ifdef CONFIG_EXT3_FS_POSIX_ACL 54#ifdef CONFIG_EXT3_FS_POSIX_ACL
55 55
56/* acl.c */ 56/* acl.c */
57extern int ext3_check_acl (struct inode *, int); 57extern int ext3_check_acl (struct inode *, int, unsigned int);
58extern int ext3_acl_chmod (struct inode *); 58extern int ext3_acl_chmod (struct inode *);
59extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); 59extern int ext3_init_acl (handle_t *, struct inode *, struct inode *);
60 60
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index b3db2264942..045995c8ce5 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -20,6 +20,7 @@
20#include <linux/ext3_jbd.h> 20#include <linux/ext3_jbd.h>
21#include <linux/quotaops.h> 21#include <linux/quotaops.h>
22#include <linux/buffer_head.h> 22#include <linux/buffer_head.h>
23#include <linux/blkdev.h>
23 24
24/* 25/*
25 * balloc.c contains the blocks allocation and deallocation routines 26 * balloc.c contains the blocks allocation and deallocation routines
@@ -39,6 +40,21 @@
39 40
40#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) 41#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
41 42
43/*
44 * Calculate the block group number and offset, given a block number
45 */
46static void ext3_get_group_no_and_offset(struct super_block *sb,
47 ext3_fsblk_t blocknr, unsigned long *blockgrpp, ext3_grpblk_t *offsetp)
48{
49 struct ext3_super_block *es = EXT3_SB(sb)->s_es;
50
51 blocknr = blocknr - le32_to_cpu(es->s_first_data_block);
52 if (offsetp)
53 *offsetp = blocknr % EXT3_BLOCKS_PER_GROUP(sb);
54 if (blockgrpp)
55 *blockgrpp = blocknr / EXT3_BLOCKS_PER_GROUP(sb);
56}
57
42/** 58/**
43 * ext3_get_group_desc() -- load group descriptor from disk 59 * ext3_get_group_desc() -- load group descriptor from disk
44 * @sb: super block 60 * @sb: super block
@@ -1885,3 +1901,253 @@ unsigned long ext3_bg_num_gdb(struct super_block *sb, int group)
1885 return ext3_bg_num_gdb_meta(sb,group); 1901 return ext3_bg_num_gdb_meta(sb,group);
1886 1902
1887} 1903}
1904
1905/**
1906 * ext3_trim_all_free -- function to trim all free space in alloc. group
1907 * @sb: super block for file system
1908 * @group: allocation group to trim
1909 * @start: first group block to examine
1910 * @max: last group block to examine
1911 * @gdp: allocation group description structure
1912 * @minblocks: minimum extent block count
1913 *
1914 * ext3_trim_all_free walks through group's block bitmap searching for free
1915 * blocks. When a free block is found, it tries to allocate this block and
1916 * the subsequent free blocks to get the biggest free extent possible, until it
1917 * reaches any used block. Then it issues a TRIM command on this extent and frees
1918 * the extent in the block bitmap. This is done until the whole group is scanned.
1919 */
1920ext3_grpblk_t ext3_trim_all_free(struct super_block *sb, unsigned int group,
1921 ext3_grpblk_t start, ext3_grpblk_t max,
1922 ext3_grpblk_t minblocks)
1923{
1924 handle_t *handle;
1925 ext3_grpblk_t next, free_blocks, bit, freed, count = 0;
1926 ext3_fsblk_t discard_block;
1927 struct ext3_sb_info *sbi;
1928 struct buffer_head *gdp_bh, *bitmap_bh = NULL;
1929 struct ext3_group_desc *gdp;
1930 int err = 0, ret = 0;
1931
1932 /*
1933 * We will update one block bitmap, and one group descriptor
1934 */
1935 handle = ext3_journal_start_sb(sb, 2);
1936 if (IS_ERR(handle))
1937 return PTR_ERR(handle);
1938
1939 bitmap_bh = read_block_bitmap(sb, group);
1940 if (!bitmap_bh) {
1941 err = -EIO;
1942 goto err_out;
1943 }
1944
1945 BUFFER_TRACE(bitmap_bh, "getting undo access");
1946 err = ext3_journal_get_undo_access(handle, bitmap_bh);
1947 if (err)
1948 goto err_out;
1949
1950 gdp = ext3_get_group_desc(sb, group, &gdp_bh);
1951 if (!gdp) {
1952 err = -EIO;
1953 goto err_out;
1954 }
1955
1956 BUFFER_TRACE(gdp_bh, "get_write_access");
1957 err = ext3_journal_get_write_access(handle, gdp_bh);
1958 if (err)
1959 goto err_out;
1960
1961 free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
1962 sbi = EXT3_SB(sb);
1963
1964 /* Walk through the whole group */
1965 while (start < max) {
1966 start = bitmap_search_next_usable_block(start, bitmap_bh, max);
1967 if (start < 0)
1968 break;
1969 next = start;
1970
1971 /*
1972 * Allocate contiguous free extents by setting bits in the
1973 * block bitmap
1974 */
1975 while (next < max
1976 && claim_block(sb_bgl_lock(sbi, group),
1977 next, bitmap_bh)) {
1978 next++;
1979 }
1980
1981 /* We did not claim any blocks */
1982 if (next == start)
1983 continue;
1984
1985 discard_block = (ext3_fsblk_t)start +
1986 ext3_group_first_block_no(sb, group);
1987
1988 /* Update counters */
1989 spin_lock(sb_bgl_lock(sbi, group));
1990 le16_add_cpu(&gdp->bg_free_blocks_count, start - next);
1991 spin_unlock(sb_bgl_lock(sbi, group));
1992 percpu_counter_sub(&sbi->s_freeblocks_counter, next - start);
1993
1994 /* Do not issue a TRIM on extents smaller than minblocks */
1995 if ((next - start) < minblocks)
1996 goto free_extent;
1997
1998 /* Send the TRIM command down to the device */
1999 err = sb_issue_discard(sb, discard_block, next - start,
2000 GFP_NOFS, 0);
2001 count += (next - start);
2002free_extent:
2003 freed = 0;
2004
2005 /*
2006 * Clear bits in the bitmap
2007 */
2008 for (bit = start; bit < next; bit++) {
2009 BUFFER_TRACE(bitmap_bh, "clear bit");
2010 if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, group),
2011 bit, bitmap_bh->b_data)) {
2012 ext3_error(sb, __func__,
2013 "bit already cleared for block "E3FSBLK,
2014 (unsigned long)bit);
2015 BUFFER_TRACE(bitmap_bh, "bit already cleared");
2016 } else {
2017 freed++;
2018 }
2019 }
2020
2021 /* Update counters */
2022 spin_lock(sb_bgl_lock(sbi, group));
2023 le16_add_cpu(&gdp->bg_free_blocks_count, freed);
2024 spin_unlock(sb_bgl_lock(sbi, group));
2025 percpu_counter_add(&sbi->s_freeblocks_counter, freed);
2026
2027 start = next;
2028 if (err < 0) {
2029 if (err != -EOPNOTSUPP)
2030 ext3_warning(sb, __func__, "Discard command "
2031 "returned error %d\n", err);
2032 break;
2033 }
2034
2035 if (fatal_signal_pending(current)) {
2036 err = -ERESTARTSYS;
2037 break;
2038 }
2039
2040 cond_resched();
2041
2042 /* No more suitable extents */
2043 if ((free_blocks - count) < minblocks)
2044 break;
2045 }
2046
2047 /* We dirtied the bitmap block */
2048 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
2049 ret = ext3_journal_dirty_metadata(handle, bitmap_bh);
2050 if (!err)
2051 err = ret;
2052
2053 /* And the group descriptor block */
2054 BUFFER_TRACE(gdp_bh, "dirtied group descriptor block");
2055 ret = ext3_journal_dirty_metadata(handle, gdp_bh);
2056 if (!err)
2057 err = ret;
2058
2059 ext3_debug("trimmed %d blocks in the group %d\n",
2060 count, group);
2061
2062err_out:
2063 if (err)
2064 count = err;
2065 ext3_journal_stop(handle);
2066 brelse(bitmap_bh);
2067
2068 return count;
2069}
2070
2071/**
2072 * ext3_trim_fs() -- trim ioctl handle function
2073 * @sb: superblock for filesystem
2074 * @range: fstrim_range structure; range->start is the first Byte to trim,
2075 * range->len is the number of Bytes to trim from start, and
2076 * range->minlen is the minimum extent length in Bytes
2077 *
2078 * ext3_trim_fs goes through all allocation groups containing Bytes from
2079 * start to start+len. For each such a group ext3_trim_all_free function
2080 * is invoked to trim all free space.
2081 */
2082int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
2083{
2084 ext3_grpblk_t last_block, first_block, free_blocks;
2085 unsigned long first_group, last_group;
2086 unsigned long group, ngroups;
2087 struct ext3_group_desc *gdp;
2088 struct ext3_super_block *es = EXT3_SB(sb)->s_es;
2089 uint64_t start, len, minlen, trimmed;
2090 ext3_fsblk_t max_blks = le32_to_cpu(es->s_blocks_count);
2091 int ret = 0;
2092
2093 start = range->start >> sb->s_blocksize_bits;
2094 len = range->len >> sb->s_blocksize_bits;
2095 minlen = range->minlen >> sb->s_blocksize_bits;
2096 trimmed = 0;
2097
2098 if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb)))
2099 return -EINVAL;
2100 if (start >= max_blks)
2101 goto out;
2102 if (start < le32_to_cpu(es->s_first_data_block)) {
2103 len -= le32_to_cpu(es->s_first_data_block) - start;
2104 start = le32_to_cpu(es->s_first_data_block);
2105 }
2106 if (start + len > max_blks)
2107 len = max_blks - start;
2108
2109 ngroups = EXT3_SB(sb)->s_groups_count;
2110 smp_rmb();
2111
2112 /* Determine first and last group to examine based on start and len */
2113 ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) start,
2114 &first_group, &first_block);
2115 ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) (start + len),
2116 &last_group, &last_block);
2117 last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group;
2118 last_block = EXT3_BLOCKS_PER_GROUP(sb);
2119
2120 if (first_group > last_group)
2121 return -EINVAL;
2122
2123 for (group = first_group; group <= last_group; group++) {
2124 gdp = ext3_get_group_desc(sb, group, NULL);
2125 if (!gdp)
2126 break;
2127
2128 free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
2129 if (free_blocks < minlen)
2130 continue;
2131
2132 if (len >= EXT3_BLOCKS_PER_GROUP(sb))
2133 len -= (EXT3_BLOCKS_PER_GROUP(sb) - first_block);
2134 else
2135 last_block = first_block + len;
2136
2137 ret = ext3_trim_all_free(sb, group, first_block,
2138 last_block, minlen);
2139 if (ret < 0)
2140 break;
2141
2142 trimmed += ret;
2143 first_block = 0;
2144 }
2145
2146 if (ret >= 0)
2147 ret = 0;
2148
2149out:
2150 range->len = trimmed * sb->s_blocksize;
2151
2152 return ret;
2153}
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index e2e72c367cf..34f0a072b93 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -69,25 +69,26 @@ int ext3_check_dir_entry (const char * function, struct inode * dir,
69 const char * error_msg = NULL; 69 const char * error_msg = NULL;
70 const int rlen = ext3_rec_len_from_disk(de->rec_len); 70 const int rlen = ext3_rec_len_from_disk(de->rec_len);
71 71
72 if (rlen < EXT3_DIR_REC_LEN(1)) 72 if (unlikely(rlen < EXT3_DIR_REC_LEN(1)))
73 error_msg = "rec_len is smaller than minimal"; 73 error_msg = "rec_len is smaller than minimal";
74 else if (rlen % 4 != 0) 74 else if (unlikely(rlen % 4 != 0))
75 error_msg = "rec_len % 4 != 0"; 75 error_msg = "rec_len % 4 != 0";
76 else if (rlen < EXT3_DIR_REC_LEN(de->name_len)) 76 else if (unlikely(rlen < EXT3_DIR_REC_LEN(de->name_len)))
77 error_msg = "rec_len is too small for name_len"; 77 error_msg = "rec_len is too small for name_len";
78 else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize) 78 else if (unlikely((((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)))
79 error_msg = "directory entry across blocks"; 79 error_msg = "directory entry across blocks";
80 else if (le32_to_cpu(de->inode) > 80 else if (unlikely(le32_to_cpu(de->inode) >
81 le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count)) 81 le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count)))
82 error_msg = "inode out of bounds"; 82 error_msg = "inode out of bounds";
83 83
84 if (error_msg != NULL) 84 if (unlikely(error_msg != NULL))
85 ext3_error (dir->i_sb, function, 85 ext3_error (dir->i_sb, function,
86 "bad entry in directory #%lu: %s - " 86 "bad entry in directory #%lu: %s - "
87 "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", 87 "offset=%lu, inode=%lu, rec_len=%d, name_len=%d",
88 dir->i_ino, error_msg, offset, 88 dir->i_ino, error_msg, offset,
89 (unsigned long) le32_to_cpu(de->inode), 89 (unsigned long) le32_to_cpu(de->inode),
90 rlen, de->name_len); 90 rlen, de->name_len);
91
91 return error_msg == NULL ? 1 : 0; 92 return error_msg == NULL ? 1 : 0;
92} 93}
93 94
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index a9580617edd..ae94f6d949f 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2145,13 +2145,15 @@ static void ext3_clear_blocks(handle_t *handle, struct inode *inode,
2145 if (try_to_extend_transaction(handle, inode)) { 2145 if (try_to_extend_transaction(handle, inode)) {
2146 if (bh) { 2146 if (bh) {
2147 BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); 2147 BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
2148 ext3_journal_dirty_metadata(handle, bh); 2148 if (ext3_journal_dirty_metadata(handle, bh))
2149 return;
2149 } 2150 }
2150 ext3_mark_inode_dirty(handle, inode); 2151 ext3_mark_inode_dirty(handle, inode);
2151 truncate_restart_transaction(handle, inode); 2152 truncate_restart_transaction(handle, inode);
2152 if (bh) { 2153 if (bh) {
2153 BUFFER_TRACE(bh, "retaking write access"); 2154 BUFFER_TRACE(bh, "retaking write access");
2154 ext3_journal_get_write_access(handle, bh); 2155 if (ext3_journal_get_write_access(handle, bh))
2156 return;
2155 } 2157 }
2156 } 2158 }
2157 2159
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index 88974814783..fc080dd561f 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -276,7 +276,29 @@ group_add_out:
276 mnt_drop_write(filp->f_path.mnt); 276 mnt_drop_write(filp->f_path.mnt);
277 return err; 277 return err;
278 } 278 }
279 case FITRIM: {
279 280
281 struct super_block *sb = inode->i_sb;
282 struct fstrim_range range;
283 int ret = 0;
284
285 if (!capable(CAP_SYS_ADMIN))
286 return -EPERM;
287
288 if (copy_from_user(&range, (struct fstrim_range *)arg,
289 sizeof(range)))
290 return -EFAULT;
291
292 ret = ext3_trim_fs(sb, &range);
293 if (ret < 0)
294 return ret;
295
296 if (copy_to_user((struct fstrim_range *)arg, &range,
297 sizeof(range)))
298 return -EFAULT;
299
300 return 0;
301 }
280 302
281 default: 303 default:
282 return -ENOTTY; 304 return -ENOTTY;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index bce9dce639b..b27ba71810e 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -858,6 +858,7 @@ static struct buffer_head *ext3_find_entry(struct inode *dir,
858 struct buffer_head * bh_use[NAMEI_RA_SIZE]; 858 struct buffer_head * bh_use[NAMEI_RA_SIZE];
859 struct buffer_head * bh, *ret = NULL; 859 struct buffer_head * bh, *ret = NULL;
860 unsigned long start, block, b; 860 unsigned long start, block, b;
861 const u8 *name = entry->name;
861 int ra_max = 0; /* Number of bh's in the readahead 862 int ra_max = 0; /* Number of bh's in the readahead
862 buffer, bh_use[] */ 863 buffer, bh_use[] */
863 int ra_ptr = 0; /* Current index into readahead 864 int ra_ptr = 0; /* Current index into readahead
@@ -871,6 +872,16 @@ static struct buffer_head *ext3_find_entry(struct inode *dir,
871 namelen = entry->len; 872 namelen = entry->len;
872 if (namelen > EXT3_NAME_LEN) 873 if (namelen > EXT3_NAME_LEN)
873 return NULL; 874 return NULL;
875 if ((namelen <= 2) && (name[0] == '.') &&
876 (name[1] == '.' || name[1] == 0)) {
877 /*
878 * "." or ".." will only be in the first block
879 * NFS may look up ".."; "." should be handled by the VFS
880 */
881 block = start = 0;
882 nblocks = 1;
883 goto restart;
884 }
874 if (is_dx(dir)) { 885 if (is_dx(dir)) {
875 bh = ext3_dx_find_entry(dir, entry, res_dir, &err); 886 bh = ext3_dx_find_entry(dir, entry, res_dir, &err);
876 /* 887 /*
@@ -961,55 +972,35 @@ static struct buffer_head * ext3_dx_find_entry(struct inode *dir,
961 struct qstr *entry, struct ext3_dir_entry_2 **res_dir, 972 struct qstr *entry, struct ext3_dir_entry_2 **res_dir,
962 int *err) 973 int *err)
963{ 974{
964 struct super_block * sb; 975 struct super_block *sb = dir->i_sb;
965 struct dx_hash_info hinfo; 976 struct dx_hash_info hinfo;
966 u32 hash;
967 struct dx_frame frames[2], *frame; 977 struct dx_frame frames[2], *frame;
968 struct ext3_dir_entry_2 *de, *top;
969 struct buffer_head *bh; 978 struct buffer_head *bh;
970 unsigned long block; 979 unsigned long block;
971 int retval; 980 int retval;
972 int namelen = entry->len;
973 const u8 *name = entry->name;
974 981
975 sb = dir->i_sb; 982 if (!(frame = dx_probe(entry, dir, &hinfo, frames, err)))
976 /* NFS may look up ".." - look at dx_root directory block */ 983 return NULL;
977 if (namelen > 2 || name[0] != '.'|| (namelen == 2 && name[1] != '.')) {
978 if (!(frame = dx_probe(entry, dir, &hinfo, frames, err)))
979 return NULL;
980 } else {
981 frame = frames;
982 frame->bh = NULL; /* for dx_release() */
983 frame->at = (struct dx_entry *)frames; /* hack for zero entry*/
984 dx_set_block(frame->at, 0); /* dx_root block is 0 */
985 }
986 hash = hinfo.hash;
987 do { 984 do {
988 block = dx_get_block(frame->at); 985 block = dx_get_block(frame->at);
989 if (!(bh = ext3_bread (NULL,dir, block, 0, err))) 986 if (!(bh = ext3_bread (NULL,dir, block, 0, err)))
990 goto errout; 987 goto errout;
991 de = (struct ext3_dir_entry_2 *) bh->b_data;
992 top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize -
993 EXT3_DIR_REC_LEN(0));
994 for (; de < top; de = ext3_next_entry(de)) {
995 int off = (block << EXT3_BLOCK_SIZE_BITS(sb))
996 + ((char *) de - bh->b_data);
997
998 if (!ext3_check_dir_entry(__func__, dir, de, bh, off)) {
999 brelse(bh);
1000 *err = ERR_BAD_DX_DIR;
1001 goto errout;
1002 }
1003 988
1004 if (ext3_match(namelen, name, de)) { 989 retval = search_dirblock(bh, dir, entry,
1005 *res_dir = de; 990 block << EXT3_BLOCK_SIZE_BITS(sb),
1006 dx_release(frames); 991 res_dir);
1007 return bh; 992 if (retval == 1) {
1008 } 993 dx_release(frames);
994 return bh;
1009 } 995 }
1010 brelse (bh); 996 brelse(bh);
997 if (retval == -1) {
998 *err = ERR_BAD_DX_DIR;
999 goto errout;
1000 }
1001
1011 /* Check to see if we should continue to search */ 1002 /* Check to see if we should continue to search */
1012 retval = ext3_htree_next_block(dir, hash, frame, 1003 retval = ext3_htree_next_block(dir, hinfo.hash, frame,
1013 frames, NULL); 1004 frames, NULL);
1014 if (retval < 0) { 1005 if (retval < 0) {
1015 ext3_warning(sb, __func__, 1006 ext3_warning(sb, __func__,
@@ -1047,7 +1038,7 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str
1047 return ERR_PTR(-EIO); 1038 return ERR_PTR(-EIO);
1048 } 1039 }
1049 inode = ext3_iget(dir->i_sb, ino); 1040 inode = ext3_iget(dir->i_sb, ino);
1050 if (unlikely(IS_ERR(inode))) { 1041 if (IS_ERR(inode)) {
1051 if (PTR_ERR(inode) == -ESTALE) { 1042 if (PTR_ERR(inode) == -ESTALE) {
1052 ext3_error(dir->i_sb, __func__, 1043 ext3_error(dir->i_sb, __func__,
1053 "deleted inode referenced: %lu", 1044 "deleted inode referenced: %lu",
@@ -1607,7 +1598,9 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
1607 if (err) 1598 if (err)
1608 goto journal_error; 1599 goto journal_error;
1609 } 1600 }
1610 ext3_journal_dirty_metadata(handle, frames[0].bh); 1601 err = ext3_journal_dirty_metadata(handle, frames[0].bh);
1602 if (err)
1603 goto journal_error;
1611 } 1604 }
1612 de = do_split(handle, dir, &bh, frame, &hinfo, &err); 1605 de = do_split(handle, dir, &bh, frame, &hinfo, &err);
1613 if (!de) 1606 if (!de)
@@ -1644,8 +1637,13 @@ static int ext3_delete_entry (handle_t *handle,
1644 if (!ext3_check_dir_entry("ext3_delete_entry", dir, de, bh, i)) 1637 if (!ext3_check_dir_entry("ext3_delete_entry", dir, de, bh, i))
1645 return -EIO; 1638 return -EIO;
1646 if (de == de_del) { 1639 if (de == de_del) {
1640 int err;
1641
1647 BUFFER_TRACE(bh, "get_write_access"); 1642 BUFFER_TRACE(bh, "get_write_access");
1648 ext3_journal_get_write_access(handle, bh); 1643 err = ext3_journal_get_write_access(handle, bh);
1644 if (err)
1645 goto journal_error;
1646
1649 if (pde) 1647 if (pde)
1650 pde->rec_len = ext3_rec_len_to_disk( 1648 pde->rec_len = ext3_rec_len_to_disk(
1651 ext3_rec_len_from_disk(pde->rec_len) + 1649 ext3_rec_len_from_disk(pde->rec_len) +
@@ -1654,7 +1652,12 @@ static int ext3_delete_entry (handle_t *handle,
1654 de->inode = 0; 1652 de->inode = 0;
1655 dir->i_version++; 1653 dir->i_version++;
1656 BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); 1654 BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
1657 ext3_journal_dirty_metadata(handle, bh); 1655 err = ext3_journal_dirty_metadata(handle, bh);
1656 if (err) {
1657journal_error:
1658 ext3_std_error(dir->i_sb, err);
1659 return err;
1660 }
1658 return 0; 1661 return 0;
1659 } 1662 }
1660 i += ext3_rec_len_from_disk(de->rec_len); 1663 i += ext3_rec_len_from_disk(de->rec_len);
@@ -1762,7 +1765,7 @@ static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode)
1762{ 1765{
1763 handle_t *handle; 1766 handle_t *handle;
1764 struct inode * inode; 1767 struct inode * inode;
1765 struct buffer_head * dir_block; 1768 struct buffer_head * dir_block = NULL;
1766 struct ext3_dir_entry_2 * de; 1769 struct ext3_dir_entry_2 * de;
1767 int err, retries = 0; 1770 int err, retries = 0;
1768 1771
@@ -1790,15 +1793,14 @@ retry:
1790 inode->i_fop = &ext3_dir_operations; 1793 inode->i_fop = &ext3_dir_operations;
1791 inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; 1794 inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize;
1792 dir_block = ext3_bread (handle, inode, 0, 1, &err); 1795 dir_block = ext3_bread (handle, inode, 0, 1, &err);
1793 if (!dir_block) { 1796 if (!dir_block)
1794 drop_nlink(inode); /* is this nlink == 0? */ 1797 goto out_clear_inode;
1795 unlock_new_inode(inode); 1798
1796 ext3_mark_inode_dirty(handle, inode);
1797 iput (inode);
1798 goto out_stop;
1799 }
1800 BUFFER_TRACE(dir_block, "get_write_access"); 1799 BUFFER_TRACE(dir_block, "get_write_access");
1801 ext3_journal_get_write_access(handle, dir_block); 1800 err = ext3_journal_get_write_access(handle, dir_block);
1801 if (err)
1802 goto out_clear_inode;
1803
1802 de = (struct ext3_dir_entry_2 *) dir_block->b_data; 1804 de = (struct ext3_dir_entry_2 *) dir_block->b_data;
1803 de->inode = cpu_to_le32(inode->i_ino); 1805 de->inode = cpu_to_le32(inode->i_ino);
1804 de->name_len = 1; 1806 de->name_len = 1;
@@ -1814,11 +1816,16 @@ retry:
1814 ext3_set_de_type(dir->i_sb, de, S_IFDIR); 1816 ext3_set_de_type(dir->i_sb, de, S_IFDIR);
1815 inode->i_nlink = 2; 1817 inode->i_nlink = 2;
1816 BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata"); 1818 BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata");
1817 ext3_journal_dirty_metadata(handle, dir_block); 1819 err = ext3_journal_dirty_metadata(handle, dir_block);
1818 brelse (dir_block); 1820 if (err)
1819 ext3_mark_inode_dirty(handle, inode); 1821 goto out_clear_inode;
1820 err = ext3_add_entry (handle, dentry, inode); 1822
1823 err = ext3_mark_inode_dirty(handle, inode);
1824 if (!err)
1825 err = ext3_add_entry (handle, dentry, inode);
1826
1821 if (err) { 1827 if (err) {
1828out_clear_inode:
1822 inode->i_nlink = 0; 1829 inode->i_nlink = 0;
1823 unlock_new_inode(inode); 1830 unlock_new_inode(inode);
1824 ext3_mark_inode_dirty(handle, inode); 1831 ext3_mark_inode_dirty(handle, inode);
@@ -1827,10 +1834,14 @@ retry:
1827 } 1834 }
1828 inc_nlink(dir); 1835 inc_nlink(dir);
1829 ext3_update_dx_flag(dir); 1836 ext3_update_dx_flag(dir);
1830 ext3_mark_inode_dirty(handle, dir); 1837 err = ext3_mark_inode_dirty(handle, dir);
1838 if (err)
1839 goto out_clear_inode;
1840
1831 d_instantiate(dentry, inode); 1841 d_instantiate(dentry, inode);
1832 unlock_new_inode(inode); 1842 unlock_new_inode(inode);
1833out_stop: 1843out_stop:
1844 brelse(dir_block);
1834 ext3_journal_stop(handle); 1845 ext3_journal_stop(handle);
1835 if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) 1846 if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
1836 goto retry; 1847 goto retry;
@@ -2353,7 +2364,9 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
2353 goto end_rename; 2364 goto end_rename;
2354 } else { 2365 } else {
2355 BUFFER_TRACE(new_bh, "get write access"); 2366 BUFFER_TRACE(new_bh, "get write access");
2356 ext3_journal_get_write_access(handle, new_bh); 2367 retval = ext3_journal_get_write_access(handle, new_bh);
2368 if (retval)
2369 goto journal_error;
2357 new_de->inode = cpu_to_le32(old_inode->i_ino); 2370 new_de->inode = cpu_to_le32(old_inode->i_ino);
2358 if (EXT3_HAS_INCOMPAT_FEATURE(new_dir->i_sb, 2371 if (EXT3_HAS_INCOMPAT_FEATURE(new_dir->i_sb,
2359 EXT3_FEATURE_INCOMPAT_FILETYPE)) 2372 EXT3_FEATURE_INCOMPAT_FILETYPE))
@@ -2362,7 +2375,9 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
2362 new_dir->i_ctime = new_dir->i_mtime = CURRENT_TIME_SEC; 2375 new_dir->i_ctime = new_dir->i_mtime = CURRENT_TIME_SEC;
2363 ext3_mark_inode_dirty(handle, new_dir); 2376 ext3_mark_inode_dirty(handle, new_dir);
2364 BUFFER_TRACE(new_bh, "call ext3_journal_dirty_metadata"); 2377 BUFFER_TRACE(new_bh, "call ext3_journal_dirty_metadata");
2365 ext3_journal_dirty_metadata(handle, new_bh); 2378 retval = ext3_journal_dirty_metadata(handle, new_bh);
2379 if (retval)
2380 goto journal_error;
2366 brelse(new_bh); 2381 brelse(new_bh);
2367 new_bh = NULL; 2382 new_bh = NULL;
2368 } 2383 }
@@ -2411,10 +2426,17 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
2411 ext3_update_dx_flag(old_dir); 2426 ext3_update_dx_flag(old_dir);
2412 if (dir_bh) { 2427 if (dir_bh) {
2413 BUFFER_TRACE(dir_bh, "get_write_access"); 2428 BUFFER_TRACE(dir_bh, "get_write_access");
2414 ext3_journal_get_write_access(handle, dir_bh); 2429 retval = ext3_journal_get_write_access(handle, dir_bh);
2430 if (retval)
2431 goto journal_error;
2415 PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino); 2432 PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino);
2416 BUFFER_TRACE(dir_bh, "call ext3_journal_dirty_metadata"); 2433 BUFFER_TRACE(dir_bh, "call ext3_journal_dirty_metadata");
2417 ext3_journal_dirty_metadata(handle, dir_bh); 2434 retval = ext3_journal_dirty_metadata(handle, dir_bh);
2435 if (retval) {
2436journal_error:
2437 ext3_std_error(new_dir->i_sb, retval);
2438 goto end_rename;
2439 }
2418 drop_nlink(old_dir); 2440 drop_nlink(old_dir);
2419 if (new_inode) { 2441 if (new_inode) {
2420 drop_nlink(new_inode); 2442 drop_nlink(new_inode);
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index e746d30b123..108b142e11e 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -249,7 +249,11 @@ static int setup_new_group_blocks(struct super_block *sb,
249 memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size); 249 memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size);
250 set_buffer_uptodate(gdb); 250 set_buffer_uptodate(gdb);
251 unlock_buffer(gdb); 251 unlock_buffer(gdb);
252 ext3_journal_dirty_metadata(handle, gdb); 252 err = ext3_journal_dirty_metadata(handle, gdb);
253 if (err) {
254 brelse(gdb);
255 goto exit_bh;
256 }
253 ext3_set_bit(bit, bh->b_data); 257 ext3_set_bit(bit, bh->b_data);
254 brelse(gdb); 258 brelse(gdb);
255 } 259 }
@@ -269,7 +273,11 @@ static int setup_new_group_blocks(struct super_block *sb,
269 err = PTR_ERR(gdb); 273 err = PTR_ERR(gdb);
270 goto exit_bh; 274 goto exit_bh;
271 } 275 }
272 ext3_journal_dirty_metadata(handle, gdb); 276 err = ext3_journal_dirty_metadata(handle, gdb);
277 if (err) {
278 brelse(gdb);
279 goto exit_bh;
280 }
273 ext3_set_bit(bit, bh->b_data); 281 ext3_set_bit(bit, bh->b_data);
274 brelse(gdb); 282 brelse(gdb);
275 } 283 }
@@ -295,7 +303,11 @@ static int setup_new_group_blocks(struct super_block *sb,
295 err = PTR_ERR(it); 303 err = PTR_ERR(it);
296 goto exit_bh; 304 goto exit_bh;
297 } 305 }
298 ext3_journal_dirty_metadata(handle, it); 306 err = ext3_journal_dirty_metadata(handle, it);
307 if (err) {
308 brelse(it);
309 goto exit_bh;
310 }
299 brelse(it); 311 brelse(it);
300 ext3_set_bit(bit, bh->b_data); 312 ext3_set_bit(bit, bh->b_data);
301 } 313 }
@@ -306,7 +318,9 @@ static int setup_new_group_blocks(struct super_block *sb,
306 318
307 mark_bitmap_end(input->blocks_count, EXT3_BLOCKS_PER_GROUP(sb), 319 mark_bitmap_end(input->blocks_count, EXT3_BLOCKS_PER_GROUP(sb),
308 bh->b_data); 320 bh->b_data);
309 ext3_journal_dirty_metadata(handle, bh); 321 err = ext3_journal_dirty_metadata(handle, bh);
322 if (err)
323 goto exit_bh;
310 brelse(bh); 324 brelse(bh);
311 325
312 /* Mark unused entries in inode bitmap used */ 326 /* Mark unused entries in inode bitmap used */
@@ -319,7 +333,7 @@ static int setup_new_group_blocks(struct super_block *sb,
319 333
320 mark_bitmap_end(EXT3_INODES_PER_GROUP(sb), EXT3_BLOCKS_PER_GROUP(sb), 334 mark_bitmap_end(EXT3_INODES_PER_GROUP(sb), EXT3_BLOCKS_PER_GROUP(sb),
321 bh->b_data); 335 bh->b_data);
322 ext3_journal_dirty_metadata(handle, bh); 336 err = ext3_journal_dirty_metadata(handle, bh);
323exit_bh: 337exit_bh:
324 brelse(bh); 338 brelse(bh);
325 339
@@ -503,12 +517,19 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
503 * reserved inode, and will become GDT blocks (primary and backup). 517 * reserved inode, and will become GDT blocks (primary and backup).
504 */ 518 */
505 data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)] = 0; 519 data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)] = 0;
506 ext3_journal_dirty_metadata(handle, dind); 520 err = ext3_journal_dirty_metadata(handle, dind);
521 if (err)
522 goto exit_group_desc;
507 brelse(dind); 523 brelse(dind);
524 dind = NULL;
508 inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9; 525 inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9;
509 ext3_mark_iloc_dirty(handle, inode, &iloc); 526 err = ext3_mark_iloc_dirty(handle, inode, &iloc);
527 if (err)
528 goto exit_group_desc;
510 memset((*primary)->b_data, 0, sb->s_blocksize); 529 memset((*primary)->b_data, 0, sb->s_blocksize);
511 ext3_journal_dirty_metadata(handle, *primary); 530 err = ext3_journal_dirty_metadata(handle, *primary);
531 if (err)
532 goto exit_group_desc;
512 533
513 o_group_desc = EXT3_SB(sb)->s_group_desc; 534 o_group_desc = EXT3_SB(sb)->s_group_desc;
514 memcpy(n_group_desc, o_group_desc, 535 memcpy(n_group_desc, o_group_desc,
@@ -519,10 +540,14 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
519 kfree(o_group_desc); 540 kfree(o_group_desc);
520 541
521 le16_add_cpu(&es->s_reserved_gdt_blocks, -1); 542 le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
522 ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); 543 err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
544 if (err)
545 goto exit_inode;
523 546
524 return 0; 547 return 0;
525 548
549exit_group_desc:
550 kfree(n_group_desc);
526exit_inode: 551exit_inode:
527 //ext3_journal_release_buffer(handle, iloc.bh); 552 //ext3_journal_release_buffer(handle, iloc.bh);
528 brelse(iloc.bh); 553 brelse(iloc.bh);
@@ -706,16 +731,20 @@ static void update_backups(struct super_block *sb,
706 } 731 }
707 ext3_debug("update metadata backup %#04lx\n", 732 ext3_debug("update metadata backup %#04lx\n",
708 (unsigned long)bh->b_blocknr); 733 (unsigned long)bh->b_blocknr);
709 if ((err = ext3_journal_get_write_access(handle, bh))) 734 if ((err = ext3_journal_get_write_access(handle, bh))) {
735 brelse(bh);
710 break; 736 break;
737 }
711 lock_buffer(bh); 738 lock_buffer(bh);
712 memcpy(bh->b_data, data, size); 739 memcpy(bh->b_data, data, size);
713 if (rest) 740 if (rest)
714 memset(bh->b_data + size, 0, rest); 741 memset(bh->b_data + size, 0, rest);
715 set_buffer_uptodate(bh); 742 set_buffer_uptodate(bh);
716 unlock_buffer(bh); 743 unlock_buffer(bh);
717 ext3_journal_dirty_metadata(handle, bh); 744 err = ext3_journal_dirty_metadata(handle, bh);
718 brelse(bh); 745 brelse(bh);
746 if (err)
747 break;
719 } 748 }
720 if ((err2 = ext3_journal_stop(handle)) && !err) 749 if ((err2 = ext3_journal_stop(handle)) && !err)
721 err = err2; 750 err = err2;
@@ -922,7 +951,9 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
922 /* Update the global fs size fields */ 951 /* Update the global fs size fields */
923 sbi->s_groups_count++; 952 sbi->s_groups_count++;
924 953
925 ext3_journal_dirty_metadata(handle, primary); 954 err = ext3_journal_dirty_metadata(handle, primary);
955 if (err)
956 goto exit_journal;
926 957
927 /* Update the reserved block counts only once the new group is 958 /* Update the reserved block counts only once the new group is
928 * active. */ 959 * active. */
@@ -934,7 +965,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
934 percpu_counter_add(&sbi->s_freeinodes_counter, 965 percpu_counter_add(&sbi->s_freeinodes_counter,
935 EXT3_INODES_PER_GROUP(sb)); 966 EXT3_INODES_PER_GROUP(sb));
936 967
937 ext3_journal_dirty_metadata(handle, sbi->s_sbh); 968 err = ext3_journal_dirty_metadata(handle, sbi->s_sbh);
938 969
939exit_journal: 970exit_journal:
940 mutex_unlock(&sbi->s_resize_lock); 971 mutex_unlock(&sbi->s_resize_lock);
@@ -1064,8 +1095,14 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
1064 goto exit_put; 1095 goto exit_put;
1065 } 1096 }
1066 es->s_blocks_count = cpu_to_le32(o_blocks_count + add); 1097 es->s_blocks_count = cpu_to_le32(o_blocks_count + add);
1067 ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); 1098 err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
1068 mutex_unlock(&EXT3_SB(sb)->s_resize_lock); 1099 mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
1100 if (err) {
1101 ext3_warning(sb, __func__,
1102 "error %d on journal dirty metadata", err);
1103 ext3_journal_stop(handle);
1104 goto exit_put;
1105 }
1069 ext3_debug("freeing blocks "E3FSBLK" through "E3FSBLK"\n", 1106 ext3_debug("freeing blocks "E3FSBLK" through "E3FSBLK"\n",
1070 o_blocks_count, o_blocks_count + add); 1107 o_blocks_count, o_blocks_count + add);
1071 ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks); 1108 ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index acf8695fa8f..b7d0554631e 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -143,12 +143,16 @@ void ext3_journal_abort_handle(const char *caller, const char *err_fn,
143void ext3_msg(struct super_block *sb, const char *prefix, 143void ext3_msg(struct super_block *sb, const char *prefix,
144 const char *fmt, ...) 144 const char *fmt, ...)
145{ 145{
146 struct va_format vaf;
146 va_list args; 147 va_list args;
147 148
148 va_start(args, fmt); 149 va_start(args, fmt);
149 printk("%sEXT3-fs (%s): ", prefix, sb->s_id); 150
150 vprintk(fmt, args); 151 vaf.fmt = fmt;
151 printk("\n"); 152 vaf.va = &args;
153
154 printk("%sEXT3-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
155
152 va_end(args); 156 va_end(args);
153} 157}
154 158
@@ -195,15 +199,20 @@ static void ext3_handle_error(struct super_block *sb)
195 sb->s_id); 199 sb->s_id);
196} 200}
197 201
198void ext3_error (struct super_block * sb, const char * function, 202void ext3_error(struct super_block *sb, const char *function,
199 const char * fmt, ...) 203 const char *fmt, ...)
200{ 204{
205 struct va_format vaf;
201 va_list args; 206 va_list args;
202 207
203 va_start(args, fmt); 208 va_start(args, fmt);
204 printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function); 209
205 vprintk(fmt, args); 210 vaf.fmt = fmt;
206 printk("\n"); 211 vaf.va = &args;
212
213 printk(KERN_CRIT "EXT3-fs error (device %s): %s: %pV\n",
214 sb->s_id, function, &vaf);
215
207 va_end(args); 216 va_end(args);
208 217
209 ext3_handle_error(sb); 218 ext3_handle_error(sb);
@@ -274,15 +283,20 @@ void __ext3_std_error (struct super_block * sb, const char * function,
274 * case we take the easy way out and panic immediately. 283 * case we take the easy way out and panic immediately.
275 */ 284 */
276 285
277void ext3_abort (struct super_block * sb, const char * function, 286void ext3_abort(struct super_block *sb, const char *function,
278 const char * fmt, ...) 287 const char *fmt, ...)
279{ 288{
289 struct va_format vaf;
280 va_list args; 290 va_list args;
281 291
282 va_start(args, fmt); 292 va_start(args, fmt);
283 printk(KERN_CRIT "EXT3-fs (%s): error: %s: ", sb->s_id, function); 293
284 vprintk(fmt, args); 294 vaf.fmt = fmt;
285 printk("\n"); 295 vaf.va = &args;
296
297 printk(KERN_CRIT "EXT3-fs (%s): error: %s: %pV\n",
298 sb->s_id, function, &vaf);
299
286 va_end(args); 300 va_end(args);
287 301
288 if (test_opt(sb, ERRORS_PANIC)) 302 if (test_opt(sb, ERRORS_PANIC))
@@ -300,16 +314,20 @@ void ext3_abort (struct super_block * sb, const char * function,
300 journal_abort(EXT3_SB(sb)->s_journal, -EIO); 314 journal_abort(EXT3_SB(sb)->s_journal, -EIO);
301} 315}
302 316
303void ext3_warning (struct super_block * sb, const char * function, 317void ext3_warning(struct super_block *sb, const char *function,
304 const char * fmt, ...) 318 const char *fmt, ...)
305{ 319{
320 struct va_format vaf;
306 va_list args; 321 va_list args;
307 322
308 va_start(args, fmt); 323 va_start(args, fmt);
309 printk(KERN_WARNING "EXT3-fs (%s): warning: %s: ", 324
310 sb->s_id, function); 325 vaf.fmt = fmt;
311 vprintk(fmt, args); 326 vaf.va = &args;
312 printk("\n"); 327
328 printk(KERN_WARNING "EXT3-fs (%s): warning: %s: %pV\n",
329 sb->s_id, function, &vaf);
330
313 va_end(args); 331 va_end(args);
314} 332}
315 333
@@ -479,6 +497,13 @@ static struct inode *ext3_alloc_inode(struct super_block *sb)
479 return &ei->vfs_inode; 497 return &ei->vfs_inode;
480} 498}
481 499
500static void ext3_i_callback(struct rcu_head *head)
501{
502 struct inode *inode = container_of(head, struct inode, i_rcu);
503 INIT_LIST_HEAD(&inode->i_dentry);
504 kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
505}
506
482static void ext3_destroy_inode(struct inode *inode) 507static void ext3_destroy_inode(struct inode *inode)
483{ 508{
484 if (!list_empty(&(EXT3_I(inode)->i_orphan))) { 509 if (!list_empty(&(EXT3_I(inode)->i_orphan))) {
@@ -489,7 +514,7 @@ static void ext3_destroy_inode(struct inode *inode)
489 false); 514 false);
490 dump_stack(); 515 dump_stack();
491 } 516 }
492 kmem_cache_free(ext3_inode_cachep, EXT3_I(inode)); 517 call_rcu(&inode->i_rcu, ext3_i_callback);
493} 518}
494 519
495static void init_once(void *foo) 520static void init_once(void *foo)
@@ -1841,13 +1866,15 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1841 goto failed_mount; 1866 goto failed_mount;
1842 } 1867 }
1843 1868
1844 if (generic_check_addressable(sb->s_blocksize_bits, 1869 err = generic_check_addressable(sb->s_blocksize_bits,
1845 le32_to_cpu(es->s_blocks_count))) { 1870 le32_to_cpu(es->s_blocks_count));
1871 if (err) {
1846 ext3_msg(sb, KERN_ERR, 1872 ext3_msg(sb, KERN_ERR,
1847 "error: filesystem is too large to mount safely"); 1873 "error: filesystem is too large to mount safely");
1848 if (sizeof(sector_t) < 8) 1874 if (sizeof(sector_t) < 8)
1849 ext3_msg(sb, KERN_ERR, 1875 ext3_msg(sb, KERN_ERR,
1850 "error: CONFIG_LBDAF not enabled"); 1876 "error: CONFIG_LBDAF not enabled");
1877 ret = err;
1851 goto failed_mount; 1878 goto failed_mount;
1852 } 1879 }
1853 1880
@@ -2290,7 +2317,7 @@ static int ext3_load_journal(struct super_block *sb,
2290 EXT3_SB(sb)->s_journal = journal; 2317 EXT3_SB(sb)->s_journal = journal;
2291 ext3_clear_journal_err(sb, es); 2318 ext3_clear_journal_err(sb, es);
2292 2319
2293 if (journal_devnum && 2320 if (!really_read_only && journal_devnum &&
2294 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 2321 journal_devnum != le32_to_cpu(es->s_journal_dev)) {
2295 es->s_journal_dev = cpu_to_le32(journal_devnum); 2322 es->s_journal_dev = cpu_to_le32(journal_devnum);
2296 2323
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index e69dc6dfaa8..32e6cc23bd9 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -925,7 +925,7 @@ ext3_xattr_ibody_set(handle_t *handle, struct inode *inode,
925/* 925/*
926 * ext3_xattr_set_handle() 926 * ext3_xattr_set_handle()
927 * 927 *
928 * Create, replace or remove an extended attribute for this inode. Buffer 928 * Create, replace or remove an extended attribute for this inode. Value
929 * is NULL to remove an existing extended attribute, and non-NULL to 929 * is NULL to remove an existing extended attribute, and non-NULL to
930 * either replace an existing extended attribute, or create a new extended 930 * either replace an existing extended attribute, or create a new extended
931 * attribute. The flags XATTR_REPLACE and XATTR_CREATE 931 * attribute. The flags XATTR_REPLACE and XATTR_CREATE
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 5e2ed4504ea..e0270d1f8d8 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -238,10 +238,17 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
238} 238}
239 239
240int 240int
241ext4_check_acl(struct inode *inode, int mask) 241ext4_check_acl(struct inode *inode, int mask, unsigned int flags)
242{ 242{
243 struct posix_acl *acl = ext4_get_acl(inode, ACL_TYPE_ACCESS); 243 struct posix_acl *acl;
244
245 if (flags & IPERM_FLAG_RCU) {
246 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
247 return -ECHILD;
248 return -EAGAIN;
249 }
244 250
251 acl = ext4_get_acl(inode, ACL_TYPE_ACCESS);
245 if (IS_ERR(acl)) 252 if (IS_ERR(acl))
246 return PTR_ERR(acl); 253 return PTR_ERR(acl);
247 if (acl) { 254 if (acl) {
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index 9d843d5deac..dec821168fd 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -54,7 +54,7 @@ static inline int ext4_acl_count(size_t size)
54#ifdef CONFIG_EXT4_FS_POSIX_ACL 54#ifdef CONFIG_EXT4_FS_POSIX_ACL
55 55
56/* acl.c */ 56/* acl.c */
57extern int ext4_check_acl(struct inode *, int); 57extern int ext4_check_acl(struct inode *, int, unsigned int);
58extern int ext4_acl_chmod(struct inode *); 58extern int ext4_acl_chmod(struct inode *);
59extern int ext4_init_acl(handle_t *, struct inode *, struct inode *); 59extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);
60 60
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 14c3af26c67..adf96b82278 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -592,7 +592,8 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
592 * Account for the allocated meta blocks. We will never 592 * Account for the allocated meta blocks. We will never
593 * fail EDQUOT for metdata, but we do account for it. 593 * fail EDQUOT for metdata, but we do account for it.
594 */ 594 */
595 if (!(*errp) && EXT4_I(inode)->i_delalloc_reserved_flag) { 595 if (!(*errp) &&
596 ext4_test_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED)) {
596 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 597 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
597 EXT4_I(inode)->i_allocated_meta_blocks += ar.len; 598 EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
598 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 599 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index ece76fb6a40..164c56092e5 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -60,9 +60,13 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
60 return (ext4_filetype_table[filetype]); 60 return (ext4_filetype_table[filetype]);
61} 61}
62 62
63 63/*
64 * Return 0 if the directory entry is OK, and 1 if there is a problem
65 *
66 * Note: this is the opposite of what ext2 and ext3 historically returned...
67 */
64int __ext4_check_dir_entry(const char *function, unsigned int line, 68int __ext4_check_dir_entry(const char *function, unsigned int line,
65 struct inode *dir, 69 struct inode *dir, struct file *filp,
66 struct ext4_dir_entry_2 *de, 70 struct ext4_dir_entry_2 *de,
67 struct buffer_head *bh, 71 struct buffer_head *bh,
68 unsigned int offset) 72 unsigned int offset)
@@ -71,26 +75,37 @@ int __ext4_check_dir_entry(const char *function, unsigned int line,
71 const int rlen = ext4_rec_len_from_disk(de->rec_len, 75 const int rlen = ext4_rec_len_from_disk(de->rec_len,
72 dir->i_sb->s_blocksize); 76 dir->i_sb->s_blocksize);
73 77
74 if (rlen < EXT4_DIR_REC_LEN(1)) 78 if (unlikely(rlen < EXT4_DIR_REC_LEN(1)))
75 error_msg = "rec_len is smaller than minimal"; 79 error_msg = "rec_len is smaller than minimal";
76 else if (rlen % 4 != 0) 80 else if (unlikely(rlen % 4 != 0))
77 error_msg = "rec_len % 4 != 0"; 81 error_msg = "rec_len % 4 != 0";
78 else if (rlen < EXT4_DIR_REC_LEN(de->name_len)) 82 else if (unlikely(rlen < EXT4_DIR_REC_LEN(de->name_len)))
79 error_msg = "rec_len is too small for name_len"; 83 error_msg = "rec_len is too small for name_len";
80 else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize) 84 else if (unlikely(((char *) de - bh->b_data) + rlen >
85 dir->i_sb->s_blocksize))
81 error_msg = "directory entry across blocks"; 86 error_msg = "directory entry across blocks";
82 else if (le32_to_cpu(de->inode) > 87 else if (unlikely(le32_to_cpu(de->inode) >
83 le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count)) 88 le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count)))
84 error_msg = "inode out of bounds"; 89 error_msg = "inode out of bounds";
90 else
91 return 0;
85 92
86 if (error_msg != NULL) 93 if (filp)
87 ext4_error_inode(dir, function, line, bh->b_blocknr, 94 ext4_error_file(filp, function, line, bh ? bh->b_blocknr : 0,
88 "bad entry in directory: %s - " 95 "bad entry in directory: %s - offset=%u(%u), "
89 "offset=%u(%u), inode=%u, rec_len=%d, name_len=%d", 96 "inode=%u, rec_len=%d, name_len=%d",
90 error_msg, (unsigned) (offset%bh->b_size), offset, 97 error_msg, (unsigned) (offset%bh->b_size),
91 le32_to_cpu(de->inode), 98 offset, le32_to_cpu(de->inode),
92 rlen, de->name_len); 99 rlen, de->name_len);
93 return error_msg == NULL ? 1 : 0; 100 else
101 ext4_error_inode(dir, function, line, bh ? bh->b_blocknr : 0,
102 "bad entry in directory: %s - offset=%u(%u), "
103 "inode=%u, rec_len=%d, name_len=%d",
104 error_msg, (unsigned) (offset%bh->b_size),
105 offset, le32_to_cpu(de->inode),
106 rlen, de->name_len);
107
108 return 1;
94} 109}
95 110
96static int ext4_readdir(struct file *filp, 111static int ext4_readdir(struct file *filp,
@@ -152,8 +167,9 @@ static int ext4_readdir(struct file *filp,
152 */ 167 */
153 if (!bh) { 168 if (!bh) {
154 if (!dir_has_error) { 169 if (!dir_has_error) {
155 EXT4_ERROR_INODE(inode, "directory " 170 EXT4_ERROR_FILE(filp, 0,
156 "contains a hole at offset %Lu", 171 "directory contains a "
172 "hole at offset %llu",
157 (unsigned long long) filp->f_pos); 173 (unsigned long long) filp->f_pos);
158 dir_has_error = 1; 174 dir_has_error = 1;
159 } 175 }
@@ -194,8 +210,8 @@ revalidate:
194 while (!error && filp->f_pos < inode->i_size 210 while (!error && filp->f_pos < inode->i_size
195 && offset < sb->s_blocksize) { 211 && offset < sb->s_blocksize) {
196 de = (struct ext4_dir_entry_2 *) (bh->b_data + offset); 212 de = (struct ext4_dir_entry_2 *) (bh->b_data + offset);
197 if (!ext4_check_dir_entry(inode, de, 213 if (ext4_check_dir_entry(inode, filp, de,
198 bh, offset)) { 214 bh, offset)) {
199 /* 215 /*
200 * On error, skip the f_pos to the next block 216 * On error, skip the f_pos to the next block
201 */ 217 */
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 1f253a9a141..1de65f57203 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -62,8 +62,8 @@
62#define EXT4_ERROR_INODE_BLOCK(inode, block, fmt, a...) \ 62#define EXT4_ERROR_INODE_BLOCK(inode, block, fmt, a...) \
63 ext4_error_inode((inode), __func__, __LINE__, (block), (fmt), ## a) 63 ext4_error_inode((inode), __func__, __LINE__, (block), (fmt), ## a)
64 64
65#define EXT4_ERROR_FILE(file, fmt, a...) \ 65#define EXT4_ERROR_FILE(file, block, fmt, a...) \
66 ext4_error_file(__func__, __LINE__, (file), (fmt), ## a) 66 ext4_error_file((file), __func__, __LINE__, (block), (fmt), ## a)
67 67
68/* data type for block offset of block group */ 68/* data type for block offset of block group */
69typedef int ext4_grpblk_t; 69typedef int ext4_grpblk_t;
@@ -561,22 +561,6 @@ struct ext4_new_group_data {
561#define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION 561#define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION
562#endif 562#endif
563 563
564
565/*
566 * Mount options
567 */
568struct ext4_mount_options {
569 unsigned long s_mount_opt;
570 uid_t s_resuid;
571 gid_t s_resgid;
572 unsigned long s_commit_interval;
573 u32 s_min_batch_time, s_max_batch_time;
574#ifdef CONFIG_QUOTA
575 int s_jquota_fmt;
576 char *s_qf_names[MAXQUOTAS];
577#endif
578};
579
580/* Max physical block we can address w/o extents */ 564/* Max physical block we can address w/o extents */
581#define EXT4_MAX_BLOCK_FILE_PHYS 0xFFFFFFFF 565#define EXT4_MAX_BLOCK_FILE_PHYS 0xFFFFFFFF
582 566
@@ -709,6 +693,8 @@ do { \
709 if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \ 693 if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
710 ext4_decode_extra_time(&(inode)->xtime, \ 694 ext4_decode_extra_time(&(inode)->xtime, \
711 raw_inode->xtime ## _extra); \ 695 raw_inode->xtime ## _extra); \
696 else \
697 (inode)->xtime.tv_nsec = 0; \
712} while (0) 698} while (0)
713 699
714#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \ 700#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \
@@ -719,6 +705,8 @@ do { \
719 if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \ 705 if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
720 ext4_decode_extra_time(&(einode)->xtime, \ 706 ext4_decode_extra_time(&(einode)->xtime, \
721 raw_inode->xtime ## _extra); \ 707 raw_inode->xtime ## _extra); \
708 else \
709 (einode)->xtime.tv_nsec = 0; \
722} while (0) 710} while (0)
723 711
724#define i_disk_version osd1.linux1.l_i_version 712#define i_disk_version osd1.linux1.l_i_version
@@ -750,12 +738,13 @@ do { \
750 738
751/* 739/*
752 * storage for cached extent 740 * storage for cached extent
741 * If ec_len == 0, then the cache is invalid.
742 * If ec_start == 0, then the cache represents a gap (null mapping)
753 */ 743 */
754struct ext4_ext_cache { 744struct ext4_ext_cache {
755 ext4_fsblk_t ec_start; 745 ext4_fsblk_t ec_start;
756 ext4_lblk_t ec_block; 746 ext4_lblk_t ec_block;
757 __u32 ec_len; /* must be 32bit to return holes */ 747 __u32 ec_len; /* must be 32bit to return holes */
758 __u32 ec_type;
759}; 748};
760 749
761/* 750/*
@@ -774,10 +763,12 @@ struct ext4_inode_info {
774 * near to their parent directory's inode. 763 * near to their parent directory's inode.
775 */ 764 */
776 ext4_group_t i_block_group; 765 ext4_group_t i_block_group;
766 ext4_lblk_t i_dir_start_lookup;
767#if (BITS_PER_LONG < 64)
777 unsigned long i_state_flags; /* Dynamic state flags */ 768 unsigned long i_state_flags; /* Dynamic state flags */
769#endif
778 unsigned long i_flags; 770 unsigned long i_flags;
779 771
780 ext4_lblk_t i_dir_start_lookup;
781#ifdef CONFIG_EXT4_FS_XATTR 772#ifdef CONFIG_EXT4_FS_XATTR
782 /* 773 /*
783 * Extended attributes can be read independently of the main file 774 * Extended attributes can be read independently of the main file
@@ -820,7 +811,7 @@ struct ext4_inode_info {
820 */ 811 */
821 struct rw_semaphore i_data_sem; 812 struct rw_semaphore i_data_sem;
822 struct inode vfs_inode; 813 struct inode vfs_inode;
823 struct jbd2_inode jinode; 814 struct jbd2_inode *jinode;
824 815
825 struct ext4_ext_cache i_cached_extent; 816 struct ext4_ext_cache i_cached_extent;
826 /* 817 /*
@@ -840,14 +831,12 @@ struct ext4_inode_info {
840 unsigned int i_reserved_data_blocks; 831 unsigned int i_reserved_data_blocks;
841 unsigned int i_reserved_meta_blocks; 832 unsigned int i_reserved_meta_blocks;
842 unsigned int i_allocated_meta_blocks; 833 unsigned int i_allocated_meta_blocks;
843 unsigned short i_delalloc_reserved_flag; 834 ext4_lblk_t i_da_metadata_calc_last_lblock;
844 sector_t i_da_metadata_calc_last_lblock;
845 int i_da_metadata_calc_len; 835 int i_da_metadata_calc_len;
846 836
847 /* on-disk additional length */ 837 /* on-disk additional length */
848 __u16 i_extra_isize; 838 __u16 i_extra_isize;
849 839
850 spinlock_t i_block_reservation_lock;
851#ifdef CONFIG_QUOTA 840#ifdef CONFIG_QUOTA
852 /* quota space reservation, managed internally by quota code */ 841 /* quota space reservation, managed internally by quota code */
853 qsize_t i_reserved_quota; 842 qsize_t i_reserved_quota;
@@ -856,9 +845,11 @@ struct ext4_inode_info {
856 /* completed IOs that might need unwritten extents handling */ 845 /* completed IOs that might need unwritten extents handling */
857 struct list_head i_completed_io_list; 846 struct list_head i_completed_io_list;
858 spinlock_t i_completed_io_lock; 847 spinlock_t i_completed_io_lock;
848 atomic_t i_ioend_count; /* Number of outstanding io_end structs */
859 /* current io_end structure for async DIO write*/ 849 /* current io_end structure for async DIO write*/
860 ext4_io_end_t *cur_aio_dio; 850 ext4_io_end_t *cur_aio_dio;
861 atomic_t i_ioend_count; /* Number of outstanding io_end structs */ 851
852 spinlock_t i_block_reservation_lock;
862 853
863 /* 854 /*
864 * Transactions that contain inode's metadata needed to complete 855 * Transactions that contain inode's metadata needed to complete
@@ -917,11 +908,20 @@ struct ext4_inode_info {
917#define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */ 908#define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */
918#define EXT4_MOUNT_INIT_INODE_TABLE 0x80000000 /* Initialize uninitialized itables */ 909#define EXT4_MOUNT_INIT_INODE_TABLE 0x80000000 /* Initialize uninitialized itables */
919 910
920#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt 911#define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \
921#define set_opt(o, opt) o |= EXT4_MOUNT_##opt 912 ~EXT4_MOUNT_##opt
913#define set_opt(sb, opt) EXT4_SB(sb)->s_mount_opt |= \
914 EXT4_MOUNT_##opt
922#define test_opt(sb, opt) (EXT4_SB(sb)->s_mount_opt & \ 915#define test_opt(sb, opt) (EXT4_SB(sb)->s_mount_opt & \
923 EXT4_MOUNT_##opt) 916 EXT4_MOUNT_##opt)
924 917
918#define clear_opt2(sb, opt) EXT4_SB(sb)->s_mount_opt2 &= \
919 ~EXT4_MOUNT2_##opt
920#define set_opt2(sb, opt) EXT4_SB(sb)->s_mount_opt2 |= \
921 EXT4_MOUNT2_##opt
922#define test_opt2(sb, opt) (EXT4_SB(sb)->s_mount_opt2 & \
923 EXT4_MOUNT2_##opt)
924
925#define ext4_set_bit ext2_set_bit 925#define ext4_set_bit ext2_set_bit
926#define ext4_set_bit_atomic ext2_set_bit_atomic 926#define ext4_set_bit_atomic ext2_set_bit_atomic
927#define ext4_clear_bit ext2_clear_bit 927#define ext4_clear_bit ext2_clear_bit
@@ -1087,6 +1087,7 @@ struct ext4_sb_info {
1087 struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ 1087 struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
1088 struct buffer_head **s_group_desc; 1088 struct buffer_head **s_group_desc;
1089 unsigned int s_mount_opt; 1089 unsigned int s_mount_opt;
1090 unsigned int s_mount_opt2;
1090 unsigned int s_mount_flags; 1091 unsigned int s_mount_flags;
1091 ext4_fsblk_t s_sb_block; 1092 ext4_fsblk_t s_sb_block;
1092 uid_t s_resuid; 1093 uid_t s_resuid;
@@ -1237,24 +1238,39 @@ enum {
1237 EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */ 1238 EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */
1238 EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/ 1239 EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/
1239 EXT4_STATE_NEWENTRY, /* File just added to dir */ 1240 EXT4_STATE_NEWENTRY, /* File just added to dir */
1241 EXT4_STATE_DELALLOC_RESERVED, /* blks already reserved for delalloc */
1240}; 1242};
1241 1243
1242#define EXT4_INODE_BIT_FNS(name, field) \ 1244#define EXT4_INODE_BIT_FNS(name, field, offset) \
1243static inline int ext4_test_inode_##name(struct inode *inode, int bit) \ 1245static inline int ext4_test_inode_##name(struct inode *inode, int bit) \
1244{ \ 1246{ \
1245 return test_bit(bit, &EXT4_I(inode)->i_##field); \ 1247 return test_bit(bit + (offset), &EXT4_I(inode)->i_##field); \
1246} \ 1248} \
1247static inline void ext4_set_inode_##name(struct inode *inode, int bit) \ 1249static inline void ext4_set_inode_##name(struct inode *inode, int bit) \
1248{ \ 1250{ \
1249 set_bit(bit, &EXT4_I(inode)->i_##field); \ 1251 set_bit(bit + (offset), &EXT4_I(inode)->i_##field); \
1250} \ 1252} \
1251static inline void ext4_clear_inode_##name(struct inode *inode, int bit) \ 1253static inline void ext4_clear_inode_##name(struct inode *inode, int bit) \
1252{ \ 1254{ \
1253 clear_bit(bit, &EXT4_I(inode)->i_##field); \ 1255 clear_bit(bit + (offset), &EXT4_I(inode)->i_##field); \
1254} 1256}
1255 1257
1256EXT4_INODE_BIT_FNS(flag, flags) 1258EXT4_INODE_BIT_FNS(flag, flags, 0)
1257EXT4_INODE_BIT_FNS(state, state_flags) 1259#if (BITS_PER_LONG < 64)
1260EXT4_INODE_BIT_FNS(state, state_flags, 0)
1261
1262static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
1263{
1264 (ei)->i_state_flags = 0;
1265}
1266#else
1267EXT4_INODE_BIT_FNS(state, flags, 32)
1268
1269static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
1270{
1271 /* We depend on the fact that callers will set i_flags */
1272}
1273#endif
1258#else 1274#else
1259/* Assume that user mode programs are passing in an ext4fs superblock, not 1275/* Assume that user mode programs are passing in an ext4fs superblock, not
1260 * a kernel struct super_block. This will allow us to call the feature-test 1276 * a kernel struct super_block. This will allow us to call the feature-test
@@ -1642,10 +1658,12 @@ extern unsigned ext4_init_block_bitmap(struct super_block *sb,
1642 1658
1643/* dir.c */ 1659/* dir.c */
1644extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *, 1660extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *,
1661 struct file *,
1645 struct ext4_dir_entry_2 *, 1662 struct ext4_dir_entry_2 *,
1646 struct buffer_head *, unsigned int); 1663 struct buffer_head *, unsigned int);
1647#define ext4_check_dir_entry(dir, de, bh, offset) \ 1664#define ext4_check_dir_entry(dir, filp, de, bh, offset) \
1648 __ext4_check_dir_entry(__func__, __LINE__, (dir), (de), (bh), (offset)) 1665 unlikely(__ext4_check_dir_entry(__func__, __LINE__, (dir), (filp), \
1666 (de), (bh), (offset)))
1649extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, 1667extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
1650 __u32 minor_hash, 1668 __u32 minor_hash,
1651 struct ext4_dir_entry_2 *dirent); 1669 struct ext4_dir_entry_2 *dirent);
@@ -1653,6 +1671,7 @@ extern void ext4_htree_free_dir_info(struct dir_private_info *p);
1653 1671
1654/* fsync.c */ 1672/* fsync.c */
1655extern int ext4_sync_file(struct file *, int); 1673extern int ext4_sync_file(struct file *, int);
1674extern int ext4_flush_completed_IO(struct inode *);
1656 1675
1657/* hash.c */ 1676/* hash.c */
1658extern int ext4fs_dirhash(const char *name, int len, struct 1677extern int ext4fs_dirhash(const char *name, int len, struct
@@ -1752,8 +1771,8 @@ extern void ext4_error_inode(struct inode *, const char *, unsigned int,
1752 ext4_fsblk_t, const char *, ...) 1771 ext4_fsblk_t, const char *, ...)
1753 __attribute__ ((format (printf, 5, 6))); 1772 __attribute__ ((format (printf, 5, 6)));
1754extern void ext4_error_file(struct file *, const char *, unsigned int, 1773extern void ext4_error_file(struct file *, const char *, unsigned int,
1755 const char *, ...) 1774 ext4_fsblk_t, const char *, ...)
1756 __attribute__ ((format (printf, 4, 5))); 1775 __attribute__ ((format (printf, 5, 6)));
1757extern void __ext4_std_error(struct super_block *, const char *, 1776extern void __ext4_std_error(struct super_block *, const char *,
1758 unsigned int, int); 1777 unsigned int, int);
1759extern void __ext4_abort(struct super_block *, const char *, unsigned int, 1778extern void __ext4_abort(struct super_block *, const char *, unsigned int,
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 28ce70fd9cd..2e29abb30f7 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -119,10 +119,6 @@ struct ext4_ext_path {
119 * structure for external API 119 * structure for external API
120 */ 120 */
121 121
122#define EXT4_EXT_CACHE_NO 0
123#define EXT4_EXT_CACHE_GAP 1
124#define EXT4_EXT_CACHE_EXTENT 2
125
126/* 122/*
127 * to be called by ext4_ext_walk_space() 123 * to be called by ext4_ext_walk_space()
128 * negative retcode - error 124 * negative retcode - error
@@ -197,7 +193,7 @@ static inline unsigned short ext_depth(struct inode *inode)
197static inline void 193static inline void
198ext4_ext_invalidate_cache(struct inode *inode) 194ext4_ext_invalidate_cache(struct inode *inode)
199{ 195{
200 EXT4_I(inode)->i_cached_extent.ec_type = EXT4_EXT_CACHE_NO; 196 EXT4_I(inode)->i_cached_extent.ec_len = 0;
201} 197}
202 198
203static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext) 199static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext)
@@ -278,7 +274,7 @@ static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix,
278} 274}
279 275
280extern int ext4_ext_calc_metadata_amount(struct inode *inode, 276extern int ext4_ext_calc_metadata_amount(struct inode *inode,
281 sector_t lblocks); 277 ext4_lblk_t lblocks);
282extern int ext4_extent_tree_init(handle_t *, struct inode *); 278extern int ext4_extent_tree_init(handle_t *, struct inode *);
283extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, 279extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
284 int num, 280 int num,
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index b0bd792c58c..d8b992e658c 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -253,7 +253,7 @@ static inline int ext4_journal_force_commit(journal_t *journal)
253static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode) 253static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode)
254{ 254{
255 if (ext4_handle_valid(handle)) 255 if (ext4_handle_valid(handle))
256 return jbd2_journal_file_inode(handle, &EXT4_I(inode)->jinode); 256 return jbd2_journal_file_inode(handle, EXT4_I(inode)->jinode);
257 return 0; 257 return 0;
258} 258}
259 259
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 966ecb0d8f8..d202d765dad 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -117,11 +117,33 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
117 struct ext4_extent *ex; 117 struct ext4_extent *ex;
118 depth = path->p_depth; 118 depth = path->p_depth;
119 119
120 /* try to predict block placement */ 120 /*
121 * Try to predict block placement assuming that we are
122 * filling in a file which will eventually be
123 * non-sparse --- i.e., in the case of libbfd writing
124 * an ELF object sections out-of-order but in a way
125 * the eventually results in a contiguous object or
126 * executable file, or some database extending a table
127 * space file. However, this is actually somewhat
128 * non-ideal if we are writing a sparse file such as
129 * qemu or KVM writing a raw image file that is going
130 * to stay fairly sparse, since it will end up
131 * fragmenting the file system's free space. Maybe we
132 * should have some hueristics or some way to allow
133 * userspace to pass a hint to file system,
134 * especiially if the latter case turns out to be
135 * common.
136 */
121 ex = path[depth].p_ext; 137 ex = path[depth].p_ext;
122 if (ex) 138 if (ex) {
123 return (ext4_ext_pblock(ex) + 139 ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
124 (block - le32_to_cpu(ex->ee_block))); 140 ext4_lblk_t ext_block = le32_to_cpu(ex->ee_block);
141
142 if (block > ext_block)
143 return ext_pblk + (block - ext_block);
144 else
145 return ext_pblk - (ext_block - block);
146 }
125 147
126 /* it looks like index is empty; 148 /* it looks like index is empty;
127 * try to find starting block from index itself */ 149 * try to find starting block from index itself */
@@ -244,7 +266,7 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
244 * to allocate @blocks 266 * to allocate @blocks
245 * Worse case is one block per extent 267 * Worse case is one block per extent
246 */ 268 */
247int ext4_ext_calc_metadata_amount(struct inode *inode, sector_t lblock) 269int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock)
248{ 270{
249 struct ext4_inode_info *ei = EXT4_I(inode); 271 struct ext4_inode_info *ei = EXT4_I(inode);
250 int idxs, num = 0; 272 int idxs, num = 0;
@@ -1872,12 +1894,10 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
1872 cbex.ec_block = start; 1894 cbex.ec_block = start;
1873 cbex.ec_len = end - start; 1895 cbex.ec_len = end - start;
1874 cbex.ec_start = 0; 1896 cbex.ec_start = 0;
1875 cbex.ec_type = EXT4_EXT_CACHE_GAP;
1876 } else { 1897 } else {
1877 cbex.ec_block = le32_to_cpu(ex->ee_block); 1898 cbex.ec_block = le32_to_cpu(ex->ee_block);
1878 cbex.ec_len = ext4_ext_get_actual_len(ex); 1899 cbex.ec_len = ext4_ext_get_actual_len(ex);
1879 cbex.ec_start = ext4_ext_pblock(ex); 1900 cbex.ec_start = ext4_ext_pblock(ex);
1880 cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
1881 } 1901 }
1882 1902
1883 if (unlikely(cbex.ec_len == 0)) { 1903 if (unlikely(cbex.ec_len == 0)) {
@@ -1917,13 +1937,12 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
1917 1937
1918static void 1938static void
1919ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block, 1939ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block,
1920 __u32 len, ext4_fsblk_t start, int type) 1940 __u32 len, ext4_fsblk_t start)
1921{ 1941{
1922 struct ext4_ext_cache *cex; 1942 struct ext4_ext_cache *cex;
1923 BUG_ON(len == 0); 1943 BUG_ON(len == 0);
1924 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1944 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1925 cex = &EXT4_I(inode)->i_cached_extent; 1945 cex = &EXT4_I(inode)->i_cached_extent;
1926 cex->ec_type = type;
1927 cex->ec_block = block; 1946 cex->ec_block = block;
1928 cex->ec_len = len; 1947 cex->ec_len = len;
1929 cex->ec_start = start; 1948 cex->ec_start = start;
@@ -1976,15 +1995,18 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
1976 } 1995 }
1977 1996
1978 ext_debug(" -> %u:%lu\n", lblock, len); 1997 ext_debug(" -> %u:%lu\n", lblock, len);
1979 ext4_ext_put_in_cache(inode, lblock, len, 0, EXT4_EXT_CACHE_GAP); 1998 ext4_ext_put_in_cache(inode, lblock, len, 0);
1980} 1999}
1981 2000
2001/*
2002 * Return 0 if cache is invalid; 1 if the cache is valid
2003 */
1982static int 2004static int
1983ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, 2005ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
1984 struct ext4_extent *ex) 2006 struct ext4_extent *ex)
1985{ 2007{
1986 struct ext4_ext_cache *cex; 2008 struct ext4_ext_cache *cex;
1987 int ret = EXT4_EXT_CACHE_NO; 2009 int ret = 0;
1988 2010
1989 /* 2011 /*
1990 * We borrow i_block_reservation_lock to protect i_cached_extent 2012 * We borrow i_block_reservation_lock to protect i_cached_extent
@@ -1993,11 +2015,9 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
1993 cex = &EXT4_I(inode)->i_cached_extent; 2015 cex = &EXT4_I(inode)->i_cached_extent;
1994 2016
1995 /* has cache valid data? */ 2017 /* has cache valid data? */
1996 if (cex->ec_type == EXT4_EXT_CACHE_NO) 2018 if (cex->ec_len == 0)
1997 goto errout; 2019 goto errout;
1998 2020
1999 BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP &&
2000 cex->ec_type != EXT4_EXT_CACHE_EXTENT);
2001 if (in_range(block, cex->ec_block, cex->ec_len)) { 2021 if (in_range(block, cex->ec_block, cex->ec_len)) {
2002 ex->ee_block = cpu_to_le32(cex->ec_block); 2022 ex->ee_block = cpu_to_le32(cex->ec_block);
2003 ext4_ext_store_pblock(ex, cex->ec_start); 2023 ext4_ext_store_pblock(ex, cex->ec_start);
@@ -2005,7 +2025,7 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
2005 ext_debug("%u cached by %u:%u:%llu\n", 2025 ext_debug("%u cached by %u:%u:%llu\n",
2006 block, 2026 block,
2007 cex->ec_block, cex->ec_len, cex->ec_start); 2027 cex->ec_block, cex->ec_len, cex->ec_start);
2008 ret = cex->ec_type; 2028 ret = 1;
2009 } 2029 }
2010errout: 2030errout:
2011 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 2031 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
@@ -3082,7 +3102,7 @@ static void unmap_underlying_metadata_blocks(struct block_device *bdev,
3082 * Handle EOFBLOCKS_FL flag, clearing it if necessary 3102 * Handle EOFBLOCKS_FL flag, clearing it if necessary
3083 */ 3103 */
3084static int check_eofblocks_fl(handle_t *handle, struct inode *inode, 3104static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
3085 struct ext4_map_blocks *map, 3105 ext4_lblk_t lblk,
3086 struct ext4_ext_path *path, 3106 struct ext4_ext_path *path,
3087 unsigned int len) 3107 unsigned int len)
3088{ 3108{
@@ -3112,7 +3132,7 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
3112 * this turns out to be false, we can bail out from this 3132 * this turns out to be false, we can bail out from this
3113 * function immediately. 3133 * function immediately.
3114 */ 3134 */
3115 if (map->m_lblk + len < le32_to_cpu(last_ex->ee_block) + 3135 if (lblk + len < le32_to_cpu(last_ex->ee_block) +
3116 ext4_ext_get_actual_len(last_ex)) 3136 ext4_ext_get_actual_len(last_ex))
3117 return 0; 3137 return 0;
3118 /* 3138 /*
@@ -3168,8 +3188,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3168 path); 3188 path);
3169 if (ret >= 0) { 3189 if (ret >= 0) {
3170 ext4_update_inode_fsync_trans(handle, inode, 1); 3190 ext4_update_inode_fsync_trans(handle, inode, 1);
3171 err = check_eofblocks_fl(handle, inode, map, path, 3191 err = check_eofblocks_fl(handle, inode, map->m_lblk,
3172 map->m_len); 3192 path, map->m_len);
3173 } else 3193 } else
3174 err = ret; 3194 err = ret;
3175 goto out2; 3195 goto out2;
@@ -3199,7 +3219,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3199 ret = ext4_ext_convert_to_initialized(handle, inode, map, path); 3219 ret = ext4_ext_convert_to_initialized(handle, inode, map, path);
3200 if (ret >= 0) { 3220 if (ret >= 0) {
3201 ext4_update_inode_fsync_trans(handle, inode, 1); 3221 ext4_update_inode_fsync_trans(handle, inode, 1);
3202 err = check_eofblocks_fl(handle, inode, map, path, map->m_len); 3222 err = check_eofblocks_fl(handle, inode, map->m_lblk, path,
3223 map->m_len);
3203 if (err < 0) 3224 if (err < 0)
3204 goto out2; 3225 goto out2;
3205 } 3226 }
@@ -3276,7 +3297,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3276 struct ext4_extent_header *eh; 3297 struct ext4_extent_header *eh;
3277 struct ext4_extent newex, *ex; 3298 struct ext4_extent newex, *ex;
3278 ext4_fsblk_t newblock; 3299 ext4_fsblk_t newblock;
3279 int err = 0, depth, ret, cache_type; 3300 int err = 0, depth, ret;
3280 unsigned int allocated = 0; 3301 unsigned int allocated = 0;
3281 struct ext4_allocation_request ar; 3302 struct ext4_allocation_request ar;
3282 ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; 3303 ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
@@ -3285,9 +3306,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3285 map->m_lblk, map->m_len, inode->i_ino); 3306 map->m_lblk, map->m_len, inode->i_ino);
3286 3307
3287 /* check in cache */ 3308 /* check in cache */
3288 cache_type = ext4_ext_in_cache(inode, map->m_lblk, &newex); 3309 if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
3289 if (cache_type) { 3310 if (!newex.ee_start_lo && !newex.ee_start_hi) {
3290 if (cache_type == EXT4_EXT_CACHE_GAP) {
3291 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { 3311 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
3292 /* 3312 /*
3293 * block isn't allocated yet and 3313 * block isn't allocated yet and
@@ -3296,7 +3316,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3296 goto out2; 3316 goto out2;
3297 } 3317 }
3298 /* we should allocate requested block */ 3318 /* we should allocate requested block */
3299 } else if (cache_type == EXT4_EXT_CACHE_EXTENT) { 3319 } else {
3300 /* block is already allocated */ 3320 /* block is already allocated */
3301 newblock = map->m_lblk 3321 newblock = map->m_lblk
3302 - le32_to_cpu(newex.ee_block) 3322 - le32_to_cpu(newex.ee_block)
@@ -3305,8 +3325,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3305 allocated = ext4_ext_get_actual_len(&newex) - 3325 allocated = ext4_ext_get_actual_len(&newex) -
3306 (map->m_lblk - le32_to_cpu(newex.ee_block)); 3326 (map->m_lblk - le32_to_cpu(newex.ee_block));
3307 goto out; 3327 goto out;
3308 } else {
3309 BUG();
3310 } 3328 }
3311 } 3329 }
3312 3330
@@ -3357,8 +3375,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3357 /* Do not put uninitialized extent in the cache */ 3375 /* Do not put uninitialized extent in the cache */
3358 if (!ext4_ext_is_uninitialized(ex)) { 3376 if (!ext4_ext_is_uninitialized(ex)) {
3359 ext4_ext_put_in_cache(inode, ee_block, 3377 ext4_ext_put_in_cache(inode, ee_block,
3360 ee_len, ee_start, 3378 ee_len, ee_start);
3361 EXT4_EXT_CACHE_EXTENT);
3362 goto out; 3379 goto out;
3363 } 3380 }
3364 ret = ext4_ext_handle_uninitialized_extents(handle, 3381 ret = ext4_ext_handle_uninitialized_extents(handle,
@@ -3456,7 +3473,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3456 map->m_flags |= EXT4_MAP_UNINIT; 3473 map->m_flags |= EXT4_MAP_UNINIT;
3457 } 3474 }
3458 3475
3459 err = check_eofblocks_fl(handle, inode, map, path, ar.len); 3476 err = check_eofblocks_fl(handle, inode, map->m_lblk, path, ar.len);
3460 if (err) 3477 if (err)
3461 goto out2; 3478 goto out2;
3462 3479
@@ -3490,8 +3507,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3490 * when it is _not_ an uninitialized extent. 3507 * when it is _not_ an uninitialized extent.
3491 */ 3508 */
3492 if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) { 3509 if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) {
3493 ext4_ext_put_in_cache(inode, map->m_lblk, allocated, newblock, 3510 ext4_ext_put_in_cache(inode, map->m_lblk, allocated, newblock);
3494 EXT4_EXT_CACHE_EXTENT);
3495 ext4_update_inode_fsync_trans(handle, inode, 1); 3511 ext4_update_inode_fsync_trans(handle, inode, 1);
3496 } else 3512 } else
3497 ext4_update_inode_fsync_trans(handle, inode, 0); 3513 ext4_update_inode_fsync_trans(handle, inode, 0);
@@ -3519,6 +3535,12 @@ void ext4_ext_truncate(struct inode *inode)
3519 int err = 0; 3535 int err = 0;
3520 3536
3521 /* 3537 /*
3538 * finish any pending end_io work so we won't run the risk of
3539 * converting any truncated blocks to initialized later
3540 */
3541 ext4_flush_completed_IO(inode);
3542
3543 /*
3522 * probably first extent we're gonna free will be last in block 3544 * probably first extent we're gonna free will be last in block
3523 */ 3545 */
3524 err = ext4_writepage_trans_blocks(inode); 3546 err = ext4_writepage_trans_blocks(inode);
@@ -3767,7 +3789,7 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
3767 3789
3768 logical = (__u64)newex->ec_block << blksize_bits; 3790 logical = (__u64)newex->ec_block << blksize_bits;
3769 3791
3770 if (newex->ec_type == EXT4_EXT_CACHE_GAP) { 3792 if (newex->ec_start == 0) {
3771 pgoff_t offset; 3793 pgoff_t offset;
3772 struct page *page; 3794 struct page *page;
3773 struct buffer_head *bh = NULL; 3795 struct buffer_head *bh = NULL;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 5a5c55ddcee..bb003dc9fff 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -104,6 +104,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
104{ 104{
105 struct super_block *sb = inode->i_sb; 105 struct super_block *sb = inode->i_sb;
106 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 106 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
107 struct ext4_inode_info *ei = EXT4_I(inode);
107 struct vfsmount *mnt = filp->f_path.mnt; 108 struct vfsmount *mnt = filp->f_path.mnt;
108 struct path path; 109 struct path path;
109 char buf[64], *cp; 110 char buf[64], *cp;
@@ -127,6 +128,27 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
127 ext4_mark_super_dirty(sb); 128 ext4_mark_super_dirty(sb);
128 } 129 }
129 } 130 }
131 /*
132 * Set up the jbd2_inode if we are opening the inode for
133 * writing and the journal is present
134 */
135 if (sbi->s_journal && !ei->jinode && (filp->f_mode & FMODE_WRITE)) {
136 struct jbd2_inode *jinode = jbd2_alloc_inode(GFP_KERNEL);
137
138 spin_lock(&inode->i_lock);
139 if (!ei->jinode) {
140 if (!jinode) {
141 spin_unlock(&inode->i_lock);
142 return -ENOMEM;
143 }
144 ei->jinode = jinode;
145 jbd2_journal_init_jbd_inode(ei->jinode, inode);
146 jinode = NULL;
147 }
148 spin_unlock(&inode->i_lock);
149 if (unlikely(jinode != NULL))
150 jbd2_free_inode(jinode);
151 }
130 return dquot_file_open(inode, filp); 152 return dquot_file_open(inode, filp);
131} 153}
132 154
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index c1a7bc923cf..7829b287822 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -75,7 +75,7 @@ static void dump_completed_IO(struct inode * inode)
75 * to written. 75 * to written.
76 * The function return the number of pending IOs on success. 76 * The function return the number of pending IOs on success.
77 */ 77 */
78static int flush_completed_IO(struct inode *inode) 78extern int ext4_flush_completed_IO(struct inode *inode)
79{ 79{
80 ext4_io_end_t *io; 80 ext4_io_end_t *io;
81 struct ext4_inode_info *ei = EXT4_I(inode); 81 struct ext4_inode_info *ei = EXT4_I(inode);
@@ -169,7 +169,7 @@ int ext4_sync_file(struct file *file, int datasync)
169 if (inode->i_sb->s_flags & MS_RDONLY) 169 if (inode->i_sb->s_flags & MS_RDONLY)
170 return 0; 170 return 0;
171 171
172 ret = flush_completed_IO(inode); 172 ret = ext4_flush_completed_IO(inode);
173 if (ret < 0) 173 if (ret < 0)
174 return ret; 174 return ret;
175 175
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 1ce240a23eb..eb9097aec6f 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1027,7 +1027,7 @@ got:
1027 inode->i_generation = sbi->s_next_generation++; 1027 inode->i_generation = sbi->s_next_generation++;
1028 spin_unlock(&sbi->s_next_gen_lock); 1028 spin_unlock(&sbi->s_next_gen_lock);
1029 1029
1030 ei->i_state_flags = 0; 1030 ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
1031 ext4_set_inode_state(inode, EXT4_STATE_NEW); 1031 ext4_set_inode_state(inode, EXT4_STATE_NEW);
1032 1032
1033 ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; 1033 ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ef9d5be0b2a..9f7f9e49914 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -39,7 +39,9 @@
39#include <linux/bio.h> 39#include <linux/bio.h>
40#include <linux/workqueue.h> 40#include <linux/workqueue.h>
41#include <linux/kernel.h> 41#include <linux/kernel.h>
42#include <linux/printk.h>
42#include <linux/slab.h> 43#include <linux/slab.h>
44#include <linux/ratelimit.h>
43 45
44#include "ext4_jbd2.h" 46#include "ext4_jbd2.h"
45#include "xattr.h" 47#include "xattr.h"
@@ -54,10 +56,17 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode,
54 loff_t new_size) 56 loff_t new_size)
55{ 57{
56 trace_ext4_begin_ordered_truncate(inode, new_size); 58 trace_ext4_begin_ordered_truncate(inode, new_size);
57 return jbd2_journal_begin_ordered_truncate( 59 /*
58 EXT4_SB(inode->i_sb)->s_journal, 60 * If jinode is zero, then we never opened the file for
59 &EXT4_I(inode)->jinode, 61 * writing, so there's no need to call
60 new_size); 62 * jbd2_journal_begin_ordered_truncate() since there's no
63 * outstanding writes we need to flush.
64 */
65 if (!EXT4_I(inode)->jinode)
66 return 0;
67 return jbd2_journal_begin_ordered_truncate(EXT4_JOURNAL(inode),
68 EXT4_I(inode)->jinode,
69 new_size);
61} 70}
62 71
63static void ext4_invalidatepage(struct page *page, unsigned long offset); 72static void ext4_invalidatepage(struct page *page, unsigned long offset);
@@ -552,7 +561,7 @@ static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
552} 561}
553 562
554/** 563/**
555 * ext4_blks_to_allocate: Look up the block map and count the number 564 * ext4_blks_to_allocate - Look up the block map and count the number
556 * of direct blocks need to be allocated for the given branch. 565 * of direct blocks need to be allocated for the given branch.
557 * 566 *
558 * @branch: chain of indirect blocks 567 * @branch: chain of indirect blocks
@@ -591,13 +600,19 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
591 600
592/** 601/**
593 * ext4_alloc_blocks: multiple allocate blocks needed for a branch 602 * ext4_alloc_blocks: multiple allocate blocks needed for a branch
603 * @handle: handle for this transaction
604 * @inode: inode which needs allocated blocks
605 * @iblock: the logical block to start allocated at
606 * @goal: preferred physical block of allocation
594 * @indirect_blks: the number of blocks need to allocate for indirect 607 * @indirect_blks: the number of blocks need to allocate for indirect
595 * blocks 608 * blocks
596 * 609 * @blks: number of desired blocks
597 * @new_blocks: on return it will store the new block numbers for 610 * @new_blocks: on return it will store the new block numbers for
598 * the indirect blocks(if needed) and the first direct block, 611 * the indirect blocks(if needed) and the first direct block,
599 * @blks: on return it will store the total number of allocated 612 * @err: on return it will store the error code
600 * direct blocks 613 *
614 * This function will return the number of blocks allocated as
615 * requested by the passed-in parameters.
601 */ 616 */
602static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, 617static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
603 ext4_lblk_t iblock, ext4_fsblk_t goal, 618 ext4_lblk_t iblock, ext4_fsblk_t goal,
@@ -711,9 +726,11 @@ failed_out:
711 726
712/** 727/**
713 * ext4_alloc_branch - allocate and set up a chain of blocks. 728 * ext4_alloc_branch - allocate and set up a chain of blocks.
729 * @handle: handle for this transaction
714 * @inode: owner 730 * @inode: owner
715 * @indirect_blks: number of allocated indirect blocks 731 * @indirect_blks: number of allocated indirect blocks
716 * @blks: number of allocated direct blocks 732 * @blks: number of allocated direct blocks
733 * @goal: preferred place for allocation
717 * @offsets: offsets (in the blocks) to store the pointers to next. 734 * @offsets: offsets (in the blocks) to store the pointers to next.
718 * @branch: place to store the chain in. 735 * @branch: place to store the chain in.
719 * 736 *
@@ -826,6 +843,7 @@ failed:
826 843
827/** 844/**
828 * ext4_splice_branch - splice the allocated branch onto inode. 845 * ext4_splice_branch - splice the allocated branch onto inode.
846 * @handle: handle for this transaction
829 * @inode: owner 847 * @inode: owner
830 * @block: (logical) number of block we are adding 848 * @block: (logical) number of block we are adding
831 * @chain: chain of indirect blocks (with a missing link - see 849 * @chain: chain of indirect blocks (with a missing link - see
@@ -1081,7 +1099,7 @@ static int ext4_indirect_calc_metadata_amount(struct inode *inode,
1081 * Calculate the number of metadata blocks need to reserve 1099 * Calculate the number of metadata blocks need to reserve
1082 * to allocate a block located at @lblock 1100 * to allocate a block located at @lblock
1083 */ 1101 */
1084static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock) 1102static int ext4_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock)
1085{ 1103{
1086 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 1104 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
1087 return ext4_ext_calc_metadata_amount(inode, lblock); 1105 return ext4_ext_calc_metadata_amount(inode, lblock);
@@ -1320,7 +1338,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
1320 * avoid double accounting 1338 * avoid double accounting
1321 */ 1339 */
1322 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) 1340 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
1323 EXT4_I(inode)->i_delalloc_reserved_flag = 1; 1341 ext4_set_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
1324 /* 1342 /*
1325 * We need to check for EXT4 here because migrate 1343 * We need to check for EXT4 here because migrate
1326 * could have changed the inode type in between 1344 * could have changed the inode type in between
@@ -1350,7 +1368,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
1350 ext4_da_update_reserve_space(inode, retval, 1); 1368 ext4_da_update_reserve_space(inode, retval, 1);
1351 } 1369 }
1352 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) 1370 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
1353 EXT4_I(inode)->i_delalloc_reserved_flag = 0; 1371 ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
1354 1372
1355 up_write((&EXT4_I(inode)->i_data_sem)); 1373 up_write((&EXT4_I(inode)->i_data_sem));
1356 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { 1374 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
@@ -1878,7 +1896,7 @@ static int ext4_journalled_write_end(struct file *file,
1878/* 1896/*
1879 * Reserve a single block located at lblock 1897 * Reserve a single block located at lblock
1880 */ 1898 */
1881static int ext4_da_reserve_space(struct inode *inode, sector_t lblock) 1899static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
1882{ 1900{
1883 int retries = 0; 1901 int retries = 0;
1884 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1902 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -2239,7 +2257,7 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
2239 * affects functions in many different parts of the allocation 2257 * affects functions in many different parts of the allocation
2240 * call path. This flag exists primarily because we don't 2258 * call path. This flag exists primarily because we don't
2241 * want to change *many* call functions, so ext4_map_blocks() 2259 * want to change *many* call functions, so ext4_map_blocks()
2242 * will set the magic i_delalloc_reserved_flag once the 2260 * will set the EXT4_STATE_DELALLOC_RESERVED flag once the
2243 * inode's allocation semaphore is taken. 2261 * inode's allocation semaphore is taken.
2244 * 2262 *
2245 * If the blocks in questions were delalloc blocks, set 2263 * If the blocks in questions were delalloc blocks, set
@@ -3720,8 +3738,7 @@ static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode)
3720retry: 3738retry:
3721 io_end = ext4_init_io_end(inode, GFP_ATOMIC); 3739 io_end = ext4_init_io_end(inode, GFP_ATOMIC);
3722 if (!io_end) { 3740 if (!io_end) {
3723 if (printk_ratelimit()) 3741 pr_warn_ratelimited("%s: allocation fail\n", __func__);
3724 printk(KERN_WARNING "%s: allocation fail\n", __func__);
3725 schedule(); 3742 schedule();
3726 goto retry; 3743 goto retry;
3727 } 3744 }
@@ -4045,7 +4062,7 @@ int ext4_block_truncate_page(handle_t *handle,
4045 if (ext4_should_journal_data(inode)) { 4062 if (ext4_should_journal_data(inode)) {
4046 err = ext4_handle_dirty_metadata(handle, inode, bh); 4063 err = ext4_handle_dirty_metadata(handle, inode, bh);
4047 } else { 4064 } else {
4048 if (ext4_should_order_data(inode)) 4065 if (ext4_should_order_data(inode) && EXT4_I(inode)->jinode)
4049 err = ext4_jbd2_file_inode(handle, inode); 4066 err = ext4_jbd2_file_inode(handle, inode);
4050 mark_buffer_dirty(bh); 4067 mark_buffer_dirty(bh);
4051 } 4068 }
@@ -4169,6 +4186,7 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
4169{ 4186{
4170 __le32 *p; 4187 __le32 *p;
4171 int flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED; 4188 int flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED;
4189 int err;
4172 4190
4173 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) 4191 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
4174 flags |= EXT4_FREE_BLOCKS_METADATA; 4192 flags |= EXT4_FREE_BLOCKS_METADATA;
@@ -4184,11 +4202,23 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
4184 if (try_to_extend_transaction(handle, inode)) { 4202 if (try_to_extend_transaction(handle, inode)) {
4185 if (bh) { 4203 if (bh) {
4186 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); 4204 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
4187 ext4_handle_dirty_metadata(handle, inode, bh); 4205 err = ext4_handle_dirty_metadata(handle, inode, bh);
4206 if (unlikely(err)) {
4207 ext4_std_error(inode->i_sb, err);
4208 return 1;
4209 }
4210 }
4211 err = ext4_mark_inode_dirty(handle, inode);
4212 if (unlikely(err)) {
4213 ext4_std_error(inode->i_sb, err);
4214 return 1;
4215 }
4216 err = ext4_truncate_restart_trans(handle, inode,
4217 blocks_for_truncate(inode));
4218 if (unlikely(err)) {
4219 ext4_std_error(inode->i_sb, err);
4220 return 1;
4188 } 4221 }
4189 ext4_mark_inode_dirty(handle, inode);
4190 ext4_truncate_restart_trans(handle, inode,
4191 blocks_for_truncate(inode));
4192 if (bh) { 4222 if (bh) {
4193 BUFFER_TRACE(bh, "retaking write access"); 4223 BUFFER_TRACE(bh, "retaking write access");
4194 ext4_journal_get_write_access(handle, bh); 4224 ext4_journal_get_write_access(handle, bh);
@@ -4349,6 +4379,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
4349 (__le32 *) bh->b_data, 4379 (__le32 *) bh->b_data,
4350 (__le32 *) bh->b_data + addr_per_block, 4380 (__le32 *) bh->b_data + addr_per_block,
4351 depth); 4381 depth);
4382 brelse(bh);
4352 4383
4353 /* 4384 /*
4354 * Everything below this this pointer has been 4385 * Everything below this this pointer has been
@@ -4859,7 +4890,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4859 } 4890 }
4860 inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); 4891 inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
4861 4892
4862 ei->i_state_flags = 0; 4893 ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
4863 ei->i_dir_start_lookup = 0; 4894 ei->i_dir_start_lookup = 0;
4864 ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); 4895 ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
4865 /* We now have enough fields to check if the inode was active or not. 4896 /* We now have enough fields to check if the inode was active or not.
@@ -5118,7 +5149,7 @@ static int ext4_do_update_inode(handle_t *handle,
5118 if (ext4_inode_blocks_set(handle, raw_inode, ei)) 5149 if (ext4_inode_blocks_set(handle, raw_inode, ei))
5119 goto out_brelse; 5150 goto out_brelse;
5120 raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); 5151 raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
5121 raw_inode->i_flags = cpu_to_le32(ei->i_flags); 5152 raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF);
5122 if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != 5153 if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
5123 cpu_to_le32(EXT4_OS_HURD)) 5154 cpu_to_le32(EXT4_OS_HURD))
5124 raw_inode->i_file_acl_high = 5155 raw_inode->i_file_acl_high =
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 5b4d4e3a4d5..851f49b2f9d 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2608,18 +2608,12 @@ int ext4_mb_release(struct super_block *sb)
2608static inline int ext4_issue_discard(struct super_block *sb, 2608static inline int ext4_issue_discard(struct super_block *sb,
2609 ext4_group_t block_group, ext4_grpblk_t block, int count) 2609 ext4_group_t block_group, ext4_grpblk_t block, int count)
2610{ 2610{
2611 int ret;
2612 ext4_fsblk_t discard_block; 2611 ext4_fsblk_t discard_block;
2613 2612
2614 discard_block = block + ext4_group_first_block_no(sb, block_group); 2613 discard_block = block + ext4_group_first_block_no(sb, block_group);
2615 trace_ext4_discard_blocks(sb, 2614 trace_ext4_discard_blocks(sb,
2616 (unsigned long long) discard_block, count); 2615 (unsigned long long) discard_block, count);
2617 ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); 2616 return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
2618 if (ret == -EOPNOTSUPP) {
2619 ext4_warning(sb, "discard not supported, disabling");
2620 clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
2621 }
2622 return ret;
2623} 2617}
2624 2618
2625/* 2619/*
@@ -2631,7 +2625,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2631 struct super_block *sb = journal->j_private; 2625 struct super_block *sb = journal->j_private;
2632 struct ext4_buddy e4b; 2626 struct ext4_buddy e4b;
2633 struct ext4_group_info *db; 2627 struct ext4_group_info *db;
2634 int err, count = 0, count2 = 0; 2628 int err, ret, count = 0, count2 = 0;
2635 struct ext4_free_data *entry; 2629 struct ext4_free_data *entry;
2636 struct list_head *l, *ltmp; 2630 struct list_head *l, *ltmp;
2637 2631
@@ -2641,9 +2635,15 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2641 mb_debug(1, "gonna free %u blocks in group %u (0x%p):", 2635 mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
2642 entry->count, entry->group, entry); 2636 entry->count, entry->group, entry);
2643 2637
2644 if (test_opt(sb, DISCARD)) 2638 if (test_opt(sb, DISCARD)) {
2645 ext4_issue_discard(sb, entry->group, 2639 ret = ext4_issue_discard(sb, entry->group,
2646 entry->start_blk, entry->count); 2640 entry->start_blk, entry->count);
2641 if (unlikely(ret == -EOPNOTSUPP)) {
2642 ext4_warning(sb, "discard not supported, "
2643 "disabling");
2644 clear_opt(sb, DISCARD);
2645 }
2646 }
2647 2647
2648 err = ext4_mb_load_buddy(sb, entry->group, &e4b); 2648 err = ext4_mb_load_buddy(sb, entry->group, &e4b);
2649 /* we expect to find existing buddy because it's pinned */ 2649 /* we expect to find existing buddy because it's pinned */
@@ -3881,19 +3881,6 @@ repeat:
3881 } 3881 }
3882} 3882}
3883 3883
3884/*
3885 * finds all preallocated spaces and return blocks being freed to them
3886 * if preallocated space becomes full (no block is used from the space)
3887 * then the function frees space in buddy
3888 * XXX: at the moment, truncate (which is the only way to free blocks)
3889 * discards all preallocations
3890 */
3891static void ext4_mb_return_to_preallocation(struct inode *inode,
3892 struct ext4_buddy *e4b,
3893 sector_t block, int count)
3894{
3895 BUG_ON(!list_empty(&EXT4_I(inode)->i_prealloc_list));
3896}
3897#ifdef CONFIG_EXT4_DEBUG 3884#ifdef CONFIG_EXT4_DEBUG
3898static void ext4_mb_show_ac(struct ext4_allocation_context *ac) 3885static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
3899{ 3886{
@@ -4283,7 +4270,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4283 * EDQUOT check, as blocks and quotas have been already 4270 * EDQUOT check, as blocks and quotas have been already
4284 * reserved when data being copied into pagecache. 4271 * reserved when data being copied into pagecache.
4285 */ 4272 */
4286 if (EXT4_I(ar->inode)->i_delalloc_reserved_flag) 4273 if (ext4_test_inode_state(ar->inode, EXT4_STATE_DELALLOC_RESERVED))
4287 ar->flags |= EXT4_MB_DELALLOC_RESERVED; 4274 ar->flags |= EXT4_MB_DELALLOC_RESERVED;
4288 else { 4275 else {
4289 /* Without delayed allocation we need to verify 4276 /* Without delayed allocation we need to verify
@@ -4380,7 +4367,8 @@ out:
4380 if (inquota && ar->len < inquota) 4367 if (inquota && ar->len < inquota)
4381 dquot_free_block(ar->inode, inquota - ar->len); 4368 dquot_free_block(ar->inode, inquota - ar->len);
4382 if (!ar->len) { 4369 if (!ar->len) {
4383 if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) 4370 if (!ext4_test_inode_state(ar->inode,
4371 EXT4_STATE_DELALLOC_RESERVED))
4384 /* release all the reserved blocks if non delalloc */ 4372 /* release all the reserved blocks if non delalloc */
4385 percpu_counter_sub(&sbi->s_dirtyblocks_counter, 4373 percpu_counter_sub(&sbi->s_dirtyblocks_counter,
4386 reserv_blks); 4374 reserv_blks);
@@ -4626,7 +4614,11 @@ do_more:
4626 * blocks being freed are metadata. these blocks shouldn't 4614 * blocks being freed are metadata. these blocks shouldn't
4627 * be used until this transaction is committed 4615 * be used until this transaction is committed
4628 */ 4616 */
4629 new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS); 4617 new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
4618 if (!new_entry) {
4619 err = -ENOMEM;
4620 goto error_return;
4621 }
4630 new_entry->start_blk = bit; 4622 new_entry->start_blk = bit;
4631 new_entry->group = block_group; 4623 new_entry->group = block_group;
4632 new_entry->count = count; 4624 new_entry->count = count;
@@ -4643,7 +4635,6 @@ do_more:
4643 ext4_lock_group(sb, block_group); 4635 ext4_lock_group(sb, block_group);
4644 mb_clear_bits(bitmap_bh->b_data, bit, count); 4636 mb_clear_bits(bitmap_bh->b_data, bit, count);
4645 mb_free_blocks(inode, &e4b, bit, count); 4637 mb_free_blocks(inode, &e4b, bit, count);
4646 ext4_mb_return_to_preallocation(inode, &e4b, block, count);
4647 } 4638 }
4648 4639
4649 ret = ext4_free_blks_count(sb, gdp) + count; 4640 ret = ext4_free_blks_count(sb, gdp) + count;
@@ -4718,8 +4709,6 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count,
4718 ext4_unlock_group(sb, group); 4709 ext4_unlock_group(sb, group);
4719 4710
4720 ret = ext4_issue_discard(sb, group, start, count); 4711 ret = ext4_issue_discard(sb, group, start, count);
4721 if (ret)
4722 ext4_std_error(sb, ret);
4723 4712
4724 ext4_lock_group(sb, group); 4713 ext4_lock_group(sb, group);
4725 mb_free_blocks(NULL, e4b, start, ex.fe_len); 4714 mb_free_blocks(NULL, e4b, start, ex.fe_len);
@@ -4819,6 +4808,8 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
4819 ext4_group_t group, ngroups = ext4_get_groups_count(sb); 4808 ext4_group_t group, ngroups = ext4_get_groups_count(sb);
4820 ext4_grpblk_t cnt = 0, first_block, last_block; 4809 ext4_grpblk_t cnt = 0, first_block, last_block;
4821 uint64_t start, len, minlen, trimmed; 4810 uint64_t start, len, minlen, trimmed;
4811 ext4_fsblk_t first_data_blk =
4812 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
4822 int ret = 0; 4813 int ret = 0;
4823 4814
4824 start = range->start >> sb->s_blocksize_bits; 4815 start = range->start >> sb->s_blocksize_bits;
@@ -4828,6 +4819,10 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
4828 4819
4829 if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb))) 4820 if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb)))
4830 return -EINVAL; 4821 return -EINVAL;
4822 if (start < first_data_blk) {
4823 len -= first_data_blk - start;
4824 start = first_data_blk;
4825 }
4831 4826
4832 /* Determine first and last group to examine based on start and len */ 4827 /* Determine first and last group to examine based on start and len */
4833 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start, 4828 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
@@ -4851,7 +4846,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
4851 if (len >= EXT4_BLOCKS_PER_GROUP(sb)) 4846 if (len >= EXT4_BLOCKS_PER_GROUP(sb))
4852 len -= (EXT4_BLOCKS_PER_GROUP(sb) - first_block); 4847 len -= (EXT4_BLOCKS_PER_GROUP(sb) - first_block);
4853 else 4848 else
4854 last_block = len; 4849 last_block = first_block + len;
4855 4850
4856 if (e4b.bd_info->bb_free >= minlen) { 4851 if (e4b.bd_info->bb_free >= minlen) {
4857 cnt = ext4_trim_all_free(sb, &e4b, first_block, 4852 cnt = ext4_trim_all_free(sb, &e4b, first_block,
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 25f3a974b72..b0a126f23c2 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -496,7 +496,7 @@ int ext4_ext_migrate(struct inode *inode)
496 goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) * 496 goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) *
497 EXT4_INODES_PER_GROUP(inode->i_sb)) + 1; 497 EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
498 tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode, 498 tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
499 S_IFREG, 0, goal); 499 S_IFREG, NULL, goal);
500 if (IS_ERR(tmp_inode)) { 500 if (IS_ERR(tmp_inode)) {
501 retval = -ENOMEM; 501 retval = -ENOMEM;
502 ext4_journal_stop(handle); 502 ext4_journal_stop(handle);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index dc40e75cba8..5485390d32c 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -581,9 +581,9 @@ static int htree_dirblock_to_tree(struct file *dir_file,
581 dir->i_sb->s_blocksize - 581 dir->i_sb->s_blocksize -
582 EXT4_DIR_REC_LEN(0)); 582 EXT4_DIR_REC_LEN(0));
583 for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) { 583 for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
584 if (!ext4_check_dir_entry(dir, de, bh, 584 if (ext4_check_dir_entry(dir, NULL, de, bh,
585 (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb)) 585 (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
586 +((char *)de - bh->b_data))) { 586 + ((char *)de - bh->b_data))) {
587 /* On error, skip the f_pos to the next block. */ 587 /* On error, skip the f_pos to the next block. */
588 dir_file->f_pos = (dir_file->f_pos | 588 dir_file->f_pos = (dir_file->f_pos |
589 (dir->i_sb->s_blocksize - 1)) + 1; 589 (dir->i_sb->s_blocksize - 1)) + 1;
@@ -820,7 +820,7 @@ static inline int search_dirblock(struct buffer_head *bh,
820 if ((char *) de + namelen <= dlimit && 820 if ((char *) de + namelen <= dlimit &&
821 ext4_match (namelen, name, de)) { 821 ext4_match (namelen, name, de)) {
822 /* found a match - just to be sure, do a full check */ 822 /* found a match - just to be sure, do a full check */
823 if (!ext4_check_dir_entry(dir, de, bh, offset)) 823 if (ext4_check_dir_entry(dir, NULL, de, bh, offset))
824 return -1; 824 return -1;
825 *res_dir = de; 825 *res_dir = de;
826 return 1; 826 return 1;
@@ -1036,7 +1036,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru
1036 return ERR_PTR(-EIO); 1036 return ERR_PTR(-EIO);
1037 } 1037 }
1038 inode = ext4_iget(dir->i_sb, ino); 1038 inode = ext4_iget(dir->i_sb, ino);
1039 if (unlikely(IS_ERR(inode))) { 1039 if (IS_ERR(inode)) {
1040 if (PTR_ERR(inode) == -ESTALE) { 1040 if (PTR_ERR(inode) == -ESTALE) {
1041 EXT4_ERROR_INODE(dir, 1041 EXT4_ERROR_INODE(dir,
1042 "deleted inode referenced: %u", 1042 "deleted inode referenced: %u",
@@ -1269,7 +1269,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1269 de = (struct ext4_dir_entry_2 *)bh->b_data; 1269 de = (struct ext4_dir_entry_2 *)bh->b_data;
1270 top = bh->b_data + blocksize - reclen; 1270 top = bh->b_data + blocksize - reclen;
1271 while ((char *) de <= top) { 1271 while ((char *) de <= top) {
1272 if (!ext4_check_dir_entry(dir, de, bh, offset)) 1272 if (ext4_check_dir_entry(dir, NULL, de, bh, offset))
1273 return -EIO; 1273 return -EIO;
1274 if (ext4_match(namelen, name, de)) 1274 if (ext4_match(namelen, name, de))
1275 return -EEXIST; 1275 return -EEXIST;
@@ -1602,7 +1602,11 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1602 if (err) 1602 if (err)
1603 goto journal_error; 1603 goto journal_error;
1604 } 1604 }
1605 ext4_handle_dirty_metadata(handle, inode, frames[0].bh); 1605 err = ext4_handle_dirty_metadata(handle, inode, frames[0].bh);
1606 if (err) {
1607 ext4_std_error(inode->i_sb, err);
1608 goto cleanup;
1609 }
1606 } 1610 }
1607 de = do_split(handle, dir, &bh, frame, &hinfo, &err); 1611 de = do_split(handle, dir, &bh, frame, &hinfo, &err);
1608 if (!de) 1612 if (!de)
@@ -1630,17 +1634,21 @@ static int ext4_delete_entry(handle_t *handle,
1630{ 1634{
1631 struct ext4_dir_entry_2 *de, *pde; 1635 struct ext4_dir_entry_2 *de, *pde;
1632 unsigned int blocksize = dir->i_sb->s_blocksize; 1636 unsigned int blocksize = dir->i_sb->s_blocksize;
1633 int i; 1637 int i, err;
1634 1638
1635 i = 0; 1639 i = 0;
1636 pde = NULL; 1640 pde = NULL;
1637 de = (struct ext4_dir_entry_2 *) bh->b_data; 1641 de = (struct ext4_dir_entry_2 *) bh->b_data;
1638 while (i < bh->b_size) { 1642 while (i < bh->b_size) {
1639 if (!ext4_check_dir_entry(dir, de, bh, i)) 1643 if (ext4_check_dir_entry(dir, NULL, de, bh, i))
1640 return -EIO; 1644 return -EIO;
1641 if (de == de_del) { 1645 if (de == de_del) {
1642 BUFFER_TRACE(bh, "get_write_access"); 1646 BUFFER_TRACE(bh, "get_write_access");
1643 ext4_journal_get_write_access(handle, bh); 1647 err = ext4_journal_get_write_access(handle, bh);
1648 if (unlikely(err)) {
1649 ext4_std_error(dir->i_sb, err);
1650 return err;
1651 }
1644 if (pde) 1652 if (pde)
1645 pde->rec_len = ext4_rec_len_to_disk( 1653 pde->rec_len = ext4_rec_len_to_disk(
1646 ext4_rec_len_from_disk(pde->rec_len, 1654 ext4_rec_len_from_disk(pde->rec_len,
@@ -1652,7 +1660,11 @@ static int ext4_delete_entry(handle_t *handle,
1652 de->inode = 0; 1660 de->inode = 0;
1653 dir->i_version++; 1661 dir->i_version++;
1654 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); 1662 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
1655 ext4_handle_dirty_metadata(handle, dir, bh); 1663 err = ext4_handle_dirty_metadata(handle, dir, bh);
1664 if (unlikely(err)) {
1665 ext4_std_error(dir->i_sb, err);
1666 return err;
1667 }
1656 return 0; 1668 return 0;
1657 } 1669 }
1658 i += ext4_rec_len_from_disk(de->rec_len, blocksize); 1670 i += ext4_rec_len_from_disk(de->rec_len, blocksize);
@@ -1789,7 +1801,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1789{ 1801{
1790 handle_t *handle; 1802 handle_t *handle;
1791 struct inode *inode; 1803 struct inode *inode;
1792 struct buffer_head *dir_block; 1804 struct buffer_head *dir_block = NULL;
1793 struct ext4_dir_entry_2 *de; 1805 struct ext4_dir_entry_2 *de;
1794 unsigned int blocksize = dir->i_sb->s_blocksize; 1806 unsigned int blocksize = dir->i_sb->s_blocksize;
1795 int err, retries = 0; 1807 int err, retries = 0;
@@ -1822,7 +1834,9 @@ retry:
1822 if (!dir_block) 1834 if (!dir_block)
1823 goto out_clear_inode; 1835 goto out_clear_inode;
1824 BUFFER_TRACE(dir_block, "get_write_access"); 1836 BUFFER_TRACE(dir_block, "get_write_access");
1825 ext4_journal_get_write_access(handle, dir_block); 1837 err = ext4_journal_get_write_access(handle, dir_block);
1838 if (err)
1839 goto out_clear_inode;
1826 de = (struct ext4_dir_entry_2 *) dir_block->b_data; 1840 de = (struct ext4_dir_entry_2 *) dir_block->b_data;
1827 de->inode = cpu_to_le32(inode->i_ino); 1841 de->inode = cpu_to_le32(inode->i_ino);
1828 de->name_len = 1; 1842 de->name_len = 1;
@@ -1839,10 +1853,12 @@ retry:
1839 ext4_set_de_type(dir->i_sb, de, S_IFDIR); 1853 ext4_set_de_type(dir->i_sb, de, S_IFDIR);
1840 inode->i_nlink = 2; 1854 inode->i_nlink = 2;
1841 BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata"); 1855 BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
1842 ext4_handle_dirty_metadata(handle, dir, dir_block); 1856 err = ext4_handle_dirty_metadata(handle, dir, dir_block);
1843 brelse(dir_block); 1857 if (err)
1844 ext4_mark_inode_dirty(handle, inode); 1858 goto out_clear_inode;
1845 err = ext4_add_entry(handle, dentry, inode); 1859 err = ext4_mark_inode_dirty(handle, inode);
1860 if (!err)
1861 err = ext4_add_entry(handle, dentry, inode);
1846 if (err) { 1862 if (err) {
1847out_clear_inode: 1863out_clear_inode:
1848 clear_nlink(inode); 1864 clear_nlink(inode);
@@ -1853,10 +1869,13 @@ out_clear_inode:
1853 } 1869 }
1854 ext4_inc_count(handle, dir); 1870 ext4_inc_count(handle, dir);
1855 ext4_update_dx_flag(dir); 1871 ext4_update_dx_flag(dir);
1856 ext4_mark_inode_dirty(handle, dir); 1872 err = ext4_mark_inode_dirty(handle, dir);
1873 if (err)
1874 goto out_clear_inode;
1857 d_instantiate(dentry, inode); 1875 d_instantiate(dentry, inode);
1858 unlock_new_inode(inode); 1876 unlock_new_inode(inode);
1859out_stop: 1877out_stop:
1878 brelse(dir_block);
1860 ext4_journal_stop(handle); 1879 ext4_journal_stop(handle);
1861 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) 1880 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
1862 goto retry; 1881 goto retry;
@@ -1919,7 +1938,7 @@ static int empty_dir(struct inode *inode)
1919 } 1938 }
1920 de = (struct ext4_dir_entry_2 *) bh->b_data; 1939 de = (struct ext4_dir_entry_2 *) bh->b_data;
1921 } 1940 }
1922 if (!ext4_check_dir_entry(inode, de, bh, offset)) { 1941 if (ext4_check_dir_entry(inode, NULL, de, bh, offset)) {
1923 de = (struct ext4_dir_entry_2 *)(bh->b_data + 1942 de = (struct ext4_dir_entry_2 *)(bh->b_data +
1924 sb->s_blocksize); 1943 sb->s_blocksize);
1925 offset = (offset | (sb->s_blocksize - 1)) + 1; 1944 offset = (offset | (sb->s_blocksize - 1)) + 1;
@@ -2407,7 +2426,11 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2407 ext4_current_time(new_dir); 2426 ext4_current_time(new_dir);
2408 ext4_mark_inode_dirty(handle, new_dir); 2427 ext4_mark_inode_dirty(handle, new_dir);
2409 BUFFER_TRACE(new_bh, "call ext4_handle_dirty_metadata"); 2428 BUFFER_TRACE(new_bh, "call ext4_handle_dirty_metadata");
2410 ext4_handle_dirty_metadata(handle, new_dir, new_bh); 2429 retval = ext4_handle_dirty_metadata(handle, new_dir, new_bh);
2430 if (unlikely(retval)) {
2431 ext4_std_error(new_dir->i_sb, retval);
2432 goto end_rename;
2433 }
2411 brelse(new_bh); 2434 brelse(new_bh);
2412 new_bh = NULL; 2435 new_bh = NULL;
2413 } 2436 }
@@ -2459,7 +2482,11 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2459 PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) = 2482 PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) =
2460 cpu_to_le32(new_dir->i_ino); 2483 cpu_to_le32(new_dir->i_ino);
2461 BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata"); 2484 BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
2462 ext4_handle_dirty_metadata(handle, old_dir, dir_bh); 2485 retval = ext4_handle_dirty_metadata(handle, old_dir, dir_bh);
2486 if (retval) {
2487 ext4_std_error(old_dir->i_sb, retval);
2488 goto end_rename;
2489 }
2463 ext4_dec_count(handle, old_dir); 2490 ext4_dec_count(handle, old_dir);
2464 if (new_inode) { 2491 if (new_inode) {
2465 /* checked empty_dir above, can't have another parent, 2492 /* checked empty_dir above, can't have another parent,
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index beacce11ac5..7270dcfca92 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -44,7 +44,7 @@ int __init ext4_init_pageio(void)
44 if (io_page_cachep == NULL) 44 if (io_page_cachep == NULL)
45 return -ENOMEM; 45 return -ENOMEM;
46 io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT); 46 io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT);
47 if (io_page_cachep == NULL) { 47 if (io_end_cachep == NULL) {
48 kmem_cache_destroy(io_page_cachep); 48 kmem_cache_destroy(io_page_cachep);
49 return -ENOMEM; 49 return -ENOMEM;
50 } 50 }
@@ -158,11 +158,8 @@ static void ext4_end_io_work(struct work_struct *work)
158 158
159ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) 159ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
160{ 160{
161 ext4_io_end_t *io = NULL; 161 ext4_io_end_t *io = kmem_cache_zalloc(io_end_cachep, flags);
162
163 io = kmem_cache_alloc(io_end_cachep, flags);
164 if (io) { 162 if (io) {
165 memset(io, 0, sizeof(*io));
166 atomic_inc(&EXT4_I(inode)->i_ioend_count); 163 atomic_inc(&EXT4_I(inode)->i_ioend_count);
167 io->inode = inode; 164 io->inode = inode;
168 INIT_WORK(&io->work, ext4_end_io_work); 165 INIT_WORK(&io->work, ext4_end_io_work);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index dc963929de6..3ecc6e45d2f 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -220,7 +220,11 @@ static int setup_new_group_blocks(struct super_block *sb,
220 memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size); 220 memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size);
221 set_buffer_uptodate(gdb); 221 set_buffer_uptodate(gdb);
222 unlock_buffer(gdb); 222 unlock_buffer(gdb);
223 ext4_handle_dirty_metadata(handle, NULL, gdb); 223 err = ext4_handle_dirty_metadata(handle, NULL, gdb);
224 if (unlikely(err)) {
225 brelse(gdb);
226 goto exit_bh;
227 }
224 ext4_set_bit(bit, bh->b_data); 228 ext4_set_bit(bit, bh->b_data);
225 brelse(gdb); 229 brelse(gdb);
226 } 230 }
@@ -232,6 +236,8 @@ static int setup_new_group_blocks(struct super_block *sb,
232 GFP_NOFS); 236 GFP_NOFS);
233 if (err) 237 if (err)
234 goto exit_bh; 238 goto exit_bh;
239 for (i = 0, bit = gdblocks + 1; i < reserved_gdb; i++, bit++)
240 ext4_set_bit(bit, bh->b_data);
235 241
236 ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap, 242 ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap,
237 input->block_bitmap - start); 243 input->block_bitmap - start);
@@ -247,13 +253,20 @@ static int setup_new_group_blocks(struct super_block *sb,
247 err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS); 253 err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS);
248 if (err) 254 if (err)
249 goto exit_bh; 255 goto exit_bh;
256 for (i = 0, bit = input->inode_table - start;
257 i < sbi->s_itb_per_group; i++, bit++)
258 ext4_set_bit(bit, bh->b_data);
250 259
251 if ((err = extend_or_restart_transaction(handle, 2, bh))) 260 if ((err = extend_or_restart_transaction(handle, 2, bh)))
252 goto exit_bh; 261 goto exit_bh;
253 262
254 ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, 263 ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8,
255 bh->b_data); 264 bh->b_data);
256 ext4_handle_dirty_metadata(handle, NULL, bh); 265 err = ext4_handle_dirty_metadata(handle, NULL, bh);
266 if (unlikely(err)) {
267 ext4_std_error(sb, err);
268 goto exit_bh;
269 }
257 brelse(bh); 270 brelse(bh);
258 /* Mark unused entries in inode bitmap used */ 271 /* Mark unused entries in inode bitmap used */
259 ext4_debug("clear inode bitmap %#04llx (+%llu)\n", 272 ext4_debug("clear inode bitmap %#04llx (+%llu)\n",
@@ -265,7 +278,9 @@ static int setup_new_group_blocks(struct super_block *sb,
265 278
266 ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, 279 ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
267 bh->b_data); 280 bh->b_data);
268 ext4_handle_dirty_metadata(handle, NULL, bh); 281 err = ext4_handle_dirty_metadata(handle, NULL, bh);
282 if (unlikely(err))
283 ext4_std_error(sb, err);
269exit_bh: 284exit_bh:
270 brelse(bh); 285 brelse(bh);
271 286
@@ -417,17 +432,21 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
417 goto exit_dind; 432 goto exit_dind;
418 } 433 }
419 434
420 if ((err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh))) 435 err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
436 if (unlikely(err))
421 goto exit_dind; 437 goto exit_dind;
422 438
423 if ((err = ext4_journal_get_write_access(handle, *primary))) 439 err = ext4_journal_get_write_access(handle, *primary);
440 if (unlikely(err))
424 goto exit_sbh; 441 goto exit_sbh;
425 442
426 if ((err = ext4_journal_get_write_access(handle, dind))) 443 err = ext4_journal_get_write_access(handle, dind);
427 goto exit_primary; 444 if (unlikely(err))
445 ext4_std_error(sb, err);
428 446
429 /* ext4_reserve_inode_write() gets a reference on the iloc */ 447 /* ext4_reserve_inode_write() gets a reference on the iloc */
430 if ((err = ext4_reserve_inode_write(handle, inode, &iloc))) 448 err = ext4_reserve_inode_write(handle, inode, &iloc);
449 if (unlikely(err))
431 goto exit_dindj; 450 goto exit_dindj;
432 451
433 n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *), 452 n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *),
@@ -449,12 +468,20 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
449 * reserved inode, and will become GDT blocks (primary and backup). 468 * reserved inode, and will become GDT blocks (primary and backup).
450 */ 469 */
451 data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)] = 0; 470 data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)] = 0;
452 ext4_handle_dirty_metadata(handle, NULL, dind); 471 err = ext4_handle_dirty_metadata(handle, NULL, dind);
453 brelse(dind); 472 if (unlikely(err)) {
473 ext4_std_error(sb, err);
474 goto exit_inode;
475 }
454 inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9; 476 inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9;
455 ext4_mark_iloc_dirty(handle, inode, &iloc); 477 ext4_mark_iloc_dirty(handle, inode, &iloc);
456 memset((*primary)->b_data, 0, sb->s_blocksize); 478 memset((*primary)->b_data, 0, sb->s_blocksize);
457 ext4_handle_dirty_metadata(handle, NULL, *primary); 479 err = ext4_handle_dirty_metadata(handle, NULL, *primary);
480 if (unlikely(err)) {
481 ext4_std_error(sb, err);
482 goto exit_inode;
483 }
484 brelse(dind);
458 485
459 o_group_desc = EXT4_SB(sb)->s_group_desc; 486 o_group_desc = EXT4_SB(sb)->s_group_desc;
460 memcpy(n_group_desc, o_group_desc, 487 memcpy(n_group_desc, o_group_desc,
@@ -465,19 +492,19 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
465 kfree(o_group_desc); 492 kfree(o_group_desc);
466 493
467 le16_add_cpu(&es->s_reserved_gdt_blocks, -1); 494 le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
468 ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); 495 err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
496 if (err)
497 ext4_std_error(sb, err);
469 498
470 return 0; 499 return err;
471 500
472exit_inode: 501exit_inode:
473 /* ext4_journal_release_buffer(handle, iloc.bh); */ 502 /* ext4_journal_release_buffer(handle, iloc.bh); */
474 brelse(iloc.bh); 503 brelse(iloc.bh);
475exit_dindj: 504exit_dindj:
476 /* ext4_journal_release_buffer(handle, dind); */ 505 /* ext4_journal_release_buffer(handle, dind); */
477exit_primary:
478 /* ext4_journal_release_buffer(handle, *primary); */
479exit_sbh: 506exit_sbh:
480 /* ext4_journal_release_buffer(handle, *primary); */ 507 /* ext4_journal_release_buffer(handle, EXT4_SB(sb)->s_sbh); */
481exit_dind: 508exit_dind:
482 brelse(dind); 509 brelse(dind);
483exit_bh: 510exit_bh:
@@ -660,7 +687,9 @@ static void update_backups(struct super_block *sb,
660 memset(bh->b_data + size, 0, rest); 687 memset(bh->b_data + size, 0, rest);
661 set_buffer_uptodate(bh); 688 set_buffer_uptodate(bh);
662 unlock_buffer(bh); 689 unlock_buffer(bh);
663 ext4_handle_dirty_metadata(handle, NULL, bh); 690 err = ext4_handle_dirty_metadata(handle, NULL, bh);
691 if (unlikely(err))
692 ext4_std_error(sb, err);
664 brelse(bh); 693 brelse(bh);
665 } 694 }
666 if ((err2 = ext4_journal_stop(handle)) && !err) 695 if ((err2 = ext4_journal_stop(handle)) && !err)
@@ -878,7 +907,11 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
878 /* Update the global fs size fields */ 907 /* Update the global fs size fields */
879 sbi->s_groups_count++; 908 sbi->s_groups_count++;
880 909
881 ext4_handle_dirty_metadata(handle, NULL, primary); 910 err = ext4_handle_dirty_metadata(handle, NULL, primary);
911 if (unlikely(err)) {
912 ext4_std_error(sb, err);
913 goto exit_journal;
914 }
882 915
883 /* Update the reserved block counts only once the new group is 916 /* Update the reserved block counts only once the new group is
884 * active. */ 917 * active. */
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index fb15c9c0be7..29c80f6d8b2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -388,13 +388,14 @@ static void ext4_handle_error(struct super_block *sb)
388void __ext4_error(struct super_block *sb, const char *function, 388void __ext4_error(struct super_block *sb, const char *function,
389 unsigned int line, const char *fmt, ...) 389 unsigned int line, const char *fmt, ...)
390{ 390{
391 struct va_format vaf;
391 va_list args; 392 va_list args;
392 393
393 va_start(args, fmt); 394 va_start(args, fmt);
394 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: ", 395 vaf.fmt = fmt;
395 sb->s_id, function, line, current->comm); 396 vaf.va = &args;
396 vprintk(fmt, args); 397 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
397 printk("\n"); 398 sb->s_id, function, line, current->comm, &vaf);
398 va_end(args); 399 va_end(args);
399 400
400 ext4_handle_error(sb); 401 ext4_handle_error(sb);
@@ -405,28 +406,31 @@ void ext4_error_inode(struct inode *inode, const char *function,
405 const char *fmt, ...) 406 const char *fmt, ...)
406{ 407{
407 va_list args; 408 va_list args;
409 struct va_format vaf;
408 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; 410 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
409 411
410 es->s_last_error_ino = cpu_to_le32(inode->i_ino); 412 es->s_last_error_ino = cpu_to_le32(inode->i_ino);
411 es->s_last_error_block = cpu_to_le64(block); 413 es->s_last_error_block = cpu_to_le64(block);
412 save_error_info(inode->i_sb, function, line); 414 save_error_info(inode->i_sb, function, line);
413 va_start(args, fmt); 415 va_start(args, fmt);
416 vaf.fmt = fmt;
417 vaf.va = &args;
414 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: inode #%lu: ", 418 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: inode #%lu: ",
415 inode->i_sb->s_id, function, line, inode->i_ino); 419 inode->i_sb->s_id, function, line, inode->i_ino);
416 if (block) 420 if (block)
417 printk("block %llu: ", block); 421 printk(KERN_CONT "block %llu: ", block);
418 printk("comm %s: ", current->comm); 422 printk(KERN_CONT "comm %s: %pV\n", current->comm, &vaf);
419 vprintk(fmt, args);
420 printk("\n");
421 va_end(args); 423 va_end(args);
422 424
423 ext4_handle_error(inode->i_sb); 425 ext4_handle_error(inode->i_sb);
424} 426}
425 427
426void ext4_error_file(struct file *file, const char *function, 428void ext4_error_file(struct file *file, const char *function,
427 unsigned int line, const char *fmt, ...) 429 unsigned int line, ext4_fsblk_t block,
430 const char *fmt, ...)
428{ 431{
429 va_list args; 432 va_list args;
433 struct va_format vaf;
430 struct ext4_super_block *es; 434 struct ext4_super_block *es;
431 struct inode *inode = file->f_dentry->d_inode; 435 struct inode *inode = file->f_dentry->d_inode;
432 char pathname[80], *path; 436 char pathname[80], *path;
@@ -434,17 +438,18 @@ void ext4_error_file(struct file *file, const char *function,
434 es = EXT4_SB(inode->i_sb)->s_es; 438 es = EXT4_SB(inode->i_sb)->s_es;
435 es->s_last_error_ino = cpu_to_le32(inode->i_ino); 439 es->s_last_error_ino = cpu_to_le32(inode->i_ino);
436 save_error_info(inode->i_sb, function, line); 440 save_error_info(inode->i_sb, function, line);
437 va_start(args, fmt);
438 path = d_path(&(file->f_path), pathname, sizeof(pathname)); 441 path = d_path(&(file->f_path), pathname, sizeof(pathname));
439 if (!path) 442 if (IS_ERR(path))
440 path = "(unknown)"; 443 path = "(unknown)";
441 printk(KERN_CRIT 444 printk(KERN_CRIT
442 "EXT4-fs error (device %s): %s:%d: inode #%lu " 445 "EXT4-fs error (device %s): %s:%d: inode #%lu: ",
443 "(comm %s path %s): ", 446 inode->i_sb->s_id, function, line, inode->i_ino);
444 inode->i_sb->s_id, function, line, inode->i_ino, 447 if (block)
445 current->comm, path); 448 printk(KERN_CONT "block %llu: ", block);
446 vprintk(fmt, args); 449 va_start(args, fmt);
447 printk("\n"); 450 vaf.fmt = fmt;
451 vaf.va = &args;
452 printk(KERN_CONT "comm %s: path %s: %pV\n", current->comm, path, &vaf);
448 va_end(args); 453 va_end(args);
449 454
450 ext4_handle_error(inode->i_sb); 455 ext4_handle_error(inode->i_sb);
@@ -543,28 +548,29 @@ void __ext4_abort(struct super_block *sb, const char *function,
543 panic("EXT4-fs panic from previous error\n"); 548 panic("EXT4-fs panic from previous error\n");
544} 549}
545 550
546void ext4_msg (struct super_block * sb, const char *prefix, 551void ext4_msg(struct super_block *sb, const char *prefix, const char *fmt, ...)
547 const char *fmt, ...)
548{ 552{
553 struct va_format vaf;
549 va_list args; 554 va_list args;
550 555
551 va_start(args, fmt); 556 va_start(args, fmt);
552 printk("%sEXT4-fs (%s): ", prefix, sb->s_id); 557 vaf.fmt = fmt;
553 vprintk(fmt, args); 558 vaf.va = &args;
554 printk("\n"); 559 printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
555 va_end(args); 560 va_end(args);
556} 561}
557 562
558void __ext4_warning(struct super_block *sb, const char *function, 563void __ext4_warning(struct super_block *sb, const char *function,
559 unsigned int line, const char *fmt, ...) 564 unsigned int line, const char *fmt, ...)
560{ 565{
566 struct va_format vaf;
561 va_list args; 567 va_list args;
562 568
563 va_start(args, fmt); 569 va_start(args, fmt);
564 printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: ", 570 vaf.fmt = fmt;
565 sb->s_id, function, line); 571 vaf.va = &args;
566 vprintk(fmt, args); 572 printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
567 printk("\n"); 573 sb->s_id, function, line, &vaf);
568 va_end(args); 574 va_end(args);
569} 575}
570 576
@@ -575,21 +581,25 @@ void __ext4_grp_locked_error(const char *function, unsigned int line,
575__releases(bitlock) 581__releases(bitlock)
576__acquires(bitlock) 582__acquires(bitlock)
577{ 583{
584 struct va_format vaf;
578 va_list args; 585 va_list args;
579 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 586 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
580 587
581 es->s_last_error_ino = cpu_to_le32(ino); 588 es->s_last_error_ino = cpu_to_le32(ino);
582 es->s_last_error_block = cpu_to_le64(block); 589 es->s_last_error_block = cpu_to_le64(block);
583 __save_error_info(sb, function, line); 590 __save_error_info(sb, function, line);
591
584 va_start(args, fmt); 592 va_start(args, fmt);
593
594 vaf.fmt = fmt;
595 vaf.va = &args;
585 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u", 596 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u",
586 sb->s_id, function, line, grp); 597 sb->s_id, function, line, grp);
587 if (ino) 598 if (ino)
588 printk("inode %lu: ", ino); 599 printk(KERN_CONT "inode %lu: ", ino);
589 if (block) 600 if (block)
590 printk("block %llu:", (unsigned long long) block); 601 printk(KERN_CONT "block %llu:", (unsigned long long) block);
591 vprintk(fmt, args); 602 printk(KERN_CONT "%pV\n", &vaf);
592 printk("\n");
593 va_end(args); 603 va_end(args);
594 604
595 if (test_opt(sb, ERRORS_CONT)) { 605 if (test_opt(sb, ERRORS_CONT)) {
@@ -808,21 +818,15 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
808 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); 818 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
809 INIT_LIST_HEAD(&ei->i_prealloc_list); 819 INIT_LIST_HEAD(&ei->i_prealloc_list);
810 spin_lock_init(&ei->i_prealloc_lock); 820 spin_lock_init(&ei->i_prealloc_lock);
811 /*
812 * Note: We can be called before EXT4_SB(sb)->s_journal is set,
813 * therefore it can be null here. Don't check it, just initialize
814 * jinode.
815 */
816 jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode);
817 ei->i_reserved_data_blocks = 0; 821 ei->i_reserved_data_blocks = 0;
818 ei->i_reserved_meta_blocks = 0; 822 ei->i_reserved_meta_blocks = 0;
819 ei->i_allocated_meta_blocks = 0; 823 ei->i_allocated_meta_blocks = 0;
820 ei->i_da_metadata_calc_len = 0; 824 ei->i_da_metadata_calc_len = 0;
821 ei->i_delalloc_reserved_flag = 0;
822 spin_lock_init(&(ei->i_block_reservation_lock)); 825 spin_lock_init(&(ei->i_block_reservation_lock));
823#ifdef CONFIG_QUOTA 826#ifdef CONFIG_QUOTA
824 ei->i_reserved_quota = 0; 827 ei->i_reserved_quota = 0;
825#endif 828#endif
829 ei->jinode = NULL;
826 INIT_LIST_HEAD(&ei->i_completed_io_list); 830 INIT_LIST_HEAD(&ei->i_completed_io_list);
827 spin_lock_init(&ei->i_completed_io_lock); 831 spin_lock_init(&ei->i_completed_io_lock);
828 ei->cur_aio_dio = NULL; 832 ei->cur_aio_dio = NULL;
@@ -841,6 +845,13 @@ static int ext4_drop_inode(struct inode *inode)
841 return drop; 845 return drop;
842} 846}
843 847
848static void ext4_i_callback(struct rcu_head *head)
849{
850 struct inode *inode = container_of(head, struct inode, i_rcu);
851 INIT_LIST_HEAD(&inode->i_dentry);
852 kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
853}
854
844static void ext4_destroy_inode(struct inode *inode) 855static void ext4_destroy_inode(struct inode *inode)
845{ 856{
846 ext4_ioend_wait(inode); 857 ext4_ioend_wait(inode);
@@ -853,7 +864,7 @@ static void ext4_destroy_inode(struct inode *inode)
853 true); 864 true);
854 dump_stack(); 865 dump_stack();
855 } 866 }
856 kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); 867 call_rcu(&inode->i_rcu, ext4_i_callback);
857} 868}
858 869
859static void init_once(void *foo) 870static void init_once(void *foo)
@@ -891,9 +902,12 @@ void ext4_clear_inode(struct inode *inode)
891 end_writeback(inode); 902 end_writeback(inode);
892 dquot_drop(inode); 903 dquot_drop(inode);
893 ext4_discard_preallocations(inode); 904 ext4_discard_preallocations(inode);
894 if (EXT4_JOURNAL(inode)) 905 if (EXT4_I(inode)->jinode) {
895 jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, 906 jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
896 &EXT4_I(inode)->jinode); 907 EXT4_I(inode)->jinode);
908 jbd2_free_inode(EXT4_I(inode)->jinode);
909 EXT4_I(inode)->jinode = NULL;
910 }
897} 911}
898 912
899static inline void ext4_show_quota_options(struct seq_file *seq, 913static inline void ext4_show_quota_options(struct seq_file *seq,
@@ -1386,7 +1400,7 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
1386 sbi->s_qf_names[qtype] = NULL; 1400 sbi->s_qf_names[qtype] = NULL;
1387 return 0; 1401 return 0;
1388 } 1402 }
1389 set_opt(sbi->s_mount_opt, QUOTA); 1403 set_opt(sb, QUOTA);
1390 return 1; 1404 return 1;
1391} 1405}
1392 1406
@@ -1441,21 +1455,21 @@ static int parse_options(char *options, struct super_block *sb,
1441 switch (token) { 1455 switch (token) {
1442 case Opt_bsd_df: 1456 case Opt_bsd_df:
1443 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); 1457 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
1444 clear_opt(sbi->s_mount_opt, MINIX_DF); 1458 clear_opt(sb, MINIX_DF);
1445 break; 1459 break;
1446 case Opt_minix_df: 1460 case Opt_minix_df:
1447 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); 1461 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
1448 set_opt(sbi->s_mount_opt, MINIX_DF); 1462 set_opt(sb, MINIX_DF);
1449 1463
1450 break; 1464 break;
1451 case Opt_grpid: 1465 case Opt_grpid:
1452 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); 1466 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
1453 set_opt(sbi->s_mount_opt, GRPID); 1467 set_opt(sb, GRPID);
1454 1468
1455 break; 1469 break;
1456 case Opt_nogrpid: 1470 case Opt_nogrpid:
1457 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); 1471 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
1458 clear_opt(sbi->s_mount_opt, GRPID); 1472 clear_opt(sb, GRPID);
1459 1473
1460 break; 1474 break;
1461 case Opt_resuid: 1475 case Opt_resuid:
@@ -1473,38 +1487,38 @@ static int parse_options(char *options, struct super_block *sb,
1473 /* *sb_block = match_int(&args[0]); */ 1487 /* *sb_block = match_int(&args[0]); */
1474 break; 1488 break;
1475 case Opt_err_panic: 1489 case Opt_err_panic:
1476 clear_opt(sbi->s_mount_opt, ERRORS_CONT); 1490 clear_opt(sb, ERRORS_CONT);
1477 clear_opt(sbi->s_mount_opt, ERRORS_RO); 1491 clear_opt(sb, ERRORS_RO);
1478 set_opt(sbi->s_mount_opt, ERRORS_PANIC); 1492 set_opt(sb, ERRORS_PANIC);
1479 break; 1493 break;
1480 case Opt_err_ro: 1494 case Opt_err_ro:
1481 clear_opt(sbi->s_mount_opt, ERRORS_CONT); 1495 clear_opt(sb, ERRORS_CONT);
1482 clear_opt(sbi->s_mount_opt, ERRORS_PANIC); 1496 clear_opt(sb, ERRORS_PANIC);
1483 set_opt(sbi->s_mount_opt, ERRORS_RO); 1497 set_opt(sb, ERRORS_RO);
1484 break; 1498 break;
1485 case Opt_err_cont: 1499 case Opt_err_cont:
1486 clear_opt(sbi->s_mount_opt, ERRORS_RO); 1500 clear_opt(sb, ERRORS_RO);
1487 clear_opt(sbi->s_mount_opt, ERRORS_PANIC); 1501 clear_opt(sb, ERRORS_PANIC);
1488 set_opt(sbi->s_mount_opt, ERRORS_CONT); 1502 set_opt(sb, ERRORS_CONT);
1489 break; 1503 break;
1490 case Opt_nouid32: 1504 case Opt_nouid32:
1491 set_opt(sbi->s_mount_opt, NO_UID32); 1505 set_opt(sb, NO_UID32);
1492 break; 1506 break;
1493 case Opt_debug: 1507 case Opt_debug:
1494 set_opt(sbi->s_mount_opt, DEBUG); 1508 set_opt(sb, DEBUG);
1495 break; 1509 break;
1496 case Opt_oldalloc: 1510 case Opt_oldalloc:
1497 set_opt(sbi->s_mount_opt, OLDALLOC); 1511 set_opt(sb, OLDALLOC);
1498 break; 1512 break;
1499 case Opt_orlov: 1513 case Opt_orlov:
1500 clear_opt(sbi->s_mount_opt, OLDALLOC); 1514 clear_opt(sb, OLDALLOC);
1501 break; 1515 break;
1502#ifdef CONFIG_EXT4_FS_XATTR 1516#ifdef CONFIG_EXT4_FS_XATTR
1503 case Opt_user_xattr: 1517 case Opt_user_xattr:
1504 set_opt(sbi->s_mount_opt, XATTR_USER); 1518 set_opt(sb, XATTR_USER);
1505 break; 1519 break;
1506 case Opt_nouser_xattr: 1520 case Opt_nouser_xattr:
1507 clear_opt(sbi->s_mount_opt, XATTR_USER); 1521 clear_opt(sb, XATTR_USER);
1508 break; 1522 break;
1509#else 1523#else
1510 case Opt_user_xattr: 1524 case Opt_user_xattr:
@@ -1514,10 +1528,10 @@ static int parse_options(char *options, struct super_block *sb,
1514#endif 1528#endif
1515#ifdef CONFIG_EXT4_FS_POSIX_ACL 1529#ifdef CONFIG_EXT4_FS_POSIX_ACL
1516 case Opt_acl: 1530 case Opt_acl:
1517 set_opt(sbi->s_mount_opt, POSIX_ACL); 1531 set_opt(sb, POSIX_ACL);
1518 break; 1532 break;
1519 case Opt_noacl: 1533 case Opt_noacl:
1520 clear_opt(sbi->s_mount_opt, POSIX_ACL); 1534 clear_opt(sb, POSIX_ACL);
1521 break; 1535 break;
1522#else 1536#else
1523 case Opt_acl: 1537 case Opt_acl:
@@ -1536,7 +1550,7 @@ static int parse_options(char *options, struct super_block *sb,
1536 "Cannot specify journal on remount"); 1550 "Cannot specify journal on remount");
1537 return 0; 1551 return 0;
1538 } 1552 }
1539 set_opt(sbi->s_mount_opt, UPDATE_JOURNAL); 1553 set_opt(sb, UPDATE_JOURNAL);
1540 break; 1554 break;
1541 case Opt_journal_dev: 1555 case Opt_journal_dev:
1542 if (is_remount) { 1556 if (is_remount) {
@@ -1549,14 +1563,14 @@ static int parse_options(char *options, struct super_block *sb,
1549 *journal_devnum = option; 1563 *journal_devnum = option;
1550 break; 1564 break;
1551 case Opt_journal_checksum: 1565 case Opt_journal_checksum:
1552 set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); 1566 set_opt(sb, JOURNAL_CHECKSUM);
1553 break; 1567 break;
1554 case Opt_journal_async_commit: 1568 case Opt_journal_async_commit:
1555 set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT); 1569 set_opt(sb, JOURNAL_ASYNC_COMMIT);
1556 set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); 1570 set_opt(sb, JOURNAL_CHECKSUM);
1557 break; 1571 break;
1558 case Opt_noload: 1572 case Opt_noload:
1559 set_opt(sbi->s_mount_opt, NOLOAD); 1573 set_opt(sb, NOLOAD);
1560 break; 1574 break;
1561 case Opt_commit: 1575 case Opt_commit:
1562 if (match_int(&args[0], &option)) 1576 if (match_int(&args[0], &option))
@@ -1599,15 +1613,15 @@ static int parse_options(char *options, struct super_block *sb,
1599 return 0; 1613 return 0;
1600 } 1614 }
1601 } else { 1615 } else {
1602 clear_opt(sbi->s_mount_opt, DATA_FLAGS); 1616 clear_opt(sb, DATA_FLAGS);
1603 sbi->s_mount_opt |= data_opt; 1617 sbi->s_mount_opt |= data_opt;
1604 } 1618 }
1605 break; 1619 break;
1606 case Opt_data_err_abort: 1620 case Opt_data_err_abort:
1607 set_opt(sbi->s_mount_opt, DATA_ERR_ABORT); 1621 set_opt(sb, DATA_ERR_ABORT);
1608 break; 1622 break;
1609 case Opt_data_err_ignore: 1623 case Opt_data_err_ignore:
1610 clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); 1624 clear_opt(sb, DATA_ERR_ABORT);
1611 break; 1625 break;
1612#ifdef CONFIG_QUOTA 1626#ifdef CONFIG_QUOTA
1613 case Opt_usrjquota: 1627 case Opt_usrjquota:
@@ -1647,12 +1661,12 @@ set_qf_format:
1647 break; 1661 break;
1648 case Opt_quota: 1662 case Opt_quota:
1649 case Opt_usrquota: 1663 case Opt_usrquota:
1650 set_opt(sbi->s_mount_opt, QUOTA); 1664 set_opt(sb, QUOTA);
1651 set_opt(sbi->s_mount_opt, USRQUOTA); 1665 set_opt(sb, USRQUOTA);
1652 break; 1666 break;
1653 case Opt_grpquota: 1667 case Opt_grpquota:
1654 set_opt(sbi->s_mount_opt, QUOTA); 1668 set_opt(sb, QUOTA);
1655 set_opt(sbi->s_mount_opt, GRPQUOTA); 1669 set_opt(sb, GRPQUOTA);
1656 break; 1670 break;
1657 case Opt_noquota: 1671 case Opt_noquota:
1658 if (sb_any_quota_loaded(sb)) { 1672 if (sb_any_quota_loaded(sb)) {
@@ -1660,9 +1674,9 @@ set_qf_format:
1660 "options when quota turned on"); 1674 "options when quota turned on");
1661 return 0; 1675 return 0;
1662 } 1676 }
1663 clear_opt(sbi->s_mount_opt, QUOTA); 1677 clear_opt(sb, QUOTA);
1664 clear_opt(sbi->s_mount_opt, USRQUOTA); 1678 clear_opt(sb, USRQUOTA);
1665 clear_opt(sbi->s_mount_opt, GRPQUOTA); 1679 clear_opt(sb, GRPQUOTA);
1666 break; 1680 break;
1667#else 1681#else
1668 case Opt_quota: 1682 case Opt_quota:
@@ -1688,7 +1702,7 @@ set_qf_format:
1688 sbi->s_mount_flags |= EXT4_MF_FS_ABORTED; 1702 sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
1689 break; 1703 break;
1690 case Opt_nobarrier: 1704 case Opt_nobarrier:
1691 clear_opt(sbi->s_mount_opt, BARRIER); 1705 clear_opt(sb, BARRIER);
1692 break; 1706 break;
1693 case Opt_barrier: 1707 case Opt_barrier:
1694 if (args[0].from) { 1708 if (args[0].from) {
@@ -1697,9 +1711,9 @@ set_qf_format:
1697 } else 1711 } else
1698 option = 1; /* No argument, default to 1 */ 1712 option = 1; /* No argument, default to 1 */
1699 if (option) 1713 if (option)
1700 set_opt(sbi->s_mount_opt, BARRIER); 1714 set_opt(sb, BARRIER);
1701 else 1715 else
1702 clear_opt(sbi->s_mount_opt, BARRIER); 1716 clear_opt(sb, BARRIER);
1703 break; 1717 break;
1704 case Opt_ignore: 1718 case Opt_ignore:
1705 break; 1719 break;
@@ -1723,17 +1737,17 @@ set_qf_format:
1723 "Ignoring deprecated bh option"); 1737 "Ignoring deprecated bh option");
1724 break; 1738 break;
1725 case Opt_i_version: 1739 case Opt_i_version:
1726 set_opt(sbi->s_mount_opt, I_VERSION); 1740 set_opt(sb, I_VERSION);
1727 sb->s_flags |= MS_I_VERSION; 1741 sb->s_flags |= MS_I_VERSION;
1728 break; 1742 break;
1729 case Opt_nodelalloc: 1743 case Opt_nodelalloc:
1730 clear_opt(sbi->s_mount_opt, DELALLOC); 1744 clear_opt(sb, DELALLOC);
1731 break; 1745 break;
1732 case Opt_mblk_io_submit: 1746 case Opt_mblk_io_submit:
1733 set_opt(sbi->s_mount_opt, MBLK_IO_SUBMIT); 1747 set_opt(sb, MBLK_IO_SUBMIT);
1734 break; 1748 break;
1735 case Opt_nomblk_io_submit: 1749 case Opt_nomblk_io_submit:
1736 clear_opt(sbi->s_mount_opt, MBLK_IO_SUBMIT); 1750 clear_opt(sb, MBLK_IO_SUBMIT);
1737 break; 1751 break;
1738 case Opt_stripe: 1752 case Opt_stripe:
1739 if (match_int(&args[0], &option)) 1753 if (match_int(&args[0], &option))
@@ -1743,13 +1757,13 @@ set_qf_format:
1743 sbi->s_stripe = option; 1757 sbi->s_stripe = option;
1744 break; 1758 break;
1745 case Opt_delalloc: 1759 case Opt_delalloc:
1746 set_opt(sbi->s_mount_opt, DELALLOC); 1760 set_opt(sb, DELALLOC);
1747 break; 1761 break;
1748 case Opt_block_validity: 1762 case Opt_block_validity:
1749 set_opt(sbi->s_mount_opt, BLOCK_VALIDITY); 1763 set_opt(sb, BLOCK_VALIDITY);
1750 break; 1764 break;
1751 case Opt_noblock_validity: 1765 case Opt_noblock_validity:
1752 clear_opt(sbi->s_mount_opt, BLOCK_VALIDITY); 1766 clear_opt(sb, BLOCK_VALIDITY);
1753 break; 1767 break;
1754 case Opt_inode_readahead_blks: 1768 case Opt_inode_readahead_blks:
1755 if (match_int(&args[0], &option)) 1769 if (match_int(&args[0], &option))
@@ -1773,7 +1787,7 @@ set_qf_format:
1773 option); 1787 option);
1774 break; 1788 break;
1775 case Opt_noauto_da_alloc: 1789 case Opt_noauto_da_alloc:
1776 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); 1790 set_opt(sb, NO_AUTO_DA_ALLOC);
1777 break; 1791 break;
1778 case Opt_auto_da_alloc: 1792 case Opt_auto_da_alloc:
1779 if (args[0].from) { 1793 if (args[0].from) {
@@ -1782,24 +1796,24 @@ set_qf_format:
1782 } else 1796 } else
1783 option = 1; /* No argument, default to 1 */ 1797 option = 1; /* No argument, default to 1 */
1784 if (option) 1798 if (option)
1785 clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC); 1799 clear_opt(sb, NO_AUTO_DA_ALLOC);
1786 else 1800 else
1787 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); 1801 set_opt(sb,NO_AUTO_DA_ALLOC);
1788 break; 1802 break;
1789 case Opt_discard: 1803 case Opt_discard:
1790 set_opt(sbi->s_mount_opt, DISCARD); 1804 set_opt(sb, DISCARD);
1791 break; 1805 break;
1792 case Opt_nodiscard: 1806 case Opt_nodiscard:
1793 clear_opt(sbi->s_mount_opt, DISCARD); 1807 clear_opt(sb, DISCARD);
1794 break; 1808 break;
1795 case Opt_dioread_nolock: 1809 case Opt_dioread_nolock:
1796 set_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); 1810 set_opt(sb, DIOREAD_NOLOCK);
1797 break; 1811 break;
1798 case Opt_dioread_lock: 1812 case Opt_dioread_lock:
1799 clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); 1813 clear_opt(sb, DIOREAD_NOLOCK);
1800 break; 1814 break;
1801 case Opt_init_inode_table: 1815 case Opt_init_inode_table:
1802 set_opt(sbi->s_mount_opt, INIT_INODE_TABLE); 1816 set_opt(sb, INIT_INODE_TABLE);
1803 if (args[0].from) { 1817 if (args[0].from) {
1804 if (match_int(&args[0], &option)) 1818 if (match_int(&args[0], &option))
1805 return 0; 1819 return 0;
@@ -1810,7 +1824,7 @@ set_qf_format:
1810 sbi->s_li_wait_mult = option; 1824 sbi->s_li_wait_mult = option;
1811 break; 1825 break;
1812 case Opt_noinit_inode_table: 1826 case Opt_noinit_inode_table:
1813 clear_opt(sbi->s_mount_opt, INIT_INODE_TABLE); 1827 clear_opt(sb, INIT_INODE_TABLE);
1814 break; 1828 break;
1815 default: 1829 default:
1816 ext4_msg(sb, KERN_ERR, 1830 ext4_msg(sb, KERN_ERR,
@@ -1822,10 +1836,10 @@ set_qf_format:
1822#ifdef CONFIG_QUOTA 1836#ifdef CONFIG_QUOTA
1823 if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { 1837 if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
1824 if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) 1838 if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
1825 clear_opt(sbi->s_mount_opt, USRQUOTA); 1839 clear_opt(sb, USRQUOTA);
1826 1840
1827 if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA]) 1841 if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA])
1828 clear_opt(sbi->s_mount_opt, GRPQUOTA); 1842 clear_opt(sb, GRPQUOTA);
1829 1843
1830 if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) { 1844 if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) {
1831 ext4_msg(sb, KERN_ERR, "old and new quota " 1845 ext4_msg(sb, KERN_ERR, "old and new quota "
@@ -1895,12 +1909,12 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1895 ext4_commit_super(sb, 1); 1909 ext4_commit_super(sb, 1);
1896 if (test_opt(sb, DEBUG)) 1910 if (test_opt(sb, DEBUG))
1897 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " 1911 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
1898 "bpg=%lu, ipg=%lu, mo=%04x]\n", 1912 "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
1899 sb->s_blocksize, 1913 sb->s_blocksize,
1900 sbi->s_groups_count, 1914 sbi->s_groups_count,
1901 EXT4_BLOCKS_PER_GROUP(sb), 1915 EXT4_BLOCKS_PER_GROUP(sb),
1902 EXT4_INODES_PER_GROUP(sb), 1916 EXT4_INODES_PER_GROUP(sb),
1903 sbi->s_mount_opt); 1917 sbi->s_mount_opt, sbi->s_mount_opt2);
1904 1918
1905 return res; 1919 return res;
1906} 1920}
@@ -1930,14 +1944,13 @@ static int ext4_fill_flex_info(struct super_block *sb)
1930 size = flex_group_count * sizeof(struct flex_groups); 1944 size = flex_group_count * sizeof(struct flex_groups);
1931 sbi->s_flex_groups = kzalloc(size, GFP_KERNEL); 1945 sbi->s_flex_groups = kzalloc(size, GFP_KERNEL);
1932 if (sbi->s_flex_groups == NULL) { 1946 if (sbi->s_flex_groups == NULL) {
1933 sbi->s_flex_groups = vmalloc(size); 1947 sbi->s_flex_groups = vzalloc(size);
1934 if (sbi->s_flex_groups) 1948 if (sbi->s_flex_groups == NULL) {
1935 memset(sbi->s_flex_groups, 0, size); 1949 ext4_msg(sb, KERN_ERR,
1936 } 1950 "not enough memory for %u flex groups",
1937 if (sbi->s_flex_groups == NULL) { 1951 flex_group_count);
1938 ext4_msg(sb, KERN_ERR, "not enough memory for " 1952 goto failed;
1939 "%u flex groups", flex_group_count); 1953 }
1940 goto failed;
1941 } 1954 }
1942 1955
1943 for (i = 0; i < sbi->s_groups_count; i++) { 1956 for (i = 0; i < sbi->s_groups_count; i++) {
@@ -2916,7 +2929,7 @@ static int ext4_register_li_request(struct super_block *sb,
2916 struct ext4_sb_info *sbi = EXT4_SB(sb); 2929 struct ext4_sb_info *sbi = EXT4_SB(sb);
2917 struct ext4_li_request *elr; 2930 struct ext4_li_request *elr;
2918 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; 2931 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
2919 int ret; 2932 int ret = 0;
2920 2933
2921 if (sbi->s_li_request != NULL) 2934 if (sbi->s_li_request != NULL)
2922 return 0; 2935 return 0;
@@ -3071,41 +3084,41 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3071 3084
3072 /* Set defaults before we parse the mount options */ 3085 /* Set defaults before we parse the mount options */
3073 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 3086 def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
3074 set_opt(sbi->s_mount_opt, INIT_INODE_TABLE); 3087 set_opt(sb, INIT_INODE_TABLE);
3075 if (def_mount_opts & EXT4_DEFM_DEBUG) 3088 if (def_mount_opts & EXT4_DEFM_DEBUG)
3076 set_opt(sbi->s_mount_opt, DEBUG); 3089 set_opt(sb, DEBUG);
3077 if (def_mount_opts & EXT4_DEFM_BSDGROUPS) { 3090 if (def_mount_opts & EXT4_DEFM_BSDGROUPS) {
3078 ext4_msg(sb, KERN_WARNING, deprecated_msg, "bsdgroups", 3091 ext4_msg(sb, KERN_WARNING, deprecated_msg, "bsdgroups",
3079 "2.6.38"); 3092 "2.6.38");
3080 set_opt(sbi->s_mount_opt, GRPID); 3093 set_opt(sb, GRPID);
3081 } 3094 }
3082 if (def_mount_opts & EXT4_DEFM_UID16) 3095 if (def_mount_opts & EXT4_DEFM_UID16)
3083 set_opt(sbi->s_mount_opt, NO_UID32); 3096 set_opt(sb, NO_UID32);
3084#ifdef CONFIG_EXT4_FS_XATTR 3097#ifdef CONFIG_EXT4_FS_XATTR
3085 if (def_mount_opts & EXT4_DEFM_XATTR_USER) 3098 if (def_mount_opts & EXT4_DEFM_XATTR_USER)
3086 set_opt(sbi->s_mount_opt, XATTR_USER); 3099 set_opt(sb, XATTR_USER);
3087#endif 3100#endif
3088#ifdef CONFIG_EXT4_FS_POSIX_ACL 3101#ifdef CONFIG_EXT4_FS_POSIX_ACL
3089 if (def_mount_opts & EXT4_DEFM_ACL) 3102 if (def_mount_opts & EXT4_DEFM_ACL)
3090 set_opt(sbi->s_mount_opt, POSIX_ACL); 3103 set_opt(sb, POSIX_ACL);
3091#endif 3104#endif
3092 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) 3105 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
3093 set_opt(sbi->s_mount_opt, JOURNAL_DATA); 3106 set_opt(sb, JOURNAL_DATA);
3094 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) 3107 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
3095 set_opt(sbi->s_mount_opt, ORDERED_DATA); 3108 set_opt(sb, ORDERED_DATA);
3096 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK) 3109 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
3097 set_opt(sbi->s_mount_opt, WRITEBACK_DATA); 3110 set_opt(sb, WRITEBACK_DATA);
3098 3111
3099 if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) 3112 if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
3100 set_opt(sbi->s_mount_opt, ERRORS_PANIC); 3113 set_opt(sb, ERRORS_PANIC);
3101 else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE) 3114 else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
3102 set_opt(sbi->s_mount_opt, ERRORS_CONT); 3115 set_opt(sb, ERRORS_CONT);
3103 else 3116 else
3104 set_opt(sbi->s_mount_opt, ERRORS_RO); 3117 set_opt(sb, ERRORS_RO);
3105 if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY) 3118 if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)
3106 set_opt(sbi->s_mount_opt, BLOCK_VALIDITY); 3119 set_opt(sb, BLOCK_VALIDITY);
3107 if (def_mount_opts & EXT4_DEFM_DISCARD) 3120 if (def_mount_opts & EXT4_DEFM_DISCARD)
3108 set_opt(sbi->s_mount_opt, DISCARD); 3121 set_opt(sb, DISCARD);
3109 3122
3110 sbi->s_resuid = le16_to_cpu(es->s_def_resuid); 3123 sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
3111 sbi->s_resgid = le16_to_cpu(es->s_def_resgid); 3124 sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
@@ -3114,7 +3127,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3114 sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; 3127 sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
3115 3128
3116 if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0) 3129 if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
3117 set_opt(sbi->s_mount_opt, BARRIER); 3130 set_opt(sb, BARRIER);
3118 3131
3119 /* 3132 /*
3120 * enable delayed allocation by default 3133 * enable delayed allocation by default
@@ -3122,7 +3135,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3122 */ 3135 */
3123 if (!IS_EXT3_SB(sb) && 3136 if (!IS_EXT3_SB(sb) &&
3124 ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0)) 3137 ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
3125 set_opt(sbi->s_mount_opt, DELALLOC); 3138 set_opt(sb, DELALLOC);
3126 3139
3127 if (!parse_options((char *) sbi->s_es->s_mount_opts, sb, 3140 if (!parse_options((char *) sbi->s_es->s_mount_opts, sb,
3128 &journal_devnum, &journal_ioprio, NULL, 0)) { 3141 &journal_devnum, &journal_ioprio, NULL, 0)) {
@@ -3425,8 +3438,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3425 "suppressed and not mounted read-only"); 3438 "suppressed and not mounted read-only");
3426 goto failed_mount_wq; 3439 goto failed_mount_wq;
3427 } else { 3440 } else {
3428 clear_opt(sbi->s_mount_opt, DATA_FLAGS); 3441 clear_opt(sb, DATA_FLAGS);
3429 set_opt(sbi->s_mount_opt, WRITEBACK_DATA); 3442 set_opt(sb, WRITEBACK_DATA);
3430 sbi->s_journal = NULL; 3443 sbi->s_journal = NULL;
3431 needs_recovery = 0; 3444 needs_recovery = 0;
3432 goto no_journal; 3445 goto no_journal;
@@ -3464,9 +3477,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3464 */ 3477 */
3465 if (jbd2_journal_check_available_features 3478 if (jbd2_journal_check_available_features
3466 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) 3479 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE))
3467 set_opt(sbi->s_mount_opt, ORDERED_DATA); 3480 set_opt(sb, ORDERED_DATA);
3468 else 3481 else
3469 set_opt(sbi->s_mount_opt, JOURNAL_DATA); 3482 set_opt(sb, JOURNAL_DATA);
3470 break; 3483 break;
3471 3484
3472 case EXT4_MOUNT_ORDERED_DATA: 3485 case EXT4_MOUNT_ORDERED_DATA:
@@ -3556,18 +3569,18 @@ no_journal:
3556 (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) { 3569 (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) {
3557 ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - " 3570 ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - "
3558 "requested data journaling mode"); 3571 "requested data journaling mode");
3559 clear_opt(sbi->s_mount_opt, DELALLOC); 3572 clear_opt(sb, DELALLOC);
3560 } 3573 }
3561 if (test_opt(sb, DIOREAD_NOLOCK)) { 3574 if (test_opt(sb, DIOREAD_NOLOCK)) {
3562 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { 3575 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
3563 ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock " 3576 ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock "
3564 "option - requested data journaling mode"); 3577 "option - requested data journaling mode");
3565 clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); 3578 clear_opt(sb, DIOREAD_NOLOCK);
3566 } 3579 }
3567 if (sb->s_blocksize < PAGE_SIZE) { 3580 if (sb->s_blocksize < PAGE_SIZE) {
3568 ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock " 3581 ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock "
3569 "option - block size is too small"); 3582 "option - block size is too small");
3570 clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); 3583 clear_opt(sb, DIOREAD_NOLOCK);
3571 } 3584 }
3572 } 3585 }
3573 3586
@@ -4166,6 +4179,22 @@ static int ext4_unfreeze(struct super_block *sb)
4166 return 0; 4179 return 0;
4167} 4180}
4168 4181
4182/*
4183 * Structure to save mount options for ext4_remount's benefit
4184 */
4185struct ext4_mount_options {
4186 unsigned long s_mount_opt;
4187 unsigned long s_mount_opt2;
4188 uid_t s_resuid;
4189 gid_t s_resgid;
4190 unsigned long s_commit_interval;
4191 u32 s_min_batch_time, s_max_batch_time;
4192#ifdef CONFIG_QUOTA
4193 int s_jquota_fmt;
4194 char *s_qf_names[MAXQUOTAS];
4195#endif
4196};
4197
4169static int ext4_remount(struct super_block *sb, int *flags, char *data) 4198static int ext4_remount(struct super_block *sb, int *flags, char *data)
4170{ 4199{
4171 struct ext4_super_block *es; 4200 struct ext4_super_block *es;
@@ -4186,6 +4215,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
4186 lock_super(sb); 4215 lock_super(sb);
4187 old_sb_flags = sb->s_flags; 4216 old_sb_flags = sb->s_flags;
4188 old_opts.s_mount_opt = sbi->s_mount_opt; 4217 old_opts.s_mount_opt = sbi->s_mount_opt;
4218 old_opts.s_mount_opt2 = sbi->s_mount_opt2;
4189 old_opts.s_resuid = sbi->s_resuid; 4219 old_opts.s_resuid = sbi->s_resuid;
4190 old_opts.s_resgid = sbi->s_resgid; 4220 old_opts.s_resgid = sbi->s_resgid;
4191 old_opts.s_commit_interval = sbi->s_commit_interval; 4221 old_opts.s_commit_interval = sbi->s_commit_interval;
@@ -4339,6 +4369,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
4339restore_opts: 4369restore_opts:
4340 sb->s_flags = old_sb_flags; 4370 sb->s_flags = old_sb_flags;
4341 sbi->s_mount_opt = old_opts.s_mount_opt; 4371 sbi->s_mount_opt = old_opts.s_mount_opt;
4372 sbi->s_mount_opt2 = old_opts.s_mount_opt2;
4342 sbi->s_resuid = old_opts.s_resuid; 4373 sbi->s_resuid = old_opts.s_resuid;
4343 sbi->s_resgid = old_opts.s_resgid; 4374 sbi->s_resgid = old_opts.s_resgid;
4344 sbi->s_commit_interval = old_opts.s_commit_interval; 4375 sbi->s_commit_interval = old_opts.s_commit_interval;
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index fa4b899da4b..fc32176eee3 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -427,23 +427,23 @@ cleanup:
427static int 427static int
428ext4_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) 428ext4_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
429{ 429{
430 int i_error, b_error; 430 int ret, ret2;
431 431
432 down_read(&EXT4_I(dentry->d_inode)->xattr_sem); 432 down_read(&EXT4_I(dentry->d_inode)->xattr_sem);
433 i_error = ext4_xattr_ibody_list(dentry, buffer, buffer_size); 433 ret = ret2 = ext4_xattr_ibody_list(dentry, buffer, buffer_size);
434 if (i_error < 0) { 434 if (ret < 0)
435 b_error = 0; 435 goto errout;
436 } else { 436 if (buffer) {
437 if (buffer) { 437 buffer += ret;
438 buffer += i_error; 438 buffer_size -= ret;
439 buffer_size -= i_error;
440 }
441 b_error = ext4_xattr_block_list(dentry, buffer, buffer_size);
442 if (b_error < 0)
443 i_error = 0;
444 } 439 }
440 ret = ext4_xattr_block_list(dentry, buffer, buffer_size);
441 if (ret < 0)
442 goto errout;
443 ret += ret2;
444errout:
445 up_read(&EXT4_I(dentry->d_inode)->xattr_sem); 445 up_read(&EXT4_I(dentry->d_inode)->xattr_sem);
446 return i_error + b_error; 446 return ret;
447} 447}
448 448
449/* 449/*
@@ -947,7 +947,7 @@ ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
947/* 947/*
948 * ext4_xattr_set_handle() 948 * ext4_xattr_set_handle()
949 * 949 *
950 * Create, replace or remove an extended attribute for this inode. Buffer 950 * Create, replace or remove an extended attribute for this inode. Value
951 * is NULL to remove an existing extended attribute, and non-NULL to 951 * is NULL to remove an existing extended attribute, and non-NULL to
952 * either replace an existing extended attribute, or create a new extended 952 * either replace an existing extended attribute, or create a new extended
953 * attribute. The flags XATTR_REPLACE and XATTR_CREATE 953 * attribute. The flags XATTR_REPLACE and XATTR_CREATE
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index ad6998a92c3..206351af7c5 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -514,11 +514,18 @@ static struct inode *fat_alloc_inode(struct super_block *sb)
514 return &ei->vfs_inode; 514 return &ei->vfs_inode;
515} 515}
516 516
517static void fat_destroy_inode(struct inode *inode) 517static void fat_i_callback(struct rcu_head *head)
518{ 518{
519 struct inode *inode = container_of(head, struct inode, i_rcu);
520 INIT_LIST_HEAD(&inode->i_dentry);
519 kmem_cache_free(fat_inode_cachep, MSDOS_I(inode)); 521 kmem_cache_free(fat_inode_cachep, MSDOS_I(inode));
520} 522}
521 523
524static void fat_destroy_inode(struct inode *inode)
525{
526 call_rcu(&inode->i_rcu, fat_i_callback);
527}
528
522static void init_once(void *foo) 529static void init_once(void *foo)
523{ 530{
524 struct msdos_inode_info *ei = (struct msdos_inode_info *)foo; 531 struct msdos_inode_info *ei = (struct msdos_inode_info *)foo;
@@ -743,7 +750,7 @@ static struct dentry *fat_fh_to_dentry(struct super_block *sb,
743 */ 750 */
744 result = d_obtain_alias(inode); 751 result = d_obtain_alias(inode);
745 if (!IS_ERR(result)) 752 if (!IS_ERR(result))
746 result->d_op = sb->s_root->d_op; 753 d_set_d_op(result, sb->s_root->d_op);
747 return result; 754 return result;
748} 755}
749 756
@@ -793,7 +800,7 @@ static struct dentry *fat_get_parent(struct dentry *child)
793 800
794 parent = d_obtain_alias(inode); 801 parent = d_obtain_alias(inode);
795 if (!IS_ERR(parent)) 802 if (!IS_ERR(parent))
796 parent->d_op = sb->s_root->d_op; 803 d_set_d_op(parent, sb->s_root->d_op);
797out: 804out:
798 unlock_super(sb); 805 unlock_super(sb);
799 806
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 3345aabd1dd..35ffe43afa4 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -148,7 +148,8 @@ static int msdos_find(struct inode *dir, const unsigned char *name, int len,
148 * that the existing dentry can be used. The msdos fs routines will 148 * that the existing dentry can be used. The msdos fs routines will
149 * return ENOENT or EINVAL as appropriate. 149 * return ENOENT or EINVAL as appropriate.
150 */ 150 */
151static int msdos_hash(struct dentry *dentry, struct qstr *qstr) 151static int msdos_hash(const struct dentry *dentry, const struct inode *inode,
152 struct qstr *qstr)
152{ 153{
153 struct fat_mount_options *options = &MSDOS_SB(dentry->d_sb)->options; 154 struct fat_mount_options *options = &MSDOS_SB(dentry->d_sb)->options;
154 unsigned char msdos_name[MSDOS_NAME]; 155 unsigned char msdos_name[MSDOS_NAME];
@@ -164,16 +165,18 @@ static int msdos_hash(struct dentry *dentry, struct qstr *qstr)
164 * Compare two msdos names. If either of the names are invalid, 165 * Compare two msdos names. If either of the names are invalid,
165 * we fall back to doing the standard name comparison. 166 * we fall back to doing the standard name comparison.
166 */ 167 */
167static int msdos_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b) 168static int msdos_cmp(const struct dentry *parent, const struct inode *pinode,
169 const struct dentry *dentry, const struct inode *inode,
170 unsigned int len, const char *str, const struct qstr *name)
168{ 171{
169 struct fat_mount_options *options = &MSDOS_SB(dentry->d_sb)->options; 172 struct fat_mount_options *options = &MSDOS_SB(parent->d_sb)->options;
170 unsigned char a_msdos_name[MSDOS_NAME], b_msdos_name[MSDOS_NAME]; 173 unsigned char a_msdos_name[MSDOS_NAME], b_msdos_name[MSDOS_NAME];
171 int error; 174 int error;
172 175
173 error = msdos_format_name(a->name, a->len, a_msdos_name, options); 176 error = msdos_format_name(name->name, name->len, a_msdos_name, options);
174 if (error) 177 if (error)
175 goto old_compare; 178 goto old_compare;
176 error = msdos_format_name(b->name, b->len, b_msdos_name, options); 179 error = msdos_format_name(str, len, b_msdos_name, options);
177 if (error) 180 if (error)
178 goto old_compare; 181 goto old_compare;
179 error = memcmp(a_msdos_name, b_msdos_name, MSDOS_NAME); 182 error = memcmp(a_msdos_name, b_msdos_name, MSDOS_NAME);
@@ -182,8 +185,8 @@ out:
182 185
183old_compare: 186old_compare:
184 error = 1; 187 error = 1;
185 if (a->len == b->len) 188 if (name->len == len)
186 error = memcmp(a->name, b->name, a->len); 189 error = memcmp(name->name, str, len);
187 goto out; 190 goto out;
188} 191}
189 192
@@ -224,10 +227,10 @@ static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry,
224 } 227 }
225out: 228out:
226 unlock_super(sb); 229 unlock_super(sb);
227 dentry->d_op = &msdos_dentry_operations; 230 d_set_d_op(dentry, &msdos_dentry_operations);
228 dentry = d_splice_alias(inode, dentry); 231 dentry = d_splice_alias(inode, dentry);
229 if (dentry) 232 if (dentry)
230 dentry->d_op = &msdos_dentry_operations; 233 d_set_d_op(dentry, &msdos_dentry_operations);
231 return dentry; 234 return dentry;
232 235
233error: 236error:
@@ -670,7 +673,7 @@ static int msdos_fill_super(struct super_block *sb, void *data, int silent)
670 } 673 }
671 674
672 sb->s_flags |= MS_NOATIME; 675 sb->s_flags |= MS_NOATIME;
673 sb->s_root->d_op = &msdos_dentry_operations; 676 d_set_d_op(sb->s_root, &msdos_dentry_operations);
674 unlock_super(sb); 677 unlock_super(sb);
675 return 0; 678 return 0;
676} 679}
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index b936703b892..e3ffc5e1233 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -43,6 +43,9 @@ static int vfat_revalidate_shortname(struct dentry *dentry)
43 43
44static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd) 44static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
45{ 45{
46 if (nd->flags & LOOKUP_RCU)
47 return -ECHILD;
48
46 /* This is not negative dentry. Always valid. */ 49 /* This is not negative dentry. Always valid. */
47 if (dentry->d_inode) 50 if (dentry->d_inode)
48 return 1; 51 return 1;
@@ -51,6 +54,9 @@ static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
51 54
52static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd) 55static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
53{ 56{
57 if (nd->flags & LOOKUP_RCU)
58 return -ECHILD;
59
54 /* 60 /*
55 * This is not negative dentry. Always valid. 61 * This is not negative dentry. Always valid.
56 * 62 *
@@ -85,22 +91,26 @@ static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
85} 91}
86 92
87/* returns the length of a struct qstr, ignoring trailing dots */ 93/* returns the length of a struct qstr, ignoring trailing dots */
88static unsigned int vfat_striptail_len(struct qstr *qstr) 94static unsigned int __vfat_striptail_len(unsigned int len, const char *name)
89{ 95{
90 unsigned int len = qstr->len; 96 while (len && name[len - 1] == '.')
91
92 while (len && qstr->name[len - 1] == '.')
93 len--; 97 len--;
94 return len; 98 return len;
95} 99}
96 100
101static unsigned int vfat_striptail_len(const struct qstr *qstr)
102{
103 return __vfat_striptail_len(qstr->len, qstr->name);
104}
105
97/* 106/*
98 * Compute the hash for the vfat name corresponding to the dentry. 107 * Compute the hash for the vfat name corresponding to the dentry.
99 * Note: if the name is invalid, we leave the hash code unchanged so 108 * Note: if the name is invalid, we leave the hash code unchanged so
100 * that the existing dentry can be used. The vfat fs routines will 109 * that the existing dentry can be used. The vfat fs routines will
101 * return ENOENT or EINVAL as appropriate. 110 * return ENOENT or EINVAL as appropriate.
102 */ 111 */
103static int vfat_hash(struct dentry *dentry, struct qstr *qstr) 112static int vfat_hash(const struct dentry *dentry, const struct inode *inode,
113 struct qstr *qstr)
104{ 114{
105 qstr->hash = full_name_hash(qstr->name, vfat_striptail_len(qstr)); 115 qstr->hash = full_name_hash(qstr->name, vfat_striptail_len(qstr));
106 return 0; 116 return 0;
@@ -112,9 +122,10 @@ static int vfat_hash(struct dentry *dentry, struct qstr *qstr)
112 * that the existing dentry can be used. The vfat fs routines will 122 * that the existing dentry can be used. The vfat fs routines will
113 * return ENOENT or EINVAL as appropriate. 123 * return ENOENT or EINVAL as appropriate.
114 */ 124 */
115static int vfat_hashi(struct dentry *dentry, struct qstr *qstr) 125static int vfat_hashi(const struct dentry *dentry, const struct inode *inode,
126 struct qstr *qstr)
116{ 127{
117 struct nls_table *t = MSDOS_SB(dentry->d_inode->i_sb)->nls_io; 128 struct nls_table *t = MSDOS_SB(dentry->d_sb)->nls_io;
118 const unsigned char *name; 129 const unsigned char *name;
119 unsigned int len; 130 unsigned int len;
120 unsigned long hash; 131 unsigned long hash;
@@ -133,16 +144,18 @@ static int vfat_hashi(struct dentry *dentry, struct qstr *qstr)
133/* 144/*
134 * Case insensitive compare of two vfat names. 145 * Case insensitive compare of two vfat names.
135 */ 146 */
136static int vfat_cmpi(struct dentry *dentry, struct qstr *a, struct qstr *b) 147static int vfat_cmpi(const struct dentry *parent, const struct inode *pinode,
148 const struct dentry *dentry, const struct inode *inode,
149 unsigned int len, const char *str, const struct qstr *name)
137{ 150{
138 struct nls_table *t = MSDOS_SB(dentry->d_inode->i_sb)->nls_io; 151 struct nls_table *t = MSDOS_SB(parent->d_sb)->nls_io;
139 unsigned int alen, blen; 152 unsigned int alen, blen;
140 153
141 /* A filename cannot end in '.' or we treat it like it has none */ 154 /* A filename cannot end in '.' or we treat it like it has none */
142 alen = vfat_striptail_len(a); 155 alen = vfat_striptail_len(name);
143 blen = vfat_striptail_len(b); 156 blen = __vfat_striptail_len(len, str);
144 if (alen == blen) { 157 if (alen == blen) {
145 if (nls_strnicmp(t, a->name, b->name, alen) == 0) 158 if (nls_strnicmp(t, name->name, str, alen) == 0)
146 return 0; 159 return 0;
147 } 160 }
148 return 1; 161 return 1;
@@ -151,15 +164,17 @@ static int vfat_cmpi(struct dentry *dentry, struct qstr *a, struct qstr *b)
151/* 164/*
152 * Case sensitive compare of two vfat names. 165 * Case sensitive compare of two vfat names.
153 */ 166 */
154static int vfat_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b) 167static int vfat_cmp(const struct dentry *parent, const struct inode *pinode,
168 const struct dentry *dentry, const struct inode *inode,
169 unsigned int len, const char *str, const struct qstr *name)
155{ 170{
156 unsigned int alen, blen; 171 unsigned int alen, blen;
157 172
158 /* A filename cannot end in '.' or we treat it like it has none */ 173 /* A filename cannot end in '.' or we treat it like it has none */
159 alen = vfat_striptail_len(a); 174 alen = vfat_striptail_len(name);
160 blen = vfat_striptail_len(b); 175 blen = __vfat_striptail_len(len, str);
161 if (alen == blen) { 176 if (alen == blen) {
162 if (strncmp(a->name, b->name, alen) == 0) 177 if (strncmp(name->name, str, alen) == 0)
163 return 0; 178 return 0;
164 } 179 }
165 return 1; 180 return 1;
@@ -757,11 +772,11 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry,
757 772
758out: 773out:
759 unlock_super(sb); 774 unlock_super(sb);
760 dentry->d_op = sb->s_root->d_op; 775 d_set_d_op(dentry, sb->s_root->d_op);
761 dentry->d_time = dentry->d_parent->d_inode->i_version; 776 dentry->d_time = dentry->d_parent->d_inode->i_version;
762 dentry = d_splice_alias(inode, dentry); 777 dentry = d_splice_alias(inode, dentry);
763 if (dentry) { 778 if (dentry) {
764 dentry->d_op = sb->s_root->d_op; 779 d_set_d_op(dentry, sb->s_root->d_op);
765 dentry->d_time = dentry->d_parent->d_inode->i_version; 780 dentry->d_time = dentry->d_parent->d_inode->i_version;
766 } 781 }
767 return dentry; 782 return dentry;
@@ -1063,9 +1078,9 @@ static int vfat_fill_super(struct super_block *sb, void *data, int silent)
1063 } 1078 }
1064 1079
1065 if (MSDOS_SB(sb)->options.name_check != 's') 1080 if (MSDOS_SB(sb)->options.name_check != 's')
1066 sb->s_root->d_op = &vfat_ci_dentry_ops; 1081 d_set_d_op(sb->s_root, &vfat_ci_dentry_ops);
1067 else 1082 else
1068 sb->s_root->d_op = &vfat_dentry_ops; 1083 d_set_d_op(sb->s_root, &vfat_dentry_ops);
1069 1084
1070 unlock_super(sb); 1085 unlock_super(sb);
1071 return 0; 1086 return 0;
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 68ba492d8ee..751d6b255a1 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -115,6 +115,9 @@ int unregister_filesystem(struct file_system_type * fs)
115 tmp = &(*tmp)->next; 115 tmp = &(*tmp)->next;
116 } 116 }
117 write_unlock(&file_systems_lock); 117 write_unlock(&file_systems_lock);
118
119 synchronize_rcu();
120
118 return -EINVAL; 121 return -EINVAL;
119} 122}
120 123
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 8c04eac5079..2ba6719ac61 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -337,6 +337,13 @@ vxfs_iget(struct super_block *sbp, ino_t ino)
337 return ip; 337 return ip;
338} 338}
339 339
340static void vxfs_i_callback(struct rcu_head *head)
341{
342 struct inode *inode = container_of(head, struct inode, i_rcu);
343 INIT_LIST_HEAD(&inode->i_dentry);
344 kmem_cache_free(vxfs_inode_cachep, inode->i_private);
345}
346
340/** 347/**
341 * vxfs_evict_inode - remove inode from main memory 348 * vxfs_evict_inode - remove inode from main memory
342 * @ip: inode to discard. 349 * @ip: inode to discard.
@@ -350,5 +357,5 @@ vxfs_evict_inode(struct inode *ip)
350{ 357{
351 truncate_inode_pages(&ip->i_data, 0); 358 truncate_inode_pages(&ip->i_data, 0);
352 end_writeback(ip); 359 end_writeback(ip);
353 kmem_cache_free(vxfs_inode_cachep, ip->i_private); 360 call_rcu(&ip->i_rcu, vxfs_i_callback);
354} 361}
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index ed45a9cf5f3..68ca487bedb 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -14,12 +14,14 @@ void set_fs_root(struct fs_struct *fs, struct path *path)
14 struct path old_root; 14 struct path old_root;
15 15
16 spin_lock(&fs->lock); 16 spin_lock(&fs->lock);
17 write_seqcount_begin(&fs->seq);
17 old_root = fs->root; 18 old_root = fs->root;
18 fs->root = *path; 19 fs->root = *path;
19 path_get(path); 20 path_get_long(path);
21 write_seqcount_end(&fs->seq);
20 spin_unlock(&fs->lock); 22 spin_unlock(&fs->lock);
21 if (old_root.dentry) 23 if (old_root.dentry)
22 path_put(&old_root); 24 path_put_long(&old_root);
23} 25}
24 26
25/* 27/*
@@ -31,13 +33,15 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path)
31 struct path old_pwd; 33 struct path old_pwd;
32 34
33 spin_lock(&fs->lock); 35 spin_lock(&fs->lock);
36 write_seqcount_begin(&fs->seq);
34 old_pwd = fs->pwd; 37 old_pwd = fs->pwd;
35 fs->pwd = *path; 38 fs->pwd = *path;
36 path_get(path); 39 path_get_long(path);
40 write_seqcount_end(&fs->seq);
37 spin_unlock(&fs->lock); 41 spin_unlock(&fs->lock);
38 42
39 if (old_pwd.dentry) 43 if (old_pwd.dentry)
40 path_put(&old_pwd); 44 path_put_long(&old_pwd);
41} 45}
42 46
43void chroot_fs_refs(struct path *old_root, struct path *new_root) 47void chroot_fs_refs(struct path *old_root, struct path *new_root)
@@ -52,31 +56,33 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
52 fs = p->fs; 56 fs = p->fs;
53 if (fs) { 57 if (fs) {
54 spin_lock(&fs->lock); 58 spin_lock(&fs->lock);
59 write_seqcount_begin(&fs->seq);
55 if (fs->root.dentry == old_root->dentry 60 if (fs->root.dentry == old_root->dentry
56 && fs->root.mnt == old_root->mnt) { 61 && fs->root.mnt == old_root->mnt) {
57 path_get(new_root); 62 path_get_long(new_root);
58 fs->root = *new_root; 63 fs->root = *new_root;
59 count++; 64 count++;
60 } 65 }
61 if (fs->pwd.dentry == old_root->dentry 66 if (fs->pwd.dentry == old_root->dentry
62 && fs->pwd.mnt == old_root->mnt) { 67 && fs->pwd.mnt == old_root->mnt) {
63 path_get(new_root); 68 path_get_long(new_root);
64 fs->pwd = *new_root; 69 fs->pwd = *new_root;
65 count++; 70 count++;
66 } 71 }
72 write_seqcount_end(&fs->seq);
67 spin_unlock(&fs->lock); 73 spin_unlock(&fs->lock);
68 } 74 }
69 task_unlock(p); 75 task_unlock(p);
70 } while_each_thread(g, p); 76 } while_each_thread(g, p);
71 read_unlock(&tasklist_lock); 77 read_unlock(&tasklist_lock);
72 while (count--) 78 while (count--)
73 path_put(old_root); 79 path_put_long(old_root);
74} 80}
75 81
76void free_fs_struct(struct fs_struct *fs) 82void free_fs_struct(struct fs_struct *fs)
77{ 83{
78 path_put(&fs->root); 84 path_put_long(&fs->root);
79 path_put(&fs->pwd); 85 path_put_long(&fs->pwd);
80 kmem_cache_free(fs_cachep, fs); 86 kmem_cache_free(fs_cachep, fs);
81} 87}
82 88
@@ -88,8 +94,10 @@ void exit_fs(struct task_struct *tsk)
88 int kill; 94 int kill;
89 task_lock(tsk); 95 task_lock(tsk);
90 spin_lock(&fs->lock); 96 spin_lock(&fs->lock);
97 write_seqcount_begin(&fs->seq);
91 tsk->fs = NULL; 98 tsk->fs = NULL;
92 kill = !--fs->users; 99 kill = !--fs->users;
100 write_seqcount_end(&fs->seq);
93 spin_unlock(&fs->lock); 101 spin_unlock(&fs->lock);
94 task_unlock(tsk); 102 task_unlock(tsk);
95 if (kill) 103 if (kill)
@@ -105,8 +113,15 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
105 fs->users = 1; 113 fs->users = 1;
106 fs->in_exec = 0; 114 fs->in_exec = 0;
107 spin_lock_init(&fs->lock); 115 spin_lock_init(&fs->lock);
116 seqcount_init(&fs->seq);
108 fs->umask = old->umask; 117 fs->umask = old->umask;
109 get_fs_root_and_pwd(old, &fs->root, &fs->pwd); 118
119 spin_lock(&old->lock);
120 fs->root = old->root;
121 path_get_long(&fs->root);
122 fs->pwd = old->pwd;
123 path_get_long(&fs->pwd);
124 spin_unlock(&old->lock);
110 } 125 }
111 return fs; 126 return fs;
112} 127}
@@ -144,6 +159,7 @@ EXPORT_SYMBOL(current_umask);
144struct fs_struct init_fs = { 159struct fs_struct init_fs = {
145 .users = 1, 160 .users = 1,
146 .lock = __SPIN_LOCK_UNLOCKED(init_fs.lock), 161 .lock = __SPIN_LOCK_UNLOCKED(init_fs.lock),
162 .seq = SEQCNT_ZERO,
147 .umask = 0022, 163 .umask = 0022,
148}; 164};
149 165
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 6e07696308d..cf8d28d1fba 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -251,6 +251,20 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
251 kill_fasync(&fc->fasync, SIGIO, POLL_IN); 251 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
252} 252}
253 253
254void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
255 u64 nodeid, u64 nlookup)
256{
257 forget->forget_one.nodeid = nodeid;
258 forget->forget_one.nlookup = nlookup;
259
260 spin_lock(&fc->lock);
261 fc->forget_list_tail->next = forget;
262 fc->forget_list_tail = forget;
263 wake_up(&fc->waitq);
264 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
265 spin_unlock(&fc->lock);
266}
267
254static void flush_bg_queue(struct fuse_conn *fc) 268static void flush_bg_queue(struct fuse_conn *fc)
255{ 269{
256 while (fc->active_background < fc->max_background && 270 while (fc->active_background < fc->max_background &&
@@ -438,12 +452,6 @@ static void fuse_request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
438 } 452 }
439} 453}
440 454
441void fuse_request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
442{
443 req->isreply = 0;
444 fuse_request_send_nowait(fc, req);
445}
446
447void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req) 455void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
448{ 456{
449 req->isreply = 1; 457 req->isreply = 1;
@@ -896,9 +904,15 @@ static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
896 return err; 904 return err;
897} 905}
898 906
907static int forget_pending(struct fuse_conn *fc)
908{
909 return fc->forget_list_head.next != NULL;
910}
911
899static int request_pending(struct fuse_conn *fc) 912static int request_pending(struct fuse_conn *fc)
900{ 913{
901 return !list_empty(&fc->pending) || !list_empty(&fc->interrupts); 914 return !list_empty(&fc->pending) || !list_empty(&fc->interrupts) ||
915 forget_pending(fc);
902} 916}
903 917
904/* Wait until a request is available on the pending list */ 918/* Wait until a request is available on the pending list */
@@ -960,6 +974,120 @@ __releases(fc->lock)
960 return err ? err : reqsize; 974 return err ? err : reqsize;
961} 975}
962 976
977static struct fuse_forget_link *dequeue_forget(struct fuse_conn *fc,
978 unsigned max,
979 unsigned *countp)
980{
981 struct fuse_forget_link *head = fc->forget_list_head.next;
982 struct fuse_forget_link **newhead = &head;
983 unsigned count;
984
985 for (count = 0; *newhead != NULL && count < max; count++)
986 newhead = &(*newhead)->next;
987
988 fc->forget_list_head.next = *newhead;
989 *newhead = NULL;
990 if (fc->forget_list_head.next == NULL)
991 fc->forget_list_tail = &fc->forget_list_head;
992
993 if (countp != NULL)
994 *countp = count;
995
996 return head;
997}
998
999static int fuse_read_single_forget(struct fuse_conn *fc,
1000 struct fuse_copy_state *cs,
1001 size_t nbytes)
1002__releases(fc->lock)
1003{
1004 int err;
1005 struct fuse_forget_link *forget = dequeue_forget(fc, 1, NULL);
1006 struct fuse_forget_in arg = {
1007 .nlookup = forget->forget_one.nlookup,
1008 };
1009 struct fuse_in_header ih = {
1010 .opcode = FUSE_FORGET,
1011 .nodeid = forget->forget_one.nodeid,
1012 .unique = fuse_get_unique(fc),
1013 .len = sizeof(ih) + sizeof(arg),
1014 };
1015
1016 spin_unlock(&fc->lock);
1017 kfree(forget);
1018 if (nbytes < ih.len)
1019 return -EINVAL;
1020
1021 err = fuse_copy_one(cs, &ih, sizeof(ih));
1022 if (!err)
1023 err = fuse_copy_one(cs, &arg, sizeof(arg));
1024 fuse_copy_finish(cs);
1025
1026 if (err)
1027 return err;
1028
1029 return ih.len;
1030}
1031
1032static int fuse_read_batch_forget(struct fuse_conn *fc,
1033 struct fuse_copy_state *cs, size_t nbytes)
1034__releases(fc->lock)
1035{
1036 int err;
1037 unsigned max_forgets;
1038 unsigned count;
1039 struct fuse_forget_link *head;
1040 struct fuse_batch_forget_in arg = { .count = 0 };
1041 struct fuse_in_header ih = {
1042 .opcode = FUSE_BATCH_FORGET,
1043 .unique = fuse_get_unique(fc),
1044 .len = sizeof(ih) + sizeof(arg),
1045 };
1046
1047 if (nbytes < ih.len) {
1048 spin_unlock(&fc->lock);
1049 return -EINVAL;
1050 }
1051
1052 max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one);
1053 head = dequeue_forget(fc, max_forgets, &count);
1054 spin_unlock(&fc->lock);
1055
1056 arg.count = count;
1057 ih.len += count * sizeof(struct fuse_forget_one);
1058 err = fuse_copy_one(cs, &ih, sizeof(ih));
1059 if (!err)
1060 err = fuse_copy_one(cs, &arg, sizeof(arg));
1061
1062 while (head) {
1063 struct fuse_forget_link *forget = head;
1064
1065 if (!err) {
1066 err = fuse_copy_one(cs, &forget->forget_one,
1067 sizeof(forget->forget_one));
1068 }
1069 head = forget->next;
1070 kfree(forget);
1071 }
1072
1073 fuse_copy_finish(cs);
1074
1075 if (err)
1076 return err;
1077
1078 return ih.len;
1079}
1080
1081static int fuse_read_forget(struct fuse_conn *fc, struct fuse_copy_state *cs,
1082 size_t nbytes)
1083__releases(fc->lock)
1084{
1085 if (fc->minor < 16 || fc->forget_list_head.next->next == NULL)
1086 return fuse_read_single_forget(fc, cs, nbytes);
1087 else
1088 return fuse_read_batch_forget(fc, cs, nbytes);
1089}
1090
963/* 1091/*
964 * Read a single request into the userspace filesystem's buffer. This 1092 * Read a single request into the userspace filesystem's buffer. This
965 * function waits until a request is available, then removes it from 1093 * function waits until a request is available, then removes it from
@@ -998,6 +1126,14 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file,
998 return fuse_read_interrupt(fc, cs, nbytes, req); 1126 return fuse_read_interrupt(fc, cs, nbytes, req);
999 } 1127 }
1000 1128
1129 if (forget_pending(fc)) {
1130 if (list_empty(&fc->pending) || fc->forget_batch-- > 0)
1131 return fuse_read_forget(fc, cs, nbytes);
1132
1133 if (fc->forget_batch <= -8)
1134 fc->forget_batch = 16;
1135 }
1136
1001 req = list_entry(fc->pending.next, struct fuse_req, list); 1137 req = list_entry(fc->pending.next, struct fuse_req, list);
1002 req->state = FUSE_REQ_READING; 1138 req->state = FUSE_REQ_READING;
1003 list_move(&req->list, &fc->io); 1139 list_move(&req->list, &fc->io);
@@ -1090,7 +1226,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
1090 if (!fc) 1226 if (!fc)
1091 return -EPERM; 1227 return -EPERM;
1092 1228
1093 bufs = kmalloc(pipe->buffers * sizeof (struct pipe_buffer), GFP_KERNEL); 1229 bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
1094 if (!bufs) 1230 if (!bufs)
1095 return -ENOMEM; 1231 return -ENOMEM;
1096 1232
@@ -1626,7 +1762,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
1626 if (!fc) 1762 if (!fc)
1627 return -EPERM; 1763 return -EPERM;
1628 1764
1629 bufs = kmalloc(pipe->buffers * sizeof (struct pipe_buffer), GFP_KERNEL); 1765 bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
1630 if (!bufs) 1766 if (!bufs)
1631 return -ENOMEM; 1767 return -ENOMEM;
1632 1768
@@ -1770,6 +1906,8 @@ __acquires(fc->lock)
1770 flush_bg_queue(fc); 1906 flush_bg_queue(fc);
1771 end_requests(fc, &fc->pending); 1907 end_requests(fc, &fc->pending);
1772 end_requests(fc, &fc->processing); 1908 end_requests(fc, &fc->processing);
1909 while (forget_pending(fc))
1910 kfree(dequeue_forget(fc, 1, NULL));
1773} 1911}
1774 1912
1775/* 1913/*
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index c9627c95482..042af7346ec 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -10,9 +10,9 @@
10 10
11#include <linux/pagemap.h> 11#include <linux/pagemap.h>
12#include <linux/file.h> 12#include <linux/file.h>
13#include <linux/gfp.h>
14#include <linux/sched.h> 13#include <linux/sched.h>
15#include <linux/namei.h> 14#include <linux/namei.h>
15#include <linux/slab.h>
16 16
17#if BITS_PER_LONG >= 64 17#if BITS_PER_LONG >= 64
18static inline void fuse_dentry_settime(struct dentry *entry, u64 time) 18static inline void fuse_dentry_settime(struct dentry *entry, u64 time)
@@ -156,8 +156,12 @@ u64 fuse_get_attr_version(struct fuse_conn *fc)
156 */ 156 */
157static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) 157static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
158{ 158{
159 struct inode *inode = entry->d_inode; 159 struct inode *inode;
160 160
161 if (nd->flags & LOOKUP_RCU)
162 return -ECHILD;
163
164 inode = entry->d_inode;
161 if (inode && is_bad_inode(inode)) 165 if (inode && is_bad_inode(inode))
162 return 0; 166 return 0;
163 else if (fuse_dentry_time(entry) < get_jiffies_64()) { 167 else if (fuse_dentry_time(entry) < get_jiffies_64()) {
@@ -165,7 +169,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
165 struct fuse_entry_out outarg; 169 struct fuse_entry_out outarg;
166 struct fuse_conn *fc; 170 struct fuse_conn *fc;
167 struct fuse_req *req; 171 struct fuse_req *req;
168 struct fuse_req *forget_req; 172 struct fuse_forget_link *forget;
169 struct dentry *parent; 173 struct dentry *parent;
170 u64 attr_version; 174 u64 attr_version;
171 175
@@ -178,8 +182,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
178 if (IS_ERR(req)) 182 if (IS_ERR(req))
179 return 0; 183 return 0;
180 184
181 forget_req = fuse_get_req(fc); 185 forget = fuse_alloc_forget();
182 if (IS_ERR(forget_req)) { 186 if (!forget) {
183 fuse_put_request(fc, req); 187 fuse_put_request(fc, req);
184 return 0; 188 return 0;
185 } 189 }
@@ -199,15 +203,14 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
199 if (!err) { 203 if (!err) {
200 struct fuse_inode *fi = get_fuse_inode(inode); 204 struct fuse_inode *fi = get_fuse_inode(inode);
201 if (outarg.nodeid != get_node_id(inode)) { 205 if (outarg.nodeid != get_node_id(inode)) {
202 fuse_send_forget(fc, forget_req, 206 fuse_queue_forget(fc, forget, outarg.nodeid, 1);
203 outarg.nodeid, 1);
204 return 0; 207 return 0;
205 } 208 }
206 spin_lock(&fc->lock); 209 spin_lock(&fc->lock);
207 fi->nlookup++; 210 fi->nlookup++;
208 spin_unlock(&fc->lock); 211 spin_unlock(&fc->lock);
209 } 212 }
210 fuse_put_request(fc, forget_req); 213 kfree(forget);
211 if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT) 214 if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
212 return 0; 215 return 0;
213 216
@@ -259,7 +262,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
259{ 262{
260 struct fuse_conn *fc = get_fuse_conn_super(sb); 263 struct fuse_conn *fc = get_fuse_conn_super(sb);
261 struct fuse_req *req; 264 struct fuse_req *req;
262 struct fuse_req *forget_req; 265 struct fuse_forget_link *forget;
263 u64 attr_version; 266 u64 attr_version;
264 int err; 267 int err;
265 268
@@ -273,9 +276,9 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
273 if (IS_ERR(req)) 276 if (IS_ERR(req))
274 goto out; 277 goto out;
275 278
276 forget_req = fuse_get_req(fc); 279 forget = fuse_alloc_forget();
277 err = PTR_ERR(forget_req); 280 err = -ENOMEM;
278 if (IS_ERR(forget_req)) { 281 if (!forget) {
279 fuse_put_request(fc, req); 282 fuse_put_request(fc, req);
280 goto out; 283 goto out;
281 } 284 }
@@ -301,13 +304,13 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
301 attr_version); 304 attr_version);
302 err = -ENOMEM; 305 err = -ENOMEM;
303 if (!*inode) { 306 if (!*inode) {
304 fuse_send_forget(fc, forget_req, outarg->nodeid, 1); 307 fuse_queue_forget(fc, forget, outarg->nodeid, 1);
305 goto out; 308 goto out;
306 } 309 }
307 err = 0; 310 err = 0;
308 311
309 out_put_forget: 312 out_put_forget:
310 fuse_put_request(fc, forget_req); 313 kfree(forget);
311 out: 314 out:
312 return err; 315 return err;
313} 316}
@@ -347,7 +350,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
347 } 350 }
348 351
349 entry = newent ? newent : entry; 352 entry = newent ? newent : entry;
350 entry->d_op = &fuse_dentry_operations; 353 d_set_d_op(entry, &fuse_dentry_operations);
351 if (outarg_valid) 354 if (outarg_valid)
352 fuse_change_entry_timeout(entry, &outarg); 355 fuse_change_entry_timeout(entry, &outarg);
353 else 356 else
@@ -374,7 +377,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
374 struct inode *inode; 377 struct inode *inode;
375 struct fuse_conn *fc = get_fuse_conn(dir); 378 struct fuse_conn *fc = get_fuse_conn(dir);
376 struct fuse_req *req; 379 struct fuse_req *req;
377 struct fuse_req *forget_req; 380 struct fuse_forget_link *forget;
378 struct fuse_create_in inarg; 381 struct fuse_create_in inarg;
379 struct fuse_open_out outopen; 382 struct fuse_open_out outopen;
380 struct fuse_entry_out outentry; 383 struct fuse_entry_out outentry;
@@ -388,9 +391,9 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
388 if (flags & O_DIRECT) 391 if (flags & O_DIRECT)
389 return -EINVAL; 392 return -EINVAL;
390 393
391 forget_req = fuse_get_req(fc); 394 forget = fuse_alloc_forget();
392 if (IS_ERR(forget_req)) 395 if (!forget)
393 return PTR_ERR(forget_req); 396 return -ENOMEM;
394 397
395 req = fuse_get_req(fc); 398 req = fuse_get_req(fc);
396 err = PTR_ERR(req); 399 err = PTR_ERR(req);
@@ -448,10 +451,10 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
448 if (!inode) { 451 if (!inode) {
449 flags &= ~(O_CREAT | O_EXCL | O_TRUNC); 452 flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
450 fuse_sync_release(ff, flags); 453 fuse_sync_release(ff, flags);
451 fuse_send_forget(fc, forget_req, outentry.nodeid, 1); 454 fuse_queue_forget(fc, forget, outentry.nodeid, 1);
452 return -ENOMEM; 455 return -ENOMEM;
453 } 456 }
454 fuse_put_request(fc, forget_req); 457 kfree(forget);
455 d_instantiate(entry, inode); 458 d_instantiate(entry, inode);
456 fuse_change_entry_timeout(entry, &outentry); 459 fuse_change_entry_timeout(entry, &outentry);
457 fuse_invalidate_attr(dir); 460 fuse_invalidate_attr(dir);
@@ -469,7 +472,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
469 out_put_request: 472 out_put_request:
470 fuse_put_request(fc, req); 473 fuse_put_request(fc, req);
471 out_put_forget_req: 474 out_put_forget_req:
472 fuse_put_request(fc, forget_req); 475 kfree(forget);
473 return err; 476 return err;
474} 477}
475 478
@@ -483,12 +486,12 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
483 struct fuse_entry_out outarg; 486 struct fuse_entry_out outarg;
484 struct inode *inode; 487 struct inode *inode;
485 int err; 488 int err;
486 struct fuse_req *forget_req; 489 struct fuse_forget_link *forget;
487 490
488 forget_req = fuse_get_req(fc); 491 forget = fuse_alloc_forget();
489 if (IS_ERR(forget_req)) { 492 if (!forget) {
490 fuse_put_request(fc, req); 493 fuse_put_request(fc, req);
491 return PTR_ERR(forget_req); 494 return -ENOMEM;
492 } 495 }
493 496
494 memset(&outarg, 0, sizeof(outarg)); 497 memset(&outarg, 0, sizeof(outarg));
@@ -515,10 +518,10 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
515 inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, 518 inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
516 &outarg.attr, entry_attr_timeout(&outarg), 0); 519 &outarg.attr, entry_attr_timeout(&outarg), 0);
517 if (!inode) { 520 if (!inode) {
518 fuse_send_forget(fc, forget_req, outarg.nodeid, 1); 521 fuse_queue_forget(fc, forget, outarg.nodeid, 1);
519 return -ENOMEM; 522 return -ENOMEM;
520 } 523 }
521 fuse_put_request(fc, forget_req); 524 kfree(forget);
522 525
523 if (S_ISDIR(inode->i_mode)) { 526 if (S_ISDIR(inode->i_mode)) {
524 struct dentry *alias; 527 struct dentry *alias;
@@ -541,7 +544,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
541 return 0; 544 return 0;
542 545
543 out_put_forget_req: 546 out_put_forget_req:
544 fuse_put_request(fc, forget_req); 547 kfree(forget);
545 return err; 548 return err;
546} 549}
547 550
@@ -981,12 +984,15 @@ static int fuse_access(struct inode *inode, int mask)
981 * access request is sent. Execute permission is still checked 984 * access request is sent. Execute permission is still checked
982 * locally based on file mode. 985 * locally based on file mode.
983 */ 986 */
984static int fuse_permission(struct inode *inode, int mask) 987static int fuse_permission(struct inode *inode, int mask, unsigned int flags)
985{ 988{
986 struct fuse_conn *fc = get_fuse_conn(inode); 989 struct fuse_conn *fc = get_fuse_conn(inode);
987 bool refreshed = false; 990 bool refreshed = false;
988 int err = 0; 991 int err = 0;
989 992
993 if (flags & IPERM_FLAG_RCU)
994 return -ECHILD;
995
990 if (!fuse_allow_task(fc, current)) 996 if (!fuse_allow_task(fc, current))
991 return -EACCES; 997 return -EACCES;
992 998
@@ -1001,7 +1007,7 @@ static int fuse_permission(struct inode *inode, int mask)
1001 } 1007 }
1002 1008
1003 if (fc->flags & FUSE_DEFAULT_PERMISSIONS) { 1009 if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
1004 err = generic_permission(inode, mask, NULL); 1010 err = generic_permission(inode, mask, flags, NULL);
1005 1011
1006 /* If permission is denied, try to refresh file 1012 /* If permission is denied, try to refresh file
1007 attributes. This is also needed, because the root 1013 attributes. This is also needed, because the root
@@ -1009,7 +1015,8 @@ static int fuse_permission(struct inode *inode, int mask)
1009 if (err == -EACCES && !refreshed) { 1015 if (err == -EACCES && !refreshed) {
1010 err = fuse_do_getattr(inode, NULL, NULL); 1016 err = fuse_do_getattr(inode, NULL, NULL);
1011 if (!err) 1017 if (!err)
1012 err = generic_permission(inode, mask, NULL); 1018 err = generic_permission(inode, mask,
1019 flags, NULL);
1013 } 1020 }
1014 1021
1015 /* Note: the opposite of the above test does not 1022 /* Note: the opposite of the above test does not
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 8b984a2cebb..95da1bc1c82 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1634,9 +1634,9 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov,
1634 * and 64bit. Fortunately we can determine which structure the server 1634 * and 64bit. Fortunately we can determine which structure the server
1635 * used from the size of the reply. 1635 * used from the size of the reply.
1636 */ 1636 */
1637static int fuse_copy_ioctl_iovec(struct iovec *dst, void *src, 1637static int fuse_copy_ioctl_iovec_old(struct iovec *dst, void *src,
1638 size_t transferred, unsigned count, 1638 size_t transferred, unsigned count,
1639 bool is_compat) 1639 bool is_compat)
1640{ 1640{
1641#ifdef CONFIG_COMPAT 1641#ifdef CONFIG_COMPAT
1642 if (count * sizeof(struct compat_iovec) == transferred) { 1642 if (count * sizeof(struct compat_iovec) == transferred) {
@@ -1680,6 +1680,42 @@ static int fuse_verify_ioctl_iov(struct iovec *iov, size_t count)
1680 return 0; 1680 return 0;
1681} 1681}
1682 1682
1683static int fuse_copy_ioctl_iovec(struct fuse_conn *fc, struct iovec *dst,
1684 void *src, size_t transferred, unsigned count,
1685 bool is_compat)
1686{
1687 unsigned i;
1688 struct fuse_ioctl_iovec *fiov = src;
1689
1690 if (fc->minor < 16) {
1691 return fuse_copy_ioctl_iovec_old(dst, src, transferred,
1692 count, is_compat);
1693 }
1694
1695 if (count * sizeof(struct fuse_ioctl_iovec) != transferred)
1696 return -EIO;
1697
1698 for (i = 0; i < count; i++) {
1699 /* Did the server supply an inappropriate value? */
1700 if (fiov[i].base != (unsigned long) fiov[i].base ||
1701 fiov[i].len != (unsigned long) fiov[i].len)
1702 return -EIO;
1703
1704 dst[i].iov_base = (void __user *) (unsigned long) fiov[i].base;
1705 dst[i].iov_len = (size_t) fiov[i].len;
1706
1707#ifdef CONFIG_COMPAT
1708 if (is_compat &&
1709 (ptr_to_compat(dst[i].iov_base) != fiov[i].base ||
1710 (compat_size_t) dst[i].iov_len != fiov[i].len))
1711 return -EIO;
1712#endif
1713 }
1714
1715 return 0;
1716}
1717
1718
1683/* 1719/*
1684 * For ioctls, there is no generic way to determine how much memory 1720 * For ioctls, there is no generic way to determine how much memory
1685 * needs to be read and/or written. Furthermore, ioctls are allowed 1721 * needs to be read and/or written. Furthermore, ioctls are allowed
@@ -1740,18 +1776,25 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
1740 struct fuse_ioctl_out outarg; 1776 struct fuse_ioctl_out outarg;
1741 struct fuse_req *req = NULL; 1777 struct fuse_req *req = NULL;
1742 struct page **pages = NULL; 1778 struct page **pages = NULL;
1743 struct page *iov_page = NULL; 1779 struct iovec *iov_page = NULL;
1744 struct iovec *in_iov = NULL, *out_iov = NULL; 1780 struct iovec *in_iov = NULL, *out_iov = NULL;
1745 unsigned int in_iovs = 0, out_iovs = 0, num_pages = 0, max_pages; 1781 unsigned int in_iovs = 0, out_iovs = 0, num_pages = 0, max_pages;
1746 size_t in_size, out_size, transferred; 1782 size_t in_size, out_size, transferred;
1747 int err; 1783 int err;
1748 1784
1785#if BITS_PER_LONG == 32
1786 inarg.flags |= FUSE_IOCTL_32BIT;
1787#else
1788 if (flags & FUSE_IOCTL_COMPAT)
1789 inarg.flags |= FUSE_IOCTL_32BIT;
1790#endif
1791
1749 /* assume all the iovs returned by client always fits in a page */ 1792 /* assume all the iovs returned by client always fits in a page */
1750 BUILD_BUG_ON(sizeof(struct iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE); 1793 BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
1751 1794
1752 err = -ENOMEM; 1795 err = -ENOMEM;
1753 pages = kzalloc(sizeof(pages[0]) * FUSE_MAX_PAGES_PER_REQ, GFP_KERNEL); 1796 pages = kzalloc(sizeof(pages[0]) * FUSE_MAX_PAGES_PER_REQ, GFP_KERNEL);
1754 iov_page = alloc_page(GFP_KERNEL); 1797 iov_page = (struct iovec *) __get_free_page(GFP_KERNEL);
1755 if (!pages || !iov_page) 1798 if (!pages || !iov_page)
1756 goto out; 1799 goto out;
1757 1800
@@ -1760,7 +1803,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
1760 * RETRY from server is not allowed. 1803 * RETRY from server is not allowed.
1761 */ 1804 */
1762 if (!(flags & FUSE_IOCTL_UNRESTRICTED)) { 1805 if (!(flags & FUSE_IOCTL_UNRESTRICTED)) {
1763 struct iovec *iov = page_address(iov_page); 1806 struct iovec *iov = iov_page;
1764 1807
1765 iov->iov_base = (void __user *)arg; 1808 iov->iov_base = (void __user *)arg;
1766 iov->iov_len = _IOC_SIZE(cmd); 1809 iov->iov_len = _IOC_SIZE(cmd);
@@ -1841,7 +1884,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
1841 1884
1842 /* did it ask for retry? */ 1885 /* did it ask for retry? */
1843 if (outarg.flags & FUSE_IOCTL_RETRY) { 1886 if (outarg.flags & FUSE_IOCTL_RETRY) {
1844 char *vaddr; 1887 void *vaddr;
1845 1888
1846 /* no retry if in restricted mode */ 1889 /* no retry if in restricted mode */
1847 err = -EIO; 1890 err = -EIO;
@@ -1862,14 +1905,14 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
1862 goto out; 1905 goto out;
1863 1906
1864 vaddr = kmap_atomic(pages[0], KM_USER0); 1907 vaddr = kmap_atomic(pages[0], KM_USER0);
1865 err = fuse_copy_ioctl_iovec(page_address(iov_page), vaddr, 1908 err = fuse_copy_ioctl_iovec(fc, iov_page, vaddr,
1866 transferred, in_iovs + out_iovs, 1909 transferred, in_iovs + out_iovs,
1867 (flags & FUSE_IOCTL_COMPAT) != 0); 1910 (flags & FUSE_IOCTL_COMPAT) != 0);
1868 kunmap_atomic(vaddr, KM_USER0); 1911 kunmap_atomic(vaddr, KM_USER0);
1869 if (err) 1912 if (err)
1870 goto out; 1913 goto out;
1871 1914
1872 in_iov = page_address(iov_page); 1915 in_iov = iov_page;
1873 out_iov = in_iov + in_iovs; 1916 out_iov = in_iov + in_iovs;
1874 1917
1875 err = fuse_verify_ioctl_iov(in_iov, in_iovs); 1918 err = fuse_verify_ioctl_iov(in_iov, in_iovs);
@@ -1891,8 +1934,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
1891 out: 1934 out:
1892 if (req) 1935 if (req)
1893 fuse_put_request(fc, req); 1936 fuse_put_request(fc, req);
1894 if (iov_page) 1937 free_page((unsigned long) iov_page);
1895 __free_page(iov_page);
1896 while (num_pages) 1938 while (num_pages)
1897 __free_page(pages[--num_pages]); 1939 __free_page(pages[--num_pages]);
1898 kfree(pages); 1940 kfree(pages);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 57d4a3a0f10..ae5744a2f9e 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -53,6 +53,12 @@ extern struct mutex fuse_mutex;
53extern unsigned max_user_bgreq; 53extern unsigned max_user_bgreq;
54extern unsigned max_user_congthresh; 54extern unsigned max_user_congthresh;
55 55
56/* One forget request */
57struct fuse_forget_link {
58 struct fuse_forget_one forget_one;
59 struct fuse_forget_link *next;
60};
61
56/** FUSE inode */ 62/** FUSE inode */
57struct fuse_inode { 63struct fuse_inode {
58 /** Inode data */ 64 /** Inode data */
@@ -66,7 +72,7 @@ struct fuse_inode {
66 u64 nlookup; 72 u64 nlookup;
67 73
68 /** The request used for sending the FORGET message */ 74 /** The request used for sending the FORGET message */
69 struct fuse_req *forget_req; 75 struct fuse_forget_link *forget;
70 76
71 /** Time in jiffies until the file attributes are valid */ 77 /** Time in jiffies until the file attributes are valid */
72 u64 i_time; 78 u64 i_time;
@@ -255,7 +261,6 @@ struct fuse_req {
255 261
256 /** Data for asynchronous requests */ 262 /** Data for asynchronous requests */
257 union { 263 union {
258 struct fuse_forget_in forget_in;
259 struct { 264 struct {
260 struct fuse_release_in in; 265 struct fuse_release_in in;
261 struct path path; 266 struct path path;
@@ -369,6 +374,13 @@ struct fuse_conn {
369 /** Pending interrupts */ 374 /** Pending interrupts */
370 struct list_head interrupts; 375 struct list_head interrupts;
371 376
377 /** Queue of pending forgets */
378 struct fuse_forget_link forget_list_head;
379 struct fuse_forget_link *forget_list_tail;
380
381 /** Batching of FORGET requests (positive indicates FORGET batch) */
382 int forget_batch;
383
372 /** Flag indicating if connection is blocked. This will be 384 /** Flag indicating if connection is blocked. This will be
373 the case before the INIT reply is received, and if there 385 the case before the INIT reply is received, and if there
374 are too many outstading backgrounds requests */ 386 are too many outstading backgrounds requests */
@@ -543,8 +555,10 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
543/** 555/**
544 * Send FORGET command 556 * Send FORGET command
545 */ 557 */
546void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, 558void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
547 u64 nodeid, u64 nlookup); 559 u64 nodeid, u64 nlookup);
560
561struct fuse_forget_link *fuse_alloc_forget(void);
548 562
549/** 563/**
550 * Initialize READ or READDIR request 564 * Initialize READ or READDIR request
@@ -656,11 +670,6 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req);
656void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req); 670void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req);
657 671
658/** 672/**
659 * Send a request with no reply
660 */
661void fuse_request_send_noreply(struct fuse_conn *fc, struct fuse_req *req);
662
663/**
664 * Send a request in the background 673 * Send a request in the background
665 */ 674 */
666void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req); 675void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index cfce3ad86a9..f62b32cffea 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -71,6 +71,11 @@ struct fuse_mount_data {
71 unsigned blksize; 71 unsigned blksize;
72}; 72};
73 73
74struct fuse_forget_link *fuse_alloc_forget()
75{
76 return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL);
77}
78
74static struct inode *fuse_alloc_inode(struct super_block *sb) 79static struct inode *fuse_alloc_inode(struct super_block *sb)
75{ 80{
76 struct inode *inode; 81 struct inode *inode;
@@ -90,8 +95,8 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
90 INIT_LIST_HEAD(&fi->queued_writes); 95 INIT_LIST_HEAD(&fi->queued_writes);
91 INIT_LIST_HEAD(&fi->writepages); 96 INIT_LIST_HEAD(&fi->writepages);
92 init_waitqueue_head(&fi->page_waitq); 97 init_waitqueue_head(&fi->page_waitq);
93 fi->forget_req = fuse_request_alloc(); 98 fi->forget = fuse_alloc_forget();
94 if (!fi->forget_req) { 99 if (!fi->forget) {
95 kmem_cache_free(fuse_inode_cachep, inode); 100 kmem_cache_free(fuse_inode_cachep, inode);
96 return NULL; 101 return NULL;
97 } 102 }
@@ -99,27 +104,20 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
99 return inode; 104 return inode;
100} 105}
101 106
102static void fuse_destroy_inode(struct inode *inode) 107static void fuse_i_callback(struct rcu_head *head)
103{ 108{
104 struct fuse_inode *fi = get_fuse_inode(inode); 109 struct inode *inode = container_of(head, struct inode, i_rcu);
105 BUG_ON(!list_empty(&fi->write_files)); 110 INIT_LIST_HEAD(&inode->i_dentry);
106 BUG_ON(!list_empty(&fi->queued_writes));
107 if (fi->forget_req)
108 fuse_request_free(fi->forget_req);
109 kmem_cache_free(fuse_inode_cachep, inode); 111 kmem_cache_free(fuse_inode_cachep, inode);
110} 112}
111 113
112void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, 114static void fuse_destroy_inode(struct inode *inode)
113 u64 nodeid, u64 nlookup)
114{ 115{
115 struct fuse_forget_in *inarg = &req->misc.forget_in; 116 struct fuse_inode *fi = get_fuse_inode(inode);
116 inarg->nlookup = nlookup; 117 BUG_ON(!list_empty(&fi->write_files));
117 req->in.h.opcode = FUSE_FORGET; 118 BUG_ON(!list_empty(&fi->queued_writes));
118 req->in.h.nodeid = nodeid; 119 kfree(fi->forget);
119 req->in.numargs = 1; 120 call_rcu(&inode->i_rcu, fuse_i_callback);
120 req->in.args[0].size = sizeof(struct fuse_forget_in);
121 req->in.args[0].value = inarg;
122 fuse_request_send_noreply(fc, req);
123} 121}
124 122
125static void fuse_evict_inode(struct inode *inode) 123static void fuse_evict_inode(struct inode *inode)
@@ -129,8 +127,8 @@ static void fuse_evict_inode(struct inode *inode)
129 if (inode->i_sb->s_flags & MS_ACTIVE) { 127 if (inode->i_sb->s_flags & MS_ACTIVE) {
130 struct fuse_conn *fc = get_fuse_conn(inode); 128 struct fuse_conn *fc = get_fuse_conn(inode);
131 struct fuse_inode *fi = get_fuse_inode(inode); 129 struct fuse_inode *fi = get_fuse_inode(inode);
132 fuse_send_forget(fc, fi->forget_req, fi->nodeid, fi->nlookup); 130 fuse_queue_forget(fc, fi->forget, fi->nodeid, fi->nlookup);
133 fi->forget_req = NULL; 131 fi->forget = NULL;
134 } 132 }
135} 133}
136 134
@@ -534,6 +532,7 @@ void fuse_conn_init(struct fuse_conn *fc)
534 INIT_LIST_HEAD(&fc->interrupts); 532 INIT_LIST_HEAD(&fc->interrupts);
535 INIT_LIST_HEAD(&fc->bg_queue); 533 INIT_LIST_HEAD(&fc->bg_queue);
536 INIT_LIST_HEAD(&fc->entry); 534 INIT_LIST_HEAD(&fc->entry);
535 fc->forget_list_tail = &fc->forget_list_head;
537 atomic_set(&fc->num_waiting, 0); 536 atomic_set(&fc->num_waiting, 0);
538 fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND; 537 fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND;
539 fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD; 538 fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD;
@@ -619,7 +618,7 @@ static struct dentry *fuse_get_dentry(struct super_block *sb,
619 618
620 entry = d_obtain_alias(inode); 619 entry = d_obtain_alias(inode);
621 if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID) { 620 if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID) {
622 entry->d_op = &fuse_dentry_operations; 621 d_set_d_op(entry, &fuse_dentry_operations);
623 fuse_invalidate_entry_cache(entry); 622 fuse_invalidate_entry_cache(entry);
624 } 623 }
625 624
@@ -721,7 +720,7 @@ static struct dentry *fuse_get_parent(struct dentry *child)
721 720
722 parent = d_obtain_alias(inode); 721 parent = d_obtain_alias(inode);
723 if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID) { 722 if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID) {
724 parent->d_op = &fuse_dentry_operations; 723 d_set_d_op(parent, &fuse_dentry_operations);
725 fuse_invalidate_entry_cache(parent); 724 fuse_invalidate_entry_cache(parent);
726 } 725 }
727 726
diff --git a/fs/generic_acl.c b/fs/generic_acl.c
index 6bc9e3a5a69..06c48a89183 100644
--- a/fs/generic_acl.c
+++ b/fs/generic_acl.c
@@ -190,14 +190,20 @@ generic_acl_chmod(struct inode *inode)
190} 190}
191 191
192int 192int
193generic_check_acl(struct inode *inode, int mask) 193generic_check_acl(struct inode *inode, int mask, unsigned int flags)
194{ 194{
195 struct posix_acl *acl = get_cached_acl(inode, ACL_TYPE_ACCESS); 195 if (flags & IPERM_FLAG_RCU) {
196 196 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
197 if (acl) { 197 return -ECHILD;
198 int error = posix_acl_permission(inode, acl, mask); 198 } else {
199 posix_acl_release(acl); 199 struct posix_acl *acl;
200 return error; 200
201 acl = get_cached_acl(inode, ACL_TYPE_ACCESS);
202 if (acl) {
203 int error = posix_acl_permission(inode, acl, mask);
204 posix_acl_release(acl);
205 return error;
206 }
201 } 207 }
202 return -EAGAIN; 208 return -EAGAIN;
203} 209}
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 48171f4c943..7118f1a780a 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -75,11 +75,14 @@ static struct posix_acl *gfs2_acl_get(struct gfs2_inode *ip, int type)
75 * Returns: errno 75 * Returns: errno
76 */ 76 */
77 77
78int gfs2_check_acl(struct inode *inode, int mask) 78int gfs2_check_acl(struct inode *inode, int mask, unsigned int flags)
79{ 79{
80 struct posix_acl *acl; 80 struct posix_acl *acl;
81 int error; 81 int error;
82 82
83 if (flags & IPERM_FLAG_RCU)
84 return -ECHILD;
85
83 acl = gfs2_acl_get(GFS2_I(inode), ACL_TYPE_ACCESS); 86 acl = gfs2_acl_get(GFS2_I(inode), ACL_TYPE_ACCESS);
84 if (IS_ERR(acl)) 87 if (IS_ERR(acl))
85 return PTR_ERR(acl); 88 return PTR_ERR(acl);
diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h
index b522b0cb39e..a93907c8159 100644
--- a/fs/gfs2/acl.h
+++ b/fs/gfs2/acl.h
@@ -16,7 +16,7 @@
16#define GFS2_POSIX_ACL_DEFAULT "posix_acl_default" 16#define GFS2_POSIX_ACL_DEFAULT "posix_acl_default"
17#define GFS2_ACL_MAX_ENTRIES 25 17#define GFS2_ACL_MAX_ENTRIES 25
18 18
19extern int gfs2_check_acl(struct inode *inode, int mask); 19extern int gfs2_check_acl(struct inode *inode, int mask, unsigned int);
20extern int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode); 20extern int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode);
21extern int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr); 21extern int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr);
22extern const struct xattr_handler gfs2_xattr_system_handler; 22extern const struct xattr_handler gfs2_xattr_system_handler;
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 5476c066d4e..3c4039d5eef 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -763,7 +763,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
763 int metadata; 763 int metadata;
764 unsigned int revokes = 0; 764 unsigned int revokes = 0;
765 int x; 765 int x;
766 int error; 766 int error = 0;
767 767
768 if (!*top) 768 if (!*top)
769 sm->sm_first = 0; 769 sm->sm_first = 0;
@@ -780,7 +780,11 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
780 if (metadata) 780 if (metadata)
781 revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs; 781 revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs;
782 782
783 error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh); 783 if (ip != GFS2_I(sdp->sd_rindex))
784 error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh);
785 else if (!sdp->sd_rgrps)
786 error = gfs2_ri_update(ip);
787
784 if (error) 788 if (error)
785 return error; 789 return error;
786 790
@@ -879,7 +883,8 @@ out_rg_gunlock:
879out_rlist: 883out_rlist:
880 gfs2_rlist_free(&rlist); 884 gfs2_rlist_free(&rlist);
881out: 885out:
882 gfs2_glock_dq_uninit(&ip->i_alloc->al_ri_gh); 886 if (ip != GFS2_I(sdp->sd_rindex))
887 gfs2_glock_dq_uninit(&ip->i_alloc->al_ri_gh);
883 return error; 888 return error;
884} 889}
885 890
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index 6798755b385..4a456338b87 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -11,6 +11,7 @@
11#include <linux/completion.h> 11#include <linux/completion.h>
12#include <linux/buffer_head.h> 12#include <linux/buffer_head.h>
13#include <linux/gfs2_ondisk.h> 13#include <linux/gfs2_ondisk.h>
14#include <linux/namei.h>
14#include <linux/crc32.h> 15#include <linux/crc32.h>
15 16
16#include "gfs2.h" 17#include "gfs2.h"
@@ -34,15 +35,23 @@
34 35
35static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) 36static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
36{ 37{
37 struct dentry *parent = dget_parent(dentry); 38 struct dentry *parent;
38 struct gfs2_sbd *sdp = GFS2_SB(parent->d_inode); 39 struct gfs2_sbd *sdp;
39 struct gfs2_inode *dip = GFS2_I(parent->d_inode); 40 struct gfs2_inode *dip;
40 struct inode *inode = dentry->d_inode; 41 struct inode *inode;
41 struct gfs2_holder d_gh; 42 struct gfs2_holder d_gh;
42 struct gfs2_inode *ip = NULL; 43 struct gfs2_inode *ip = NULL;
43 int error; 44 int error;
44 int had_lock = 0; 45 int had_lock = 0;
45 46
47 if (nd->flags & LOOKUP_RCU)
48 return -ECHILD;
49
50 parent = dget_parent(dentry);
51 sdp = GFS2_SB(parent->d_inode);
52 dip = GFS2_I(parent->d_inode);
53 inode = dentry->d_inode;
54
46 if (inode) { 55 if (inode) {
47 if (is_bad_inode(inode)) 56 if (is_bad_inode(inode))
48 goto invalid; 57 goto invalid;
@@ -100,13 +109,14 @@ fail:
100 return 0; 109 return 0;
101} 110}
102 111
103static int gfs2_dhash(struct dentry *dentry, struct qstr *str) 112static int gfs2_dhash(const struct dentry *dentry, const struct inode *inode,
113 struct qstr *str)
104{ 114{
105 str->hash = gfs2_disk_hash(str->name, str->len); 115 str->hash = gfs2_disk_hash(str->name, str->len);
106 return 0; 116 return 0;
107} 117}
108 118
109static int gfs2_dentry_delete(struct dentry *dentry) 119static int gfs2_dentry_delete(const struct dentry *dentry)
110{ 120{
111 struct gfs2_inode *ginode; 121 struct gfs2_inode *ginode;
112 122
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index 5ab3839dfcb..97012ecff56 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -130,7 +130,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child)
130 130
131 dentry = d_obtain_alias(gfs2_lookupi(child->d_inode, &gfs2_qdotdot, 1)); 131 dentry = d_obtain_alias(gfs2_lookupi(child->d_inode, &gfs2_qdotdot, 1));
132 if (!IS_ERR(dentry)) 132 if (!IS_ERR(dentry))
133 dentry->d_op = &gfs2_dops; 133 d_set_d_op(dentry, &gfs2_dops);
134 return dentry; 134 return dentry;
135} 135}
136 136
@@ -158,7 +158,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb,
158out_inode: 158out_inode:
159 dentry = d_obtain_alias(inode); 159 dentry = d_obtain_alias(inode);
160 if (!IS_ERR(dentry)) 160 if (!IS_ERR(dentry))
161 dentry->d_op = &gfs2_dops; 161 d_set_d_op(dentry, &gfs2_dops);
162 return dentry; 162 return dentry;
163} 163}
164 164
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index aa996471ec5..fca6689e12e 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -241,7 +241,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
241 !capable(CAP_LINUX_IMMUTABLE)) 241 !capable(CAP_LINUX_IMMUTABLE))
242 goto out; 242 goto out;
243 if (!IS_IMMUTABLE(inode)) { 243 if (!IS_IMMUTABLE(inode)) {
244 error = gfs2_permission(inode, MAY_WRITE); 244 error = gfs2_permission(inode, MAY_WRITE, 0);
245 if (error) 245 if (error)
246 goto out; 246 goto out;
247 } 247 }
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index f92c1770416..08a8beb152e 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -541,21 +541,6 @@ out_locked:
541 spin_unlock(&gl->gl_spin); 541 spin_unlock(&gl->gl_spin);
542} 542}
543 543
544static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock,
545 unsigned int req_state,
546 unsigned int flags)
547{
548 int ret = LM_OUT_ERROR;
549
550 if (!sdp->sd_lockstruct.ls_ops->lm_lock)
551 return req_state == LM_ST_UNLOCKED ? 0 : req_state;
552
553 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
554 ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock,
555 req_state, flags);
556 return ret;
557}
558
559/** 544/**
560 * do_xmote - Calls the DLM to change the state of a lock 545 * do_xmote - Calls the DLM to change the state of a lock
561 * @gl: The lock state 546 * @gl: The lock state
@@ -575,13 +560,14 @@ __acquires(&gl->gl_spin)
575 560
576 lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | 561 lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP |
577 LM_FLAG_PRIORITY); 562 LM_FLAG_PRIORITY);
578 BUG_ON(gl->gl_state == target); 563 GLOCK_BUG_ON(gl, gl->gl_state == target);
579 BUG_ON(gl->gl_state == gl->gl_target); 564 GLOCK_BUG_ON(gl, gl->gl_state == gl->gl_target);
580 if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) && 565 if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) &&
581 glops->go_inval) { 566 glops->go_inval) {
582 set_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); 567 set_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
583 do_error(gl, 0); /* Fail queued try locks */ 568 do_error(gl, 0); /* Fail queued try locks */
584 } 569 }
570 gl->gl_req = target;
585 spin_unlock(&gl->gl_spin); 571 spin_unlock(&gl->gl_spin);
586 if (glops->go_xmote_th) 572 if (glops->go_xmote_th)
587 glops->go_xmote_th(gl); 573 glops->go_xmote_th(gl);
@@ -594,15 +580,17 @@ __acquires(&gl->gl_spin)
594 gl->gl_state == LM_ST_DEFERRED) && 580 gl->gl_state == LM_ST_DEFERRED) &&
595 !(lck_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) 581 !(lck_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
596 lck_flags |= LM_FLAG_TRY_1CB; 582 lck_flags |= LM_FLAG_TRY_1CB;
597 ret = gfs2_lm_lock(sdp, gl, target, lck_flags);
598 583
599 if (!(ret & LM_OUT_ASYNC)) { 584 if (sdp->sd_lockstruct.ls_ops->lm_lock) {
600 finish_xmote(gl, ret); 585 /* lock_dlm */
586 ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags);
587 GLOCK_BUG_ON(gl, ret);
588 } else { /* lock_nolock */
589 finish_xmote(gl, target);
601 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) 590 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
602 gfs2_glock_put(gl); 591 gfs2_glock_put(gl);
603 } else {
604 GLOCK_BUG_ON(gl, ret != LM_OUT_ASYNC);
605 } 592 }
593
606 spin_lock(&gl->gl_spin); 594 spin_lock(&gl->gl_spin);
607} 595}
608 596
@@ -951,17 +939,22 @@ int gfs2_glock_wait(struct gfs2_holder *gh)
951 939
952void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...) 940void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
953{ 941{
942 struct va_format vaf;
954 va_list args; 943 va_list args;
955 944
956 va_start(args, fmt); 945 va_start(args, fmt);
946
957 if (seq) { 947 if (seq) {
958 struct gfs2_glock_iter *gi = seq->private; 948 struct gfs2_glock_iter *gi = seq->private;
959 vsprintf(gi->string, fmt, args); 949 vsprintf(gi->string, fmt, args);
960 seq_printf(seq, gi->string); 950 seq_printf(seq, gi->string);
961 } else { 951 } else {
962 printk(KERN_ERR " "); 952 vaf.fmt = fmt;
963 vprintk(fmt, args); 953 vaf.va = &args;
954
955 printk(KERN_ERR " %pV", &vaf);
964 } 956 }
957
965 va_end(args); 958 va_end(args);
966} 959}
967 960
@@ -1361,24 +1354,28 @@ static int gfs2_should_freeze(const struct gfs2_glock *gl)
1361 * @gl: Pointer to the glock 1354 * @gl: Pointer to the glock
1362 * @ret: The return value from the dlm 1355 * @ret: The return value from the dlm
1363 * 1356 *
1357 * The gl_reply field is under the gl_spin lock so that it is ok
1358 * to use a bitfield shared with other glock state fields.
1364 */ 1359 */
1365 1360
1366void gfs2_glock_complete(struct gfs2_glock *gl, int ret) 1361void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
1367{ 1362{
1368 struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct; 1363 struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
1369 1364
1365 spin_lock(&gl->gl_spin);
1370 gl->gl_reply = ret; 1366 gl->gl_reply = ret;
1371 1367
1372 if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))) { 1368 if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))) {
1373 spin_lock(&gl->gl_spin);
1374 if (gfs2_should_freeze(gl)) { 1369 if (gfs2_should_freeze(gl)) {
1375 set_bit(GLF_FROZEN, &gl->gl_flags); 1370 set_bit(GLF_FROZEN, &gl->gl_flags);
1376 spin_unlock(&gl->gl_spin); 1371 spin_unlock(&gl->gl_spin);
1377 return; 1372 return;
1378 } 1373 }
1379 spin_unlock(&gl->gl_spin);
1380 } 1374 }
1375
1376 spin_unlock(&gl->gl_spin);
1381 set_bit(GLF_REPLY_PENDING, &gl->gl_flags); 1377 set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
1378 smp_wmb();
1382 gfs2_glock_hold(gl); 1379 gfs2_glock_hold(gl);
1383 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) 1380 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
1384 gfs2_glock_put(gl); 1381 gfs2_glock_put(gl);
@@ -1626,18 +1623,17 @@ static const char *hflags2str(char *buf, unsigned flags, unsigned long iflags)
1626static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh) 1623static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh)
1627{ 1624{
1628 struct task_struct *gh_owner = NULL; 1625 struct task_struct *gh_owner = NULL;
1629 char buffer[KSYM_SYMBOL_LEN];
1630 char flags_buf[32]; 1626 char flags_buf[32];
1631 1627
1632 sprint_symbol(buffer, gh->gh_ip);
1633 if (gh->gh_owner_pid) 1628 if (gh->gh_owner_pid)
1634 gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID); 1629 gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID);
1635 gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %s\n", 1630 gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %pS\n",
1636 state2str(gh->gh_state), 1631 state2str(gh->gh_state),
1637 hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags), 1632 hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags),
1638 gh->gh_error, 1633 gh->gh_error,
1639 gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1, 1634 gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1,
1640 gh_owner ? gh_owner->comm : "(ended)", buffer); 1635 gh_owner ? gh_owner->comm : "(ended)",
1636 (void *)gh->gh_ip);
1641 return 0; 1637 return 0;
1642} 1638}
1643 1639
@@ -1782,12 +1778,13 @@ int __init gfs2_glock_init(void)
1782 } 1778 }
1783#endif 1779#endif
1784 1780
1785 glock_workqueue = alloc_workqueue("glock_workqueue", WQ_RESCUER | 1781 glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
1786 WQ_HIGHPRI | WQ_FREEZEABLE, 0); 1782 WQ_HIGHPRI | WQ_FREEZEABLE, 0);
1787 if (IS_ERR(glock_workqueue)) 1783 if (IS_ERR(glock_workqueue))
1788 return PTR_ERR(glock_workqueue); 1784 return PTR_ERR(glock_workqueue);
1789 gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", WQ_RESCUER | 1785 gfs2_delete_workqueue = alloc_workqueue("delete_workqueue",
1790 WQ_FREEZEABLE, 0); 1786 WQ_MEM_RECLAIM | WQ_FREEZEABLE,
1787 0);
1791 if (IS_ERR(gfs2_delete_workqueue)) { 1788 if (IS_ERR(gfs2_delete_workqueue)) {
1792 destroy_workqueue(glock_workqueue); 1789 destroy_workqueue(glock_workqueue);
1793 return PTR_ERR(gfs2_delete_workqueue); 1790 return PTR_ERR(gfs2_delete_workqueue);
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index db1c26d6d22..691851ceb61 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -87,11 +87,10 @@ enum {
87#define GL_ASYNC 0x00000040 87#define GL_ASYNC 0x00000040
88#define GL_EXACT 0x00000080 88#define GL_EXACT 0x00000080
89#define GL_SKIP 0x00000100 89#define GL_SKIP 0x00000100
90#define GL_ATIME 0x00000200
91#define GL_NOCACHE 0x00000400 90#define GL_NOCACHE 0x00000400
92 91
93/* 92/*
94 * lm_lock() and lm_async_cb return flags 93 * lm_async_cb return flags
95 * 94 *
96 * LM_OUT_ST_MASK 95 * LM_OUT_ST_MASK
97 * Masks the lower two bits of lock state in the returned value. 96 * Masks the lower two bits of lock state in the returned value.
@@ -99,15 +98,11 @@ enum {
99 * LM_OUT_CANCELED 98 * LM_OUT_CANCELED
100 * The lock request was canceled. 99 * The lock request was canceled.
101 * 100 *
102 * LM_OUT_ASYNC
103 * The result of the request will be returned in an LM_CB_ASYNC callback.
104 *
105 */ 101 */
106 102
107#define LM_OUT_ST_MASK 0x00000003 103#define LM_OUT_ST_MASK 0x00000003
108#define LM_OUT_CANCELED 0x00000008 104#define LM_OUT_CANCELED 0x00000008
109#define LM_OUT_ASYNC 0x00000080 105#define LM_OUT_ERROR 0x00000004
110#define LM_OUT_ERROR 0x00000100
111 106
112/* 107/*
113 * lm_recovery_done() messages 108 * lm_recovery_done() messages
@@ -124,25 +119,12 @@ struct lm_lockops {
124 void (*lm_unmount) (struct gfs2_sbd *sdp); 119 void (*lm_unmount) (struct gfs2_sbd *sdp);
125 void (*lm_withdraw) (struct gfs2_sbd *sdp); 120 void (*lm_withdraw) (struct gfs2_sbd *sdp);
126 void (*lm_put_lock) (struct kmem_cache *cachep, struct gfs2_glock *gl); 121 void (*lm_put_lock) (struct kmem_cache *cachep, struct gfs2_glock *gl);
127 unsigned int (*lm_lock) (struct gfs2_glock *gl, 122 int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state,
128 unsigned int req_state, unsigned int flags); 123 unsigned int flags);
129 void (*lm_cancel) (struct gfs2_glock *gl); 124 void (*lm_cancel) (struct gfs2_glock *gl);
130 const match_table_t *lm_tokens; 125 const match_table_t *lm_tokens;
131}; 126};
132 127
133#define LM_FLAG_TRY 0x00000001
134#define LM_FLAG_TRY_1CB 0x00000002
135#define LM_FLAG_NOEXP 0x00000004
136#define LM_FLAG_ANY 0x00000008
137#define LM_FLAG_PRIORITY 0x00000010
138
139#define GL_ASYNC 0x00000040
140#define GL_EXACT 0x00000080
141#define GL_SKIP 0x00000100
142#define GL_NOCACHE 0x00000400
143
144#define GLR_TRYFAILED 13
145
146extern struct workqueue_struct *gfs2_delete_workqueue; 128extern struct workqueue_struct *gfs2_delete_workqueue;
147static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl) 129static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl)
148{ 130{
@@ -212,6 +194,8 @@ int gfs2_glock_nq_num(struct gfs2_sbd *sdp,
212int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); 194int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs);
213void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); 195void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs);
214void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); 196void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs);
197
198__attribute__ ((format(printf, 2, 3)))
215void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); 199void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...);
216 200
217/** 201/**
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 0d149dcc04e..263561bf1a5 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -325,7 +325,6 @@ static void trans_go_sync(struct gfs2_glock *gl)
325 325
326 if (gl->gl_state != LM_ST_UNLOCKED && 326 if (gl->gl_state != LM_ST_UNLOCKED &&
327 test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { 327 test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
328 flush_workqueue(gfs2_delete_workqueue);
329 gfs2_meta_syncfs(sdp); 328 gfs2_meta_syncfs(sdp);
330 gfs2_log_shutdown(sdp); 329 gfs2_log_shutdown(sdp);
331 } 330 }
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 764fbb49efc..a79790c0627 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -11,6 +11,7 @@
11#define __INCORE_DOT_H__ 11#define __INCORE_DOT_H__
12 12
13#include <linux/fs.h> 13#include <linux/fs.h>
14#include <linux/kobject.h>
14#include <linux/workqueue.h> 15#include <linux/workqueue.h>
15#include <linux/dlm.h> 16#include <linux/dlm.h>
16#include <linux/buffer_head.h> 17#include <linux/buffer_head.h>
@@ -207,12 +208,14 @@ struct gfs2_glock {
207 208
208 spinlock_t gl_spin; 209 spinlock_t gl_spin;
209 210
210 unsigned int gl_state; 211 /* State fields protected by gl_spin */
211 unsigned int gl_target; 212 unsigned int gl_state:2, /* Current state */
212 unsigned int gl_reply; 213 gl_target:2, /* Target state */
214 gl_demote_state:2, /* State requested by remote node */
215 gl_req:2, /* State in last dlm request */
216 gl_reply:8; /* Last reply from the dlm */
217
213 unsigned int gl_hash; 218 unsigned int gl_hash;
214 unsigned int gl_req;
215 unsigned int gl_demote_state; /* state requested by remote node */
216 unsigned long gl_demote_time; /* time of first demote request */ 219 unsigned long gl_demote_time; /* time of first demote request */
217 struct list_head gl_holders; 220 struct list_head gl_holders;
218 221
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index e1213f7f921..2232b3c780b 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -509,7 +509,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
509 } 509 }
510 510
511 if (!is_root) { 511 if (!is_root) {
512 error = gfs2_permission(dir, MAY_EXEC); 512 error = gfs2_permission(dir, MAY_EXEC, 0);
513 if (error) 513 if (error)
514 goto out; 514 goto out;
515 } 515 }
@@ -539,7 +539,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
539{ 539{
540 int error; 540 int error;
541 541
542 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC); 542 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0);
543 if (error) 543 if (error)
544 return error; 544 return error;
545 545
@@ -916,17 +916,8 @@ static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
916 if (error) 916 if (error)
917 return error; 917 return error;
918 918
919 if ((attr->ia_valid & ATTR_SIZE) &&
920 attr->ia_size != i_size_read(inode)) {
921 error = vmtruncate(inode, attr->ia_size);
922 if (error)
923 return error;
924 }
925
926 setattr_copy(inode, attr); 919 setattr_copy(inode, attr);
927 mark_inode_dirty(inode); 920 mark_inode_dirty(inode);
928
929 gfs2_assert_warn(GFS2_SB(inode), !error);
930 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 921 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
931 gfs2_dinode_out(ip, dibh->b_data); 922 gfs2_dinode_out(ip, dibh->b_data);
932 brelse(dibh); 923 brelse(dibh);
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index d8499fadcc5..732a183efdb 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -113,7 +113,7 @@ extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
113extern struct inode *gfs2_createi(struct gfs2_holder *ghs, 113extern struct inode *gfs2_createi(struct gfs2_holder *ghs,
114 const struct qstr *name, 114 const struct qstr *name,
115 unsigned int mode, dev_t dev); 115 unsigned int mode, dev_t dev);
116extern int gfs2_permission(struct inode *inode, int mask); 116extern int gfs2_permission(struct inode *inode, int mask, unsigned int flags);
117extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); 117extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
118extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); 118extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
119extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); 119extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 1c09425b45f..6e493aee28f 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -146,15 +146,13 @@ static u32 make_flags(const u32 lkid, const unsigned int gfs_flags,
146 return lkf; 146 return lkf;
147} 147}
148 148
149static unsigned int gdlm_lock(struct gfs2_glock *gl, 149static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state,
150 unsigned int req_state, unsigned int flags) 150 unsigned int flags)
151{ 151{
152 struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct; 152 struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
153 int error;
154 int req; 153 int req;
155 u32 lkf; 154 u32 lkf;
156 155
157 gl->gl_req = req_state;
158 req = make_mode(req_state); 156 req = make_mode(req_state);
159 lkf = make_flags(gl->gl_lksb.sb_lkid, flags, req); 157 lkf = make_flags(gl->gl_lksb.sb_lkid, flags, req);
160 158
@@ -162,13 +160,8 @@ static unsigned int gdlm_lock(struct gfs2_glock *gl,
162 * Submit the actual lock request. 160 * Submit the actual lock request.
163 */ 161 */
164 162
165 error = dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, gl->gl_strname, 163 return dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, gl->gl_strname,
166 GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast); 164 GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
167 if (error == -EAGAIN)
168 return 0;
169 if (error)
170 return LM_OUT_ERROR;
171 return LM_OUT_ASYNC;
172} 165}
173 166
174static void gdlm_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl) 167static void gdlm_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl)
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 3eb1393f7b8..2aeabd4218c 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -440,7 +440,7 @@ static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr,
440 iput(inode); 440 iput(inode);
441 return -ENOMEM; 441 return -ENOMEM;
442 } 442 }
443 dentry->d_op = &gfs2_dops; 443 d_set_d_op(dentry, &gfs2_dops);
444 *dptr = dentry; 444 *dptr = dentry;
445 return 0; 445 return 0;
446} 446}
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 12cbea7502c..1501db4f0e6 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -106,7 +106,7 @@ static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
106{ 106{
107 struct inode *inode = NULL; 107 struct inode *inode = NULL;
108 108
109 dentry->d_op = &gfs2_dops; 109 d_set_d_op(dentry, &gfs2_dops);
110 110
111 inode = gfs2_lookupi(dir, &dentry->d_name, 0); 111 inode = gfs2_lookupi(dir, &dentry->d_name, 0);
112 if (inode && IS_ERR(inode)) 112 if (inode && IS_ERR(inode))
@@ -166,7 +166,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
166 if (error) 166 if (error)
167 goto out_child; 167 goto out_child;
168 168
169 error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC); 169 error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC, 0);
170 if (error) 170 if (error)
171 goto out_gunlock; 171 goto out_gunlock;
172 172
@@ -289,7 +289,7 @@ static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
289 if (IS_APPEND(&dip->i_inode)) 289 if (IS_APPEND(&dip->i_inode))
290 return -EPERM; 290 return -EPERM;
291 291
292 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC); 292 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0);
293 if (error) 293 if (error)
294 return error; 294 return error;
295 295
@@ -822,7 +822,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
822 } 822 }
823 } 823 }
824 } else { 824 } else {
825 error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC); 825 error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC, 0);
826 if (error) 826 if (error)
827 goto out_gunlock; 827 goto out_gunlock;
828 828
@@ -857,7 +857,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
857 /* Check out the dir to be renamed */ 857 /* Check out the dir to be renamed */
858 858
859 if (dir_rename) { 859 if (dir_rename) {
860 error = gfs2_permission(odentry->d_inode, MAY_WRITE); 860 error = gfs2_permission(odentry->d_inode, MAY_WRITE, 0);
861 if (error) 861 if (error)
862 goto out_gunlock; 862 goto out_gunlock;
863 } 863 }
@@ -1041,13 +1041,17 @@ static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
1041 * Returns: errno 1041 * Returns: errno
1042 */ 1042 */
1043 1043
1044int gfs2_permission(struct inode *inode, int mask) 1044int gfs2_permission(struct inode *inode, int mask, unsigned int flags)
1045{ 1045{
1046 struct gfs2_inode *ip = GFS2_I(inode); 1046 struct gfs2_inode *ip;
1047 struct gfs2_holder i_gh; 1047 struct gfs2_holder i_gh;
1048 int error; 1048 int error;
1049 int unlock = 0; 1049 int unlock = 0;
1050 1050
1051 if (flags & IPERM_FLAG_RCU)
1052 return -ECHILD;
1053
1054 ip = GFS2_I(inode);
1051 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { 1055 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
1052 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); 1056 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
1053 if (error) 1057 if (error)
@@ -1058,7 +1062,7 @@ int gfs2_permission(struct inode *inode, int mask)
1058 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode)) 1062 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
1059 error = -EACCES; 1063 error = -EACCES;
1060 else 1064 else
1061 error = generic_permission(inode, mask, gfs2_check_acl); 1065 error = generic_permission(inode, mask, flags, gfs2_check_acl);
1062 if (unlock) 1066 if (unlock)
1063 gfs2_glock_dq_uninit(&i_gh); 1067 gfs2_glock_dq_uninit(&i_gh);
1064 1068
@@ -1069,7 +1073,6 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
1069{ 1073{
1070 struct gfs2_inode *ip = GFS2_I(inode); 1074 struct gfs2_inode *ip = GFS2_I(inode);
1071 struct gfs2_sbd *sdp = GFS2_SB(inode); 1075 struct gfs2_sbd *sdp = GFS2_SB(inode);
1072 struct buffer_head *dibh;
1073 u32 ouid, ogid, nuid, ngid; 1076 u32 ouid, ogid, nuid, ngid;
1074 int error; 1077 int error;
1075 1078
@@ -1100,25 +1103,10 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
1100 if (error) 1103 if (error)
1101 goto out_gunlock_q; 1104 goto out_gunlock_q;
1102 1105
1103 error = gfs2_meta_inode_buffer(ip, &dibh); 1106 error = gfs2_setattr_simple(ip, attr);
1104 if (error) 1107 if (error)
1105 goto out_end_trans; 1108 goto out_end_trans;
1106 1109
1107 if ((attr->ia_valid & ATTR_SIZE) &&
1108 attr->ia_size != i_size_read(inode)) {
1109 int error;
1110
1111 error = vmtruncate(inode, attr->ia_size);
1112 gfs2_assert_warn(sdp, !error);
1113 }
1114
1115 setattr_copy(inode, attr);
1116 mark_inode_dirty(inode);
1117
1118 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1119 gfs2_dinode_out(ip, dibh->b_data);
1120 brelse(dibh);
1121
1122 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) { 1110 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
1123 u64 blocks = gfs2_get_inode_blocks(&ip->i_inode); 1111 u64 blocks = gfs2_get_inode_blocks(&ip->i_inode);
1124 gfs2_quota_change(ip, -blocks, ouid, ogid); 1112 gfs2_quota_change(ip, -blocks, ouid, ogid);
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index f606baf9ba7..a689901963d 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -666,6 +666,10 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
666 qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift); 666 qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift);
667 qd->qd_qb.qb_limit = qp->qu_limit; 667 qd->qd_qb.qb_limit = qp->qu_limit;
668 } 668 }
669 if (fdq->d_fieldmask & FS_DQ_BCOUNT) {
670 qp->qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift);
671 qd->qd_qb.qb_value = qp->qu_value;
672 }
669 } 673 }
670 674
671 /* Write the quota into the quota file on disk */ 675 /* Write the quota into the quota file on disk */
@@ -1509,7 +1513,7 @@ out:
1509} 1513}
1510 1514
1511/* GFS2 only supports a subset of the XFS fields */ 1515/* GFS2 only supports a subset of the XFS fields */
1512#define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD) 1516#define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD|FS_DQ_BCOUNT)
1513 1517
1514static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, 1518static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
1515 struct fs_disk_quota *fdq) 1519 struct fs_disk_quota *fdq)
@@ -1569,9 +1573,15 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
1569 if ((fdq->d_fieldmask & FS_DQ_BSOFT) && 1573 if ((fdq->d_fieldmask & FS_DQ_BSOFT) &&
1570 ((fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_warn))) 1574 ((fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_warn)))
1571 fdq->d_fieldmask ^= FS_DQ_BSOFT; 1575 fdq->d_fieldmask ^= FS_DQ_BSOFT;
1576
1572 if ((fdq->d_fieldmask & FS_DQ_BHARD) && 1577 if ((fdq->d_fieldmask & FS_DQ_BHARD) &&
1573 ((fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_limit))) 1578 ((fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_limit)))
1574 fdq->d_fieldmask ^= FS_DQ_BHARD; 1579 fdq->d_fieldmask ^= FS_DQ_BHARD;
1580
1581 if ((fdq->d_fieldmask & FS_DQ_BCOUNT) &&
1582 ((fdq->d_bcount >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_value)))
1583 fdq->d_fieldmask ^= FS_DQ_BCOUNT;
1584
1575 if (fdq->d_fieldmask == 0) 1585 if (fdq->d_fieldmask == 0)
1576 goto out_i; 1586 goto out_i;
1577 1587
@@ -1620,4 +1630,3 @@ const struct quotactl_ops gfs2_quotactl_ops = {
1620 .get_dqblk = gfs2_get_dqblk, 1630 .get_dqblk = gfs2_get_dqblk,
1621 .set_dqblk = gfs2_set_dqblk, 1631 .set_dqblk = gfs2_set_dqblk,
1622}; 1632};
1623
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 33c8407b876..7293ea27020 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -500,7 +500,7 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp)
500 for (rgrps = 0;; rgrps++) { 500 for (rgrps = 0;; rgrps++) {
501 loff_t pos = rgrps * sizeof(struct gfs2_rindex); 501 loff_t pos = rgrps * sizeof(struct gfs2_rindex);
502 502
503 if (pos + sizeof(struct gfs2_rindex) >= i_size_read(inode)) 503 if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode))
504 break; 504 break;
505 error = gfs2_internal_read(ip, &ra_state, buf, &pos, 505 error = gfs2_internal_read(ip, &ra_state, buf, &pos,
506 sizeof(struct gfs2_rindex)); 506 sizeof(struct gfs2_rindex));
@@ -583,7 +583,7 @@ static int read_rindex_entry(struct gfs2_inode *ip,
583 * Returns: 0 on successful update, error code otherwise 583 * Returns: 0 on successful update, error code otherwise
584 */ 584 */
585 585
586static int gfs2_ri_update(struct gfs2_inode *ip) 586int gfs2_ri_update(struct gfs2_inode *ip)
587{ 587{
588 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 588 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
589 struct inode *inode = &ip->i_inode; 589 struct inode *inode = &ip->i_inode;
@@ -614,46 +614,6 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
614} 614}
615 615
616/** 616/**
617 * gfs2_ri_update_special - Pull in a new resource index from the disk
618 *
619 * This is a special version that's safe to call from gfs2_inplace_reserve_i.
620 * In this case we know that we don't have any resource groups in memory yet.
621 *
622 * @ip: pointer to the rindex inode
623 *
624 * Returns: 0 on successful update, error code otherwise
625 */
626static int gfs2_ri_update_special(struct gfs2_inode *ip)
627{
628 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
629 struct inode *inode = &ip->i_inode;
630 struct file_ra_state ra_state;
631 struct gfs2_rgrpd *rgd;
632 unsigned int max_data = 0;
633 int error;
634
635 file_ra_state_init(&ra_state, inode->i_mapping);
636 for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) {
637 /* Ignore partials */
638 if ((sdp->sd_rgrps + 1) * sizeof(struct gfs2_rindex) >
639 i_size_read(inode))
640 break;
641 error = read_rindex_entry(ip, &ra_state);
642 if (error) {
643 clear_rgrpdi(sdp);
644 return error;
645 }
646 }
647 list_for_each_entry(rgd, &sdp->sd_rindex_list, rd_list)
648 if (rgd->rd_data > max_data)
649 max_data = rgd->rd_data;
650 sdp->sd_max_rg_data = max_data;
651
652 sdp->sd_rindex_uptodate = 1;
653 return 0;
654}
655
656/**
657 * gfs2_rindex_hold - Grab a lock on the rindex 617 * gfs2_rindex_hold - Grab a lock on the rindex
658 * @sdp: The GFS2 superblock 618 * @sdp: The GFS2 superblock
659 * @ri_gh: the glock holder 619 * @ri_gh: the glock holder
@@ -1226,16 +1186,25 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex,
1226 error = gfs2_rindex_hold(sdp, &al->al_ri_gh); 1186 error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
1227 else if (!sdp->sd_rgrps) /* We may not have the rindex read 1187 else if (!sdp->sd_rgrps) /* We may not have the rindex read
1228 in, so: */ 1188 in, so: */
1229 error = gfs2_ri_update_special(ip); 1189 error = gfs2_ri_update(ip);
1230 if (error) 1190 if (error)
1231 return error; 1191 return error;
1232 } 1192 }
1233 1193
1194try_again:
1234 do { 1195 do {
1235 error = get_local_rgrp(ip, &last_unlinked); 1196 error = get_local_rgrp(ip, &last_unlinked);
1236 /* If there is no space, flushing the log may release some */ 1197 /* If there is no space, flushing the log may release some */
1237 if (error) 1198 if (error) {
1199 if (ip == GFS2_I(sdp->sd_rindex) &&
1200 !sdp->sd_rindex_uptodate) {
1201 error = gfs2_ri_update(ip);
1202 if (error)
1203 return error;
1204 goto try_again;
1205 }
1238 gfs2_log_flush(sdp, NULL); 1206 gfs2_log_flush(sdp, NULL);
1207 }
1239 } while (error && tries++ < 3); 1208 } while (error && tries++ < 3);
1240 1209
1241 if (error) { 1210 if (error) {
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 0e35c0466f9..50c2bb04369 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -48,6 +48,7 @@ extern int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex,
48 48
49extern void gfs2_inplace_release(struct gfs2_inode *ip); 49extern void gfs2_inplace_release(struct gfs2_inode *ip);
50 50
51extern int gfs2_ri_update(struct gfs2_inode *ip);
51extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n); 52extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n);
52extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation); 53extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation);
53 54
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 2b2c4997430..16c2ecac7eb 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1405,11 +1405,18 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb)
1405 return &ip->i_inode; 1405 return &ip->i_inode;
1406} 1406}
1407 1407
1408static void gfs2_destroy_inode(struct inode *inode) 1408static void gfs2_i_callback(struct rcu_head *head)
1409{ 1409{
1410 struct inode *inode = container_of(head, struct inode, i_rcu);
1411 INIT_LIST_HEAD(&inode->i_dentry);
1410 kmem_cache_free(gfs2_inode_cachep, inode); 1412 kmem_cache_free(gfs2_inode_cachep, inode);
1411} 1413}
1412 1414
1415static void gfs2_destroy_inode(struct inode *inode)
1416{
1417 call_rcu(&inode->i_rcu, gfs2_i_callback);
1418}
1419
1413const struct super_operations gfs2_super_ops = { 1420const struct super_operations gfs2_super_ops = {
1414 .alloc_inode = gfs2_alloc_inode, 1421 .alloc_inode = gfs2_alloc_inode,
1415 .destroy_inode = gfs2_destroy_inode, 1422 .destroy_inode = gfs2_destroy_inode,
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 30b58f07c8a..439b61c0326 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -1296,10 +1296,8 @@ fail:
1296 1296
1297int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data) 1297int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
1298{ 1298{
1299 struct inode *inode = &ip->i_inode;
1300 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1299 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1301 struct gfs2_ea_location el; 1300 struct gfs2_ea_location el;
1302 struct buffer_head *dibh;
1303 int error; 1301 int error;
1304 1302
1305 error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, GFS2_POSIX_ACL_ACCESS, &el); 1303 error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, GFS2_POSIX_ACL_ACCESS, &el);
@@ -1321,26 +1319,7 @@ int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
1321 if (error) 1319 if (error)
1322 return error; 1320 return error;
1323 1321
1324 error = gfs2_meta_inode_buffer(ip, &dibh); 1322 error = gfs2_setattr_simple(ip, attr);
1325 if (error)
1326 goto out_trans_end;
1327
1328 if ((attr->ia_valid & ATTR_SIZE) &&
1329 attr->ia_size != i_size_read(inode)) {
1330 int error;
1331
1332 error = vmtruncate(inode, attr->ia_size);
1333 gfs2_assert_warn(GFS2_SB(inode), !error);
1334 }
1335
1336 setattr_copy(inode, attr);
1337 mark_inode_dirty(inode);
1338
1339 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1340 gfs2_dinode_out(ip, dibh->b_data);
1341 brelse(dibh);
1342
1343out_trans_end:
1344 gfs2_trans_end(sdp); 1323 gfs2_trans_end(sdp);
1345 return error; 1324 return error;
1346} 1325}
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index 2b3b8611b41..ea4aefe7c65 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -25,7 +25,7 @@ static struct dentry *hfs_lookup(struct inode *dir, struct dentry *dentry,
25 struct inode *inode = NULL; 25 struct inode *inode = NULL;
26 int res; 26 int res;
27 27
28 dentry->d_op = &hfs_dentry_operations; 28 d_set_d_op(dentry, &hfs_dentry_operations);
29 29
30 hfs_find_init(HFS_SB(dir->i_sb)->cat_tree, &fd); 30 hfs_find_init(HFS_SB(dir->i_sb)->cat_tree, &fd);
31 hfs_cat_build_key(dir->i_sb, fd.search_key, dir->i_ino, &dentry->d_name); 31 hfs_cat_build_key(dir->i_sb, fd.search_key, dir->i_ino, &dentry->d_name);
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index c8cffb81e84..ad97c2d5828 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -213,10 +213,14 @@ extern int hfs_part_find(struct super_block *, sector_t *, sector_t *);
213/* string.c */ 213/* string.c */
214extern const struct dentry_operations hfs_dentry_operations; 214extern const struct dentry_operations hfs_dentry_operations;
215 215
216extern int hfs_hash_dentry(struct dentry *, struct qstr *); 216extern int hfs_hash_dentry(const struct dentry *, const struct inode *,
217 struct qstr *);
217extern int hfs_strcmp(const unsigned char *, unsigned int, 218extern int hfs_strcmp(const unsigned char *, unsigned int,
218 const unsigned char *, unsigned int); 219 const unsigned char *, unsigned int);
219extern int hfs_compare_dentry(struct dentry *, struct qstr *, struct qstr *); 220extern int hfs_compare_dentry(const struct dentry *parent,
221 const struct inode *pinode,
222 const struct dentry *dentry, const struct inode *inode,
223 unsigned int len, const char *str, const struct qstr *name);
220 224
221/* trans.c */ 225/* trans.c */
222extern void hfs_asc2mac(struct super_block *, struct hfs_name *, struct qstr *); 226extern void hfs_asc2mac(struct super_block *, struct hfs_name *, struct qstr *);
diff --git a/fs/hfs/string.c b/fs/hfs/string.c
index 927a5af7942..495a976a3cc 100644
--- a/fs/hfs/string.c
+++ b/fs/hfs/string.c
@@ -51,7 +51,8 @@ static unsigned char caseorder[256] = {
51/* 51/*
52 * Hash a string to an integer in a case-independent way 52 * Hash a string to an integer in a case-independent way
53 */ 53 */
54int hfs_hash_dentry(struct dentry *dentry, struct qstr *this) 54int hfs_hash_dentry(const struct dentry *dentry, const struct inode *inode,
55 struct qstr *this)
55{ 56{
56 const unsigned char *name = this->name; 57 const unsigned char *name = this->name;
57 unsigned int hash, len = this->len; 58 unsigned int hash, len = this->len;
@@ -92,21 +93,21 @@ int hfs_strcmp(const unsigned char *s1, unsigned int len1,
92 * Test for equality of two strings in the HFS filename character ordering. 93 * Test for equality of two strings in the HFS filename character ordering.
93 * return 1 on failure and 0 on success 94 * return 1 on failure and 0 on success
94 */ 95 */
95int hfs_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *s2) 96int hfs_compare_dentry(const struct dentry *parent, const struct inode *pinode,
97 const struct dentry *dentry, const struct inode *inode,
98 unsigned int len, const char *str, const struct qstr *name)
96{ 99{
97 const unsigned char *n1, *n2; 100 const unsigned char *n1, *n2;
98 int len;
99 101
100 len = s1->len;
101 if (len >= HFS_NAMELEN) { 102 if (len >= HFS_NAMELEN) {
102 if (s2->len < HFS_NAMELEN) 103 if (name->len < HFS_NAMELEN)
103 return 1; 104 return 1;
104 len = HFS_NAMELEN; 105 len = HFS_NAMELEN;
105 } else if (len != s2->len) 106 } else if (len != name->len)
106 return 1; 107 return 1;
107 108
108 n1 = s1->name; 109 n1 = str;
109 n2 = s2->name; 110 n2 = name->name;
110 while (len--) { 111 while (len--) {
111 if (caseorder[*n1++] != caseorder[*n2++]) 112 if (caseorder[*n1++] != caseorder[*n2++])
112 return 1; 113 return 1;
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 4824c27cebb..0bef62aa4f4 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -167,11 +167,18 @@ static struct inode *hfs_alloc_inode(struct super_block *sb)
167 return i ? &i->vfs_inode : NULL; 167 return i ? &i->vfs_inode : NULL;
168} 168}
169 169
170static void hfs_destroy_inode(struct inode *inode) 170static void hfs_i_callback(struct rcu_head *head)
171{ 171{
172 struct inode *inode = container_of(head, struct inode, i_rcu);
173 INIT_LIST_HEAD(&inode->i_dentry);
172 kmem_cache_free(hfs_inode_cachep, HFS_I(inode)); 174 kmem_cache_free(hfs_inode_cachep, HFS_I(inode));
173} 175}
174 176
177static void hfs_destroy_inode(struct inode *inode)
178{
179 call_rcu(&inode->i_rcu, hfs_i_callback);
180}
181
175static const struct super_operations hfs_super_operations = { 182static const struct super_operations hfs_super_operations = {
176 .alloc_inode = hfs_alloc_inode, 183 .alloc_inode = hfs_alloc_inode,
177 .destroy_inode = hfs_destroy_inode, 184 .destroy_inode = hfs_destroy_inode,
@@ -427,7 +434,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
427 if (!sb->s_root) 434 if (!sb->s_root)
428 goto bail_iput; 435 goto bail_iput;
429 436
430 sb->s_root->d_op = &hfs_dentry_operations; 437 d_set_d_op(sb->s_root, &hfs_dentry_operations);
431 438
432 /* everything's okay */ 439 /* everything's okay */
433 return 0; 440 return 0;
diff --git a/fs/hfs/sysdep.c b/fs/hfs/sysdep.c
index 7478f5c219a..19cf291eb91 100644
--- a/fs/hfs/sysdep.c
+++ b/fs/hfs/sysdep.c
@@ -8,15 +8,20 @@
8 * This file contains the code to do various system dependent things. 8 * This file contains the code to do various system dependent things.
9 */ 9 */
10 10
11#include <linux/namei.h>
11#include "hfs_fs.h" 12#include "hfs_fs.h"
12 13
13/* dentry case-handling: just lowercase everything */ 14/* dentry case-handling: just lowercase everything */
14 15
15static int hfs_revalidate_dentry(struct dentry *dentry, struct nameidata *nd) 16static int hfs_revalidate_dentry(struct dentry *dentry, struct nameidata *nd)
16{ 17{
17 struct inode *inode = dentry->d_inode; 18 struct inode *inode;
18 int diff; 19 int diff;
19 20
21 if (nd->flags & LOOKUP_RCU)
22 return -ECHILD;
23
24 inode = dentry->d_inode;
20 if(!inode) 25 if(!inode)
21 return 1; 26 return 1;
22 27
diff --git a/fs/hfsplus/bfind.c b/fs/hfsplus/bfind.c
index d182438c7ae..5d799c13205 100644
--- a/fs/hfsplus/bfind.c
+++ b/fs/hfsplus/bfind.c
@@ -22,7 +22,8 @@ int hfs_find_init(struct hfs_btree *tree, struct hfs_find_data *fd)
22 return -ENOMEM; 22 return -ENOMEM;
23 fd->search_key = ptr; 23 fd->search_key = ptr;
24 fd->key = ptr + tree->max_key_len + 2; 24 fd->key = ptr + tree->max_key_len + 2;
25 dprint(DBG_BNODE_REFS, "find_init: %d (%p)\n", tree->cnid, __builtin_return_address(0)); 25 dprint(DBG_BNODE_REFS, "find_init: %d (%p)\n",
26 tree->cnid, __builtin_return_address(0));
26 mutex_lock(&tree->tree_lock); 27 mutex_lock(&tree->tree_lock);
27 return 0; 28 return 0;
28} 29}
@@ -31,7 +32,8 @@ void hfs_find_exit(struct hfs_find_data *fd)
31{ 32{
32 hfs_bnode_put(fd->bnode); 33 hfs_bnode_put(fd->bnode);
33 kfree(fd->search_key); 34 kfree(fd->search_key);
34 dprint(DBG_BNODE_REFS, "find_exit: %d (%p)\n", fd->tree->cnid, __builtin_return_address(0)); 35 dprint(DBG_BNODE_REFS, "find_exit: %d (%p)\n",
36 fd->tree->cnid, __builtin_return_address(0));
35 mutex_unlock(&fd->tree->tree_lock); 37 mutex_unlock(&fd->tree->tree_lock);
36 fd->tree = NULL; 38 fd->tree = NULL;
37} 39}
diff --git a/fs/hfsplus/bitmap.c b/fs/hfsplus/bitmap.c
index ad57f5991eb..1cad80c789c 100644
--- a/fs/hfsplus/bitmap.c
+++ b/fs/hfsplus/bitmap.c
@@ -15,7 +15,8 @@
15 15
16#define PAGE_CACHE_BITS (PAGE_CACHE_SIZE * 8) 16#define PAGE_CACHE_BITS (PAGE_CACHE_SIZE * 8)
17 17
18int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *max) 18int hfsplus_block_allocate(struct super_block *sb, u32 size,
19 u32 offset, u32 *max)
19{ 20{
20 struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); 21 struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
21 struct page *page; 22 struct page *page;
diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c
index 29da6574ba7..1c42cc5b899 100644
--- a/fs/hfsplus/bnode.c
+++ b/fs/hfsplus/bnode.c
@@ -42,7 +42,7 @@ void hfs_bnode_read(struct hfs_bnode *node, void *buf, int off, int len)
42u16 hfs_bnode_read_u16(struct hfs_bnode *node, int off) 42u16 hfs_bnode_read_u16(struct hfs_bnode *node, int off)
43{ 43{
44 __be16 data; 44 __be16 data;
45 // optimize later... 45 /* TODO: optimize later... */
46 hfs_bnode_read(node, &data, off, 2); 46 hfs_bnode_read(node, &data, off, 2);
47 return be16_to_cpu(data); 47 return be16_to_cpu(data);
48} 48}
@@ -50,7 +50,7 @@ u16 hfs_bnode_read_u16(struct hfs_bnode *node, int off)
50u8 hfs_bnode_read_u8(struct hfs_bnode *node, int off) 50u8 hfs_bnode_read_u8(struct hfs_bnode *node, int off)
51{ 51{
52 u8 data; 52 u8 data;
53 // optimize later... 53 /* TODO: optimize later... */
54 hfs_bnode_read(node, &data, off, 1); 54 hfs_bnode_read(node, &data, off, 1);
55 return data; 55 return data;
56} 56}
@@ -96,7 +96,7 @@ void hfs_bnode_write(struct hfs_bnode *node, void *buf, int off, int len)
96void hfs_bnode_write_u16(struct hfs_bnode *node, int off, u16 data) 96void hfs_bnode_write_u16(struct hfs_bnode *node, int off, u16 data)
97{ 97{
98 __be16 v = cpu_to_be16(data); 98 __be16 v = cpu_to_be16(data);
99 // optimize later... 99 /* TODO: optimize later... */
100 hfs_bnode_write(node, &v, off, 2); 100 hfs_bnode_write(node, &v, off, 2);
101} 101}
102 102
@@ -212,7 +212,8 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len)
212 dst_page--; 212 dst_page--;
213 } 213 }
214 src -= len; 214 src -= len;
215 memmove(kmap(*dst_page) + src, kmap(*src_page) + src, len); 215 memmove(kmap(*dst_page) + src,
216 kmap(*src_page) + src, len);
216 kunmap(*src_page); 217 kunmap(*src_page);
217 set_page_dirty(*dst_page); 218 set_page_dirty(*dst_page);
218 kunmap(*dst_page); 219 kunmap(*dst_page);
@@ -250,14 +251,16 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len)
250 251
251 if (src == dst) { 252 if (src == dst) {
252 l = min(len, (int)PAGE_CACHE_SIZE - src); 253 l = min(len, (int)PAGE_CACHE_SIZE - src);
253 memmove(kmap(*dst_page) + src, kmap(*src_page) + src, l); 254 memmove(kmap(*dst_page) + src,
255 kmap(*src_page) + src, l);
254 kunmap(*src_page); 256 kunmap(*src_page);
255 set_page_dirty(*dst_page); 257 set_page_dirty(*dst_page);
256 kunmap(*dst_page); 258 kunmap(*dst_page);
257 259
258 while ((len -= l) != 0) { 260 while ((len -= l) != 0) {
259 l = min(len, (int)PAGE_CACHE_SIZE); 261 l = min(len, (int)PAGE_CACHE_SIZE);
260 memmove(kmap(*++dst_page), kmap(*++src_page), l); 262 memmove(kmap(*++dst_page),
263 kmap(*++src_page), l);
261 kunmap(*src_page); 264 kunmap(*src_page);
262 set_page_dirty(*dst_page); 265 set_page_dirty(*dst_page);
263 kunmap(*dst_page); 266 kunmap(*dst_page);
@@ -268,7 +271,8 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len)
268 do { 271 do {
269 src_ptr = kmap(*src_page) + src; 272 src_ptr = kmap(*src_page) + src;
270 dst_ptr = kmap(*dst_page) + dst; 273 dst_ptr = kmap(*dst_page) + dst;
271 if (PAGE_CACHE_SIZE - src < PAGE_CACHE_SIZE - dst) { 274 if (PAGE_CACHE_SIZE - src <
275 PAGE_CACHE_SIZE - dst) {
272 l = PAGE_CACHE_SIZE - src; 276 l = PAGE_CACHE_SIZE - src;
273 src = 0; 277 src = 0;
274 dst += l; 278 dst += l;
@@ -340,7 +344,8 @@ void hfs_bnode_unlink(struct hfs_bnode *node)
340 return; 344 return;
341 tmp->next = node->next; 345 tmp->next = node->next;
342 cnid = cpu_to_be32(tmp->next); 346 cnid = cpu_to_be32(tmp->next);
343 hfs_bnode_write(tmp, &cnid, offsetof(struct hfs_bnode_desc, next), 4); 347 hfs_bnode_write(tmp, &cnid,
348 offsetof(struct hfs_bnode_desc, next), 4);
344 hfs_bnode_put(tmp); 349 hfs_bnode_put(tmp);
345 } else if (node->type == HFS_NODE_LEAF) 350 } else if (node->type == HFS_NODE_LEAF)
346 tree->leaf_head = node->next; 351 tree->leaf_head = node->next;
@@ -351,15 +356,15 @@ void hfs_bnode_unlink(struct hfs_bnode *node)
351 return; 356 return;
352 tmp->prev = node->prev; 357 tmp->prev = node->prev;
353 cnid = cpu_to_be32(tmp->prev); 358 cnid = cpu_to_be32(tmp->prev);
354 hfs_bnode_write(tmp, &cnid, offsetof(struct hfs_bnode_desc, prev), 4); 359 hfs_bnode_write(tmp, &cnid,
360 offsetof(struct hfs_bnode_desc, prev), 4);
355 hfs_bnode_put(tmp); 361 hfs_bnode_put(tmp);
356 } else if (node->type == HFS_NODE_LEAF) 362 } else if (node->type == HFS_NODE_LEAF)
357 tree->leaf_tail = node->prev; 363 tree->leaf_tail = node->prev;
358 364
359 // move down? 365 /* move down? */
360 if (!node->prev && !node->next) { 366 if (!node->prev && !node->next)
361 printk(KERN_DEBUG "hfs_btree_del_level\n"); 367 dprint(DBG_BNODE_MOD, "hfs_btree_del_level\n");
362 }
363 if (!node->parent) { 368 if (!node->parent) {
364 tree->root = 0; 369 tree->root = 0;
365 tree->depth = 0; 370 tree->depth = 0;
@@ -379,16 +384,16 @@ struct hfs_bnode *hfs_bnode_findhash(struct hfs_btree *tree, u32 cnid)
379 struct hfs_bnode *node; 384 struct hfs_bnode *node;
380 385
381 if (cnid >= tree->node_count) { 386 if (cnid >= tree->node_count) {
382 printk(KERN_ERR "hfs: request for non-existent node %d in B*Tree\n", cnid); 387 printk(KERN_ERR "hfs: request for non-existent node "
388 "%d in B*Tree\n",
389 cnid);
383 return NULL; 390 return NULL;
384 } 391 }
385 392
386 for (node = tree->node_hash[hfs_bnode_hash(cnid)]; 393 for (node = tree->node_hash[hfs_bnode_hash(cnid)];
387 node; node = node->next_hash) { 394 node; node = node->next_hash)
388 if (node->this == cnid) { 395 if (node->this == cnid)
389 return node; 396 return node;
390 }
391 }
392 return NULL; 397 return NULL;
393} 398}
394 399
@@ -402,7 +407,9 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid)
402 loff_t off; 407 loff_t off;
403 408
404 if (cnid >= tree->node_count) { 409 if (cnid >= tree->node_count) {
405 printk(KERN_ERR "hfs: request for non-existent node %d in B*Tree\n", cnid); 410 printk(KERN_ERR "hfs: request for non-existent node "
411 "%d in B*Tree\n",
412 cnid);
406 return NULL; 413 return NULL;
407 } 414 }
408 415
@@ -429,7 +436,8 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid)
429 } else { 436 } else {
430 spin_unlock(&tree->hash_lock); 437 spin_unlock(&tree->hash_lock);
431 kfree(node); 438 kfree(node);
432 wait_event(node2->lock_wq, !test_bit(HFS_BNODE_NEW, &node2->flags)); 439 wait_event(node2->lock_wq,
440 !test_bit(HFS_BNODE_NEW, &node2->flags));
433 return node2; 441 return node2;
434 } 442 }
435 spin_unlock(&tree->hash_lock); 443 spin_unlock(&tree->hash_lock);
@@ -483,7 +491,8 @@ struct hfs_bnode *hfs_bnode_find(struct hfs_btree *tree, u32 num)
483 if (node) { 491 if (node) {
484 hfs_bnode_get(node); 492 hfs_bnode_get(node);
485 spin_unlock(&tree->hash_lock); 493 spin_unlock(&tree->hash_lock);
486 wait_event(node->lock_wq, !test_bit(HFS_BNODE_NEW, &node->flags)); 494 wait_event(node->lock_wq,
495 !test_bit(HFS_BNODE_NEW, &node->flags));
487 if (test_bit(HFS_BNODE_ERROR, &node->flags)) 496 if (test_bit(HFS_BNODE_ERROR, &node->flags))
488 goto node_error; 497 goto node_error;
489 return node; 498 return node;
@@ -497,7 +506,8 @@ struct hfs_bnode *hfs_bnode_find(struct hfs_btree *tree, u32 num)
497 if (!test_bit(HFS_BNODE_NEW, &node->flags)) 506 if (!test_bit(HFS_BNODE_NEW, &node->flags))
498 return node; 507 return node;
499 508
500 desc = (struct hfs_bnode_desc *)(kmap(node->page[0]) + node->page_offset); 509 desc = (struct hfs_bnode_desc *)(kmap(node->page[0]) +
510 node->page_offset);
501 node->prev = be32_to_cpu(desc->prev); 511 node->prev = be32_to_cpu(desc->prev);
502 node->next = be32_to_cpu(desc->next); 512 node->next = be32_to_cpu(desc->next);
503 node->num_recs = be16_to_cpu(desc->num_recs); 513 node->num_recs = be16_to_cpu(desc->num_recs);
@@ -556,11 +566,13 @@ node_error:
556 566
557void hfs_bnode_free(struct hfs_bnode *node) 567void hfs_bnode_free(struct hfs_bnode *node)
558{ 568{
559 //int i; 569#if 0
570 int i;
560 571
561 //for (i = 0; i < node->tree->pages_per_bnode; i++) 572 for (i = 0; i < node->tree->pages_per_bnode; i++)
562 // if (node->page[i]) 573 if (node->page[i])
563 // page_cache_release(node->page[i]); 574 page_cache_release(node->page[i]);
575#endif
564 kfree(node); 576 kfree(node);
565} 577}
566 578
@@ -607,7 +619,8 @@ void hfs_bnode_get(struct hfs_bnode *node)
607 if (node) { 619 if (node) {
608 atomic_inc(&node->refcnt); 620 atomic_inc(&node->refcnt);
609 dprint(DBG_BNODE_REFS, "get_node(%d:%d): %d\n", 621 dprint(DBG_BNODE_REFS, "get_node(%d:%d): %d\n",
610 node->tree->cnid, node->this, atomic_read(&node->refcnt)); 622 node->tree->cnid, node->this,
623 atomic_read(&node->refcnt));
611 } 624 }
612} 625}
613 626
@@ -619,7 +632,8 @@ void hfs_bnode_put(struct hfs_bnode *node)
619 int i; 632 int i;
620 633
621 dprint(DBG_BNODE_REFS, "put_node(%d:%d): %d\n", 634 dprint(DBG_BNODE_REFS, "put_node(%d:%d): %d\n",
622 node->tree->cnid, node->this, atomic_read(&node->refcnt)); 635 node->tree->cnid, node->this,
636 atomic_read(&node->refcnt));
623 BUG_ON(!atomic_read(&node->refcnt)); 637 BUG_ON(!atomic_read(&node->refcnt));
624 if (!atomic_dec_and_lock(&node->refcnt, &tree->hash_lock)) 638 if (!atomic_dec_and_lock(&node->refcnt, &tree->hash_lock))
625 return; 639 return;
diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c
index 2f39d05443e..2312de34bd4 100644
--- a/fs/hfsplus/brec.c
+++ b/fs/hfsplus/brec.c
@@ -39,7 +39,8 @@ u16 hfs_brec_keylen(struct hfs_bnode *node, u16 rec)
39 !(node->tree->attributes & HFS_TREE_VARIDXKEYS)) { 39 !(node->tree->attributes & HFS_TREE_VARIDXKEYS)) {
40 retval = node->tree->max_key_len + 2; 40 retval = node->tree->max_key_len + 2;
41 } else { 41 } else {
42 recoff = hfs_bnode_read_u16(node, node->tree->node_size - (rec + 1) * 2); 42 recoff = hfs_bnode_read_u16(node,
43 node->tree->node_size - (rec + 1) * 2);
43 if (!recoff) 44 if (!recoff)
44 return 0; 45 return 0;
45 46
@@ -84,7 +85,8 @@ again:
84 end_rec_off = tree->node_size - (node->num_recs + 1) * 2; 85 end_rec_off = tree->node_size - (node->num_recs + 1) * 2;
85 end_off = hfs_bnode_read_u16(node, end_rec_off); 86 end_off = hfs_bnode_read_u16(node, end_rec_off);
86 end_rec_off -= 2; 87 end_rec_off -= 2;
87 dprint(DBG_BNODE_MOD, "insert_rec: %d, %d, %d, %d\n", rec, size, end_off, end_rec_off); 88 dprint(DBG_BNODE_MOD, "insert_rec: %d, %d, %d, %d\n",
89 rec, size, end_off, end_rec_off);
88 if (size > end_rec_off - end_off) { 90 if (size > end_rec_off - end_off) {
89 if (new_node) 91 if (new_node)
90 panic("not enough room!\n"); 92 panic("not enough room!\n");
@@ -99,7 +101,9 @@ again:
99 } 101 }
100 node->num_recs++; 102 node->num_recs++;
101 /* write new last offset */ 103 /* write new last offset */
102 hfs_bnode_write_u16(node, offsetof(struct hfs_bnode_desc, num_recs), node->num_recs); 104 hfs_bnode_write_u16(node,
105 offsetof(struct hfs_bnode_desc, num_recs),
106 node->num_recs);
103 hfs_bnode_write_u16(node, end_rec_off, end_off + size); 107 hfs_bnode_write_u16(node, end_rec_off, end_off + size);
104 data_off = end_off; 108 data_off = end_off;
105 data_rec_off = end_rec_off + 2; 109 data_rec_off = end_rec_off + 2;
@@ -151,7 +155,8 @@ skip:
151 if (tree->attributes & HFS_TREE_VARIDXKEYS) 155 if (tree->attributes & HFS_TREE_VARIDXKEYS)
152 key_len = be16_to_cpu(fd->search_key->key_len) + 2; 156 key_len = be16_to_cpu(fd->search_key->key_len) + 2;
153 else { 157 else {
154 fd->search_key->key_len = cpu_to_be16(tree->max_key_len); 158 fd->search_key->key_len =
159 cpu_to_be16(tree->max_key_len);
155 key_len = tree->max_key_len + 2; 160 key_len = tree->max_key_len + 2;
156 } 161 }
157 goto again; 162 goto again;
@@ -180,7 +185,8 @@ again:
180 mark_inode_dirty(tree->inode); 185 mark_inode_dirty(tree->inode);
181 } 186 }
182 hfs_bnode_dump(node); 187 hfs_bnode_dump(node);
183 dprint(DBG_BNODE_MOD, "remove_rec: %d, %d\n", fd->record, fd->keylength + fd->entrylength); 188 dprint(DBG_BNODE_MOD, "remove_rec: %d, %d\n",
189 fd->record, fd->keylength + fd->entrylength);
184 if (!--node->num_recs) { 190 if (!--node->num_recs) {
185 hfs_bnode_unlink(node); 191 hfs_bnode_unlink(node);
186 if (!node->parent) 192 if (!node->parent)
@@ -194,7 +200,9 @@ again:
194 __hfs_brec_find(node, fd); 200 __hfs_brec_find(node, fd);
195 goto again; 201 goto again;
196 } 202 }
197 hfs_bnode_write_u16(node, offsetof(struct hfs_bnode_desc, num_recs), node->num_recs); 203 hfs_bnode_write_u16(node,
204 offsetof(struct hfs_bnode_desc, num_recs),
205 node->num_recs);
198 206
199 if (rec_off == end_off) 207 if (rec_off == end_off)
200 goto skip; 208 goto skip;
@@ -364,7 +372,8 @@ again:
364 newkeylen = hfs_bnode_read_u16(node, 14) + 2; 372 newkeylen = hfs_bnode_read_u16(node, 14) + 2;
365 else 373 else
366 fd->keylength = newkeylen = tree->max_key_len + 2; 374 fd->keylength = newkeylen = tree->max_key_len + 2;
367 dprint(DBG_BNODE_MOD, "update_rec: %d, %d, %d\n", rec, fd->keylength, newkeylen); 375 dprint(DBG_BNODE_MOD, "update_rec: %d, %d, %d\n",
376 rec, fd->keylength, newkeylen);
368 377
369 rec_off = tree->node_size - (rec + 2) * 2; 378 rec_off = tree->node_size - (rec + 2) * 2;
370 end_rec_off = tree->node_size - (parent->num_recs + 1) * 2; 379 end_rec_off = tree->node_size - (parent->num_recs + 1) * 2;
@@ -375,7 +384,7 @@ again:
375 end_off = hfs_bnode_read_u16(parent, end_rec_off); 384 end_off = hfs_bnode_read_u16(parent, end_rec_off);
376 if (end_rec_off - end_off < diff) { 385 if (end_rec_off - end_off < diff) {
377 386
378 printk(KERN_DEBUG "hfs: splitting index node...\n"); 387 dprint(DBG_BNODE_MOD, "hfs: splitting index node.\n");
379 fd->bnode = parent; 388 fd->bnode = parent;
380 new_node = hfs_bnode_split(fd); 389 new_node = hfs_bnode_split(fd);
381 if (IS_ERR(new_node)) 390 if (IS_ERR(new_node))
@@ -383,7 +392,8 @@ again:
383 parent = fd->bnode; 392 parent = fd->bnode;
384 rec = fd->record; 393 rec = fd->record;
385 rec_off = tree->node_size - (rec + 2) * 2; 394 rec_off = tree->node_size - (rec + 2) * 2;
386 end_rec_off = tree->node_size - (parent->num_recs + 1) * 2; 395 end_rec_off = tree->node_size -
396 (parent->num_recs + 1) * 2;
387 } 397 }
388 } 398 }
389 399
diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c
index 22e4d4e3299..21023d9f8ff 100644
--- a/fs/hfsplus/btree.c
+++ b/fs/hfsplus/btree.c
@@ -51,7 +51,8 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id)
51 goto free_inode; 51 goto free_inode;
52 52
53 /* Load the header */ 53 /* Load the header */
54 head = (struct hfs_btree_header_rec *)(kmap(page) + sizeof(struct hfs_bnode_desc)); 54 head = (struct hfs_btree_header_rec *)(kmap(page) +
55 sizeof(struct hfs_bnode_desc));
55 tree->root = be32_to_cpu(head->root); 56 tree->root = be32_to_cpu(head->root);
56 tree->leaf_count = be32_to_cpu(head->leaf_count); 57 tree->leaf_count = be32_to_cpu(head->leaf_count);
57 tree->leaf_head = be32_to_cpu(head->leaf_head); 58 tree->leaf_head = be32_to_cpu(head->leaf_head);
@@ -115,7 +116,9 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id)
115 116
116 tree->node_size_shift = ffs(size) - 1; 117 tree->node_size_shift = ffs(size) - 1;
117 118
118 tree->pages_per_bnode = (tree->node_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 119 tree->pages_per_bnode =
120 (tree->node_size + PAGE_CACHE_SIZE - 1) >>
121 PAGE_CACHE_SHIFT;
119 122
120 kunmap(page); 123 kunmap(page);
121 page_cache_release(page); 124 page_cache_release(page);
@@ -144,8 +147,10 @@ void hfs_btree_close(struct hfs_btree *tree)
144 while ((node = tree->node_hash[i])) { 147 while ((node = tree->node_hash[i])) {
145 tree->node_hash[i] = node->next_hash; 148 tree->node_hash[i] = node->next_hash;
146 if (atomic_read(&node->refcnt)) 149 if (atomic_read(&node->refcnt))
147 printk(KERN_CRIT "hfs: node %d:%d still has %d user(s)!\n", 150 printk(KERN_CRIT "hfs: node %d:%d "
148 node->tree->cnid, node->this, atomic_read(&node->refcnt)); 151 "still has %d user(s)!\n",
152 node->tree->cnid, node->this,
153 atomic_read(&node->refcnt));
149 hfs_bnode_free(node); 154 hfs_bnode_free(node);
150 tree->node_hash_cnt--; 155 tree->node_hash_cnt--;
151 } 156 }
@@ -166,7 +171,8 @@ void hfs_btree_write(struct hfs_btree *tree)
166 return; 171 return;
167 /* Load the header */ 172 /* Load the header */
168 page = node->page[0]; 173 page = node->page[0];
169 head = (struct hfs_btree_header_rec *)(kmap(page) + sizeof(struct hfs_bnode_desc)); 174 head = (struct hfs_btree_header_rec *)(kmap(page) +
175 sizeof(struct hfs_bnode_desc));
170 176
171 head->root = cpu_to_be32(tree->root); 177 head->root = cpu_to_be32(tree->root);
172 head->leaf_count = cpu_to_be32(tree->leaf_count); 178 head->leaf_count = cpu_to_be32(tree->leaf_count);
@@ -272,7 +278,8 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
272 tree->free_nodes--; 278 tree->free_nodes--;
273 mark_inode_dirty(tree->inode); 279 mark_inode_dirty(tree->inode);
274 hfs_bnode_put(node); 280 hfs_bnode_put(node);
275 return hfs_bnode_create(tree, idx); 281 return hfs_bnode_create(tree,
282 idx);
276 } 283 }
277 } 284 }
278 } 285 }
@@ -287,7 +294,7 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
287 kunmap(*pagep); 294 kunmap(*pagep);
288 nidx = node->next; 295 nidx = node->next;
289 if (!nidx) { 296 if (!nidx) {
290 printk(KERN_DEBUG "hfs: create new bmap node...\n"); 297 dprint(DBG_BNODE_MOD, "hfs: create new bmap node.\n");
291 next_node = hfs_bmap_new_bmap(node, idx); 298 next_node = hfs_bmap_new_bmap(node, idx);
292 } else 299 } else
293 next_node = hfs_bnode_find(tree, nidx); 300 next_node = hfs_bnode_find(tree, nidx);
@@ -329,7 +336,9 @@ void hfs_bmap_free(struct hfs_bnode *node)
329 hfs_bnode_put(node); 336 hfs_bnode_put(node);
330 if (!i) { 337 if (!i) {
331 /* panic */; 338 /* panic */;
332 printk(KERN_CRIT "hfs: unable to free bnode %u. bmap not found!\n", node->this); 339 printk(KERN_CRIT "hfs: unable to free bnode %u. "
340 "bmap not found!\n",
341 node->this);
333 return; 342 return;
334 } 343 }
335 node = hfs_bnode_find(tree, i); 344 node = hfs_bnode_find(tree, i);
@@ -337,7 +346,9 @@ void hfs_bmap_free(struct hfs_bnode *node)
337 return; 346 return;
338 if (node->type != HFS_NODE_MAP) { 347 if (node->type != HFS_NODE_MAP) {
339 /* panic */; 348 /* panic */;
340 printk(KERN_CRIT "hfs: invalid bmap found! (%u,%d)\n", node->this, node->type); 349 printk(KERN_CRIT "hfs: invalid bmap found! "
350 "(%u,%d)\n",
351 node->this, node->type);
341 hfs_bnode_put(node); 352 hfs_bnode_put(node);
342 return; 353 return;
343 } 354 }
@@ -350,7 +361,9 @@ void hfs_bmap_free(struct hfs_bnode *node)
350 m = 1 << (~nidx & 7); 361 m = 1 << (~nidx & 7);
351 byte = data[off]; 362 byte = data[off];
352 if (!(byte & m)) { 363 if (!(byte & m)) {
353 printk(KERN_CRIT "hfs: trying to free free bnode %u(%d)\n", node->this, node->type); 364 printk(KERN_CRIT "hfs: trying to free free bnode "
365 "%u(%d)\n",
366 node->this, node->type);
354 kunmap(page); 367 kunmap(page);
355 hfs_bnode_put(node); 368 hfs_bnode_put(node);
356 return; 369 return;
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c
index 8af45fc5b05..b4ba1b31933 100644
--- a/fs/hfsplus/catalog.c
+++ b/fs/hfsplus/catalog.c
@@ -91,7 +91,8 @@ void hfsplus_cat_set_perms(struct inode *inode, struct hfsplus_perm *perms)
91 perms->dev = 0; 91 perms->dev = 0;
92} 92}
93 93
94static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct inode *inode) 94static int hfsplus_cat_build_record(hfsplus_cat_entry *entry,
95 u32 cnid, struct inode *inode)
95{ 96{
96 struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb); 97 struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb);
97 98
@@ -128,20 +129,32 @@ static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct i
128 if (cnid == inode->i_ino) { 129 if (cnid == inode->i_ino) {
129 hfsplus_cat_set_perms(inode, &file->permissions); 130 hfsplus_cat_set_perms(inode, &file->permissions);
130 if (S_ISLNK(inode->i_mode)) { 131 if (S_ISLNK(inode->i_mode)) {
131 file->user_info.fdType = cpu_to_be32(HFSP_SYMLINK_TYPE); 132 file->user_info.fdType =
132 file->user_info.fdCreator = cpu_to_be32(HFSP_SYMLINK_CREATOR); 133 cpu_to_be32(HFSP_SYMLINK_TYPE);
134 file->user_info.fdCreator =
135 cpu_to_be32(HFSP_SYMLINK_CREATOR);
133 } else { 136 } else {
134 file->user_info.fdType = cpu_to_be32(sbi->type); 137 file->user_info.fdType =
135 file->user_info.fdCreator = cpu_to_be32(sbi->creator); 138 cpu_to_be32(sbi->type);
139 file->user_info.fdCreator =
140 cpu_to_be32(sbi->creator);
136 } 141 }
137 if ((file->permissions.rootflags | file->permissions.userflags) & HFSPLUS_FLG_IMMUTABLE) 142 if (HFSPLUS_FLG_IMMUTABLE &
138 file->flags |= cpu_to_be16(HFSPLUS_FILE_LOCKED); 143 (file->permissions.rootflags |
144 file->permissions.userflags))
145 file->flags |=
146 cpu_to_be16(HFSPLUS_FILE_LOCKED);
139 } else { 147 } else {
140 file->user_info.fdType = cpu_to_be32(HFSP_HARDLINK_TYPE); 148 file->user_info.fdType =
141 file->user_info.fdCreator = cpu_to_be32(HFSP_HFSPLUS_CREATOR); 149 cpu_to_be32(HFSP_HARDLINK_TYPE);
142 file->user_info.fdFlags = cpu_to_be16(0x100); 150 file->user_info.fdCreator =
143 file->create_date = HFSPLUS_I(sbi->hidden_dir)->create_date; 151 cpu_to_be32(HFSP_HFSPLUS_CREATOR);
144 file->permissions.dev = cpu_to_be32(HFSPLUS_I(inode)->linkid); 152 file->user_info.fdFlags =
153 cpu_to_be16(0x100);
154 file->create_date =
155 HFSPLUS_I(sbi->hidden_dir)->create_date;
156 file->permissions.dev =
157 cpu_to_be32(HFSPLUS_I(inode)->linkid);
145 } 158 }
146 return sizeof(*file); 159 return sizeof(*file);
147 } 160 }
@@ -182,12 +195,14 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid,
182 return -EIO; 195 return -EIO;
183 } 196 }
184 197
185 hfsplus_cat_build_key_uni(fd->search_key, be32_to_cpu(tmp.thread.parentID), 198 hfsplus_cat_build_key_uni(fd->search_key,
186 &tmp.thread.nodeName); 199 be32_to_cpu(tmp.thread.parentID),
200 &tmp.thread.nodeName);
187 return hfs_brec_find(fd); 201 return hfs_brec_find(fd);
188} 202}
189 203
190int hfsplus_create_cat(u32 cnid, struct inode *dir, struct qstr *str, struct inode *inode) 204int hfsplus_create_cat(u32 cnid, struct inode *dir,
205 struct qstr *str, struct inode *inode)
191{ 206{
192 struct super_block *sb = dir->i_sb; 207 struct super_block *sb = dir->i_sb;
193 struct hfs_find_data fd; 208 struct hfs_find_data fd;
@@ -195,13 +210,15 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir, struct qstr *str, struct ino
195 int entry_size; 210 int entry_size;
196 int err; 211 int err;
197 212
198 dprint(DBG_CAT_MOD, "create_cat: %s,%u(%d)\n", str->name, cnid, inode->i_nlink); 213 dprint(DBG_CAT_MOD, "create_cat: %s,%u(%d)\n",
214 str->name, cnid, inode->i_nlink);
199 hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); 215 hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
200 216
201 hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); 217 hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
202 entry_size = hfsplus_fill_cat_thread(sb, &entry, S_ISDIR(inode->i_mode) ? 218 entry_size = hfsplus_fill_cat_thread(sb, &entry,
219 S_ISDIR(inode->i_mode) ?
203 HFSPLUS_FOLDER_THREAD : HFSPLUS_FILE_THREAD, 220 HFSPLUS_FOLDER_THREAD : HFSPLUS_FILE_THREAD,
204 dir->i_ino, str); 221 dir->i_ino, str);
205 err = hfs_brec_find(&fd); 222 err = hfs_brec_find(&fd);
206 if (err != -ENOENT) { 223 if (err != -ENOENT) {
207 if (!err) 224 if (!err)
@@ -227,7 +244,8 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir, struct qstr *str, struct ino
227 244
228 dir->i_size++; 245 dir->i_size++;
229 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; 246 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
230 mark_inode_dirty(dir); 247 hfsplus_mark_inode_dirty(dir, HFSPLUS_I_CAT_DIRTY);
248
231 hfs_find_exit(&fd); 249 hfs_find_exit(&fd);
232 return 0; 250 return 0;
233 251
@@ -249,7 +267,8 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
249 int err, off; 267 int err, off;
250 u16 type; 268 u16 type;
251 269
252 dprint(DBG_CAT_MOD, "delete_cat: %s,%u\n", str ? str->name : NULL, cnid); 270 dprint(DBG_CAT_MOD, "delete_cat: %s,%u\n",
271 str ? str->name : NULL, cnid);
253 hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); 272 hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
254 273
255 if (!str) { 274 if (!str) {
@@ -260,11 +279,15 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
260 if (err) 279 if (err)
261 goto out; 280 goto out;
262 281
263 off = fd.entryoffset + offsetof(struct hfsplus_cat_thread, nodeName); 282 off = fd.entryoffset +
283 offsetof(struct hfsplus_cat_thread, nodeName);
264 fd.search_key->cat.parent = cpu_to_be32(dir->i_ino); 284 fd.search_key->cat.parent = cpu_to_be32(dir->i_ino);
265 hfs_bnode_read(fd.bnode, &fd.search_key->cat.name.length, off, 2); 285 hfs_bnode_read(fd.bnode,
286 &fd.search_key->cat.name.length, off, 2);
266 len = be16_to_cpu(fd.search_key->cat.name.length) * 2; 287 len = be16_to_cpu(fd.search_key->cat.name.length) * 2;
267 hfs_bnode_read(fd.bnode, &fd.search_key->cat.name.unicode, off + 2, len); 288 hfs_bnode_read(fd.bnode,
289 &fd.search_key->cat.name.unicode,
290 off + 2, len);
268 fd.search_key->key_len = cpu_to_be16(6 + len); 291 fd.search_key->key_len = cpu_to_be16(6 + len);
269 } else 292 } else
270 hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str); 293 hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
@@ -281,7 +304,8 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
281 hfsplus_free_fork(sb, cnid, &fork, HFSPLUS_TYPE_DATA); 304 hfsplus_free_fork(sb, cnid, &fork, HFSPLUS_TYPE_DATA);
282#endif 305#endif
283 306
284 off = fd.entryoffset + offsetof(struct hfsplus_cat_file, rsrc_fork); 307 off = fd.entryoffset +
308 offsetof(struct hfsplus_cat_file, rsrc_fork);
285 hfs_bnode_read(fd.bnode, &fork, off, sizeof(fork)); 309 hfs_bnode_read(fd.bnode, &fork, off, sizeof(fork));
286 hfsplus_free_fork(sb, cnid, &fork, HFSPLUS_TYPE_RSRC); 310 hfsplus_free_fork(sb, cnid, &fork, HFSPLUS_TYPE_RSRC);
287 } 311 }
@@ -308,7 +332,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
308 332
309 dir->i_size--; 333 dir->i_size--;
310 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; 334 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
311 mark_inode_dirty(dir); 335 hfsplus_mark_inode_dirty(dir, HFSPLUS_I_CAT_DIRTY);
312out: 336out:
313 hfs_find_exit(&fd); 337 hfs_find_exit(&fd);
314 338
@@ -325,7 +349,8 @@ int hfsplus_rename_cat(u32 cnid,
325 int entry_size, type; 349 int entry_size, type;
326 int err = 0; 350 int err = 0;
327 351
328 dprint(DBG_CAT_MOD, "rename_cat: %u - %lu,%s - %lu,%s\n", cnid, src_dir->i_ino, src_name->name, 352 dprint(DBG_CAT_MOD, "rename_cat: %u - %lu,%s - %lu,%s\n",
353 cnid, src_dir->i_ino, src_name->name,
329 dst_dir->i_ino, dst_name->name); 354 dst_dir->i_ino, dst_name->name);
330 hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &src_fd); 355 hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &src_fd);
331 dst_fd = src_fd; 356 dst_fd = src_fd;
@@ -353,7 +378,6 @@ int hfsplus_rename_cat(u32 cnid,
353 goto out; 378 goto out;
354 dst_dir->i_size++; 379 dst_dir->i_size++;
355 dst_dir->i_mtime = dst_dir->i_ctime = CURRENT_TIME_SEC; 380 dst_dir->i_mtime = dst_dir->i_ctime = CURRENT_TIME_SEC;
356 mark_inode_dirty(dst_dir);
357 381
358 /* finally remove the old entry */ 382 /* finally remove the old entry */
359 hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name); 383 hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name);
@@ -365,7 +389,6 @@ int hfsplus_rename_cat(u32 cnid,
365 goto out; 389 goto out;
366 src_dir->i_size--; 390 src_dir->i_size--;
367 src_dir->i_mtime = src_dir->i_ctime = CURRENT_TIME_SEC; 391 src_dir->i_mtime = src_dir->i_ctime = CURRENT_TIME_SEC;
368 mark_inode_dirty(src_dir);
369 392
370 /* remove old thread entry */ 393 /* remove old thread entry */
371 hfsplus_cat_build_key(sb, src_fd.search_key, cnid, NULL); 394 hfsplus_cat_build_key(sb, src_fd.search_key, cnid, NULL);
@@ -379,7 +402,8 @@ int hfsplus_rename_cat(u32 cnid,
379 402
380 /* create new thread entry */ 403 /* create new thread entry */
381 hfsplus_cat_build_key(sb, dst_fd.search_key, cnid, NULL); 404 hfsplus_cat_build_key(sb, dst_fd.search_key, cnid, NULL);
382 entry_size = hfsplus_fill_cat_thread(sb, &entry, type, dst_dir->i_ino, dst_name); 405 entry_size = hfsplus_fill_cat_thread(sb, &entry, type,
406 dst_dir->i_ino, dst_name);
383 err = hfs_brec_find(&dst_fd); 407 err = hfs_brec_find(&dst_fd);
384 if (err != -ENOENT) { 408 if (err != -ENOENT) {
385 if (!err) 409 if (!err)
@@ -387,6 +411,9 @@ int hfsplus_rename_cat(u32 cnid,
387 goto out; 411 goto out;
388 } 412 }
389 err = hfs_brec_insert(&dst_fd, &entry, entry_size); 413 err = hfs_brec_insert(&dst_fd, &entry, entry_size);
414
415 hfsplus_mark_inode_dirty(dst_dir, HFSPLUS_I_CAT_DIRTY);
416 hfsplus_mark_inode_dirty(src_dir, HFSPLUS_I_CAT_DIRTY);
390out: 417out:
391 hfs_bnode_put(dst_fd.bnode); 418 hfs_bnode_put(dst_fd.bnode);
392 hfs_find_exit(&src_fd); 419 hfs_find_exit(&src_fd);
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 9d59c0571f5..f896dc84302 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -37,7 +37,7 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry,
37 37
38 sb = dir->i_sb; 38 sb = dir->i_sb;
39 39
40 dentry->d_op = &hfsplus_dentry_operations; 40 d_set_d_op(dentry, &hfsplus_dentry_operations);
41 dentry->d_fsdata = NULL; 41 dentry->d_fsdata = NULL;
42 hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); 42 hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
43 hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name); 43 hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name);
@@ -66,11 +66,17 @@ again:
66 goto fail; 66 goto fail;
67 } 67 }
68 cnid = be32_to_cpu(entry.file.id); 68 cnid = be32_to_cpu(entry.file.id);
69 if (entry.file.user_info.fdType == cpu_to_be32(HFSP_HARDLINK_TYPE) && 69 if (entry.file.user_info.fdType ==
70 entry.file.user_info.fdCreator == cpu_to_be32(HFSP_HFSPLUS_CREATOR) && 70 cpu_to_be32(HFSP_HARDLINK_TYPE) &&
71 (entry.file.create_date == HFSPLUS_I(HFSPLUS_SB(sb)->hidden_dir)->create_date || 71 entry.file.user_info.fdCreator ==
72 entry.file.create_date == HFSPLUS_I(sb->s_root->d_inode)->create_date) && 72 cpu_to_be32(HFSP_HFSPLUS_CREATOR) &&
73 HFSPLUS_SB(sb)->hidden_dir) { 73 (entry.file.create_date ==
74 HFSPLUS_I(HFSPLUS_SB(sb)->hidden_dir)->
75 create_date ||
76 entry.file.create_date ==
77 HFSPLUS_I(sb->s_root->d_inode)->
78 create_date) &&
79 HFSPLUS_SB(sb)->hidden_dir) {
74 struct qstr str; 80 struct qstr str;
75 char name[32]; 81 char name[32];
76 82
@@ -83,11 +89,13 @@ again:
83 linkid = 0; 89 linkid = 0;
84 } else { 90 } else {
85 dentry->d_fsdata = (void *)(unsigned long)cnid; 91 dentry->d_fsdata = (void *)(unsigned long)cnid;
86 linkid = be32_to_cpu(entry.file.permissions.dev); 92 linkid =
93 be32_to_cpu(entry.file.permissions.dev);
87 str.len = sprintf(name, "iNode%d", linkid); 94 str.len = sprintf(name, "iNode%d", linkid);
88 str.name = name; 95 str.name = name;
89 hfsplus_cat_build_key(sb, fd.search_key, 96 hfsplus_cat_build_key(sb, fd.search_key,
90 HFSPLUS_SB(sb)->hidden_dir->i_ino, &str); 97 HFSPLUS_SB(sb)->hidden_dir->i_ino,
98 &str);
91 goto again; 99 goto again;
92 } 100 }
93 } else if (!dentry->d_fsdata) 101 } else if (!dentry->d_fsdata)
@@ -139,7 +147,8 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
139 filp->f_pos++; 147 filp->f_pos++;
140 /* fall through */ 148 /* fall through */
141 case 1: 149 case 1:
142 hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength); 150 hfs_bnode_read(fd.bnode, &entry, fd.entryoffset,
151 fd.entrylength);
143 if (be16_to_cpu(entry.type) != HFSPLUS_FOLDER_THREAD) { 152 if (be16_to_cpu(entry.type) != HFSPLUS_FOLDER_THREAD) {
144 printk(KERN_ERR "hfs: bad catalog folder thread\n"); 153 printk(KERN_ERR "hfs: bad catalog folder thread\n");
145 err = -EIO; 154 err = -EIO;
@@ -169,14 +178,16 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
169 err = -EIO; 178 err = -EIO;
170 goto out; 179 goto out;
171 } 180 }
172 hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength); 181 hfs_bnode_read(fd.bnode, &entry, fd.entryoffset,
182 fd.entrylength);
173 type = be16_to_cpu(entry.type); 183 type = be16_to_cpu(entry.type);
174 len = HFSPLUS_MAX_STRLEN; 184 len = HFSPLUS_MAX_STRLEN;
175 err = hfsplus_uni2asc(sb, &fd.key->cat.name, strbuf, &len); 185 err = hfsplus_uni2asc(sb, &fd.key->cat.name, strbuf, &len);
176 if (err) 186 if (err)
177 goto out; 187 goto out;
178 if (type == HFSPLUS_FOLDER) { 188 if (type == HFSPLUS_FOLDER) {
179 if (fd.entrylength < sizeof(struct hfsplus_cat_folder)) { 189 if (fd.entrylength <
190 sizeof(struct hfsplus_cat_folder)) {
180 printk(KERN_ERR "hfs: small dir entry\n"); 191 printk(KERN_ERR "hfs: small dir entry\n");
181 err = -EIO; 192 err = -EIO;
182 goto out; 193 goto out;
@@ -202,7 +213,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
202 err = -EIO; 213 err = -EIO;
203 goto out; 214 goto out;
204 } 215 }
205 next: 216next:
206 filp->f_pos++; 217 filp->f_pos++;
207 if (filp->f_pos >= inode->i_size) 218 if (filp->f_pos >= inode->i_size)
208 goto out; 219 goto out;
@@ -273,7 +284,8 @@ static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir,
273 HFSPLUS_I(inode)->linkid = id; 284 HFSPLUS_I(inode)->linkid = id;
274 cnid = sbi->next_cnid++; 285 cnid = sbi->next_cnid++;
275 src_dentry->d_fsdata = (void *)(unsigned long)cnid; 286 src_dentry->d_fsdata = (void *)(unsigned long)cnid;
276 res = hfsplus_create_cat(cnid, src_dir, &src_dentry->d_name, inode); 287 res = hfsplus_create_cat(cnid, src_dir,
288 &src_dentry->d_name, inode);
277 if (res) 289 if (res)
278 /* panic? */ 290 /* panic? */
279 goto out; 291 goto out;
@@ -485,6 +497,7 @@ const struct inode_operations hfsplus_dir_inode_operations = {
485}; 497};
486 498
487const struct file_operations hfsplus_dir_operations = { 499const struct file_operations hfsplus_dir_operations = {
500 .fsync = hfsplus_file_fsync,
488 .read = generic_read_dir, 501 .read = generic_read_dir,
489 .readdir = hfsplus_readdir, 502 .readdir = hfsplus_readdir,
490 .unlocked_ioctl = hfsplus_ioctl, 503 .unlocked_ioctl = hfsplus_ioctl,
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index 0c9cb1820a5..52a0bcaa7b6 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -83,7 +83,8 @@ static u32 hfsplus_ext_lastblock(struct hfsplus_extent *ext)
83 return be32_to_cpu(ext->start_block) + be32_to_cpu(ext->block_count); 83 return be32_to_cpu(ext->start_block) + be32_to_cpu(ext->block_count);
84} 84}
85 85
86static void __hfsplus_ext_write_extent(struct inode *inode, struct hfs_find_data *fd) 86static void __hfsplus_ext_write_extent(struct inode *inode,
87 struct hfs_find_data *fd)
87{ 88{
88 struct hfsplus_inode_info *hip = HFSPLUS_I(inode); 89 struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
89 int res; 90 int res;
@@ -95,24 +96,32 @@ static void __hfsplus_ext_write_extent(struct inode *inode, struct hfs_find_data
95 HFSPLUS_TYPE_RSRC : HFSPLUS_TYPE_DATA); 96 HFSPLUS_TYPE_RSRC : HFSPLUS_TYPE_DATA);
96 97
97 res = hfs_brec_find(fd); 98 res = hfs_brec_find(fd);
98 if (hip->flags & HFSPLUS_FLG_EXT_NEW) { 99 if (hip->extent_state & HFSPLUS_EXT_NEW) {
99 if (res != -ENOENT) 100 if (res != -ENOENT)
100 return; 101 return;
101 hfs_brec_insert(fd, hip->cached_extents, 102 hfs_brec_insert(fd, hip->cached_extents,
102 sizeof(hfsplus_extent_rec)); 103 sizeof(hfsplus_extent_rec));
103 hip->flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW); 104 hip->extent_state &= ~(HFSPLUS_EXT_DIRTY | HFSPLUS_EXT_NEW);
104 } else { 105 } else {
105 if (res) 106 if (res)
106 return; 107 return;
107 hfs_bnode_write(fd->bnode, hip->cached_extents, 108 hfs_bnode_write(fd->bnode, hip->cached_extents,
108 fd->entryoffset, fd->entrylength); 109 fd->entryoffset, fd->entrylength);
109 hip->flags &= ~HFSPLUS_FLG_EXT_DIRTY; 110 hip->extent_state &= ~HFSPLUS_EXT_DIRTY;
110 } 111 }
112
113 /*
114 * We can't just use hfsplus_mark_inode_dirty here, because we
115 * also get called from hfsplus_write_inode, which should not
116 * redirty the inode. Instead the callers have to be careful
117 * to explicitly mark the inode dirty, too.
118 */
119 set_bit(HFSPLUS_I_EXT_DIRTY, &hip->flags);
111} 120}
112 121
113static void hfsplus_ext_write_extent_locked(struct inode *inode) 122static void hfsplus_ext_write_extent_locked(struct inode *inode)
114{ 123{
115 if (HFSPLUS_I(inode)->flags & HFSPLUS_FLG_EXT_DIRTY) { 124 if (HFSPLUS_I(inode)->extent_state & HFSPLUS_EXT_DIRTY) {
116 struct hfs_find_data fd; 125 struct hfs_find_data fd;
117 126
118 hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd); 127 hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd);
@@ -144,18 +153,20 @@ static inline int __hfsplus_ext_read_extent(struct hfs_find_data *fd,
144 return -ENOENT; 153 return -ENOENT;
145 if (fd->entrylength != sizeof(hfsplus_extent_rec)) 154 if (fd->entrylength != sizeof(hfsplus_extent_rec))
146 return -EIO; 155 return -EIO;
147 hfs_bnode_read(fd->bnode, extent, fd->entryoffset, sizeof(hfsplus_extent_rec)); 156 hfs_bnode_read(fd->bnode, extent, fd->entryoffset,
157 sizeof(hfsplus_extent_rec));
148 return 0; 158 return 0;
149} 159}
150 160
151static inline int __hfsplus_ext_cache_extent(struct hfs_find_data *fd, struct inode *inode, u32 block) 161static inline int __hfsplus_ext_cache_extent(struct hfs_find_data *fd,
162 struct inode *inode, u32 block)
152{ 163{
153 struct hfsplus_inode_info *hip = HFSPLUS_I(inode); 164 struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
154 int res; 165 int res;
155 166
156 WARN_ON(!mutex_is_locked(&hip->extents_lock)); 167 WARN_ON(!mutex_is_locked(&hip->extents_lock));
157 168
158 if (hip->flags & HFSPLUS_FLG_EXT_DIRTY) 169 if (hip->extent_state & HFSPLUS_EXT_DIRTY)
159 __hfsplus_ext_write_extent(inode, fd); 170 __hfsplus_ext_write_extent(inode, fd);
160 171
161 res = __hfsplus_ext_read_extent(fd, hip->cached_extents, inode->i_ino, 172 res = __hfsplus_ext_read_extent(fd, hip->cached_extents, inode->i_ino,
@@ -164,10 +175,11 @@ static inline int __hfsplus_ext_cache_extent(struct hfs_find_data *fd, struct in
164 HFSPLUS_TYPE_DATA); 175 HFSPLUS_TYPE_DATA);
165 if (!res) { 176 if (!res) {
166 hip->cached_start = be32_to_cpu(fd->key->ext.start_block); 177 hip->cached_start = be32_to_cpu(fd->key->ext.start_block);
167 hip->cached_blocks = hfsplus_ext_block_count(hip->cached_extents); 178 hip->cached_blocks =
179 hfsplus_ext_block_count(hip->cached_extents);
168 } else { 180 } else {
169 hip->cached_start = hip->cached_blocks = 0; 181 hip->cached_start = hip->cached_blocks = 0;
170 hip->flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW); 182 hip->extent_state &= ~(HFSPLUS_EXT_DIRTY | HFSPLUS_EXT_NEW);
171 } 183 }
172 return res; 184 return res;
173} 185}
@@ -197,6 +209,7 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock,
197 struct hfsplus_inode_info *hip = HFSPLUS_I(inode); 209 struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
198 int res = -EIO; 210 int res = -EIO;
199 u32 ablock, dblock, mask; 211 u32 ablock, dblock, mask;
212 int was_dirty = 0;
200 int shift; 213 int shift;
201 214
202 /* Convert inode block to disk allocation block */ 215 /* Convert inode block to disk allocation block */
@@ -223,27 +236,37 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock,
223 return -EIO; 236 return -EIO;
224 237
225 mutex_lock(&hip->extents_lock); 238 mutex_lock(&hip->extents_lock);
239
240 /*
241 * hfsplus_ext_read_extent will write out a cached extent into
242 * the extents btree. In that case we may have to mark the inode
243 * dirty even for a pure read of an extent here.
244 */
245 was_dirty = (hip->extent_state & HFSPLUS_EXT_DIRTY);
226 res = hfsplus_ext_read_extent(inode, ablock); 246 res = hfsplus_ext_read_extent(inode, ablock);
227 if (!res) { 247 if (res) {
228 dblock = hfsplus_ext_find_block(hip->cached_extents,
229 ablock - hip->cached_start);
230 } else {
231 mutex_unlock(&hip->extents_lock); 248 mutex_unlock(&hip->extents_lock);
232 return -EIO; 249 return -EIO;
233 } 250 }
251 dblock = hfsplus_ext_find_block(hip->cached_extents,
252 ablock - hip->cached_start);
234 mutex_unlock(&hip->extents_lock); 253 mutex_unlock(&hip->extents_lock);
235 254
236done: 255done:
237 dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", inode->i_ino, (long long)iblock, dblock); 256 dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n",
257 inode->i_ino, (long long)iblock, dblock);
238 mask = (1 << sbi->fs_shift) - 1; 258 mask = (1 << sbi->fs_shift) - 1;
239 map_bh(bh_result, sb, (dblock << sbi->fs_shift) + sbi->blockoffset + (iblock & mask)); 259 map_bh(bh_result, sb,
260 (dblock << sbi->fs_shift) + sbi->blockoffset +
261 (iblock & mask));
240 if (create) { 262 if (create) {
241 set_buffer_new(bh_result); 263 set_buffer_new(bh_result);
242 hip->phys_size += sb->s_blocksize; 264 hip->phys_size += sb->s_blocksize;
243 hip->fs_blocks++; 265 hip->fs_blocks++;
244 inode_add_bytes(inode, sb->s_blocksize); 266 inode_add_bytes(inode, sb->s_blocksize);
245 mark_inode_dirty(inode);
246 } 267 }
268 if (create || was_dirty)
269 mark_inode_dirty(inode);
247 return 0; 270 return 0;
248} 271}
249 272
@@ -326,7 +349,8 @@ found:
326 } 349 }
327} 350}
328 351
329int hfsplus_free_fork(struct super_block *sb, u32 cnid, struct hfsplus_fork_raw *fork, int type) 352int hfsplus_free_fork(struct super_block *sb, u32 cnid,
353 struct hfsplus_fork_raw *fork, int type)
330{ 354{
331 struct hfs_find_data fd; 355 struct hfs_find_data fd;
332 hfsplus_extent_rec ext_entry; 356 hfsplus_extent_rec ext_entry;
@@ -373,12 +397,13 @@ int hfsplus_file_extend(struct inode *inode)
373 u32 start, len, goal; 397 u32 start, len, goal;
374 int res; 398 int res;
375 399
376 if (sbi->alloc_file->i_size * 8 < 400 if (sbi->total_blocks - sbi->free_blocks + 8 >
377 sbi->total_blocks - sbi->free_blocks + 8) { 401 sbi->alloc_file->i_size * 8) {
378 // extend alloc file 402 /* extend alloc file */
379 printk(KERN_ERR "hfs: extend alloc file! (%Lu,%u,%u)\n", 403 printk(KERN_ERR "hfs: extend alloc file! "
380 sbi->alloc_file->i_size * 8, 404 "(%llu,%u,%u)\n",
381 sbi->total_blocks, sbi->free_blocks); 405 sbi->alloc_file->i_size * 8,
406 sbi->total_blocks, sbi->free_blocks);
382 return -ENOSPC; 407 return -ENOSPC;
383 } 408 }
384 409
@@ -429,7 +454,7 @@ int hfsplus_file_extend(struct inode *inode)
429 start, len); 454 start, len);
430 if (!res) { 455 if (!res) {
431 hfsplus_dump_extent(hip->cached_extents); 456 hfsplus_dump_extent(hip->cached_extents);
432 hip->flags |= HFSPLUS_FLG_EXT_DIRTY; 457 hip->extent_state |= HFSPLUS_EXT_DIRTY;
433 hip->cached_blocks += len; 458 hip->cached_blocks += len;
434 } else if (res == -ENOSPC) 459 } else if (res == -ENOSPC)
435 goto insert_extent; 460 goto insert_extent;
@@ -438,7 +463,7 @@ out:
438 mutex_unlock(&hip->extents_lock); 463 mutex_unlock(&hip->extents_lock);
439 if (!res) { 464 if (!res) {
440 hip->alloc_blocks += len; 465 hip->alloc_blocks += len;
441 mark_inode_dirty(inode); 466 hfsplus_mark_inode_dirty(inode, HFSPLUS_I_ALLOC_DIRTY);
442 } 467 }
443 return res; 468 return res;
444 469
@@ -450,7 +475,7 @@ insert_extent:
450 hip->cached_extents[0].start_block = cpu_to_be32(start); 475 hip->cached_extents[0].start_block = cpu_to_be32(start);
451 hip->cached_extents[0].block_count = cpu_to_be32(len); 476 hip->cached_extents[0].block_count = cpu_to_be32(len);
452 hfsplus_dump_extent(hip->cached_extents); 477 hfsplus_dump_extent(hip->cached_extents);
453 hip->flags |= HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW; 478 hip->extent_state |= HFSPLUS_EXT_DIRTY | HFSPLUS_EXT_NEW;
454 hip->cached_start = hip->alloc_blocks; 479 hip->cached_start = hip->alloc_blocks;
455 hip->cached_blocks = len; 480 hip->cached_blocks = len;
456 481
@@ -466,8 +491,9 @@ void hfsplus_file_truncate(struct inode *inode)
466 u32 alloc_cnt, blk_cnt, start; 491 u32 alloc_cnt, blk_cnt, start;
467 int res; 492 int res;
468 493
469 dprint(DBG_INODE, "truncate: %lu, %Lu -> %Lu\n", 494 dprint(DBG_INODE, "truncate: %lu, %llu -> %llu\n",
470 inode->i_ino, (long long)hip->phys_size, inode->i_size); 495 inode->i_ino, (long long)hip->phys_size,
496 inode->i_size);
471 497
472 if (inode->i_size > hip->phys_size) { 498 if (inode->i_size > hip->phys_size) {
473 struct address_space *mapping = inode->i_mapping; 499 struct address_space *mapping = inode->i_mapping;
@@ -481,7 +507,8 @@ void hfsplus_file_truncate(struct inode *inode)
481 &page, &fsdata); 507 &page, &fsdata);
482 if (res) 508 if (res)
483 return; 509 return;
484 res = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata); 510 res = pagecache_write_end(NULL, mapping, size,
511 0, 0, page, fsdata);
485 if (res < 0) 512 if (res < 0)
486 return; 513 return;
487 mark_inode_dirty(inode); 514 mark_inode_dirty(inode);
@@ -513,12 +540,12 @@ void hfsplus_file_truncate(struct inode *inode)
513 alloc_cnt - start, alloc_cnt - blk_cnt); 540 alloc_cnt - start, alloc_cnt - blk_cnt);
514 hfsplus_dump_extent(hip->cached_extents); 541 hfsplus_dump_extent(hip->cached_extents);
515 if (blk_cnt > start) { 542 if (blk_cnt > start) {
516 hip->flags |= HFSPLUS_FLG_EXT_DIRTY; 543 hip->extent_state |= HFSPLUS_EXT_DIRTY;
517 break; 544 break;
518 } 545 }
519 alloc_cnt = start; 546 alloc_cnt = start;
520 hip->cached_start = hip->cached_blocks = 0; 547 hip->cached_start = hip->cached_blocks = 0;
521 hip->flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW); 548 hip->extent_state &= ~(HFSPLUS_EXT_DIRTY | HFSPLUS_EXT_NEW);
522 hfs_brec_remove(&fd); 549 hfs_brec_remove(&fd);
523 } 550 }
524 hfs_find_exit(&fd); 551 hfs_find_exit(&fd);
@@ -527,7 +554,8 @@ void hfsplus_file_truncate(struct inode *inode)
527 hip->alloc_blocks = blk_cnt; 554 hip->alloc_blocks = blk_cnt;
528out: 555out:
529 hip->phys_size = inode->i_size; 556 hip->phys_size = inode->i_size;
530 hip->fs_blocks = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; 557 hip->fs_blocks = (inode->i_size + sb->s_blocksize - 1) >>
558 sb->s_blocksize_bits;
531 inode_set_bytes(inode, hip->fs_blocks << sb->s_blocksize_bits); 559 inode_set_bytes(inode, hip->fs_blocks << sb->s_blocksize_bits);
532 mark_inode_dirty(inode); 560 hfsplus_mark_inode_dirty(inode, HFSPLUS_I_ALLOC_DIRTY);
533} 561}
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index cb3653efb57..d6857523336 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -23,13 +23,16 @@
23#define DBG_EXTENT 0x00000020 23#define DBG_EXTENT 0x00000020
24#define DBG_BITMAP 0x00000040 24#define DBG_BITMAP 0x00000040
25 25
26//#define DBG_MASK (DBG_EXTENT|DBG_INODE|DBG_BNODE_MOD) 26#if 0
27//#define DBG_MASK (DBG_BNODE_MOD|DBG_CAT_MOD|DBG_INODE) 27#define DBG_MASK (DBG_EXTENT|DBG_INODE|DBG_BNODE_MOD)
28//#define DBG_MASK (DBG_CAT_MOD|DBG_BNODE_REFS|DBG_INODE|DBG_EXTENT) 28#define DBG_MASK (DBG_BNODE_MOD|DBG_CAT_MOD|DBG_INODE)
29#define DBG_MASK (DBG_CAT_MOD|DBG_BNODE_REFS|DBG_INODE|DBG_EXTENT)
30#endif
29#define DBG_MASK (0) 31#define DBG_MASK (0)
30 32
31#define dprint(flg, fmt, args...) \ 33#define dprint(flg, fmt, args...) \
32 if (flg & DBG_MASK) printk(fmt , ## args) 34 if (flg & DBG_MASK) \
35 printk(fmt , ## args)
33 36
34/* Runtime config options */ 37/* Runtime config options */
35#define HFSPLUS_DEF_CR_TYPE 0x3F3F3F3F /* '????' */ 38#define HFSPLUS_DEF_CR_TYPE 0x3F3F3F3F /* '????' */
@@ -37,7 +40,8 @@
37#define HFSPLUS_TYPE_DATA 0x00 40#define HFSPLUS_TYPE_DATA 0x00
38#define HFSPLUS_TYPE_RSRC 0xFF 41#define HFSPLUS_TYPE_RSRC 0xFF
39 42
40typedef int (*btree_keycmp)(const hfsplus_btree_key *, const hfsplus_btree_key *); 43typedef int (*btree_keycmp)(const hfsplus_btree_key *,
44 const hfsplus_btree_key *);
41 45
42#define NODE_HASH_SIZE 256 46#define NODE_HASH_SIZE 256
43 47
@@ -61,7 +65,6 @@ struct hfs_btree {
61 unsigned int max_key_len; 65 unsigned int max_key_len;
62 unsigned int depth; 66 unsigned int depth;
63 67
64 //unsigned int map1_size, map_size;
65 struct mutex tree_lock; 68 struct mutex tree_lock;
66 69
67 unsigned int pages_per_bnode; 70 unsigned int pages_per_bnode;
@@ -107,8 +110,8 @@ struct hfsplus_vh;
107struct hfs_btree; 110struct hfs_btree;
108 111
109struct hfsplus_sb_info { 112struct hfsplus_sb_info {
110 struct buffer_head *s_vhbh;
111 struct hfsplus_vh *s_vhdr; 113 struct hfsplus_vh *s_vhdr;
114 struct hfsplus_vh *s_backup_vhdr;
112 struct hfs_btree *ext_tree; 115 struct hfs_btree *ext_tree;
113 struct hfs_btree *cat_tree; 116 struct hfs_btree *cat_tree;
114 struct hfs_btree *attr_tree; 117 struct hfs_btree *attr_tree;
@@ -118,7 +121,8 @@ struct hfsplus_sb_info {
118 121
119 /* Runtime variables */ 122 /* Runtime variables */
120 u32 blockoffset; 123 u32 blockoffset;
121 u32 sect_count; 124 sector_t part_start;
125 sector_t sect_count;
122 int fs_shift; 126 int fs_shift;
123 127
124 /* immutable data from the volume header */ 128 /* immutable data from the volume header */
@@ -155,6 +159,12 @@ struct hfsplus_sb_info {
155#define HFSPLUS_SB_FORCE 2 159#define HFSPLUS_SB_FORCE 2
156#define HFSPLUS_SB_HFSX 3 160#define HFSPLUS_SB_HFSX 3
157#define HFSPLUS_SB_CASEFOLD 4 161#define HFSPLUS_SB_CASEFOLD 4
162#define HFSPLUS_SB_NOBARRIER 5
163
164static inline struct hfsplus_sb_info *HFSPLUS_SB(struct super_block *sb)
165{
166 return sb->s_fs_info;
167}
158 168
159 169
160struct hfsplus_inode_info { 170struct hfsplus_inode_info {
@@ -170,7 +180,7 @@ struct hfsplus_inode_info {
170 u32 cached_blocks; 180 u32 cached_blocks;
171 hfsplus_extent_rec first_extents; 181 hfsplus_extent_rec first_extents;
172 hfsplus_extent_rec cached_extents; 182 hfsplus_extent_rec cached_extents;
173 unsigned long flags; 183 unsigned int extent_state;
174 struct mutex extents_lock; 184 struct mutex extents_lock;
175 185
176 /* 186 /*
@@ -185,6 +195,11 @@ struct hfsplus_inode_info {
185 u32 linkid; 195 u32 linkid;
186 196
187 /* 197 /*
198 * Accessed using atomic bitops.
199 */
200 unsigned long flags;
201
202 /*
188 * Protected by i_mutex. 203 * Protected by i_mutex.
189 */ 204 */
190 sector_t fs_blocks; 205 sector_t fs_blocks;
@@ -195,12 +210,34 @@ struct hfsplus_inode_info {
195 struct inode vfs_inode; 210 struct inode vfs_inode;
196}; 211};
197 212
198#define HFSPLUS_FLG_RSRC 0x0001 213#define HFSPLUS_EXT_DIRTY 0x0001
199#define HFSPLUS_FLG_EXT_DIRTY 0x0002 214#define HFSPLUS_EXT_NEW 0x0002
200#define HFSPLUS_FLG_EXT_NEW 0x0004 215
216#define HFSPLUS_I_RSRC 0 /* represents a resource fork */
217#define HFSPLUS_I_CAT_DIRTY 1 /* has changes in the catalog tree */
218#define HFSPLUS_I_EXT_DIRTY 2 /* has changes in the extent tree */
219#define HFSPLUS_I_ALLOC_DIRTY 3 /* has changes in the allocation file */
220
221#define HFSPLUS_IS_RSRC(inode) \
222 test_bit(HFSPLUS_I_RSRC, &HFSPLUS_I(inode)->flags)
223
224static inline struct hfsplus_inode_info *HFSPLUS_I(struct inode *inode)
225{
226 return list_entry(inode, struct hfsplus_inode_info, vfs_inode);
227}
201 228
202#define HFSPLUS_IS_DATA(inode) (!(HFSPLUS_I(inode)->flags & HFSPLUS_FLG_RSRC)) 229/*
203#define HFSPLUS_IS_RSRC(inode) (HFSPLUS_I(inode)->flags & HFSPLUS_FLG_RSRC) 230 * Mark an inode dirty, and also mark the btree in which the
231 * specific type of metadata is stored.
232 * For data or metadata that gets written back into the catalog btree
233 * by hfsplus_write_inode a plain mark_inode_dirty call is enough.
234 */
235static inline void hfsplus_mark_inode_dirty(struct inode *inode,
236 unsigned int flag)
237{
238 set_bit(flag, &HFSPLUS_I(inode)->flags);
239 mark_inode_dirty(inode);
240}
204 241
205struct hfs_find_data { 242struct hfs_find_data {
206 /* filled by caller */ 243 /* filled by caller */
@@ -318,9 +355,12 @@ int hfs_brec_read(struct hfs_find_data *, void *, int);
318int hfs_brec_goto(struct hfs_find_data *, int); 355int hfs_brec_goto(struct hfs_find_data *, int);
319 356
320/* catalog.c */ 357/* catalog.c */
321int hfsplus_cat_case_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *); 358int hfsplus_cat_case_cmp_key(const hfsplus_btree_key *,
322int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *); 359 const hfsplus_btree_key *);
323void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *, u32, struct qstr *); 360int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *,
361 const hfsplus_btree_key *);
362void hfsplus_cat_build_key(struct super_block *sb,
363 hfsplus_btree_key *, u32, struct qstr *);
324int hfsplus_find_cat(struct super_block *, u32, struct hfs_find_data *); 364int hfsplus_find_cat(struct super_block *, u32, struct hfs_find_data *);
325int hfsplus_create_cat(u32, struct inode *, struct qstr *, struct inode *); 365int hfsplus_create_cat(u32, struct inode *, struct qstr *, struct inode *);
326int hfsplus_delete_cat(u32, struct inode *, struct qstr *); 366int hfsplus_delete_cat(u32, struct inode *, struct qstr *);
@@ -336,7 +376,8 @@ extern const struct file_operations hfsplus_dir_operations;
336int hfsplus_ext_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *); 376int hfsplus_ext_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *);
337void hfsplus_ext_write_extent(struct inode *); 377void hfsplus_ext_write_extent(struct inode *);
338int hfsplus_get_block(struct inode *, sector_t, struct buffer_head *, int); 378int hfsplus_get_block(struct inode *, sector_t, struct buffer_head *, int);
339int hfsplus_free_fork(struct super_block *, u32, struct hfsplus_fork_raw *, int); 379int hfsplus_free_fork(struct super_block *, u32,
380 struct hfsplus_fork_raw *, int);
340int hfsplus_file_extend(struct inode *); 381int hfsplus_file_extend(struct inode *);
341void hfsplus_file_truncate(struct inode *); 382void hfsplus_file_truncate(struct inode *);
342 383
@@ -351,6 +392,7 @@ int hfsplus_cat_read_inode(struct inode *, struct hfs_find_data *);
351int hfsplus_cat_write_inode(struct inode *); 392int hfsplus_cat_write_inode(struct inode *);
352struct inode *hfsplus_new_inode(struct super_block *, int); 393struct inode *hfsplus_new_inode(struct super_block *, int);
353void hfsplus_delete_inode(struct inode *); 394void hfsplus_delete_inode(struct inode *);
395int hfsplus_file_fsync(struct file *file, int datasync);
354 396
355/* ioctl.c */ 397/* ioctl.c */
356long hfsplus_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); 398long hfsplus_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
@@ -362,6 +404,7 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size);
362 404
363/* options.c */ 405/* options.c */
364int hfsplus_parse_options(char *, struct hfsplus_sb_info *); 406int hfsplus_parse_options(char *, struct hfsplus_sb_info *);
407int hfsplus_parse_options_remount(char *input, int *force);
365void hfsplus_fill_defaults(struct hfsplus_sb_info *); 408void hfsplus_fill_defaults(struct hfsplus_sb_info *);
366int hfsplus_show_options(struct seq_file *, struct vfsmount *); 409int hfsplus_show_options(struct seq_file *, struct vfsmount *);
367 410
@@ -375,45 +418,26 @@ extern u16 hfsplus_decompose_table[];
375extern u16 hfsplus_compose_table[]; 418extern u16 hfsplus_compose_table[];
376 419
377/* unicode.c */ 420/* unicode.c */
378int hfsplus_strcasecmp(const struct hfsplus_unistr *, const struct hfsplus_unistr *); 421int hfsplus_strcasecmp(const struct hfsplus_unistr *,
379int hfsplus_strcmp(const struct hfsplus_unistr *, const struct hfsplus_unistr *); 422 const struct hfsplus_unistr *);
380int hfsplus_uni2asc(struct super_block *, const struct hfsplus_unistr *, char *, int *); 423int hfsplus_strcmp(const struct hfsplus_unistr *,
381int hfsplus_asc2uni(struct super_block *, struct hfsplus_unistr *, const char *, int); 424 const struct hfsplus_unistr *);
382int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str); 425int hfsplus_uni2asc(struct super_block *,
383int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *s2); 426 const struct hfsplus_unistr *, char *, int *);
427int hfsplus_asc2uni(struct super_block *,
428 struct hfsplus_unistr *, const char *, int);
429int hfsplus_hash_dentry(const struct dentry *dentry,
430 const struct inode *inode, struct qstr *str);
431int hfsplus_compare_dentry(const struct dentry *parent,
432 const struct inode *pinode,
433 const struct dentry *dentry, const struct inode *inode,
434 unsigned int len, const char *str, const struct qstr *name);
384 435
385/* wrapper.c */ 436/* wrapper.c */
386int hfsplus_read_wrapper(struct super_block *); 437int hfsplus_read_wrapper(struct super_block *);
387
388int hfs_part_find(struct super_block *, sector_t *, sector_t *); 438int hfs_part_find(struct super_block *, sector_t *, sector_t *);
389 439int hfsplus_submit_bio(struct block_device *bdev, sector_t sector,
390/* access macros */ 440 void *data, int rw);
391static inline struct hfsplus_sb_info *HFSPLUS_SB(struct super_block *sb)
392{
393 return sb->s_fs_info;
394}
395
396static inline struct hfsplus_inode_info *HFSPLUS_I(struct inode *inode)
397{
398 return list_entry(inode, struct hfsplus_inode_info, vfs_inode);
399}
400
401#define sb_bread512(sb, sec, data) ({ \
402 struct buffer_head *__bh; \
403 sector_t __block; \
404 loff_t __start; \
405 int __offset; \
406 \
407 __start = (loff_t)(sec) << HFSPLUS_SECTOR_SHIFT;\
408 __block = __start >> (sb)->s_blocksize_bits; \
409 __offset = __start & ((sb)->s_blocksize - 1); \
410 __bh = sb_bread((sb), __block); \
411 if (likely(__bh != NULL)) \
412 data = (void *)(__bh->b_data + __offset);\
413 else \
414 data = NULL; \
415 __bh; \
416})
417 441
418/* time macros */ 442/* time macros */
419#define __hfsp_mt2ut(t) (be32_to_cpu(t) - 2082844800U) 443#define __hfsp_mt2ut(t) (be32_to_cpu(t) - 2082844800U)
diff --git a/fs/hfsplus/hfsplus_raw.h b/fs/hfsplus/hfsplus_raw.h
index 6892899fd6f..927cdd6d5bf 100644
--- a/fs/hfsplus/hfsplus_raw.h
+++ b/fs/hfsplus/hfsplus_raw.h
@@ -36,7 +36,8 @@
36#define HFSP_WRAPOFF_EMBEDSIG 0x7C 36#define HFSP_WRAPOFF_EMBEDSIG 0x7C
37#define HFSP_WRAPOFF_EMBEDEXT 0x7E 37#define HFSP_WRAPOFF_EMBEDEXT 0x7E
38 38
39#define HFSP_HIDDENDIR_NAME "\xe2\x90\x80\xe2\x90\x80\xe2\x90\x80\xe2\x90\x80HFS+ Private Data" 39#define HFSP_HIDDENDIR_NAME \
40 "\xe2\x90\x80\xe2\x90\x80\xe2\x90\x80\xe2\x90\x80HFS+ Private Data"
40 41
41#define HFSP_HARDLINK_TYPE 0x686c6e6b /* 'hlnk' */ 42#define HFSP_HARDLINK_TYPE 0x686c6e6b /* 'hlnk' */
42#define HFSP_HFSPLUS_CREATOR 0x6866732b /* 'hfs+' */ 43#define HFSP_HFSPLUS_CREATOR 0x6866732b /* 'hfs+' */
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 8afd7e84f98..a8df651747f 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -8,6 +8,7 @@
8 * Inode handling routines 8 * Inode handling routines
9 */ 9 */
10 10
11#include <linux/blkdev.h>
11#include <linux/mm.h> 12#include <linux/mm.h>
12#include <linux/fs.h> 13#include <linux/fs.h>
13#include <linux/pagemap.h> 14#include <linux/pagemap.h>
@@ -77,7 +78,8 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask)
77 if (!tree) 78 if (!tree)
78 return 0; 79 return 0;
79 if (tree->node_size >= PAGE_CACHE_SIZE) { 80 if (tree->node_size >= PAGE_CACHE_SIZE) {
80 nidx = page->index >> (tree->node_size_shift - PAGE_CACHE_SHIFT); 81 nidx = page->index >>
82 (tree->node_size_shift - PAGE_CACHE_SHIFT);
81 spin_lock(&tree->hash_lock); 83 spin_lock(&tree->hash_lock);
82 node = hfs_bnode_findhash(tree, nidx); 84 node = hfs_bnode_findhash(tree, nidx);
83 if (!node) 85 if (!node)
@@ -90,7 +92,8 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask)
90 } 92 }
91 spin_unlock(&tree->hash_lock); 93 spin_unlock(&tree->hash_lock);
92 } else { 94 } else {
93 nidx = page->index << (PAGE_CACHE_SHIFT - tree->node_size_shift); 95 nidx = page->index <<
96 (PAGE_CACHE_SHIFT - tree->node_size_shift);
94 i = 1 << (PAGE_CACHE_SHIFT - tree->node_size_shift); 97 i = 1 << (PAGE_CACHE_SHIFT - tree->node_size_shift);
95 spin_lock(&tree->hash_lock); 98 spin_lock(&tree->hash_lock);
96 do { 99 do {
@@ -166,8 +169,8 @@ const struct dentry_operations hfsplus_dentry_operations = {
166 .d_compare = hfsplus_compare_dentry, 169 .d_compare = hfsplus_compare_dentry,
167}; 170};
168 171
169static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dentry, 172static struct dentry *hfsplus_file_lookup(struct inode *dir,
170 struct nameidata *nd) 173 struct dentry *dentry, struct nameidata *nd)
171{ 174{
172 struct hfs_find_data fd; 175 struct hfs_find_data fd;
173 struct super_block *sb = dir->i_sb; 176 struct super_block *sb = dir->i_sb;
@@ -190,7 +193,9 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent
190 inode->i_ino = dir->i_ino; 193 inode->i_ino = dir->i_ino;
191 INIT_LIST_HEAD(&hip->open_dir_list); 194 INIT_LIST_HEAD(&hip->open_dir_list);
192 mutex_init(&hip->extents_lock); 195 mutex_init(&hip->extents_lock);
193 hip->flags = HFSPLUS_FLG_RSRC; 196 hip->extent_state = 0;
197 hip->flags = 0;
198 set_bit(HFSPLUS_I_RSRC, &hip->flags);
194 199
195 hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); 200 hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
196 err = hfsplus_find_cat(sb, dir->i_ino, &fd); 201 err = hfsplus_find_cat(sb, dir->i_ino, &fd);
@@ -219,7 +224,8 @@ out:
219 return NULL; 224 return NULL;
220} 225}
221 226
222static void hfsplus_get_perms(struct inode *inode, struct hfsplus_perm *perms, int dir) 227static void hfsplus_get_perms(struct inode *inode,
228 struct hfsplus_perm *perms, int dir)
223{ 229{
224 struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb); 230 struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb);
225 u16 mode; 231 u16 mode;
@@ -302,29 +308,41 @@ static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr)
302 return 0; 308 return 0;
303} 309}
304 310
305static int hfsplus_file_fsync(struct file *filp, int datasync) 311int hfsplus_file_fsync(struct file *file, int datasync)
306{ 312{
307 struct inode *inode = filp->f_mapping->host; 313 struct inode *inode = file->f_mapping->host;
308 struct super_block * sb; 314 struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
309 int ret, err; 315 struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb);
310 316 int error = 0, error2;
311 /* sync the inode to buffers */ 317
312 ret = write_inode_now(inode, 0); 318 /*
313 319 * Sync inode metadata into the catalog and extent trees.
314 /* sync the superblock to buffers */ 320 */
315 sb = inode->i_sb; 321 sync_inode_metadata(inode, 1);
316 if (sb->s_dirt) { 322
317 if (!(sb->s_flags & MS_RDONLY)) 323 /*
318 hfsplus_sync_fs(sb, 1); 324 * And explicitly write out the btrees.
319 else 325 */
320 sb->s_dirt = 0; 326 if (test_and_clear_bit(HFSPLUS_I_CAT_DIRTY, &hip->flags))
327 error = filemap_write_and_wait(sbi->cat_tree->inode->i_mapping);
328
329 if (test_and_clear_bit(HFSPLUS_I_EXT_DIRTY, &hip->flags)) {
330 error2 =
331 filemap_write_and_wait(sbi->ext_tree->inode->i_mapping);
332 if (!error)
333 error = error2;
321 } 334 }
322 335
323 /* .. finally sync the buffers to disk */ 336 if (test_and_clear_bit(HFSPLUS_I_ALLOC_DIRTY, &hip->flags)) {
324 err = sync_blockdev(sb->s_bdev); 337 error2 = filemap_write_and_wait(sbi->alloc_file->i_mapping);
325 if (!ret) 338 if (!error)
326 ret = err; 339 error = error2;
327 return ret; 340 }
341
342 if (!test_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags))
343 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
344
345 return error;
328} 346}
329 347
330static const struct inode_operations hfsplus_file_inode_operations = { 348static const struct inode_operations hfsplus_file_inode_operations = {
@@ -337,7 +355,7 @@ static const struct inode_operations hfsplus_file_inode_operations = {
337}; 355};
338 356
339static const struct file_operations hfsplus_file_operations = { 357static const struct file_operations hfsplus_file_operations = {
340 .llseek = generic_file_llseek, 358 .llseek = generic_file_llseek,
341 .read = do_sync_read, 359 .read = do_sync_read,
342 .aio_read = generic_file_aio_read, 360 .aio_read = generic_file_aio_read,
343 .write = do_sync_write, 361 .write = do_sync_write,
@@ -370,6 +388,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, int mode)
370 INIT_LIST_HEAD(&hip->open_dir_list); 388 INIT_LIST_HEAD(&hip->open_dir_list);
371 mutex_init(&hip->extents_lock); 389 mutex_init(&hip->extents_lock);
372 atomic_set(&hip->opencnt, 0); 390 atomic_set(&hip->opencnt, 0);
391 hip->extent_state = 0;
373 hip->flags = 0; 392 hip->flags = 0;
374 memset(hip->first_extents, 0, sizeof(hfsplus_extent_rec)); 393 memset(hip->first_extents, 0, sizeof(hfsplus_extent_rec));
375 memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); 394 memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec));
@@ -457,7 +476,8 @@ void hfsplus_inode_read_fork(struct inode *inode, struct hfsplus_fork_raw *fork)
457 } 476 }
458} 477}
459 478
460void hfsplus_inode_write_fork(struct inode *inode, struct hfsplus_fork_raw *fork) 479void hfsplus_inode_write_fork(struct inode *inode,
480 struct hfsplus_fork_raw *fork)
461{ 481{
462 memcpy(&fork->extents, &HFSPLUS_I(inode)->first_extents, 482 memcpy(&fork->extents, &HFSPLUS_I(inode)->first_extents,
463 sizeof(hfsplus_extent_rec)); 483 sizeof(hfsplus_extent_rec));
@@ -499,13 +519,14 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd)
499 hfs_bnode_read(fd->bnode, &entry, fd->entryoffset, 519 hfs_bnode_read(fd->bnode, &entry, fd->entryoffset,
500 sizeof(struct hfsplus_cat_file)); 520 sizeof(struct hfsplus_cat_file));
501 521
502 hfsplus_inode_read_fork(inode, HFSPLUS_IS_DATA(inode) ? 522 hfsplus_inode_read_fork(inode, HFSPLUS_IS_RSRC(inode) ?
503 &file->data_fork : &file->rsrc_fork); 523 &file->rsrc_fork : &file->data_fork);
504 hfsplus_get_perms(inode, &file->permissions, 0); 524 hfsplus_get_perms(inode, &file->permissions, 0);
505 inode->i_nlink = 1; 525 inode->i_nlink = 1;
506 if (S_ISREG(inode->i_mode)) { 526 if (S_ISREG(inode->i_mode)) {
507 if (file->permissions.dev) 527 if (file->permissions.dev)
508 inode->i_nlink = be32_to_cpu(file->permissions.dev); 528 inode->i_nlink =
529 be32_to_cpu(file->permissions.dev);
509 inode->i_op = &hfsplus_file_inode_operations; 530 inode->i_op = &hfsplus_file_inode_operations;
510 inode->i_fop = &hfsplus_file_operations; 531 inode->i_fop = &hfsplus_file_operations;
511 inode->i_mapping->a_ops = &hfsplus_aops; 532 inode->i_mapping->a_ops = &hfsplus_aops;
@@ -578,7 +599,9 @@ int hfsplus_cat_write_inode(struct inode *inode)
578 sizeof(struct hfsplus_cat_file)); 599 sizeof(struct hfsplus_cat_file));
579 hfsplus_inode_write_fork(inode, &file->data_fork); 600 hfsplus_inode_write_fork(inode, &file->data_fork);
580 hfsplus_cat_set_perms(inode, &file->permissions); 601 hfsplus_cat_set_perms(inode, &file->permissions);
581 if ((file->permissions.rootflags | file->permissions.userflags) & HFSPLUS_FLG_IMMUTABLE) 602 if (HFSPLUS_FLG_IMMUTABLE &
603 (file->permissions.rootflags |
604 file->permissions.userflags))
582 file->flags |= cpu_to_be16(HFSPLUS_FILE_LOCKED); 605 file->flags |= cpu_to_be16(HFSPLUS_FILE_LOCKED);
583 else 606 else
584 file->flags &= cpu_to_be16(~HFSPLUS_FILE_LOCKED); 607 file->flags &= cpu_to_be16(~HFSPLUS_FILE_LOCKED);
@@ -588,6 +611,8 @@ int hfsplus_cat_write_inode(struct inode *inode)
588 hfs_bnode_write(fd.bnode, &entry, fd.entryoffset, 611 hfs_bnode_write(fd.bnode, &entry, fd.entryoffset,
589 sizeof(struct hfsplus_cat_file)); 612 sizeof(struct hfsplus_cat_file));
590 } 613 }
614
615 set_bit(HFSPLUS_I_CAT_DIRTY, &HFSPLUS_I(inode)->flags);
591out: 616out:
592 hfs_find_exit(&fd); 617 hfs_find_exit(&fd);
593 return 0; 618 return 0;
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index 40a85a3ded6..508ce662ce1 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -28,7 +28,7 @@ static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags)
28 28
29 if (inode->i_flags & S_IMMUTABLE) 29 if (inode->i_flags & S_IMMUTABLE)
30 flags |= FS_IMMUTABLE_FL; 30 flags |= FS_IMMUTABLE_FL;
31 if (inode->i_flags |= S_APPEND) 31 if (inode->i_flags & S_APPEND)
32 flags |= FS_APPEND_FL; 32 flags |= FS_APPEND_FL;
33 if (hip->userflags & HFSPLUS_FLG_NODUMP) 33 if (hip->userflags & HFSPLUS_FLG_NODUMP)
34 flags |= FS_NODUMP_FL; 34 flags |= FS_NODUMP_FL;
@@ -147,9 +147,11 @@ int hfsplus_setxattr(struct dentry *dentry, const char *name,
147 res = -ERANGE; 147 res = -ERANGE;
148 } else 148 } else
149 res = -EOPNOTSUPP; 149 res = -EOPNOTSUPP;
150 if (!res) 150 if (!res) {
151 hfs_bnode_write(fd.bnode, &entry, fd.entryoffset, 151 hfs_bnode_write(fd.bnode, &entry, fd.entryoffset,
152 sizeof(struct hfsplus_cat_file)); 152 sizeof(struct hfsplus_cat_file));
153 hfsplus_mark_inode_dirty(inode, HFSPLUS_I_CAT_DIRTY);
154 }
153out: 155out:
154 hfs_find_exit(&fd); 156 hfs_find_exit(&fd);
155 return res; 157 return res;
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c
index f9ab276a4d8..bb62a588214 100644
--- a/fs/hfsplus/options.c
+++ b/fs/hfsplus/options.c
@@ -23,6 +23,7 @@ enum {
23 opt_umask, opt_uid, opt_gid, 23 opt_umask, opt_uid, opt_gid,
24 opt_part, opt_session, opt_nls, 24 opt_part, opt_session, opt_nls,
25 opt_nodecompose, opt_decompose, 25 opt_nodecompose, opt_decompose,
26 opt_barrier, opt_nobarrier,
26 opt_force, opt_err 27 opt_force, opt_err
27}; 28};
28 29
@@ -37,6 +38,8 @@ static const match_table_t tokens = {
37 { opt_nls, "nls=%s" }, 38 { opt_nls, "nls=%s" },
38 { opt_decompose, "decompose" }, 39 { opt_decompose, "decompose" },
39 { opt_nodecompose, "nodecompose" }, 40 { opt_nodecompose, "nodecompose" },
41 { opt_barrier, "barrier" },
42 { opt_nobarrier, "nobarrier" },
40 { opt_force, "force" }, 43 { opt_force, "force" },
41 { opt_err, NULL } 44 { opt_err, NULL }
42}; 45};
@@ -65,6 +68,32 @@ static inline int match_fourchar(substring_t *arg, u32 *result)
65 return 0; 68 return 0;
66} 69}
67 70
71int hfsplus_parse_options_remount(char *input, int *force)
72{
73 char *p;
74 substring_t args[MAX_OPT_ARGS];
75 int token;
76
77 if (!input)
78 return 0;
79
80 while ((p = strsep(&input, ",")) != NULL) {
81 if (!*p)
82 continue;
83
84 token = match_token(p, tokens, args);
85 switch (token) {
86 case opt_force:
87 *force = 1;
88 break;
89 default:
90 break;
91 }
92 }
93
94 return 1;
95}
96
68/* Parse options from mount. Returns 0 on failure */ 97/* Parse options from mount. Returns 0 on failure */
69/* input is the options passed to mount() as a string */ 98/* input is the options passed to mount() as a string */
70int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi) 99int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi)
@@ -136,7 +165,9 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi)
136 if (p) 165 if (p)
137 sbi->nls = load_nls(p); 166 sbi->nls = load_nls(p);
138 if (!sbi->nls) { 167 if (!sbi->nls) {
139 printk(KERN_ERR "hfs: unable to load nls mapping \"%s\"\n", p); 168 printk(KERN_ERR "hfs: unable to load "
169 "nls mapping \"%s\"\n",
170 p);
140 kfree(p); 171 kfree(p);
141 return 0; 172 return 0;
142 } 173 }
@@ -148,6 +179,12 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi)
148 case opt_nodecompose: 179 case opt_nodecompose:
149 set_bit(HFSPLUS_SB_NODECOMPOSE, &sbi->flags); 180 set_bit(HFSPLUS_SB_NODECOMPOSE, &sbi->flags);
150 break; 181 break;
182 case opt_barrier:
183 clear_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags);
184 break;
185 case opt_nobarrier:
186 set_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags);
187 break;
151 case opt_force: 188 case opt_force:
152 set_bit(HFSPLUS_SB_FORCE, &sbi->flags); 189 set_bit(HFSPLUS_SB_FORCE, &sbi->flags);
153 break; 190 break;
@@ -177,7 +214,8 @@ int hfsplus_show_options(struct seq_file *seq, struct vfsmount *mnt)
177 seq_printf(seq, ",creator=%.4s", (char *)&sbi->creator); 214 seq_printf(seq, ",creator=%.4s", (char *)&sbi->creator);
178 if (sbi->type != HFSPLUS_DEF_CR_TYPE) 215 if (sbi->type != HFSPLUS_DEF_CR_TYPE)
179 seq_printf(seq, ",type=%.4s", (char *)&sbi->type); 216 seq_printf(seq, ",type=%.4s", (char *)&sbi->type);
180 seq_printf(seq, ",umask=%o,uid=%u,gid=%u", sbi->umask, sbi->uid, sbi->gid); 217 seq_printf(seq, ",umask=%o,uid=%u,gid=%u", sbi->umask,
218 sbi->uid, sbi->gid);
181 if (sbi->part >= 0) 219 if (sbi->part >= 0)
182 seq_printf(seq, ",part=%u", sbi->part); 220 seq_printf(seq, ",part=%u", sbi->part);
183 if (sbi->session >= 0) 221 if (sbi->session >= 0)
@@ -186,5 +224,7 @@ int hfsplus_show_options(struct seq_file *seq, struct vfsmount *mnt)
186 seq_printf(seq, ",nls=%s", sbi->nls->charset); 224 seq_printf(seq, ",nls=%s", sbi->nls->charset);
187 if (test_bit(HFSPLUS_SB_NODECOMPOSE, &sbi->flags)) 225 if (test_bit(HFSPLUS_SB_NODECOMPOSE, &sbi->flags))
188 seq_printf(seq, ",nodecompose"); 226 seq_printf(seq, ",nodecompose");
227 if (test_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags))
228 seq_printf(seq, ",nobarrier");
189 return 0; 229 return 0;
190} 230}
diff --git a/fs/hfsplus/part_tbl.c b/fs/hfsplus/part_tbl.c
index 208b16c645c..d66ad113b1c 100644
--- a/fs/hfsplus/part_tbl.c
+++ b/fs/hfsplus/part_tbl.c
@@ -2,7 +2,8 @@
2 * linux/fs/hfsplus/part_tbl.c 2 * linux/fs/hfsplus/part_tbl.c
3 * 3 *
4 * Copyright (C) 1996-1997 Paul H. Hargrove 4 * Copyright (C) 1996-1997 Paul H. Hargrove
5 * This file may be distributed under the terms of the GNU General Public License. 5 * This file may be distributed under the terms of
6 * the GNU General Public License.
6 * 7 *
7 * Original code to handle the new style Mac partition table based on 8 * Original code to handle the new style Mac partition table based on
8 * a patch contributed by Holger Schemel (aeglos@valinor.owl.de). 9 * a patch contributed by Holger Schemel (aeglos@valinor.owl.de).
@@ -13,6 +14,7 @@
13 * 14 *
14 */ 15 */
15 16
17#include <linux/slab.h>
16#include "hfsplus_fs.h" 18#include "hfsplus_fs.h"
17 19
18/* offsets to various blocks */ 20/* offsets to various blocks */
@@ -58,77 +60,94 @@ struct new_pmap {
58 */ 60 */
59struct old_pmap { 61struct old_pmap {
60 __be16 pdSig; /* Signature bytes */ 62 __be16 pdSig; /* Signature bytes */
61 struct old_pmap_entry { 63 struct old_pmap_entry {
62 __be32 pdStart; 64 __be32 pdStart;
63 __be32 pdSize; 65 __be32 pdSize;
64 __be32 pdFSID; 66 __be32 pdFSID;
65 } pdEntry[42]; 67 } pdEntry[42];
66} __packed; 68} __packed;
67 69
70static int hfs_parse_old_pmap(struct super_block *sb, struct old_pmap *pm,
71 sector_t *part_start, sector_t *part_size)
72{
73 struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
74 int i;
75
76 for (i = 0; i < 42; i++) {
77 struct old_pmap_entry *p = &pm->pdEntry[i];
78
79 if (p->pdStart && p->pdSize &&
80 p->pdFSID == cpu_to_be32(0x54465331)/*"TFS1"*/ &&
81 (sbi->part < 0 || sbi->part == i)) {
82 *part_start += be32_to_cpu(p->pdStart);
83 *part_size = be32_to_cpu(p->pdSize);
84 return 0;
85 }
86 }
87
88 return -ENOENT;
89}
90
91static int hfs_parse_new_pmap(struct super_block *sb, struct new_pmap *pm,
92 sector_t *part_start, sector_t *part_size)
93{
94 struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
95 int size = be32_to_cpu(pm->pmMapBlkCnt);
96 int res;
97 int i = 0;
98
99 do {
100 if (!memcmp(pm->pmPartType, "Apple_HFS", 9) &&
101 (sbi->part < 0 || sbi->part == i)) {
102 *part_start += be32_to_cpu(pm->pmPyPartStart);
103 *part_size = be32_to_cpu(pm->pmPartBlkCnt);
104 return 0;
105 }
106
107 if (++i >= size)
108 return -ENOENT;
109
110 res = hfsplus_submit_bio(sb->s_bdev,
111 *part_start + HFS_PMAP_BLK + i,
112 pm, READ);
113 if (res)
114 return res;
115 } while (pm->pmSig == cpu_to_be16(HFS_NEW_PMAP_MAGIC));
116
117 return -ENOENT;
118}
119
68/* 120/*
69 * hfs_part_find() 121 * Parse the partition map looking for the start and length of a
70 * 122 * HFS/HFS+ partition.
71 * Parse the partition map looking for the
72 * start and length of the 'part'th HFS partition.
73 */ 123 */
74int hfs_part_find(struct super_block *sb, 124int hfs_part_find(struct super_block *sb,
75 sector_t *part_start, sector_t *part_size) 125 sector_t *part_start, sector_t *part_size)
76{ 126{
77 struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); 127 void *data;
78 struct buffer_head *bh; 128 int res;
79 __be16 *data; 129
80 int i, size, res; 130 data = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL);
131 if (!data)
132 return -ENOMEM;
81 133
82 res = -ENOENT; 134 res = hfsplus_submit_bio(sb->s_bdev, *part_start + HFS_PMAP_BLK,
83 bh = sb_bread512(sb, *part_start + HFS_PMAP_BLK, data); 135 data, READ);
84 if (!bh) 136 if (res)
85 return -EIO; 137 return res;
86 138
87 switch (be16_to_cpu(*data)) { 139 switch (be16_to_cpu(*((__be16 *)data))) {
88 case HFS_OLD_PMAP_MAGIC: 140 case HFS_OLD_PMAP_MAGIC:
89 { 141 res = hfs_parse_old_pmap(sb, data, part_start, part_size);
90 struct old_pmap *pm;
91 struct old_pmap_entry *p;
92
93 pm = (struct old_pmap *)bh->b_data;
94 p = pm->pdEntry;
95 size = 42;
96 for (i = 0; i < size; p++, i++) {
97 if (p->pdStart && p->pdSize &&
98 p->pdFSID == cpu_to_be32(0x54465331)/*"TFS1"*/ &&
99 (sbi->part < 0 || sbi->part == i)) {
100 *part_start += be32_to_cpu(p->pdStart);
101 *part_size = be32_to_cpu(p->pdSize);
102 res = 0;
103 }
104 }
105 break; 142 break;
106 }
107 case HFS_NEW_PMAP_MAGIC: 143 case HFS_NEW_PMAP_MAGIC:
108 { 144 res = hfs_parse_new_pmap(sb, data, part_start, part_size);
109 struct new_pmap *pm; 145 break;
110 146 default:
111 pm = (struct new_pmap *)bh->b_data; 147 res = -ENOENT;
112 size = be32_to_cpu(pm->pmMapBlkCnt);
113 for (i = 0; i < size;) {
114 if (!memcmp(pm->pmPartType,"Apple_HFS", 9) &&
115 (sbi->part < 0 || sbi->part == i)) {
116 *part_start += be32_to_cpu(pm->pmPyPartStart);
117 *part_size = be32_to_cpu(pm->pmPartBlkCnt);
118 res = 0;
119 break;
120 }
121 brelse(bh);
122 bh = sb_bread512(sb, *part_start + HFS_PMAP_BLK + ++i, pm);
123 if (!bh)
124 return -EIO;
125 if (pm->pmSig != cpu_to_be16(HFS_NEW_PMAP_MAGIC))
126 break;
127 }
128 break; 148 break;
129 }
130 } 149 }
131 brelse(bh);
132 150
151 kfree(data);
133 return res; 152 return res;
134} 153}
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 52cc746d3ba..6ee6ad20acf 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -10,6 +10,7 @@
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/init.h> 11#include <linux/init.h>
12#include <linux/pagemap.h> 12#include <linux/pagemap.h>
13#include <linux/blkdev.h>
13#include <linux/fs.h> 14#include <linux/fs.h>
14#include <linux/slab.h> 15#include <linux/slab.h>
15#include <linux/vfs.h> 16#include <linux/vfs.h>
@@ -66,6 +67,7 @@ struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino)
66 INIT_LIST_HEAD(&HFSPLUS_I(inode)->open_dir_list); 67 INIT_LIST_HEAD(&HFSPLUS_I(inode)->open_dir_list);
67 mutex_init(&HFSPLUS_I(inode)->extents_lock); 68 mutex_init(&HFSPLUS_I(inode)->extents_lock);
68 HFSPLUS_I(inode)->flags = 0; 69 HFSPLUS_I(inode)->flags = 0;
70 HFSPLUS_I(inode)->extent_state = 0;
69 HFSPLUS_I(inode)->rsrc_inode = NULL; 71 HFSPLUS_I(inode)->rsrc_inode = NULL;
70 atomic_set(&HFSPLUS_I(inode)->opencnt, 0); 72 atomic_set(&HFSPLUS_I(inode)->opencnt, 0);
71 73
@@ -157,45 +159,65 @@ int hfsplus_sync_fs(struct super_block *sb, int wait)
157{ 159{
158 struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); 160 struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
159 struct hfsplus_vh *vhdr = sbi->s_vhdr; 161 struct hfsplus_vh *vhdr = sbi->s_vhdr;
162 int write_backup = 0;
163 int error, error2;
164
165 if (!wait)
166 return 0;
160 167
161 dprint(DBG_SUPER, "hfsplus_write_super\n"); 168 dprint(DBG_SUPER, "hfsplus_write_super\n");
162 169
163 mutex_lock(&sbi->vh_mutex);
164 mutex_lock(&sbi->alloc_mutex);
165 sb->s_dirt = 0; 170 sb->s_dirt = 0;
166 171
172 /*
173 * Explicitly write out the special metadata inodes.
174 *
175 * While these special inodes are marked as hashed and written
176 * out periodically by the flusher threads we redirty them
177 * during writeout of normal inodes, and thus the livelock
178 * prevents us from getting the latest state to disk.
179 */
180 error = filemap_write_and_wait(sbi->cat_tree->inode->i_mapping);
181 error2 = filemap_write_and_wait(sbi->ext_tree->inode->i_mapping);
182 if (!error)
183 error = error2;
184 error2 = filemap_write_and_wait(sbi->alloc_file->i_mapping);
185 if (!error)
186 error = error2;
187
188 mutex_lock(&sbi->vh_mutex);
189 mutex_lock(&sbi->alloc_mutex);
167 vhdr->free_blocks = cpu_to_be32(sbi->free_blocks); 190 vhdr->free_blocks = cpu_to_be32(sbi->free_blocks);
168 vhdr->next_cnid = cpu_to_be32(sbi->next_cnid); 191 vhdr->next_cnid = cpu_to_be32(sbi->next_cnid);
169 vhdr->folder_count = cpu_to_be32(sbi->folder_count); 192 vhdr->folder_count = cpu_to_be32(sbi->folder_count);
170 vhdr->file_count = cpu_to_be32(sbi->file_count); 193 vhdr->file_count = cpu_to_be32(sbi->file_count);
171 194
172 mark_buffer_dirty(sbi->s_vhbh);
173 if (test_and_clear_bit(HFSPLUS_SB_WRITEBACKUP, &sbi->flags)) { 195 if (test_and_clear_bit(HFSPLUS_SB_WRITEBACKUP, &sbi->flags)) {
174 if (sbi->sect_count) { 196 memcpy(sbi->s_backup_vhdr, sbi->s_vhdr, sizeof(*sbi->s_vhdr));
175 struct buffer_head *bh; 197 write_backup = 1;
176 u32 block, offset;
177
178 block = sbi->blockoffset;
179 block += (sbi->sect_count - 2) >> (sb->s_blocksize_bits - 9);
180 offset = ((sbi->sect_count - 2) << 9) & (sb->s_blocksize - 1);
181 printk(KERN_DEBUG "hfs: backup: %u,%u,%u,%u\n",
182 sbi->blockoffset, sbi->sect_count,
183 block, offset);
184 bh = sb_bread(sb, block);
185 if (bh) {
186 vhdr = (struct hfsplus_vh *)(bh->b_data + offset);
187 if (be16_to_cpu(vhdr->signature) == HFSPLUS_VOLHEAD_SIG) {
188 memcpy(vhdr, sbi->s_vhdr, sizeof(*vhdr));
189 mark_buffer_dirty(bh);
190 brelse(bh);
191 } else
192 printk(KERN_WARNING "hfs: backup not found!\n");
193 }
194 }
195 } 198 }
199
200 error2 = hfsplus_submit_bio(sb->s_bdev,
201 sbi->part_start + HFSPLUS_VOLHEAD_SECTOR,
202 sbi->s_vhdr, WRITE_SYNC);
203 if (!error)
204 error = error2;
205 if (!write_backup)
206 goto out;
207
208 error2 = hfsplus_submit_bio(sb->s_bdev,
209 sbi->part_start + sbi->sect_count - 2,
210 sbi->s_backup_vhdr, WRITE_SYNC);
211 if (!error)
212 error2 = error;
213out:
196 mutex_unlock(&sbi->alloc_mutex); 214 mutex_unlock(&sbi->alloc_mutex);
197 mutex_unlock(&sbi->vh_mutex); 215 mutex_unlock(&sbi->vh_mutex);
198 return 0; 216
217 if (!test_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags))
218 blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
219
220 return error;
199} 221}
200 222
201static void hfsplus_write_super(struct super_block *sb) 223static void hfsplus_write_super(struct super_block *sb)
@@ -215,23 +237,22 @@ static void hfsplus_put_super(struct super_block *sb)
215 if (!sb->s_fs_info) 237 if (!sb->s_fs_info)
216 return; 238 return;
217 239
218 if (sb->s_dirt)
219 hfsplus_write_super(sb);
220 if (!(sb->s_flags & MS_RDONLY) && sbi->s_vhdr) { 240 if (!(sb->s_flags & MS_RDONLY) && sbi->s_vhdr) {
221 struct hfsplus_vh *vhdr = sbi->s_vhdr; 241 struct hfsplus_vh *vhdr = sbi->s_vhdr;
222 242
223 vhdr->modify_date = hfsp_now2mt(); 243 vhdr->modify_date = hfsp_now2mt();
224 vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_UNMNT); 244 vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_UNMNT);
225 vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_INCNSTNT); 245 vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_INCNSTNT);
226 mark_buffer_dirty(sbi->s_vhbh); 246
227 sync_dirty_buffer(sbi->s_vhbh); 247 hfsplus_sync_fs(sb, 1);
228 } 248 }
229 249
230 hfs_btree_close(sbi->cat_tree); 250 hfs_btree_close(sbi->cat_tree);
231 hfs_btree_close(sbi->ext_tree); 251 hfs_btree_close(sbi->ext_tree);
232 iput(sbi->alloc_file); 252 iput(sbi->alloc_file);
233 iput(sbi->hidden_dir); 253 iput(sbi->hidden_dir);
234 brelse(sbi->s_vhbh); 254 kfree(sbi->s_vhdr);
255 kfree(sbi->s_backup_vhdr);
235 unload_nls(sbi->nls); 256 unload_nls(sbi->nls);
236 kfree(sb->s_fs_info); 257 kfree(sb->s_fs_info);
237 sb->s_fs_info = NULL; 258 sb->s_fs_info = NULL;
@@ -263,26 +284,31 @@ static int hfsplus_remount(struct super_block *sb, int *flags, char *data)
263 return 0; 284 return 0;
264 if (!(*flags & MS_RDONLY)) { 285 if (!(*flags & MS_RDONLY)) {
265 struct hfsplus_vh *vhdr = HFSPLUS_SB(sb)->s_vhdr; 286 struct hfsplus_vh *vhdr = HFSPLUS_SB(sb)->s_vhdr;
266 struct hfsplus_sb_info sbi; 287 int force = 0;
267 288
268 memset(&sbi, 0, sizeof(struct hfsplus_sb_info)); 289 if (!hfsplus_parse_options_remount(data, &force))
269 sbi.nls = HFSPLUS_SB(sb)->nls;
270 if (!hfsplus_parse_options(data, &sbi))
271 return -EINVAL; 290 return -EINVAL;
272 291
273 if (!(vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_UNMNT))) { 292 if (!(vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_UNMNT))) {
274 printk(KERN_WARNING "hfs: filesystem was not cleanly unmounted, " 293 printk(KERN_WARNING "hfs: filesystem was "
275 "running fsck.hfsplus is recommended. leaving read-only.\n"); 294 "not cleanly unmounted, "
295 "running fsck.hfsplus is recommended. "
296 "leaving read-only.\n");
276 sb->s_flags |= MS_RDONLY; 297 sb->s_flags |= MS_RDONLY;
277 *flags |= MS_RDONLY; 298 *flags |= MS_RDONLY;
278 } else if (test_bit(HFSPLUS_SB_FORCE, &sbi.flags)) { 299 } else if (force) {
279 /* nothing */ 300 /* nothing */
280 } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { 301 } else if (vhdr->attributes &
281 printk(KERN_WARNING "hfs: filesystem is marked locked, leaving read-only.\n"); 302 cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) {
303 printk(KERN_WARNING "hfs: filesystem is marked locked, "
304 "leaving read-only.\n");
282 sb->s_flags |= MS_RDONLY; 305 sb->s_flags |= MS_RDONLY;
283 *flags |= MS_RDONLY; 306 *flags |= MS_RDONLY;
284 } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) { 307 } else if (vhdr->attributes &
285 printk(KERN_WARNING "hfs: filesystem is marked journaled, leaving read-only.\n"); 308 cpu_to_be32(HFSPLUS_VOL_JOURNALED)) {
309 printk(KERN_WARNING "hfs: filesystem is "
310 "marked journaled, "
311 "leaving read-only.\n");
286 sb->s_flags |= MS_RDONLY; 312 sb->s_flags |= MS_RDONLY;
287 *flags |= MS_RDONLY; 313 *flags |= MS_RDONLY;
288 } 314 }
@@ -372,17 +398,22 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
372 sb->s_maxbytes = MAX_LFS_FILESIZE; 398 sb->s_maxbytes = MAX_LFS_FILESIZE;
373 399
374 if (!(vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_UNMNT))) { 400 if (!(vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_UNMNT))) {
375 printk(KERN_WARNING "hfs: Filesystem was not cleanly unmounted, " 401 printk(KERN_WARNING "hfs: Filesystem was "
376 "running fsck.hfsplus is recommended. mounting read-only.\n"); 402 "not cleanly unmounted, "
403 "running fsck.hfsplus is recommended. "
404 "mounting read-only.\n");
377 sb->s_flags |= MS_RDONLY; 405 sb->s_flags |= MS_RDONLY;
378 } else if (test_and_clear_bit(HFSPLUS_SB_FORCE, &sbi->flags)) { 406 } else if (test_and_clear_bit(HFSPLUS_SB_FORCE, &sbi->flags)) {
379 /* nothing */ 407 /* nothing */
380 } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { 408 } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) {
381 printk(KERN_WARNING "hfs: Filesystem is marked locked, mounting read-only.\n"); 409 printk(KERN_WARNING "hfs: Filesystem is marked locked, mounting read-only.\n");
382 sb->s_flags |= MS_RDONLY; 410 sb->s_flags |= MS_RDONLY;
383 } else if ((vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) && !(sb->s_flags & MS_RDONLY)) { 411 } else if ((vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) &&
384 printk(KERN_WARNING "hfs: write access to a journaled filesystem is not supported, " 412 !(sb->s_flags & MS_RDONLY)) {
385 "use the force option at your own risk, mounting read-only.\n"); 413 printk(KERN_WARNING "hfs: write access to "
414 "a journaled filesystem is not supported, "
415 "use the force option at your own risk, "
416 "mounting read-only.\n");
386 sb->s_flags |= MS_RDONLY; 417 sb->s_flags |= MS_RDONLY;
387 } 418 }
388 419
@@ -419,7 +450,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
419 err = -ENOMEM; 450 err = -ENOMEM;
420 goto cleanup; 451 goto cleanup;
421 } 452 }
422 sb->s_root->d_op = &hfsplus_dentry_operations; 453 d_set_d_op(sb->s_root, &hfsplus_dentry_operations);
423 454
424 str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; 455 str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1;
425 str.name = HFSP_HIDDENDIR_NAME; 456 str.name = HFSP_HIDDENDIR_NAME;
@@ -449,19 +480,16 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
449 be32_add_cpu(&vhdr->write_count, 1); 480 be32_add_cpu(&vhdr->write_count, 1);
450 vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT); 481 vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT);
451 vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT); 482 vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT);
452 mark_buffer_dirty(sbi->s_vhbh); 483 hfsplus_sync_fs(sb, 1);
453 sync_dirty_buffer(sbi->s_vhbh);
454 484
455 if (!sbi->hidden_dir) { 485 if (!sbi->hidden_dir) {
456 printk(KERN_DEBUG "hfs: create hidden dir...\n");
457
458 mutex_lock(&sbi->vh_mutex); 486 mutex_lock(&sbi->vh_mutex);
459 sbi->hidden_dir = hfsplus_new_inode(sb, S_IFDIR); 487 sbi->hidden_dir = hfsplus_new_inode(sb, S_IFDIR);
460 hfsplus_create_cat(sbi->hidden_dir->i_ino, sb->s_root->d_inode, 488 hfsplus_create_cat(sbi->hidden_dir->i_ino, sb->s_root->d_inode,
461 &str, sbi->hidden_dir); 489 &str, sbi->hidden_dir);
462 mutex_unlock(&sbi->vh_mutex); 490 mutex_unlock(&sbi->vh_mutex);
463 491
464 mark_inode_dirty(sbi->hidden_dir); 492 hfsplus_mark_inode_dirty(sbi->hidden_dir, HFSPLUS_I_CAT_DIRTY);
465 } 493 }
466out: 494out:
467 unload_nls(sbi->nls); 495 unload_nls(sbi->nls);
@@ -488,11 +516,19 @@ static struct inode *hfsplus_alloc_inode(struct super_block *sb)
488 return i ? &i->vfs_inode : NULL; 516 return i ? &i->vfs_inode : NULL;
489} 517}
490 518
491static void hfsplus_destroy_inode(struct inode *inode) 519static void hfsplus_i_callback(struct rcu_head *head)
492{ 520{
521 struct inode *inode = container_of(head, struct inode, i_rcu);
522
523 INIT_LIST_HEAD(&inode->i_dentry);
493 kmem_cache_free(hfsplus_inode_cachep, HFSPLUS_I(inode)); 524 kmem_cache_free(hfsplus_inode_cachep, HFSPLUS_I(inode));
494} 525}
495 526
527static void hfsplus_destroy_inode(struct inode *inode)
528{
529 call_rcu(&inode->i_rcu, hfsplus_i_callback);
530}
531
496#define HFSPLUS_INODE_SIZE sizeof(struct hfsplus_inode_info) 532#define HFSPLUS_INODE_SIZE sizeof(struct hfsplus_inode_info)
497 533
498static struct dentry *hfsplus_mount(struct file_system_type *fs_type, 534static struct dentry *hfsplus_mount(struct file_system_type *fs_type,
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c
index b66d67de882..a3f0bfcc881 100644
--- a/fs/hfsplus/unicode.c
+++ b/fs/hfsplus/unicode.c
@@ -17,14 +17,14 @@
17/* Returns folded char, or 0 if ignorable */ 17/* Returns folded char, or 0 if ignorable */
18static inline u16 case_fold(u16 c) 18static inline u16 case_fold(u16 c)
19{ 19{
20 u16 tmp; 20 u16 tmp;
21 21
22 tmp = hfsplus_case_fold_table[c >> 8]; 22 tmp = hfsplus_case_fold_table[c >> 8];
23 if (tmp) 23 if (tmp)
24 tmp = hfsplus_case_fold_table[tmp + (c & 0xff)]; 24 tmp = hfsplus_case_fold_table[tmp + (c & 0xff)];
25 else 25 else
26 tmp = c; 26 tmp = c;
27 return tmp; 27 return tmp;
28} 28}
29 29
30/* Compare unicode strings, return values like normal strcmp */ 30/* Compare unicode strings, return values like normal strcmp */
@@ -118,7 +118,9 @@ static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
118 return NULL; 118 return NULL;
119} 119}
120 120
121int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, char *astr, int *len_p) 121int hfsplus_uni2asc(struct super_block *sb,
122 const struct hfsplus_unistr *ustr,
123 char *astr, int *len_p)
122{ 124{
123 const hfsplus_unichr *ip; 125 const hfsplus_unichr *ip;
124 struct nls_table *nls = HFSPLUS_SB(sb)->nls; 126 struct nls_table *nls = HFSPLUS_SB(sb)->nls;
@@ -171,7 +173,8 @@ int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, c
171 goto same; 173 goto same;
172 c1 = be16_to_cpu(*ip); 174 c1 = be16_to_cpu(*ip);
173 if (likely(compose)) 175 if (likely(compose))
174 ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c1); 176 ce1 = hfsplus_compose_lookup(
177 hfsplus_compose_table, c1);
175 if (ce1) 178 if (ce1)
176 break; 179 break;
177 switch (c0) { 180 switch (c0) {
@@ -199,7 +202,8 @@ int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, c
199 if (ce2) { 202 if (ce2) {
200 i = 1; 203 i = 1;
201 while (i < ustrlen) { 204 while (i < ustrlen) {
202 ce1 = hfsplus_compose_lookup(ce2, be16_to_cpu(ip[i])); 205 ce1 = hfsplus_compose_lookup(ce2,
206 be16_to_cpu(ip[i]));
203 if (!ce1) 207 if (!ce1)
204 break; 208 break;
205 i++; 209 i++;
@@ -211,7 +215,7 @@ int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, c
211 goto done; 215 goto done;
212 } 216 }
213 } 217 }
214 same: 218same:
215 switch (c0) { 219 switch (c0) {
216 case 0: 220 case 0:
217 cc = 0x2400; 221 cc = 0x2400;
@@ -222,7 +226,7 @@ int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, c
222 default: 226 default:
223 cc = c0; 227 cc = c0;
224 } 228 }
225 done: 229done:
226 res = nls->uni2char(cc, op, len); 230 res = nls->uni2char(cc, op, len);
227 if (res < 0) { 231 if (res < 0) {
228 if (res == -ENAMETOOLONG) 232 if (res == -ENAMETOOLONG)
@@ -320,7 +324,8 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr,
320 * Composed unicode characters are decomposed and case-folding is performed 324 * Composed unicode characters are decomposed and case-folding is performed
321 * if the appropriate bits are (un)set on the superblock. 325 * if the appropriate bits are (un)set on the superblock.
322 */ 326 */
323int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str) 327int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode,
328 struct qstr *str)
324{ 329{
325 struct super_block *sb = dentry->d_sb; 330 struct super_block *sb = dentry->d_sb;
326 const char *astr; 331 const char *astr;
@@ -363,9 +368,12 @@ int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str)
363 * Composed unicode characters are decomposed and case-folding is performed 368 * Composed unicode characters are decomposed and case-folding is performed
364 * if the appropriate bits are (un)set on the superblock. 369 * if the appropriate bits are (un)set on the superblock.
365 */ 370 */
366int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *s2) 371int hfsplus_compare_dentry(const struct dentry *parent,
372 const struct inode *pinode,
373 const struct dentry *dentry, const struct inode *inode,
374 unsigned int len, const char *str, const struct qstr *name)
367{ 375{
368 struct super_block *sb = dentry->d_sb; 376 struct super_block *sb = parent->d_sb;
369 int casefold, decompose, size; 377 int casefold, decompose, size;
370 int dsize1, dsize2, len1, len2; 378 int dsize1, dsize2, len1, len2;
371 const u16 *dstr1, *dstr2; 379 const u16 *dstr1, *dstr2;
@@ -375,10 +383,10 @@ int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *
375 383
376 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); 384 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
377 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); 385 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
378 astr1 = s1->name; 386 astr1 = str;
379 len1 = s1->len; 387 len1 = len;
380 astr2 = s2->name; 388 astr2 = name->name;
381 len2 = s2->len; 389 len2 = name->len;
382 dsize1 = dsize2 = 0; 390 dsize1 = dsize2 = 0;
383 dstr1 = dstr2 = NULL; 391 dstr1 = dstr2 = NULL;
384 392
@@ -388,7 +396,9 @@ int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *
388 astr1 += size; 396 astr1 += size;
389 len1 -= size; 397 len1 -= size;
390 398
391 if (!decompose || !(dstr1 = decompose_unichar(c, &dsize1))) { 399 if (decompose)
400 dstr1 = decompose_unichar(c, &dsize1);
401 if (!decompose || !dstr1) {
392 c1 = c; 402 c1 = c;
393 dstr1 = &c1; 403 dstr1 = &c1;
394 dsize1 = 1; 404 dsize1 = 1;
@@ -400,7 +410,9 @@ int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *
400 astr2 += size; 410 astr2 += size;
401 len2 -= size; 411 len2 -= size;
402 412
403 if (!decompose || !(dstr2 = decompose_unichar(c, &dsize2))) { 413 if (decompose)
414 dstr2 = decompose_unichar(c, &dsize2);
415 if (!decompose || !dstr2) {
404 c2 = c; 416 c2 = c;
405 dstr2 = &c2; 417 dstr2 = &c2;
406 dsize2 = 1; 418 dsize2 = 1;
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index 8972c20b321..196231794f6 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -24,6 +24,40 @@ struct hfsplus_wd {
24 u16 embed_count; 24 u16 embed_count;
25}; 25};
26 26
27static void hfsplus_end_io_sync(struct bio *bio, int err)
28{
29 if (err)
30 clear_bit(BIO_UPTODATE, &bio->bi_flags);
31 complete(bio->bi_private);
32}
33
34int hfsplus_submit_bio(struct block_device *bdev, sector_t sector,
35 void *data, int rw)
36{
37 DECLARE_COMPLETION_ONSTACK(wait);
38 struct bio *bio;
39
40 bio = bio_alloc(GFP_NOIO, 1);
41 bio->bi_sector = sector;
42 bio->bi_bdev = bdev;
43 bio->bi_end_io = hfsplus_end_io_sync;
44 bio->bi_private = &wait;
45
46 /*
47 * We always submit one sector at a time, so bio_add_page must not fail.
48 */
49 if (bio_add_page(bio, virt_to_page(data), HFSPLUS_SECTOR_SIZE,
50 offset_in_page(data)) != HFSPLUS_SECTOR_SIZE)
51 BUG();
52
53 submit_bio(rw, bio);
54 wait_for_completion(&wait);
55
56 if (!bio_flagged(bio, BIO_UPTODATE))
57 return -EIO;
58 return 0;
59}
60
27static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd) 61static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd)
28{ 62{
29 u32 extent; 63 u32 extent;
@@ -40,12 +74,14 @@ static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd)
40 !(attrib & HFSP_WRAP_ATTRIB_SPARED)) 74 !(attrib & HFSP_WRAP_ATTRIB_SPARED))
41 return 0; 75 return 0;
42 76
43 wd->ablk_size = be32_to_cpu(*(__be32 *)(bufptr + HFSP_WRAPOFF_ABLKSIZE)); 77 wd->ablk_size =
78 be32_to_cpu(*(__be32 *)(bufptr + HFSP_WRAPOFF_ABLKSIZE));
44 if (wd->ablk_size < HFSPLUS_SECTOR_SIZE) 79 if (wd->ablk_size < HFSPLUS_SECTOR_SIZE)
45 return 0; 80 return 0;
46 if (wd->ablk_size % HFSPLUS_SECTOR_SIZE) 81 if (wd->ablk_size % HFSPLUS_SECTOR_SIZE)
47 return 0; 82 return 0;
48 wd->ablk_start = be16_to_cpu(*(__be16 *)(bufptr + HFSP_WRAPOFF_ABLKSTART)); 83 wd->ablk_start =
84 be16_to_cpu(*(__be16 *)(bufptr + HFSP_WRAPOFF_ABLKSTART));
49 85
50 extent = get_unaligned_be32(bufptr + HFSP_WRAPOFF_EMBEDEXT); 86 extent = get_unaligned_be32(bufptr + HFSP_WRAPOFF_EMBEDEXT);
51 wd->embed_start = (extent >> 16) & 0xFFFF; 87 wd->embed_start = (extent >> 16) & 0xFFFF;
@@ -68,7 +104,8 @@ static int hfsplus_get_last_session(struct super_block *sb,
68 if (HFSPLUS_SB(sb)->session >= 0) { 104 if (HFSPLUS_SB(sb)->session >= 0) {
69 te.cdte_track = HFSPLUS_SB(sb)->session; 105 te.cdte_track = HFSPLUS_SB(sb)->session;
70 te.cdte_format = CDROM_LBA; 106 te.cdte_format = CDROM_LBA;
71 res = ioctl_by_bdev(sb->s_bdev, CDROMREADTOCENTRY, (unsigned long)&te); 107 res = ioctl_by_bdev(sb->s_bdev,
108 CDROMREADTOCENTRY, (unsigned long)&te);
72 if (!res && (te.cdte_ctrl & CDROM_DATA_TRACK) == 4) { 109 if (!res && (te.cdte_ctrl & CDROM_DATA_TRACK) == 4) {
73 *start = (sector_t)te.cdte_addr.lba << 2; 110 *start = (sector_t)te.cdte_addr.lba << 2;
74 return 0; 111 return 0;
@@ -77,7 +114,8 @@ static int hfsplus_get_last_session(struct super_block *sb,
77 return -EINVAL; 114 return -EINVAL;
78 } 115 }
79 ms_info.addr_format = CDROM_LBA; 116 ms_info.addr_format = CDROM_LBA;
80 res = ioctl_by_bdev(sb->s_bdev, CDROMMULTISESSION, (unsigned long)&ms_info); 117 res = ioctl_by_bdev(sb->s_bdev, CDROMMULTISESSION,
118 (unsigned long)&ms_info);
81 if (!res && ms_info.xa_flag) 119 if (!res && ms_info.xa_flag)
82 *start = (sector_t)ms_info.addr.lba << 2; 120 *start = (sector_t)ms_info.addr.lba << 2;
83 return 0; 121 return 0;
@@ -88,100 +126,112 @@ static int hfsplus_get_last_session(struct super_block *sb,
88int hfsplus_read_wrapper(struct super_block *sb) 126int hfsplus_read_wrapper(struct super_block *sb)
89{ 127{
90 struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); 128 struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
91 struct buffer_head *bh;
92 struct hfsplus_vh *vhdr;
93 struct hfsplus_wd wd; 129 struct hfsplus_wd wd;
94 sector_t part_start, part_size; 130 sector_t part_start, part_size;
95 u32 blocksize; 131 u32 blocksize;
132 int error = 0;
96 133
134 error = -EINVAL;
97 blocksize = sb_min_blocksize(sb, HFSPLUS_SECTOR_SIZE); 135 blocksize = sb_min_blocksize(sb, HFSPLUS_SECTOR_SIZE);
98 if (!blocksize) 136 if (!blocksize)
99 return -EINVAL; 137 goto out;
100 138
101 if (hfsplus_get_last_session(sb, &part_start, &part_size)) 139 if (hfsplus_get_last_session(sb, &part_start, &part_size))
102 return -EINVAL; 140 goto out;
103 if ((u64)part_start + part_size > 0x100000000ULL) { 141 if ((u64)part_start + part_size > 0x100000000ULL) {
104 pr_err("hfs: volumes larger than 2TB are not supported yet\n"); 142 pr_err("hfs: volumes larger than 2TB are not supported yet\n");
105 return -EINVAL; 143 goto out;
106 } 144 }
107 while (1) {
108 bh = sb_bread512(sb, part_start + HFSPLUS_VOLHEAD_SECTOR, vhdr);
109 if (!bh)
110 return -EIO;
111
112 if (vhdr->signature == cpu_to_be16(HFSP_WRAP_MAGIC)) {
113 if (!hfsplus_read_mdb(vhdr, &wd))
114 goto error;
115 wd.ablk_size >>= HFSPLUS_SECTOR_SHIFT;
116 part_start += wd.ablk_start + wd.embed_start * wd.ablk_size;
117 part_size = wd.embed_count * wd.ablk_size;
118 brelse(bh);
119 bh = sb_bread512(sb, part_start + HFSPLUS_VOLHEAD_SECTOR, vhdr);
120 if (!bh)
121 return -EIO;
122 }
123 if (vhdr->signature == cpu_to_be16(HFSPLUS_VOLHEAD_SIG))
124 break;
125 if (vhdr->signature == cpu_to_be16(HFSPLUS_VOLHEAD_SIGX)) {
126 set_bit(HFSPLUS_SB_HFSX, &sbi->flags);
127 break;
128 }
129 brelse(bh);
130 145
131 /* check for a partition block 146 error = -ENOMEM;
147 sbi->s_vhdr = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL);
148 if (!sbi->s_vhdr)
149 goto out;
150 sbi->s_backup_vhdr = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL);
151 if (!sbi->s_backup_vhdr)
152 goto out_free_vhdr;
153
154reread:
155 error = hfsplus_submit_bio(sb->s_bdev,
156 part_start + HFSPLUS_VOLHEAD_SECTOR,
157 sbi->s_vhdr, READ);
158 if (error)
159 goto out_free_backup_vhdr;
160
161 error = -EINVAL;
162 switch (sbi->s_vhdr->signature) {
163 case cpu_to_be16(HFSPLUS_VOLHEAD_SIGX):
164 set_bit(HFSPLUS_SB_HFSX, &sbi->flags);
165 /*FALLTHRU*/
166 case cpu_to_be16(HFSPLUS_VOLHEAD_SIG):
167 break;
168 case cpu_to_be16(HFSP_WRAP_MAGIC):
169 if (!hfsplus_read_mdb(sbi->s_vhdr, &wd))
170 goto out;
171 wd.ablk_size >>= HFSPLUS_SECTOR_SHIFT;
172 part_start += wd.ablk_start + wd.embed_start * wd.ablk_size;
173 part_size = wd.embed_count * wd.ablk_size;
174 goto reread;
175 default:
176 /*
177 * Check for a partition block.
178 *
132 * (should do this only for cdrom/loop though) 179 * (should do this only for cdrom/loop though)
133 */ 180 */
134 if (hfs_part_find(sb, &part_start, &part_size)) 181 if (hfs_part_find(sb, &part_start, &part_size))
135 return -EINVAL; 182 goto out;
183 goto reread;
184 }
185
186 error = hfsplus_submit_bio(sb->s_bdev,
187 part_start + part_size - 2,
188 sbi->s_backup_vhdr, READ);
189 if (error)
190 goto out_free_backup_vhdr;
191
192 error = -EINVAL;
193 if (sbi->s_backup_vhdr->signature != sbi->s_vhdr->signature) {
194 printk(KERN_WARNING
195 "hfs: invalid secondary volume header\n");
196 goto out_free_backup_vhdr;
136 } 197 }
137 198
138 blocksize = be32_to_cpu(vhdr->blocksize); 199 blocksize = be32_to_cpu(sbi->s_vhdr->blocksize);
139 brelse(bh);
140 200
141 /* block size must be at least as large as a sector 201 /*
142 * and a multiple of 2 202 * Block size must be at least as large as a sector and a multiple of 2.
143 */ 203 */
144 if (blocksize < HFSPLUS_SECTOR_SIZE || 204 if (blocksize < HFSPLUS_SECTOR_SIZE || ((blocksize - 1) & blocksize))
145 ((blocksize - 1) & blocksize)) 205 goto out_free_backup_vhdr;
146 return -EINVAL;
147 sbi->alloc_blksz = blocksize; 206 sbi->alloc_blksz = blocksize;
148 sbi->alloc_blksz_shift = 0; 207 sbi->alloc_blksz_shift = 0;
149 while ((blocksize >>= 1) != 0) 208 while ((blocksize >>= 1) != 0)
150 sbi->alloc_blksz_shift++; 209 sbi->alloc_blksz_shift++;
151 blocksize = min(sbi->alloc_blksz, (u32)PAGE_SIZE); 210 blocksize = min(sbi->alloc_blksz, (u32)PAGE_SIZE);
152 211
153 /* align block size to block offset */ 212 /*
213 * Align block size to block offset.
214 */
154 while (part_start & ((blocksize >> HFSPLUS_SECTOR_SHIFT) - 1)) 215 while (part_start & ((blocksize >> HFSPLUS_SECTOR_SHIFT) - 1))
155 blocksize >>= 1; 216 blocksize >>= 1;
156 217
157 if (sb_set_blocksize(sb, blocksize) != blocksize) { 218 if (sb_set_blocksize(sb, blocksize) != blocksize) {
158 printk(KERN_ERR "hfs: unable to set blocksize to %u!\n", blocksize); 219 printk(KERN_ERR "hfs: unable to set blocksize to %u!\n",
159 return -EINVAL; 220 blocksize);
221 goto out_free_backup_vhdr;
160 } 222 }
161 223
162 sbi->blockoffset = 224 sbi->blockoffset =
163 part_start >> (sb->s_blocksize_bits - HFSPLUS_SECTOR_SHIFT); 225 part_start >> (sb->s_blocksize_bits - HFSPLUS_SECTOR_SHIFT);
226 sbi->part_start = part_start;
164 sbi->sect_count = part_size; 227 sbi->sect_count = part_size;
165 sbi->fs_shift = sbi->alloc_blksz_shift - sb->s_blocksize_bits; 228 sbi->fs_shift = sbi->alloc_blksz_shift - sb->s_blocksize_bits;
166
167 bh = sb_bread512(sb, part_start + HFSPLUS_VOLHEAD_SECTOR, vhdr);
168 if (!bh)
169 return -EIO;
170
171 /* should still be the same... */
172 if (test_bit(HFSPLUS_SB_HFSX, &sbi->flags)) {
173 if (vhdr->signature != cpu_to_be16(HFSPLUS_VOLHEAD_SIGX))
174 goto error;
175 } else {
176 if (vhdr->signature != cpu_to_be16(HFSPLUS_VOLHEAD_SIG))
177 goto error;
178 }
179
180 sbi->s_vhbh = bh;
181 sbi->s_vhdr = vhdr;
182
183 return 0; 229 return 0;
184 error: 230
185 brelse(bh); 231out_free_backup_vhdr:
186 return -EINVAL; 232 kfree(sbi->s_backup_vhdr);
233out_free_vhdr:
234 kfree(sbi->s_vhdr);
235out:
236 return error;
187} 237}
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 2c0f148a49e..d3244d949a4 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -32,7 +32,7 @@ static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode)
32 32
33#define FILE_HOSTFS_I(file) HOSTFS_I((file)->f_path.dentry->d_inode) 33#define FILE_HOSTFS_I(file) HOSTFS_I((file)->f_path.dentry->d_inode)
34 34
35static int hostfs_d_delete(struct dentry *dentry) 35static int hostfs_d_delete(const struct dentry *dentry)
36{ 36{
37 return 1; 37 return 1;
38} 38}
@@ -92,12 +92,10 @@ __uml_setup("hostfs=", hostfs_args,
92 92
93static char *__dentry_name(struct dentry *dentry, char *name) 93static char *__dentry_name(struct dentry *dentry, char *name)
94{ 94{
95 char *p = __dentry_path(dentry, name, PATH_MAX); 95 char *p = dentry_path_raw(dentry, name, PATH_MAX);
96 char *root; 96 char *root;
97 size_t len; 97 size_t len;
98 98
99 spin_unlock(&dcache_lock);
100
101 root = dentry->d_sb->s_fs_info; 99 root = dentry->d_sb->s_fs_info;
102 len = strlen(root); 100 len = strlen(root);
103 if (IS_ERR(p)) { 101 if (IS_ERR(p)) {
@@ -123,25 +121,23 @@ static char *dentry_name(struct dentry *dentry)
123 if (!name) 121 if (!name)
124 return NULL; 122 return NULL;
125 123
126 spin_lock(&dcache_lock);
127 return __dentry_name(dentry, name); /* will unlock */ 124 return __dentry_name(dentry, name); /* will unlock */
128} 125}
129 126
130static char *inode_name(struct inode *ino) 127static char *inode_name(struct inode *ino)
131{ 128{
132 struct dentry *dentry; 129 struct dentry *dentry;
133 char *name = __getname(); 130 char *name;
134 if (!name)
135 return NULL;
136 131
137 spin_lock(&dcache_lock); 132 dentry = d_find_alias(ino);
138 if (list_empty(&ino->i_dentry)) { 133 if (!dentry)
139 spin_unlock(&dcache_lock);
140 __putname(name);
141 return NULL; 134 return NULL;
142 } 135
143 dentry = list_first_entry(&ino->i_dentry, struct dentry, d_alias); 136 name = dentry_name(dentry);
144 return __dentry_name(dentry, name); /* will unlock */ 137
138 dput(dentry);
139
140 return name;
145} 141}
146 142
147static char *follow_link(char *link) 143static char *follow_link(char *link)
@@ -251,11 +247,18 @@ static void hostfs_evict_inode(struct inode *inode)
251 } 247 }
252} 248}
253 249
254static void hostfs_destroy_inode(struct inode *inode) 250static void hostfs_i_callback(struct rcu_head *head)
255{ 251{
252 struct inode *inode = container_of(head, struct inode, i_rcu);
253 INIT_LIST_HEAD(&inode->i_dentry);
256 kfree(HOSTFS_I(inode)); 254 kfree(HOSTFS_I(inode));
257} 255}
258 256
257static void hostfs_destroy_inode(struct inode *inode)
258{
259 call_rcu(&inode->i_rcu, hostfs_i_callback);
260}
261
259static int hostfs_show_options(struct seq_file *seq, struct vfsmount *vfs) 262static int hostfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
260{ 263{
261 const char *root_path = vfs->mnt_sb->s_fs_info; 264 const char *root_path = vfs->mnt_sb->s_fs_info;
@@ -609,7 +612,7 @@ struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry,
609 goto out_put; 612 goto out_put;
610 613
611 d_add(dentry, inode); 614 d_add(dentry, inode);
612 dentry->d_op = &hostfs_dentry_ops; 615 d_set_d_op(dentry, &hostfs_dentry_ops);
613 return NULL; 616 return NULL;
614 617
615 out_put: 618 out_put:
@@ -746,11 +749,14 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from,
746 return err; 749 return err;
747} 750}
748 751
749int hostfs_permission(struct inode *ino, int desired) 752int hostfs_permission(struct inode *ino, int desired, unsigned int flags)
750{ 753{
751 char *name; 754 char *name;
752 int r = 0, w = 0, x = 0, err; 755 int r = 0, w = 0, x = 0, err;
753 756
757 if (flags & IPERM_FLAG_RCU)
758 return -ECHILD;
759
754 if (desired & MAY_READ) r = 1; 760 if (desired & MAY_READ) r = 1;
755 if (desired & MAY_WRITE) w = 1; 761 if (desired & MAY_WRITE) w = 1;
756 if (desired & MAY_EXEC) x = 1; 762 if (desired & MAY_EXEC) x = 1;
@@ -765,7 +771,7 @@ int hostfs_permission(struct inode *ino, int desired)
765 err = access_file(name, r, w, x); 771 err = access_file(name, r, w, x);
766 __putname(name); 772 __putname(name);
767 if (!err) 773 if (!err)
768 err = generic_permission(ino, desired, NULL); 774 err = generic_permission(ino, desired, flags, NULL);
769 return err; 775 return err;
770} 776}
771 777
diff --git a/fs/hpfs/dentry.c b/fs/hpfs/dentry.c
index 67d9d36b3d5..32c13a94e1e 100644
--- a/fs/hpfs/dentry.c
+++ b/fs/hpfs/dentry.c
@@ -12,7 +12,8 @@
12 * Note: the dentry argument is the parent dentry. 12 * Note: the dentry argument is the parent dentry.
13 */ 13 */
14 14
15static int hpfs_hash_dentry(struct dentry *dentry, struct qstr *qstr) 15static int hpfs_hash_dentry(const struct dentry *dentry, const struct inode *inode,
16 struct qstr *qstr)
16{ 17{
17 unsigned long hash; 18 unsigned long hash;
18 int i; 19 int i;
@@ -34,19 +35,25 @@ static int hpfs_hash_dentry(struct dentry *dentry, struct qstr *qstr)
34 return 0; 35 return 0;
35} 36}
36 37
37static int hpfs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b) 38static int hpfs_compare_dentry(const struct dentry *parent,
39 const struct inode *pinode,
40 const struct dentry *dentry, const struct inode *inode,
41 unsigned int len, const char *str, const struct qstr *name)
38{ 42{
39 unsigned al=a->len; 43 unsigned al = len;
40 unsigned bl=b->len; 44 unsigned bl = name->len;
41 hpfs_adjust_length(a->name, &al); 45
46 hpfs_adjust_length(str, &al);
42 /*hpfs_adjust_length(b->name, &bl);*/ 47 /*hpfs_adjust_length(b->name, &bl);*/
43 /* 'a' is the qstr of an already existing dentry, so the name 48
44 * must be valid. 'b' must be validated first. 49 /*
50 * 'str' is the name of an already existing dentry, so the name
51 * must be valid. 'name' must be validated first.
45 */ 52 */
46 53
47 if (hpfs_chk_name(b->name, &bl)) 54 if (hpfs_chk_name(name->name, &bl))
48 return 1; 55 return 1;
49 if (hpfs_compare_names(dentry->d_sb, a->name, al, b->name, bl, 0)) 56 if (hpfs_compare_names(parent->d_sb, str, al, name->name, bl, 0))
50 return 1; 57 return 1;
51 return 0; 58 return 0;
52} 59}
@@ -58,5 +65,5 @@ static const struct dentry_operations hpfs_dentry_operations = {
58 65
59void hpfs_set_dentry_operations(struct dentry *dentry) 66void hpfs_set_dentry_operations(struct dentry *dentry)
60{ 67{
61 dentry->d_op = &hpfs_dentry_operations; 68 d_set_d_op(dentry, &hpfs_dentry_operations);
62} 69}
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 11c2b4080f6..f4ad9e31ddc 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -419,7 +419,7 @@ again:
419 unlock_kernel(); 419 unlock_kernel();
420 return -ENOSPC; 420 return -ENOSPC;
421 } 421 }
422 if (generic_permission(inode, MAY_WRITE, NULL) || 422 if (generic_permission(inode, MAY_WRITE, 0, NULL) ||
423 !S_ISREG(inode->i_mode) || 423 !S_ISREG(inode->i_mode) ||
424 get_write_access(inode)) { 424 get_write_access(inode)) {
425 d_rehash(dentry); 425 d_rehash(dentry);
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 6c5f01597c3..49935ba78db 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -177,11 +177,18 @@ static struct inode *hpfs_alloc_inode(struct super_block *sb)
177 return &ei->vfs_inode; 177 return &ei->vfs_inode;
178} 178}
179 179
180static void hpfs_destroy_inode(struct inode *inode) 180static void hpfs_i_callback(struct rcu_head *head)
181{ 181{
182 struct inode *inode = container_of(head, struct inode, i_rcu);
183 INIT_LIST_HEAD(&inode->i_dentry);
182 kmem_cache_free(hpfs_inode_cachep, hpfs_i(inode)); 184 kmem_cache_free(hpfs_inode_cachep, hpfs_i(inode));
183} 185}
184 186
187static void hpfs_destroy_inode(struct inode *inode)
188{
189 call_rcu(&inode->i_rcu, hpfs_i_callback);
190}
191
185static void init_once(void *foo) 192static void init_once(void *foo)
186{ 193{
187 struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo; 194 struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo;
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index f702b5f713f..87ed48e0343 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -632,11 +632,18 @@ void hppfs_evict_inode(struct inode *ino)
632 mntput(ino->i_sb->s_fs_info); 632 mntput(ino->i_sb->s_fs_info);
633} 633}
634 634
635static void hppfs_destroy_inode(struct inode *inode) 635static void hppfs_i_callback(struct rcu_head *head)
636{ 636{
637 struct inode *inode = container_of(head, struct inode, i_rcu);
638 INIT_LIST_HEAD(&inode->i_dentry);
637 kfree(HPPFS_I(inode)); 639 kfree(HPPFS_I(inode));
638} 640}
639 641
642static void hppfs_destroy_inode(struct inode *inode)
643{
644 call_rcu(&inode->i_rcu, hppfs_i_callback);
645}
646
640static const struct super_operations hppfs_sbops = { 647static const struct super_operations hppfs_sbops = {
641 .alloc_inode = hppfs_alloc_inode, 648 .alloc_inode = hppfs_alloc_inode,
642 .destroy_inode = hppfs_destroy_inode, 649 .destroy_inode = hppfs_destroy_inode,
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a5fe68189ee..9885082b470 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -663,11 +663,18 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
663 return &p->vfs_inode; 663 return &p->vfs_inode;
664} 664}
665 665
666static void hugetlbfs_i_callback(struct rcu_head *head)
667{
668 struct inode *inode = container_of(head, struct inode, i_rcu);
669 INIT_LIST_HEAD(&inode->i_dentry);
670 kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
671}
672
666static void hugetlbfs_destroy_inode(struct inode *inode) 673static void hugetlbfs_destroy_inode(struct inode *inode)
667{ 674{
668 hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb)); 675 hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb));
669 mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy); 676 mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy);
670 kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode)); 677 call_rcu(&inode->i_rcu, hugetlbfs_i_callback);
671} 678}
672 679
673static const struct address_space_operations hugetlbfs_aops = { 680static const struct address_space_operations hugetlbfs_aops = {
diff --git a/fs/inode.c b/fs/inode.c
index ae2727ab0c3..da85e56378f 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -102,26 +102,29 @@ static DECLARE_RWSEM(iprune_sem);
102 */ 102 */
103struct inodes_stat_t inodes_stat; 103struct inodes_stat_t inodes_stat;
104 104
105static struct percpu_counter nr_inodes __cacheline_aligned_in_smp; 105static DEFINE_PER_CPU(unsigned int, nr_inodes);
106static struct percpu_counter nr_inodes_unused __cacheline_aligned_in_smp;
107 106
108static struct kmem_cache *inode_cachep __read_mostly; 107static struct kmem_cache *inode_cachep __read_mostly;
109 108
110static inline int get_nr_inodes(void) 109static int get_nr_inodes(void)
111{ 110{
112 return percpu_counter_sum_positive(&nr_inodes); 111 int i;
112 int sum = 0;
113 for_each_possible_cpu(i)
114 sum += per_cpu(nr_inodes, i);
115 return sum < 0 ? 0 : sum;
113} 116}
114 117
115static inline int get_nr_inodes_unused(void) 118static inline int get_nr_inodes_unused(void)
116{ 119{
117 return percpu_counter_sum_positive(&nr_inodes_unused); 120 return inodes_stat.nr_unused;
118} 121}
119 122
120int get_nr_dirty_inodes(void) 123int get_nr_dirty_inodes(void)
121{ 124{
125 /* not actually dirty inodes, but a wild approximation */
122 int nr_dirty = get_nr_inodes() - get_nr_inodes_unused(); 126 int nr_dirty = get_nr_inodes() - get_nr_inodes_unused();
123 return nr_dirty > 0 ? nr_dirty : 0; 127 return nr_dirty > 0 ? nr_dirty : 0;
124
125} 128}
126 129
127/* 130/*
@@ -132,7 +135,6 @@ int proc_nr_inodes(ctl_table *table, int write,
132 void __user *buffer, size_t *lenp, loff_t *ppos) 135 void __user *buffer, size_t *lenp, loff_t *ppos)
133{ 136{
134 inodes_stat.nr_inodes = get_nr_inodes(); 137 inodes_stat.nr_inodes = get_nr_inodes();
135 inodes_stat.nr_unused = get_nr_inodes_unused();
136 return proc_dointvec(table, write, buffer, lenp, ppos); 138 return proc_dointvec(table, write, buffer, lenp, ppos);
137} 139}
138#endif 140#endif
@@ -224,7 +226,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
224 inode->i_fsnotify_mask = 0; 226 inode->i_fsnotify_mask = 0;
225#endif 227#endif
226 228
227 percpu_counter_inc(&nr_inodes); 229 this_cpu_inc(nr_inodes);
228 230
229 return 0; 231 return 0;
230out: 232out:
@@ -255,6 +257,12 @@ static struct inode *alloc_inode(struct super_block *sb)
255 return inode; 257 return inode;
256} 258}
257 259
260void free_inode_nonrcu(struct inode *inode)
261{
262 kmem_cache_free(inode_cachep, inode);
263}
264EXPORT_SYMBOL(free_inode_nonrcu);
265
258void __destroy_inode(struct inode *inode) 266void __destroy_inode(struct inode *inode)
259{ 267{
260 BUG_ON(inode_has_buffers(inode)); 268 BUG_ON(inode_has_buffers(inode));
@@ -266,10 +274,17 @@ void __destroy_inode(struct inode *inode)
266 if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED) 274 if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED)
267 posix_acl_release(inode->i_default_acl); 275 posix_acl_release(inode->i_default_acl);
268#endif 276#endif
269 percpu_counter_dec(&nr_inodes); 277 this_cpu_dec(nr_inodes);
270} 278}
271EXPORT_SYMBOL(__destroy_inode); 279EXPORT_SYMBOL(__destroy_inode);
272 280
281static void i_callback(struct rcu_head *head)
282{
283 struct inode *inode = container_of(head, struct inode, i_rcu);
284 INIT_LIST_HEAD(&inode->i_dentry);
285 kmem_cache_free(inode_cachep, inode);
286}
287
273static void destroy_inode(struct inode *inode) 288static void destroy_inode(struct inode *inode)
274{ 289{
275 BUG_ON(!list_empty(&inode->i_lru)); 290 BUG_ON(!list_empty(&inode->i_lru));
@@ -277,7 +292,7 @@ static void destroy_inode(struct inode *inode)
277 if (inode->i_sb->s_op->destroy_inode) 292 if (inode->i_sb->s_op->destroy_inode)
278 inode->i_sb->s_op->destroy_inode(inode); 293 inode->i_sb->s_op->destroy_inode(inode);
279 else 294 else
280 kmem_cache_free(inode_cachep, (inode)); 295 call_rcu(&inode->i_rcu, i_callback);
281} 296}
282 297
283/* 298/*
@@ -335,7 +350,7 @@ static void inode_lru_list_add(struct inode *inode)
335{ 350{
336 if (list_empty(&inode->i_lru)) { 351 if (list_empty(&inode->i_lru)) {
337 list_add(&inode->i_lru, &inode_lru); 352 list_add(&inode->i_lru, &inode_lru);
338 percpu_counter_inc(&nr_inodes_unused); 353 inodes_stat.nr_unused++;
339 } 354 }
340} 355}
341 356
@@ -343,7 +358,7 @@ static void inode_lru_list_del(struct inode *inode)
343{ 358{
344 if (!list_empty(&inode->i_lru)) { 359 if (!list_empty(&inode->i_lru)) {
345 list_del_init(&inode->i_lru); 360 list_del_init(&inode->i_lru);
346 percpu_counter_dec(&nr_inodes_unused); 361 inodes_stat.nr_unused--;
347 } 362 }
348} 363}
349 364
@@ -430,6 +445,7 @@ void end_writeback(struct inode *inode)
430 BUG_ON(!(inode->i_state & I_FREEING)); 445 BUG_ON(!(inode->i_state & I_FREEING));
431 BUG_ON(inode->i_state & I_CLEAR); 446 BUG_ON(inode->i_state & I_CLEAR);
432 inode_sync_wait(inode); 447 inode_sync_wait(inode);
448 /* don't need i_lock here, no concurrent mods to i_state */
433 inode->i_state = I_FREEING | I_CLEAR; 449 inode->i_state = I_FREEING | I_CLEAR;
434} 450}
435EXPORT_SYMBOL(end_writeback); 451EXPORT_SYMBOL(end_writeback);
@@ -513,7 +529,7 @@ void evict_inodes(struct super_block *sb)
513 list_move(&inode->i_lru, &dispose); 529 list_move(&inode->i_lru, &dispose);
514 list_del_init(&inode->i_wb_list); 530 list_del_init(&inode->i_wb_list);
515 if (!(inode->i_state & (I_DIRTY | I_SYNC))) 531 if (!(inode->i_state & (I_DIRTY | I_SYNC)))
516 percpu_counter_dec(&nr_inodes_unused); 532 inodes_stat.nr_unused--;
517 } 533 }
518 spin_unlock(&inode_lock); 534 spin_unlock(&inode_lock);
519 535
@@ -554,7 +570,7 @@ int invalidate_inodes(struct super_block *sb)
554 list_move(&inode->i_lru, &dispose); 570 list_move(&inode->i_lru, &dispose);
555 list_del_init(&inode->i_wb_list); 571 list_del_init(&inode->i_wb_list);
556 if (!(inode->i_state & (I_DIRTY | I_SYNC))) 572 if (!(inode->i_state & (I_DIRTY | I_SYNC)))
557 percpu_counter_dec(&nr_inodes_unused); 573 inodes_stat.nr_unused--;
558 } 574 }
559 spin_unlock(&inode_lock); 575 spin_unlock(&inode_lock);
560 576
@@ -616,7 +632,7 @@ static void prune_icache(int nr_to_scan)
616 if (atomic_read(&inode->i_count) || 632 if (atomic_read(&inode->i_count) ||
617 (inode->i_state & ~I_REFERENCED)) { 633 (inode->i_state & ~I_REFERENCED)) {
618 list_del_init(&inode->i_lru); 634 list_del_init(&inode->i_lru);
619 percpu_counter_dec(&nr_inodes_unused); 635 inodes_stat.nr_unused--;
620 continue; 636 continue;
621 } 637 }
622 638
@@ -650,7 +666,7 @@ static void prune_icache(int nr_to_scan)
650 */ 666 */
651 list_move(&inode->i_lru, &freeable); 667 list_move(&inode->i_lru, &freeable);
652 list_del_init(&inode->i_wb_list); 668 list_del_init(&inode->i_wb_list);
653 percpu_counter_dec(&nr_inodes_unused); 669 inodes_stat.nr_unused--;
654 } 670 }
655 if (current_is_kswapd()) 671 if (current_is_kswapd())
656 __count_vm_events(KSWAPD_INODESTEAL, reap); 672 __count_vm_events(KSWAPD_INODESTEAL, reap);
@@ -1648,8 +1664,6 @@ void __init inode_init(void)
1648 SLAB_MEM_SPREAD), 1664 SLAB_MEM_SPREAD),
1649 init_once); 1665 init_once);
1650 register_shrinker(&icache_shrinker); 1666 register_shrinker(&icache_shrinker);
1651 percpu_counter_init(&nr_inodes, 0);
1652 percpu_counter_init(&nr_inodes_unused, 0);
1653 1667
1654 /* Hash may have been set up in inode_init_early */ 1668 /* Hash may have been set up in inode_init_early */
1655 if (!hashdist) 1669 if (!hashdist)
diff --git a/fs/internal.h b/fs/internal.h
index e43b9a4dbf4..9687c2ee273 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -63,6 +63,7 @@ extern int copy_mount_string(const void __user *, char **);
63 63
64extern void free_vfsmnt(struct vfsmount *); 64extern void free_vfsmnt(struct vfsmount *);
65extern struct vfsmount *alloc_vfsmnt(const char *); 65extern struct vfsmount *alloc_vfsmnt(const char *);
66extern unsigned int mnt_get_count(struct vfsmount *mnt);
66extern struct vfsmount *__lookup_mnt(struct vfsmount *, struct dentry *, int); 67extern struct vfsmount *__lookup_mnt(struct vfsmount *, struct dentry *, int);
67extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *, 68extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *,
68 struct vfsmount *); 69 struct vfsmount *);
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index bfdeb82a53b..844a7903c72 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -26,16 +26,32 @@
26 26
27#define BEQUIET 27#define BEQUIET
28 28
29static int isofs_hashi(struct dentry *parent, struct qstr *qstr); 29static int isofs_hashi(const struct dentry *parent, const struct inode *inode,
30static int isofs_hash(struct dentry *parent, struct qstr *qstr); 30 struct qstr *qstr);
31static int isofs_dentry_cmpi(struct dentry *dentry, struct qstr *a, struct qstr *b); 31static int isofs_hash(const struct dentry *parent, const struct inode *inode,
32static int isofs_dentry_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b); 32 struct qstr *qstr);
33static int isofs_dentry_cmpi(const struct dentry *parent,
34 const struct inode *pinode,
35 const struct dentry *dentry, const struct inode *inode,
36 unsigned int len, const char *str, const struct qstr *name);
37static int isofs_dentry_cmp(const struct dentry *parent,
38 const struct inode *pinode,
39 const struct dentry *dentry, const struct inode *inode,
40 unsigned int len, const char *str, const struct qstr *name);
33 41
34#ifdef CONFIG_JOLIET 42#ifdef CONFIG_JOLIET
35static int isofs_hashi_ms(struct dentry *parent, struct qstr *qstr); 43static int isofs_hashi_ms(const struct dentry *parent, const struct inode *inode,
36static int isofs_hash_ms(struct dentry *parent, struct qstr *qstr); 44 struct qstr *qstr);
37static int isofs_dentry_cmpi_ms(struct dentry *dentry, struct qstr *a, struct qstr *b); 45static int isofs_hash_ms(const struct dentry *parent, const struct inode *inode,
38static int isofs_dentry_cmp_ms(struct dentry *dentry, struct qstr *a, struct qstr *b); 46 struct qstr *qstr);
47static int isofs_dentry_cmpi_ms(const struct dentry *parent,
48 const struct inode *pinode,
49 const struct dentry *dentry, const struct inode *inode,
50 unsigned int len, const char *str, const struct qstr *name);
51static int isofs_dentry_cmp_ms(const struct dentry *parent,
52 const struct inode *pinode,
53 const struct dentry *dentry, const struct inode *inode,
54 unsigned int len, const char *str, const struct qstr *name);
39#endif 55#endif
40 56
41static void isofs_put_super(struct super_block *sb) 57static void isofs_put_super(struct super_block *sb)
@@ -65,11 +81,18 @@ static struct inode *isofs_alloc_inode(struct super_block *sb)
65 return &ei->vfs_inode; 81 return &ei->vfs_inode;
66} 82}
67 83
68static void isofs_destroy_inode(struct inode *inode) 84static void isofs_i_callback(struct rcu_head *head)
69{ 85{
86 struct inode *inode = container_of(head, struct inode, i_rcu);
87 INIT_LIST_HEAD(&inode->i_dentry);
70 kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode)); 88 kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode));
71} 89}
72 90
91static void isofs_destroy_inode(struct inode *inode)
92{
93 call_rcu(&inode->i_rcu, isofs_i_callback);
94}
95
73static void init_once(void *foo) 96static void init_once(void *foo)
74{ 97{
75 struct iso_inode_info *ei = foo; 98 struct iso_inode_info *ei = foo;
@@ -160,7 +183,7 @@ struct iso9660_options{
160 * Compute the hash for the isofs name corresponding to the dentry. 183 * Compute the hash for the isofs name corresponding to the dentry.
161 */ 184 */
162static int 185static int
163isofs_hash_common(struct dentry *dentry, struct qstr *qstr, int ms) 186isofs_hash_common(const struct dentry *dentry, struct qstr *qstr, int ms)
164{ 187{
165 const char *name; 188 const char *name;
166 int len; 189 int len;
@@ -181,7 +204,7 @@ isofs_hash_common(struct dentry *dentry, struct qstr *qstr, int ms)
181 * Compute the hash for the isofs name corresponding to the dentry. 204 * Compute the hash for the isofs name corresponding to the dentry.
182 */ 205 */
183static int 206static int
184isofs_hashi_common(struct dentry *dentry, struct qstr *qstr, int ms) 207isofs_hashi_common(const struct dentry *dentry, struct qstr *qstr, int ms)
185{ 208{
186 const char *name; 209 const char *name;
187 int len; 210 int len;
@@ -206,100 +229,94 @@ isofs_hashi_common(struct dentry *dentry, struct qstr *qstr, int ms)
206} 229}
207 230
208/* 231/*
209 * Case insensitive compare of two isofs names. 232 * Compare of two isofs names.
210 */
211static int isofs_dentry_cmpi_common(struct dentry *dentry, struct qstr *a,
212 struct qstr *b, int ms)
213{
214 int alen, blen;
215
216 /* A filename cannot end in '.' or we treat it like it has none */
217 alen = a->len;
218 blen = b->len;
219 if (ms) {
220 while (alen && a->name[alen-1] == '.')
221 alen--;
222 while (blen && b->name[blen-1] == '.')
223 blen--;
224 }
225 if (alen == blen) {
226 if (strnicmp(a->name, b->name, alen) == 0)
227 return 0;
228 }
229 return 1;
230}
231
232/*
233 * Case sensitive compare of two isofs names.
234 */ 233 */
235static int isofs_dentry_cmp_common(struct dentry *dentry, struct qstr *a, 234static int isofs_dentry_cmp_common(
236 struct qstr *b, int ms) 235 unsigned int len, const char *str,
236 const struct qstr *name, int ms, int ci)
237{ 237{
238 int alen, blen; 238 int alen, blen;
239 239
240 /* A filename cannot end in '.' or we treat it like it has none */ 240 /* A filename cannot end in '.' or we treat it like it has none */
241 alen = a->len; 241 alen = name->len;
242 blen = b->len; 242 blen = len;
243 if (ms) { 243 if (ms) {
244 while (alen && a->name[alen-1] == '.') 244 while (alen && name->name[alen-1] == '.')
245 alen--; 245 alen--;
246 while (blen && b->name[blen-1] == '.') 246 while (blen && str[blen-1] == '.')
247 blen--; 247 blen--;
248 } 248 }
249 if (alen == blen) { 249 if (alen == blen) {
250 if (strncmp(a->name, b->name, alen) == 0) 250 if (ci) {
251 return 0; 251 if (strnicmp(name->name, str, alen) == 0)
252 return 0;
253 } else {
254 if (strncmp(name->name, str, alen) == 0)
255 return 0;
256 }
252 } 257 }
253 return 1; 258 return 1;
254} 259}
255 260
256static int 261static int
257isofs_hash(struct dentry *dentry, struct qstr *qstr) 262isofs_hash(const struct dentry *dentry, const struct inode *inode,
263 struct qstr *qstr)
258{ 264{
259 return isofs_hash_common(dentry, qstr, 0); 265 return isofs_hash_common(dentry, qstr, 0);
260} 266}
261 267
262static int 268static int
263isofs_hashi(struct dentry *dentry, struct qstr *qstr) 269isofs_hashi(const struct dentry *dentry, const struct inode *inode,
270 struct qstr *qstr)
264{ 271{
265 return isofs_hashi_common(dentry, qstr, 0); 272 return isofs_hashi_common(dentry, qstr, 0);
266} 273}
267 274
268static int 275static int
269isofs_dentry_cmp(struct dentry *dentry,struct qstr *a,struct qstr *b) 276isofs_dentry_cmp(const struct dentry *parent, const struct inode *pinode,
277 const struct dentry *dentry, const struct inode *inode,
278 unsigned int len, const char *str, const struct qstr *name)
270{ 279{
271 return isofs_dentry_cmp_common(dentry, a, b, 0); 280 return isofs_dentry_cmp_common(len, str, name, 0, 0);
272} 281}
273 282
274static int 283static int
275isofs_dentry_cmpi(struct dentry *dentry,struct qstr *a,struct qstr *b) 284isofs_dentry_cmpi(const struct dentry *parent, const struct inode *pinode,
285 const struct dentry *dentry, const struct inode *inode,
286 unsigned int len, const char *str, const struct qstr *name)
276{ 287{
277 return isofs_dentry_cmpi_common(dentry, a, b, 0); 288 return isofs_dentry_cmp_common(len, str, name, 0, 1);
278} 289}
279 290
280#ifdef CONFIG_JOLIET 291#ifdef CONFIG_JOLIET
281static int 292static int
282isofs_hash_ms(struct dentry *dentry, struct qstr *qstr) 293isofs_hash_ms(const struct dentry *dentry, const struct inode *inode,
294 struct qstr *qstr)
283{ 295{
284 return isofs_hash_common(dentry, qstr, 1); 296 return isofs_hash_common(dentry, qstr, 1);
285} 297}
286 298
287static int 299static int
288isofs_hashi_ms(struct dentry *dentry, struct qstr *qstr) 300isofs_hashi_ms(const struct dentry *dentry, const struct inode *inode,
301 struct qstr *qstr)
289{ 302{
290 return isofs_hashi_common(dentry, qstr, 1); 303 return isofs_hashi_common(dentry, qstr, 1);
291} 304}
292 305
293static int 306static int
294isofs_dentry_cmp_ms(struct dentry *dentry,struct qstr *a,struct qstr *b) 307isofs_dentry_cmp_ms(const struct dentry *parent, const struct inode *pinode,
308 const struct dentry *dentry, const struct inode *inode,
309 unsigned int len, const char *str, const struct qstr *name)
295{ 310{
296 return isofs_dentry_cmp_common(dentry, a, b, 1); 311 return isofs_dentry_cmp_common(len, str, name, 1, 0);
297} 312}
298 313
299static int 314static int
300isofs_dentry_cmpi_ms(struct dentry *dentry,struct qstr *a,struct qstr *b) 315isofs_dentry_cmpi_ms(const struct dentry *parent, const struct inode *pinode,
316 const struct dentry *dentry, const struct inode *inode,
317 unsigned int len, const char *str, const struct qstr *name)
301{ 318{
302 return isofs_dentry_cmpi_common(dentry, a, b, 1); 319 return isofs_dentry_cmp_common(len, str, name, 1, 1);
303} 320}
304#endif 321#endif
305 322
@@ -932,7 +949,7 @@ root_found:
932 table += 2; 949 table += 2;
933 if (opt.check == 'r') 950 if (opt.check == 'r')
934 table++; 951 table++;
935 s->s_root->d_op = &isofs_dentry_ops[table]; 952 d_set_d_op(s->s_root, &isofs_dentry_ops[table]);
936 953
937 kfree(opt.iocharset); 954 kfree(opt.iocharset);
938 955
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index 0d23abfd428..679a849c3b2 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -37,7 +37,8 @@ isofs_cmp(struct dentry *dentry, const char *compare, int dlen)
37 37
38 qstr.name = compare; 38 qstr.name = compare;
39 qstr.len = dlen; 39 qstr.len = dlen;
40 return dentry->d_op->d_compare(dentry, &dentry->d_name, &qstr); 40 return dentry->d_op->d_compare(NULL, NULL, NULL, NULL,
41 dentry->d_name.len, dentry->d_name.name, &qstr);
41} 42}
42 43
43/* 44/*
@@ -171,7 +172,7 @@ struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, struct nam
171 struct inode *inode; 172 struct inode *inode;
172 struct page *page; 173 struct page *page;
173 174
174 dentry->d_op = dir->i_sb->s_root->d_op; 175 d_set_d_op(dentry, dir->i_sb->s_root->d_op);
175 176
176 page = alloc_page(GFP_USER); 177 page = alloc_page(GFP_USER);
177 if (!page) 178 if (!page)
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index f837ba95352..9e4686900f1 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -43,6 +43,7 @@
43#include <linux/vmalloc.h> 43#include <linux/vmalloc.h>
44#include <linux/backing-dev.h> 44#include <linux/backing-dev.h>
45#include <linux/bitops.h> 45#include <linux/bitops.h>
46#include <linux/ratelimit.h>
46 47
47#define CREATE_TRACE_POINTS 48#define CREATE_TRACE_POINTS
48#include <trace/events/jbd2.h> 49#include <trace/events/jbd2.h>
@@ -93,6 +94,7 @@ EXPORT_SYMBOL(jbd2_journal_file_inode);
93EXPORT_SYMBOL(jbd2_journal_init_jbd_inode); 94EXPORT_SYMBOL(jbd2_journal_init_jbd_inode);
94EXPORT_SYMBOL(jbd2_journal_release_jbd_inode); 95EXPORT_SYMBOL(jbd2_journal_release_jbd_inode);
95EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); 96EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
97EXPORT_SYMBOL(jbd2_inode_cache);
96 98
97static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); 99static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
98static void __journal_abort_soft (journal_t *journal, int errno); 100static void __journal_abort_soft (journal_t *journal, int errno);
@@ -827,7 +829,7 @@ static journal_t * journal_init_common (void)
827 829
828 journal = kzalloc(sizeof(*journal), GFP_KERNEL); 830 journal = kzalloc(sizeof(*journal), GFP_KERNEL);
829 if (!journal) 831 if (!journal)
830 goto fail; 832 return NULL;
831 833
832 init_waitqueue_head(&journal->j_wait_transaction_locked); 834 init_waitqueue_head(&journal->j_wait_transaction_locked);
833 init_waitqueue_head(&journal->j_wait_logspace); 835 init_waitqueue_head(&journal->j_wait_logspace);
@@ -852,14 +854,12 @@ static journal_t * journal_init_common (void)
852 err = jbd2_journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH); 854 err = jbd2_journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH);
853 if (err) { 855 if (err) {
854 kfree(journal); 856 kfree(journal);
855 goto fail; 857 return NULL;
856 } 858 }
857 859
858 spin_lock_init(&journal->j_history_lock); 860 spin_lock_init(&journal->j_history_lock);
859 861
860 return journal; 862 return journal;
861fail:
862 return NULL;
863} 863}
864 864
865/* jbd2_journal_init_dev and jbd2_journal_init_inode: 865/* jbd2_journal_init_dev and jbd2_journal_init_inode:
@@ -1982,7 +1982,6 @@ static void jbd2_journal_destroy_jbd2_journal_head_cache(void)
1982static struct journal_head *journal_alloc_journal_head(void) 1982static struct journal_head *journal_alloc_journal_head(void)
1983{ 1983{
1984 struct journal_head *ret; 1984 struct journal_head *ret;
1985 static unsigned long last_warning;
1986 1985
1987#ifdef CONFIG_JBD2_DEBUG 1986#ifdef CONFIG_JBD2_DEBUG
1988 atomic_inc(&nr_journal_heads); 1987 atomic_inc(&nr_journal_heads);
@@ -1990,11 +1989,7 @@ static struct journal_head *journal_alloc_journal_head(void)
1990 ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS); 1989 ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS);
1991 if (!ret) { 1990 if (!ret) {
1992 jbd_debug(1, "out of memory for journal_head\n"); 1991 jbd_debug(1, "out of memory for journal_head\n");
1993 if (time_after(jiffies, last_warning + 5*HZ)) { 1992 pr_notice_ratelimited("ENOMEM in %s, retrying.\n", __func__);
1994 printk(KERN_NOTICE "ENOMEM in %s, retrying.\n",
1995 __func__);
1996 last_warning = jiffies;
1997 }
1998 while (!ret) { 1993 while (!ret) {
1999 yield(); 1994 yield();
2000 ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS); 1995 ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS);
@@ -2292,17 +2287,19 @@ static void __exit jbd2_remove_jbd_stats_proc_entry(void)
2292 2287
2293#endif 2288#endif
2294 2289
2295struct kmem_cache *jbd2_handle_cache; 2290struct kmem_cache *jbd2_handle_cache, *jbd2_inode_cache;
2296 2291
2297static int __init journal_init_handle_cache(void) 2292static int __init journal_init_handle_cache(void)
2298{ 2293{
2299 jbd2_handle_cache = kmem_cache_create("jbd2_journal_handle", 2294 jbd2_handle_cache = KMEM_CACHE(jbd2_journal_handle, SLAB_TEMPORARY);
2300 sizeof(handle_t),
2301 0, /* offset */
2302 SLAB_TEMPORARY, /* flags */
2303 NULL); /* ctor */
2304 if (jbd2_handle_cache == NULL) { 2295 if (jbd2_handle_cache == NULL) {
2305 printk(KERN_EMERG "JBD: failed to create handle cache\n"); 2296 printk(KERN_EMERG "JBD2: failed to create handle cache\n");
2297 return -ENOMEM;
2298 }
2299 jbd2_inode_cache = KMEM_CACHE(jbd2_inode, 0);
2300 if (jbd2_inode_cache == NULL) {
2301 printk(KERN_EMERG "JBD2: failed to create inode cache\n");
2302 kmem_cache_destroy(jbd2_handle_cache);
2306 return -ENOMEM; 2303 return -ENOMEM;
2307 } 2304 }
2308 return 0; 2305 return 0;
@@ -2312,6 +2309,9 @@ static void jbd2_journal_destroy_handle_cache(void)
2312{ 2309{
2313 if (jbd2_handle_cache) 2310 if (jbd2_handle_cache)
2314 kmem_cache_destroy(jbd2_handle_cache); 2311 kmem_cache_destroy(jbd2_handle_cache);
2312 if (jbd2_inode_cache)
2313 kmem_cache_destroy(jbd2_inode_cache);
2314
2315} 2315}
2316 2316
2317/* 2317/*
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 2bc4d5f116f..1cad869494f 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -299,10 +299,10 @@ int jbd2_journal_skip_recovery(journal_t *journal)
299#ifdef CONFIG_JBD2_DEBUG 299#ifdef CONFIG_JBD2_DEBUG
300 int dropped = info.end_transaction - 300 int dropped = info.end_transaction -
301 be32_to_cpu(journal->j_superblock->s_sequence); 301 be32_to_cpu(journal->j_superblock->s_sequence);
302#endif
303 jbd_debug(1, 302 jbd_debug(1,
304 "JBD: ignoring %d transaction%s from the journal.\n", 303 "JBD: ignoring %d transaction%s from the journal.\n",
305 dropped, (dropped == 1) ? "" : "s"); 304 dropped, (dropped == 1) ? "" : "s");
305#endif
306 journal->j_transaction_sequence = ++info.end_transaction; 306 journal->j_transaction_sequence = ++info.end_transaction;
307 } 307 }
308 308
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index c7934900dcd..faad2bd787c 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -340,9 +340,7 @@ handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int gfp_mask)
340 jbd2_free_handle(handle); 340 jbd2_free_handle(handle);
341 current->journal_info = NULL; 341 current->journal_info = NULL;
342 handle = ERR_PTR(err); 342 handle = ERR_PTR(err);
343 goto out;
344 } 343 }
345out:
346 return handle; 344 return handle;
347} 345}
348EXPORT_SYMBOL(jbd2__journal_start); 346EXPORT_SYMBOL(jbd2__journal_start);
@@ -589,7 +587,7 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
589 transaction = handle->h_transaction; 587 transaction = handle->h_transaction;
590 journal = transaction->t_journal; 588 journal = transaction->t_journal;
591 589
592 jbd_debug(5, "buffer_head %p, force_copy %d\n", jh, force_copy); 590 jbd_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy);
593 591
594 JBUFFER_TRACE(jh, "entry"); 592 JBUFFER_TRACE(jh, "entry");
595repeat: 593repeat:
@@ -774,7 +772,7 @@ done:
774 J_EXPECT_JH(jh, buffer_uptodate(jh2bh(jh)), 772 J_EXPECT_JH(jh, buffer_uptodate(jh2bh(jh)),
775 "Possible IO failure.\n"); 773 "Possible IO failure.\n");
776 page = jh2bh(jh)->b_page; 774 page = jh2bh(jh)->b_page;
777 offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK; 775 offset = offset_in_page(jh2bh(jh)->b_data);
778 source = kmap_atomic(page, KM_USER0); 776 source = kmap_atomic(page, KM_USER0);
779 /* Fire data frozen trigger just before we copy the data */ 777 /* Fire data frozen trigger just before we copy the data */
780 jbd2_buffer_frozen_trigger(jh, source + offset, 778 jbd2_buffer_frozen_trigger(jh, source + offset,
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 54a92fd02bb..95b79672150 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -259,11 +259,14 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
259 return rc; 259 return rc;
260} 260}
261 261
262int jffs2_check_acl(struct inode *inode, int mask) 262int jffs2_check_acl(struct inode *inode, int mask, unsigned int flags)
263{ 263{
264 struct posix_acl *acl; 264 struct posix_acl *acl;
265 int rc; 265 int rc;
266 266
267 if (flags & IPERM_FLAG_RCU)
268 return -ECHILD;
269
267 acl = jffs2_get_acl(inode, ACL_TYPE_ACCESS); 270 acl = jffs2_get_acl(inode, ACL_TYPE_ACCESS);
268 if (IS_ERR(acl)) 271 if (IS_ERR(acl))
269 return PTR_ERR(acl); 272 return PTR_ERR(acl);
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h
index 5e42de8d954..3119f59253d 100644
--- a/fs/jffs2/acl.h
+++ b/fs/jffs2/acl.h
@@ -26,7 +26,7 @@ struct jffs2_acl_header {
26 26
27#ifdef CONFIG_JFFS2_FS_POSIX_ACL 27#ifdef CONFIG_JFFS2_FS_POSIX_ACL
28 28
29extern int jffs2_check_acl(struct inode *, int); 29extern int jffs2_check_acl(struct inode *, int, unsigned int);
30extern int jffs2_acl_chmod(struct inode *); 30extern int jffs2_acl_chmod(struct inode *);
31extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *); 31extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *);
32extern int jffs2_init_acl_post(struct inode *); 32extern int jffs2_init_acl_post(struct inode *);
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index c86041b866a..853b8e30008 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -40,11 +40,18 @@ static struct inode *jffs2_alloc_inode(struct super_block *sb)
40 return &f->vfs_inode; 40 return &f->vfs_inode;
41} 41}
42 42
43static void jffs2_destroy_inode(struct inode *inode) 43static void jffs2_i_callback(struct rcu_head *head)
44{ 44{
45 struct inode *inode = container_of(head, struct inode, i_rcu);
46 INIT_LIST_HEAD(&inode->i_dentry);
45 kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode)); 47 kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode));
46} 48}
47 49
50static void jffs2_destroy_inode(struct inode *inode)
51{
52 call_rcu(&inode->i_rcu, jffs2_i_callback);
53}
54
48static void jffs2_i_init_once(void *foo) 55static void jffs2_i_init_once(void *foo)
49{ 56{
50 struct jffs2_inode_info *f = foo; 57 struct jffs2_inode_info *f = foo;
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 1057a4998e4..e5de9422fa3 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -114,10 +114,14 @@ out:
114 return rc; 114 return rc;
115} 115}
116 116
117int jfs_check_acl(struct inode *inode, int mask) 117int jfs_check_acl(struct inode *inode, int mask, unsigned int flags)
118{ 118{
119 struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS); 119 struct posix_acl *acl;
120
121 if (flags & IPERM_FLAG_RCU)
122 return -ECHILD;
120 123
124 acl = jfs_get_acl(inode, ACL_TYPE_ACCESS);
121 if (IS_ERR(acl)) 125 if (IS_ERR(acl))
122 return PTR_ERR(acl); 126 return PTR_ERR(acl);
123 if (acl) { 127 if (acl) {
diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h
index 54e07559878..f9285c4900f 100644
--- a/fs/jfs/jfs_acl.h
+++ b/fs/jfs/jfs_acl.h
@@ -20,7 +20,7 @@
20 20
21#ifdef CONFIG_JFS_POSIX_ACL 21#ifdef CONFIG_JFS_POSIX_ACL
22 22
23int jfs_check_acl(struct inode *, int); 23int jfs_check_acl(struct inode *, int, unsigned int flags);
24int jfs_init_acl(tid_t, struct inode *, struct inode *); 24int jfs_init_acl(tid_t, struct inode *, struct inode *);
25int jfs_acl_chmod(struct inode *inode); 25int jfs_acl_chmod(struct inode *inode);
26 26
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 231ca4af9bc..4414e3a4226 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -18,6 +18,7 @@
18 */ 18 */
19 19
20#include <linux/fs.h> 20#include <linux/fs.h>
21#include <linux/namei.h>
21#include <linux/ctype.h> 22#include <linux/ctype.h>
22#include <linux/quotaops.h> 23#include <linux/quotaops.h>
23#include <linux/exportfs.h> 24#include <linux/exportfs.h>
@@ -1465,7 +1466,7 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc
1465 jfs_info("jfs_lookup: name = %s", name); 1466 jfs_info("jfs_lookup: name = %s", name);
1466 1467
1467 if (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2) 1468 if (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2)
1468 dentry->d_op = &jfs_ci_dentry_operations; 1469 d_set_d_op(dentry, &jfs_ci_dentry_operations);
1469 1470
1470 if ((name[0] == '.') && (len == 1)) 1471 if ((name[0] == '.') && (len == 1))
1471 inum = dip->i_ino; 1472 inum = dip->i_ino;
@@ -1494,7 +1495,7 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc
1494 dentry = d_splice_alias(ip, dentry); 1495 dentry = d_splice_alias(ip, dentry);
1495 1496
1496 if (dentry && (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2)) 1497 if (dentry && (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2))
1497 dentry->d_op = &jfs_ci_dentry_operations; 1498 d_set_d_op(dentry, &jfs_ci_dentry_operations);
1498 1499
1499 return dentry; 1500 return dentry;
1500} 1501}
@@ -1573,7 +1574,8 @@ const struct file_operations jfs_dir_operations = {
1573 .llseek = generic_file_llseek, 1574 .llseek = generic_file_llseek,
1574}; 1575};
1575 1576
1576static int jfs_ci_hash(struct dentry *dir, struct qstr *this) 1577static int jfs_ci_hash(const struct dentry *dir, const struct inode *inode,
1578 struct qstr *this)
1577{ 1579{
1578 unsigned long hash; 1580 unsigned long hash;
1579 int i; 1581 int i;
@@ -1586,32 +1588,63 @@ static int jfs_ci_hash(struct dentry *dir, struct qstr *this)
1586 return 0; 1588 return 0;
1587} 1589}
1588 1590
1589static int jfs_ci_compare(struct dentry *dir, struct qstr *a, struct qstr *b) 1591static int jfs_ci_compare(const struct dentry *parent,
1592 const struct inode *pinode,
1593 const struct dentry *dentry, const struct inode *inode,
1594 unsigned int len, const char *str, const struct qstr *name)
1590{ 1595{
1591 int i, result = 1; 1596 int i, result = 1;
1592 1597
1593 if (a->len != b->len) 1598 if (len != name->len)
1594 goto out; 1599 goto out;
1595 for (i=0; i < a->len; i++) { 1600 for (i=0; i < len; i++) {
1596 if (tolower(a->name[i]) != tolower(b->name[i])) 1601 if (tolower(str[i]) != tolower(name->name[i]))
1597 goto out; 1602 goto out;
1598 } 1603 }
1599 result = 0; 1604 result = 0;
1605out:
1606 return result;
1607}
1600 1608
1609static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd)
1610{
1611 if (nd->flags & LOOKUP_RCU)
1612 return -ECHILD;
1601 /* 1613 /*
1602 * We want creates to preserve case. A negative dentry, a, that 1614 * This is not negative dentry. Always valid.
1603 * has a different case than b may cause a new entry to be created 1615 *
1604 * with the wrong case. Since we can't tell if a comes from a negative 1616 * Note, rename() to existing directory entry will have ->d_inode,
1605 * dentry, we blindly replace it with b. This should be harmless if 1617 * and will use existing name which isn't specified name by user.
1606 * a is not a negative dentry. 1618 *
1619 * We may be able to drop this positive dentry here. But dropping
1620 * positive dentry isn't good idea. So it's unsupported like
1621 * rename("filename", "FILENAME") for now.
1607 */ 1622 */
1608 memcpy((unsigned char *)a->name, b->name, a->len); 1623 if (dentry->d_inode)
1609out: 1624 return 1;
1610 return result; 1625
1626 /*
1627 * This may be nfsd (or something), anyway, we can't see the
1628 * intent of this. So, since this can be for creation, drop it.
1629 */
1630 if (!nd)
1631 return 0;
1632
1633 /*
1634 * Drop the negative dentry, in order to make sure to use the
1635 * case sensitive name which is specified by user if this is
1636 * for creation.
1637 */
1638 if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) {
1639 if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
1640 return 0;
1641 }
1642 return 1;
1611} 1643}
1612 1644
1613const struct dentry_operations jfs_ci_dentry_operations = 1645const struct dentry_operations jfs_ci_dentry_operations =
1614{ 1646{
1615 .d_hash = jfs_ci_hash, 1647 .d_hash = jfs_ci_hash,
1616 .d_compare = jfs_ci_compare, 1648 .d_compare = jfs_ci_compare,
1649 .d_revalidate = jfs_ci_revalidate,
1617}; 1650};
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 0669fc1cc3b..3150d766e0d 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -115,6 +115,14 @@ static struct inode *jfs_alloc_inode(struct super_block *sb)
115 return &jfs_inode->vfs_inode; 115 return &jfs_inode->vfs_inode;
116} 116}
117 117
118static void jfs_i_callback(struct rcu_head *head)
119{
120 struct inode *inode = container_of(head, struct inode, i_rcu);
121 struct jfs_inode_info *ji = JFS_IP(inode);
122 INIT_LIST_HEAD(&inode->i_dentry);
123 kmem_cache_free(jfs_inode_cachep, ji);
124}
125
118static void jfs_destroy_inode(struct inode *inode) 126static void jfs_destroy_inode(struct inode *inode)
119{ 127{
120 struct jfs_inode_info *ji = JFS_IP(inode); 128 struct jfs_inode_info *ji = JFS_IP(inode);
@@ -128,7 +136,7 @@ static void jfs_destroy_inode(struct inode *inode)
128 ji->active_ag = -1; 136 ji->active_ag = -1;
129 } 137 }
130 spin_unlock_irq(&ji->ag_lock); 138 spin_unlock_irq(&ji->ag_lock);
131 kmem_cache_free(jfs_inode_cachep, ji); 139 call_rcu(&inode->i_rcu, jfs_i_callback);
132} 140}
133 141
134static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf) 142static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -517,7 +525,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
517 goto out_no_root; 525 goto out_no_root;
518 526
519 if (sbi->mntflag & JFS_OS2) 527 if (sbi->mntflag & JFS_OS2)
520 sb->s_root->d_op = &jfs_ci_dentry_operations; 528 d_set_d_op(sb->s_root, &jfs_ci_dentry_operations);
521 529
522 /* logical blocks are represented by 40 bits in pxd_t, etc. */ 530 /* logical blocks are represented by 40 bits in pxd_t, etc. */
523 sb->s_maxbytes = ((u64) sb->s_blocksize) << 40; 531 sb->s_maxbytes = ((u64) sb->s_blocksize) << 40;
diff --git a/fs/libfs.c b/fs/libfs.c
index a3accdf528a..889311e3d06 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -16,6 +16,11 @@
16 16
17#include <asm/uaccess.h> 17#include <asm/uaccess.h>
18 18
19static inline int simple_positive(struct dentry *dentry)
20{
21 return dentry->d_inode && !d_unhashed(dentry);
22}
23
19int simple_getattr(struct vfsmount *mnt, struct dentry *dentry, 24int simple_getattr(struct vfsmount *mnt, struct dentry *dentry,
20 struct kstat *stat) 25 struct kstat *stat)
21{ 26{
@@ -37,7 +42,7 @@ int simple_statfs(struct dentry *dentry, struct kstatfs *buf)
37 * Retaining negative dentries for an in-memory filesystem just wastes 42 * Retaining negative dentries for an in-memory filesystem just wastes
38 * memory and lookup time: arrange for them to be deleted immediately. 43 * memory and lookup time: arrange for them to be deleted immediately.
39 */ 44 */
40static int simple_delete_dentry(struct dentry *dentry) 45static int simple_delete_dentry(const struct dentry *dentry)
41{ 46{
42 return 1; 47 return 1;
43} 48}
@@ -54,7 +59,7 @@ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct na
54 59
55 if (dentry->d_name.len > NAME_MAX) 60 if (dentry->d_name.len > NAME_MAX)
56 return ERR_PTR(-ENAMETOOLONG); 61 return ERR_PTR(-ENAMETOOLONG);
57 dentry->d_op = &simple_dentry_operations; 62 d_set_d_op(dentry, &simple_dentry_operations);
58 d_add(dentry, NULL); 63 d_add(dentry, NULL);
59 return NULL; 64 return NULL;
60} 65}
@@ -76,7 +81,8 @@ int dcache_dir_close(struct inode *inode, struct file *file)
76 81
77loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin) 82loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
78{ 83{
79 mutex_lock(&file->f_path.dentry->d_inode->i_mutex); 84 struct dentry *dentry = file->f_path.dentry;
85 mutex_lock(&dentry->d_inode->i_mutex);
80 switch (origin) { 86 switch (origin) {
81 case 1: 87 case 1:
82 offset += file->f_pos; 88 offset += file->f_pos;
@@ -84,7 +90,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
84 if (offset >= 0) 90 if (offset >= 0)
85 break; 91 break;
86 default: 92 default:
87 mutex_unlock(&file->f_path.dentry->d_inode->i_mutex); 93 mutex_unlock(&dentry->d_inode->i_mutex);
88 return -EINVAL; 94 return -EINVAL;
89 } 95 }
90 if (offset != file->f_pos) { 96 if (offset != file->f_pos) {
@@ -94,21 +100,24 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
94 struct dentry *cursor = file->private_data; 100 struct dentry *cursor = file->private_data;
95 loff_t n = file->f_pos - 2; 101 loff_t n = file->f_pos - 2;
96 102
97 spin_lock(&dcache_lock); 103 spin_lock(&dentry->d_lock);
104 /* d_lock not required for cursor */
98 list_del(&cursor->d_u.d_child); 105 list_del(&cursor->d_u.d_child);
99 p = file->f_path.dentry->d_subdirs.next; 106 p = dentry->d_subdirs.next;
100 while (n && p != &file->f_path.dentry->d_subdirs) { 107 while (n && p != &dentry->d_subdirs) {
101 struct dentry *next; 108 struct dentry *next;
102 next = list_entry(p, struct dentry, d_u.d_child); 109 next = list_entry(p, struct dentry, d_u.d_child);
103 if (!d_unhashed(next) && next->d_inode) 110 spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
111 if (simple_positive(next))
104 n--; 112 n--;
113 spin_unlock(&next->d_lock);
105 p = p->next; 114 p = p->next;
106 } 115 }
107 list_add_tail(&cursor->d_u.d_child, p); 116 list_add_tail(&cursor->d_u.d_child, p);
108 spin_unlock(&dcache_lock); 117 spin_unlock(&dentry->d_lock);
109 } 118 }
110 } 119 }
111 mutex_unlock(&file->f_path.dentry->d_inode->i_mutex); 120 mutex_unlock(&dentry->d_inode->i_mutex);
112 return offset; 121 return offset;
113} 122}
114 123
@@ -148,29 +157,35 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
148 i++; 157 i++;
149 /* fallthrough */ 158 /* fallthrough */
150 default: 159 default:
151 spin_lock(&dcache_lock); 160 spin_lock(&dentry->d_lock);
152 if (filp->f_pos == 2) 161 if (filp->f_pos == 2)
153 list_move(q, &dentry->d_subdirs); 162 list_move(q, &dentry->d_subdirs);
154 163
155 for (p=q->next; p != &dentry->d_subdirs; p=p->next) { 164 for (p=q->next; p != &dentry->d_subdirs; p=p->next) {
156 struct dentry *next; 165 struct dentry *next;
157 next = list_entry(p, struct dentry, d_u.d_child); 166 next = list_entry(p, struct dentry, d_u.d_child);
158 if (d_unhashed(next) || !next->d_inode) 167 spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
168 if (!simple_positive(next)) {
169 spin_unlock(&next->d_lock);
159 continue; 170 continue;
171 }
160 172
161 spin_unlock(&dcache_lock); 173 spin_unlock(&next->d_lock);
174 spin_unlock(&dentry->d_lock);
162 if (filldir(dirent, next->d_name.name, 175 if (filldir(dirent, next->d_name.name,
163 next->d_name.len, filp->f_pos, 176 next->d_name.len, filp->f_pos,
164 next->d_inode->i_ino, 177 next->d_inode->i_ino,
165 dt_type(next->d_inode)) < 0) 178 dt_type(next->d_inode)) < 0)
166 return 0; 179 return 0;
167 spin_lock(&dcache_lock); 180 spin_lock(&dentry->d_lock);
181 spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
168 /* next is still alive */ 182 /* next is still alive */
169 list_move(q, p); 183 list_move(q, p);
184 spin_unlock(&next->d_lock);
170 p = q; 185 p = q;
171 filp->f_pos++; 186 filp->f_pos++;
172 } 187 }
173 spin_unlock(&dcache_lock); 188 spin_unlock(&dentry->d_lock);
174 } 189 }
175 return 0; 190 return 0;
176} 191}
@@ -259,23 +274,23 @@ int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *den
259 return 0; 274 return 0;
260} 275}
261 276
262static inline int simple_positive(struct dentry *dentry)
263{
264 return dentry->d_inode && !d_unhashed(dentry);
265}
266
267int simple_empty(struct dentry *dentry) 277int simple_empty(struct dentry *dentry)
268{ 278{
269 struct dentry *child; 279 struct dentry *child;
270 int ret = 0; 280 int ret = 0;
271 281
272 spin_lock(&dcache_lock); 282 spin_lock(&dentry->d_lock);
273 list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) 283 list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) {
274 if (simple_positive(child)) 284 spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
285 if (simple_positive(child)) {
286 spin_unlock(&child->d_lock);
275 goto out; 287 goto out;
288 }
289 spin_unlock(&child->d_lock);
290 }
276 ret = 1; 291 ret = 1;
277out: 292out:
278 spin_unlock(&dcache_lock); 293 spin_unlock(&dentry->d_lock);
279 return ret; 294 return ret;
280} 295}
281 296
diff --git a/fs/lockd/Makefile b/fs/lockd/Makefile
index 97f6073ab33..ca58d64374c 100644
--- a/fs/lockd/Makefile
+++ b/fs/lockd/Makefile
@@ -4,7 +4,7 @@
4 4
5obj-$(CONFIG_LOCKD) += lockd.o 5obj-$(CONFIG_LOCKD) += lockd.o
6 6
7lockd-objs-y := clntlock.o clntproc.o host.o svc.o svclock.o svcshare.o \ 7lockd-objs-y := clntlock.o clntproc.o clntxdr.o host.o svc.o svclock.o \
8 svcproc.o svcsubs.o mon.o xdr.o grace.o 8 svcshare.o svcproc.o svcsubs.o mon.o xdr.o grace.o
9lockd-objs-$(CONFIG_LOCKD_V4) += xdr4.o svc4proc.o 9lockd-objs-$(CONFIG_LOCKD_V4) += clnt4xdr.o xdr4.o svc4proc.o
10lockd-objs := $(lockd-objs-y) 10lockd-objs := $(lockd-objs-y)
diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
new file mode 100644
index 00000000000..f848b52c67b
--- /dev/null
+++ b/fs/lockd/clnt4xdr.c
@@ -0,0 +1,605 @@
1/*
2 * linux/fs/lockd/clnt4xdr.c
3 *
4 * XDR functions to encode/decode NLM version 4 RPC arguments and results.
5 *
6 * NLM client-side only.
7 *
8 * Copyright (C) 2010, Oracle. All rights reserved.
9 */
10
11#include <linux/types.h>
12#include <linux/sunrpc/xdr.h>
13#include <linux/sunrpc/clnt.h>
14#include <linux/sunrpc/stats.h>
15#include <linux/lockd/lockd.h>
16
17#define NLMDBG_FACILITY NLMDBG_XDR
18
19#if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ)
20# error "NLM host name cannot be larger than XDR_MAX_NETOBJ!"
21#endif
22
23#if (NLMCLNT_OHSIZE > NLM_MAXSTRLEN)
24# error "NLM host name cannot be larger than NLM's maximum string length!"
25#endif
26
/*
 * Buffer space needed by each NLMv4 argument and reply type,
 * expressed in 32-bit XDR words.
 */

/* Basic building blocks */
#define NLM4_void_sz		(0)
#define NLM4_cookie_sz		(1 + (NLM_MAXCOOKIELEN >> 2))
#define NLM4_caller_sz		(1 + (NLMCLNT_OHSIZE >> 2))
#define NLM4_owner_sz		(1 + (NLMCLNT_OHSIZE >> 2))
#define NLM4_fhandle_sz		(1 + (NFS3_FHSIZE >> 2))
#define NLM4_lock_sz		(5 + NLM4_caller_sz + NLM4_owner_sz + NLM4_fhandle_sz)
#define NLM4_holder_sz		(6 + NLM4_owner_sz)

/* Call arguments */
#define NLM4_testargs_sz	(NLM4_cookie_sz + 1 + NLM4_lock_sz)
#define NLM4_lockargs_sz	(NLM4_cookie_sz + 4 + NLM4_lock_sz)
#define NLM4_cancargs_sz	(NLM4_cookie_sz + 2 + NLM4_lock_sz)
#define NLM4_unlockargs_sz	(NLM4_cookie_sz + NLM4_lock_sz)

/* Replies */
#define NLM4_testres_sz		(NLM4_cookie_sz + 1 + NLM4_holder_sz)
#define NLM4_res_sz		(NLM4_cookie_sz + 1)
#define NLM4_norep_sz		(0)
47
48
49static s64 loff_t_to_s64(loff_t offset)
50{
51 s64 res;
52
53 if (offset >= NLM4_OFFSET_MAX)
54 res = NLM4_OFFSET_MAX;
55 else if (offset <= -NLM4_OFFSET_MAX)
56 res = -NLM4_OFFSET_MAX;
57 else
58 res = offset;
59 return res;
60}
61
62static void nlm4_compute_offsets(const struct nlm_lock *lock,
63 u64 *l_offset, u64 *l_len)
64{
65 const struct file_lock *fl = &lock->fl;
66
67 BUG_ON(fl->fl_start > NLM4_OFFSET_MAX);
68 BUG_ON(fl->fl_end > NLM4_OFFSET_MAX &&
69 fl->fl_end != OFFSET_MAX);
70
71 *l_offset = loff_t_to_s64(fl->fl_start);
72 if (fl->fl_end == OFFSET_MAX)
73 *l_len = 0;
74 else
75 *l_len = loff_t_to_s64(fl->fl_end - fl->fl_start + 1);
76}
77
78/*
79 * Handle decode buffer overflows out-of-line.
80 */
81static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
82{
83 dprintk("lockd: %s prematurely hit the end of our receive buffer. "
84 "Remaining buffer length is %tu words.\n",
85 func, xdr->end - xdr->p);
86}
87
88
89/*
90 * Encode/decode NLMv4 basic data types
91 *
92 * Basic NLMv4 data types are defined in Appendix II, section 6.1.4
93 * of RFC 1813: "NFS Version 3 Protocol Specification" and in Chapter
94 * 10 of X/Open's "Protocols for Interworking: XNFS, Version 3W".
95 *
96 * Not all basic data types have their own encoding and decoding
97 * functions. For run-time efficiency, some data types are encoded
98 * or decoded inline.
99 */
100
101static void encode_bool(struct xdr_stream *xdr, const int value)
102{
103 __be32 *p;
104
105 p = xdr_reserve_space(xdr, 4);
106 *p = value ? xdr_one : xdr_zero;
107}
108
109static void encode_int32(struct xdr_stream *xdr, const s32 value)
110{
111 __be32 *p;
112
113 p = xdr_reserve_space(xdr, 4);
114 *p = cpu_to_be32(value);
115}
116
117/*
118 * typedef opaque netobj<MAXNETOBJ_SZ>
119 */
120static void encode_netobj(struct xdr_stream *xdr,
121 const u8 *data, const unsigned int length)
122{
123 __be32 *p;
124
125 BUG_ON(length > XDR_MAX_NETOBJ);
126 p = xdr_reserve_space(xdr, 4 + length);
127 xdr_encode_opaque(p, data, length);
128}
129
130static int decode_netobj(struct xdr_stream *xdr,
131 struct xdr_netobj *obj)
132{
133 u32 length;
134 __be32 *p;
135
136 p = xdr_inline_decode(xdr, 4);
137 if (unlikely(p == NULL))
138 goto out_overflow;
139 length = be32_to_cpup(p++);
140 if (unlikely(length > XDR_MAX_NETOBJ))
141 goto out_size;
142 obj->len = length;
143 obj->data = (u8 *)p;
144 return 0;
145out_size:
146 dprintk("NFS: returned netobj was too long: %u\n", length);
147 return -EIO;
148out_overflow:
149 print_overflow_msg(__func__, xdr);
150 return -EIO;
151}
152
153/*
154 * netobj cookie;
155 */
156static void encode_cookie(struct xdr_stream *xdr,
157 const struct nlm_cookie *cookie)
158{
159 BUG_ON(cookie->len > NLM_MAXCOOKIELEN);
160 encode_netobj(xdr, (u8 *)&cookie->data, cookie->len);
161}
162
163static int decode_cookie(struct xdr_stream *xdr,
164 struct nlm_cookie *cookie)
165{
166 u32 length;
167 __be32 *p;
168
169 p = xdr_inline_decode(xdr, 4);
170 if (unlikely(p == NULL))
171 goto out_overflow;
172 length = be32_to_cpup(p++);
173 /* apparently HPUX can return empty cookies */
174 if (length == 0)
175 goto out_hpux;
176 if (length > NLM_MAXCOOKIELEN)
177 goto out_size;
178 p = xdr_inline_decode(xdr, length);
179 if (unlikely(p == NULL))
180 goto out_overflow;
181 cookie->len = length;
182 memcpy(cookie->data, p, length);
183 return 0;
184out_hpux:
185 cookie->len = 4;
186 memset(cookie->data, 0, 4);
187 return 0;
188out_size:
189 dprintk("NFS: returned cookie was too long: %u\n", length);
190 return -EIO;
191out_overflow:
192 print_overflow_msg(__func__, xdr);
193 return -EIO;
194}
195
196/*
197 * netobj fh;
198 */
199static void encode_fh(struct xdr_stream *xdr, const struct nfs_fh *fh)
200{
201 BUG_ON(fh->size > NFS3_FHSIZE);
202 encode_netobj(xdr, (u8 *)&fh->data, fh->size);
203}
204
205/*
206 * enum nlm4_stats {
207 * NLM4_GRANTED = 0,
208 * NLM4_DENIED = 1,
209 * NLM4_DENIED_NOLOCKS = 2,
210 * NLM4_BLOCKED = 3,
211 * NLM4_DENIED_GRACE_PERIOD = 4,
212 * NLM4_DEADLCK = 5,
213 * NLM4_ROFS = 6,
214 * NLM4_STALE_FH = 7,
215 * NLM4_FBIG = 8,
216 * NLM4_FAILED = 9
217 * };
218 *
219 * struct nlm4_stat {
220 * nlm4_stats stat;
221 * };
222 *
223 * NB: we don't swap bytes for the NLM status values. The upper
224 * layers deal directly with the status value in network byte
225 * order.
226 */
227static void encode_nlm4_stat(struct xdr_stream *xdr,
228 const __be32 stat)
229{
230 __be32 *p;
231
232 BUG_ON(be32_to_cpu(stat) > NLM_FAILED);
233 p = xdr_reserve_space(xdr, 4);
234 *p = stat;
235}
236
237static int decode_nlm4_stat(struct xdr_stream *xdr, __be32 *stat)
238{
239 __be32 *p;
240
241 p = xdr_inline_decode(xdr, 4);
242 if (unlikely(p == NULL))
243 goto out_overflow;
244 if (unlikely(*p > nlm4_failed))
245 goto out_bad_xdr;
246 *stat = *p;
247 return 0;
248out_bad_xdr:
249 dprintk("%s: server returned invalid nlm4_stats value: %u\n",
250 __func__, be32_to_cpup(p));
251 return -EIO;
252out_overflow:
253 print_overflow_msg(__func__, xdr);
254 return -EIO;
255}
256
257/*
258 * struct nlm4_holder {
259 * bool exclusive;
260 * int32 svid;
261 * netobj oh;
262 * uint64 l_offset;
263 * uint64 l_len;
264 * };
265 */
266static void encode_nlm4_holder(struct xdr_stream *xdr,
267 const struct nlm_res *result)
268{
269 const struct nlm_lock *lock = &result->lock;
270 u64 l_offset, l_len;
271 __be32 *p;
272
273 encode_bool(xdr, lock->fl.fl_type == F_RDLCK);
274 encode_int32(xdr, lock->svid);
275 encode_netobj(xdr, lock->oh.data, lock->oh.len);
276
277 p = xdr_reserve_space(xdr, 4 + 4);
278 nlm4_compute_offsets(lock, &l_offset, &l_len);
279 p = xdr_encode_hyper(p, l_offset);
280 xdr_encode_hyper(p, l_len);
281}
282
283static int decode_nlm4_holder(struct xdr_stream *xdr, struct nlm_res *result)
284{
285 struct nlm_lock *lock = &result->lock;
286 struct file_lock *fl = &lock->fl;
287 u64 l_offset, l_len;
288 u32 exclusive;
289 int error;
290 __be32 *p;
291 s32 end;
292
293 memset(lock, 0, sizeof(*lock));
294 locks_init_lock(fl);
295
296 p = xdr_inline_decode(xdr, 4 + 4);
297 if (unlikely(p == NULL))
298 goto out_overflow;
299 exclusive = be32_to_cpup(p++);
300 lock->svid = be32_to_cpup(p);
301 fl->fl_pid = (pid_t)lock->svid;
302
303 error = decode_netobj(xdr, &lock->oh);
304 if (unlikely(error))
305 goto out;
306
307 p = xdr_inline_decode(xdr, 8 + 8);
308 if (unlikely(p == NULL))
309 goto out_overflow;
310
311 fl->fl_flags = FL_POSIX;
312 fl->fl_type = exclusive != 0 ? F_WRLCK : F_RDLCK;
313 p = xdr_decode_hyper(p, &l_offset);
314 xdr_decode_hyper(p, &l_len);
315 end = l_offset + l_len - 1;
316
317 fl->fl_start = (loff_t)l_offset;
318 if (l_len == 0 || end < 0)
319 fl->fl_end = OFFSET_MAX;
320 else
321 fl->fl_end = (loff_t)end;
322 error = 0;
323out:
324 return error;
325out_overflow:
326 print_overflow_msg(__func__, xdr);
327 return -EIO;
328}
329
330/*
331 * string caller_name<LM_MAXSTRLEN>;
332 */
333static void encode_caller_name(struct xdr_stream *xdr, const char *name)
334{
335 /* NB: client-side does not set lock->len */
336 u32 length = strlen(name);
337 __be32 *p;
338
339 BUG_ON(length > NLM_MAXSTRLEN);
340 p = xdr_reserve_space(xdr, 4 + length);
341 xdr_encode_opaque(p, name, length);
342}
343
344/*
345 * struct nlm4_lock {
346 * string caller_name<LM_MAXSTRLEN>;
347 * netobj fh;
348 * netobj oh;
349 * int32 svid;
350 * uint64 l_offset;
351 * uint64 l_len;
352 * };
353 */
354static void encode_nlm4_lock(struct xdr_stream *xdr,
355 const struct nlm_lock *lock)
356{
357 u64 l_offset, l_len;
358 __be32 *p;
359
360 encode_caller_name(xdr, lock->caller);
361 encode_fh(xdr, &lock->fh);
362 encode_netobj(xdr, lock->oh.data, lock->oh.len);
363
364 p = xdr_reserve_space(xdr, 4 + 8 + 8);
365 *p++ = cpu_to_be32(lock->svid);
366
367 nlm4_compute_offsets(lock, &l_offset, &l_len);
368 p = xdr_encode_hyper(p, l_offset);
369 xdr_encode_hyper(p, l_len);
370}
371
372
373/*
374 * NLMv4 XDR encode functions
375 *
376 * NLMv4 argument types are defined in Appendix II of RFC 1813:
377 * "NFS Version 3 Protocol Specification" and Chapter 10 of X/Open's
378 * "Protocols for Interworking: XNFS, Version 3W".
379 */
380
381/*
382 * struct nlm4_testargs {
383 * netobj cookie;
384 * bool exclusive;
385 * struct nlm4_lock alock;
386 * };
387 */
388static void nlm4_xdr_enc_testargs(struct rpc_rqst *req,
389 struct xdr_stream *xdr,
390 const struct nlm_args *args)
391{
392 const struct nlm_lock *lock = &args->lock;
393
394 encode_cookie(xdr, &args->cookie);
395 encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
396 encode_nlm4_lock(xdr, lock);
397}
398
399/*
400 * struct nlm4_lockargs {
401 * netobj cookie;
402 * bool block;
403 * bool exclusive;
404 * struct nlm4_lock alock;
405 * bool reclaim;
406 * int state;
407 * };
408 */
409static void nlm4_xdr_enc_lockargs(struct rpc_rqst *req,
410 struct xdr_stream *xdr,
411 const struct nlm_args *args)
412{
413 const struct nlm_lock *lock = &args->lock;
414
415 encode_cookie(xdr, &args->cookie);
416 encode_bool(xdr, args->block);
417 encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
418 encode_nlm4_lock(xdr, lock);
419 encode_bool(xdr, args->reclaim);
420 encode_int32(xdr, args->state);
421}
422
423/*
424 * struct nlm4_cancargs {
425 * netobj cookie;
426 * bool block;
427 * bool exclusive;
428 * struct nlm4_lock alock;
429 * };
430 */
431static void nlm4_xdr_enc_cancargs(struct rpc_rqst *req,
432 struct xdr_stream *xdr,
433 const struct nlm_args *args)
434{
435 const struct nlm_lock *lock = &args->lock;
436
437 encode_cookie(xdr, &args->cookie);
438 encode_bool(xdr, args->block);
439 encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
440 encode_nlm4_lock(xdr, lock);
441}
442
443/*
444 * struct nlm4_unlockargs {
445 * netobj cookie;
446 * struct nlm4_lock alock;
447 * };
448 */
449static void nlm4_xdr_enc_unlockargs(struct rpc_rqst *req,
450 struct xdr_stream *xdr,
451 const struct nlm_args *args)
452{
453 const struct nlm_lock *lock = &args->lock;
454
455 encode_cookie(xdr, &args->cookie);
456 encode_nlm4_lock(xdr, lock);
457}
458
459/*
460 * struct nlm4_res {
461 * netobj cookie;
462 * nlm4_stat stat;
463 * };
464 */
465static void nlm4_xdr_enc_res(struct rpc_rqst *req,
466 struct xdr_stream *xdr,
467 const struct nlm_res *result)
468{
469 encode_cookie(xdr, &result->cookie);
470 encode_nlm4_stat(xdr, result->status);
471}
472
473/*
474 * union nlm4_testrply switch (nlm4_stats stat) {
475 * case NLM4_DENIED:
476 * struct nlm4_holder holder;
477 * default:
478 * void;
479 * };
480 *
481 * struct nlm4_testres {
482 * netobj cookie;
483 * nlm4_testrply test_stat;
484 * };
485 */
486static void nlm4_xdr_enc_testres(struct rpc_rqst *req,
487 struct xdr_stream *xdr,
488 const struct nlm_res *result)
489{
490 encode_cookie(xdr, &result->cookie);
491 encode_nlm4_stat(xdr, result->status);
492 if (result->status == nlm_lck_denied)
493 encode_nlm4_holder(xdr, result);
494}
495
496
497/*
498 * NLMv4 XDR decode functions
499 *
500 * NLMv4 argument types are defined in Appendix II of RFC 1813:
501 * "NFS Version 3 Protocol Specification" and Chapter 10 of X/Open's
502 * "Protocols for Interworking: XNFS, Version 3W".
503 */
504
505/*
506 * union nlm4_testrply switch (nlm4_stats stat) {
507 * case NLM4_DENIED:
508 * struct nlm4_holder holder;
509 * default:
510 * void;
511 * };
512 *
513 * struct nlm4_testres {
514 * netobj cookie;
515 * nlm4_testrply test_stat;
516 * };
517 */
518static int decode_nlm4_testrply(struct xdr_stream *xdr,
519 struct nlm_res *result)
520{
521 int error;
522
523 error = decode_nlm4_stat(xdr, &result->status);
524 if (unlikely(error))
525 goto out;
526 if (result->status == nlm_lck_denied)
527 error = decode_nlm4_holder(xdr, result);
528out:
529 return error;
530}
531
532static int nlm4_xdr_dec_testres(struct rpc_rqst *req,
533 struct xdr_stream *xdr,
534 struct nlm_res *result)
535{
536 int error;
537
538 error = decode_cookie(xdr, &result->cookie);
539 if (unlikely(error))
540 goto out;
541 error = decode_nlm4_testrply(xdr, result);
542out:
543 return error;
544}
545
546/*
547 * struct nlm4_res {
548 * netobj cookie;
549 * nlm4_stat stat;
550 * };
551 */
552static int nlm4_xdr_dec_res(struct rpc_rqst *req,
553 struct xdr_stream *xdr,
554 struct nlm_res *result)
555{
556 int error;
557
558 error = decode_cookie(xdr, &result->cookie);
559 if (unlikely(error))
560 goto out;
561 error = decode_nlm4_stat(xdr, &result->status);
562out:
563 return error;
564}
565
566
567/*
568 * For NLM, a void procedure really returns nothing
569 */
570#define nlm4_xdr_dec_norep NULL
571
572#define PROC(proc, argtype, restype) \
573[NLMPROC_##proc] = { \
574 .p_proc = NLMPROC_##proc, \
575 .p_encode = (kxdreproc_t)nlm4_xdr_enc_##argtype, \
576 .p_decode = (kxdrdproc_t)nlm4_xdr_dec_##restype, \
577 .p_arglen = NLM4_##argtype##_sz, \
578 .p_replen = NLM4_##restype##_sz, \
579 .p_statidx = NLMPROC_##proc, \
580 .p_name = #proc, \
581 }
582
583static struct rpc_procinfo nlm4_procedures[] = {
584 PROC(TEST, testargs, testres),
585 PROC(LOCK, lockargs, res),
586 PROC(CANCEL, cancargs, res),
587 PROC(UNLOCK, unlockargs, res),
588 PROC(GRANTED, testargs, res),
589 PROC(TEST_MSG, testargs, norep),
590 PROC(LOCK_MSG, lockargs, norep),
591 PROC(CANCEL_MSG, cancargs, norep),
592 PROC(UNLOCK_MSG, unlockargs, norep),
593 PROC(GRANTED_MSG, testargs, norep),
594 PROC(TEST_RES, testres, norep),
595 PROC(LOCK_RES, res, norep),
596 PROC(CANCEL_RES, res, norep),
597 PROC(UNLOCK_RES, res, norep),
598 PROC(GRANTED_RES, res, norep),
599};
600
601struct rpc_version nlm_version4 = {
602 .number = 4,
603 .nrprocs = ARRAY_SIZE(nlm4_procedures),
604 .procs = nlm4_procedures,
605};
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 25509eb28fd..8d4ea8351e3 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -79,7 +79,7 @@ EXPORT_SYMBOL_GPL(nlmclnt_init);
79 */ 79 */
80void nlmclnt_done(struct nlm_host *host) 80void nlmclnt_done(struct nlm_host *host)
81{ 81{
82 nlm_release_host(host); 82 nlmclnt_release_host(host);
83 lockd_down(); 83 lockd_down();
84} 84}
85EXPORT_SYMBOL_GPL(nlmclnt_done); 85EXPORT_SYMBOL_GPL(nlmclnt_done);
@@ -273,7 +273,7 @@ restart:
273 spin_unlock(&nlm_blocked_lock); 273 spin_unlock(&nlm_blocked_lock);
274 274
275 /* Release host handle after use */ 275 /* Release host handle after use */
276 nlm_release_host(host); 276 nlmclnt_release_host(host);
277 lockd_down(); 277 lockd_down();
278 return 0; 278 return 0;
279} 279}
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 332c54cf75e..adb45ec9038 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -58,7 +58,7 @@ static void nlm_put_lockowner(struct nlm_lockowner *lockowner)
58 return; 58 return;
59 list_del(&lockowner->list); 59 list_del(&lockowner->list);
60 spin_unlock(&lockowner->host->h_lock); 60 spin_unlock(&lockowner->host->h_lock);
61 nlm_release_host(lockowner->host); 61 nlmclnt_release_host(lockowner->host);
62 kfree(lockowner); 62 kfree(lockowner);
63} 63}
64 64
@@ -207,22 +207,22 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
207 printk("nlm_alloc_call: failed, waiting for memory\n"); 207 printk("nlm_alloc_call: failed, waiting for memory\n");
208 schedule_timeout_interruptible(5*HZ); 208 schedule_timeout_interruptible(5*HZ);
209 } 209 }
210 nlm_release_host(host); 210 nlmclnt_release_host(host);
211 return NULL; 211 return NULL;
212} 212}
213 213
214void nlm_release_call(struct nlm_rqst *call) 214void nlmclnt_release_call(struct nlm_rqst *call)
215{ 215{
216 if (!atomic_dec_and_test(&call->a_count)) 216 if (!atomic_dec_and_test(&call->a_count))
217 return; 217 return;
218 nlm_release_host(call->a_host); 218 nlmclnt_release_host(call->a_host);
219 nlmclnt_release_lockargs(call); 219 nlmclnt_release_lockargs(call);
220 kfree(call); 220 kfree(call);
221} 221}
222 222
223static void nlmclnt_rpc_release(void *data) 223static void nlmclnt_rpc_release(void *data)
224{ 224{
225 nlm_release_call(data); 225 nlmclnt_release_call(data);
226} 226}
227 227
228static int nlm_wait_on_grace(wait_queue_head_t *queue) 228static int nlm_wait_on_grace(wait_queue_head_t *queue)
@@ -436,7 +436,7 @@ nlmclnt_test(struct nlm_rqst *req, struct file_lock *fl)
436 status = nlm_stat_to_errno(req->a_res.status); 436 status = nlm_stat_to_errno(req->a_res.status);
437 } 437 }
438out: 438out:
439 nlm_release_call(req); 439 nlmclnt_release_call(req);
440 return status; 440 return status;
441} 441}
442 442
@@ -593,7 +593,7 @@ again:
593out_unblock: 593out_unblock:
594 nlmclnt_finish_block(block); 594 nlmclnt_finish_block(block);
595out: 595out:
596 nlm_release_call(req); 596 nlmclnt_release_call(req);
597 return status; 597 return status;
598out_unlock: 598out_unlock:
599 /* Fatal error: ensure that we remove the lock altogether */ 599 /* Fatal error: ensure that we remove the lock altogether */
@@ -694,7 +694,7 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
694 /* What to do now? I'm out of my depth... */ 694 /* What to do now? I'm out of my depth... */
695 status = -ENOLCK; 695 status = -ENOLCK;
696out: 696out:
697 nlm_release_call(req); 697 nlmclnt_release_call(req);
698 return status; 698 return status;
699} 699}
700 700
@@ -755,7 +755,7 @@ static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl
755 NLMPROC_CANCEL, &nlmclnt_cancel_ops); 755 NLMPROC_CANCEL, &nlmclnt_cancel_ops);
756 if (status == 0 && req->a_res.status == nlm_lck_denied) 756 if (status == 0 && req->a_res.status == nlm_lck_denied)
757 status = -ENOLCK; 757 status = -ENOLCK;
758 nlm_release_call(req); 758 nlmclnt_release_call(req);
759 return status; 759 return status;
760} 760}
761 761
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
new file mode 100644
index 00000000000..180ac34feb9
--- /dev/null
+++ b/fs/lockd/clntxdr.c
@@ -0,0 +1,627 @@
1/*
2 * linux/fs/lockd/clntxdr.c
3 *
4 * XDR functions to encode/decode NLM version 3 RPC arguments and results.
5 * NLM version 3 is backwards compatible with NLM versions 1 and 2.
6 *
7 * NLM client-side only.
8 *
9 * Copyright (C) 2010, Oracle. All rights reserved.
10 */
11
12#include <linux/types.h>
13#include <linux/sunrpc/xdr.h>
14#include <linux/sunrpc/clnt.h>
15#include <linux/sunrpc/stats.h>
16#include <linux/lockd/lockd.h>
17
18#define NLMDBG_FACILITY NLMDBG_XDR
19
20#if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ)
21# error "NLM host name cannot be larger than XDR_MAX_NETOBJ!"
22#endif
23
/*
 * Space requirements of NLM arguments and replies, expressed as a
 * number of 32-bit words.
 */

/* variable-length items: one length word plus the data */
#define NLM_cookie_sz		(1 + (NLM_MAXCOOKIELEN >> 2))
#define NLM_caller_sz		(1 + (NLMCLNT_OHSIZE >> 2))
#define NLM_owner_sz		(1 + (NLMCLNT_OHSIZE >> 2))
#define NLM_fhandle_sz		(1 + (NFS2_FHSIZE >> 2))

/* compound structures */
#define NLM_lock_sz		(3 + NLM_caller_sz + NLM_owner_sz + NLM_fhandle_sz)
#define NLM_holder_sz		(4 + NLM_owner_sz)

/* procedure arguments */
#define NLM_testargs_sz		(NLM_cookie_sz + 1 + NLM_lock_sz)
#define NLM_lockargs_sz		(NLM_cookie_sz + 4 + NLM_lock_sz)
#define NLM_cancargs_sz		(NLM_cookie_sz + 2 + NLM_lock_sz)
#define NLM_unlockargs_sz	(NLM_cookie_sz + NLM_lock_sz)

/* procedure results */
#define NLM_testres_sz		(NLM_cookie_sz + 1 + NLM_holder_sz)
#define NLM_res_sz		(NLM_cookie_sz + 1)
#define NLM_norep_sz		(0)
43
44
45static s32 loff_t_to_s32(loff_t offset)
46{
47 s32 res;
48
49 if (offset >= NLM_OFFSET_MAX)
50 res = NLM_OFFSET_MAX;
51 else if (offset <= -NLM_OFFSET_MAX)
52 res = -NLM_OFFSET_MAX;
53 else
54 res = offset;
55 return res;
56}
57
58static void nlm_compute_offsets(const struct nlm_lock *lock,
59 u32 *l_offset, u32 *l_len)
60{
61 const struct file_lock *fl = &lock->fl;
62
63 BUG_ON(fl->fl_start > NLM_OFFSET_MAX);
64 BUG_ON(fl->fl_end > NLM_OFFSET_MAX &&
65 fl->fl_end != OFFSET_MAX);
66
67 *l_offset = loff_t_to_s32(fl->fl_start);
68 if (fl->fl_end == OFFSET_MAX)
69 *l_len = 0;
70 else
71 *l_len = loff_t_to_s32(fl->fl_end - fl->fl_start + 1);
72}
73
74/*
75 * Handle decode buffer overflows out-of-line.
76 */
77static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
78{
79 dprintk("lockd: %s prematurely hit the end of our receive buffer. "
80 "Remaining buffer length is %tu words.\n",
81 func, xdr->end - xdr->p);
82}
83
84
85/*
86 * Encode/decode NLMv3 basic data types
87 *
88 * Basic NLMv3 data types are not defined in an IETF standards
89 * document. X/Open has a description of these data types that
90 * is useful. See Chapter 10 of "Protocols for Interworking:
91 * XNFS, Version 3W".
92 *
93 * Not all basic data types have their own encoding and decoding
94 * functions. For run-time efficiency, some data types are encoded
95 * or decoded inline.
96 */
97
98static void encode_bool(struct xdr_stream *xdr, const int value)
99{
100 __be32 *p;
101
102 p = xdr_reserve_space(xdr, 4);
103 *p = value ? xdr_one : xdr_zero;
104}
105
106static void encode_int32(struct xdr_stream *xdr, const s32 value)
107{
108 __be32 *p;
109
110 p = xdr_reserve_space(xdr, 4);
111 *p = cpu_to_be32(value);
112}
113
114/*
115 * typedef opaque netobj<MAXNETOBJ_SZ>
116 */
117static void encode_netobj(struct xdr_stream *xdr,
118 const u8 *data, const unsigned int length)
119{
120 __be32 *p;
121
122 BUG_ON(length > XDR_MAX_NETOBJ);
123 p = xdr_reserve_space(xdr, 4 + length);
124 xdr_encode_opaque(p, data, length);
125}
126
127static int decode_netobj(struct xdr_stream *xdr,
128 struct xdr_netobj *obj)
129{
130 u32 length;
131 __be32 *p;
132
133 p = xdr_inline_decode(xdr, 4);
134 if (unlikely(p == NULL))
135 goto out_overflow;
136 length = be32_to_cpup(p++);
137 if (unlikely(length > XDR_MAX_NETOBJ))
138 goto out_size;
139 obj->len = length;
140 obj->data = (u8 *)p;
141 return 0;
142out_size:
143 dprintk("NFS: returned netobj was too long: %u\n", length);
144 return -EIO;
145out_overflow:
146 print_overflow_msg(__func__, xdr);
147 return -EIO;
148}
149
150/*
151 * netobj cookie;
152 */
153static void encode_cookie(struct xdr_stream *xdr,
154 const struct nlm_cookie *cookie)
155{
156 BUG_ON(cookie->len > NLM_MAXCOOKIELEN);
157 encode_netobj(xdr, (u8 *)&cookie->data, cookie->len);
158}
159
160static int decode_cookie(struct xdr_stream *xdr,
161 struct nlm_cookie *cookie)
162{
163 u32 length;
164 __be32 *p;
165
166 p = xdr_inline_decode(xdr, 4);
167 if (unlikely(p == NULL))
168 goto out_overflow;
169 length = be32_to_cpup(p++);
170 /* apparently HPUX can return empty cookies */
171 if (length == 0)
172 goto out_hpux;
173 if (length > NLM_MAXCOOKIELEN)
174 goto out_size;
175 p = xdr_inline_decode(xdr, length);
176 if (unlikely(p == NULL))
177 goto out_overflow;
178 cookie->len = length;
179 memcpy(cookie->data, p, length);
180 return 0;
181out_hpux:
182 cookie->len = 4;
183 memset(cookie->data, 0, 4);
184 return 0;
185out_size:
186 dprintk("NFS: returned cookie was too long: %u\n", length);
187 return -EIO;
188out_overflow:
189 print_overflow_msg(__func__, xdr);
190 return -EIO;
191}
192
193/*
194 * netobj fh;
195 */
196static void encode_fh(struct xdr_stream *xdr, const struct nfs_fh *fh)
197{
198 BUG_ON(fh->size != NFS2_FHSIZE);
199 encode_netobj(xdr, (u8 *)&fh->data, NFS2_FHSIZE);
200}
201
202/*
203 * enum nlm_stats {
204 * LCK_GRANTED = 0,
205 * LCK_DENIED = 1,
206 * LCK_DENIED_NOLOCKS = 2,
207 * LCK_BLOCKED = 3,
208 * LCK_DENIED_GRACE_PERIOD = 4
209 * };
210 *
211 *
212 * struct nlm_stat {
213 * nlm_stats stat;
214 * };
215 *
216 * NB: we don't swap bytes for the NLM status values. The upper
217 * layers deal directly with the status value in network byte
218 * order.
219 */
220
221static void encode_nlm_stat(struct xdr_stream *xdr,
222 const __be32 stat)
223{
224 __be32 *p;
225
226 BUG_ON(be32_to_cpu(stat) > NLM_LCK_DENIED_GRACE_PERIOD);
227 p = xdr_reserve_space(xdr, 4);
228 *p = stat;
229}
230
231static int decode_nlm_stat(struct xdr_stream *xdr,
232 __be32 *stat)
233{
234 __be32 *p;
235
236 p = xdr_inline_decode(xdr, 4);
237 if (unlikely(p == NULL))
238 goto out_overflow;
239 if (unlikely(*p > nlm_lck_denied_grace_period))
240 goto out_enum;
241 *stat = *p;
242 return 0;
243out_enum:
244 dprintk("%s: server returned invalid nlm_stats value: %u\n",
245 __func__, be32_to_cpup(p));
246 return -EIO;
247out_overflow:
248 print_overflow_msg(__func__, xdr);
249 return -EIO;
250}
251
252/*
253 * struct nlm_holder {
254 * bool exclusive;
255 * int uppid;
256 * netobj oh;
257 * unsigned l_offset;
258 * unsigned l_len;
259 * };
260 */
261static void encode_nlm_holder(struct xdr_stream *xdr,
262 const struct nlm_res *result)
263{
264 const struct nlm_lock *lock = &result->lock;
265 u32 l_offset, l_len;
266 __be32 *p;
267
268 encode_bool(xdr, lock->fl.fl_type == F_RDLCK);
269 encode_int32(xdr, lock->svid);
270 encode_netobj(xdr, lock->oh.data, lock->oh.len);
271
272 p = xdr_reserve_space(xdr, 4 + 4);
273 nlm_compute_offsets(lock, &l_offset, &l_len);
274 *p++ = cpu_to_be32(l_offset);
275 *p = cpu_to_be32(l_len);
276}
277
278static int decode_nlm_holder(struct xdr_stream *xdr, struct nlm_res *result)
279{
280 struct nlm_lock *lock = &result->lock;
281 struct file_lock *fl = &lock->fl;
282 u32 exclusive, l_offset, l_len;
283 int error;
284 __be32 *p;
285 s32 end;
286
287 memset(lock, 0, sizeof(*lock));
288 locks_init_lock(fl);
289
290 p = xdr_inline_decode(xdr, 4 + 4);
291 if (unlikely(p == NULL))
292 goto out_overflow;
293 exclusive = be32_to_cpup(p++);
294 lock->svid = be32_to_cpup(p);
295 fl->fl_pid = (pid_t)lock->svid;
296
297 error = decode_netobj(xdr, &lock->oh);
298 if (unlikely(error))
299 goto out;
300
301 p = xdr_inline_decode(xdr, 4 + 4);
302 if (unlikely(p == NULL))
303 goto out_overflow;
304
305 fl->fl_flags = FL_POSIX;
306 fl->fl_type = exclusive != 0 ? F_WRLCK : F_RDLCK;
307 l_offset = be32_to_cpup(p++);
308 l_len = be32_to_cpup(p);
309 end = l_offset + l_len - 1;
310
311 fl->fl_start = (loff_t)l_offset;
312 if (l_len == 0 || end < 0)
313 fl->fl_end = OFFSET_MAX;
314 else
315 fl->fl_end = (loff_t)end;
316 error = 0;
317out:
318 return error;
319out_overflow:
320 print_overflow_msg(__func__, xdr);
321 return -EIO;
322}
323
324/*
325 * string caller_name<LM_MAXSTRLEN>;
326 */
327static void encode_caller_name(struct xdr_stream *xdr, const char *name)
328{
329 /* NB: client-side does not set lock->len */
330 u32 length = strlen(name);
331 __be32 *p;
332
333 BUG_ON(length > NLM_MAXSTRLEN);
334 p = xdr_reserve_space(xdr, 4 + length);
335 xdr_encode_opaque(p, name, length);
336}
337
338/*
339 * struct nlm_lock {
340 * string caller_name<LM_MAXSTRLEN>;
341 * netobj fh;
342 * netobj oh;
343 * int uppid;
344 * unsigned l_offset;
345 * unsigned l_len;
346 * };
347 */
348static void encode_nlm_lock(struct xdr_stream *xdr,
349 const struct nlm_lock *lock)
350{
351 u32 l_offset, l_len;
352 __be32 *p;
353
354 encode_caller_name(xdr, lock->caller);
355 encode_fh(xdr, &lock->fh);
356 encode_netobj(xdr, lock->oh.data, lock->oh.len);
357
358 p = xdr_reserve_space(xdr, 4 + 4 + 4);
359 *p++ = cpu_to_be32(lock->svid);
360
361 nlm_compute_offsets(lock, &l_offset, &l_len);
362 *p++ = cpu_to_be32(l_offset);
363 *p = cpu_to_be32(l_len);
364}
365
366
367/*
368 * NLMv3 XDR encode functions
369 *
370 * NLMv3 argument types are defined in Chapter 10 of The Open Group's
371 * "Protocols for Interworking: XNFS, Version 3W".
372 */
373
374/*
375 * struct nlm_testargs {
376 * netobj cookie;
377 * bool exclusive;
378 * struct nlm_lock alock;
379 * };
380 */
381static void nlm_xdr_enc_testargs(struct rpc_rqst *req,
382 struct xdr_stream *xdr,
383 const struct nlm_args *args)
384{
385 const struct nlm_lock *lock = &args->lock;
386
387 encode_cookie(xdr, &args->cookie);
388 encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
389 encode_nlm_lock(xdr, lock);
390}
391
392/*
393 * struct nlm_lockargs {
394 * netobj cookie;
395 * bool block;
396 * bool exclusive;
397 * struct nlm_lock alock;
398 * bool reclaim;
399 * int state;
400 * };
401 */
402static void nlm_xdr_enc_lockargs(struct rpc_rqst *req,
403 struct xdr_stream *xdr,
404 const struct nlm_args *args)
405{
406 const struct nlm_lock *lock = &args->lock;
407
408 encode_cookie(xdr, &args->cookie);
409 encode_bool(xdr, args->block);
410 encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
411 encode_nlm_lock(xdr, lock);
412 encode_bool(xdr, args->reclaim);
413 encode_int32(xdr, args->state);
414}
415
416/*
417 * struct nlm_cancargs {
418 * netobj cookie;
419 * bool block;
420 * bool exclusive;
421 * struct nlm_lock alock;
422 * };
423 */
424static void nlm_xdr_enc_cancargs(struct rpc_rqst *req,
425 struct xdr_stream *xdr,
426 const struct nlm_args *args)
427{
428 const struct nlm_lock *lock = &args->lock;
429
430 encode_cookie(xdr, &args->cookie);
431 encode_bool(xdr, args->block);
432 encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
433 encode_nlm_lock(xdr, lock);
434}
435
436/*
437 * struct nlm_unlockargs {
438 * netobj cookie;
439 * struct nlm_lock alock;
440 * };
441 */
442static void nlm_xdr_enc_unlockargs(struct rpc_rqst *req,
443 struct xdr_stream *xdr,
444 const struct nlm_args *args)
445{
446 const struct nlm_lock *lock = &args->lock;
447
448 encode_cookie(xdr, &args->cookie);
449 encode_nlm_lock(xdr, lock);
450}
451
452/*
453 * struct nlm_res {
454 * netobj cookie;
455 * nlm_stat stat;
456 * };
457 */
458static void nlm_xdr_enc_res(struct rpc_rqst *req,
459 struct xdr_stream *xdr,
460 const struct nlm_res *result)
461{
462 encode_cookie(xdr, &result->cookie);
463 encode_nlm_stat(xdr, result->status);
464}
465
466/*
467 * union nlm_testrply switch (nlm_stats stat) {
468 * case LCK_DENIED:
469 * struct nlm_holder holder;
470 * default:
471 * void;
472 * };
473 *
474 * struct nlm_testres {
475 * netobj cookie;
476 * nlm_testrply test_stat;
477 * };
478 */
479static void encode_nlm_testrply(struct xdr_stream *xdr,
480 const struct nlm_res *result)
481{
482 if (result->status == nlm_lck_denied)
483 encode_nlm_holder(xdr, result);
484}
485
486static void nlm_xdr_enc_testres(struct rpc_rqst *req,
487 struct xdr_stream *xdr,
488 const struct nlm_res *result)
489{
490 encode_cookie(xdr, &result->cookie);
491 encode_nlm_stat(xdr, result->status);
492 encode_nlm_testrply(xdr, result);
493}
494
495
496/*
497 * NLMv3 XDR decode functions
498 *
499 * NLMv3 result types are defined in Chapter 10 of The Open Group's
500 * "Protocols for Interworking: XNFS, Version 3W".
501 */
502
503/*
504 * union nlm_testrply switch (nlm_stats stat) {
505 * case LCK_DENIED:
506 * struct nlm_holder holder;
507 * default:
508 * void;
509 * };
510 *
511 * struct nlm_testres {
512 * netobj cookie;
513 * nlm_testrply test_stat;
514 * };
515 */
516static int decode_nlm_testrply(struct xdr_stream *xdr,
517 struct nlm_res *result)
518{
519 int error;
520
521 error = decode_nlm_stat(xdr, &result->status);
522 if (unlikely(error))
523 goto out;
524 if (result->status == nlm_lck_denied)
525 error = decode_nlm_holder(xdr, result);
526out:
527 return error;
528}
529
530static int nlm_xdr_dec_testres(struct rpc_rqst *req,
531 struct xdr_stream *xdr,
532 struct nlm_res *result)
533{
534 int error;
535
536 error = decode_cookie(xdr, &result->cookie);
537 if (unlikely(error))
538 goto out;
539 error = decode_nlm_testrply(xdr, result);
540out:
541 return error;
542}
543
544/*
545 * struct nlm_res {
546 * netobj cookie;
547 * nlm_stat stat;
548 * };
549 */
550static int nlm_xdr_dec_res(struct rpc_rqst *req,
551 struct xdr_stream *xdr,
552 struct nlm_res *result)
553{
554 int error;
555
556 error = decode_cookie(xdr, &result->cookie);
557 if (unlikely(error))
558 goto out;
559 error = decode_nlm_stat(xdr, &result->status);
560out:
561 return error;
562}
563
564
565/*
566 * For NLM, a void procedure really returns nothing
567 */
#define nlm_xdr_dec_norep	NULL

/*
 * Build one rpc_procinfo slot, indexed by NLMPROC_<proc>.  The encoder,
 * decoder and both size macros are derived by token pasting from the
 * argument and result type names.
 */
#define PROC(proc, argtype, restype)					\
[NLMPROC_##proc] = {							\
	.p_name		= #proc,					\
	.p_proc		= NLMPROC_##proc,				\
	.p_statidx	= NLMPROC_##proc,				\
	.p_encode	= (kxdreproc_t)nlm_xdr_enc_##argtype,		\
	.p_arglen	= NLM_##argtype##_sz,				\
	.p_decode	= (kxdrdproc_t)nlm_xdr_dec_##restype,		\
	.p_replen	= NLM_##restype##_sz,				\
	}
580
581static struct rpc_procinfo nlm_procedures[] = {
582 PROC(TEST, testargs, testres),
583 PROC(LOCK, lockargs, res),
584 PROC(CANCEL, cancargs, res),
585 PROC(UNLOCK, unlockargs, res),
586 PROC(GRANTED, testargs, res),
587 PROC(TEST_MSG, testargs, norep),
588 PROC(LOCK_MSG, lockargs, norep),
589 PROC(CANCEL_MSG, cancargs, norep),
590 PROC(UNLOCK_MSG, unlockargs, norep),
591 PROC(GRANTED_MSG, testargs, norep),
592 PROC(TEST_RES, testres, norep),
593 PROC(LOCK_RES, res, norep),
594 PROC(CANCEL_RES, res, norep),
595 PROC(UNLOCK_RES, res, norep),
596 PROC(GRANTED_RES, res, norep),
597};
598
599static struct rpc_version nlm_version1 = {
600 .number = 1,
601 .nrprocs = ARRAY_SIZE(nlm_procedures),
602 .procs = nlm_procedures,
603};
604
605static struct rpc_version nlm_version3 = {
606 .number = 3,
607 .nrprocs = ARRAY_SIZE(nlm_procedures),
608 .procs = nlm_procedures,
609};
610
611static struct rpc_version *nlm_versions[] = {
612 [1] = &nlm_version1,
613 [3] = &nlm_version3,
614#ifdef CONFIG_LOCKD_V4
615 [4] = &nlm_version4,
616#endif
617};
618
619static struct rpc_stat nlm_rpc_stats;
620
621struct rpc_program nlm_program = {
622 .name = "lockd",
623 .number = NLM_PROGRAM,
624 .nrvers = ARRAY_SIZE(nlm_versions),
625 .version = nlm_versions,
626 .stats = &nlm_rpc_stats,
627};
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index ed0c59fe23c..5f1bcb2f06f 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -25,9 +25,22 @@
25#define NLM_HOST_EXPIRE (300 * HZ) 25#define NLM_HOST_EXPIRE (300 * HZ)
26#define NLM_HOST_COLLECT (120 * HZ) 26#define NLM_HOST_COLLECT (120 * HZ)
27 27
28static struct hlist_head nlm_hosts[NLM_HOST_NRHASH]; 28static struct hlist_head nlm_server_hosts[NLM_HOST_NRHASH];
29static struct hlist_head nlm_client_hosts[NLM_HOST_NRHASH];
30
31#define for_each_host(host, pos, chain, table) \
32 for ((chain) = (table); \
33 (chain) < (table) + NLM_HOST_NRHASH; ++(chain)) \
34 hlist_for_each_entry((host), (pos), (chain), h_hash)
35
36#define for_each_host_safe(host, pos, next, chain, table) \
37 for ((chain) = (table); \
38 (chain) < (table) + NLM_HOST_NRHASH; ++(chain)) \
39 hlist_for_each_entry_safe((host), (pos), (next), \
40 (chain), h_hash)
41
29static unsigned long next_gc; 42static unsigned long next_gc;
30static int nrhosts; 43static unsigned long nrhosts;
31static DEFINE_MUTEX(nlm_host_mutex); 44static DEFINE_MUTEX(nlm_host_mutex);
32 45
33static void nlm_gc_hosts(void); 46static void nlm_gc_hosts(void);
@@ -40,8 +53,6 @@ struct nlm_lookup_host_info {
40 const u32 version; /* NLM version to search for */ 53 const u32 version; /* NLM version to search for */
41 const char *hostname; /* remote's hostname */ 54 const char *hostname; /* remote's hostname */
42 const size_t hostname_len; /* it's length */ 55 const size_t hostname_len; /* it's length */
43 const struct sockaddr *src_sap; /* our address (optional) */
44 const size_t src_len; /* it's length */
45 const int noresvport; /* use non-priv port */ 56 const int noresvport; /* use non-priv port */
46}; 57};
47 58
@@ -88,127 +99,83 @@ static unsigned int nlm_hash_address(const struct sockaddr *sap)
88} 99}
89 100
90/* 101/*
91 * Common host lookup routine for server & client 102 * Allocate and initialize an nlm_host. Common to both client and server.
92 */ 103 */
93static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni) 104static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni,
105 struct nsm_handle *nsm)
94{ 106{
95 struct hlist_head *chain; 107 struct nlm_host *host = NULL;
96 struct hlist_node *pos; 108 unsigned long now = jiffies;
97 struct nlm_host *host;
98 struct nsm_handle *nsm = NULL;
99
100 mutex_lock(&nlm_host_mutex);
101 109
102 if (time_after_eq(jiffies, next_gc)) 110 if (nsm != NULL)
103 nlm_gc_hosts();
104
105 /* We may keep several nlm_host objects for a peer, because each
106 * nlm_host is identified by
107 * (address, protocol, version, server/client)
108 * We could probably simplify this a little by putting all those
109 * different NLM rpc_clients into one single nlm_host object.
110 * This would allow us to have one nlm_host per address.
111 */
112 chain = &nlm_hosts[nlm_hash_address(ni->sap)];
113 hlist_for_each_entry(host, pos, chain, h_hash) {
114 if (!rpc_cmp_addr(nlm_addr(host), ni->sap))
115 continue;
116
117 /* See if we have an NSM handle for this client */
118 if (!nsm)
119 nsm = host->h_nsmhandle;
120
121 if (host->h_proto != ni->protocol)
122 continue;
123 if (host->h_version != ni->version)
124 continue;
125 if (host->h_server != ni->server)
126 continue;
127 if (ni->server && ni->src_len != 0 &&
128 !rpc_cmp_addr(nlm_srcaddr(host), ni->src_sap))
129 continue;
130
131 /* Move to head of hash chain. */
132 hlist_del(&host->h_hash);
133 hlist_add_head(&host->h_hash, chain);
134
135 nlm_get_host(host);
136 dprintk("lockd: nlm_lookup_host found host %s (%s)\n",
137 host->h_name, host->h_addrbuf);
138 goto out;
139 }
140
141 /*
142 * The host wasn't in our hash table. If we don't
143 * have an NSM handle for it yet, create one.
144 */
145 if (nsm)
146 atomic_inc(&nsm->sm_count); 111 atomic_inc(&nsm->sm_count);
147 else { 112 else {
148 host = NULL; 113 host = NULL;
149 nsm = nsm_get_handle(ni->sap, ni->salen, 114 nsm = nsm_get_handle(ni->sap, ni->salen,
150 ni->hostname, ni->hostname_len); 115 ni->hostname, ni->hostname_len);
151 if (!nsm) { 116 if (unlikely(nsm == NULL)) {
152 dprintk("lockd: nlm_lookup_host failed; " 117 dprintk("lockd: %s failed; no nsm handle\n",
153 "no nsm handle\n"); 118 __func__);
154 goto out; 119 goto out;
155 } 120 }
156 } 121 }
157 122
158 host = kzalloc(sizeof(*host), GFP_KERNEL); 123 host = kmalloc(sizeof(*host), GFP_KERNEL);
159 if (!host) { 124 if (unlikely(host == NULL)) {
125 dprintk("lockd: %s failed; no memory\n", __func__);
160 nsm_release(nsm); 126 nsm_release(nsm);
161 dprintk("lockd: nlm_lookup_host failed; no memory\n");
162 goto out; 127 goto out;
163 } 128 }
164 host->h_name = nsm->sm_name; 129
165 host->h_addrbuf = nsm->sm_addrbuf;
166 memcpy(nlm_addr(host), ni->sap, ni->salen); 130 memcpy(nlm_addr(host), ni->sap, ni->salen);
167 host->h_addrlen = ni->salen; 131 host->h_addrlen = ni->salen;
168 rpc_set_port(nlm_addr(host), 0); 132 rpc_set_port(nlm_addr(host), 0);
169 memcpy(nlm_srcaddr(host), ni->src_sap, ni->src_len); 133 host->h_srcaddrlen = 0;
170 host->h_srcaddrlen = ni->src_len; 134
135 host->h_rpcclnt = NULL;
136 host->h_name = nsm->sm_name;
171 host->h_version = ni->version; 137 host->h_version = ni->version;
172 host->h_proto = ni->protocol; 138 host->h_proto = ni->protocol;
173 host->h_rpcclnt = NULL; 139 host->h_reclaiming = 0;
174 mutex_init(&host->h_mutex); 140 host->h_server = ni->server;
175 host->h_nextrebind = jiffies + NLM_HOST_REBIND; 141 host->h_noresvport = ni->noresvport;
176 host->h_expires = jiffies + NLM_HOST_EXPIRE; 142 host->h_inuse = 0;
177 atomic_set(&host->h_count, 1);
178 init_waitqueue_head(&host->h_gracewait); 143 init_waitqueue_head(&host->h_gracewait);
179 init_rwsem(&host->h_rwsem); 144 init_rwsem(&host->h_rwsem);
180 host->h_state = 0; /* pseudo NSM state */ 145 host->h_state = 0;
181 host->h_nsmstate = 0; /* real NSM state */ 146 host->h_nsmstate = 0;
182 host->h_nsmhandle = nsm; 147 host->h_pidcount = 0;
183 host->h_server = ni->server; 148 atomic_set(&host->h_count, 1);
184 host->h_noresvport = ni->noresvport; 149 mutex_init(&host->h_mutex);
185 hlist_add_head(&host->h_hash, chain); 150 host->h_nextrebind = now + NLM_HOST_REBIND;
151 host->h_expires = now + NLM_HOST_EXPIRE;
186 INIT_LIST_HEAD(&host->h_lockowners); 152 INIT_LIST_HEAD(&host->h_lockowners);
187 spin_lock_init(&host->h_lock); 153 spin_lock_init(&host->h_lock);
188 INIT_LIST_HEAD(&host->h_granted); 154 INIT_LIST_HEAD(&host->h_granted);
189 INIT_LIST_HEAD(&host->h_reclaim); 155 INIT_LIST_HEAD(&host->h_reclaim);
190 156 host->h_nsmhandle = nsm;
191 nrhosts++; 157 host->h_addrbuf = nsm->sm_addrbuf;
192
193 dprintk("lockd: nlm_lookup_host created host %s\n",
194 host->h_name);
195 158
196out: 159out:
197 mutex_unlock(&nlm_host_mutex);
198 return host; 160 return host;
199} 161}
200 162
201/* 163/*
202 * Destroy a host 164 * Destroy an nlm_host and free associated resources
165 *
166 * Caller must hold nlm_host_mutex.
203 */ 167 */
204static void 168static void nlm_destroy_host_locked(struct nlm_host *host)
205nlm_destroy_host(struct nlm_host *host)
206{ 169{
207 struct rpc_clnt *clnt; 170 struct rpc_clnt *clnt;
208 171
172 dprintk("lockd: destroy host %s\n", host->h_name);
173
209 BUG_ON(!list_empty(&host->h_lockowners)); 174 BUG_ON(!list_empty(&host->h_lockowners));
210 BUG_ON(atomic_read(&host->h_count)); 175 BUG_ON(atomic_read(&host->h_count));
211 176
177 hlist_del_init(&host->h_hash);
178
212 nsm_unmonitor(host); 179 nsm_unmonitor(host);
213 nsm_release(host->h_nsmhandle); 180 nsm_release(host->h_nsmhandle);
214 181
@@ -216,6 +183,8 @@ nlm_destroy_host(struct nlm_host *host)
216 if (clnt != NULL) 183 if (clnt != NULL)
217 rpc_shutdown_client(clnt); 184 rpc_shutdown_client(clnt);
218 kfree(host); 185 kfree(host);
186
187 nrhosts--;
219} 188}
220 189
221/** 190/**
@@ -249,12 +218,76 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
249 .hostname_len = strlen(hostname), 218 .hostname_len = strlen(hostname),
250 .noresvport = noresvport, 219 .noresvport = noresvport,
251 }; 220 };
221 struct hlist_head *chain;
222 struct hlist_node *pos;
223 struct nlm_host *host;
224 struct nsm_handle *nsm = NULL;
252 225
253 dprintk("lockd: %s(host='%s', vers=%u, proto=%s)\n", __func__, 226 dprintk("lockd: %s(host='%s', vers=%u, proto=%s)\n", __func__,
254 (hostname ? hostname : "<none>"), version, 227 (hostname ? hostname : "<none>"), version,
255 (protocol == IPPROTO_UDP ? "udp" : "tcp")); 228 (protocol == IPPROTO_UDP ? "udp" : "tcp"));
256 229
257 return nlm_lookup_host(&ni); 230 mutex_lock(&nlm_host_mutex);
231
232 chain = &nlm_client_hosts[nlm_hash_address(sap)];
233 hlist_for_each_entry(host, pos, chain, h_hash) {
234 if (!rpc_cmp_addr(nlm_addr(host), sap))
235 continue;
236
237 /* Same address. Share an NSM handle if we already have one */
238 if (nsm == NULL)
239 nsm = host->h_nsmhandle;
240
241 if (host->h_proto != protocol)
242 continue;
243 if (host->h_version != version)
244 continue;
245
246 nlm_get_host(host);
247 dprintk("lockd: %s found host %s (%s)\n", __func__,
248 host->h_name, host->h_addrbuf);
249 goto out;
250 }
251
252 host = nlm_alloc_host(&ni, nsm);
253 if (unlikely(host == NULL))
254 goto out;
255
256 hlist_add_head(&host->h_hash, chain);
257 nrhosts++;
258
259 dprintk("lockd: %s created host %s (%s)\n", __func__,
260 host->h_name, host->h_addrbuf);
261
262out:
263 mutex_unlock(&nlm_host_mutex);
264 return host;
265}
266
267/**
268 * nlmclnt_release_host - release client nlm_host
269 * @host: nlm_host to release
270 *
271 */
272void nlmclnt_release_host(struct nlm_host *host)
273{
274 if (host == NULL)
275 return;
276
277 dprintk("lockd: release client host %s\n", host->h_name);
278
279 BUG_ON(atomic_read(&host->h_count) < 0);
280 BUG_ON(host->h_server);
281
282 if (atomic_dec_and_test(&host->h_count)) {
283 BUG_ON(!list_empty(&host->h_lockowners));
284 BUG_ON(!list_empty(&host->h_granted));
285 BUG_ON(!list_empty(&host->h_reclaim));
286
287 mutex_lock(&nlm_host_mutex);
288 nlm_destroy_host_locked(host);
289 mutex_unlock(&nlm_host_mutex);
290 }
258} 291}
259 292
260/** 293/**
@@ -279,12 +312,18 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
279 const char *hostname, 312 const char *hostname,
280 const size_t hostname_len) 313 const size_t hostname_len)
281{ 314{
315 struct hlist_head *chain;
316 struct hlist_node *pos;
317 struct nlm_host *host = NULL;
318 struct nsm_handle *nsm = NULL;
282 struct sockaddr_in sin = { 319 struct sockaddr_in sin = {
283 .sin_family = AF_INET, 320 .sin_family = AF_INET,
284 }; 321 };
285 struct sockaddr_in6 sin6 = { 322 struct sockaddr_in6 sin6 = {
286 .sin6_family = AF_INET6, 323 .sin6_family = AF_INET6,
287 }; 324 };
325 struct sockaddr *src_sap;
326 size_t src_len = rqstp->rq_addrlen;
288 struct nlm_lookup_host_info ni = { 327 struct nlm_lookup_host_info ni = {
289 .server = 1, 328 .server = 1,
290 .sap = svc_addr(rqstp), 329 .sap = svc_addr(rqstp),
@@ -293,27 +332,91 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
293 .version = rqstp->rq_vers, 332 .version = rqstp->rq_vers,
294 .hostname = hostname, 333 .hostname = hostname,
295 .hostname_len = hostname_len, 334 .hostname_len = hostname_len,
296 .src_len = rqstp->rq_addrlen,
297 }; 335 };
298 336
299 dprintk("lockd: %s(host='%*s', vers=%u, proto=%s)\n", __func__, 337 dprintk("lockd: %s(host='%*s', vers=%u, proto=%s)\n", __func__,
300 (int)hostname_len, hostname, rqstp->rq_vers, 338 (int)hostname_len, hostname, rqstp->rq_vers,
301 (rqstp->rq_prot == IPPROTO_UDP ? "udp" : "tcp")); 339 (rqstp->rq_prot == IPPROTO_UDP ? "udp" : "tcp"));
302 340
341 mutex_lock(&nlm_host_mutex);
342
303 switch (ni.sap->sa_family) { 343 switch (ni.sap->sa_family) {
304 case AF_INET: 344 case AF_INET:
305 sin.sin_addr.s_addr = rqstp->rq_daddr.addr.s_addr; 345 sin.sin_addr.s_addr = rqstp->rq_daddr.addr.s_addr;
306 ni.src_sap = (struct sockaddr *)&sin; 346 src_sap = (struct sockaddr *)&sin;
307 break; 347 break;
308 case AF_INET6: 348 case AF_INET6:
309 ipv6_addr_copy(&sin6.sin6_addr, &rqstp->rq_daddr.addr6); 349 ipv6_addr_copy(&sin6.sin6_addr, &rqstp->rq_daddr.addr6);
310 ni.src_sap = (struct sockaddr *)&sin6; 350 src_sap = (struct sockaddr *)&sin6;
311 break; 351 break;
312 default: 352 default:
313 return NULL; 353 dprintk("lockd: %s failed; unrecognized address family\n",
354 __func__);
355 goto out;
356 }
357
358 if (time_after_eq(jiffies, next_gc))
359 nlm_gc_hosts();
360
361 chain = &nlm_server_hosts[nlm_hash_address(ni.sap)];
362 hlist_for_each_entry(host, pos, chain, h_hash) {
363 if (!rpc_cmp_addr(nlm_addr(host), ni.sap))
364 continue;
365
366 /* Same address. Share an NSM handle if we already have one */
367 if (nsm == NULL)
368 nsm = host->h_nsmhandle;
369
370 if (host->h_proto != ni.protocol)
371 continue;
372 if (host->h_version != ni.version)
373 continue;
374 if (!rpc_cmp_addr(nlm_srcaddr(host), src_sap))
375 continue;
376
377 /* Move to head of hash chain. */
378 hlist_del(&host->h_hash);
379 hlist_add_head(&host->h_hash, chain);
380
381 nlm_get_host(host);
382 dprintk("lockd: %s found host %s (%s)\n",
383 __func__, host->h_name, host->h_addrbuf);
384 goto out;
314 } 385 }
315 386
316 return nlm_lookup_host(&ni); 387 host = nlm_alloc_host(&ni, nsm);
388 if (unlikely(host == NULL))
389 goto out;
390
391 memcpy(nlm_srcaddr(host), src_sap, src_len);
392 host->h_srcaddrlen = src_len;
393 hlist_add_head(&host->h_hash, chain);
394 nrhosts++;
395
396 dprintk("lockd: %s created host %s (%s)\n",
397 __func__, host->h_name, host->h_addrbuf);
398
399out:
400 mutex_unlock(&nlm_host_mutex);
401 return host;
402}
403
404/**
405 * nlmsvc_release_host - release server nlm_host
406 * @host: nlm_host to release
407 *
408 * Host is destroyed later in nlm_gc_hosts().
409 */
410void nlmsvc_release_host(struct nlm_host *host)
411{
412 if (host == NULL)
413 return;
414
415 dprintk("lockd: release server host %s\n", host->h_name);
416
417 BUG_ON(atomic_read(&host->h_count) < 0);
418 BUG_ON(!host->h_server);
419 atomic_dec(&host->h_count);
317} 420}
318 421
319/* 422/*
@@ -413,20 +516,28 @@ struct nlm_host * nlm_get_host(struct nlm_host *host)
413 return host; 516 return host;
414} 517}
415 518
416/* 519static struct nlm_host *next_host_state(struct hlist_head *cache,
417 * Release NLM host after use 520 struct nsm_handle *nsm,
418 */ 521 const struct nlm_reboot *info)
419void nlm_release_host(struct nlm_host *host)
420{ 522{
421 if (host != NULL) { 523 struct nlm_host *host = NULL;
422 dprintk("lockd: release host %s\n", host->h_name); 524 struct hlist_head *chain;
423 BUG_ON(atomic_read(&host->h_count) < 0); 525 struct hlist_node *pos;
424 if (atomic_dec_and_test(&host->h_count)) { 526
425 BUG_ON(!list_empty(&host->h_lockowners)); 527 mutex_lock(&nlm_host_mutex);
426 BUG_ON(!list_empty(&host->h_granted)); 528 for_each_host(host, pos, chain, cache) {
427 BUG_ON(!list_empty(&host->h_reclaim)); 529 if (host->h_nsmhandle == nsm
530 && host->h_nsmstate != info->state) {
531 host->h_nsmstate = info->state;
532 host->h_state++;
533
534 nlm_get_host(host);
535 goto out;
428 } 536 }
429 } 537 }
538out:
539 mutex_unlock(&nlm_host_mutex);
540 return host;
430} 541}
431 542
432/** 543/**
@@ -438,8 +549,6 @@ void nlm_release_host(struct nlm_host *host)
438 */ 549 */
439void nlm_host_rebooted(const struct nlm_reboot *info) 550void nlm_host_rebooted(const struct nlm_reboot *info)
440{ 551{
441 struct hlist_head *chain;
442 struct hlist_node *pos;
443 struct nsm_handle *nsm; 552 struct nsm_handle *nsm;
444 struct nlm_host *host; 553 struct nlm_host *host;
445 554
@@ -452,32 +561,15 @@ void nlm_host_rebooted(const struct nlm_reboot *info)
452 * lock for this. 561 * lock for this.
453 * To avoid processing a host several times, we match the nsmstate. 562 * To avoid processing a host several times, we match the nsmstate.
454 */ 563 */
455again: mutex_lock(&nlm_host_mutex); 564 while ((host = next_host_state(nlm_server_hosts, nsm, info)) != NULL) {
456 for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) { 565 nlmsvc_free_host_resources(host);
457 hlist_for_each_entry(host, pos, chain, h_hash) { 566 nlmsvc_release_host(host);
458 if (host->h_nsmhandle == nsm
459 && host->h_nsmstate != info->state) {
460 host->h_nsmstate = info->state;
461 host->h_state++;
462
463 nlm_get_host(host);
464 mutex_unlock(&nlm_host_mutex);
465
466 if (host->h_server) {
467 /* We're server for this guy, just ditch
468 * all the locks he held. */
469 nlmsvc_free_host_resources(host);
470 } else {
471 /* He's the server, initiate lock recovery. */
472 nlmclnt_recovery(host);
473 }
474
475 nlm_release_host(host);
476 goto again;
477 }
478 }
479 } 567 }
480 mutex_unlock(&nlm_host_mutex); 568 while ((host = next_host_state(nlm_client_hosts, nsm, info)) != NULL) {
569 nlmclnt_recovery(host);
570 nlmclnt_release_host(host);
571 }
572
481 nsm_release(nsm); 573 nsm_release(nsm);
482} 574}
483 575
@@ -497,13 +589,11 @@ nlm_shutdown_hosts(void)
497 589
498 /* First, make all hosts eligible for gc */ 590 /* First, make all hosts eligible for gc */
499 dprintk("lockd: nuking all hosts...\n"); 591 dprintk("lockd: nuking all hosts...\n");
500 for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) { 592 for_each_host(host, pos, chain, nlm_server_hosts) {
501 hlist_for_each_entry(host, pos, chain, h_hash) { 593 host->h_expires = jiffies - 1;
502 host->h_expires = jiffies - 1; 594 if (host->h_rpcclnt) {
503 if (host->h_rpcclnt) { 595 rpc_shutdown_client(host->h_rpcclnt);
504 rpc_shutdown_client(host->h_rpcclnt); 596 host->h_rpcclnt = NULL;
505 host->h_rpcclnt = NULL;
506 }
507 } 597 }
508 } 598 }
509 599
@@ -512,15 +602,13 @@ nlm_shutdown_hosts(void)
512 mutex_unlock(&nlm_host_mutex); 602 mutex_unlock(&nlm_host_mutex);
513 603
514 /* complain if any hosts are left */ 604 /* complain if any hosts are left */
515 if (nrhosts) { 605 if (nrhosts != 0) {
516 printk(KERN_WARNING "lockd: couldn't shutdown host module!\n"); 606 printk(KERN_WARNING "lockd: couldn't shutdown host module!\n");
517 dprintk("lockd: %d hosts left:\n", nrhosts); 607 dprintk("lockd: %lu hosts left:\n", nrhosts);
518 for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) { 608 for_each_host(host, pos, chain, nlm_server_hosts) {
519 hlist_for_each_entry(host, pos, chain, h_hash) { 609 dprintk(" %s (cnt %d use %d exp %ld)\n",
520 dprintk(" %s (cnt %d use %d exp %ld)\n", 610 host->h_name, atomic_read(&host->h_count),
521 host->h_name, atomic_read(&host->h_count), 611 host->h_inuse, host->h_expires);
522 host->h_inuse, host->h_expires);
523 }
524 } 612 }
525 } 613 }
526} 614}
@@ -538,29 +626,22 @@ nlm_gc_hosts(void)
538 struct nlm_host *host; 626 struct nlm_host *host;
539 627
540 dprintk("lockd: host garbage collection\n"); 628 dprintk("lockd: host garbage collection\n");
541 for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) { 629 for_each_host(host, pos, chain, nlm_server_hosts)
542 hlist_for_each_entry(host, pos, chain, h_hash) 630 host->h_inuse = 0;
543 host->h_inuse = 0;
544 }
545 631
546 /* Mark all hosts that hold locks, blocks or shares */ 632 /* Mark all hosts that hold locks, blocks or shares */
547 nlmsvc_mark_resources(); 633 nlmsvc_mark_resources();
548 634
549 for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) { 635 for_each_host_safe(host, pos, next, chain, nlm_server_hosts) {
550 hlist_for_each_entry_safe(host, pos, next, chain, h_hash) { 636 if (atomic_read(&host->h_count) || host->h_inuse
551 if (atomic_read(&host->h_count) || host->h_inuse 637 || time_before(jiffies, host->h_expires)) {
552 || time_before(jiffies, host->h_expires)) { 638 dprintk("nlm_gc_hosts skipping %s "
553 dprintk("nlm_gc_hosts skipping %s (cnt %d use %d exp %ld)\n", 639 "(cnt %d use %d exp %ld)\n",
554 host->h_name, atomic_read(&host->h_count), 640 host->h_name, atomic_read(&host->h_count),
555 host->h_inuse, host->h_expires); 641 host->h_inuse, host->h_expires);
556 continue; 642 continue;
557 }
558 dprintk("lockd: delete host %s\n", host->h_name);
559 hlist_del_init(&host->h_hash);
560
561 nlm_destroy_host(host);
562 nrhosts--;
563 } 643 }
644 nlm_destroy_host_locked(host);
564 } 645 }
565 646
566 next_gc = jiffies + NLM_HOST_COLLECT; 647 next_gc = jiffies + NLM_HOST_COLLECT;
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index e0c91894964..23d7451b293 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -401,26 +401,22 @@ void nsm_release(struct nsm_handle *nsm)
401 * Status Monitor wire protocol. 401 * Status Monitor wire protocol.
402 */ 402 */
403 403
404static int encode_nsm_string(struct xdr_stream *xdr, const char *string) 404static void encode_nsm_string(struct xdr_stream *xdr, const char *string)
405{ 405{
406 const u32 len = strlen(string); 406 const u32 len = strlen(string);
407 __be32 *p; 407 __be32 *p;
408 408
409 if (unlikely(len > SM_MAXSTRLEN)) 409 BUG_ON(len > SM_MAXSTRLEN);
410 return -EIO; 410 p = xdr_reserve_space(xdr, 4 + len);
411 p = xdr_reserve_space(xdr, sizeof(u32) + len);
412 if (unlikely(p == NULL))
413 return -EIO;
414 xdr_encode_opaque(p, string, len); 411 xdr_encode_opaque(p, string, len);
415 return 0;
416} 412}
417 413
418/* 414/*
419 * "mon_name" specifies the host to be monitored. 415 * "mon_name" specifies the host to be monitored.
420 */ 416 */
421static int encode_mon_name(struct xdr_stream *xdr, const struct nsm_args *argp) 417static void encode_mon_name(struct xdr_stream *xdr, const struct nsm_args *argp)
422{ 418{
423 return encode_nsm_string(xdr, argp->mon_name); 419 encode_nsm_string(xdr, argp->mon_name);
424} 420}
425 421
426/* 422/*
@@ -429,35 +425,25 @@ static int encode_mon_name(struct xdr_stream *xdr, const struct nsm_args *argp)
429 * (via the NLMPROC_SM_NOTIFY call) that the state of host "mon_name" 425 * (via the NLMPROC_SM_NOTIFY call) that the state of host "mon_name"
430 * has changed. 426 * has changed.
431 */ 427 */
432static int encode_my_id(struct xdr_stream *xdr, const struct nsm_args *argp) 428static void encode_my_id(struct xdr_stream *xdr, const struct nsm_args *argp)
433{ 429{
434 int status;
435 __be32 *p; 430 __be32 *p;
436 431
437 status = encode_nsm_string(xdr, utsname()->nodename); 432 encode_nsm_string(xdr, utsname()->nodename);
438 if (unlikely(status != 0)) 433 p = xdr_reserve_space(xdr, 4 + 4 + 4);
439 return status; 434 *p++ = cpu_to_be32(argp->prog);
440 p = xdr_reserve_space(xdr, 3 * sizeof(u32)); 435 *p++ = cpu_to_be32(argp->vers);
441 if (unlikely(p == NULL)) 436 *p = cpu_to_be32(argp->proc);
442 return -EIO;
443 *p++ = htonl(argp->prog);
444 *p++ = htonl(argp->vers);
445 *p++ = htonl(argp->proc);
446 return 0;
447} 437}
448 438
449/* 439/*
450 * The "mon_id" argument specifies the non-private arguments 440 * The "mon_id" argument specifies the non-private arguments
451 * of an NSMPROC_MON or NSMPROC_UNMON call. 441 * of an NSMPROC_MON or NSMPROC_UNMON call.
452 */ 442 */
453static int encode_mon_id(struct xdr_stream *xdr, const struct nsm_args *argp) 443static void encode_mon_id(struct xdr_stream *xdr, const struct nsm_args *argp)
454{ 444{
455 int status; 445 encode_mon_name(xdr, argp);
456 446 encode_my_id(xdr, argp);
457 status = encode_mon_name(xdr, argp);
458 if (unlikely(status != 0))
459 return status;
460 return encode_my_id(xdr, argp);
461} 447}
462 448
463/* 449/*
@@ -465,68 +451,56 @@ static int encode_mon_id(struct xdr_stream *xdr, const struct nsm_args *argp)
465 * by the NSMPROC_MON call. This information will be supplied in the 451 * by the NSMPROC_MON call. This information will be supplied in the
466 * NLMPROC_SM_NOTIFY call. 452 * NLMPROC_SM_NOTIFY call.
467 */ 453 */
468static int encode_priv(struct xdr_stream *xdr, const struct nsm_args *argp) 454static void encode_priv(struct xdr_stream *xdr, const struct nsm_args *argp)
469{ 455{
470 __be32 *p; 456 __be32 *p;
471 457
472 p = xdr_reserve_space(xdr, SM_PRIV_SIZE); 458 p = xdr_reserve_space(xdr, SM_PRIV_SIZE);
473 if (unlikely(p == NULL))
474 return -EIO;
475 xdr_encode_opaque_fixed(p, argp->priv->data, SM_PRIV_SIZE); 459 xdr_encode_opaque_fixed(p, argp->priv->data, SM_PRIV_SIZE);
476 return 0;
477} 460}
478 461
479static int xdr_enc_mon(struct rpc_rqst *req, __be32 *p, 462static void nsm_xdr_enc_mon(struct rpc_rqst *req, struct xdr_stream *xdr,
480 const struct nsm_args *argp) 463 const struct nsm_args *argp)
481{ 464{
482 struct xdr_stream xdr; 465 encode_mon_id(xdr, argp);
483 int status; 466 encode_priv(xdr, argp);
484
485 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
486 status = encode_mon_id(&xdr, argp);
487 if (unlikely(status))
488 return status;
489 return encode_priv(&xdr, argp);
490} 467}
491 468
492static int xdr_enc_unmon(struct rpc_rqst *req, __be32 *p, 469static void nsm_xdr_enc_unmon(struct rpc_rqst *req, struct xdr_stream *xdr,
493 const struct nsm_args *argp) 470 const struct nsm_args *argp)
494{ 471{
495 struct xdr_stream xdr; 472 encode_mon_id(xdr, argp);
496
497 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
498 return encode_mon_id(&xdr, argp);
499} 473}
500 474
501static int xdr_dec_stat_res(struct rpc_rqst *rqstp, __be32 *p, 475static int nsm_xdr_dec_stat_res(struct rpc_rqst *rqstp,
502 struct nsm_res *resp) 476 struct xdr_stream *xdr,
477 struct nsm_res *resp)
503{ 478{
504 struct xdr_stream xdr; 479 __be32 *p;
505 480
506 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 481 p = xdr_inline_decode(xdr, 4 + 4);
507 p = xdr_inline_decode(&xdr, 2 * sizeof(u32));
508 if (unlikely(p == NULL)) 482 if (unlikely(p == NULL))
509 return -EIO; 483 return -EIO;
510 resp->status = ntohl(*p++); 484 resp->status = be32_to_cpup(p++);
511 resp->state = ntohl(*p); 485 resp->state = be32_to_cpup(p);
512 486
513 dprintk("lockd: xdr_dec_stat_res status %d state %d\n", 487 dprintk("lockd: %s status %d state %d\n",
514 resp->status, resp->state); 488 __func__, resp->status, resp->state);
515 return 0; 489 return 0;
516} 490}
517 491
518static int xdr_dec_stat(struct rpc_rqst *rqstp, __be32 *p, 492static int nsm_xdr_dec_stat(struct rpc_rqst *rqstp,
519 struct nsm_res *resp) 493 struct xdr_stream *xdr,
494 struct nsm_res *resp)
520{ 495{
521 struct xdr_stream xdr; 496 __be32 *p;
522 497
523 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 498 p = xdr_inline_decode(xdr, 4);
524 p = xdr_inline_decode(&xdr, sizeof(u32));
525 if (unlikely(p == NULL)) 499 if (unlikely(p == NULL))
526 return -EIO; 500 return -EIO;
527 resp->state = ntohl(*p); 501 resp->state = be32_to_cpup(p);
528 502
529 dprintk("lockd: xdr_dec_stat state %d\n", resp->state); 503 dprintk("lockd: %s state %d\n", __func__, resp->state);
530 return 0; 504 return 0;
531} 505}
532 506
@@ -542,8 +516,8 @@ static int xdr_dec_stat(struct rpc_rqst *rqstp, __be32 *p,
542static struct rpc_procinfo nsm_procedures[] = { 516static struct rpc_procinfo nsm_procedures[] = {
543[NSMPROC_MON] = { 517[NSMPROC_MON] = {
544 .p_proc = NSMPROC_MON, 518 .p_proc = NSMPROC_MON,
545 .p_encode = (kxdrproc_t)xdr_enc_mon, 519 .p_encode = (kxdreproc_t)nsm_xdr_enc_mon,
546 .p_decode = (kxdrproc_t)xdr_dec_stat_res, 520 .p_decode = (kxdrdproc_t)nsm_xdr_dec_stat_res,
547 .p_arglen = SM_mon_sz, 521 .p_arglen = SM_mon_sz,
548 .p_replen = SM_monres_sz, 522 .p_replen = SM_monres_sz,
549 .p_statidx = NSMPROC_MON, 523 .p_statidx = NSMPROC_MON,
@@ -551,8 +525,8 @@ static struct rpc_procinfo nsm_procedures[] = {
551 }, 525 },
552[NSMPROC_UNMON] = { 526[NSMPROC_UNMON] = {
553 .p_proc = NSMPROC_UNMON, 527 .p_proc = NSMPROC_UNMON,
554 .p_encode = (kxdrproc_t)xdr_enc_unmon, 528 .p_encode = (kxdreproc_t)nsm_xdr_enc_unmon,
555 .p_decode = (kxdrproc_t)xdr_dec_stat, 529 .p_decode = (kxdrdproc_t)nsm_xdr_dec_stat,
556 .p_arglen = SM_mon_id_sz, 530 .p_arglen = SM_mon_id_sz,
557 .p_replen = SM_unmonres_sz, 531 .p_replen = SM_unmonres_sz,
558 .p_statidx = NSMPROC_UNMON, 532 .p_statidx = NSMPROC_UNMON,
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 38d26119245..9a41fdc1951 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -51,7 +51,7 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
51 return 0; 51 return 0;
52 52
53no_locks: 53no_locks:
54 nlm_release_host(host); 54 nlmsvc_release_host(host);
55 if (error) 55 if (error)
56 return error; 56 return error;
57 return nlm_lck_denied_nolocks; 57 return nlm_lck_denied_nolocks;
@@ -92,7 +92,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
92 else 92 else
93 dprintk("lockd: TEST4 status %d\n", ntohl(resp->status)); 93 dprintk("lockd: TEST4 status %d\n", ntohl(resp->status));
94 94
95 nlm_release_host(host); 95 nlmsvc_release_host(host);
96 nlm_release_file(file); 96 nlm_release_file(file);
97 return rc; 97 return rc;
98} 98}
@@ -134,7 +134,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
134 else 134 else
135 dprintk("lockd: LOCK status %d\n", ntohl(resp->status)); 135 dprintk("lockd: LOCK status %d\n", ntohl(resp->status));
136 136
137 nlm_release_host(host); 137 nlmsvc_release_host(host);
138 nlm_release_file(file); 138 nlm_release_file(file);
139 return rc; 139 return rc;
140} 140}
@@ -164,7 +164,7 @@ nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
164 resp->status = nlmsvc_cancel_blocked(file, &argp->lock); 164 resp->status = nlmsvc_cancel_blocked(file, &argp->lock);
165 165
166 dprintk("lockd: CANCEL status %d\n", ntohl(resp->status)); 166 dprintk("lockd: CANCEL status %d\n", ntohl(resp->status));
167 nlm_release_host(host); 167 nlmsvc_release_host(host);
168 nlm_release_file(file); 168 nlm_release_file(file);
169 return rpc_success; 169 return rpc_success;
170} 170}
@@ -197,7 +197,7 @@ nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
197 resp->status = nlmsvc_unlock(file, &argp->lock); 197 resp->status = nlmsvc_unlock(file, &argp->lock);
198 198
199 dprintk("lockd: UNLOCK status %d\n", ntohl(resp->status)); 199 dprintk("lockd: UNLOCK status %d\n", ntohl(resp->status));
200 nlm_release_host(host); 200 nlmsvc_release_host(host);
201 nlm_release_file(file); 201 nlm_release_file(file);
202 return rpc_success; 202 return rpc_success;
203} 203}
@@ -229,7 +229,7 @@ static void nlm4svc_callback_exit(struct rpc_task *task, void *data)
229 229
230static void nlm4svc_callback_release(void *data) 230static void nlm4svc_callback_release(void *data)
231{ 231{
232 nlm_release_call(data); 232 nlmsvc_release_call(data);
233} 233}
234 234
235static const struct rpc_call_ops nlm4svc_callback_ops = { 235static const struct rpc_call_ops nlm4svc_callback_ops = {
@@ -261,7 +261,7 @@ static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
261 261
262 stat = func(rqstp, argp, &call->a_res); 262 stat = func(rqstp, argp, &call->a_res);
263 if (stat != 0) { 263 if (stat != 0) {
264 nlm_release_call(call); 264 nlmsvc_release_call(call);
265 return stat; 265 return stat;
266 } 266 }
267 267
@@ -334,7 +334,7 @@ nlm4svc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
334 resp->status = nlmsvc_share_file(host, file, argp); 334 resp->status = nlmsvc_share_file(host, file, argp);
335 335
336 dprintk("lockd: SHARE status %d\n", ntohl(resp->status)); 336 dprintk("lockd: SHARE status %d\n", ntohl(resp->status));
337 nlm_release_host(host); 337 nlmsvc_release_host(host);
338 nlm_release_file(file); 338 nlm_release_file(file);
339 return rpc_success; 339 return rpc_success;
340} 340}
@@ -367,7 +367,7 @@ nlm4svc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
367 resp->status = nlmsvc_unshare_file(host, file, argp); 367 resp->status = nlmsvc_unshare_file(host, file, argp);
368 368
369 dprintk("lockd: UNSHARE status %d\n", ntohl(resp->status)); 369 dprintk("lockd: UNSHARE status %d\n", ntohl(resp->status));
370 nlm_release_host(host); 370 nlmsvc_release_host(host);
371 nlm_release_file(file); 371 nlm_release_file(file);
372 return rpc_success; 372 return rpc_success;
373} 373}
@@ -399,7 +399,7 @@ nlm4svc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp,
399 return rpc_success; 399 return rpc_success;
400 400
401 nlmsvc_free_host_resources(host); 401 nlmsvc_free_host_resources(host);
402 nlm_release_host(host); 402 nlmsvc_release_host(host);
403 return rpc_success; 403 return rpc_success;
404} 404}
405 405
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index ef5659b211e..6e31695d046 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -46,6 +46,7 @@ static void nlmsvc_remove_block(struct nlm_block *block);
46static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock); 46static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock);
47static void nlmsvc_freegrantargs(struct nlm_rqst *call); 47static void nlmsvc_freegrantargs(struct nlm_rqst *call);
48static const struct rpc_call_ops nlmsvc_grant_ops; 48static const struct rpc_call_ops nlmsvc_grant_ops;
49static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie);
49 50
50/* 51/*
51 * The list of blocked locks to retry 52 * The list of blocked locks to retry
@@ -233,7 +234,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_host *host,
233failed_free: 234failed_free:
234 kfree(block); 235 kfree(block);
235failed: 236failed:
236 nlm_release_call(call); 237 nlmsvc_release_call(call);
237 return NULL; 238 return NULL;
238} 239}
239 240
@@ -266,7 +267,7 @@ static void nlmsvc_free_block(struct kref *kref)
266 mutex_unlock(&file->f_mutex); 267 mutex_unlock(&file->f_mutex);
267 268
268 nlmsvc_freegrantargs(block->b_call); 269 nlmsvc_freegrantargs(block->b_call);
269 nlm_release_call(block->b_call); 270 nlmsvc_release_call(block->b_call);
270 nlm_release_file(block->b_file); 271 nlm_release_file(block->b_file);
271 kfree(block->b_fl); 272 kfree(block->b_fl);
272 kfree(block); 273 kfree(block);
@@ -934,3 +935,32 @@ nlmsvc_retry_blocked(void)
934 935
935 return timeout; 936 return timeout;
936} 937}
938
939#ifdef RPC_DEBUG
940static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie)
941{
942 /*
943 * We can get away with a static buffer because we're only
944 * called with BKL held.
945 */
946 static char buf[2*NLM_MAXCOOKIELEN+1];
947 unsigned int i, len = sizeof(buf);
948 char *p = buf;
949
950 len--; /* allow for trailing \0 */
951 if (len < 3)
952 return "???";
953 for (i = 0 ; i < cookie->len ; i++) {
954 if (len < 2) {
955 strcpy(p-3, "...");
956 break;
957 }
958 sprintf(p, "%02x", cookie->data[i]);
959 p += 2;
960 len -= 2;
961 }
962 *p = '\0';
963
964 return buf;
965}
966#endif
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 0caea5310ac..d27aab11f32 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -80,7 +80,7 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
80 return 0; 80 return 0;
81 81
82no_locks: 82no_locks:
83 nlm_release_host(host); 83 nlmsvc_release_host(host);
84 if (error) 84 if (error)
85 return error; 85 return error;
86 return nlm_lck_denied_nolocks; 86 return nlm_lck_denied_nolocks;
@@ -122,7 +122,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
122 dprintk("lockd: TEST status %d vers %d\n", 122 dprintk("lockd: TEST status %d vers %d\n",
123 ntohl(resp->status), rqstp->rq_vers); 123 ntohl(resp->status), rqstp->rq_vers);
124 124
125 nlm_release_host(host); 125 nlmsvc_release_host(host);
126 nlm_release_file(file); 126 nlm_release_file(file);
127 return rc; 127 return rc;
128} 128}
@@ -164,7 +164,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
164 else 164 else
165 dprintk("lockd: LOCK status %d\n", ntohl(resp->status)); 165 dprintk("lockd: LOCK status %d\n", ntohl(resp->status));
166 166
167 nlm_release_host(host); 167 nlmsvc_release_host(host);
168 nlm_release_file(file); 168 nlm_release_file(file);
169 return rc; 169 return rc;
170} 170}
@@ -194,7 +194,7 @@ nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
194 resp->status = cast_status(nlmsvc_cancel_blocked(file, &argp->lock)); 194 resp->status = cast_status(nlmsvc_cancel_blocked(file, &argp->lock));
195 195
196 dprintk("lockd: CANCEL status %d\n", ntohl(resp->status)); 196 dprintk("lockd: CANCEL status %d\n", ntohl(resp->status));
197 nlm_release_host(host); 197 nlmsvc_release_host(host);
198 nlm_release_file(file); 198 nlm_release_file(file);
199 return rpc_success; 199 return rpc_success;
200} 200}
@@ -227,7 +227,7 @@ nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
227 resp->status = cast_status(nlmsvc_unlock(file, &argp->lock)); 227 resp->status = cast_status(nlmsvc_unlock(file, &argp->lock));
228 228
229 dprintk("lockd: UNLOCK status %d\n", ntohl(resp->status)); 229 dprintk("lockd: UNLOCK status %d\n", ntohl(resp->status));
230 nlm_release_host(host); 230 nlmsvc_release_host(host);
231 nlm_release_file(file); 231 nlm_release_file(file);
232 return rpc_success; 232 return rpc_success;
233} 233}
@@ -257,9 +257,17 @@ static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
257 -task->tk_status); 257 -task->tk_status);
258} 258}
259 259
260void nlmsvc_release_call(struct nlm_rqst *call)
261{
262 if (!atomic_dec_and_test(&call->a_count))
263 return;
264 nlmsvc_release_host(call->a_host);
265 kfree(call);
266}
267
260static void nlmsvc_callback_release(void *data) 268static void nlmsvc_callback_release(void *data)
261{ 269{
262 nlm_release_call(data); 270 nlmsvc_release_call(data);
263} 271}
264 272
265static const struct rpc_call_ops nlmsvc_callback_ops = { 273static const struct rpc_call_ops nlmsvc_callback_ops = {
@@ -291,7 +299,7 @@ static __be32 nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
291 299
292 stat = func(rqstp, argp, &call->a_res); 300 stat = func(rqstp, argp, &call->a_res);
293 if (stat != 0) { 301 if (stat != 0) {
294 nlm_release_call(call); 302 nlmsvc_release_call(call);
295 return stat; 303 return stat;
296 } 304 }
297 305
@@ -366,7 +374,7 @@ nlmsvc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
366 resp->status = cast_status(nlmsvc_share_file(host, file, argp)); 374 resp->status = cast_status(nlmsvc_share_file(host, file, argp));
367 375
368 dprintk("lockd: SHARE status %d\n", ntohl(resp->status)); 376 dprintk("lockd: SHARE status %d\n", ntohl(resp->status));
369 nlm_release_host(host); 377 nlmsvc_release_host(host);
370 nlm_release_file(file); 378 nlm_release_file(file);
371 return rpc_success; 379 return rpc_success;
372} 380}
@@ -399,7 +407,7 @@ nlmsvc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
399 resp->status = cast_status(nlmsvc_unshare_file(host, file, argp)); 407 resp->status = cast_status(nlmsvc_unshare_file(host, file, argp));
400 408
401 dprintk("lockd: UNSHARE status %d\n", ntohl(resp->status)); 409 dprintk("lockd: UNSHARE status %d\n", ntohl(resp->status));
402 nlm_release_host(host); 410 nlmsvc_release_host(host);
403 nlm_release_file(file); 411 nlm_release_file(file);
404 return rpc_success; 412 return rpc_success;
405} 413}
@@ -431,7 +439,7 @@ nlmsvc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp,
431 return rpc_success; 439 return rpc_success;
432 440
433 nlmsvc_free_host_resources(host); 441 nlmsvc_free_host_resources(host);
434 nlm_release_host(host); 442 nlmsvc_release_host(host);
435 return rpc_success; 443 return rpc_success;
436} 444}
437 445
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index b583ab0a4cb..964666c68a8 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -149,37 +149,6 @@ nlm_decode_lock(__be32 *p, struct nlm_lock *lock)
149} 149}
150 150
151/* 151/*
152 * Encode a lock as part of an NLM call
153 */
154static __be32 *
155nlm_encode_lock(__be32 *p, struct nlm_lock *lock)
156{
157 struct file_lock *fl = &lock->fl;
158 __s32 start, len;
159
160 if (!(p = xdr_encode_string(p, lock->caller))
161 || !(p = nlm_encode_fh(p, &lock->fh))
162 || !(p = nlm_encode_oh(p, &lock->oh)))
163 return NULL;
164
165 if (fl->fl_start > NLM_OFFSET_MAX
166 || (fl->fl_end > NLM_OFFSET_MAX && fl->fl_end != OFFSET_MAX))
167 return NULL;
168
169 start = loff_t_to_s32(fl->fl_start);
170 if (fl->fl_end == OFFSET_MAX)
171 len = 0;
172 else
173 len = loff_t_to_s32(fl->fl_end - fl->fl_start + 1);
174
175 *p++ = htonl(lock->svid);
176 *p++ = htonl(start);
177 *p++ = htonl(len);
178
179 return p;
180}
181
182/*
183 * Encode result of a TEST/TEST_MSG call 152 * Encode result of a TEST/TEST_MSG call
184 */ 153 */
185static __be32 * 154static __be32 *
@@ -372,259 +341,3 @@ nlmsvc_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
372{ 341{
373 return xdr_ressize_check(rqstp, p); 342 return xdr_ressize_check(rqstp, p);
374} 343}
375
376/*
377 * Now, the client side XDR functions
378 */
379#ifdef NLMCLNT_SUPPORT_SHARES
380static int
381nlmclt_decode_void(struct rpc_rqst *req, u32 *p, void *ptr)
382{
383 return 0;
384}
385#endif
386
387static int
388nlmclt_encode_testargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
389{
390 struct nlm_lock *lock = &argp->lock;
391
392 if (!(p = nlm_encode_cookie(p, &argp->cookie)))
393 return -EIO;
394 *p++ = (lock->fl.fl_type == F_WRLCK)? xdr_one : xdr_zero;
395 if (!(p = nlm_encode_lock(p, lock)))
396 return -EIO;
397 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
398 return 0;
399}
400
401static int
402nlmclt_decode_testres(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
403{
404 if (!(p = nlm_decode_cookie(p, &resp->cookie)))
405 return -EIO;
406 resp->status = *p++;
407 if (resp->status == nlm_lck_denied) {
408 struct file_lock *fl = &resp->lock.fl;
409 u32 excl;
410 s32 start, len, end;
411
412 memset(&resp->lock, 0, sizeof(resp->lock));
413 locks_init_lock(fl);
414 excl = ntohl(*p++);
415 resp->lock.svid = ntohl(*p++);
416 fl->fl_pid = (pid_t)resp->lock.svid;
417 if (!(p = nlm_decode_oh(p, &resp->lock.oh)))
418 return -EIO;
419
420 fl->fl_flags = FL_POSIX;
421 fl->fl_type = excl? F_WRLCK : F_RDLCK;
422 start = ntohl(*p++);
423 len = ntohl(*p++);
424 end = start + len - 1;
425
426 fl->fl_start = s32_to_loff_t(start);
427 if (len == 0 || end < 0)
428 fl->fl_end = OFFSET_MAX;
429 else
430 fl->fl_end = s32_to_loff_t(end);
431 }
432 return 0;
433}
434
435
436static int
437nlmclt_encode_lockargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
438{
439 struct nlm_lock *lock = &argp->lock;
440
441 if (!(p = nlm_encode_cookie(p, &argp->cookie)))
442 return -EIO;
443 *p++ = argp->block? xdr_one : xdr_zero;
444 *p++ = (lock->fl.fl_type == F_WRLCK)? xdr_one : xdr_zero;
445 if (!(p = nlm_encode_lock(p, lock)))
446 return -EIO;
447 *p++ = argp->reclaim? xdr_one : xdr_zero;
448 *p++ = htonl(argp->state);
449 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
450 return 0;
451}
452
453static int
454nlmclt_encode_cancargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
455{
456 struct nlm_lock *lock = &argp->lock;
457
458 if (!(p = nlm_encode_cookie(p, &argp->cookie)))
459 return -EIO;
460 *p++ = argp->block? xdr_one : xdr_zero;
461 *p++ = (lock->fl.fl_type == F_WRLCK)? xdr_one : xdr_zero;
462 if (!(p = nlm_encode_lock(p, lock)))
463 return -EIO;
464 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
465 return 0;
466}
467
468static int
469nlmclt_encode_unlockargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
470{
471 struct nlm_lock *lock = &argp->lock;
472
473 if (!(p = nlm_encode_cookie(p, &argp->cookie)))
474 return -EIO;
475 if (!(p = nlm_encode_lock(p, lock)))
476 return -EIO;
477 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
478 return 0;
479}
480
481static int
482nlmclt_encode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
483{
484 if (!(p = nlm_encode_cookie(p, &resp->cookie)))
485 return -EIO;
486 *p++ = resp->status;
487 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
488 return 0;
489}
490
491static int
492nlmclt_encode_testres(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
493{
494 if (!(p = nlm_encode_testres(p, resp)))
495 return -EIO;
496 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
497 return 0;
498}
499
500static int
501nlmclt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
502{
503 if (!(p = nlm_decode_cookie(p, &resp->cookie)))
504 return -EIO;
505 resp->status = *p++;
506 return 0;
507}
508
509#if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ)
510# error "NLM host name cannot be larger than XDR_MAX_NETOBJ!"
511#endif
512
513/*
514 * Buffer requirements for NLM
515 */
516#define NLM_void_sz 0
517#define NLM_cookie_sz 1+XDR_QUADLEN(NLM_MAXCOOKIELEN)
518#define NLM_caller_sz 1+XDR_QUADLEN(NLMCLNT_OHSIZE)
519#define NLM_owner_sz 1+XDR_QUADLEN(NLMCLNT_OHSIZE)
520#define NLM_fhandle_sz 1+XDR_QUADLEN(NFS2_FHSIZE)
521#define NLM_lock_sz 3+NLM_caller_sz+NLM_owner_sz+NLM_fhandle_sz
522#define NLM_holder_sz 4+NLM_owner_sz
523
524#define NLM_testargs_sz NLM_cookie_sz+1+NLM_lock_sz
525#define NLM_lockargs_sz NLM_cookie_sz+4+NLM_lock_sz
526#define NLM_cancargs_sz NLM_cookie_sz+2+NLM_lock_sz
527#define NLM_unlockargs_sz NLM_cookie_sz+NLM_lock_sz
528
529#define NLM_testres_sz NLM_cookie_sz+1+NLM_holder_sz
530#define NLM_res_sz NLM_cookie_sz+1
531#define NLM_norep_sz 0
532
533/*
534 * For NLM, a void procedure really returns nothing
535 */
536#define nlmclt_decode_norep NULL
537
538#define PROC(proc, argtype, restype) \
539[NLMPROC_##proc] = { \
540 .p_proc = NLMPROC_##proc, \
541 .p_encode = (kxdrproc_t) nlmclt_encode_##argtype, \
542 .p_decode = (kxdrproc_t) nlmclt_decode_##restype, \
543 .p_arglen = NLM_##argtype##_sz, \
544 .p_replen = NLM_##restype##_sz, \
545 .p_statidx = NLMPROC_##proc, \
546 .p_name = #proc, \
547 }
548
549static struct rpc_procinfo nlm_procedures[] = {
550 PROC(TEST, testargs, testres),
551 PROC(LOCK, lockargs, res),
552 PROC(CANCEL, cancargs, res),
553 PROC(UNLOCK, unlockargs, res),
554 PROC(GRANTED, testargs, res),
555 PROC(TEST_MSG, testargs, norep),
556 PROC(LOCK_MSG, lockargs, norep),
557 PROC(CANCEL_MSG, cancargs, norep),
558 PROC(UNLOCK_MSG, unlockargs, norep),
559 PROC(GRANTED_MSG, testargs, norep),
560 PROC(TEST_RES, testres, norep),
561 PROC(LOCK_RES, res, norep),
562 PROC(CANCEL_RES, res, norep),
563 PROC(UNLOCK_RES, res, norep),
564 PROC(GRANTED_RES, res, norep),
565#ifdef NLMCLNT_SUPPORT_SHARES
566 PROC(SHARE, shareargs, shareres),
567 PROC(UNSHARE, shareargs, shareres),
568 PROC(NM_LOCK, lockargs, res),
569 PROC(FREE_ALL, notify, void),
570#endif
571};
572
573static struct rpc_version nlm_version1 = {
574 .number = 1,
575 .nrprocs = 16,
576 .procs = nlm_procedures,
577};
578
579static struct rpc_version nlm_version3 = {
580 .number = 3,
581 .nrprocs = 24,
582 .procs = nlm_procedures,
583};
584
585static struct rpc_version * nlm_versions[] = {
586 [1] = &nlm_version1,
587 [3] = &nlm_version3,
588#ifdef CONFIG_LOCKD_V4
589 [4] = &nlm_version4,
590#endif
591};
592
593static struct rpc_stat nlm_stats;
594
595struct rpc_program nlm_program = {
596 .name = "lockd",
597 .number = NLM_PROGRAM,
598 .nrvers = ARRAY_SIZE(nlm_versions),
599 .version = nlm_versions,
600 .stats = &nlm_stats,
601};
602
603#ifdef RPC_DEBUG
604const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie)
605{
606 /*
607 * We can get away with a static buffer because we're only
608 * called with BKL held.
609 */
610 static char buf[2*NLM_MAXCOOKIELEN+1];
611 unsigned int i, len = sizeof(buf);
612 char *p = buf;
613
614 len--; /* allow for trailing \0 */
615 if (len < 3)
616 return "???";
617 for (i = 0 ; i < cookie->len ; i++) {
618 if (len < 2) {
619 strcpy(p-3, "...");
620 break;
621 }
622 sprintf(p, "%02x", cookie->data[i]);
623 p += 2;
624 len -= 2;
625 }
626 *p = '\0';
627
628 return buf;
629}
630#endif
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index ad9dbbc9145..dfa4789cd46 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -93,15 +93,6 @@ nlm4_decode_fh(__be32 *p, struct nfs_fh *f)
93 return p + XDR_QUADLEN(f->size); 93 return p + XDR_QUADLEN(f->size);
94} 94}
95 95
96static __be32 *
97nlm4_encode_fh(__be32 *p, struct nfs_fh *f)
98{
99 *p++ = htonl(f->size);
100 if (f->size) p[XDR_QUADLEN(f->size)-1] = 0; /* don't leak anything */
101 memcpy(p, f->data, f->size);
102 return p + XDR_QUADLEN(f->size);
103}
104
105/* 96/*
106 * Encode and decode owner handle 97 * Encode and decode owner handle
107 */ 98 */
@@ -112,12 +103,6 @@ nlm4_decode_oh(__be32 *p, struct xdr_netobj *oh)
112} 103}
113 104
114static __be32 * 105static __be32 *
115nlm4_encode_oh(__be32 *p, struct xdr_netobj *oh)
116{
117 return xdr_encode_netobj(p, oh);
118}
119
120static __be32 *
121nlm4_decode_lock(__be32 *p, struct nlm_lock *lock) 106nlm4_decode_lock(__be32 *p, struct nlm_lock *lock)
122{ 107{
123 struct file_lock *fl = &lock->fl; 108 struct file_lock *fl = &lock->fl;
@@ -150,38 +135,6 @@ nlm4_decode_lock(__be32 *p, struct nlm_lock *lock)
150} 135}
151 136
152/* 137/*
153 * Encode a lock as part of an NLM call
154 */
155static __be32 *
156nlm4_encode_lock(__be32 *p, struct nlm_lock *lock)
157{
158 struct file_lock *fl = &lock->fl;
159 __s64 start, len;
160
161 if (!(p = xdr_encode_string(p, lock->caller))
162 || !(p = nlm4_encode_fh(p, &lock->fh))
163 || !(p = nlm4_encode_oh(p, &lock->oh)))
164 return NULL;
165
166 if (fl->fl_start > NLM4_OFFSET_MAX
167 || (fl->fl_end > NLM4_OFFSET_MAX && fl->fl_end != OFFSET_MAX))
168 return NULL;
169
170 *p++ = htonl(lock->svid);
171
172 start = loff_t_to_s64(fl->fl_start);
173 if (fl->fl_end == OFFSET_MAX)
174 len = 0;
175 else
176 len = loff_t_to_s64(fl->fl_end - fl->fl_start + 1);
177
178 p = xdr_encode_hyper(p, start);
179 p = xdr_encode_hyper(p, len);
180
181 return p;
182}
183
184/*
185 * Encode result of a TEST/TEST_MSG call 138 * Encode result of a TEST/TEST_MSG call
186 */ 139 */
187static __be32 * 140static __be32 *
@@ -379,211 +332,3 @@ nlm4svc_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
379{ 332{
380 return xdr_ressize_check(rqstp, p); 333 return xdr_ressize_check(rqstp, p);
381} 334}
382
383/*
384 * Now, the client side XDR functions
385 */
386#ifdef NLMCLNT_SUPPORT_SHARES
387static int
388nlm4clt_decode_void(struct rpc_rqst *req, __be32 *p, void *ptr)
389{
390 return 0;
391}
392#endif
393
394static int
395nlm4clt_encode_testargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
396{
397 struct nlm_lock *lock = &argp->lock;
398
399 if (!(p = nlm4_encode_cookie(p, &argp->cookie)))
400 return -EIO;
401 *p++ = (lock->fl.fl_type == F_WRLCK)? xdr_one : xdr_zero;
402 if (!(p = nlm4_encode_lock(p, lock)))
403 return -EIO;
404 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
405 return 0;
406}
407
408static int
409nlm4clt_decode_testres(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
410{
411 if (!(p = nlm4_decode_cookie(p, &resp->cookie)))
412 return -EIO;
413 resp->status = *p++;
414 if (resp->status == nlm_lck_denied) {
415 struct file_lock *fl = &resp->lock.fl;
416 u32 excl;
417 __u64 start, len;
418 __s64 end;
419
420 memset(&resp->lock, 0, sizeof(resp->lock));
421 locks_init_lock(fl);
422 excl = ntohl(*p++);
423 resp->lock.svid = ntohl(*p++);
424 fl->fl_pid = (pid_t)resp->lock.svid;
425 if (!(p = nlm4_decode_oh(p, &resp->lock.oh)))
426 return -EIO;
427
428 fl->fl_flags = FL_POSIX;
429 fl->fl_type = excl? F_WRLCK : F_RDLCK;
430 p = xdr_decode_hyper(p, &start);
431 p = xdr_decode_hyper(p, &len);
432 end = start + len - 1;
433
434 fl->fl_start = s64_to_loff_t(start);
435 if (len == 0 || end < 0)
436 fl->fl_end = OFFSET_MAX;
437 else
438 fl->fl_end = s64_to_loff_t(end);
439 }
440 return 0;
441}
442
443
444static int
445nlm4clt_encode_lockargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
446{
447 struct nlm_lock *lock = &argp->lock;
448
449 if (!(p = nlm4_encode_cookie(p, &argp->cookie)))
450 return -EIO;
451 *p++ = argp->block? xdr_one : xdr_zero;
452 *p++ = (lock->fl.fl_type == F_WRLCK)? xdr_one : xdr_zero;
453 if (!(p = nlm4_encode_lock(p, lock)))
454 return -EIO;
455 *p++ = argp->reclaim? xdr_one : xdr_zero;
456 *p++ = htonl(argp->state);
457 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
458 return 0;
459}
460
461static int
462nlm4clt_encode_cancargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
463{
464 struct nlm_lock *lock = &argp->lock;
465
466 if (!(p = nlm4_encode_cookie(p, &argp->cookie)))
467 return -EIO;
468 *p++ = argp->block? xdr_one : xdr_zero;
469 *p++ = (lock->fl.fl_type == F_WRLCK)? xdr_one : xdr_zero;
470 if (!(p = nlm4_encode_lock(p, lock)))
471 return -EIO;
472 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
473 return 0;
474}
475
476static int
477nlm4clt_encode_unlockargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
478{
479 struct nlm_lock *lock = &argp->lock;
480
481 if (!(p = nlm4_encode_cookie(p, &argp->cookie)))
482 return -EIO;
483 if (!(p = nlm4_encode_lock(p, lock)))
484 return -EIO;
485 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
486 return 0;
487}
488
489static int
490nlm4clt_encode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
491{
492 if (!(p = nlm4_encode_cookie(p, &resp->cookie)))
493 return -EIO;
494 *p++ = resp->status;
495 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
496 return 0;
497}
498
499static int
500nlm4clt_encode_testres(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
501{
502 if (!(p = nlm4_encode_testres(p, resp)))
503 return -EIO;
504 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
505 return 0;
506}
507
508static int
509nlm4clt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
510{
511 if (!(p = nlm4_decode_cookie(p, &resp->cookie)))
512 return -EIO;
513 resp->status = *p++;
514 return 0;
515}
516
517#if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ)
518# error "NLM host name cannot be larger than XDR_MAX_NETOBJ!"
519#endif
520
521#if (NLMCLNT_OHSIZE > NLM_MAXSTRLEN)
522# error "NLM host name cannot be larger than NLM's maximum string length!"
523#endif
524
525/*
526 * Buffer requirements for NLM
527 */
528#define NLM4_void_sz 0
529#define NLM4_cookie_sz 1+XDR_QUADLEN(NLM_MAXCOOKIELEN)
530#define NLM4_caller_sz 1+XDR_QUADLEN(NLMCLNT_OHSIZE)
531#define NLM4_owner_sz 1+XDR_QUADLEN(NLMCLNT_OHSIZE)
532#define NLM4_fhandle_sz 1+XDR_QUADLEN(NFS3_FHSIZE)
533#define NLM4_lock_sz 5+NLM4_caller_sz+NLM4_owner_sz+NLM4_fhandle_sz
534#define NLM4_holder_sz 6+NLM4_owner_sz
535
536#define NLM4_testargs_sz NLM4_cookie_sz+1+NLM4_lock_sz
537#define NLM4_lockargs_sz NLM4_cookie_sz+4+NLM4_lock_sz
538#define NLM4_cancargs_sz NLM4_cookie_sz+2+NLM4_lock_sz
539#define NLM4_unlockargs_sz NLM4_cookie_sz+NLM4_lock_sz
540
541#define NLM4_testres_sz NLM4_cookie_sz+1+NLM4_holder_sz
542#define NLM4_res_sz NLM4_cookie_sz+1
543#define NLM4_norep_sz 0
544
545/*
546 * For NLM, a void procedure really returns nothing
547 */
548#define nlm4clt_decode_norep NULL
549
550#define PROC(proc, argtype, restype) \
551[NLMPROC_##proc] = { \
552 .p_proc = NLMPROC_##proc, \
553 .p_encode = (kxdrproc_t) nlm4clt_encode_##argtype, \
554 .p_decode = (kxdrproc_t) nlm4clt_decode_##restype, \
555 .p_arglen = NLM4_##argtype##_sz, \
556 .p_replen = NLM4_##restype##_sz, \
557 .p_statidx = NLMPROC_##proc, \
558 .p_name = #proc, \
559 }
560
561static struct rpc_procinfo nlm4_procedures[] = {
562 PROC(TEST, testargs, testres),
563 PROC(LOCK, lockargs, res),
564 PROC(CANCEL, cancargs, res),
565 PROC(UNLOCK, unlockargs, res),
566 PROC(GRANTED, testargs, res),
567 PROC(TEST_MSG, testargs, norep),
568 PROC(LOCK_MSG, lockargs, norep),
569 PROC(CANCEL_MSG, cancargs, norep),
570 PROC(UNLOCK_MSG, unlockargs, norep),
571 PROC(GRANTED_MSG, testargs, norep),
572 PROC(TEST_RES, testres, norep),
573 PROC(LOCK_RES, res, norep),
574 PROC(CANCEL_RES, res, norep),
575 PROC(UNLOCK_RES, res, norep),
576 PROC(GRANTED_RES, res, norep),
577#ifdef NLMCLNT_SUPPORT_SHARES
578 PROC(SHARE, shareargs, shareres),
579 PROC(UNSHARE, shareargs, shareres),
580 PROC(NM_LOCK, lockargs, res),
581 PROC(FREE_ALL, notify, void),
582#endif
583};
584
585struct rpc_version nlm_version4 = {
586 .number = 4,
587 .nrprocs = 24,
588 .procs = nlm4_procedures,
589};
diff --git a/fs/locks.c b/fs/locks.c
index 8729347bcd1..08415b2a6d3 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1389,7 +1389,7 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
1389 if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) 1389 if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
1390 goto out; 1390 goto out;
1391 if ((arg == F_WRLCK) 1391 if ((arg == F_WRLCK)
1392 && ((atomic_read(&dentry->d_count) > 1) 1392 && ((dentry->d_count > 1)
1393 || (atomic_read(&inode->i_count) > 1))) 1393 || (atomic_read(&inode->i_count) > 1)))
1394 goto out; 1394 goto out;
1395 } 1395 }
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 409dfd65e9a..f9ddf0c388c 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -555,9 +555,11 @@ static int logfs_symlink(struct inode *dir, struct dentry *dentry,
555 return __logfs_create(dir, dentry, inode, target, destlen); 555 return __logfs_create(dir, dentry, inode, target, destlen);
556} 556}
557 557
558static int logfs_permission(struct inode *inode, int mask) 558static int logfs_permission(struct inode *inode, int mask, unsigned int flags)
559{ 559{
560 return generic_permission(inode, mask, NULL); 560 if (flags & IPERM_FLAG_RCU)
561 return -ECHILD;
562 return generic_permission(inode, mask, flags, NULL);
561} 563}
562 564
563static int logfs_link(struct dentry *old_dentry, struct inode *dir, 565static int logfs_link(struct dentry *old_dentry, struct inode *dir,
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index d8c71ece098..03b8c240aed 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -141,13 +141,20 @@ struct inode *logfs_safe_iget(struct super_block *sb, ino_t ino, int *is_cached)
141 return __logfs_iget(sb, ino); 141 return __logfs_iget(sb, ino);
142} 142}
143 143
144static void logfs_i_callback(struct rcu_head *head)
145{
146 struct inode *inode = container_of(head, struct inode, i_rcu);
147 INIT_LIST_HEAD(&inode->i_dentry);
148 kmem_cache_free(logfs_inode_cache, logfs_inode(inode));
149}
150
144static void __logfs_destroy_inode(struct inode *inode) 151static void __logfs_destroy_inode(struct inode *inode)
145{ 152{
146 struct logfs_inode *li = logfs_inode(inode); 153 struct logfs_inode *li = logfs_inode(inode);
147 154
148 BUG_ON(li->li_block); 155 BUG_ON(li->li_block);
149 list_del(&li->li_freeing_list); 156 list_del(&li->li_freeing_list);
150 kmem_cache_free(logfs_inode_cache, li); 157 call_rcu(&inode->i_rcu, logfs_i_callback);
151} 158}
152 159
153static void logfs_destroy_inode(struct inode *inode) 160static void logfs_destroy_inode(struct inode *inode)
diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c
index f46ee8b0e13..9da29706f91 100644
--- a/fs/logfs/journal.c
+++ b/fs/logfs/journal.c
@@ -828,7 +828,7 @@ void do_logfs_journal_wl_pass(struct super_block *sb)
828 super->s_journal_seg[i] = segno; 828 super->s_journal_seg[i] = segno;
829 super->s_journal_ec[i] = ec; 829 super->s_journal_ec[i] = ec;
830 logfs_set_segment_reserved(sb, segno); 830 logfs_set_segment_reserved(sb, segno);
831 err = btree_insert32(head, segno, (void *)1, GFP_KERNEL); 831 err = btree_insert32(head, segno, (void *)1, GFP_NOFS);
832 BUG_ON(err); /* mempool should prevent this */ 832 BUG_ON(err); /* mempool should prevent this */
833 err = logfs_erase_segment(sb, segno, 1); 833 err = logfs_erase_segment(sb, segno, 1);
834 BUG_ON(err); /* FIXME: remount-ro would be nicer */ 834 BUG_ON(err); /* FIXME: remount-ro would be nicer */
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 6127baf0e18..ee99a9f5dfd 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -1994,6 +1994,9 @@ static int do_write_inode(struct inode *inode)
1994 1994
1995 /* FIXME: transaction is part of logfs_block now. Is that enough? */ 1995 /* FIXME: transaction is part of logfs_block now. Is that enough? */
1996 err = logfs_write_buf(master_inode, page, 0); 1996 err = logfs_write_buf(master_inode, page, 0);
1997 if (err)
1998 move_page_to_inode(inode, page);
1999
1997 logfs_put_write_page(page); 2000 logfs_put_write_page(page);
1998 return err; 2001 return err;
1999} 2002}
diff --git a/fs/mbcache.c b/fs/mbcache.c
index 93444747237..a25444ab2ba 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -76,18 +76,6 @@ EXPORT_SYMBOL(mb_cache_entry_find_first);
76EXPORT_SYMBOL(mb_cache_entry_find_next); 76EXPORT_SYMBOL(mb_cache_entry_find_next);
77#endif 77#endif
78 78
79struct mb_cache {
80 struct list_head c_cache_list;
81 const char *c_name;
82 atomic_t c_entry_count;
83 int c_max_entries;
84 int c_bucket_bits;
85 struct kmem_cache *c_entry_cache;
86 struct list_head *c_block_hash;
87 struct list_head *c_index_hash;
88};
89
90
91/* 79/*
92 * Global data: list of all mbcache's, lru list, and a spinlock for 80 * Global data: list of all mbcache's, lru list, and a spinlock for
93 * accessing cache data structures on SMP machines. The lru list is 81 * accessing cache data structures on SMP machines. The lru list is
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index fb2020858a3..ae0b83f476a 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -68,11 +68,18 @@ static struct inode *minix_alloc_inode(struct super_block *sb)
68 return &ei->vfs_inode; 68 return &ei->vfs_inode;
69} 69}
70 70
71static void minix_destroy_inode(struct inode *inode) 71static void minix_i_callback(struct rcu_head *head)
72{ 72{
73 struct inode *inode = container_of(head, struct inode, i_rcu);
74 INIT_LIST_HEAD(&inode->i_dentry);
73 kmem_cache_free(minix_inode_cachep, minix_i(inode)); 75 kmem_cache_free(minix_inode_cachep, minix_i(inode));
74} 76}
75 77
78static void minix_destroy_inode(struct inode *inode)
79{
80 call_rcu(&inode->i_rcu, minix_i_callback);
81}
82
76static void init_once(void *foo) 83static void init_once(void *foo)
77{ 84{
78 struct minix_inode_info *ei = (struct minix_inode_info *) foo; 85 struct minix_inode_info *ei = (struct minix_inode_info *) foo;
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index c0d35a3acce..1b9e07728a9 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -23,7 +23,7 @@ static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, st
23 struct inode * inode = NULL; 23 struct inode * inode = NULL;
24 ino_t ino; 24 ino_t ino;
25 25
26 dentry->d_op = dir->i_sb->s_root->d_op; 26 d_set_d_op(dentry, dir->i_sb->s_root->d_op);
27 27
28 if (dentry->d_name.len > minix_sb(dir->i_sb)->s_namelen) 28 if (dentry->d_name.len > minix_sb(dir->i_sb)->s_namelen)
29 return ERR_PTR(-ENAMETOOLONG); 29 return ERR_PTR(-ENAMETOOLONG);
diff --git a/fs/namei.c b/fs/namei.c
index 4ff7ca53053..24ece10470b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -169,8 +169,8 @@ EXPORT_SYMBOL(putname);
169/* 169/*
170 * This does basic POSIX ACL permission checking 170 * This does basic POSIX ACL permission checking
171 */ 171 */
172static int acl_permission_check(struct inode *inode, int mask, 172static int acl_permission_check(struct inode *inode, int mask, unsigned int flags,
173 int (*check_acl)(struct inode *inode, int mask)) 173 int (*check_acl)(struct inode *inode, int mask, unsigned int flags))
174{ 174{
175 umode_t mode = inode->i_mode; 175 umode_t mode = inode->i_mode;
176 176
@@ -180,7 +180,7 @@ static int acl_permission_check(struct inode *inode, int mask,
180 mode >>= 6; 180 mode >>= 6;
181 else { 181 else {
182 if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) { 182 if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) {
183 int error = check_acl(inode, mask); 183 int error = check_acl(inode, mask, flags);
184 if (error != -EAGAIN) 184 if (error != -EAGAIN)
185 return error; 185 return error;
186 } 186 }
@@ -198,25 +198,30 @@ static int acl_permission_check(struct inode *inode, int mask,
198} 198}
199 199
200/** 200/**
201 * generic_permission - check for access rights on a Posix-like filesystem 201 * generic_permission - check for access rights on a Posix-like filesystem
202 * @inode: inode to check access rights for 202 * @inode: inode to check access rights for
203 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) 203 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
204 * @check_acl: optional callback to check for Posix ACLs 204 * @check_acl: optional callback to check for Posix ACLs
205 * @flags: IPERM_FLAG_ flags.
205 * 206 *
206 * Used to check for read/write/execute permissions on a file. 207 * Used to check for read/write/execute permissions on a file.
207 * We use "fsuid" for this, letting us set arbitrary permissions 208 * We use "fsuid" for this, letting us set arbitrary permissions
208 * for filesystem access without changing the "normal" uids which 209 * for filesystem access without changing the "normal" uids which
209 * are used for other things.. 210 * are used for other things.
211 *
212 * generic_permission is rcu-walk aware. It returns -ECHILD in case an rcu-walk
213 * request cannot be satisfied (eg. requires blocking or too much complexity).
214 * It would then be called again in ref-walk mode.
210 */ 215 */
211int generic_permission(struct inode *inode, int mask, 216int generic_permission(struct inode *inode, int mask, unsigned int flags,
212 int (*check_acl)(struct inode *inode, int mask)) 217 int (*check_acl)(struct inode *inode, int mask, unsigned int flags))
213{ 218{
214 int ret; 219 int ret;
215 220
216 /* 221 /*
217 * Do the basic POSIX ACL permission checks. 222 * Do the basic POSIX ACL permission checks.
218 */ 223 */
219 ret = acl_permission_check(inode, mask, check_acl); 224 ret = acl_permission_check(inode, mask, flags, check_acl);
220 if (ret != -EACCES) 225 if (ret != -EACCES)
221 return ret; 226 return ret;
222 227
@@ -271,9 +276,10 @@ int inode_permission(struct inode *inode, int mask)
271 } 276 }
272 277
273 if (inode->i_op->permission) 278 if (inode->i_op->permission)
274 retval = inode->i_op->permission(inode, mask); 279 retval = inode->i_op->permission(inode, mask, 0);
275 else 280 else
276 retval = generic_permission(inode, mask, inode->i_op->check_acl); 281 retval = generic_permission(inode, mask, 0,
282 inode->i_op->check_acl);
277 283
278 if (retval) 284 if (retval)
279 return retval; 285 return retval;
@@ -362,6 +368,18 @@ void path_get(struct path *path)
362EXPORT_SYMBOL(path_get); 368EXPORT_SYMBOL(path_get);
363 369
364/** 370/**
371 * path_get_long - get a long reference to a path
372 * @path: path to get the reference to
373 *
374 * Given a path increment the reference count to the dentry and the vfsmount.
375 */
376void path_get_long(struct path *path)
377{
378 mntget_long(path->mnt);
379 dget(path->dentry);
380}
381
382/**
365 * path_put - put a reference to a path 383 * path_put - put a reference to a path
366 * @path: path to put the reference to 384 * @path: path to put the reference to
367 * 385 *
@@ -375,6 +393,185 @@ void path_put(struct path *path)
375EXPORT_SYMBOL(path_put); 393EXPORT_SYMBOL(path_put);
376 394
377/** 395/**
396 * path_put_long - put a long reference to a path
397 * @path: path to put the reference to
398 *
399 * Given a path decrement the reference count to the dentry and the vfsmount.
400 */
401void path_put_long(struct path *path)
402{
403 dput(path->dentry);
404 mntput_long(path->mnt);
405}
406
407/**
408 * nameidata_drop_rcu - drop this nameidata out of rcu-walk
409 * @nd: nameidata pathwalk data to drop
410 * Returns: 0 on success, -ECHILD on failure
411 *
412 * Path walking has 2 modes, rcu-walk and ref-walk (see
413 * Documentation/filesystems/path-lookup.txt). __drop_rcu* functions attempt
414 * to drop out of rcu-walk mode and take normal reference counts on dentries
415 * and vfsmounts to transition to ref-walk mode. __drop_rcu* functions take
416 * refcounts at the last known good point before rcu-walk got stuck, so
417 * ref-walk may continue from there. If this is not successful (eg. a seqcount
418 * has changed), then failure is returned and path walk restarts from the
419 * beginning in ref-walk mode.
420 *
421 * nameidata_drop_rcu attempts to drop the current nd->path and nd->root into
422 * ref-walk. Must be called from rcu-walk context.
423 */
424static int nameidata_drop_rcu(struct nameidata *nd)
425{
426 struct fs_struct *fs = current->fs;
427 struct dentry *dentry = nd->path.dentry;
428
429 BUG_ON(!(nd->flags & LOOKUP_RCU));
430 if (nd->root.mnt) {
431 spin_lock(&fs->lock);
432 if (nd->root.mnt != fs->root.mnt ||
433 nd->root.dentry != fs->root.dentry)
434 goto err_root;
435 }
436 spin_lock(&dentry->d_lock);
437 if (!__d_rcu_to_refcount(dentry, nd->seq))
438 goto err;
439 BUG_ON(nd->inode != dentry->d_inode);
440 spin_unlock(&dentry->d_lock);
441 if (nd->root.mnt) {
442 path_get(&nd->root);
443 spin_unlock(&fs->lock);
444 }
445 mntget(nd->path.mnt);
446
447 rcu_read_unlock();
448 br_read_unlock(vfsmount_lock);
449 nd->flags &= ~LOOKUP_RCU;
450 return 0;
451err:
452 spin_unlock(&dentry->d_lock);
453err_root:
454 if (nd->root.mnt)
455 spin_unlock(&fs->lock);
456 return -ECHILD;
457}
458
459/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
460static inline int nameidata_drop_rcu_maybe(struct nameidata *nd)
461{
462 if (nd->flags & LOOKUP_RCU)
463 return nameidata_drop_rcu(nd);
464 return 0;
465}
466
467/**
468 * nameidata_dentry_drop_rcu - drop nameidata and dentry out of rcu-walk
469 * @nd: nameidata pathwalk data to drop
470 * @dentry: dentry to drop
471 * Returns: 0 on success, -ECHILD on failure
472 *
473 * nameidata_dentry_drop_rcu attempts to drop the current nd->path and nd->root,
474 * and dentry into ref-walk. @dentry must be a path found by a do_lookup call on
475 * @nd. Must be called from rcu-walk context.
476 */
477static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry)
478{
479 struct fs_struct *fs = current->fs;
480 struct dentry *parent = nd->path.dentry;
481
482 BUG_ON(!(nd->flags & LOOKUP_RCU));
483 if (nd->root.mnt) {
484 spin_lock(&fs->lock);
485 if (nd->root.mnt != fs->root.mnt ||
486 nd->root.dentry != fs->root.dentry)
487 goto err_root;
488 }
489 spin_lock(&parent->d_lock);
490 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
491 if (!__d_rcu_to_refcount(dentry, nd->seq))
492 goto err;
493 /*
494 * If the sequence check on the child dentry passed, then the child has
495 * not been removed from its parent. This means the parent dentry must
496 * be valid and able to take a reference at this point.
497 */
498 BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent);
499 BUG_ON(!parent->d_count);
500 parent->d_count++;
501 spin_unlock(&dentry->d_lock);
502 spin_unlock(&parent->d_lock);
503 if (nd->root.mnt) {
504 path_get(&nd->root);
505 spin_unlock(&fs->lock);
506 }
507 mntget(nd->path.mnt);
508
509 rcu_read_unlock();
510 br_read_unlock(vfsmount_lock);
511 nd->flags &= ~LOOKUP_RCU;
512 return 0;
513err:
514 spin_unlock(&dentry->d_lock);
515 spin_unlock(&parent->d_lock);
516err_root:
517 if (nd->root.mnt)
518 spin_unlock(&fs->lock);
519 return -ECHILD;
520}
521
522/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
523static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry)
524{
525 if (nd->flags & LOOKUP_RCU)
526 return nameidata_dentry_drop_rcu(nd, dentry);
527 return 0;
528}
529
530/**
531 * nameidata_drop_rcu_last - drop nameidata ending path walk out of rcu-walk
532 * @nd: nameidata pathwalk data to drop
533 * Returns: 0 on success, -ECHILD on failure
534 *
535 * nameidata_drop_rcu_last attempts to drop the current nd->path into ref-walk.
536 * nd->path should be the final element of the lookup, so nd->root is discarded.
537 * Must be called from rcu-walk context.
538 */
539static int nameidata_drop_rcu_last(struct nameidata *nd)
540{
541 struct dentry *dentry = nd->path.dentry;
542
543 BUG_ON(!(nd->flags & LOOKUP_RCU));
544 nd->flags &= ~LOOKUP_RCU;
545 nd->root.mnt = NULL;
546 spin_lock(&dentry->d_lock);
547 if (!__d_rcu_to_refcount(dentry, nd->seq))
548 goto err_unlock;
549 BUG_ON(nd->inode != dentry->d_inode);
550 spin_unlock(&dentry->d_lock);
551
552 mntget(nd->path.mnt);
553
554 rcu_read_unlock();
555 br_read_unlock(vfsmount_lock);
556
557 return 0;
558
559err_unlock:
560 spin_unlock(&dentry->d_lock);
561 rcu_read_unlock();
562 br_read_unlock(vfsmount_lock);
563 return -ECHILD;
564}
565
566/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
567static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd)
568{
569 if (likely(nd->flags & LOOKUP_RCU))
570 return nameidata_drop_rcu_last(nd);
571 return 0;
572}
573
574/**
378 * release_open_intent - free up open intent resources 575 * release_open_intent - free up open intent resources
379 * @nd: pointer to nameidata 576 * @nd: pointer to nameidata
380 */ 577 */
@@ -386,10 +583,26 @@ void release_open_intent(struct nameidata *nd)
386 fput(nd->intent.open.file); 583 fput(nd->intent.open.file);
387} 584}
388 585
/*
 * Call the dentry's ->d_revalidate() method and, if it answers -ECHILD,
 * try to leave rcu-walk and call it one more time.
 *
 * @dentry: dentry to revalidate; ->d_op->d_revalidate is called
 *          unconditionally, so it must be set
 * @nd:     pathwalk state passed through to ->d_revalidate()
 *
 * Returns the status from ->d_revalidate().  -ECHILD from the first call is
 * returned unchanged when nameidata_dentry_drop_rcu() fails; otherwise the
 * status of the second call is returned.
 */
586static int d_revalidate(struct dentry *dentry, struct nameidata *nd)
587{
588 int status;
589
590 status = dentry->d_op->d_revalidate(dentry, nd);
591 if (status == -ECHILD) {
592 if (nameidata_dentry_drop_rcu(nd, dentry)) /* could not leave rcu-walk: hand -ECHILD back */
593 return status;
594 status = dentry->d_op->d_revalidate(dentry, nd); /* retry after the drop succeeded */
595 }
596
597 return status;
598}
599
389static inline struct dentry * 600static inline struct dentry *
390do_revalidate(struct dentry *dentry, struct nameidata *nd) 601do_revalidate(struct dentry *dentry, struct nameidata *nd)
391{ 602{
392 int status = dentry->d_op->d_revalidate(dentry, nd); 603 int status;
604
605 status = d_revalidate(dentry, nd);
393 if (unlikely(status <= 0)) { 606 if (unlikely(status <= 0)) {
394 /* 607 /*
395 * The dentry failed validation. 608 * The dentry failed validation.
@@ -397,19 +610,36 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
397 * the dentry otherwise d_revalidate is asking us 610 * the dentry otherwise d_revalidate is asking us
398 * to return a fail status. 611 * to return a fail status.
399 */ 612 */
400 if (!status) { 613 if (status < 0) {
614 /* If we're in rcu-walk, we don't have a ref */
615 if (!(nd->flags & LOOKUP_RCU))
616 dput(dentry);
617 dentry = ERR_PTR(status);
618
619 } else {
620 /* Don't d_invalidate in rcu-walk mode */
621 if (nameidata_dentry_drop_rcu_maybe(nd, dentry))
622 return ERR_PTR(-ECHILD);
401 if (!d_invalidate(dentry)) { 623 if (!d_invalidate(dentry)) {
402 dput(dentry); 624 dput(dentry);
403 dentry = NULL; 625 dentry = NULL;
404 } 626 }
405 } else {
406 dput(dentry);
407 dentry = ERR_PTR(status);
408 } 627 }
409 } 628 }
410 return dentry; 629 return dentry;
411} 630}
412 631
/*
 * Tell whether @dentry needs the FS_REVAL_DOT style revalidation.
 *
 * Returns 1 only when the dentry has DCACHE_OP_REVALIDATE set in d_flags
 * and its superblock's filesystem type has FS_REVAL_DOT set in fs_flags;
 * returns 0 otherwise.  Both "not set" outcomes are annotated as the
 * likely case.
 */
632static inline int need_reval_dot(struct dentry *dentry)
633{
634 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
635 return 0;
636
637 if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
638 return 0;
639
640 return 1;
641}
642
413/* 643/*
414 * force_reval_path - force revalidation of a dentry 644 * force_reval_path - force revalidation of a dentry
415 * 645 *
@@ -433,13 +663,12 @@ force_reval_path(struct path *path, struct nameidata *nd)
433 663
434 /* 664 /*
435 * only check on filesystems where it's possible for the dentry to 665 * only check on filesystems where it's possible for the dentry to
436 * become stale. It's assumed that if this flag is set then the 666 * become stale.
437 * d_revalidate op will also be defined.
438 */ 667 */
439 if (!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) 668 if (!need_reval_dot(dentry))
440 return 0; 669 return 0;
441 670
442 status = dentry->d_op->d_revalidate(dentry, nd); 671 status = d_revalidate(dentry, nd);
443 if (status > 0) 672 if (status > 0)
444 return 0; 673 return 0;
445 674
@@ -459,26 +688,27 @@ force_reval_path(struct path *path, struct nameidata *nd)
459 * short-cut DAC fails, then call ->permission() to do more 688 * short-cut DAC fails, then call ->permission() to do more
460 * complete permission check. 689 * complete permission check.
461 */ 690 */
462static int exec_permission(struct inode *inode) 691static inline int exec_permission(struct inode *inode, unsigned int flags)
463{ 692{
464 int ret; 693 int ret;
465 694
466 if (inode->i_op->permission) { 695 if (inode->i_op->permission) {
467 ret = inode->i_op->permission(inode, MAY_EXEC); 696 ret = inode->i_op->permission(inode, MAY_EXEC, flags);
468 if (!ret) 697 } else {
469 goto ok; 698 ret = acl_permission_check(inode, MAY_EXEC, flags,
470 return ret; 699 inode->i_op->check_acl);
471 } 700 }
472 ret = acl_permission_check(inode, MAY_EXEC, inode->i_op->check_acl); 701 if (likely(!ret))
473 if (!ret)
474 goto ok; 702 goto ok;
703 if (ret == -ECHILD)
704 return ret;
475 705
476 if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH)) 706 if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH))
477 goto ok; 707 goto ok;
478 708
479 return ret; 709 return ret;
480ok: 710ok:
481 return security_inode_permission(inode, MAY_EXEC); 711 return security_inode_exec_permission(inode, flags);
482} 712}
483 713
484static __always_inline void set_root(struct nameidata *nd) 714static __always_inline void set_root(struct nameidata *nd)
@@ -489,8 +719,23 @@ static __always_inline void set_root(struct nameidata *nd)
489 719
490static int link_path_walk(const char *, struct nameidata *); 720static int link_path_walk(const char *, struct nameidata *);
491 721
/*
 * Fill in nd->root from current->fs->root if it has not been set yet
 * (nd->root.mnt is NULL); does nothing when it is already set.
 *
 * The copy is repeated until read_seqcount_retry() on fs->seq reports that
 * the read was not disturbed.  No reference is taken on the copied path in
 * this function.
 */
722static __always_inline void set_root_rcu(struct nameidata *nd)
723{
724 if (!nd->root.mnt) {
725 struct fs_struct *fs = current->fs;
726 unsigned seq;
727
728 do {
729 seq = read_seqcount_begin(&fs->seq);
730 nd->root = fs->root;
731 } while (read_seqcount_retry(&fs->seq, seq)); /* fs->seq changed during the copy: redo it */
732 }
733}
734
492static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) 735static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
493{ 736{
737 int ret;
738
494 if (IS_ERR(link)) 739 if (IS_ERR(link))
495 goto fail; 740 goto fail;
496 741
@@ -500,8 +745,10 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
500 nd->path = nd->root; 745 nd->path = nd->root;
501 path_get(&nd->root); 746 path_get(&nd->root);
502 } 747 }
748 nd->inode = nd->path.dentry->d_inode;
503 749
504 return link_path_walk(link, nd); 750 ret = link_path_walk(link, nd);
751 return ret;
505fail: 752fail:
506 path_put(&nd->path); 753 path_put(&nd->path);
507 return PTR_ERR(link); 754 return PTR_ERR(link);
@@ -516,11 +763,12 @@ static void path_put_conditional(struct path *path, struct nameidata *nd)
516 763
517static inline void path_to_nameidata(struct path *path, struct nameidata *nd) 764static inline void path_to_nameidata(struct path *path, struct nameidata *nd)
518{ 765{
519 dput(nd->path.dentry); 766 if (!(nd->flags & LOOKUP_RCU)) {
520 if (nd->path.mnt != path->mnt) { 767 dput(nd->path.dentry);
521 mntput(nd->path.mnt); 768 if (nd->path.mnt != path->mnt)
522 nd->path.mnt = path->mnt; 769 mntput(nd->path.mnt);
523 } 770 }
771 nd->path.mnt = path->mnt;
524 nd->path.dentry = path->dentry; 772 nd->path.dentry = path->dentry;
525} 773}
526 774
@@ -535,9 +783,11 @@ __do_follow_link(struct path *path, struct nameidata *nd, void **p)
535 783
536 if (path->mnt != nd->path.mnt) { 784 if (path->mnt != nd->path.mnt) {
537 path_to_nameidata(path, nd); 785 path_to_nameidata(path, nd);
786 nd->inode = nd->path.dentry->d_inode;
538 dget(dentry); 787 dget(dentry);
539 } 788 }
540 mntget(path->mnt); 789 mntget(path->mnt);
790
541 nd->last_type = LAST_BIND; 791 nd->last_type = LAST_BIND;
542 *p = dentry->d_inode->i_op->follow_link(dentry, nd); 792 *p = dentry->d_inode->i_op->follow_link(dentry, nd);
543 error = PTR_ERR(*p); 793 error = PTR_ERR(*p);
@@ -591,6 +841,20 @@ loop:
591 return err; 841 return err;
592} 842}
593 843
/*
 * Move @path to the mountpoint of its mount within the parent mount.
 *
 * Returns 0 and leaves @path untouched when the mount is its own parent
 * (mnt_parent == mnt).  Otherwise sets path->dentry to mnt_mountpoint and
 * path->mnt to mnt_parent and returns 1.
 *
 * Only pointer assignments are performed here; this function itself takes
 * no locks and no references.
 */
844static int follow_up_rcu(struct path *path)
845{
846 struct vfsmount *parent;
847 struct dentry *mountpoint;
848
849 parent = path->mnt->mnt_parent;
850 if (parent == path->mnt)
851 return 0;
852 mountpoint = path->mnt->mnt_mountpoint;
853 path->dentry = mountpoint;
854 path->mnt = parent;
855 return 1;
856}
857
594int follow_up(struct path *path) 858int follow_up(struct path *path)
595{ 859{
596 struct vfsmount *parent; 860 struct vfsmount *parent;
@@ -612,9 +876,24 @@ int follow_up(struct path *path)
612 return 1; 876 return 1;
613} 877}
614 878
615/* no need for dcache_lock, as serialization is taken care in 879/*
616 * namespace.c 880 * serialization is taken care of in namespace.c
617 */ 881 */
/*
 * Cross any mounts stacked on @path during an rcu-walk.
 *
 * While d_mountpoint() is true for path->dentry, the mount found by
 * __lookup_mnt() replaces path->mnt, its mnt_root replaces path->dentry,
 * nd->seq is restarted from the new dentry's d_seq, and the new dentry's
 * inode is stored in *inode.  The loop ends when the dentry is not a
 * mountpoint or when __lookup_mnt() finds nothing.
 *
 * If no mount is crossed, @path, nd->seq and *inode are left unchanged.
 * This function itself takes no references.
 */
882static void __follow_mount_rcu(struct nameidata *nd, struct path *path,
883 struct inode **inode)
884{
885 while (d_mountpoint(path->dentry)) {
886 struct vfsmount *mounted;
 /* NOTE(review): the meaning of the third argument (1) is defined by __lookup_mnt() - confirm there. */
887 mounted = __lookup_mnt(path->mnt, path->dentry, 1);
888 if (!mounted)
889 return;
890 path->mnt = mounted;
891 path->dentry = mounted->mnt_root;
892 nd->seq = read_seqcount_begin(&path->dentry->d_seq);
893 *inode = path->dentry->d_inode;
894 }
895}
896
618static int __follow_mount(struct path *path) 897static int __follow_mount(struct path *path)
619{ 898{
620 int res = 0; 899 int res = 0;
@@ -645,9 +924,6 @@ static void follow_mount(struct path *path)
645 } 924 }
646} 925}
647 926
648/* no need for dcache_lock, as serialization is taken care in
649 * namespace.c
650 */
651int follow_down(struct path *path) 927int follow_down(struct path *path)
652{ 928{
653 struct vfsmount *mounted; 929 struct vfsmount *mounted;
@@ -663,7 +939,42 @@ int follow_down(struct path *path)
663 return 0; 939 return 0;
664} 940}
665 941
666static __always_inline void follow_dotdot(struct nameidata *nd) 942static int follow_dotdot_rcu(struct nameidata *nd)
943{
 /*
  * follow_dotdot_rcu: handle a ".." component while in rcu-walk.
  *
  * Returns 0 on success, or -ECHILD when the d_seq of the dentry being
  * left no longer matches nd->seq.  On success nd->path, nd->seq and
  * nd->inode describe the resulting position; this function itself takes
  * no references.
  */
944 struct inode *inode = nd->inode;
945
946 set_root_rcu(nd);
947
948 while(1) {
 /* Already at the walk's root: ".." stays where it is. */
949 if (nd->path.dentry == nd->root.dentry &&
950 nd->path.mnt == nd->root.mnt) {
951 break;
952 }
 /* Not at the root of this mount: step to d_parent within the same mount. */
953 if (nd->path.dentry != nd->path.mnt->mnt_root) {
954 struct dentry *old = nd->path.dentry;
955 struct dentry *parent = old->d_parent;
956 unsigned seq;
957
958 seq = read_seqcount_begin(&parent->d_seq); /* sample the parent before validating the child */
959 if (read_seqcount_retry(&old->d_seq, nd->seq))
960 return -ECHILD;
961 inode = parent->d_inode;
962 nd->path.dentry = parent;
963 nd->seq = seq;
964 break;
965 }
 /* At the root of a mount: climb to the mountpoint in the parent mount and loop again. */
966 if (!follow_up_rcu(&nd->path))
967 break;
968 nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
969 inode = nd->path.dentry->d_inode;
970 }
 /* The resulting dentry may itself be mounted over; cross those mounts too. */
971 __follow_mount_rcu(nd, &nd->path, &inode);
972 nd->inode = inode;
973
974 return 0;
975}
976
977static void follow_dotdot(struct nameidata *nd)
667{ 978{
668 set_root(nd); 979 set_root(nd);
669 980
@@ -684,6 +995,7 @@ static __always_inline void follow_dotdot(struct nameidata *nd)
684 break; 995 break;
685 } 996 }
686 follow_mount(&nd->path); 997 follow_mount(&nd->path);
998 nd->inode = nd->path.dentry->d_inode;
687} 999}
688 1000
689/* 1001/*
@@ -721,17 +1033,17 @@ static struct dentry *d_alloc_and_lookup(struct dentry *parent,
721 * It _is_ time-critical. 1033 * It _is_ time-critical.
722 */ 1034 */
723static int do_lookup(struct nameidata *nd, struct qstr *name, 1035static int do_lookup(struct nameidata *nd, struct qstr *name,
724 struct path *path) 1036 struct path *path, struct inode **inode)
725{ 1037{
726 struct vfsmount *mnt = nd->path.mnt; 1038 struct vfsmount *mnt = nd->path.mnt;
727 struct dentry *dentry, *parent; 1039 struct dentry *dentry, *parent = nd->path.dentry;
728 struct inode *dir; 1040 struct inode *dir;
729 /* 1041 /*
730 * See if the low-level filesystem might want 1042 * See if the low-level filesystem might want
731 * to use its own hash.. 1043 * to use its own hash..
732 */ 1044 */
733 if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) { 1045 if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
734 int err = nd->path.dentry->d_op->d_hash(nd->path.dentry, name); 1046 int err = parent->d_op->d_hash(parent, nd->inode, name);
735 if (err < 0) 1047 if (err < 0)
736 return err; 1048 return err;
737 } 1049 }
@@ -741,21 +1053,44 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
741 * of a false negative due to a concurrent rename, we're going to 1053 * of a false negative due to a concurrent rename, we're going to
742 * do the non-racy lookup, below. 1054 * do the non-racy lookup, below.
743 */ 1055 */
744 dentry = __d_lookup(nd->path.dentry, name); 1056 if (nd->flags & LOOKUP_RCU) {
745 if (!dentry) 1057 unsigned seq;
746 goto need_lookup; 1058
1059 *inode = nd->inode;
1060 dentry = __d_lookup_rcu(parent, name, &seq, inode);
1061 if (!dentry) {
1062 if (nameidata_drop_rcu(nd))
1063 return -ECHILD;
1064 goto need_lookup;
1065 }
1066 /* Memory barrier in read_seqcount_begin of child is enough */
1067 if (__read_seqcount_retry(&parent->d_seq, nd->seq))
1068 return -ECHILD;
1069
1070 nd->seq = seq;
1071 if (dentry->d_flags & DCACHE_OP_REVALIDATE)
1072 goto need_revalidate;
1073 path->mnt = mnt;
1074 path->dentry = dentry;
1075 __follow_mount_rcu(nd, path, inode);
1076 } else {
1077 dentry = __d_lookup(parent, name);
1078 if (!dentry)
1079 goto need_lookup;
747found: 1080found:
748 if (dentry->d_op && dentry->d_op->d_revalidate) 1081 if (dentry->d_flags & DCACHE_OP_REVALIDATE)
749 goto need_revalidate; 1082 goto need_revalidate;
750done: 1083done:
751 path->mnt = mnt; 1084 path->mnt = mnt;
752 path->dentry = dentry; 1085 path->dentry = dentry;
753 __follow_mount(path); 1086 __follow_mount(path);
1087 *inode = path->dentry->d_inode;
1088 }
754 return 0; 1089 return 0;
755 1090
756need_lookup: 1091need_lookup:
757 parent = nd->path.dentry;
758 dir = parent->d_inode; 1092 dir = parent->d_inode;
1093 BUG_ON(nd->inode != dir);
759 1094
760 mutex_lock(&dir->i_mutex); 1095 mutex_lock(&dir->i_mutex);
761 /* 1096 /*
@@ -817,7 +1152,6 @@ static inline int follow_on_final(struct inode *inode, unsigned lookup_flags)
817static int link_path_walk(const char *name, struct nameidata *nd) 1152static int link_path_walk(const char *name, struct nameidata *nd)
818{ 1153{
819 struct path next; 1154 struct path next;
820 struct inode *inode;
821 int err; 1155 int err;
822 unsigned int lookup_flags = nd->flags; 1156 unsigned int lookup_flags = nd->flags;
823 1157
@@ -826,18 +1160,28 @@ static int link_path_walk(const char *name, struct nameidata *nd)
826 if (!*name) 1160 if (!*name)
827 goto return_reval; 1161 goto return_reval;
828 1162
829 inode = nd->path.dentry->d_inode;
830 if (nd->depth) 1163 if (nd->depth)
831 lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE); 1164 lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);
832 1165
833 /* At this point we know we have a real path component. */ 1166 /* At this point we know we have a real path component. */
834 for(;;) { 1167 for(;;) {
1168 struct inode *inode;
835 unsigned long hash; 1169 unsigned long hash;
836 struct qstr this; 1170 struct qstr this;
837 unsigned int c; 1171 unsigned int c;
838 1172
839 nd->flags |= LOOKUP_CONTINUE; 1173 nd->flags |= LOOKUP_CONTINUE;
840 err = exec_permission(inode); 1174 if (nd->flags & LOOKUP_RCU) {
1175 err = exec_permission(nd->inode, IPERM_FLAG_RCU);
1176 if (err == -ECHILD) {
1177 if (nameidata_drop_rcu(nd))
1178 return -ECHILD;
1179 goto exec_again;
1180 }
1181 } else {
1182exec_again:
1183 err = exec_permission(nd->inode, 0);
1184 }
841 if (err) 1185 if (err)
842 break; 1186 break;
843 1187
@@ -868,37 +1212,44 @@ static int link_path_walk(const char *name, struct nameidata *nd)
868 if (this.name[0] == '.') switch (this.len) { 1212 if (this.name[0] == '.') switch (this.len) {
869 default: 1213 default:
870 break; 1214 break;
871 case 2: 1215 case 2:
872 if (this.name[1] != '.') 1216 if (this.name[1] != '.')
873 break; 1217 break;
874 follow_dotdot(nd); 1218 if (nd->flags & LOOKUP_RCU) {
875 inode = nd->path.dentry->d_inode; 1219 if (follow_dotdot_rcu(nd))
1220 return -ECHILD;
1221 } else
1222 follow_dotdot(nd);
876 /* fallthrough */ 1223 /* fallthrough */
877 case 1: 1224 case 1:
878 continue; 1225 continue;
879 } 1226 }
880 /* This does the actual lookups.. */ 1227 /* This does the actual lookups.. */
881 err = do_lookup(nd, &this, &next); 1228 err = do_lookup(nd, &this, &next, &inode);
882 if (err) 1229 if (err)
883 break; 1230 break;
884
885 err = -ENOENT; 1231 err = -ENOENT;
886 inode = next.dentry->d_inode;
887 if (!inode) 1232 if (!inode)
888 goto out_dput; 1233 goto out_dput;
889 1234
890 if (inode->i_op->follow_link) { 1235 if (inode->i_op->follow_link) {
1236 /* We commonly drop rcu-walk here */
1237 if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry))
1238 return -ECHILD;
1239 BUG_ON(inode != next.dentry->d_inode);
891 err = do_follow_link(&next, nd); 1240 err = do_follow_link(&next, nd);
892 if (err) 1241 if (err)
893 goto return_err; 1242 goto return_err;
1243 nd->inode = nd->path.dentry->d_inode;
894 err = -ENOENT; 1244 err = -ENOENT;
895 inode = nd->path.dentry->d_inode; 1245 if (!nd->inode)
896 if (!inode)
897 break; 1246 break;
898 } else 1247 } else {
899 path_to_nameidata(&next, nd); 1248 path_to_nameidata(&next, nd);
1249 nd->inode = inode;
1250 }
900 err = -ENOTDIR; 1251 err = -ENOTDIR;
901 if (!inode->i_op->lookup) 1252 if (!nd->inode->i_op->lookup)
902 break; 1253 break;
903 continue; 1254 continue;
904 /* here ends the main loop */ 1255 /* here ends the main loop */
@@ -913,32 +1264,39 @@ last_component:
913 if (this.name[0] == '.') switch (this.len) { 1264 if (this.name[0] == '.') switch (this.len) {
914 default: 1265 default:
915 break; 1266 break;
916 case 2: 1267 case 2:
917 if (this.name[1] != '.') 1268 if (this.name[1] != '.')
918 break; 1269 break;
919 follow_dotdot(nd); 1270 if (nd->flags & LOOKUP_RCU) {
920 inode = nd->path.dentry->d_inode; 1271 if (follow_dotdot_rcu(nd))
1272 return -ECHILD;
1273 } else
1274 follow_dotdot(nd);
921 /* fallthrough */ 1275 /* fallthrough */
922 case 1: 1276 case 1:
923 goto return_reval; 1277 goto return_reval;
924 } 1278 }
925 err = do_lookup(nd, &this, &next); 1279 err = do_lookup(nd, &this, &next, &inode);
926 if (err) 1280 if (err)
927 break; 1281 break;
928 inode = next.dentry->d_inode;
929 if (follow_on_final(inode, lookup_flags)) { 1282 if (follow_on_final(inode, lookup_flags)) {
1283 if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry))
1284 return -ECHILD;
1285 BUG_ON(inode != next.dentry->d_inode);
930 err = do_follow_link(&next, nd); 1286 err = do_follow_link(&next, nd);
931 if (err) 1287 if (err)
932 goto return_err; 1288 goto return_err;
933 inode = nd->path.dentry->d_inode; 1289 nd->inode = nd->path.dentry->d_inode;
934 } else 1290 } else {
935 path_to_nameidata(&next, nd); 1291 path_to_nameidata(&next, nd);
1292 nd->inode = inode;
1293 }
936 err = -ENOENT; 1294 err = -ENOENT;
937 if (!inode) 1295 if (!nd->inode)
938 break; 1296 break;
939 if (lookup_flags & LOOKUP_DIRECTORY) { 1297 if (lookup_flags & LOOKUP_DIRECTORY) {
940 err = -ENOTDIR; 1298 err = -ENOTDIR;
941 if (!inode->i_op->lookup) 1299 if (!nd->inode->i_op->lookup)
942 break; 1300 break;
943 } 1301 }
944 goto return_base; 1302 goto return_base;
@@ -958,25 +1316,43 @@ return_reval:
958 * We bypassed the ordinary revalidation routines. 1316 * We bypassed the ordinary revalidation routines.
959 * We may need to check the cached dentry for staleness. 1317 * We may need to check the cached dentry for staleness.
960 */ 1318 */
961 if (nd->path.dentry && nd->path.dentry->d_sb && 1319 if (need_reval_dot(nd->path.dentry)) {
962 (nd->path.dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) {
963 err = -ESTALE;
964 /* Note: we do not d_invalidate() */ 1320 /* Note: we do not d_invalidate() */
965 if (!nd->path.dentry->d_op->d_revalidate( 1321 err = d_revalidate(nd->path.dentry, nd);
966 nd->path.dentry, nd)) 1322 if (!err)
1323 err = -ESTALE;
1324 if (err < 0)
967 break; 1325 break;
968 } 1326 }
969return_base: 1327return_base:
1328 if (nameidata_drop_rcu_last_maybe(nd))
1329 return -ECHILD;
970 return 0; 1330 return 0;
971out_dput: 1331out_dput:
972 path_put_conditional(&next, nd); 1332 if (!(nd->flags & LOOKUP_RCU))
1333 path_put_conditional(&next, nd);
973 break; 1334 break;
974 } 1335 }
975 path_put(&nd->path); 1336 if (!(nd->flags & LOOKUP_RCU))
1337 path_put(&nd->path);
976return_err: 1338return_err:
977 return err; 1339 return err;
978} 1340}
979 1341
/*
 * Reset current->total_link_count to 0 and walk @name with
 * link_path_walk(), returning its result unchanged.  Nothing else is done
 * here.
 */
1342static inline int path_walk_rcu(const char *name, struct nameidata *nd)
1343{
1344 current->total_link_count = 0;
1345
1346 return link_path_walk(name, nd);
1347}
1347}
1348
/*
 * Reset current->total_link_count to 0 and walk @name with
 * link_path_walk(), returning its result unchanged.
 */
1349static inline int path_walk_simple(const char *name, struct nameidata *nd)
1350{
1351 current->total_link_count = 0;
1352
1353 return link_path_walk(name, nd);
1354}
1354}
1355
980static int path_walk(const char *name, struct nameidata *nd) 1356static int path_walk(const char *name, struct nameidata *nd)
981{ 1357{
982 struct path save = nd->path; 1358 struct path save = nd->path;
@@ -1002,6 +1378,93 @@ static int path_walk(const char *name, struct nameidata *nd)
1002 return result; 1378 return result;
1003} 1379}
1004 1380
/*
 * Clean up after an rcu-walk attempt.
 *
 * If LOOKUP_RCU is still set, the flag is cleared, nd->root.mnt is reset
 * to NULL, and the rcu read lock and the vfsmount_lock read side are
 * released.  Independently of that, nd->file is fput() when it is
 * non-NULL.
 */
1381static void path_finish_rcu(struct nameidata *nd)
1382{
1383 if (nd->flags & LOOKUP_RCU) {
1384 /* RCU dangling. Cancel it. */
1385 nd->flags &= ~LOOKUP_RCU;
1386 nd->root.mnt = NULL;
1387 rcu_read_unlock();
1388 br_read_unlock(vfsmount_lock);
1389 }
1390 if (nd->file)
1391 fput(nd->file);
1392}
1393
/*
 * Set up @nd to begin an rcu-walk of @name.
 *
 * @dfd:   directory file descriptor, or AT_FDCWD; only consulted when @name
 *         does not start with '/'
 * @name:  path to be walked; only its first character is examined here
 * @flags: lookup flags; LOOKUP_RCU is OR-ed in
 * @nd:    pathwalk state to initialise
 *
 * The starting point is fs->root for an absolute path, fs->pwd for
 * AT_FDCWD, and the path of the file open on @dfd otherwise.  In every
 * case nd->seq is sampled from the starting dentry's d_seq and nd->inode
 * is set to its inode.
 *
 * Returns 0 with vfsmount_lock read-locked and the rcu read lock held.
 * Returns -EBADF, -ENOTDIR or the file_permission() error for a bad @dfd;
 * those failures happen before either lock is taken.
 */
1394static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
1395{
1396 int retval = 0;
1397 int fput_needed;
1398 struct file *file;
1399
1400 nd->last_type = LAST_ROOT; /* if there are only slashes... */
1401 nd->flags = flags | LOOKUP_RCU;
1402 nd->depth = 0;
1403 nd->root.mnt = NULL;
1404 nd->file = NULL;
1405
1406 if (*name=='/') { /* absolute path: start from the process root */
1407 struct fs_struct *fs = current->fs;
1408 unsigned seq;
1409
1410 br_read_lock(vfsmount_lock);
1411 rcu_read_lock();
1412
 /* Copy fs->root and sample its d_seq; redo both if fs->seq changed meanwhile. */
1413 do {
1414 seq = read_seqcount_begin(&fs->seq);
1415 nd->root = fs->root;
1416 nd->path = nd->root;
1417 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1418 } while (read_seqcount_retry(&fs->seq, seq));
1419
1420 } else if (dfd == AT_FDCWD) { /* relative path: start from fs->pwd */
1421 struct fs_struct *fs = current->fs;
1422 unsigned seq;
1423
1424 br_read_lock(vfsmount_lock);
1425 rcu_read_lock();
1426
1427 do {
1428 seq = read_seqcount_begin(&fs->seq);
1429 nd->path = fs->pwd;
1430 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1431 } while (read_seqcount_retry(&fs->seq, seq));
1432
1433 } else { /* relative to the file open on dfd */
1434 struct dentry *dentry;
1435
1436 file = fget_light(dfd, &fput_needed);
1437 retval = -EBADF;
1438 if (!file)
1439 goto out_fail;
1440
1441 dentry = file->f_path.dentry;
1442
1443 retval = -ENOTDIR;
1444 if (!S_ISDIR(dentry->d_inode->i_mode))
1445 goto fput_fail;
1446
1447 retval = file_permission(file, MAY_EXEC);
1448 if (retval)
1449 goto fput_fail;
1450
1451 nd->path = file->f_path;
 /* Remember the file only when fget_light() asked for a later fput; path_finish_rcu() drops nd->file. */
1452 if (fput_needed)
1453 nd->file = file;
1454
 /* NOTE(review): unlike the two branches above, d_seq is sampled before the locks are taken here. */
1455 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1456 br_read_lock(vfsmount_lock);
1457 rcu_read_lock();
1458 }
1459 nd->inode = nd->path.dentry->d_inode;
1460 return 0;
1461
1462fput_fail:
1463 fput_light(file, fput_needed);
1464out_fail:
1465 return retval;
1466}
1467
1005static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd) 1468static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
1006{ 1469{
1007 int retval = 0; 1470 int retval = 0;
@@ -1042,6 +1505,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, struct namei
1042 1505
1043 fput_light(file, fput_needed); 1506 fput_light(file, fput_needed);
1044 } 1507 }
1508 nd->inode = nd->path.dentry->d_inode;
1045 return 0; 1509 return 0;
1046 1510
1047fput_fail: 1511fput_fail:
@@ -1054,16 +1518,53 @@ out_fail:
1054static int do_path_lookup(int dfd, const char *name, 1518static int do_path_lookup(int dfd, const char *name,
1055 unsigned int flags, struct nameidata *nd) 1519 unsigned int flags, struct nameidata *nd)
1056{ 1520{
1057 int retval = path_init(dfd, name, flags, nd); 1521 int retval;
1058 if (!retval) 1522
1059 retval = path_walk(name, nd); 1523 /*
1060 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && 1524 * Path walking is largely split up into 2 different synchronisation
1061 nd->path.dentry->d_inode)) 1525 * schemes, rcu-walk and ref-walk (explained in
1062 audit_inode(name, nd->path.dentry); 1526 * Documentation/filesystems/path-lookup.txt). These share much of the
1527 * path walk code, but some things particularly setup, cleanup, and
1528 * following mounts are sufficiently divergent that functions are
1529 * duplicated. Typically there is a function foo(), and its RCU
1530 * analogue, foo_rcu().
1531 *
1532 * -ECHILD is the error number of choice (just to avoid clashes) that
1533 * is returned if some aspect of an rcu-walk fails. Such an error must
1534 * be handled by restarting a traditional ref-walk (which will always
1535 * be able to complete).
1536 */
1537 retval = path_init_rcu(dfd, name, flags, nd);
1538 if (unlikely(retval))
1539 return retval;
1540 retval = path_walk_rcu(name, nd);
1541 path_finish_rcu(nd);
1063 if (nd->root.mnt) { 1542 if (nd->root.mnt) {
1064 path_put(&nd->root); 1543 path_put(&nd->root);
1065 nd->root.mnt = NULL; 1544 nd->root.mnt = NULL;
1066 } 1545 }
1546
1547 if (unlikely(retval == -ECHILD || retval == -ESTALE)) {
1548 /* slower, locked walk */
1549 if (retval == -ESTALE)
1550 flags |= LOOKUP_REVAL;
1551 retval = path_init(dfd, name, flags, nd);
1552 if (unlikely(retval))
1553 return retval;
1554 retval = path_walk(name, nd);
1555 if (nd->root.mnt) {
1556 path_put(&nd->root);
1557 nd->root.mnt = NULL;
1558 }
1559 }
1560
1561 if (likely(!retval)) {
1562 if (unlikely(!audit_dummy_context())) {
1563 if (nd->path.dentry && nd->inode)
1564 audit_inode(name, nd->path.dentry);
1565 }
1566 }
1567
1067 return retval; 1568 return retval;
1068} 1569}
1069 1570
@@ -1106,10 +1607,11 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
1106 path_get(&nd->path); 1607 path_get(&nd->path);
1107 nd->root = nd->path; 1608 nd->root = nd->path;
1108 path_get(&nd->root); 1609 path_get(&nd->root);
1610 nd->inode = nd->path.dentry->d_inode;
1109 1611
1110 retval = path_walk(name, nd); 1612 retval = path_walk(name, nd);
1111 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && 1613 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
1112 nd->path.dentry->d_inode)) 1614 nd->inode))
1113 audit_inode(name, nd->path.dentry); 1615 audit_inode(name, nd->path.dentry);
1114 1616
1115 path_put(&nd->root); 1617 path_put(&nd->root);
@@ -1125,7 +1627,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
1125 struct dentry *dentry; 1627 struct dentry *dentry;
1126 int err; 1628 int err;
1127 1629
1128 err = exec_permission(inode); 1630 err = exec_permission(inode, 0);
1129 if (err) 1631 if (err)
1130 return ERR_PTR(err); 1632 return ERR_PTR(err);
1131 1633
@@ -1133,8 +1635,8 @@ static struct dentry *__lookup_hash(struct qstr *name,
1133 * See if the low-level filesystem might want 1635 * See if the low-level filesystem might want
1134 * to use its own hash.. 1636 * to use its own hash..
1135 */ 1637 */
1136 if (base->d_op && base->d_op->d_hash) { 1638 if (base->d_flags & DCACHE_OP_HASH) {
1137 err = base->d_op->d_hash(base, name); 1639 err = base->d_op->d_hash(base, inode, name);
1138 dentry = ERR_PTR(err); 1640 dentry = ERR_PTR(err);
1139 if (err < 0) 1641 if (err < 0)
1140 goto out; 1642 goto out;
@@ -1147,7 +1649,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
1147 */ 1649 */
1148 dentry = d_lookup(base, name); 1650 dentry = d_lookup(base, name);
1149 1651
1150 if (dentry && dentry->d_op && dentry->d_op->d_revalidate) 1652 if (dentry && (dentry->d_flags & DCACHE_OP_REVALIDATE))
1151 dentry = do_revalidate(dentry, nd); 1653 dentry = do_revalidate(dentry, nd);
1152 1654
1153 if (!dentry) 1655 if (!dentry)
@@ -1490,6 +1992,7 @@ out_unlock:
1490 mutex_unlock(&dir->d_inode->i_mutex); 1992 mutex_unlock(&dir->d_inode->i_mutex);
1491 dput(nd->path.dentry); 1993 dput(nd->path.dentry);
1492 nd->path.dentry = path->dentry; 1994 nd->path.dentry = path->dentry;
1995
1493 if (error) 1996 if (error)
1494 return error; 1997 return error;
1495 /* Don't check for write permission, don't truncate */ 1998 /* Don't check for write permission, don't truncate */
@@ -1584,6 +2087,9 @@ exit:
1584 return ERR_PTR(error); 2087 return ERR_PTR(error);
1585} 2088}
1586 2089
2090/*
2091 * Handle O_CREAT case for do_filp_open
2092 */
1587static struct file *do_last(struct nameidata *nd, struct path *path, 2093static struct file *do_last(struct nameidata *nd, struct path *path,
1588 int open_flag, int acc_mode, 2094 int open_flag, int acc_mode,
1589 int mode, const char *pathname) 2095 int mode, const char *pathname)
@@ -1597,50 +2103,25 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
1597 follow_dotdot(nd); 2103 follow_dotdot(nd);
1598 dir = nd->path.dentry; 2104 dir = nd->path.dentry;
1599 case LAST_DOT: 2105 case LAST_DOT:
1600 if (nd->path.mnt->mnt_sb->s_type->fs_flags & FS_REVAL_DOT) { 2106 if (need_reval_dot(dir)) {
1601 if (!dir->d_op->d_revalidate(dir, nd)) { 2107 error = d_revalidate(nd->path.dentry, nd);
2108 if (!error)
1602 error = -ESTALE; 2109 error = -ESTALE;
2110 if (error < 0)
1603 goto exit; 2111 goto exit;
1604 }
1605 } 2112 }
1606 /* fallthrough */ 2113 /* fallthrough */
1607 case LAST_ROOT: 2114 case LAST_ROOT:
1608 if (open_flag & O_CREAT) 2115 goto exit;
1609 goto exit;
1610 /* fallthrough */
1611 case LAST_BIND: 2116 case LAST_BIND:
1612 audit_inode(pathname, dir); 2117 audit_inode(pathname, dir);
1613 goto ok; 2118 goto ok;
1614 } 2119 }
1615 2120
1616 /* trailing slashes? */ 2121 /* trailing slashes? */
1617 if (nd->last.name[nd->last.len]) { 2122 if (nd->last.name[nd->last.len])
1618 if (open_flag & O_CREAT) 2123 goto exit;
1619 goto exit;
1620 nd->flags |= LOOKUP_DIRECTORY | LOOKUP_FOLLOW;
1621 }
1622
1623 /* just plain open? */
1624 if (!(open_flag & O_CREAT)) {
1625 error = do_lookup(nd, &nd->last, path);
1626 if (error)
1627 goto exit;
1628 error = -ENOENT;
1629 if (!path->dentry->d_inode)
1630 goto exit_dput;
1631 if (path->dentry->d_inode->i_op->follow_link)
1632 return NULL;
1633 error = -ENOTDIR;
1634 if (nd->flags & LOOKUP_DIRECTORY) {
1635 if (!path->dentry->d_inode->i_op->lookup)
1636 goto exit_dput;
1637 }
1638 path_to_nameidata(path, nd);
1639 audit_inode(pathname, nd->path.dentry);
1640 goto ok;
1641 }
1642 2124
1643 /* OK, it's O_CREAT */
1644 mutex_lock(&dir->d_inode->i_mutex); 2125 mutex_lock(&dir->d_inode->i_mutex);
1645 2126
1646 path->dentry = lookup_hash(nd); 2127 path->dentry = lookup_hash(nd);
@@ -1711,8 +2192,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
1711 return NULL; 2192 return NULL;
1712 2193
1713 path_to_nameidata(path, nd); 2194 path_to_nameidata(path, nd);
2195 nd->inode = path->dentry->d_inode;
1714 error = -EISDIR; 2196 error = -EISDIR;
1715 if (S_ISDIR(path->dentry->d_inode->i_mode)) 2197 if (S_ISDIR(nd->inode->i_mode))
1716 goto exit; 2198 goto exit;
1717ok: 2199ok:
1718 filp = finish_open(nd, open_flag, acc_mode); 2200 filp = finish_open(nd, open_flag, acc_mode);
@@ -1743,7 +2225,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
1743 struct path path; 2225 struct path path;
1744 int count = 0; 2226 int count = 0;
1745 int flag = open_to_namei_flags(open_flag); 2227 int flag = open_to_namei_flags(open_flag);
1746 int force_reval = 0; 2228 int flags;
1747 2229
1748 if (!(open_flag & O_CREAT)) 2230 if (!(open_flag & O_CREAT))
1749 mode = 0; 2231 mode = 0;
@@ -1772,54 +2254,84 @@ struct file *do_filp_open(int dfd, const char *pathname,
1772 if (open_flag & O_APPEND) 2254 if (open_flag & O_APPEND)
1773 acc_mode |= MAY_APPEND; 2255 acc_mode |= MAY_APPEND;
1774 2256
1775 /* find the parent */ 2257 flags = LOOKUP_OPEN;
1776reval: 2258 if (open_flag & O_CREAT) {
1777 error = path_init(dfd, pathname, LOOKUP_PARENT, &nd); 2259 flags |= LOOKUP_CREATE;
2260 if (open_flag & O_EXCL)
2261 flags |= LOOKUP_EXCL;
2262 }
2263 if (open_flag & O_DIRECTORY)
2264 flags |= LOOKUP_DIRECTORY;
2265 if (!(open_flag & O_NOFOLLOW))
2266 flags |= LOOKUP_FOLLOW;
2267
2268 filp = get_empty_filp();
2269 if (!filp)
2270 return ERR_PTR(-ENFILE);
2271
2272 filp->f_flags = open_flag;
2273 nd.intent.open.file = filp;
2274 nd.intent.open.flags = flag;
2275 nd.intent.open.create_mode = mode;
2276
2277 if (open_flag & O_CREAT)
2278 goto creat;
2279
2280 /* !O_CREAT, simple open */
2281 error = do_path_lookup(dfd, pathname, flags, &nd);
2282 if (unlikely(error))
2283 goto out_filp;
2284 error = -ELOOP;
2285 if (!(nd.flags & LOOKUP_FOLLOW)) {
2286 if (nd.inode->i_op->follow_link)
2287 goto out_path;
2288 }
2289 error = -ENOTDIR;
2290 if (nd.flags & LOOKUP_DIRECTORY) {
2291 if (!nd.inode->i_op->lookup)
2292 goto out_path;
2293 }
2294 audit_inode(pathname, nd.path.dentry);
2295 filp = finish_open(&nd, open_flag, acc_mode);
2296 return filp;
2297
2298creat:
2299 /* OK, have to create the file. Find the parent. */
2300 error = path_init_rcu(dfd, pathname,
2301 LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
1778 if (error) 2302 if (error)
1779 return ERR_PTR(error); 2303 goto out_filp;
1780 if (force_reval) 2304 error = path_walk_rcu(pathname, &nd);
1781 nd.flags |= LOOKUP_REVAL; 2305 path_finish_rcu(&nd);
2306 if (unlikely(error == -ECHILD || error == -ESTALE)) {
2307 /* slower, locked walk */
2308 if (error == -ESTALE) {
2309reval:
2310 flags |= LOOKUP_REVAL;
2311 }
2312 error = path_init(dfd, pathname,
2313 LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
2314 if (error)
2315 goto out_filp;
1782 2316
1783 current->total_link_count = 0; 2317 error = path_walk_simple(pathname, &nd);
1784 error = link_path_walk(pathname, &nd);
1785 if (error) {
1786 filp = ERR_PTR(error);
1787 goto out;
1788 } 2318 }
1789 if (unlikely(!audit_dummy_context()) && (open_flag & O_CREAT)) 2319 if (unlikely(error))
2320 goto out_filp;
2321 if (unlikely(!audit_dummy_context()))
1790 audit_inode(pathname, nd.path.dentry); 2322 audit_inode(pathname, nd.path.dentry);
1791 2323
1792 /* 2324 /*
1793 * We have the parent and last component. 2325 * We have the parent and last component.
1794 */ 2326 */
1795 2327 nd.flags = flags;
1796 error = -ENFILE;
1797 filp = get_empty_filp();
1798 if (filp == NULL)
1799 goto exit_parent;
1800 nd.intent.open.file = filp;
1801 filp->f_flags = open_flag;
1802 nd.intent.open.flags = flag;
1803 nd.intent.open.create_mode = mode;
1804 nd.flags &= ~LOOKUP_PARENT;
1805 nd.flags |= LOOKUP_OPEN;
1806 if (open_flag & O_CREAT) {
1807 nd.flags |= LOOKUP_CREATE;
1808 if (open_flag & O_EXCL)
1809 nd.flags |= LOOKUP_EXCL;
1810 }
1811 if (open_flag & O_DIRECTORY)
1812 nd.flags |= LOOKUP_DIRECTORY;
1813 if (!(open_flag & O_NOFOLLOW))
1814 nd.flags |= LOOKUP_FOLLOW;
1815 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); 2328 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
1816 while (unlikely(!filp)) { /* trailing symlink */ 2329 while (unlikely(!filp)) { /* trailing symlink */
1817 struct path holder; 2330 struct path holder;
1818 struct inode *inode = path.dentry->d_inode;
1819 void *cookie; 2331 void *cookie;
1820 error = -ELOOP; 2332 error = -ELOOP;
1821 /* S_ISDIR part is a temporary automount kludge */ 2333 /* S_ISDIR part is a temporary automount kludge */
1822 if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(inode->i_mode)) 2334 if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(nd.inode->i_mode))
1823 goto exit_dput; 2335 goto exit_dput;
1824 if (count++ == 32) 2336 if (count++ == 32)
1825 goto exit_dput; 2337 goto exit_dput;
@@ -1840,36 +2352,33 @@ reval:
1840 goto exit_dput; 2352 goto exit_dput;
1841 error = __do_follow_link(&path, &nd, &cookie); 2353 error = __do_follow_link(&path, &nd, &cookie);
1842 if (unlikely(error)) { 2354 if (unlikely(error)) {
2355 if (!IS_ERR(cookie) && nd.inode->i_op->put_link)
2356 nd.inode->i_op->put_link(path.dentry, &nd, cookie);
1843 /* nd.path had been dropped */ 2357 /* nd.path had been dropped */
1844 if (!IS_ERR(cookie) && inode->i_op->put_link) 2358 nd.path = path;
1845 inode->i_op->put_link(path.dentry, &nd, cookie); 2359 goto out_path;
1846 path_put(&path);
1847 release_open_intent(&nd);
1848 filp = ERR_PTR(error);
1849 goto out;
1850 } 2360 }
1851 holder = path; 2361 holder = path;
1852 nd.flags &= ~LOOKUP_PARENT; 2362 nd.flags &= ~LOOKUP_PARENT;
1853 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); 2363 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
1854 if (inode->i_op->put_link) 2364 if (nd.inode->i_op->put_link)
1855 inode->i_op->put_link(holder.dentry, &nd, cookie); 2365 nd.inode->i_op->put_link(holder.dentry, &nd, cookie);
1856 path_put(&holder); 2366 path_put(&holder);
1857 } 2367 }
1858out: 2368out:
1859 if (nd.root.mnt) 2369 if (nd.root.mnt)
1860 path_put(&nd.root); 2370 path_put(&nd.root);
1861 if (filp == ERR_PTR(-ESTALE) && !force_reval) { 2371 if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL))
1862 force_reval = 1;
1863 goto reval; 2372 goto reval;
1864 }
1865 return filp; 2373 return filp;
1866 2374
1867exit_dput: 2375exit_dput:
1868 path_put_conditional(&path, &nd); 2376 path_put_conditional(&path, &nd);
2377out_path:
2378 path_put(&nd.path);
2379out_filp:
1869 if (!IS_ERR(nd.intent.open.file)) 2380 if (!IS_ERR(nd.intent.open.file))
1870 release_open_intent(&nd); 2381 release_open_intent(&nd);
1871exit_parent:
1872 path_put(&nd.path);
1873 filp = ERR_PTR(error); 2382 filp = ERR_PTR(error);
1874 goto out; 2383 goto out;
1875} 2384}
@@ -2130,12 +2639,10 @@ void dentry_unhash(struct dentry *dentry)
2130{ 2639{
2131 dget(dentry); 2640 dget(dentry);
2132 shrink_dcache_parent(dentry); 2641 shrink_dcache_parent(dentry);
2133 spin_lock(&dcache_lock);
2134 spin_lock(&dentry->d_lock); 2642 spin_lock(&dentry->d_lock);
2135 if (atomic_read(&dentry->d_count) == 2) 2643 if (dentry->d_count == 2)
2136 __d_drop(dentry); 2644 __d_drop(dentry);
2137 spin_unlock(&dentry->d_lock); 2645 spin_unlock(&dentry->d_lock);
2138 spin_unlock(&dcache_lock);
2139} 2646}
2140 2647
2141int vfs_rmdir(struct inode *dir, struct dentry *dentry) 2648int vfs_rmdir(struct inode *dir, struct dentry *dentry)
diff --git a/fs/namespace.c b/fs/namespace.c
index 3dbfc072ec7..3ddfd9046c4 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -138,6 +138,64 @@ void mnt_release_group_id(struct vfsmount *mnt)
138 mnt->mnt_group_id = 0; 138 mnt->mnt_group_id = 0;
139} 139}
140 140
141/*
142 * vfsmount lock must be held for read
143 */
144static inline void mnt_add_count(struct vfsmount *mnt, int n)
145{
146#ifdef CONFIG_SMP
147 this_cpu_add(mnt->mnt_pcp->mnt_count, n);
148#else
149 preempt_disable();
150 mnt->mnt_count += n;
151 preempt_enable();
152#endif
153}
154
155static inline void mnt_set_count(struct vfsmount *mnt, int n)
156{
157#ifdef CONFIG_SMP
158 this_cpu_write(mnt->mnt_pcp->mnt_count, n);
159#else
160 mnt->mnt_count = n;
161#endif
162}
163
164/*
165 * vfsmount lock must be held for read
166 */
167static inline void mnt_inc_count(struct vfsmount *mnt)
168{
169 mnt_add_count(mnt, 1);
170}
171
172/*
173 * vfsmount lock must be held for read
174 */
175static inline void mnt_dec_count(struct vfsmount *mnt)
176{
177 mnt_add_count(mnt, -1);
178}
179
180/*
181 * vfsmount lock must be held for write
182 */
183unsigned int mnt_get_count(struct vfsmount *mnt)
184{
185#ifdef CONFIG_SMP
186 unsigned int count = atomic_read(&mnt->mnt_longrefs);
187 int cpu;
188
189 for_each_possible_cpu(cpu) {
190 count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count;
191 }
192
193 return count;
194#else
195 return mnt->mnt_count;
196#endif
197}
198
141struct vfsmount *alloc_vfsmnt(const char *name) 199struct vfsmount *alloc_vfsmnt(const char *name)
142{ 200{
143 struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); 201 struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
@@ -154,7 +212,17 @@ struct vfsmount *alloc_vfsmnt(const char *name)
154 goto out_free_id; 212 goto out_free_id;
155 } 213 }
156 214
157 atomic_set(&mnt->mnt_count, 1); 215#ifdef CONFIG_SMP
216 mnt->mnt_pcp = alloc_percpu(struct mnt_pcp);
217 if (!mnt->mnt_pcp)
218 goto out_free_devname;
219
220 atomic_set(&mnt->mnt_longrefs, 1);
221#else
222 mnt->mnt_count = 1;
223 mnt->mnt_writers = 0;
224#endif
225
158 INIT_LIST_HEAD(&mnt->mnt_hash); 226 INIT_LIST_HEAD(&mnt->mnt_hash);
159 INIT_LIST_HEAD(&mnt->mnt_child); 227 INIT_LIST_HEAD(&mnt->mnt_child);
160 INIT_LIST_HEAD(&mnt->mnt_mounts); 228 INIT_LIST_HEAD(&mnt->mnt_mounts);
@@ -166,13 +234,6 @@ struct vfsmount *alloc_vfsmnt(const char *name)
166#ifdef CONFIG_FSNOTIFY 234#ifdef CONFIG_FSNOTIFY
167 INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); 235 INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
168#endif 236#endif
169#ifdef CONFIG_SMP
170 mnt->mnt_writers = alloc_percpu(int);
171 if (!mnt->mnt_writers)
172 goto out_free_devname;
173#else
174 mnt->mnt_writers = 0;
175#endif
176 } 237 }
177 return mnt; 238 return mnt;
178 239
@@ -216,32 +277,32 @@ int __mnt_is_readonly(struct vfsmount *mnt)
216} 277}
217EXPORT_SYMBOL_GPL(__mnt_is_readonly); 278EXPORT_SYMBOL_GPL(__mnt_is_readonly);
218 279
219static inline void inc_mnt_writers(struct vfsmount *mnt) 280static inline void mnt_inc_writers(struct vfsmount *mnt)
220{ 281{
221#ifdef CONFIG_SMP 282#ifdef CONFIG_SMP
222 (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))++; 283 this_cpu_inc(mnt->mnt_pcp->mnt_writers);
223#else 284#else
224 mnt->mnt_writers++; 285 mnt->mnt_writers++;
225#endif 286#endif
226} 287}
227 288
228static inline void dec_mnt_writers(struct vfsmount *mnt) 289static inline void mnt_dec_writers(struct vfsmount *mnt)
229{ 290{
230#ifdef CONFIG_SMP 291#ifdef CONFIG_SMP
231 (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))--; 292 this_cpu_dec(mnt->mnt_pcp->mnt_writers);
232#else 293#else
233 mnt->mnt_writers--; 294 mnt->mnt_writers--;
234#endif 295#endif
235} 296}
236 297
237static unsigned int count_mnt_writers(struct vfsmount *mnt) 298static unsigned int mnt_get_writers(struct vfsmount *mnt)
238{ 299{
239#ifdef CONFIG_SMP 300#ifdef CONFIG_SMP
240 unsigned int count = 0; 301 unsigned int count = 0;
241 int cpu; 302 int cpu;
242 303
243 for_each_possible_cpu(cpu) { 304 for_each_possible_cpu(cpu) {
244 count += *per_cpu_ptr(mnt->mnt_writers, cpu); 305 count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers;
245 } 306 }
246 307
247 return count; 308 return count;
@@ -273,9 +334,9 @@ int mnt_want_write(struct vfsmount *mnt)
273 int ret = 0; 334 int ret = 0;
274 335
275 preempt_disable(); 336 preempt_disable();
276 inc_mnt_writers(mnt); 337 mnt_inc_writers(mnt);
277 /* 338 /*
278 * The store to inc_mnt_writers must be visible before we pass 339 * The store to mnt_inc_writers must be visible before we pass
279 * MNT_WRITE_HOLD loop below, so that the slowpath can see our 340 * MNT_WRITE_HOLD loop below, so that the slowpath can see our
280 * incremented count after it has set MNT_WRITE_HOLD. 341 * incremented count after it has set MNT_WRITE_HOLD.
281 */ 342 */
@@ -289,7 +350,7 @@ int mnt_want_write(struct vfsmount *mnt)
289 */ 350 */
290 smp_rmb(); 351 smp_rmb();
291 if (__mnt_is_readonly(mnt)) { 352 if (__mnt_is_readonly(mnt)) {
292 dec_mnt_writers(mnt); 353 mnt_dec_writers(mnt);
293 ret = -EROFS; 354 ret = -EROFS;
294 goto out; 355 goto out;
295 } 356 }
@@ -317,7 +378,7 @@ int mnt_clone_write(struct vfsmount *mnt)
317 if (__mnt_is_readonly(mnt)) 378 if (__mnt_is_readonly(mnt))
318 return -EROFS; 379 return -EROFS;
319 preempt_disable(); 380 preempt_disable();
320 inc_mnt_writers(mnt); 381 mnt_inc_writers(mnt);
321 preempt_enable(); 382 preempt_enable();
322 return 0; 383 return 0;
323} 384}
@@ -351,7 +412,7 @@ EXPORT_SYMBOL_GPL(mnt_want_write_file);
351void mnt_drop_write(struct vfsmount *mnt) 412void mnt_drop_write(struct vfsmount *mnt)
352{ 413{
353 preempt_disable(); 414 preempt_disable();
354 dec_mnt_writers(mnt); 415 mnt_dec_writers(mnt);
355 preempt_enable(); 416 preempt_enable();
356} 417}
357EXPORT_SYMBOL_GPL(mnt_drop_write); 418EXPORT_SYMBOL_GPL(mnt_drop_write);
@@ -384,7 +445,7 @@ static int mnt_make_readonly(struct vfsmount *mnt)
384 * MNT_WRITE_HOLD, so it can't be decremented by another CPU while 445 * MNT_WRITE_HOLD, so it can't be decremented by another CPU while
385 * we're counting up here. 446 * we're counting up here.
386 */ 447 */
387 if (count_mnt_writers(mnt) > 0) 448 if (mnt_get_writers(mnt) > 0)
388 ret = -EBUSY; 449 ret = -EBUSY;
389 else 450 else
390 mnt->mnt_flags |= MNT_READONLY; 451 mnt->mnt_flags |= MNT_READONLY;
@@ -418,7 +479,7 @@ void free_vfsmnt(struct vfsmount *mnt)
418 kfree(mnt->mnt_devname); 479 kfree(mnt->mnt_devname);
419 mnt_free_id(mnt); 480 mnt_free_id(mnt);
420#ifdef CONFIG_SMP 481#ifdef CONFIG_SMP
421 free_percpu(mnt->mnt_writers); 482 free_percpu(mnt->mnt_pcp);
422#endif 483#endif
423 kmem_cache_free(mnt_cache, mnt); 484 kmem_cache_free(mnt_cache, mnt);
424} 485}
@@ -492,6 +553,27 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns)
492} 553}
493 554
494/* 555/*
556 * Clear dentry's mounted state if it has no remaining mounts.
557 * vfsmount_lock must be held for write.
558 */
559static void dentry_reset_mounted(struct vfsmount *mnt, struct dentry *dentry)
560{
561 unsigned u;
562
563 for (u = 0; u < HASH_SIZE; u++) {
564 struct vfsmount *p;
565
566 list_for_each_entry(p, &mount_hashtable[u], mnt_hash) {
567 if (p->mnt_mountpoint == dentry)
568 return;
569 }
570 }
571 spin_lock(&dentry->d_lock);
572 dentry->d_flags &= ~DCACHE_MOUNTED;
573 spin_unlock(&dentry->d_lock);
574}
575
576/*
495 * vfsmount lock must be held for write 577 * vfsmount lock must be held for write
496 */ 578 */
497static void detach_mnt(struct vfsmount *mnt, struct path *old_path) 579static void detach_mnt(struct vfsmount *mnt, struct path *old_path)
@@ -502,7 +584,7 @@ static void detach_mnt(struct vfsmount *mnt, struct path *old_path)
502 mnt->mnt_mountpoint = mnt->mnt_root; 584 mnt->mnt_mountpoint = mnt->mnt_root;
503 list_del_init(&mnt->mnt_child); 585 list_del_init(&mnt->mnt_child);
504 list_del_init(&mnt->mnt_hash); 586 list_del_init(&mnt->mnt_hash);
505 old_path->dentry->d_mounted--; 587 dentry_reset_mounted(old_path->mnt, old_path->dentry);
506} 588}
507 589
508/* 590/*
@@ -513,7 +595,9 @@ void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
513{ 595{
514 child_mnt->mnt_parent = mntget(mnt); 596 child_mnt->mnt_parent = mntget(mnt);
515 child_mnt->mnt_mountpoint = dget(dentry); 597 child_mnt->mnt_mountpoint = dget(dentry);
516 dentry->d_mounted++; 598 spin_lock(&dentry->d_lock);
599 dentry->d_flags |= DCACHE_MOUNTED;
600 spin_unlock(&dentry->d_lock);
517} 601}
518 602
519/* 603/*
@@ -629,9 +713,10 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
629 return NULL; 713 return NULL;
630} 714}
631 715
632static inline void __mntput(struct vfsmount *mnt) 716static inline void mntfree(struct vfsmount *mnt)
633{ 717{
634 struct super_block *sb = mnt->mnt_sb; 718 struct super_block *sb = mnt->mnt_sb;
719
635 /* 720 /*
636 * This probably indicates that somebody messed 721 * This probably indicates that somebody messed
637 * up a mnt_want/drop_write() pair. If this 722 * up a mnt_want/drop_write() pair. If this
@@ -639,38 +724,123 @@ static inline void __mntput(struct vfsmount *mnt)
639 * to make r/w->r/o transitions. 724 * to make r/w->r/o transitions.
640 */ 725 */
641 /* 726 /*
642 * atomic_dec_and_lock() used to deal with ->mnt_count decrements 727 * The locking used to deal with mnt_count decrement provides barriers,
643 * provides barriers, so count_mnt_writers() below is safe. AV 728 * so mnt_get_writers() below is safe.
644 */ 729 */
645 WARN_ON(count_mnt_writers(mnt)); 730 WARN_ON(mnt_get_writers(mnt));
646 fsnotify_vfsmount_delete(mnt); 731 fsnotify_vfsmount_delete(mnt);
647 dput(mnt->mnt_root); 732 dput(mnt->mnt_root);
648 free_vfsmnt(mnt); 733 free_vfsmnt(mnt);
649 deactivate_super(sb); 734 deactivate_super(sb);
650} 735}
651 736
652void mntput_no_expire(struct vfsmount *mnt) 737#ifdef CONFIG_SMP
653{ 738static inline void __mntput(struct vfsmount *mnt, int longrefs)
654repeat: 739{
655 if (atomic_add_unless(&mnt->mnt_count, -1, 1)) 740 if (!longrefs) {
656 return; 741put_again:
742 br_read_lock(vfsmount_lock);
743 if (likely(atomic_read(&mnt->mnt_longrefs))) {
744 mnt_dec_count(mnt);
745 br_read_unlock(vfsmount_lock);
746 return;
747 }
748 br_read_unlock(vfsmount_lock);
749 } else {
750 BUG_ON(!atomic_read(&mnt->mnt_longrefs));
751 if (atomic_add_unless(&mnt->mnt_longrefs, -1, 1))
752 return;
753 }
754
657 br_write_lock(vfsmount_lock); 755 br_write_lock(vfsmount_lock);
658 if (!atomic_dec_and_test(&mnt->mnt_count)) { 756 if (!longrefs)
757 mnt_dec_count(mnt);
758 else
759 atomic_dec(&mnt->mnt_longrefs);
760 if (mnt_get_count(mnt)) {
659 br_write_unlock(vfsmount_lock); 761 br_write_unlock(vfsmount_lock);
660 return; 762 return;
661 } 763 }
662 if (likely(!mnt->mnt_pinned)) { 764 if (unlikely(mnt->mnt_pinned)) {
765 mnt_add_count(mnt, mnt->mnt_pinned + 1);
766 mnt->mnt_pinned = 0;
663 br_write_unlock(vfsmount_lock); 767 br_write_unlock(vfsmount_lock);
664 __mntput(mnt); 768 acct_auto_close_mnt(mnt);
769 goto put_again;
770 }
771 br_write_unlock(vfsmount_lock);
772 mntfree(mnt);
773}
774#else
775static inline void __mntput(struct vfsmount *mnt, int longrefs)
776{
777put_again:
778 mnt_dec_count(mnt);
779 if (likely(mnt_get_count(mnt)))
665 return; 780 return;
781 br_write_lock(vfsmount_lock);
782 if (unlikely(mnt->mnt_pinned)) {
783 mnt_add_count(mnt, mnt->mnt_pinned + 1);
784 mnt->mnt_pinned = 0;
785 br_write_unlock(vfsmount_lock);
786 acct_auto_close_mnt(mnt);
787 goto put_again;
666 } 788 }
667 atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count);
668 mnt->mnt_pinned = 0;
669 br_write_unlock(vfsmount_lock); 789 br_write_unlock(vfsmount_lock);
670 acct_auto_close_mnt(mnt); 790 mntfree(mnt);
671 goto repeat; 791}
792#endif
793
794static void mntput_no_expire(struct vfsmount *mnt)
795{
796 __mntput(mnt, 0);
797}
798
799void mntput(struct vfsmount *mnt)
800{
801 if (mnt) {
802 /* avoid cacheline pingpong, hope gcc doesn't get "smart" */
803 if (unlikely(mnt->mnt_expiry_mark))
804 mnt->mnt_expiry_mark = 0;
805 __mntput(mnt, 0);
806 }
807}
808EXPORT_SYMBOL(mntput);
809
810struct vfsmount *mntget(struct vfsmount *mnt)
811{
812 if (mnt)
813 mnt_inc_count(mnt);
814 return mnt;
672} 815}
673EXPORT_SYMBOL(mntput_no_expire); 816EXPORT_SYMBOL(mntget);
817
818void mntput_long(struct vfsmount *mnt)
819{
820#ifdef CONFIG_SMP
821 if (mnt) {
822 /* avoid cacheline pingpong, hope gcc doesn't get "smart" */
823 if (unlikely(mnt->mnt_expiry_mark))
824 mnt->mnt_expiry_mark = 0;
825 __mntput(mnt, 1);
826 }
827#else
828 mntput(mnt);
829#endif
830}
831EXPORT_SYMBOL(mntput_long);
832
833struct vfsmount *mntget_long(struct vfsmount *mnt)
834{
835#ifdef CONFIG_SMP
836 if (mnt)
837 atomic_inc(&mnt->mnt_longrefs);
838 return mnt;
839#else
840 return mntget(mnt);
841#endif
842}
843EXPORT_SYMBOL(mntget_long);
674 844
675void mnt_pin(struct vfsmount *mnt) 845void mnt_pin(struct vfsmount *mnt)
676{ 846{
@@ -678,19 +848,17 @@ void mnt_pin(struct vfsmount *mnt)
678 mnt->mnt_pinned++; 848 mnt->mnt_pinned++;
679 br_write_unlock(vfsmount_lock); 849 br_write_unlock(vfsmount_lock);
680} 850}
681
682EXPORT_SYMBOL(mnt_pin); 851EXPORT_SYMBOL(mnt_pin);
683 852
684void mnt_unpin(struct vfsmount *mnt) 853void mnt_unpin(struct vfsmount *mnt)
685{ 854{
686 br_write_lock(vfsmount_lock); 855 br_write_lock(vfsmount_lock);
687 if (mnt->mnt_pinned) { 856 if (mnt->mnt_pinned) {
688 atomic_inc(&mnt->mnt_count); 857 mnt_inc_count(mnt);
689 mnt->mnt_pinned--; 858 mnt->mnt_pinned--;
690 } 859 }
691 br_write_unlock(vfsmount_lock); 860 br_write_unlock(vfsmount_lock);
692} 861}
693
694EXPORT_SYMBOL(mnt_unpin); 862EXPORT_SYMBOL(mnt_unpin);
695 863
696static inline void mangle(struct seq_file *m, const char *s) 864static inline void mangle(struct seq_file *m, const char *s)
@@ -985,12 +1153,13 @@ int may_umount_tree(struct vfsmount *mnt)
985 int minimum_refs = 0; 1153 int minimum_refs = 0;
986 struct vfsmount *p; 1154 struct vfsmount *p;
987 1155
988 br_read_lock(vfsmount_lock); 1156 /* write lock needed for mnt_get_count */
1157 br_write_lock(vfsmount_lock);
989 for (p = mnt; p; p = next_mnt(p, mnt)) { 1158 for (p = mnt; p; p = next_mnt(p, mnt)) {
990 actual_refs += atomic_read(&p->mnt_count); 1159 actual_refs += mnt_get_count(p);
991 minimum_refs += 2; 1160 minimum_refs += 2;
992 } 1161 }
993 br_read_unlock(vfsmount_lock); 1162 br_write_unlock(vfsmount_lock);
994 1163
995 if (actual_refs > minimum_refs) 1164 if (actual_refs > minimum_refs)
996 return 0; 1165 return 0;
@@ -1017,10 +1186,10 @@ int may_umount(struct vfsmount *mnt)
1017{ 1186{
1018 int ret = 1; 1187 int ret = 1;
1019 down_read(&namespace_sem); 1188 down_read(&namespace_sem);
1020 br_read_lock(vfsmount_lock); 1189 br_write_lock(vfsmount_lock);
1021 if (propagate_mount_busy(mnt, 2)) 1190 if (propagate_mount_busy(mnt, 2))
1022 ret = 0; 1191 ret = 0;
1023 br_read_unlock(vfsmount_lock); 1192 br_write_unlock(vfsmount_lock);
1024 up_read(&namespace_sem); 1193 up_read(&namespace_sem);
1025 return ret; 1194 return ret;
1026} 1195}
@@ -1047,7 +1216,7 @@ void release_mounts(struct list_head *head)
1047 dput(dentry); 1216 dput(dentry);
1048 mntput(m); 1217 mntput(m);
1049 } 1218 }
1050 mntput(mnt); 1219 mntput_long(mnt);
1051 } 1220 }
1052} 1221}
1053 1222
@@ -1073,7 +1242,7 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
1073 list_del_init(&p->mnt_child); 1242 list_del_init(&p->mnt_child);
1074 if (p->mnt_parent != p) { 1243 if (p->mnt_parent != p) {
1075 p->mnt_parent->mnt_ghosts++; 1244 p->mnt_parent->mnt_ghosts++;
1076 p->mnt_mountpoint->d_mounted--; 1245 dentry_reset_mounted(p->mnt_parent, p->mnt_mountpoint);
1077 } 1246 }
1078 change_mnt_propagation(p, MS_PRIVATE); 1247 change_mnt_propagation(p, MS_PRIVATE);
1079 } 1248 }
@@ -1102,8 +1271,16 @@ static int do_umount(struct vfsmount *mnt, int flags)
1102 flags & (MNT_FORCE | MNT_DETACH)) 1271 flags & (MNT_FORCE | MNT_DETACH))
1103 return -EINVAL; 1272 return -EINVAL;
1104 1273
1105 if (atomic_read(&mnt->mnt_count) != 2) 1274 /*
1275 * probably don't strictly need the lock here if we examined
1276 * all race cases, but it's a slowpath.
1277 */
1278 br_write_lock(vfsmount_lock);
1279 if (mnt_get_count(mnt) != 2) {
1280 br_write_unlock(vfsmount_lock);
1106 return -EBUSY; 1281 return -EBUSY;
1282 }
1283 br_write_unlock(vfsmount_lock);
1107 1284
1108 if (!xchg(&mnt->mnt_expiry_mark, 1)) 1285 if (!xchg(&mnt->mnt_expiry_mark, 1))
1109 return -EAGAIN; 1286 return -EAGAIN;
@@ -1792,7 +1969,7 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path,
1792 1969
1793unlock: 1970unlock:
1794 up_write(&namespace_sem); 1971 up_write(&namespace_sem);
1795 mntput(newmnt); 1972 mntput_long(newmnt);
1796 return err; 1973 return err;
1797} 1974}
1798 1975
@@ -2125,11 +2302,11 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2125 if (fs) { 2302 if (fs) {
2126 if (p == fs->root.mnt) { 2303 if (p == fs->root.mnt) {
2127 rootmnt = p; 2304 rootmnt = p;
2128 fs->root.mnt = mntget(q); 2305 fs->root.mnt = mntget_long(q);
2129 } 2306 }
2130 if (p == fs->pwd.mnt) { 2307 if (p == fs->pwd.mnt) {
2131 pwdmnt = p; 2308 pwdmnt = p;
2132 fs->pwd.mnt = mntget(q); 2309 fs->pwd.mnt = mntget_long(q);
2133 } 2310 }
2134 } 2311 }
2135 p = next_mnt(p, mnt_ns->root); 2312 p = next_mnt(p, mnt_ns->root);
@@ -2138,9 +2315,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2138 up_write(&namespace_sem); 2315 up_write(&namespace_sem);
2139 2316
2140 if (rootmnt) 2317 if (rootmnt)
2141 mntput(rootmnt); 2318 mntput_long(rootmnt);
2142 if (pwdmnt) 2319 if (pwdmnt)
2143 mntput(pwdmnt); 2320 mntput_long(pwdmnt);
2144 2321
2145 return new_ns; 2322 return new_ns;
2146} 2323}
@@ -2327,6 +2504,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2327 touch_mnt_namespace(current->nsproxy->mnt_ns); 2504 touch_mnt_namespace(current->nsproxy->mnt_ns);
2328 br_write_unlock(vfsmount_lock); 2505 br_write_unlock(vfsmount_lock);
2329 chroot_fs_refs(&root, &new); 2506 chroot_fs_refs(&root, &new);
2507
2330 error = 0; 2508 error = 0;
2331 path_put(&root_parent); 2509 path_put(&root_parent);
2332 path_put(&parent_path); 2510 path_put(&parent_path);
@@ -2353,6 +2531,7 @@ static void __init init_mount_tree(void)
2353 mnt = do_kern_mount("rootfs", 0, "rootfs", NULL); 2531 mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
2354 if (IS_ERR(mnt)) 2532 if (IS_ERR(mnt))
2355 panic("Can't create rootfs"); 2533 panic("Can't create rootfs");
2534
2356 ns = create_mnt_ns(mnt); 2535 ns = create_mnt_ns(mnt);
2357 if (IS_ERR(ns)) 2536 if (IS_ERR(ns))
2358 panic("Can't allocate initial namespace"); 2537 panic("Can't allocate initial namespace");
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index f22b12e7d33..28f136d4aae 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -17,6 +17,7 @@
17#include <linux/kernel.h> 17#include <linux/kernel.h>
18#include <linux/vmalloc.h> 18#include <linux/vmalloc.h>
19#include <linux/mm.h> 19#include <linux/mm.h>
20#include <linux/namei.h>
20#include <asm/uaccess.h> 21#include <asm/uaccess.h>
21#include <asm/byteorder.h> 22#include <asm/byteorder.h>
22 23
@@ -74,9 +75,12 @@ const struct inode_operations ncp_dir_inode_operations =
74 * Dentry operations routines 75 * Dentry operations routines
75 */ 76 */
76static int ncp_lookup_validate(struct dentry *, struct nameidata *); 77static int ncp_lookup_validate(struct dentry *, struct nameidata *);
77static int ncp_hash_dentry(struct dentry *, struct qstr *); 78static int ncp_hash_dentry(const struct dentry *, const struct inode *,
78static int ncp_compare_dentry (struct dentry *, struct qstr *, struct qstr *); 79 struct qstr *);
79static int ncp_delete_dentry(struct dentry *); 80static int ncp_compare_dentry(const struct dentry *, const struct inode *,
81 const struct dentry *, const struct inode *,
82 unsigned int, const char *, const struct qstr *);
83static int ncp_delete_dentry(const struct dentry *);
80 84
81static const struct dentry_operations ncp_dentry_operations = 85static const struct dentry_operations ncp_dentry_operations =
82{ 86{
@@ -113,10 +117,10 @@ static inline int ncp_preserve_entry_case(struct inode *i, __u32 nscreator)
113 117
114#define ncp_preserve_case(i) (ncp_namespace(i) != NW_NS_DOS) 118#define ncp_preserve_case(i) (ncp_namespace(i) != NW_NS_DOS)
115 119
116static inline int ncp_case_sensitive(struct dentry *dentry) 120static inline int ncp_case_sensitive(const struct inode *i)
117{ 121{
118#ifdef CONFIG_NCPFS_NFS_NS 122#ifdef CONFIG_NCPFS_NFS_NS
119 return ncp_namespace(dentry->d_inode) == NW_NS_NFS; 123 return ncp_namespace(i) == NW_NS_NFS;
120#else 124#else
121 return 0; 125 return 0;
122#endif /* CONFIG_NCPFS_NFS_NS */ 126#endif /* CONFIG_NCPFS_NFS_NS */
@@ -127,14 +131,16 @@ static inline int ncp_case_sensitive(struct dentry *dentry)
127 * is case-sensitive. 131 * is case-sensitive.
128 */ 132 */
129static int 133static int
130ncp_hash_dentry(struct dentry *dentry, struct qstr *this) 134ncp_hash_dentry(const struct dentry *dentry, const struct inode *inode,
135 struct qstr *this)
131{ 136{
132 if (!ncp_case_sensitive(dentry)) { 137 if (!ncp_case_sensitive(inode)) {
138 struct super_block *sb = dentry->d_sb;
133 struct nls_table *t; 139 struct nls_table *t;
134 unsigned long hash; 140 unsigned long hash;
135 int i; 141 int i;
136 142
137 t = NCP_IO_TABLE(dentry); 143 t = NCP_IO_TABLE(sb);
138 hash = init_name_hash(); 144 hash = init_name_hash();
139 for (i=0; i<this->len ; i++) 145 for (i=0; i<this->len ; i++)
140 hash = partial_name_hash(ncp_tolower(t, this->name[i]), 146 hash = partial_name_hash(ncp_tolower(t, this->name[i]),
@@ -145,15 +151,17 @@ ncp_hash_dentry(struct dentry *dentry, struct qstr *this)
145} 151}
146 152
147static int 153static int
148ncp_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b) 154ncp_compare_dentry(const struct dentry *parent, const struct inode *pinode,
155 const struct dentry *dentry, const struct inode *inode,
156 unsigned int len, const char *str, const struct qstr *name)
149{ 157{
150 if (a->len != b->len) 158 if (len != name->len)
151 return 1; 159 return 1;
152 160
153 if (ncp_case_sensitive(dentry)) 161 if (ncp_case_sensitive(pinode))
154 return strncmp(a->name, b->name, a->len); 162 return strncmp(str, name->name, len);
155 163
156 return ncp_strnicmp(NCP_IO_TABLE(dentry), a->name, b->name, a->len); 164 return ncp_strnicmp(NCP_IO_TABLE(pinode->i_sb), str, name->name, len);
157} 165}
158 166
159/* 167/*
@@ -162,7 +170,7 @@ ncp_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b)
162 * Closing files can be safely postponed until iput() - it's done there anyway. 170 * Closing files can be safely postponed until iput() - it's done there anyway.
163 */ 171 */
164static int 172static int
165ncp_delete_dentry(struct dentry * dentry) 173ncp_delete_dentry(const struct dentry * dentry)
166{ 174{
167 struct inode *inode = dentry->d_inode; 175 struct inode *inode = dentry->d_inode;
168 176
@@ -301,6 +309,9 @@ ncp_lookup_validate(struct dentry *dentry, struct nameidata *nd)
301 int res, val = 0, len; 309 int res, val = 0, len;
302 __u8 __name[NCP_MAXPATHLEN + 1]; 310 __u8 __name[NCP_MAXPATHLEN + 1];
303 311
312 if (nd->flags & LOOKUP_RCU)
313 return -ECHILD;
314
304 parent = dget_parent(dentry); 315 parent = dget_parent(dentry);
305 dir = parent->d_inode; 316 dir = parent->d_inode;
306 317
@@ -384,21 +395,21 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
384 } 395 }
385 396
386 /* If a pointer is invalid, we search the dentry. */ 397 /* If a pointer is invalid, we search the dentry. */
387 spin_lock(&dcache_lock); 398 spin_lock(&parent->d_lock);
388 next = parent->d_subdirs.next; 399 next = parent->d_subdirs.next;
389 while (next != &parent->d_subdirs) { 400 while (next != &parent->d_subdirs) {
390 dent = list_entry(next, struct dentry, d_u.d_child); 401 dent = list_entry(next, struct dentry, d_u.d_child);
391 if ((unsigned long)dent->d_fsdata == fpos) { 402 if ((unsigned long)dent->d_fsdata == fpos) {
392 if (dent->d_inode) 403 if (dent->d_inode)
393 dget_locked(dent); 404 dget(dent);
394 else 405 else
395 dent = NULL; 406 dent = NULL;
396 spin_unlock(&dcache_lock); 407 spin_unlock(&parent->d_lock);
397 goto out; 408 goto out;
398 } 409 }
399 next = next->next; 410 next = next->next;
400 } 411 }
401 spin_unlock(&dcache_lock); 412 spin_unlock(&parent->d_lock);
402 return NULL; 413 return NULL;
403 414
404out: 415out:
@@ -592,7 +603,7 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
592 qname.hash = full_name_hash(qname.name, qname.len); 603 qname.hash = full_name_hash(qname.name, qname.len);
593 604
594 if (dentry->d_op && dentry->d_op->d_hash) 605 if (dentry->d_op && dentry->d_op->d_hash)
595 if (dentry->d_op->d_hash(dentry, &qname) != 0) 606 if (dentry->d_op->d_hash(dentry, dentry->d_inode, &qname) != 0)
596 goto end_advance; 607 goto end_advance;
597 608
598 newdent = d_lookup(dentry, &qname); 609 newdent = d_lookup(dentry, &qname);
@@ -611,35 +622,12 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
611 shrink_dcache_parent(newdent); 622 shrink_dcache_parent(newdent);
612 623
613 /* 624 /*
614 * It is not as dangerous as it looks. NetWare's OS2 namespace is 625 * NetWare's OS2 namespace is case preserving yet case
615 * case preserving yet case insensitive. So we update dentry's name 626 * insensitive. So we update dentry's name as received from
616 * as received from server. We found dentry via d_lookup with our 627 * server. Parent dir's i_mutex is locked because we're in
617 * hash, so we know that hash does not change, and so replacing name 628 * readdir.
618 * should be reasonably safe.
619 */ 629 */
620 if (qname.len == newdent->d_name.len && 630 dentry_update_name_case(newdent, &qname);
621 memcmp(newdent->d_name.name, qname.name, newdent->d_name.len)) {
622 struct inode *inode = newdent->d_inode;
623
624 /*
625 * Inside ncpfs all uses of d_name are either for debugging,
626 * or on functions which acquire inode mutex (mknod, creat,
627 * lookup). So grab i_mutex here, to be sure. d_path
628 * uses dcache_lock when generating path, so we should too.
629 * And finally d_compare is protected by dentry's d_lock, so
630 * here we go.
631 */
632 if (inode)
633 mutex_lock(&inode->i_mutex);
634 spin_lock(&dcache_lock);
635 spin_lock(&newdent->d_lock);
636 memcpy((char *) newdent->d_name.name, qname.name,
637 newdent->d_name.len);
638 spin_unlock(&newdent->d_lock);
639 spin_unlock(&dcache_lock);
640 if (inode)
641 mutex_unlock(&inode->i_mutex);
642 }
643 } 631 }
644 632
645 if (!newdent->d_inode) { 633 if (!newdent->d_inode) {
@@ -649,7 +637,7 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
649 entry->ino = iunique(dir->i_sb, 2); 637 entry->ino = iunique(dir->i_sb, 2);
650 inode = ncp_iget(dir->i_sb, entry); 638 inode = ncp_iget(dir->i_sb, entry);
651 if (inode) { 639 if (inode) {
652 newdent->d_op = &ncp_dentry_operations; 640 d_set_d_op(newdent, &ncp_dentry_operations);
653 d_instantiate(newdent, inode); 641 d_instantiate(newdent, inode);
654 if (!hashed) 642 if (!hashed)
655 d_rehash(newdent); 643 d_rehash(newdent);
@@ -657,7 +645,7 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
657 } else { 645 } else {
658 struct inode *inode = newdent->d_inode; 646 struct inode *inode = newdent->d_inode;
659 647
660 mutex_lock(&inode->i_mutex); 648 mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
661 ncp_update_inode2(inode, entry); 649 ncp_update_inode2(inode, entry);
662 mutex_unlock(&inode->i_mutex); 650 mutex_unlock(&inode->i_mutex);
663 } 651 }
@@ -905,7 +893,7 @@ static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, struc
905 if (inode) { 893 if (inode) {
906 ncp_new_dentry(dentry); 894 ncp_new_dentry(dentry);
907add_entry: 895add_entry:
908 dentry->d_op = &ncp_dentry_operations; 896 d_set_d_op(dentry, &ncp_dentry_operations);
909 d_add(dentry, inode); 897 d_add(dentry, inode);
910 error = 0; 898 error = 0;
911 } 899 }
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 8fb93b604e7..9b39a5dd413 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -29,6 +29,7 @@
29#include <linux/vfs.h> 29#include <linux/vfs.h>
30#include <linux/mount.h> 30#include <linux/mount.h>
31#include <linux/seq_file.h> 31#include <linux/seq_file.h>
32#include <linux/namei.h>
32 33
33#include <linux/ncp_fs.h> 34#include <linux/ncp_fs.h>
34 35
@@ -58,11 +59,18 @@ static struct inode *ncp_alloc_inode(struct super_block *sb)
58 return &ei->vfs_inode; 59 return &ei->vfs_inode;
59} 60}
60 61
61static void ncp_destroy_inode(struct inode *inode) 62static void ncp_i_callback(struct rcu_head *head)
62{ 63{
64 struct inode *inode = container_of(head, struct inode, i_rcu);
65 INIT_LIST_HEAD(&inode->i_dentry);
63 kmem_cache_free(ncp_inode_cachep, NCP_FINFO(inode)); 66 kmem_cache_free(ncp_inode_cachep, NCP_FINFO(inode));
64} 67}
65 68
69static void ncp_destroy_inode(struct inode *inode)
70{
71 call_rcu(&inode->i_rcu, ncp_i_callback);
72}
73
66static void init_once(void *foo) 74static void init_once(void *foo)
67{ 75{
68 struct ncp_inode_info *ei = (struct ncp_inode_info *) foo; 76 struct ncp_inode_info *ei = (struct ncp_inode_info *) foo;
@@ -309,7 +317,12 @@ static void ncp_stop_tasks(struct ncp_server *server) {
309 sk->sk_write_space = server->write_space; 317 sk->sk_write_space = server->write_space;
310 release_sock(sk); 318 release_sock(sk);
311 del_timer_sync(&server->timeout_tm); 319 del_timer_sync(&server->timeout_tm);
312 flush_scheduled_work(); 320
321 flush_work_sync(&server->rcv.tq);
322 if (sk->sk_socket->type == SOCK_STREAM)
323 flush_work_sync(&server->tx.tq);
324 else
325 flush_work_sync(&server->timeout_tq);
313} 326}
314 327
315static int ncp_show_options(struct seq_file *seq, struct vfsmount *mnt) 328static int ncp_show_options(struct seq_file *seq, struct vfsmount *mnt)
@@ -710,7 +723,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
710 sb->s_root = d_alloc_root(root_inode); 723 sb->s_root = d_alloc_root(root_inode);
711 if (!sb->s_root) 724 if (!sb->s_root)
712 goto out_no_root; 725 goto out_no_root;
713 sb->s_root->d_op = &ncp_root_dentry_operations; 726 d_set_d_op(sb->s_root, &ncp_root_dentry_operations);
714 return 0; 727 return 0;
715 728
716out_no_root: 729out_no_root:
diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h
index 3c57eca634c..1220df75ff2 100644
--- a/fs/ncpfs/ncplib_kernel.h
+++ b/fs/ncpfs/ncplib_kernel.h
@@ -135,7 +135,7 @@ int ncp__vol2io(struct ncp_server *, unsigned char *, unsigned int *,
135 const unsigned char *, unsigned int, int); 135 const unsigned char *, unsigned int, int);
136 136
137#define NCP_ESC ':' 137#define NCP_ESC ':'
138#define NCP_IO_TABLE(dentry) (NCP_SERVER((dentry)->d_inode)->nls_io) 138#define NCP_IO_TABLE(sb) (NCP_SBP(sb)->nls_io)
139#define ncp_tolower(t, c) nls_tolower(t, c) 139#define ncp_tolower(t, c) nls_tolower(t, c)
140#define ncp_toupper(t, c) nls_toupper(t, c) 140#define ncp_toupper(t, c) nls_toupper(t, c)
141#define ncp_strnicmp(t, s1, s2, len) \ 141#define ncp_strnicmp(t, s1, s2, len) \
@@ -150,15 +150,15 @@ int ncp__io2vol(unsigned char *, unsigned int *,
150int ncp__vol2io(unsigned char *, unsigned int *, 150int ncp__vol2io(unsigned char *, unsigned int *,
151 const unsigned char *, unsigned int, int); 151 const unsigned char *, unsigned int, int);
152 152
153#define NCP_IO_TABLE(dentry) NULL 153#define NCP_IO_TABLE(sb) NULL
154#define ncp_tolower(t, c) tolower(c) 154#define ncp_tolower(t, c) tolower(c)
155#define ncp_toupper(t, c) toupper(c) 155#define ncp_toupper(t, c) toupper(c)
156#define ncp_io2vol(S,m,i,n,k,U) ncp__io2vol(m,i,n,k,U) 156#define ncp_io2vol(S,m,i,n,k,U) ncp__io2vol(m,i,n,k,U)
157#define ncp_vol2io(S,m,i,n,k,U) ncp__vol2io(m,i,n,k,U) 157#define ncp_vol2io(S,m,i,n,k,U) ncp__vol2io(m,i,n,k,U)
158 158
159 159
160static inline int ncp_strnicmp(struct nls_table *t, const unsigned char *s1, 160static inline int ncp_strnicmp(const struct nls_table *t,
161 const unsigned char *s2, int len) 161 const unsigned char *s1, const unsigned char *s2, int len)
162{ 162{
163 while (len--) { 163 while (len--) {
164 if (tolower(*s1++) != tolower(*s2++)) 164 if (tolower(*s1++) != tolower(*s2++))
@@ -193,7 +193,7 @@ ncp_renew_dentries(struct dentry *parent)
193 struct list_head *next; 193 struct list_head *next;
194 struct dentry *dentry; 194 struct dentry *dentry;
195 195
196 spin_lock(&dcache_lock); 196 spin_lock(&parent->d_lock);
197 next = parent->d_subdirs.next; 197 next = parent->d_subdirs.next;
198 while (next != &parent->d_subdirs) { 198 while (next != &parent->d_subdirs) {
199 dentry = list_entry(next, struct dentry, d_u.d_child); 199 dentry = list_entry(next, struct dentry, d_u.d_child);
@@ -205,7 +205,7 @@ ncp_renew_dentries(struct dentry *parent)
205 205
206 next = next->next; 206 next = next->next;
207 } 207 }
208 spin_unlock(&dcache_lock); 208 spin_unlock(&parent->d_lock);
209} 209}
210 210
211static inline void 211static inline void
@@ -215,7 +215,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent)
215 struct list_head *next; 215 struct list_head *next;
216 struct dentry *dentry; 216 struct dentry *dentry;
217 217
218 spin_lock(&dcache_lock); 218 spin_lock(&parent->d_lock);
219 next = parent->d_subdirs.next; 219 next = parent->d_subdirs.next;
220 while (next != &parent->d_subdirs) { 220 while (next != &parent->d_subdirs) {
221 dentry = list_entry(next, struct dentry, d_u.d_child); 221 dentry = list_entry(next, struct dentry, d_u.d_child);
@@ -223,7 +223,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent)
223 ncp_age_dentry(server, dentry); 223 ncp_age_dentry(server, dentry);
224 next = next->next; 224 next = next->next;
225 } 225 }
226 spin_unlock(&dcache_lock); 226 spin_unlock(&parent->d_lock);
227} 227}
228 228
229struct ncp_cache_head { 229struct ncp_cache_head {
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 93a8b3bd69e..199016528fc 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -16,9 +16,7 @@
16#include <linux/freezer.h> 16#include <linux/freezer.h>
17#include <linux/kthread.h> 17#include <linux/kthread.h>
18#include <linux/sunrpc/svcauth_gss.h> 18#include <linux/sunrpc/svcauth_gss.h>
19#if defined(CONFIG_NFS_V4_1)
20#include <linux/sunrpc/bc_xprt.h> 19#include <linux/sunrpc/bc_xprt.h>
21#endif
22 20
23#include <net/inet_sock.h> 21#include <net/inet_sock.h>
24 22
@@ -137,6 +135,33 @@ out_err:
137 135
138#if defined(CONFIG_NFS_V4_1) 136#if defined(CONFIG_NFS_V4_1)
139/* 137/*
138 * * CB_SEQUENCE operations will fail until the callback sessionid is set.
139 * */
140int nfs4_set_callback_sessionid(struct nfs_client *clp)
141{
142 struct svc_serv *serv = clp->cl_rpcclient->cl_xprt->bc_serv;
143 struct nfs4_sessionid *bc_sid;
144
145 if (!serv->sv_bc_xprt)
146 return -EINVAL;
147
148 /* on success freed in xprt_free */
149 bc_sid = kmalloc(sizeof(struct nfs4_sessionid), GFP_KERNEL);
150 if (!bc_sid)
151 return -ENOMEM;
152 memcpy(bc_sid->data, &clp->cl_session->sess_id.data,
153 NFS4_MAX_SESSIONID_LEN);
154 spin_lock_bh(&serv->sv_cb_lock);
155 serv->sv_bc_xprt->xpt_bc_sid = bc_sid;
156 spin_unlock_bh(&serv->sv_cb_lock);
157 dprintk("%s set xpt_bc_sid=%u:%u:%u:%u for sv_bc_xprt %p\n", __func__,
158 ((u32 *)bc_sid->data)[0], ((u32 *)bc_sid->data)[1],
159 ((u32 *)bc_sid->data)[2], ((u32 *)bc_sid->data)[3],
160 serv->sv_bc_xprt);
161 return 0;
162}
163
164/*
140 * The callback service for NFSv4.1 callbacks 165 * The callback service for NFSv4.1 callbacks
141 */ 166 */
142static int 167static int
@@ -177,30 +202,38 @@ nfs41_callback_svc(void *vrqstp)
177struct svc_rqst * 202struct svc_rqst *
178nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) 203nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt)
179{ 204{
180 struct svc_xprt *bc_xprt; 205 struct svc_rqst *rqstp;
181 struct svc_rqst *rqstp = ERR_PTR(-ENOMEM); 206 int ret;
182 207
183 dprintk("--> %s\n", __func__); 208 /*
184 /* Create a svc_sock for the service */ 209 * Create an svc_sock for the back channel service that shares the
185 bc_xprt = svc_sock_create(serv, xprt->prot); 210 * fore channel connection.
186 if (!bc_xprt) 211 * Returns the input port (0) and sets the svc_serv bc_xprt on success
212 */
213 ret = svc_create_xprt(serv, "tcp-bc", &init_net, PF_INET, 0,
214 SVC_SOCK_ANONYMOUS);
215 if (ret < 0) {
216 rqstp = ERR_PTR(ret);
187 goto out; 217 goto out;
218 }
188 219
189 /* 220 /*
190 * Save the svc_serv in the transport so that it can 221 * Save the svc_serv in the transport so that it can
191 * be referenced when the session backchannel is initialized 222 * be referenced when the session backchannel is initialized
192 */ 223 */
193 serv->bc_xprt = bc_xprt;
194 xprt->bc_serv = serv; 224 xprt->bc_serv = serv;
195 225
196 INIT_LIST_HEAD(&serv->sv_cb_list); 226 INIT_LIST_HEAD(&serv->sv_cb_list);
197 spin_lock_init(&serv->sv_cb_lock); 227 spin_lock_init(&serv->sv_cb_lock);
198 init_waitqueue_head(&serv->sv_cb_waitq); 228 init_waitqueue_head(&serv->sv_cb_waitq);
199 rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]); 229 rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]);
200 if (IS_ERR(rqstp)) 230 if (IS_ERR(rqstp)) {
201 svc_sock_destroy(bc_xprt); 231 svc_xprt_put(serv->sv_bc_xprt);
232 serv->sv_bc_xprt = NULL;
233 }
202out: 234out:
203 dprintk("--> %s return %p\n", __func__, rqstp); 235 dprintk("--> %s return %ld\n", __func__,
236 IS_ERR(rqstp) ? PTR_ERR(rqstp) : 0);
204 return rqstp; 237 return rqstp;
205} 238}
206 239
@@ -233,6 +266,10 @@ static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt,
233 struct nfs_callback_data *cb_info) 266 struct nfs_callback_data *cb_info)
234{ 267{
235} 268}
269int nfs4_set_callback_sessionid(struct nfs_client *clp)
270{
271 return 0;
272}
236#endif /* CONFIG_NFS_V4_1 */ 273#endif /* CONFIG_NFS_V4_1 */
237 274
238/* 275/*
@@ -328,6 +365,9 @@ static int check_gss_callback_principal(struct nfs_client *clp,
328 struct rpc_clnt *r = clp->cl_rpcclient; 365 struct rpc_clnt *r = clp->cl_rpcclient;
329 char *p = svc_gss_principal(rqstp); 366 char *p = svc_gss_principal(rqstp);
330 367
368 /* No RPC_AUTH_GSS on NFSv4.1 back channel yet */
369 if (clp->cl_minorversion != 0)
370 return SVC_DROP;
331 /* 371 /*
332 * It might just be a normal user principal, in which case 372 * It might just be a normal user principal, in which case
333 * userspace won't bother to tell us the name at all. 373 * userspace won't bother to tell us the name at all.
@@ -345,6 +385,23 @@ static int check_gss_callback_principal(struct nfs_client *clp,
345 return SVC_OK; 385 return SVC_OK;
346} 386}
347 387
388/* pg_authenticate method helper */
389static struct nfs_client *nfs_cb_find_client(struct svc_rqst *rqstp)
390{
391 struct nfs4_sessionid *sessionid = bc_xprt_sid(rqstp);
392 int is_cb_compound = rqstp->rq_proc == CB_COMPOUND ? 1 : 0;
393
394 dprintk("--> %s rq_proc %d\n", __func__, rqstp->rq_proc);
395 if (svc_is_backchannel(rqstp))
396 /* Sessionid (usually) set after CB_NULL ping */
397 return nfs4_find_client_sessionid(svc_addr(rqstp), sessionid,
398 is_cb_compound);
399 else
400 /* No callback identifier in pg_authenticate */
401 return nfs4_find_client_no_ident(svc_addr(rqstp));
402}
403
404/* pg_authenticate method for nfsv4 callback threads. */
348static int nfs_callback_authenticate(struct svc_rqst *rqstp) 405static int nfs_callback_authenticate(struct svc_rqst *rqstp)
349{ 406{
350 struct nfs_client *clp; 407 struct nfs_client *clp;
@@ -352,7 +409,7 @@ static int nfs_callback_authenticate(struct svc_rqst *rqstp)
352 int ret = SVC_OK; 409 int ret = SVC_OK;
353 410
354 /* Don't talk to strangers */ 411 /* Don't talk to strangers */
355 clp = nfs_find_client(svc_addr(rqstp), 4); 412 clp = nfs_cb_find_client(rqstp);
356 if (clp == NULL) 413 if (clp == NULL)
357 return SVC_DROP; 414 return SVC_DROP;
358 415
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index 85a7cfd1b8d..d3b44f9bd74 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -34,10 +34,17 @@ enum nfs4_callback_opnum {
34 OP_CB_ILLEGAL = 10044, 34 OP_CB_ILLEGAL = 10044,
35}; 35};
36 36
37struct cb_process_state {
38 __be32 drc_status;
39 struct nfs_client *clp;
40 struct nfs4_sessionid *svc_sid; /* v4.1 callback service sessionid */
41};
42
37struct cb_compound_hdr_arg { 43struct cb_compound_hdr_arg {
38 unsigned int taglen; 44 unsigned int taglen;
39 const char *tag; 45 const char *tag;
40 unsigned int minorversion; 46 unsigned int minorversion;
47 unsigned int cb_ident; /* v4.0 callback identifier */
41 unsigned nops; 48 unsigned nops;
42}; 49};
43 50
@@ -103,14 +110,23 @@ struct cb_sequenceres {
103 uint32_t csr_target_highestslotid; 110 uint32_t csr_target_highestslotid;
104}; 111};
105 112
106extern unsigned nfs4_callback_sequence(struct cb_sequenceargs *args, 113extern __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
107 struct cb_sequenceres *res); 114 struct cb_sequenceres *res,
115 struct cb_process_state *cps);
108 116
109extern int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, 117extern int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation,
110 const nfs4_stateid *stateid); 118 const nfs4_stateid *stateid);
111 119
112#define RCA4_TYPE_MASK_RDATA_DLG 0 120#define RCA4_TYPE_MASK_RDATA_DLG 0
113#define RCA4_TYPE_MASK_WDATA_DLG 1 121#define RCA4_TYPE_MASK_WDATA_DLG 1
122#define RCA4_TYPE_MASK_DIR_DLG 2
123#define RCA4_TYPE_MASK_FILE_LAYOUT 3
124#define RCA4_TYPE_MASK_BLK_LAYOUT 4
125#define RCA4_TYPE_MASK_OBJ_LAYOUT_MIN 8
126#define RCA4_TYPE_MASK_OBJ_LAYOUT_MAX 9
127#define RCA4_TYPE_MASK_OTHER_LAYOUT_MIN 12
128#define RCA4_TYPE_MASK_OTHER_LAYOUT_MAX 15
129#define RCA4_TYPE_MASK_ALL 0xf31f
114 130
115struct cb_recallanyargs { 131struct cb_recallanyargs {
116 struct sockaddr *craa_addr; 132 struct sockaddr *craa_addr;
@@ -118,25 +134,52 @@ struct cb_recallanyargs {
118 uint32_t craa_type_mask; 134 uint32_t craa_type_mask;
119}; 135};
120 136
121extern unsigned nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy); 137extern __be32 nfs4_callback_recallany(struct cb_recallanyargs *args,
138 void *dummy,
139 struct cb_process_state *cps);
122 140
123struct cb_recallslotargs { 141struct cb_recallslotargs {
124 struct sockaddr *crsa_addr; 142 struct sockaddr *crsa_addr;
125 uint32_t crsa_target_max_slots; 143 uint32_t crsa_target_max_slots;
126}; 144};
127extern unsigned nfs4_callback_recallslot(struct cb_recallslotargs *args, 145extern __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args,
128 void *dummy); 146 void *dummy,
147 struct cb_process_state *cps);
148
149struct cb_layoutrecallargs {
150 struct sockaddr *cbl_addr;
151 uint32_t cbl_recall_type;
152 uint32_t cbl_layout_type;
153 uint32_t cbl_layoutchanged;
154 union {
155 struct {
156 struct nfs_fh cbl_fh;
157 struct pnfs_layout_range cbl_range;
158 nfs4_stateid cbl_stateid;
159 };
160 struct nfs_fsid cbl_fsid;
161 };
162};
129 163
130#endif /* CONFIG_NFS_V4_1 */ 164extern unsigned nfs4_callback_layoutrecall(
165 struct cb_layoutrecallargs *args,
166 void *dummy, struct cb_process_state *cps);
131 167
132extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res); 168extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses);
133extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy); 169extern void nfs4_cb_take_slot(struct nfs_client *clp);
170#endif /* CONFIG_NFS_V4_1 */
134 171
172extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
173 struct cb_getattrres *res,
174 struct cb_process_state *cps);
175extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
176 struct cb_process_state *cps);
135#ifdef CONFIG_NFS_V4 177#ifdef CONFIG_NFS_V4
136extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt); 178extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt);
137extern void nfs_callback_down(int minorversion); 179extern void nfs_callback_down(int minorversion);
138extern int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, 180extern int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation,
139 const nfs4_stateid *stateid); 181 const nfs4_stateid *stateid);
182extern int nfs4_set_callback_sessionid(struct nfs_client *clp);
140#endif /* CONFIG_NFS_V4 */ 183#endif /* CONFIG_NFS_V4 */
141/* 184/*
142 * nfs41: Callbacks are expected to not cause substantial latency, 185 * nfs41: Callbacks are expected to not cause substantial latency,
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 2950fca0c61..4bb91cb2620 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -12,30 +12,33 @@
12#include "callback.h" 12#include "callback.h"
13#include "delegation.h" 13#include "delegation.h"
14#include "internal.h" 14#include "internal.h"
15#include "pnfs.h"
15 16
16#ifdef NFS_DEBUG 17#ifdef NFS_DEBUG
17#define NFSDBG_FACILITY NFSDBG_CALLBACK 18#define NFSDBG_FACILITY NFSDBG_CALLBACK
18#endif 19#endif
19 20
20__be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res) 21__be32 nfs4_callback_getattr(struct cb_getattrargs *args,
22 struct cb_getattrres *res,
23 struct cb_process_state *cps)
21{ 24{
22 struct nfs_client *clp;
23 struct nfs_delegation *delegation; 25 struct nfs_delegation *delegation;
24 struct nfs_inode *nfsi; 26 struct nfs_inode *nfsi;
25 struct inode *inode; 27 struct inode *inode;
26 28
29 res->status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
30 if (!cps->clp) /* Always set for v4.0. Set in cb_sequence for v4.1 */
31 goto out;
32
27 res->bitmap[0] = res->bitmap[1] = 0; 33 res->bitmap[0] = res->bitmap[1] = 0;
28 res->status = htonl(NFS4ERR_BADHANDLE); 34 res->status = htonl(NFS4ERR_BADHANDLE);
29 clp = nfs_find_client(args->addr, 4);
30 if (clp == NULL)
31 goto out;
32 35
33 dprintk("NFS: GETATTR callback request from %s\n", 36 dprintk("NFS: GETATTR callback request from %s\n",
34 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)); 37 rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR));
35 38
36 inode = nfs_delegation_find_inode(clp, &args->fh); 39 inode = nfs_delegation_find_inode(cps->clp, &args->fh);
37 if (inode == NULL) 40 if (inode == NULL)
38 goto out_putclient; 41 goto out;
39 nfsi = NFS_I(inode); 42 nfsi = NFS_I(inode);
40 rcu_read_lock(); 43 rcu_read_lock();
41 delegation = rcu_dereference(nfsi->delegation); 44 delegation = rcu_dereference(nfsi->delegation);
@@ -55,49 +58,41 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *
55out_iput: 58out_iput:
56 rcu_read_unlock(); 59 rcu_read_unlock();
57 iput(inode); 60 iput(inode);
58out_putclient:
59 nfs_put_client(clp);
60out: 61out:
61 dprintk("%s: exit with status = %d\n", __func__, ntohl(res->status)); 62 dprintk("%s: exit with status = %d\n", __func__, ntohl(res->status));
62 return res->status; 63 return res->status;
63} 64}
64 65
65__be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy) 66__be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
67 struct cb_process_state *cps)
66{ 68{
67 struct nfs_client *clp;
68 struct inode *inode; 69 struct inode *inode;
69 __be32 res; 70 __be32 res;
70 71
71 res = htonl(NFS4ERR_BADHANDLE); 72 res = htonl(NFS4ERR_OP_NOT_IN_SESSION);
72 clp = nfs_find_client(args->addr, 4); 73 if (!cps->clp) /* Always set for v4.0. Set in cb_sequence for v4.1 */
73 if (clp == NULL)
74 goto out; 74 goto out;
75 75
76 dprintk("NFS: RECALL callback request from %s\n", 76 dprintk("NFS: RECALL callback request from %s\n",
77 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)); 77 rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR));
78 78
79 do { 79 res = htonl(NFS4ERR_BADHANDLE);
80 struct nfs_client *prev = clp; 80 inode = nfs_delegation_find_inode(cps->clp, &args->fh);
81 81 if (inode == NULL)
82 inode = nfs_delegation_find_inode(clp, &args->fh); 82 goto out;
83 if (inode != NULL) { 83 /* Set up a helper thread to actually return the delegation */
84 /* Set up a helper thread to actually return the delegation */ 84 switch (nfs_async_inode_return_delegation(inode, &args->stateid)) {
85 switch (nfs_async_inode_return_delegation(inode, &args->stateid)) { 85 case 0:
86 case 0: 86 res = 0;
87 res = 0; 87 break;
88 break; 88 case -ENOENT:
89 case -ENOENT: 89 if (res != 0)
90 if (res != 0) 90 res = htonl(NFS4ERR_BAD_STATEID);
91 res = htonl(NFS4ERR_BAD_STATEID); 91 break;
92 break; 92 default:
93 default: 93 res = htonl(NFS4ERR_RESOURCE);
94 res = htonl(NFS4ERR_RESOURCE); 94 }
95 } 95 iput(inode);
96 iput(inode);
97 }
98 clp = nfs_find_client_next(prev);
99 nfs_put_client(prev);
100 } while (clp != NULL);
101out: 96out:
102 dprintk("%s: exit with status = %d\n", __func__, ntohl(res)); 97 dprintk("%s: exit with status = %d\n", __func__, ntohl(res));
103 return res; 98 return res;
@@ -113,6 +108,139 @@ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nf
113 108
114#if defined(CONFIG_NFS_V4_1) 109#if defined(CONFIG_NFS_V4_1)
115 110
111static u32 initiate_file_draining(struct nfs_client *clp,
112 struct cb_layoutrecallargs *args)
113{
114 struct pnfs_layout_hdr *lo;
115 struct inode *ino;
116 bool found = false;
117 u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
118 LIST_HEAD(free_me_list);
119
120 spin_lock(&clp->cl_lock);
121 list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) {
122 if (nfs_compare_fh(&args->cbl_fh,
123 &NFS_I(lo->plh_inode)->fh))
124 continue;
125 ino = igrab(lo->plh_inode);
126 if (!ino)
127 continue;
128 found = true;
129 /* Without this, layout can be freed as soon
130 * as we release cl_lock.
131 */
132 get_layout_hdr(lo);
133 break;
134 }
135 spin_unlock(&clp->cl_lock);
136 if (!found)
137 return NFS4ERR_NOMATCHING_LAYOUT;
138
139 spin_lock(&ino->i_lock);
140 if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
141 mark_matching_lsegs_invalid(lo, &free_me_list,
142 args->cbl_range.iomode))
143 rv = NFS4ERR_DELAY;
144 else
145 rv = NFS4ERR_NOMATCHING_LAYOUT;
146 pnfs_set_layout_stateid(lo, &args->cbl_stateid, true);
147 spin_unlock(&ino->i_lock);
148 pnfs_free_lseg_list(&free_me_list);
149 put_layout_hdr(lo);
150 iput(ino);
151 return rv;
152}
153
154static u32 initiate_bulk_draining(struct nfs_client *clp,
155 struct cb_layoutrecallargs *args)
156{
157 struct pnfs_layout_hdr *lo;
158 struct inode *ino;
159 u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
160 struct pnfs_layout_hdr *tmp;
161 LIST_HEAD(recall_list);
162 LIST_HEAD(free_me_list);
163 struct pnfs_layout_range range = {
164 .iomode = IOMODE_ANY,
165 .offset = 0,
166 .length = NFS4_MAX_UINT64,
167 };
168
169 spin_lock(&clp->cl_lock);
170 list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) {
171 if ((args->cbl_recall_type == RETURN_FSID) &&
172 memcmp(&NFS_SERVER(lo->plh_inode)->fsid,
173 &args->cbl_fsid, sizeof(struct nfs_fsid)))
174 continue;
175 if (!igrab(lo->plh_inode))
176 continue;
177 get_layout_hdr(lo);
178 BUG_ON(!list_empty(&lo->plh_bulk_recall));
179 list_add(&lo->plh_bulk_recall, &recall_list);
180 }
181 spin_unlock(&clp->cl_lock);
182 list_for_each_entry_safe(lo, tmp,
183 &recall_list, plh_bulk_recall) {
184 ino = lo->plh_inode;
185 spin_lock(&ino->i_lock);
186 set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
187 if (mark_matching_lsegs_invalid(lo, &free_me_list, range.iomode))
188 rv = NFS4ERR_DELAY;
189 list_del_init(&lo->plh_bulk_recall);
190 spin_unlock(&ino->i_lock);
191 put_layout_hdr(lo);
192 iput(ino);
193 }
194 pnfs_free_lseg_list(&free_me_list);
195 return rv;
196}
197
198static u32 do_callback_layoutrecall(struct nfs_client *clp,
199 struct cb_layoutrecallargs *args)
200{
201 u32 res = NFS4ERR_DELAY;
202
203 dprintk("%s enter, type=%i\n", __func__, args->cbl_recall_type);
204 if (test_and_set_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state))
205 goto out;
206 if (args->cbl_recall_type == RETURN_FILE)
207 res = initiate_file_draining(clp, args);
208 else
209 res = initiate_bulk_draining(clp, args);
210 clear_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state);
211out:
212 dprintk("%s returning %i\n", __func__, res);
213 return res;
214
215}
216
217__be32 nfs4_callback_layoutrecall(struct cb_layoutrecallargs *args,
218 void *dummy, struct cb_process_state *cps)
219{
220 u32 res;
221
222 dprintk("%s: -->\n", __func__);
223
224 if (cps->clp)
225 res = do_callback_layoutrecall(cps->clp, args);
226 else
227 res = NFS4ERR_OP_NOT_IN_SESSION;
228
229 dprintk("%s: exit with status = %d\n", __func__, res);
230 return cpu_to_be32(res);
231}
232
233static void pnfs_recall_all_layouts(struct nfs_client *clp)
234{
235 struct cb_layoutrecallargs args;
236
237 /* Pretend we got a CB_LAYOUTRECALL(ALL) */
238 memset(&args, 0, sizeof(args));
239 args.cbl_recall_type = RETURN_ALL;
240 /* FIXME we ignore errors, what should we do? */
241 do_callback_layoutrecall(clp, &args);
242}
243
116int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid) 244int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
117{ 245{
118 if (delegation == NULL) 246 if (delegation == NULL)
@@ -185,42 +313,6 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args)
185} 313}
186 314
187/* 315/*
188 * Returns a pointer to a held 'struct nfs_client' that matches the server's
189 * address, major version number, and session ID. It is the caller's
190 * responsibility to release the returned reference.
191 *
192 * Returns NULL if there are no connections with sessions, or if no session
193 * matches the one of interest.
194 */
195 static struct nfs_client *find_client_with_session(
196 const struct sockaddr *addr, u32 nfsversion,
197 struct nfs4_sessionid *sessionid)
198{
199 struct nfs_client *clp;
200
201 clp = nfs_find_client(addr, 4);
202 if (clp == NULL)
203 return NULL;
204
205 do {
206 struct nfs_client *prev = clp;
207
208 if (clp->cl_session != NULL) {
209 if (memcmp(clp->cl_session->sess_id.data,
210 sessionid->data,
211 NFS4_MAX_SESSIONID_LEN) == 0) {
212 /* Returns a held reference to clp */
213 return clp;
214 }
215 }
216 clp = nfs_find_client_next(prev);
217 nfs_put_client(prev);
218 } while (clp != NULL);
219
220 return NULL;
221}
222
223/*
224 * For each referring call triple, check the session's slot table for 316 * For each referring call triple, check the session's slot table for
225 * a match. If the slot is in use and the sequence numbers match, the 317 * a match. If the slot is in use and the sequence numbers match, the
226 * client is still waiting for a response to the original request. 318 * client is still waiting for a response to the original request.
@@ -276,20 +368,34 @@ out:
276} 368}
277 369
278__be32 nfs4_callback_sequence(struct cb_sequenceargs *args, 370__be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
279 struct cb_sequenceres *res) 371 struct cb_sequenceres *res,
372 struct cb_process_state *cps)
280{ 373{
281 struct nfs_client *clp; 374 struct nfs_client *clp;
282 int i; 375 int i;
283 __be32 status; 376 __be32 status;
284 377
378 cps->clp = NULL;
379
285 status = htonl(NFS4ERR_BADSESSION); 380 status = htonl(NFS4ERR_BADSESSION);
286 clp = find_client_with_session(args->csa_addr, 4, &args->csa_sessionid); 381 /* Incoming session must match the callback session */
382 if (memcmp(&args->csa_sessionid, cps->svc_sid, NFS4_MAX_SESSIONID_LEN))
383 goto out;
384
385 clp = nfs4_find_client_sessionid(args->csa_addr,
386 &args->csa_sessionid, 1);
287 if (clp == NULL) 387 if (clp == NULL)
288 goto out; 388 goto out;
289 389
390 /* state manager is resetting the session */
391 if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) {
392 status = NFS4ERR_DELAY;
393 goto out;
394 }
395
290 status = validate_seqid(&clp->cl_session->bc_slot_table, args); 396 status = validate_seqid(&clp->cl_session->bc_slot_table, args);
291 if (status) 397 if (status)
292 goto out_putclient; 398 goto out;
293 399
294 /* 400 /*
295 * Check for pending referring calls. If a match is found, a 401 * Check for pending referring calls. If a match is found, a
@@ -298,7 +404,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
298 */ 404 */
299 if (referring_call_exists(clp, args->csa_nrclists, args->csa_rclists)) { 405 if (referring_call_exists(clp, args->csa_nrclists, args->csa_rclists)) {
300 status = htonl(NFS4ERR_DELAY); 406 status = htonl(NFS4ERR_DELAY);
301 goto out_putclient; 407 goto out;
302 } 408 }
303 409
304 memcpy(&res->csr_sessionid, &args->csa_sessionid, 410 memcpy(&res->csr_sessionid, &args->csa_sessionid,
@@ -307,83 +413,93 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
307 res->csr_slotid = args->csa_slotid; 413 res->csr_slotid = args->csa_slotid;
308 res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; 414 res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
309 res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; 415 res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
416 nfs4_cb_take_slot(clp);
417 cps->clp = clp; /* put in nfs4_callback_compound */
310 418
311out_putclient:
312 nfs_put_client(clp);
313out: 419out:
314 for (i = 0; i < args->csa_nrclists; i++) 420 for (i = 0; i < args->csa_nrclists; i++)
315 kfree(args->csa_rclists[i].rcl_refcalls); 421 kfree(args->csa_rclists[i].rcl_refcalls);
316 kfree(args->csa_rclists); 422 kfree(args->csa_rclists);
317 423
318 if (status == htonl(NFS4ERR_RETRY_UNCACHED_REP)) 424 if (status == htonl(NFS4ERR_RETRY_UNCACHED_REP)) {
319 res->csr_status = 0; 425 cps->drc_status = status;
320 else 426 status = 0;
427 } else
321 res->csr_status = status; 428 res->csr_status = status;
429
322 dprintk("%s: exit with status = %d res->csr_status %d\n", __func__, 430 dprintk("%s: exit with status = %d res->csr_status %d\n", __func__,
323 ntohl(status), ntohl(res->csr_status)); 431 ntohl(status), ntohl(res->csr_status));
324 return status; 432 return status;
325} 433}
326 434
327__be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy) 435static bool
436validate_bitmap_values(unsigned long mask)
437{
438 return (mask & ~RCA4_TYPE_MASK_ALL) == 0;
439}
440
441__be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy,
442 struct cb_process_state *cps)
328{ 443{
329 struct nfs_client *clp;
330 __be32 status; 444 __be32 status;
331 fmode_t flags = 0; 445 fmode_t flags = 0;
332 446
333 status = htonl(NFS4ERR_OP_NOT_IN_SESSION); 447 status = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
334 clp = nfs_find_client(args->craa_addr, 4); 448 if (!cps->clp) /* set in cb_sequence */
335 if (clp == NULL)
336 goto out; 449 goto out;
337 450
338 dprintk("NFS: RECALL_ANY callback request from %s\n", 451 dprintk("NFS: RECALL_ANY callback request from %s\n",
339 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)); 452 rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR));
453
454 status = cpu_to_be32(NFS4ERR_INVAL);
455 if (!validate_bitmap_values(args->craa_type_mask))
456 goto out;
340 457
458 status = cpu_to_be32(NFS4_OK);
341 if (test_bit(RCA4_TYPE_MASK_RDATA_DLG, (const unsigned long *) 459 if (test_bit(RCA4_TYPE_MASK_RDATA_DLG, (const unsigned long *)
342 &args->craa_type_mask)) 460 &args->craa_type_mask))
343 flags = FMODE_READ; 461 flags = FMODE_READ;
344 if (test_bit(RCA4_TYPE_MASK_WDATA_DLG, (const unsigned long *) 462 if (test_bit(RCA4_TYPE_MASK_WDATA_DLG, (const unsigned long *)
345 &args->craa_type_mask)) 463 &args->craa_type_mask))
346 flags |= FMODE_WRITE; 464 flags |= FMODE_WRITE;
347 465 if (test_bit(RCA4_TYPE_MASK_FILE_LAYOUT, (const unsigned long *)
466 &args->craa_type_mask))
467 pnfs_recall_all_layouts(cps->clp);
348 if (flags) 468 if (flags)
349 nfs_expire_all_delegation_types(clp, flags); 469 nfs_expire_all_delegation_types(cps->clp, flags);
350 status = htonl(NFS4_OK);
351out: 470out:
352 dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); 471 dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
353 return status; 472 return status;
354} 473}
355 474
356/* Reduce the fore channel's max_slots to the target value */ 475/* Reduce the fore channel's max_slots to the target value */
357__be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy) 476__be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy,
477 struct cb_process_state *cps)
358{ 478{
359 struct nfs_client *clp;
360 struct nfs4_slot_table *fc_tbl; 479 struct nfs4_slot_table *fc_tbl;
361 __be32 status; 480 __be32 status;
362 481
363 status = htonl(NFS4ERR_OP_NOT_IN_SESSION); 482 status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
364 clp = nfs_find_client(args->crsa_addr, 4); 483 if (!cps->clp) /* set in cb_sequence */
365 if (clp == NULL)
366 goto out; 484 goto out;
367 485
368 dprintk("NFS: CB_RECALL_SLOT request from %s target max slots %d\n", 486 dprintk("NFS: CB_RECALL_SLOT request from %s target max slots %d\n",
369 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR), 487 rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR),
370 args->crsa_target_max_slots); 488 args->crsa_target_max_slots);
371 489
372 fc_tbl = &clp->cl_session->fc_slot_table; 490 fc_tbl = &cps->clp->cl_session->fc_slot_table;
373 491
374 status = htonl(NFS4ERR_BAD_HIGH_SLOT); 492 status = htonl(NFS4ERR_BAD_HIGH_SLOT);
375 if (args->crsa_target_max_slots > fc_tbl->max_slots || 493 if (args->crsa_target_max_slots > fc_tbl->max_slots ||
376 args->crsa_target_max_slots < 1) 494 args->crsa_target_max_slots < 1)
377 goto out_putclient; 495 goto out;
378 496
379 status = htonl(NFS4_OK); 497 status = htonl(NFS4_OK);
380 if (args->crsa_target_max_slots == fc_tbl->max_slots) 498 if (args->crsa_target_max_slots == fc_tbl->max_slots)
381 goto out_putclient; 499 goto out;
382 500
383 fc_tbl->target_max_slots = args->crsa_target_max_slots; 501 fc_tbl->target_max_slots = args->crsa_target_max_slots;
384 nfs41_handle_recall_slot(clp); 502 nfs41_handle_recall_slot(cps->clp);
385out_putclient:
386 nfs_put_client(clp); /* balance nfs_find_client */
387out: 503out:
388 dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); 504 dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
389 return status; 505 return status;
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 05af212f0ed..23112c263f8 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -10,8 +10,10 @@
10#include <linux/nfs4.h> 10#include <linux/nfs4.h>
11#include <linux/nfs_fs.h> 11#include <linux/nfs_fs.h>
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/sunrpc/bc_xprt.h>
13#include "nfs4_fs.h" 14#include "nfs4_fs.h"
14#include "callback.h" 15#include "callback.h"
16#include "internal.h"
15 17
16#define CB_OP_TAGLEN_MAXSZ (512) 18#define CB_OP_TAGLEN_MAXSZ (512)
17#define CB_OP_HDR_RES_MAXSZ (2 + CB_OP_TAGLEN_MAXSZ) 19#define CB_OP_HDR_RES_MAXSZ (2 + CB_OP_TAGLEN_MAXSZ)
@@ -22,6 +24,7 @@
22#define CB_OP_RECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) 24#define CB_OP_RECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ)
23 25
24#if defined(CONFIG_NFS_V4_1) 26#if defined(CONFIG_NFS_V4_1)
27#define CB_OP_LAYOUTRECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ)
25#define CB_OP_SEQUENCE_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \ 28#define CB_OP_SEQUENCE_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \
26 4 + 1 + 3) 29 4 + 1 + 3)
27#define CB_OP_RECALLANY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) 30#define CB_OP_RECALLANY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ)
@@ -33,7 +36,8 @@
33/* Internal error code */ 36/* Internal error code */
34#define NFS4ERR_RESOURCE_HDR 11050 37#define NFS4ERR_RESOURCE_HDR 11050
35 38
36typedef __be32 (*callback_process_op_t)(void *, void *); 39typedef __be32 (*callback_process_op_t)(void *, void *,
40 struct cb_process_state *);
37typedef __be32 (*callback_decode_arg_t)(struct svc_rqst *, struct xdr_stream *, void *); 41typedef __be32 (*callback_decode_arg_t)(struct svc_rqst *, struct xdr_stream *, void *);
38typedef __be32 (*callback_encode_res_t)(struct svc_rqst *, struct xdr_stream *, void *); 42typedef __be32 (*callback_encode_res_t)(struct svc_rqst *, struct xdr_stream *, void *);
39 43
@@ -160,7 +164,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound
160 hdr->minorversion = ntohl(*p++); 164 hdr->minorversion = ntohl(*p++);
161 /* Check minor version is zero or one. */ 165 /* Check minor version is zero or one. */
162 if (hdr->minorversion <= 1) { 166 if (hdr->minorversion <= 1) {
163 p++; /* skip callback_ident */ 167 hdr->cb_ident = ntohl(*p++); /* ignored by v4.1 */
164 } else { 168 } else {
165 printk(KERN_WARNING "%s: NFSv4 server callback with " 169 printk(KERN_WARNING "%s: NFSv4 server callback with "
166 "illegal minor version %u!\n", 170 "illegal minor version %u!\n",
@@ -220,6 +224,66 @@ out:
220 224
221#if defined(CONFIG_NFS_V4_1) 225#if defined(CONFIG_NFS_V4_1)
222 226
227static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
228 struct xdr_stream *xdr,
229 struct cb_layoutrecallargs *args)
230{
231 __be32 *p;
232 __be32 status = 0;
233 uint32_t iomode;
234
235 args->cbl_addr = svc_addr(rqstp);
236 p = read_buf(xdr, 4 * sizeof(uint32_t));
237 if (unlikely(p == NULL)) {
238 status = htonl(NFS4ERR_BADXDR);
239 goto out;
240 }
241
242 args->cbl_layout_type = ntohl(*p++);
243 /* Depite the spec's xdr, iomode really belongs in the FILE switch,
244 * as it is unuseable and ignored with the other types.
245 */
246 iomode = ntohl(*p++);
247 args->cbl_layoutchanged = ntohl(*p++);
248 args->cbl_recall_type = ntohl(*p++);
249
250 if (args->cbl_recall_type == RETURN_FILE) {
251 args->cbl_range.iomode = iomode;
252 status = decode_fh(xdr, &args->cbl_fh);
253 if (unlikely(status != 0))
254 goto out;
255
256 p = read_buf(xdr, 2 * sizeof(uint64_t));
257 if (unlikely(p == NULL)) {
258 status = htonl(NFS4ERR_BADXDR);
259 goto out;
260 }
261 p = xdr_decode_hyper(p, &args->cbl_range.offset);
262 p = xdr_decode_hyper(p, &args->cbl_range.length);
263 status = decode_stateid(xdr, &args->cbl_stateid);
264 if (unlikely(status != 0))
265 goto out;
266 } else if (args->cbl_recall_type == RETURN_FSID) {
267 p = read_buf(xdr, 2 * sizeof(uint64_t));
268 if (unlikely(p == NULL)) {
269 status = htonl(NFS4ERR_BADXDR);
270 goto out;
271 }
272 p = xdr_decode_hyper(p, &args->cbl_fsid.major);
273 p = xdr_decode_hyper(p, &args->cbl_fsid.minor);
274 } else if (args->cbl_recall_type != RETURN_ALL) {
275 status = htonl(NFS4ERR_BADXDR);
276 goto out;
277 }
278 dprintk("%s: ltype 0x%x iomode %d changed %d recall_type %d\n",
279 __func__,
280 args->cbl_layout_type, iomode,
281 args->cbl_layoutchanged, args->cbl_recall_type);
282out:
283 dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
284 return status;
285}
286
223static __be32 decode_sessionid(struct xdr_stream *xdr, 287static __be32 decode_sessionid(struct xdr_stream *xdr,
224 struct nfs4_sessionid *sid) 288 struct nfs4_sessionid *sid)
225{ 289{
@@ -574,10 +638,10 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op)
574 case OP_CB_SEQUENCE: 638 case OP_CB_SEQUENCE:
575 case OP_CB_RECALL_ANY: 639 case OP_CB_RECALL_ANY:
576 case OP_CB_RECALL_SLOT: 640 case OP_CB_RECALL_SLOT:
641 case OP_CB_LAYOUTRECALL:
577 *op = &callback_ops[op_nr]; 642 *op = &callback_ops[op_nr];
578 break; 643 break;
579 644
580 case OP_CB_LAYOUTRECALL:
581 case OP_CB_NOTIFY_DEVICEID: 645 case OP_CB_NOTIFY_DEVICEID:
582 case OP_CB_NOTIFY: 646 case OP_CB_NOTIFY:
583 case OP_CB_PUSH_DELEG: 647 case OP_CB_PUSH_DELEG:
@@ -593,6 +657,37 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op)
593 return htonl(NFS_OK); 657 return htonl(NFS_OK);
594} 658}
595 659
660static void nfs4_callback_free_slot(struct nfs4_session *session)
661{
662 struct nfs4_slot_table *tbl = &session->bc_slot_table;
663
664 spin_lock(&tbl->slot_tbl_lock);
665 /*
666 * Let the state manager know callback processing done.
667 * A single slot, so highest used slotid is either 0 or -1
668 */
669 tbl->highest_used_slotid--;
670 nfs4_check_drain_bc_complete(session);
671 spin_unlock(&tbl->slot_tbl_lock);
672}
673
674static void nfs4_cb_free_slot(struct nfs_client *clp)
675{
676 if (clp && clp->cl_session)
677 nfs4_callback_free_slot(clp->cl_session);
678}
679
680/* A single slot, so highest used slotid is either 0 or -1 */
681void nfs4_cb_take_slot(struct nfs_client *clp)
682{
683 struct nfs4_slot_table *tbl = &clp->cl_session->bc_slot_table;
684
685 spin_lock(&tbl->slot_tbl_lock);
686 tbl->highest_used_slotid++;
687 BUG_ON(tbl->highest_used_slotid != 0);
688 spin_unlock(&tbl->slot_tbl_lock);
689}
690
596#else /* CONFIG_NFS_V4_1 */ 691#else /* CONFIG_NFS_V4_1 */
597 692
598static __be32 693static __be32
@@ -601,6 +696,9 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op)
601 return htonl(NFS4ERR_MINOR_VERS_MISMATCH); 696 return htonl(NFS4ERR_MINOR_VERS_MISMATCH);
602} 697}
603 698
699static void nfs4_cb_free_slot(struct nfs_client *clp)
700{
701}
604#endif /* CONFIG_NFS_V4_1 */ 702#endif /* CONFIG_NFS_V4_1 */
605 703
606static __be32 704static __be32
@@ -621,7 +719,8 @@ preprocess_nfs4_op(unsigned int op_nr, struct callback_op **op)
621static __be32 process_op(uint32_t minorversion, int nop, 719static __be32 process_op(uint32_t minorversion, int nop,
622 struct svc_rqst *rqstp, 720 struct svc_rqst *rqstp,
623 struct xdr_stream *xdr_in, void *argp, 721 struct xdr_stream *xdr_in, void *argp,
624 struct xdr_stream *xdr_out, void *resp, int* drc_status) 722 struct xdr_stream *xdr_out, void *resp,
723 struct cb_process_state *cps)
625{ 724{
626 struct callback_op *op = &callback_ops[0]; 725 struct callback_op *op = &callback_ops[0];
627 unsigned int op_nr; 726 unsigned int op_nr;
@@ -644,8 +743,8 @@ static __be32 process_op(uint32_t minorversion, int nop,
644 if (status) 743 if (status)
645 goto encode_hdr; 744 goto encode_hdr;
646 745
647 if (*drc_status) { 746 if (cps->drc_status) {
648 status = *drc_status; 747 status = cps->drc_status;
649 goto encode_hdr; 748 goto encode_hdr;
650 } 749 }
651 750
@@ -653,16 +752,10 @@ static __be32 process_op(uint32_t minorversion, int nop,
653 if (maxlen > 0 && maxlen < PAGE_SIZE) { 752 if (maxlen > 0 && maxlen < PAGE_SIZE) {
654 status = op->decode_args(rqstp, xdr_in, argp); 753 status = op->decode_args(rqstp, xdr_in, argp);
655 if (likely(status == 0)) 754 if (likely(status == 0))
656 status = op->process_op(argp, resp); 755 status = op->process_op(argp, resp, cps);
657 } else 756 } else
658 status = htonl(NFS4ERR_RESOURCE); 757 status = htonl(NFS4ERR_RESOURCE);
659 758
660 /* Only set by OP_CB_SEQUENCE processing */
661 if (status == htonl(NFS4ERR_RETRY_UNCACHED_REP)) {
662 *drc_status = status;
663 status = 0;
664 }
665
666encode_hdr: 759encode_hdr:
667 res = encode_op_hdr(xdr_out, op_nr, status); 760 res = encode_op_hdr(xdr_out, op_nr, status);
668 if (unlikely(res)) 761 if (unlikely(res))
@@ -681,8 +774,11 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
681 struct cb_compound_hdr_arg hdr_arg = { 0 }; 774 struct cb_compound_hdr_arg hdr_arg = { 0 };
682 struct cb_compound_hdr_res hdr_res = { NULL }; 775 struct cb_compound_hdr_res hdr_res = { NULL };
683 struct xdr_stream xdr_in, xdr_out; 776 struct xdr_stream xdr_in, xdr_out;
684 __be32 *p; 777 __be32 *p, status;
685 __be32 status, drc_status = 0; 778 struct cb_process_state cps = {
779 .drc_status = 0,
780 .clp = NULL,
781 };
686 unsigned int nops = 0; 782 unsigned int nops = 0;
687 783
688 dprintk("%s: start\n", __func__); 784 dprintk("%s: start\n", __func__);
@@ -696,6 +792,13 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
696 if (status == __constant_htonl(NFS4ERR_RESOURCE)) 792 if (status == __constant_htonl(NFS4ERR_RESOURCE))
697 return rpc_garbage_args; 793 return rpc_garbage_args;
698 794
795 if (hdr_arg.minorversion == 0) {
796 cps.clp = nfs4_find_client_ident(hdr_arg.cb_ident);
797 if (!cps.clp)
798 return rpc_drop_reply;
799 } else
800 cps.svc_sid = bc_xprt_sid(rqstp);
801
699 hdr_res.taglen = hdr_arg.taglen; 802 hdr_res.taglen = hdr_arg.taglen;
700 hdr_res.tag = hdr_arg.tag; 803 hdr_res.tag = hdr_arg.tag;
701 if (encode_compound_hdr_res(&xdr_out, &hdr_res) != 0) 804 if (encode_compound_hdr_res(&xdr_out, &hdr_res) != 0)
@@ -703,7 +806,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
703 806
704 while (status == 0 && nops != hdr_arg.nops) { 807 while (status == 0 && nops != hdr_arg.nops) {
705 status = process_op(hdr_arg.minorversion, nops, rqstp, 808 status = process_op(hdr_arg.minorversion, nops, rqstp,
706 &xdr_in, argp, &xdr_out, resp, &drc_status); 809 &xdr_in, argp, &xdr_out, resp, &cps);
707 nops++; 810 nops++;
708 } 811 }
709 812
@@ -716,6 +819,8 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
716 819
717 *hdr_res.status = status; 820 *hdr_res.status = status;
718 *hdr_res.nops = htonl(nops); 821 *hdr_res.nops = htonl(nops);
822 nfs4_cb_free_slot(cps.clp);
823 nfs_put_client(cps.clp);
719 dprintk("%s: done, status = %u\n", __func__, ntohl(status)); 824 dprintk("%s: done, status = %u\n", __func__, ntohl(status));
720 return rpc_success; 825 return rpc_success;
721} 826}
@@ -739,6 +844,12 @@ static struct callback_op callback_ops[] = {
739 .res_maxsize = CB_OP_RECALL_RES_MAXSZ, 844 .res_maxsize = CB_OP_RECALL_RES_MAXSZ,
740 }, 845 },
741#if defined(CONFIG_NFS_V4_1) 846#if defined(CONFIG_NFS_V4_1)
847 [OP_CB_LAYOUTRECALL] = {
848 .process_op = (callback_process_op_t)nfs4_callback_layoutrecall,
849 .decode_args =
850 (callback_decode_arg_t)decode_layoutrecall_args,
851 .res_maxsize = CB_OP_LAYOUTRECALL_RES_MAXSZ,
852 },
742 [OP_CB_SEQUENCE] = { 853 [OP_CB_SEQUENCE] = {
743 .process_op = (callback_process_op_t)nfs4_callback_sequence, 854 .process_op = (callback_process_op_t)nfs4_callback_sequence,
744 .decode_args = (callback_decode_arg_t)decode_cb_sequence_args, 855 .decode_args = (callback_decode_arg_t)decode_cb_sequence_args,
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 0870d0d4efc..192f2f86026 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -56,6 +56,30 @@ static DEFINE_SPINLOCK(nfs_client_lock);
56static LIST_HEAD(nfs_client_list); 56static LIST_HEAD(nfs_client_list);
57static LIST_HEAD(nfs_volume_list); 57static LIST_HEAD(nfs_volume_list);
58static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq); 58static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
59#ifdef CONFIG_NFS_V4
60static DEFINE_IDR(cb_ident_idr); /* Protected by nfs_client_lock */
61
62/*
63 * Get a unique NFSv4.0 callback identifier which will be used
64 * by the V4.0 callback service to lookup the nfs_client struct
65 */
66static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion)
67{
68 int ret = 0;
69
70 if (clp->rpc_ops->version != 4 || minorversion != 0)
71 return ret;
72retry:
73 if (!idr_pre_get(&cb_ident_idr, GFP_KERNEL))
74 return -ENOMEM;
75 spin_lock(&nfs_client_lock);
76 ret = idr_get_new(&cb_ident_idr, clp, &clp->cl_cb_ident);
77 spin_unlock(&nfs_client_lock);
78 if (ret == -EAGAIN)
79 goto retry;
80 return ret;
81}
82#endif /* CONFIG_NFS_V4 */
59 83
60/* 84/*
61 * RPC cruft for NFS 85 * RPC cruft for NFS
@@ -144,7 +168,10 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
144 clp->cl_proto = cl_init->proto; 168 clp->cl_proto = cl_init->proto;
145 169
146#ifdef CONFIG_NFS_V4 170#ifdef CONFIG_NFS_V4
147 INIT_LIST_HEAD(&clp->cl_delegations); 171 err = nfs_get_cb_ident_idr(clp, cl_init->minorversion);
172 if (err)
173 goto error_cleanup;
174
148 spin_lock_init(&clp->cl_lock); 175 spin_lock_init(&clp->cl_lock);
149 INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state); 176 INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);
150 rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); 177 rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
@@ -170,21 +197,17 @@ error_0:
170} 197}
171 198
172#ifdef CONFIG_NFS_V4 199#ifdef CONFIG_NFS_V4
173/*
174 * Clears/puts all minor version specific parts from an nfs_client struct
175 * reverting it to minorversion 0.
176 */
177static void nfs4_clear_client_minor_version(struct nfs_client *clp)
178{
179#ifdef CONFIG_NFS_V4_1 200#ifdef CONFIG_NFS_V4_1
180 if (nfs4_has_session(clp)) { 201static void nfs4_shutdown_session(struct nfs_client *clp)
202{
203 if (nfs4_has_session(clp))
181 nfs4_destroy_session(clp->cl_session); 204 nfs4_destroy_session(clp->cl_session);
182 clp->cl_session = NULL;
183 }
184
185 clp->cl_mvops = nfs_v4_minor_ops[0];
186#endif /* CONFIG_NFS_V4_1 */
187} 205}
206#else /* CONFIG_NFS_V4_1 */
207static void nfs4_shutdown_session(struct nfs_client *clp)
208{
209}
210#endif /* CONFIG_NFS_V4_1 */
188 211
189/* 212/*
190 * Destroy the NFS4 callback service 213 * Destroy the NFS4 callback service
@@ -199,17 +222,49 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
199{ 222{
200 if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state)) 223 if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state))
201 nfs4_kill_renewd(clp); 224 nfs4_kill_renewd(clp);
202 nfs4_clear_client_minor_version(clp); 225 nfs4_shutdown_session(clp);
203 nfs4_destroy_callback(clp); 226 nfs4_destroy_callback(clp);
204 if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) 227 if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state))
205 nfs_idmap_delete(clp); 228 nfs_idmap_delete(clp);
206 229
207 rpc_destroy_wait_queue(&clp->cl_rpcwaitq); 230 rpc_destroy_wait_queue(&clp->cl_rpcwaitq);
208} 231}
232
233/* idr_remove_all is not needed as all id's are removed by nfs_put_client */
234void nfs_cleanup_cb_ident_idr(void)
235{
236 idr_destroy(&cb_ident_idr);
237}
238
239/* nfs_client_lock held */
240static void nfs_cb_idr_remove_locked(struct nfs_client *clp)
241{
242 if (clp->cl_cb_ident)
243 idr_remove(&cb_ident_idr, clp->cl_cb_ident);
244}
245
246static void pnfs_init_server(struct nfs_server *server)
247{
248 rpc_init_wait_queue(&server->roc_rpcwaitq, "pNFS ROC");
249}
250
209#else 251#else
210static void nfs4_shutdown_client(struct nfs_client *clp) 252static void nfs4_shutdown_client(struct nfs_client *clp)
211{ 253{
212} 254}
255
256void nfs_cleanup_cb_ident_idr(void)
257{
258}
259
260static void nfs_cb_idr_remove_locked(struct nfs_client *clp)
261{
262}
263
264static void pnfs_init_server(struct nfs_server *server)
265{
266}
267
213#endif /* CONFIG_NFS_V4 */ 268#endif /* CONFIG_NFS_V4 */
214 269
215/* 270/*
@@ -248,6 +303,7 @@ void nfs_put_client(struct nfs_client *clp)
248 303
249 if (atomic_dec_and_lock(&clp->cl_count, &nfs_client_lock)) { 304 if (atomic_dec_and_lock(&clp->cl_count, &nfs_client_lock)) {
250 list_del(&clp->cl_share_link); 305 list_del(&clp->cl_share_link);
306 nfs_cb_idr_remove_locked(clp);
251 spin_unlock(&nfs_client_lock); 307 spin_unlock(&nfs_client_lock);
252 308
253 BUG_ON(!list_empty(&clp->cl_superblocks)); 309 BUG_ON(!list_empty(&clp->cl_superblocks));
@@ -363,70 +419,28 @@ static int nfs_sockaddr_cmp(const struct sockaddr *sa1,
363 return 0; 419 return 0;
364} 420}
365 421
366/* 422/* Common match routine for v4.0 and v4.1 callback services */
367 * Find a client by IP address and protocol version 423bool
368 * - returns NULL if no such client 424nfs4_cb_match_client(const struct sockaddr *addr, struct nfs_client *clp,
369 */ 425 u32 minorversion)
370struct nfs_client *nfs_find_client(const struct sockaddr *addr, u32 nfsversion)
371{
372 struct nfs_client *clp;
373
374 spin_lock(&nfs_client_lock);
375 list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
376 struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
377
378 /* Don't match clients that failed to initialise properly */
379 if (!(clp->cl_cons_state == NFS_CS_READY ||
380 clp->cl_cons_state == NFS_CS_SESSION_INITING))
381 continue;
382
383 /* Different NFS versions cannot share the same nfs_client */
384 if (clp->rpc_ops->version != nfsversion)
385 continue;
386
387 /* Match only the IP address, not the port number */
388 if (!nfs_sockaddr_match_ipaddr(addr, clap))
389 continue;
390
391 atomic_inc(&clp->cl_count);
392 spin_unlock(&nfs_client_lock);
393 return clp;
394 }
395 spin_unlock(&nfs_client_lock);
396 return NULL;
397}
398
399/*
400 * Find a client by IP address and protocol version
401 * - returns NULL if no such client
402 */
403struct nfs_client *nfs_find_client_next(struct nfs_client *clp)
404{ 426{
405 struct sockaddr *sap = (struct sockaddr *)&clp->cl_addr; 427 struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
406 u32 nfsvers = clp->rpc_ops->version;
407 428
408 spin_lock(&nfs_client_lock); 429 /* Don't match clients that failed to initialise */
409 list_for_each_entry_continue(clp, &nfs_client_list, cl_share_link) { 430 if (!(clp->cl_cons_state == NFS_CS_READY ||
410 struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; 431 clp->cl_cons_state == NFS_CS_SESSION_INITING))
432 return false;
411 433
412 /* Don't match clients that failed to initialise properly */ 434 /* Match the version and minorversion */
413 if (clp->cl_cons_state != NFS_CS_READY) 435 if (clp->rpc_ops->version != 4 ||
414 continue; 436 clp->cl_minorversion != minorversion)
437 return false;
415 438
416 /* Different NFS versions cannot share the same nfs_client */ 439 /* Match only the IP address, not the port number */
417 if (clp->rpc_ops->version != nfsvers) 440 if (!nfs_sockaddr_match_ipaddr(addr, clap))
418 continue; 441 return false;
419 442
420 /* Match only the IP address, not the port number */ 443 return true;
421 if (!nfs_sockaddr_match_ipaddr(sap, clap))
422 continue;
423
424 atomic_inc(&clp->cl_count);
425 spin_unlock(&nfs_client_lock);
426 return clp;
427 }
428 spin_unlock(&nfs_client_lock);
429 return NULL;
430} 444}
431 445
432/* 446/*
@@ -988,6 +1002,27 @@ static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_serve
988 target->options = source->options; 1002 target->options = source->options;
989} 1003}
990 1004
1005static void nfs_server_insert_lists(struct nfs_server *server)
1006{
1007 struct nfs_client *clp = server->nfs_client;
1008
1009 spin_lock(&nfs_client_lock);
1010 list_add_tail_rcu(&server->client_link, &clp->cl_superblocks);
1011 list_add_tail(&server->master_link, &nfs_volume_list);
1012 spin_unlock(&nfs_client_lock);
1013
1014}
1015
1016static void nfs_server_remove_lists(struct nfs_server *server)
1017{
1018 spin_lock(&nfs_client_lock);
1019 list_del_rcu(&server->client_link);
1020 list_del(&server->master_link);
1021 spin_unlock(&nfs_client_lock);
1022
1023 synchronize_rcu();
1024}
1025
991/* 1026/*
992 * Allocate and initialise a server record 1027 * Allocate and initialise a server record
993 */ 1028 */
@@ -1004,6 +1039,7 @@ static struct nfs_server *nfs_alloc_server(void)
1004 /* Zero out the NFS state stuff */ 1039 /* Zero out the NFS state stuff */
1005 INIT_LIST_HEAD(&server->client_link); 1040 INIT_LIST_HEAD(&server->client_link);
1006 INIT_LIST_HEAD(&server->master_link); 1041 INIT_LIST_HEAD(&server->master_link);
1042 INIT_LIST_HEAD(&server->delegations);
1007 1043
1008 atomic_set(&server->active, 0); 1044 atomic_set(&server->active, 0);
1009 1045
@@ -1019,6 +1055,8 @@ static struct nfs_server *nfs_alloc_server(void)
1019 return NULL; 1055 return NULL;
1020 } 1056 }
1021 1057
1058 pnfs_init_server(server);
1059
1022 return server; 1060 return server;
1023} 1061}
1024 1062
@@ -1029,11 +1067,8 @@ void nfs_free_server(struct nfs_server *server)
1029{ 1067{
1030 dprintk("--> nfs_free_server()\n"); 1068 dprintk("--> nfs_free_server()\n");
1031 1069
1070 nfs_server_remove_lists(server);
1032 unset_pnfs_layoutdriver(server); 1071 unset_pnfs_layoutdriver(server);
1033 spin_lock(&nfs_client_lock);
1034 list_del(&server->client_link);
1035 list_del(&server->master_link);
1036 spin_unlock(&nfs_client_lock);
1037 1072
1038 if (server->destroy != NULL) 1073 if (server->destroy != NULL)
1039 server->destroy(server); 1074 server->destroy(server);
@@ -1108,11 +1143,7 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data,
1108 (unsigned long long) server->fsid.major, 1143 (unsigned long long) server->fsid.major,
1109 (unsigned long long) server->fsid.minor); 1144 (unsigned long long) server->fsid.minor);
1110 1145
1111 spin_lock(&nfs_client_lock); 1146 nfs_server_insert_lists(server);
1112 list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
1113 list_add_tail(&server->master_link, &nfs_volume_list);
1114 spin_unlock(&nfs_client_lock);
1115
1116 server->mount_time = jiffies; 1147 server->mount_time = jiffies;
1117 nfs_free_fattr(fattr); 1148 nfs_free_fattr(fattr);
1118 return server; 1149 return server;
@@ -1125,6 +1156,101 @@ error:
1125 1156
1126#ifdef CONFIG_NFS_V4 1157#ifdef CONFIG_NFS_V4
1127/* 1158/*
1159 * NFSv4.0 callback thread helper
1160 *
1161 * Find a client by IP address, protocol version, and minorversion
1162 *
1163 * Called from the pg_authenticate method. The callback identifier
1164 * is not used as it has not been decoded.
1165 *
1166 * Returns NULL if no such client
1167 */
1168struct nfs_client *
1169nfs4_find_client_no_ident(const struct sockaddr *addr)
1170{
1171 struct nfs_client *clp;
1172
1173 spin_lock(&nfs_client_lock);
1174 list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
1175 if (nfs4_cb_match_client(addr, clp, 0) == false)
1176 continue;
1177 atomic_inc(&clp->cl_count);
1178 spin_unlock(&nfs_client_lock);
1179 return clp;
1180 }
1181 spin_unlock(&nfs_client_lock);
1182 return NULL;
1183}
1184
1185/*
1186 * NFSv4.0 callback thread helper
1187 *
1188 * Find a client by callback identifier
1189 */
1190struct nfs_client *
1191nfs4_find_client_ident(int cb_ident)
1192{
1193 struct nfs_client *clp;
1194
1195 spin_lock(&nfs_client_lock);
1196 clp = idr_find(&cb_ident_idr, cb_ident);
1197 if (clp)
1198 atomic_inc(&clp->cl_count);
1199 spin_unlock(&nfs_client_lock);
1200 return clp;
1201}
1202
1203#if defined(CONFIG_NFS_V4_1)
1204/*
1205 * NFSv4.1 callback thread helper
1206 * For CB_COMPOUND calls, find a client by IP address, protocol version,
1207 * minorversion, and sessionID
1208 *
1209 * CREATE_SESSION triggers a CB_NULL ping from servers. The callback service
1210 * sessionid can only be set after the CREATE_SESSION return, so a CB_NULL
1211 * can arrive before the callback sessionid is set. For CB_NULL calls,
1212 * find a client by IP address protocol version, and minorversion.
1213 *
1214 * Returns NULL if no such client
1215 */
1216struct nfs_client *
1217nfs4_find_client_sessionid(const struct sockaddr *addr,
1218 struct nfs4_sessionid *sid, int is_cb_compound)
1219{
1220 struct nfs_client *clp;
1221
1222 spin_lock(&nfs_client_lock);
1223 list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
1224 if (nfs4_cb_match_client(addr, clp, 1) == false)
1225 continue;
1226
1227 if (!nfs4_has_session(clp))
1228 continue;
1229
1230 /* Match sessionid unless cb_null call*/
1231 if (is_cb_compound && (memcmp(clp->cl_session->sess_id.data,
1232 sid->data, NFS4_MAX_SESSIONID_LEN) != 0))
1233 continue;
1234
1235 atomic_inc(&clp->cl_count);
1236 spin_unlock(&nfs_client_lock);
1237 return clp;
1238 }
1239 spin_unlock(&nfs_client_lock);
1240 return NULL;
1241}
1242
1243#else /* CONFIG_NFS_V4_1 */
1244
1245struct nfs_client *
1246nfs4_find_client_sessionid(const struct sockaddr *addr,
1247 struct nfs4_sessionid *sid, int is_cb_compound)
1248{
1249 return NULL;
1250}
1251#endif /* CONFIG_NFS_V4_1 */
1252
1253/*
1128 * Initialize the NFS4 callback service 1254 * Initialize the NFS4 callback service
1129 */ 1255 */
1130static int nfs4_init_callback(struct nfs_client *clp) 1256static int nfs4_init_callback(struct nfs_client *clp)
@@ -1342,11 +1468,7 @@ static int nfs4_server_common_setup(struct nfs_server *server,
1342 if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) 1468 if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
1343 server->namelen = NFS4_MAXNAMLEN; 1469 server->namelen = NFS4_MAXNAMLEN;
1344 1470
1345 spin_lock(&nfs_client_lock); 1471 nfs_server_insert_lists(server);
1346 list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
1347 list_add_tail(&server->master_link, &nfs_volume_list);
1348 spin_unlock(&nfs_client_lock);
1349
1350 server->mount_time = jiffies; 1472 server->mount_time = jiffies;
1351out: 1473out:
1352 nfs_free_fattr(fattr); 1474 nfs_free_fattr(fattr);
@@ -1551,11 +1673,7 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
1551 if (error < 0) 1673 if (error < 0)
1552 goto out_free_server; 1674 goto out_free_server;
1553 1675
1554 spin_lock(&nfs_client_lock); 1676 nfs_server_insert_lists(server);
1555 list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
1556 list_add_tail(&server->master_link, &nfs_volume_list);
1557 spin_unlock(&nfs_client_lock);
1558
1559 server->mount_time = jiffies; 1677 server->mount_time = jiffies;
1560 1678
1561 nfs_free_fattr(fattr_fsinfo); 1679 nfs_free_fattr(fattr_fsinfo);
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 1fd62fc49be..364e4328f39 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -40,11 +40,23 @@ static void nfs_free_delegation(struct nfs_delegation *delegation)
40 call_rcu(&delegation->rcu, nfs_free_delegation_callback); 40 call_rcu(&delegation->rcu, nfs_free_delegation_callback);
41} 41}
42 42
43/**
44 * nfs_mark_delegation_referenced - set delegation's REFERENCED flag
45 * @delegation: delegation to process
46 *
47 */
43void nfs_mark_delegation_referenced(struct nfs_delegation *delegation) 48void nfs_mark_delegation_referenced(struct nfs_delegation *delegation)
44{ 49{
45 set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags); 50 set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags);
46} 51}
47 52
53/**
54 * nfs_have_delegation - check if inode has a delegation
55 * @inode: inode to check
56 * @flags: delegation types to check for
57 *
58 * Returns one if inode has the indicated delegation, otherwise zero.
59 */
48int nfs_have_delegation(struct inode *inode, fmode_t flags) 60int nfs_have_delegation(struct inode *inode, fmode_t flags)
49{ 61{
50 struct nfs_delegation *delegation; 62 struct nfs_delegation *delegation;
@@ -119,10 +131,15 @@ again:
119 return 0; 131 return 0;
120} 132}
121 133
122/* 134/**
123 * Set up a delegation on an inode 135 * nfs_inode_reclaim_delegation - process a delegation reclaim request
136 * @inode: inode to process
137 * @cred: credential to use for request
138 * @res: new delegation state from server
139 *
124 */ 140 */
125void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res) 141void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred,
142 struct nfs_openres *res)
126{ 143{
127 struct nfs_delegation *delegation; 144 struct nfs_delegation *delegation;
128 struct rpc_cred *oldcred = NULL; 145 struct rpc_cred *oldcred = NULL;
@@ -175,38 +192,52 @@ static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation
175 return inode; 192 return inode;
176} 193}
177 194
178static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, 195static struct nfs_delegation *
179 const nfs4_stateid *stateid, 196nfs_detach_delegation_locked(struct nfs_inode *nfsi,
180 struct nfs_client *clp) 197 struct nfs_server *server)
181{ 198{
182 struct nfs_delegation *delegation = 199 struct nfs_delegation *delegation =
183 rcu_dereference_protected(nfsi->delegation, 200 rcu_dereference_protected(nfsi->delegation,
184 lockdep_is_held(&clp->cl_lock)); 201 lockdep_is_held(&server->nfs_client->cl_lock));
185 202
186 if (delegation == NULL) 203 if (delegation == NULL)
187 goto nomatch; 204 goto nomatch;
205
188 spin_lock(&delegation->lock); 206 spin_lock(&delegation->lock);
189 if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data,
190 sizeof(delegation->stateid.data)) != 0)
191 goto nomatch_unlock;
192 list_del_rcu(&delegation->super_list); 207 list_del_rcu(&delegation->super_list);
193 delegation->inode = NULL; 208 delegation->inode = NULL;
194 nfsi->delegation_state = 0; 209 nfsi->delegation_state = 0;
195 rcu_assign_pointer(nfsi->delegation, NULL); 210 rcu_assign_pointer(nfsi->delegation, NULL);
196 spin_unlock(&delegation->lock); 211 spin_unlock(&delegation->lock);
197 return delegation; 212 return delegation;
198nomatch_unlock:
199 spin_unlock(&delegation->lock);
200nomatch: 213nomatch:
201 return NULL; 214 return NULL;
202} 215}
203 216
204/* 217static struct nfs_delegation *nfs_detach_delegation(struct nfs_inode *nfsi,
205 * Set up a delegation on an inode 218 struct nfs_server *server)
219{
220 struct nfs_client *clp = server->nfs_client;
221 struct nfs_delegation *delegation;
222
223 spin_lock(&clp->cl_lock);
224 delegation = nfs_detach_delegation_locked(nfsi, server);
225 spin_unlock(&clp->cl_lock);
226 return delegation;
227}
228
229/**
230 * nfs_inode_set_delegation - set up a delegation on an inode
231 * @inode: inode to which delegation applies
232 * @cred: cred to use for subsequent delegation processing
233 * @res: new delegation state from server
234 *
235 * Returns zero on success, or a negative errno value.
206 */ 236 */
207int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res) 237int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
208{ 238{
209 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; 239 struct nfs_server *server = NFS_SERVER(inode);
240 struct nfs_client *clp = server->nfs_client;
210 struct nfs_inode *nfsi = NFS_I(inode); 241 struct nfs_inode *nfsi = NFS_I(inode);
211 struct nfs_delegation *delegation, *old_delegation; 242 struct nfs_delegation *delegation, *old_delegation;
212 struct nfs_delegation *freeme = NULL; 243 struct nfs_delegation *freeme = NULL;
@@ -227,7 +258,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
227 258
228 spin_lock(&clp->cl_lock); 259 spin_lock(&clp->cl_lock);
229 old_delegation = rcu_dereference_protected(nfsi->delegation, 260 old_delegation = rcu_dereference_protected(nfsi->delegation,
230 lockdep_is_held(&clp->cl_lock)); 261 lockdep_is_held(&clp->cl_lock));
231 if (old_delegation != NULL) { 262 if (old_delegation != NULL) {
232 if (memcmp(&delegation->stateid, &old_delegation->stateid, 263 if (memcmp(&delegation->stateid, &old_delegation->stateid,
233 sizeof(old_delegation->stateid)) == 0 && 264 sizeof(old_delegation->stateid)) == 0 &&
@@ -246,9 +277,9 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
246 delegation = NULL; 277 delegation = NULL;
247 goto out; 278 goto out;
248 } 279 }
249 freeme = nfs_detach_delegation_locked(nfsi, NULL, clp); 280 freeme = nfs_detach_delegation_locked(nfsi, server);
250 } 281 }
251 list_add_rcu(&delegation->super_list, &clp->cl_delegations); 282 list_add_rcu(&delegation->super_list, &server->delegations);
252 nfsi->delegation_state = delegation->type; 283 nfsi->delegation_state = delegation->type;
253 rcu_assign_pointer(nfsi->delegation, delegation); 284 rcu_assign_pointer(nfsi->delegation, delegation);
254 delegation = NULL; 285 delegation = NULL;
@@ -290,73 +321,85 @@ out:
290 return err; 321 return err;
291} 322}
292 323
293/* 324/**
294 * Return all delegations that have been marked for return 325 * nfs_client_return_marked_delegations - return previously marked delegations
326 * @clp: nfs_client to process
327 *
328 * Returns zero on success, or a negative errno value.
295 */ 329 */
296int nfs_client_return_marked_delegations(struct nfs_client *clp) 330int nfs_client_return_marked_delegations(struct nfs_client *clp)
297{ 331{
298 struct nfs_delegation *delegation; 332 struct nfs_delegation *delegation;
333 struct nfs_server *server;
299 struct inode *inode; 334 struct inode *inode;
300 int err = 0; 335 int err = 0;
301 336
302restart: 337restart:
303 rcu_read_lock(); 338 rcu_read_lock();
304 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { 339 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
305 if (!test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags)) 340 list_for_each_entry_rcu(delegation, &server->delegations,
306 continue; 341 super_list) {
307 inode = nfs_delegation_grab_inode(delegation); 342 if (!test_and_clear_bit(NFS_DELEGATION_RETURN,
308 if (inode == NULL) 343 &delegation->flags))
309 continue; 344 continue;
310 spin_lock(&clp->cl_lock); 345 inode = nfs_delegation_grab_inode(delegation);
311 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL, clp); 346 if (inode == NULL)
312 spin_unlock(&clp->cl_lock); 347 continue;
313 rcu_read_unlock(); 348 delegation = nfs_detach_delegation(NFS_I(inode),
314 if (delegation != NULL) { 349 server);
315 filemap_flush(inode->i_mapping); 350 rcu_read_unlock();
316 err = __nfs_inode_return_delegation(inode, delegation, 0); 351
352 if (delegation != NULL) {
353 filemap_flush(inode->i_mapping);
354 err = __nfs_inode_return_delegation(inode,
355 delegation, 0);
356 }
357 iput(inode);
358 if (!err)
359 goto restart;
360 set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
361 return err;
317 } 362 }
318 iput(inode);
319 if (!err)
320 goto restart;
321 set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
322 return err;
323 } 363 }
324 rcu_read_unlock(); 364 rcu_read_unlock();
325 return 0; 365 return 0;
326} 366}
327 367
328/* 368/**
329 * This function returns the delegation without reclaiming opens 369 * nfs_inode_return_delegation_noreclaim - return delegation, don't reclaim opens
330 * or protecting against delegation reclaims. 370 * @inode: inode to process
331 * It is therefore really only safe to be called from 371 *
332 * nfs4_clear_inode() 372 * Does not protect against delegation reclaims, therefore really only safe
373 * to be called from nfs4_clear_inode().
333 */ 374 */
334void nfs_inode_return_delegation_noreclaim(struct inode *inode) 375void nfs_inode_return_delegation_noreclaim(struct inode *inode)
335{ 376{
336 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; 377 struct nfs_server *server = NFS_SERVER(inode);
337 struct nfs_inode *nfsi = NFS_I(inode); 378 struct nfs_inode *nfsi = NFS_I(inode);
338 struct nfs_delegation *delegation; 379 struct nfs_delegation *delegation;
339 380
340 if (rcu_access_pointer(nfsi->delegation) != NULL) { 381 if (rcu_access_pointer(nfsi->delegation) != NULL) {
341 spin_lock(&clp->cl_lock); 382 delegation = nfs_detach_delegation(nfsi, server);
342 delegation = nfs_detach_delegation_locked(nfsi, NULL, clp);
343 spin_unlock(&clp->cl_lock);
344 if (delegation != NULL) 383 if (delegation != NULL)
345 nfs_do_return_delegation(inode, delegation, 0); 384 nfs_do_return_delegation(inode, delegation, 0);
346 } 385 }
347} 386}
348 387
388/**
389 * nfs_inode_return_delegation - synchronously return a delegation
390 * @inode: inode to process
391 *
392 * Returns zero on success, or a negative errno value.
393 */
349int nfs_inode_return_delegation(struct inode *inode) 394int nfs_inode_return_delegation(struct inode *inode)
350{ 395{
351 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; 396 struct nfs_server *server = NFS_SERVER(inode);
352 struct nfs_inode *nfsi = NFS_I(inode); 397 struct nfs_inode *nfsi = NFS_I(inode);
353 struct nfs_delegation *delegation; 398 struct nfs_delegation *delegation;
354 int err = 0; 399 int err = 0;
355 400
356 if (rcu_access_pointer(nfsi->delegation) != NULL) { 401 if (rcu_access_pointer(nfsi->delegation) != NULL) {
357 spin_lock(&clp->cl_lock); 402 delegation = nfs_detach_delegation(nfsi, server);
358 delegation = nfs_detach_delegation_locked(nfsi, NULL, clp);
359 spin_unlock(&clp->cl_lock);
360 if (delegation != NULL) { 403 if (delegation != NULL) {
361 nfs_wb_all(inode); 404 nfs_wb_all(inode);
362 err = __nfs_inode_return_delegation(inode, delegation, 1); 405 err = __nfs_inode_return_delegation(inode, delegation, 1);
@@ -365,46 +408,61 @@ int nfs_inode_return_delegation(struct inode *inode)
365 return err; 408 return err;
366} 409}
367 410
368static void nfs_mark_return_delegation(struct nfs_client *clp, struct nfs_delegation *delegation) 411static void nfs_mark_return_delegation(struct nfs_delegation *delegation)
369{ 412{
413 struct nfs_client *clp = NFS_SERVER(delegation->inode)->nfs_client;
414
370 set_bit(NFS_DELEGATION_RETURN, &delegation->flags); 415 set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
371 set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state); 416 set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
372} 417}
373 418
374/* 419/**
375 * Return all delegations associated to a super block 420 * nfs_super_return_all_delegations - return delegations for one superblock
421 * @sb: sb to process
422 *
376 */ 423 */
377void nfs_super_return_all_delegations(struct super_block *sb) 424void nfs_super_return_all_delegations(struct super_block *sb)
378{ 425{
379 struct nfs_client *clp = NFS_SB(sb)->nfs_client; 426 struct nfs_server *server = NFS_SB(sb);
427 struct nfs_client *clp = server->nfs_client;
380 struct nfs_delegation *delegation; 428 struct nfs_delegation *delegation;
381 429
382 if (clp == NULL) 430 if (clp == NULL)
383 return; 431 return;
432
384 rcu_read_lock(); 433 rcu_read_lock();
385 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { 434 list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
386 spin_lock(&delegation->lock); 435 spin_lock(&delegation->lock);
387 if (delegation->inode != NULL && delegation->inode->i_sb == sb) 436 set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
388 set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
389 spin_unlock(&delegation->lock); 437 spin_unlock(&delegation->lock);
390 } 438 }
391 rcu_read_unlock(); 439 rcu_read_unlock();
440
392 if (nfs_client_return_marked_delegations(clp) != 0) 441 if (nfs_client_return_marked_delegations(clp) != 0)
393 nfs4_schedule_state_manager(clp); 442 nfs4_schedule_state_manager(clp);
394} 443}
395 444
396static 445static void nfs_mark_return_all_delegation_types(struct nfs_server *server,
397void nfs_client_mark_return_all_delegation_types(struct nfs_client *clp, fmode_t flags) 446 fmode_t flags)
398{ 447{
399 struct nfs_delegation *delegation; 448 struct nfs_delegation *delegation;
400 449
401 rcu_read_lock(); 450 list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
402 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
403 if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE)) 451 if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE))
404 continue; 452 continue;
405 if (delegation->type & flags) 453 if (delegation->type & flags)
406 nfs_mark_return_delegation(clp, delegation); 454 nfs_mark_return_delegation(delegation);
407 } 455 }
456}
457
458static void nfs_client_mark_return_all_delegation_types(struct nfs_client *clp,
459 fmode_t flags)
460{
461 struct nfs_server *server;
462
463 rcu_read_lock();
464 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
465 nfs_mark_return_all_delegation_types(server, flags);
408 rcu_read_unlock(); 466 rcu_read_unlock();
409} 467}
410 468
@@ -419,19 +477,32 @@ static void nfs_delegation_run_state_manager(struct nfs_client *clp)
419 nfs4_schedule_state_manager(clp); 477 nfs4_schedule_state_manager(clp);
420} 478}
421 479
480/**
481 * nfs_expire_all_delegation_types
482 * @clp: client to process
483 * @flags: delegation types to expire
484 *
485 */
422void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags) 486void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags)
423{ 487{
424 nfs_client_mark_return_all_delegation_types(clp, flags); 488 nfs_client_mark_return_all_delegation_types(clp, flags);
425 nfs_delegation_run_state_manager(clp); 489 nfs_delegation_run_state_manager(clp);
426} 490}
427 491
492/**
493 * nfs_expire_all_delegations
494 * @clp: client to process
495 *
496 */
428void nfs_expire_all_delegations(struct nfs_client *clp) 497void nfs_expire_all_delegations(struct nfs_client *clp)
429{ 498{
430 nfs_expire_all_delegation_types(clp, FMODE_READ|FMODE_WRITE); 499 nfs_expire_all_delegation_types(clp, FMODE_READ|FMODE_WRITE);
431} 500}
432 501
433/* 502/**
434 * Return all delegations following an NFS4ERR_CB_PATH_DOWN error. 503 * nfs_handle_cb_pathdown - return all delegations after NFS4ERR_CB_PATH_DOWN
504 * @clp: client to process
505 *
435 */ 506 */
436void nfs_handle_cb_pathdown(struct nfs_client *clp) 507void nfs_handle_cb_pathdown(struct nfs_client *clp)
437{ 508{
@@ -440,29 +511,43 @@ void nfs_handle_cb_pathdown(struct nfs_client *clp)
440 nfs_client_mark_return_all_delegations(clp); 511 nfs_client_mark_return_all_delegations(clp);
441} 512}
442 513
443static void nfs_client_mark_return_unreferenced_delegations(struct nfs_client *clp) 514static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server)
444{ 515{
445 struct nfs_delegation *delegation; 516 struct nfs_delegation *delegation;
446 517
447 rcu_read_lock(); 518 list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
448 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
449 if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags)) 519 if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags))
450 continue; 520 continue;
451 nfs_mark_return_delegation(clp, delegation); 521 nfs_mark_return_delegation(delegation);
452 } 522 }
453 rcu_read_unlock();
454} 523}
455 524
525/**
526 * nfs_expire_unreferenced_delegations - Eliminate unused delegations
527 * @clp: nfs_client to process
528 *
529 */
456void nfs_expire_unreferenced_delegations(struct nfs_client *clp) 530void nfs_expire_unreferenced_delegations(struct nfs_client *clp)
457{ 531{
458 nfs_client_mark_return_unreferenced_delegations(clp); 532 struct nfs_server *server;
533
534 rcu_read_lock();
535 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
536 nfs_mark_return_unreferenced_delegations(server);
537 rcu_read_unlock();
538
459 nfs_delegation_run_state_manager(clp); 539 nfs_delegation_run_state_manager(clp);
460} 540}
461 541
462/* 542/**
463 * Asynchronous delegation recall! 543 * nfs_async_inode_return_delegation - asynchronously return a delegation
544 * @inode: inode to process
545 * @stateid: state ID information from CB_RECALL arguments
546 *
547 * Returns zero on success, or a negative errno value.
464 */ 548 */
465int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid) 549int nfs_async_inode_return_delegation(struct inode *inode,
550 const nfs4_stateid *stateid)
466{ 551{
467 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; 552 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
468 struct nfs_delegation *delegation; 553 struct nfs_delegation *delegation;
@@ -474,22 +559,21 @@ int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *s
474 rcu_read_unlock(); 559 rcu_read_unlock();
475 return -ENOENT; 560 return -ENOENT;
476 } 561 }
477 562 nfs_mark_return_delegation(delegation);
478 nfs_mark_return_delegation(clp, delegation);
479 rcu_read_unlock(); 563 rcu_read_unlock();
564
480 nfs_delegation_run_state_manager(clp); 565 nfs_delegation_run_state_manager(clp);
481 return 0; 566 return 0;
482} 567}
483 568
484/* 569static struct inode *
485 * Retrieve the inode associated with a delegation 570nfs_delegation_find_inode_server(struct nfs_server *server,
486 */ 571 const struct nfs_fh *fhandle)
487struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle)
488{ 572{
489 struct nfs_delegation *delegation; 573 struct nfs_delegation *delegation;
490 struct inode *res = NULL; 574 struct inode *res = NULL;
491 rcu_read_lock(); 575
492 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { 576 list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
493 spin_lock(&delegation->lock); 577 spin_lock(&delegation->lock);
494 if (delegation->inode != NULL && 578 if (delegation->inode != NULL &&
495 nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) { 579 nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) {
@@ -499,49 +583,121 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs
499 if (res != NULL) 583 if (res != NULL)
500 break; 584 break;
501 } 585 }
586 return res;
587}
588
589/**
590 * nfs_delegation_find_inode - retrieve the inode associated with a delegation
591 * @clp: client state handle
592 * @fhandle: filehandle from a delegation recall
593 *
594 * Returns pointer to inode matching "fhandle," or NULL if a matching inode
595 * cannot be found.
596 */
597struct inode *nfs_delegation_find_inode(struct nfs_client *clp,
598 const struct nfs_fh *fhandle)
599{
600 struct nfs_server *server;
601 struct inode *res = NULL;
602
603 rcu_read_lock();
604 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
605 res = nfs_delegation_find_inode_server(server, fhandle);
606 if (res != NULL)
607 break;
608 }
502 rcu_read_unlock(); 609 rcu_read_unlock();
503 return res; 610 return res;
504} 611}
505 612
506/* 613static void nfs_delegation_mark_reclaim_server(struct nfs_server *server)
507 * Mark all delegations as needing to be reclaimed 614{
615 struct nfs_delegation *delegation;
616
617 list_for_each_entry_rcu(delegation, &server->delegations, super_list)
618 set_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags);
619}
620
621/**
622 * nfs_delegation_mark_reclaim - mark all delegations as needing to be reclaimed
623 * @clp: nfs_client to process
624 *
508 */ 625 */
509void nfs_delegation_mark_reclaim(struct nfs_client *clp) 626void nfs_delegation_mark_reclaim(struct nfs_client *clp)
510{ 627{
511 struct nfs_delegation *delegation; 628 struct nfs_server *server;
629
512 rcu_read_lock(); 630 rcu_read_lock();
513 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) 631 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
514 set_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); 632 nfs_delegation_mark_reclaim_server(server);
515 rcu_read_unlock(); 633 rcu_read_unlock();
516} 634}
517 635
518/* 636/**
519 * Reap all unclaimed delegations after reboot recovery is done 637 * nfs_delegation_reap_unclaimed - reap unclaimed delegations after reboot recovery is done
638 * @clp: nfs_client to process
639 *
520 */ 640 */
521void nfs_delegation_reap_unclaimed(struct nfs_client *clp) 641void nfs_delegation_reap_unclaimed(struct nfs_client *clp)
522{ 642{
523 struct nfs_delegation *delegation; 643 struct nfs_delegation *delegation;
644 struct nfs_server *server;
524 struct inode *inode; 645 struct inode *inode;
646
525restart: 647restart:
526 rcu_read_lock(); 648 rcu_read_lock();
527 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { 649 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
528 if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) == 0) 650 list_for_each_entry_rcu(delegation, &server->delegations,
529 continue; 651 super_list) {
530 inode = nfs_delegation_grab_inode(delegation); 652 if (test_bit(NFS_DELEGATION_NEED_RECLAIM,
531 if (inode == NULL) 653 &delegation->flags) == 0)
532 continue; 654 continue;
533 spin_lock(&clp->cl_lock); 655 inode = nfs_delegation_grab_inode(delegation);
534 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL, clp); 656 if (inode == NULL)
535 spin_unlock(&clp->cl_lock); 657 continue;
536 rcu_read_unlock(); 658 delegation = nfs_detach_delegation(NFS_I(inode),
537 if (delegation != NULL) 659 server);
538 nfs_free_delegation(delegation); 660 rcu_read_unlock();
539 iput(inode); 661
540 goto restart; 662 if (delegation != NULL)
663 nfs_free_delegation(delegation);
664 iput(inode);
665 goto restart;
666 }
541 } 667 }
542 rcu_read_unlock(); 668 rcu_read_unlock();
543} 669}
544 670
671/**
672 * nfs_delegations_present - check for existence of delegations
673 * @clp: client state handle
674 *
675 * Returns one if there are any nfs_delegation structures attached
676 * to this nfs_client.
677 */
678int nfs_delegations_present(struct nfs_client *clp)
679{
680 struct nfs_server *server;
681 int ret = 0;
682
683 rcu_read_lock();
684 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
685 if (!list_empty(&server->delegations)) {
686 ret = 1;
687 break;
688 }
689 rcu_read_unlock();
690 return ret;
691}
692
693/**
694 * nfs4_copy_delegation_stateid - Copy inode's state ID information
695 * @dst: stateid data structure to fill in
696 * @inode: inode to check
697 *
698 * Returns one and fills in "dst->data" * if inode had a delegation,
699 * otherwise zero is returned.
700 */
545int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode) 701int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
546{ 702{
547 struct nfs_inode *nfsi = NFS_I(inode); 703 struct nfs_inode *nfsi = NFS_I(inode);
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 2026304bda1..d9322e490c5 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -44,6 +44,7 @@ void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags);
44void nfs_expire_unreferenced_delegations(struct nfs_client *clp); 44void nfs_expire_unreferenced_delegations(struct nfs_client *clp);
45void nfs_handle_cb_pathdown(struct nfs_client *clp); 45void nfs_handle_cb_pathdown(struct nfs_client *clp);
46int nfs_client_return_marked_delegations(struct nfs_client *clp); 46int nfs_client_return_marked_delegations(struct nfs_client *clp);
47int nfs_delegations_present(struct nfs_client *clp);
47 48
48void nfs_delegation_mark_reclaim(struct nfs_client *clp); 49void nfs_delegation_mark_reclaim(struct nfs_client *clp);
49void nfs_delegation_reap_unclaimed(struct nfs_client *clp); 50void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 996dd8989a9..abe4f0c8dc5 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -33,8 +33,8 @@
33#include <linux/namei.h> 33#include <linux/namei.h>
34#include <linux/mount.h> 34#include <linux/mount.h>
35#include <linux/sched.h> 35#include <linux/sched.h>
36#include <linux/vmalloc.h>
37#include <linux/kmemleak.h> 36#include <linux/kmemleak.h>
37#include <linux/xattr.h>
38 38
39#include "delegation.h" 39#include "delegation.h"
40#include "iostat.h" 40#include "iostat.h"
@@ -125,9 +125,10 @@ const struct inode_operations nfs4_dir_inode_operations = {
125 .permission = nfs_permission, 125 .permission = nfs_permission,
126 .getattr = nfs_getattr, 126 .getattr = nfs_getattr,
127 .setattr = nfs_setattr, 127 .setattr = nfs_setattr,
128 .getxattr = nfs4_getxattr, 128 .getxattr = generic_getxattr,
129 .setxattr = nfs4_setxattr, 129 .setxattr = generic_setxattr,
130 .listxattr = nfs4_listxattr, 130 .listxattr = generic_listxattr,
131 .removexattr = generic_removexattr,
131}; 132};
132 133
133#endif /* CONFIG_NFS_V4 */ 134#endif /* CONFIG_NFS_V4 */
@@ -172,7 +173,7 @@ struct nfs_cache_array {
172 struct nfs_cache_array_entry array[0]; 173 struct nfs_cache_array_entry array[0];
173}; 174};
174 175
175typedef __be32 * (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int); 176typedef int (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, int);
176typedef struct { 177typedef struct {
177 struct file *file; 178 struct file *file;
178 struct page *page; 179 struct page *page;
@@ -378,14 +379,14 @@ error:
378 return error; 379 return error;
379} 380}
380 381
381/* Fill in an entry based on the xdr code stored in desc->page */ 382static int xdr_decode(nfs_readdir_descriptor_t *desc,
382static 383 struct nfs_entry *entry, struct xdr_stream *xdr)
383int xdr_decode(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, struct xdr_stream *stream)
384{ 384{
385 __be32 *p = desc->decode(stream, entry, NFS_SERVER(desc->file->f_path.dentry->d_inode), desc->plus); 385 int error;
386 if (IS_ERR(p))
387 return PTR_ERR(p);
388 386
387 error = desc->decode(xdr, entry, desc->plus);
388 if (error)
389 return error;
389 entry->fattr->time_start = desc->timestamp; 390 entry->fattr->time_start = desc->timestamp;
390 entry->fattr->gencount = desc->gencount; 391 entry->fattr->gencount = desc->gencount;
391 return 0; 392 return 0;
@@ -438,7 +439,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
438 if (dentry == NULL) 439 if (dentry == NULL)
439 return; 440 return;
440 441
441 dentry->d_op = NFS_PROTO(dir)->dentry_ops; 442 d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops);
442 inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr); 443 inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
443 if (IS_ERR(inode)) 444 if (IS_ERR(inode))
444 goto out; 445 goto out;
@@ -459,25 +460,26 @@ out:
459/* Perform conversion from xdr to cache array */ 460/* Perform conversion from xdr to cache array */
460static 461static
461int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, 462int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry,
462 void *xdr_page, struct page *page, unsigned int buflen) 463 struct page **xdr_pages, struct page *page, unsigned int buflen)
463{ 464{
464 struct xdr_stream stream; 465 struct xdr_stream stream;
465 struct xdr_buf buf; 466 struct xdr_buf buf = {
466 __be32 *ptr = xdr_page; 467 .pages = xdr_pages,
468 .page_len = buflen,
469 .buflen = buflen,
470 .len = buflen,
471 };
472 struct page *scratch;
467 struct nfs_cache_array *array; 473 struct nfs_cache_array *array;
468 unsigned int count = 0; 474 unsigned int count = 0;
469 int status; 475 int status;
470 476
471 buf.head->iov_base = xdr_page; 477 scratch = alloc_page(GFP_KERNEL);
472 buf.head->iov_len = buflen; 478 if (scratch == NULL)
473 buf.tail->iov_len = 0; 479 return -ENOMEM;
474 buf.page_base = 0;
475 buf.page_len = 0;
476 buf.buflen = buf.head->iov_len;
477 buf.len = buf.head->iov_len;
478
479 xdr_init_decode(&stream, &buf, ptr);
480 480
481 xdr_init_decode(&stream, &buf, NULL);
482 xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
481 483
482 do { 484 do {
483 status = xdr_decode(desc, entry, &stream); 485 status = xdr_decode(desc, entry, &stream);
@@ -506,6 +508,8 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
506 } else 508 } else
507 status = PTR_ERR(array); 509 status = PTR_ERR(array);
508 } 510 }
511
512 put_page(scratch);
509 return status; 513 return status;
510} 514}
511 515
@@ -521,7 +525,6 @@ static
521void nfs_readdir_free_large_page(void *ptr, struct page **pages, 525void nfs_readdir_free_large_page(void *ptr, struct page **pages,
522 unsigned int npages) 526 unsigned int npages)
523{ 527{
524 vm_unmap_ram(ptr, npages);
525 nfs_readdir_free_pagearray(pages, npages); 528 nfs_readdir_free_pagearray(pages, npages);
526} 529}
527 530
@@ -530,9 +533,8 @@ void nfs_readdir_free_large_page(void *ptr, struct page **pages,
530 * to nfs_readdir_free_large_page 533 * to nfs_readdir_free_large_page
531 */ 534 */
532static 535static
533void *nfs_readdir_large_page(struct page **pages, unsigned int npages) 536int nfs_readdir_large_page(struct page **pages, unsigned int npages)
534{ 537{
535 void *ptr;
536 unsigned int i; 538 unsigned int i;
537 539
538 for (i = 0; i < npages; i++) { 540 for (i = 0; i < npages; i++) {
@@ -541,13 +543,11 @@ void *nfs_readdir_large_page(struct page **pages, unsigned int npages)
541 goto out_freepages; 543 goto out_freepages;
542 pages[i] = page; 544 pages[i] = page;
543 } 545 }
546 return 0;
544 547
545 ptr = vm_map_ram(pages, npages, 0, PAGE_KERNEL);
546 if (!IS_ERR_OR_NULL(ptr))
547 return ptr;
548out_freepages: 548out_freepages:
549 nfs_readdir_free_pagearray(pages, i); 549 nfs_readdir_free_pagearray(pages, i);
550 return NULL; 550 return -ENOMEM;
551} 551}
552 552
553static 553static
@@ -566,6 +566,7 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
566 entry.eof = 0; 566 entry.eof = 0;
567 entry.fh = nfs_alloc_fhandle(); 567 entry.fh = nfs_alloc_fhandle();
568 entry.fattr = nfs_alloc_fattr(); 568 entry.fattr = nfs_alloc_fattr();
569 entry.server = NFS_SERVER(inode);
569 if (entry.fh == NULL || entry.fattr == NULL) 570 if (entry.fh == NULL || entry.fattr == NULL)
570 goto out; 571 goto out;
571 572
@@ -577,8 +578,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
577 memset(array, 0, sizeof(struct nfs_cache_array)); 578 memset(array, 0, sizeof(struct nfs_cache_array));
578 array->eof_index = -1; 579 array->eof_index = -1;
579 580
580 pages_ptr = nfs_readdir_large_page(pages, array_size); 581 status = nfs_readdir_large_page(pages, array_size);
581 if (!pages_ptr) 582 if (status < 0)
582 goto out_release_array; 583 goto out_release_array;
583 do { 584 do {
584 unsigned int pglen; 585 unsigned int pglen;
@@ -587,7 +588,7 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
587 if (status < 0) 588 if (status < 0)
588 break; 589 break;
589 pglen = status; 590 pglen = status;
590 status = nfs_readdir_page_filler(desc, &entry, pages_ptr, page, pglen); 591 status = nfs_readdir_page_filler(desc, &entry, pages, page, pglen);
591 if (status < 0) { 592 if (status < 0) {
592 if (status == -ENOSPC) 593 if (status == -ENOSPC)
593 status = 0; 594 status = 0;
@@ -938,7 +939,8 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
938 * component of the path. 939 * component of the path.
939 * We check for this using LOOKUP_CONTINUE and LOOKUP_PARENT. 940 * We check for this using LOOKUP_CONTINUE and LOOKUP_PARENT.
940 */ 941 */
941static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd, unsigned int mask) 942static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd,
943 unsigned int mask)
942{ 944{
943 if (nd->flags & (LOOKUP_CONTINUE|LOOKUP_PARENT)) 945 if (nd->flags & (LOOKUP_CONTINUE|LOOKUP_PARENT))
944 return 0; 946 return 0;
@@ -1018,7 +1020,7 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
1018 * If the parent directory is seen to have changed, we throw out the 1020 * If the parent directory is seen to have changed, we throw out the
1019 * cached dentry and do a new lookup. 1021 * cached dentry and do a new lookup.
1020 */ 1022 */
1021static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) 1023static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
1022{ 1024{
1023 struct inode *dir; 1025 struct inode *dir;
1024 struct inode *inode; 1026 struct inode *inode;
@@ -1027,6 +1029,9 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
1027 struct nfs_fattr *fattr = NULL; 1029 struct nfs_fattr *fattr = NULL;
1028 int error; 1030 int error;
1029 1031
1032 if (nd->flags & LOOKUP_RCU)
1033 return -ECHILD;
1034
1030 parent = dget_parent(dentry); 1035 parent = dget_parent(dentry);
1031 dir = parent->d_inode; 1036 dir = parent->d_inode;
1032 nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); 1037 nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
@@ -1117,7 +1122,7 @@ out_error:
1117/* 1122/*
1118 * This is called from dput() when d_count is going to 0. 1123 * This is called from dput() when d_count is going to 0.
1119 */ 1124 */
1120static int nfs_dentry_delete(struct dentry *dentry) 1125static int nfs_dentry_delete(const struct dentry *dentry)
1121{ 1126{
1122 dfprintk(VFS, "NFS: dentry_delete(%s/%s, %x)\n", 1127 dfprintk(VFS, "NFS: dentry_delete(%s/%s, %x)\n",
1123 dentry->d_parent->d_name.name, dentry->d_name.name, 1128 dentry->d_parent->d_name.name, dentry->d_name.name,
@@ -1188,7 +1193,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
1188 if (dentry->d_name.len > NFS_SERVER(dir)->namelen) 1193 if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
1189 goto out; 1194 goto out;
1190 1195
1191 dentry->d_op = NFS_PROTO(dir)->dentry_ops; 1196 d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops);
1192 1197
1193 /* 1198 /*
1194 * If we're doing an exclusive create, optimize away the lookup 1199 * If we're doing an exclusive create, optimize away the lookup
@@ -1217,7 +1222,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
1217 goto out_unblock_sillyrename; 1222 goto out_unblock_sillyrename;
1218 } 1223 }
1219 inode = nfs_fhget(dentry->d_sb, fhandle, fattr); 1224 inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
1220 res = (struct dentry *)inode; 1225 res = ERR_CAST(inode);
1221 if (IS_ERR(res)) 1226 if (IS_ERR(res))
1222 goto out_unblock_sillyrename; 1227 goto out_unblock_sillyrename;
1223 1228
@@ -1333,7 +1338,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
1333 res = ERR_PTR(-ENAMETOOLONG); 1338 res = ERR_PTR(-ENAMETOOLONG);
1334 goto out; 1339 goto out;
1335 } 1340 }
1336 dentry->d_op = NFS_PROTO(dir)->dentry_ops; 1341 d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops);
1337 1342
1338 /* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash 1343 /* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash
1339 * the dentry. */ 1344 * the dentry. */
@@ -1351,8 +1356,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
1351 if (nd->flags & LOOKUP_CREATE) { 1356 if (nd->flags & LOOKUP_CREATE) {
1352 attr.ia_mode = nd->intent.open.create_mode; 1357 attr.ia_mode = nd->intent.open.create_mode;
1353 attr.ia_valid = ATTR_MODE; 1358 attr.ia_valid = ATTR_MODE;
1354 if (!IS_POSIXACL(dir)) 1359 attr.ia_mode &= ~current_umask();
1355 attr.ia_mode &= ~current_umask();
1356 } else { 1360 } else {
1357 open_flags &= ~(O_EXCL | O_CREAT); 1361 open_flags &= ~(O_EXCL | O_CREAT);
1358 attr.ia_valid = 0; 1362 attr.ia_valid = 0;
@@ -1718,11 +1722,9 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry)
1718 dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id, 1722 dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id,
1719 dir->i_ino, dentry->d_name.name); 1723 dir->i_ino, dentry->d_name.name);
1720 1724
1721 spin_lock(&dcache_lock);
1722 spin_lock(&dentry->d_lock); 1725 spin_lock(&dentry->d_lock);
1723 if (atomic_read(&dentry->d_count) > 1) { 1726 if (dentry->d_count > 1) {
1724 spin_unlock(&dentry->d_lock); 1727 spin_unlock(&dentry->d_lock);
1725 spin_unlock(&dcache_lock);
1726 /* Start asynchronous writeout of the inode */ 1728 /* Start asynchronous writeout of the inode */
1727 write_inode_now(dentry->d_inode, 0); 1729 write_inode_now(dentry->d_inode, 0);
1728 error = nfs_sillyrename(dir, dentry); 1730 error = nfs_sillyrename(dir, dentry);
@@ -1733,7 +1735,6 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry)
1733 need_rehash = 1; 1735 need_rehash = 1;
1734 } 1736 }
1735 spin_unlock(&dentry->d_lock); 1737 spin_unlock(&dentry->d_lock);
1736 spin_unlock(&dcache_lock);
1737 error = nfs_safe_remove(dentry); 1738 error = nfs_safe_remove(dentry);
1738 if (!error || error == -ENOENT) { 1739 if (!error || error == -ENOENT) {
1739 nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); 1740 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
@@ -1868,7 +1869,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1868 dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n", 1869 dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n",
1869 old_dentry->d_parent->d_name.name, old_dentry->d_name.name, 1870 old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
1870 new_dentry->d_parent->d_name.name, new_dentry->d_name.name, 1871 new_dentry->d_parent->d_name.name, new_dentry->d_name.name,
1871 atomic_read(&new_dentry->d_count)); 1872 new_dentry->d_count);
1872 1873
1873 /* 1874 /*
1874 * For non-directories, check whether the target is busy and if so, 1875 * For non-directories, check whether the target is busy and if so,
@@ -1886,7 +1887,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1886 rehash = new_dentry; 1887 rehash = new_dentry;
1887 } 1888 }
1888 1889
1889 if (atomic_read(&new_dentry->d_count) > 2) { 1890 if (new_dentry->d_count > 2) {
1890 int err; 1891 int err;
1891 1892
1892 /* copy the target dentry's name */ 1893 /* copy the target dentry's name */
@@ -2188,11 +2189,14 @@ int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags)
2188 return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags)); 2189 return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags));
2189} 2190}
2190 2191
2191int nfs_permission(struct inode *inode, int mask) 2192int nfs_permission(struct inode *inode, int mask, unsigned int flags)
2192{ 2193{
2193 struct rpc_cred *cred; 2194 struct rpc_cred *cred;
2194 int res = 0; 2195 int res = 0;
2195 2196
2197 if (flags & IPERM_FLAG_RCU)
2198 return -ECHILD;
2199
2196 nfs_inc_stats(inode, NFSIOS_VFSACCESS); 2200 nfs_inc_stats(inode, NFSIOS_VFSACCESS);
2197 2201
2198 if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0) 2202 if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
@@ -2240,7 +2244,7 @@ out:
2240out_notsup: 2244out_notsup:
2241 res = nfs_revalidate_inode(NFS_SERVER(inode), inode); 2245 res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
2242 if (res == 0) 2246 if (res == 0)
2243 res = generic_permission(inode, mask, NULL); 2247 res = generic_permission(inode, mask, flags, NULL);
2244 goto out; 2248 goto out;
2245} 2249}
2246 2250
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index ac7b814ce16..5596c6a2881 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -63,9 +63,11 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
63 * This again causes shrink_dcache_for_umount_subtree() to 63 * This again causes shrink_dcache_for_umount_subtree() to
64 * Oops, since the test for IS_ROOT() will fail. 64 * Oops, since the test for IS_ROOT() will fail.
65 */ 65 */
66 spin_lock(&dcache_lock); 66 spin_lock(&sb->s_root->d_inode->i_lock);
67 spin_lock(&sb->s_root->d_lock);
67 list_del_init(&sb->s_root->d_alias); 68 list_del_init(&sb->s_root->d_alias);
68 spin_unlock(&dcache_lock); 69 spin_unlock(&sb->s_root->d_lock);
70 spin_unlock(&sb->s_root->d_inode->i_lock);
69 } 71 }
70 return 0; 72 return 0;
71} 73}
@@ -119,7 +121,7 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh)
119 security_d_instantiate(ret, inode); 121 security_d_instantiate(ret, inode);
120 122
121 if (ret->d_op == NULL) 123 if (ret->d_op == NULL)
122 ret->d_op = server->nfs_client->rpc_ops->dentry_ops; 124 d_set_d_op(ret, server->nfs_client->rpc_ops->dentry_ops);
123out: 125out:
124 nfs_free_fattr(fsinfo.fattr); 126 nfs_free_fattr(fsinfo.fattr);
125 return ret; 127 return ret;
@@ -226,7 +228,7 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh)
226 security_d_instantiate(ret, inode); 228 security_d_instantiate(ret, inode);
227 229
228 if (ret->d_op == NULL) 230 if (ret->d_op == NULL)
229 ret->d_op = server->nfs_client->rpc_ops->dentry_ops; 231 d_set_d_op(ret, server->nfs_client->rpc_ops->dentry_ops);
230 232
231out: 233out:
232 nfs_free_fattr(fattr); 234 nfs_free_fattr(fattr);
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 4e2d9b6b138..18696882f1c 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -238,7 +238,7 @@ int nfs_map_gid_to_group(struct nfs_client *clp, __u32 gid, char *buf, size_t bu
238 return nfs_idmap_lookup_name(gid, "group", buf, buflen); 238 return nfs_idmap_lookup_name(gid, "group", buf, buflen);
239} 239}
240 240
241#else /* CONFIG_NFS_USE_IDMAPPER not defined */ 241#else /* CONFIG_NFS_USE_NEW_IDMAPPER not defined */
242 242
243#include <linux/module.h> 243#include <linux/module.h>
244#include <linux/mutex.h> 244#include <linux/mutex.h>
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index e67e31c7341..ce00b704452 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1410,9 +1410,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1410 */ 1410 */
1411void nfs4_evict_inode(struct inode *inode) 1411void nfs4_evict_inode(struct inode *inode)
1412{ 1412{
1413 pnfs_destroy_layout(NFS_I(inode));
1413 truncate_inode_pages(&inode->i_data, 0); 1414 truncate_inode_pages(&inode->i_data, 0);
1414 end_writeback(inode); 1415 end_writeback(inode);
1415 pnfs_destroy_layout(NFS_I(inode));
1416 /* If we are holding a delegation, return it! */ 1416 /* If we are holding a delegation, return it! */
1417 nfs_inode_return_delegation_noreclaim(inode); 1417 nfs_inode_return_delegation_noreclaim(inode);
1418 /* First call standard NFS clear_inode() code */ 1418 /* First call standard NFS clear_inode() code */
@@ -1438,11 +1438,18 @@ struct inode *nfs_alloc_inode(struct super_block *sb)
1438 return &nfsi->vfs_inode; 1438 return &nfsi->vfs_inode;
1439} 1439}
1440 1440
1441void nfs_destroy_inode(struct inode *inode) 1441static void nfs_i_callback(struct rcu_head *head)
1442{ 1442{
1443 struct inode *inode = container_of(head, struct inode, i_rcu);
1444 INIT_LIST_HEAD(&inode->i_dentry);
1443 kmem_cache_free(nfs_inode_cachep, NFS_I(inode)); 1445 kmem_cache_free(nfs_inode_cachep, NFS_I(inode));
1444} 1446}
1445 1447
1448void nfs_destroy_inode(struct inode *inode)
1449{
1450 call_rcu(&inode->i_rcu, nfs_i_callback);
1451}
1452
1446static inline void nfs4_init_once(struct nfs_inode *nfsi) 1453static inline void nfs4_init_once(struct nfs_inode *nfsi)
1447{ 1454{
1448#ifdef CONFIG_NFS_V4 1455#ifdef CONFIG_NFS_V4
@@ -1612,6 +1619,7 @@ static void __exit exit_nfs_fs(void)
1612#ifdef CONFIG_PROC_FS 1619#ifdef CONFIG_PROC_FS
1613 rpc_proc_unregister("nfs"); 1620 rpc_proc_unregister("nfs");
1614#endif 1621#endif
1622 nfs_cleanup_cb_ident_idr();
1615 unregister_nfs_fs(); 1623 unregister_nfs_fs();
1616 nfs_fs_proc_exit(); 1624 nfs_fs_proc_exit();
1617 nfsiod_stop(); 1625 nfsiod_stop();
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index e6356b750b7..bfa3a34af80 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -128,9 +128,13 @@ extern void nfs_umount(const struct nfs_mount_request *info);
128/* client.c */ 128/* client.c */
129extern struct rpc_program nfs_program; 129extern struct rpc_program nfs_program;
130 130
131extern void nfs_cleanup_cb_ident_idr(void);
131extern void nfs_put_client(struct nfs_client *); 132extern void nfs_put_client(struct nfs_client *);
132extern struct nfs_client *nfs_find_client(const struct sockaddr *, u32); 133extern struct nfs_client *nfs4_find_client_no_ident(const struct sockaddr *);
133extern struct nfs_client *nfs_find_client_next(struct nfs_client *); 134extern struct nfs_client *nfs4_find_client_ident(int);
135extern struct nfs_client *
136nfs4_find_client_sessionid(const struct sockaddr *, struct nfs4_sessionid *,
137 int);
134extern struct nfs_server *nfs_create_server( 138extern struct nfs_server *nfs_create_server(
135 const struct nfs_parsed_mount_data *, 139 const struct nfs_parsed_mount_data *,
136 struct nfs_fh *); 140 struct nfs_fh *);
@@ -185,17 +189,20 @@ extern int __init nfs_init_directcache(void);
185extern void nfs_destroy_directcache(void); 189extern void nfs_destroy_directcache(void);
186 190
187/* nfs2xdr.c */ 191/* nfs2xdr.c */
188extern int nfs_stat_to_errno(int); 192extern int nfs_stat_to_errno(enum nfs_stat);
189extern struct rpc_procinfo nfs_procedures[]; 193extern struct rpc_procinfo nfs_procedures[];
190extern __be32 *nfs_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int); 194extern int nfs2_decode_dirent(struct xdr_stream *,
195 struct nfs_entry *, int);
191 196
192/* nfs3xdr.c */ 197/* nfs3xdr.c */
193extern struct rpc_procinfo nfs3_procedures[]; 198extern struct rpc_procinfo nfs3_procedures[];
194extern __be32 *nfs3_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int); 199extern int nfs3_decode_dirent(struct xdr_stream *,
200 struct nfs_entry *, int);
195 201
196/* nfs4xdr.c */ 202/* nfs4xdr.c */
197#ifdef CONFIG_NFS_V4 203#ifdef CONFIG_NFS_V4
198extern __be32 *nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int); 204extern int nfs4_decode_dirent(struct xdr_stream *,
205 struct nfs_entry *, int);
199#endif 206#endif
200#ifdef CONFIG_NFS_V4_1 207#ifdef CONFIG_NFS_V4_1
201extern const u32 nfs41_maxread_overhead; 208extern const u32 nfs41_maxread_overhead;
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 4f981f1f668..d4c2d6b7507 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -236,10 +236,8 @@ void nfs_umount(const struct nfs_mount_request *info)
236 .authflavor = RPC_AUTH_UNIX, 236 .authflavor = RPC_AUTH_UNIX,
237 .flags = RPC_CLNT_CREATE_NOPING, 237 .flags = RPC_CLNT_CREATE_NOPING,
238 }; 238 };
239 struct mountres result;
240 struct rpc_message msg = { 239 struct rpc_message msg = {
241 .rpc_argp = info->dirpath, 240 .rpc_argp = info->dirpath,
242 .rpc_resp = &result,
243 }; 241 };
244 struct rpc_clnt *clnt; 242 struct rpc_clnt *clnt;
245 int status; 243 int status;
@@ -248,7 +246,7 @@ void nfs_umount(const struct nfs_mount_request *info)
248 args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; 246 args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
249 247
250 clnt = rpc_create(&args); 248 clnt = rpc_create(&args);
251 if (unlikely(IS_ERR(clnt))) 249 if (IS_ERR(clnt))
252 goto out_clnt_err; 250 goto out_clnt_err;
253 251
254 dprintk("NFS: sending UMNT request for %s:%s\n", 252 dprintk("NFS: sending UMNT request for %s:%s\n",
@@ -280,29 +278,20 @@ out_call_err:
280 * XDR encode/decode functions for MOUNT 278 * XDR encode/decode functions for MOUNT
281 */ 279 */
282 280
283static int encode_mntdirpath(struct xdr_stream *xdr, const char *pathname) 281static void encode_mntdirpath(struct xdr_stream *xdr, const char *pathname)
284{ 282{
285 const u32 pathname_len = strlen(pathname); 283 const u32 pathname_len = strlen(pathname);
286 __be32 *p; 284 __be32 *p;
287 285
288 if (unlikely(pathname_len > MNTPATHLEN)) 286 BUG_ON(pathname_len > MNTPATHLEN);
289 return -EIO; 287 p = xdr_reserve_space(xdr, 4 + pathname_len);
290
291 p = xdr_reserve_space(xdr, sizeof(u32) + pathname_len);
292 if (unlikely(p == NULL))
293 return -EIO;
294 xdr_encode_opaque(p, pathname, pathname_len); 288 xdr_encode_opaque(p, pathname, pathname_len);
295
296 return 0;
297} 289}
298 290
299static int mnt_enc_dirpath(struct rpc_rqst *req, __be32 *p, 291static void mnt_xdr_enc_dirpath(struct rpc_rqst *req, struct xdr_stream *xdr,
300 const char *dirpath) 292 const char *dirpath)
301{ 293{
302 struct xdr_stream xdr; 294 encode_mntdirpath(xdr, dirpath);
303
304 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
305 return encode_mntdirpath(&xdr, dirpath);
306} 295}
307 296
308/* 297/*
@@ -320,10 +309,10 @@ static int decode_status(struct xdr_stream *xdr, struct mountres *res)
320 u32 status; 309 u32 status;
321 __be32 *p; 310 __be32 *p;
322 311
323 p = xdr_inline_decode(xdr, sizeof(status)); 312 p = xdr_inline_decode(xdr, 4);
324 if (unlikely(p == NULL)) 313 if (unlikely(p == NULL))
325 return -EIO; 314 return -EIO;
326 status = ntohl(*p); 315 status = be32_to_cpup(p);
327 316
328 for (i = 0; i < ARRAY_SIZE(mnt_errtbl); i++) { 317 for (i = 0; i < ARRAY_SIZE(mnt_errtbl); i++) {
329 if (mnt_errtbl[i].status == status) { 318 if (mnt_errtbl[i].status == status) {
@@ -351,18 +340,16 @@ static int decode_fhandle(struct xdr_stream *xdr, struct mountres *res)
351 return 0; 340 return 0;
352} 341}
353 342
354static int mnt_dec_mountres(struct rpc_rqst *req, __be32 *p, 343static int mnt_xdr_dec_mountres(struct rpc_rqst *req,
355 struct mountres *res) 344 struct xdr_stream *xdr,
345 struct mountres *res)
356{ 346{
357 struct xdr_stream xdr;
358 int status; 347 int status;
359 348
360 xdr_init_decode(&xdr, &req->rq_rcv_buf, p); 349 status = decode_status(xdr, res);
361
362 status = decode_status(&xdr, res);
363 if (unlikely(status != 0 || res->errno != 0)) 350 if (unlikely(status != 0 || res->errno != 0))
364 return status; 351 return status;
365 return decode_fhandle(&xdr, res); 352 return decode_fhandle(xdr, res);
366} 353}
367 354
368static int decode_fhs_status(struct xdr_stream *xdr, struct mountres *res) 355static int decode_fhs_status(struct xdr_stream *xdr, struct mountres *res)
@@ -371,10 +358,10 @@ static int decode_fhs_status(struct xdr_stream *xdr, struct mountres *res)
371 u32 status; 358 u32 status;
372 __be32 *p; 359 __be32 *p;
373 360
374 p = xdr_inline_decode(xdr, sizeof(status)); 361 p = xdr_inline_decode(xdr, 4);
375 if (unlikely(p == NULL)) 362 if (unlikely(p == NULL))
376 return -EIO; 363 return -EIO;
377 status = ntohl(*p); 364 status = be32_to_cpup(p);
378 365
379 for (i = 0; i < ARRAY_SIZE(mnt3_errtbl); i++) { 366 for (i = 0; i < ARRAY_SIZE(mnt3_errtbl); i++) {
380 if (mnt3_errtbl[i].status == status) { 367 if (mnt3_errtbl[i].status == status) {
@@ -394,11 +381,11 @@ static int decode_fhandle3(struct xdr_stream *xdr, struct mountres *res)
394 u32 size; 381 u32 size;
395 __be32 *p; 382 __be32 *p;
396 383
397 p = xdr_inline_decode(xdr, sizeof(size)); 384 p = xdr_inline_decode(xdr, 4);
398 if (unlikely(p == NULL)) 385 if (unlikely(p == NULL))
399 return -EIO; 386 return -EIO;
400 387
401 size = ntohl(*p++); 388 size = be32_to_cpup(p);
402 if (size > NFS3_FHSIZE || size == 0) 389 if (size > NFS3_FHSIZE || size == 0)
403 return -EIO; 390 return -EIO;
404 391
@@ -421,15 +408,15 @@ static int decode_auth_flavors(struct xdr_stream *xdr, struct mountres *res)
421 if (*count == 0) 408 if (*count == 0)
422 return 0; 409 return 0;
423 410
424 p = xdr_inline_decode(xdr, sizeof(entries)); 411 p = xdr_inline_decode(xdr, 4);
425 if (unlikely(p == NULL)) 412 if (unlikely(p == NULL))
426 return -EIO; 413 return -EIO;
427 entries = ntohl(*p); 414 entries = be32_to_cpup(p);
428 dprintk("NFS: received %u auth flavors\n", entries); 415 dprintk("NFS: received %u auth flavors\n", entries);
429 if (entries > NFS_MAX_SECFLAVORS) 416 if (entries > NFS_MAX_SECFLAVORS)
430 entries = NFS_MAX_SECFLAVORS; 417 entries = NFS_MAX_SECFLAVORS;
431 418
432 p = xdr_inline_decode(xdr, sizeof(u32) * entries); 419 p = xdr_inline_decode(xdr, 4 * entries);
433 if (unlikely(p == NULL)) 420 if (unlikely(p == NULL))
434 return -EIO; 421 return -EIO;
435 422
@@ -437,7 +424,7 @@ static int decode_auth_flavors(struct xdr_stream *xdr, struct mountres *res)
437 entries = *count; 424 entries = *count;
438 425
439 for (i = 0; i < entries; i++) { 426 for (i = 0; i < entries; i++) {
440 flavors[i] = ntohl(*p++); 427 flavors[i] = be32_to_cpup(p++);
441 dprintk("NFS: auth flavor[%u]: %d\n", i, flavors[i]); 428 dprintk("NFS: auth flavor[%u]: %d\n", i, flavors[i]);
442 } 429 }
443 *count = i; 430 *count = i;
@@ -445,30 +432,28 @@ static int decode_auth_flavors(struct xdr_stream *xdr, struct mountres *res)
445 return 0; 432 return 0;
446} 433}
447 434
448static int mnt_dec_mountres3(struct rpc_rqst *req, __be32 *p, 435static int mnt_xdr_dec_mountres3(struct rpc_rqst *req,
449 struct mountres *res) 436 struct xdr_stream *xdr,
437 struct mountres *res)
450{ 438{
451 struct xdr_stream xdr;
452 int status; 439 int status;
453 440
454 xdr_init_decode(&xdr, &req->rq_rcv_buf, p); 441 status = decode_fhs_status(xdr, res);
455
456 status = decode_fhs_status(&xdr, res);
457 if (unlikely(status != 0 || res->errno != 0)) 442 if (unlikely(status != 0 || res->errno != 0))
458 return status; 443 return status;
459 status = decode_fhandle3(&xdr, res); 444 status = decode_fhandle3(xdr, res);
460 if (unlikely(status != 0)) { 445 if (unlikely(status != 0)) {
461 res->errno = -EBADHANDLE; 446 res->errno = -EBADHANDLE;
462 return 0; 447 return 0;
463 } 448 }
464 return decode_auth_flavors(&xdr, res); 449 return decode_auth_flavors(xdr, res);
465} 450}
466 451
467static struct rpc_procinfo mnt_procedures[] = { 452static struct rpc_procinfo mnt_procedures[] = {
468 [MOUNTPROC_MNT] = { 453 [MOUNTPROC_MNT] = {
469 .p_proc = MOUNTPROC_MNT, 454 .p_proc = MOUNTPROC_MNT,
470 .p_encode = (kxdrproc_t)mnt_enc_dirpath, 455 .p_encode = (kxdreproc_t)mnt_xdr_enc_dirpath,
471 .p_decode = (kxdrproc_t)mnt_dec_mountres, 456 .p_decode = (kxdrdproc_t)mnt_xdr_dec_mountres,
472 .p_arglen = MNT_enc_dirpath_sz, 457 .p_arglen = MNT_enc_dirpath_sz,
473 .p_replen = MNT_dec_mountres_sz, 458 .p_replen = MNT_dec_mountres_sz,
474 .p_statidx = MOUNTPROC_MNT, 459 .p_statidx = MOUNTPROC_MNT,
@@ -476,7 +461,7 @@ static struct rpc_procinfo mnt_procedures[] = {
476 }, 461 },
477 [MOUNTPROC_UMNT] = { 462 [MOUNTPROC_UMNT] = {
478 .p_proc = MOUNTPROC_UMNT, 463 .p_proc = MOUNTPROC_UMNT,
479 .p_encode = (kxdrproc_t)mnt_enc_dirpath, 464 .p_encode = (kxdreproc_t)mnt_xdr_enc_dirpath,
480 .p_arglen = MNT_enc_dirpath_sz, 465 .p_arglen = MNT_enc_dirpath_sz,
481 .p_statidx = MOUNTPROC_UMNT, 466 .p_statidx = MOUNTPROC_UMNT,
482 .p_name = "UMOUNT", 467 .p_name = "UMOUNT",
@@ -486,8 +471,8 @@ static struct rpc_procinfo mnt_procedures[] = {
486static struct rpc_procinfo mnt3_procedures[] = { 471static struct rpc_procinfo mnt3_procedures[] = {
487 [MOUNTPROC3_MNT] = { 472 [MOUNTPROC3_MNT] = {
488 .p_proc = MOUNTPROC3_MNT, 473 .p_proc = MOUNTPROC3_MNT,
489 .p_encode = (kxdrproc_t)mnt_enc_dirpath, 474 .p_encode = (kxdreproc_t)mnt_xdr_enc_dirpath,
490 .p_decode = (kxdrproc_t)mnt_dec_mountres3, 475 .p_decode = (kxdrdproc_t)mnt_xdr_dec_mountres3,
491 .p_arglen = MNT_enc_dirpath_sz, 476 .p_arglen = MNT_enc_dirpath_sz,
492 .p_replen = MNT_dec_mountres3_sz, 477 .p_replen = MNT_dec_mountres3_sz,
493 .p_statidx = MOUNTPROC3_MNT, 478 .p_statidx = MOUNTPROC3_MNT,
@@ -495,7 +480,7 @@ static struct rpc_procinfo mnt3_procedures[] = {
495 }, 480 },
496 [MOUNTPROC3_UMNT] = { 481 [MOUNTPROC3_UMNT] = {
497 .p_proc = MOUNTPROC3_UMNT, 482 .p_proc = MOUNTPROC3_UMNT,
498 .p_encode = (kxdrproc_t)mnt_enc_dirpath, 483 .p_encode = (kxdreproc_t)mnt_xdr_enc_dirpath,
499 .p_arglen = MNT_enc_dirpath_sz, 484 .p_arglen = MNT_enc_dirpath_sz,
500 .p_statidx = MOUNTPROC3_UMNT, 485 .p_statidx = MOUNTPROC3_UMNT,
501 .p_name = "UMOUNT", 486 .p_name = "UMOUNT",
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index db6aa3673cf..74aaf3963c1 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -49,12 +49,17 @@ char *nfs_path(const char *base,
49 const struct dentry *dentry, 49 const struct dentry *dentry,
50 char *buffer, ssize_t buflen) 50 char *buffer, ssize_t buflen)
51{ 51{
52 char *end = buffer+buflen; 52 char *end;
53 int namelen; 53 int namelen;
54 unsigned seq;
54 55
56rename_retry:
57 end = buffer+buflen;
55 *--end = '\0'; 58 *--end = '\0';
56 buflen--; 59 buflen--;
57 spin_lock(&dcache_lock); 60
61 seq = read_seqbegin(&rename_lock);
62 rcu_read_lock();
58 while (!IS_ROOT(dentry) && dentry != droot) { 63 while (!IS_ROOT(dentry) && dentry != droot) {
59 namelen = dentry->d_name.len; 64 namelen = dentry->d_name.len;
60 buflen -= namelen + 1; 65 buflen -= namelen + 1;
@@ -65,7 +70,9 @@ char *nfs_path(const char *base,
65 *--end = '/'; 70 *--end = '/';
66 dentry = dentry->d_parent; 71 dentry = dentry->d_parent;
67 } 72 }
68 spin_unlock(&dcache_lock); 73 rcu_read_unlock();
74 if (read_seqretry(&rename_lock, seq))
75 goto rename_retry;
69 if (*end != '/') { 76 if (*end != '/') {
70 if (--buflen < 0) 77 if (--buflen < 0)
71 goto Elong; 78 goto Elong;
@@ -82,7 +89,9 @@ char *nfs_path(const char *base,
82 memcpy(end, base, namelen); 89 memcpy(end, base, namelen);
83 return end; 90 return end;
84Elong_unlock: 91Elong_unlock:
85 spin_unlock(&dcache_lock); 92 rcu_read_unlock();
93 if (read_seqretry(&rename_lock, seq))
94 goto rename_retry;
86Elong: 95Elong:
87 return ERR_PTR(-ENAMETOOLONG); 96 return ERR_PTR(-ENAMETOOLONG);
88} 97}
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 5914a1911c9..792cb13a430 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -61,584 +61,1008 @@
61#define NFS_readdirres_sz (1) 61#define NFS_readdirres_sz (1)
62#define NFS_statfsres_sz (1+NFS_info_sz) 62#define NFS_statfsres_sz (1+NFS_info_sz)
63 63
64
64/* 65/*
65 * Common NFS XDR functions as inlines 66 * While encoding arguments, set up the reply buffer in advance to
67 * receive reply data directly into the page cache.
66 */ 68 */
67static inline __be32 * 69static void prepare_reply_buffer(struct rpc_rqst *req, struct page **pages,
68xdr_encode_fhandle(__be32 *p, const struct nfs_fh *fhandle) 70 unsigned int base, unsigned int len,
71 unsigned int bufsize)
69{ 72{
70 memcpy(p, fhandle->data, NFS2_FHSIZE); 73 struct rpc_auth *auth = req->rq_cred->cr_auth;
71 return p + XDR_QUADLEN(NFS2_FHSIZE); 74 unsigned int replen;
75
76 replen = RPC_REPHDRSIZE + auth->au_rslack + bufsize;
77 xdr_inline_pages(&req->rq_rcv_buf, replen << 2, pages, base, len);
72} 78}
73 79
74static inline __be32 * 80/*
75xdr_decode_fhandle(__be32 *p, struct nfs_fh *fhandle) 81 * Handle decode buffer overflows out-of-line.
82 */
83static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
76{ 84{
77 /* NFSv2 handles have a fixed length */ 85 dprintk("NFS: %s prematurely hit the end of our receive buffer. "
78 fhandle->size = NFS2_FHSIZE; 86 "Remaining buffer length is %tu words.\n",
79 memcpy(fhandle->data, p, NFS2_FHSIZE); 87 func, xdr->end - xdr->p);
80 return p + XDR_QUADLEN(NFS2_FHSIZE); 88}
89
90
91/*
92 * Encode/decode NFSv2 basic data types
93 *
94 * Basic NFSv2 data types are defined in section 2.3 of RFC 1094:
95 * "NFS: Network File System Protocol Specification".
96 *
97 * Not all basic data types have their own encoding and decoding
98 * functions. For run-time efficiency, some data types are encoded
99 * or decoded inline.
100 */
101
102/*
103 * typedef opaque nfsdata<>;
104 */
105static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_readres *result)
106{
107 u32 recvd, count;
108 size_t hdrlen;
109 __be32 *p;
110
111 p = xdr_inline_decode(xdr, 4);
112 if (unlikely(p == NULL))
113 goto out_overflow;
114 count = be32_to_cpup(p);
115 hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
116 recvd = xdr->buf->len - hdrlen;
117 if (unlikely(count > recvd))
118 goto out_cheating;
119out:
120 xdr_read_pages(xdr, count);
121 result->eof = 0; /* NFSv2 does not pass EOF flag on the wire. */
122 result->count = count;
123 return count;
124out_cheating:
125 dprintk("NFS: server cheating in read result: "
126 "count %u > recvd %u\n", count, recvd);
127 count = recvd;
128 goto out;
129out_overflow:
130 print_overflow_msg(__func__, xdr);
131 return -EIO;
132}
133
134/*
135 * enum stat {
136 * NFS_OK = 0,
137 * NFSERR_PERM = 1,
138 * NFSERR_NOENT = 2,
139 * NFSERR_IO = 5,
140 * NFSERR_NXIO = 6,
141 * NFSERR_ACCES = 13,
142 * NFSERR_EXIST = 17,
143 * NFSERR_NODEV = 19,
144 * NFSERR_NOTDIR = 20,
145 * NFSERR_ISDIR = 21,
146 * NFSERR_FBIG = 27,
147 * NFSERR_NOSPC = 28,
148 * NFSERR_ROFS = 30,
149 * NFSERR_NAMETOOLONG = 63,
150 * NFSERR_NOTEMPTY = 66,
151 * NFSERR_DQUOT = 69,
152 * NFSERR_STALE = 70,
153 * NFSERR_WFLUSH = 99
154 * };
155 */
156static int decode_stat(struct xdr_stream *xdr, enum nfs_stat *status)
157{
158 __be32 *p;
159
160 p = xdr_inline_decode(xdr, 4);
161 if (unlikely(p == NULL))
162 goto out_overflow;
163 *status = be32_to_cpup(p);
164 return 0;
165out_overflow:
166 print_overflow_msg(__func__, xdr);
167 return -EIO;
81} 168}
82 169
83static inline __be32* 170/*
84xdr_encode_time(__be32 *p, struct timespec *timep) 171 * 2.3.2. ftype
172 *
173 * enum ftype {
174 * NFNON = 0,
175 * NFREG = 1,
176 * NFDIR = 2,
177 * NFBLK = 3,
178 * NFCHR = 4,
179 * NFLNK = 5
180 * };
181 *
182 */
183static __be32 *xdr_decode_ftype(__be32 *p, u32 *type)
85{ 184{
86 *p++ = htonl(timep->tv_sec); 185 *type = be32_to_cpup(p++);
87 /* Convert nanoseconds into microseconds */ 186 if (unlikely(*type > NF2FIFO))
88 *p++ = htonl(timep->tv_nsec ? timep->tv_nsec / 1000 : 0); 187 *type = NFBAD;
89 return p; 188 return p;
90} 189}
91 190
92static inline __be32* 191/*
93xdr_encode_current_server_time(__be32 *p, struct timespec *timep) 192 * 2.3.3. fhandle
193 *
194 * typedef opaque fhandle[FHSIZE];
195 */
196static void encode_fhandle(struct xdr_stream *xdr, const struct nfs_fh *fh)
94{ 197{
95 /* 198 __be32 *p;
96 * Passing the invalid value useconds=1000000 is a 199
97 * Sun convention for "set to current server time". 200 BUG_ON(fh->size != NFS2_FHSIZE);
98 * It's needed to make permissions checks for the 201 p = xdr_reserve_space(xdr, NFS2_FHSIZE);
99 * "touch" program across v2 mounts to Solaris and 202 memcpy(p, fh->data, NFS2_FHSIZE);
100 * Irix boxes work correctly. See description of 203}
101 * sattr in section 6.1 of "NFS Illustrated" by 204
102 * Brent Callaghan, Addison-Wesley, ISBN 0-201-32750-5 205static int decode_fhandle(struct xdr_stream *xdr, struct nfs_fh *fh)
103 */ 206{
104 *p++ = htonl(timep->tv_sec); 207 __be32 *p;
105 *p++ = htonl(1000000); 208
209 p = xdr_inline_decode(xdr, NFS2_FHSIZE);
210 if (unlikely(p == NULL))
211 goto out_overflow;
212 fh->size = NFS2_FHSIZE;
213 memcpy(fh->data, p, NFS2_FHSIZE);
214 return 0;
215out_overflow:
216 print_overflow_msg(__func__, xdr);
217 return -EIO;
218}
219
220/*
221 * 2.3.4. timeval
222 *
223 * struct timeval {
224 * unsigned int seconds;
225 * unsigned int useconds;
226 * };
227 */
228static __be32 *xdr_encode_time(__be32 *p, const struct timespec *timep)
229{
230 *p++ = cpu_to_be32(timep->tv_sec);
231 if (timep->tv_nsec != 0)
232 *p++ = cpu_to_be32(timep->tv_nsec / NSEC_PER_USEC);
233 else
234 *p++ = cpu_to_be32(0);
106 return p; 235 return p;
107} 236}
108 237
109static inline __be32* 238/*
110xdr_decode_time(__be32 *p, struct timespec *timep) 239 * Passing the invalid value useconds=1000000 is a Sun convention for
240 * "set to current server time". It's needed to make permissions checks
241 * for the "touch" program across v2 mounts to Solaris and Irix servers
242 * work correctly. See description of sattr in section 6.1 of "NFS
243 * Illustrated" by Brent Callaghan, Addison-Wesley, ISBN 0-201-32750-5.
244 */
245static __be32 *xdr_encode_current_server_time(__be32 *p,
246 const struct timespec *timep)
111{ 247{
112 timep->tv_sec = ntohl(*p++); 248 *p++ = cpu_to_be32(timep->tv_sec);
113 /* Convert microseconds into nanoseconds */ 249 *p++ = cpu_to_be32(1000000);
114 timep->tv_nsec = ntohl(*p++) * 1000;
115 return p; 250 return p;
116} 251}
117 252
118static __be32 * 253static __be32 *xdr_decode_time(__be32 *p, struct timespec *timep)
119xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr) 254{
255 timep->tv_sec = be32_to_cpup(p++);
256 timep->tv_nsec = be32_to_cpup(p++) * NSEC_PER_USEC;
257 return p;
258}
259
260/*
261 * 2.3.5. fattr
262 *
263 * struct fattr {
264 * ftype type;
265 * unsigned int mode;
266 * unsigned int nlink;
267 * unsigned int uid;
268 * unsigned int gid;
269 * unsigned int size;
270 * unsigned int blocksize;
271 * unsigned int rdev;
272 * unsigned int blocks;
273 * unsigned int fsid;
274 * unsigned int fileid;
275 * timeval atime;
276 * timeval mtime;
277 * timeval ctime;
278 * };
279 *
280 */
281static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
120{ 282{
121 u32 rdev, type; 283 u32 rdev, type;
122 type = ntohl(*p++); 284 __be32 *p;
123 fattr->mode = ntohl(*p++); 285
124 fattr->nlink = ntohl(*p++); 286 p = xdr_inline_decode(xdr, NFS_fattr_sz << 2);
125 fattr->uid = ntohl(*p++); 287 if (unlikely(p == NULL))
126 fattr->gid = ntohl(*p++); 288 goto out_overflow;
127 fattr->size = ntohl(*p++); 289
128 fattr->du.nfs2.blocksize = ntohl(*p++);
129 rdev = ntohl(*p++);
130 fattr->du.nfs2.blocks = ntohl(*p++);
131 fattr->fsid.major = ntohl(*p++);
132 fattr->fsid.minor = 0;
133 fattr->fileid = ntohl(*p++);
134 p = xdr_decode_time(p, &fattr->atime);
135 p = xdr_decode_time(p, &fattr->mtime);
136 p = xdr_decode_time(p, &fattr->ctime);
137 fattr->valid |= NFS_ATTR_FATTR_V2; 290 fattr->valid |= NFS_ATTR_FATTR_V2;
291
292 p = xdr_decode_ftype(p, &type);
293
294 fattr->mode = be32_to_cpup(p++);
295 fattr->nlink = be32_to_cpup(p++);
296 fattr->uid = be32_to_cpup(p++);
297 fattr->gid = be32_to_cpup(p++);
298 fattr->size = be32_to_cpup(p++);
299 fattr->du.nfs2.blocksize = be32_to_cpup(p++);
300
301 rdev = be32_to_cpup(p++);
138 fattr->rdev = new_decode_dev(rdev); 302 fattr->rdev = new_decode_dev(rdev);
139 if (type == NFCHR && rdev == NFS2_FIFO_DEV) { 303 if (type == (u32)NFCHR && rdev == (u32)NFS2_FIFO_DEV) {
140 fattr->mode = (fattr->mode & ~S_IFMT) | S_IFIFO; 304 fattr->mode = (fattr->mode & ~S_IFMT) | S_IFIFO;
141 fattr->rdev = 0; 305 fattr->rdev = 0;
142 } 306 }
307
308 fattr->du.nfs2.blocks = be32_to_cpup(p++);
309 fattr->fsid.major = be32_to_cpup(p++);
310 fattr->fsid.minor = 0;
311 fattr->fileid = be32_to_cpup(p++);
312
313 p = xdr_decode_time(p, &fattr->atime);
314 p = xdr_decode_time(p, &fattr->mtime);
315 xdr_decode_time(p, &fattr->ctime);
316 return 0;
317out_overflow:
318 print_overflow_msg(__func__, xdr);
319 return -EIO;
320}
321
322/*
323 * 2.3.6. sattr
324 *
325 * struct sattr {
326 * unsigned int mode;
327 * unsigned int uid;
328 * unsigned int gid;
329 * unsigned int size;
330 * timeval atime;
331 * timeval mtime;
332 * };
333 */
334
335#define NFS2_SATTR_NOT_SET (0xffffffff)
336
337static __be32 *xdr_time_not_set(__be32 *p)
338{
339 *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
340 *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
143 return p; 341 return p;
144} 342}
145 343
146static inline __be32 * 344static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr)
147xdr_encode_sattr(__be32 *p, struct iattr *attr)
148{ 345{
149 const __be32 not_set = __constant_htonl(0xFFFFFFFF); 346 __be32 *p;
150 347
151 *p++ = (attr->ia_valid & ATTR_MODE) ? htonl(attr->ia_mode) : not_set; 348 p = xdr_reserve_space(xdr, NFS_sattr_sz << 2);
152 *p++ = (attr->ia_valid & ATTR_UID) ? htonl(attr->ia_uid) : not_set;
153 *p++ = (attr->ia_valid & ATTR_GID) ? htonl(attr->ia_gid) : not_set;
154 *p++ = (attr->ia_valid & ATTR_SIZE) ? htonl(attr->ia_size) : not_set;
155 349
156 if (attr->ia_valid & ATTR_ATIME_SET) { 350 if (attr->ia_valid & ATTR_MODE)
351 *p++ = cpu_to_be32(attr->ia_mode);
352 else
353 *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
354 if (attr->ia_valid & ATTR_UID)
355 *p++ = cpu_to_be32(attr->ia_uid);
356 else
357 *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
358 if (attr->ia_valid & ATTR_GID)
359 *p++ = cpu_to_be32(attr->ia_gid);
360 else
361 *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
362 if (attr->ia_valid & ATTR_SIZE)
363 *p++ = cpu_to_be32((u32)attr->ia_size);
364 else
365 *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
366
367 if (attr->ia_valid & ATTR_ATIME_SET)
157 p = xdr_encode_time(p, &attr->ia_atime); 368 p = xdr_encode_time(p, &attr->ia_atime);
158 } else if (attr->ia_valid & ATTR_ATIME) { 369 else if (attr->ia_valid & ATTR_ATIME)
159 p = xdr_encode_current_server_time(p, &attr->ia_atime); 370 p = xdr_encode_current_server_time(p, &attr->ia_atime);
160 } else { 371 else
161 *p++ = not_set; 372 p = xdr_time_not_set(p);
162 *p++ = not_set; 373 if (attr->ia_valid & ATTR_MTIME_SET)
163 } 374 xdr_encode_time(p, &attr->ia_mtime);
164 375 else if (attr->ia_valid & ATTR_MTIME)
165 if (attr->ia_valid & ATTR_MTIME_SET) { 376 xdr_encode_current_server_time(p, &attr->ia_mtime);
166 p = xdr_encode_time(p, &attr->ia_mtime); 377 else
167 } else if (attr->ia_valid & ATTR_MTIME) { 378 xdr_time_not_set(p);
168 p = xdr_encode_current_server_time(p, &attr->ia_mtime);
169 } else {
170 *p++ = not_set;
171 *p++ = not_set;
172 }
173 return p;
174} 379}
175 380
176/* 381/*
177 * NFS encode functions 382 * 2.3.7. filename
383 *
384 * typedef string filename<MAXNAMLEN>;
178 */ 385 */
386static void encode_filename(struct xdr_stream *xdr,
387 const char *name, u32 length)
388{
389 __be32 *p;
390
391 BUG_ON(length > NFS2_MAXNAMLEN);
392 p = xdr_reserve_space(xdr, 4 + length);
393 xdr_encode_opaque(p, name, length);
394}
395
396static int decode_filename_inline(struct xdr_stream *xdr,
397 const char **name, u32 *length)
398{
399 __be32 *p;
400 u32 count;
401
402 p = xdr_inline_decode(xdr, 4);
403 if (unlikely(p == NULL))
404 goto out_overflow;
405 count = be32_to_cpup(p);
406 if (count > NFS3_MAXNAMLEN)
407 goto out_nametoolong;
408 p = xdr_inline_decode(xdr, count);
409 if (unlikely(p == NULL))
410 goto out_overflow;
411 *name = (const char *)p;
412 *length = count;
413 return 0;
414out_nametoolong:
415 dprintk("NFS: returned filename too long: %u\n", count);
416 return -ENAMETOOLONG;
417out_overflow:
418 print_overflow_msg(__func__, xdr);
419 return -EIO;
420}
421
179/* 422/*
180 * Encode file handle argument 423 * 2.3.8. path
181 * GETATTR, READLINK, STATFS 424 *
425 * typedef string path<MAXPATHLEN>;
182 */ 426 */
183static int 427static void encode_path(struct xdr_stream *xdr, struct page **pages, u32 length)
184nfs_xdr_fhandle(struct rpc_rqst *req, __be32 *p, struct nfs_fh *fh)
185{ 428{
186 p = xdr_encode_fhandle(p, fh); 429 __be32 *p;
187 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); 430
431 BUG_ON(length > NFS2_MAXPATHLEN);
432 p = xdr_reserve_space(xdr, 4);
433 *p = cpu_to_be32(length);
434 xdr_write_pages(xdr, pages, 0, length);
435}
436
437static int decode_path(struct xdr_stream *xdr)
438{
439 u32 length, recvd;
440 size_t hdrlen;
441 __be32 *p;
442
443 p = xdr_inline_decode(xdr, 4);
444 if (unlikely(p == NULL))
445 goto out_overflow;
446 length = be32_to_cpup(p);
447 if (unlikely(length >= xdr->buf->page_len || length > NFS_MAXPATHLEN))
448 goto out_size;
449 hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
450 recvd = xdr->buf->len - hdrlen;
451 if (unlikely(length > recvd))
452 goto out_cheating;
453
454 xdr_read_pages(xdr, length);
455 xdr_terminate_string(xdr->buf, length);
188 return 0; 456 return 0;
457out_size:
458 dprintk("NFS: returned pathname too long: %u\n", length);
459 return -ENAMETOOLONG;
460out_cheating:
461 dprintk("NFS: server cheating in pathname result: "
462 "length %u > received %u\n", length, recvd);
463 return -EIO;
464out_overflow:
465 print_overflow_msg(__func__, xdr);
466 return -EIO;
189} 467}
190 468
191/* 469/*
192 * Encode SETATTR arguments 470 * 2.3.9. attrstat
471 *
472 * union attrstat switch (stat status) {
473 * case NFS_OK:
474 * fattr attributes;
475 * default:
476 * void;
477 * };
193 */ 478 */
194static int 479static int decode_attrstat(struct xdr_stream *xdr, struct nfs_fattr *result)
195nfs_xdr_sattrargs(struct rpc_rqst *req, __be32 *p, struct nfs_sattrargs *args)
196{ 480{
197 p = xdr_encode_fhandle(p, args->fh); 481 enum nfs_stat status;
198 p = xdr_encode_sattr(p, args->sattr); 482 int error;
199 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); 483
200 return 0; 484 error = decode_stat(xdr, &status);
485 if (unlikely(error))
486 goto out;
487 if (status != NFS_OK)
488 goto out_default;
489 error = decode_fattr(xdr, result);
490out:
491 return error;
492out_default:
493 return nfs_stat_to_errno(status);
201} 494}
202 495
203/* 496/*
204 * Encode directory ops argument 497 * 2.3.10. diropargs
205 * LOOKUP, RMDIR 498 *
499 * struct diropargs {
500 * fhandle dir;
501 * filename name;
502 * };
206 */ 503 */
207static int 504static void encode_diropargs(struct xdr_stream *xdr, const struct nfs_fh *fh,
208nfs_xdr_diropargs(struct rpc_rqst *req, __be32 *p, struct nfs_diropargs *args) 505 const char *name, u32 length)
209{ 506{
210 p = xdr_encode_fhandle(p, args->fh); 507 encode_fhandle(xdr, fh);
211 p = xdr_encode_array(p, args->name, args->len); 508 encode_filename(xdr, name, length);
212 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
213 return 0;
214} 509}
215 510
216/* 511/*
217 * Encode REMOVE argument 512 * 2.3.11. diropres
513 *
514 * union diropres switch (stat status) {
515 * case NFS_OK:
516 * struct {
517 * fhandle file;
518 * fattr attributes;
519 * } diropok;
520 * default:
521 * void;
522 * };
218 */ 523 */
219static int 524static int decode_diropok(struct xdr_stream *xdr, struct nfs_diropok *result)
220nfs_xdr_removeargs(struct rpc_rqst *req, __be32 *p, const struct nfs_removeargs *args)
221{ 525{
222 p = xdr_encode_fhandle(p, args->fh); 526 int error;
223 p = xdr_encode_array(p, args->name.name, args->name.len); 527
224 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); 528 error = decode_fhandle(xdr, result->fh);
225 return 0; 529 if (unlikely(error))
530 goto out;
531 error = decode_fattr(xdr, result->fattr);
532out:
533 return error;
534}
535
536static int decode_diropres(struct xdr_stream *xdr, struct nfs_diropok *result)
537{
538 enum nfs_stat status;
539 int error;
540
541 error = decode_stat(xdr, &status);
542 if (unlikely(error))
543 goto out;
544 if (status != NFS_OK)
545 goto out_default;
546 error = decode_diropok(xdr, result);
547out:
548 return error;
549out_default:
550 return nfs_stat_to_errno(status);
226} 551}
227 552
553
228/* 554/*
229 * Arguments to a READ call. Since we read data directly into the page 555 * NFSv2 XDR encode functions
230 * cache, we also set up the reply iovec here so that iov[1] points 556 *
231 * exactly to the page we want to fetch. 557 * NFSv2 argument types are defined in section 2.2 of RFC 1094:
558 * "NFS: Network File System Protocol Specification".
232 */ 559 */
233static int 560
234nfs_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) 561static void nfs2_xdr_enc_fhandle(struct rpc_rqst *req,
562 struct xdr_stream *xdr,
563 const struct nfs_fh *fh)
235{ 564{
236 struct rpc_auth *auth = req->rq_cred->cr_auth; 565 encode_fhandle(xdr, fh);
237 unsigned int replen; 566}
238 u32 offset = (u32)args->offset; 567
568/*
569 * 2.2.3. sattrargs
570 *
571 * struct sattrargs {
572 * fhandle file;
573 * sattr attributes;
574 * };
575 */
576static void nfs2_xdr_enc_sattrargs(struct rpc_rqst *req,
577 struct xdr_stream *xdr,
578 const struct nfs_sattrargs *args)
579{
580 encode_fhandle(xdr, args->fh);
581 encode_sattr(xdr, args->sattr);
582}
583
584static void nfs2_xdr_enc_diropargs(struct rpc_rqst *req,
585 struct xdr_stream *xdr,
586 const struct nfs_diropargs *args)
587{
588 encode_diropargs(xdr, args->fh, args->name, args->len);
589}
590
591static void nfs2_xdr_enc_readlinkargs(struct rpc_rqst *req,
592 struct xdr_stream *xdr,
593 const struct nfs_readlinkargs *args)
594{
595 encode_fhandle(xdr, args->fh);
596 prepare_reply_buffer(req, args->pages, args->pgbase,
597 args->pglen, NFS_readlinkres_sz);
598}
599
600/*
601 * 2.2.7. readargs
602 *
603 * struct readargs {
604 * fhandle file;
605 * unsigned offset;
606 * unsigned count;
607 * unsigned totalcount;
608 * };
609 */
610static void encode_readargs(struct xdr_stream *xdr,
611 const struct nfs_readargs *args)
612{
613 u32 offset = args->offset;
239 u32 count = args->count; 614 u32 count = args->count;
615 __be32 *p;
240 616
241 p = xdr_encode_fhandle(p, args->fh); 617 encode_fhandle(xdr, args->fh);
242 *p++ = htonl(offset);
243 *p++ = htonl(count);
244 *p++ = htonl(count);
245 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
246 618
247 /* Inline the page array */ 619 p = xdr_reserve_space(xdr, 4 + 4 + 4);
248 replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readres_sz) << 2; 620 *p++ = cpu_to_be32(offset);
249 xdr_inline_pages(&req->rq_rcv_buf, replen, 621 *p++ = cpu_to_be32(count);
250 args->pages, args->pgbase, count); 622 *p = cpu_to_be32(count);
623}
624
625static void nfs2_xdr_enc_readargs(struct rpc_rqst *req,
626 struct xdr_stream *xdr,
627 const struct nfs_readargs *args)
628{
629 encode_readargs(xdr, args);
630 prepare_reply_buffer(req, args->pages, args->pgbase,
631 args->count, NFS_readres_sz);
251 req->rq_rcv_buf.flags |= XDRBUF_READ; 632 req->rq_rcv_buf.flags |= XDRBUF_READ;
252 return 0;
253} 633}
254 634
255/* 635/*
256 * Decode READ reply 636 * 2.2.9. writeargs
637 *
638 * struct writeargs {
639 * fhandle file;
640 * unsigned beginoffset;
641 * unsigned offset;
642 * unsigned totalcount;
643 * nfsdata data;
644 * };
257 */ 645 */
258static int 646static void encode_writeargs(struct xdr_stream *xdr,
259nfs_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res) 647 const struct nfs_writeargs *args)
260{ 648{
261 struct kvec *iov = req->rq_rcv_buf.head; 649 u32 offset = args->offset;
262 size_t hdrlen; 650 u32 count = args->count;
263 u32 count, recvd; 651 __be32 *p;
264 int status;
265
266 if ((status = ntohl(*p++)))
267 return nfs_stat_to_errno(status);
268 p = xdr_decode_fattr(p, res->fattr);
269
270 count = ntohl(*p++);
271 res->eof = 0;
272 hdrlen = (u8 *) p - (u8 *) iov->iov_base;
273 if (iov->iov_len < hdrlen) {
274 dprintk("NFS: READ reply header overflowed:"
275 "length %Zu > %Zu\n", hdrlen, iov->iov_len);
276 return -errno_NFSERR_IO;
277 } else if (iov->iov_len != hdrlen) {
278 dprintk("NFS: READ header is short. iovec will be shifted.\n");
279 xdr_shift_buf(&req->rq_rcv_buf, iov->iov_len - hdrlen);
280 }
281 652
282 recvd = req->rq_rcv_buf.len - hdrlen; 653 encode_fhandle(xdr, args->fh);
283 if (count > recvd) {
284 dprintk("NFS: server cheating in read reply: "
285 "count %u > recvd %u\n", count, recvd);
286 count = recvd;
287 }
288 654
289 dprintk("RPC: readres OK count %u\n", count); 655 p = xdr_reserve_space(xdr, 4 + 4 + 4 + 4);
290 if (count < res->count) 656 *p++ = cpu_to_be32(offset);
291 res->count = count; 657 *p++ = cpu_to_be32(offset);
658 *p++ = cpu_to_be32(count);
292 659
293 return count; 660 /* nfsdata */
661 *p = cpu_to_be32(count);
662 xdr_write_pages(xdr, args->pages, args->pgbase, count);
294} 663}
295 664
665static void nfs2_xdr_enc_writeargs(struct rpc_rqst *req,
666 struct xdr_stream *xdr,
667 const struct nfs_writeargs *args)
668{
669 encode_writeargs(xdr, args);
670 xdr->buf->flags |= XDRBUF_WRITE;
671}
296 672
297/* 673/*
298 * Write arguments. Splice the buffer to be written into the iovec. 674 * 2.2.10. createargs
675 *
676 * struct createargs {
677 * diropargs where;
678 * sattr attributes;
679 * };
299 */ 680 */
300static int 681static void nfs2_xdr_enc_createargs(struct rpc_rqst *req,
301nfs_xdr_writeargs(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args) 682 struct xdr_stream *xdr,
683 const struct nfs_createargs *args)
302{ 684{
303 struct xdr_buf *sndbuf = &req->rq_snd_buf; 685 encode_diropargs(xdr, args->fh, args->name, args->len);
304 u32 offset = (u32)args->offset; 686 encode_sattr(xdr, args->sattr);
305 u32 count = args->count; 687}
306
307 p = xdr_encode_fhandle(p, args->fh);
308 *p++ = htonl(offset);
309 *p++ = htonl(offset);
310 *p++ = htonl(count);
311 *p++ = htonl(count);
312 sndbuf->len = xdr_adjust_iovec(sndbuf->head, p);
313 688
314 /* Copy the page array */ 689static void nfs2_xdr_enc_removeargs(struct rpc_rqst *req,
315 xdr_encode_pages(sndbuf, args->pages, args->pgbase, count); 690 struct xdr_stream *xdr,
316 sndbuf->flags |= XDRBUF_WRITE; 691 const struct nfs_removeargs *args)
317 return 0; 692{
693 encode_diropargs(xdr, args->fh, args->name.name, args->name.len);
318} 694}
319 695
320/* 696/*
321 * Encode create arguments 697 * 2.2.12. renameargs
322 * CREATE, MKDIR 698 *
699 * struct renameargs {
700 * diropargs from;
701 * diropargs to;
702 * };
323 */ 703 */
324static int 704static void nfs2_xdr_enc_renameargs(struct rpc_rqst *req,
325nfs_xdr_createargs(struct rpc_rqst *req, __be32 *p, struct nfs_createargs *args) 705 struct xdr_stream *xdr,
706 const struct nfs_renameargs *args)
326{ 707{
327 p = xdr_encode_fhandle(p, args->fh); 708 const struct qstr *old = args->old_name;
328 p = xdr_encode_array(p, args->name, args->len); 709 const struct qstr *new = args->new_name;
329 p = xdr_encode_sattr(p, args->sattr); 710
330 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); 711 encode_diropargs(xdr, args->old_dir, old->name, old->len);
331 return 0; 712 encode_diropargs(xdr, args->new_dir, new->name, new->len);
332} 713}
333 714
334/* 715/*
335 * Encode RENAME arguments 716 * 2.2.13. linkargs
717 *
718 * struct linkargs {
719 * fhandle from;
720 * diropargs to;
721 * };
336 */ 722 */
337static int 723static void nfs2_xdr_enc_linkargs(struct rpc_rqst *req,
338nfs_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs_renameargs *args) 724 struct xdr_stream *xdr,
725 const struct nfs_linkargs *args)
339{ 726{
340 p = xdr_encode_fhandle(p, args->old_dir); 727 encode_fhandle(xdr, args->fromfh);
341 p = xdr_encode_array(p, args->old_name->name, args->old_name->len); 728 encode_diropargs(xdr, args->tofh, args->toname, args->tolen);
342 p = xdr_encode_fhandle(p, args->new_dir);
343 p = xdr_encode_array(p, args->new_name->name, args->new_name->len);
344 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
345 return 0;
346} 729}
347 730
348/* 731/*
349 * Encode LINK arguments 732 * 2.2.14. symlinkargs
733 *
734 * struct symlinkargs {
735 * diropargs from;
736 * path to;
737 * sattr attributes;
738 * };
350 */ 739 */
351static int 740static void nfs2_xdr_enc_symlinkargs(struct rpc_rqst *req,
352nfs_xdr_linkargs(struct rpc_rqst *req, __be32 *p, struct nfs_linkargs *args) 741 struct xdr_stream *xdr,
742 const struct nfs_symlinkargs *args)
353{ 743{
354 p = xdr_encode_fhandle(p, args->fromfh); 744 encode_diropargs(xdr, args->fromfh, args->fromname, args->fromlen);
355 p = xdr_encode_fhandle(p, args->tofh); 745 encode_path(xdr, args->pages, args->pathlen);
356 p = xdr_encode_array(p, args->toname, args->tolen); 746 encode_sattr(xdr, args->sattr);
357 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
358 return 0;
359} 747}
360 748
361/* 749/*
362 * Encode SYMLINK arguments 750 * 2.2.17. readdirargs
751 *
752 * struct readdirargs {
753 * fhandle dir;
754 * nfscookie cookie;
755 * unsigned count;
756 * };
363 */ 757 */
364static int 758static void encode_readdirargs(struct xdr_stream *xdr,
365nfs_xdr_symlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_symlinkargs *args) 759 const struct nfs_readdirargs *args)
366{ 760{
367 struct xdr_buf *sndbuf = &req->rq_snd_buf; 761 __be32 *p;
368 size_t pad;
369 762
370 p = xdr_encode_fhandle(p, args->fromfh); 763 encode_fhandle(xdr, args->fh);
371 p = xdr_encode_array(p, args->fromname, args->fromlen);
372 *p++ = htonl(args->pathlen);
373 sndbuf->len = xdr_adjust_iovec(sndbuf->head, p);
374 764
375 xdr_encode_pages(sndbuf, args->pages, 0, args->pathlen); 765 p = xdr_reserve_space(xdr, 4 + 4);
766 *p++ = cpu_to_be32(args->cookie);
767 *p = cpu_to_be32(args->count);
768}
376 769
377 /* 770static void nfs2_xdr_enc_readdirargs(struct rpc_rqst *req,
378 * xdr_encode_pages may have added a few bytes to ensure the 771 struct xdr_stream *xdr,
379 * pathname ends on a 4-byte boundary. Start encoding the 772 const struct nfs_readdirargs *args)
380 * attributes after the pad bytes. 773{
381 */ 774 encode_readdirargs(xdr, args);
382 pad = sndbuf->tail->iov_len; 775 prepare_reply_buffer(req, args->pages, 0,
383 if (pad > 0) 776 args->count, NFS_readdirres_sz);
384 p++;
385 p = xdr_encode_sattr(p, args->sattr);
386 sndbuf->len += xdr_adjust_iovec(sndbuf->tail, p) - pad;
387 return 0;
388} 777}
389 778
390/* 779/*
391 * Encode arguments to readdir call 780 * NFSv2 XDR decode functions
781 *
782 * NFSv2 result types are defined in section 2.2 of RFC 1094:
783 * "NFS: Network File System Protocol Specification".
392 */ 784 */
393static int 785
394nfs_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs_readdirargs *args) 786static int nfs2_xdr_dec_stat(struct rpc_rqst *req, struct xdr_stream *xdr,
787 void *__unused)
395{ 788{
396 struct rpc_auth *auth = req->rq_cred->cr_auth; 789 enum nfs_stat status;
397 unsigned int replen; 790 int error;
398 u32 count = args->count; 791
792 error = decode_stat(xdr, &status);
793 if (unlikely(error))
794 goto out;
795 if (status != NFS_OK)
796 goto out_default;
797out:
798 return error;
799out_default:
800 return nfs_stat_to_errno(status);
801}
399 802
400 p = xdr_encode_fhandle(p, args->fh); 803static int nfs2_xdr_dec_attrstat(struct rpc_rqst *req, struct xdr_stream *xdr,
401 *p++ = htonl(args->cookie); 804 struct nfs_fattr *result)
402 *p++ = htonl(count); /* see above */ 805{
403 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); 806 return decode_attrstat(xdr, result);
807}
404 808
405 /* Inline the page array */ 809static int nfs2_xdr_dec_diropres(struct rpc_rqst *req, struct xdr_stream *xdr,
406 replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readdirres_sz) << 2; 810 struct nfs_diropok *result)
407 xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0, count); 811{
408 return 0; 812 return decode_diropres(xdr, result);
409} 813}
410 814
411/* 815/*
412 * Decode the result of a readdir call. 816 * 2.2.6. readlinkres
413 * We're not really decoding anymore, we just leave the buffer untouched 817 *
414 * and only check that it is syntactically correct. 818 * union readlinkres switch (stat status) {
415 * The real decoding happens in nfs_decode_entry below, called directly 819 * case NFS_OK:
416 * from nfs_readdir for each entry. 820 * path data;
821 * default:
822 * void;
823 * };
417 */ 824 */
418static int 825static int nfs2_xdr_dec_readlinkres(struct rpc_rqst *req,
419nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy) 826 struct xdr_stream *xdr, void *__unused)
420{ 827{
421 struct xdr_buf *rcvbuf = &req->rq_rcv_buf; 828 enum nfs_stat status;
422 struct kvec *iov = rcvbuf->head; 829 int error;
423 struct page **page; 830
424 size_t hdrlen; 831 error = decode_stat(xdr, &status);
425 unsigned int pglen, recvd; 832 if (unlikely(error))
426 int status; 833 goto out;
427 834 if (status != NFS_OK)
428 if ((status = ntohl(*p++))) 835 goto out_default;
429 return nfs_stat_to_errno(status); 836 error = decode_path(xdr);
430 837out:
431 hdrlen = (u8 *) p - (u8 *) iov->iov_base; 838 return error;
432 if (iov->iov_len < hdrlen) { 839out_default:
433 dprintk("NFS: READDIR reply header overflowed:" 840 return nfs_stat_to_errno(status);
434 "length %Zu > %Zu\n", hdrlen, iov->iov_len); 841}
435 return -errno_NFSERR_IO;
436 } else if (iov->iov_len != hdrlen) {
437 dprintk("NFS: READDIR header is short. iovec will be shifted.\n");
438 xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen);
439 }
440 842
441 pglen = rcvbuf->page_len; 843/*
442 recvd = rcvbuf->len - hdrlen; 844 * 2.2.7. readres
443 if (pglen > recvd) 845 *
444 pglen = recvd; 846 * union readres switch (stat status) {
445 page = rcvbuf->pages; 847 * case NFS_OK:
446 return pglen; 848 * fattr attributes;
849 * nfsdata data;
850 * default:
851 * void;
852 * };
853 */
854static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr,
855 struct nfs_readres *result)
856{
857 enum nfs_stat status;
858 int error;
859
860 error = decode_stat(xdr, &status);
861 if (unlikely(error))
862 goto out;
863 if (status != NFS_OK)
864 goto out_default;
865 error = decode_fattr(xdr, result->fattr);
866 if (unlikely(error))
867 goto out;
868 error = decode_nfsdata(xdr, result);
869out:
870 return error;
871out_default:
872 return nfs_stat_to_errno(status);
447} 873}
448 874
449static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) 875static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr,
876 struct nfs_writeres *result)
450{ 877{
451 dprintk("nfs: %s: prematurely hit end of receive buffer. " 878 /* All NFSv2 writes are "file sync" writes */
452 "Remaining buffer length is %tu words.\n", 879 result->verf->committed = NFS_FILE_SYNC;
453 func, xdr->end - xdr->p); 880 return decode_attrstat(xdr, result->fattr);
454} 881}
455 882
456__be32 * 883/**
457nfs_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_server *server, int plus) 884 * nfs2_decode_dirent - Decode a single NFSv2 directory entry stored in
885 * the local page cache.
886 * @xdr: XDR stream where entry resides
887 * @entry: buffer to fill in with entry data
888 * @plus: boolean indicating whether this should be a readdirplus entry
889 *
890 * Returns zero if successful, otherwise a negative errno value is
891 * returned.
892 *
893 * This function is not invoked during READDIR reply decoding, but
894 * rather whenever an application invokes the getdents(2) system call
895 * on a directory already in our cache.
896 *
897 * 2.2.17. entry
898 *
899 * struct entry {
900 * unsigned fileid;
901 * filename name;
902 * nfscookie cookie;
903 * entry *nextentry;
904 * };
905 */
906int nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
907 int plus)
458{ 908{
459 __be32 *p; 909 __be32 *p;
910 int error;
911
460 p = xdr_inline_decode(xdr, 4); 912 p = xdr_inline_decode(xdr, 4);
461 if (unlikely(!p)) 913 if (unlikely(p == NULL))
462 goto out_overflow; 914 goto out_overflow;
463 if (!ntohl(*p++)) { 915 if (*p++ == xdr_zero) {
464 p = xdr_inline_decode(xdr, 4); 916 p = xdr_inline_decode(xdr, 4);
465 if (unlikely(!p)) 917 if (unlikely(p == NULL))
466 goto out_overflow; 918 goto out_overflow;
467 if (!ntohl(*p++)) 919 if (*p++ == xdr_zero)
468 return ERR_PTR(-EAGAIN); 920 return -EAGAIN;
469 entry->eof = 1; 921 entry->eof = 1;
470 return ERR_PTR(-EBADCOOKIE); 922 return -EBADCOOKIE;
471 } 923 }
472 924
473 p = xdr_inline_decode(xdr, 8); 925 p = xdr_inline_decode(xdr, 4);
474 if (unlikely(!p)) 926 if (unlikely(p == NULL))
475 goto out_overflow; 927 goto out_overflow;
928 entry->ino = be32_to_cpup(p);
476 929
477 entry->ino = ntohl(*p++); 930 error = decode_filename_inline(xdr, &entry->name, &entry->len);
478 entry->len = ntohl(*p++); 931 if (unlikely(error))
932 return error;
479 933
480 p = xdr_inline_decode(xdr, entry->len + 4); 934 /*
481 if (unlikely(!p)) 935 * The type (size and byte order) of nfscookie isn't defined in
936 * RFC 1094. This implementation assumes that it's an XDR uint32.
937 */
938 entry->prev_cookie = entry->cookie;
939 p = xdr_inline_decode(xdr, 4);
940 if (unlikely(p == NULL))
482 goto out_overflow; 941 goto out_overflow;
483 entry->name = (const char *) p; 942 entry->cookie = be32_to_cpup(p);
484 p += XDR_QUADLEN(entry->len);
485 entry->prev_cookie = entry->cookie;
486 entry->cookie = ntohl(*p++);
487 943
488 entry->d_type = DT_UNKNOWN; 944 entry->d_type = DT_UNKNOWN;
489 945
490 p = xdr_inline_peek(xdr, 8); 946 return 0;
491 if (p != NULL)
492 entry->eof = !p[0] && p[1];
493 else
494 entry->eof = 0;
495
496 return p;
497 947
498out_overflow: 948out_overflow:
499 print_overflow_msg(__func__, xdr); 949 print_overflow_msg(__func__, xdr);
500 return ERR_PTR(-EAGAIN); 950 return -EAGAIN;
501}
502
503/*
504 * NFS XDR decode functions
505 */
506/*
507 * Decode simple status reply
508 */
509static int
510nfs_xdr_stat(struct rpc_rqst *req, __be32 *p, void *dummy)
511{
512 int status;
513
514 if ((status = ntohl(*p++)) != 0)
515 status = nfs_stat_to_errno(status);
516 return status;
517} 951}
518 952
519/* 953/*
520 * Decode attrstat reply 954 * 2.2.17. readdirres
521 * GETATTR, SETATTR, WRITE 955 *
522 */ 956 * union readdirres switch (stat status) {
523static int 957 * case NFS_OK:
524nfs_xdr_attrstat(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr) 958 * struct {
525{ 959 * entry *entries;
526 int status; 960 * bool eof;
527 961 * } readdirok;
528 if ((status = ntohl(*p++))) 962 * default:
529 return nfs_stat_to_errno(status); 963 * void;
530 xdr_decode_fattr(p, fattr); 964 * };
531 return 0; 965 *
532} 966 * Read the directory contents into the page cache, but don't
533 967 * touch them. The actual decoding is done by nfs2_decode_dirent()
534/* 968 * during subsequent nfs_readdir() calls.
535 * Decode diropres reply
536 * LOOKUP, CREATE, MKDIR
537 */ 969 */
538static int 970static int decode_readdirok(struct xdr_stream *xdr)
539nfs_xdr_diropres(struct rpc_rqst *req, __be32 *p, struct nfs_diropok *res)
540{ 971{
541 int status; 972 u32 recvd, pglen;
973 size_t hdrlen;
542 974
543 if ((status = ntohl(*p++))) 975 pglen = xdr->buf->page_len;
544 return nfs_stat_to_errno(status); 976 hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
545 p = xdr_decode_fhandle(p, res->fh); 977 recvd = xdr->buf->len - hdrlen;
546 xdr_decode_fattr(p, res->fattr); 978 if (unlikely(pglen > recvd))
547 return 0; 979 goto out_cheating;
980out:
981 xdr_read_pages(xdr, pglen);
982 return pglen;
983out_cheating:
984 dprintk("NFS: server cheating in readdir result: "
985 "pglen %u > recvd %u\n", pglen, recvd);
986 pglen = recvd;
987 goto out;
548} 988}
549 989
550/* 990static int nfs2_xdr_dec_readdirres(struct rpc_rqst *req,
551 * Encode READLINK args 991 struct xdr_stream *xdr, void *__unused)
552 */
553static int
554nfs_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_readlinkargs *args)
555{ 992{
556 struct rpc_auth *auth = req->rq_cred->cr_auth; 993 enum nfs_stat status;
557 unsigned int replen; 994 int error;
558 995
559 p = xdr_encode_fhandle(p, args->fh); 996 error = decode_stat(xdr, &status);
560 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); 997 if (unlikely(error))
561 998 goto out;
562 /* Inline the page array */ 999 if (status != NFS_OK)
563 replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readlinkres_sz) << 2; 1000 goto out_default;
564 xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, args->pgbase, args->pglen); 1001 error = decode_readdirok(xdr);
565 return 0; 1002out:
1003 return error;
1004out_default:
1005 return nfs_stat_to_errno(status);
566} 1006}
567 1007
568/* 1008/*
569 * Decode READLINK reply 1009 * 2.2.18. statfsres
1010 *
1011 * union statfsres (stat status) {
1012 * case NFS_OK:
1013 * struct {
1014 * unsigned tsize;
1015 * unsigned bsize;
1016 * unsigned blocks;
1017 * unsigned bfree;
1018 * unsigned bavail;
1019 * } info;
1020 * default:
1021 * void;
1022 * };
570 */ 1023 */
571static int 1024static int decode_info(struct xdr_stream *xdr, struct nfs2_fsstat *result)
572nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy)
573{ 1025{
574 struct xdr_buf *rcvbuf = &req->rq_rcv_buf; 1026 __be32 *p;
575 struct kvec *iov = rcvbuf->head;
576 size_t hdrlen;
577 u32 len, recvd;
578 int status;
579
580 if ((status = ntohl(*p++)))
581 return nfs_stat_to_errno(status);
582 /* Convert length of symlink */
583 len = ntohl(*p++);
584 if (len >= rcvbuf->page_len) {
585 dprintk("nfs: server returned giant symlink!\n");
586 return -ENAMETOOLONG;
587 }
588 hdrlen = (u8 *) p - (u8 *) iov->iov_base;
589 if (iov->iov_len < hdrlen) {
590 dprintk("NFS: READLINK reply header overflowed:"
591 "length %Zu > %Zu\n", hdrlen, iov->iov_len);
592 return -errno_NFSERR_IO;
593 } else if (iov->iov_len != hdrlen) {
594 dprintk("NFS: READLINK header is short. iovec will be shifted.\n");
595 xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen);
596 }
597 recvd = req->rq_rcv_buf.len - hdrlen;
598 if (recvd < len) {
599 dprintk("NFS: server cheating in readlink reply: "
600 "count %u > recvd %u\n", len, recvd);
601 return -EIO;
602 }
603 1027
604 xdr_terminate_string(rcvbuf, len); 1028 p = xdr_inline_decode(xdr, NFS_info_sz << 2);
1029 if (unlikely(p == NULL))
1030 goto out_overflow;
1031 result->tsize = be32_to_cpup(p++);
1032 result->bsize = be32_to_cpup(p++);
1033 result->blocks = be32_to_cpup(p++);
1034 result->bfree = be32_to_cpup(p++);
1035 result->bavail = be32_to_cpup(p);
605 return 0; 1036 return 0;
1037out_overflow:
1038 print_overflow_msg(__func__, xdr);
1039 return -EIO;
606} 1040}
607 1041
608/* 1042static int nfs2_xdr_dec_statfsres(struct rpc_rqst *req, struct xdr_stream *xdr,
609 * Decode WRITE reply 1043 struct nfs2_fsstat *result)
610 */
611static int
612nfs_xdr_writeres(struct rpc_rqst *req, __be32 *p, struct nfs_writeres *res)
613{ 1044{
614 res->verf->committed = NFS_FILE_SYNC; 1045 enum nfs_stat status;
615 return nfs_xdr_attrstat(req, p, res->fattr); 1046 int error;
1047
1048 error = decode_stat(xdr, &status);
1049 if (unlikely(error))
1050 goto out;
1051 if (status != NFS_OK)
1052 goto out_default;
1053 error = decode_info(xdr, result);
1054out:
1055 return error;
1056out_default:
1057 return nfs_stat_to_errno(status);
616} 1058}
617 1059
618/*
619 * Decode STATFS reply
620 */
621static int
622nfs_xdr_statfsres(struct rpc_rqst *req, __be32 *p, struct nfs2_fsstat *res)
623{
624 int status;
625
626 if ((status = ntohl(*p++)))
627 return nfs_stat_to_errno(status);
628
629 res->tsize = ntohl(*p++);
630 res->bsize = ntohl(*p++);
631 res->blocks = ntohl(*p++);
632 res->bfree = ntohl(*p++);
633 res->bavail = ntohl(*p++);
634 return 0;
635}
636 1060
637/* 1061/*
638 * We need to translate between nfs status return values and 1062 * We need to translate between nfs status return values and
639 * the local errno values which may not be the same. 1063 * the local errno values which may not be the same.
640 */ 1064 */
641static struct { 1065static const struct {
642 int stat; 1066 int stat;
643 int errno; 1067 int errno;
644} nfs_errtbl[] = { 1068} nfs_errtbl[] = {
@@ -678,28 +1102,30 @@ static struct {
678 { -1, -EIO } 1102 { -1, -EIO }
679}; 1103};
680 1104
681/* 1105/**
682 * Convert an NFS error code to a local one. 1106 * nfs_stat_to_errno - convert an NFS status code to a local errno
683 * This one is used jointly by NFSv2 and NFSv3. 1107 * @status: NFS status code to convert
1108 *
1109 * Returns a local errno value, or -EIO if the NFS status code is
1110 * not recognized. This function is used jointly by NFSv2 and NFSv3.
684 */ 1111 */
685int 1112int nfs_stat_to_errno(enum nfs_stat status)
686nfs_stat_to_errno(int stat)
687{ 1113{
688 int i; 1114 int i;
689 1115
690 for (i = 0; nfs_errtbl[i].stat != -1; i++) { 1116 for (i = 0; nfs_errtbl[i].stat != -1; i++) {
691 if (nfs_errtbl[i].stat == stat) 1117 if (nfs_errtbl[i].stat == (int)status)
692 return nfs_errtbl[i].errno; 1118 return nfs_errtbl[i].errno;
693 } 1119 }
694 dprintk("nfs_stat_to_errno: bad nfs status return value: %d\n", stat); 1120 dprintk("NFS: Unrecognized nfs status value: %u\n", status);
695 return nfs_errtbl[i].errno; 1121 return nfs_errtbl[i].errno;
696} 1122}
697 1123
698#define PROC(proc, argtype, restype, timer) \ 1124#define PROC(proc, argtype, restype, timer) \
699[NFSPROC_##proc] = { \ 1125[NFSPROC_##proc] = { \
700 .p_proc = NFSPROC_##proc, \ 1126 .p_proc = NFSPROC_##proc, \
701 .p_encode = (kxdrproc_t) nfs_xdr_##argtype, \ 1127 .p_encode = (kxdreproc_t)nfs2_xdr_enc_##argtype, \
702 .p_decode = (kxdrproc_t) nfs_xdr_##restype, \ 1128 .p_decode = (kxdrdproc_t)nfs2_xdr_dec_##restype, \
703 .p_arglen = NFS_##argtype##_sz, \ 1129 .p_arglen = NFS_##argtype##_sz, \
704 .p_replen = NFS_##restype##_sz, \ 1130 .p_replen = NFS_##restype##_sz, \
705 .p_timer = timer, \ 1131 .p_timer = timer, \
@@ -707,21 +1133,21 @@ nfs_stat_to_errno(int stat)
707 .p_name = #proc, \ 1133 .p_name = #proc, \
708 } 1134 }
709struct rpc_procinfo nfs_procedures[] = { 1135struct rpc_procinfo nfs_procedures[] = {
710 PROC(GETATTR, fhandle, attrstat, 1), 1136 PROC(GETATTR, fhandle, attrstat, 1),
711 PROC(SETATTR, sattrargs, attrstat, 0), 1137 PROC(SETATTR, sattrargs, attrstat, 0),
712 PROC(LOOKUP, diropargs, diropres, 2), 1138 PROC(LOOKUP, diropargs, diropres, 2),
713 PROC(READLINK, readlinkargs, readlinkres, 3), 1139 PROC(READLINK, readlinkargs, readlinkres, 3),
714 PROC(READ, readargs, readres, 3), 1140 PROC(READ, readargs, readres, 3),
715 PROC(WRITE, writeargs, writeres, 4), 1141 PROC(WRITE, writeargs, writeres, 4),
716 PROC(CREATE, createargs, diropres, 0), 1142 PROC(CREATE, createargs, diropres, 0),
717 PROC(REMOVE, removeargs, stat, 0), 1143 PROC(REMOVE, removeargs, stat, 0),
718 PROC(RENAME, renameargs, stat, 0), 1144 PROC(RENAME, renameargs, stat, 0),
719 PROC(LINK, linkargs, stat, 0), 1145 PROC(LINK, linkargs, stat, 0),
720 PROC(SYMLINK, symlinkargs, stat, 0), 1146 PROC(SYMLINK, symlinkargs, stat, 0),
721 PROC(MKDIR, createargs, diropres, 0), 1147 PROC(MKDIR, createargs, diropres, 0),
722 PROC(RMDIR, diropargs, stat, 0), 1148 PROC(RMDIR, diropargs, stat, 0),
723 PROC(READDIR, readdirargs, readdirres, 3), 1149 PROC(READDIR, readdirargs, readdirres, 3),
724 PROC(STATFS, fhandle, statfsres, 0), 1150 PROC(STATFS, fhandle, statfsres, 0),
725}; 1151};
726 1152
727struct rpc_version nfs_version2 = { 1153struct rpc_version nfs_version2 = {
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index f6cc60f06da..01c5e8b1941 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -37,18 +37,16 @@
37#define NFS3_filename_sz (1+(NFS3_MAXNAMLEN>>2)) 37#define NFS3_filename_sz (1+(NFS3_MAXNAMLEN>>2))
38#define NFS3_path_sz (1+(NFS3_MAXPATHLEN>>2)) 38#define NFS3_path_sz (1+(NFS3_MAXPATHLEN>>2))
39#define NFS3_fattr_sz (21) 39#define NFS3_fattr_sz (21)
40#define NFS3_wcc_attr_sz (6) 40#define NFS3_cookieverf_sz (NFS3_COOKIEVERFSIZE>>2)
41#define NFS3_wcc_attr_sz (6)
41#define NFS3_pre_op_attr_sz (1+NFS3_wcc_attr_sz) 42#define NFS3_pre_op_attr_sz (1+NFS3_wcc_attr_sz)
42#define NFS3_post_op_attr_sz (1+NFS3_fattr_sz) 43#define NFS3_post_op_attr_sz (1+NFS3_fattr_sz)
43#define NFS3_wcc_data_sz (NFS3_pre_op_attr_sz+NFS3_post_op_attr_sz) 44#define NFS3_wcc_data_sz (NFS3_pre_op_attr_sz+NFS3_post_op_attr_sz)
44#define NFS3_fsstat_sz
45#define NFS3_fsinfo_sz
46#define NFS3_pathconf_sz
47#define NFS3_entry_sz (NFS3_filename_sz+3)
48
49#define NFS3_sattrargs_sz (NFS3_fh_sz+NFS3_sattr_sz+3)
50#define NFS3_diropargs_sz (NFS3_fh_sz+NFS3_filename_sz) 45#define NFS3_diropargs_sz (NFS3_fh_sz+NFS3_filename_sz)
51#define NFS3_removeargs_sz (NFS3_fh_sz+NFS3_filename_sz) 46
47#define NFS3_getattrargs_sz (NFS3_fh_sz)
48#define NFS3_setattrargs_sz (NFS3_fh_sz+NFS3_sattr_sz+3)
49#define NFS3_lookupargs_sz (NFS3_fh_sz+NFS3_filename_sz)
52#define NFS3_accessargs_sz (NFS3_fh_sz+1) 50#define NFS3_accessargs_sz (NFS3_fh_sz+1)
53#define NFS3_readlinkargs_sz (NFS3_fh_sz) 51#define NFS3_readlinkargs_sz (NFS3_fh_sz)
54#define NFS3_readargs_sz (NFS3_fh_sz+3) 52#define NFS3_readargs_sz (NFS3_fh_sz+3)
@@ -57,14 +55,16 @@
57#define NFS3_mkdirargs_sz (NFS3_diropargs_sz+NFS3_sattr_sz) 55#define NFS3_mkdirargs_sz (NFS3_diropargs_sz+NFS3_sattr_sz)
58#define NFS3_symlinkargs_sz (NFS3_diropargs_sz+1+NFS3_sattr_sz) 56#define NFS3_symlinkargs_sz (NFS3_diropargs_sz+1+NFS3_sattr_sz)
59#define NFS3_mknodargs_sz (NFS3_diropargs_sz+2+NFS3_sattr_sz) 57#define NFS3_mknodargs_sz (NFS3_diropargs_sz+2+NFS3_sattr_sz)
58#define NFS3_removeargs_sz (NFS3_fh_sz+NFS3_filename_sz)
60#define NFS3_renameargs_sz (NFS3_diropargs_sz+NFS3_diropargs_sz) 59#define NFS3_renameargs_sz (NFS3_diropargs_sz+NFS3_diropargs_sz)
61#define NFS3_linkargs_sz (NFS3_fh_sz+NFS3_diropargs_sz) 60#define NFS3_linkargs_sz (NFS3_fh_sz+NFS3_diropargs_sz)
62#define NFS3_readdirargs_sz (NFS3_fh_sz+2) 61#define NFS3_readdirargs_sz (NFS3_fh_sz+NFS3_cookieverf_sz+3)
62#define NFS3_readdirplusargs_sz (NFS3_fh_sz+NFS3_cookieverf_sz+4)
63#define NFS3_commitargs_sz (NFS3_fh_sz+3) 63#define NFS3_commitargs_sz (NFS3_fh_sz+3)
64 64
65#define NFS3_attrstat_sz (1+NFS3_fattr_sz) 65#define NFS3_getattrres_sz (1+NFS3_fattr_sz)
66#define NFS3_wccstat_sz (1+NFS3_wcc_data_sz) 66#define NFS3_setattrres_sz (1+NFS3_wcc_data_sz)
67#define NFS3_removeres_sz (NFS3_wccstat_sz) 67#define NFS3_removeres_sz (NFS3_setattrres_sz)
68#define NFS3_lookupres_sz (1+NFS3_fh_sz+(2 * NFS3_post_op_attr_sz)) 68#define NFS3_lookupres_sz (1+NFS3_fh_sz+(2 * NFS3_post_op_attr_sz))
69#define NFS3_accessres_sz (1+NFS3_post_op_attr_sz+1) 69#define NFS3_accessres_sz (1+NFS3_post_op_attr_sz+1)
70#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1) 70#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1)
@@ -100,1079 +100,2362 @@ static const umode_t nfs_type2fmt[] = {
100 [NF3FIFO] = S_IFIFO, 100 [NF3FIFO] = S_IFIFO,
101}; 101};
102 102
103/*
104 * While encoding arguments, set up the reply buffer in advance to
105 * receive reply data directly into the page cache.
106 */
107static void prepare_reply_buffer(struct rpc_rqst *req, struct page **pages,
108 unsigned int base, unsigned int len,
109 unsigned int bufsize)
110{
111 struct rpc_auth *auth = req->rq_cred->cr_auth;
112 unsigned int replen;
113
114 replen = RPC_REPHDRSIZE + auth->au_rslack + bufsize;
115 xdr_inline_pages(&req->rq_rcv_buf, replen << 2, pages, base, len);
116}
117
118/*
119 * Handle decode buffer overflows out-of-line.
120 */
103static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) 121static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
104{ 122{
105 dprintk("nfs: %s: prematurely hit end of receive buffer. " 123 dprintk("NFS: %s prematurely hit the end of our receive buffer. "
106 "Remaining buffer length is %tu words.\n", 124 "Remaining buffer length is %tu words.\n",
107 func, xdr->end - xdr->p); 125 func, xdr->end - xdr->p);
108} 126}
109 127
128
110/* 129/*
111 * Common NFS XDR functions as inlines 130 * Encode/decode NFSv3 basic data types
131 *
132 * Basic NFSv3 data types are defined in section 2.5 of RFC 1813:
133 * "NFS Version 3 Protocol Specification".
134 *
135 * Not all basic data types have their own encoding and decoding
136 * functions. For run-time efficiency, some data types are encoded
137 * or decoded inline.
112 */ 138 */
113static inline __be32 * 139
114xdr_encode_fhandle(__be32 *p, const struct nfs_fh *fh) 140static void encode_uint32(struct xdr_stream *xdr, u32 value)
115{ 141{
116 return xdr_encode_array(p, fh->data, fh->size); 142 __be32 *p = xdr_reserve_space(xdr, 4);
143 *p = cpu_to_be32(value);
117} 144}
118 145
119static inline __be32 * 146static int decode_uint32(struct xdr_stream *xdr, u32 *value)
120xdr_decode_fhandle(__be32 *p, struct nfs_fh *fh)
121{ 147{
122 if ((fh->size = ntohl(*p++)) <= NFS3_FHSIZE) { 148 __be32 *p;
123 memcpy(fh->data, p, fh->size); 149
124 return p + XDR_QUADLEN(fh->size); 150 p = xdr_inline_decode(xdr, 4);
125 } 151 if (unlikely(p == NULL))
126 return NULL; 152 goto out_overflow;
153 *value = be32_to_cpup(p);
154 return 0;
155out_overflow:
156 print_overflow_msg(__func__, xdr);
157 return -EIO;
158}
159
160static int decode_uint64(struct xdr_stream *xdr, u64 *value)
161{
162 __be32 *p;
163
164 p = xdr_inline_decode(xdr, 8);
165 if (unlikely(p == NULL))
166 goto out_overflow;
167 xdr_decode_hyper(p, value);
168 return 0;
169out_overflow:
170 print_overflow_msg(__func__, xdr);
171 return -EIO;
172}
173
174/*
175 * fileid3
176 *
177 * typedef uint64 fileid3;
178 */
179static __be32 *xdr_decode_fileid3(__be32 *p, u64 *fileid)
180{
181 return xdr_decode_hyper(p, fileid);
182}
183
184static int decode_fileid3(struct xdr_stream *xdr, u64 *fileid)
185{
186 return decode_uint64(xdr, fileid);
187}
188
189/*
190 * filename3
191 *
192 * typedef string filename3<>;
193 */
194static void encode_filename3(struct xdr_stream *xdr,
195 const char *name, u32 length)
196{
197 __be32 *p;
198
199 BUG_ON(length > NFS3_MAXNAMLEN);
200 p = xdr_reserve_space(xdr, 4 + length);
201 xdr_encode_opaque(p, name, length);
127} 202}
128 203
129static inline __be32 * 204static int decode_inline_filename3(struct xdr_stream *xdr,
130xdr_decode_fhandle_stream(struct xdr_stream *xdr, struct nfs_fh *fh) 205 const char **name, u32 *length)
131{ 206{
132 __be32 *p; 207 __be32 *p;
208 u32 count;
209
133 p = xdr_inline_decode(xdr, 4); 210 p = xdr_inline_decode(xdr, 4);
134 if (unlikely(!p)) 211 if (unlikely(p == NULL))
212 goto out_overflow;
213 count = be32_to_cpup(p);
214 if (count > NFS3_MAXNAMLEN)
215 goto out_nametoolong;
216 p = xdr_inline_decode(xdr, count);
217 if (unlikely(p == NULL))
135 goto out_overflow; 218 goto out_overflow;
136 fh->size = ntohl(*p++); 219 *name = (const char *)p;
220 *length = count;
221 return 0;
137 222
138 if (fh->size <= NFS3_FHSIZE) { 223out_nametoolong:
139 p = xdr_inline_decode(xdr, fh->size); 224 dprintk("NFS: returned filename too long: %u\n", count);
140 if (unlikely(!p)) 225 return -ENAMETOOLONG;
141 goto out_overflow; 226out_overflow:
142 memcpy(fh->data, p, fh->size); 227 print_overflow_msg(__func__, xdr);
143 return p + XDR_QUADLEN(fh->size); 228 return -EIO;
144 } 229}
145 return NULL; 230
231/*
232 * nfspath3
233 *
234 * typedef string nfspath3<>;
235 */
236static void encode_nfspath3(struct xdr_stream *xdr, struct page **pages,
237 const u32 length)
238{
239 BUG_ON(length > NFS3_MAXPATHLEN);
240 encode_uint32(xdr, length);
241 xdr_write_pages(xdr, pages, 0, length);
242}
146 243
244static int decode_nfspath3(struct xdr_stream *xdr)
245{
246 u32 recvd, count;
247 size_t hdrlen;
248 __be32 *p;
249
250 p = xdr_inline_decode(xdr, 4);
251 if (unlikely(p == NULL))
252 goto out_overflow;
253 count = be32_to_cpup(p);
254 if (unlikely(count >= xdr->buf->page_len || count > NFS3_MAXPATHLEN))
255 goto out_nametoolong;
256 hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
257 recvd = xdr->buf->len - hdrlen;
258 if (unlikely(count > recvd))
259 goto out_cheating;
260
261 xdr_read_pages(xdr, count);
262 xdr_terminate_string(xdr->buf, count);
263 return 0;
264
265out_nametoolong:
266 dprintk("NFS: returned pathname too long: %u\n", count);
267 return -ENAMETOOLONG;
268out_cheating:
269 dprintk("NFS: server cheating in pathname result: "
270 "count %u > recvd %u\n", count, recvd);
271 return -EIO;
147out_overflow: 272out_overflow:
148 print_overflow_msg(__func__, xdr); 273 print_overflow_msg(__func__, xdr);
149 return ERR_PTR(-EIO); 274 return -EIO;
150} 275}
151 276
152/* 277/*
153 * Encode/decode time. 278 * cookie3
279 *
280 * typedef uint64 cookie3
154 */ 281 */
155static inline __be32 * 282static __be32 *xdr_encode_cookie3(__be32 *p, u64 cookie)
156xdr_encode_time3(__be32 *p, struct timespec *timep)
157{ 283{
158 *p++ = htonl(timep->tv_sec); 284 return xdr_encode_hyper(p, cookie);
159 *p++ = htonl(timep->tv_nsec);
160 return p;
161} 285}
162 286
163static inline __be32 * 287static int decode_cookie3(struct xdr_stream *xdr, u64 *cookie)
164xdr_decode_time3(__be32 *p, struct timespec *timep)
165{ 288{
166 timep->tv_sec = ntohl(*p++); 289 return decode_uint64(xdr, cookie);
167 timep->tv_nsec = ntohl(*p++); 290}
168 return p; 291
292/*
293 * cookieverf3
294 *
295 * typedef opaque cookieverf3[NFS3_COOKIEVERFSIZE];
296 */
297static __be32 *xdr_encode_cookieverf3(__be32 *p, const __be32 *verifier)
298{
299 memcpy(p, verifier, NFS3_COOKIEVERFSIZE);
300 return p + XDR_QUADLEN(NFS3_COOKIEVERFSIZE);
301}
302
303static int decode_cookieverf3(struct xdr_stream *xdr, __be32 *verifier)
304{
305 __be32 *p;
306
307 p = xdr_inline_decode(xdr, NFS3_COOKIEVERFSIZE);
308 if (unlikely(p == NULL))
309 goto out_overflow;
310 memcpy(verifier, p, NFS3_COOKIEVERFSIZE);
311 return 0;
312out_overflow:
313 print_overflow_msg(__func__, xdr);
314 return -EIO;
315}
316
317/*
318 * createverf3
319 *
320 * typedef opaque createverf3[NFS3_CREATEVERFSIZE];
321 */
322static void encode_createverf3(struct xdr_stream *xdr, const __be32 *verifier)
323{
324 __be32 *p;
325
326 p = xdr_reserve_space(xdr, NFS3_CREATEVERFSIZE);
327 memcpy(p, verifier, NFS3_CREATEVERFSIZE);
328}
329
330static int decode_writeverf3(struct xdr_stream *xdr, __be32 *verifier)
331{
332 __be32 *p;
333
334 p = xdr_inline_decode(xdr, NFS3_WRITEVERFSIZE);
335 if (unlikely(p == NULL))
336 goto out_overflow;
337 memcpy(verifier, p, NFS3_WRITEVERFSIZE);
338 return 0;
339out_overflow:
340 print_overflow_msg(__func__, xdr);
341 return -EIO;
342}
343
344/*
345 * size3
346 *
347 * typedef uint64 size3;
348 */
349static __be32 *xdr_decode_size3(__be32 *p, u64 *size)
350{
351 return xdr_decode_hyper(p, size);
352}
353
354/*
355 * nfsstat3
356 *
357 * enum nfsstat3 {
358 * NFS3_OK = 0,
359 * ...
360 * }
361 */
362#define NFS3_OK NFS_OK
363
364static int decode_nfsstat3(struct xdr_stream *xdr, enum nfs_stat *status)
365{
366 __be32 *p;
367
368 p = xdr_inline_decode(xdr, 4);
369 if (unlikely(p == NULL))
370 goto out_overflow;
371 *status = be32_to_cpup(p);
372 return 0;
373out_overflow:
374 print_overflow_msg(__func__, xdr);
375 return -EIO;
376}
377
378/*
379 * ftype3
380 *
381 * enum ftype3 {
382 * NF3REG = 1,
383 * NF3DIR = 2,
384 * NF3BLK = 3,
385 * NF3CHR = 4,
386 * NF3LNK = 5,
387 * NF3SOCK = 6,
388 * NF3FIFO = 7
389 * };
390 */
391static void encode_ftype3(struct xdr_stream *xdr, const u32 type)
392{
393 BUG_ON(type > NF3FIFO);
394 encode_uint32(xdr, type);
169} 395}
170 396
171static __be32 * 397static __be32 *xdr_decode_ftype3(__be32 *p, umode_t *mode)
172xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr)
173{ 398{
174 unsigned int type, major, minor; 399 u32 type;
175 umode_t fmode;
176 400
177 type = ntohl(*p++); 401 type = be32_to_cpup(p++);
178 if (type > NF3FIFO) 402 if (type > NF3FIFO)
179 type = NF3NON; 403 type = NF3NON;
180 fmode = nfs_type2fmt[type]; 404 *mode = nfs_type2fmt[type];
181 fattr->mode = (ntohl(*p++) & ~S_IFMT) | fmode; 405 return p;
182 fattr->nlink = ntohl(*p++); 406}
183 fattr->uid = ntohl(*p++);
184 fattr->gid = ntohl(*p++);
185 p = xdr_decode_hyper(p, &fattr->size);
186 p = xdr_decode_hyper(p, &fattr->du.nfs3.used);
187
188 /* Turn remote device info into Linux-specific dev_t */
189 major = ntohl(*p++);
190 minor = ntohl(*p++);
191 fattr->rdev = MKDEV(major, minor);
192 if (MAJOR(fattr->rdev) != major || MINOR(fattr->rdev) != minor)
193 fattr->rdev = 0;
194 407
195 p = xdr_decode_hyper(p, &fattr->fsid.major); 408/*
196 fattr->fsid.minor = 0; 409 * specdata3
197 p = xdr_decode_hyper(p, &fattr->fileid); 410 *
198 p = xdr_decode_time3(p, &fattr->atime); 411 * struct specdata3 {
199 p = xdr_decode_time3(p, &fattr->mtime); 412 * uint32 specdata1;
200 p = xdr_decode_time3(p, &fattr->ctime); 413 * uint32 specdata2;
414 * };
415 */
416static void encode_specdata3(struct xdr_stream *xdr, const dev_t rdev)
417{
418 __be32 *p;
201 419
202 /* Update the mode bits */ 420 p = xdr_reserve_space(xdr, 8);
203 fattr->valid |= NFS_ATTR_FATTR_V3; 421 *p++ = cpu_to_be32(MAJOR(rdev));
422 *p = cpu_to_be32(MINOR(rdev));
423}
424
425static __be32 *xdr_decode_specdata3(__be32 *p, dev_t *rdev)
426{
427 unsigned int major, minor;
428
429 major = be32_to_cpup(p++);
430 minor = be32_to_cpup(p++);
431 *rdev = MKDEV(major, minor);
432 if (MAJOR(*rdev) != major || MINOR(*rdev) != minor)
433 *rdev = 0;
434 return p;
435}
436
437/*
438 * nfs_fh3
439 *
440 * struct nfs_fh3 {
441 * opaque data<NFS3_FHSIZE>;
442 * };
443 */
444static void encode_nfs_fh3(struct xdr_stream *xdr, const struct nfs_fh *fh)
445{
446 __be32 *p;
447
448 BUG_ON(fh->size > NFS3_FHSIZE);
449 p = xdr_reserve_space(xdr, 4 + fh->size);
450 xdr_encode_opaque(p, fh->data, fh->size);
451}
452
453static int decode_nfs_fh3(struct xdr_stream *xdr, struct nfs_fh *fh)
454{
455 u32 length;
456 __be32 *p;
457
458 p = xdr_inline_decode(xdr, 4);
459 if (unlikely(p == NULL))
460 goto out_overflow;
461 length = be32_to_cpup(p++);
462 if (unlikely(length > NFS3_FHSIZE))
463 goto out_toobig;
464 p = xdr_inline_decode(xdr, length);
465 if (unlikely(p == NULL))
466 goto out_overflow;
467 fh->size = length;
468 memcpy(fh->data, p, length);
469 return 0;
470out_toobig:
471 dprintk("NFS: file handle size (%u) too big\n", length);
472 return -E2BIG;
473out_overflow:
474 print_overflow_msg(__func__, xdr);
475 return -EIO;
476}
477
478static void zero_nfs_fh3(struct nfs_fh *fh)
479{
480 memset(fh, 0, sizeof(*fh));
481}
482
483/*
484 * nfstime3
485 *
486 * struct nfstime3 {
487 * uint32 seconds;
488 * uint32 nseconds;
489 * };
490 */
491static __be32 *xdr_encode_nfstime3(__be32 *p, const struct timespec *timep)
492{
493 *p++ = cpu_to_be32(timep->tv_sec);
494 *p++ = cpu_to_be32(timep->tv_nsec);
204 return p; 495 return p;
205} 496}
206 497
207static inline __be32 * 498static __be32 *xdr_decode_nfstime3(__be32 *p, struct timespec *timep)
208xdr_encode_sattr(__be32 *p, struct iattr *attr)
209{ 499{
500 timep->tv_sec = be32_to_cpup(p++);
501 timep->tv_nsec = be32_to_cpup(p++);
502 return p;
503}
504
505/*
506 * sattr3
507 *
508 * enum time_how {
509 * DONT_CHANGE = 0,
510 * SET_TO_SERVER_TIME = 1,
511 * SET_TO_CLIENT_TIME = 2
512 * };
513 *
514 * union set_mode3 switch (bool set_it) {
515 * case TRUE:
516 * mode3 mode;
517 * default:
518 * void;
519 * };
520 *
521 * union set_uid3 switch (bool set_it) {
522 * case TRUE:
523 * uid3 uid;
524 * default:
525 * void;
526 * };
527 *
528 * union set_gid3 switch (bool set_it) {
529 * case TRUE:
530 * gid3 gid;
531 * default:
532 * void;
533 * };
534 *
535 * union set_size3 switch (bool set_it) {
536 * case TRUE:
537 * size3 size;
538 * default:
539 * void;
540 * };
541 *
542 * union set_atime switch (time_how set_it) {
543 * case SET_TO_CLIENT_TIME:
544 * nfstime3 atime;
545 * default:
546 * void;
547 * };
548 *
549 * union set_mtime switch (time_how set_it) {
550 * case SET_TO_CLIENT_TIME:
551 * nfstime3 mtime;
552 * default:
553 * void;
554 * };
555 *
556 * struct sattr3 {
557 * set_mode3 mode;
558 * set_uid3 uid;
559 * set_gid3 gid;
560 * set_size3 size;
561 * set_atime atime;
562 * set_mtime mtime;
563 * };
564 */
565static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr)
566{
567 u32 nbytes;
568 __be32 *p;
569
570 /*
571 * In order to make only a single xdr_reserve_space() call,
572 * pre-compute the total number of bytes to be reserved.
573 * Six boolean values, one for each set_foo field, are always
574 * present in the encoded result, so start there.
575 */
576 nbytes = 6 * 4;
577 if (attr->ia_valid & ATTR_MODE)
578 nbytes += 4;
579 if (attr->ia_valid & ATTR_UID)
580 nbytes += 4;
581 if (attr->ia_valid & ATTR_GID)
582 nbytes += 4;
583 if (attr->ia_valid & ATTR_SIZE)
584 nbytes += 8;
585 if (attr->ia_valid & ATTR_ATIME_SET)
586 nbytes += 8;
587 if (attr->ia_valid & ATTR_MTIME_SET)
588 nbytes += 8;
589 p = xdr_reserve_space(xdr, nbytes);
590
210 if (attr->ia_valid & ATTR_MODE) { 591 if (attr->ia_valid & ATTR_MODE) {
211 *p++ = xdr_one; 592 *p++ = xdr_one;
212 *p++ = htonl(attr->ia_mode & S_IALLUGO); 593 *p++ = cpu_to_be32(attr->ia_mode & S_IALLUGO);
213 } else { 594 } else
214 *p++ = xdr_zero; 595 *p++ = xdr_zero;
215 } 596
216 if (attr->ia_valid & ATTR_UID) { 597 if (attr->ia_valid & ATTR_UID) {
217 *p++ = xdr_one; 598 *p++ = xdr_one;
218 *p++ = htonl(attr->ia_uid); 599 *p++ = cpu_to_be32(attr->ia_uid);
219 } else { 600 } else
220 *p++ = xdr_zero; 601 *p++ = xdr_zero;
221 } 602
222 if (attr->ia_valid & ATTR_GID) { 603 if (attr->ia_valid & ATTR_GID) {
223 *p++ = xdr_one; 604 *p++ = xdr_one;
224 *p++ = htonl(attr->ia_gid); 605 *p++ = cpu_to_be32(attr->ia_gid);
225 } else { 606 } else
226 *p++ = xdr_zero; 607 *p++ = xdr_zero;
227 } 608
228 if (attr->ia_valid & ATTR_SIZE) { 609 if (attr->ia_valid & ATTR_SIZE) {
229 *p++ = xdr_one; 610 *p++ = xdr_one;
230 p = xdr_encode_hyper(p, (__u64) attr->ia_size); 611 p = xdr_encode_hyper(p, (u64)attr->ia_size);
231 } else { 612 } else
232 *p++ = xdr_zero; 613 *p++ = xdr_zero;
233 } 614
234 if (attr->ia_valid & ATTR_ATIME_SET) { 615 if (attr->ia_valid & ATTR_ATIME_SET) {
235 *p++ = xdr_two; 616 *p++ = xdr_two;
236 p = xdr_encode_time3(p, &attr->ia_atime); 617 p = xdr_encode_nfstime3(p, &attr->ia_atime);
237 } else if (attr->ia_valid & ATTR_ATIME) { 618 } else if (attr->ia_valid & ATTR_ATIME) {
238 *p++ = xdr_one; 619 *p++ = xdr_one;
239 } else { 620 } else
240 *p++ = xdr_zero; 621 *p++ = xdr_zero;
241 } 622
242 if (attr->ia_valid & ATTR_MTIME_SET) { 623 if (attr->ia_valid & ATTR_MTIME_SET) {
243 *p++ = xdr_two; 624 *p++ = xdr_two;
244 p = xdr_encode_time3(p, &attr->ia_mtime); 625 xdr_encode_nfstime3(p, &attr->ia_mtime);
245 } else if (attr->ia_valid & ATTR_MTIME) { 626 } else if (attr->ia_valid & ATTR_MTIME) {
246 *p++ = xdr_one; 627 *p = xdr_one;
247 } else { 628 } else
248 *p++ = xdr_zero; 629 *p = xdr_zero;
249 } 630}
250 return p; 631
632/*
633 * fattr3
634 *
635 * struct fattr3 {
636 * ftype3 type;
637 * mode3 mode;
638 * uint32 nlink;
639 * uid3 uid;
640 * gid3 gid;
641 * size3 size;
642 * size3 used;
643 * specdata3 rdev;
644 * uint64 fsid;
645 * fileid3 fileid;
646 * nfstime3 atime;
647 * nfstime3 mtime;
648 * nfstime3 ctime;
649 * };
650 */
651static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)
652{
653 umode_t fmode;
654 __be32 *p;
655
656 p = xdr_inline_decode(xdr, NFS3_fattr_sz << 2);
657 if (unlikely(p == NULL))
658 goto out_overflow;
659
660 p = xdr_decode_ftype3(p, &fmode);
661
662 fattr->mode = (be32_to_cpup(p++) & ~S_IFMT) | fmode;
663 fattr->nlink = be32_to_cpup(p++);
664 fattr->uid = be32_to_cpup(p++);
665 fattr->gid = be32_to_cpup(p++);
666
667 p = xdr_decode_size3(p, &fattr->size);
668 p = xdr_decode_size3(p, &fattr->du.nfs3.used);
669 p = xdr_decode_specdata3(p, &fattr->rdev);
670
671 p = xdr_decode_hyper(p, &fattr->fsid.major);
672 fattr->fsid.minor = 0;
673
674 p = xdr_decode_fileid3(p, &fattr->fileid);
675 p = xdr_decode_nfstime3(p, &fattr->atime);
676 p = xdr_decode_nfstime3(p, &fattr->mtime);
677 xdr_decode_nfstime3(p, &fattr->ctime);
678
679 fattr->valid |= NFS_ATTR_FATTR_V3;
680 return 0;
681out_overflow:
682 print_overflow_msg(__func__, xdr);
683 return -EIO;
251} 684}
252 685
253static inline __be32 * 686/*
254xdr_decode_wcc_attr(__be32 *p, struct nfs_fattr *fattr) 687 * post_op_attr
688 *
689 * union post_op_attr switch (bool attributes_follow) {
690 * case TRUE:
691 * fattr3 attributes;
692 * case FALSE:
693 * void;
694 * };
695 */
696static int decode_post_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
255{ 697{
256 p = xdr_decode_hyper(p, &fattr->pre_size); 698 __be32 *p;
257 p = xdr_decode_time3(p, &fattr->pre_mtime); 699
258 p = xdr_decode_time3(p, &fattr->pre_ctime); 700 p = xdr_inline_decode(xdr, 4);
701 if (unlikely(p == NULL))
702 goto out_overflow;
703 if (*p != xdr_zero)
704 return decode_fattr3(xdr, fattr);
705 return 0;
706out_overflow:
707 print_overflow_msg(__func__, xdr);
708 return -EIO;
709}
710
711/*
712 * wcc_attr
713 * struct wcc_attr {
714 * size3 size;
715 * nfstime3 mtime;
716 * nfstime3 ctime;
717 * };
718 */
719static int decode_wcc_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
720{
721 __be32 *p;
722
723 p = xdr_inline_decode(xdr, NFS3_wcc_attr_sz << 2);
724 if (unlikely(p == NULL))
725 goto out_overflow;
726
259 fattr->valid |= NFS_ATTR_FATTR_PRESIZE 727 fattr->valid |= NFS_ATTR_FATTR_PRESIZE
260 | NFS_ATTR_FATTR_PREMTIME 728 | NFS_ATTR_FATTR_PREMTIME
261 | NFS_ATTR_FATTR_PRECTIME; 729 | NFS_ATTR_FATTR_PRECTIME;
262 return p;
263}
264 730
265static inline __be32 * 731 p = xdr_decode_size3(p, &fattr->pre_size);
266xdr_decode_post_op_attr(__be32 *p, struct nfs_fattr *fattr) 732 p = xdr_decode_nfstime3(p, &fattr->pre_mtime);
267{ 733 xdr_decode_nfstime3(p, &fattr->pre_ctime);
268 if (*p++) 734
269 p = xdr_decode_fattr(p, fattr); 735 return 0;
270 return p; 736out_overflow:
737 print_overflow_msg(__func__, xdr);
738 return -EIO;
271} 739}
272 740
273static inline __be32 * 741/*
274xdr_decode_post_op_attr_stream(struct xdr_stream *xdr, struct nfs_fattr *fattr) 742 * pre_op_attr
743 * union pre_op_attr switch (bool attributes_follow) {
744 * case TRUE:
745 * wcc_attr attributes;
746 * case FALSE:
747 * void;
748 * };
749 *
750 * wcc_data
751 *
752 * struct wcc_data {
753 * pre_op_attr before;
754 * post_op_attr after;
755 * };
756 */
757static int decode_pre_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
275{ 758{
276 __be32 *p; 759 __be32 *p;
277 760
278 p = xdr_inline_decode(xdr, 4); 761 p = xdr_inline_decode(xdr, 4);
279 if (unlikely(!p)) 762 if (unlikely(p == NULL))
280 goto out_overflow; 763 goto out_overflow;
281 if (ntohl(*p++)) { 764 if (*p != xdr_zero)
282 p = xdr_inline_decode(xdr, 84); 765 return decode_wcc_attr(xdr, fattr);
283 if (unlikely(!p)) 766 return 0;
284 goto out_overflow;
285 p = xdr_decode_fattr(p, fattr);
286 }
287 return p;
288out_overflow: 767out_overflow:
289 print_overflow_msg(__func__, xdr); 768 print_overflow_msg(__func__, xdr);
290 return ERR_PTR(-EIO); 769 return -EIO;
291} 770}
292 771
293static inline __be32 * 772static int decode_wcc_data(struct xdr_stream *xdr, struct nfs_fattr *fattr)
294xdr_decode_pre_op_attr(__be32 *p, struct nfs_fattr *fattr)
295{ 773{
296 if (*p++) 774 int error;
297 return xdr_decode_wcc_attr(p, fattr); 775
298 return p; 776 error = decode_pre_op_attr(xdr, fattr);
777 if (unlikely(error))
778 goto out;
779 error = decode_post_op_attr(xdr, fattr);
780out:
781 return error;
299} 782}
300 783
784/*
785 * post_op_fh3
786 *
787 * union post_op_fh3 switch (bool handle_follows) {
788 * case TRUE:
789 * nfs_fh3 handle;
790 * case FALSE:
791 * void;
792 * };
793 */
794static int decode_post_op_fh3(struct xdr_stream *xdr, struct nfs_fh *fh)
795{
796 __be32 *p = xdr_inline_decode(xdr, 4);
797 if (unlikely(p == NULL))
798 goto out_overflow;
799 if (*p != xdr_zero)
800 return decode_nfs_fh3(xdr, fh);
801 zero_nfs_fh3(fh);
802 return 0;
803out_overflow:
804 print_overflow_msg(__func__, xdr);
805 return -EIO;
806}
301 807
302static inline __be32 * 808/*
303xdr_decode_wcc_data(__be32 *p, struct nfs_fattr *fattr) 809 * diropargs3
810 *
811 * struct diropargs3 {
812 * nfs_fh3 dir;
813 * filename3 name;
814 * };
815 */
816static void encode_diropargs3(struct xdr_stream *xdr, const struct nfs_fh *fh,
817 const char *name, u32 length)
304{ 818{
305 p = xdr_decode_pre_op_attr(p, fattr); 819 encode_nfs_fh3(xdr, fh);
306 return xdr_decode_post_op_attr(p, fattr); 820 encode_filename3(xdr, name, length);
307} 821}
308 822
823
309/* 824/*
310 * NFS encode functions 825 * NFSv3 XDR encode functions
826 *
827 * NFSv3 argument types are defined in section 3.3 of RFC 1813:
828 * "NFS Version 3 Protocol Specification".
311 */ 829 */
312 830
313/* 831/*
314 * Encode file handle argument 832 * 3.3.1 GETATTR3args
833 *
834 * struct GETATTR3args {
835 * nfs_fh3 object;
836 * };
315 */ 837 */
316static int 838static void nfs3_xdr_enc_getattr3args(struct rpc_rqst *req,
317nfs3_xdr_fhandle(struct rpc_rqst *req, __be32 *p, struct nfs_fh *fh) 839 struct xdr_stream *xdr,
840 const struct nfs_fh *fh)
318{ 841{
319 p = xdr_encode_fhandle(p, fh); 842 encode_nfs_fh3(xdr, fh);
320 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
321 return 0;
322} 843}
323 844
324/* 845/*
325 * Encode SETATTR arguments 846 * 3.3.2 SETATTR3args
847 *
848 * union sattrguard3 switch (bool check) {
849 * case TRUE:
850 * nfstime3 obj_ctime;
851 * case FALSE:
852 * void;
853 * };
854 *
855 * struct SETATTR3args {
856 * nfs_fh3 object;
857 * sattr3 new_attributes;
858 * sattrguard3 guard;
859 * };
326 */ 860 */
327static int 861static void encode_sattrguard3(struct xdr_stream *xdr,
328nfs3_xdr_sattrargs(struct rpc_rqst *req, __be32 *p, struct nfs3_sattrargs *args) 862 const struct nfs3_sattrargs *args)
329{ 863{
330 p = xdr_encode_fhandle(p, args->fh); 864 __be32 *p;
331 p = xdr_encode_sattr(p, args->sattr); 865
332 *p++ = htonl(args->guard); 866 if (args->guard) {
333 if (args->guard) 867 p = xdr_reserve_space(xdr, 4 + 8);
334 p = xdr_encode_time3(p, &args->guardtime); 868 *p++ = xdr_one;
335 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); 869 xdr_encode_nfstime3(p, &args->guardtime);
336 return 0; 870 } else {
871 p = xdr_reserve_space(xdr, 4);
872 *p = xdr_zero;
873 }
874}
875
876static void nfs3_xdr_enc_setattr3args(struct rpc_rqst *req,
877 struct xdr_stream *xdr,
878 const struct nfs3_sattrargs *args)
879{
880 encode_nfs_fh3(xdr, args->fh);
881 encode_sattr3(xdr, args->sattr);
882 encode_sattrguard3(xdr, args);
337} 883}
338 884
339/* 885/*
340 * Encode directory ops argument 886 * 3.3.3 LOOKUP3args
887 *
888 * struct LOOKUP3args {
889 * diropargs3 what;
890 * };
341 */ 891 */
342static int 892static void nfs3_xdr_enc_lookup3args(struct rpc_rqst *req,
343nfs3_xdr_diropargs(struct rpc_rqst *req, __be32 *p, struct nfs3_diropargs *args) 893 struct xdr_stream *xdr,
894 const struct nfs3_diropargs *args)
344{ 895{
345 p = xdr_encode_fhandle(p, args->fh); 896 encode_diropargs3(xdr, args->fh, args->name, args->len);
346 p = xdr_encode_array(p, args->name, args->len);
347 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
348 return 0;
349} 897}
350 898
351/* 899/*
352 * Encode REMOVE argument 900 * 3.3.4 ACCESS3args
901 *
902 * struct ACCESS3args {
903 * nfs_fh3 object;
904 * uint32 access;
905 * };
353 */ 906 */
354static int 907static void encode_access3args(struct xdr_stream *xdr,
355nfs3_xdr_removeargs(struct rpc_rqst *req, __be32 *p, const struct nfs_removeargs *args) 908 const struct nfs3_accessargs *args)
356{ 909{
357 p = xdr_encode_fhandle(p, args->fh); 910 encode_nfs_fh3(xdr, args->fh);
358 p = xdr_encode_array(p, args->name.name, args->name.len); 911 encode_uint32(xdr, args->access);
359 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); 912}
360 return 0; 913
914static void nfs3_xdr_enc_access3args(struct rpc_rqst *req,
915 struct xdr_stream *xdr,
916 const struct nfs3_accessargs *args)
917{
918 encode_access3args(xdr, args);
361} 919}
362 920
363/* 921/*
364 * Encode access() argument 922 * 3.3.5 READLINK3args
923 *
924 * struct READLINK3args {
925 * nfs_fh3 symlink;
926 * };
365 */ 927 */
366static int 928static void nfs3_xdr_enc_readlink3args(struct rpc_rqst *req,
367nfs3_xdr_accessargs(struct rpc_rqst *req, __be32 *p, struct nfs3_accessargs *args) 929 struct xdr_stream *xdr,
930 const struct nfs3_readlinkargs *args)
368{ 931{
369 p = xdr_encode_fhandle(p, args->fh); 932 encode_nfs_fh3(xdr, args->fh);
370 *p++ = htonl(args->access); 933 prepare_reply_buffer(req, args->pages, args->pgbase,
371 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); 934 args->pglen, NFS3_readlinkres_sz);
372 return 0;
373} 935}
374 936
375/* 937/*
376 * Arguments to a READ call. Since we read data directly into the page 938 * 3.3.6 READ3args
377 * cache, we also set up the reply iovec here so that iov[1] points 939 *
378 * exactly to the page we want to fetch. 940 * struct READ3args {
941 * nfs_fh3 file;
942 * offset3 offset;
943 * count3 count;
944 * };
379 */ 945 */
380static int 946static void encode_read3args(struct xdr_stream *xdr,
381nfs3_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) 947 const struct nfs_readargs *args)
382{ 948{
383 struct rpc_auth *auth = req->rq_cred->cr_auth; 949 __be32 *p;
384 unsigned int replen; 950
385 u32 count = args->count; 951 encode_nfs_fh3(xdr, args->fh);
386 952
387 p = xdr_encode_fhandle(p, args->fh); 953 p = xdr_reserve_space(xdr, 8 + 4);
388 p = xdr_encode_hyper(p, args->offset); 954 p = xdr_encode_hyper(p, args->offset);
389 *p++ = htonl(count); 955 *p = cpu_to_be32(args->count);
390 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); 956}
391 957
392 /* Inline the page array */ 958static void nfs3_xdr_enc_read3args(struct rpc_rqst *req,
393 replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readres_sz) << 2; 959 struct xdr_stream *xdr,
394 xdr_inline_pages(&req->rq_rcv_buf, replen, 960 const struct nfs_readargs *args)
395 args->pages, args->pgbase, count); 961{
962 encode_read3args(xdr, args);
963 prepare_reply_buffer(req, args->pages, args->pgbase,
964 args->count, NFS3_readres_sz);
396 req->rq_rcv_buf.flags |= XDRBUF_READ; 965 req->rq_rcv_buf.flags |= XDRBUF_READ;
397 return 0;
398} 966}
399 967
400/* 968/*
401 * Write arguments. Splice the buffer to be written into the iovec. 969 * 3.3.7 WRITE3args
970 *
971 * enum stable_how {
972 * UNSTABLE = 0,
973 * DATA_SYNC = 1,
974 * FILE_SYNC = 2
975 * };
976 *
977 * struct WRITE3args {
978 * nfs_fh3 file;
979 * offset3 offset;
980 * count3 count;
981 * stable_how stable;
982 * opaque data<>;
983 * };
402 */ 984 */
403static int 985static void encode_write3args(struct xdr_stream *xdr,
404nfs3_xdr_writeargs(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args) 986 const struct nfs_writeargs *args)
405{ 987{
406 struct xdr_buf *sndbuf = &req->rq_snd_buf; 988 __be32 *p;
407 u32 count = args->count; 989
990 encode_nfs_fh3(xdr, args->fh);
408 991
409 p = xdr_encode_fhandle(p, args->fh); 992 p = xdr_reserve_space(xdr, 8 + 4 + 4 + 4);
410 p = xdr_encode_hyper(p, args->offset); 993 p = xdr_encode_hyper(p, args->offset);
411 *p++ = htonl(count); 994 *p++ = cpu_to_be32(args->count);
412 *p++ = htonl(args->stable); 995 *p++ = cpu_to_be32(args->stable);
413 *p++ = htonl(count); 996 *p = cpu_to_be32(args->count);
414 sndbuf->len = xdr_adjust_iovec(sndbuf->head, p); 997 xdr_write_pages(xdr, args->pages, args->pgbase, args->count);
415 998}
416 /* Copy the page array */ 999
417 xdr_encode_pages(sndbuf, args->pages, args->pgbase, count); 1000static void nfs3_xdr_enc_write3args(struct rpc_rqst *req,
418 sndbuf->flags |= XDRBUF_WRITE; 1001 struct xdr_stream *xdr,
419 return 0; 1002 const struct nfs_writeargs *args)
1003{
1004 encode_write3args(xdr, args);
1005 xdr->buf->flags |= XDRBUF_WRITE;
420} 1006}
421 1007
422/* 1008/*
423 * Encode CREATE arguments 1009 * 3.3.8 CREATE3args
1010 *
1011 * enum createmode3 {
1012 * UNCHECKED = 0,
1013 * GUARDED = 1,
1014 * EXCLUSIVE = 2
1015 * };
1016 *
1017 * union createhow3 switch (createmode3 mode) {
1018 * case UNCHECKED:
1019 * case GUARDED:
1020 * sattr3 obj_attributes;
1021 * case EXCLUSIVE:
1022 * createverf3 verf;
1023 * };
1024 *
1025 * struct CREATE3args {
1026 * diropargs3 where;
1027 * createhow3 how;
1028 * };
424 */ 1029 */
425static int 1030static void encode_createhow3(struct xdr_stream *xdr,
426nfs3_xdr_createargs(struct rpc_rqst *req, __be32 *p, struct nfs3_createargs *args) 1031 const struct nfs3_createargs *args)
427{ 1032{
428 p = xdr_encode_fhandle(p, args->fh); 1033 encode_uint32(xdr, args->createmode);
429 p = xdr_encode_array(p, args->name, args->len); 1034 switch (args->createmode) {
430 1035 case NFS3_CREATE_UNCHECKED:
431 *p++ = htonl(args->createmode); 1036 case NFS3_CREATE_GUARDED:
432 if (args->createmode == NFS3_CREATE_EXCLUSIVE) { 1037 encode_sattr3(xdr, args->sattr);
433 *p++ = args->verifier[0]; 1038 break;
434 *p++ = args->verifier[1]; 1039 case NFS3_CREATE_EXCLUSIVE:
435 } else 1040 encode_createverf3(xdr, args->verifier);
436 p = xdr_encode_sattr(p, args->sattr); 1041 break;
1042 default:
1043 BUG();
1044 }
1045}
437 1046
438 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); 1047static void nfs3_xdr_enc_create3args(struct rpc_rqst *req,
439 return 0; 1048 struct xdr_stream *xdr,
1049 const struct nfs3_createargs *args)
1050{
1051 encode_diropargs3(xdr, args->fh, args->name, args->len);
1052 encode_createhow3(xdr, args);
440} 1053}
441 1054
442/* 1055/*
443 * Encode MKDIR arguments 1056 * 3.3.9 MKDIR3args
1057 *
1058 * struct MKDIR3args {
1059 * diropargs3 where;
1060 * sattr3 attributes;
1061 * };
444 */ 1062 */
445static int 1063static void nfs3_xdr_enc_mkdir3args(struct rpc_rqst *req,
446nfs3_xdr_mkdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_mkdirargs *args) 1064 struct xdr_stream *xdr,
1065 const struct nfs3_mkdirargs *args)
447{ 1066{
448 p = xdr_encode_fhandle(p, args->fh); 1067 encode_diropargs3(xdr, args->fh, args->name, args->len);
449 p = xdr_encode_array(p, args->name, args->len); 1068 encode_sattr3(xdr, args->sattr);
450 p = xdr_encode_sattr(p, args->sattr);
451 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
452 return 0;
453} 1069}
454 1070
455/* 1071/*
456 * Encode SYMLINK arguments 1072 * 3.3.10 SYMLINK3args
1073 *
1074 * struct symlinkdata3 {
1075 * sattr3 symlink_attributes;
1076 * nfspath3 symlink_data;
1077 * };
1078 *
1079 * struct SYMLINK3args {
1080 * diropargs3 where;
1081 * symlinkdata3 symlink;
1082 * };
457 */ 1083 */
458static int 1084static void encode_symlinkdata3(struct xdr_stream *xdr,
459nfs3_xdr_symlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_symlinkargs *args) 1085 const struct nfs3_symlinkargs *args)
460{ 1086{
461 p = xdr_encode_fhandle(p, args->fromfh); 1087 encode_sattr3(xdr, args->sattr);
462 p = xdr_encode_array(p, args->fromname, args->fromlen); 1088 encode_nfspath3(xdr, args->pages, args->pathlen);
463 p = xdr_encode_sattr(p, args->sattr); 1089}
464 *p++ = htonl(args->pathlen);
465 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
466 1090
467 /* Copy the page */ 1091static void nfs3_xdr_enc_symlink3args(struct rpc_rqst *req,
468 xdr_encode_pages(&req->rq_snd_buf, args->pages, 0, args->pathlen); 1092 struct xdr_stream *xdr,
469 return 0; 1093 const struct nfs3_symlinkargs *args)
1094{
1095 encode_diropargs3(xdr, args->fromfh, args->fromname, args->fromlen);
1096 encode_symlinkdata3(xdr, args);
470} 1097}
471 1098
472/* 1099/*
473 * Encode MKNOD arguments 1100 * 3.3.11 MKNOD3args
1101 *
1102 * struct devicedata3 {
1103 * sattr3 dev_attributes;
1104 * specdata3 spec;
1105 * };
1106 *
1107 * union mknoddata3 switch (ftype3 type) {
1108 * case NF3CHR:
1109 * case NF3BLK:
1110 * devicedata3 device;
1111 * case NF3SOCK:
1112 * case NF3FIFO:
1113 * sattr3 pipe_attributes;
1114 * default:
1115 * void;
1116 * };
1117 *
1118 * struct MKNOD3args {
1119 * diropargs3 where;
1120 * mknoddata3 what;
1121 * };
474 */ 1122 */
475static int 1123static void encode_devicedata3(struct xdr_stream *xdr,
476nfs3_xdr_mknodargs(struct rpc_rqst *req, __be32 *p, struct nfs3_mknodargs *args) 1124 const struct nfs3_mknodargs *args)
477{ 1125{
478 p = xdr_encode_fhandle(p, args->fh); 1126 encode_sattr3(xdr, args->sattr);
479 p = xdr_encode_array(p, args->name, args->len); 1127 encode_specdata3(xdr, args->rdev);
480 *p++ = htonl(args->type); 1128}
481 p = xdr_encode_sattr(p, args->sattr); 1129
482 if (args->type == NF3CHR || args->type == NF3BLK) { 1130static void encode_mknoddata3(struct xdr_stream *xdr,
483 *p++ = htonl(MAJOR(args->rdev)); 1131 const struct nfs3_mknodargs *args)
484 *p++ = htonl(MINOR(args->rdev)); 1132{
1133 encode_ftype3(xdr, args->type);
1134 switch (args->type) {
1135 case NF3CHR:
1136 case NF3BLK:
1137 encode_devicedata3(xdr, args);
1138 break;
1139 case NF3SOCK:
1140 case NF3FIFO:
1141 encode_sattr3(xdr, args->sattr);
1142 break;
1143 case NF3REG:
1144 case NF3DIR:
1145 break;
1146 default:
1147 BUG();
485 } 1148 }
1149}
486 1150
487 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); 1151static void nfs3_xdr_enc_mknod3args(struct rpc_rqst *req,
488 return 0; 1152 struct xdr_stream *xdr,
1153 const struct nfs3_mknodargs *args)
1154{
1155 encode_diropargs3(xdr, args->fh, args->name, args->len);
1156 encode_mknoddata3(xdr, args);
489} 1157}
490 1158
491/* 1159/*
492 * Encode RENAME arguments 1160 * 3.3.12 REMOVE3args
1161 *
1162 * struct REMOVE3args {
1163 * diropargs3 object;
1164 * };
493 */ 1165 */
494static int 1166static void nfs3_xdr_enc_remove3args(struct rpc_rqst *req,
495nfs3_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs_renameargs *args) 1167 struct xdr_stream *xdr,
496{ 1168 const struct nfs_removeargs *args)
497 p = xdr_encode_fhandle(p, args->old_dir); 1169{
498 p = xdr_encode_array(p, args->old_name->name, args->old_name->len); 1170 encode_diropargs3(xdr, args->fh, args->name.name, args->name.len);
499 p = xdr_encode_fhandle(p, args->new_dir);
500 p = xdr_encode_array(p, args->new_name->name, args->new_name->len);
501 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
502 return 0;
503} 1171}
504 1172
505/* 1173/*
506 * Encode LINK arguments 1174 * 3.3.14 RENAME3args
1175 *
1176 * struct RENAME3args {
1177 * diropargs3 from;
1178 * diropargs3 to;
1179 * };
507 */ 1180 */
508static int 1181static void nfs3_xdr_enc_rename3args(struct rpc_rqst *req,
509nfs3_xdr_linkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_linkargs *args) 1182 struct xdr_stream *xdr,
1183 const struct nfs_renameargs *args)
510{ 1184{
511 p = xdr_encode_fhandle(p, args->fromfh); 1185 const struct qstr *old = args->old_name;
512 p = xdr_encode_fhandle(p, args->tofh); 1186 const struct qstr *new = args->new_name;
513 p = xdr_encode_array(p, args->toname, args->tolen); 1187
514 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); 1188 encode_diropargs3(xdr, args->old_dir, old->name, old->len);
515 return 0; 1189 encode_diropargs3(xdr, args->new_dir, new->name, new->len);
516} 1190}
517 1191
518/* 1192/*
519 * Encode arguments to readdir call 1193 * 3.3.15 LINK3args
1194 *
1195 * struct LINK3args {
1196 * nfs_fh3 file;
1197 * diropargs3 link;
1198 * };
520 */ 1199 */
521static int 1200static void nfs3_xdr_enc_link3args(struct rpc_rqst *req,
522nfs3_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirargs *args) 1201 struct xdr_stream *xdr,
1202 const struct nfs3_linkargs *args)
523{ 1203{
524 struct rpc_auth *auth = req->rq_cred->cr_auth; 1204 encode_nfs_fh3(xdr, args->fromfh);
525 unsigned int replen; 1205 encode_diropargs3(xdr, args->tofh, args->toname, args->tolen);
526 u32 count = args->count;
527
528 p = xdr_encode_fhandle(p, args->fh);
529 p = xdr_encode_hyper(p, args->cookie);
530 *p++ = args->verf[0];
531 *p++ = args->verf[1];
532 if (args->plus) {
533 /* readdirplus: need dircount + buffer size.
534 * We just make sure we make dircount big enough */
535 *p++ = htonl(count >> 3);
536 }
537 *p++ = htonl(count);
538 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
539
540 /* Inline the page array */
541 replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readdirres_sz) << 2;
542 xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0, count);
543 return 0;
544} 1206}
545 1207
546/* 1208/*
547 * Decode the result of a readdir call. 1209 * 3.3.16 READDIR3args
548 * We just check for syntactical correctness. 1210 *
1211 * struct READDIR3args {
1212 * nfs_fh3 dir;
1213 * cookie3 cookie;
1214 * cookieverf3 cookieverf;
1215 * count3 count;
1216 * };
549 */ 1217 */
550static int 1218static void encode_readdir3args(struct xdr_stream *xdr,
551nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res) 1219 const struct nfs3_readdirargs *args)
552{ 1220{
553 struct xdr_buf *rcvbuf = &req->rq_rcv_buf; 1221 __be32 *p;
554 struct kvec *iov = rcvbuf->head;
555 struct page **page;
556 size_t hdrlen;
557 u32 recvd, pglen;
558 int status;
559
560 status = ntohl(*p++);
561 /* Decode post_op_attrs */
562 p = xdr_decode_post_op_attr(p, res->dir_attr);
563 if (status)
564 return nfs_stat_to_errno(status);
565 /* Decode verifier cookie */
566 if (res->verf) {
567 res->verf[0] = *p++;
568 res->verf[1] = *p++;
569 } else {
570 p += 2;
571 }
572 1222
573 hdrlen = (u8 *) p - (u8 *) iov->iov_base; 1223 encode_nfs_fh3(xdr, args->fh);
574 if (iov->iov_len < hdrlen) {
575 dprintk("NFS: READDIR reply header overflowed:"
576 "length %Zu > %Zu\n", hdrlen, iov->iov_len);
577 return -errno_NFSERR_IO;
578 } else if (iov->iov_len != hdrlen) {
579 dprintk("NFS: READDIR header is short. iovec will be shifted.\n");
580 xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen);
581 }
582 1224
583 pglen = rcvbuf->page_len; 1225 p = xdr_reserve_space(xdr, 8 + NFS3_COOKIEVERFSIZE + 4);
584 recvd = rcvbuf->len - hdrlen; 1226 p = xdr_encode_cookie3(p, args->cookie);
585 if (pglen > recvd) 1227 p = xdr_encode_cookieverf3(p, args->verf);
586 pglen = recvd; 1228 *p = cpu_to_be32(args->count);
587 page = rcvbuf->pages; 1229}
588 1230
589 return pglen; 1231static void nfs3_xdr_enc_readdir3args(struct rpc_rqst *req,
1232 struct xdr_stream *xdr,
1233 const struct nfs3_readdirargs *args)
1234{
1235 encode_readdir3args(xdr, args);
1236 prepare_reply_buffer(req, args->pages, 0,
1237 args->count, NFS3_readdirres_sz);
590} 1238}
591 1239
592__be32 * 1240/*
593nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_server *server, int plus) 1241 * 3.3.17 READDIRPLUS3args
1242 *
1243 * struct READDIRPLUS3args {
1244 * nfs_fh3 dir;
1245 * cookie3 cookie;
1246 * cookieverf3 cookieverf;
1247 * count3 dircount;
1248 * count3 maxcount;
1249 * };
1250 */
1251static void encode_readdirplus3args(struct xdr_stream *xdr,
1252 const struct nfs3_readdirargs *args)
594{ 1253{
595 __be32 *p; 1254 __be32 *p;
596 struct nfs_entry old = *entry;
597
598 p = xdr_inline_decode(xdr, 4);
599 if (unlikely(!p))
600 goto out_overflow;
601 if (!ntohl(*p++)) {
602 p = xdr_inline_decode(xdr, 4);
603 if (unlikely(!p))
604 goto out_overflow;
605 if (!ntohl(*p++))
606 return ERR_PTR(-EAGAIN);
607 entry->eof = 1;
608 return ERR_PTR(-EBADCOOKIE);
609 }
610 1255
611 p = xdr_inline_decode(xdr, 12); 1256 encode_nfs_fh3(xdr, args->fh);
612 if (unlikely(!p))
613 goto out_overflow;
614 p = xdr_decode_hyper(p, &entry->ino);
615 entry->len = ntohl(*p++);
616 1257
617 p = xdr_inline_decode(xdr, entry->len + 8); 1258 p = xdr_reserve_space(xdr, 8 + NFS3_COOKIEVERFSIZE + 4 + 4);
618 if (unlikely(!p)) 1259 p = xdr_encode_cookie3(p, args->cookie);
619 goto out_overflow; 1260 p = xdr_encode_cookieverf3(p, args->verf);
620 entry->name = (const char *) p;
621 p += XDR_QUADLEN(entry->len);
622 entry->prev_cookie = entry->cookie;
623 p = xdr_decode_hyper(p, &entry->cookie);
624
625 entry->d_type = DT_UNKNOWN;
626 if (plus) {
627 entry->fattr->valid = 0;
628 p = xdr_decode_post_op_attr_stream(xdr, entry->fattr);
629 if (IS_ERR(p))
630 goto out_overflow_exit;
631 entry->d_type = nfs_umode_to_dtype(entry->fattr->mode);
632 /* In fact, a post_op_fh3: */
633 p = xdr_inline_decode(xdr, 4);
634 if (unlikely(!p))
635 goto out_overflow;
636 if (*p++) {
637 p = xdr_decode_fhandle_stream(xdr, entry->fh);
638 if (IS_ERR(p))
639 goto out_overflow_exit;
640 /* Ugh -- server reply was truncated */
641 if (p == NULL) {
642 dprintk("NFS: FH truncated\n");
643 *entry = old;
644 return ERR_PTR(-EAGAIN);
645 }
646 } else
647 memset((u8*)(entry->fh), 0, sizeof(*entry->fh));
648 }
649 1261
650 p = xdr_inline_peek(xdr, 8); 1262 /*
651 if (p != NULL) 1263 * readdirplus: need dircount + buffer size.
652 entry->eof = !p[0] && p[1]; 1264 * We just make sure we make dircount big enough
653 else 1265 */
654 entry->eof = 0; 1266 *p++ = cpu_to_be32(args->count >> 3);
655 1267
656 return p; 1268 *p = cpu_to_be32(args->count);
1269}
657 1270
658out_overflow: 1271static void nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req,
659 print_overflow_msg(__func__, xdr); 1272 struct xdr_stream *xdr,
660out_overflow_exit: 1273 const struct nfs3_readdirargs *args)
661 return ERR_PTR(-EAGAIN); 1274{
1275 encode_readdirplus3args(xdr, args);
1276 prepare_reply_buffer(req, args->pages, 0,
1277 args->count, NFS3_readdirres_sz);
662} 1278}
663 1279
664/* 1280/*
665 * Encode COMMIT arguments 1281 * 3.3.21 COMMIT3args
1282 *
1283 * struct COMMIT3args {
1284 * nfs_fh3 file;
1285 * offset3 offset;
1286 * count3 count;
1287 * };
666 */ 1288 */
667static int 1289static void encode_commit3args(struct xdr_stream *xdr,
668nfs3_xdr_commitargs(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args) 1290 const struct nfs_writeargs *args)
669{ 1291{
670 p = xdr_encode_fhandle(p, args->fh); 1292 __be32 *p;
1293
1294 encode_nfs_fh3(xdr, args->fh);
1295
1296 p = xdr_reserve_space(xdr, 8 + 4);
671 p = xdr_encode_hyper(p, args->offset); 1297 p = xdr_encode_hyper(p, args->offset);
672 *p++ = htonl(args->count); 1298 *p = cpu_to_be32(args->count);
673 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
674 return 0;
675} 1299}
676 1300
677#ifdef CONFIG_NFS_V3_ACL 1301static void nfs3_xdr_enc_commit3args(struct rpc_rqst *req,
678/* 1302 struct xdr_stream *xdr,
679 * Encode GETACL arguments 1303 const struct nfs_writeargs *args)
680 */
681static int
682nfs3_xdr_getaclargs(struct rpc_rqst *req, __be32 *p,
683 struct nfs3_getaclargs *args)
684{ 1304{
685 struct rpc_auth *auth = req->rq_cred->cr_auth; 1305 encode_commit3args(xdr, args);
686 unsigned int replen; 1306}
687 1307
688 p = xdr_encode_fhandle(p, args->fh); 1308#ifdef CONFIG_NFS_V3_ACL
689 *p++ = htonl(args->mask);
690 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
691 1309
692 if (args->mask & (NFS_ACL | NFS_DFACL)) { 1310static void nfs3_xdr_enc_getacl3args(struct rpc_rqst *req,
693 /* Inline the page array */ 1311 struct xdr_stream *xdr,
694 replen = (RPC_REPHDRSIZE + auth->au_rslack + 1312 const struct nfs3_getaclargs *args)
695 ACL3_getaclres_sz) << 2; 1313{
696 xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0, 1314 encode_nfs_fh3(xdr, args->fh);
697 NFSACL_MAXPAGES << PAGE_SHIFT); 1315 encode_uint32(xdr, args->mask);
698 } 1316 if (args->mask & (NFS_ACL | NFS_DFACL))
699 return 0; 1317 prepare_reply_buffer(req, args->pages, 0,
1318 NFSACL_MAXPAGES << PAGE_SHIFT,
1319 ACL3_getaclres_sz);
700} 1320}
701 1321
702/* 1322static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req,
703 * Encode SETACL arguments 1323 struct xdr_stream *xdr,
704 */ 1324 const struct nfs3_setaclargs *args)
705static int
706nfs3_xdr_setaclargs(struct rpc_rqst *req, __be32 *p,
707 struct nfs3_setaclargs *args)
708{ 1325{
709 struct xdr_buf *buf = &req->rq_snd_buf;
710 unsigned int base; 1326 unsigned int base;
711 int err; 1327 int error;
712
713 p = xdr_encode_fhandle(p, NFS_FH(args->inode));
714 *p++ = htonl(args->mask);
715 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
716 base = req->rq_slen;
717 1328
1329 encode_nfs_fh3(xdr, NFS_FH(args->inode));
1330 encode_uint32(xdr, args->mask);
718 if (args->npages != 0) 1331 if (args->npages != 0)
719 xdr_encode_pages(buf, args->pages, 0, args->len); 1332 xdr_write_pages(xdr, args->pages, 0, args->len);
720 else
721 req->rq_slen = xdr_adjust_iovec(req->rq_svec,
722 p + XDR_QUADLEN(args->len));
723 1333
724 err = nfsacl_encode(buf, base, args->inode, 1334 base = req->rq_slen;
1335 error = nfsacl_encode(xdr->buf, base, args->inode,
725 (args->mask & NFS_ACL) ? 1336 (args->mask & NFS_ACL) ?
726 args->acl_access : NULL, 1, 0); 1337 args->acl_access : NULL, 1, 0);
727 if (err > 0) 1338 BUG_ON(error < 0);
728 err = nfsacl_encode(buf, base + err, args->inode, 1339 error = nfsacl_encode(xdr->buf, base + error, args->inode,
729 (args->mask & NFS_DFACL) ? 1340 (args->mask & NFS_DFACL) ?
730 args->acl_default : NULL, 1, 1341 args->acl_default : NULL, 1,
731 NFS_ACL_DEFAULT); 1342 NFS_ACL_DEFAULT);
732 return (err > 0) ? 0 : err; 1343 BUG_ON(error < 0);
733} 1344}
1345
734#endif /* CONFIG_NFS_V3_ACL */ 1346#endif /* CONFIG_NFS_V3_ACL */
735 1347
736/* 1348/*
737 * NFS XDR decode functions 1349 * NFSv3 XDR decode functions
1350 *
1351 * NFSv3 result types are defined in section 3.3 of RFC 1813:
1352 * "NFS Version 3 Protocol Specification".
738 */ 1353 */
739 1354
740/* 1355/*
741 * Decode attrstat reply. 1356 * 3.3.1 GETATTR3res
1357 *
1358 * struct GETATTR3resok {
1359 * fattr3 obj_attributes;
1360 * };
1361 *
1362 * union GETATTR3res switch (nfsstat3 status) {
1363 * case NFS3_OK:
1364 * GETATTR3resok resok;
1365 * default:
1366 * void;
1367 * };
742 */ 1368 */
743static int 1369static int nfs3_xdr_dec_getattr3res(struct rpc_rqst *req,
744nfs3_xdr_attrstat(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr) 1370 struct xdr_stream *xdr,
1371 struct nfs_fattr *result)
745{ 1372{
746 int status; 1373 enum nfs_stat status;
747 1374 int error;
748 if ((status = ntohl(*p++))) 1375
749 return nfs_stat_to_errno(status); 1376 error = decode_nfsstat3(xdr, &status);
750 xdr_decode_fattr(p, fattr); 1377 if (unlikely(error))
751 return 0; 1378 goto out;
1379 if (status != NFS3_OK)
1380 goto out_default;
1381 error = decode_fattr3(xdr, result);
1382out:
1383 return error;
1384out_default:
1385 return nfs_stat_to_errno(status);
752} 1386}
753 1387
754/* 1388/*
755 * Decode status+wcc_data reply 1389 * 3.3.2 SETATTR3res
756 * SATTR, REMOVE, RMDIR 1390 *
1391 * struct SETATTR3resok {
1392 * wcc_data obj_wcc;
1393 * };
1394 *
1395 * struct SETATTR3resfail {
1396 * wcc_data obj_wcc;
1397 * };
1398 *
1399 * union SETATTR3res switch (nfsstat3 status) {
1400 * case NFS3_OK:
1401 * SETATTR3resok resok;
1402 * default:
1403 * SETATTR3resfail resfail;
1404 * };
757 */ 1405 */
758static int 1406static int nfs3_xdr_dec_setattr3res(struct rpc_rqst *req,
759nfs3_xdr_wccstat(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr) 1407 struct xdr_stream *xdr,
1408 struct nfs_fattr *result)
760{ 1409{
761 int status; 1410 enum nfs_stat status;
762 1411 int error;
763 if ((status = ntohl(*p++))) 1412
764 status = nfs_stat_to_errno(status); 1413 error = decode_nfsstat3(xdr, &status);
765 xdr_decode_wcc_data(p, fattr); 1414 if (unlikely(error))
766 return status; 1415 goto out;
1416 error = decode_wcc_data(xdr, result);
1417 if (unlikely(error))
1418 goto out;
1419 if (status != NFS3_OK)
1420 goto out_status;
1421out:
1422 return error;
1423out_status:
1424 return nfs_stat_to_errno(status);
767} 1425}
768 1426
769static int 1427/*
770nfs3_xdr_removeres(struct rpc_rqst *req, __be32 *p, struct nfs_removeres *res) 1428 * 3.3.3 LOOKUP3res
1429 *
1430 * struct LOOKUP3resok {
1431 * nfs_fh3 object;
1432 * post_op_attr obj_attributes;
1433 * post_op_attr dir_attributes;
1434 * };
1435 *
1436 * struct LOOKUP3resfail {
1437 * post_op_attr dir_attributes;
1438 * };
1439 *
1440 * union LOOKUP3res switch (nfsstat3 status) {
1441 * case NFS3_OK:
1442 * LOOKUP3resok resok;
1443 * default:
1444 * LOOKUP3resfail resfail;
1445 * };
1446 */
1447static int nfs3_xdr_dec_lookup3res(struct rpc_rqst *req,
1448 struct xdr_stream *xdr,
1449 struct nfs3_diropres *result)
771{ 1450{
772 return nfs3_xdr_wccstat(req, p, res->dir_attr); 1451 enum nfs_stat status;
1452 int error;
1453
1454 error = decode_nfsstat3(xdr, &status);
1455 if (unlikely(error))
1456 goto out;
1457 if (status != NFS3_OK)
1458 goto out_default;
1459 error = decode_nfs_fh3(xdr, result->fh);
1460 if (unlikely(error))
1461 goto out;
1462 error = decode_post_op_attr(xdr, result->fattr);
1463 if (unlikely(error))
1464 goto out;
1465 error = decode_post_op_attr(xdr, result->dir_attr);
1466out:
1467 return error;
1468out_default:
1469 error = decode_post_op_attr(xdr, result->dir_attr);
1470 if (unlikely(error))
1471 goto out;
1472 return nfs_stat_to_errno(status);
773} 1473}
774 1474
775/* 1475/*
776 * Decode LOOKUP reply 1476 * 3.3.4 ACCESS3res
1477 *
1478 * struct ACCESS3resok {
1479 * post_op_attr obj_attributes;
1480 * uint32 access;
1481 * };
1482 *
1483 * struct ACCESS3resfail {
1484 * post_op_attr obj_attributes;
1485 * };
1486 *
1487 * union ACCESS3res switch (nfsstat3 status) {
1488 * case NFS3_OK:
1489 * ACCESS3resok resok;
1490 * default:
1491 * ACCESS3resfail resfail;
1492 * };
777 */ 1493 */
778static int 1494static int nfs3_xdr_dec_access3res(struct rpc_rqst *req,
779nfs3_xdr_lookupres(struct rpc_rqst *req, __be32 *p, struct nfs3_diropres *res) 1495 struct xdr_stream *xdr,
1496 struct nfs3_accessres *result)
780{ 1497{
781 int status; 1498 enum nfs_stat status;
782 1499 int error;
783 if ((status = ntohl(*p++))) { 1500
784 status = nfs_stat_to_errno(status); 1501 error = decode_nfsstat3(xdr, &status);
785 } else { 1502 if (unlikely(error))
786 if (!(p = xdr_decode_fhandle(p, res->fh))) 1503 goto out;
787 return -errno_NFSERR_IO; 1504 error = decode_post_op_attr(xdr, result->fattr);
788 p = xdr_decode_post_op_attr(p, res->fattr); 1505 if (unlikely(error))
789 } 1506 goto out;
790 xdr_decode_post_op_attr(p, res->dir_attr); 1507 if (status != NFS3_OK)
791 return status; 1508 goto out_default;
1509 error = decode_uint32(xdr, &result->access);
1510out:
1511 return error;
1512out_default:
1513 return nfs_stat_to_errno(status);
792} 1514}
793 1515
794/* 1516/*
795 * Decode ACCESS reply 1517 * 3.3.5 READLINK3res
1518 *
1519 * struct READLINK3resok {
1520 * post_op_attr symlink_attributes;
1521 * nfspath3 data;
1522 * };
1523 *
1524 * struct READLINK3resfail {
1525 * post_op_attr symlink_attributes;
1526 * };
1527 *
1528 * union READLINK3res switch (nfsstat3 status) {
1529 * case NFS3_OK:
1530 * READLINK3resok resok;
1531 * default:
1532 * READLINK3resfail resfail;
1533 * };
796 */ 1534 */
797static int 1535static int nfs3_xdr_dec_readlink3res(struct rpc_rqst *req,
798nfs3_xdr_accessres(struct rpc_rqst *req, __be32 *p, struct nfs3_accessres *res) 1536 struct xdr_stream *xdr,
1537 struct nfs_fattr *result)
799{ 1538{
800 int status = ntohl(*p++); 1539 enum nfs_stat status;
801 1540 int error;
802 p = xdr_decode_post_op_attr(p, res->fattr); 1541
803 if (status) 1542 error = decode_nfsstat3(xdr, &status);
804 return nfs_stat_to_errno(status); 1543 if (unlikely(error))
805 res->access = ntohl(*p++); 1544 goto out;
806 return 0; 1545 error = decode_post_op_attr(xdr, result);
1546 if (unlikely(error))
1547 goto out;
1548 if (status != NFS3_OK)
1549 goto out_default;
1550 error = decode_nfspath3(xdr);
1551out:
1552 return error;
1553out_default:
1554 return nfs_stat_to_errno(status);
807} 1555}
808 1556
809static int 1557/*
810nfs3_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readlinkargs *args) 1558 * 3.3.6 READ3res
1559 *
1560 * struct READ3resok {
1561 * post_op_attr file_attributes;
1562 * count3 count;
1563 * bool eof;
1564 * opaque data<>;
1565 * };
1566 *
1567 * struct READ3resfail {
1568 * post_op_attr file_attributes;
1569 * };
1570 *
1571 * union READ3res switch (nfsstat3 status) {
1572 * case NFS3_OK:
1573 * READ3resok resok;
1574 * default:
1575 * READ3resfail resfail;
1576 * };
1577 */
1578static int decode_read3resok(struct xdr_stream *xdr,
1579 struct nfs_readres *result)
811{ 1580{
812 struct rpc_auth *auth = req->rq_cred->cr_auth; 1581 u32 eof, count, ocount, recvd;
813 unsigned int replen; 1582 size_t hdrlen;
1583 __be32 *p;
814 1584
815 p = xdr_encode_fhandle(p, args->fh); 1585 p = xdr_inline_decode(xdr, 4 + 4 + 4);
816 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); 1586 if (unlikely(p == NULL))
1587 goto out_overflow;
1588 count = be32_to_cpup(p++);
1589 eof = be32_to_cpup(p++);
1590 ocount = be32_to_cpup(p++);
1591 if (unlikely(ocount != count))
1592 goto out_mismatch;
1593 hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
1594 recvd = xdr->buf->len - hdrlen;
1595 if (unlikely(count > recvd))
1596 goto out_cheating;
1597
1598out:
1599 xdr_read_pages(xdr, count);
1600 result->eof = eof;
1601 result->count = count;
1602 return count;
1603out_mismatch:
1604 dprintk("NFS: READ count doesn't match length of opaque: "
1605 "count %u != ocount %u\n", count, ocount);
1606 return -EIO;
1607out_cheating:
1608 dprintk("NFS: server cheating in read result: "
1609 "count %u > recvd %u\n", count, recvd);
1610 count = recvd;
1611 eof = 0;
1612 goto out;
1613out_overflow:
1614 print_overflow_msg(__func__, xdr);
1615 return -EIO;
1616}
817 1617
818 /* Inline the page array */ 1618static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
819 replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readlinkres_sz) << 2; 1619 struct nfs_readres *result)
820 xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, args->pgbase, args->pglen); 1620{
821 return 0; 1621 enum nfs_stat status;
1622 int error;
1623
1624 error = decode_nfsstat3(xdr, &status);
1625 if (unlikely(error))
1626 goto out;
1627 error = decode_post_op_attr(xdr, result->fattr);
1628 if (unlikely(error))
1629 goto out;
1630 if (status != NFS3_OK)
1631 goto out_status;
1632 error = decode_read3resok(xdr, result);
1633out:
1634 return error;
1635out_status:
1636 return nfs_stat_to_errno(status);
822} 1637}
823 1638
824/* 1639/*
825 * Decode READLINK reply 1640 * 3.3.7 WRITE3res
1641 *
1642 * enum stable_how {
1643 * UNSTABLE = 0,
1644 * DATA_SYNC = 1,
1645 * FILE_SYNC = 2
1646 * };
1647 *
1648 * struct WRITE3resok {
1649 * wcc_data file_wcc;
1650 * count3 count;
1651 * stable_how committed;
1652 * writeverf3 verf;
1653 * };
1654 *
1655 * struct WRITE3resfail {
1656 * wcc_data file_wcc;
1657 * };
1658 *
1659 * union WRITE3res switch (nfsstat3 status) {
1660 * case NFS3_OK:
1661 * WRITE3resok resok;
1662 * default:
1663 * WRITE3resfail resfail;
1664 * };
826 */ 1665 */
827static int 1666static int decode_write3resok(struct xdr_stream *xdr,
828nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr) 1667 struct nfs_writeres *result)
829{ 1668{
830 struct xdr_buf *rcvbuf = &req->rq_rcv_buf; 1669 __be32 *p;
831 struct kvec *iov = rcvbuf->head;
832 size_t hdrlen;
833 u32 len, recvd;
834 int status;
835
836 status = ntohl(*p++);
837 p = xdr_decode_post_op_attr(p, fattr);
838
839 if (status != 0)
840 return nfs_stat_to_errno(status);
841
842 /* Convert length of symlink */
843 len = ntohl(*p++);
844 if (len >= rcvbuf->page_len) {
845 dprintk("nfs: server returned giant symlink!\n");
846 return -ENAMETOOLONG;
847 }
848 1670
849 hdrlen = (u8 *) p - (u8 *) iov->iov_base; 1671 p = xdr_inline_decode(xdr, 4 + 4 + NFS3_WRITEVERFSIZE);
850 if (iov->iov_len < hdrlen) { 1672 if (unlikely(p == NULL))
851 dprintk("NFS: READLINK reply header overflowed:" 1673 goto out_overflow;
852 "length %Zu > %Zu\n", hdrlen, iov->iov_len); 1674 result->count = be32_to_cpup(p++);
853 return -errno_NFSERR_IO; 1675 result->verf->committed = be32_to_cpup(p++);
854 } else if (iov->iov_len != hdrlen) { 1676 if (unlikely(result->verf->committed > NFS_FILE_SYNC))
855 dprintk("NFS: READLINK header is short. " 1677 goto out_badvalue;
856 "iovec will be shifted.\n"); 1678 memcpy(result->verf->verifier, p, NFS3_WRITEVERFSIZE);
857 xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen); 1679 return result->count;
858 } 1680out_badvalue:
859 recvd = req->rq_rcv_buf.len - hdrlen; 1681 dprintk("NFS: bad stable_how value: %u\n", result->verf->committed);
860 if (recvd < len) { 1682 return -EIO;
861 dprintk("NFS: server cheating in readlink reply: " 1683out_overflow:
862 "count %u > recvd %u\n", len, recvd); 1684 print_overflow_msg(__func__, xdr);
863 return -EIO; 1685 return -EIO;
864 } 1686}
865 1687
866 xdr_terminate_string(rcvbuf, len); 1688static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr,
867 return 0; 1689 struct nfs_writeres *result)
1690{
1691 enum nfs_stat status;
1692 int error;
1693
1694 error = decode_nfsstat3(xdr, &status);
1695 if (unlikely(error))
1696 goto out;
1697 error = decode_wcc_data(xdr, result->fattr);
1698 if (unlikely(error))
1699 goto out;
1700 if (status != NFS3_OK)
1701 goto out_status;
1702 error = decode_write3resok(xdr, result);
1703out:
1704 return error;
1705out_status:
1706 return nfs_stat_to_errno(status);
868} 1707}
869 1708
870/* 1709/*
871 * Decode READ reply 1710 * 3.3.8 CREATE3res
1711 *
1712 * struct CREATE3resok {
1713 * post_op_fh3 obj;
1714 * post_op_attr obj_attributes;
1715 * wcc_data dir_wcc;
1716 * };
1717 *
1718 * struct CREATE3resfail {
1719 * wcc_data dir_wcc;
1720 * };
1721 *
1722 * union CREATE3res switch (nfsstat3 status) {
1723 * case NFS3_OK:
1724 * CREATE3resok resok;
1725 * default:
1726 * CREATE3resfail resfail;
1727 * };
872 */ 1728 */
873static int 1729static int decode_create3resok(struct xdr_stream *xdr,
874nfs3_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res) 1730 struct nfs3_diropres *result)
875{ 1731{
876 struct kvec *iov = req->rq_rcv_buf.head; 1732 int error;
877 size_t hdrlen; 1733
878 u32 count, ocount, recvd; 1734 error = decode_post_op_fh3(xdr, result->fh);
879 int status; 1735 if (unlikely(error))
1736 goto out;
1737 error = decode_post_op_attr(xdr, result->fattr);
1738 if (unlikely(error))
1739 goto out;
1740 /* The server isn't required to return a file handle.
1741 * If it didn't, force the client to perform a LOOKUP
1742 * to determine the correct file handle and attribute
1743 * values for the new object. */
1744 if (result->fh->size == 0)
1745 result->fattr->valid = 0;
1746 error = decode_wcc_data(xdr, result->dir_attr);
1747out:
1748 return error;
1749}
880 1750
881 status = ntohl(*p++); 1751static int nfs3_xdr_dec_create3res(struct rpc_rqst *req,
882 p = xdr_decode_post_op_attr(p, res->fattr); 1752 struct xdr_stream *xdr,
1753 struct nfs3_diropres *result)
1754{
1755 enum nfs_stat status;
1756 int error;
1757
1758 error = decode_nfsstat3(xdr, &status);
1759 if (unlikely(error))
1760 goto out;
1761 if (status != NFS3_OK)
1762 goto out_default;
1763 error = decode_create3resok(xdr, result);
1764out:
1765 return error;
1766out_default:
1767 error = decode_wcc_data(xdr, result->dir_attr);
1768 if (unlikely(error))
1769 goto out;
1770 return nfs_stat_to_errno(status);
1771}
883 1772
884 if (status != 0) 1773/*
885 return nfs_stat_to_errno(status); 1774 * 3.3.12 REMOVE3res
1775 *
1776 * struct REMOVE3resok {
1777 * wcc_data dir_wcc;
1778 * };
1779 *
1780 * struct REMOVE3resfail {
1781 * wcc_data dir_wcc;
1782 * };
1783 *
1784 * union REMOVE3res switch (nfsstat3 status) {
1785 * case NFS3_OK:
1786 * REMOVE3resok resok;
1787 * default:
1788 * REMOVE3resfail resfail;
1789 * };
1790 */
1791static int nfs3_xdr_dec_remove3res(struct rpc_rqst *req,
1792 struct xdr_stream *xdr,
1793 struct nfs_removeres *result)
1794{
1795 enum nfs_stat status;
1796 int error;
1797
1798 error = decode_nfsstat3(xdr, &status);
1799 if (unlikely(error))
1800 goto out;
1801 error = decode_wcc_data(xdr, result->dir_attr);
1802 if (unlikely(error))
1803 goto out;
1804 if (status != NFS3_OK)
1805 goto out_status;
1806out:
1807 return error;
1808out_status:
1809 return nfs_stat_to_errno(status);
1810}
886 1811
887 /* Decode reply count and EOF flag. NFSv3 is somewhat redundant 1812/*
888 * in that it puts the count both in the res struct and in the 1813 * 3.3.14 RENAME3res
889 * opaque data count. */ 1814 *
890 count = ntohl(*p++); 1815 * struct RENAME3resok {
891 res->eof = ntohl(*p++); 1816 * wcc_data fromdir_wcc;
892 ocount = ntohl(*p++); 1817 * wcc_data todir_wcc;
1818 * };
1819 *
1820 * struct RENAME3resfail {
1821 * wcc_data fromdir_wcc;
1822 * wcc_data todir_wcc;
1823 * };
1824 *
1825 * union RENAME3res switch (nfsstat3 status) {
1826 * case NFS3_OK:
1827 * RENAME3resok resok;
1828 * default:
1829 * RENAME3resfail resfail;
1830 * };
1831 */
1832static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req,
1833 struct xdr_stream *xdr,
1834 struct nfs_renameres *result)
1835{
1836 enum nfs_stat status;
1837 int error;
1838
1839 error = decode_nfsstat3(xdr, &status);
1840 if (unlikely(error))
1841 goto out;
1842 error = decode_wcc_data(xdr, result->old_fattr);
1843 if (unlikely(error))
1844 goto out;
1845 error = decode_wcc_data(xdr, result->new_fattr);
1846 if (unlikely(error))
1847 goto out;
1848 if (status != NFS3_OK)
1849 goto out_status;
1850out:
1851 return error;
1852out_status:
1853 return nfs_stat_to_errno(status);
1854}
893 1855
894 if (ocount != count) { 1856/*
895 dprintk("NFS: READ count doesn't match RPC opaque count.\n"); 1857 * 3.3.15 LINK3res
896 return -errno_NFSERR_IO; 1858 *
897 } 1859 * struct LINK3resok {
1860 * post_op_attr file_attributes;
1861 * wcc_data linkdir_wcc;
1862 * };
1863 *
1864 * struct LINK3resfail {
1865 * post_op_attr file_attributes;
1866 * wcc_data linkdir_wcc;
1867 * };
1868 *
1869 * union LINK3res switch (nfsstat3 status) {
1870 * case NFS3_OK:
1871 * LINK3resok resok;
1872 * default:
1873 * LINK3resfail resfail;
1874 * };
1875 */
1876static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, struct xdr_stream *xdr,
1877 struct nfs3_linkres *result)
1878{
1879 enum nfs_stat status;
1880 int error;
1881
1882 error = decode_nfsstat3(xdr, &status);
1883 if (unlikely(error))
1884 goto out;
1885 error = decode_post_op_attr(xdr, result->fattr);
1886 if (unlikely(error))
1887 goto out;
1888 error = decode_wcc_data(xdr, result->dir_attr);
1889 if (unlikely(error))
1890 goto out;
1891 if (status != NFS3_OK)
1892 goto out_status;
1893out:
1894 return error;
1895out_status:
1896 return nfs_stat_to_errno(status);
1897}
898 1898
899 hdrlen = (u8 *) p - (u8 *) iov->iov_base; 1899/**
900 if (iov->iov_len < hdrlen) { 1900 * nfs3_decode_dirent - Decode a single NFSv3 directory entry stored in
901 dprintk("NFS: READ reply header overflowed:" 1901 * the local page cache
902 "length %Zu > %Zu\n", hdrlen, iov->iov_len); 1902 * @xdr: XDR stream where entry resides
903 return -errno_NFSERR_IO; 1903 * @entry: buffer to fill in with entry data
904 } else if (iov->iov_len != hdrlen) { 1904 * @plus: boolean indicating whether this should be a readdirplus entry
905 dprintk("NFS: READ header is short. iovec will be shifted.\n"); 1905 *
906 xdr_shift_buf(&req->rq_rcv_buf, iov->iov_len - hdrlen); 1906 * Returns zero if successful, otherwise a negative errno value is
907 } 1907 * returned.
1908 *
1909 * This function is not invoked during READDIR reply decoding, but
1910 * rather whenever an application invokes the getdents(2) system call
1911 * on a directory already in our cache.
1912 *
1913 * 3.3.16 entry3
1914 *
1915 * struct entry3 {
1916 * fileid3 fileid;
1917 * filename3 name;
1918 * cookie3 cookie;
1919 * fhandle3 filehandle;
1920 * post_op_attr3 attributes;
1921 * entry3 *nextentry;
1922 * };
1923 *
1924 * 3.3.17 entryplus3
1925 * struct entryplus3 {
1926 * fileid3 fileid;
1927 * filename3 name;
1928 * cookie3 cookie;
1929 * post_op_attr name_attributes;
1930 * post_op_fh3 name_handle;
1931 * entryplus3 *nextentry;
1932 * };
1933 */
1934int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
1935 int plus)
1936{
1937 struct nfs_entry old = *entry;
1938 __be32 *p;
1939 int error;
908 1940
909 recvd = req->rq_rcv_buf.len - hdrlen; 1941 p = xdr_inline_decode(xdr, 4);
910 if (count > recvd) { 1942 if (unlikely(p == NULL))
911 dprintk("NFS: server cheating in read reply: " 1943 goto out_overflow;
912 "count %u > recvd %u\n", count, recvd); 1944 if (*p == xdr_zero) {
913 count = recvd; 1945 p = xdr_inline_decode(xdr, 4);
914 res->eof = 0; 1946 if (unlikely(p == NULL))
1947 goto out_overflow;
1948 if (*p == xdr_zero)
1949 return -EAGAIN;
1950 entry->eof = 1;
1951 return -EBADCOOKIE;
915 } 1952 }
916 1953
917 if (count < res->count) 1954 error = decode_fileid3(xdr, &entry->ino);
918 res->count = count; 1955 if (unlikely(error))
1956 return error;
919 1957
920 return count; 1958 error = decode_inline_filename3(xdr, &entry->name, &entry->len);
921} 1959 if (unlikely(error))
1960 return error;
922 1961
923/* 1962 entry->prev_cookie = entry->cookie;
924 * Decode WRITE response 1963 error = decode_cookie3(xdr, &entry->cookie);
925 */ 1964 if (unlikely(error))
926static int 1965 return error;
927nfs3_xdr_writeres(struct rpc_rqst *req, __be32 *p, struct nfs_writeres *res)
928{
929 int status;
930 1966
931 status = ntohl(*p++); 1967 entry->d_type = DT_UNKNOWN;
932 p = xdr_decode_wcc_data(p, res->fattr);
933 1968
934 if (status != 0) 1969 if (plus) {
935 return nfs_stat_to_errno(status); 1970 entry->fattr->valid = 0;
1971 error = decode_post_op_attr(xdr, entry->fattr);
1972 if (unlikely(error))
1973 return error;
1974 if (entry->fattr->valid & NFS_ATTR_FATTR_V3)
1975 entry->d_type = nfs_umode_to_dtype(entry->fattr->mode);
936 1976
937 res->count = ntohl(*p++); 1977 /* In fact, a post_op_fh3: */
938 res->verf->committed = (enum nfs3_stable_how)ntohl(*p++); 1978 p = xdr_inline_decode(xdr, 4);
939 res->verf->verifier[0] = *p++; 1979 if (unlikely(p == NULL))
940 res->verf->verifier[1] = *p++; 1980 goto out_overflow;
1981 if (*p != xdr_zero) {
1982 error = decode_nfs_fh3(xdr, entry->fh);
1983 if (unlikely(error)) {
1984 if (error == -E2BIG)
1985 goto out_truncated;
1986 return error;
1987 }
1988 } else
1989 zero_nfs_fh3(entry->fh);
1990 }
941 1991
942 return res->count; 1992 return 0;
943}
944 1993
945/* 1994out_overflow:
946 * Decode a CREATE response 1995 print_overflow_msg(__func__, xdr);
947 */ 1996 return -EAGAIN;
948static int 1997out_truncated:
949nfs3_xdr_createres(struct rpc_rqst *req, __be32 *p, struct nfs3_diropres *res) 1998 dprintk("NFS: directory entry contains invalid file handle\n");
950{ 1999 *entry = old;
951 int status; 2000 return -EAGAIN;
952
953 status = ntohl(*p++);
954 if (status == 0) {
955 if (*p++) {
956 if (!(p = xdr_decode_fhandle(p, res->fh)))
957 return -errno_NFSERR_IO;
958 p = xdr_decode_post_op_attr(p, res->fattr);
959 } else {
960 memset(res->fh, 0, sizeof(*res->fh));
961 /* Do decode post_op_attr but set it to NULL */
962 p = xdr_decode_post_op_attr(p, res->fattr);
963 res->fattr->valid = 0;
964 }
965 } else {
966 status = nfs_stat_to_errno(status);
967 }
968 p = xdr_decode_wcc_data(p, res->dir_attr);
969 return status;
970} 2001}
971 2002
972/* 2003/*
973 * Decode RENAME reply 2004 * 3.3.16 READDIR3res
2005 *
2006 * struct dirlist3 {
2007 * entry3 *entries;
2008 * bool eof;
2009 * };
2010 *
2011 * struct READDIR3resok {
2012 * post_op_attr dir_attributes;
2013 * cookieverf3 cookieverf;
2014 * dirlist3 reply;
2015 * };
2016 *
2017 * struct READDIR3resfail {
2018 * post_op_attr dir_attributes;
2019 * };
2020 *
2021 * union READDIR3res switch (nfsstat3 status) {
2022 * case NFS3_OK:
2023 * READDIR3resok resok;
2024 * default:
2025 * READDIR3resfail resfail;
2026 * };
2027 *
2028 * Read the directory contents into the page cache, but otherwise
2029 * don't touch them. The actual decoding is done by nfs3_decode_dirent()
2030 * during subsequent nfs_readdir() calls.
974 */ 2031 */
975static int 2032static int decode_dirlist3(struct xdr_stream *xdr)
976nfs3_xdr_renameres(struct rpc_rqst *req, __be32 *p, struct nfs_renameres *res)
977{ 2033{
978 int status; 2034 u32 recvd, pglen;
2035 size_t hdrlen;
979 2036
980 if ((status = ntohl(*p++)) != 0) 2037 pglen = xdr->buf->page_len;
981 status = nfs_stat_to_errno(status); 2038 hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
982 p = xdr_decode_wcc_data(p, res->old_fattr); 2039 recvd = xdr->buf->len - hdrlen;
983 p = xdr_decode_wcc_data(p, res->new_fattr); 2040 if (unlikely(pglen > recvd))
984 return status; 2041 goto out_cheating;
2042out:
2043 xdr_read_pages(xdr, pglen);
2044 return pglen;
2045out_cheating:
2046 dprintk("NFS: server cheating in readdir result: "
2047 "pglen %u > recvd %u\n", pglen, recvd);
2048 pglen = recvd;
2049 goto out;
985} 2050}
986 2051
987/* 2052static int decode_readdir3resok(struct xdr_stream *xdr,
988 * Decode LINK reply 2053 struct nfs3_readdirres *result)
989 */
990static int
991nfs3_xdr_linkres(struct rpc_rqst *req, __be32 *p, struct nfs3_linkres *res)
992{ 2054{
993 int status; 2055 int error;
2056
2057 error = decode_post_op_attr(xdr, result->dir_attr);
2058 if (unlikely(error))
2059 goto out;
2060 /* XXX: do we need to check if result->verf != NULL ? */
2061 error = decode_cookieverf3(xdr, result->verf);
2062 if (unlikely(error))
2063 goto out;
2064 error = decode_dirlist3(xdr);
2065out:
2066 return error;
2067}
994 2068
995 if ((status = ntohl(*p++)) != 0) 2069static int nfs3_xdr_dec_readdir3res(struct rpc_rqst *req,
996 status = nfs_stat_to_errno(status); 2070 struct xdr_stream *xdr,
997 p = xdr_decode_post_op_attr(p, res->fattr); 2071 struct nfs3_readdirres *result)
998 p = xdr_decode_wcc_data(p, res->dir_attr); 2072{
999 return status; 2073 enum nfs_stat status;
2074 int error;
2075
2076 error = decode_nfsstat3(xdr, &status);
2077 if (unlikely(error))
2078 goto out;
2079 if (status != NFS3_OK)
2080 goto out_default;
2081 error = decode_readdir3resok(xdr, result);
2082out:
2083 return error;
2084out_default:
2085 error = decode_post_op_attr(xdr, result->dir_attr);
2086 if (unlikely(error))
2087 goto out;
2088 return nfs_stat_to_errno(status);
1000} 2089}
1001 2090
1002/* 2091/*
1003 * Decode FSSTAT reply 2092 * 3.3.18 FSSTAT3res
2093 *
2094 * struct FSSTAT3resok {
2095 * post_op_attr obj_attributes;
2096 * size3 tbytes;
2097 * size3 fbytes;
2098 * size3 abytes;
2099 * size3 tfiles;
2100 * size3 ffiles;
2101 * size3 afiles;
2102 * uint32 invarsec;
2103 * };
2104 *
2105 * struct FSSTAT3resfail {
2106 * post_op_attr obj_attributes;
2107 * };
2108 *
2109 * union FSSTAT3res switch (nfsstat3 status) {
2110 * case NFS3_OK:
2111 * FSSTAT3resok resok;
2112 * default:
2113 * FSSTAT3resfail resfail;
2114 * };
1004 */ 2115 */
1005static int 2116static int decode_fsstat3resok(struct xdr_stream *xdr,
1006nfs3_xdr_fsstatres(struct rpc_rqst *req, __be32 *p, struct nfs_fsstat *res) 2117 struct nfs_fsstat *result)
1007{ 2118{
1008 int status; 2119 __be32 *p;
1009
1010 status = ntohl(*p++);
1011
1012 p = xdr_decode_post_op_attr(p, res->fattr);
1013 if (status != 0)
1014 return nfs_stat_to_errno(status);
1015
1016 p = xdr_decode_hyper(p, &res->tbytes);
1017 p = xdr_decode_hyper(p, &res->fbytes);
1018 p = xdr_decode_hyper(p, &res->abytes);
1019 p = xdr_decode_hyper(p, &res->tfiles);
1020 p = xdr_decode_hyper(p, &res->ffiles);
1021 p = xdr_decode_hyper(p, &res->afiles);
1022 2120
2121 p = xdr_inline_decode(xdr, 8 * 6 + 4);
2122 if (unlikely(p == NULL))
2123 goto out_overflow;
2124 p = xdr_decode_size3(p, &result->tbytes);
2125 p = xdr_decode_size3(p, &result->fbytes);
2126 p = xdr_decode_size3(p, &result->abytes);
2127 p = xdr_decode_size3(p, &result->tfiles);
2128 p = xdr_decode_size3(p, &result->ffiles);
2129 xdr_decode_size3(p, &result->afiles);
1023 /* ignore invarsec */ 2130 /* ignore invarsec */
1024 return 0; 2131 return 0;
2132out_overflow:
2133 print_overflow_msg(__func__, xdr);
2134 return -EIO;
2135}
2136
2137static int nfs3_xdr_dec_fsstat3res(struct rpc_rqst *req,
2138 struct xdr_stream *xdr,
2139 struct nfs_fsstat *result)
2140{
2141 enum nfs_stat status;
2142 int error;
2143
2144 error = decode_nfsstat3(xdr, &status);
2145 if (unlikely(error))
2146 goto out;
2147 error = decode_post_op_attr(xdr, result->fattr);
2148 if (unlikely(error))
2149 goto out;
2150 if (status != NFS3_OK)
2151 goto out_status;
2152 error = decode_fsstat3resok(xdr, result);
2153out:
2154 return error;
2155out_status:
2156 return nfs_stat_to_errno(status);
1025} 2157}
1026 2158
1027/* 2159/*
1028 * Decode FSINFO reply 2160 * 3.3.19 FSINFO3res
2161 *
2162 * struct FSINFO3resok {
2163 * post_op_attr obj_attributes;
2164 * uint32 rtmax;
2165 * uint32 rtpref;
2166 * uint32 rtmult;
2167 * uint32 wtmax;
2168 * uint32 wtpref;
2169 * uint32 wtmult;
2170 * uint32 dtpref;
2171 * size3 maxfilesize;
2172 * nfstime3 time_delta;
2173 * uint32 properties;
2174 * };
2175 *
2176 * struct FSINFO3resfail {
2177 * post_op_attr obj_attributes;
2178 * };
2179 *
2180 * union FSINFO3res switch (nfsstat3 status) {
2181 * case NFS3_OK:
2182 * FSINFO3resok resok;
2183 * default:
2184 * FSINFO3resfail resfail;
2185 * };
1029 */ 2186 */
1030static int 2187static int decode_fsinfo3resok(struct xdr_stream *xdr,
1031nfs3_xdr_fsinfores(struct rpc_rqst *req, __be32 *p, struct nfs_fsinfo *res) 2188 struct nfs_fsinfo *result)
1032{ 2189{
1033 int status; 2190 __be32 *p;
1034
1035 status = ntohl(*p++);
1036
1037 p = xdr_decode_post_op_attr(p, res->fattr);
1038 if (status != 0)
1039 return nfs_stat_to_errno(status);
1040 2191
1041 res->rtmax = ntohl(*p++); 2192 p = xdr_inline_decode(xdr, 4 * 7 + 8 + 8 + 4);
1042 res->rtpref = ntohl(*p++); 2193 if (unlikely(p == NULL))
1043 res->rtmult = ntohl(*p++); 2194 goto out_overflow;
1044 res->wtmax = ntohl(*p++); 2195 result->rtmax = be32_to_cpup(p++);
1045 res->wtpref = ntohl(*p++); 2196 result->rtpref = be32_to_cpup(p++);
1046 res->wtmult = ntohl(*p++); 2197 result->rtmult = be32_to_cpup(p++);
1047 res->dtpref = ntohl(*p++); 2198 result->wtmax = be32_to_cpup(p++);
1048 p = xdr_decode_hyper(p, &res->maxfilesize); 2199 result->wtpref = be32_to_cpup(p++);
1049 p = xdr_decode_time3(p, &res->time_delta); 2200 result->wtmult = be32_to_cpup(p++);
2201 result->dtpref = be32_to_cpup(p++);
2202 p = xdr_decode_size3(p, &result->maxfilesize);
2203 xdr_decode_nfstime3(p, &result->time_delta);
1050 2204
1051 /* ignore properties */ 2205 /* ignore properties */
1052 res->lease_time = 0; 2206 result->lease_time = 0;
1053 return 0; 2207 return 0;
2208out_overflow:
2209 print_overflow_msg(__func__, xdr);
2210 return -EIO;
2211}
2212
2213static int nfs3_xdr_dec_fsinfo3res(struct rpc_rqst *req,
2214 struct xdr_stream *xdr,
2215 struct nfs_fsinfo *result)
2216{
2217 enum nfs_stat status;
2218 int error;
2219
2220 error = decode_nfsstat3(xdr, &status);
2221 if (unlikely(error))
2222 goto out;
2223 error = decode_post_op_attr(xdr, result->fattr);
2224 if (unlikely(error))
2225 goto out;
2226 if (status != NFS3_OK)
2227 goto out_status;
2228 error = decode_fsinfo3resok(xdr, result);
2229out:
2230 return error;
2231out_status:
2232 return nfs_stat_to_errno(status);
1054} 2233}
1055 2234
1056/* 2235/*
1057 * Decode PATHCONF reply 2236 * 3.3.20 PATHCONF3res
2237 *
2238 * struct PATHCONF3resok {
2239 * post_op_attr obj_attributes;
2240 * uint32 linkmax;
2241 * uint32 name_max;
2242 * bool no_trunc;
2243 * bool chown_restricted;
2244 * bool case_insensitive;
2245 * bool case_preserving;
2246 * };
2247 *
2248 * struct PATHCONF3resfail {
2249 * post_op_attr obj_attributes;
2250 * };
2251 *
2252 * union PATHCONF3res switch (nfsstat3 status) {
2253 * case NFS3_OK:
2254 * PATHCONF3resok resok;
2255 * default:
2256 * PATHCONF3resfail resfail;
2257 * };
1058 */ 2258 */
1059static int 2259static int decode_pathconf3resok(struct xdr_stream *xdr,
1060nfs3_xdr_pathconfres(struct rpc_rqst *req, __be32 *p, struct nfs_pathconf *res) 2260 struct nfs_pathconf *result)
1061{ 2261{
1062 int status; 2262 __be32 *p;
1063
1064 status = ntohl(*p++);
1065
1066 p = xdr_decode_post_op_attr(p, res->fattr);
1067 if (status != 0)
1068 return nfs_stat_to_errno(status);
1069 res->max_link = ntohl(*p++);
1070 res->max_namelen = ntohl(*p++);
1071 2263
2264 p = xdr_inline_decode(xdr, 4 * 6);
2265 if (unlikely(p == NULL))
2266 goto out_overflow;
2267 result->max_link = be32_to_cpup(p++);
2268 result->max_namelen = be32_to_cpup(p);
1072 /* ignore remaining fields */ 2269 /* ignore remaining fields */
1073 return 0; 2270 return 0;
2271out_overflow:
2272 print_overflow_msg(__func__, xdr);
2273 return -EIO;
2274}
2275
2276static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req,
2277 struct xdr_stream *xdr,
2278 struct nfs_pathconf *result)
2279{
2280 enum nfs_stat status;
2281 int error;
2282
2283 error = decode_nfsstat3(xdr, &status);
2284 if (unlikely(error))
2285 goto out;
2286 error = decode_post_op_attr(xdr, result->fattr);
2287 if (unlikely(error))
2288 goto out;
2289 if (status != NFS3_OK)
2290 goto out_status;
2291 error = decode_pathconf3resok(xdr, result);
2292out:
2293 return error;
2294out_status:
2295 return nfs_stat_to_errno(status);
1074} 2296}
1075 2297
1076/* 2298/*
1077 * Decode COMMIT reply 2299 * 3.3.21 COMMIT3res
2300 *
2301 * struct COMMIT3resok {
2302 * wcc_data file_wcc;
2303 * writeverf3 verf;
2304 * };
2305 *
2306 * struct COMMIT3resfail {
2307 * wcc_data file_wcc;
2308 * };
2309 *
2310 * union COMMIT3res switch (nfsstat3 status) {
2311 * case NFS3_OK:
2312 * COMMIT3resok resok;
2313 * default:
2314 * COMMIT3resfail resfail;
2315 * };
1078 */ 2316 */
1079static int 2317static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
1080nfs3_xdr_commitres(struct rpc_rqst *req, __be32 *p, struct nfs_writeres *res) 2318 struct xdr_stream *xdr,
2319 struct nfs_writeres *result)
1081{ 2320{
1082 int status; 2321 enum nfs_stat status;
1083 2322 int error;
1084 status = ntohl(*p++); 2323
1085 p = xdr_decode_wcc_data(p, res->fattr); 2324 error = decode_nfsstat3(xdr, &status);
1086 if (status != 0) 2325 if (unlikely(error))
1087 return nfs_stat_to_errno(status); 2326 goto out;
1088 2327 error = decode_wcc_data(xdr, result->fattr);
1089 res->verf->verifier[0] = *p++; 2328 if (unlikely(error))
1090 res->verf->verifier[1] = *p++; 2329 goto out;
1091 return 0; 2330 if (status != NFS3_OK)
2331 goto out_status;
2332 error = decode_writeverf3(xdr, result->verf->verifier);
2333out:
2334 return error;
2335out_status:
2336 return nfs_stat_to_errno(status);
1092} 2337}
1093 2338
1094#ifdef CONFIG_NFS_V3_ACL 2339#ifdef CONFIG_NFS_V3_ACL
1095/* 2340
1096 * Decode GETACL reply 2341static inline int decode_getacl3resok(struct xdr_stream *xdr,
1097 */ 2342 struct nfs3_getaclres *result)
1098static int
1099nfs3_xdr_getaclres(struct rpc_rqst *req, __be32 *p,
1100 struct nfs3_getaclres *res)
1101{ 2343{
1102 struct xdr_buf *buf = &req->rq_rcv_buf;
1103 int status = ntohl(*p++);
1104 struct posix_acl **acl; 2344 struct posix_acl **acl;
1105 unsigned int *aclcnt; 2345 unsigned int *aclcnt;
1106 int err, base; 2346 size_t hdrlen;
1107 2347 int error;
1108 if (status != 0) 2348
1109 return nfs_stat_to_errno(status); 2349 error = decode_post_op_attr(xdr, result->fattr);
1110 p = xdr_decode_post_op_attr(p, res->fattr); 2350 if (unlikely(error))
1111 res->mask = ntohl(*p++); 2351 goto out;
1112 if (res->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) 2352 error = decode_uint32(xdr, &result->mask);
1113 return -EINVAL; 2353 if (unlikely(error))
1114 base = (char *)p - (char *)req->rq_rcv_buf.head->iov_base; 2354 goto out;
1115 2355 error = -EINVAL;
1116 acl = (res->mask & NFS_ACL) ? &res->acl_access : NULL; 2356 if (result->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
1117 aclcnt = (res->mask & NFS_ACLCNT) ? &res->acl_access_count : NULL; 2357 goto out;
1118 err = nfsacl_decode(buf, base, aclcnt, acl); 2358
1119 2359 hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
1120 acl = (res->mask & NFS_DFACL) ? &res->acl_default : NULL; 2360
1121 aclcnt = (res->mask & NFS_DFACLCNT) ? &res->acl_default_count : NULL; 2361 acl = NULL;
1122 if (err > 0) 2362 if (result->mask & NFS_ACL)
1123 err = nfsacl_decode(buf, base + err, aclcnt, acl); 2363 acl = &result->acl_access;
1124 return (err > 0) ? 0 : err; 2364 aclcnt = NULL;
2365 if (result->mask & NFS_ACLCNT)
2366 aclcnt = &result->acl_access_count;
2367 error = nfsacl_decode(xdr->buf, hdrlen, aclcnt, acl);
2368 if (unlikely(error <= 0))
2369 goto out;
2370
2371 acl = NULL;
2372 if (result->mask & NFS_DFACL)
2373 acl = &result->acl_default;
2374 aclcnt = NULL;
2375 if (result->mask & NFS_DFACLCNT)
2376 aclcnt = &result->acl_default_count;
2377 error = nfsacl_decode(xdr->buf, hdrlen + error, aclcnt, acl);
2378 if (unlikely(error <= 0))
2379 return error;
2380 error = 0;
2381out:
2382 return error;
1125} 2383}
1126 2384
1127/* 2385static int nfs3_xdr_dec_getacl3res(struct rpc_rqst *req,
1128 * Decode setacl reply. 2386 struct xdr_stream *xdr,
1129 */ 2387 struct nfs3_getaclres *result)
1130static int
1131nfs3_xdr_setaclres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
1132{ 2388{
1133 int status = ntohl(*p++); 2389 enum nfs_stat status;
2390 int error;
2391
2392 error = decode_nfsstat3(xdr, &status);
2393 if (unlikely(error))
2394 goto out;
2395 if (status != NFS3_OK)
2396 goto out_default;
2397 error = decode_getacl3resok(xdr, result);
2398out:
2399 return error;
2400out_default:
2401 return nfs_stat_to_errno(status);
2402}
1134 2403
1135 if (status) 2404static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req,
1136 return nfs_stat_to_errno(status); 2405 struct xdr_stream *xdr,
1137 xdr_decode_post_op_attr(p, fattr); 2406 struct nfs_fattr *result)
1138 return 0; 2407{
2408 enum nfs_stat status;
2409 int error;
2410
2411 error = decode_nfsstat3(xdr, &status);
2412 if (unlikely(error))
2413 goto out;
2414 if (status != NFS3_OK)
2415 goto out_default;
2416 error = decode_post_op_attr(xdr, result);
2417out:
2418 return error;
2419out_default:
2420 return nfs_stat_to_errno(status);
1139} 2421}
2422
1140#endif /* CONFIG_NFS_V3_ACL */ 2423#endif /* CONFIG_NFS_V3_ACL */
1141 2424
1142#define PROC(proc, argtype, restype, timer) \ 2425#define PROC(proc, argtype, restype, timer) \
1143[NFS3PROC_##proc] = { \ 2426[NFS3PROC_##proc] = { \
1144 .p_proc = NFS3PROC_##proc, \ 2427 .p_proc = NFS3PROC_##proc, \
1145 .p_encode = (kxdrproc_t) nfs3_xdr_##argtype, \ 2428 .p_encode = (kxdreproc_t)nfs3_xdr_enc_##argtype##3args, \
1146 .p_decode = (kxdrproc_t) nfs3_xdr_##restype, \ 2429 .p_decode = (kxdrdproc_t)nfs3_xdr_dec_##restype##3res, \
1147 .p_arglen = NFS3_##argtype##_sz, \ 2430 .p_arglen = NFS3_##argtype##args_sz, \
1148 .p_replen = NFS3_##restype##_sz, \ 2431 .p_replen = NFS3_##restype##res_sz, \
1149 .p_timer = timer, \ 2432 .p_timer = timer, \
1150 .p_statidx = NFS3PROC_##proc, \ 2433 .p_statidx = NFS3PROC_##proc, \
1151 .p_name = #proc, \ 2434 .p_name = #proc, \
1152 } 2435 }
1153 2436
1154struct rpc_procinfo nfs3_procedures[] = { 2437struct rpc_procinfo nfs3_procedures[] = {
1155 PROC(GETATTR, fhandle, attrstat, 1), 2438 PROC(GETATTR, getattr, getattr, 1),
1156 PROC(SETATTR, sattrargs, wccstat, 0), 2439 PROC(SETATTR, setattr, setattr, 0),
1157 PROC(LOOKUP, diropargs, lookupres, 2), 2440 PROC(LOOKUP, lookup, lookup, 2),
1158 PROC(ACCESS, accessargs, accessres, 1), 2441 PROC(ACCESS, access, access, 1),
1159 PROC(READLINK, readlinkargs, readlinkres, 3), 2442 PROC(READLINK, readlink, readlink, 3),
1160 PROC(READ, readargs, readres, 3), 2443 PROC(READ, read, read, 3),
1161 PROC(WRITE, writeargs, writeres, 4), 2444 PROC(WRITE, write, write, 4),
1162 PROC(CREATE, createargs, createres, 0), 2445 PROC(CREATE, create, create, 0),
1163 PROC(MKDIR, mkdirargs, createres, 0), 2446 PROC(MKDIR, mkdir, create, 0),
1164 PROC(SYMLINK, symlinkargs, createres, 0), 2447 PROC(SYMLINK, symlink, create, 0),
1165 PROC(MKNOD, mknodargs, createres, 0), 2448 PROC(MKNOD, mknod, create, 0),
1166 PROC(REMOVE, removeargs, removeres, 0), 2449 PROC(REMOVE, remove, remove, 0),
1167 PROC(RMDIR, diropargs, wccstat, 0), 2450 PROC(RMDIR, lookup, setattr, 0),
1168 PROC(RENAME, renameargs, renameres, 0), 2451 PROC(RENAME, rename, rename, 0),
1169 PROC(LINK, linkargs, linkres, 0), 2452 PROC(LINK, link, link, 0),
1170 PROC(READDIR, readdirargs, readdirres, 3), 2453 PROC(READDIR, readdir, readdir, 3),
1171 PROC(READDIRPLUS, readdirargs, readdirres, 3), 2454 PROC(READDIRPLUS, readdirplus, readdir, 3),
1172 PROC(FSSTAT, fhandle, fsstatres, 0), 2455 PROC(FSSTAT, getattr, fsstat, 0),
1173 PROC(FSINFO, fhandle, fsinfores, 0), 2456 PROC(FSINFO, getattr, fsinfo, 0),
1174 PROC(PATHCONF, fhandle, pathconfres, 0), 2457 PROC(PATHCONF, getattr, pathconf, 0),
1175 PROC(COMMIT, commitargs, commitres, 5), 2458 PROC(COMMIT, commit, commit, 5),
1176}; 2459};
1177 2460
1178struct rpc_version nfs_version3 = { 2461struct rpc_version nfs_version3 = {
@@ -1185,8 +2468,8 @@ struct rpc_version nfs_version3 = {
1185static struct rpc_procinfo nfs3_acl_procedures[] = { 2468static struct rpc_procinfo nfs3_acl_procedures[] = {
1186 [ACLPROC3_GETACL] = { 2469 [ACLPROC3_GETACL] = {
1187 .p_proc = ACLPROC3_GETACL, 2470 .p_proc = ACLPROC3_GETACL,
1188 .p_encode = (kxdrproc_t) nfs3_xdr_getaclargs, 2471 .p_encode = (kxdreproc_t)nfs3_xdr_enc_getacl3args,
1189 .p_decode = (kxdrproc_t) nfs3_xdr_getaclres, 2472 .p_decode = (kxdrdproc_t)nfs3_xdr_dec_getacl3res,
1190 .p_arglen = ACL3_getaclargs_sz, 2473 .p_arglen = ACL3_getaclargs_sz,
1191 .p_replen = ACL3_getaclres_sz, 2474 .p_replen = ACL3_getaclres_sz,
1192 .p_timer = 1, 2475 .p_timer = 1,
@@ -1194,8 +2477,8 @@ static struct rpc_procinfo nfs3_acl_procedures[] = {
1194 }, 2477 },
1195 [ACLPROC3_SETACL] = { 2478 [ACLPROC3_SETACL] = {
1196 .p_proc = ACLPROC3_SETACL, 2479 .p_proc = ACLPROC3_SETACL,
1197 .p_encode = (kxdrproc_t) nfs3_xdr_setaclargs, 2480 .p_encode = (kxdreproc_t)nfs3_xdr_enc_setacl3args,
1198 .p_decode = (kxdrproc_t) nfs3_xdr_setaclres, 2481 .p_decode = (kxdrdproc_t)nfs3_xdr_dec_setacl3res,
1199 .p_arglen = ACL3_setaclargs_sz, 2482 .p_arglen = ACL3_setaclargs_sz,
1200 .p_replen = ACL3_setaclres_sz, 2483 .p_replen = ACL3_setaclres_sz,
1201 .p_timer = 0, 2484 .p_timer = 0,
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 9fa496387fd..7a747407314 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -44,6 +44,7 @@ enum nfs4_client_state {
44 NFS4CLNT_RECLAIM_REBOOT, 44 NFS4CLNT_RECLAIM_REBOOT,
45 NFS4CLNT_RECLAIM_NOGRACE, 45 NFS4CLNT_RECLAIM_NOGRACE,
46 NFS4CLNT_DELEGRETURN, 46 NFS4CLNT_DELEGRETURN,
47 NFS4CLNT_LAYOUTRECALL,
47 NFS4CLNT_SESSION_RESET, 48 NFS4CLNT_SESSION_RESET,
48 NFS4CLNT_RECALL_SLOT, 49 NFS4CLNT_RECALL_SLOT,
49}; 50};
@@ -109,7 +110,7 @@ struct nfs_unique_id {
109struct nfs4_state_owner { 110struct nfs4_state_owner {
110 struct nfs_unique_id so_owner_id; 111 struct nfs_unique_id so_owner_id;
111 struct nfs_server *so_server; 112 struct nfs_server *so_server;
112 struct rb_node so_client_node; 113 struct rb_node so_server_node;
113 114
114 struct rpc_cred *so_cred; /* Associated cred */ 115 struct rpc_cred *so_cred; /* Associated cred */
115 116
@@ -227,12 +228,6 @@ struct nfs4_state_maintenance_ops {
227extern const struct dentry_operations nfs4_dentry_operations; 228extern const struct dentry_operations nfs4_dentry_operations;
228extern const struct inode_operations nfs4_dir_inode_operations; 229extern const struct inode_operations nfs4_dir_inode_operations;
229 230
230/* inode.c */
231extern ssize_t nfs4_getxattr(struct dentry *, const char *, void *, size_t);
232extern int nfs4_setxattr(struct dentry *, const char *, const void *, size_t, int);
233extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t);
234
235
236/* nfs4proc.c */ 231/* nfs4proc.c */
237extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); 232extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
238extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); 233extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *);
@@ -241,11 +236,12 @@ extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
241extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); 236extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
242extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); 237extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
243extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); 238extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
244extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait); 239extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc);
245extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); 240extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
246extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, 241extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
247 struct nfs4_fs_locations *fs_locations, struct page *page); 242 struct nfs4_fs_locations *fs_locations, struct page *page);
248extern void nfs4_release_lockowner(const struct nfs4_lock_state *); 243extern void nfs4_release_lockowner(const struct nfs4_lock_state *);
244extern const struct xattr_handler *nfs4_xattr_handlers[];
249 245
250#if defined(CONFIG_NFS_V4_1) 246#if defined(CONFIG_NFS_V4_1)
251static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server) 247static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
@@ -331,7 +327,6 @@ extern void nfs_free_seqid(struct nfs_seqid *seqid);
331extern const nfs4_stateid zero_stateid; 327extern const nfs4_stateid zero_stateid;
332 328
333/* nfs4xdr.c */ 329/* nfs4xdr.c */
334extern __be32 *nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
335extern struct rpc_procinfo nfs4_procedures[]; 330extern struct rpc_procinfo nfs4_procedures[];
336 331
337struct nfs4_mount_data; 332struct nfs4_mount_data;
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 2e92f0d8d65..23f930caf1e 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -82,7 +82,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
82{ 82{
83 struct nfs4_file_layout_dsaddr *dsaddr; 83 struct nfs4_file_layout_dsaddr *dsaddr;
84 int status = -EINVAL; 84 int status = -EINVAL;
85 struct nfs_server *nfss = NFS_SERVER(lo->inode); 85 struct nfs_server *nfss = NFS_SERVER(lo->plh_inode);
86 86
87 dprintk("--> %s\n", __func__); 87 dprintk("--> %s\n", __func__);
88 88
@@ -101,7 +101,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
101 /* find and reference the deviceid */ 101 /* find and reference the deviceid */
102 dsaddr = nfs4_fl_find_get_deviceid(nfss->nfs_client, id); 102 dsaddr = nfs4_fl_find_get_deviceid(nfss->nfs_client, id);
103 if (dsaddr == NULL) { 103 if (dsaddr == NULL) {
104 dsaddr = get_device_info(lo->inode, id); 104 dsaddr = get_device_info(lo->plh_inode, id);
105 if (dsaddr == NULL) 105 if (dsaddr == NULL)
106 goto out; 106 goto out;
107 } 107 }
@@ -243,7 +243,7 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
243static void 243static void
244filelayout_free_lseg(struct pnfs_layout_segment *lseg) 244filelayout_free_lseg(struct pnfs_layout_segment *lseg)
245{ 245{
246 struct nfs_server *nfss = NFS_SERVER(lseg->layout->inode); 246 struct nfs_server *nfss = NFS_SERVER(lseg->pls_layout->plh_inode);
247 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); 247 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
248 248
249 dprintk("--> %s\n", __func__); 249 dprintk("--> %s\n", __func__);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4435e5e1f90..9d992b0346e 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -49,6 +49,7 @@
49#include <linux/mount.h> 49#include <linux/mount.h>
50#include <linux/module.h> 50#include <linux/module.h>
51#include <linux/sunrpc/bc_xprt.h> 51#include <linux/sunrpc/bc_xprt.h>
52#include <linux/xattr.h>
52 53
53#include "nfs4_fs.h" 54#include "nfs4_fs.h"
54#include "delegation.h" 55#include "delegation.h"
@@ -355,9 +356,9 @@ nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *free_slot)
355} 356}
356 357
357/* 358/*
358 * Signal state manager thread if session is drained 359 * Signal state manager thread if session fore channel is drained
359 */ 360 */
360static void nfs41_check_drain_session_complete(struct nfs4_session *ses) 361static void nfs4_check_drain_fc_complete(struct nfs4_session *ses)
361{ 362{
362 struct rpc_task *task; 363 struct rpc_task *task;
363 364
@@ -371,8 +372,20 @@ static void nfs41_check_drain_session_complete(struct nfs4_session *ses)
371 if (ses->fc_slot_table.highest_used_slotid != -1) 372 if (ses->fc_slot_table.highest_used_slotid != -1)
372 return; 373 return;
373 374
374 dprintk("%s COMPLETE: Session Drained\n", __func__); 375 dprintk("%s COMPLETE: Session Fore Channel Drained\n", __func__);
375 complete(&ses->complete); 376 complete(&ses->fc_slot_table.complete);
377}
378
379/*
380 * Signal state manager thread if session back channel is drained
381 */
382void nfs4_check_drain_bc_complete(struct nfs4_session *ses)
383{
384 if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state) ||
385 ses->bc_slot_table.highest_used_slotid != -1)
386 return;
387 dprintk("%s COMPLETE: Session Back Channel Drained\n", __func__);
388 complete(&ses->bc_slot_table.complete);
376} 389}
377 390
378static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) 391static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
@@ -389,7 +402,7 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
389 402
390 spin_lock(&tbl->slot_tbl_lock); 403 spin_lock(&tbl->slot_tbl_lock);
391 nfs4_free_slot(tbl, res->sr_slot); 404 nfs4_free_slot(tbl, res->sr_slot);
392 nfs41_check_drain_session_complete(res->sr_session); 405 nfs4_check_drain_fc_complete(res->sr_session);
393 spin_unlock(&tbl->slot_tbl_lock); 406 spin_unlock(&tbl->slot_tbl_lock);
394 res->sr_slot = NULL; 407 res->sr_slot = NULL;
395} 408}
@@ -1826,6 +1839,8 @@ struct nfs4_closedata {
1826 struct nfs_closeres res; 1839 struct nfs_closeres res;
1827 struct nfs_fattr fattr; 1840 struct nfs_fattr fattr;
1828 unsigned long timestamp; 1841 unsigned long timestamp;
1842 bool roc;
1843 u32 roc_barrier;
1829}; 1844};
1830 1845
1831static void nfs4_free_closedata(void *data) 1846static void nfs4_free_closedata(void *data)
@@ -1833,6 +1848,8 @@ static void nfs4_free_closedata(void *data)
1833 struct nfs4_closedata *calldata = data; 1848 struct nfs4_closedata *calldata = data;
1834 struct nfs4_state_owner *sp = calldata->state->owner; 1849 struct nfs4_state_owner *sp = calldata->state->owner;
1835 1850
1851 if (calldata->roc)
1852 pnfs_roc_release(calldata->state->inode);
1836 nfs4_put_open_state(calldata->state); 1853 nfs4_put_open_state(calldata->state);
1837 nfs_free_seqid(calldata->arg.seqid); 1854 nfs_free_seqid(calldata->arg.seqid);
1838 nfs4_put_state_owner(sp); 1855 nfs4_put_state_owner(sp);
@@ -1865,6 +1882,9 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
1865 */ 1882 */
1866 switch (task->tk_status) { 1883 switch (task->tk_status) {
1867 case 0: 1884 case 0:
1885 if (calldata->roc)
1886 pnfs_roc_set_barrier(state->inode,
1887 calldata->roc_barrier);
1868 nfs_set_open_stateid(state, &calldata->res.stateid, 0); 1888 nfs_set_open_stateid(state, &calldata->res.stateid, 0);
1869 renew_lease(server, calldata->timestamp); 1889 renew_lease(server, calldata->timestamp);
1870 nfs4_close_clear_stateid_flags(state, 1890 nfs4_close_clear_stateid_flags(state,
@@ -1917,8 +1937,15 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
1917 return; 1937 return;
1918 } 1938 }
1919 1939
1920 if (calldata->arg.fmode == 0) 1940 if (calldata->arg.fmode == 0) {
1921 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE]; 1941 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE];
1942 if (calldata->roc &&
1943 pnfs_roc_drain(calldata->inode, &calldata->roc_barrier)) {
1944 rpc_sleep_on(&NFS_SERVER(calldata->inode)->roc_rpcwaitq,
1945 task, NULL);
1946 return;
1947 }
1948 }
1922 1949
1923 nfs_fattr_init(calldata->res.fattr); 1950 nfs_fattr_init(calldata->res.fattr);
1924 calldata->timestamp = jiffies; 1951 calldata->timestamp = jiffies;
@@ -1946,7 +1973,7 @@ static const struct rpc_call_ops nfs4_close_ops = {
1946 * 1973 *
1947 * NOTE: Caller must be holding the sp->so_owner semaphore! 1974 * NOTE: Caller must be holding the sp->so_owner semaphore!
1948 */ 1975 */
1949int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait) 1976int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc)
1950{ 1977{
1951 struct nfs_server *server = NFS_SERVER(state->inode); 1978 struct nfs_server *server = NFS_SERVER(state->inode);
1952 struct nfs4_closedata *calldata; 1979 struct nfs4_closedata *calldata;
@@ -1981,11 +2008,12 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, i
1981 calldata->res.fattr = &calldata->fattr; 2008 calldata->res.fattr = &calldata->fattr;
1982 calldata->res.seqid = calldata->arg.seqid; 2009 calldata->res.seqid = calldata->arg.seqid;
1983 calldata->res.server = server; 2010 calldata->res.server = server;
2011 calldata->roc = roc;
1984 path_get(path); 2012 path_get(path);
1985 calldata->path = *path; 2013 calldata->path = *path;
1986 2014
1987 msg.rpc_argp = &calldata->arg, 2015 msg.rpc_argp = &calldata->arg;
1988 msg.rpc_resp = &calldata->res, 2016 msg.rpc_resp = &calldata->res;
1989 task_setup_data.callback_data = calldata; 2017 task_setup_data.callback_data = calldata;
1990 task = rpc_run_task(&task_setup_data); 2018 task = rpc_run_task(&task_setup_data);
1991 if (IS_ERR(task)) 2019 if (IS_ERR(task))
@@ -1998,6 +2026,8 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, i
1998out_free_calldata: 2026out_free_calldata:
1999 kfree(calldata); 2027 kfree(calldata);
2000out: 2028out:
2029 if (roc)
2030 pnfs_roc_release(state->inode);
2001 nfs4_put_open_state(state); 2031 nfs4_put_open_state(state);
2002 nfs4_put_state_owner(sp); 2032 nfs4_put_state_owner(sp);
2003 return status; 2033 return status;
@@ -2486,6 +2516,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
2486 path = &ctx->path; 2516 path = &ctx->path;
2487 fmode = ctx->mode; 2517 fmode = ctx->mode;
2488 } 2518 }
2519 sattr->ia_mode &= ~current_umask();
2489 state = nfs4_do_open(dir, path, fmode, flags, sattr, cred); 2520 state = nfs4_do_open(dir, path, fmode, flags, sattr, cred);
2490 d_drop(dentry); 2521 d_drop(dentry);
2491 if (IS_ERR(state)) { 2522 if (IS_ERR(state)) {
@@ -2816,6 +2847,8 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
2816{ 2847{
2817 struct nfs4_exception exception = { }; 2848 struct nfs4_exception exception = { };
2818 int err; 2849 int err;
2850
2851 sattr->ia_mode &= ~current_umask();
2819 do { 2852 do {
2820 err = nfs4_handle_exception(NFS_SERVER(dir), 2853 err = nfs4_handle_exception(NFS_SERVER(dir),
2821 _nfs4_proc_mkdir(dir, dentry, sattr), 2854 _nfs4_proc_mkdir(dir, dentry, sattr),
@@ -2916,6 +2949,8 @@ static int nfs4_proc_mknod(struct inode *dir, struct dentry *dentry,
2916{ 2949{
2917 struct nfs4_exception exception = { }; 2950 struct nfs4_exception exception = { };
2918 int err; 2951 int err;
2952
2953 sattr->ia_mode &= ~current_umask();
2919 do { 2954 do {
2920 err = nfs4_handle_exception(NFS_SERVER(dir), 2955 err = nfs4_handle_exception(NFS_SERVER(dir),
2921 _nfs4_proc_mknod(dir, dentry, sattr, rdev), 2956 _nfs4_proc_mknod(dir, dentry, sattr, rdev),
@@ -3478,6 +3513,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
3478 struct nfs4_setclientid setclientid = { 3513 struct nfs4_setclientid setclientid = {
3479 .sc_verifier = &sc_verifier, 3514 .sc_verifier = &sc_verifier,
3480 .sc_prog = program, 3515 .sc_prog = program,
3516 .sc_cb_ident = clp->cl_cb_ident,
3481 }; 3517 };
3482 struct rpc_message msg = { 3518 struct rpc_message msg = {
3483 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID], 3519 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID],
@@ -3517,7 +3553,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
3517 if (signalled()) 3553 if (signalled())
3518 break; 3554 break;
3519 if (loop++ & 1) 3555 if (loop++ & 1)
3520 ssleep(clp->cl_lease_time + 1); 3556 ssleep(clp->cl_lease_time / HZ + 1);
3521 else 3557 else
3522 if (++clp->cl_id_uniquifier == 0) 3558 if (++clp->cl_id_uniquifier == 0)
3523 break; 3559 break;
@@ -3663,8 +3699,8 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
3663 data->rpc_status = 0; 3699 data->rpc_status = 0;
3664 3700
3665 task_setup_data.callback_data = data; 3701 task_setup_data.callback_data = data;
3666 msg.rpc_argp = &data->args, 3702 msg.rpc_argp = &data->args;
3667 msg.rpc_resp = &data->res, 3703 msg.rpc_resp = &data->res;
3668 task = rpc_run_task(&task_setup_data); 3704 task = rpc_run_task(&task_setup_data);
3669 if (IS_ERR(task)) 3705 if (IS_ERR(task))
3670 return PTR_ERR(task); 3706 return PTR_ERR(task);
@@ -3743,6 +3779,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
3743 goto out; 3779 goto out;
3744 lsp = request->fl_u.nfs4_fl.owner; 3780 lsp = request->fl_u.nfs4_fl.owner;
3745 arg.lock_owner.id = lsp->ls_id.id; 3781 arg.lock_owner.id = lsp->ls_id.id;
3782 arg.lock_owner.s_dev = server->s_dev;
3746 status = nfs4_call_sync(server, &msg, &arg, &res, 1); 3783 status = nfs4_call_sync(server, &msg, &arg, &res, 1);
3747 switch (status) { 3784 switch (status) {
3748 case 0: 3785 case 0:
@@ -3908,8 +3945,8 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
3908 return ERR_PTR(-ENOMEM); 3945 return ERR_PTR(-ENOMEM);
3909 } 3946 }
3910 3947
3911 msg.rpc_argp = &data->arg, 3948 msg.rpc_argp = &data->arg;
3912 msg.rpc_resp = &data->res, 3949 msg.rpc_resp = &data->res;
3913 task_setup_data.callback_data = data; 3950 task_setup_data.callback_data = data;
3914 return rpc_run_task(&task_setup_data); 3951 return rpc_run_task(&task_setup_data);
3915} 3952}
@@ -3988,6 +4025,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
3988 p->arg.lock_stateid = &lsp->ls_stateid; 4025 p->arg.lock_stateid = &lsp->ls_stateid;
3989 p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; 4026 p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
3990 p->arg.lock_owner.id = lsp->ls_id.id; 4027 p->arg.lock_owner.id = lsp->ls_id.id;
4028 p->arg.lock_owner.s_dev = server->s_dev;
3991 p->res.lock_seqid = p->arg.lock_seqid; 4029 p->res.lock_seqid = p->arg.lock_seqid;
3992 p->lsp = lsp; 4030 p->lsp = lsp;
3993 p->server = server; 4031 p->server = server;
@@ -4145,8 +4183,8 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
4145 data->arg.reclaim = NFS_LOCK_RECLAIM; 4183 data->arg.reclaim = NFS_LOCK_RECLAIM;
4146 task_setup_data.callback_ops = &nfs4_recover_lock_ops; 4184 task_setup_data.callback_ops = &nfs4_recover_lock_ops;
4147 } 4185 }
4148 msg.rpc_argp = &data->arg, 4186 msg.rpc_argp = &data->arg;
4149 msg.rpc_resp = &data->res, 4187 msg.rpc_resp = &data->res;
4150 task_setup_data.callback_data = data; 4188 task_setup_data.callback_data = data;
4151 task = rpc_run_task(&task_setup_data); 4189 task = rpc_run_task(&task_setup_data);
4152 if (IS_ERR(task)) 4190 if (IS_ERR(task))
@@ -4392,48 +4430,43 @@ void nfs4_release_lockowner(const struct nfs4_lock_state *lsp)
4392 return; 4430 return;
4393 args->lock_owner.clientid = server->nfs_client->cl_clientid; 4431 args->lock_owner.clientid = server->nfs_client->cl_clientid;
4394 args->lock_owner.id = lsp->ls_id.id; 4432 args->lock_owner.id = lsp->ls_id.id;
4433 args->lock_owner.s_dev = server->s_dev;
4395 msg.rpc_argp = args; 4434 msg.rpc_argp = args;
4396 rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, args); 4435 rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, args);
4397} 4436}
4398 4437
4399#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl" 4438#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
4400 4439
4401int nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf, 4440static int nfs4_xattr_set_nfs4_acl(struct dentry *dentry, const char *key,
4402 size_t buflen, int flags) 4441 const void *buf, size_t buflen,
4442 int flags, int type)
4403{ 4443{
4404 struct inode *inode = dentry->d_inode; 4444 if (strcmp(key, "") != 0)
4405 4445 return -EINVAL;
4406 if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0)
4407 return -EOPNOTSUPP;
4408 4446
4409 return nfs4_proc_set_acl(inode, buf, buflen); 4447 return nfs4_proc_set_acl(dentry->d_inode, buf, buflen);
4410} 4448}
4411 4449
4412/* The getxattr man page suggests returning -ENODATA for unknown attributes, 4450static int nfs4_xattr_get_nfs4_acl(struct dentry *dentry, const char *key,
4413 * and that's what we'll do for e.g. user attributes that haven't been set. 4451 void *buf, size_t buflen, int type)
4414 * But we'll follow ext2/ext3's lead by returning -EOPNOTSUPP for unsupported
4415 * attributes in kernel-managed attribute namespaces. */
4416ssize_t nfs4_getxattr(struct dentry *dentry, const char *key, void *buf,
4417 size_t buflen)
4418{ 4452{
4419 struct inode *inode = dentry->d_inode; 4453 if (strcmp(key, "") != 0)
4420 4454 return -EINVAL;
4421 if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0)
4422 return -EOPNOTSUPP;
4423 4455
4424 return nfs4_proc_get_acl(inode, buf, buflen); 4456 return nfs4_proc_get_acl(dentry->d_inode, buf, buflen);
4425} 4457}
4426 4458
4427ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen) 4459static size_t nfs4_xattr_list_nfs4_acl(struct dentry *dentry, char *list,
4460 size_t list_len, const char *name,
4461 size_t name_len, int type)
4428{ 4462{
4429 size_t len = strlen(XATTR_NAME_NFSV4_ACL) + 1; 4463 size_t len = sizeof(XATTR_NAME_NFSV4_ACL);
4430 4464
4431 if (!nfs4_server_supports_acls(NFS_SERVER(dentry->d_inode))) 4465 if (!nfs4_server_supports_acls(NFS_SERVER(dentry->d_inode)))
4432 return 0; 4466 return 0;
4433 if (buf && buflen < len) 4467
4434 return -ERANGE; 4468 if (list && len <= list_len)
4435 if (buf) 4469 memcpy(list, XATTR_NAME_NFSV4_ACL, len);
4436 memcpy(buf, XATTR_NAME_NFSV4_ACL, len);
4437 return len; 4470 return len;
4438} 4471}
4439 4472
@@ -4486,6 +4519,25 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
4486 4519
4487#ifdef CONFIG_NFS_V4_1 4520#ifdef CONFIG_NFS_V4_1
4488/* 4521/*
4522 * Check the exchange flags returned by the server for invalid flags, having
4523 * both PNFS and NON_PNFS flags set, and not having one of NON_PNFS, PNFS, or
4524 * DS flags set.
4525 */
4526static int nfs4_check_cl_exchange_flags(u32 flags)
4527{
4528 if (flags & ~EXCHGID4_FLAG_MASK_R)
4529 goto out_inval;
4530 if ((flags & EXCHGID4_FLAG_USE_PNFS_MDS) &&
4531 (flags & EXCHGID4_FLAG_USE_NON_PNFS))
4532 goto out_inval;
4533 if (!(flags & (EXCHGID4_FLAG_MASK_PNFS)))
4534 goto out_inval;
4535 return NFS_OK;
4536out_inval:
4537 return -NFS4ERR_INVAL;
4538}
4539
4540/*
4489 * nfs4_proc_exchange_id() 4541 * nfs4_proc_exchange_id()
4490 * 4542 *
4491 * Since the clientid has expired, all compounds using sessions 4543 * Since the clientid has expired, all compounds using sessions
@@ -4498,7 +4550,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
4498 nfs4_verifier verifier; 4550 nfs4_verifier verifier;
4499 struct nfs41_exchange_id_args args = { 4551 struct nfs41_exchange_id_args args = {
4500 .client = clp, 4552 .client = clp,
4501 .flags = clp->cl_exchange_flags, 4553 .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER,
4502 }; 4554 };
4503 struct nfs41_exchange_id_res res = { 4555 struct nfs41_exchange_id_res res = {
4504 .client = clp, 4556 .client = clp,
@@ -4515,9 +4567,6 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
4515 dprintk("--> %s\n", __func__); 4567 dprintk("--> %s\n", __func__);
4516 BUG_ON(clp == NULL); 4568 BUG_ON(clp == NULL);
4517 4569
4518 /* Remove server-only flags */
4519 args.flags &= ~EXCHGID4_FLAG_CONFIRMED_R;
4520
4521 p = (u32 *)verifier.data; 4570 p = (u32 *)verifier.data;
4522 *p++ = htonl((u32)clp->cl_boot_time.tv_sec); 4571 *p++ = htonl((u32)clp->cl_boot_time.tv_sec);
4523 *p = htonl((u32)clp->cl_boot_time.tv_nsec); 4572 *p = htonl((u32)clp->cl_boot_time.tv_nsec);
@@ -4543,6 +4592,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
4543 break; 4592 break;
4544 } 4593 }
4545 4594
4595 status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags);
4546 dprintk("<-- %s status= %d\n", __func__, status); 4596 dprintk("<-- %s status= %d\n", __func__, status);
4547 return status; 4597 return status;
4548} 4598}
@@ -4776,17 +4826,17 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
4776 if (!session) 4826 if (!session)
4777 return NULL; 4827 return NULL;
4778 4828
4779 init_completion(&session->complete);
4780
4781 tbl = &session->fc_slot_table; 4829 tbl = &session->fc_slot_table;
4782 tbl->highest_used_slotid = -1; 4830 tbl->highest_used_slotid = -1;
4783 spin_lock_init(&tbl->slot_tbl_lock); 4831 spin_lock_init(&tbl->slot_tbl_lock);
4784 rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table"); 4832 rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table");
4833 init_completion(&tbl->complete);
4785 4834
4786 tbl = &session->bc_slot_table; 4835 tbl = &session->bc_slot_table;
4787 tbl->highest_used_slotid = -1; 4836 tbl->highest_used_slotid = -1;
4788 spin_lock_init(&tbl->slot_tbl_lock); 4837 spin_lock_init(&tbl->slot_tbl_lock);
4789 rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table"); 4838 rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table");
4839 init_completion(&tbl->complete);
4790 4840
4791 session->session_state = 1<<NFS4_SESSION_INITING; 4841 session->session_state = 1<<NFS4_SESSION_INITING;
4792 4842
@@ -5280,13 +5330,23 @@ static void
5280nfs4_layoutget_prepare(struct rpc_task *task, void *calldata) 5330nfs4_layoutget_prepare(struct rpc_task *task, void *calldata)
5281{ 5331{
5282 struct nfs4_layoutget *lgp = calldata; 5332 struct nfs4_layoutget *lgp = calldata;
5283 struct inode *ino = lgp->args.inode; 5333 struct nfs_server *server = NFS_SERVER(lgp->args.inode);
5284 struct nfs_server *server = NFS_SERVER(ino);
5285 5334
5286 dprintk("--> %s\n", __func__); 5335 dprintk("--> %s\n", __func__);
5336 /* Note the is a race here, where a CB_LAYOUTRECALL can come in
5337 * right now covering the LAYOUTGET we are about to send.
5338 * However, that is not so catastrophic, and there seems
5339 * to be no way to prevent it completely.
5340 */
5287 if (nfs4_setup_sequence(server, &lgp->args.seq_args, 5341 if (nfs4_setup_sequence(server, &lgp->args.seq_args,
5288 &lgp->res.seq_res, 0, task)) 5342 &lgp->res.seq_res, 0, task))
5289 return; 5343 return;
5344 if (pnfs_choose_layoutget_stateid(&lgp->args.stateid,
5345 NFS_I(lgp->args.inode)->layout,
5346 lgp->args.ctx->state)) {
5347 rpc_exit(task, NFS4_OK);
5348 return;
5349 }
5290 rpc_call_start(task); 5350 rpc_call_start(task);
5291} 5351}
5292 5352
@@ -5313,7 +5373,6 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
5313 return; 5373 return;
5314 } 5374 }
5315 } 5375 }
5316 lgp->status = task->tk_status;
5317 dprintk("<-- %s\n", __func__); 5376 dprintk("<-- %s\n", __func__);
5318} 5377}
5319 5378
@@ -5322,7 +5381,6 @@ static void nfs4_layoutget_release(void *calldata)
5322 struct nfs4_layoutget *lgp = calldata; 5381 struct nfs4_layoutget *lgp = calldata;
5323 5382
5324 dprintk("--> %s\n", __func__); 5383 dprintk("--> %s\n", __func__);
5325 put_layout_hdr(lgp->args.inode);
5326 if (lgp->res.layout.buf != NULL) 5384 if (lgp->res.layout.buf != NULL)
5327 free_page((unsigned long) lgp->res.layout.buf); 5385 free_page((unsigned long) lgp->res.layout.buf);
5328 put_nfs_open_context(lgp->args.ctx); 5386 put_nfs_open_context(lgp->args.ctx);
@@ -5367,13 +5425,10 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
5367 if (IS_ERR(task)) 5425 if (IS_ERR(task))
5368 return PTR_ERR(task); 5426 return PTR_ERR(task);
5369 status = nfs4_wait_for_completion_rpc_task(task); 5427 status = nfs4_wait_for_completion_rpc_task(task);
5370 if (status != 0) 5428 if (status == 0)
5371 goto out; 5429 status = task->tk_status;
5372 status = lgp->status; 5430 if (status == 0)
5373 if (status != 0) 5431 status = pnfs_layout_process(lgp);
5374 goto out;
5375 status = pnfs_layout_process(lgp);
5376out:
5377 rpc_put_task(task); 5432 rpc_put_task(task);
5378 dprintk("<-- %s status=%d\n", __func__, status); 5433 dprintk("<-- %s status=%d\n", __func__, status);
5379 return status; 5434 return status;
@@ -5504,9 +5559,10 @@ static const struct inode_operations nfs4_file_inode_operations = {
5504 .permission = nfs_permission, 5559 .permission = nfs_permission,
5505 .getattr = nfs_getattr, 5560 .getattr = nfs_getattr,
5506 .setattr = nfs_setattr, 5561 .setattr = nfs_setattr,
5507 .getxattr = nfs4_getxattr, 5562 .getxattr = generic_getxattr,
5508 .setxattr = nfs4_setxattr, 5563 .setxattr = generic_setxattr,
5509 .listxattr = nfs4_listxattr, 5564 .listxattr = generic_listxattr,
5565 .removexattr = generic_removexattr,
5510}; 5566};
5511 5567
5512const struct nfs_rpc_ops nfs_v4_clientops = { 5568const struct nfs_rpc_ops nfs_v4_clientops = {
@@ -5551,6 +5607,18 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
5551 .open_context = nfs4_atomic_open, 5607 .open_context = nfs4_atomic_open,
5552}; 5608};
5553 5609
5610static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
5611 .prefix = XATTR_NAME_NFSV4_ACL,
5612 .list = nfs4_xattr_list_nfs4_acl,
5613 .get = nfs4_xattr_get_nfs4_acl,
5614 .set = nfs4_xattr_set_nfs4_acl,
5615};
5616
5617const struct xattr_handler *nfs4_xattr_handlers[] = {
5618 &nfs4_xattr_nfs4_acl_handler,
5619 NULL
5620};
5621
5554/* 5622/*
5555 * Local variables: 5623 * Local variables:
5556 * c-basic-offset: 8 5624 * c-basic-offset: 8
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 72b6c580af1..402143d75fc 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -63,9 +63,14 @@ nfs4_renew_state(struct work_struct *work)
63 63
64 ops = clp->cl_mvops->state_renewal_ops; 64 ops = clp->cl_mvops->state_renewal_ops;
65 dprintk("%s: start\n", __func__); 65 dprintk("%s: start\n", __func__);
66 /* Are there any active superblocks? */ 66
67 if (list_empty(&clp->cl_superblocks)) 67 rcu_read_lock();
68 if (list_empty(&clp->cl_superblocks)) {
69 rcu_read_unlock();
68 goto out; 70 goto out;
71 }
72 rcu_read_unlock();
73
69 spin_lock(&clp->cl_lock); 74 spin_lock(&clp->cl_lock);
70 lease = clp->cl_lease_time; 75 lease = clp->cl_lease_time;
71 last = clp->cl_last_renewal; 76 last = clp->cl_last_renewal;
@@ -75,7 +80,7 @@ nfs4_renew_state(struct work_struct *work)
75 cred = ops->get_state_renewal_cred_locked(clp); 80 cred = ops->get_state_renewal_cred_locked(clp);
76 spin_unlock(&clp->cl_lock); 81 spin_unlock(&clp->cl_lock);
77 if (cred == NULL) { 82 if (cred == NULL) {
78 if (list_empty(&clp->cl_delegations)) { 83 if (!nfs_delegations_present(clp)) {
79 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); 84 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
80 goto out; 85 goto out;
81 } 86 }
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index f575a312673..2336d532cf6 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -105,14 +105,17 @@ static void nfs4_clear_machine_cred(struct nfs_client *clp)
105 put_rpccred(cred); 105 put_rpccred(cred);
106} 106}
107 107
108struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp) 108static struct rpc_cred *
109nfs4_get_renew_cred_server_locked(struct nfs_server *server)
109{ 110{
111 struct rpc_cred *cred = NULL;
110 struct nfs4_state_owner *sp; 112 struct nfs4_state_owner *sp;
111 struct rb_node *pos; 113 struct rb_node *pos;
112 struct rpc_cred *cred = NULL;
113 114
114 for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { 115 for (pos = rb_first(&server->state_owners);
115 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); 116 pos != NULL;
117 pos = rb_next(pos)) {
118 sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
116 if (list_empty(&sp->so_states)) 119 if (list_empty(&sp->so_states))
117 continue; 120 continue;
118 cred = get_rpccred(sp->so_cred); 121 cred = get_rpccred(sp->so_cred);
@@ -121,6 +124,28 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp)
121 return cred; 124 return cred;
122} 125}
123 126
127/**
128 * nfs4_get_renew_cred_locked - Acquire credential for a renew operation
129 * @clp: client state handle
130 *
131 * Returns an rpc_cred with reference count bumped, or NULL.
132 * Caller must hold clp->cl_lock.
133 */
134struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp)
135{
136 struct rpc_cred *cred = NULL;
137 struct nfs_server *server;
138
139 rcu_read_lock();
140 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
141 cred = nfs4_get_renew_cred_server_locked(server);
142 if (cred != NULL)
143 break;
144 }
145 rcu_read_unlock();
146 return cred;
147}
148
124#if defined(CONFIG_NFS_V4_1) 149#if defined(CONFIG_NFS_V4_1)
125 150
126static int nfs41_setup_state_renewal(struct nfs_client *clp) 151static int nfs41_setup_state_renewal(struct nfs_client *clp)
@@ -142,6 +167,11 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp)
142 return status; 167 return status;
143} 168}
144 169
170/*
171 * Back channel returns NFS4ERR_DELAY for new requests when
172 * NFS4_SESSION_DRAINING is set so there is no work to be done when draining
173 * is ended.
174 */
145static void nfs4_end_drain_session(struct nfs_client *clp) 175static void nfs4_end_drain_session(struct nfs_client *clp)
146{ 176{
147 struct nfs4_session *ses = clp->cl_session; 177 struct nfs4_session *ses = clp->cl_session;
@@ -165,22 +195,32 @@ static void nfs4_end_drain_session(struct nfs_client *clp)
165 } 195 }
166} 196}
167 197
168static int nfs4_begin_drain_session(struct nfs_client *clp) 198static int nfs4_wait_on_slot_tbl(struct nfs4_slot_table *tbl)
169{ 199{
170 struct nfs4_session *ses = clp->cl_session;
171 struct nfs4_slot_table *tbl = &ses->fc_slot_table;
172
173 spin_lock(&tbl->slot_tbl_lock); 200 spin_lock(&tbl->slot_tbl_lock);
174 set_bit(NFS4_SESSION_DRAINING, &ses->session_state);
175 if (tbl->highest_used_slotid != -1) { 201 if (tbl->highest_used_slotid != -1) {
176 INIT_COMPLETION(ses->complete); 202 INIT_COMPLETION(tbl->complete);
177 spin_unlock(&tbl->slot_tbl_lock); 203 spin_unlock(&tbl->slot_tbl_lock);
178 return wait_for_completion_interruptible(&ses->complete); 204 return wait_for_completion_interruptible(&tbl->complete);
179 } 205 }
180 spin_unlock(&tbl->slot_tbl_lock); 206 spin_unlock(&tbl->slot_tbl_lock);
181 return 0; 207 return 0;
182} 208}
183 209
210static int nfs4_begin_drain_session(struct nfs_client *clp)
211{
212 struct nfs4_session *ses = clp->cl_session;
213 int ret = 0;
214
215 set_bit(NFS4_SESSION_DRAINING, &ses->session_state);
216 /* back channel */
217 ret = nfs4_wait_on_slot_tbl(&ses->bc_slot_table);
218 if (ret)
219 return ret;
220 /* fore channel */
221 return nfs4_wait_on_slot_tbl(&ses->fc_slot_table);
222}
223
184int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) 224int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
185{ 225{
186 int status; 226 int status;
@@ -192,6 +232,12 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
192 status = nfs4_proc_create_session(clp); 232 status = nfs4_proc_create_session(clp);
193 if (status != 0) 233 if (status != 0)
194 goto out; 234 goto out;
235 status = nfs4_set_callback_sessionid(clp);
236 if (status != 0) {
237 printk(KERN_WARNING "Sessionid not set. No callback service\n");
238 nfs_callback_down(1);
239 status = 0;
240 }
195 nfs41_setup_state_renewal(clp); 241 nfs41_setup_state_renewal(clp);
196 nfs_mark_client_ready(clp, NFS_CS_READY); 242 nfs_mark_client_ready(clp, NFS_CS_READY);
197out: 243out:
@@ -210,28 +256,56 @@ struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp)
210 256
211#endif /* CONFIG_NFS_V4_1 */ 257#endif /* CONFIG_NFS_V4_1 */
212 258
213struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp) 259static struct rpc_cred *
260nfs4_get_setclientid_cred_server(struct nfs_server *server)
214{ 261{
262 struct nfs_client *clp = server->nfs_client;
263 struct rpc_cred *cred = NULL;
215 struct nfs4_state_owner *sp; 264 struct nfs4_state_owner *sp;
216 struct rb_node *pos; 265 struct rb_node *pos;
266
267 spin_lock(&clp->cl_lock);
268 pos = rb_first(&server->state_owners);
269 if (pos != NULL) {
270 sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
271 cred = get_rpccred(sp->so_cred);
272 }
273 spin_unlock(&clp->cl_lock);
274 return cred;
275}
276
277/**
278 * nfs4_get_setclientid_cred - Acquire credential for a setclientid operation
279 * @clp: client state handle
280 *
281 * Returns an rpc_cred with reference count bumped, or NULL.
282 */
283struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
284{
285 struct nfs_server *server;
217 struct rpc_cred *cred; 286 struct rpc_cred *cred;
218 287
219 spin_lock(&clp->cl_lock); 288 spin_lock(&clp->cl_lock);
220 cred = nfs4_get_machine_cred_locked(clp); 289 cred = nfs4_get_machine_cred_locked(clp);
290 spin_unlock(&clp->cl_lock);
221 if (cred != NULL) 291 if (cred != NULL)
222 goto out; 292 goto out;
223 pos = rb_first(&clp->cl_state_owners); 293
224 if (pos != NULL) { 294 rcu_read_lock();
225 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); 295 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
226 cred = get_rpccred(sp->so_cred); 296 cred = nfs4_get_setclientid_cred_server(server);
297 if (cred != NULL)
298 break;
227 } 299 }
300 rcu_read_unlock();
301
228out: 302out:
229 spin_unlock(&clp->cl_lock);
230 return cred; 303 return cred;
231} 304}
232 305
233static void nfs_alloc_unique_id(struct rb_root *root, struct nfs_unique_id *new, 306static void nfs_alloc_unique_id_locked(struct rb_root *root,
234 __u64 minval, int maxbits) 307 struct nfs_unique_id *new,
308 __u64 minval, int maxbits)
235{ 309{
236 struct rb_node **p, *parent; 310 struct rb_node **p, *parent;
237 struct nfs_unique_id *pos; 311 struct nfs_unique_id *pos;
@@ -286,16 +360,15 @@ static void nfs_free_unique_id(struct rb_root *root, struct nfs_unique_id *id)
286} 360}
287 361
288static struct nfs4_state_owner * 362static struct nfs4_state_owner *
289nfs4_find_state_owner(struct nfs_server *server, struct rpc_cred *cred) 363nfs4_find_state_owner_locked(struct nfs_server *server, struct rpc_cred *cred)
290{ 364{
291 struct nfs_client *clp = server->nfs_client; 365 struct rb_node **p = &server->state_owners.rb_node,
292 struct rb_node **p = &clp->cl_state_owners.rb_node,
293 *parent = NULL; 366 *parent = NULL;
294 struct nfs4_state_owner *sp, *res = NULL; 367 struct nfs4_state_owner *sp, *res = NULL;
295 368
296 while (*p != NULL) { 369 while (*p != NULL) {
297 parent = *p; 370 parent = *p;
298 sp = rb_entry(parent, struct nfs4_state_owner, so_client_node); 371 sp = rb_entry(parent, struct nfs4_state_owner, so_server_node);
299 372
300 if (server < sp->so_server) { 373 if (server < sp->so_server) {
301 p = &parent->rb_left; 374 p = &parent->rb_left;
@@ -319,24 +392,17 @@ nfs4_find_state_owner(struct nfs_server *server, struct rpc_cred *cred)
319} 392}
320 393
321static struct nfs4_state_owner * 394static struct nfs4_state_owner *
322nfs4_insert_state_owner(struct nfs_client *clp, struct nfs4_state_owner *new) 395nfs4_insert_state_owner_locked(struct nfs4_state_owner *new)
323{ 396{
324 struct rb_node **p = &clp->cl_state_owners.rb_node, 397 struct nfs_server *server = new->so_server;
398 struct rb_node **p = &server->state_owners.rb_node,
325 *parent = NULL; 399 *parent = NULL;
326 struct nfs4_state_owner *sp; 400 struct nfs4_state_owner *sp;
327 401
328 while (*p != NULL) { 402 while (*p != NULL) {
329 parent = *p; 403 parent = *p;
330 sp = rb_entry(parent, struct nfs4_state_owner, so_client_node); 404 sp = rb_entry(parent, struct nfs4_state_owner, so_server_node);
331 405
332 if (new->so_server < sp->so_server) {
333 p = &parent->rb_left;
334 continue;
335 }
336 if (new->so_server > sp->so_server) {
337 p = &parent->rb_right;
338 continue;
339 }
340 if (new->so_cred < sp->so_cred) 406 if (new->so_cred < sp->so_cred)
341 p = &parent->rb_left; 407 p = &parent->rb_left;
342 else if (new->so_cred > sp->so_cred) 408 else if (new->so_cred > sp->so_cred)
@@ -346,18 +412,21 @@ nfs4_insert_state_owner(struct nfs_client *clp, struct nfs4_state_owner *new)
346 return sp; 412 return sp;
347 } 413 }
348 } 414 }
349 nfs_alloc_unique_id(&clp->cl_openowner_id, &new->so_owner_id, 1, 64); 415 nfs_alloc_unique_id_locked(&server->openowner_id,
350 rb_link_node(&new->so_client_node, parent, p); 416 &new->so_owner_id, 1, 64);
351 rb_insert_color(&new->so_client_node, &clp->cl_state_owners); 417 rb_link_node(&new->so_server_node, parent, p);
418 rb_insert_color(&new->so_server_node, &server->state_owners);
352 return new; 419 return new;
353} 420}
354 421
355static void 422static void
356nfs4_remove_state_owner(struct nfs_client *clp, struct nfs4_state_owner *sp) 423nfs4_remove_state_owner_locked(struct nfs4_state_owner *sp)
357{ 424{
358 if (!RB_EMPTY_NODE(&sp->so_client_node)) 425 struct nfs_server *server = sp->so_server;
359 rb_erase(&sp->so_client_node, &clp->cl_state_owners); 426
360 nfs_free_unique_id(&clp->cl_openowner_id, &sp->so_owner_id); 427 if (!RB_EMPTY_NODE(&sp->so_server_node))
428 rb_erase(&sp->so_server_node, &server->state_owners);
429 nfs_free_unique_id(&server->openowner_id, &sp->so_owner_id);
361} 430}
362 431
363/* 432/*
@@ -386,23 +455,32 @@ nfs4_alloc_state_owner(void)
386static void 455static void
387nfs4_drop_state_owner(struct nfs4_state_owner *sp) 456nfs4_drop_state_owner(struct nfs4_state_owner *sp)
388{ 457{
389 if (!RB_EMPTY_NODE(&sp->so_client_node)) { 458 if (!RB_EMPTY_NODE(&sp->so_server_node)) {
390 struct nfs_client *clp = sp->so_server->nfs_client; 459 struct nfs_server *server = sp->so_server;
460 struct nfs_client *clp = server->nfs_client;
391 461
392 spin_lock(&clp->cl_lock); 462 spin_lock(&clp->cl_lock);
393 rb_erase(&sp->so_client_node, &clp->cl_state_owners); 463 rb_erase(&sp->so_server_node, &server->state_owners);
394 RB_CLEAR_NODE(&sp->so_client_node); 464 RB_CLEAR_NODE(&sp->so_server_node);
395 spin_unlock(&clp->cl_lock); 465 spin_unlock(&clp->cl_lock);
396 } 466 }
397} 467}
398 468
399struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred) 469/**
470 * nfs4_get_state_owner - Look up a state owner given a credential
471 * @server: nfs_server to search
472 * @cred: RPC credential to match
473 *
474 * Returns a pointer to an instantiated nfs4_state_owner struct, or NULL.
475 */
476struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server,
477 struct rpc_cred *cred)
400{ 478{
401 struct nfs_client *clp = server->nfs_client; 479 struct nfs_client *clp = server->nfs_client;
402 struct nfs4_state_owner *sp, *new; 480 struct nfs4_state_owner *sp, *new;
403 481
404 spin_lock(&clp->cl_lock); 482 spin_lock(&clp->cl_lock);
405 sp = nfs4_find_state_owner(server, cred); 483 sp = nfs4_find_state_owner_locked(server, cred);
406 spin_unlock(&clp->cl_lock); 484 spin_unlock(&clp->cl_lock);
407 if (sp != NULL) 485 if (sp != NULL)
408 return sp; 486 return sp;
@@ -412,7 +490,7 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
412 new->so_server = server; 490 new->so_server = server;
413 new->so_cred = cred; 491 new->so_cred = cred;
414 spin_lock(&clp->cl_lock); 492 spin_lock(&clp->cl_lock);
415 sp = nfs4_insert_state_owner(clp, new); 493 sp = nfs4_insert_state_owner_locked(new);
416 spin_unlock(&clp->cl_lock); 494 spin_unlock(&clp->cl_lock);
417 if (sp == new) 495 if (sp == new)
418 get_rpccred(cred); 496 get_rpccred(cred);
@@ -423,6 +501,11 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
423 return sp; 501 return sp;
424} 502}
425 503
504/**
505 * nfs4_put_state_owner - Release a nfs4_state_owner
506 * @sp: state owner data to release
507 *
508 */
426void nfs4_put_state_owner(struct nfs4_state_owner *sp) 509void nfs4_put_state_owner(struct nfs4_state_owner *sp)
427{ 510{
428 struct nfs_client *clp = sp->so_server->nfs_client; 511 struct nfs_client *clp = sp->so_server->nfs_client;
@@ -430,7 +513,7 @@ void nfs4_put_state_owner(struct nfs4_state_owner *sp)
430 513
431 if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock)) 514 if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
432 return; 515 return;
433 nfs4_remove_state_owner(clp, sp); 516 nfs4_remove_state_owner_locked(sp);
434 spin_unlock(&clp->cl_lock); 517 spin_unlock(&clp->cl_lock);
435 rpc_destroy_wait_queue(&sp->so_sequence.wait); 518 rpc_destroy_wait_queue(&sp->so_sequence.wait);
436 put_rpccred(cred); 519 put_rpccred(cred);
@@ -585,8 +668,11 @@ static void __nfs4_close(struct path *path, struct nfs4_state *state,
585 if (!call_close) { 668 if (!call_close) {
586 nfs4_put_open_state(state); 669 nfs4_put_open_state(state);
587 nfs4_put_state_owner(owner); 670 nfs4_put_state_owner(owner);
588 } else 671 } else {
589 nfs4_do_close(path, state, gfp_mask, wait); 672 bool roc = pnfs_roc(state->inode);
673
674 nfs4_do_close(path, state, gfp_mask, wait, roc);
675 }
590} 676}
591 677
592void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode) 678void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode)
@@ -633,7 +719,8 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_p
633static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type) 719static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type)
634{ 720{
635 struct nfs4_lock_state *lsp; 721 struct nfs4_lock_state *lsp;
636 struct nfs_client *clp = state->owner->so_server->nfs_client; 722 struct nfs_server *server = state->owner->so_server;
723 struct nfs_client *clp = server->nfs_client;
637 724
638 lsp = kzalloc(sizeof(*lsp), GFP_NOFS); 725 lsp = kzalloc(sizeof(*lsp), GFP_NOFS);
639 if (lsp == NULL) 726 if (lsp == NULL)
@@ -657,7 +744,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
657 return NULL; 744 return NULL;
658 } 745 }
659 spin_lock(&clp->cl_lock); 746 spin_lock(&clp->cl_lock);
660 nfs_alloc_unique_id(&clp->cl_lockowner_id, &lsp->ls_id, 1, 64); 747 nfs_alloc_unique_id_locked(&server->lockowner_id, &lsp->ls_id, 1, 64);
661 spin_unlock(&clp->cl_lock); 748 spin_unlock(&clp->cl_lock);
662 INIT_LIST_HEAD(&lsp->ls_locks); 749 INIT_LIST_HEAD(&lsp->ls_locks);
663 return lsp; 750 return lsp;
@@ -665,10 +752,11 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
665 752
666static void nfs4_free_lock_state(struct nfs4_lock_state *lsp) 753static void nfs4_free_lock_state(struct nfs4_lock_state *lsp)
667{ 754{
668 struct nfs_client *clp = lsp->ls_state->owner->so_server->nfs_client; 755 struct nfs_server *server = lsp->ls_state->owner->so_server;
756 struct nfs_client *clp = server->nfs_client;
669 757
670 spin_lock(&clp->cl_lock); 758 spin_lock(&clp->cl_lock);
671 nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id); 759 nfs_free_unique_id(&server->lockowner_id, &lsp->ls_id);
672 spin_unlock(&clp->cl_lock); 760 spin_unlock(&clp->cl_lock);
673 rpc_destroy_wait_queue(&lsp->ls_sequence.wait); 761 rpc_destroy_wait_queue(&lsp->ls_sequence.wait);
674 kfree(lsp); 762 kfree(lsp);
@@ -1114,15 +1202,19 @@ static void nfs4_clear_open_state(struct nfs4_state *state)
1114 } 1202 }
1115} 1203}
1116 1204
1117static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp, int (*mark_reclaim)(struct nfs_client *clp, struct nfs4_state *state)) 1205static void nfs4_reset_seqids(struct nfs_server *server,
1206 int (*mark_reclaim)(struct nfs_client *clp, struct nfs4_state *state))
1118{ 1207{
1208 struct nfs_client *clp = server->nfs_client;
1119 struct nfs4_state_owner *sp; 1209 struct nfs4_state_owner *sp;
1120 struct rb_node *pos; 1210 struct rb_node *pos;
1121 struct nfs4_state *state; 1211 struct nfs4_state *state;
1122 1212
1123 /* Reset all sequence ids to zero */ 1213 spin_lock(&clp->cl_lock);
1124 for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { 1214 for (pos = rb_first(&server->state_owners);
1125 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); 1215 pos != NULL;
1216 pos = rb_next(pos)) {
1217 sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
1126 sp->so_seqid.flags = 0; 1218 sp->so_seqid.flags = 0;
1127 spin_lock(&sp->so_lock); 1219 spin_lock(&sp->so_lock);
1128 list_for_each_entry(state, &sp->so_states, open_states) { 1220 list_for_each_entry(state, &sp->so_states, open_states) {
@@ -1131,6 +1223,18 @@ static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp, int (*mark_re
1131 } 1223 }
1132 spin_unlock(&sp->so_lock); 1224 spin_unlock(&sp->so_lock);
1133 } 1225 }
1226 spin_unlock(&clp->cl_lock);
1227}
1228
1229static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp,
1230 int (*mark_reclaim)(struct nfs_client *clp, struct nfs4_state *state))
1231{
1232 struct nfs_server *server;
1233
1234 rcu_read_lock();
1235 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
1236 nfs4_reset_seqids(server, mark_reclaim);
1237 rcu_read_unlock();
1134} 1238}
1135 1239
1136static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp) 1240static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp)
@@ -1148,25 +1252,41 @@ static void nfs4_reclaim_complete(struct nfs_client *clp,
1148 (void)ops->reclaim_complete(clp); 1252 (void)ops->reclaim_complete(clp);
1149} 1253}
1150 1254
1151static int nfs4_state_clear_reclaim_reboot(struct nfs_client *clp) 1255static void nfs4_clear_reclaim_server(struct nfs_server *server)
1152{ 1256{
1257 struct nfs_client *clp = server->nfs_client;
1153 struct nfs4_state_owner *sp; 1258 struct nfs4_state_owner *sp;
1154 struct rb_node *pos; 1259 struct rb_node *pos;
1155 struct nfs4_state *state; 1260 struct nfs4_state *state;
1156 1261
1157 if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) 1262 spin_lock(&clp->cl_lock);
1158 return 0; 1263 for (pos = rb_first(&server->state_owners);
1159 1264 pos != NULL;
1160 for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { 1265 pos = rb_next(pos)) {
1161 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); 1266 sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
1162 spin_lock(&sp->so_lock); 1267 spin_lock(&sp->so_lock);
1163 list_for_each_entry(state, &sp->so_states, open_states) { 1268 list_for_each_entry(state, &sp->so_states, open_states) {
1164 if (!test_and_clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags)) 1269 if (!test_and_clear_bit(NFS_STATE_RECLAIM_REBOOT,
1270 &state->flags))
1165 continue; 1271 continue;
1166 nfs4_state_mark_reclaim_nograce(clp, state); 1272 nfs4_state_mark_reclaim_nograce(clp, state);
1167 } 1273 }
1168 spin_unlock(&sp->so_lock); 1274 spin_unlock(&sp->so_lock);
1169 } 1275 }
1276 spin_unlock(&clp->cl_lock);
1277}
1278
1279static int nfs4_state_clear_reclaim_reboot(struct nfs_client *clp)
1280{
1281 struct nfs_server *server;
1282
1283 if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
1284 return 0;
1285
1286 rcu_read_lock();
1287 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
1288 nfs4_clear_reclaim_server(server);
1289 rcu_read_unlock();
1170 1290
1171 nfs_delegation_reap_unclaimed(clp); 1291 nfs_delegation_reap_unclaimed(clp);
1172 return 1; 1292 return 1;
@@ -1238,27 +1358,40 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
1238 1358
1239static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recovery_ops *ops) 1359static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recovery_ops *ops)
1240{ 1360{
1361 struct nfs4_state_owner *sp;
1362 struct nfs_server *server;
1241 struct rb_node *pos; 1363 struct rb_node *pos;
1242 int status = 0; 1364 int status = 0;
1243 1365
1244restart: 1366restart:
1245 spin_lock(&clp->cl_lock); 1367 rcu_read_lock();
1246 for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { 1368 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
1247 struct nfs4_state_owner *sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); 1369 spin_lock(&clp->cl_lock);
1248 if (!test_and_clear_bit(ops->owner_flag_bit, &sp->so_flags)) 1370 for (pos = rb_first(&server->state_owners);
1249 continue; 1371 pos != NULL;
1250 atomic_inc(&sp->so_count); 1372 pos = rb_next(pos)) {
1251 spin_unlock(&clp->cl_lock); 1373 sp = rb_entry(pos,
1252 status = nfs4_reclaim_open_state(sp, ops); 1374 struct nfs4_state_owner, so_server_node);
1253 if (status < 0) { 1375 if (!test_and_clear_bit(ops->owner_flag_bit,
1254 set_bit(ops->owner_flag_bit, &sp->so_flags); 1376 &sp->so_flags))
1377 continue;
1378 atomic_inc(&sp->so_count);
1379 spin_unlock(&clp->cl_lock);
1380 rcu_read_unlock();
1381
1382 status = nfs4_reclaim_open_state(sp, ops);
1383 if (status < 0) {
1384 set_bit(ops->owner_flag_bit, &sp->so_flags);
1385 nfs4_put_state_owner(sp);
1386 return nfs4_recovery_handle_error(clp, status);
1387 }
1388
1255 nfs4_put_state_owner(sp); 1389 nfs4_put_state_owner(sp);
1256 return nfs4_recovery_handle_error(clp, status); 1390 goto restart;
1257 } 1391 }
1258 nfs4_put_state_owner(sp); 1392 spin_unlock(&clp->cl_lock);
1259 goto restart;
1260 } 1393 }
1261 spin_unlock(&clp->cl_lock); 1394 rcu_read_unlock();
1262 return status; 1395 return status;
1263} 1396}
1264 1397
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 9f1826b012e..2ab8e5cb8f5 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -71,8 +71,8 @@ static int nfs4_stat_to_errno(int);
71/* lock,open owner id: 71/* lock,open owner id:
72 * we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT >> 2) 72 * we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT >> 2)
73 */ 73 */
74#define open_owner_id_maxsz (1 + 4) 74#define open_owner_id_maxsz (1 + 1 + 4)
75#define lock_owner_id_maxsz (1 + 4) 75#define lock_owner_id_maxsz (1 + 1 + 4)
76#define decode_lockowner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) 76#define decode_lockowner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
77#define compound_encode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2)) 77#define compound_encode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2))
78#define compound_decode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2)) 78#define compound_decode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2))
@@ -1088,10 +1088,11 @@ static void encode_lockowner(struct xdr_stream *xdr, const struct nfs_lowner *lo
1088{ 1088{
1089 __be32 *p; 1089 __be32 *p;
1090 1090
1091 p = reserve_space(xdr, 28); 1091 p = reserve_space(xdr, 32);
1092 p = xdr_encode_hyper(p, lowner->clientid); 1092 p = xdr_encode_hyper(p, lowner->clientid);
1093 *p++ = cpu_to_be32(16); 1093 *p++ = cpu_to_be32(20);
1094 p = xdr_encode_opaque_fixed(p, "lock id:", 8); 1094 p = xdr_encode_opaque_fixed(p, "lock id:", 8);
1095 *p++ = cpu_to_be32(lowner->s_dev);
1095 xdr_encode_hyper(p, lowner->id); 1096 xdr_encode_hyper(p, lowner->id);
1096} 1097}
1097 1098
@@ -1210,10 +1211,11 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
1210 *p++ = cpu_to_be32(OP_OPEN); 1211 *p++ = cpu_to_be32(OP_OPEN);
1211 *p = cpu_to_be32(arg->seqid->sequence->counter); 1212 *p = cpu_to_be32(arg->seqid->sequence->counter);
1212 encode_share_access(xdr, arg->fmode); 1213 encode_share_access(xdr, arg->fmode);
1213 p = reserve_space(xdr, 28); 1214 p = reserve_space(xdr, 32);
1214 p = xdr_encode_hyper(p, arg->clientid); 1215 p = xdr_encode_hyper(p, arg->clientid);
1215 *p++ = cpu_to_be32(16); 1216 *p++ = cpu_to_be32(20);
1216 p = xdr_encode_opaque_fixed(p, "open id:", 8); 1217 p = xdr_encode_opaque_fixed(p, "open id:", 8);
1218 *p++ = cpu_to_be32(arg->server->s_dev);
1217 xdr_encode_hyper(p, arg->id); 1219 xdr_encode_hyper(p, arg->id);
1218} 1220}
1219 1221
@@ -1510,7 +1512,7 @@ encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1510 hdr->replen += decode_restorefh_maxsz; 1512 hdr->replen += decode_restorefh_maxsz;
1511} 1513}
1512 1514
1513static int 1515static void
1514encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compound_hdr *hdr) 1516encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compound_hdr *hdr)
1515{ 1517{
1516 __be32 *p; 1518 __be32 *p;
@@ -1521,14 +1523,12 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun
1521 p = reserve_space(xdr, 2*4); 1523 p = reserve_space(xdr, 2*4);
1522 *p++ = cpu_to_be32(1); 1524 *p++ = cpu_to_be32(1);
1523 *p = cpu_to_be32(FATTR4_WORD0_ACL); 1525 *p = cpu_to_be32(FATTR4_WORD0_ACL);
1524 if (arg->acl_len % 4) 1526 BUG_ON(arg->acl_len % 4);
1525 return -EINVAL;
1526 p = reserve_space(xdr, 4); 1527 p = reserve_space(xdr, 4);
1527 *p = cpu_to_be32(arg->acl_len); 1528 *p = cpu_to_be32(arg->acl_len);
1528 xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len); 1529 xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len);
1529 hdr->nops++; 1530 hdr->nops++;
1530 hdr->replen += decode_setacl_maxsz; 1531 hdr->replen += decode_setacl_maxsz;
1531 return 0;
1532} 1532}
1533 1533
1534static void 1534static void
@@ -1789,7 +1789,6 @@ encode_layoutget(struct xdr_stream *xdr,
1789 const struct nfs4_layoutget_args *args, 1789 const struct nfs4_layoutget_args *args,
1790 struct compound_hdr *hdr) 1790 struct compound_hdr *hdr)
1791{ 1791{
1792 nfs4_stateid stateid;
1793 __be32 *p; 1792 __be32 *p;
1794 1793
1795 p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE); 1794 p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE);
@@ -1800,9 +1799,7 @@ encode_layoutget(struct xdr_stream *xdr,
1800 p = xdr_encode_hyper(p, args->range.offset); 1799 p = xdr_encode_hyper(p, args->range.offset);
1801 p = xdr_encode_hyper(p, args->range.length); 1800 p = xdr_encode_hyper(p, args->range.length);
1802 p = xdr_encode_hyper(p, args->minlength); 1801 p = xdr_encode_hyper(p, args->minlength);
1803 pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout, 1802 p = xdr_encode_opaque_fixed(p, &args->stateid.data, NFS4_STATEID_SIZE);
1804 args->ctx->state);
1805 p = xdr_encode_opaque_fixed(p, &stateid.data, NFS4_STATEID_SIZE);
1806 *p = cpu_to_be32(args->maxcount); 1803 *p = cpu_to_be32(args->maxcount);
1807 1804
1808 dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n", 1805 dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n",
@@ -1833,393 +1830,362 @@ static u32 nfs4_xdr_minorversion(const struct nfs4_sequence_args *args)
1833/* 1830/*
1834 * Encode an ACCESS request 1831 * Encode an ACCESS request
1835 */ 1832 */
1836static int nfs4_xdr_enc_access(struct rpc_rqst *req, __be32 *p, const struct nfs4_accessargs *args) 1833static void nfs4_xdr_enc_access(struct rpc_rqst *req, struct xdr_stream *xdr,
1834 const struct nfs4_accessargs *args)
1837{ 1835{
1838 struct xdr_stream xdr;
1839 struct compound_hdr hdr = { 1836 struct compound_hdr hdr = {
1840 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 1837 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
1841 }; 1838 };
1842 1839
1843 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1840 encode_compound_hdr(xdr, req, &hdr);
1844 encode_compound_hdr(&xdr, req, &hdr); 1841 encode_sequence(xdr, &args->seq_args, &hdr);
1845 encode_sequence(&xdr, &args->seq_args, &hdr); 1842 encode_putfh(xdr, args->fh, &hdr);
1846 encode_putfh(&xdr, args->fh, &hdr); 1843 encode_access(xdr, args->access, &hdr);
1847 encode_access(&xdr, args->access, &hdr); 1844 encode_getfattr(xdr, args->bitmask, &hdr);
1848 encode_getfattr(&xdr, args->bitmask, &hdr);
1849 encode_nops(&hdr); 1845 encode_nops(&hdr);
1850 return 0;
1851} 1846}
1852 1847
1853/* 1848/*
1854 * Encode LOOKUP request 1849 * Encode LOOKUP request
1855 */ 1850 */
1856static int nfs4_xdr_enc_lookup(struct rpc_rqst *req, __be32 *p, const struct nfs4_lookup_arg *args) 1851static void nfs4_xdr_enc_lookup(struct rpc_rqst *req, struct xdr_stream *xdr,
1852 const struct nfs4_lookup_arg *args)
1857{ 1853{
1858 struct xdr_stream xdr;
1859 struct compound_hdr hdr = { 1854 struct compound_hdr hdr = {
1860 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 1855 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
1861 }; 1856 };
1862 1857
1863 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1858 encode_compound_hdr(xdr, req, &hdr);
1864 encode_compound_hdr(&xdr, req, &hdr); 1859 encode_sequence(xdr, &args->seq_args, &hdr);
1865 encode_sequence(&xdr, &args->seq_args, &hdr); 1860 encode_putfh(xdr, args->dir_fh, &hdr);
1866 encode_putfh(&xdr, args->dir_fh, &hdr); 1861 encode_lookup(xdr, args->name, &hdr);
1867 encode_lookup(&xdr, args->name, &hdr); 1862 encode_getfh(xdr, &hdr);
1868 encode_getfh(&xdr, &hdr); 1863 encode_getfattr(xdr, args->bitmask, &hdr);
1869 encode_getfattr(&xdr, args->bitmask, &hdr);
1870 encode_nops(&hdr); 1864 encode_nops(&hdr);
1871 return 0;
1872} 1865}
1873 1866
1874/* 1867/*
1875 * Encode LOOKUP_ROOT request 1868 * Encode LOOKUP_ROOT request
1876 */ 1869 */
1877static int nfs4_xdr_enc_lookup_root(struct rpc_rqst *req, __be32 *p, const struct nfs4_lookup_root_arg *args) 1870static void nfs4_xdr_enc_lookup_root(struct rpc_rqst *req,
1871 struct xdr_stream *xdr,
1872 const struct nfs4_lookup_root_arg *args)
1878{ 1873{
1879 struct xdr_stream xdr;
1880 struct compound_hdr hdr = { 1874 struct compound_hdr hdr = {
1881 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 1875 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
1882 }; 1876 };
1883 1877
1884 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1878 encode_compound_hdr(xdr, req, &hdr);
1885 encode_compound_hdr(&xdr, req, &hdr); 1879 encode_sequence(xdr, &args->seq_args, &hdr);
1886 encode_sequence(&xdr, &args->seq_args, &hdr); 1880 encode_putrootfh(xdr, &hdr);
1887 encode_putrootfh(&xdr, &hdr); 1881 encode_getfh(xdr, &hdr);
1888 encode_getfh(&xdr, &hdr); 1882 encode_getfattr(xdr, args->bitmask, &hdr);
1889 encode_getfattr(&xdr, args->bitmask, &hdr);
1890 encode_nops(&hdr); 1883 encode_nops(&hdr);
1891 return 0;
1892} 1884}
1893 1885
1894/* 1886/*
1895 * Encode REMOVE request 1887 * Encode REMOVE request
1896 */ 1888 */
1897static int nfs4_xdr_enc_remove(struct rpc_rqst *req, __be32 *p, const struct nfs_removeargs *args) 1889static void nfs4_xdr_enc_remove(struct rpc_rqst *req, struct xdr_stream *xdr,
1890 const struct nfs_removeargs *args)
1898{ 1891{
1899 struct xdr_stream xdr;
1900 struct compound_hdr hdr = { 1892 struct compound_hdr hdr = {
1901 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 1893 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
1902 }; 1894 };
1903 1895
1904 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1896 encode_compound_hdr(xdr, req, &hdr);
1905 encode_compound_hdr(&xdr, req, &hdr); 1897 encode_sequence(xdr, &args->seq_args, &hdr);
1906 encode_sequence(&xdr, &args->seq_args, &hdr); 1898 encode_putfh(xdr, args->fh, &hdr);
1907 encode_putfh(&xdr, args->fh, &hdr); 1899 encode_remove(xdr, &args->name, &hdr);
1908 encode_remove(&xdr, &args->name, &hdr); 1900 encode_getfattr(xdr, args->bitmask, &hdr);
1909 encode_getfattr(&xdr, args->bitmask, &hdr);
1910 encode_nops(&hdr); 1901 encode_nops(&hdr);
1911 return 0;
1912} 1902}
1913 1903
1914/* 1904/*
1915 * Encode RENAME request 1905 * Encode RENAME request
1916 */ 1906 */
1917static int nfs4_xdr_enc_rename(struct rpc_rqst *req, __be32 *p, const struct nfs_renameargs *args) 1907static void nfs4_xdr_enc_rename(struct rpc_rqst *req, struct xdr_stream *xdr,
1908 const struct nfs_renameargs *args)
1918{ 1909{
1919 struct xdr_stream xdr;
1920 struct compound_hdr hdr = { 1910 struct compound_hdr hdr = {
1921 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 1911 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
1922 }; 1912 };
1923 1913
1924 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1914 encode_compound_hdr(xdr, req, &hdr);
1925 encode_compound_hdr(&xdr, req, &hdr); 1915 encode_sequence(xdr, &args->seq_args, &hdr);
1926 encode_sequence(&xdr, &args->seq_args, &hdr); 1916 encode_putfh(xdr, args->old_dir, &hdr);
1927 encode_putfh(&xdr, args->old_dir, &hdr); 1917 encode_savefh(xdr, &hdr);
1928 encode_savefh(&xdr, &hdr); 1918 encode_putfh(xdr, args->new_dir, &hdr);
1929 encode_putfh(&xdr, args->new_dir, &hdr); 1919 encode_rename(xdr, args->old_name, args->new_name, &hdr);
1930 encode_rename(&xdr, args->old_name, args->new_name, &hdr); 1920 encode_getfattr(xdr, args->bitmask, &hdr);
1931 encode_getfattr(&xdr, args->bitmask, &hdr); 1921 encode_restorefh(xdr, &hdr);
1932 encode_restorefh(&xdr, &hdr); 1922 encode_getfattr(xdr, args->bitmask, &hdr);
1933 encode_getfattr(&xdr, args->bitmask, &hdr);
1934 encode_nops(&hdr); 1923 encode_nops(&hdr);
1935 return 0;
1936} 1924}
1937 1925
1938/* 1926/*
1939 * Encode LINK request 1927 * Encode LINK request
1940 */ 1928 */
1941static int nfs4_xdr_enc_link(struct rpc_rqst *req, __be32 *p, const struct nfs4_link_arg *args) 1929static void nfs4_xdr_enc_link(struct rpc_rqst *req, struct xdr_stream *xdr,
1930 const struct nfs4_link_arg *args)
1942{ 1931{
1943 struct xdr_stream xdr;
1944 struct compound_hdr hdr = { 1932 struct compound_hdr hdr = {
1945 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 1933 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
1946 }; 1934 };
1947 1935
1948 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1936 encode_compound_hdr(xdr, req, &hdr);
1949 encode_compound_hdr(&xdr, req, &hdr); 1937 encode_sequence(xdr, &args->seq_args, &hdr);
1950 encode_sequence(&xdr, &args->seq_args, &hdr); 1938 encode_putfh(xdr, args->fh, &hdr);
1951 encode_putfh(&xdr, args->fh, &hdr); 1939 encode_savefh(xdr, &hdr);
1952 encode_savefh(&xdr, &hdr); 1940 encode_putfh(xdr, args->dir_fh, &hdr);
1953 encode_putfh(&xdr, args->dir_fh, &hdr); 1941 encode_link(xdr, args->name, &hdr);
1954 encode_link(&xdr, args->name, &hdr); 1942 encode_getfattr(xdr, args->bitmask, &hdr);
1955 encode_getfattr(&xdr, args->bitmask, &hdr); 1943 encode_restorefh(xdr, &hdr);
1956 encode_restorefh(&xdr, &hdr); 1944 encode_getfattr(xdr, args->bitmask, &hdr);
1957 encode_getfattr(&xdr, args->bitmask, &hdr);
1958 encode_nops(&hdr); 1945 encode_nops(&hdr);
1959 return 0;
1960} 1946}
1961 1947
1962/* 1948/*
1963 * Encode CREATE request 1949 * Encode CREATE request
1964 */ 1950 */
1965static int nfs4_xdr_enc_create(struct rpc_rqst *req, __be32 *p, const struct nfs4_create_arg *args) 1951static void nfs4_xdr_enc_create(struct rpc_rqst *req, struct xdr_stream *xdr,
1952 const struct nfs4_create_arg *args)
1966{ 1953{
1967 struct xdr_stream xdr;
1968 struct compound_hdr hdr = { 1954 struct compound_hdr hdr = {
1969 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 1955 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
1970 }; 1956 };
1971 1957
1972 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1958 encode_compound_hdr(xdr, req, &hdr);
1973 encode_compound_hdr(&xdr, req, &hdr); 1959 encode_sequence(xdr, &args->seq_args, &hdr);
1974 encode_sequence(&xdr, &args->seq_args, &hdr); 1960 encode_putfh(xdr, args->dir_fh, &hdr);
1975 encode_putfh(&xdr, args->dir_fh, &hdr); 1961 encode_savefh(xdr, &hdr);
1976 encode_savefh(&xdr, &hdr); 1962 encode_create(xdr, args, &hdr);
1977 encode_create(&xdr, args, &hdr); 1963 encode_getfh(xdr, &hdr);
1978 encode_getfh(&xdr, &hdr); 1964 encode_getfattr(xdr, args->bitmask, &hdr);
1979 encode_getfattr(&xdr, args->bitmask, &hdr); 1965 encode_restorefh(xdr, &hdr);
1980 encode_restorefh(&xdr, &hdr); 1966 encode_getfattr(xdr, args->bitmask, &hdr);
1981 encode_getfattr(&xdr, args->bitmask, &hdr);
1982 encode_nops(&hdr); 1967 encode_nops(&hdr);
1983 return 0;
1984} 1968}
1985 1969
1986/* 1970/*
1987 * Encode SYMLINK request 1971 * Encode SYMLINK request
1988 */ 1972 */
1989static int nfs4_xdr_enc_symlink(struct rpc_rqst *req, __be32 *p, const struct nfs4_create_arg *args) 1973static void nfs4_xdr_enc_symlink(struct rpc_rqst *req, struct xdr_stream *xdr,
1974 const struct nfs4_create_arg *args)
1990{ 1975{
1991 return nfs4_xdr_enc_create(req, p, args); 1976 nfs4_xdr_enc_create(req, xdr, args);
1992} 1977}
1993 1978
1994/* 1979/*
1995 * Encode GETATTR request 1980 * Encode GETATTR request
1996 */ 1981 */
1997static int nfs4_xdr_enc_getattr(struct rpc_rqst *req, __be32 *p, const struct nfs4_getattr_arg *args) 1982static void nfs4_xdr_enc_getattr(struct rpc_rqst *req, struct xdr_stream *xdr,
1983 const struct nfs4_getattr_arg *args)
1998{ 1984{
1999 struct xdr_stream xdr;
2000 struct compound_hdr hdr = { 1985 struct compound_hdr hdr = {
2001 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 1986 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2002 }; 1987 };
2003 1988
2004 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1989 encode_compound_hdr(xdr, req, &hdr);
2005 encode_compound_hdr(&xdr, req, &hdr); 1990 encode_sequence(xdr, &args->seq_args, &hdr);
2006 encode_sequence(&xdr, &args->seq_args, &hdr); 1991 encode_putfh(xdr, args->fh, &hdr);
2007 encode_putfh(&xdr, args->fh, &hdr); 1992 encode_getfattr(xdr, args->bitmask, &hdr);
2008 encode_getfattr(&xdr, args->bitmask, &hdr);
2009 encode_nops(&hdr); 1993 encode_nops(&hdr);
2010 return 0;
2011} 1994}
2012 1995
2013/* 1996/*
2014 * Encode a CLOSE request 1997 * Encode a CLOSE request
2015 */ 1998 */
2016static int nfs4_xdr_enc_close(struct rpc_rqst *req, __be32 *p, struct nfs_closeargs *args) 1999static void nfs4_xdr_enc_close(struct rpc_rqst *req, struct xdr_stream *xdr,
2000 struct nfs_closeargs *args)
2017{ 2001{
2018 struct xdr_stream xdr;
2019 struct compound_hdr hdr = { 2002 struct compound_hdr hdr = {
2020 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 2003 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2021 }; 2004 };
2022 2005
2023 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2006 encode_compound_hdr(xdr, req, &hdr);
2024 encode_compound_hdr(&xdr, req, &hdr); 2007 encode_sequence(xdr, &args->seq_args, &hdr);
2025 encode_sequence(&xdr, &args->seq_args, &hdr); 2008 encode_putfh(xdr, args->fh, &hdr);
2026 encode_putfh(&xdr, args->fh, &hdr); 2009 encode_close(xdr, args, &hdr);
2027 encode_close(&xdr, args, &hdr); 2010 encode_getfattr(xdr, args->bitmask, &hdr);
2028 encode_getfattr(&xdr, args->bitmask, &hdr);
2029 encode_nops(&hdr); 2011 encode_nops(&hdr);
2030 return 0;
2031} 2012}
2032 2013
2033/* 2014/*
2034 * Encode an OPEN request 2015 * Encode an OPEN request
2035 */ 2016 */
2036static int nfs4_xdr_enc_open(struct rpc_rqst *req, __be32 *p, struct nfs_openargs *args) 2017static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,
2018 struct nfs_openargs *args)
2037{ 2019{
2038 struct xdr_stream xdr;
2039 struct compound_hdr hdr = { 2020 struct compound_hdr hdr = {
2040 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 2021 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2041 }; 2022 };
2042 2023
2043 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2024 encode_compound_hdr(xdr, req, &hdr);
2044 encode_compound_hdr(&xdr, req, &hdr); 2025 encode_sequence(xdr, &args->seq_args, &hdr);
2045 encode_sequence(&xdr, &args->seq_args, &hdr); 2026 encode_putfh(xdr, args->fh, &hdr);
2046 encode_putfh(&xdr, args->fh, &hdr); 2027 encode_savefh(xdr, &hdr);
2047 encode_savefh(&xdr, &hdr); 2028 encode_open(xdr, args, &hdr);
2048 encode_open(&xdr, args, &hdr); 2029 encode_getfh(xdr, &hdr);
2049 encode_getfh(&xdr, &hdr); 2030 encode_getfattr(xdr, args->bitmask, &hdr);
2050 encode_getfattr(&xdr, args->bitmask, &hdr); 2031 encode_restorefh(xdr, &hdr);
2051 encode_restorefh(&xdr, &hdr); 2032 encode_getfattr(xdr, args->bitmask, &hdr);
2052 encode_getfattr(&xdr, args->bitmask, &hdr);
2053 encode_nops(&hdr); 2033 encode_nops(&hdr);
2054 return 0;
2055} 2034}
2056 2035
2057/* 2036/*
2058 * Encode an OPEN_CONFIRM request 2037 * Encode an OPEN_CONFIRM request
2059 */ 2038 */
2060static int nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, __be32 *p, struct nfs_open_confirmargs *args) 2039static void nfs4_xdr_enc_open_confirm(struct rpc_rqst *req,
2040 struct xdr_stream *xdr,
2041 struct nfs_open_confirmargs *args)
2061{ 2042{
2062 struct xdr_stream xdr;
2063 struct compound_hdr hdr = { 2043 struct compound_hdr hdr = {
2064 .nops = 0, 2044 .nops = 0,
2065 }; 2045 };
2066 2046
2067 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2047 encode_compound_hdr(xdr, req, &hdr);
2068 encode_compound_hdr(&xdr, req, &hdr); 2048 encode_putfh(xdr, args->fh, &hdr);
2069 encode_putfh(&xdr, args->fh, &hdr); 2049 encode_open_confirm(xdr, args, &hdr);
2070 encode_open_confirm(&xdr, args, &hdr);
2071 encode_nops(&hdr); 2050 encode_nops(&hdr);
2072 return 0;
2073} 2051}
2074 2052
2075/* 2053/*
2076 * Encode an OPEN request with no attributes. 2054 * Encode an OPEN request with no attributes.
2077 */ 2055 */
2078static int nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, __be32 *p, struct nfs_openargs *args) 2056static void nfs4_xdr_enc_open_noattr(struct rpc_rqst *req,
2057 struct xdr_stream *xdr,
2058 struct nfs_openargs *args)
2079{ 2059{
2080 struct xdr_stream xdr;
2081 struct compound_hdr hdr = { 2060 struct compound_hdr hdr = {
2082 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 2061 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2083 }; 2062 };
2084 2063
2085 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2064 encode_compound_hdr(xdr, req, &hdr);
2086 encode_compound_hdr(&xdr, req, &hdr); 2065 encode_sequence(xdr, &args->seq_args, &hdr);
2087 encode_sequence(&xdr, &args->seq_args, &hdr); 2066 encode_putfh(xdr, args->fh, &hdr);
2088 encode_putfh(&xdr, args->fh, &hdr); 2067 encode_open(xdr, args, &hdr);
2089 encode_open(&xdr, args, &hdr); 2068 encode_getfattr(xdr, args->bitmask, &hdr);
2090 encode_getfattr(&xdr, args->bitmask, &hdr);
2091 encode_nops(&hdr); 2069 encode_nops(&hdr);
2092 return 0;
2093} 2070}
2094 2071
2095/* 2072/*
2096 * Encode an OPEN_DOWNGRADE request 2073 * Encode an OPEN_DOWNGRADE request
2097 */ 2074 */
2098static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, __be32 *p, struct nfs_closeargs *args) 2075static void nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req,
2076 struct xdr_stream *xdr,
2077 struct nfs_closeargs *args)
2099{ 2078{
2100 struct xdr_stream xdr;
2101 struct compound_hdr hdr = { 2079 struct compound_hdr hdr = {
2102 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 2080 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2103 }; 2081 };
2104 2082
2105 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2083 encode_compound_hdr(xdr, req, &hdr);
2106 encode_compound_hdr(&xdr, req, &hdr); 2084 encode_sequence(xdr, &args->seq_args, &hdr);
2107 encode_sequence(&xdr, &args->seq_args, &hdr); 2085 encode_putfh(xdr, args->fh, &hdr);
2108 encode_putfh(&xdr, args->fh, &hdr); 2086 encode_open_downgrade(xdr, args, &hdr);
2109 encode_open_downgrade(&xdr, args, &hdr); 2087 encode_getfattr(xdr, args->bitmask, &hdr);
2110 encode_getfattr(&xdr, args->bitmask, &hdr);
2111 encode_nops(&hdr); 2088 encode_nops(&hdr);
2112 return 0;
2113} 2089}
2114 2090
2115/* 2091/*
2116 * Encode a LOCK request 2092 * Encode a LOCK request
2117 */ 2093 */
2118static int nfs4_xdr_enc_lock(struct rpc_rqst *req, __be32 *p, struct nfs_lock_args *args) 2094static void nfs4_xdr_enc_lock(struct rpc_rqst *req, struct xdr_stream *xdr,
2095 struct nfs_lock_args *args)
2119{ 2096{
2120 struct xdr_stream xdr;
2121 struct compound_hdr hdr = { 2097 struct compound_hdr hdr = {
2122 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 2098 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2123 }; 2099 };
2124 2100
2125 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2101 encode_compound_hdr(xdr, req, &hdr);
2126 encode_compound_hdr(&xdr, req, &hdr); 2102 encode_sequence(xdr, &args->seq_args, &hdr);
2127 encode_sequence(&xdr, &args->seq_args, &hdr); 2103 encode_putfh(xdr, args->fh, &hdr);
2128 encode_putfh(&xdr, args->fh, &hdr); 2104 encode_lock(xdr, args, &hdr);
2129 encode_lock(&xdr, args, &hdr);
2130 encode_nops(&hdr); 2105 encode_nops(&hdr);
2131 return 0;
2132} 2106}
2133 2107
2134/* 2108/*
2135 * Encode a LOCKT request 2109 * Encode a LOCKT request
2136 */ 2110 */
2137static int nfs4_xdr_enc_lockt(struct rpc_rqst *req, __be32 *p, struct nfs_lockt_args *args) 2111static void nfs4_xdr_enc_lockt(struct rpc_rqst *req, struct xdr_stream *xdr,
2112 struct nfs_lockt_args *args)
2138{ 2113{
2139 struct xdr_stream xdr;
2140 struct compound_hdr hdr = { 2114 struct compound_hdr hdr = {
2141 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 2115 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2142 }; 2116 };
2143 2117
2144 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2118 encode_compound_hdr(xdr, req, &hdr);
2145 encode_compound_hdr(&xdr, req, &hdr); 2119 encode_sequence(xdr, &args->seq_args, &hdr);
2146 encode_sequence(&xdr, &args->seq_args, &hdr); 2120 encode_putfh(xdr, args->fh, &hdr);
2147 encode_putfh(&xdr, args->fh, &hdr); 2121 encode_lockt(xdr, args, &hdr);
2148 encode_lockt(&xdr, args, &hdr);
2149 encode_nops(&hdr); 2122 encode_nops(&hdr);
2150 return 0;
2151} 2123}
2152 2124
2153/* 2125/*
2154 * Encode a LOCKU request 2126 * Encode a LOCKU request
2155 */ 2127 */
2156static int nfs4_xdr_enc_locku(struct rpc_rqst *req, __be32 *p, struct nfs_locku_args *args) 2128static void nfs4_xdr_enc_locku(struct rpc_rqst *req, struct xdr_stream *xdr,
2129 struct nfs_locku_args *args)
2157{ 2130{
2158 struct xdr_stream xdr;
2159 struct compound_hdr hdr = { 2131 struct compound_hdr hdr = {
2160 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 2132 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2161 }; 2133 };
2162 2134
2163 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2135 encode_compound_hdr(xdr, req, &hdr);
2164 encode_compound_hdr(&xdr, req, &hdr); 2136 encode_sequence(xdr, &args->seq_args, &hdr);
2165 encode_sequence(&xdr, &args->seq_args, &hdr); 2137 encode_putfh(xdr, args->fh, &hdr);
2166 encode_putfh(&xdr, args->fh, &hdr); 2138 encode_locku(xdr, args, &hdr);
2167 encode_locku(&xdr, args, &hdr);
2168 encode_nops(&hdr); 2139 encode_nops(&hdr);
2169 return 0;
2170} 2140}
2171 2141
2172static int nfs4_xdr_enc_release_lockowner(struct rpc_rqst *req, __be32 *p, struct nfs_release_lockowner_args *args) 2142static void nfs4_xdr_enc_release_lockowner(struct rpc_rqst *req,
2143 struct xdr_stream *xdr,
2144 struct nfs_release_lockowner_args *args)
2173{ 2145{
2174 struct xdr_stream xdr;
2175 struct compound_hdr hdr = { 2146 struct compound_hdr hdr = {
2176 .minorversion = 0, 2147 .minorversion = 0,
2177 }; 2148 };
2178 2149
2179 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2150 encode_compound_hdr(xdr, req, &hdr);
2180 encode_compound_hdr(&xdr, req, &hdr); 2151 encode_release_lockowner(xdr, &args->lock_owner, &hdr);
2181 encode_release_lockowner(&xdr, &args->lock_owner, &hdr);
2182 encode_nops(&hdr); 2152 encode_nops(&hdr);
2183 return 0;
2184} 2153}
2185 2154
2186/* 2155/*
2187 * Encode a READLINK request 2156 * Encode a READLINK request
2188 */ 2157 */
2189static int nfs4_xdr_enc_readlink(struct rpc_rqst *req, __be32 *p, const struct nfs4_readlink *args) 2158static void nfs4_xdr_enc_readlink(struct rpc_rqst *req, struct xdr_stream *xdr,
2159 const struct nfs4_readlink *args)
2190{ 2160{
2191 struct xdr_stream xdr;
2192 struct compound_hdr hdr = { 2161 struct compound_hdr hdr = {
2193 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 2162 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2194 }; 2163 };
2195 2164
2196 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2165 encode_compound_hdr(xdr, req, &hdr);
2197 encode_compound_hdr(&xdr, req, &hdr); 2166 encode_sequence(xdr, &args->seq_args, &hdr);
2198 encode_sequence(&xdr, &args->seq_args, &hdr); 2167 encode_putfh(xdr, args->fh, &hdr);
2199 encode_putfh(&xdr, args->fh, &hdr); 2168 encode_readlink(xdr, args, req, &hdr);
2200 encode_readlink(&xdr, args, req, &hdr);
2201 2169
2202 xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages, 2170 xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages,
2203 args->pgbase, args->pglen); 2171 args->pgbase, args->pglen);
2204 encode_nops(&hdr); 2172 encode_nops(&hdr);
2205 return 0;
2206} 2173}
2207 2174
2208/* 2175/*
2209 * Encode a READDIR request 2176 * Encode a READDIR request
2210 */ 2177 */
2211static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, __be32 *p, const struct nfs4_readdir_arg *args) 2178static void nfs4_xdr_enc_readdir(struct rpc_rqst *req, struct xdr_stream *xdr,
2179 const struct nfs4_readdir_arg *args)
2212{ 2180{
2213 struct xdr_stream xdr;
2214 struct compound_hdr hdr = { 2181 struct compound_hdr hdr = {
2215 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 2182 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2216 }; 2183 };
2217 2184
2218 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2185 encode_compound_hdr(xdr, req, &hdr);
2219 encode_compound_hdr(&xdr, req, &hdr); 2186 encode_sequence(xdr, &args->seq_args, &hdr);
2220 encode_sequence(&xdr, &args->seq_args, &hdr); 2187 encode_putfh(xdr, args->fh, &hdr);
2221 encode_putfh(&xdr, args->fh, &hdr); 2188 encode_readdir(xdr, args, req, &hdr);
2222 encode_readdir(&xdr, args, req, &hdr);
2223 2189
2224 xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages, 2190 xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages,
2225 args->pgbase, args->count); 2191 args->pgbase, args->count);
@@ -2227,428 +2193,387 @@ static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, __be32 *p, const struct nf
2227 __func__, hdr.replen << 2, args->pages, 2193 __func__, hdr.replen << 2, args->pages,
2228 args->pgbase, args->count); 2194 args->pgbase, args->count);
2229 encode_nops(&hdr); 2195 encode_nops(&hdr);
2230 return 0;
2231} 2196}
2232 2197
2233/* 2198/*
2234 * Encode a READ request 2199 * Encode a READ request
2235 */ 2200 */
2236static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) 2201static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr,
2202 struct nfs_readargs *args)
2237{ 2203{
2238 struct xdr_stream xdr;
2239 struct compound_hdr hdr = { 2204 struct compound_hdr hdr = {
2240 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 2205 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2241 }; 2206 };
2242 2207
2243 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2208 encode_compound_hdr(xdr, req, &hdr);
2244 encode_compound_hdr(&xdr, req, &hdr); 2209 encode_sequence(xdr, &args->seq_args, &hdr);
2245 encode_sequence(&xdr, &args->seq_args, &hdr); 2210 encode_putfh(xdr, args->fh, &hdr);
2246 encode_putfh(&xdr, args->fh, &hdr); 2211 encode_read(xdr, args, &hdr);
2247 encode_read(&xdr, args, &hdr);
2248 2212
2249 xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, 2213 xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2,
2250 args->pages, args->pgbase, args->count); 2214 args->pages, args->pgbase, args->count);
2251 req->rq_rcv_buf.flags |= XDRBUF_READ; 2215 req->rq_rcv_buf.flags |= XDRBUF_READ;
2252 encode_nops(&hdr); 2216 encode_nops(&hdr);
2253 return 0;
2254} 2217}
2255 2218
2256/* 2219/*
2257 * Encode an SETATTR request 2220 * Encode an SETATTR request
2258 */ 2221 */
2259static int nfs4_xdr_enc_setattr(struct rpc_rqst *req, __be32 *p, struct nfs_setattrargs *args) 2222static void nfs4_xdr_enc_setattr(struct rpc_rqst *req, struct xdr_stream *xdr,
2223 struct nfs_setattrargs *args)
2260{ 2224{
2261 struct xdr_stream xdr;
2262 struct compound_hdr hdr = { 2225 struct compound_hdr hdr = {
2263 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 2226 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2264 }; 2227 };
2265 2228
2266 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2229 encode_compound_hdr(xdr, req, &hdr);
2267 encode_compound_hdr(&xdr, req, &hdr); 2230 encode_sequence(xdr, &args->seq_args, &hdr);
2268 encode_sequence(&xdr, &args->seq_args, &hdr); 2231 encode_putfh(xdr, args->fh, &hdr);
2269 encode_putfh(&xdr, args->fh, &hdr); 2232 encode_setattr(xdr, args, args->server, &hdr);
2270 encode_setattr(&xdr, args, args->server, &hdr); 2233 encode_getfattr(xdr, args->bitmask, &hdr);
2271 encode_getfattr(&xdr, args->bitmask, &hdr);
2272 encode_nops(&hdr); 2234 encode_nops(&hdr);
2273 return 0;
2274} 2235}
2275 2236
2276/* 2237/*
2277 * Encode a GETACL request 2238 * Encode a GETACL request
2278 */ 2239 */
2279static int 2240static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
2280nfs4_xdr_enc_getacl(struct rpc_rqst *req, __be32 *p, 2241 struct nfs_getaclargs *args)
2281 struct nfs_getaclargs *args)
2282{ 2242{
2283 struct xdr_stream xdr;
2284 struct compound_hdr hdr = { 2243 struct compound_hdr hdr = {
2285 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 2244 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2286 }; 2245 };
2287 uint32_t replen; 2246 uint32_t replen;
2288 2247
2289 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2248 encode_compound_hdr(xdr, req, &hdr);
2290 encode_compound_hdr(&xdr, req, &hdr); 2249 encode_sequence(xdr, &args->seq_args, &hdr);
2291 encode_sequence(&xdr, &args->seq_args, &hdr); 2250 encode_putfh(xdr, args->fh, &hdr);
2292 encode_putfh(&xdr, args->fh, &hdr);
2293 replen = hdr.replen + op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz + 1; 2251 replen = hdr.replen + op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz + 1;
2294 encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0, &hdr); 2252 encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr);
2295 2253
2296 xdr_inline_pages(&req->rq_rcv_buf, replen << 2, 2254 xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
2297 args->acl_pages, args->acl_pgbase, args->acl_len); 2255 args->acl_pages, args->acl_pgbase, args->acl_len);
2298 encode_nops(&hdr); 2256 encode_nops(&hdr);
2299 return 0;
2300} 2257}
2301 2258
2302/* 2259/*
2303 * Encode a WRITE request 2260 * Encode a WRITE request
2304 */ 2261 */
2305static int nfs4_xdr_enc_write(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args) 2262static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
2263 struct nfs_writeargs *args)
2306{ 2264{
2307 struct xdr_stream xdr;
2308 struct compound_hdr hdr = { 2265 struct compound_hdr hdr = {
2309 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 2266 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2310 }; 2267 };
2311 2268
2312 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2269 encode_compound_hdr(xdr, req, &hdr);
2313 encode_compound_hdr(&xdr, req, &hdr); 2270 encode_sequence(xdr, &args->seq_args, &hdr);
2314 encode_sequence(&xdr, &args->seq_args, &hdr); 2271 encode_putfh(xdr, args->fh, &hdr);
2315 encode_putfh(&xdr, args->fh, &hdr); 2272 encode_write(xdr, args, &hdr);
2316 encode_write(&xdr, args, &hdr);
2317 req->rq_snd_buf.flags |= XDRBUF_WRITE; 2273 req->rq_snd_buf.flags |= XDRBUF_WRITE;
2318 encode_getfattr(&xdr, args->bitmask, &hdr); 2274 encode_getfattr(xdr, args->bitmask, &hdr);
2319 encode_nops(&hdr); 2275 encode_nops(&hdr);
2320 return 0;
2321} 2276}
2322 2277
2323/* 2278/*
2324 * a COMMIT request 2279 * a COMMIT request
2325 */ 2280 */
2326static int nfs4_xdr_enc_commit(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args) 2281static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr,
2282 struct nfs_writeargs *args)
2327{ 2283{
2328 struct xdr_stream xdr;
2329 struct compound_hdr hdr = { 2284 struct compound_hdr hdr = {
2330 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 2285 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2331 }; 2286 };
2332 2287
2333 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2288 encode_compound_hdr(xdr, req, &hdr);
2334 encode_compound_hdr(&xdr, req, &hdr); 2289 encode_sequence(xdr, &args->seq_args, &hdr);
2335 encode_sequence(&xdr, &args->seq_args, &hdr); 2290 encode_putfh(xdr, args->fh, &hdr);
2336 encode_putfh(&xdr, args->fh, &hdr); 2291 encode_commit(xdr, args, &hdr);
2337 encode_commit(&xdr, args, &hdr); 2292 encode_getfattr(xdr, args->bitmask, &hdr);
2338 encode_getfattr(&xdr, args->bitmask, &hdr);
2339 encode_nops(&hdr); 2293 encode_nops(&hdr);
2340 return 0;
2341} 2294}
2342 2295
2343/* 2296/*
2344 * FSINFO request 2297 * FSINFO request
2345 */ 2298 */
2346static int nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, __be32 *p, struct nfs4_fsinfo_arg *args) 2299static void nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, struct xdr_stream *xdr,
2300 struct nfs4_fsinfo_arg *args)
2347{ 2301{
2348 struct xdr_stream xdr;
2349 struct compound_hdr hdr = { 2302 struct compound_hdr hdr = {
2350 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 2303 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2351 }; 2304 };
2352 2305
2353 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2306 encode_compound_hdr(xdr, req, &hdr);
2354 encode_compound_hdr(&xdr, req, &hdr); 2307 encode_sequence(xdr, &args->seq_args, &hdr);
2355 encode_sequence(&xdr, &args->seq_args, &hdr); 2308 encode_putfh(xdr, args->fh, &hdr);
2356 encode_putfh(&xdr, args->fh, &hdr); 2309 encode_fsinfo(xdr, args->bitmask, &hdr);
2357 encode_fsinfo(&xdr, args->bitmask, &hdr);
2358 encode_nops(&hdr); 2310 encode_nops(&hdr);
2359 return 0;
2360} 2311}
2361 2312
2362/* 2313/*
2363 * a PATHCONF request 2314 * a PATHCONF request
2364 */ 2315 */
2365static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, __be32 *p, const struct nfs4_pathconf_arg *args) 2316static void nfs4_xdr_enc_pathconf(struct rpc_rqst *req, struct xdr_stream *xdr,
2317 const struct nfs4_pathconf_arg *args)
2366{ 2318{
2367 struct xdr_stream xdr;
2368 struct compound_hdr hdr = { 2319 struct compound_hdr hdr = {
2369 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 2320 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2370 }; 2321 };
2371 2322
2372 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2323 encode_compound_hdr(xdr, req, &hdr);
2373 encode_compound_hdr(&xdr, req, &hdr); 2324 encode_sequence(xdr, &args->seq_args, &hdr);
2374 encode_sequence(&xdr, &args->seq_args, &hdr); 2325 encode_putfh(xdr, args->fh, &hdr);
2375 encode_putfh(&xdr, args->fh, &hdr); 2326 encode_getattr_one(xdr, args->bitmask[0] & nfs4_pathconf_bitmap[0],
2376 encode_getattr_one(&xdr, args->bitmask[0] & nfs4_pathconf_bitmap[0],
2377 &hdr); 2327 &hdr);
2378 encode_nops(&hdr); 2328 encode_nops(&hdr);
2379 return 0;
2380} 2329}
2381 2330
2382/* 2331/*
2383 * a STATFS request 2332 * a STATFS request
2384 */ 2333 */
2385static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, __be32 *p, const struct nfs4_statfs_arg *args) 2334static void nfs4_xdr_enc_statfs(struct rpc_rqst *req, struct xdr_stream *xdr,
2335 const struct nfs4_statfs_arg *args)
2386{ 2336{
2387 struct xdr_stream xdr;
2388 struct compound_hdr hdr = { 2337 struct compound_hdr hdr = {
2389 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 2338 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2390 }; 2339 };
2391 2340
2392 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2341 encode_compound_hdr(xdr, req, &hdr);
2393 encode_compound_hdr(&xdr, req, &hdr); 2342 encode_sequence(xdr, &args->seq_args, &hdr);
2394 encode_sequence(&xdr, &args->seq_args, &hdr); 2343 encode_putfh(xdr, args->fh, &hdr);
2395 encode_putfh(&xdr, args->fh, &hdr); 2344 encode_getattr_two(xdr, args->bitmask[0] & nfs4_statfs_bitmap[0],
2396 encode_getattr_two(&xdr, args->bitmask[0] & nfs4_statfs_bitmap[0],
2397 args->bitmask[1] & nfs4_statfs_bitmap[1], &hdr); 2345 args->bitmask[1] & nfs4_statfs_bitmap[1], &hdr);
2398 encode_nops(&hdr); 2346 encode_nops(&hdr);
2399 return 0;
2400} 2347}
2401 2348
2402/* 2349/*
2403 * GETATTR_BITMAP request 2350 * GETATTR_BITMAP request
2404 */ 2351 */
2405static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, __be32 *p, 2352static void nfs4_xdr_enc_server_caps(struct rpc_rqst *req,
2406 struct nfs4_server_caps_arg *args) 2353 struct xdr_stream *xdr,
2354 struct nfs4_server_caps_arg *args)
2407{ 2355{
2408 struct xdr_stream xdr;
2409 struct compound_hdr hdr = { 2356 struct compound_hdr hdr = {
2410 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 2357 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2411 }; 2358 };
2412 2359
2413 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2360 encode_compound_hdr(xdr, req, &hdr);
2414 encode_compound_hdr(&xdr, req, &hdr); 2361 encode_sequence(xdr, &args->seq_args, &hdr);
2415 encode_sequence(&xdr, &args->seq_args, &hdr); 2362 encode_putfh(xdr, args->fhandle, &hdr);
2416 encode_putfh(&xdr, args->fhandle, &hdr); 2363 encode_getattr_one(xdr, FATTR4_WORD0_SUPPORTED_ATTRS|
2417 encode_getattr_one(&xdr, FATTR4_WORD0_SUPPORTED_ATTRS|
2418 FATTR4_WORD0_LINK_SUPPORT| 2364 FATTR4_WORD0_LINK_SUPPORT|
2419 FATTR4_WORD0_SYMLINK_SUPPORT| 2365 FATTR4_WORD0_SYMLINK_SUPPORT|
2420 FATTR4_WORD0_ACLSUPPORT, &hdr); 2366 FATTR4_WORD0_ACLSUPPORT, &hdr);
2421 encode_nops(&hdr); 2367 encode_nops(&hdr);
2422 return 0;
2423} 2368}
2424 2369
2425/* 2370/*
2426 * a RENEW request 2371 * a RENEW request
2427 */ 2372 */
2428static int nfs4_xdr_enc_renew(struct rpc_rqst *req, __be32 *p, struct nfs_client *clp) 2373static void nfs4_xdr_enc_renew(struct rpc_rqst *req, struct xdr_stream *xdr,
2374 struct nfs_client *clp)
2429{ 2375{
2430 struct xdr_stream xdr;
2431 struct compound_hdr hdr = { 2376 struct compound_hdr hdr = {
2432 .nops = 0, 2377 .nops = 0,
2433 }; 2378 };
2434 2379
2435 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2380 encode_compound_hdr(xdr, req, &hdr);
2436 encode_compound_hdr(&xdr, req, &hdr); 2381 encode_renew(xdr, clp, &hdr);
2437 encode_renew(&xdr, clp, &hdr);
2438 encode_nops(&hdr); 2382 encode_nops(&hdr);
2439 return 0;
2440} 2383}
2441 2384
2442/* 2385/*
2443 * a SETCLIENTID request 2386 * a SETCLIENTID request
2444 */ 2387 */
2445static int nfs4_xdr_enc_setclientid(struct rpc_rqst *req, __be32 *p, struct nfs4_setclientid *sc) 2388static void nfs4_xdr_enc_setclientid(struct rpc_rqst *req,
2389 struct xdr_stream *xdr,
2390 struct nfs4_setclientid *sc)
2446{ 2391{
2447 struct xdr_stream xdr;
2448 struct compound_hdr hdr = { 2392 struct compound_hdr hdr = {
2449 .nops = 0, 2393 .nops = 0,
2450 }; 2394 };
2451 2395
2452 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2396 encode_compound_hdr(xdr, req, &hdr);
2453 encode_compound_hdr(&xdr, req, &hdr); 2397 encode_setclientid(xdr, sc, &hdr);
2454 encode_setclientid(&xdr, sc, &hdr);
2455 encode_nops(&hdr); 2398 encode_nops(&hdr);
2456 return 0;
2457} 2399}
2458 2400
2459/* 2401/*
2460 * a SETCLIENTID_CONFIRM request 2402 * a SETCLIENTID_CONFIRM request
2461 */ 2403 */
2462static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, __be32 *p, struct nfs4_setclientid_res *arg) 2404static void nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req,
2405 struct xdr_stream *xdr,
2406 struct nfs4_setclientid_res *arg)
2463{ 2407{
2464 struct xdr_stream xdr;
2465 struct compound_hdr hdr = { 2408 struct compound_hdr hdr = {
2466 .nops = 0, 2409 .nops = 0,
2467 }; 2410 };
2468 const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 }; 2411 const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 };
2469 2412
2470 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2413 encode_compound_hdr(xdr, req, &hdr);
2471 encode_compound_hdr(&xdr, req, &hdr); 2414 encode_setclientid_confirm(xdr, arg, &hdr);
2472 encode_setclientid_confirm(&xdr, arg, &hdr); 2415 encode_putrootfh(xdr, &hdr);
2473 encode_putrootfh(&xdr, &hdr); 2416 encode_fsinfo(xdr, lease_bitmap, &hdr);
2474 encode_fsinfo(&xdr, lease_bitmap, &hdr);
2475 encode_nops(&hdr); 2417 encode_nops(&hdr);
2476 return 0;
2477} 2418}
2478 2419
2479/* 2420/*
2480 * DELEGRETURN request 2421 * DELEGRETURN request
2481 */ 2422 */
2482static int nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, __be32 *p, const struct nfs4_delegreturnargs *args) 2423static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req,
2424 struct xdr_stream *xdr,
2425 const struct nfs4_delegreturnargs *args)
2483{ 2426{
2484 struct xdr_stream xdr;
2485 struct compound_hdr hdr = { 2427 struct compound_hdr hdr = {
2486 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 2428 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2487 }; 2429 };
2488 2430
2489 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2431 encode_compound_hdr(xdr, req, &hdr);
2490 encode_compound_hdr(&xdr, req, &hdr); 2432 encode_sequence(xdr, &args->seq_args, &hdr);
2491 encode_sequence(&xdr, &args->seq_args, &hdr); 2433 encode_putfh(xdr, args->fhandle, &hdr);
2492 encode_putfh(&xdr, args->fhandle, &hdr); 2434 encode_delegreturn(xdr, args->stateid, &hdr);
2493 encode_delegreturn(&xdr, args->stateid, &hdr); 2435 encode_getfattr(xdr, args->bitmask, &hdr);
2494 encode_getfattr(&xdr, args->bitmask, &hdr);
2495 encode_nops(&hdr); 2436 encode_nops(&hdr);
2496 return 0;
2497} 2437}
2498 2438
2499/* 2439/*
2500 * Encode FS_LOCATIONS request 2440 * Encode FS_LOCATIONS request
2501 */ 2441 */
2502static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs4_fs_locations_arg *args) 2442static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req,
2443 struct xdr_stream *xdr,
2444 struct nfs4_fs_locations_arg *args)
2503{ 2445{
2504 struct xdr_stream xdr;
2505 struct compound_hdr hdr = { 2446 struct compound_hdr hdr = {
2506 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 2447 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2507 }; 2448 };
2508 uint32_t replen; 2449 uint32_t replen;
2509 2450
2510 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2451 encode_compound_hdr(xdr, req, &hdr);
2511 encode_compound_hdr(&xdr, req, &hdr); 2452 encode_sequence(xdr, &args->seq_args, &hdr);
2512 encode_sequence(&xdr, &args->seq_args, &hdr); 2453 encode_putfh(xdr, args->dir_fh, &hdr);
2513 encode_putfh(&xdr, args->dir_fh, &hdr); 2454 encode_lookup(xdr, args->name, &hdr);
2514 encode_lookup(&xdr, args->name, &hdr);
2515 replen = hdr.replen; /* get the attribute into args->page */ 2455 replen = hdr.replen; /* get the attribute into args->page */
2516 encode_fs_locations(&xdr, args->bitmask, &hdr); 2456 encode_fs_locations(xdr, args->bitmask, &hdr);
2517 2457
2518 xdr_inline_pages(&req->rq_rcv_buf, replen << 2, &args->page, 2458 xdr_inline_pages(&req->rq_rcv_buf, replen << 2, &args->page,
2519 0, PAGE_SIZE); 2459 0, PAGE_SIZE);
2520 encode_nops(&hdr); 2460 encode_nops(&hdr);
2521 return 0;
2522} 2461}
2523 2462
2524#if defined(CONFIG_NFS_V4_1) 2463#if defined(CONFIG_NFS_V4_1)
2525/* 2464/*
2526 * EXCHANGE_ID request 2465 * EXCHANGE_ID request
2527 */ 2466 */
2528static int nfs4_xdr_enc_exchange_id(struct rpc_rqst *req, uint32_t *p, 2467static void nfs4_xdr_enc_exchange_id(struct rpc_rqst *req,
2529 struct nfs41_exchange_id_args *args) 2468 struct xdr_stream *xdr,
2469 struct nfs41_exchange_id_args *args)
2530{ 2470{
2531 struct xdr_stream xdr;
2532 struct compound_hdr hdr = { 2471 struct compound_hdr hdr = {
2533 .minorversion = args->client->cl_mvops->minor_version, 2472 .minorversion = args->client->cl_mvops->minor_version,
2534 }; 2473 };
2535 2474
2536 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2475 encode_compound_hdr(xdr, req, &hdr);
2537 encode_compound_hdr(&xdr, req, &hdr); 2476 encode_exchange_id(xdr, args, &hdr);
2538 encode_exchange_id(&xdr, args, &hdr);
2539 encode_nops(&hdr); 2477 encode_nops(&hdr);
2540 return 0;
2541} 2478}
2542 2479
2543/* 2480/*
2544 * a CREATE_SESSION request 2481 * a CREATE_SESSION request
2545 */ 2482 */
2546static int nfs4_xdr_enc_create_session(struct rpc_rqst *req, uint32_t *p, 2483static void nfs4_xdr_enc_create_session(struct rpc_rqst *req,
2547 struct nfs41_create_session_args *args) 2484 struct xdr_stream *xdr,
2485 struct nfs41_create_session_args *args)
2548{ 2486{
2549 struct xdr_stream xdr;
2550 struct compound_hdr hdr = { 2487 struct compound_hdr hdr = {
2551 .minorversion = args->client->cl_mvops->minor_version, 2488 .minorversion = args->client->cl_mvops->minor_version,
2552 }; 2489 };
2553 2490
2554 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2491 encode_compound_hdr(xdr, req, &hdr);
2555 encode_compound_hdr(&xdr, req, &hdr); 2492 encode_create_session(xdr, args, &hdr);
2556 encode_create_session(&xdr, args, &hdr);
2557 encode_nops(&hdr); 2493 encode_nops(&hdr);
2558 return 0;
2559} 2494}
2560 2495
2561/* 2496/*
2562 * a DESTROY_SESSION request 2497 * a DESTROY_SESSION request
2563 */ 2498 */
2564static int nfs4_xdr_enc_destroy_session(struct rpc_rqst *req, uint32_t *p, 2499static void nfs4_xdr_enc_destroy_session(struct rpc_rqst *req,
2565 struct nfs4_session *session) 2500 struct xdr_stream *xdr,
2501 struct nfs4_session *session)
2566{ 2502{
2567 struct xdr_stream xdr;
2568 struct compound_hdr hdr = { 2503 struct compound_hdr hdr = {
2569 .minorversion = session->clp->cl_mvops->minor_version, 2504 .minorversion = session->clp->cl_mvops->minor_version,
2570 }; 2505 };
2571 2506
2572 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2507 encode_compound_hdr(xdr, req, &hdr);
2573 encode_compound_hdr(&xdr, req, &hdr); 2508 encode_destroy_session(xdr, session, &hdr);
2574 encode_destroy_session(&xdr, session, &hdr);
2575 encode_nops(&hdr); 2509 encode_nops(&hdr);
2576 return 0;
2577} 2510}
2578 2511
2579/* 2512/*
2580 * a SEQUENCE request 2513 * a SEQUENCE request
2581 */ 2514 */
2582static int nfs4_xdr_enc_sequence(struct rpc_rqst *req, uint32_t *p, 2515static void nfs4_xdr_enc_sequence(struct rpc_rqst *req, struct xdr_stream *xdr,
2583 struct nfs4_sequence_args *args) 2516 struct nfs4_sequence_args *args)
2584{ 2517{
2585 struct xdr_stream xdr;
2586 struct compound_hdr hdr = { 2518 struct compound_hdr hdr = {
2587 .minorversion = nfs4_xdr_minorversion(args), 2519 .minorversion = nfs4_xdr_minorversion(args),
2588 }; 2520 };
2589 2521
2590 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2522 encode_compound_hdr(xdr, req, &hdr);
2591 encode_compound_hdr(&xdr, req, &hdr); 2523 encode_sequence(xdr, args, &hdr);
2592 encode_sequence(&xdr, args, &hdr);
2593 encode_nops(&hdr); 2524 encode_nops(&hdr);
2594 return 0;
2595} 2525}
2596 2526
2597/* 2527/*
2598 * a GET_LEASE_TIME request 2528 * a GET_LEASE_TIME request
2599 */ 2529 */
2600static int nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req, uint32_t *p, 2530static void nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req,
2601 struct nfs4_get_lease_time_args *args) 2531 struct xdr_stream *xdr,
2532 struct nfs4_get_lease_time_args *args)
2602{ 2533{
2603 struct xdr_stream xdr;
2604 struct compound_hdr hdr = { 2534 struct compound_hdr hdr = {
2605 .minorversion = nfs4_xdr_minorversion(&args->la_seq_args), 2535 .minorversion = nfs4_xdr_minorversion(&args->la_seq_args),
2606 }; 2536 };
2607 const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 }; 2537 const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 };
2608 2538
2609 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2539 encode_compound_hdr(xdr, req, &hdr);
2610 encode_compound_hdr(&xdr, req, &hdr); 2540 encode_sequence(xdr, &args->la_seq_args, &hdr);
2611 encode_sequence(&xdr, &args->la_seq_args, &hdr); 2541 encode_putrootfh(xdr, &hdr);
2612 encode_putrootfh(&xdr, &hdr); 2542 encode_fsinfo(xdr, lease_bitmap, &hdr);
2613 encode_fsinfo(&xdr, lease_bitmap, &hdr);
2614 encode_nops(&hdr); 2543 encode_nops(&hdr);
2615 return 0;
2616} 2544}
2617 2545
2618/* 2546/*
2619 * a RECLAIM_COMPLETE request 2547 * a RECLAIM_COMPLETE request
2620 */ 2548 */
2621static int nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req, uint32_t *p, 2549static void nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req,
2622 struct nfs41_reclaim_complete_args *args) 2550 struct xdr_stream *xdr,
2551 struct nfs41_reclaim_complete_args *args)
2623{ 2552{
2624 struct xdr_stream xdr;
2625 struct compound_hdr hdr = { 2553 struct compound_hdr hdr = {
2626 .minorversion = nfs4_xdr_minorversion(&args->seq_args) 2554 .minorversion = nfs4_xdr_minorversion(&args->seq_args)
2627 }; 2555 };
2628 2556
2629 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2557 encode_compound_hdr(xdr, req, &hdr);
2630 encode_compound_hdr(&xdr, req, &hdr); 2558 encode_sequence(xdr, &args->seq_args, &hdr);
2631 encode_sequence(&xdr, &args->seq_args, &hdr); 2559 encode_reclaim_complete(xdr, args, &hdr);
2632 encode_reclaim_complete(&xdr, args, &hdr);
2633 encode_nops(&hdr); 2560 encode_nops(&hdr);
2634 return 0;
2635} 2561}
2636 2562
2637/* 2563/*
2638 * Encode GETDEVICEINFO request 2564 * Encode GETDEVICEINFO request
2639 */ 2565 */
2640static int nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, uint32_t *p, 2566static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req,
2641 struct nfs4_getdeviceinfo_args *args) 2567 struct xdr_stream *xdr,
2568 struct nfs4_getdeviceinfo_args *args)
2642{ 2569{
2643 struct xdr_stream xdr;
2644 struct compound_hdr hdr = { 2570 struct compound_hdr hdr = {
2645 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 2571 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2646 }; 2572 };
2647 2573
2648 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2574 encode_compound_hdr(xdr, req, &hdr);
2649 encode_compound_hdr(&xdr, req, &hdr); 2575 encode_sequence(xdr, &args->seq_args, &hdr);
2650 encode_sequence(&xdr, &args->seq_args, &hdr); 2576 encode_getdeviceinfo(xdr, args, &hdr);
2651 encode_getdeviceinfo(&xdr, args, &hdr);
2652 2577
2653 /* set up reply kvec. Subtract notification bitmap max size (2) 2578 /* set up reply kvec. Subtract notification bitmap max size (2)
2654 * so that notification bitmap is put in xdr_buf tail */ 2579 * so that notification bitmap is put in xdr_buf tail */
@@ -2657,27 +2582,24 @@ static int nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, uint32_t *p,
2657 args->pdev->pglen); 2582 args->pdev->pglen);
2658 2583
2659 encode_nops(&hdr); 2584 encode_nops(&hdr);
2660 return 0;
2661} 2585}
2662 2586
2663/* 2587/*
2664 * Encode LAYOUTGET request 2588 * Encode LAYOUTGET request
2665 */ 2589 */
2666static int nfs4_xdr_enc_layoutget(struct rpc_rqst *req, uint32_t *p, 2590static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req,
2667 struct nfs4_layoutget_args *args) 2591 struct xdr_stream *xdr,
2592 struct nfs4_layoutget_args *args)
2668{ 2593{
2669 struct xdr_stream xdr;
2670 struct compound_hdr hdr = { 2594 struct compound_hdr hdr = {
2671 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 2595 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2672 }; 2596 };
2673 2597
2674 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2598 encode_compound_hdr(xdr, req, &hdr);
2675 encode_compound_hdr(&xdr, req, &hdr); 2599 encode_sequence(xdr, &args->seq_args, &hdr);
2676 encode_sequence(&xdr, &args->seq_args, &hdr); 2600 encode_putfh(xdr, NFS_FH(args->inode), &hdr);
2677 encode_putfh(&xdr, NFS_FH(args->inode), &hdr); 2601 encode_layoutget(xdr, args, &hdr);
2678 encode_layoutget(&xdr, args, &hdr);
2679 encode_nops(&hdr); 2602 encode_nops(&hdr);
2680 return 0;
2681} 2603}
2682#endif /* CONFIG_NFS_V4_1 */ 2604#endif /* CONFIG_NFS_V4_1 */
2683 2605
@@ -4475,7 +4397,7 @@ static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_
4475 goto out_overflow; 4397 goto out_overflow;
4476 eof = be32_to_cpup(p++); 4398 eof = be32_to_cpup(p++);
4477 count = be32_to_cpup(p); 4399 count = be32_to_cpup(p);
4478 hdrlen = (u8 *) p - (u8 *) iov->iov_base; 4400 hdrlen = (u8 *) xdr->p - (u8 *) iov->iov_base;
4479 recvd = req->rq_rcv_buf.len - hdrlen; 4401 recvd = req->rq_rcv_buf.len - hdrlen;
4480 if (count > recvd) { 4402 if (count > recvd) {
4481 dprintk("NFS: server cheating in read reply: " 4403 dprintk("NFS: server cheating in read reply: "
@@ -5000,7 +4922,7 @@ static int decode_getdeviceinfo(struct xdr_stream *xdr,
5000 goto out_overflow; 4922 goto out_overflow;
5001 len = be32_to_cpup(p); 4923 len = be32_to_cpup(p);
5002 if (len) { 4924 if (len) {
5003 int i; 4925 uint32_t i;
5004 4926
5005 p = xdr_inline_decode(xdr, 4 * len); 4927 p = xdr_inline_decode(xdr, 4 * len);
5006 if (unlikely(!p)) 4928 if (unlikely(!p))
@@ -5090,26 +5012,26 @@ out_overflow:
5090/* 5012/*
5091 * Decode OPEN_DOWNGRADE response 5013 * Decode OPEN_DOWNGRADE response
5092 */ 5014 */
5093static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, __be32 *p, struct nfs_closeres *res) 5015static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp,
5016 struct xdr_stream *xdr,
5017 struct nfs_closeres *res)
5094{ 5018{
5095 struct xdr_stream xdr;
5096 struct compound_hdr hdr; 5019 struct compound_hdr hdr;
5097 int status; 5020 int status;
5098 5021
5099 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5022 status = decode_compound_hdr(xdr, &hdr);
5100 status = decode_compound_hdr(&xdr, &hdr);
5101 if (status) 5023 if (status)
5102 goto out; 5024 goto out;
5103 status = decode_sequence(&xdr, &res->seq_res, rqstp); 5025 status = decode_sequence(xdr, &res->seq_res, rqstp);
5104 if (status) 5026 if (status)
5105 goto out; 5027 goto out;
5106 status = decode_putfh(&xdr); 5028 status = decode_putfh(xdr);
5107 if (status) 5029 if (status)
5108 goto out; 5030 goto out;
5109 status = decode_open_downgrade(&xdr, res); 5031 status = decode_open_downgrade(xdr, res);
5110 if (status != 0) 5032 if (status != 0)
5111 goto out; 5033 goto out;
5112 decode_getfattr(&xdr, res->fattr, res->server, 5034 decode_getfattr(xdr, res->fattr, res->server,
5113 !RPC_IS_ASYNC(rqstp->rq_task)); 5035 !RPC_IS_ASYNC(rqstp->rq_task));
5114out: 5036out:
5115 return status; 5037 return status;
@@ -5118,26 +5040,25 @@ out:
5118/* 5040/*
5119 * Decode ACCESS response 5041 * Decode ACCESS response
5120 */ 5042 */
5121static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_accessres *res) 5043static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5044 struct nfs4_accessres *res)
5122{ 5045{
5123 struct xdr_stream xdr;
5124 struct compound_hdr hdr; 5046 struct compound_hdr hdr;
5125 int status; 5047 int status;
5126 5048
5127 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5049 status = decode_compound_hdr(xdr, &hdr);
5128 status = decode_compound_hdr(&xdr, &hdr);
5129 if (status) 5050 if (status)
5130 goto out; 5051 goto out;
5131 status = decode_sequence(&xdr, &res->seq_res, rqstp); 5052 status = decode_sequence(xdr, &res->seq_res, rqstp);
5132 if (status) 5053 if (status)
5133 goto out; 5054 goto out;
5134 status = decode_putfh(&xdr); 5055 status = decode_putfh(xdr);
5135 if (status != 0) 5056 if (status != 0)
5136 goto out; 5057 goto out;
5137 status = decode_access(&xdr, res); 5058 status = decode_access(xdr, res);
5138 if (status != 0) 5059 if (status != 0)
5139 goto out; 5060 goto out;
5140 decode_getfattr(&xdr, res->fattr, res->server, 5061 decode_getfattr(xdr, res->fattr, res->server,
5141 !RPC_IS_ASYNC(rqstp->rq_task)); 5062 !RPC_IS_ASYNC(rqstp->rq_task));
5142out: 5063out:
5143 return status; 5064 return status;
@@ -5146,26 +5067,28 @@ out:
5146/* 5067/*
5147 * Decode LOOKUP response 5068 * Decode LOOKUP response
5148 */ 5069 */
5149static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_lookup_res *res) 5070static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5071 struct nfs4_lookup_res *res)
5150{ 5072{
5151 struct xdr_stream xdr;
5152 struct compound_hdr hdr; 5073 struct compound_hdr hdr;
5153 int status; 5074 int status;
5154 5075
5155 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5076 status = decode_compound_hdr(xdr, &hdr);
5156 status = decode_compound_hdr(&xdr, &hdr);
5157 if (status) 5077 if (status)
5158 goto out; 5078 goto out;
5159 status = decode_sequence(&xdr, &res->seq_res, rqstp); 5079 status = decode_sequence(xdr, &res->seq_res, rqstp);
5160 if (status) 5080 if (status)
5161 goto out; 5081 goto out;
5162 if ((status = decode_putfh(&xdr)) != 0) 5082 status = decode_putfh(xdr);
5083 if (status)
5163 goto out; 5084 goto out;
5164 if ((status = decode_lookup(&xdr)) != 0) 5085 status = decode_lookup(xdr);
5086 if (status)
5165 goto out; 5087 goto out;
5166 if ((status = decode_getfh(&xdr, res->fh)) != 0) 5088 status = decode_getfh(xdr, res->fh);
5089 if (status)
5167 goto out; 5090 goto out;
5168 status = decode_getfattr(&xdr, res->fattr, res->server 5091 status = decode_getfattr(xdr, res->fattr, res->server
5169 ,!RPC_IS_ASYNC(rqstp->rq_task)); 5092 ,!RPC_IS_ASYNC(rqstp->rq_task));
5170out: 5093out:
5171 return status; 5094 return status;
@@ -5174,23 +5097,25 @@ out:
5174/* 5097/*
5175 * Decode LOOKUP_ROOT response 5098 * Decode LOOKUP_ROOT response
5176 */ 5099 */
5177static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_lookup_res *res) 5100static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp,
5101 struct xdr_stream *xdr,
5102 struct nfs4_lookup_res *res)
5178{ 5103{
5179 struct xdr_stream xdr;
5180 struct compound_hdr hdr; 5104 struct compound_hdr hdr;
5181 int status; 5105 int status;
5182 5106
5183 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5107 status = decode_compound_hdr(xdr, &hdr);
5184 status = decode_compound_hdr(&xdr, &hdr);
5185 if (status) 5108 if (status)
5186 goto out; 5109 goto out;
5187 status = decode_sequence(&xdr, &res->seq_res, rqstp); 5110 status = decode_sequence(xdr, &res->seq_res, rqstp);
5188 if (status) 5111 if (status)
5189 goto out; 5112 goto out;
5190 if ((status = decode_putrootfh(&xdr)) != 0) 5113 status = decode_putrootfh(xdr);
5114 if (status)
5191 goto out; 5115 goto out;
5192 if ((status = decode_getfh(&xdr, res->fh)) == 0) 5116 status = decode_getfh(xdr, res->fh);
5193 status = decode_getfattr(&xdr, res->fattr, res->server, 5117 if (status == 0)
5118 status = decode_getfattr(xdr, res->fattr, res->server,
5194 !RPC_IS_ASYNC(rqstp->rq_task)); 5119 !RPC_IS_ASYNC(rqstp->rq_task));
5195out: 5120out:
5196 return status; 5121 return status;
@@ -5199,24 +5124,25 @@ out:
5199/* 5124/*
5200 * Decode REMOVE response 5125 * Decode REMOVE response
5201 */ 5126 */
5202static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, __be32 *p, struct nfs_removeres *res) 5127static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5128 struct nfs_removeres *res)
5203{ 5129{
5204 struct xdr_stream xdr;
5205 struct compound_hdr hdr; 5130 struct compound_hdr hdr;
5206 int status; 5131 int status;
5207 5132
5208 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5133 status = decode_compound_hdr(xdr, &hdr);
5209 status = decode_compound_hdr(&xdr, &hdr);
5210 if (status) 5134 if (status)
5211 goto out; 5135 goto out;
5212 status = decode_sequence(&xdr, &res->seq_res, rqstp); 5136 status = decode_sequence(xdr, &res->seq_res, rqstp);
5213 if (status) 5137 if (status)
5214 goto out; 5138 goto out;
5215 if ((status = decode_putfh(&xdr)) != 0) 5139 status = decode_putfh(xdr);
5140 if (status)
5216 goto out; 5141 goto out;
5217 if ((status = decode_remove(&xdr, &res->cinfo)) != 0) 5142 status = decode_remove(xdr, &res->cinfo);
5143 if (status)
5218 goto out; 5144 goto out;
5219 decode_getfattr(&xdr, res->dir_attr, res->server, 5145 decode_getfattr(xdr, res->dir_attr, res->server,
5220 !RPC_IS_ASYNC(rqstp->rq_task)); 5146 !RPC_IS_ASYNC(rqstp->rq_task));
5221out: 5147out:
5222 return status; 5148 return status;
@@ -5225,34 +5151,38 @@ out:
5225/* 5151/*
5226 * Decode RENAME response 5152 * Decode RENAME response
5227 */ 5153 */
5228static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, __be32 *p, struct nfs_renameres *res) 5154static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5155 struct nfs_renameres *res)
5229{ 5156{
5230 struct xdr_stream xdr;
5231 struct compound_hdr hdr; 5157 struct compound_hdr hdr;
5232 int status; 5158 int status;
5233 5159
5234 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5160 status = decode_compound_hdr(xdr, &hdr);
5235 status = decode_compound_hdr(&xdr, &hdr);
5236 if (status) 5161 if (status)
5237 goto out; 5162 goto out;
5238 status = decode_sequence(&xdr, &res->seq_res, rqstp); 5163 status = decode_sequence(xdr, &res->seq_res, rqstp);
5239 if (status) 5164 if (status)
5240 goto out; 5165 goto out;
5241 if ((status = decode_putfh(&xdr)) != 0) 5166 status = decode_putfh(xdr);
5167 if (status)
5242 goto out; 5168 goto out;
5243 if ((status = decode_savefh(&xdr)) != 0) 5169 status = decode_savefh(xdr);
5170 if (status)
5244 goto out; 5171 goto out;
5245 if ((status = decode_putfh(&xdr)) != 0) 5172 status = decode_putfh(xdr);
5173 if (status)
5246 goto out; 5174 goto out;
5247 if ((status = decode_rename(&xdr, &res->old_cinfo, &res->new_cinfo)) != 0) 5175 status = decode_rename(xdr, &res->old_cinfo, &res->new_cinfo);
5176 if (status)
5248 goto out; 5177 goto out;
5249 /* Current FH is target directory */ 5178 /* Current FH is target directory */
5250 if (decode_getfattr(&xdr, res->new_fattr, res->server, 5179 if (decode_getfattr(xdr, res->new_fattr, res->server,
5251 !RPC_IS_ASYNC(rqstp->rq_task)) != 0) 5180 !RPC_IS_ASYNC(rqstp->rq_task)) != 0)
5252 goto out; 5181 goto out;
5253 if ((status = decode_restorefh(&xdr)) != 0) 5182 status = decode_restorefh(xdr);
5183 if (status)
5254 goto out; 5184 goto out;
5255 decode_getfattr(&xdr, res->old_fattr, res->server, 5185 decode_getfattr(xdr, res->old_fattr, res->server,
5256 !RPC_IS_ASYNC(rqstp->rq_task)); 5186 !RPC_IS_ASYNC(rqstp->rq_task));
5257out: 5187out:
5258 return status; 5188 return status;
@@ -5261,37 +5191,41 @@ out:
5261/* 5191/*
5262 * Decode LINK response 5192 * Decode LINK response
5263 */ 5193 */
5264static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_link_res *res) 5194static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5195 struct nfs4_link_res *res)
5265{ 5196{
5266 struct xdr_stream xdr;
5267 struct compound_hdr hdr; 5197 struct compound_hdr hdr;
5268 int status; 5198 int status;
5269 5199
5270 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5200 status = decode_compound_hdr(xdr, &hdr);
5271 status = decode_compound_hdr(&xdr, &hdr);
5272 if (status) 5201 if (status)
5273 goto out; 5202 goto out;
5274 status = decode_sequence(&xdr, &res->seq_res, rqstp); 5203 status = decode_sequence(xdr, &res->seq_res, rqstp);
5275 if (status) 5204 if (status)
5276 goto out; 5205 goto out;
5277 if ((status = decode_putfh(&xdr)) != 0) 5206 status = decode_putfh(xdr);
5207 if (status)
5278 goto out; 5208 goto out;
5279 if ((status = decode_savefh(&xdr)) != 0) 5209 status = decode_savefh(xdr);
5210 if (status)
5280 goto out; 5211 goto out;
5281 if ((status = decode_putfh(&xdr)) != 0) 5212 status = decode_putfh(xdr);
5213 if (status)
5282 goto out; 5214 goto out;
5283 if ((status = decode_link(&xdr, &res->cinfo)) != 0) 5215 status = decode_link(xdr, &res->cinfo);
5216 if (status)
5284 goto out; 5217 goto out;
5285 /* 5218 /*
5286 * Note order: OP_LINK leaves the directory as the current 5219 * Note order: OP_LINK leaves the directory as the current
5287 * filehandle. 5220 * filehandle.
5288 */ 5221 */
5289 if (decode_getfattr(&xdr, res->dir_attr, res->server, 5222 if (decode_getfattr(xdr, res->dir_attr, res->server,
5290 !RPC_IS_ASYNC(rqstp->rq_task)) != 0) 5223 !RPC_IS_ASYNC(rqstp->rq_task)) != 0)
5291 goto out; 5224 goto out;
5292 if ((status = decode_restorefh(&xdr)) != 0) 5225 status = decode_restorefh(xdr);
5226 if (status)
5293 goto out; 5227 goto out;
5294 decode_getfattr(&xdr, res->fattr, res->server, 5228 decode_getfattr(xdr, res->fattr, res->server,
5295 !RPC_IS_ASYNC(rqstp->rq_task)); 5229 !RPC_IS_ASYNC(rqstp->rq_task));
5296out: 5230out:
5297 return status; 5231 return status;
@@ -5300,33 +5234,37 @@ out:
5300/* 5234/*
5301 * Decode CREATE response 5235 * Decode CREATE response
5302 */ 5236 */
5303static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_create_res *res) 5237static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5238 struct nfs4_create_res *res)
5304{ 5239{
5305 struct xdr_stream xdr;
5306 struct compound_hdr hdr; 5240 struct compound_hdr hdr;
5307 int status; 5241 int status;
5308 5242
5309 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5243 status = decode_compound_hdr(xdr, &hdr);
5310 status = decode_compound_hdr(&xdr, &hdr);
5311 if (status) 5244 if (status)
5312 goto out; 5245 goto out;
5313 status = decode_sequence(&xdr, &res->seq_res, rqstp); 5246 status = decode_sequence(xdr, &res->seq_res, rqstp);
5314 if (status) 5247 if (status)
5315 goto out; 5248 goto out;
5316 if ((status = decode_putfh(&xdr)) != 0) 5249 status = decode_putfh(xdr);
5250 if (status)
5317 goto out; 5251 goto out;
5318 if ((status = decode_savefh(&xdr)) != 0) 5252 status = decode_savefh(xdr);
5253 if (status)
5319 goto out; 5254 goto out;
5320 if ((status = decode_create(&xdr,&res->dir_cinfo)) != 0) 5255 status = decode_create(xdr, &res->dir_cinfo);
5256 if (status)
5321 goto out; 5257 goto out;
5322 if ((status = decode_getfh(&xdr, res->fh)) != 0) 5258 status = decode_getfh(xdr, res->fh);
5259 if (status)
5323 goto out; 5260 goto out;
5324 if (decode_getfattr(&xdr, res->fattr, res->server, 5261 if (decode_getfattr(xdr, res->fattr, res->server,
5325 !RPC_IS_ASYNC(rqstp->rq_task)) != 0) 5262 !RPC_IS_ASYNC(rqstp->rq_task)) != 0)
5326 goto out; 5263 goto out;
5327 if ((status = decode_restorefh(&xdr)) != 0) 5264 status = decode_restorefh(xdr);
5265 if (status)
5328 goto out; 5266 goto out;
5329 decode_getfattr(&xdr, res->dir_fattr, res->server, 5267 decode_getfattr(xdr, res->dir_fattr, res->server,
5330 !RPC_IS_ASYNC(rqstp->rq_task)); 5268 !RPC_IS_ASYNC(rqstp->rq_task));
5331out: 5269out:
5332 return status; 5270 return status;
@@ -5335,31 +5273,31 @@ out:
5335/* 5273/*
5336 * Decode SYMLINK response 5274 * Decode SYMLINK response
5337 */ 5275 */
5338static int nfs4_xdr_dec_symlink(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_create_res *res) 5276static int nfs4_xdr_dec_symlink(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5277 struct nfs4_create_res *res)
5339{ 5278{
5340 return nfs4_xdr_dec_create(rqstp, p, res); 5279 return nfs4_xdr_dec_create(rqstp, xdr, res);
5341} 5280}
5342 5281
5343/* 5282/*
5344 * Decode GETATTR response 5283 * Decode GETATTR response
5345 */ 5284 */
5346static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_getattr_res *res) 5285static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5286 struct nfs4_getattr_res *res)
5347{ 5287{
5348 struct xdr_stream xdr;
5349 struct compound_hdr hdr; 5288 struct compound_hdr hdr;
5350 int status; 5289 int status;
5351 5290
5352 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5291 status = decode_compound_hdr(xdr, &hdr);
5353 status = decode_compound_hdr(&xdr, &hdr);
5354 if (status) 5292 if (status)
5355 goto out; 5293 goto out;
5356 status = decode_sequence(&xdr, &res->seq_res, rqstp); 5294 status = decode_sequence(xdr, &res->seq_res, rqstp);
5357 if (status) 5295 if (status)
5358 goto out; 5296 goto out;
5359 status = decode_putfh(&xdr); 5297 status = decode_putfh(xdr);
5360 if (status) 5298 if (status)
5361 goto out; 5299 goto out;
5362 status = decode_getfattr(&xdr, res->fattr, res->server, 5300 status = decode_getfattr(xdr, res->fattr, res->server,
5363 !RPC_IS_ASYNC(rqstp->rq_task)); 5301 !RPC_IS_ASYNC(rqstp->rq_task));
5364out: 5302out:
5365 return status; 5303 return status;
@@ -5368,46 +5306,40 @@ out:
5368/* 5306/*
5369 * Encode an SETACL request 5307 * Encode an SETACL request
5370 */ 5308 */
5371static int 5309static void nfs4_xdr_enc_setacl(struct rpc_rqst *req, struct xdr_stream *xdr,
5372nfs4_xdr_enc_setacl(struct rpc_rqst *req, __be32 *p, struct nfs_setaclargs *args) 5310 struct nfs_setaclargs *args)
5373{ 5311{
5374 struct xdr_stream xdr;
5375 struct compound_hdr hdr = { 5312 struct compound_hdr hdr = {
5376 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 5313 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
5377 }; 5314 };
5378 int status;
5379 5315
5380 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 5316 encode_compound_hdr(xdr, req, &hdr);
5381 encode_compound_hdr(&xdr, req, &hdr); 5317 encode_sequence(xdr, &args->seq_args, &hdr);
5382 encode_sequence(&xdr, &args->seq_args, &hdr); 5318 encode_putfh(xdr, args->fh, &hdr);
5383 encode_putfh(&xdr, args->fh, &hdr); 5319 encode_setacl(xdr, args, &hdr);
5384 status = encode_setacl(&xdr, args, &hdr);
5385 encode_nops(&hdr); 5320 encode_nops(&hdr);
5386 return status;
5387} 5321}
5388 5322
5389/* 5323/*
5390 * Decode SETACL response 5324 * Decode SETACL response
5391 */ 5325 */
5392static int 5326static int
5393nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, __be32 *p, 5327nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5394 struct nfs_setaclres *res) 5328 struct nfs_setaclres *res)
5395{ 5329{
5396 struct xdr_stream xdr;
5397 struct compound_hdr hdr; 5330 struct compound_hdr hdr;
5398 int status; 5331 int status;
5399 5332
5400 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5333 status = decode_compound_hdr(xdr, &hdr);
5401 status = decode_compound_hdr(&xdr, &hdr);
5402 if (status) 5334 if (status)
5403 goto out; 5335 goto out;
5404 status = decode_sequence(&xdr, &res->seq_res, rqstp); 5336 status = decode_sequence(xdr, &res->seq_res, rqstp);
5405 if (status) 5337 if (status)
5406 goto out; 5338 goto out;
5407 status = decode_putfh(&xdr); 5339 status = decode_putfh(xdr);
5408 if (status) 5340 if (status)
5409 goto out; 5341 goto out;
5410 status = decode_setattr(&xdr); 5342 status = decode_setattr(xdr);
5411out: 5343out:
5412 return status; 5344 return status;
5413} 5345}
@@ -5416,24 +5348,22 @@ out:
5416 * Decode GETACL response 5348 * Decode GETACL response
5417 */ 5349 */
5418static int 5350static int
5419nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, __be32 *p, 5351nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5420 struct nfs_getaclres *res) 5352 struct nfs_getaclres *res)
5421{ 5353{
5422 struct xdr_stream xdr;
5423 struct compound_hdr hdr; 5354 struct compound_hdr hdr;
5424 int status; 5355 int status;
5425 5356
5426 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5357 status = decode_compound_hdr(xdr, &hdr);
5427 status = decode_compound_hdr(&xdr, &hdr);
5428 if (status) 5358 if (status)
5429 goto out; 5359 goto out;
5430 status = decode_sequence(&xdr, &res->seq_res, rqstp); 5360 status = decode_sequence(xdr, &res->seq_res, rqstp);
5431 if (status) 5361 if (status)
5432 goto out; 5362 goto out;
5433 status = decode_putfh(&xdr); 5363 status = decode_putfh(xdr);
5434 if (status) 5364 if (status)
5435 goto out; 5365 goto out;
5436 status = decode_getacl(&xdr, rqstp, &res->acl_len); 5366 status = decode_getacl(xdr, rqstp, &res->acl_len);
5437 5367
5438out: 5368out:
5439 return status; 5369 return status;
@@ -5442,23 +5372,22 @@ out:
5442/* 5372/*
5443 * Decode CLOSE response 5373 * Decode CLOSE response
5444 */ 5374 */
5445static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, __be32 *p, struct nfs_closeres *res) 5375static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5376 struct nfs_closeres *res)
5446{ 5377{
5447 struct xdr_stream xdr;
5448 struct compound_hdr hdr; 5378 struct compound_hdr hdr;
5449 int status; 5379 int status;
5450 5380
5451 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5381 status = decode_compound_hdr(xdr, &hdr);
5452 status = decode_compound_hdr(&xdr, &hdr);
5453 if (status) 5382 if (status)
5454 goto out; 5383 goto out;
5455 status = decode_sequence(&xdr, &res->seq_res, rqstp); 5384 status = decode_sequence(xdr, &res->seq_res, rqstp);
5456 if (status) 5385 if (status)
5457 goto out; 5386 goto out;
5458 status = decode_putfh(&xdr); 5387 status = decode_putfh(xdr);
5459 if (status) 5388 if (status)
5460 goto out; 5389 goto out;
5461 status = decode_close(&xdr, res); 5390 status = decode_close(xdr, res);
5462 if (status != 0) 5391 if (status != 0)
5463 goto out; 5392 goto out;
5464 /* 5393 /*
@@ -5467,7 +5396,7 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, __be32 *p, struct nfs_clos
5467 * an ESTALE error. Shouldn't be a problem, 5396 * an ESTALE error. Shouldn't be a problem,
5468 * though, since fattr->valid will remain unset. 5397 * though, since fattr->valid will remain unset.
5469 */ 5398 */
5470 decode_getfattr(&xdr, res->fattr, res->server, 5399 decode_getfattr(xdr, res->fattr, res->server,
5471 !RPC_IS_ASYNC(rqstp->rq_task)); 5400 !RPC_IS_ASYNC(rqstp->rq_task));
5472out: 5401out:
5473 return status; 5402 return status;
@@ -5476,36 +5405,35 @@ out:
5476/* 5405/*
5477 * Decode OPEN response 5406 * Decode OPEN response
5478 */ 5407 */
5479static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openres *res) 5408static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5409 struct nfs_openres *res)
5480{ 5410{
5481 struct xdr_stream xdr;
5482 struct compound_hdr hdr; 5411 struct compound_hdr hdr;
5483 int status; 5412 int status;
5484 5413
5485 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5414 status = decode_compound_hdr(xdr, &hdr);
5486 status = decode_compound_hdr(&xdr, &hdr);
5487 if (status) 5415 if (status)
5488 goto out; 5416 goto out;
5489 status = decode_sequence(&xdr, &res->seq_res, rqstp); 5417 status = decode_sequence(xdr, &res->seq_res, rqstp);
5490 if (status) 5418 if (status)
5491 goto out; 5419 goto out;
5492 status = decode_putfh(&xdr); 5420 status = decode_putfh(xdr);
5493 if (status) 5421 if (status)
5494 goto out; 5422 goto out;
5495 status = decode_savefh(&xdr); 5423 status = decode_savefh(xdr);
5496 if (status) 5424 if (status)
5497 goto out; 5425 goto out;
5498 status = decode_open(&xdr, res); 5426 status = decode_open(xdr, res);
5499 if (status) 5427 if (status)
5500 goto out; 5428 goto out;
5501 if (decode_getfh(&xdr, &res->fh) != 0) 5429 if (decode_getfh(xdr, &res->fh) != 0)
5502 goto out; 5430 goto out;
5503 if (decode_getfattr(&xdr, res->f_attr, res->server, 5431 if (decode_getfattr(xdr, res->f_attr, res->server,
5504 !RPC_IS_ASYNC(rqstp->rq_task)) != 0) 5432 !RPC_IS_ASYNC(rqstp->rq_task)) != 0)
5505 goto out; 5433 goto out;
5506 if (decode_restorefh(&xdr) != 0) 5434 if (decode_restorefh(xdr) != 0)
5507 goto out; 5435 goto out;
5508 decode_getfattr(&xdr, res->dir_attr, res->server, 5436 decode_getfattr(xdr, res->dir_attr, res->server,
5509 !RPC_IS_ASYNC(rqstp->rq_task)); 5437 !RPC_IS_ASYNC(rqstp->rq_task));
5510out: 5438out:
5511 return status; 5439 return status;
@@ -5514,20 +5442,20 @@ out:
5514/* 5442/*
5515 * Decode OPEN_CONFIRM response 5443 * Decode OPEN_CONFIRM response
5516 */ 5444 */
5517static int nfs4_xdr_dec_open_confirm(struct rpc_rqst *rqstp, __be32 *p, struct nfs_open_confirmres *res) 5445static int nfs4_xdr_dec_open_confirm(struct rpc_rqst *rqstp,
5446 struct xdr_stream *xdr,
5447 struct nfs_open_confirmres *res)
5518{ 5448{
5519 struct xdr_stream xdr;
5520 struct compound_hdr hdr; 5449 struct compound_hdr hdr;
5521 int status; 5450 int status;
5522 5451
5523 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5452 status = decode_compound_hdr(xdr, &hdr);
5524 status = decode_compound_hdr(&xdr, &hdr);
5525 if (status) 5453 if (status)
5526 goto out; 5454 goto out;
5527 status = decode_putfh(&xdr); 5455 status = decode_putfh(xdr);
5528 if (status) 5456 if (status)
5529 goto out; 5457 goto out;
5530 status = decode_open_confirm(&xdr, res); 5458 status = decode_open_confirm(xdr, res);
5531out: 5459out:
5532 return status; 5460 return status;
5533} 5461}
@@ -5535,26 +5463,26 @@ out:
5535/* 5463/*
5536 * Decode OPEN response 5464 * Decode OPEN response
5537 */ 5465 */
5538static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openres *res) 5466static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp,
5467 struct xdr_stream *xdr,
5468 struct nfs_openres *res)
5539{ 5469{
5540 struct xdr_stream xdr;
5541 struct compound_hdr hdr; 5470 struct compound_hdr hdr;
5542 int status; 5471 int status;
5543 5472
5544 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5473 status = decode_compound_hdr(xdr, &hdr);
5545 status = decode_compound_hdr(&xdr, &hdr);
5546 if (status) 5474 if (status)
5547 goto out; 5475 goto out;
5548 status = decode_sequence(&xdr, &res->seq_res, rqstp); 5476 status = decode_sequence(xdr, &res->seq_res, rqstp);
5549 if (status) 5477 if (status)
5550 goto out; 5478 goto out;
5551 status = decode_putfh(&xdr); 5479 status = decode_putfh(xdr);
5552 if (status) 5480 if (status)
5553 goto out; 5481 goto out;
5554 status = decode_open(&xdr, res); 5482 status = decode_open(xdr, res);
5555 if (status) 5483 if (status)
5556 goto out; 5484 goto out;
5557 decode_getfattr(&xdr, res->f_attr, res->server, 5485 decode_getfattr(xdr, res->f_attr, res->server,
5558 !RPC_IS_ASYNC(rqstp->rq_task)); 5486 !RPC_IS_ASYNC(rqstp->rq_task));
5559out: 5487out:
5560 return status; 5488 return status;
@@ -5563,26 +5491,26 @@ out:
5563/* 5491/*
5564 * Decode SETATTR response 5492 * Decode SETATTR response
5565 */ 5493 */
5566static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_setattrres *res) 5494static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp,
5495 struct xdr_stream *xdr,
5496 struct nfs_setattrres *res)
5567{ 5497{
5568 struct xdr_stream xdr;
5569 struct compound_hdr hdr; 5498 struct compound_hdr hdr;
5570 int status; 5499 int status;
5571 5500
5572 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5501 status = decode_compound_hdr(xdr, &hdr);
5573 status = decode_compound_hdr(&xdr, &hdr);
5574 if (status) 5502 if (status)
5575 goto out; 5503 goto out;
5576 status = decode_sequence(&xdr, &res->seq_res, rqstp); 5504 status = decode_sequence(xdr, &res->seq_res, rqstp);
5577 if (status) 5505 if (status)
5578 goto out; 5506 goto out;
5579 status = decode_putfh(&xdr); 5507 status = decode_putfh(xdr);
5580 if (status) 5508 if (status)
5581 goto out; 5509 goto out;
5582 status = decode_setattr(&xdr); 5510 status = decode_setattr(xdr);
5583 if (status) 5511 if (status)
5584 goto out; 5512 goto out;
5585 decode_getfattr(&xdr, res->fattr, res->server, 5513 decode_getfattr(xdr, res->fattr, res->server,
5586 !RPC_IS_ASYNC(rqstp->rq_task)); 5514 !RPC_IS_ASYNC(rqstp->rq_task));
5587out: 5515out:
5588 return status; 5516 return status;
@@ -5591,23 +5519,22 @@ out:
5591/* 5519/*
5592 * Decode LOCK response 5520 * Decode LOCK response
5593 */ 5521 */
5594static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, __be32 *p, struct nfs_lock_res *res) 5522static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5523 struct nfs_lock_res *res)
5595{ 5524{
5596 struct xdr_stream xdr;
5597 struct compound_hdr hdr; 5525 struct compound_hdr hdr;
5598 int status; 5526 int status;
5599 5527
5600 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5528 status = decode_compound_hdr(xdr, &hdr);
5601 status = decode_compound_hdr(&xdr, &hdr);
5602 if (status) 5529 if (status)
5603 goto out; 5530 goto out;
5604 status = decode_sequence(&xdr, &res->seq_res, rqstp); 5531 status = decode_sequence(xdr, &res->seq_res, rqstp);
5605 if (status) 5532 if (status)
5606 goto out; 5533 goto out;
5607 status = decode_putfh(&xdr); 5534 status = decode_putfh(xdr);
5608 if (status) 5535 if (status)
5609 goto out; 5536 goto out;
5610 status = decode_lock(&xdr, res); 5537 status = decode_lock(xdr, res);
5611out: 5538out:
5612 return status; 5539 return status;
5613} 5540}
@@ -5615,23 +5542,22 @@ out:
5615/* 5542/*
5616 * Decode LOCKT response 5543 * Decode LOCKT response
5617 */ 5544 */
5618static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, __be32 *p, struct nfs_lockt_res *res) 5545static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5546 struct nfs_lockt_res *res)
5619{ 5547{
5620 struct xdr_stream xdr;
5621 struct compound_hdr hdr; 5548 struct compound_hdr hdr;
5622 int status; 5549 int status;
5623 5550
5624 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5551 status = decode_compound_hdr(xdr, &hdr);
5625 status = decode_compound_hdr(&xdr, &hdr);
5626 if (status) 5552 if (status)
5627 goto out; 5553 goto out;
5628 status = decode_sequence(&xdr, &res->seq_res, rqstp); 5554 status = decode_sequence(xdr, &res->seq_res, rqstp);
5629 if (status) 5555 if (status)
5630 goto out; 5556 goto out;
5631 status = decode_putfh(&xdr); 5557 status = decode_putfh(xdr);
5632 if (status) 5558 if (status)
5633 goto out; 5559 goto out;
5634 status = decode_lockt(&xdr, res); 5560 status = decode_lockt(xdr, res);
5635out: 5561out:
5636 return status; 5562 return status;
5637} 5563}
@@ -5639,61 +5565,58 @@ out:
5639/* 5565/*
5640 * Decode LOCKU response 5566 * Decode LOCKU response
5641 */ 5567 */
5642static int nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, __be32 *p, struct nfs_locku_res *res) 5568static int nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5569 struct nfs_locku_res *res)
5643{ 5570{
5644 struct xdr_stream xdr;
5645 struct compound_hdr hdr; 5571 struct compound_hdr hdr;
5646 int status; 5572 int status;
5647 5573
5648 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5574 status = decode_compound_hdr(xdr, &hdr);
5649 status = decode_compound_hdr(&xdr, &hdr);
5650 if (status) 5575 if (status)
5651 goto out; 5576 goto out;
5652 status = decode_sequence(&xdr, &res->seq_res, rqstp); 5577 status = decode_sequence(xdr, &res->seq_res, rqstp);
5653 if (status) 5578 if (status)
5654 goto out; 5579 goto out;
5655 status = decode_putfh(&xdr); 5580 status = decode_putfh(xdr);
5656 if (status) 5581 if (status)
5657 goto out; 5582 goto out;
5658 status = decode_locku(&xdr, res); 5583 status = decode_locku(xdr, res);
5659out: 5584out:
5660 return status; 5585 return status;
5661} 5586}
5662 5587
5663static int nfs4_xdr_dec_release_lockowner(struct rpc_rqst *rqstp, __be32 *p, void *dummy) 5588static int nfs4_xdr_dec_release_lockowner(struct rpc_rqst *rqstp,
5589 struct xdr_stream *xdr, void *dummy)
5664{ 5590{
5665 struct xdr_stream xdr;
5666 struct compound_hdr hdr; 5591 struct compound_hdr hdr;
5667 int status; 5592 int status;
5668 5593
5669 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5594 status = decode_compound_hdr(xdr, &hdr);
5670 status = decode_compound_hdr(&xdr, &hdr);
5671 if (!status) 5595 if (!status)
5672 status = decode_release_lockowner(&xdr); 5596 status = decode_release_lockowner(xdr);
5673 return status; 5597 return status;
5674} 5598}
5675 5599
5676/* 5600/*
5677 * Decode READLINK response 5601 * Decode READLINK response
5678 */ 5602 */
5679static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp, __be32 *p, 5603static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp,
5604 struct xdr_stream *xdr,
5680 struct nfs4_readlink_res *res) 5605 struct nfs4_readlink_res *res)
5681{ 5606{
5682 struct xdr_stream xdr;
5683 struct compound_hdr hdr; 5607 struct compound_hdr hdr;
5684 int status; 5608 int status;
5685 5609
5686 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5610 status = decode_compound_hdr(xdr, &hdr);
5687 status = decode_compound_hdr(&xdr, &hdr);
5688 if (status) 5611 if (status)
5689 goto out; 5612 goto out;
5690 status = decode_sequence(&xdr, &res->seq_res, rqstp); 5613 status = decode_sequence(xdr, &res->seq_res, rqstp);
5691 if (status) 5614 if (status)
5692 goto out; 5615 goto out;
5693 status = decode_putfh(&xdr); 5616 status = decode_putfh(xdr);
5694 if (status) 5617 if (status)
5695 goto out; 5618 goto out;
5696 status = decode_readlink(&xdr, rqstp); 5619 status = decode_readlink(xdr, rqstp);
5697out: 5620out:
5698 return status; 5621 return status;
5699} 5622}
@@ -5701,23 +5624,22 @@ out:
5701/* 5624/*
5702 * Decode READDIR response 5625 * Decode READDIR response
5703 */ 5626 */
5704static int nfs4_xdr_dec_readdir(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_readdir_res *res) 5627static int nfs4_xdr_dec_readdir(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5628 struct nfs4_readdir_res *res)
5705{ 5629{
5706 struct xdr_stream xdr;
5707 struct compound_hdr hdr; 5630 struct compound_hdr hdr;
5708 int status; 5631 int status;
5709 5632
5710 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5633 status = decode_compound_hdr(xdr, &hdr);
5711 status = decode_compound_hdr(&xdr, &hdr);
5712 if (status) 5634 if (status)
5713 goto out; 5635 goto out;
5714 status = decode_sequence(&xdr, &res->seq_res, rqstp); 5636 status = decode_sequence(xdr, &res->seq_res, rqstp);
5715 if (status) 5637 if (status)
5716 goto out; 5638 goto out;
5717 status = decode_putfh(&xdr); 5639 status = decode_putfh(xdr);
5718 if (status) 5640 if (status)
5719 goto out; 5641 goto out;
5720 status = decode_readdir(&xdr, rqstp, res); 5642 status = decode_readdir(xdr, rqstp, res);
5721out: 5643out:
5722 return status; 5644 return status;
5723} 5645}
@@ -5725,23 +5647,22 @@ out:
5725/* 5647/*
5726 * Decode Read response 5648 * Decode Read response
5727 */ 5649 */
5728static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, __be32 *p, struct nfs_readres *res) 5650static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5651 struct nfs_readres *res)
5729{ 5652{
5730 struct xdr_stream xdr;
5731 struct compound_hdr hdr; 5653 struct compound_hdr hdr;
5732 int status; 5654 int status;
5733 5655
5734 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5656 status = decode_compound_hdr(xdr, &hdr);
5735 status = decode_compound_hdr(&xdr, &hdr);
5736 if (status) 5657 if (status)
5737 goto out; 5658 goto out;
5738 status = decode_sequence(&xdr, &res->seq_res, rqstp); 5659 status = decode_sequence(xdr, &res->seq_res, rqstp);
5739 if (status) 5660 if (status)
5740 goto out; 5661 goto out;
5741 status = decode_putfh(&xdr); 5662 status = decode_putfh(xdr);
5742 if (status) 5663 if (status)
5743 goto out; 5664 goto out;
5744 status = decode_read(&xdr, rqstp, res); 5665 status = decode_read(xdr, rqstp, res);
5745 if (!status) 5666 if (!status)
5746 status = res->count; 5667 status = res->count;
5747out: 5668out:
@@ -5751,26 +5672,25 @@ out:
5751/* 5672/*
5752 * Decode WRITE response 5673 * Decode WRITE response
5753 */ 5674 */
5754static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, __be32 *p, struct nfs_writeres *res) 5675static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5676 struct nfs_writeres *res)
5755{ 5677{
5756 struct xdr_stream xdr;
5757 struct compound_hdr hdr; 5678 struct compound_hdr hdr;
5758 int status; 5679 int status;
5759 5680
5760 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5681 status = decode_compound_hdr(xdr, &hdr);
5761 status = decode_compound_hdr(&xdr, &hdr);
5762 if (status) 5682 if (status)
5763 goto out; 5683 goto out;
5764 status = decode_sequence(&xdr, &res->seq_res, rqstp); 5684 status = decode_sequence(xdr, &res->seq_res, rqstp);
5765 if (status) 5685 if (status)
5766 goto out; 5686 goto out;
5767 status = decode_putfh(&xdr); 5687 status = decode_putfh(xdr);
5768 if (status) 5688 if (status)
5769 goto out; 5689 goto out;
5770 status = decode_write(&xdr, res); 5690 status = decode_write(xdr, res);
5771 if (status) 5691 if (status)
5772 goto out; 5692 goto out;
5773 decode_getfattr(&xdr, res->fattr, res->server, 5693 decode_getfattr(xdr, res->fattr, res->server,
5774 !RPC_IS_ASYNC(rqstp->rq_task)); 5694 !RPC_IS_ASYNC(rqstp->rq_task));
5775 if (!status) 5695 if (!status)
5776 status = res->count; 5696 status = res->count;
@@ -5781,26 +5701,25 @@ out:
5781/* 5701/*
5782 * Decode COMMIT response 5702 * Decode COMMIT response
5783 */ 5703 */
5784static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, __be32 *p, struct nfs_writeres *res) 5704static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5705 struct nfs_writeres *res)
5785{ 5706{
5786 struct xdr_stream xdr;
5787 struct compound_hdr hdr; 5707 struct compound_hdr hdr;
5788 int status; 5708 int status;
5789 5709
5790 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5710 status = decode_compound_hdr(xdr, &hdr);
5791 status = decode_compound_hdr(&xdr, &hdr);
5792 if (status) 5711 if (status)
5793 goto out; 5712 goto out;
5794 status = decode_sequence(&xdr, &res->seq_res, rqstp); 5713 status = decode_sequence(xdr, &res->seq_res, rqstp);
5795 if (status) 5714 if (status)
5796 goto out; 5715 goto out;
5797 status = decode_putfh(&xdr); 5716 status = decode_putfh(xdr);
5798 if (status) 5717 if (status)
5799 goto out; 5718 goto out;
5800 status = decode_commit(&xdr, res); 5719 status = decode_commit(xdr, res);
5801 if (status) 5720 if (status)
5802 goto out; 5721 goto out;
5803 decode_getfattr(&xdr, res->fattr, res->server, 5722 decode_getfattr(xdr, res->fattr, res->server,
5804 !RPC_IS_ASYNC(rqstp->rq_task)); 5723 !RPC_IS_ASYNC(rqstp->rq_task));
5805out: 5724out:
5806 return status; 5725 return status;
@@ -5809,85 +5728,80 @@ out:
5809/* 5728/*
5810 * Decode FSINFO response 5729 * Decode FSINFO response
5811 */ 5730 */
5812static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, __be32 *p, 5731static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, struct xdr_stream *xdr,
5813 struct nfs4_fsinfo_res *res) 5732 struct nfs4_fsinfo_res *res)
5814{ 5733{
5815 struct xdr_stream xdr;
5816 struct compound_hdr hdr; 5734 struct compound_hdr hdr;
5817 int status; 5735 int status;
5818 5736
5819 xdr_init_decode(&xdr, &req->rq_rcv_buf, p); 5737 status = decode_compound_hdr(xdr, &hdr);
5820 status = decode_compound_hdr(&xdr, &hdr);
5821 if (!status) 5738 if (!status)
5822 status = decode_sequence(&xdr, &res->seq_res, req); 5739 status = decode_sequence(xdr, &res->seq_res, req);
5823 if (!status) 5740 if (!status)
5824 status = decode_putfh(&xdr); 5741 status = decode_putfh(xdr);
5825 if (!status) 5742 if (!status)
5826 status = decode_fsinfo(&xdr, res->fsinfo); 5743 status = decode_fsinfo(xdr, res->fsinfo);
5827 return status; 5744 return status;
5828} 5745}
5829 5746
5830/* 5747/*
5831 * Decode PATHCONF response 5748 * Decode PATHCONF response
5832 */ 5749 */
5833static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, __be32 *p, 5750static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, struct xdr_stream *xdr,
5834 struct nfs4_pathconf_res *res) 5751 struct nfs4_pathconf_res *res)
5835{ 5752{
5836 struct xdr_stream xdr;
5837 struct compound_hdr hdr; 5753 struct compound_hdr hdr;
5838 int status; 5754 int status;
5839 5755
5840 xdr_init_decode(&xdr, &req->rq_rcv_buf, p); 5756 status = decode_compound_hdr(xdr, &hdr);
5841 status = decode_compound_hdr(&xdr, &hdr);
5842 if (!status) 5757 if (!status)
5843 status = decode_sequence(&xdr, &res->seq_res, req); 5758 status = decode_sequence(xdr, &res->seq_res, req);
5844 if (!status) 5759 if (!status)
5845 status = decode_putfh(&xdr); 5760 status = decode_putfh(xdr);
5846 if (!status) 5761 if (!status)
5847 status = decode_pathconf(&xdr, res->pathconf); 5762 status = decode_pathconf(xdr, res->pathconf);
5848 return status; 5763 return status;
5849} 5764}
5850 5765
5851/* 5766/*
5852 * Decode STATFS response 5767 * Decode STATFS response
5853 */ 5768 */
5854static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, __be32 *p, 5769static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, struct xdr_stream *xdr,
5855 struct nfs4_statfs_res *res) 5770 struct nfs4_statfs_res *res)
5856{ 5771{
5857 struct xdr_stream xdr;
5858 struct compound_hdr hdr; 5772 struct compound_hdr hdr;
5859 int status; 5773 int status;
5860 5774
5861 xdr_init_decode(&xdr, &req->rq_rcv_buf, p); 5775 status = decode_compound_hdr(xdr, &hdr);
5862 status = decode_compound_hdr(&xdr, &hdr);
5863 if (!status) 5776 if (!status)
5864 status = decode_sequence(&xdr, &res->seq_res, req); 5777 status = decode_sequence(xdr, &res->seq_res, req);
5865 if (!status) 5778 if (!status)
5866 status = decode_putfh(&xdr); 5779 status = decode_putfh(xdr);
5867 if (!status) 5780 if (!status)
5868 status = decode_statfs(&xdr, res->fsstat); 5781 status = decode_statfs(xdr, res->fsstat);
5869 return status; 5782 return status;
5870} 5783}
5871 5784
5872/* 5785/*
5873 * Decode GETATTR_BITMAP response 5786 * Decode GETATTR_BITMAP response
5874 */ 5787 */
5875static int nfs4_xdr_dec_server_caps(struct rpc_rqst *req, __be32 *p, struct nfs4_server_caps_res *res) 5788static int nfs4_xdr_dec_server_caps(struct rpc_rqst *req,
5789 struct xdr_stream *xdr,
5790 struct nfs4_server_caps_res *res)
5876{ 5791{
5877 struct xdr_stream xdr;
5878 struct compound_hdr hdr; 5792 struct compound_hdr hdr;
5879 int status; 5793 int status;
5880 5794
5881 xdr_init_decode(&xdr, &req->rq_rcv_buf, p); 5795 status = decode_compound_hdr(xdr, &hdr);
5882 status = decode_compound_hdr(&xdr, &hdr);
5883 if (status) 5796 if (status)
5884 goto out; 5797 goto out;
5885 status = decode_sequence(&xdr, &res->seq_res, req); 5798 status = decode_sequence(xdr, &res->seq_res, req);
5886 if (status) 5799 if (status)
5887 goto out; 5800 goto out;
5888 if ((status = decode_putfh(&xdr)) != 0) 5801 status = decode_putfh(xdr);
5802 if (status)
5889 goto out; 5803 goto out;
5890 status = decode_server_caps(&xdr, res); 5804 status = decode_server_caps(xdr, res);
5891out: 5805out:
5892 return status; 5806 return status;
5893} 5807}
@@ -5895,79 +5809,77 @@ out:
5895/* 5809/*
5896 * Decode RENEW response 5810 * Decode RENEW response
5897 */ 5811 */
5898static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, __be32 *p, void *dummy) 5812static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5813 void *__unused)
5899{ 5814{
5900 struct xdr_stream xdr;
5901 struct compound_hdr hdr; 5815 struct compound_hdr hdr;
5902 int status; 5816 int status;
5903 5817
5904 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5818 status = decode_compound_hdr(xdr, &hdr);
5905 status = decode_compound_hdr(&xdr, &hdr);
5906 if (!status) 5819 if (!status)
5907 status = decode_renew(&xdr); 5820 status = decode_renew(xdr);
5908 return status; 5821 return status;
5909} 5822}
5910 5823
5911/* 5824/*
5912 * Decode SETCLIENTID response 5825 * Decode SETCLIENTID response
5913 */ 5826 */
5914static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, __be32 *p, 5827static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req,
5915 struct nfs4_setclientid_res *res) 5828 struct xdr_stream *xdr,
5829 struct nfs4_setclientid_res *res)
5916{ 5830{
5917 struct xdr_stream xdr;
5918 struct compound_hdr hdr; 5831 struct compound_hdr hdr;
5919 int status; 5832 int status;
5920 5833
5921 xdr_init_decode(&xdr, &req->rq_rcv_buf, p); 5834 status = decode_compound_hdr(xdr, &hdr);
5922 status = decode_compound_hdr(&xdr, &hdr);
5923 if (!status) 5835 if (!status)
5924 status = decode_setclientid(&xdr, res); 5836 status = decode_setclientid(xdr, res);
5925 return status; 5837 return status;
5926} 5838}
5927 5839
5928/* 5840/*
5929 * Decode SETCLIENTID_CONFIRM response 5841 * Decode SETCLIENTID_CONFIRM response
5930 */ 5842 */
5931static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, __be32 *p, struct nfs_fsinfo *fsinfo) 5843static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req,
5844 struct xdr_stream *xdr,
5845 struct nfs_fsinfo *fsinfo)
5932{ 5846{
5933 struct xdr_stream xdr;
5934 struct compound_hdr hdr; 5847 struct compound_hdr hdr;
5935 int status; 5848 int status;
5936 5849
5937 xdr_init_decode(&xdr, &req->rq_rcv_buf, p); 5850 status = decode_compound_hdr(xdr, &hdr);
5938 status = decode_compound_hdr(&xdr, &hdr);
5939 if (!status) 5851 if (!status)
5940 status = decode_setclientid_confirm(&xdr); 5852 status = decode_setclientid_confirm(xdr);
5941 if (!status) 5853 if (!status)
5942 status = decode_putrootfh(&xdr); 5854 status = decode_putrootfh(xdr);
5943 if (!status) 5855 if (!status)
5944 status = decode_fsinfo(&xdr, fsinfo); 5856 status = decode_fsinfo(xdr, fsinfo);
5945 return status; 5857 return status;
5946} 5858}
5947 5859
5948/* 5860/*
5949 * Decode DELEGRETURN response 5861 * Decode DELEGRETURN response
5950 */ 5862 */
5951static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_delegreturnres *res) 5863static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp,
5864 struct xdr_stream *xdr,
5865 struct nfs4_delegreturnres *res)
5952{ 5866{
5953 struct xdr_stream xdr;
5954 struct compound_hdr hdr; 5867 struct compound_hdr hdr;
5955 int status; 5868 int status;
5956 5869
5957 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5870 status = decode_compound_hdr(xdr, &hdr);
5958 status = decode_compound_hdr(&xdr, &hdr);
5959 if (status) 5871 if (status)
5960 goto out; 5872 goto out;
5961 status = decode_sequence(&xdr, &res->seq_res, rqstp); 5873 status = decode_sequence(xdr, &res->seq_res, rqstp);
5962 if (status) 5874 if (status)
5963 goto out; 5875 goto out;
5964 status = decode_putfh(&xdr); 5876 status = decode_putfh(xdr);
5965 if (status != 0) 5877 if (status != 0)
5966 goto out; 5878 goto out;
5967 status = decode_delegreturn(&xdr); 5879 status = decode_delegreturn(xdr);
5968 if (status != 0) 5880 if (status != 0)
5969 goto out; 5881 goto out;
5970 decode_getfattr(&xdr, res->fattr, res->server, 5882 decode_getfattr(xdr, res->fattr, res->server,
5971 !RPC_IS_ASYNC(rqstp->rq_task)); 5883 !RPC_IS_ASYNC(rqstp->rq_task));
5972out: 5884out:
5973 return status; 5885 return status;
@@ -5976,26 +5888,27 @@ out:
5976/* 5888/*
5977 * Decode FS_LOCATIONS response 5889 * Decode FS_LOCATIONS response
5978 */ 5890 */
5979static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, __be32 *p, 5891static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req,
5892 struct xdr_stream *xdr,
5980 struct nfs4_fs_locations_res *res) 5893 struct nfs4_fs_locations_res *res)
5981{ 5894{
5982 struct xdr_stream xdr;
5983 struct compound_hdr hdr; 5895 struct compound_hdr hdr;
5984 int status; 5896 int status;
5985 5897
5986 xdr_init_decode(&xdr, &req->rq_rcv_buf, p); 5898 status = decode_compound_hdr(xdr, &hdr);
5987 status = decode_compound_hdr(&xdr, &hdr);
5988 if (status) 5899 if (status)
5989 goto out; 5900 goto out;
5990 status = decode_sequence(&xdr, &res->seq_res, req); 5901 status = decode_sequence(xdr, &res->seq_res, req);
5991 if (status) 5902 if (status)
5992 goto out; 5903 goto out;
5993 if ((status = decode_putfh(&xdr)) != 0) 5904 status = decode_putfh(xdr);
5905 if (status)
5994 goto out; 5906 goto out;
5995 if ((status = decode_lookup(&xdr)) != 0) 5907 status = decode_lookup(xdr);
5908 if (status)
5996 goto out; 5909 goto out;
5997 xdr_enter_page(&xdr, PAGE_SIZE); 5910 xdr_enter_page(xdr, PAGE_SIZE);
5998 status = decode_getfattr(&xdr, &res->fs_locations->fattr, 5911 status = decode_getfattr(xdr, &res->fs_locations->fattr,
5999 res->fs_locations->server, 5912 res->fs_locations->server,
6000 !RPC_IS_ASYNC(req->rq_task)); 5913 !RPC_IS_ASYNC(req->rq_task));
6001out: 5914out:
@@ -6006,129 +5919,122 @@ out:
6006/* 5919/*
6007 * Decode EXCHANGE_ID response 5920 * Decode EXCHANGE_ID response
6008 */ 5921 */
6009static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp, uint32_t *p, 5922static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp,
5923 struct xdr_stream *xdr,
6010 void *res) 5924 void *res)
6011{ 5925{
6012 struct xdr_stream xdr;
6013 struct compound_hdr hdr; 5926 struct compound_hdr hdr;
6014 int status; 5927 int status;
6015 5928
6016 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5929 status = decode_compound_hdr(xdr, &hdr);
6017 status = decode_compound_hdr(&xdr, &hdr);
6018 if (!status) 5930 if (!status)
6019 status = decode_exchange_id(&xdr, res); 5931 status = decode_exchange_id(xdr, res);
6020 return status; 5932 return status;
6021} 5933}
6022 5934
6023/* 5935/*
6024 * Decode CREATE_SESSION response 5936 * Decode CREATE_SESSION response
6025 */ 5937 */
6026static int nfs4_xdr_dec_create_session(struct rpc_rqst *rqstp, uint32_t *p, 5938static int nfs4_xdr_dec_create_session(struct rpc_rqst *rqstp,
5939 struct xdr_stream *xdr,
6027 struct nfs41_create_session_res *res) 5940 struct nfs41_create_session_res *res)
6028{ 5941{
6029 struct xdr_stream xdr;
6030 struct compound_hdr hdr; 5942 struct compound_hdr hdr;
6031 int status; 5943 int status;
6032 5944
6033 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5945 status = decode_compound_hdr(xdr, &hdr);
6034 status = decode_compound_hdr(&xdr, &hdr);
6035 if (!status) 5946 if (!status)
6036 status = decode_create_session(&xdr, res); 5947 status = decode_create_session(xdr, res);
6037 return status; 5948 return status;
6038} 5949}
6039 5950
6040/* 5951/*
6041 * Decode DESTROY_SESSION response 5952 * Decode DESTROY_SESSION response
6042 */ 5953 */
6043static int nfs4_xdr_dec_destroy_session(struct rpc_rqst *rqstp, uint32_t *p, 5954static int nfs4_xdr_dec_destroy_session(struct rpc_rqst *rqstp,
6044 void *dummy) 5955 struct xdr_stream *xdr,
5956 void *res)
6045{ 5957{
6046 struct xdr_stream xdr;
6047 struct compound_hdr hdr; 5958 struct compound_hdr hdr;
6048 int status; 5959 int status;
6049 5960
6050 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5961 status = decode_compound_hdr(xdr, &hdr);
6051 status = decode_compound_hdr(&xdr, &hdr);
6052 if (!status) 5962 if (!status)
6053 status = decode_destroy_session(&xdr, dummy); 5963 status = decode_destroy_session(xdr, res);
6054 return status; 5964 return status;
6055} 5965}
6056 5966
6057/* 5967/*
6058 * Decode SEQUENCE response 5968 * Decode SEQUENCE response
6059 */ 5969 */
6060static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp, uint32_t *p, 5970static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp,
5971 struct xdr_stream *xdr,
6061 struct nfs4_sequence_res *res) 5972 struct nfs4_sequence_res *res)
6062{ 5973{
6063 struct xdr_stream xdr;
6064 struct compound_hdr hdr; 5974 struct compound_hdr hdr;
6065 int status; 5975 int status;
6066 5976
6067 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5977 status = decode_compound_hdr(xdr, &hdr);
6068 status = decode_compound_hdr(&xdr, &hdr);
6069 if (!status) 5978 if (!status)
6070 status = decode_sequence(&xdr, res, rqstp); 5979 status = decode_sequence(xdr, res, rqstp);
6071 return status; 5980 return status;
6072} 5981}
6073 5982
6074/* 5983/*
6075 * Decode GET_LEASE_TIME response 5984 * Decode GET_LEASE_TIME response
6076 */ 5985 */
6077static int nfs4_xdr_dec_get_lease_time(struct rpc_rqst *rqstp, uint32_t *p, 5986static int nfs4_xdr_dec_get_lease_time(struct rpc_rqst *rqstp,
5987 struct xdr_stream *xdr,
6078 struct nfs4_get_lease_time_res *res) 5988 struct nfs4_get_lease_time_res *res)
6079{ 5989{
6080 struct xdr_stream xdr;
6081 struct compound_hdr hdr; 5990 struct compound_hdr hdr;
6082 int status; 5991 int status;
6083 5992
6084 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5993 status = decode_compound_hdr(xdr, &hdr);
6085 status = decode_compound_hdr(&xdr, &hdr);
6086 if (!status) 5994 if (!status)
6087 status = decode_sequence(&xdr, &res->lr_seq_res, rqstp); 5995 status = decode_sequence(xdr, &res->lr_seq_res, rqstp);
6088 if (!status) 5996 if (!status)
6089 status = decode_putrootfh(&xdr); 5997 status = decode_putrootfh(xdr);
6090 if (!status) 5998 if (!status)
6091 status = decode_fsinfo(&xdr, res->lr_fsinfo); 5999 status = decode_fsinfo(xdr, res->lr_fsinfo);
6092 return status; 6000 return status;
6093} 6001}
6094 6002
6095/* 6003/*
6096 * Decode RECLAIM_COMPLETE response 6004 * Decode RECLAIM_COMPLETE response
6097 */ 6005 */
6098static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, uint32_t *p, 6006static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp,
6007 struct xdr_stream *xdr,
6099 struct nfs41_reclaim_complete_res *res) 6008 struct nfs41_reclaim_complete_res *res)
6100{ 6009{
6101 struct xdr_stream xdr;
6102 struct compound_hdr hdr; 6010 struct compound_hdr hdr;
6103 int status; 6011 int status;
6104 6012
6105 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 6013 status = decode_compound_hdr(xdr, &hdr);
6106 status = decode_compound_hdr(&xdr, &hdr);
6107 if (!status) 6014 if (!status)
6108 status = decode_sequence(&xdr, &res->seq_res, rqstp); 6015 status = decode_sequence(xdr, &res->seq_res, rqstp);
6109 if (!status) 6016 if (!status)
6110 status = decode_reclaim_complete(&xdr, (void *)NULL); 6017 status = decode_reclaim_complete(xdr, (void *)NULL);
6111 return status; 6018 return status;
6112} 6019}
6113 6020
6114/* 6021/*
6115 * Decode GETDEVINFO response 6022 * Decode GETDEVINFO response
6116 */ 6023 */
6117static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, uint32_t *p, 6024static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp,
6025 struct xdr_stream *xdr,
6118 struct nfs4_getdeviceinfo_res *res) 6026 struct nfs4_getdeviceinfo_res *res)
6119{ 6027{
6120 struct xdr_stream xdr;
6121 struct compound_hdr hdr; 6028 struct compound_hdr hdr;
6122 int status; 6029 int status;
6123 6030
6124 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 6031 status = decode_compound_hdr(xdr, &hdr);
6125 status = decode_compound_hdr(&xdr, &hdr);
6126 if (status != 0) 6032 if (status != 0)
6127 goto out; 6033 goto out;
6128 status = decode_sequence(&xdr, &res->seq_res, rqstp); 6034 status = decode_sequence(xdr, &res->seq_res, rqstp);
6129 if (status != 0) 6035 if (status != 0)
6130 goto out; 6036 goto out;
6131 status = decode_getdeviceinfo(&xdr, res->pdev); 6037 status = decode_getdeviceinfo(xdr, res->pdev);
6132out: 6038out:
6133 return status; 6039 return status;
6134} 6040}
@@ -6136,31 +6042,44 @@ out:
6136/* 6042/*
6137 * Decode LAYOUTGET response 6043 * Decode LAYOUTGET response
6138 */ 6044 */
6139static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp, uint32_t *p, 6045static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp,
6046 struct xdr_stream *xdr,
6140 struct nfs4_layoutget_res *res) 6047 struct nfs4_layoutget_res *res)
6141{ 6048{
6142 struct xdr_stream xdr;
6143 struct compound_hdr hdr; 6049 struct compound_hdr hdr;
6144 int status; 6050 int status;
6145 6051
6146 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 6052 status = decode_compound_hdr(xdr, &hdr);
6147 status = decode_compound_hdr(&xdr, &hdr);
6148 if (status) 6053 if (status)
6149 goto out; 6054 goto out;
6150 status = decode_sequence(&xdr, &res->seq_res, rqstp); 6055 status = decode_sequence(xdr, &res->seq_res, rqstp);
6151 if (status) 6056 if (status)
6152 goto out; 6057 goto out;
6153 status = decode_putfh(&xdr); 6058 status = decode_putfh(xdr);
6154 if (status) 6059 if (status)
6155 goto out; 6060 goto out;
6156 status = decode_layoutget(&xdr, rqstp, res); 6061 status = decode_layoutget(xdr, rqstp, res);
6157out: 6062out:
6158 return status; 6063 return status;
6159} 6064}
6160#endif /* CONFIG_NFS_V4_1 */ 6065#endif /* CONFIG_NFS_V4_1 */
6161 6066
6162__be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, 6067/**
6163 struct nfs_server *server, int plus) 6068 * nfs4_decode_dirent - Decode a single NFSv4 directory entry stored in
6069 * the local page cache.
6070 * @xdr: XDR stream where entry resides
6071 * @entry: buffer to fill in with entry data
6072 * @plus: boolean indicating whether this should be a readdirplus entry
6073 *
6074 * Returns zero if successful, otherwise a negative errno value is
6075 * returned.
6076 *
6077 * This function is not invoked during READDIR reply decoding, but
6078 * rather whenever an application invokes the getdents(2) system call
6079 * on a directory already in our cache.
6080 */
6081int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
6082 int plus)
6164{ 6083{
6165 uint32_t bitmap[2] = {0}; 6084 uint32_t bitmap[2] = {0};
6166 uint32_t len; 6085 uint32_t len;
@@ -6172,9 +6091,9 @@ __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
6172 if (unlikely(!p)) 6091 if (unlikely(!p))
6173 goto out_overflow; 6092 goto out_overflow;
6174 if (!ntohl(*p++)) 6093 if (!ntohl(*p++))
6175 return ERR_PTR(-EAGAIN); 6094 return -EAGAIN;
6176 entry->eof = 1; 6095 entry->eof = 1;
6177 return ERR_PTR(-EBADCOOKIE); 6096 return -EBADCOOKIE;
6178 } 6097 }
6179 6098
6180 p = xdr_inline_decode(xdr, 12); 6099 p = xdr_inline_decode(xdr, 12);
@@ -6203,7 +6122,8 @@ __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
6203 if (decode_attr_length(xdr, &len, &p) < 0) 6122 if (decode_attr_length(xdr, &len, &p) < 0)
6204 goto out_overflow; 6123 goto out_overflow;
6205 6124
6206 if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, server, 1) < 0) 6125 if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh,
6126 entry->server, 1) < 0)
6207 goto out_overflow; 6127 goto out_overflow;
6208 if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID) 6128 if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID)
6209 entry->ino = entry->fattr->fileid; 6129 entry->ino = entry->fattr->fileid;
@@ -6215,17 +6135,11 @@ __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
6215 if (verify_attr_len(xdr, p, len) < 0) 6135 if (verify_attr_len(xdr, p, len) < 0)
6216 goto out_overflow; 6136 goto out_overflow;
6217 6137
6218 p = xdr_inline_peek(xdr, 8); 6138 return 0;
6219 if (p != NULL)
6220 entry->eof = !p[0] && p[1];
6221 else
6222 entry->eof = 0;
6223
6224 return p;
6225 6139
6226out_overflow: 6140out_overflow:
6227 print_overflow_msg(__func__, xdr); 6141 print_overflow_msg(__func__, xdr);
6228 return ERR_PTR(-EAGAIN); 6142 return -EAGAIN;
6229} 6143}
6230 6144
6231/* 6145/*
@@ -6301,8 +6215,8 @@ nfs4_stat_to_errno(int stat)
6301#define PROC(proc, argtype, restype) \ 6215#define PROC(proc, argtype, restype) \
6302[NFSPROC4_CLNT_##proc] = { \ 6216[NFSPROC4_CLNT_##proc] = { \
6303 .p_proc = NFSPROC4_COMPOUND, \ 6217 .p_proc = NFSPROC4_COMPOUND, \
6304 .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \ 6218 .p_encode = (kxdreproc_t)nfs4_xdr_##argtype, \
6305 .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \ 6219 .p_decode = (kxdrdproc_t)nfs4_xdr_##restype, \
6306 .p_arglen = NFS4_##argtype##_sz, \ 6220 .p_arglen = NFS4_##argtype##_sz, \
6307 .p_replen = NFS4_##restype##_sz, \ 6221 .p_replen = NFS4_##restype##_sz, \
6308 .p_statidx = NFSPROC4_CLNT_##proc, \ 6222 .p_statidx = NFSPROC4_CLNT_##proc, \
@@ -6310,50 +6224,50 @@ nfs4_stat_to_errno(int stat)
6310} 6224}
6311 6225
6312struct rpc_procinfo nfs4_procedures[] = { 6226struct rpc_procinfo nfs4_procedures[] = {
6313 PROC(READ, enc_read, dec_read), 6227 PROC(READ, enc_read, dec_read),
6314 PROC(WRITE, enc_write, dec_write), 6228 PROC(WRITE, enc_write, dec_write),
6315 PROC(COMMIT, enc_commit, dec_commit), 6229 PROC(COMMIT, enc_commit, dec_commit),
6316 PROC(OPEN, enc_open, dec_open), 6230 PROC(OPEN, enc_open, dec_open),
6317 PROC(OPEN_CONFIRM, enc_open_confirm, dec_open_confirm), 6231 PROC(OPEN_CONFIRM, enc_open_confirm, dec_open_confirm),
6318 PROC(OPEN_NOATTR, enc_open_noattr, dec_open_noattr), 6232 PROC(OPEN_NOATTR, enc_open_noattr, dec_open_noattr),
6319 PROC(OPEN_DOWNGRADE, enc_open_downgrade, dec_open_downgrade), 6233 PROC(OPEN_DOWNGRADE, enc_open_downgrade, dec_open_downgrade),
6320 PROC(CLOSE, enc_close, dec_close), 6234 PROC(CLOSE, enc_close, dec_close),
6321 PROC(SETATTR, enc_setattr, dec_setattr), 6235 PROC(SETATTR, enc_setattr, dec_setattr),
6322 PROC(FSINFO, enc_fsinfo, dec_fsinfo), 6236 PROC(FSINFO, enc_fsinfo, dec_fsinfo),
6323 PROC(RENEW, enc_renew, dec_renew), 6237 PROC(RENEW, enc_renew, dec_renew),
6324 PROC(SETCLIENTID, enc_setclientid, dec_setclientid), 6238 PROC(SETCLIENTID, enc_setclientid, dec_setclientid),
6325 PROC(SETCLIENTID_CONFIRM, enc_setclientid_confirm, dec_setclientid_confirm), 6239 PROC(SETCLIENTID_CONFIRM, enc_setclientid_confirm, dec_setclientid_confirm),
6326 PROC(LOCK, enc_lock, dec_lock), 6240 PROC(LOCK, enc_lock, dec_lock),
6327 PROC(LOCKT, enc_lockt, dec_lockt), 6241 PROC(LOCKT, enc_lockt, dec_lockt),
6328 PROC(LOCKU, enc_locku, dec_locku), 6242 PROC(LOCKU, enc_locku, dec_locku),
6329 PROC(ACCESS, enc_access, dec_access), 6243 PROC(ACCESS, enc_access, dec_access),
6330 PROC(GETATTR, enc_getattr, dec_getattr), 6244 PROC(GETATTR, enc_getattr, dec_getattr),
6331 PROC(LOOKUP, enc_lookup, dec_lookup), 6245 PROC(LOOKUP, enc_lookup, dec_lookup),
6332 PROC(LOOKUP_ROOT, enc_lookup_root, dec_lookup_root), 6246 PROC(LOOKUP_ROOT, enc_lookup_root, dec_lookup_root),
6333 PROC(REMOVE, enc_remove, dec_remove), 6247 PROC(REMOVE, enc_remove, dec_remove),
6334 PROC(RENAME, enc_rename, dec_rename), 6248 PROC(RENAME, enc_rename, dec_rename),
6335 PROC(LINK, enc_link, dec_link), 6249 PROC(LINK, enc_link, dec_link),
6336 PROC(SYMLINK, enc_symlink, dec_symlink), 6250 PROC(SYMLINK, enc_symlink, dec_symlink),
6337 PROC(CREATE, enc_create, dec_create), 6251 PROC(CREATE, enc_create, dec_create),
6338 PROC(PATHCONF, enc_pathconf, dec_pathconf), 6252 PROC(PATHCONF, enc_pathconf, dec_pathconf),
6339 PROC(STATFS, enc_statfs, dec_statfs), 6253 PROC(STATFS, enc_statfs, dec_statfs),
6340 PROC(READLINK, enc_readlink, dec_readlink), 6254 PROC(READLINK, enc_readlink, dec_readlink),
6341 PROC(READDIR, enc_readdir, dec_readdir), 6255 PROC(READDIR, enc_readdir, dec_readdir),
6342 PROC(SERVER_CAPS, enc_server_caps, dec_server_caps), 6256 PROC(SERVER_CAPS, enc_server_caps, dec_server_caps),
6343 PROC(DELEGRETURN, enc_delegreturn, dec_delegreturn), 6257 PROC(DELEGRETURN, enc_delegreturn, dec_delegreturn),
6344 PROC(GETACL, enc_getacl, dec_getacl), 6258 PROC(GETACL, enc_getacl, dec_getacl),
6345 PROC(SETACL, enc_setacl, dec_setacl), 6259 PROC(SETACL, enc_setacl, dec_setacl),
6346 PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations), 6260 PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations),
6347 PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner), 6261 PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner),
6348#if defined(CONFIG_NFS_V4_1) 6262#if defined(CONFIG_NFS_V4_1)
6349 PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id), 6263 PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id),
6350 PROC(CREATE_SESSION, enc_create_session, dec_create_session), 6264 PROC(CREATE_SESSION, enc_create_session, dec_create_session),
6351 PROC(DESTROY_SESSION, enc_destroy_session, dec_destroy_session), 6265 PROC(DESTROY_SESSION, enc_destroy_session, dec_destroy_session),
6352 PROC(SEQUENCE, enc_sequence, dec_sequence), 6266 PROC(SEQUENCE, enc_sequence, dec_sequence),
6353 PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time), 6267 PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time),
6354 PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete), 6268 PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete),
6355 PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo), 6269 PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo),
6356 PROC(LAYOUTGET, enc_layoutget, dec_layoutget), 6270 PROC(LAYOUTGET, enc_layoutget, dec_layoutget),
6357#endif /* CONFIG_NFS_V4_1 */ 6271#endif /* CONFIG_NFS_V4_1 */
6358}; 6272};
6359 6273
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index b68536cc904..e1164e3f9e6 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -26,12 +26,9 @@ static struct kmem_cache *nfs_page_cachep;
26static inline struct nfs_page * 26static inline struct nfs_page *
27nfs_page_alloc(void) 27nfs_page_alloc(void)
28{ 28{
29 struct nfs_page *p; 29 struct nfs_page *p = kmem_cache_zalloc(nfs_page_cachep, GFP_KERNEL);
30 p = kmem_cache_alloc(nfs_page_cachep, GFP_KERNEL); 30 if (p)
31 if (p) {
32 memset(p, 0, sizeof(*p));
33 INIT_LIST_HEAD(&p->wb_list); 31 INIT_LIST_HEAD(&p->wb_list);
34 }
35 return p; 32 return p;
36} 33}
37 34
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index db773428f95..bc408976973 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -177,105 +177,149 @@ EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
177 * pNFS client layout cache 177 * pNFS client layout cache
178 */ 178 */
179 179
180/* Need to hold i_lock if caller does not already hold reference */
181void
182get_layout_hdr(struct pnfs_layout_hdr *lo)
183{
184 atomic_inc(&lo->plh_refcount);
185}
186
180static void 187static void
181get_layout_hdr_locked(struct pnfs_layout_hdr *lo) 188destroy_layout_hdr(struct pnfs_layout_hdr *lo)
182{ 189{
183 assert_spin_locked(&lo->inode->i_lock); 190 dprintk("%s: freeing layout cache %p\n", __func__, lo);
184 lo->refcount++; 191 BUG_ON(!list_empty(&lo->plh_layouts));
192 NFS_I(lo->plh_inode)->layout = NULL;
193 kfree(lo);
185} 194}
186 195
187static void 196static void
188put_layout_hdr_locked(struct pnfs_layout_hdr *lo) 197put_layout_hdr_locked(struct pnfs_layout_hdr *lo)
189{ 198{
190 assert_spin_locked(&lo->inode->i_lock); 199 if (atomic_dec_and_test(&lo->plh_refcount))
191 BUG_ON(lo->refcount == 0); 200 destroy_layout_hdr(lo);
192
193 lo->refcount--;
194 if (!lo->refcount) {
195 dprintk("%s: freeing layout cache %p\n", __func__, lo);
196 BUG_ON(!list_empty(&lo->layouts));
197 NFS_I(lo->inode)->layout = NULL;
198 kfree(lo);
199 }
200} 201}
201 202
202void 203void
203put_layout_hdr(struct inode *inode) 204put_layout_hdr(struct pnfs_layout_hdr *lo)
204{ 205{
205 spin_lock(&inode->i_lock); 206 struct inode *inode = lo->plh_inode;
206 put_layout_hdr_locked(NFS_I(inode)->layout); 207
207 spin_unlock(&inode->i_lock); 208 if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
209 destroy_layout_hdr(lo);
210 spin_unlock(&inode->i_lock);
211 }
208} 212}
209 213
210static void 214static void
211init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg) 215init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
212{ 216{
213 INIT_LIST_HEAD(&lseg->fi_list); 217 INIT_LIST_HEAD(&lseg->pls_list);
214 kref_init(&lseg->kref); 218 atomic_set(&lseg->pls_refcount, 1);
215 lseg->layout = lo; 219 smp_mb();
220 set_bit(NFS_LSEG_VALID, &lseg->pls_flags);
221 lseg->pls_layout = lo;
216} 222}
217 223
218/* Called without i_lock held, as the free_lseg call may sleep */ 224static void free_lseg(struct pnfs_layout_segment *lseg)
219static void
220destroy_lseg(struct kref *kref)
221{ 225{
222 struct pnfs_layout_segment *lseg = 226 struct inode *ino = lseg->pls_layout->plh_inode;
223 container_of(kref, struct pnfs_layout_segment, kref);
224 struct inode *ino = lseg->layout->inode;
225 227
226 dprintk("--> %s\n", __func__);
227 NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); 228 NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
228 /* Matched by get_layout_hdr_locked in pnfs_insert_layout */ 229 /* Matched by get_layout_hdr in pnfs_insert_layout */
229 put_layout_hdr(ino); 230 put_layout_hdr(NFS_I(ino)->layout);
230} 231}
231 232
232static void 233/* The use of tmp_list is necessary because pnfs_curr_ld->free_lseg
233put_lseg(struct pnfs_layout_segment *lseg) 234 * could sleep, so must be called outside of the lock.
235 * Returns 1 if object was removed, otherwise return 0.
236 */
237static int
238put_lseg_locked(struct pnfs_layout_segment *lseg,
239 struct list_head *tmp_list)
240{
241 dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
242 atomic_read(&lseg->pls_refcount),
243 test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
244 if (atomic_dec_and_test(&lseg->pls_refcount)) {
245 struct inode *ino = lseg->pls_layout->plh_inode;
246
247 BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
248 list_del(&lseg->pls_list);
249 if (list_empty(&lseg->pls_layout->plh_segs)) {
250 struct nfs_client *clp;
251
252 clp = NFS_SERVER(ino)->nfs_client;
253 spin_lock(&clp->cl_lock);
254 /* List does not take a reference, so no need for put here */
255 list_del_init(&lseg->pls_layout->plh_layouts);
256 spin_unlock(&clp->cl_lock);
257 clear_bit(NFS_LAYOUT_BULK_RECALL, &lseg->pls_layout->plh_flags);
258 }
259 rpc_wake_up(&NFS_SERVER(ino)->roc_rpcwaitq);
260 list_add(&lseg->pls_list, tmp_list);
261 return 1;
262 }
263 return 0;
264}
265
266static bool
267should_free_lseg(u32 lseg_iomode, u32 recall_iomode)
234{ 268{
235 if (!lseg) 269 return (recall_iomode == IOMODE_ANY ||
236 return; 270 lseg_iomode == recall_iomode);
271}
237 272
238 dprintk("%s: lseg %p ref %d\n", __func__, lseg, 273/* Returns 1 if lseg is removed from list, 0 otherwise */
239 atomic_read(&lseg->kref.refcount)); 274static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
240 kref_put(&lseg->kref, destroy_lseg); 275 struct list_head *tmp_list)
276{
277 int rv = 0;
278
279 if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
280 /* Remove the reference keeping the lseg in the
281 * list. It will now be removed when all
282 * outstanding io is finished.
283 */
284 rv = put_lseg_locked(lseg, tmp_list);
285 }
286 return rv;
241} 287}
242 288
243static void 289/* Returns count of number of matching invalid lsegs remaining in list
244pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list) 290 * after call.
291 */
292int
293mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
294 struct list_head *tmp_list,
295 u32 iomode)
245{ 296{
246 struct pnfs_layout_segment *lseg, *next; 297 struct pnfs_layout_segment *lseg, *next;
247 struct nfs_client *clp; 298 int invalid = 0, removed = 0;
248 299
249 dprintk("%s:Begin lo %p\n", __func__, lo); 300 dprintk("%s:Begin lo %p\n", __func__, lo);
250 301
251 assert_spin_locked(&lo->inode->i_lock); 302 list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
252 list_for_each_entry_safe(lseg, next, &lo->segs, fi_list) { 303 if (should_free_lseg(lseg->pls_range.iomode, iomode)) {
253 dprintk("%s: freeing lseg %p\n", __func__, lseg); 304 dprintk("%s: freeing lseg %p iomode %d "
254 list_move(&lseg->fi_list, tmp_list); 305 "offset %llu length %llu\n", __func__,
255 } 306 lseg, lseg->pls_range.iomode, lseg->pls_range.offset,
256 clp = NFS_SERVER(lo->inode)->nfs_client; 307 lseg->pls_range.length);
257 spin_lock(&clp->cl_lock); 308 invalid++;
258 /* List does not take a reference, so no need for put here */ 309 removed += mark_lseg_invalid(lseg, tmp_list);
259 list_del_init(&lo->layouts); 310 }
260 spin_unlock(&clp->cl_lock); 311 dprintk("%s:Return %i\n", __func__, invalid - removed);
261 write_seqlock(&lo->seqlock); 312 return invalid - removed;
262 clear_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
263 write_sequnlock(&lo->seqlock);
264
265 dprintk("%s:Return\n", __func__);
266} 313}
267 314
268static void 315void
269pnfs_free_lseg_list(struct list_head *tmp_list) 316pnfs_free_lseg_list(struct list_head *free_me)
270{ 317{
271 struct pnfs_layout_segment *lseg; 318 struct pnfs_layout_segment *lseg, *tmp;
272 319
273 while (!list_empty(tmp_list)) { 320 list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {
274 lseg = list_entry(tmp_list->next, struct pnfs_layout_segment, 321 list_del(&lseg->pls_list);
275 fi_list); 322 free_lseg(lseg);
276 dprintk("%s calling put_lseg on %p\n", __func__, lseg);
277 list_del(&lseg->fi_list);
278 put_lseg(lseg);
279 } 323 }
280} 324}
281 325
@@ -288,7 +332,8 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
288 spin_lock(&nfsi->vfs_inode.i_lock); 332 spin_lock(&nfsi->vfs_inode.i_lock);
289 lo = nfsi->layout; 333 lo = nfsi->layout;
290 if (lo) { 334 if (lo) {
291 pnfs_clear_lseg_list(lo, &tmp_list); 335 set_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags);
336 mark_matching_lsegs_invalid(lo, &tmp_list, IOMODE_ANY);
292 /* Matched by refcount set to 1 in alloc_init_layout_hdr */ 337 /* Matched by refcount set to 1 in alloc_init_layout_hdr */
293 put_layout_hdr_locked(lo); 338 put_layout_hdr_locked(lo);
294 } 339 }
@@ -312,76 +357,80 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
312 357
313 while (!list_empty(&tmp_list)) { 358 while (!list_empty(&tmp_list)) {
314 lo = list_entry(tmp_list.next, struct pnfs_layout_hdr, 359 lo = list_entry(tmp_list.next, struct pnfs_layout_hdr,
315 layouts); 360 plh_layouts);
316 dprintk("%s freeing layout for inode %lu\n", __func__, 361 dprintk("%s freeing layout for inode %lu\n", __func__,
317 lo->inode->i_ino); 362 lo->plh_inode->i_ino);
318 pnfs_destroy_layout(NFS_I(lo->inode)); 363 pnfs_destroy_layout(NFS_I(lo->plh_inode));
319 } 364 }
320} 365}
321 366
322/* update lo->stateid with new if is more recent 367/* update lo->plh_stateid with new if is more recent */
323 * 368void
324 * lo->stateid could be the open stateid, in which case we just use what given. 369pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
325 */ 370 bool update_barrier)
326static void 371{
327pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, 372 u32 oldseq, newseq;
328 const nfs4_stateid *new) 373
329{ 374 oldseq = be32_to_cpu(lo->plh_stateid.stateid.seqid);
330 nfs4_stateid *old = &lo->stateid; 375 newseq = be32_to_cpu(new->stateid.seqid);
331 bool overwrite = false; 376 if ((int)(newseq - oldseq) > 0) {
332 377 memcpy(&lo->plh_stateid, &new->stateid, sizeof(new->stateid));
333 write_seqlock(&lo->seqlock); 378 if (update_barrier) {
334 if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state) || 379 u32 new_barrier = be32_to_cpu(new->stateid.seqid);
335 memcmp(old->stateid.other, new->stateid.other, sizeof(new->stateid.other))) 380
336 overwrite = true; 381 if ((int)(new_barrier - lo->plh_barrier))
337 else { 382 lo->plh_barrier = new_barrier;
338 u32 oldseq, newseq; 383 } else {
339 384 /* Because of wraparound, we want to keep the barrier
340 oldseq = be32_to_cpu(old->stateid.seqid); 385 * "close" to the current seqids. It needs to be
341 newseq = be32_to_cpu(new->stateid.seqid); 386 * within 2**31 to count as "behind", so if it
342 if ((int)(newseq - oldseq) > 0) 387 * gets too near that limit, give us a litle leeway
343 overwrite = true; 388 * and bring it to within 2**30.
389 * NOTE - and yes, this is all unsigned arithmetic.
390 */
391 if (unlikely((newseq - lo->plh_barrier) > (3 << 29)))
392 lo->plh_barrier = newseq - (1 << 30);
393 }
344 } 394 }
345 if (overwrite)
346 memcpy(&old->stateid, &new->stateid, sizeof(new->stateid));
347 write_sequnlock(&lo->seqlock);
348} 395}
349 396
350static void 397/* lget is set to 1 if called from inside send_layoutget call chain */
351pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo, 398static bool
352 struct nfs4_state *state) 399pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid,
400 int lget)
353{ 401{
354 int seq; 402 if ((stateid) &&
355 403 (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0)
356 dprintk("--> %s\n", __func__); 404 return true;
357 write_seqlock(&lo->seqlock); 405 return lo->plh_block_lgets ||
358 do { 406 test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
359 seq = read_seqbegin(&state->seqlock); 407 (list_empty(&lo->plh_segs) &&
360 memcpy(lo->stateid.data, state->stateid.data, 408 (atomic_read(&lo->plh_outstanding) > lget));
361 sizeof(state->stateid.data));
362 } while (read_seqretry(&state->seqlock, seq));
363 set_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
364 write_sequnlock(&lo->seqlock);
365 dprintk("<-- %s\n", __func__);
366} 409}
367 410
368void 411int
369pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, 412pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
370 struct nfs4_state *open_state) 413 struct nfs4_state *open_state)
371{ 414{
372 int seq; 415 int status = 0;
373 416
374 dprintk("--> %s\n", __func__); 417 dprintk("--> %s\n", __func__);
375 do { 418 spin_lock(&lo->plh_inode->i_lock);
376 seq = read_seqbegin(&lo->seqlock); 419 if (pnfs_layoutgets_blocked(lo, NULL, 1)) {
377 if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state)) { 420 status = -EAGAIN;
378 /* This will trigger retry of the read */ 421 } else if (list_empty(&lo->plh_segs)) {
379 pnfs_layout_from_open_stateid(lo, open_state); 422 int seq;
380 } else 423
381 memcpy(dst->data, lo->stateid.data, 424 do {
382 sizeof(lo->stateid.data)); 425 seq = read_seqbegin(&open_state->seqlock);
383 } while (read_seqretry(&lo->seqlock, seq)); 426 memcpy(dst->data, open_state->stateid.data,
427 sizeof(open_state->stateid.data));
428 } while (read_seqretry(&open_state->seqlock, seq));
429 } else
430 memcpy(dst->data, lo->plh_stateid.data, sizeof(lo->plh_stateid.data));
431 spin_unlock(&lo->plh_inode->i_lock);
384 dprintk("<-- %s\n", __func__); 432 dprintk("<-- %s\n", __func__);
433 return status;
385} 434}
386 435
387/* 436/*
@@ -395,7 +444,7 @@ send_layoutget(struct pnfs_layout_hdr *lo,
395 struct nfs_open_context *ctx, 444 struct nfs_open_context *ctx,
396 u32 iomode) 445 u32 iomode)
397{ 446{
398 struct inode *ino = lo->inode; 447 struct inode *ino = lo->plh_inode;
399 struct nfs_server *server = NFS_SERVER(ino); 448 struct nfs_server *server = NFS_SERVER(ino);
400 struct nfs4_layoutget *lgp; 449 struct nfs4_layoutget *lgp;
401 struct pnfs_layout_segment *lseg = NULL; 450 struct pnfs_layout_segment *lseg = NULL;
@@ -404,10 +453,8 @@ send_layoutget(struct pnfs_layout_hdr *lo,
404 453
405 BUG_ON(ctx == NULL); 454 BUG_ON(ctx == NULL);
406 lgp = kzalloc(sizeof(*lgp), GFP_KERNEL); 455 lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
407 if (lgp == NULL) { 456 if (lgp == NULL)
408 put_layout_hdr(lo->inode);
409 return NULL; 457 return NULL;
410 }
411 lgp->args.minlength = NFS4_MAX_UINT64; 458 lgp->args.minlength = NFS4_MAX_UINT64;
412 lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; 459 lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
413 lgp->args.range.iomode = iomode; 460 lgp->args.range.iomode = iomode;
@@ -424,11 +471,88 @@ send_layoutget(struct pnfs_layout_hdr *lo,
424 nfs4_proc_layoutget(lgp); 471 nfs4_proc_layoutget(lgp);
425 if (!lseg) { 472 if (!lseg) {
426 /* remember that LAYOUTGET failed and suspend trying */ 473 /* remember that LAYOUTGET failed and suspend trying */
427 set_bit(lo_fail_bit(iomode), &lo->state); 474 set_bit(lo_fail_bit(iomode), &lo->plh_flags);
428 } 475 }
429 return lseg; 476 return lseg;
430} 477}
431 478
479bool pnfs_roc(struct inode *ino)
480{
481 struct pnfs_layout_hdr *lo;
482 struct pnfs_layout_segment *lseg, *tmp;
483 LIST_HEAD(tmp_list);
484 bool found = false;
485
486 spin_lock(&ino->i_lock);
487 lo = NFS_I(ino)->layout;
488 if (!lo || !test_and_clear_bit(NFS_LAYOUT_ROC, &lo->plh_flags) ||
489 test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags))
490 goto out_nolayout;
491 list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list)
492 if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
493 mark_lseg_invalid(lseg, &tmp_list);
494 found = true;
495 }
496 if (!found)
497 goto out_nolayout;
498 lo->plh_block_lgets++;
499 get_layout_hdr(lo); /* matched in pnfs_roc_release */
500 spin_unlock(&ino->i_lock);
501 pnfs_free_lseg_list(&tmp_list);
502 return true;
503
504out_nolayout:
505 spin_unlock(&ino->i_lock);
506 return false;
507}
508
509void pnfs_roc_release(struct inode *ino)
510{
511 struct pnfs_layout_hdr *lo;
512
513 spin_lock(&ino->i_lock);
514 lo = NFS_I(ino)->layout;
515 lo->plh_block_lgets--;
516 put_layout_hdr_locked(lo);
517 spin_unlock(&ino->i_lock);
518}
519
520void pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
521{
522 struct pnfs_layout_hdr *lo;
523
524 spin_lock(&ino->i_lock);
525 lo = NFS_I(ino)->layout;
526 if ((int)(barrier - lo->plh_barrier) > 0)
527 lo->plh_barrier = barrier;
528 spin_unlock(&ino->i_lock);
529}
530
531bool pnfs_roc_drain(struct inode *ino, u32 *barrier)
532{
533 struct nfs_inode *nfsi = NFS_I(ino);
534 struct pnfs_layout_segment *lseg;
535 bool found = false;
536
537 spin_lock(&ino->i_lock);
538 list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list)
539 if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
540 found = true;
541 break;
542 }
543 if (!found) {
544 struct pnfs_layout_hdr *lo = nfsi->layout;
545 u32 current_seqid = be32_to_cpu(lo->plh_stateid.stateid.seqid);
546
547 /* Since close does not return a layout stateid for use as
548 * a barrier, we choose the worst-case barrier.
549 */
550 *barrier = current_seqid + atomic_read(&lo->plh_outstanding);
551 }
552 spin_unlock(&ino->i_lock);
553 return found;
554}
555
432/* 556/*
433 * Compare two layout segments for sorting into layout cache. 557 * Compare two layout segments for sorting into layout cache.
434 * We want to preferentially return RW over RO layouts, so ensure those 558 * We want to preferentially return RW over RO layouts, so ensure those
@@ -450,37 +574,29 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo,
450 574
451 dprintk("%s:Begin\n", __func__); 575 dprintk("%s:Begin\n", __func__);
452 576
453 assert_spin_locked(&lo->inode->i_lock); 577 assert_spin_locked(&lo->plh_inode->i_lock);
454 if (list_empty(&lo->segs)) { 578 list_for_each_entry(lp, &lo->plh_segs, pls_list) {
455 struct nfs_client *clp = NFS_SERVER(lo->inode)->nfs_client; 579 if (cmp_layout(lp->pls_range.iomode, lseg->pls_range.iomode) > 0)
456
457 spin_lock(&clp->cl_lock);
458 BUG_ON(!list_empty(&lo->layouts));
459 list_add_tail(&lo->layouts, &clp->cl_layouts);
460 spin_unlock(&clp->cl_lock);
461 }
462 list_for_each_entry(lp, &lo->segs, fi_list) {
463 if (cmp_layout(lp->range.iomode, lseg->range.iomode) > 0)
464 continue; 580 continue;
465 list_add_tail(&lseg->fi_list, &lp->fi_list); 581 list_add_tail(&lseg->pls_list, &lp->pls_list);
466 dprintk("%s: inserted lseg %p " 582 dprintk("%s: inserted lseg %p "
467 "iomode %d offset %llu length %llu before " 583 "iomode %d offset %llu length %llu before "
468 "lp %p iomode %d offset %llu length %llu\n", 584 "lp %p iomode %d offset %llu length %llu\n",
469 __func__, lseg, lseg->range.iomode, 585 __func__, lseg, lseg->pls_range.iomode,
470 lseg->range.offset, lseg->range.length, 586 lseg->pls_range.offset, lseg->pls_range.length,
471 lp, lp->range.iomode, lp->range.offset, 587 lp, lp->pls_range.iomode, lp->pls_range.offset,
472 lp->range.length); 588 lp->pls_range.length);
473 found = 1; 589 found = 1;
474 break; 590 break;
475 } 591 }
476 if (!found) { 592 if (!found) {
477 list_add_tail(&lseg->fi_list, &lo->segs); 593 list_add_tail(&lseg->pls_list, &lo->plh_segs);
478 dprintk("%s: inserted lseg %p " 594 dprintk("%s: inserted lseg %p "
479 "iomode %d offset %llu length %llu at tail\n", 595 "iomode %d offset %llu length %llu at tail\n",
480 __func__, lseg, lseg->range.iomode, 596 __func__, lseg, lseg->pls_range.iomode,
481 lseg->range.offset, lseg->range.length); 597 lseg->pls_range.offset, lseg->pls_range.length);
482 } 598 }
483 get_layout_hdr_locked(lo); 599 get_layout_hdr(lo);
484 600
485 dprintk("%s:Return\n", __func__); 601 dprintk("%s:Return\n", __func__);
486} 602}
@@ -493,11 +609,11 @@ alloc_init_layout_hdr(struct inode *ino)
493 lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL); 609 lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL);
494 if (!lo) 610 if (!lo)
495 return NULL; 611 return NULL;
496 lo->refcount = 1; 612 atomic_set(&lo->plh_refcount, 1);
497 INIT_LIST_HEAD(&lo->layouts); 613 INIT_LIST_HEAD(&lo->plh_layouts);
498 INIT_LIST_HEAD(&lo->segs); 614 INIT_LIST_HEAD(&lo->plh_segs);
499 seqlock_init(&lo->seqlock); 615 INIT_LIST_HEAD(&lo->plh_bulk_recall);
500 lo->inode = ino; 616 lo->plh_inode = ino;
501 return lo; 617 return lo;
502} 618}
503 619
@@ -510,9 +626,12 @@ pnfs_find_alloc_layout(struct inode *ino)
510 dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout); 626 dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);
511 627
512 assert_spin_locked(&ino->i_lock); 628 assert_spin_locked(&ino->i_lock);
513 if (nfsi->layout) 629 if (nfsi->layout) {
514 return nfsi->layout; 630 if (test_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags))
515 631 return NULL;
632 else
633 return nfsi->layout;
634 }
516 spin_unlock(&ino->i_lock); 635 spin_unlock(&ino->i_lock);
517 new = alloc_init_layout_hdr(ino); 636 new = alloc_init_layout_hdr(ino);
518 spin_lock(&ino->i_lock); 637 spin_lock(&ino->i_lock);
@@ -538,31 +657,32 @@ pnfs_find_alloc_layout(struct inode *ino)
538static int 657static int
539is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode) 658is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode)
540{ 659{
541 return (iomode != IOMODE_RW || lseg->range.iomode == IOMODE_RW); 660 return (iomode != IOMODE_RW || lseg->pls_range.iomode == IOMODE_RW);
542} 661}
543 662
544/* 663/*
545 * lookup range in layout 664 * lookup range in layout
546 */ 665 */
547static struct pnfs_layout_segment * 666static struct pnfs_layout_segment *
548pnfs_has_layout(struct pnfs_layout_hdr *lo, u32 iomode) 667pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode)
549{ 668{
550 struct pnfs_layout_segment *lseg, *ret = NULL; 669 struct pnfs_layout_segment *lseg, *ret = NULL;
551 670
552 dprintk("%s:Begin\n", __func__); 671 dprintk("%s:Begin\n", __func__);
553 672
554 assert_spin_locked(&lo->inode->i_lock); 673 assert_spin_locked(&lo->plh_inode->i_lock);
555 list_for_each_entry(lseg, &lo->segs, fi_list) { 674 list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
556 if (is_matching_lseg(lseg, iomode)) { 675 if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
676 is_matching_lseg(lseg, iomode)) {
557 ret = lseg; 677 ret = lseg;
558 break; 678 break;
559 } 679 }
560 if (cmp_layout(iomode, lseg->range.iomode) > 0) 680 if (cmp_layout(iomode, lseg->pls_range.iomode) > 0)
561 break; 681 break;
562 } 682 }
563 683
564 dprintk("%s:Return lseg %p ref %d\n", 684 dprintk("%s:Return lseg %p ref %d\n",
565 __func__, ret, ret ? atomic_read(&ret->kref.refcount) : 0); 685 __func__, ret, ret ? atomic_read(&ret->pls_refcount) : 0);
566 return ret; 686 return ret;
567} 687}
568 688
@@ -576,6 +696,7 @@ pnfs_update_layout(struct inode *ino,
576 enum pnfs_iomode iomode) 696 enum pnfs_iomode iomode)
577{ 697{
578 struct nfs_inode *nfsi = NFS_I(ino); 698 struct nfs_inode *nfsi = NFS_I(ino);
699 struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
579 struct pnfs_layout_hdr *lo; 700 struct pnfs_layout_hdr *lo;
580 struct pnfs_layout_segment *lseg = NULL; 701 struct pnfs_layout_segment *lseg = NULL;
581 702
@@ -588,25 +709,53 @@ pnfs_update_layout(struct inode *ino,
588 goto out_unlock; 709 goto out_unlock;
589 } 710 }
590 711
591 /* Check to see if the layout for the given range already exists */ 712 /* Do we even need to bother with this? */
592 lseg = pnfs_has_layout(lo, iomode); 713 if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
593 if (lseg) { 714 test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
594 dprintk("%s: Using cached lseg %p for iomode %d)\n", 715 dprintk("%s matches recall, use MDS\n", __func__);
595 __func__, lseg, iomode);
596 goto out_unlock; 716 goto out_unlock;
597 } 717 }
718 /* Check to see if the layout for the given range already exists */
719 lseg = pnfs_find_lseg(lo, iomode);
720 if (lseg)
721 goto out_unlock;
598 722
599 /* if LAYOUTGET already failed once we don't try again */ 723 /* if LAYOUTGET already failed once we don't try again */
600 if (test_bit(lo_fail_bit(iomode), &nfsi->layout->state)) 724 if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags))
725 goto out_unlock;
726
727 if (pnfs_layoutgets_blocked(lo, NULL, 0))
601 goto out_unlock; 728 goto out_unlock;
729 atomic_inc(&lo->plh_outstanding);
602 730
603 get_layout_hdr_locked(lo); /* Matched in nfs4_layoutget_release */ 731 get_layout_hdr(lo);
732 if (list_empty(&lo->plh_segs)) {
733 /* The lo must be on the clp list if there is any
734 * chance of a CB_LAYOUTRECALL(FILE) coming in.
735 */
736 spin_lock(&clp->cl_lock);
737 BUG_ON(!list_empty(&lo->plh_layouts));
738 list_add_tail(&lo->plh_layouts, &clp->cl_layouts);
739 spin_unlock(&clp->cl_lock);
740 }
604 spin_unlock(&ino->i_lock); 741 spin_unlock(&ino->i_lock);
605 742
606 lseg = send_layoutget(lo, ctx, iomode); 743 lseg = send_layoutget(lo, ctx, iomode);
744 if (!lseg) {
745 spin_lock(&ino->i_lock);
746 if (list_empty(&lo->plh_segs)) {
747 spin_lock(&clp->cl_lock);
748 list_del_init(&lo->plh_layouts);
749 spin_unlock(&clp->cl_lock);
750 clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
751 }
752 spin_unlock(&ino->i_lock);
753 }
754 atomic_dec(&lo->plh_outstanding);
755 put_layout_hdr(lo);
607out: 756out:
608 dprintk("%s end, state 0x%lx lseg %p\n", __func__, 757 dprintk("%s end, state 0x%lx lseg %p\n", __func__,
609 nfsi->layout->state, lseg); 758 nfsi->layout->plh_flags, lseg);
610 return lseg; 759 return lseg;
611out_unlock: 760out_unlock:
612 spin_unlock(&ino->i_lock); 761 spin_unlock(&ino->i_lock);
@@ -619,9 +768,21 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
619 struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout; 768 struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
620 struct nfs4_layoutget_res *res = &lgp->res; 769 struct nfs4_layoutget_res *res = &lgp->res;
621 struct pnfs_layout_segment *lseg; 770 struct pnfs_layout_segment *lseg;
622 struct inode *ino = lo->inode; 771 struct inode *ino = lo->plh_inode;
772 struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
623 int status = 0; 773 int status = 0;
624 774
775 /* Verify we got what we asked for.
776 * Note that because the xdr parsing only accepts a single
777 * element array, this can fail even if the server is behaving
778 * correctly.
779 */
780 if (lgp->args.range.iomode > res->range.iomode ||
781 res->range.offset != 0 ||
782 res->range.length != NFS4_MAX_UINT64) {
783 status = -EINVAL;
784 goto out;
785 }
625 /* Inject layout blob into I/O device driver */ 786 /* Inject layout blob into I/O device driver */
626 lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res); 787 lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res);
627 if (!lseg || IS_ERR(lseg)) { 788 if (!lseg || IS_ERR(lseg)) {
@@ -635,16 +796,37 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
635 } 796 }
636 797
637 spin_lock(&ino->i_lock); 798 spin_lock(&ino->i_lock);
799 if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
800 test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
801 dprintk("%s forget reply due to recall\n", __func__);
802 goto out_forget_reply;
803 }
804
805 if (pnfs_layoutgets_blocked(lo, &res->stateid, 1)) {
806 dprintk("%s forget reply due to state\n", __func__);
807 goto out_forget_reply;
808 }
638 init_lseg(lo, lseg); 809 init_lseg(lo, lseg);
639 lseg->range = res->range; 810 lseg->pls_range = res->range;
640 *lgp->lsegpp = lseg; 811 *lgp->lsegpp = lseg;
641 pnfs_insert_layout(lo, lseg); 812 pnfs_insert_layout(lo, lseg);
642 813
814 if (res->return_on_close) {
815 set_bit(NFS_LSEG_ROC, &lseg->pls_flags);
816 set_bit(NFS_LAYOUT_ROC, &lo->plh_flags);
817 }
818
643 /* Done processing layoutget. Set the layout stateid */ 819 /* Done processing layoutget. Set the layout stateid */
644 pnfs_set_layout_stateid(lo, &res->stateid); 820 pnfs_set_layout_stateid(lo, &res->stateid, false);
645 spin_unlock(&ino->i_lock); 821 spin_unlock(&ino->i_lock);
646out: 822out:
647 return status; 823 return status;
824
825out_forget_reply:
826 spin_unlock(&ino->i_lock);
827 lseg->pls_layout = lo;
828 NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
829 goto out;
648} 830}
649 831
650/* 832/*
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index e12367d5048..e2612ea0cbe 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -30,11 +30,17 @@
30#ifndef FS_NFS_PNFS_H 30#ifndef FS_NFS_PNFS_H
31#define FS_NFS_PNFS_H 31#define FS_NFS_PNFS_H
32 32
33enum {
34 NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */
35 NFS_LSEG_ROC, /* roc bit received from server */
36};
37
33struct pnfs_layout_segment { 38struct pnfs_layout_segment {
34 struct list_head fi_list; 39 struct list_head pls_list;
35 struct pnfs_layout_range range; 40 struct pnfs_layout_range pls_range;
36 struct kref kref; 41 atomic_t pls_refcount;
37 struct pnfs_layout_hdr *layout; 42 unsigned long pls_flags;
43 struct pnfs_layout_hdr *pls_layout;
38}; 44};
39 45
40#ifdef CONFIG_NFS_V4_1 46#ifdef CONFIG_NFS_V4_1
@@ -44,7 +50,9 @@ struct pnfs_layout_segment {
44enum { 50enum {
45 NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */ 51 NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */
46 NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */ 52 NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */
47 NFS_LAYOUT_STATEID_SET, /* have a valid layout stateid */ 53 NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */
54 NFS_LAYOUT_ROC, /* some lseg had roc bit set */
55 NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */
48}; 56};
49 57
50/* Per-layout driver specific registration structure */ 58/* Per-layout driver specific registration structure */
@@ -60,13 +68,16 @@ struct pnfs_layoutdriver_type {
60}; 68};
61 69
62struct pnfs_layout_hdr { 70struct pnfs_layout_hdr {
63 unsigned long refcount; 71 atomic_t plh_refcount;
64 struct list_head layouts; /* other client layouts */ 72 struct list_head plh_layouts; /* other client layouts */
65 struct list_head segs; /* layout segments list */ 73 struct list_head plh_bulk_recall; /* clnt list of bulk recalls */
66 seqlock_t seqlock; /* Protects the stateid */ 74 struct list_head plh_segs; /* layout segments list */
67 nfs4_stateid stateid; 75 nfs4_stateid plh_stateid;
68 unsigned long state; 76 atomic_t plh_outstanding; /* number of RPCs out */
69 struct inode *inode; 77 unsigned long plh_block_lgets; /* block LAYOUTGET if >0 */
78 u32 plh_barrier; /* ignore lower seqids */
79 unsigned long plh_flags;
80 struct inode *plh_inode;
70}; 81};
71 82
72struct pnfs_device { 83struct pnfs_device {
@@ -134,17 +145,30 @@ extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
134extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp); 145extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp);
135 146
136/* pnfs.c */ 147/* pnfs.c */
148void get_layout_hdr(struct pnfs_layout_hdr *lo);
137struct pnfs_layout_segment * 149struct pnfs_layout_segment *
138pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, 150pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
139 enum pnfs_iomode access_type); 151 enum pnfs_iomode access_type);
140void set_pnfs_layoutdriver(struct nfs_server *, u32 id); 152void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
141void unset_pnfs_layoutdriver(struct nfs_server *); 153void unset_pnfs_layoutdriver(struct nfs_server *);
142int pnfs_layout_process(struct nfs4_layoutget *lgp); 154int pnfs_layout_process(struct nfs4_layoutget *lgp);
155void pnfs_free_lseg_list(struct list_head *tmp_list);
143void pnfs_destroy_layout(struct nfs_inode *); 156void pnfs_destroy_layout(struct nfs_inode *);
144void pnfs_destroy_all_layouts(struct nfs_client *); 157void pnfs_destroy_all_layouts(struct nfs_client *);
145void put_layout_hdr(struct inode *inode); 158void put_layout_hdr(struct pnfs_layout_hdr *lo);
146void pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, 159void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
147 struct nfs4_state *open_state); 160 const nfs4_stateid *new,
161 bool update_barrier);
162int pnfs_choose_layoutget_stateid(nfs4_stateid *dst,
163 struct pnfs_layout_hdr *lo,
164 struct nfs4_state *open_state);
165int mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
166 struct list_head *tmp_list,
167 u32 iomode);
168bool pnfs_roc(struct inode *ino);
169void pnfs_roc_release(struct inode *ino);
170void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
171bool pnfs_roc_drain(struct inode *ino, u32 *barrier);
148 172
149 173
150static inline int lo_fail_bit(u32 iomode) 174static inline int lo_fail_bit(u32 iomode)
@@ -176,6 +200,28 @@ pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
176 return NULL; 200 return NULL;
177} 201}
178 202
203static inline bool
204pnfs_roc(struct inode *ino)
205{
206 return false;
207}
208
209static inline void
210pnfs_roc_release(struct inode *ino)
211{
212}
213
214static inline void
215pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
216{
217}
218
219static inline bool
220pnfs_roc_drain(struct inode *ino, u32 *barrier)
221{
222 return false;
223}
224
179static inline void set_pnfs_layoutdriver(struct nfs_server *s, u32 id) 225static inline void set_pnfs_layoutdriver(struct nfs_server *s, u32 id)
180{ 226{
181} 227}
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 58e7f84fc1f..77d5e21c4ad 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -458,7 +458,7 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
458 fattr = nfs_alloc_fattr(); 458 fattr = nfs_alloc_fattr();
459 status = -ENOMEM; 459 status = -ENOMEM;
460 if (fh == NULL || fattr == NULL) 460 if (fh == NULL || fattr == NULL)
461 goto out; 461 goto out_free;
462 462
463 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 463 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
464 nfs_mark_for_revalidate(dir); 464 nfs_mark_for_revalidate(dir);
@@ -471,6 +471,7 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
471 if (status == 0) 471 if (status == 0)
472 status = nfs_instantiate(dentry, fh, fattr); 472 status = nfs_instantiate(dentry, fh, fattr);
473 473
474out_free:
474 nfs_free_fattr(fattr); 475 nfs_free_fattr(fattr);
475 nfs_free_fhandle(fh); 476 nfs_free_fhandle(fh);
476out: 477out:
@@ -731,7 +732,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
731 .statfs = nfs_proc_statfs, 732 .statfs = nfs_proc_statfs,
732 .fsinfo = nfs_proc_fsinfo, 733 .fsinfo = nfs_proc_fsinfo,
733 .pathconf = nfs_proc_pathconf, 734 .pathconf = nfs_proc_pathconf,
734 .decode_dirent = nfs_decode_dirent, 735 .decode_dirent = nfs2_decode_dirent,
735 .read_setup = nfs_proc_read_setup, 736 .read_setup = nfs_proc_read_setup,
736 .read_done = nfs_read_done, 737 .read_done = nfs_read_done,
737 .write_setup = nfs_proc_write_setup, 738 .write_setup = nfs_proc_write_setup,
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 4100630c9a5..0f9ea73e778 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -598,7 +598,9 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss,
598 598
599 if (nfss->mountd_version || showdefaults) 599 if (nfss->mountd_version || showdefaults)
600 seq_printf(m, ",mountvers=%u", nfss->mountd_version); 600 seq_printf(m, ",mountvers=%u", nfss->mountd_version);
601 if (nfss->mountd_port || showdefaults) 601 if ((nfss->mountd_port &&
602 nfss->mountd_port != (unsigned short)NFS_UNSPEC_PORT) ||
603 showdefaults)
602 seq_printf(m, ",mountport=%u", nfss->mountd_port); 604 seq_printf(m, ",mountport=%u", nfss->mountd_port);
603 605
604 nfs_show_mountd_netid(m, nfss, showdefaults); 606 nfs_show_mountd_netid(m, nfss, showdefaults);
@@ -2494,7 +2496,13 @@ static void nfs4_clone_super(struct super_block *sb,
2494 sb->s_maxbytes = old_sb->s_maxbytes; 2496 sb->s_maxbytes = old_sb->s_maxbytes;
2495 sb->s_time_gran = 1; 2497 sb->s_time_gran = 1;
2496 sb->s_op = old_sb->s_op; 2498 sb->s_op = old_sb->s_op;
2497 nfs_initialise_sb(sb); 2499 /*
2500 * The VFS shouldn't apply the umask to mode bits. We will do
2501 * so ourselves when necessary.
2502 */
2503 sb->s_flags |= MS_POSIXACL;
2504 sb->s_xattr = old_sb->s_xattr;
2505 nfs_initialise_sb(sb);
2498} 2506}
2499 2507
2500/* 2508/*
@@ -2504,6 +2512,12 @@ static void nfs4_fill_super(struct super_block *sb)
2504{ 2512{
2505 sb->s_time_gran = 1; 2513 sb->s_time_gran = 1;
2506 sb->s_op = &nfs4_sops; 2514 sb->s_op = &nfs4_sops;
2515 /*
2516 * The VFS shouldn't apply the umask to mode bits. We will do
2517 * so ourselves when necessary.
2518 */
2519 sb->s_flags |= MS_POSIXACL;
2520 sb->s_xattr = nfs4_xattr_handlers;
2507 nfs_initialise_sb(sb); 2521 nfs_initialise_sb(sb);
2508} 2522}
2509 2523
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 7bdec853140..e313a51acdd 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -429,7 +429,7 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
429 data = kzalloc(sizeof(*data), GFP_KERNEL); 429 data = kzalloc(sizeof(*data), GFP_KERNEL);
430 if (data == NULL) 430 if (data == NULL)
431 return ERR_PTR(-ENOMEM); 431 return ERR_PTR(-ENOMEM);
432 task_setup_data.callback_data = data, 432 task_setup_data.callback_data = data;
433 433
434 data->cred = rpc_lookup_cred(); 434 data->cred = rpc_lookup_cred();
435 if (IS_ERR(data->cred)) { 435 if (IS_ERR(data->cred)) {
@@ -496,7 +496,7 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
496 496
497 dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n", 497 dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
498 dentry->d_parent->d_name.name, dentry->d_name.name, 498 dentry->d_parent->d_name.name, dentry->d_name.name,
499 atomic_read(&dentry->d_count)); 499 dentry->d_count);
500 nfs_inc_stats(dir, NFSIOS_SILLYRENAME); 500 nfs_inc_stats(dir, NFSIOS_SILLYRENAME);
501 501
502 /* 502 /*
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 143da2eecd7..21a63da305f 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -50,11 +50,6 @@ enum {
50 NFSPROC4_CLNT_CB_SEQUENCE, 50 NFSPROC4_CLNT_CB_SEQUENCE,
51}; 51};
52 52
53enum nfs_cb_opnum4 {
54 OP_CB_RECALL = 4,
55 OP_CB_SEQUENCE = 11,
56};
57
58#define NFS4_MAXTAGLEN 20 53#define NFS4_MAXTAGLEN 20
59 54
60#define NFS4_enc_cb_null_sz 0 55#define NFS4_enc_cb_null_sz 0
@@ -79,61 +74,6 @@ enum nfs_cb_opnum4 {
79 cb_sequence_dec_sz + \ 74 cb_sequence_dec_sz + \
80 op_dec_sz) 75 op_dec_sz)
81 76
82/*
83* Generic encode routines from fs/nfs/nfs4xdr.c
84*/
85static inline __be32 *
86xdr_writemem(__be32 *p, const void *ptr, int nbytes)
87{
88 int tmp = XDR_QUADLEN(nbytes);
89 if (!tmp)
90 return p;
91 p[tmp-1] = 0;
92 memcpy(p, ptr, nbytes);
93 return p + tmp;
94}
95
96#define WRITE32(n) *p++ = htonl(n)
97#define WRITEMEM(ptr,nbytes) do { \
98 p = xdr_writemem(p, ptr, nbytes); \
99} while (0)
100#define RESERVE_SPACE(nbytes) do { \
101 p = xdr_reserve_space(xdr, nbytes); \
102 if (!p) dprintk("NFSD: RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __func__); \
103 BUG_ON(!p); \
104} while (0)
105
106/*
107 * Generic decode routines from fs/nfs/nfs4xdr.c
108 */
109#define DECODE_TAIL \
110 status = 0; \
111out: \
112 return status; \
113xdr_error: \
114 dprintk("NFSD: xdr error! (%s:%d)\n", __FILE__, __LINE__); \
115 status = -EIO; \
116 goto out
117
118#define READ32(x) (x) = ntohl(*p++)
119#define READ64(x) do { \
120 (x) = (u64)ntohl(*p++) << 32; \
121 (x) |= ntohl(*p++); \
122} while (0)
123#define READTIME(x) do { \
124 p++; \
125 (x.tv_sec) = ntohl(*p++); \
126 (x.tv_nsec) = ntohl(*p++); \
127} while (0)
128#define READ_BUF(nbytes) do { \
129 p = xdr_inline_decode(xdr, nbytes); \
130 if (!p) { \
131 dprintk("NFSD: %s: reply buffer overflowed in line %d.\n", \
132 __func__, __LINE__); \
133 return -EIO; \
134 } \
135} while (0)
136
137struct nfs4_cb_compound_hdr { 77struct nfs4_cb_compound_hdr {
138 /* args */ 78 /* args */
139 u32 ident; /* minorversion 0 only */ 79 u32 ident; /* minorversion 0 only */
@@ -144,295 +84,513 @@ struct nfs4_cb_compound_hdr {
144 int status; 84 int status;
145}; 85};
146 86
147static struct { 87/*
148int stat; 88 * Handle decode buffer overflows out-of-line.
149int errno; 89 */
150} nfs_cb_errtbl[] = { 90static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
151 { NFS4_OK, 0 }, 91{
152 { NFS4ERR_PERM, EPERM }, 92 dprintk("NFS: %s prematurely hit the end of our receive buffer. "
153 { NFS4ERR_NOENT, ENOENT }, 93 "Remaining buffer length is %tu words.\n",
154 { NFS4ERR_IO, EIO }, 94 func, xdr->end - xdr->p);
155 { NFS4ERR_NXIO, ENXIO }, 95}
156 { NFS4ERR_ACCESS, EACCES },
157 { NFS4ERR_EXIST, EEXIST },
158 { NFS4ERR_XDEV, EXDEV },
159 { NFS4ERR_NOTDIR, ENOTDIR },
160 { NFS4ERR_ISDIR, EISDIR },
161 { NFS4ERR_INVAL, EINVAL },
162 { NFS4ERR_FBIG, EFBIG },
163 { NFS4ERR_NOSPC, ENOSPC },
164 { NFS4ERR_ROFS, EROFS },
165 { NFS4ERR_MLINK, EMLINK },
166 { NFS4ERR_NAMETOOLONG, ENAMETOOLONG },
167 { NFS4ERR_NOTEMPTY, ENOTEMPTY },
168 { NFS4ERR_DQUOT, EDQUOT },
169 { NFS4ERR_STALE, ESTALE },
170 { NFS4ERR_BADHANDLE, EBADHANDLE },
171 { NFS4ERR_BAD_COOKIE, EBADCOOKIE },
172 { NFS4ERR_NOTSUPP, ENOTSUPP },
173 { NFS4ERR_TOOSMALL, ETOOSMALL },
174 { NFS4ERR_SERVERFAULT, ESERVERFAULT },
175 { NFS4ERR_BADTYPE, EBADTYPE },
176 { NFS4ERR_LOCKED, EAGAIN },
177 { NFS4ERR_RESOURCE, EREMOTEIO },
178 { NFS4ERR_SYMLINK, ELOOP },
179 { NFS4ERR_OP_ILLEGAL, EOPNOTSUPP },
180 { NFS4ERR_DEADLOCK, EDEADLK },
181 { -1, EIO }
182};
183 96
184static int 97static __be32 *xdr_encode_empty_array(__be32 *p)
185nfs_cb_stat_to_errno(int stat)
186{ 98{
187 int i; 99 *p++ = xdr_zero;
188 for (i = 0; nfs_cb_errtbl[i].stat != -1; i++) { 100 return p;
189 if (nfs_cb_errtbl[i].stat == stat)
190 return nfs_cb_errtbl[i].errno;
191 }
192 /* If we cannot translate the error, the recovery routines should
193 * handle it.
194 * Note: remaining NFSv4 error codes have values > 10000, so should
195 * not conflict with native Linux error codes.
196 */
197 return stat;
198} 101}
199 102
200/* 103/*
201 * XDR encode 104 * Encode/decode NFSv4 CB basic data types
105 *
106 * Basic NFSv4 callback data types are defined in section 15 of RFC
107 * 3530: "Network File System (NFS) version 4 Protocol" and section
108 * 20 of RFC 5661: "Network File System (NFS) Version 4 Minor Version
109 * 1 Protocol"
110 */
111
112/*
113 * nfs_cb_opnum4
114 *
115 * enum nfs_cb_opnum4 {
116 * OP_CB_GETATTR = 3,
117 * ...
118 * };
202 */ 119 */
120enum nfs_cb_opnum4 {
121 OP_CB_GETATTR = 3,
122 OP_CB_RECALL = 4,
123 OP_CB_LAYOUTRECALL = 5,
124 OP_CB_NOTIFY = 6,
125 OP_CB_PUSH_DELEG = 7,
126 OP_CB_RECALL_ANY = 8,
127 OP_CB_RECALLABLE_OBJ_AVAIL = 9,
128 OP_CB_RECALL_SLOT = 10,
129 OP_CB_SEQUENCE = 11,
130 OP_CB_WANTS_CANCELLED = 12,
131 OP_CB_NOTIFY_LOCK = 13,
132 OP_CB_NOTIFY_DEVICEID = 14,
133 OP_CB_ILLEGAL = 10044
134};
203 135
204static void 136static void encode_nfs_cb_opnum4(struct xdr_stream *xdr, enum nfs_cb_opnum4 op)
205encode_stateid(struct xdr_stream *xdr, stateid_t *sid)
206{ 137{
207 __be32 *p; 138 __be32 *p;
208 139
209 RESERVE_SPACE(sizeof(stateid_t)); 140 p = xdr_reserve_space(xdr, 4);
210 WRITE32(sid->si_generation); 141 *p = cpu_to_be32(op);
211 WRITEMEM(&sid->si_opaque, sizeof(stateid_opaque_t));
212} 142}
213 143
214static void 144/*
215encode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr) 145 * nfs_fh4
146 *
147 * typedef opaque nfs_fh4<NFS4_FHSIZE>;
148 */
149static void encode_nfs_fh4(struct xdr_stream *xdr, const struct knfsd_fh *fh)
216{ 150{
217 __be32 * p; 151 u32 length = fh->fh_size;
152 __be32 *p;
218 153
219 RESERVE_SPACE(16); 154 BUG_ON(length > NFS4_FHSIZE);
220 WRITE32(0); /* tag length is always 0 */ 155 p = xdr_reserve_space(xdr, 4 + length);
221 WRITE32(hdr->minorversion); 156 xdr_encode_opaque(p, &fh->fh_base, length);
222 WRITE32(hdr->ident);
223 hdr->nops_p = p;
224 WRITE32(hdr->nops);
225} 157}
226 158
227static void encode_cb_nops(struct nfs4_cb_compound_hdr *hdr) 159/*
160 * stateid4
161 *
162 * struct stateid4 {
163 * uint32_t seqid;
164 * opaque other[12];
165 * };
166 */
167static void encode_stateid4(struct xdr_stream *xdr, const stateid_t *sid)
228{ 168{
229 *hdr->nops_p = htonl(hdr->nops); 169 __be32 *p;
170
171 p = xdr_reserve_space(xdr, NFS4_STATEID_SIZE);
172 *p++ = cpu_to_be32(sid->si_generation);
173 xdr_encode_opaque_fixed(p, &sid->si_opaque, NFS4_STATEID_OTHER_SIZE);
230} 174}
231 175
232static void 176/*
233encode_cb_recall(struct xdr_stream *xdr, struct nfs4_delegation *dp, 177 * sessionid4
234 struct nfs4_cb_compound_hdr *hdr) 178 *
179 * typedef opaque sessionid4[NFS4_SESSIONID_SIZE];
180 */
181static void encode_sessionid4(struct xdr_stream *xdr,
182 const struct nfsd4_session *session)
235{ 183{
236 __be32 *p; 184 __be32 *p;
237 int len = dp->dl_fh.fh_size; 185
238 186 p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN);
239 RESERVE_SPACE(4); 187 xdr_encode_opaque_fixed(p, session->se_sessionid.data,
240 WRITE32(OP_CB_RECALL); 188 NFS4_MAX_SESSIONID_LEN);
241 encode_stateid(xdr, &dp->dl_stateid);
242 RESERVE_SPACE(8 + (XDR_QUADLEN(len) << 2));
243 WRITE32(0); /* truncate optimization not implemented */
244 WRITE32(len);
245 WRITEMEM(&dp->dl_fh.fh_base, len);
246 hdr->nops++;
247} 189}
248 190
249static void 191/*
250encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_callback *cb, 192 * nfsstat4
251 struct nfs4_cb_compound_hdr *hdr) 193 */
252{ 194static const struct {
253 __be32 *p; 195 int stat;
254 struct nfsd4_session *ses = cb->cb_clp->cl_cb_session; 196 int errno;
197} nfs_cb_errtbl[] = {
198 { NFS4_OK, 0 },
199 { NFS4ERR_PERM, -EPERM },
200 { NFS4ERR_NOENT, -ENOENT },
201 { NFS4ERR_IO, -EIO },
202 { NFS4ERR_NXIO, -ENXIO },
203 { NFS4ERR_ACCESS, -EACCES },
204 { NFS4ERR_EXIST, -EEXIST },
205 { NFS4ERR_XDEV, -EXDEV },
206 { NFS4ERR_NOTDIR, -ENOTDIR },
207 { NFS4ERR_ISDIR, -EISDIR },
208 { NFS4ERR_INVAL, -EINVAL },
209 { NFS4ERR_FBIG, -EFBIG },
210 { NFS4ERR_NOSPC, -ENOSPC },
211 { NFS4ERR_ROFS, -EROFS },
212 { NFS4ERR_MLINK, -EMLINK },
213 { NFS4ERR_NAMETOOLONG, -ENAMETOOLONG },
214 { NFS4ERR_NOTEMPTY, -ENOTEMPTY },
215 { NFS4ERR_DQUOT, -EDQUOT },
216 { NFS4ERR_STALE, -ESTALE },
217 { NFS4ERR_BADHANDLE, -EBADHANDLE },
218 { NFS4ERR_BAD_COOKIE, -EBADCOOKIE },
219 { NFS4ERR_NOTSUPP, -ENOTSUPP },
220 { NFS4ERR_TOOSMALL, -ETOOSMALL },
221 { NFS4ERR_SERVERFAULT, -ESERVERFAULT },
222 { NFS4ERR_BADTYPE, -EBADTYPE },
223 { NFS4ERR_LOCKED, -EAGAIN },
224 { NFS4ERR_RESOURCE, -EREMOTEIO },
225 { NFS4ERR_SYMLINK, -ELOOP },
226 { NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP },
227 { NFS4ERR_DEADLOCK, -EDEADLK },
228 { -1, -EIO }
229};
255 230
256 if (hdr->minorversion == 0) 231/*
257 return; 232 * If we cannot translate the error, the recovery routines should
233 * handle it.
234 *
235 * Note: remaining NFSv4 error codes have values > 10000, so should
236 * not conflict with native Linux error codes.
237 */
238static int nfs_cb_stat_to_errno(int status)
239{
240 int i;
258 241
259 RESERVE_SPACE(1 + NFS4_MAX_SESSIONID_LEN + 20); 242 for (i = 0; nfs_cb_errtbl[i].stat != -1; i++) {
243 if (nfs_cb_errtbl[i].stat == status)
244 return nfs_cb_errtbl[i].errno;
245 }
260 246
261 WRITE32(OP_CB_SEQUENCE); 247 dprintk("NFSD: Unrecognized NFS CB status value: %u\n", status);
262 WRITEMEM(ses->se_sessionid.data, NFS4_MAX_SESSIONID_LEN); 248 return -status;
263 WRITE32(ses->se_cb_seq_nr);
264 WRITE32(0); /* slotid, always 0 */
265 WRITE32(0); /* highest slotid always 0 */
266 WRITE32(0); /* cachethis always 0 */
267 WRITE32(0); /* FIXME: support referring_call_lists */
268 hdr->nops++;
269} 249}
270 250
271static int 251static int decode_cb_op_status(struct xdr_stream *xdr, enum nfs_opnum4 expected,
272nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p) 252 enum nfsstat4 *status)
273{ 253{
274 struct xdr_stream xdrs, *xdr = &xdrs; 254 __be32 *p;
255 u32 op;
275 256
276 xdr_init_encode(&xdrs, &req->rq_snd_buf, p); 257 p = xdr_inline_decode(xdr, 4 + 4);
277 RESERVE_SPACE(0); 258 if (unlikely(p == NULL))
259 goto out_overflow;
260 op = be32_to_cpup(p++);
261 if (unlikely(op != expected))
262 goto out_unexpected;
263 *status = be32_to_cpup(p);
278 return 0; 264 return 0;
265out_overflow:
266 print_overflow_msg(__func__, xdr);
267 return -EIO;
268out_unexpected:
269 dprintk("NFSD: Callback server returned operation %d but "
270 "we issued a request for %d\n", op, expected);
271 return -EIO;
279} 272}
280 273
281static int 274/*
282nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, 275 * CB_COMPOUND4args
283 struct nfsd4_callback *cb) 276 *
277 * struct CB_COMPOUND4args {
278 * utf8str_cs tag;
279 * uint32_t minorversion;
280 * uint32_t callback_ident;
281 * nfs_cb_argop4 argarray<>;
282 * };
283*/
284static void encode_cb_compound4args(struct xdr_stream *xdr,
285 struct nfs4_cb_compound_hdr *hdr)
284{ 286{
285 struct xdr_stream xdr; 287 __be32 * p;
286 struct nfs4_delegation *args = cb->cb_op;
287 struct nfs4_cb_compound_hdr hdr = {
288 .ident = cb->cb_clp->cl_cb_ident,
289 .minorversion = cb->cb_minorversion,
290 };
291 288
292 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 289 p = xdr_reserve_space(xdr, 4 + 4 + 4 + 4);
293 encode_cb_compound_hdr(&xdr, &hdr); 290 p = xdr_encode_empty_array(p); /* empty tag */
294 encode_cb_sequence(&xdr, cb, &hdr); 291 *p++ = cpu_to_be32(hdr->minorversion);
295 encode_cb_recall(&xdr, args, &hdr); 292 *p++ = cpu_to_be32(hdr->ident);
296 encode_cb_nops(&hdr); 293
294 hdr->nops_p = p;
295 *p = cpu_to_be32(hdr->nops); /* argarray element count */
296}
297
298/*
299 * Update argarray element count
300 */
301static void encode_cb_nops(struct nfs4_cb_compound_hdr *hdr)
302{
303 BUG_ON(hdr->nops > NFS4_MAX_BACK_CHANNEL_OPS); /* refuse to emit a count above the back-channel limit */
304 *hdr->nops_p = cpu_to_be32(hdr->nops); /* overwrite the count slot that encode_cb_compound4args() saved in nops_p */
305}
306
307/*
308 * CB_COMPOUND4res
309 *
310 * struct CB_COMPOUND4res {
311 * nfsstat4 status;
312 * utf8str_cs tag;
313 * nfs_cb_resop4 resarray<>;
314 * };
315 */
316static int decode_cb_compound4res(struct xdr_stream *xdr,
317 struct nfs4_cb_compound_hdr *hdr)
318{
319 u32 length;
320 __be32 *p;
321
322 p = xdr_inline_decode(xdr, 4 + 4);
323 if (unlikely(p == NULL))
324 goto out_overflow;
325 hdr->status = be32_to_cpup(p++);
326 /* Ignore the tag */
327 length = be32_to_cpup(p++);
328 p = xdr_inline_decode(xdr, length + 4);
329 if (unlikely(p == NULL))
330 goto out_overflow;
331 hdr->nops = be32_to_cpup(p);
297 return 0; 332 return 0;
333out_overflow:
334 print_overflow_msg(__func__, xdr);
335 return -EIO;
298} 336}
299 337
338/*
339 * CB_RECALL4args
340 *
341 * struct CB_RECALL4args {
342 * stateid4 stateid;
343 * bool truncate;
344 * nfs_fh4 fh;
345 * };
346 */
347static void encode_cb_recall4args(struct xdr_stream *xdr,
348 const struct nfs4_delegation *dp,
349 struct nfs4_cb_compound_hdr *hdr)
350{
351 __be32 *p;
352
353 encode_nfs_cb_opnum4(xdr, OP_CB_RECALL);
354 encode_stateid4(xdr, &dp->dl_stateid);
355
356 p = xdr_reserve_space(xdr, 4);
357 *p++ = xdr_zero; /* truncate */
300 358
301static int 359 encode_nfs_fh4(xdr, &dp->dl_fh);
302decode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr){
303 __be32 *p;
304 u32 taglen;
305 360
306 READ_BUF(8); 361 hdr->nops++;
307 READ32(hdr->status);
308 /* We've got no use for the tag; ignore it: */
309 READ32(taglen);
310 READ_BUF(taglen + 4);
311 p += XDR_QUADLEN(taglen);
312 READ32(hdr->nops);
313 return 0;
314} 362}
315 363
316static int 364/*
317decode_cb_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) 365 * CB_SEQUENCE4args
366 *
367 * struct CB_SEQUENCE4args {
368 * sessionid4 csa_sessionid;
369 * sequenceid4 csa_sequenceid;
370 * slotid4 csa_slotid;
371 * slotid4 csa_highest_slotid;
372 * bool csa_cachethis;
373 * referring_call_list4 csa_referring_call_lists<>;
374 * };
375 */
376static void encode_cb_sequence4args(struct xdr_stream *xdr,
377 const struct nfsd4_callback *cb,
378 struct nfs4_cb_compound_hdr *hdr)
318{ 379{
380 struct nfsd4_session *session = cb->cb_clp->cl_cb_session;
319 __be32 *p; 381 __be32 *p;
320 u32 op; 382
321 int32_t nfserr; 383 if (hdr->minorversion == 0)
322 384 return;
323 READ_BUF(8); 385
324 READ32(op); 386 encode_nfs_cb_opnum4(xdr, OP_CB_SEQUENCE);
325 if (op != expected) { 387 encode_sessionid4(xdr, session);
326 dprintk("NFSD: decode_cb_op_hdr: Callback server returned " 388
327 " operation %d but we issued a request for %d\n", 389 p = xdr_reserve_space(xdr, 4 + 4 + 4 + 4 + 4);
328 op, expected); 390 *p++ = cpu_to_be32(session->se_cb_seq_nr); /* csa_sequenceid */
329 return -EIO; 391 *p++ = xdr_zero; /* csa_slotid */
330 } 392 *p++ = xdr_zero; /* csa_highest_slotid */
331 READ32(nfserr); 393 *p++ = xdr_zero; /* csa_cachethis */
332 if (nfserr != NFS_OK) 394 xdr_encode_empty_array(p); /* csa_referring_call_lists */
333 return -nfs_cb_stat_to_errno(nfserr); 395
334 return 0; 396 hdr->nops++;
335} 397}
336 398
337/* 399/*
400 * CB_SEQUENCE4resok
401 *
402 * struct CB_SEQUENCE4resok {
403 * sessionid4 csr_sessionid;
404 * sequenceid4 csr_sequenceid;
405 * slotid4 csr_slotid;
406 * slotid4 csr_highest_slotid;
407 * slotid4 csr_target_highest_slotid;
408 * };
409 *
410 * union CB_SEQUENCE4res switch (nfsstat4 csr_status) {
411 * case NFS4_OK:
412 * CB_SEQUENCE4resok csr_resok4;
413 * default:
414 * void;
415 * };
416 *
338 * Our current back channel implmentation supports a single backchannel 417 * Our current back channel implmentation supports a single backchannel
339 * with a single slot. 418 * with a single slot.
340 */ 419 */
341static int 420static int decode_cb_sequence4resok(struct xdr_stream *xdr,
342decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_callback *cb, 421 struct nfsd4_callback *cb)
343 struct rpc_rqst *rqstp)
344{ 422{
345 struct nfsd4_session *ses = cb->cb_clp->cl_cb_session; 423 struct nfsd4_session *session = cb->cb_clp->cl_cb_session;
346 struct nfs4_sessionid id; 424 struct nfs4_sessionid id;
347 int status; 425 int status;
348 u32 dummy;
349 __be32 *p; 426 __be32 *p;
427 u32 dummy;
350 428
351 if (cb->cb_minorversion == 0) 429 status = -ESERVERFAULT;
352 return 0;
353
354 status = decode_cb_op_hdr(xdr, OP_CB_SEQUENCE);
355 if (status)
356 return status;
357 430
358 /* 431 /*
359 * If the server returns different values for sessionID, slotID or 432 * If the server returns different values for sessionID, slotID or
360 * sequence number, the server is looney tunes. 433 * sequence number, the server is looney tunes.
361 */ 434 */
362 status = -ESERVERFAULT; 435 p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4);
363 436 if (unlikely(p == NULL))
364 READ_BUF(NFS4_MAX_SESSIONID_LEN + 16); 437 goto out_overflow;
365 memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN); 438 memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN);
366 p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN); 439 if (memcmp(id.data, session->se_sessionid.data,
367 if (memcmp(id.data, ses->se_sessionid.data, NFS4_MAX_SESSIONID_LEN)) { 440 NFS4_MAX_SESSIONID_LEN) != 0) {
368 dprintk("%s Invalid session id\n", __func__); 441 dprintk("NFS: %s Invalid session id\n", __func__);
369 goto out; 442 goto out;
370 } 443 }
371 READ32(dummy); 444 p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN);
372 if (dummy != ses->se_cb_seq_nr) { 445
373 dprintk("%s Invalid sequence number\n", __func__); 446 dummy = be32_to_cpup(p++);
447 if (dummy != session->se_cb_seq_nr) {
448 dprintk("NFS: %s Invalid sequence number\n", __func__);
374 goto out; 449 goto out;
375 } 450 }
376 READ32(dummy); /* slotid must be 0 */ 451
452 dummy = be32_to_cpup(p++);
377 if (dummy != 0) { 453 if (dummy != 0) {
378 dprintk("%s Invalid slotid\n", __func__); 454 dprintk("NFS: %s Invalid slotid\n", __func__);
379 goto out; 455 goto out;
380 } 456 }
381 /* FIXME: process highest slotid and target highest slotid */ 457
458 /*
459 * FIXME: process highest slotid and target highest slotid
460 */
382 status = 0; 461 status = 0;
383out: 462out:
384 return status; 463 return status;
464out_overflow:
465 print_overflow_msg(__func__, xdr);
466 return -EIO;
385} 467}
386 468
469static int decode_cb_sequence4res(struct xdr_stream *xdr,
470 struct nfsd4_callback *cb)
471{
472 enum nfsstat4 nfserr;
473 int status;
474
475 if (cb->cb_minorversion == 0)
476 return 0;
477
478 status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &nfserr);
479 if (unlikely(status))
480 goto out;
481 if (unlikely(nfserr != NFS4_OK))
482 goto out_default;
483 status = decode_cb_sequence4resok(xdr, cb);
484out:
485 return status;
486out_default:
487 return nfs_cb_stat_to_errno(status);
488}
387 489
388static int 490/*
389nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p) 491 * NFSv4.0 and NFSv4.1 XDR encode functions
492 *
493 * NFSv4.0 callback argument types are defined in section 15 of RFC
494 * 3530: "Network File System (NFS) version 4 Protocol" and section 20
495 * of RFC 5661: "Network File System (NFS) Version 4 Minor Version 1
496 * Protocol".
497 */
498
499/*
500 * NB: Without this zero space reservation, callbacks over krb5p fail
501 */
502static void nfs4_xdr_enc_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
503 void *__unused)
504{
505 xdr_reserve_space(xdr, 0);
506}
507
508/*
509 * 20.2. Operation 4: CB_RECALL - Recall a Delegation
510 */
511static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
512 const struct nfsd4_callback *cb)
513{
514 const struct nfs4_delegation *args = cb->cb_op;
515 struct nfs4_cb_compound_hdr hdr = {
516 .ident = cb->cb_clp->cl_cb_ident,
517 .minorversion = cb->cb_minorversion,
518 };
519
520 encode_cb_compound4args(xdr, &hdr);
521 encode_cb_sequence4args(xdr, cb, &hdr);
522 encode_cb_recall4args(xdr, args, &hdr);
523 encode_cb_nops(&hdr);
524}
525
526
527/*
528 * NFSv4.0 and NFSv4.1 XDR decode functions
529 *
530 * NFSv4.0 callback result types are defined in section 15 of RFC
531 * 3530: "Network File System (NFS) version 4 Protocol" and section 20
532 * of RFC 5661: "Network File System (NFS) Version 4 Minor Version 1
533 * Protocol".
534 */
535
536static int nfs4_xdr_dec_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
537 void *__unused)
390{ 538{
391 return 0; 539 return 0;
392} 540}
393 541
394static int 542/*
395nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p, 543 * 20.2. Operation 4: CB_RECALL - Recall a Delegation
396 struct nfsd4_callback *cb) 544 */
545static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
546 struct xdr_stream *xdr,
547 struct nfsd4_callback *cb)
397{ 548{
398 struct xdr_stream xdr;
399 struct nfs4_cb_compound_hdr hdr; 549 struct nfs4_cb_compound_hdr hdr;
550 enum nfsstat4 nfserr;
400 int status; 551 int status;
401 552
402 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 553 status = decode_cb_compound4res(xdr, &hdr);
403 status = decode_cb_compound_hdr(&xdr, &hdr); 554 if (unlikely(status))
404 if (status)
405 goto out; 555 goto out;
406 if (cb) { 556
407 status = decode_cb_sequence(&xdr, cb, rqstp); 557 if (cb != NULL) {
408 if (status) 558 status = decode_cb_sequence4res(xdr, cb);
559 if (unlikely(status))
409 goto out; 560 goto out;
410 } 561 }
411 status = decode_cb_op_hdr(&xdr, OP_CB_RECALL); 562
563 status = decode_cb_op_status(xdr, OP_CB_RECALL, &nfserr);
564 if (unlikely(status))
565 goto out;
566 if (unlikely(nfserr != NFS4_OK))
567 goto out_default;
412out: 568out:
413 return status; 569 return status;
570out_default:
571 return nfs_cb_stat_to_errno(status);
414} 572}
415 573
416/* 574/*
417 * RPC procedure tables 575 * RPC procedure tables
418 */ 576 */
419#define PROC(proc, call, argtype, restype) \ 577#define PROC(proc, call, argtype, restype) \
420[NFSPROC4_CLNT_##proc] = { \ 578[NFSPROC4_CLNT_##proc] = { \
421 .p_proc = NFSPROC4_CB_##call, \ 579 .p_proc = NFSPROC4_CB_##call, \
422 .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \ 580 .p_encode = (kxdreproc_t)nfs4_xdr_enc_##argtype, \
423 .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \ 581 .p_decode = (kxdrdproc_t)nfs4_xdr_dec_##restype, \
424 .p_arglen = NFS4_##argtype##_sz, \ 582 .p_arglen = NFS4_enc_##argtype##_sz, \
425 .p_replen = NFS4_##restype##_sz, \ 583 .p_replen = NFS4_dec_##restype##_sz, \
426 .p_statidx = NFSPROC4_CB_##call, \ 584 .p_statidx = NFSPROC4_CB_##call, \
427 .p_name = #proc, \ 585 .p_name = #proc, \
428} 586}
429 587
430static struct rpc_procinfo nfs4_cb_procedures[] = { 588static struct rpc_procinfo nfs4_cb_procedures[] = {
431 PROC(CB_NULL, NULL, enc_cb_null, dec_cb_null), 589 PROC(CB_NULL, NULL, cb_null, cb_null),
432 PROC(CB_RECALL, COMPOUND, enc_cb_recall, dec_cb_recall), 590 PROC(CB_RECALL, COMPOUND, cb_recall, cb_recall),
433}; 591};
434 592
435static struct rpc_version nfs_cb_version4 = { 593static struct rpc_version nfs_cb_version4 = {
436/* 594/*
437 * Note on the callback rpc program version number: despite language in rfc 595 * Note on the callback rpc program version number: despite language in rfc
438 * 5661 section 18.36.3 requiring servers to use 4 in this field, the 596 * 5661 section 18.36.3 requiring servers to use 4 in this field, the
@@ -440,29 +598,29 @@ static struct rpc_version nfs_cb_version4 = {
440 * in practice that appears to be what implementations use. The section 598 * in practice that appears to be what implementations use. The section
441 * 18.36.3 language is expected to be fixed in an erratum. 599 * 18.36.3 language is expected to be fixed in an erratum.
442 */ 600 */
443 .number = 1, 601 .number = 1,
444 .nrprocs = ARRAY_SIZE(nfs4_cb_procedures), 602 .nrprocs = ARRAY_SIZE(nfs4_cb_procedures),
445 .procs = nfs4_cb_procedures 603 .procs = nfs4_cb_procedures
446}; 604};
447 605
448static struct rpc_version * nfs_cb_version[] = { 606static struct rpc_version *nfs_cb_version[] = {
449 &nfs_cb_version4, 607 &nfs_cb_version4,
450}; 608};
451 609
452static struct rpc_program cb_program; 610static struct rpc_program cb_program;
453 611
454static struct rpc_stat cb_stats = { 612static struct rpc_stat cb_stats = {
455 .program = &cb_program 613 .program = &cb_program
456}; 614};
457 615
458#define NFS4_CALLBACK 0x40000000 616#define NFS4_CALLBACK 0x40000000
459static struct rpc_program cb_program = { 617static struct rpc_program cb_program = {
460 .name = "nfs4_cb", 618 .name = "nfs4_cb",
461 .number = NFS4_CALLBACK, 619 .number = NFS4_CALLBACK,
462 .nrvers = ARRAY_SIZE(nfs_cb_version), 620 .nrvers = ARRAY_SIZE(nfs_cb_version),
463 .version = nfs_cb_version, 621 .version = nfs_cb_version,
464 .stats = &cb_stats, 622 .stats = &cb_stats,
465 .pipe_dir_name = "/nfsd4_cb", 623 .pipe_dir_name = "/nfsd4_cb",
466}; 624};
467 625
468static int max_cb_time(void) 626static int max_cb_time(void)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 116cab970e0..fbd18c3074b 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4336,7 +4336,7 @@ __nfs4_state_shutdown(void)
4336void 4336void
4337nfs4_state_shutdown(void) 4337nfs4_state_shutdown(void)
4338{ 4338{
4339 cancel_rearming_delayed_workqueue(laundry_wq, &laundromat_work); 4339 cancel_delayed_work_sync(&laundromat_work);
4340 destroy_workqueue(laundry_wq); 4340 destroy_workqueue(laundry_wq);
4341 locks_end_grace(&nfsd4_manager); 4341 locks_end_grace(&nfsd4_manager);
4342 nfs4_lock_state(); 4342 nfs4_lock_state();
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 184938fcff0..3a359023c9f 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1756,8 +1756,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1756 goto out_dput_new; 1756 goto out_dput_new;
1757 1757
1758 if (svc_msnfs(ffhp) && 1758 if (svc_msnfs(ffhp) &&
1759 ((atomic_read(&odentry->d_count) > 1) 1759 ((odentry->d_count > 1) || (ndentry->d_count > 1))) {
1760 || (atomic_read(&ndentry->d_count) > 1))) {
1761 host_err = -EPERM; 1760 host_err = -EPERM;
1762 goto out_dput_new; 1761 goto out_dput_new;
1763 } 1762 }
@@ -1843,7 +1842,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
1843 if (type != S_IFDIR) { /* It's UNLINK */ 1842 if (type != S_IFDIR) { /* It's UNLINK */
1844#ifdef MSNFS 1843#ifdef MSNFS
1845 if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && 1844 if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
1846 (atomic_read(&rdentry->d_count) > 1)) { 1845 (rdentry->d_count > 1)) {
1847 host_err = -EPERM; 1846 host_err = -EPERM;
1848 } else 1847 } else
1849#endif 1848#endif
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index 8b782b062ba..3ee67c67cc5 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -35,7 +35,20 @@
35 35
36struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap) 36struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap)
37{ 37{
38 return nilfs_dat_inode(NILFS_I_NILFS(bmap->b_inode)); 38 return NILFS_I_NILFS(bmap->b_inode)->ns_dat;
39}
40
41static int nilfs_bmap_convert_error(struct nilfs_bmap *bmap,
42 const char *fname, int err)
43{
44 struct inode *inode = bmap->b_inode;
45
46 if (err == -EINVAL) {
47 nilfs_error(inode->i_sb, fname,
48 "broken bmap (inode number=%lu)\n", inode->i_ino);
49 err = -EIO;
50 }
51 return err;
39} 52}
40 53
41/** 54/**
@@ -66,8 +79,10 @@ int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level,
66 79
67 down_read(&bmap->b_sem); 80 down_read(&bmap->b_sem);
68 ret = bmap->b_ops->bop_lookup(bmap, key, level, ptrp); 81 ret = bmap->b_ops->bop_lookup(bmap, key, level, ptrp);
69 if (ret < 0) 82 if (ret < 0) {
83 ret = nilfs_bmap_convert_error(bmap, __func__, ret);
70 goto out; 84 goto out;
85 }
71 if (NILFS_BMAP_USE_VBN(bmap)) { 86 if (NILFS_BMAP_USE_VBN(bmap)) {
72 ret = nilfs_dat_translate(nilfs_bmap_get_dat(bmap), *ptrp, 87 ret = nilfs_dat_translate(nilfs_bmap_get_dat(bmap), *ptrp,
73 &blocknr); 88 &blocknr);
@@ -88,7 +103,8 @@ int nilfs_bmap_lookup_contig(struct nilfs_bmap *bmap, __u64 key, __u64 *ptrp,
88 down_read(&bmap->b_sem); 103 down_read(&bmap->b_sem);
89 ret = bmap->b_ops->bop_lookup_contig(bmap, key, ptrp, maxblocks); 104 ret = bmap->b_ops->bop_lookup_contig(bmap, key, ptrp, maxblocks);
90 up_read(&bmap->b_sem); 105 up_read(&bmap->b_sem);
91 return ret; 106
107 return nilfs_bmap_convert_error(bmap, __func__, ret);
92} 108}
93 109
94static int nilfs_bmap_do_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) 110static int nilfs_bmap_do_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
@@ -144,7 +160,8 @@ int nilfs_bmap_insert(struct nilfs_bmap *bmap,
144 down_write(&bmap->b_sem); 160 down_write(&bmap->b_sem);
145 ret = nilfs_bmap_do_insert(bmap, key, rec); 161 ret = nilfs_bmap_do_insert(bmap, key, rec);
146 up_write(&bmap->b_sem); 162 up_write(&bmap->b_sem);
147 return ret; 163
164 return nilfs_bmap_convert_error(bmap, __func__, ret);
148} 165}
149 166
150static int nilfs_bmap_do_delete(struct nilfs_bmap *bmap, __u64 key) 167static int nilfs_bmap_do_delete(struct nilfs_bmap *bmap, __u64 key)
@@ -180,9 +197,12 @@ int nilfs_bmap_last_key(struct nilfs_bmap *bmap, unsigned long *key)
180 197
181 down_read(&bmap->b_sem); 198 down_read(&bmap->b_sem);
182 ret = bmap->b_ops->bop_last_key(bmap, &lastkey); 199 ret = bmap->b_ops->bop_last_key(bmap, &lastkey);
183 if (!ret)
184 *key = lastkey;
185 up_read(&bmap->b_sem); 200 up_read(&bmap->b_sem);
201
202 if (ret < 0)
203 ret = nilfs_bmap_convert_error(bmap, __func__, ret);
204 else
205 *key = lastkey;
186 return ret; 206 return ret;
187} 207}
188 208
@@ -210,7 +230,8 @@ int nilfs_bmap_delete(struct nilfs_bmap *bmap, unsigned long key)
210 down_write(&bmap->b_sem); 230 down_write(&bmap->b_sem);
211 ret = nilfs_bmap_do_delete(bmap, key); 231 ret = nilfs_bmap_do_delete(bmap, key);
212 up_write(&bmap->b_sem); 232 up_write(&bmap->b_sem);
213 return ret; 233
234 return nilfs_bmap_convert_error(bmap, __func__, ret);
214} 235}
215 236
216static int nilfs_bmap_do_truncate(struct nilfs_bmap *bmap, unsigned long key) 237static int nilfs_bmap_do_truncate(struct nilfs_bmap *bmap, unsigned long key)
@@ -261,7 +282,8 @@ int nilfs_bmap_truncate(struct nilfs_bmap *bmap, unsigned long key)
261 down_write(&bmap->b_sem); 282 down_write(&bmap->b_sem);
262 ret = nilfs_bmap_do_truncate(bmap, key); 283 ret = nilfs_bmap_do_truncate(bmap, key);
263 up_write(&bmap->b_sem); 284 up_write(&bmap->b_sem);
264 return ret; 285
286 return nilfs_bmap_convert_error(bmap, __func__, ret);
265} 287}
266 288
267/** 289/**
@@ -300,7 +322,8 @@ int nilfs_bmap_propagate(struct nilfs_bmap *bmap, struct buffer_head *bh)
300 down_write(&bmap->b_sem); 322 down_write(&bmap->b_sem);
301 ret = bmap->b_ops->bop_propagate(bmap, bh); 323 ret = bmap->b_ops->bop_propagate(bmap, bh);
302 up_write(&bmap->b_sem); 324 up_write(&bmap->b_sem);
303 return ret; 325
326 return nilfs_bmap_convert_error(bmap, __func__, ret);
304} 327}
305 328
306/** 329/**
@@ -344,7 +367,8 @@ int nilfs_bmap_assign(struct nilfs_bmap *bmap,
344 down_write(&bmap->b_sem); 367 down_write(&bmap->b_sem);
345 ret = bmap->b_ops->bop_assign(bmap, bh, blocknr, binfo); 368 ret = bmap->b_ops->bop_assign(bmap, bh, blocknr, binfo);
346 up_write(&bmap->b_sem); 369 up_write(&bmap->b_sem);
347 return ret; 370
371 return nilfs_bmap_convert_error(bmap, __func__, ret);
348} 372}
349 373
350/** 374/**
@@ -373,7 +397,8 @@ int nilfs_bmap_mark(struct nilfs_bmap *bmap, __u64 key, int level)
373 down_write(&bmap->b_sem); 397 down_write(&bmap->b_sem);
374 ret = bmap->b_ops->bop_mark(bmap, key, level); 398 ret = bmap->b_ops->bop_mark(bmap, key, level);
375 up_write(&bmap->b_sem); 399 up_write(&bmap->b_sem);
376 return ret; 400
401 return nilfs_bmap_convert_error(bmap, __func__, ret);
377} 402}
378 403
379/** 404/**
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 5115814cb74..388e9e8f528 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -104,8 +104,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
104 if (pblocknr == 0) { 104 if (pblocknr == 0) {
105 pblocknr = blocknr; 105 pblocknr = blocknr;
106 if (inode->i_ino != NILFS_DAT_INO) { 106 if (inode->i_ino != NILFS_DAT_INO) {
107 struct inode *dat = 107 struct inode *dat = NILFS_I_NILFS(inode)->ns_dat;
108 nilfs_dat_inode(NILFS_I_NILFS(inode));
109 108
110 /* blocknr is a virtual block number */ 109 /* blocknr is a virtual block number */
111 err = nilfs_dat_translate(dat, blocknr, &pblocknr); 110 err = nilfs_dat_translate(dat, blocknr, &pblocknr);
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index cb003c8ee1f..9d45773b79e 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -91,7 +91,6 @@ static void nilfs_commit_chunk(struct page *page,
91 unsigned from, unsigned to) 91 unsigned from, unsigned to)
92{ 92{
93 struct inode *dir = mapping->host; 93 struct inode *dir = mapping->host;
94 struct nilfs_sb_info *sbi = NILFS_SB(dir->i_sb);
95 loff_t pos = page_offset(page) + from; 94 loff_t pos = page_offset(page) + from;
96 unsigned len = to - from; 95 unsigned len = to - from;
97 unsigned nr_dirty, copied; 96 unsigned nr_dirty, copied;
@@ -103,7 +102,7 @@ static void nilfs_commit_chunk(struct page *page,
103 i_size_write(dir, pos + copied); 102 i_size_write(dir, pos + copied);
104 if (IS_DIRSYNC(dir)) 103 if (IS_DIRSYNC(dir))
105 nilfs_set_transaction_flag(NILFS_TI_SYNC); 104 nilfs_set_transaction_flag(NILFS_TI_SYNC);
106 err = nilfs_set_file_dirty(sbi, dir, nr_dirty); 105 err = nilfs_set_file_dirty(dir, nr_dirty);
107 WARN_ON(err); /* do not happen */ 106 WARN_ON(err); /* do not happen */
108 unlock_page(page); 107 unlock_page(page);
109} 108}
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index c9a30d7ff6f..2f560c9fb80 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -155,6 +155,7 @@ const struct inode_operations nilfs_file_inode_operations = {
155 .truncate = nilfs_truncate, 155 .truncate = nilfs_truncate,
156 .setattr = nilfs_setattr, 156 .setattr = nilfs_setattr,
157 .permission = nilfs_permission, 157 .permission = nilfs_permission,
158 .fiemap = nilfs_fiemap,
158}; 159};
159 160
160/* end of file */ 161/* end of file */
diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c
index 9f8a2da67f9..bfc73d3a30e 100644
--- a/fs/nilfs2/ifile.c
+++ b/fs/nilfs2/ifile.c
@@ -149,14 +149,9 @@ int nilfs_ifile_get_inode_block(struct inode *ifile, ino_t ino,
149 } 149 }
150 150
151 err = nilfs_palloc_get_entry_block(ifile, ino, 0, out_bh); 151 err = nilfs_palloc_get_entry_block(ifile, ino, 0, out_bh);
152 if (unlikely(err)) { 152 if (unlikely(err))
153 if (err == -EINVAL) 153 nilfs_warning(sb, __func__, "unable to read inode: %lu",
154 nilfs_error(sb, __func__, "ifile is broken"); 154 (unsigned long) ino);
155 else
156 nilfs_warning(sb, __func__,
157 "unable to read inode: %lu",
158 (unsigned long) ino);
159 }
160 return err; 155 return err;
161} 156}
162 157
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 71d4bc8464e..2fd440d8d6b 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -58,7 +58,7 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff,
58 struct nilfs_inode_info *ii = NILFS_I(inode); 58 struct nilfs_inode_info *ii = NILFS_I(inode);
59 __u64 blknum = 0; 59 __u64 blknum = 0;
60 int err = 0, ret; 60 int err = 0, ret;
61 struct inode *dat = nilfs_dat_inode(NILFS_I_NILFS(inode)); 61 struct inode *dat = NILFS_I_NILFS(inode)->ns_dat;
62 unsigned maxblocks = bh_result->b_size >> inode->i_blkbits; 62 unsigned maxblocks = bh_result->b_size >> inode->i_blkbits;
63 63
64 down_read(&NILFS_MDT(dat)->mi_sem); 64 down_read(&NILFS_MDT(dat)->mi_sem);
@@ -96,11 +96,6 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff,
96 inode->i_ino, 96 inode->i_ino,
97 (unsigned long long)blkoff); 97 (unsigned long long)blkoff);
98 err = 0; 98 err = 0;
99 } else if (err == -EINVAL) {
100 nilfs_error(inode->i_sb, __func__,
101 "broken bmap (inode=%lu)\n",
102 inode->i_ino);
103 err = -EIO;
104 } 99 }
105 nilfs_transaction_abort(inode->i_sb); 100 nilfs_transaction_abort(inode->i_sb);
106 goto out; 101 goto out;
@@ -109,6 +104,7 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff,
109 nilfs_transaction_commit(inode->i_sb); /* never fails */ 104 nilfs_transaction_commit(inode->i_sb); /* never fails */
110 /* Error handling should be detailed */ 105 /* Error handling should be detailed */
111 set_buffer_new(bh_result); 106 set_buffer_new(bh_result);
107 set_buffer_delay(bh_result);
112 map_bh(bh_result, inode->i_sb, 0); /* dbn must be changed 108 map_bh(bh_result, inode->i_sb, 0); /* dbn must be changed
113 to proper value */ 109 to proper value */
114 } else if (ret == -ENOENT) { 110 } else if (ret == -ENOENT) {
@@ -185,10 +181,9 @@ static int nilfs_set_page_dirty(struct page *page)
185 181
186 if (ret) { 182 if (ret) {
187 struct inode *inode = page->mapping->host; 183 struct inode *inode = page->mapping->host;
188 struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb);
189 unsigned nr_dirty = 1 << (PAGE_SHIFT - inode->i_blkbits); 184 unsigned nr_dirty = 1 << (PAGE_SHIFT - inode->i_blkbits);
190 185
191 nilfs_set_file_dirty(sbi, inode, nr_dirty); 186 nilfs_set_file_dirty(inode, nr_dirty);
192 } 187 }
193 return ret; 188 return ret;
194} 189}
@@ -229,7 +224,7 @@ static int nilfs_write_end(struct file *file, struct address_space *mapping,
229 start + copied); 224 start + copied);
230 copied = generic_write_end(file, mapping, pos, len, copied, page, 225 copied = generic_write_end(file, mapping, pos, len, copied, page,
231 fsdata); 226 fsdata);
232 nilfs_set_file_dirty(NILFS_SB(inode->i_sb), inode, nr_dirty); 227 nilfs_set_file_dirty(inode, nr_dirty);
233 err = nilfs_transaction_commit(inode->i_sb); 228 err = nilfs_transaction_commit(inode->i_sb);
234 return err ? : copied; 229 return err ? : copied;
235} 230}
@@ -425,13 +420,12 @@ static int __nilfs_read_inode(struct super_block *sb,
425 struct nilfs_root *root, unsigned long ino, 420 struct nilfs_root *root, unsigned long ino,
426 struct inode *inode) 421 struct inode *inode)
427{ 422{
428 struct nilfs_sb_info *sbi = NILFS_SB(sb); 423 struct the_nilfs *nilfs = NILFS_SB(sb)->s_nilfs;
429 struct inode *dat = nilfs_dat_inode(sbi->s_nilfs);
430 struct buffer_head *bh; 424 struct buffer_head *bh;
431 struct nilfs_inode *raw_inode; 425 struct nilfs_inode *raw_inode;
432 int err; 426 int err;
433 427
434 down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ 428 down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
435 err = nilfs_ifile_get_inode_block(root->ifile, ino, &bh); 429 err = nilfs_ifile_get_inode_block(root->ifile, ino, &bh);
436 if (unlikely(err)) 430 if (unlikely(err))
437 goto bad_inode; 431 goto bad_inode;
@@ -461,7 +455,7 @@ static int __nilfs_read_inode(struct super_block *sb,
461 } 455 }
462 nilfs_ifile_unmap_inode(root->ifile, ino, bh); 456 nilfs_ifile_unmap_inode(root->ifile, ino, bh);
463 brelse(bh); 457 brelse(bh);
464 up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ 458 up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
465 nilfs_set_inode_flags(inode); 459 nilfs_set_inode_flags(inode);
466 return 0; 460 return 0;
467 461
@@ -470,7 +464,7 @@ static int __nilfs_read_inode(struct super_block *sb,
470 brelse(bh); 464 brelse(bh);
471 465
472 bad_inode: 466 bad_inode:
473 up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ 467 up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
474 return err; 468 return err;
475} 469}
476 470
@@ -629,7 +623,7 @@ static void nilfs_truncate_bmap(struct nilfs_inode_info *ii,
629 623
630 if (!test_bit(NILFS_I_BMAP, &ii->i_state)) 624 if (!test_bit(NILFS_I_BMAP, &ii->i_state))
631 return; 625 return;
632 repeat: 626repeat:
633 ret = nilfs_bmap_last_key(ii->i_bmap, &b); 627 ret = nilfs_bmap_last_key(ii->i_bmap, &b);
634 if (ret == -ENOENT) 628 if (ret == -ENOENT)
635 return; 629 return;
@@ -646,14 +640,10 @@ static void nilfs_truncate_bmap(struct nilfs_inode_info *ii,
646 nilfs_bmap_truncate(ii->i_bmap, b) == 0)) 640 nilfs_bmap_truncate(ii->i_bmap, b) == 0))
647 goto repeat; 641 goto repeat;
648 642
649 failed: 643failed:
650 if (ret == -EINVAL) 644 nilfs_warning(ii->vfs_inode.i_sb, __func__,
651 nilfs_error(ii->vfs_inode.i_sb, __func__, 645 "failed to truncate bmap (ino=%lu, err=%d)",
652 "bmap is broken (ino=%lu)", ii->vfs_inode.i_ino); 646 ii->vfs_inode.i_ino, ret);
653 else
654 nilfs_warning(ii->vfs_inode.i_sb, __func__,
655 "failed to truncate bmap (ino=%lu, err=%d)",
656 ii->vfs_inode.i_ino, ret);
657} 647}
658 648
659void nilfs_truncate(struct inode *inode) 649void nilfs_truncate(struct inode *inode)
@@ -682,7 +672,7 @@ void nilfs_truncate(struct inode *inode)
682 nilfs_set_transaction_flag(NILFS_TI_SYNC); 672 nilfs_set_transaction_flag(NILFS_TI_SYNC);
683 673
684 nilfs_mark_inode_dirty(inode); 674 nilfs_mark_inode_dirty(inode);
685 nilfs_set_file_dirty(NILFS_SB(sb), inode, 0); 675 nilfs_set_file_dirty(inode, 0);
686 nilfs_transaction_commit(sb); 676 nilfs_transaction_commit(sb);
687 /* May construct a logical segment and may fail in sync mode. 677 /* May construct a logical segment and may fail in sync mode.
688 But truncate has no return value. */ 678 But truncate has no return value. */
@@ -785,20 +775,24 @@ out_err:
785 return err; 775 return err;
786} 776}
787 777
788int nilfs_permission(struct inode *inode, int mask) 778int nilfs_permission(struct inode *inode, int mask, unsigned int flags)
789{ 779{
790 struct nilfs_root *root = NILFS_I(inode)->i_root; 780 struct nilfs_root *root;
791 781
782 if (flags & IPERM_FLAG_RCU)
783 return -ECHILD;
784
785 root = NILFS_I(inode)->i_root;
792 if ((mask & MAY_WRITE) && root && 786 if ((mask & MAY_WRITE) && root &&
793 root->cno != NILFS_CPTREE_CURRENT_CNO) 787 root->cno != NILFS_CPTREE_CURRENT_CNO)
794 return -EROFS; /* snapshot is not writable */ 788 return -EROFS; /* snapshot is not writable */
795 789
796 return generic_permission(inode, mask, NULL); 790 return generic_permission(inode, mask, flags, NULL);
797} 791}
798 792
799int nilfs_load_inode_block(struct nilfs_sb_info *sbi, struct inode *inode, 793int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
800 struct buffer_head **pbh)
801{ 794{
795 struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb);
802 struct nilfs_inode_info *ii = NILFS_I(inode); 796 struct nilfs_inode_info *ii = NILFS_I(inode);
803 int err; 797 int err;
804 798
@@ -839,9 +833,9 @@ int nilfs_inode_dirty(struct inode *inode)
839 return ret; 833 return ret;
840} 834}
841 835
842int nilfs_set_file_dirty(struct nilfs_sb_info *sbi, struct inode *inode, 836int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty)
843 unsigned nr_dirty)
844{ 837{
838 struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb);
845 struct nilfs_inode_info *ii = NILFS_I(inode); 839 struct nilfs_inode_info *ii = NILFS_I(inode);
846 840
847 atomic_add(nr_dirty, &sbi->s_nilfs->ns_ndirtyblks); 841 atomic_add(nr_dirty, &sbi->s_nilfs->ns_ndirtyblks);
@@ -874,11 +868,10 @@ int nilfs_set_file_dirty(struct nilfs_sb_info *sbi, struct inode *inode,
874 868
875int nilfs_mark_inode_dirty(struct inode *inode) 869int nilfs_mark_inode_dirty(struct inode *inode)
876{ 870{
877 struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb);
878 struct buffer_head *ibh; 871 struct buffer_head *ibh;
879 int err; 872 int err;
880 873
881 err = nilfs_load_inode_block(sbi, inode, &ibh); 874 err = nilfs_load_inode_block(inode, &ibh);
882 if (unlikely(err)) { 875 if (unlikely(err)) {
883 nilfs_warning(inode->i_sb, __func__, 876 nilfs_warning(inode->i_sb, __func__,
884 "failed to reget inode block.\n"); 877 "failed to reget inode block.\n");
@@ -920,3 +913,134 @@ void nilfs_dirty_inode(struct inode *inode)
920 nilfs_mark_inode_dirty(inode); 913 nilfs_mark_inode_dirty(inode);
921 nilfs_transaction_commit(inode->i_sb); /* never fails */ 914 nilfs_transaction_commit(inode->i_sb); /* never fails */
922} 915}
916
917int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
918 __u64 start, __u64 len)
919{
920 struct the_nilfs *nilfs = NILFS_I_NILFS(inode);
921 __u64 logical = 0, phys = 0, size = 0;
922 __u32 flags = 0;
923 loff_t isize;
924 sector_t blkoff, end_blkoff;
925 sector_t delalloc_blkoff;
926 unsigned long delalloc_blklen;
927 unsigned int blkbits = inode->i_blkbits;
928 int ret, n;
929
930 ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
931 if (ret)
932 return ret;
933
934 mutex_lock(&inode->i_mutex);
935
936 isize = i_size_read(inode);
937
938 blkoff = start >> blkbits;
939 end_blkoff = (start + len - 1) >> blkbits;
940
941 delalloc_blklen = nilfs_find_uncommitted_extent(inode, blkoff,
942 &delalloc_blkoff);
943
944 do {
945 __u64 blkphy;
946 unsigned int maxblocks;
947
948 if (delalloc_blklen && blkoff == delalloc_blkoff) {
949 if (size) {
950 /* End of the current extent */
951 ret = fiemap_fill_next_extent(
952 fieinfo, logical, phys, size, flags);
953 if (ret)
954 break;
955 }
956 if (blkoff > end_blkoff)
957 break;
958
959 flags = FIEMAP_EXTENT_MERGED | FIEMAP_EXTENT_DELALLOC;
960 logical = blkoff << blkbits;
961 phys = 0;
962 size = delalloc_blklen << blkbits;
963
964 blkoff = delalloc_blkoff + delalloc_blklen;
965 delalloc_blklen = nilfs_find_uncommitted_extent(
966 inode, blkoff, &delalloc_blkoff);
967 continue;
968 }
969
970 /*
971 * Limit the number of blocks that we look up so as
972 * not to get into the next delayed allocation extent.
973 */
974 maxblocks = INT_MAX;
975 if (delalloc_blklen)
976 maxblocks = min_t(sector_t, delalloc_blkoff - blkoff,
977 maxblocks);
978 blkphy = 0;
979
980 down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
981 n = nilfs_bmap_lookup_contig(
982 NILFS_I(inode)->i_bmap, blkoff, &blkphy, maxblocks);
983 up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
984
985 if (n < 0) {
986 int past_eof;
987
988 if (unlikely(n != -ENOENT))
989 break; /* error */
990
991 /* HOLE */
992 blkoff++;
993 past_eof = ((blkoff << blkbits) >= isize);
994
995 if (size) {
996 /* End of the current extent */
997
998 if (past_eof)
999 flags |= FIEMAP_EXTENT_LAST;
1000
1001 ret = fiemap_fill_next_extent(
1002 fieinfo, logical, phys, size, flags);
1003 if (ret)
1004 break;
1005 size = 0;
1006 }
1007 if (blkoff > end_blkoff || past_eof)
1008 break;
1009 } else {
1010 if (size) {
1011 if (phys && blkphy << blkbits == phys + size) {
1012 /* The current extent goes on */
1013 size += n << blkbits;
1014 } else {
1015 /* Terminate the current extent */
1016 ret = fiemap_fill_next_extent(
1017 fieinfo, logical, phys, size,
1018 flags);
1019 if (ret || blkoff > end_blkoff)
1020 break;
1021
1022 /* Start another extent */
1023 flags = FIEMAP_EXTENT_MERGED;
1024 logical = blkoff << blkbits;
1025 phys = blkphy << blkbits;
1026 size = n << blkbits;
1027 }
1028 } else {
1029 /* Start a new extent */
1030 flags = FIEMAP_EXTENT_MERGED;
1031 logical = blkoff << blkbits;
1032 phys = blkphy << blkbits;
1033 size = n << blkbits;
1034 }
1035 blkoff += n;
1036 }
1037 cond_resched();
1038 } while (true);
1039
1040 /* If ret is 1 then we just hit the end of the extent array */
1041 if (ret == 1)
1042 ret = 0;
1043
1044 mutex_unlock(&inode->i_mutex);
1045 return ret;
1046}
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index b185e937a33..496738963fd 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -233,7 +233,7 @@ nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
233 int ret; 233 int ret;
234 234
235 down_read(&nilfs->ns_segctor_sem); 235 down_read(&nilfs->ns_segctor_sem);
236 ret = nilfs_dat_get_vinfo(nilfs_dat_inode(nilfs), buf, size, nmembs); 236 ret = nilfs_dat_get_vinfo(nilfs->ns_dat, buf, size, nmembs);
237 up_read(&nilfs->ns_segctor_sem); 237 up_read(&nilfs->ns_segctor_sem);
238 return ret; 238 return ret;
239} 239}
@@ -242,8 +242,7 @@ static ssize_t
242nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, __u64 *posp, int flags, 242nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, __u64 *posp, int flags,
243 void *buf, size_t size, size_t nmembs) 243 void *buf, size_t size, size_t nmembs)
244{ 244{
245 struct inode *dat = nilfs_dat_inode(nilfs); 245 struct nilfs_bmap *bmap = NILFS_I(nilfs->ns_dat)->i_bmap;
246 struct nilfs_bmap *bmap = NILFS_I(dat)->i_bmap;
247 struct nilfs_bdesc *bdescs = buf; 246 struct nilfs_bdesc *bdescs = buf;
248 int ret, i; 247 int ret, i;
249 248
@@ -421,7 +420,7 @@ static int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs,
421 size_t nmembs = argv->v_nmembs; 420 size_t nmembs = argv->v_nmembs;
422 int ret; 421 int ret;
423 422
424 ret = nilfs_dat_freev(nilfs_dat_inode(nilfs), buf, nmembs); 423 ret = nilfs_dat_freev(nilfs->ns_dat, buf, nmembs);
425 424
426 return (ret < 0) ? ret : nmembs; 425 return (ret < 0) ? ret : nmembs;
427} 426}
@@ -430,8 +429,7 @@ static int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs,
430 struct nilfs_argv *argv, void *buf) 429 struct nilfs_argv *argv, void *buf)
431{ 430{
432 size_t nmembs = argv->v_nmembs; 431 size_t nmembs = argv->v_nmembs;
433 struct inode *dat = nilfs_dat_inode(nilfs); 432 struct nilfs_bmap *bmap = NILFS_I(nilfs->ns_dat)->i_bmap;
434 struct nilfs_bmap *bmap = NILFS_I(dat)->i_bmap;
435 struct nilfs_bdesc *bdescs = buf; 433 struct nilfs_bdesc *bdescs = buf;
436 int ret, i; 434 int ret, i;
437 435
@@ -450,7 +448,7 @@ static int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs,
450 /* skip dead block */ 448 /* skip dead block */
451 continue; 449 continue;
452 if (bdescs[i].bd_level == 0) { 450 if (bdescs[i].bd_level == 0) {
453 ret = nilfs_mdt_mark_block_dirty(dat, 451 ret = nilfs_mdt_mark_block_dirty(nilfs->ns_dat,
454 bdescs[i].bd_offset); 452 bdescs[i].bd_offset);
455 if (ret < 0) { 453 if (ret < 0) {
456 WARN_ON(ret == -ENOENT); 454 WARN_ON(ret == -ENOENT);
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 39a5b84e2c9..6a0e2a189f6 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -237,8 +237,6 @@ static int nilfs_mdt_read_block(struct inode *inode, unsigned long block,
237 * 237 *
238 * %-ENOENT - the specified block does not exist (hole block) 238 * %-ENOENT - the specified block does not exist (hole block)
239 * 239 *
240 * %-EINVAL - bmap is broken. (the caller should call nilfs_error())
241 *
242 * %-EROFS - Read only filesystem (for create mode) 240 * %-EROFS - Read only filesystem (for create mode)
243 */ 241 */
244int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create, 242int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create,
@@ -273,8 +271,6 @@ int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create,
273 * %-ENOMEM - Insufficient memory available. 271 * %-ENOMEM - Insufficient memory available.
274 * 272 *
275 * %-EIO - I/O error 273 * %-EIO - I/O error
276 *
277 * %-EINVAL - bmap is broken. (the caller should call nilfs_error())
278 */ 274 */
279int nilfs_mdt_delete_block(struct inode *inode, unsigned long block) 275int nilfs_mdt_delete_block(struct inode *inode, unsigned long block)
280{ 276{
@@ -350,8 +346,6 @@ int nilfs_mdt_forget_block(struct inode *inode, unsigned long block)
350 * %-EIO - I/O error 346 * %-EIO - I/O error
351 * 347 *
352 * %-ENOENT - the specified block does not exist (hole block) 348 * %-ENOENT - the specified block does not exist (hole block)
353 *
354 * %-EINVAL - bmap is broken. (the caller should call nilfs_error())
355 */ 349 */
356int nilfs_mdt_mark_block_dirty(struct inode *inode, unsigned long block) 350int nilfs_mdt_mark_block_dirty(struct inode *inode, unsigned long block)
357{ 351{
@@ -499,31 +493,29 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh)
499 struct buffer_head *bh_frozen; 493 struct buffer_head *bh_frozen;
500 struct page *page; 494 struct page *page;
501 int blkbits = inode->i_blkbits; 495 int blkbits = inode->i_blkbits;
502 int ret = -ENOMEM;
503 496
504 page = grab_cache_page(&shadow->frozen_data, bh->b_page->index); 497 page = grab_cache_page(&shadow->frozen_data, bh->b_page->index);
505 if (!page) 498 if (!page)
506 return ret; 499 return -ENOMEM;
507 500
508 if (!page_has_buffers(page)) 501 if (!page_has_buffers(page))
509 create_empty_buffers(page, 1 << blkbits, 0); 502 create_empty_buffers(page, 1 << blkbits, 0);
510 503
511 bh_frozen = nilfs_page_get_nth_block(page, bh_offset(bh) >> blkbits); 504 bh_frozen = nilfs_page_get_nth_block(page, bh_offset(bh) >> blkbits);
512 if (bh_frozen) { 505
513 if (!buffer_uptodate(bh_frozen)) 506 if (!buffer_uptodate(bh_frozen))
514 nilfs_copy_buffer(bh_frozen, bh); 507 nilfs_copy_buffer(bh_frozen, bh);
515 if (list_empty(&bh_frozen->b_assoc_buffers)) { 508 if (list_empty(&bh_frozen->b_assoc_buffers)) {
516 list_add_tail(&bh_frozen->b_assoc_buffers, 509 list_add_tail(&bh_frozen->b_assoc_buffers,
517 &shadow->frozen_buffers); 510 &shadow->frozen_buffers);
518 set_buffer_nilfs_redirected(bh); 511 set_buffer_nilfs_redirected(bh);
519 } else { 512 } else {
520 brelse(bh_frozen); /* already frozen */ 513 brelse(bh_frozen); /* already frozen */
521 }
522 ret = 0;
523 } 514 }
515
524 unlock_page(page); 516 unlock_page(page);
525 page_cache_release(page); 517 page_cache_release(page);
526 return ret; 518 return 0;
527} 519}
528 520
529struct buffer_head * 521struct buffer_head *
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 6e9557ecf16..98034271cd0 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -577,6 +577,7 @@ const struct inode_operations nilfs_dir_inode_operations = {
577 .rename = nilfs_rename, 577 .rename = nilfs_rename,
578 .setattr = nilfs_setattr, 578 .setattr = nilfs_setattr,
579 .permission = nilfs_permission, 579 .permission = nilfs_permission,
580 .fiemap = nilfs_fiemap,
580}; 581};
581 582
582const struct inode_operations nilfs_special_inode_operations = { 583const struct inode_operations nilfs_special_inode_operations = {
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index f7560da5a56..777e8fd0430 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -190,11 +190,6 @@ static inline int nilfs_doing_construction(void)
190 return nilfs_test_transaction_flag(NILFS_TI_WRITER); 190 return nilfs_test_transaction_flag(NILFS_TI_WRITER);
191} 191}
192 192
193static inline struct inode *nilfs_dat_inode(const struct the_nilfs *nilfs)
194{
195 return nilfs->ns_dat;
196}
197
198/* 193/*
199 * function prototype 194 * function prototype
200 */ 195 */
@@ -256,14 +251,14 @@ extern void nilfs_update_inode(struct inode *, struct buffer_head *);
256extern void nilfs_truncate(struct inode *); 251extern void nilfs_truncate(struct inode *);
257extern void nilfs_evict_inode(struct inode *); 252extern void nilfs_evict_inode(struct inode *);
258extern int nilfs_setattr(struct dentry *, struct iattr *); 253extern int nilfs_setattr(struct dentry *, struct iattr *);
259int nilfs_permission(struct inode *inode, int mask); 254int nilfs_permission(struct inode *inode, int mask, unsigned int flags);
260extern int nilfs_load_inode_block(struct nilfs_sb_info *, struct inode *, 255int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh);
261 struct buffer_head **);
262extern int nilfs_inode_dirty(struct inode *); 256extern int nilfs_inode_dirty(struct inode *);
263extern int nilfs_set_file_dirty(struct nilfs_sb_info *, struct inode *, 257int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty);
264 unsigned);
265extern int nilfs_mark_inode_dirty(struct inode *); 258extern int nilfs_mark_inode_dirty(struct inode *);
266extern void nilfs_dirty_inode(struct inode *); 259extern void nilfs_dirty_inode(struct inode *);
260int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
261 __u64 start, __u64 len);
267 262
268/* super.c */ 263/* super.c */
269extern struct inode *nilfs_alloc_inode(struct super_block *); 264extern struct inode *nilfs_alloc_inode(struct super_block *);
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index a6c3c2e817f..0c432416cfe 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -491,7 +491,7 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
491 } 491 }
492 return nc; 492 return nc;
493} 493}
494 494
495void nilfs_mapping_init_once(struct address_space *mapping) 495void nilfs_mapping_init_once(struct address_space *mapping)
496{ 496{
497 memset(mapping, 0, sizeof(*mapping)); 497 memset(mapping, 0, sizeof(*mapping));
@@ -546,3 +546,87 @@ int __nilfs_clear_page_dirty(struct page *page)
546 } 546 }
547 return TestClearPageDirty(page); 547 return TestClearPageDirty(page);
548} 548}
549
550/**
551 * nilfs_find_uncommitted_extent - find extent of uncommitted data
552 * @inode: inode
553 * @start_blk: start block offset (in)
554 * @blkoff: start offset of the found extent (out)
555 *
556 * This function searches an extent of buffers marked "delayed" which
557 * starts from a block offset equal to or larger than @start_blk. If
558 * such an extent was found, this will store the start offset in
559 * @blkoff and return its length in blocks. Otherwise, zero is
560 * returned.
561 */
562unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
563 sector_t start_blk,
564 sector_t *blkoff)
565{
566 unsigned int i;
567 pgoff_t index;
568 unsigned int nblocks_in_page;
569 unsigned long length = 0;
570 sector_t b;
571 struct pagevec pvec;
572 struct page *page;
573
574 if (inode->i_mapping->nrpages == 0)
575 return 0;
576
577 index = start_blk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
578 nblocks_in_page = 1U << (PAGE_CACHE_SHIFT - inode->i_blkbits);
579
580 pagevec_init(&pvec, 0);
581
582repeat:
583 pvec.nr = find_get_pages_contig(inode->i_mapping, index, PAGEVEC_SIZE,
584 pvec.pages);
585 if (pvec.nr == 0)
586 return length;
587
588 if (length > 0 && pvec.pages[0]->index > index)
589 goto out;
590
591 b = pvec.pages[0]->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
592 i = 0;
593 do {
594 page = pvec.pages[i];
595
596 lock_page(page);
597 if (page_has_buffers(page)) {
598 struct buffer_head *bh, *head;
599
600 bh = head = page_buffers(page);
601 do {
602 if (b < start_blk)
603 continue;
604 if (buffer_delay(bh)) {
605 if (length == 0)
606 *blkoff = b;
607 length++;
608 } else if (length > 0) {
609 goto out_locked;
610 }
611 } while (++b, bh = bh->b_this_page, bh != head);
612 } else {
613 if (length > 0)
614 goto out_locked;
615
616 b += nblocks_in_page;
617 }
618 unlock_page(page);
619
620 } while (++i < pagevec_count(&pvec));
621
622 index = page->index + 1;
623 pagevec_release(&pvec);
624 cond_resched();
625 goto repeat;
626
627out_locked:
628 unlock_page(page);
629out:
630 pagevec_release(&pvec);
631 return length;
632}
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
index fb9e8a8a203..622df27cd89 100644
--- a/fs/nilfs2/page.h
+++ b/fs/nilfs2/page.h
@@ -66,6 +66,9 @@ void nilfs_mapping_init(struct address_space *mapping,
66 struct backing_dev_info *bdi, 66 struct backing_dev_info *bdi,
67 const struct address_space_operations *aops); 67 const struct address_space_operations *aops);
68unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned); 68unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned);
69unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
70 sector_t start_blk,
71 sector_t *blkoff);
69 72
70#define NILFS_PAGE_BUG(page, m, a...) \ 73#define NILFS_PAGE_BUG(page, m, a...) \
71 do { nilfs_page_bug(page); BUG(); } while (0) 74 do { nilfs_page_bug(page); BUG(); } while (0)
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index 5d2711c28da..3dfcd3b7d38 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -535,7 +535,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
535 if (unlikely(err)) 535 if (unlikely(err))
536 goto failed_page; 536 goto failed_page;
537 537
538 err = nilfs_set_file_dirty(sbi, inode, 1); 538 err = nilfs_set_file_dirty(inode, 1);
539 if (unlikely(err)) 539 if (unlikely(err))
540 goto failed_page; 540 goto failed_page;
541 541
diff --git a/fs/nilfs2/sb.h b/fs/nilfs2/sb.h
index 35a07157b98..7a17715f215 100644
--- a/fs/nilfs2/sb.h
+++ b/fs/nilfs2/sb.h
@@ -27,14 +27,6 @@
27#include <linux/types.h> 27#include <linux/types.h>
28#include <linux/fs.h> 28#include <linux/fs.h>
29 29
30/*
31 * Mount options
32 */
33struct nilfs_mount_options {
34 unsigned long mount_opt;
35 __u64 snapshot_cno;
36};
37
38struct the_nilfs; 30struct the_nilfs;
39struct nilfs_sc_info; 31struct nilfs_sc_info;
40 32
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 687d090cea3..55ebae5c7f3 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -504,17 +504,6 @@ static int nilfs_segctor_add_file_block(struct nilfs_sc_info *sci,
504 return err; 504 return err;
505} 505}
506 506
507static int nilfs_handle_bmap_error(int err, const char *fname,
508 struct inode *inode, struct super_block *sb)
509{
510 if (err == -EINVAL) {
511 nilfs_error(sb, fname, "broken bmap (inode=%lu)\n",
512 inode->i_ino);
513 err = -EIO;
514 }
515 return err;
516}
517
518/* 507/*
519 * Callback functions that enumerate, mark, and collect dirty blocks 508 * Callback functions that enumerate, mark, and collect dirty blocks
520 */ 509 */
@@ -524,9 +513,8 @@ static int nilfs_collect_file_data(struct nilfs_sc_info *sci,
524 int err; 513 int err;
525 514
526 err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh); 515 err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
527 if (unlikely(err < 0)) 516 if (err < 0)
528 return nilfs_handle_bmap_error(err, __func__, inode, 517 return err;
529 sci->sc_super);
530 518
531 err = nilfs_segctor_add_file_block(sci, bh, inode, 519 err = nilfs_segctor_add_file_block(sci, bh, inode,
532 sizeof(struct nilfs_binfo_v)); 520 sizeof(struct nilfs_binfo_v));
@@ -539,13 +527,7 @@ static int nilfs_collect_file_node(struct nilfs_sc_info *sci,
539 struct buffer_head *bh, 527 struct buffer_head *bh,
540 struct inode *inode) 528 struct inode *inode)
541{ 529{
542 int err; 530 return nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
543
544 err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
545 if (unlikely(err < 0))
546 return nilfs_handle_bmap_error(err, __func__, inode,
547 sci->sc_super);
548 return 0;
549} 531}
550 532
551static int nilfs_collect_file_bmap(struct nilfs_sc_info *sci, 533static int nilfs_collect_file_bmap(struct nilfs_sc_info *sci,
@@ -588,9 +570,8 @@ static int nilfs_collect_dat_data(struct nilfs_sc_info *sci,
588 int err; 570 int err;
589 571
590 err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh); 572 err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
591 if (unlikely(err < 0)) 573 if (err < 0)
592 return nilfs_handle_bmap_error(err, __func__, inode, 574 return err;
593 sci->sc_super);
594 575
595 err = nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64)); 576 err = nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64));
596 if (!err) 577 if (!err)
@@ -776,9 +757,8 @@ static int nilfs_test_metadata_dirty(struct the_nilfs *nilfs,
776 ret++; 757 ret++;
777 if (nilfs_mdt_fetch_dirty(nilfs->ns_sufile)) 758 if (nilfs_mdt_fetch_dirty(nilfs->ns_sufile))
778 ret++; 759 ret++;
779 if (ret || nilfs_doing_gc()) 760 if ((ret || nilfs_doing_gc()) && nilfs_mdt_fetch_dirty(nilfs->ns_dat))
780 if (nilfs_mdt_fetch_dirty(nilfs_dat_inode(nilfs))) 761 ret++;
781 ret++;
782 return ret; 762 return ret;
783} 763}
784 764
@@ -814,7 +794,7 @@ static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci)
814 nilfs_mdt_clear_dirty(sci->sc_root->ifile); 794 nilfs_mdt_clear_dirty(sci->sc_root->ifile);
815 nilfs_mdt_clear_dirty(nilfs->ns_cpfile); 795 nilfs_mdt_clear_dirty(nilfs->ns_cpfile);
816 nilfs_mdt_clear_dirty(nilfs->ns_sufile); 796 nilfs_mdt_clear_dirty(nilfs->ns_sufile);
817 nilfs_mdt_clear_dirty(nilfs_dat_inode(nilfs)); 797 nilfs_mdt_clear_dirty(nilfs->ns_dat);
818} 798}
819 799
820static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci) 800static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci)
@@ -923,7 +903,7 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
923 nilfs->ns_nongc_ctime : sci->sc_seg_ctime); 903 nilfs->ns_nongc_ctime : sci->sc_seg_ctime);
924 raw_sr->sr_flags = 0; 904 raw_sr->sr_flags = 0;
925 905
926 nilfs_write_inode_common(nilfs_dat_inode(nilfs), (void *)raw_sr + 906 nilfs_write_inode_common(nilfs->ns_dat, (void *)raw_sr +
927 NILFS_SR_DAT_OFFSET(isz), 1); 907 NILFS_SR_DAT_OFFSET(isz), 1);
928 nilfs_write_inode_common(nilfs->ns_cpfile, (void *)raw_sr + 908 nilfs_write_inode_common(nilfs->ns_cpfile, (void *)raw_sr +
929 NILFS_SR_CPFILE_OFFSET(isz), 1); 909 NILFS_SR_CPFILE_OFFSET(isz), 1);
@@ -1179,7 +1159,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
1179 sci->sc_stage.scnt++; /* Fall through */ 1159 sci->sc_stage.scnt++; /* Fall through */
1180 case NILFS_ST_DAT: 1160 case NILFS_ST_DAT:
1181 dat_stage: 1161 dat_stage:
1182 err = nilfs_segctor_scan_file(sci, nilfs_dat_inode(nilfs), 1162 err = nilfs_segctor_scan_file(sci, nilfs->ns_dat,
1183 &nilfs_sc_dat_ops); 1163 &nilfs_sc_dat_ops);
1184 if (unlikely(err)) 1164 if (unlikely(err))
1185 break; 1165 break;
@@ -1563,7 +1543,6 @@ nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci,
1563 return 0; 1543 return 0;
1564 1544
1565 failed_bmap: 1545 failed_bmap:
1566 err = nilfs_handle_bmap_error(err, __func__, inode, sci->sc_super);
1567 return err; 1546 return err;
1568} 1547}
1569 1548
@@ -1783,6 +1762,7 @@ static void nilfs_clear_copied_buffers(struct list_head *list, int err)
1783 if (!err) { 1762 if (!err) {
1784 set_buffer_uptodate(bh); 1763 set_buffer_uptodate(bh);
1785 clear_buffer_dirty(bh); 1764 clear_buffer_dirty(bh);
1765 clear_buffer_delay(bh);
1786 clear_buffer_nilfs_volatile(bh); 1766 clear_buffer_nilfs_volatile(bh);
1787 } 1767 }
1788 brelse(bh); /* for b_assoc_buffers */ 1768 brelse(bh); /* for b_assoc_buffers */
@@ -1909,6 +1889,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
1909 b_assoc_buffers) { 1889 b_assoc_buffers) {
1910 set_buffer_uptodate(bh); 1890 set_buffer_uptodate(bh);
1911 clear_buffer_dirty(bh); 1891 clear_buffer_dirty(bh);
1892 clear_buffer_delay(bh);
1912 clear_buffer_nilfs_volatile(bh); 1893 clear_buffer_nilfs_volatile(bh);
1913 clear_buffer_nilfs_redirected(bh); 1894 clear_buffer_nilfs_redirected(bh);
1914 if (bh == segbuf->sb_super_root) { 1895 if (bh == segbuf->sb_super_root) {
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index f804d41ec9d..70dfdd532b8 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -47,7 +47,6 @@
47#include <linux/crc32.h> 47#include <linux/crc32.h>
48#include <linux/vfs.h> 48#include <linux/vfs.h>
49#include <linux/writeback.h> 49#include <linux/writeback.h>
50#include <linux/kobject.h>
51#include <linux/seq_file.h> 50#include <linux/seq_file.h>
52#include <linux/mount.h> 51#include <linux/mount.h>
53#include "nilfs.h" 52#include "nilfs.h"
@@ -111,12 +110,17 @@ void nilfs_error(struct super_block *sb, const char *function,
111 const char *fmt, ...) 110 const char *fmt, ...)
112{ 111{
113 struct nilfs_sb_info *sbi = NILFS_SB(sb); 112 struct nilfs_sb_info *sbi = NILFS_SB(sb);
113 struct va_format vaf;
114 va_list args; 114 va_list args;
115 115
116 va_start(args, fmt); 116 va_start(args, fmt);
117 printk(KERN_CRIT "NILFS error (device %s): %s: ", sb->s_id, function); 117
118 vprintk(fmt, args); 118 vaf.fmt = fmt;
119 printk("\n"); 119 vaf.va = &args;
120
121 printk(KERN_CRIT "NILFS error (device %s): %s: %pV\n",
122 sb->s_id, function, &vaf);
123
120 va_end(args); 124 va_end(args);
121 125
122 if (!(sb->s_flags & MS_RDONLY)) { 126 if (!(sb->s_flags & MS_RDONLY)) {
@@ -136,13 +140,17 @@ void nilfs_error(struct super_block *sb, const char *function,
136void nilfs_warning(struct super_block *sb, const char *function, 140void nilfs_warning(struct super_block *sb, const char *function,
137 const char *fmt, ...) 141 const char *fmt, ...)
138{ 142{
143 struct va_format vaf;
139 va_list args; 144 va_list args;
140 145
141 va_start(args, fmt); 146 va_start(args, fmt);
142 printk(KERN_WARNING "NILFS warning (device %s): %s: ", 147
143 sb->s_id, function); 148 vaf.fmt = fmt;
144 vprintk(fmt, args); 149 vaf.va = &args;
145 printk("\n"); 150
151 printk(KERN_WARNING "NILFS warning (device %s): %s: %pV\n",
152 sb->s_id, function, &vaf);
153
146 va_end(args); 154 va_end(args);
147} 155}
148 156
@@ -162,10 +170,13 @@ struct inode *nilfs_alloc_inode(struct super_block *sb)
162 return &ii->vfs_inode; 170 return &ii->vfs_inode;
163} 171}
164 172
165void nilfs_destroy_inode(struct inode *inode) 173static void nilfs_i_callback(struct rcu_head *head)
166{ 174{
175 struct inode *inode = container_of(head, struct inode, i_rcu);
167 struct nilfs_mdt_info *mdi = NILFS_MDT(inode); 176 struct nilfs_mdt_info *mdi = NILFS_MDT(inode);
168 177
178 INIT_LIST_HEAD(&inode->i_dentry);
179
169 if (mdi) { 180 if (mdi) {
170 kfree(mdi->mi_bgl); /* kfree(NULL) is safe */ 181 kfree(mdi->mi_bgl); /* kfree(NULL) is safe */
171 kfree(mdi); 182 kfree(mdi);
@@ -173,6 +184,11 @@ void nilfs_destroy_inode(struct inode *inode)
173 kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode)); 184 kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode));
174} 185}
175 186
187void nilfs_destroy_inode(struct inode *inode)
188{
189 call_rcu(&inode->i_rcu, nilfs_i_callback);
190}
191
176static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag) 192static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag)
177{ 193{
178 struct the_nilfs *nilfs = sbi->s_nilfs; 194 struct the_nilfs *nilfs = sbi->s_nilfs;
@@ -838,7 +854,7 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno,
838 854
839static int nilfs_tree_was_touched(struct dentry *root_dentry) 855static int nilfs_tree_was_touched(struct dentry *root_dentry)
840{ 856{
841 return atomic_read(&root_dentry->d_count) > 1; 857 return root_dentry->d_count > 1;
842} 858}
843 859
844/** 860/**
@@ -1002,11 +1018,11 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
1002 struct nilfs_sb_info *sbi = NILFS_SB(sb); 1018 struct nilfs_sb_info *sbi = NILFS_SB(sb);
1003 struct the_nilfs *nilfs = sbi->s_nilfs; 1019 struct the_nilfs *nilfs = sbi->s_nilfs;
1004 unsigned long old_sb_flags; 1020 unsigned long old_sb_flags;
1005 struct nilfs_mount_options old_opts; 1021 unsigned long old_mount_opt;
1006 int err; 1022 int err;
1007 1023
1008 old_sb_flags = sb->s_flags; 1024 old_sb_flags = sb->s_flags;
1009 old_opts.mount_opt = sbi->s_mount_opt; 1025 old_mount_opt = sbi->s_mount_opt;
1010 1026
1011 if (!parse_options(data, sb, 1)) { 1027 if (!parse_options(data, sb, 1)) {
1012 err = -EINVAL; 1028 err = -EINVAL;
@@ -1075,7 +1091,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
1075 1091
1076 restore_opts: 1092 restore_opts:
1077 sb->s_flags = old_sb_flags; 1093 sb->s_flags = old_sb_flags;
1078 sbi->s_mount_opt = old_opts.mount_opt; 1094 sbi->s_mount_opt = old_mount_opt;
1079 return err; 1095 return err;
1080} 1096}
1081 1097
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 0254be2d73c..ad4ac607cf5 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -329,7 +329,6 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
329 printk(KERN_INFO "NILFS: recovery complete.\n"); 329 printk(KERN_INFO "NILFS: recovery complete.\n");
330 330
331 skip_recovery: 331 skip_recovery:
332 set_nilfs_loaded(nilfs);
333 nilfs_clear_recovery_info(&ri); 332 nilfs_clear_recovery_info(&ri);
334 sbi->s_super->s_flags = s_flags; 333 sbi->s_super->s_flags = s_flags;
335 return 0; 334 return 0;
@@ -651,12 +650,11 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump,
651 650
652int nilfs_count_free_blocks(struct the_nilfs *nilfs, sector_t *nblocks) 651int nilfs_count_free_blocks(struct the_nilfs *nilfs, sector_t *nblocks)
653{ 652{
654 struct inode *dat = nilfs_dat_inode(nilfs);
655 unsigned long ncleansegs; 653 unsigned long ncleansegs;
656 654
657 down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ 655 down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
658 ncleansegs = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile); 656 ncleansegs = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile);
659 up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ 657 up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
660 *nblocks = (sector_t)ncleansegs * nilfs->ns_blocks_per_segment; 658 *nblocks = (sector_t)ncleansegs * nilfs->ns_blocks_per_segment;
661 return 0; 659 return 0;
662} 660}
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index 69226e14b74..fd85e4c05c6 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -36,8 +36,6 @@
36/* the_nilfs struct */ 36/* the_nilfs struct */
37enum { 37enum {
38 THE_NILFS_INIT = 0, /* Information from super_block is set */ 38 THE_NILFS_INIT = 0, /* Information from super_block is set */
39 THE_NILFS_LOADED, /* Roll-back/roll-forward has done and
40 the latest checkpoint was loaded */
41 THE_NILFS_DISCONTINUED, /* 'next' pointer chain has broken */ 39 THE_NILFS_DISCONTINUED, /* 'next' pointer chain has broken */
42 THE_NILFS_GC_RUNNING, /* gc process is running */ 40 THE_NILFS_GC_RUNNING, /* gc process is running */
43 THE_NILFS_SB_DIRTY, /* super block is dirty */ 41 THE_NILFS_SB_DIRTY, /* super block is dirty */
@@ -178,7 +176,6 @@ static inline int nilfs_##name(struct the_nilfs *nilfs) \
178} 176}
179 177
180THE_NILFS_FNS(INIT, init) 178THE_NILFS_FNS(INIT, init)
181THE_NILFS_FNS(LOADED, loaded)
182THE_NILFS_FNS(DISCONTINUED, discontinued) 179THE_NILFS_FNS(DISCONTINUED, discontinued)
183THE_NILFS_FNS(GC_RUNNING, gc_running) 180THE_NILFS_FNS(GC_RUNNING, gc_running)
184THE_NILFS_FNS(SB_DIRTY, sb_dirty) 181THE_NILFS_FNS(SB_DIRTY, sb_dirty)
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 20dc218707c..79b47cbb5cd 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -59,7 +59,7 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
59 /* determine if the children should tell inode about their events */ 59 /* determine if the children should tell inode about their events */
60 watched = fsnotify_inode_watches_children(inode); 60 watched = fsnotify_inode_watches_children(inode);
61 61
62 spin_lock(&dcache_lock); 62 spin_lock(&inode->i_lock);
63 /* run all of the dentries associated with this inode. Since this is a 63 /* run all of the dentries associated with this inode. Since this is a
64 * directory, there damn well better only be one item on this list */ 64 * directory, there damn well better only be one item on this list */
65 list_for_each_entry(alias, &inode->i_dentry, d_alias) { 65 list_for_each_entry(alias, &inode->i_dentry, d_alias) {
@@ -68,19 +68,21 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
68 /* run all of the children of the original inode and fix their 68 /* run all of the children of the original inode and fix their
69 * d_flags to indicate parental interest (their parent is the 69 * d_flags to indicate parental interest (their parent is the
70 * original inode) */ 70 * original inode) */
71 spin_lock(&alias->d_lock);
71 list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) { 72 list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) {
72 if (!child->d_inode) 73 if (!child->d_inode)
73 continue; 74 continue;
74 75
75 spin_lock(&child->d_lock); 76 spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
76 if (watched) 77 if (watched)
77 child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED; 78 child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED;
78 else 79 else
79 child->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED; 80 child->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED;
80 spin_unlock(&child->d_lock); 81 spin_unlock(&child->d_lock);
81 } 82 }
83 spin_unlock(&alias->d_lock);
82 } 84 }
83 spin_unlock(&dcache_lock); 85 spin_unlock(&inode->i_lock);
84} 86}
85 87
86/* Notify this dentry's parent about a child's events. */ 88/* Notify this dentry's parent about a child's events. */
diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile
index 58b6be99254..4ff028fcfd6 100644
--- a/fs/ntfs/Makefile
+++ b/fs/ntfs/Makefile
@@ -6,7 +6,7 @@ ntfs-objs := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \
6 index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \ 6 index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \
7 unistr.o upcase.o 7 unistr.o upcase.o
8 8
9EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.29\" 9EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.30\"
10 10
11ifeq ($(CONFIG_NTFS_DEBUG),y) 11ifeq ($(CONFIG_NTFS_DEBUG),y)
12EXTRA_CFLAGS += -DDEBUG 12EXTRA_CFLAGS += -DDEBUG
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 113ebd9f25a..f4b1057abdd 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * file.c - NTFS kernel file operations. Part of the Linux-NTFS project. 2 * file.c - NTFS kernel file operations. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2001-2007 Anton Altaparmakov 4 * Copyright (c) 2001-2011 Anton Altaparmakov and Tuxera Inc.
5 * 5 *
6 * This program/include file is free software; you can redistribute it and/or 6 * This program/include file is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as published 7 * modify it under the terms of the GNU General Public License as published
@@ -1380,15 +1380,14 @@ static inline void ntfs_set_next_iovec(const struct iovec **iovp,
1380 * pages (out to offset + bytes), to emulate ntfs_copy_from_user()'s 1380 * pages (out to offset + bytes), to emulate ntfs_copy_from_user()'s
1381 * single-segment behaviour. 1381 * single-segment behaviour.
1382 * 1382 *
1383 * We call the same helper (__ntfs_copy_from_user_iovec_inatomic()) both 1383 * We call the same helper (__ntfs_copy_from_user_iovec_inatomic()) both when
1384 * when atomic and when not atomic. This is ok because 1384 * atomic and when not atomic. This is ok because it calls
1385 * __ntfs_copy_from_user_iovec_inatomic() calls __copy_from_user_inatomic() 1385 * __copy_from_user_inatomic() and it is ok to call this when non-atomic. In
1386 * and it is ok to call this when non-atomic. 1386 * fact, the only difference between __copy_from_user_inatomic() and
1387 * Infact, the only difference between __copy_from_user_inatomic() and
1388 * __copy_from_user() is that the latter calls might_sleep() and the former 1387 * __copy_from_user() is that the latter calls might_sleep() and the former
1389 * should not zero the tail of the buffer on error. And on many 1388 * should not zero the tail of the buffer on error. And on many architectures
1390 * architectures __copy_from_user_inatomic() is just defined to 1389 * __copy_from_user_inatomic() is just defined to __copy_from_user() so it
1391 * __copy_from_user() so it makes no difference at all on those architectures. 1390 * makes no difference at all on those architectures.
1392 */ 1391 */
1393static inline size_t ntfs_copy_from_user_iovec(struct page **pages, 1392static inline size_t ntfs_copy_from_user_iovec(struct page **pages,
1394 unsigned nr_pages, unsigned ofs, const struct iovec **iov, 1393 unsigned nr_pages, unsigned ofs, const struct iovec **iov,
@@ -1409,28 +1408,28 @@ static inline size_t ntfs_copy_from_user_iovec(struct page **pages,
1409 if (unlikely(copied != len)) { 1408 if (unlikely(copied != len)) {
1410 /* Do it the slow way. */ 1409 /* Do it the slow way. */
1411 addr = kmap(*pages); 1410 addr = kmap(*pages);
1412 copied = __ntfs_copy_from_user_iovec_inatomic(addr + ofs, 1411 copied = __ntfs_copy_from_user_iovec_inatomic(addr +
1413 *iov, *iov_ofs, len); 1412 ofs, *iov, *iov_ofs, len);
1414 /*
1415 * Zero the rest of the target like __copy_from_user().
1416 */
1417 memset(addr + ofs + copied, 0, len - copied);
1418 kunmap(*pages);
1419 if (unlikely(copied != len)) 1413 if (unlikely(copied != len))
1420 goto err_out; 1414 goto err_out;
1415 kunmap(*pages);
1421 } 1416 }
1422 total += len; 1417 total += len;
1418 ntfs_set_next_iovec(iov, iov_ofs, len);
1423 bytes -= len; 1419 bytes -= len;
1424 if (!bytes) 1420 if (!bytes)
1425 break; 1421 break;
1426 ntfs_set_next_iovec(iov, iov_ofs, len);
1427 ofs = 0; 1422 ofs = 0;
1428 } while (++pages < last_page); 1423 } while (++pages < last_page);
1429out: 1424out:
1430 return total; 1425 return total;
1431err_out: 1426err_out:
1432 total += copied; 1427 BUG_ON(copied > len);
1433 /* Zero the rest of the target like __copy_from_user(). */ 1428 /* Zero the rest of the target like __copy_from_user(). */
1429 memset(addr + ofs + copied, 0, len - copied);
1430 kunmap(*pages);
1431 total += copied;
1432 ntfs_set_next_iovec(iov, iov_ofs, copied);
1434 while (++pages < last_page) { 1433 while (++pages < last_page) {
1435 bytes -= len; 1434 bytes -= len;
1436 if (!bytes) 1435 if (!bytes)
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 93622b175fc..a627ed82c0a 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -332,6 +332,13 @@ struct inode *ntfs_alloc_big_inode(struct super_block *sb)
332 return NULL; 332 return NULL;
333} 333}
334 334
335static void ntfs_i_callback(struct rcu_head *head)
336{
337 struct inode *inode = container_of(head, struct inode, i_rcu);
338 INIT_LIST_HEAD(&inode->i_dentry);
339 kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode));
340}
341
335void ntfs_destroy_big_inode(struct inode *inode) 342void ntfs_destroy_big_inode(struct inode *inode)
336{ 343{
337 ntfs_inode *ni = NTFS_I(inode); 344 ntfs_inode *ni = NTFS_I(inode);
@@ -340,7 +347,7 @@ void ntfs_destroy_big_inode(struct inode *inode)
340 BUG_ON(ni->page); 347 BUG_ON(ni->page);
341 if (!atomic_dec_and_test(&ni->count)) 348 if (!atomic_dec_and_test(&ni->count))
342 BUG(); 349 BUG();
343 kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode)); 350 call_rcu(&inode->i_rcu, ntfs_i_callback);
344} 351}
345 352
346static inline ntfs_inode *ntfs_alloc_extent_inode(void) 353static inline ntfs_inode *ntfs_alloc_extent_inode(void)
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index a30ecacc01f..29099a07b9f 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * super.c - NTFS kernel super block handling. Part of the Linux-NTFS project. 2 * super.c - NTFS kernel super block handling. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2001-2007 Anton Altaparmakov 4 * Copyright (c) 2001-2011 Anton Altaparmakov and Tuxera Inc.
5 * Copyright (c) 2001,2002 Richard Russon 5 * Copyright (c) 2001,2002 Richard Russon
6 * 6 *
7 * This program/include file is free software; you can redistribute it and/or 7 * This program/include file is free software; you can redistribute it and/or
@@ -3193,8 +3193,8 @@ static void __exit exit_ntfs_fs(void)
3193 ntfs_sysctl(0); 3193 ntfs_sysctl(0);
3194} 3194}
3195 3195
3196MODULE_AUTHOR("Anton Altaparmakov <aia21@cantab.net>"); 3196MODULE_AUTHOR("Anton Altaparmakov <anton@tuxera.com>");
3197MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2007 Anton Altaparmakov"); 3197MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2011 Anton Altaparmakov and Tuxera Inc.");
3198MODULE_VERSION(NTFS_VERSION); 3198MODULE_VERSION(NTFS_VERSION);
3199MODULE_LICENSE("GPL"); 3199MODULE_LICENSE("GPL");
3200#ifdef DEBUG 3200#ifdef DEBUG
diff --git a/fs/ocfs2/Kconfig b/fs/ocfs2/Kconfig
index 0d840669698..ab152c00cd3 100644
--- a/fs/ocfs2/Kconfig
+++ b/fs/ocfs2/Kconfig
@@ -51,7 +51,7 @@ config OCFS2_FS_USERSPACE_CLUSTER
51 51
52config OCFS2_FS_STATS 52config OCFS2_FS_STATS
53 bool "OCFS2 statistics" 53 bool "OCFS2 statistics"
54 depends on OCFS2_FS 54 depends on OCFS2_FS && DEBUG_FS
55 default y 55 default y
56 help 56 help
57 This option allows some fs statistics to be captured. Enabling 57 This option allows some fs statistics to be captured. Enabling
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index 391915093fe..704f6b1742f 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -291,13 +291,17 @@ static int ocfs2_set_acl(handle_t *handle,
291 return ret; 291 return ret;
292} 292}
293 293
294int ocfs2_check_acl(struct inode *inode, int mask) 294int ocfs2_check_acl(struct inode *inode, int mask, unsigned int flags)
295{ 295{
296 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 296 struct ocfs2_super *osb;
297 struct buffer_head *di_bh = NULL; 297 struct buffer_head *di_bh = NULL;
298 struct posix_acl *acl; 298 struct posix_acl *acl;
299 int ret = -EAGAIN; 299 int ret = -EAGAIN;
300 300
301 if (flags & IPERM_FLAG_RCU)
302 return -ECHILD;
303
304 osb = OCFS2_SB(inode->i_sb);
301 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) 305 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
302 return ret; 306 return ret;
303 307
diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h
index 5c5d31f0585..4fe7c9cf4bf 100644
--- a/fs/ocfs2/acl.h
+++ b/fs/ocfs2/acl.h
@@ -26,7 +26,7 @@ struct ocfs2_acl_entry {
26 __le32 e_id; 26 __le32 e_id;
27}; 27};
28 28
29extern int ocfs2_check_acl(struct inode *, int); 29extern int ocfs2_check_acl(struct inode *, int, unsigned int);
30extern int ocfs2_acl_chmod(struct inode *); 30extern int ocfs2_acl_chmod(struct inode *);
31extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *, 31extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *,
32 struct buffer_head *, struct buffer_head *, 32 struct buffer_head *, struct buffer_head *,
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 592fae5007d..e4984e259cb 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -565,7 +565,6 @@ static inline int ocfs2_et_sanity_check(struct ocfs2_extent_tree *et)
565 return ret; 565 return ret;
566} 566}
567 567
568static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc);
569static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt, 568static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt,
570 struct ocfs2_extent_block *eb); 569 struct ocfs2_extent_block *eb);
571static void ocfs2_adjust_rightmost_records(handle_t *handle, 570static void ocfs2_adjust_rightmost_records(handle_t *handle,
@@ -5858,6 +5857,7 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
5858 5857
5859 ocfs2_journal_dirty(handle, tl_bh); 5858 ocfs2_journal_dirty(handle, tl_bh);
5860 5859
5860 osb->truncated_clusters += num_clusters;
5861bail: 5861bail:
5862 mlog_exit(status); 5862 mlog_exit(status);
5863 return status; 5863 return status;
@@ -5929,6 +5929,8 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
5929 i--; 5929 i--;
5930 } 5930 }
5931 5931
5932 osb->truncated_clusters = 0;
5933
5932bail: 5934bail:
5933 mlog_exit(status); 5935 mlog_exit(status);
5934 return status; 5936 return status;
@@ -7139,64 +7141,6 @@ bail:
7139} 7141}
7140 7142
7141/* 7143/*
7142 * Expects the inode to already be locked.
7143 */
7144int ocfs2_prepare_truncate(struct ocfs2_super *osb,
7145 struct inode *inode,
7146 struct buffer_head *fe_bh,
7147 struct ocfs2_truncate_context **tc)
7148{
7149 int status;
7150 unsigned int new_i_clusters;
7151 struct ocfs2_dinode *fe;
7152 struct ocfs2_extent_block *eb;
7153 struct buffer_head *last_eb_bh = NULL;
7154
7155 mlog_entry_void();
7156
7157 *tc = NULL;
7158
7159 new_i_clusters = ocfs2_clusters_for_bytes(osb->sb,
7160 i_size_read(inode));
7161 fe = (struct ocfs2_dinode *) fe_bh->b_data;
7162
7163 mlog(0, "fe->i_clusters = %u, new_i_clusters = %u, fe->i_size ="
7164 "%llu\n", le32_to_cpu(fe->i_clusters), new_i_clusters,
7165 (unsigned long long)le64_to_cpu(fe->i_size));
7166
7167 *tc = kzalloc(sizeof(struct ocfs2_truncate_context), GFP_KERNEL);
7168 if (!(*tc)) {
7169 status = -ENOMEM;
7170 mlog_errno(status);
7171 goto bail;
7172 }
7173 ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc);
7174
7175 if (fe->id2.i_list.l_tree_depth) {
7176 status = ocfs2_read_extent_block(INODE_CACHE(inode),
7177 le64_to_cpu(fe->i_last_eb_blk),
7178 &last_eb_bh);
7179 if (status < 0) {
7180 mlog_errno(status);
7181 goto bail;
7182 }
7183 eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
7184 }
7185
7186 (*tc)->tc_last_eb_bh = last_eb_bh;
7187
7188 status = 0;
7189bail:
7190 if (status < 0) {
7191 if (*tc)
7192 ocfs2_free_truncate_context(*tc);
7193 *tc = NULL;
7194 }
7195 mlog_exit_void();
7196 return status;
7197}
7198
7199/*
7200 * 'start' is inclusive, 'end' is not. 7144 * 'start' is inclusive, 'end' is not.
7201 */ 7145 */
7202int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh, 7146int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
@@ -7270,18 +7214,3 @@ out_commit:
7270out: 7214out:
7271 return ret; 7215 return ret;
7272} 7216}
7273
7274static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc)
7275{
7276 /*
7277 * The caller is responsible for completing deallocation
7278 * before freeing the context.
7279 */
7280 if (tc->tc_dealloc.c_first_suballocator != NULL)
7281 mlog(ML_NOTICE,
7282 "Truncate completion has non-empty dealloc context\n");
7283
7284 brelse(tc->tc_last_eb_bh);
7285
7286 kfree(tc);
7287}
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 55762b554b9..3bd08a03251 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -228,10 +228,6 @@ struct ocfs2_truncate_context {
228 228
229int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle, 229int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
230 u64 range_start, u64 range_end); 230 u64 range_start, u64 range_end);
231int ocfs2_prepare_truncate(struct ocfs2_super *osb,
232 struct inode *inode,
233 struct buffer_head *fe_bh,
234 struct ocfs2_truncate_context **tc);
235int ocfs2_commit_truncate(struct ocfs2_super *osb, 231int ocfs2_commit_truncate(struct ocfs2_super *osb,
236 struct inode *inode, 232 struct inode *inode,
237 struct buffer_head *di_bh); 233 struct buffer_head *di_bh);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index f1e962cb3b7..1fbb0e20131 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -573,11 +573,14 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
573 /* this io's submitter should not have unlocked this before we could */ 573 /* this io's submitter should not have unlocked this before we could */
574 BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); 574 BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
575 575
576 if (ocfs2_iocb_is_sem_locked(iocb)) {
577 up_read(&inode->i_alloc_sem);
578 ocfs2_iocb_clear_sem_locked(iocb);
579 }
580
576 ocfs2_iocb_clear_rw_locked(iocb); 581 ocfs2_iocb_clear_rw_locked(iocb);
577 582
578 level = ocfs2_iocb_rw_locked_level(iocb); 583 level = ocfs2_iocb_rw_locked_level(iocb);
579 if (!level)
580 up_read(&inode->i_alloc_sem);
581 ocfs2_rw_unlock(inode, level); 584 ocfs2_rw_unlock(inode, level);
582 585
583 if (is_async) 586 if (is_async)
@@ -1627,6 +1630,43 @@ static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh,
1627 return ret; 1630 return ret;
1628} 1631}
1629 1632
1633/*
1634 * Try to flush truncate logs if we can free enough clusters from it.
1635 * As for return value, "< 0" means error, "0" no space and "1" means
1636 * we have freed enough spaces and let the caller try to allocate again.
1637 */
1638static int ocfs2_try_to_free_truncate_log(struct ocfs2_super *osb,
1639 unsigned int needed)
1640{
1641 tid_t target;
1642 int ret = 0;
1643 unsigned int truncated_clusters;
1644
1645 mutex_lock(&osb->osb_tl_inode->i_mutex);
1646 truncated_clusters = osb->truncated_clusters;
1647 mutex_unlock(&osb->osb_tl_inode->i_mutex);
1648
1649 /*
1650 * Check whether we can succeed in allocating if we free
1651 * the truncate log.
1652 */
1653 if (truncated_clusters < needed)
1654 goto out;
1655
1656 ret = ocfs2_flush_truncate_log(osb);
1657 if (ret) {
1658 mlog_errno(ret);
1659 goto out;
1660 }
1661
1662 if (jbd2_journal_start_commit(osb->journal->j_journal, &target)) {
1663 jbd2_log_wait_commit(osb->journal->j_journal, target);
1664 ret = 1;
1665 }
1666out:
1667 return ret;
1668}
1669
1630int ocfs2_write_begin_nolock(struct file *filp, 1670int ocfs2_write_begin_nolock(struct file *filp,
1631 struct address_space *mapping, 1671 struct address_space *mapping,
1632 loff_t pos, unsigned len, unsigned flags, 1672 loff_t pos, unsigned len, unsigned flags,
@@ -1634,7 +1674,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
1634 struct buffer_head *di_bh, struct page *mmap_page) 1674 struct buffer_head *di_bh, struct page *mmap_page)
1635{ 1675{
1636 int ret, cluster_of_pages, credits = OCFS2_INODE_UPDATE_CREDITS; 1676 int ret, cluster_of_pages, credits = OCFS2_INODE_UPDATE_CREDITS;
1637 unsigned int clusters_to_alloc, extents_to_split; 1677 unsigned int clusters_to_alloc, extents_to_split, clusters_need = 0;
1638 struct ocfs2_write_ctxt *wc; 1678 struct ocfs2_write_ctxt *wc;
1639 struct inode *inode = mapping->host; 1679 struct inode *inode = mapping->host;
1640 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1680 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
@@ -1643,7 +1683,9 @@ int ocfs2_write_begin_nolock(struct file *filp,
1643 struct ocfs2_alloc_context *meta_ac = NULL; 1683 struct ocfs2_alloc_context *meta_ac = NULL;
1644 handle_t *handle; 1684 handle_t *handle;
1645 struct ocfs2_extent_tree et; 1685 struct ocfs2_extent_tree et;
1686 int try_free = 1, ret1;
1646 1687
1688try_again:
1647 ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh); 1689 ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh);
1648 if (ret) { 1690 if (ret) {
1649 mlog_errno(ret); 1691 mlog_errno(ret);
@@ -1678,6 +1720,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
1678 mlog_errno(ret); 1720 mlog_errno(ret);
1679 goto out; 1721 goto out;
1680 } else if (ret == 1) { 1722 } else if (ret == 1) {
1723 clusters_need = wc->w_clen;
1681 ret = ocfs2_refcount_cow(inode, filp, di_bh, 1724 ret = ocfs2_refcount_cow(inode, filp, di_bh,
1682 wc->w_cpos, wc->w_clen, UINT_MAX); 1725 wc->w_cpos, wc->w_clen, UINT_MAX);
1683 if (ret) { 1726 if (ret) {
@@ -1692,6 +1735,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
1692 mlog_errno(ret); 1735 mlog_errno(ret);
1693 goto out; 1736 goto out;
1694 } 1737 }
1738 clusters_need += clusters_to_alloc;
1695 1739
1696 di = (struct ocfs2_dinode *)wc->w_di_bh->b_data; 1740 di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
1697 1741
@@ -1814,6 +1858,22 @@ out:
1814 ocfs2_free_alloc_context(data_ac); 1858 ocfs2_free_alloc_context(data_ac);
1815 if (meta_ac) 1859 if (meta_ac)
1816 ocfs2_free_alloc_context(meta_ac); 1860 ocfs2_free_alloc_context(meta_ac);
1861
1862 if (ret == -ENOSPC && try_free) {
1863 /*
1864 * Try to free some truncate log so that we can have enough
1865 * clusters to allocate.
1866 */
1867 try_free = 0;
1868
1869 ret1 = ocfs2_try_to_free_truncate_log(osb, clusters_need);
1870 if (ret1 == 1)
1871 goto try_again;
1872
1873 if (ret1 < 0)
1874 mlog_errno(ret1);
1875 }
1876
1817 return ret; 1877 return ret;
1818} 1878}
1819 1879
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 76bfdfda691..eceb456037c 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -68,8 +68,27 @@ static inline void ocfs2_iocb_set_rw_locked(struct kiocb *iocb, int level)
68 else 68 else
69 clear_bit(1, (unsigned long *)&iocb->private); 69 clear_bit(1, (unsigned long *)&iocb->private);
70} 70}
71
72/*
73 * Using a named enum representing lock types in terms of #N bit stored in
74 * iocb->private, which is going to be used for communication bewteen
75 * ocfs2_dio_end_io() and ocfs2_file_aio_write/read().
76 */
77enum ocfs2_iocb_lock_bits {
78 OCFS2_IOCB_RW_LOCK = 0,
79 OCFS2_IOCB_RW_LOCK_LEVEL,
80 OCFS2_IOCB_SEM,
81 OCFS2_IOCB_NUM_LOCKS
82};
83
71#define ocfs2_iocb_clear_rw_locked(iocb) \ 84#define ocfs2_iocb_clear_rw_locked(iocb) \
72 clear_bit(0, (unsigned long *)&iocb->private) 85 clear_bit(OCFS2_IOCB_RW_LOCK, (unsigned long *)&iocb->private)
73#define ocfs2_iocb_rw_locked_level(iocb) \ 86#define ocfs2_iocb_rw_locked_level(iocb) \
74 test_bit(1, (unsigned long *)&iocb->private) 87 test_bit(OCFS2_IOCB_RW_LOCK_LEVEL, (unsigned long *)&iocb->private)
88#define ocfs2_iocb_set_sem_locked(iocb) \
89 set_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private)
90#define ocfs2_iocb_clear_sem_locked(iocb) \
91 clear_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private)
92#define ocfs2_iocb_is_sem_locked(iocb) \
93 test_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private)
75#endif /* OCFS2_FILE_H */ 94#endif /* OCFS2_FILE_H */
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 9f26ac9be2a..a6cc05302e9 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -82,6 +82,7 @@ static unsigned long o2hb_failed_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)];
82#define O2HB_DB_TYPE_REGION_LIVENODES 4 82#define O2HB_DB_TYPE_REGION_LIVENODES 4
83#define O2HB_DB_TYPE_REGION_NUMBER 5 83#define O2HB_DB_TYPE_REGION_NUMBER 5
84#define O2HB_DB_TYPE_REGION_ELAPSED_TIME 6 84#define O2HB_DB_TYPE_REGION_ELAPSED_TIME 6
85#define O2HB_DB_TYPE_REGION_PINNED 7
85struct o2hb_debug_buf { 86struct o2hb_debug_buf {
86 int db_type; 87 int db_type;
87 int db_size; 88 int db_size;
@@ -101,6 +102,7 @@ static struct o2hb_debug_buf *o2hb_db_failedregions;
101#define O2HB_DEBUG_FAILEDREGIONS "failed_regions" 102#define O2HB_DEBUG_FAILEDREGIONS "failed_regions"
102#define O2HB_DEBUG_REGION_NUMBER "num" 103#define O2HB_DEBUG_REGION_NUMBER "num"
103#define O2HB_DEBUG_REGION_ELAPSED_TIME "elapsed_time_in_ms" 104#define O2HB_DEBUG_REGION_ELAPSED_TIME "elapsed_time_in_ms"
105#define O2HB_DEBUG_REGION_PINNED "pinned"
104 106
105static struct dentry *o2hb_debug_dir; 107static struct dentry *o2hb_debug_dir;
106static struct dentry *o2hb_debug_livenodes; 108static struct dentry *o2hb_debug_livenodes;
@@ -132,6 +134,33 @@ char *o2hb_heartbeat_mode_desc[O2HB_HEARTBEAT_NUM_MODES] = {
132unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD; 134unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD;
133unsigned int o2hb_heartbeat_mode = O2HB_HEARTBEAT_LOCAL; 135unsigned int o2hb_heartbeat_mode = O2HB_HEARTBEAT_LOCAL;
134 136
137/*
138 * o2hb_dependent_users tracks the number of registered callbacks that depend
139 * on heartbeat. o2net and o2dlm are two entities that register this callback.
140 * However only o2dlm depends on the heartbeat. It does not want the heartbeat
141 * to stop while a dlm domain is still active.
142 */
143unsigned int o2hb_dependent_users;
144
145/*
146 * In global heartbeat mode, all regions are pinned if there are one or more
147 * dependent users and the quorum region count is <= O2HB_PIN_CUT_OFF. All
148 * regions are unpinned if the region count exceeds the cut off or the number
149 * of dependent users falls to zero.
150 */
151#define O2HB_PIN_CUT_OFF 3
152
153/*
154 * In local heartbeat mode, we assume the dlm domain name to be the same as
155 * region uuid. This is true for domains created for the file system but not
156 * necessarily true for userdlm domains. This is a known limitation.
157 *
158 * In global heartbeat mode, we pin/unpin all o2hb regions. This solution
159 * works for both file system and userdlm domains.
160 */
161static int o2hb_region_pin(const char *region_uuid);
162static void o2hb_region_unpin(const char *region_uuid);
163
135/* Only sets a new threshold if there are no active regions. 164/* Only sets a new threshold if there are no active regions.
136 * 165 *
137 * No locking or otherwise interesting code is required for reading 166 * No locking or otherwise interesting code is required for reading
@@ -186,7 +215,9 @@ struct o2hb_region {
186 struct config_item hr_item; 215 struct config_item hr_item;
187 216
188 struct list_head hr_all_item; 217 struct list_head hr_all_item;
189 unsigned hr_unclean_stop:1; 218 unsigned hr_unclean_stop:1,
219 hr_item_pinned:1,
220 hr_item_dropped:1;
190 221
191 /* protected by the hr_callback_sem */ 222 /* protected by the hr_callback_sem */
192 struct task_struct *hr_task; 223 struct task_struct *hr_task;
@@ -212,9 +243,11 @@ struct o2hb_region {
212 struct dentry *hr_debug_livenodes; 243 struct dentry *hr_debug_livenodes;
213 struct dentry *hr_debug_regnum; 244 struct dentry *hr_debug_regnum;
214 struct dentry *hr_debug_elapsed_time; 245 struct dentry *hr_debug_elapsed_time;
246 struct dentry *hr_debug_pinned;
215 struct o2hb_debug_buf *hr_db_livenodes; 247 struct o2hb_debug_buf *hr_db_livenodes;
216 struct o2hb_debug_buf *hr_db_regnum; 248 struct o2hb_debug_buf *hr_db_regnum;
217 struct o2hb_debug_buf *hr_db_elapsed_time; 249 struct o2hb_debug_buf *hr_db_elapsed_time;
250 struct o2hb_debug_buf *hr_db_pinned;
218 251
219 /* let the person setting up hb wait for it to return until it 252 /* let the person setting up hb wait for it to return until it
220 * has reached a 'steady' state. This will be fixed when we have 253 * has reached a 'steady' state. This will be fixed when we have
@@ -307,8 +340,7 @@ static void o2hb_arm_write_timeout(struct o2hb_region *reg)
307 340
308static void o2hb_disarm_write_timeout(struct o2hb_region *reg) 341static void o2hb_disarm_write_timeout(struct o2hb_region *reg)
309{ 342{
310 cancel_delayed_work(&reg->hr_write_timeout_work); 343 cancel_delayed_work_sync(&reg->hr_write_timeout_work);
311 flush_scheduled_work();
312} 344}
313 345
314static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc) 346static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc)
@@ -702,6 +734,14 @@ static void o2hb_set_quorum_device(struct o2hb_region *reg,
702 config_item_name(&reg->hr_item)); 734 config_item_name(&reg->hr_item));
703 735
704 set_bit(reg->hr_region_num, o2hb_quorum_region_bitmap); 736 set_bit(reg->hr_region_num, o2hb_quorum_region_bitmap);
737
738 /*
739 * If global heartbeat active, unpin all regions if the
740 * region count > CUT_OFF
741 */
742 if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
743 O2NM_MAX_REGIONS) > O2HB_PIN_CUT_OFF)
744 o2hb_region_unpin(NULL);
705} 745}
706 746
707static int o2hb_check_slot(struct o2hb_region *reg, 747static int o2hb_check_slot(struct o2hb_region *reg,
@@ -1042,6 +1082,9 @@ static int o2hb_thread(void *data)
1042 1082
1043 set_user_nice(current, -20); 1083 set_user_nice(current, -20);
1044 1084
1085 /* Pin node */
1086 o2nm_depend_this_node();
1087
1045 while (!kthread_should_stop() && !reg->hr_unclean_stop) { 1088 while (!kthread_should_stop() && !reg->hr_unclean_stop) {
1046 /* We track the time spent inside 1089 /* We track the time spent inside
1047 * o2hb_do_disk_heartbeat so that we avoid more than 1090 * o2hb_do_disk_heartbeat so that we avoid more than
@@ -1091,6 +1134,9 @@ static int o2hb_thread(void *data)
1091 mlog_errno(ret); 1134 mlog_errno(ret);
1092 } 1135 }
1093 1136
1137 /* Unpin node */
1138 o2nm_undepend_this_node();
1139
1094 mlog(ML_HEARTBEAT|ML_KTHREAD, "hb thread exiting\n"); 1140 mlog(ML_HEARTBEAT|ML_KTHREAD, "hb thread exiting\n");
1095 1141
1096 return 0; 1142 return 0;
@@ -1143,6 +1189,12 @@ static int o2hb_debug_open(struct inode *inode, struct file *file)
1143 reg->hr_last_timeout_start)); 1189 reg->hr_last_timeout_start));
1144 goto done; 1190 goto done;
1145 1191
1192 case O2HB_DB_TYPE_REGION_PINNED:
1193 reg = (struct o2hb_region *)db->db_data;
1194 out += snprintf(buf + out, PAGE_SIZE - out, "%u\n",
1195 !!reg->hr_item_pinned);
1196 goto done;
1197
1146 default: 1198 default:
1147 goto done; 1199 goto done;
1148 } 1200 }
@@ -1316,6 +1368,8 @@ int o2hb_init(void)
1316 memset(o2hb_quorum_region_bitmap, 0, sizeof(o2hb_quorum_region_bitmap)); 1368 memset(o2hb_quorum_region_bitmap, 0, sizeof(o2hb_quorum_region_bitmap));
1317 memset(o2hb_failed_region_bitmap, 0, sizeof(o2hb_failed_region_bitmap)); 1369 memset(o2hb_failed_region_bitmap, 0, sizeof(o2hb_failed_region_bitmap));
1318 1370
1371 o2hb_dependent_users = 0;
1372
1319 return o2hb_debug_init(); 1373 return o2hb_debug_init();
1320} 1374}
1321 1375
@@ -1385,6 +1439,7 @@ static void o2hb_region_release(struct config_item *item)
1385 debugfs_remove(reg->hr_debug_livenodes); 1439 debugfs_remove(reg->hr_debug_livenodes);
1386 debugfs_remove(reg->hr_debug_regnum); 1440 debugfs_remove(reg->hr_debug_regnum);
1387 debugfs_remove(reg->hr_debug_elapsed_time); 1441 debugfs_remove(reg->hr_debug_elapsed_time);
1442 debugfs_remove(reg->hr_debug_pinned);
1388 debugfs_remove(reg->hr_debug_dir); 1443 debugfs_remove(reg->hr_debug_dir);
1389 1444
1390 spin_lock(&o2hb_live_lock); 1445 spin_lock(&o2hb_live_lock);
@@ -1949,6 +2004,18 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir)
1949 goto bail; 2004 goto bail;
1950 } 2005 }
1951 2006
2007 reg->hr_debug_pinned =
2008 o2hb_debug_create(O2HB_DEBUG_REGION_PINNED,
2009 reg->hr_debug_dir,
2010 &(reg->hr_db_pinned),
2011 sizeof(*(reg->hr_db_pinned)),
2012 O2HB_DB_TYPE_REGION_PINNED,
2013 0, 0, reg);
2014 if (!reg->hr_debug_pinned) {
2015 mlog_errno(ret);
2016 goto bail;
2017 }
2018
1952 ret = 0; 2019 ret = 0;
1953bail: 2020bail:
1954 return ret; 2021 return ret;
@@ -2003,15 +2070,20 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group,
2003{ 2070{
2004 struct task_struct *hb_task; 2071 struct task_struct *hb_task;
2005 struct o2hb_region *reg = to_o2hb_region(item); 2072 struct o2hb_region *reg = to_o2hb_region(item);
2073 int quorum_region = 0;
2006 2074
2007 /* stop the thread when the user removes the region dir */ 2075 /* stop the thread when the user removes the region dir */
2008 spin_lock(&o2hb_live_lock); 2076 spin_lock(&o2hb_live_lock);
2009 if (o2hb_global_heartbeat_active()) { 2077 if (o2hb_global_heartbeat_active()) {
2010 clear_bit(reg->hr_region_num, o2hb_region_bitmap); 2078 clear_bit(reg->hr_region_num, o2hb_region_bitmap);
2011 clear_bit(reg->hr_region_num, o2hb_live_region_bitmap); 2079 clear_bit(reg->hr_region_num, o2hb_live_region_bitmap);
2080 if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap))
2081 quorum_region = 1;
2082 clear_bit(reg->hr_region_num, o2hb_quorum_region_bitmap);
2012 } 2083 }
2013 hb_task = reg->hr_task; 2084 hb_task = reg->hr_task;
2014 reg->hr_task = NULL; 2085 reg->hr_task = NULL;
2086 reg->hr_item_dropped = 1;
2015 spin_unlock(&o2hb_live_lock); 2087 spin_unlock(&o2hb_live_lock);
2016 2088
2017 if (hb_task) 2089 if (hb_task)
@@ -2029,7 +2101,27 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group,
2029 if (o2hb_global_heartbeat_active()) 2101 if (o2hb_global_heartbeat_active())
2030 printk(KERN_NOTICE "o2hb: Heartbeat stopped on region %s\n", 2102 printk(KERN_NOTICE "o2hb: Heartbeat stopped on region %s\n",
2031 config_item_name(&reg->hr_item)); 2103 config_item_name(&reg->hr_item));
2104
2032 config_item_put(item); 2105 config_item_put(item);
2106
2107 if (!o2hb_global_heartbeat_active() || !quorum_region)
2108 return;
2109
2110 /*
2111 * If global heartbeat active and there are dependent users,
2112 * pin all regions if quorum region count <= CUT_OFF
2113 */
2114 spin_lock(&o2hb_live_lock);
2115
2116 if (!o2hb_dependent_users)
2117 goto unlock;
2118
2119 if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
2120 O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF)
2121 o2hb_region_pin(NULL);
2122
2123unlock:
2124 spin_unlock(&o2hb_live_lock);
2033} 2125}
2034 2126
2035struct o2hb_heartbeat_group_attribute { 2127struct o2hb_heartbeat_group_attribute {
@@ -2215,63 +2307,138 @@ void o2hb_setup_callback(struct o2hb_callback_func *hc,
2215} 2307}
2216EXPORT_SYMBOL_GPL(o2hb_setup_callback); 2308EXPORT_SYMBOL_GPL(o2hb_setup_callback);
2217 2309
2218static struct o2hb_region *o2hb_find_region(const char *region_uuid) 2310/*
2311 * In local heartbeat mode, region_uuid passed matches the dlm domain name.
2312 * In global heartbeat mode, region_uuid passed is NULL.
2313 *
2314 * In local, we only pin the matching region. In global we pin all the active
2315 * regions.
2316 */
2317static int o2hb_region_pin(const char *region_uuid)
2219{ 2318{
2220 struct o2hb_region *p, *reg = NULL; 2319 int ret = 0, found = 0;
2320 struct o2hb_region *reg;
2321 char *uuid;
2221 2322
2222 assert_spin_locked(&o2hb_live_lock); 2323 assert_spin_locked(&o2hb_live_lock);
2223 2324
2224 list_for_each_entry(p, &o2hb_all_regions, hr_all_item) { 2325 list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) {
2225 if (!strcmp(region_uuid, config_item_name(&p->hr_item))) { 2326 uuid = config_item_name(&reg->hr_item);
2226 reg = p; 2327
2227 break; 2328 /* local heartbeat */
2329 if (region_uuid) {
2330 if (strcmp(region_uuid, uuid))
2331 continue;
2332 found = 1;
2333 }
2334
2335 if (reg->hr_item_pinned || reg->hr_item_dropped)
2336 goto skip_pin;
2337
2338 /* Ignore ENOENT only for local hb (userdlm domain) */
2339 ret = o2nm_depend_item(&reg->hr_item);
2340 if (!ret) {
2341 mlog(ML_CLUSTER, "Pin region %s\n", uuid);
2342 reg->hr_item_pinned = 1;
2343 } else {
2344 if (ret == -ENOENT && found)
2345 ret = 0;
2346 else {
2347 mlog(ML_ERROR, "Pin region %s fails with %d\n",
2348 uuid, ret);
2349 break;
2350 }
2228 } 2351 }
2352skip_pin:
2353 if (found)
2354 break;
2229 } 2355 }
2230 2356
2231 return reg; 2357 return ret;
2232} 2358}
2233 2359
2234static int o2hb_region_get(const char *region_uuid) 2360/*
2361 * In local heartbeat mode, region_uuid passed matches the dlm domain name.
2362 * In global heartbeat mode, region_uuid passed is NULL.
2363 *
2364 * In local, we only unpin the matching region. In global we unpin all the
2365 * active regions.
2366 */
2367static void o2hb_region_unpin(const char *region_uuid)
2235{ 2368{
2236 int ret = 0;
2237 struct o2hb_region *reg; 2369 struct o2hb_region *reg;
2370 char *uuid;
2371 int found = 0;
2238 2372
2239 spin_lock(&o2hb_live_lock); 2373 assert_spin_locked(&o2hb_live_lock);
2240 2374
2241 reg = o2hb_find_region(region_uuid); 2375 list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) {
2242 if (!reg) 2376 uuid = config_item_name(&reg->hr_item);
2243 ret = -ENOENT; 2377 if (region_uuid) {
2244 spin_unlock(&o2hb_live_lock); 2378 if (strcmp(region_uuid, uuid))
2379 continue;
2380 found = 1;
2381 }
2245 2382
2246 if (ret) 2383 if (reg->hr_item_pinned) {
2247 goto out; 2384 mlog(ML_CLUSTER, "Unpin region %s\n", uuid);
2385 o2nm_undepend_item(&reg->hr_item);
2386 reg->hr_item_pinned = 0;
2387 }
2388 if (found)
2389 break;
2390 }
2391}
2248 2392
2249 ret = o2nm_depend_this_node(); 2393static int o2hb_region_inc_user(const char *region_uuid)
2250 if (ret) 2394{
2251 goto out; 2395 int ret = 0;
2252 2396
2253 ret = o2nm_depend_item(&reg->hr_item); 2397 spin_lock(&o2hb_live_lock);
2254 if (ret)
2255 o2nm_undepend_this_node();
2256 2398
2257out: 2399 /* local heartbeat */
2400 if (!o2hb_global_heartbeat_active()) {
2401 ret = o2hb_region_pin(region_uuid);
2402 goto unlock;
2403 }
2404
2405 /*
2406 * if global heartbeat active and this is the first dependent user,
2407 * pin all regions if quorum region count <= CUT_OFF
2408 */
2409 o2hb_dependent_users++;
2410 if (o2hb_dependent_users > 1)
2411 goto unlock;
2412
2413 if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
2414 O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF)
2415 ret = o2hb_region_pin(NULL);
2416
2417unlock:
2418 spin_unlock(&o2hb_live_lock);
2258 return ret; 2419 return ret;
2259} 2420}
2260 2421
2261static void o2hb_region_put(const char *region_uuid) 2422void o2hb_region_dec_user(const char *region_uuid)
2262{ 2423{
2263 struct o2hb_region *reg;
2264
2265 spin_lock(&o2hb_live_lock); 2424 spin_lock(&o2hb_live_lock);
2266 2425
2267 reg = o2hb_find_region(region_uuid); 2426 /* local heartbeat */
2427 if (!o2hb_global_heartbeat_active()) {
2428 o2hb_region_unpin(region_uuid);
2429 goto unlock;
2430 }
2268 2431
2269 spin_unlock(&o2hb_live_lock); 2432 /*
2433 * if global heartbeat active and there are no dependent users,
2434 * unpin all quorum regions
2435 */
2436 o2hb_dependent_users--;
2437 if (!o2hb_dependent_users)
2438 o2hb_region_unpin(NULL);
2270 2439
2271 if (reg) { 2440unlock:
2272 o2nm_undepend_item(&reg->hr_item); 2441 spin_unlock(&o2hb_live_lock);
2273 o2nm_undepend_this_node();
2274 }
2275} 2442}
2276 2443
2277int o2hb_register_callback(const char *region_uuid, 2444int o2hb_register_callback(const char *region_uuid,
@@ -2292,9 +2459,11 @@ int o2hb_register_callback(const char *region_uuid,
2292 } 2459 }
2293 2460
2294 if (region_uuid) { 2461 if (region_uuid) {
2295 ret = o2hb_region_get(region_uuid); 2462 ret = o2hb_region_inc_user(region_uuid);
2296 if (ret) 2463 if (ret) {
2464 mlog_errno(ret);
2297 goto out; 2465 goto out;
2466 }
2298 } 2467 }
2299 2468
2300 down_write(&o2hb_callback_sem); 2469 down_write(&o2hb_callback_sem);
@@ -2312,7 +2481,7 @@ int o2hb_register_callback(const char *region_uuid,
2312 up_write(&o2hb_callback_sem); 2481 up_write(&o2hb_callback_sem);
2313 ret = 0; 2482 ret = 0;
2314out: 2483out:
2315 mlog(ML_HEARTBEAT, "returning %d on behalf of %p for funcs %p\n", 2484 mlog(ML_CLUSTER, "returning %d on behalf of %p for funcs %p\n",
2316 ret, __builtin_return_address(0), hc); 2485 ret, __builtin_return_address(0), hc);
2317 return ret; 2486 return ret;
2318} 2487}
@@ -2323,7 +2492,7 @@ void o2hb_unregister_callback(const char *region_uuid,
2323{ 2492{
2324 BUG_ON(hc->hc_magic != O2HB_CB_MAGIC); 2493 BUG_ON(hc->hc_magic != O2HB_CB_MAGIC);
2325 2494
2326 mlog(ML_HEARTBEAT, "on behalf of %p for funcs %p\n", 2495 mlog(ML_CLUSTER, "on behalf of %p for funcs %p\n",
2327 __builtin_return_address(0), hc); 2496 __builtin_return_address(0), hc);
2328 2497
2329 /* XXX Can this happen _with_ a region reference? */ 2498 /* XXX Can this happen _with_ a region reference? */
@@ -2331,7 +2500,7 @@ void o2hb_unregister_callback(const char *region_uuid,
2331 return; 2500 return;
2332 2501
2333 if (region_uuid) 2502 if (region_uuid)
2334 o2hb_region_put(region_uuid); 2503 o2hb_region_dec_user(region_uuid);
2335 2504
2336 down_write(&o2hb_callback_sem); 2505 down_write(&o2hb_callback_sem);
2337 2506
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index c7fba396392..6c61771469a 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -113,10 +113,11 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
113 define_mask(QUOTA), 113 define_mask(QUOTA),
114 define_mask(REFCOUNT), 114 define_mask(REFCOUNT),
115 define_mask(BASTS), 115 define_mask(BASTS),
116 define_mask(RESERVATIONS),
117 define_mask(CLUSTER),
116 define_mask(ERROR), 118 define_mask(ERROR),
117 define_mask(NOTICE), 119 define_mask(NOTICE),
118 define_mask(KTHREAD), 120 define_mask(KTHREAD),
119 define_mask(RESERVATIONS),
120}; 121};
121 122
122static struct attribute *mlog_attr_ptrs[MLOG_MAX_BITS] = {NULL, }; 123static struct attribute *mlog_attr_ptrs[MLOG_MAX_BITS] = {NULL, };
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index ea2ed9f56c9..34d6544357d 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -81,7 +81,7 @@
81#include <linux/sched.h> 81#include <linux/sched.h>
82 82
83/* bits that are frequently given and infrequently matched in the low word */ 83/* bits that are frequently given and infrequently matched in the low word */
84/* NOTE: If you add a flag, you need to also update mlog.c! */ 84/* NOTE: If you add a flag, you need to also update masklog.c! */
85#define ML_ENTRY 0x0000000000000001ULL /* func call entry */ 85#define ML_ENTRY 0x0000000000000001ULL /* func call entry */
86#define ML_EXIT 0x0000000000000002ULL /* func call exit */ 86#define ML_EXIT 0x0000000000000002ULL /* func call exit */
87#define ML_TCP 0x0000000000000004ULL /* net cluster/tcp.c */ 87#define ML_TCP 0x0000000000000004ULL /* net cluster/tcp.c */
@@ -114,13 +114,14 @@
114#define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */ 114#define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */
115#define ML_QUOTA 0x0000000040000000ULL /* ocfs2 quota operations */ 115#define ML_QUOTA 0x0000000040000000ULL /* ocfs2 quota operations */
116#define ML_REFCOUNT 0x0000000080000000ULL /* refcount tree operations */ 116#define ML_REFCOUNT 0x0000000080000000ULL /* refcount tree operations */
117#define ML_BASTS 0x0000001000000000ULL /* dlmglue asts and basts */ 117#define ML_BASTS 0x0000000100000000ULL /* dlmglue asts and basts */
118#define ML_RESERVATIONS 0x0000000200000000ULL /* ocfs2 alloc reservations */
119#define ML_CLUSTER 0x0000000400000000ULL /* cluster stack */
120
118/* bits that are infrequently given and frequently matched in the high word */ 121/* bits that are infrequently given and frequently matched in the high word */
119#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ 122#define ML_ERROR 0x1000000000000000ULL /* sent to KERN_ERR */
120#define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ 123#define ML_NOTICE 0x2000000000000000ULL /* setn to KERN_NOTICE */
121#define ML_KTHREAD 0x0000000400000000ULL /* kernel thread activity */ 124#define ML_KTHREAD 0x4000000000000000ULL /* kernel thread activity */
122#define ML_RESERVATIONS 0x0000000800000000ULL /* ocfs2 alloc reservations */
123#define ML_CLUSTER 0x0000001000000000ULL /* cluster stack */
124 125
125#define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE) 126#define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE)
126#define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT) 127#define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT)
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c
index a3f150e52b0..3a5835904b3 100644
--- a/fs/ocfs2/cluster/netdebug.c
+++ b/fs/ocfs2/cluster/netdebug.c
@@ -46,10 +46,15 @@
46#define O2NET_DEBUG_DIR "o2net" 46#define O2NET_DEBUG_DIR "o2net"
47#define SC_DEBUG_NAME "sock_containers" 47#define SC_DEBUG_NAME "sock_containers"
48#define NST_DEBUG_NAME "send_tracking" 48#define NST_DEBUG_NAME "send_tracking"
49#define STATS_DEBUG_NAME "stats"
50
51#define SHOW_SOCK_CONTAINERS 0
52#define SHOW_SOCK_STATS 1
49 53
50static struct dentry *o2net_dentry; 54static struct dentry *o2net_dentry;
51static struct dentry *sc_dentry; 55static struct dentry *sc_dentry;
52static struct dentry *nst_dentry; 56static struct dentry *nst_dentry;
57static struct dentry *stats_dentry;
53 58
54static DEFINE_SPINLOCK(o2net_debug_lock); 59static DEFINE_SPINLOCK(o2net_debug_lock);
55 60
@@ -123,37 +128,42 @@ static void *nst_seq_next(struct seq_file *seq, void *v, loff_t *pos)
123static int nst_seq_show(struct seq_file *seq, void *v) 128static int nst_seq_show(struct seq_file *seq, void *v)
124{ 129{
125 struct o2net_send_tracking *nst, *dummy_nst = seq->private; 130 struct o2net_send_tracking *nst, *dummy_nst = seq->private;
131 ktime_t now;
132 s64 sock, send, status;
126 133
127 spin_lock(&o2net_debug_lock); 134 spin_lock(&o2net_debug_lock);
128 nst = next_nst(dummy_nst); 135 nst = next_nst(dummy_nst);
136 if (!nst)
137 goto out;
129 138
130 if (nst != NULL) { 139 now = ktime_get();
131 /* get_task_comm isn't exported. oh well. */ 140 sock = ktime_to_us(ktime_sub(now, nst->st_sock_time));
132 seq_printf(seq, "%p:\n" 141 send = ktime_to_us(ktime_sub(now, nst->st_send_time));
133 " pid: %lu\n" 142 status = ktime_to_us(ktime_sub(now, nst->st_status_time));
134 " tgid: %lu\n" 143
135 " process name: %s\n" 144 /* get_task_comm isn't exported. oh well. */
136 " node: %u\n" 145 seq_printf(seq, "%p:\n"
137 " sc: %p\n" 146 " pid: %lu\n"
138 " message id: %d\n" 147 " tgid: %lu\n"
139 " message type: %u\n" 148 " process name: %s\n"
140 " message key: 0x%08x\n" 149 " node: %u\n"
141 " sock acquiry: %lu.%ld\n" 150 " sc: %p\n"
142 " send start: %lu.%ld\n" 151 " message id: %d\n"
143 " wait start: %lu.%ld\n", 152 " message type: %u\n"
144 nst, (unsigned long)nst->st_task->pid, 153 " message key: 0x%08x\n"
145 (unsigned long)nst->st_task->tgid, 154 " sock acquiry: %lld usecs ago\n"
146 nst->st_task->comm, nst->st_node, 155 " send start: %lld usecs ago\n"
147 nst->st_sc, nst->st_id, nst->st_msg_type, 156 " wait start: %lld usecs ago\n",
148 nst->st_msg_key, 157 nst, (unsigned long)task_pid_nr(nst->st_task),
149 nst->st_sock_time.tv_sec, 158 (unsigned long)nst->st_task->tgid,
150 (long)nst->st_sock_time.tv_usec, 159 nst->st_task->comm, nst->st_node,
151 nst->st_send_time.tv_sec, 160 nst->st_sc, nst->st_id, nst->st_msg_type,
152 (long)nst->st_send_time.tv_usec, 161 nst->st_msg_key,
153 nst->st_status_time.tv_sec, 162 (long long)sock,
154 (long)nst->st_status_time.tv_usec); 163 (long long)send,
155 } 164 (long long)status);
156 165
166out:
157 spin_unlock(&o2net_debug_lock); 167 spin_unlock(&o2net_debug_lock);
158 168
159 return 0; 169 return 0;
@@ -228,6 +238,11 @@ void o2net_debug_del_sc(struct o2net_sock_container *sc)
228 spin_unlock(&o2net_debug_lock); 238 spin_unlock(&o2net_debug_lock);
229} 239}
230 240
241struct o2net_sock_debug {
242 int dbg_ctxt;
243 struct o2net_sock_container *dbg_sock;
244};
245
231static struct o2net_sock_container 246static struct o2net_sock_container
232 *next_sc(struct o2net_sock_container *sc_start) 247 *next_sc(struct o2net_sock_container *sc_start)
233{ 248{
@@ -253,7 +268,8 @@ static struct o2net_sock_container
253 268
254static void *sc_seq_start(struct seq_file *seq, loff_t *pos) 269static void *sc_seq_start(struct seq_file *seq, loff_t *pos)
255{ 270{
256 struct o2net_sock_container *sc, *dummy_sc = seq->private; 271 struct o2net_sock_debug *sd = seq->private;
272 struct o2net_sock_container *sc, *dummy_sc = sd->dbg_sock;
257 273
258 spin_lock(&o2net_debug_lock); 274 spin_lock(&o2net_debug_lock);
259 sc = next_sc(dummy_sc); 275 sc = next_sc(dummy_sc);
@@ -264,7 +280,8 @@ static void *sc_seq_start(struct seq_file *seq, loff_t *pos)
264 280
265static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos) 281static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
266{ 282{
267 struct o2net_sock_container *sc, *dummy_sc = seq->private; 283 struct o2net_sock_debug *sd = seq->private;
284 struct o2net_sock_container *sc, *dummy_sc = sd->dbg_sock;
268 285
269 spin_lock(&o2net_debug_lock); 286 spin_lock(&o2net_debug_lock);
270 sc = next_sc(dummy_sc); 287 sc = next_sc(dummy_sc);
@@ -276,65 +293,107 @@ static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
276 return sc; /* unused, just needs to be null when done */ 293 return sc; /* unused, just needs to be null when done */
277} 294}
278 295
279#define TV_SEC_USEC(TV) TV.tv_sec, (long)TV.tv_usec 296#ifdef CONFIG_OCFS2_FS_STATS
297# define sc_send_count(_s) ((_s)->sc_send_count)
298# define sc_recv_count(_s) ((_s)->sc_recv_count)
299# define sc_tv_acquiry_total_ns(_s) (ktime_to_ns((_s)->sc_tv_acquiry_total))
300# define sc_tv_send_total_ns(_s) (ktime_to_ns((_s)->sc_tv_send_total))
301# define sc_tv_status_total_ns(_s) (ktime_to_ns((_s)->sc_tv_status_total))
302# define sc_tv_process_total_ns(_s) (ktime_to_ns((_s)->sc_tv_process_total))
303#else
304# define sc_send_count(_s) (0U)
305# define sc_recv_count(_s) (0U)
306# define sc_tv_acquiry_total_ns(_s) (0LL)
307# define sc_tv_send_total_ns(_s) (0LL)
308# define sc_tv_status_total_ns(_s) (0LL)
309# define sc_tv_process_total_ns(_s) (0LL)
310#endif
311
312/* So that debugfs.ocfs2 can determine which format is being used */
313#define O2NET_STATS_STR_VERSION 1
314static void sc_show_sock_stats(struct seq_file *seq,
315 struct o2net_sock_container *sc)
316{
317 if (!sc)
318 return;
319
320 seq_printf(seq, "%d,%u,%lu,%lld,%lld,%lld,%lu,%lld\n", O2NET_STATS_STR_VERSION,
321 sc->sc_node->nd_num, (unsigned long)sc_send_count(sc),
322 (long long)sc_tv_acquiry_total_ns(sc),
323 (long long)sc_tv_send_total_ns(sc),
324 (long long)sc_tv_status_total_ns(sc),
325 (unsigned long)sc_recv_count(sc),
326 (long long)sc_tv_process_total_ns(sc));
327}
328
329static void sc_show_sock_container(struct seq_file *seq,
330 struct o2net_sock_container *sc)
331{
332 struct inet_sock *inet = NULL;
333 __be32 saddr = 0, daddr = 0;
334 __be16 sport = 0, dport = 0;
335
336 if (!sc)
337 return;
338
339 if (sc->sc_sock) {
340 inet = inet_sk(sc->sc_sock->sk);
341 /* the stack's structs aren't sparse endian clean */
342 saddr = (__force __be32)inet->inet_saddr;
343 daddr = (__force __be32)inet->inet_daddr;
344 sport = (__force __be16)inet->inet_sport;
345 dport = (__force __be16)inet->inet_dport;
346 }
347
348 /* XXX sigh, inet-> doesn't have sparse annotation so any
349 * use of it here generates a warning with -Wbitwise */
350 seq_printf(seq, "%p:\n"
351 " krefs: %d\n"
352 " sock: %pI4:%u -> "
353 "%pI4:%u\n"
354 " remote node: %s\n"
355 " page off: %zu\n"
356 " handshake ok: %u\n"
357 " timer: %lld usecs\n"
358 " data ready: %lld usecs\n"
359 " advance start: %lld usecs\n"
360 " advance stop: %lld usecs\n"
361 " func start: %lld usecs\n"
362 " func stop: %lld usecs\n"
363 " func key: 0x%08x\n"
364 " func type: %u\n",
365 sc,
366 atomic_read(&sc->sc_kref.refcount),
367 &saddr, inet ? ntohs(sport) : 0,
368 &daddr, inet ? ntohs(dport) : 0,
369 sc->sc_node->nd_name,
370 sc->sc_page_off,
371 sc->sc_handshake_ok,
372 (long long)ktime_to_us(sc->sc_tv_timer),
373 (long long)ktime_to_us(sc->sc_tv_data_ready),
374 (long long)ktime_to_us(sc->sc_tv_advance_start),
375 (long long)ktime_to_us(sc->sc_tv_advance_stop),
376 (long long)ktime_to_us(sc->sc_tv_func_start),
377 (long long)ktime_to_us(sc->sc_tv_func_stop),
378 sc->sc_msg_key,
379 sc->sc_msg_type);
380}
280 381
281static int sc_seq_show(struct seq_file *seq, void *v) 382static int sc_seq_show(struct seq_file *seq, void *v)
282{ 383{
283 struct o2net_sock_container *sc, *dummy_sc = seq->private; 384 struct o2net_sock_debug *sd = seq->private;
385 struct o2net_sock_container *sc, *dummy_sc = sd->dbg_sock;
284 386
285 spin_lock(&o2net_debug_lock); 387 spin_lock(&o2net_debug_lock);
286 sc = next_sc(dummy_sc); 388 sc = next_sc(dummy_sc);
287 389
288 if (sc != NULL) { 390 if (sc) {
289 struct inet_sock *inet = NULL; 391 if (sd->dbg_ctxt == SHOW_SOCK_CONTAINERS)
290 392 sc_show_sock_container(seq, sc);
291 __be32 saddr = 0, daddr = 0; 393 else
292 __be16 sport = 0, dport = 0; 394 sc_show_sock_stats(seq, sc);
293
294 if (sc->sc_sock) {
295 inet = inet_sk(sc->sc_sock->sk);
296 /* the stack's structs aren't sparse endian clean */
297 saddr = (__force __be32)inet->inet_saddr;
298 daddr = (__force __be32)inet->inet_daddr;
299 sport = (__force __be16)inet->inet_sport;
300 dport = (__force __be16)inet->inet_dport;
301 }
302
303 /* XXX sigh, inet-> doesn't have sparse annotation so any
304 * use of it here generates a warning with -Wbitwise */
305 seq_printf(seq, "%p:\n"
306 " krefs: %d\n"
307 " sock: %pI4:%u -> "
308 "%pI4:%u\n"
309 " remote node: %s\n"
310 " page off: %zu\n"
311 " handshake ok: %u\n"
312 " timer: %lu.%ld\n"
313 " data ready: %lu.%ld\n"
314 " advance start: %lu.%ld\n"
315 " advance stop: %lu.%ld\n"
316 " func start: %lu.%ld\n"
317 " func stop: %lu.%ld\n"
318 " func key: %u\n"
319 " func type: %u\n",
320 sc,
321 atomic_read(&sc->sc_kref.refcount),
322 &saddr, inet ? ntohs(sport) : 0,
323 &daddr, inet ? ntohs(dport) : 0,
324 sc->sc_node->nd_name,
325 sc->sc_page_off,
326 sc->sc_handshake_ok,
327 TV_SEC_USEC(sc->sc_tv_timer),
328 TV_SEC_USEC(sc->sc_tv_data_ready),
329 TV_SEC_USEC(sc->sc_tv_advance_start),
330 TV_SEC_USEC(sc->sc_tv_advance_stop),
331 TV_SEC_USEC(sc->sc_tv_func_start),
332 TV_SEC_USEC(sc->sc_tv_func_stop),
333 sc->sc_msg_key,
334 sc->sc_msg_type);
335 } 395 }
336 396
337
338 spin_unlock(&o2net_debug_lock); 397 spin_unlock(&o2net_debug_lock);
339 398
340 return 0; 399 return 0;
@@ -351,7 +410,7 @@ static const struct seq_operations sc_seq_ops = {
351 .show = sc_seq_show, 410 .show = sc_seq_show,
352}; 411};
353 412
354static int sc_fop_open(struct inode *inode, struct file *file) 413static int sc_common_open(struct file *file, struct o2net_sock_debug *sd)
355{ 414{
356 struct o2net_sock_container *dummy_sc; 415 struct o2net_sock_container *dummy_sc;
357 struct seq_file *seq; 416 struct seq_file *seq;
@@ -369,7 +428,8 @@ static int sc_fop_open(struct inode *inode, struct file *file)
369 goto out; 428 goto out;
370 429
371 seq = file->private_data; 430 seq = file->private_data;
372 seq->private = dummy_sc; 431 seq->private = sd;
432 sd->dbg_sock = dummy_sc;
373 o2net_debug_add_sc(dummy_sc); 433 o2net_debug_add_sc(dummy_sc);
374 434
375 dummy_sc = NULL; 435 dummy_sc = NULL;
@@ -382,12 +442,48 @@ out:
382static int sc_fop_release(struct inode *inode, struct file *file) 442static int sc_fop_release(struct inode *inode, struct file *file)
383{ 443{
384 struct seq_file *seq = file->private_data; 444 struct seq_file *seq = file->private_data;
385 struct o2net_sock_container *dummy_sc = seq->private; 445 struct o2net_sock_debug *sd = seq->private;
446 struct o2net_sock_container *dummy_sc = sd->dbg_sock;
386 447
387 o2net_debug_del_sc(dummy_sc); 448 o2net_debug_del_sc(dummy_sc);
388 return seq_release_private(inode, file); 449 return seq_release_private(inode, file);
389} 450}
390 451
452static int stats_fop_open(struct inode *inode, struct file *file)
453{
454 struct o2net_sock_debug *sd;
455
456 sd = kmalloc(sizeof(struct o2net_sock_debug), GFP_KERNEL);
457 if (sd == NULL)
458 return -ENOMEM;
459
460 sd->dbg_ctxt = SHOW_SOCK_STATS;
461 sd->dbg_sock = NULL;
462
463 return sc_common_open(file, sd);
464}
465
466static const struct file_operations stats_seq_fops = {
467 .open = stats_fop_open,
468 .read = seq_read,
469 .llseek = seq_lseek,
470 .release = sc_fop_release,
471};
472
473static int sc_fop_open(struct inode *inode, struct file *file)
474{
475 struct o2net_sock_debug *sd;
476
477 sd = kmalloc(sizeof(struct o2net_sock_debug), GFP_KERNEL);
478 if (sd == NULL)
479 return -ENOMEM;
480
481 sd->dbg_ctxt = SHOW_SOCK_CONTAINERS;
482 sd->dbg_sock = NULL;
483
484 return sc_common_open(file, sd);
485}
486
391static const struct file_operations sc_seq_fops = { 487static const struct file_operations sc_seq_fops = {
392 .open = sc_fop_open, 488 .open = sc_fop_open,
393 .read = seq_read, 489 .read = seq_read,
@@ -419,25 +515,29 @@ int o2net_debugfs_init(void)
419 goto bail; 515 goto bail;
420 } 516 }
421 517
518 stats_dentry = debugfs_create_file(STATS_DEBUG_NAME, S_IFREG|S_IRUSR,
519 o2net_dentry, NULL,
520 &stats_seq_fops);
521 if (!stats_dentry) {
522 mlog_errno(-ENOMEM);
523 goto bail;
524 }
525
422 return 0; 526 return 0;
423bail: 527bail:
424 if (sc_dentry) 528 debugfs_remove(stats_dentry);
425 debugfs_remove(sc_dentry); 529 debugfs_remove(sc_dentry);
426 if (nst_dentry) 530 debugfs_remove(nst_dentry);
427 debugfs_remove(nst_dentry); 531 debugfs_remove(o2net_dentry);
428 if (o2net_dentry)
429 debugfs_remove(o2net_dentry);
430 return -ENOMEM; 532 return -ENOMEM;
431} 533}
432 534
433void o2net_debugfs_exit(void) 535void o2net_debugfs_exit(void)
434{ 536{
435 if (sc_dentry) 537 debugfs_remove(stats_dentry);
436 debugfs_remove(sc_dentry); 538 debugfs_remove(sc_dentry);
437 if (nst_dentry) 539 debugfs_remove(nst_dentry);
438 debugfs_remove(nst_dentry); 540 debugfs_remove(o2net_dentry);
439 if (o2net_dentry)
440 debugfs_remove(o2net_dentry);
441} 541}
442 542
443#endif /* CONFIG_DEBUG_FS */ 543#endif /* CONFIG_DEBUG_FS */
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c
index cf3e1669621..a87366750f2 100644
--- a/fs/ocfs2/cluster/quorum.c
+++ b/fs/ocfs2/cluster/quorum.c
@@ -325,5 +325,7 @@ void o2quo_init(void)
325 325
326void o2quo_exit(void) 326void o2quo_exit(void)
327{ 327{
328 flush_scheduled_work(); 328 struct o2quo_state *qs = &o2quo_state;
329
330 flush_work_sync(&qs->qs_work);
329} 331}
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 9aa426e4212..3b11cb1e38f 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -153,63 +153,114 @@ static void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
153 nst->st_node = node; 153 nst->st_node = node;
154} 154}
155 155
156static void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) 156static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
157{ 157{
158 do_gettimeofday(&nst->st_sock_time); 158 nst->st_sock_time = ktime_get();
159} 159}
160 160
161static void o2net_set_nst_send_time(struct o2net_send_tracking *nst) 161static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
162{ 162{
163 do_gettimeofday(&nst->st_send_time); 163 nst->st_send_time = ktime_get();
164} 164}
165 165
166static void o2net_set_nst_status_time(struct o2net_send_tracking *nst) 166static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
167{ 167{
168 do_gettimeofday(&nst->st_status_time); 168 nst->st_status_time = ktime_get();
169} 169}
170 170
171static void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, 171static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
172 struct o2net_sock_container *sc) 172 struct o2net_sock_container *sc)
173{ 173{
174 nst->st_sc = sc; 174 nst->st_sc = sc;
175} 175}
176 176
177static void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id) 177static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst,
178 u32 msg_id)
178{ 179{
179 nst->st_id = msg_id; 180 nst->st_id = msg_id;
180} 181}
181 182
182#else /* CONFIG_DEBUG_FS */ 183static inline void o2net_set_sock_timer(struct o2net_sock_container *sc)
183
184static inline void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
185 u32 msgkey, struct task_struct *task, u8 node)
186{ 184{
185 sc->sc_tv_timer = ktime_get();
187} 186}
188 187
189static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) 188static inline void o2net_set_data_ready_time(struct o2net_sock_container *sc)
190{ 189{
190 sc->sc_tv_data_ready = ktime_get();
191} 191}
192 192
193static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst) 193static inline void o2net_set_advance_start_time(struct o2net_sock_container *sc)
194{ 194{
195 sc->sc_tv_advance_start = ktime_get();
195} 196}
196 197
197static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst) 198static inline void o2net_set_advance_stop_time(struct o2net_sock_container *sc)
198{ 199{
200 sc->sc_tv_advance_stop = ktime_get();
199} 201}
200 202
201static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, 203static inline void o2net_set_func_start_time(struct o2net_sock_container *sc)
202 struct o2net_sock_container *sc)
203{ 204{
205 sc->sc_tv_func_start = ktime_get();
204} 206}
205 207
206static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, 208static inline void o2net_set_func_stop_time(struct o2net_sock_container *sc)
207 u32 msg_id)
208{ 209{
210 sc->sc_tv_func_stop = ktime_get();
209} 211}
210 212
213static ktime_t o2net_get_func_run_time(struct o2net_sock_container *sc)
214{
215 return ktime_sub(sc->sc_tv_func_stop, sc->sc_tv_func_start);
216}
217#else /* CONFIG_DEBUG_FS */
218# define o2net_init_nst(a, b, c, d, e)
219# define o2net_set_nst_sock_time(a)
220# define o2net_set_nst_send_time(a)
221# define o2net_set_nst_status_time(a)
222# define o2net_set_nst_sock_container(a, b)
223# define o2net_set_nst_msg_id(a, b)
224# define o2net_set_sock_timer(a)
225# define o2net_set_data_ready_time(a)
226# define o2net_set_advance_start_time(a)
227# define o2net_set_advance_stop_time(a)
228# define o2net_set_func_start_time(a)
229# define o2net_set_func_stop_time(a)
230# define o2net_get_func_run_time(a) (ktime_t)0
211#endif /* CONFIG_DEBUG_FS */ 231#endif /* CONFIG_DEBUG_FS */
212 232
233#ifdef CONFIG_OCFS2_FS_STATS
234static void o2net_update_send_stats(struct o2net_send_tracking *nst,
235 struct o2net_sock_container *sc)
236{
237 sc->sc_tv_status_total = ktime_add(sc->sc_tv_status_total,
238 ktime_sub(ktime_get(),
239 nst->st_status_time));
240 sc->sc_tv_send_total = ktime_add(sc->sc_tv_send_total,
241 ktime_sub(nst->st_status_time,
242 nst->st_send_time));
243 sc->sc_tv_acquiry_total = ktime_add(sc->sc_tv_acquiry_total,
244 ktime_sub(nst->st_send_time,
245 nst->st_sock_time));
246 sc->sc_send_count++;
247}
248
249static void o2net_update_recv_stats(struct o2net_sock_container *sc)
250{
251 sc->sc_tv_process_total = ktime_add(sc->sc_tv_process_total,
252 o2net_get_func_run_time(sc));
253 sc->sc_recv_count++;
254}
255
256#else
257
258# define o2net_update_send_stats(a, b)
259
260# define o2net_update_recv_stats(sc)
261
262#endif /* CONFIG_OCFS2_FS_STATS */
263
213static inline int o2net_reconnect_delay(void) 264static inline int o2net_reconnect_delay(void)
214{ 265{
215 return o2nm_single_cluster->cl_reconnect_delay_ms; 266 return o2nm_single_cluster->cl_reconnect_delay_ms;
@@ -355,6 +406,7 @@ static void sc_kref_release(struct kref *kref)
355 sc->sc_sock = NULL; 406 sc->sc_sock = NULL;
356 } 407 }
357 408
409 o2nm_undepend_item(&sc->sc_node->nd_item);
358 o2nm_node_put(sc->sc_node); 410 o2nm_node_put(sc->sc_node);
359 sc->sc_node = NULL; 411 sc->sc_node = NULL;
360 412
@@ -376,6 +428,7 @@ static struct o2net_sock_container *sc_alloc(struct o2nm_node *node)
376{ 428{
377 struct o2net_sock_container *sc, *ret = NULL; 429 struct o2net_sock_container *sc, *ret = NULL;
378 struct page *page = NULL; 430 struct page *page = NULL;
431 int status = 0;
379 432
380 page = alloc_page(GFP_NOFS); 433 page = alloc_page(GFP_NOFS);
381 sc = kzalloc(sizeof(*sc), GFP_NOFS); 434 sc = kzalloc(sizeof(*sc), GFP_NOFS);
@@ -386,6 +439,13 @@ static struct o2net_sock_container *sc_alloc(struct o2nm_node *node)
386 o2nm_node_get(node); 439 o2nm_node_get(node);
387 sc->sc_node = node; 440 sc->sc_node = node;
388 441
442 /* pin the node item of the remote node */
443 status = o2nm_depend_item(&node->nd_item);
444 if (status) {
445 mlog_errno(status);
446 o2nm_node_put(node);
447 goto out;
448 }
389 INIT_WORK(&sc->sc_connect_work, o2net_sc_connect_completed); 449 INIT_WORK(&sc->sc_connect_work, o2net_sc_connect_completed);
390 INIT_WORK(&sc->sc_rx_work, o2net_rx_until_empty); 450 INIT_WORK(&sc->sc_rx_work, o2net_rx_until_empty);
391 INIT_WORK(&sc->sc_shutdown_work, o2net_shutdown_sc); 451 INIT_WORK(&sc->sc_shutdown_work, o2net_shutdown_sc);
@@ -546,7 +606,7 @@ static void o2net_data_ready(struct sock *sk, int bytes)
546 if (sk->sk_user_data) { 606 if (sk->sk_user_data) {
547 struct o2net_sock_container *sc = sk->sk_user_data; 607 struct o2net_sock_container *sc = sk->sk_user_data;
548 sclog(sc, "data_ready hit\n"); 608 sclog(sc, "data_ready hit\n");
549 do_gettimeofday(&sc->sc_tv_data_ready); 609 o2net_set_data_ready_time(sc);
550 o2net_sc_queue_work(sc, &sc->sc_rx_work); 610 o2net_sc_queue_work(sc, &sc->sc_rx_work);
551 ready = sc->sc_data_ready; 611 ready = sc->sc_data_ready;
552 } else { 612 } else {
@@ -1070,6 +1130,8 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
1070 o2net_set_nst_status_time(&nst); 1130 o2net_set_nst_status_time(&nst);
1071 wait_event(nsw.ns_wq, o2net_nsw_completed(nn, &nsw)); 1131 wait_event(nsw.ns_wq, o2net_nsw_completed(nn, &nsw));
1072 1132
1133 o2net_update_send_stats(&nst, sc);
1134
1073 /* Note that we avoid overwriting the callers status return 1135 /* Note that we avoid overwriting the callers status return
1074 * variable if a system error was reported on the other 1136 * variable if a system error was reported on the other
1075 * side. Callers beware. */ 1137 * side. Callers beware. */
@@ -1183,13 +1245,15 @@ static int o2net_process_message(struct o2net_sock_container *sc,
1183 if (syserr != O2NET_ERR_NONE) 1245 if (syserr != O2NET_ERR_NONE)
1184 goto out_respond; 1246 goto out_respond;
1185 1247
1186 do_gettimeofday(&sc->sc_tv_func_start); 1248 o2net_set_func_start_time(sc);
1187 sc->sc_msg_key = be32_to_cpu(hdr->key); 1249 sc->sc_msg_key = be32_to_cpu(hdr->key);
1188 sc->sc_msg_type = be16_to_cpu(hdr->msg_type); 1250 sc->sc_msg_type = be16_to_cpu(hdr->msg_type);
1189 handler_status = (nmh->nh_func)(hdr, sizeof(struct o2net_msg) + 1251 handler_status = (nmh->nh_func)(hdr, sizeof(struct o2net_msg) +
1190 be16_to_cpu(hdr->data_len), 1252 be16_to_cpu(hdr->data_len),
1191 nmh->nh_func_data, &ret_data); 1253 nmh->nh_func_data, &ret_data);
1192 do_gettimeofday(&sc->sc_tv_func_stop); 1254 o2net_set_func_stop_time(sc);
1255
1256 o2net_update_recv_stats(sc);
1193 1257
1194out_respond: 1258out_respond:
1195 /* this destroys the hdr, so don't use it after this */ 1259 /* this destroys the hdr, so don't use it after this */
@@ -1300,7 +1364,7 @@ static int o2net_advance_rx(struct o2net_sock_container *sc)
1300 size_t datalen; 1364 size_t datalen;
1301 1365
1302 sclog(sc, "receiving\n"); 1366 sclog(sc, "receiving\n");
1303 do_gettimeofday(&sc->sc_tv_advance_start); 1367 o2net_set_advance_start_time(sc);
1304 1368
1305 if (unlikely(sc->sc_handshake_ok == 0)) { 1369 if (unlikely(sc->sc_handshake_ok == 0)) {
1306 if(sc->sc_page_off < sizeof(struct o2net_handshake)) { 1370 if(sc->sc_page_off < sizeof(struct o2net_handshake)) {
@@ -1375,7 +1439,7 @@ static int o2net_advance_rx(struct o2net_sock_container *sc)
1375 1439
1376out: 1440out:
1377 sclog(sc, "ret = %d\n", ret); 1441 sclog(sc, "ret = %d\n", ret);
1378 do_gettimeofday(&sc->sc_tv_advance_stop); 1442 o2net_set_advance_stop_time(sc);
1379 return ret; 1443 return ret;
1380} 1444}
1381 1445
@@ -1475,27 +1539,28 @@ static void o2net_idle_timer(unsigned long data)
1475{ 1539{
1476 struct o2net_sock_container *sc = (struct o2net_sock_container *)data; 1540 struct o2net_sock_container *sc = (struct o2net_sock_container *)data;
1477 struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num); 1541 struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num);
1478 struct timeval now;
1479 1542
1480 do_gettimeofday(&now); 1543#ifdef CONFIG_DEBUG_FS
1544 ktime_t now = ktime_get();
1545#endif
1481 1546
1482 printk(KERN_NOTICE "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u " 1547 printk(KERN_NOTICE "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u "
1483 "seconds, shutting it down.\n", SC_NODEF_ARGS(sc), 1548 "seconds, shutting it down.\n", SC_NODEF_ARGS(sc),
1484 o2net_idle_timeout() / 1000, 1549 o2net_idle_timeout() / 1000,
1485 o2net_idle_timeout() % 1000); 1550 o2net_idle_timeout() % 1000);
1486 mlog(ML_NOTICE, "here are some times that might help debug the " 1551
1487 "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv " 1552#ifdef CONFIG_DEBUG_FS
1488 "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n", 1553 mlog(ML_NOTICE, "Here are some times that might help debug the "
1489 sc->sc_tv_timer.tv_sec, (long) sc->sc_tv_timer.tv_usec, 1554 "situation: (Timer: %lld, Now %lld, DataReady %lld, Advance %lld-%lld, "
1490 now.tv_sec, (long) now.tv_usec, 1555 "Key 0x%08x, Func %u, FuncTime %lld-%lld)\n",
1491 sc->sc_tv_data_ready.tv_sec, (long) sc->sc_tv_data_ready.tv_usec, 1556 (long long)ktime_to_us(sc->sc_tv_timer), (long long)ktime_to_us(now),
1492 sc->sc_tv_advance_start.tv_sec, 1557 (long long)ktime_to_us(sc->sc_tv_data_ready),
1493 (long) sc->sc_tv_advance_start.tv_usec, 1558 (long long)ktime_to_us(sc->sc_tv_advance_start),
1494 sc->sc_tv_advance_stop.tv_sec, 1559 (long long)ktime_to_us(sc->sc_tv_advance_stop),
1495 (long) sc->sc_tv_advance_stop.tv_usec,
1496 sc->sc_msg_key, sc->sc_msg_type, 1560 sc->sc_msg_key, sc->sc_msg_type,
1497 sc->sc_tv_func_start.tv_sec, (long) sc->sc_tv_func_start.tv_usec, 1561 (long long)ktime_to_us(sc->sc_tv_func_start),
1498 sc->sc_tv_func_stop.tv_sec, (long) sc->sc_tv_func_stop.tv_usec); 1562 (long long)ktime_to_us(sc->sc_tv_func_stop));
1563#endif
1499 1564
1500 /* 1565 /*
1501 * Initialize the nn_timeout so that the next connection attempt 1566 * Initialize the nn_timeout so that the next connection attempt
@@ -1511,7 +1576,7 @@ static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc)
1511 o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work); 1576 o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work);
1512 o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work, 1577 o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work,
1513 msecs_to_jiffies(o2net_keepalive_delay())); 1578 msecs_to_jiffies(o2net_keepalive_delay()));
1514 do_gettimeofday(&sc->sc_tv_timer); 1579 o2net_set_sock_timer(sc);
1515 mod_timer(&sc->sc_idle_timeout, 1580 mod_timer(&sc->sc_idle_timeout,
1516 jiffies + msecs_to_jiffies(o2net_idle_timeout())); 1581 jiffies + msecs_to_jiffies(o2net_idle_timeout()));
1517} 1582}
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index 15fdbdf9eb4..4cbcb65784a 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -166,18 +166,27 @@ struct o2net_sock_container {
166 /* original handlers for the sockets */ 166 /* original handlers for the sockets */
167 void (*sc_state_change)(struct sock *sk); 167 void (*sc_state_change)(struct sock *sk);
168 void (*sc_data_ready)(struct sock *sk, int bytes); 168 void (*sc_data_ready)(struct sock *sk, int bytes);
169#ifdef CONFIG_DEBUG_FS 169
170 struct list_head sc_net_debug_item;
171#endif
172 struct timeval sc_tv_timer;
173 struct timeval sc_tv_data_ready;
174 struct timeval sc_tv_advance_start;
175 struct timeval sc_tv_advance_stop;
176 struct timeval sc_tv_func_start;
177 struct timeval sc_tv_func_stop;
178 u32 sc_msg_key; 170 u32 sc_msg_key;
179 u16 sc_msg_type; 171 u16 sc_msg_type;
180 172
173#ifdef CONFIG_DEBUG_FS
174 struct list_head sc_net_debug_item;
175 ktime_t sc_tv_timer;
176 ktime_t sc_tv_data_ready;
177 ktime_t sc_tv_advance_start;
178 ktime_t sc_tv_advance_stop;
179 ktime_t sc_tv_func_start;
180 ktime_t sc_tv_func_stop;
181#endif
182#ifdef CONFIG_OCFS2_FS_STATS
183 ktime_t sc_tv_acquiry_total;
184 ktime_t sc_tv_send_total;
185 ktime_t sc_tv_status_total;
186 u32 sc_send_count;
187 u32 sc_recv_count;
188 ktime_t sc_tv_process_total;
189#endif
181 struct mutex sc_send_lock; 190 struct mutex sc_send_lock;
182}; 191};
183 192
@@ -220,9 +229,9 @@ struct o2net_send_tracking {
220 u32 st_msg_type; 229 u32 st_msg_type;
221 u32 st_msg_key; 230 u32 st_msg_key;
222 u8 st_node; 231 u8 st_node;
223 struct timeval st_sock_time; 232 ktime_t st_sock_time;
224 struct timeval st_send_time; 233 ktime_t st_send_time;
225 struct timeval st_status_time; 234 ktime_t st_status_time;
226}; 235};
227#else 236#else
228struct o2net_send_tracking { 237struct o2net_send_tracking {
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 895532ac4d9..6d80ecc7834 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -52,9 +52,15 @@ void ocfs2_dentry_attach_gen(struct dentry *dentry)
52static int ocfs2_dentry_revalidate(struct dentry *dentry, 52static int ocfs2_dentry_revalidate(struct dentry *dentry,
53 struct nameidata *nd) 53 struct nameidata *nd)
54{ 54{
55 struct inode *inode = dentry->d_inode; 55 struct inode *inode;
56 int ret = 0; /* if all else fails, just return false */ 56 int ret = 0; /* if all else fails, just return false */
57 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 57 struct ocfs2_super *osb;
58
59 if (nd->flags & LOOKUP_RCU)
60 return -ECHILD;
61
62 inode = dentry->d_inode;
63 osb = OCFS2_SB(dentry->d_sb);
58 64
59 mlog_entry("(0x%p, '%.*s')\n", dentry, 65 mlog_entry("(0x%p, '%.*s')\n", dentry,
60 dentry->d_name.len, dentry->d_name.name); 66 dentry->d_name.len, dentry->d_name.name);
@@ -169,23 +175,25 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode,
169 struct list_head *p; 175 struct list_head *p;
170 struct dentry *dentry = NULL; 176 struct dentry *dentry = NULL;
171 177
172 spin_lock(&dcache_lock); 178 spin_lock(&inode->i_lock);
173
174 list_for_each(p, &inode->i_dentry) { 179 list_for_each(p, &inode->i_dentry) {
175 dentry = list_entry(p, struct dentry, d_alias); 180 dentry = list_entry(p, struct dentry, d_alias);
176 181
182 spin_lock(&dentry->d_lock);
177 if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) { 183 if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) {
178 mlog(0, "dentry found: %.*s\n", 184 mlog(0, "dentry found: %.*s\n",
179 dentry->d_name.len, dentry->d_name.name); 185 dentry->d_name.len, dentry->d_name.name);
180 186
181 dget_locked(dentry); 187 dget_dlock(dentry);
188 spin_unlock(&dentry->d_lock);
182 break; 189 break;
183 } 190 }
191 spin_unlock(&dentry->d_lock);
184 192
185 dentry = NULL; 193 dentry = NULL;
186 } 194 }
187 195
188 spin_unlock(&dcache_lock); 196 spin_unlock(&inode->i_lock);
189 197
190 return dentry; 198 return dentry;
191} 199}
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index c49f6de0e7a..d417b3f9b0c 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -2461,8 +2461,10 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
2461 2461
2462 di->i_dx_root = cpu_to_le64(dr_blkno); 2462 di->i_dx_root = cpu_to_le64(dr_blkno);
2463 2463
2464 spin_lock(&OCFS2_I(dir)->ip_lock);
2464 OCFS2_I(dir)->ip_dyn_features |= OCFS2_INDEXED_DIR_FL; 2465 OCFS2_I(dir)->ip_dyn_features |= OCFS2_INDEXED_DIR_FL;
2465 di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features); 2466 di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features);
2467 spin_unlock(&OCFS2_I(dir)->ip_lock);
2466 2468
2467 ocfs2_journal_dirty(handle, di_bh); 2469 ocfs2_journal_dirty(handle, di_bh);
2468 2470
@@ -4466,8 +4468,10 @@ static int ocfs2_dx_dir_remove_index(struct inode *dir,
4466 goto out_commit; 4468 goto out_commit;
4467 } 4469 }
4468 4470
4471 spin_lock(&OCFS2_I(dir)->ip_lock);
4469 OCFS2_I(dir)->ip_dyn_features &= ~OCFS2_INDEXED_DIR_FL; 4472 OCFS2_I(dir)->ip_dyn_features &= ~OCFS2_INDEXED_DIR_FL;
4470 di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features); 4473 di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features);
4474 spin_unlock(&OCFS2_I(dir)->ip_lock);
4471 di->i_dx_root = cpu_to_le64(0ULL); 4475 di->i_dx_root = cpu_to_le64(0ULL);
4472 4476
4473 ocfs2_journal_dirty(handle, di_bh); 4477 ocfs2_journal_dirty(handle, di_bh);
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index f4499915683..3a3ed4bb794 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -90,19 +90,29 @@ static int dlm_should_cancel_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
90 90
91void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock) 91void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
92{ 92{
93 mlog_entry_void(); 93 struct dlm_lock_resource *res;
94 94
95 BUG_ON(!dlm); 95 BUG_ON(!dlm);
96 BUG_ON(!lock); 96 BUG_ON(!lock);
97 97
98 res = lock->lockres;
99
98 assert_spin_locked(&dlm->ast_lock); 100 assert_spin_locked(&dlm->ast_lock);
101
99 if (!list_empty(&lock->ast_list)) { 102 if (!list_empty(&lock->ast_list)) {
100 mlog(ML_ERROR, "ast list not empty!! pending=%d, newlevel=%d\n", 103 mlog(ML_ERROR, "%s: res %.*s, lock %u:%llu, "
104 "AST list not empty, pending %d, newlevel %d\n",
105 dlm->name, res->lockname.len, res->lockname.name,
106 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
107 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
101 lock->ast_pending, lock->ml.type); 108 lock->ast_pending, lock->ml.type);
102 BUG(); 109 BUG();
103 } 110 }
104 if (lock->ast_pending) 111 if (lock->ast_pending)
105 mlog(0, "lock has an ast getting flushed right now\n"); 112 mlog(0, "%s: res %.*s, lock %u:%llu, AST getting flushed\n",
113 dlm->name, res->lockname.len, res->lockname.name,
114 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
115 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)));
106 116
107 /* putting lock on list, add a ref */ 117 /* putting lock on list, add a ref */
108 dlm_lock_get(lock); 118 dlm_lock_get(lock);
@@ -110,9 +120,10 @@ void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
110 120
111 /* check to see if this ast obsoletes the bast */ 121 /* check to see if this ast obsoletes the bast */
112 if (dlm_should_cancel_bast(dlm, lock)) { 122 if (dlm_should_cancel_bast(dlm, lock)) {
113 struct dlm_lock_resource *res = lock->lockres; 123 mlog(0, "%s: res %.*s, lock %u:%llu, Cancelling BAST\n",
114 mlog(0, "%s: cancelling bast for %.*s\n", 124 dlm->name, res->lockname.len, res->lockname.name,
115 dlm->name, res->lockname.len, res->lockname.name); 125 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
126 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)));
116 lock->bast_pending = 0; 127 lock->bast_pending = 0;
117 list_del_init(&lock->bast_list); 128 list_del_init(&lock->bast_list);
118 lock->ml.highest_blocked = LKM_IVMODE; 129 lock->ml.highest_blocked = LKM_IVMODE;
@@ -134,8 +145,6 @@ void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
134 145
135void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock) 146void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
136{ 147{
137 mlog_entry_void();
138
139 BUG_ON(!dlm); 148 BUG_ON(!dlm);
140 BUG_ON(!lock); 149 BUG_ON(!lock);
141 150
@@ -147,15 +156,21 @@ void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
147 156
148void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock) 157void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
149{ 158{
150 mlog_entry_void(); 159 struct dlm_lock_resource *res;
151 160
152 BUG_ON(!dlm); 161 BUG_ON(!dlm);
153 BUG_ON(!lock); 162 BUG_ON(!lock);
163
154 assert_spin_locked(&dlm->ast_lock); 164 assert_spin_locked(&dlm->ast_lock);
155 165
166 res = lock->lockres;
167
156 BUG_ON(!list_empty(&lock->bast_list)); 168 BUG_ON(!list_empty(&lock->bast_list));
157 if (lock->bast_pending) 169 if (lock->bast_pending)
158 mlog(0, "lock has a bast getting flushed right now\n"); 170 mlog(0, "%s: res %.*s, lock %u:%llu, BAST getting flushed\n",
171 dlm->name, res->lockname.len, res->lockname.name,
172 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
173 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)));
159 174
160 /* putting lock on list, add a ref */ 175 /* putting lock on list, add a ref */
161 dlm_lock_get(lock); 176 dlm_lock_get(lock);
@@ -167,8 +182,6 @@ void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
167 182
168void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock) 183void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
169{ 184{
170 mlog_entry_void();
171
172 BUG_ON(!dlm); 185 BUG_ON(!dlm);
173 BUG_ON(!lock); 186 BUG_ON(!lock);
174 187
@@ -213,7 +226,10 @@ void dlm_do_local_ast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
213 dlm_astlockfunc_t *fn; 226 dlm_astlockfunc_t *fn;
214 struct dlm_lockstatus *lksb; 227 struct dlm_lockstatus *lksb;
215 228
216 mlog_entry_void(); 229 mlog(0, "%s: res %.*s, lock %u:%llu, Local AST\n", dlm->name,
230 res->lockname.len, res->lockname.name,
231 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
232 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)));
217 233
218 lksb = lock->lksb; 234 lksb = lock->lksb;
219 fn = lock->ast; 235 fn = lock->ast;
@@ -231,7 +247,10 @@ int dlm_do_remote_ast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
231 struct dlm_lockstatus *lksb; 247 struct dlm_lockstatus *lksb;
232 int lksbflags; 248 int lksbflags;
233 249
234 mlog_entry_void(); 250 mlog(0, "%s: res %.*s, lock %u:%llu, Remote AST\n", dlm->name,
251 res->lockname.len, res->lockname.name,
252 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
253 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)));
235 254
236 lksb = lock->lksb; 255 lksb = lock->lksb;
237 BUG_ON(lock->ml.node == dlm->node_num); 256 BUG_ON(lock->ml.node == dlm->node_num);
@@ -250,9 +269,14 @@ void dlm_do_local_bast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
250{ 269{
251 dlm_bastlockfunc_t *fn = lock->bast; 270 dlm_bastlockfunc_t *fn = lock->bast;
252 271
253 mlog_entry_void();
254 BUG_ON(lock->ml.node != dlm->node_num); 272 BUG_ON(lock->ml.node != dlm->node_num);
255 273
274 mlog(0, "%s: res %.*s, lock %u:%llu, Local BAST, blocked %d\n",
275 dlm->name, res->lockname.len, res->lockname.name,
276 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
277 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
278 blocked_type);
279
256 (*fn)(lock->astdata, blocked_type); 280 (*fn)(lock->astdata, blocked_type);
257} 281}
258 282
@@ -332,7 +356,8 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
332 /* cannot get a proxy ast message if this node owns it */ 356 /* cannot get a proxy ast message if this node owns it */
333 BUG_ON(res->owner == dlm->node_num); 357 BUG_ON(res->owner == dlm->node_num);
334 358
335 mlog(0, "lockres %.*s\n", res->lockname.len, res->lockname.name); 359 mlog(0, "%s: res %.*s\n", dlm->name, res->lockname.len,
360 res->lockname.name);
336 361
337 spin_lock(&res->spinlock); 362 spin_lock(&res->spinlock);
338 if (res->state & DLM_LOCK_RES_RECOVERING) { 363 if (res->state & DLM_LOCK_RES_RECOVERING) {
@@ -382,8 +407,12 @@ do_ast:
382 if (past->type == DLM_AST) { 407 if (past->type == DLM_AST) {
383 /* do not alter lock refcount. switching lists. */ 408 /* do not alter lock refcount. switching lists. */
384 list_move_tail(&lock->list, &res->granted); 409 list_move_tail(&lock->list, &res->granted);
385 mlog(0, "ast: Adding to granted list... type=%d, " 410 mlog(0, "%s: res %.*s, lock %u:%llu, Granted type %d => %d\n",
386 "convert_type=%d\n", lock->ml.type, lock->ml.convert_type); 411 dlm->name, res->lockname.len, res->lockname.name,
412 dlm_get_lock_cookie_node(be64_to_cpu(cookie)),
413 dlm_get_lock_cookie_seq(be64_to_cpu(cookie)),
414 lock->ml.type, lock->ml.convert_type);
415
387 if (lock->ml.convert_type != LKM_IVMODE) { 416 if (lock->ml.convert_type != LKM_IVMODE) {
388 lock->ml.type = lock->ml.convert_type; 417 lock->ml.type = lock->ml.convert_type;
389 lock->ml.convert_type = LKM_IVMODE; 418 lock->ml.convert_type = LKM_IVMODE;
@@ -426,9 +455,9 @@ int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
426 size_t veclen = 1; 455 size_t veclen = 1;
427 int status; 456 int status;
428 457
429 mlog_entry("res %.*s, to=%u, type=%d, blocked_type=%d\n", 458 mlog(0, "%s: res %.*s, to %u, type %d, blocked_type %d\n", dlm->name,
430 res->lockname.len, res->lockname.name, lock->ml.node, 459 res->lockname.len, res->lockname.name, lock->ml.node, msg_type,
431 msg_type, blocked_type); 460 blocked_type);
432 461
433 memset(&past, 0, sizeof(struct dlm_proxy_ast)); 462 memset(&past, 0, sizeof(struct dlm_proxy_ast));
434 past.node_idx = dlm->node_num; 463 past.node_idx = dlm->node_num;
@@ -441,7 +470,6 @@ int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
441 vec[0].iov_len = sizeof(struct dlm_proxy_ast); 470 vec[0].iov_len = sizeof(struct dlm_proxy_ast);
442 vec[0].iov_base = &past; 471 vec[0].iov_base = &past;
443 if (flags & DLM_LKSB_GET_LVB) { 472 if (flags & DLM_LKSB_GET_LVB) {
444 mlog(0, "returning requested LVB data\n");
445 be32_add_cpu(&past.flags, LKM_GET_LVB); 473 be32_add_cpu(&past.flags, LKM_GET_LVB);
446 vec[1].iov_len = DLM_LVB_LEN; 474 vec[1].iov_len = DLM_LVB_LEN;
447 vec[1].iov_base = lock->lksb->lvb; 475 vec[1].iov_base = lock->lksb->lvb;
@@ -451,8 +479,8 @@ int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
451 ret = o2net_send_message_vec(DLM_PROXY_AST_MSG, dlm->key, vec, veclen, 479 ret = o2net_send_message_vec(DLM_PROXY_AST_MSG, dlm->key, vec, veclen,
452 lock->ml.node, &status); 480 lock->ml.node, &status);
453 if (ret < 0) 481 if (ret < 0)
454 mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to " 482 mlog(ML_ERROR, "%s: res %.*s, error %d send AST to node %u\n",
455 "node %u\n", ret, DLM_PROXY_AST_MSG, dlm->key, 483 dlm->name, res->lockname.len, res->lockname.name, ret,
456 lock->ml.node); 484 lock->ml.node);
457 else { 485 else {
458 if (status == DLM_RECOVERING) { 486 if (status == DLM_RECOVERING) {
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index b36d0bf77a5..4bdf7baee34 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -50,10 +50,10 @@
50#define dlm_lockid_hash(_n, _l) full_name_hash(_n, _l) 50#define dlm_lockid_hash(_n, _l) full_name_hash(_n, _l)
51 51
52enum dlm_mle_type { 52enum dlm_mle_type {
53 DLM_MLE_BLOCK, 53 DLM_MLE_BLOCK = 0,
54 DLM_MLE_MASTER, 54 DLM_MLE_MASTER = 1,
55 DLM_MLE_MIGRATION, 55 DLM_MLE_MIGRATION = 2,
56 DLM_MLE_NUM_TYPES 56 DLM_MLE_NUM_TYPES = 3,
57}; 57};
58 58
59struct dlm_master_list_entry { 59struct dlm_master_list_entry {
@@ -82,8 +82,8 @@ struct dlm_master_list_entry {
82 82
83enum dlm_ast_type { 83enum dlm_ast_type {
84 DLM_AST = 0, 84 DLM_AST = 0,
85 DLM_BAST, 85 DLM_BAST = 1,
86 DLM_ASTUNLOCK 86 DLM_ASTUNLOCK = 2,
87}; 87};
88 88
89 89
@@ -119,9 +119,9 @@ struct dlm_recovery_ctxt
119 119
120enum dlm_ctxt_state { 120enum dlm_ctxt_state {
121 DLM_CTXT_NEW = 0, 121 DLM_CTXT_NEW = 0,
122 DLM_CTXT_JOINED, 122 DLM_CTXT_JOINED = 1,
123 DLM_CTXT_IN_SHUTDOWN, 123 DLM_CTXT_IN_SHUTDOWN = 2,
124 DLM_CTXT_LEAVING, 124 DLM_CTXT_LEAVING = 3,
125}; 125};
126 126
127struct dlm_ctxt 127struct dlm_ctxt
@@ -388,8 +388,8 @@ struct dlm_lock
388 388
389enum dlm_lockres_list { 389enum dlm_lockres_list {
390 DLM_GRANTED_LIST = 0, 390 DLM_GRANTED_LIST = 0,
391 DLM_CONVERTING_LIST, 391 DLM_CONVERTING_LIST = 1,
392 DLM_BLOCKED_LIST 392 DLM_BLOCKED_LIST = 2,
393}; 393};
394 394
395static inline int dlm_lvb_is_empty(char *lvb) 395static inline int dlm_lvb_is_empty(char *lvb)
@@ -427,27 +427,27 @@ struct dlm_node_iter
427 427
428 428
429enum { 429enum {
430 DLM_MASTER_REQUEST_MSG = 500, 430 DLM_MASTER_REQUEST_MSG = 500,
431 DLM_UNUSED_MSG1, /* 501 */ 431 DLM_UNUSED_MSG1 = 501,
432 DLM_ASSERT_MASTER_MSG, /* 502 */ 432 DLM_ASSERT_MASTER_MSG = 502,
433 DLM_CREATE_LOCK_MSG, /* 503 */ 433 DLM_CREATE_LOCK_MSG = 503,
434 DLM_CONVERT_LOCK_MSG, /* 504 */ 434 DLM_CONVERT_LOCK_MSG = 504,
435 DLM_PROXY_AST_MSG, /* 505 */ 435 DLM_PROXY_AST_MSG = 505,
436 DLM_UNLOCK_LOCK_MSG, /* 506 */ 436 DLM_UNLOCK_LOCK_MSG = 506,
437 DLM_DEREF_LOCKRES_MSG, /* 507 */ 437 DLM_DEREF_LOCKRES_MSG = 507,
438 DLM_MIGRATE_REQUEST_MSG, /* 508 */ 438 DLM_MIGRATE_REQUEST_MSG = 508,
439 DLM_MIG_LOCKRES_MSG, /* 509 */ 439 DLM_MIG_LOCKRES_MSG = 509,
440 DLM_QUERY_JOIN_MSG, /* 510 */ 440 DLM_QUERY_JOIN_MSG = 510,
441 DLM_ASSERT_JOINED_MSG, /* 511 */ 441 DLM_ASSERT_JOINED_MSG = 511,
442 DLM_CANCEL_JOIN_MSG, /* 512 */ 442 DLM_CANCEL_JOIN_MSG = 512,
443 DLM_EXIT_DOMAIN_MSG, /* 513 */ 443 DLM_EXIT_DOMAIN_MSG = 513,
444 DLM_MASTER_REQUERY_MSG, /* 514 */ 444 DLM_MASTER_REQUERY_MSG = 514,
445 DLM_LOCK_REQUEST_MSG, /* 515 */ 445 DLM_LOCK_REQUEST_MSG = 515,
446 DLM_RECO_DATA_DONE_MSG, /* 516 */ 446 DLM_RECO_DATA_DONE_MSG = 516,
447 DLM_BEGIN_RECO_MSG, /* 517 */ 447 DLM_BEGIN_RECO_MSG = 517,
448 DLM_FINALIZE_RECO_MSG, /* 518 */ 448 DLM_FINALIZE_RECO_MSG = 518,
449 DLM_QUERY_REGION, /* 519 */ 449 DLM_QUERY_REGION = 519,
450 DLM_QUERY_NODEINFO, /* 520 */ 450 DLM_QUERY_NODEINFO = 520,
451}; 451};
452 452
453struct dlm_reco_node_data 453struct dlm_reco_node_data
@@ -460,19 +460,19 @@ struct dlm_reco_node_data
460enum { 460enum {
461 DLM_RECO_NODE_DATA_DEAD = -1, 461 DLM_RECO_NODE_DATA_DEAD = -1,
462 DLM_RECO_NODE_DATA_INIT = 0, 462 DLM_RECO_NODE_DATA_INIT = 0,
463 DLM_RECO_NODE_DATA_REQUESTING, 463 DLM_RECO_NODE_DATA_REQUESTING = 1,
464 DLM_RECO_NODE_DATA_REQUESTED, 464 DLM_RECO_NODE_DATA_REQUESTED = 2,
465 DLM_RECO_NODE_DATA_RECEIVING, 465 DLM_RECO_NODE_DATA_RECEIVING = 3,
466 DLM_RECO_NODE_DATA_DONE, 466 DLM_RECO_NODE_DATA_DONE = 4,
467 DLM_RECO_NODE_DATA_FINALIZE_SENT, 467 DLM_RECO_NODE_DATA_FINALIZE_SENT = 5,
468}; 468};
469 469
470 470
471enum { 471enum {
472 DLM_MASTER_RESP_NO = 0, 472 DLM_MASTER_RESP_NO = 0,
473 DLM_MASTER_RESP_YES, 473 DLM_MASTER_RESP_YES = 1,
474 DLM_MASTER_RESP_MAYBE, 474 DLM_MASTER_RESP_MAYBE = 2,
475 DLM_MASTER_RESP_ERROR 475 DLM_MASTER_RESP_ERROR = 3,
476}; 476};
477 477
478 478
@@ -649,9 +649,9 @@ struct dlm_proxy_ast
649#define DLM_MOD_KEY (0x666c6172) 649#define DLM_MOD_KEY (0x666c6172)
650enum dlm_query_join_response_code { 650enum dlm_query_join_response_code {
651 JOIN_DISALLOW = 0, 651 JOIN_DISALLOW = 0,
652 JOIN_OK, 652 JOIN_OK = 1,
653 JOIN_OK_NO_MAP, 653 JOIN_OK_NO_MAP = 2,
654 JOIN_PROTOCOL_MISMATCH, 654 JOIN_PROTOCOL_MISMATCH = 3,
655}; 655};
656 656
657struct dlm_query_join_packet { 657struct dlm_query_join_packet {
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index 272ec8631a5..04a32be0aeb 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -370,92 +370,46 @@ static void dlm_debug_get(struct dlm_debug_ctxt *dc)
370 kref_get(&dc->debug_refcnt); 370 kref_get(&dc->debug_refcnt);
371} 371}
372 372
373static struct debug_buffer *debug_buffer_allocate(void) 373static int debug_release(struct inode *inode, struct file *file)
374{ 374{
375 struct debug_buffer *db = NULL; 375 free_page((unsigned long)file->private_data);
376 376 return 0;
377 db = kzalloc(sizeof(struct debug_buffer), GFP_KERNEL);
378 if (!db)
379 goto bail;
380
381 db->len = PAGE_SIZE;
382 db->buf = kmalloc(db->len, GFP_KERNEL);
383 if (!db->buf)
384 goto bail;
385
386 return db;
387bail:
388 kfree(db);
389 return NULL;
390}
391
392static ssize_t debug_buffer_read(struct file *file, char __user *buf,
393 size_t nbytes, loff_t *ppos)
394{
395 struct debug_buffer *db = file->private_data;
396
397 return simple_read_from_buffer(buf, nbytes, ppos, db->buf, db->len);
398}
399
400static loff_t debug_buffer_llseek(struct file *file, loff_t off, int whence)
401{
402 struct debug_buffer *db = file->private_data;
403 loff_t new = -1;
404
405 switch (whence) {
406 case 0:
407 new = off;
408 break;
409 case 1:
410 new = file->f_pos + off;
411 break;
412 }
413
414 if (new < 0 || new > db->len)
415 return -EINVAL;
416
417 return (file->f_pos = new);
418} 377}
419 378
420static int debug_buffer_release(struct inode *inode, struct file *file) 379static ssize_t debug_read(struct file *file, char __user *buf,
380 size_t nbytes, loff_t *ppos)
421{ 381{
422 struct debug_buffer *db = file->private_data; 382 return simple_read_from_buffer(buf, nbytes, ppos, file->private_data,
423 383 i_size_read(file->f_mapping->host));
424 if (db)
425 kfree(db->buf);
426 kfree(db);
427
428 return 0;
429} 384}
430/* end - util funcs */ 385/* end - util funcs */
431 386
432/* begin - purge list funcs */ 387/* begin - purge list funcs */
433static int debug_purgelist_print(struct dlm_ctxt *dlm, struct debug_buffer *db) 388static int debug_purgelist_print(struct dlm_ctxt *dlm, char *buf, int len)
434{ 389{
435 struct dlm_lock_resource *res; 390 struct dlm_lock_resource *res;
436 int out = 0; 391 int out = 0;
437 unsigned long total = 0; 392 unsigned long total = 0;
438 393
439 out += snprintf(db->buf + out, db->len - out, 394 out += snprintf(buf + out, len - out,
440 "Dumping Purgelist for Domain: %s\n", dlm->name); 395 "Dumping Purgelist for Domain: %s\n", dlm->name);
441 396
442 spin_lock(&dlm->spinlock); 397 spin_lock(&dlm->spinlock);
443 list_for_each_entry(res, &dlm->purge_list, purge) { 398 list_for_each_entry(res, &dlm->purge_list, purge) {
444 ++total; 399 ++total;
445 if (db->len - out < 100) 400 if (len - out < 100)
446 continue; 401 continue;
447 spin_lock(&res->spinlock); 402 spin_lock(&res->spinlock);
448 out += stringify_lockname(res->lockname.name, 403 out += stringify_lockname(res->lockname.name,
449 res->lockname.len, 404 res->lockname.len,
450 db->buf + out, db->len - out); 405 buf + out, len - out);
451 out += snprintf(db->buf + out, db->len - out, "\t%ld\n", 406 out += snprintf(buf + out, len - out, "\t%ld\n",
452 (jiffies - res->last_used)/HZ); 407 (jiffies - res->last_used)/HZ);
453 spin_unlock(&res->spinlock); 408 spin_unlock(&res->spinlock);
454 } 409 }
455 spin_unlock(&dlm->spinlock); 410 spin_unlock(&dlm->spinlock);
456 411
457 out += snprintf(db->buf + out, db->len - out, 412 out += snprintf(buf + out, len - out, "Total on list: %ld\n", total);
458 "Total on list: %ld\n", total);
459 413
460 return out; 414 return out;
461} 415}
@@ -463,15 +417,15 @@ static int debug_purgelist_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
463static int debug_purgelist_open(struct inode *inode, struct file *file) 417static int debug_purgelist_open(struct inode *inode, struct file *file)
464{ 418{
465 struct dlm_ctxt *dlm = inode->i_private; 419 struct dlm_ctxt *dlm = inode->i_private;
466 struct debug_buffer *db; 420 char *buf = NULL;
467 421
468 db = debug_buffer_allocate(); 422 buf = (char *) get_zeroed_page(GFP_NOFS);
469 if (!db) 423 if (!buf)
470 goto bail; 424 goto bail;
471 425
472 db->len = debug_purgelist_print(dlm, db); 426 i_size_write(inode, debug_purgelist_print(dlm, buf, PAGE_SIZE - 1));
473 427
474 file->private_data = db; 428 file->private_data = buf;
475 429
476 return 0; 430 return 0;
477bail: 431bail:
@@ -480,14 +434,14 @@ bail:
480 434
481static const struct file_operations debug_purgelist_fops = { 435static const struct file_operations debug_purgelist_fops = {
482 .open = debug_purgelist_open, 436 .open = debug_purgelist_open,
483 .release = debug_buffer_release, 437 .release = debug_release,
484 .read = debug_buffer_read, 438 .read = debug_read,
485 .llseek = debug_buffer_llseek, 439 .llseek = generic_file_llseek,
486}; 440};
487/* end - purge list funcs */ 441/* end - purge list funcs */
488 442
489/* begin - debug mle funcs */ 443/* begin - debug mle funcs */
490static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db) 444static int debug_mle_print(struct dlm_ctxt *dlm, char *buf, int len)
491{ 445{
492 struct dlm_master_list_entry *mle; 446 struct dlm_master_list_entry *mle;
493 struct hlist_head *bucket; 447 struct hlist_head *bucket;
@@ -495,7 +449,7 @@ static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
495 int i, out = 0; 449 int i, out = 0;
496 unsigned long total = 0, longest = 0, bucket_count = 0; 450 unsigned long total = 0, longest = 0, bucket_count = 0;
497 451
498 out += snprintf(db->buf + out, db->len - out, 452 out += snprintf(buf + out, len - out,
499 "Dumping MLEs for Domain: %s\n", dlm->name); 453 "Dumping MLEs for Domain: %s\n", dlm->name);
500 454
501 spin_lock(&dlm->master_lock); 455 spin_lock(&dlm->master_lock);
@@ -506,16 +460,16 @@ static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
506 master_hash_node); 460 master_hash_node);
507 ++total; 461 ++total;
508 ++bucket_count; 462 ++bucket_count;
509 if (db->len - out < 200) 463 if (len - out < 200)
510 continue; 464 continue;
511 out += dump_mle(mle, db->buf + out, db->len - out); 465 out += dump_mle(mle, buf + out, len - out);
512 } 466 }
513 longest = max(longest, bucket_count); 467 longest = max(longest, bucket_count);
514 bucket_count = 0; 468 bucket_count = 0;
515 } 469 }
516 spin_unlock(&dlm->master_lock); 470 spin_unlock(&dlm->master_lock);
517 471
518 out += snprintf(db->buf + out, db->len - out, 472 out += snprintf(buf + out, len - out,
519 "Total: %ld, Longest: %ld\n", total, longest); 473 "Total: %ld, Longest: %ld\n", total, longest);
520 return out; 474 return out;
521} 475}
@@ -523,15 +477,15 @@ static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
523static int debug_mle_open(struct inode *inode, struct file *file) 477static int debug_mle_open(struct inode *inode, struct file *file)
524{ 478{
525 struct dlm_ctxt *dlm = inode->i_private; 479 struct dlm_ctxt *dlm = inode->i_private;
526 struct debug_buffer *db; 480 char *buf = NULL;
527 481
528 db = debug_buffer_allocate(); 482 buf = (char *) get_zeroed_page(GFP_NOFS);
529 if (!db) 483 if (!buf)
530 goto bail; 484 goto bail;
531 485
532 db->len = debug_mle_print(dlm, db); 486 i_size_write(inode, debug_mle_print(dlm, buf, PAGE_SIZE - 1));
533 487
534 file->private_data = db; 488 file->private_data = buf;
535 489
536 return 0; 490 return 0;
537bail: 491bail:
@@ -540,9 +494,9 @@ bail:
540 494
541static const struct file_operations debug_mle_fops = { 495static const struct file_operations debug_mle_fops = {
542 .open = debug_mle_open, 496 .open = debug_mle_open,
543 .release = debug_buffer_release, 497 .release = debug_release,
544 .read = debug_buffer_read, 498 .read = debug_read,
545 .llseek = debug_buffer_llseek, 499 .llseek = generic_file_llseek,
546}; 500};
547 501
548/* end - debug mle funcs */ 502/* end - debug mle funcs */
@@ -757,7 +711,7 @@ static const struct file_operations debug_lockres_fops = {
757/* end - debug lockres funcs */ 711/* end - debug lockres funcs */
758 712
759/* begin - debug state funcs */ 713/* begin - debug state funcs */
760static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db) 714static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len)
761{ 715{
762 int out = 0; 716 int out = 0;
763 struct dlm_reco_node_data *node; 717 struct dlm_reco_node_data *node;
@@ -781,35 +735,35 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
781 } 735 }
782 736
783 /* Domain: xxxxxxxxxx Key: 0xdfbac769 */ 737 /* Domain: xxxxxxxxxx Key: 0xdfbac769 */
784 out += snprintf(db->buf + out, db->len - out, 738 out += snprintf(buf + out, len - out,
785 "Domain: %s Key: 0x%08x Protocol: %d.%d\n", 739 "Domain: %s Key: 0x%08x Protocol: %d.%d\n",
786 dlm->name, dlm->key, dlm->dlm_locking_proto.pv_major, 740 dlm->name, dlm->key, dlm->dlm_locking_proto.pv_major,
787 dlm->dlm_locking_proto.pv_minor); 741 dlm->dlm_locking_proto.pv_minor);
788 742
789 /* Thread Pid: xxx Node: xxx State: xxxxx */ 743 /* Thread Pid: xxx Node: xxx State: xxxxx */
790 out += snprintf(db->buf + out, db->len - out, 744 out += snprintf(buf + out, len - out,
791 "Thread Pid: %d Node: %d State: %s\n", 745 "Thread Pid: %d Node: %d State: %s\n",
792 dlm->dlm_thread_task->pid, dlm->node_num, state); 746 task_pid_nr(dlm->dlm_thread_task), dlm->node_num, state);
793 747
794 /* Number of Joins: xxx Joining Node: xxx */ 748 /* Number of Joins: xxx Joining Node: xxx */
795 out += snprintf(db->buf + out, db->len - out, 749 out += snprintf(buf + out, len - out,
796 "Number of Joins: %d Joining Node: %d\n", 750 "Number of Joins: %d Joining Node: %d\n",
797 dlm->num_joins, dlm->joining_node); 751 dlm->num_joins, dlm->joining_node);
798 752
799 /* Domain Map: xx xx xx */ 753 /* Domain Map: xx xx xx */
800 out += snprintf(db->buf + out, db->len - out, "Domain Map: "); 754 out += snprintf(buf + out, len - out, "Domain Map: ");
801 out += stringify_nodemap(dlm->domain_map, O2NM_MAX_NODES, 755 out += stringify_nodemap(dlm->domain_map, O2NM_MAX_NODES,
802 db->buf + out, db->len - out); 756 buf + out, len - out);
803 out += snprintf(db->buf + out, db->len - out, "\n"); 757 out += snprintf(buf + out, len - out, "\n");
804 758
805 /* Live Map: xx xx xx */ 759 /* Live Map: xx xx xx */
806 out += snprintf(db->buf + out, db->len - out, "Live Map: "); 760 out += snprintf(buf + out, len - out, "Live Map: ");
807 out += stringify_nodemap(dlm->live_nodes_map, O2NM_MAX_NODES, 761 out += stringify_nodemap(dlm->live_nodes_map, O2NM_MAX_NODES,
808 db->buf + out, db->len - out); 762 buf + out, len - out);
809 out += snprintf(db->buf + out, db->len - out, "\n"); 763 out += snprintf(buf + out, len - out, "\n");
810 764
811 /* Lock Resources: xxx (xxx) */ 765 /* Lock Resources: xxx (xxx) */
812 out += snprintf(db->buf + out, db->len - out, 766 out += snprintf(buf + out, len - out,
813 "Lock Resources: %d (%d)\n", 767 "Lock Resources: %d (%d)\n",
814 atomic_read(&dlm->res_cur_count), 768 atomic_read(&dlm->res_cur_count),
815 atomic_read(&dlm->res_tot_count)); 769 atomic_read(&dlm->res_tot_count));
@@ -821,29 +775,29 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
821 cur_mles += atomic_read(&dlm->mle_cur_count[i]); 775 cur_mles += atomic_read(&dlm->mle_cur_count[i]);
822 776
823 /* MLEs: xxx (xxx) */ 777 /* MLEs: xxx (xxx) */
824 out += snprintf(db->buf + out, db->len - out, 778 out += snprintf(buf + out, len - out,
825 "MLEs: %d (%d)\n", cur_mles, tot_mles); 779 "MLEs: %d (%d)\n", cur_mles, tot_mles);
826 780
827 /* Blocking: xxx (xxx) */ 781 /* Blocking: xxx (xxx) */
828 out += snprintf(db->buf + out, db->len - out, 782 out += snprintf(buf + out, len - out,
829 " Blocking: %d (%d)\n", 783 " Blocking: %d (%d)\n",
830 atomic_read(&dlm->mle_cur_count[DLM_MLE_BLOCK]), 784 atomic_read(&dlm->mle_cur_count[DLM_MLE_BLOCK]),
831 atomic_read(&dlm->mle_tot_count[DLM_MLE_BLOCK])); 785 atomic_read(&dlm->mle_tot_count[DLM_MLE_BLOCK]));
832 786
833 /* Mastery: xxx (xxx) */ 787 /* Mastery: xxx (xxx) */
834 out += snprintf(db->buf + out, db->len - out, 788 out += snprintf(buf + out, len - out,
835 " Mastery: %d (%d)\n", 789 " Mastery: %d (%d)\n",
836 atomic_read(&dlm->mle_cur_count[DLM_MLE_MASTER]), 790 atomic_read(&dlm->mle_cur_count[DLM_MLE_MASTER]),
837 atomic_read(&dlm->mle_tot_count[DLM_MLE_MASTER])); 791 atomic_read(&dlm->mle_tot_count[DLM_MLE_MASTER]));
838 792
839 /* Migration: xxx (xxx) */ 793 /* Migration: xxx (xxx) */
840 out += snprintf(db->buf + out, db->len - out, 794 out += snprintf(buf + out, len - out,
841 " Migration: %d (%d)\n", 795 " Migration: %d (%d)\n",
842 atomic_read(&dlm->mle_cur_count[DLM_MLE_MIGRATION]), 796 atomic_read(&dlm->mle_cur_count[DLM_MLE_MIGRATION]),
843 atomic_read(&dlm->mle_tot_count[DLM_MLE_MIGRATION])); 797 atomic_read(&dlm->mle_tot_count[DLM_MLE_MIGRATION]));
844 798
845 /* Lists: Dirty=Empty Purge=InUse PendingASTs=Empty ... */ 799 /* Lists: Dirty=Empty Purge=InUse PendingASTs=Empty ... */
846 out += snprintf(db->buf + out, db->len - out, 800 out += snprintf(buf + out, len - out,
847 "Lists: Dirty=%s Purge=%s PendingASTs=%s " 801 "Lists: Dirty=%s Purge=%s PendingASTs=%s "
848 "PendingBASTs=%s\n", 802 "PendingBASTs=%s\n",
849 (list_empty(&dlm->dirty_list) ? "Empty" : "InUse"), 803 (list_empty(&dlm->dirty_list) ? "Empty" : "InUse"),
@@ -852,12 +806,12 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
852 (list_empty(&dlm->pending_basts) ? "Empty" : "InUse")); 806 (list_empty(&dlm->pending_basts) ? "Empty" : "InUse"));
853 807
854 /* Purge Count: xxx Refs: xxx */ 808 /* Purge Count: xxx Refs: xxx */
855 out += snprintf(db->buf + out, db->len - out, 809 out += snprintf(buf + out, len - out,
856 "Purge Count: %d Refs: %d\n", dlm->purge_count, 810 "Purge Count: %d Refs: %d\n", dlm->purge_count,
857 atomic_read(&dlm->dlm_refs.refcount)); 811 atomic_read(&dlm->dlm_refs.refcount));
858 812
859 /* Dead Node: xxx */ 813 /* Dead Node: xxx */
860 out += snprintf(db->buf + out, db->len - out, 814 out += snprintf(buf + out, len - out,
861 "Dead Node: %d\n", dlm->reco.dead_node); 815 "Dead Node: %d\n", dlm->reco.dead_node);
862 816
863 /* What about DLM_RECO_STATE_FINALIZE? */ 817 /* What about DLM_RECO_STATE_FINALIZE? */
@@ -867,19 +821,19 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
867 state = "INACTIVE"; 821 state = "INACTIVE";
868 822
869 /* Recovery Pid: xxxx Master: xxx State: xxxx */ 823 /* Recovery Pid: xxxx Master: xxx State: xxxx */
870 out += snprintf(db->buf + out, db->len - out, 824 out += snprintf(buf + out, len - out,
871 "Recovery Pid: %d Master: %d State: %s\n", 825 "Recovery Pid: %d Master: %d State: %s\n",
872 dlm->dlm_reco_thread_task->pid, 826 task_pid_nr(dlm->dlm_reco_thread_task),
873 dlm->reco.new_master, state); 827 dlm->reco.new_master, state);
874 828
875 /* Recovery Map: xx xx */ 829 /* Recovery Map: xx xx */
876 out += snprintf(db->buf + out, db->len - out, "Recovery Map: "); 830 out += snprintf(buf + out, len - out, "Recovery Map: ");
877 out += stringify_nodemap(dlm->recovery_map, O2NM_MAX_NODES, 831 out += stringify_nodemap(dlm->recovery_map, O2NM_MAX_NODES,
878 db->buf + out, db->len - out); 832 buf + out, len - out);
879 out += snprintf(db->buf + out, db->len - out, "\n"); 833 out += snprintf(buf + out, len - out, "\n");
880 834
881 /* Recovery Node State: */ 835 /* Recovery Node State: */
882 out += snprintf(db->buf + out, db->len - out, "Recovery Node State:\n"); 836 out += snprintf(buf + out, len - out, "Recovery Node State:\n");
883 list_for_each_entry(node, &dlm->reco.node_data, list) { 837 list_for_each_entry(node, &dlm->reco.node_data, list) {
884 switch (node->state) { 838 switch (node->state) {
885 case DLM_RECO_NODE_DATA_INIT: 839 case DLM_RECO_NODE_DATA_INIT:
@@ -907,7 +861,7 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
907 state = "BAD"; 861 state = "BAD";
908 break; 862 break;
909 } 863 }
910 out += snprintf(db->buf + out, db->len - out, "\t%u - %s\n", 864 out += snprintf(buf + out, len - out, "\t%u - %s\n",
911 node->node_num, state); 865 node->node_num, state);
912 } 866 }
913 867
@@ -919,15 +873,15 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
919static int debug_state_open(struct inode *inode, struct file *file) 873static int debug_state_open(struct inode *inode, struct file *file)
920{ 874{
921 struct dlm_ctxt *dlm = inode->i_private; 875 struct dlm_ctxt *dlm = inode->i_private;
922 struct debug_buffer *db = NULL; 876 char *buf = NULL;
923 877
924 db = debug_buffer_allocate(); 878 buf = (char *) get_zeroed_page(GFP_NOFS);
925 if (!db) 879 if (!buf)
926 goto bail; 880 goto bail;
927 881
928 db->len = debug_state_print(dlm, db); 882 i_size_write(inode, debug_state_print(dlm, buf, PAGE_SIZE - 1));
929 883
930 file->private_data = db; 884 file->private_data = buf;
931 885
932 return 0; 886 return 0;
933bail: 887bail:
@@ -936,9 +890,9 @@ bail:
936 890
937static const struct file_operations debug_state_fops = { 891static const struct file_operations debug_state_fops = {
938 .open = debug_state_open, 892 .open = debug_state_open,
939 .release = debug_buffer_release, 893 .release = debug_release,
940 .read = debug_buffer_read, 894 .read = debug_read,
941 .llseek = debug_buffer_llseek, 895 .llseek = generic_file_llseek,
942}; 896};
943/* end - debug state funcs */ 897/* end - debug state funcs */
944 898
@@ -1002,14 +956,10 @@ void dlm_debug_shutdown(struct dlm_ctxt *dlm)
1002 struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt; 956 struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt;
1003 957
1004 if (dc) { 958 if (dc) {
1005 if (dc->debug_purgelist_dentry) 959 debugfs_remove(dc->debug_purgelist_dentry);
1006 debugfs_remove(dc->debug_purgelist_dentry); 960 debugfs_remove(dc->debug_mle_dentry);
1007 if (dc->debug_mle_dentry) 961 debugfs_remove(dc->debug_lockres_dentry);
1008 debugfs_remove(dc->debug_mle_dentry); 962 debugfs_remove(dc->debug_state_dentry);
1009 if (dc->debug_lockres_dentry)
1010 debugfs_remove(dc->debug_lockres_dentry);
1011 if (dc->debug_state_dentry)
1012 debugfs_remove(dc->debug_state_dentry);
1013 dlm_debug_put(dc); 963 dlm_debug_put(dc);
1014 } 964 }
1015} 965}
@@ -1040,8 +990,7 @@ bail:
1040 990
1041void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm) 991void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm)
1042{ 992{
1043 if (dlm->dlm_debugfs_subroot) 993 debugfs_remove(dlm->dlm_debugfs_subroot);
1044 debugfs_remove(dlm->dlm_debugfs_subroot);
1045} 994}
1046 995
1047/* debugfs root */ 996/* debugfs root */
@@ -1057,7 +1006,6 @@ int dlm_create_debugfs_root(void)
1057 1006
1058void dlm_destroy_debugfs_root(void) 1007void dlm_destroy_debugfs_root(void)
1059{ 1008{
1060 if (dlm_debugfs_root) 1009 debugfs_remove(dlm_debugfs_root);
1061 debugfs_remove(dlm_debugfs_root);
1062} 1010}
1063#endif /* CONFIG_DEBUG_FS */ 1011#endif /* CONFIG_DEBUG_FS */
diff --git a/fs/ocfs2/dlm/dlmdebug.h b/fs/ocfs2/dlm/dlmdebug.h
index 8c686d22f9c..1f27c4812d1 100644
--- a/fs/ocfs2/dlm/dlmdebug.h
+++ b/fs/ocfs2/dlm/dlmdebug.h
@@ -37,11 +37,6 @@ struct dlm_debug_ctxt {
37 struct dentry *debug_purgelist_dentry; 37 struct dentry *debug_purgelist_dentry;
38}; 38};
39 39
40struct debug_buffer {
41 int len;
42 char *buf;
43};
44
45struct debug_lockres { 40struct debug_lockres {
46 int dl_len; 41 int dl_len;
47 char *dl_buf; 42 char *dl_buf;
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index cc2aaa96cfe..7e38a072d72 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -460,8 +460,6 @@ redo_bucket:
460 } 460 }
461 cond_resched_lock(&dlm->spinlock); 461 cond_resched_lock(&dlm->spinlock);
462 num += n; 462 num += n;
463 mlog(0, "%s: touched %d lockreses in bucket %d "
464 "(tot=%d)\n", dlm->name, n, i, num);
465 } 463 }
466 spin_unlock(&dlm->spinlock); 464 spin_unlock(&dlm->spinlock);
467 wake_up(&dlm->dlm_thread_wq); 465 wake_up(&dlm->dlm_thread_wq);
@@ -1661,8 +1659,8 @@ bail:
1661 1659
1662static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm) 1660static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm)
1663{ 1661{
1664 o2hb_unregister_callback(NULL, &dlm->dlm_hb_up); 1662 o2hb_unregister_callback(dlm->name, &dlm->dlm_hb_up);
1665 o2hb_unregister_callback(NULL, &dlm->dlm_hb_down); 1663 o2hb_unregister_callback(dlm->name, &dlm->dlm_hb_down);
1666 o2net_unregister_handler_list(&dlm->dlm_domain_handlers); 1664 o2net_unregister_handler_list(&dlm->dlm_domain_handlers);
1667} 1665}
1668 1666
@@ -1674,13 +1672,13 @@ static int dlm_register_domain_handlers(struct dlm_ctxt *dlm)
1674 1672
1675 o2hb_setup_callback(&dlm->dlm_hb_down, O2HB_NODE_DOWN_CB, 1673 o2hb_setup_callback(&dlm->dlm_hb_down, O2HB_NODE_DOWN_CB,
1676 dlm_hb_node_down_cb, dlm, DLM_HB_NODE_DOWN_PRI); 1674 dlm_hb_node_down_cb, dlm, DLM_HB_NODE_DOWN_PRI);
1677 status = o2hb_register_callback(NULL, &dlm->dlm_hb_down); 1675 status = o2hb_register_callback(dlm->name, &dlm->dlm_hb_down);
1678 if (status) 1676 if (status)
1679 goto bail; 1677 goto bail;
1680 1678
1681 o2hb_setup_callback(&dlm->dlm_hb_up, O2HB_NODE_UP_CB, 1679 o2hb_setup_callback(&dlm->dlm_hb_up, O2HB_NODE_UP_CB,
1682 dlm_hb_node_up_cb, dlm, DLM_HB_NODE_UP_PRI); 1680 dlm_hb_node_up_cb, dlm, DLM_HB_NODE_UP_PRI);
1683 status = o2hb_register_callback(NULL, &dlm->dlm_hb_up); 1681 status = o2hb_register_callback(dlm->name, &dlm->dlm_hb_up);
1684 if (status) 1682 if (status)
1685 goto bail; 1683 goto bail;
1686 1684
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 69cf369961c..7009292aac5 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -106,6 +106,9 @@ static int dlm_can_grant_new_lock(struct dlm_lock_resource *res,
106 106
107 if (!dlm_lock_compatible(tmplock->ml.type, lock->ml.type)) 107 if (!dlm_lock_compatible(tmplock->ml.type, lock->ml.type))
108 return 0; 108 return 0;
109 if (!dlm_lock_compatible(tmplock->ml.convert_type,
110 lock->ml.type))
111 return 0;
109 } 112 }
110 113
111 return 1; 114 return 1;
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index f564b0e5f80..59f0f6bdfc6 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2346,7 +2346,8 @@ static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data)
2346 */ 2346 */
2347static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, 2347static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
2348 struct dlm_lock_resource *res, 2348 struct dlm_lock_resource *res,
2349 int *numlocks) 2349 int *numlocks,
2350 int *hasrefs)
2350{ 2351{
2351 int ret; 2352 int ret;
2352 int i; 2353 int i;
@@ -2356,6 +2357,9 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
2356 2357
2357 assert_spin_locked(&res->spinlock); 2358 assert_spin_locked(&res->spinlock);
2358 2359
2360 *numlocks = 0;
2361 *hasrefs = 0;
2362
2359 ret = -EINVAL; 2363 ret = -EINVAL;
2360 if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { 2364 if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) {
2361 mlog(0, "cannot migrate lockres with unknown owner!\n"); 2365 mlog(0, "cannot migrate lockres with unknown owner!\n");
@@ -2386,7 +2390,13 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
2386 } 2390 }
2387 2391
2388 *numlocks = count; 2392 *numlocks = count;
2389 mlog(0, "migrateable lockres having %d locks\n", *numlocks); 2393
2394 count = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
2395 if (count < O2NM_MAX_NODES)
2396 *hasrefs = 1;
2397
2398 mlog(0, "%s: res %.*s, Migrateable, locks %d, refs %d\n", dlm->name,
2399 res->lockname.len, res->lockname.name, *numlocks, *hasrefs);
2390 2400
2391leave: 2401leave:
2392 return ret; 2402 return ret;
@@ -2408,7 +2418,7 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
2408 const char *name; 2418 const char *name;
2409 unsigned int namelen; 2419 unsigned int namelen;
2410 int mle_added = 0; 2420 int mle_added = 0;
2411 int numlocks; 2421 int numlocks, hasrefs;
2412 int wake = 0; 2422 int wake = 0;
2413 2423
2414 if (!dlm_grab(dlm)) 2424 if (!dlm_grab(dlm))
@@ -2417,13 +2427,13 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
2417 name = res->lockname.name; 2427 name = res->lockname.name;
2418 namelen = res->lockname.len; 2428 namelen = res->lockname.len;
2419 2429
2420 mlog(0, "migrating %.*s to %u\n", namelen, name, target); 2430 mlog(0, "%s: Migrating %.*s to %u\n", dlm->name, namelen, name, target);
2421 2431
2422 /* 2432 /*
2423 * ensure this lockres is a proper candidate for migration 2433 * ensure this lockres is a proper candidate for migration
2424 */ 2434 */
2425 spin_lock(&res->spinlock); 2435 spin_lock(&res->spinlock);
2426 ret = dlm_is_lockres_migrateable(dlm, res, &numlocks); 2436 ret = dlm_is_lockres_migrateable(dlm, res, &numlocks, &hasrefs);
2427 if (ret < 0) { 2437 if (ret < 0) {
2428 spin_unlock(&res->spinlock); 2438 spin_unlock(&res->spinlock);
2429 goto leave; 2439 goto leave;
@@ -2431,10 +2441,8 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
2431 spin_unlock(&res->spinlock); 2441 spin_unlock(&res->spinlock);
2432 2442
2433 /* no work to do */ 2443 /* no work to do */
2434 if (numlocks == 0) { 2444 if (numlocks == 0 && !hasrefs)
2435 mlog(0, "no locks were found on this lockres! done!\n");
2436 goto leave; 2445 goto leave;
2437 }
2438 2446
2439 /* 2447 /*
2440 * preallocate up front 2448 * preallocate up front
@@ -2459,14 +2467,14 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
2459 * find a node to migrate the lockres to 2467 * find a node to migrate the lockres to
2460 */ 2468 */
2461 2469
2462 mlog(0, "picking a migration node\n");
2463 spin_lock(&dlm->spinlock); 2470 spin_lock(&dlm->spinlock);
2464 /* pick a new node */ 2471 /* pick a new node */
2465 if (!test_bit(target, dlm->domain_map) || 2472 if (!test_bit(target, dlm->domain_map) ||
2466 target >= O2NM_MAX_NODES) { 2473 target >= O2NM_MAX_NODES) {
2467 target = dlm_pick_migration_target(dlm, res); 2474 target = dlm_pick_migration_target(dlm, res);
2468 } 2475 }
2469 mlog(0, "node %u chosen for migration\n", target); 2476 mlog(0, "%s: res %.*s, Node %u chosen for migration\n", dlm->name,
2477 namelen, name, target);
2470 2478
2471 if (target >= O2NM_MAX_NODES || 2479 if (target >= O2NM_MAX_NODES ||
2472 !test_bit(target, dlm->domain_map)) { 2480 !test_bit(target, dlm->domain_map)) {
@@ -2667,7 +2675,7 @@ int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
2667{ 2675{
2668 int ret; 2676 int ret;
2669 int lock_dropped = 0; 2677 int lock_dropped = 0;
2670 int numlocks; 2678 int numlocks, hasrefs;
2671 2679
2672 spin_lock(&res->spinlock); 2680 spin_lock(&res->spinlock);
2673 if (res->owner != dlm->node_num) { 2681 if (res->owner != dlm->node_num) {
@@ -2681,8 +2689,8 @@ int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
2681 } 2689 }
2682 2690
2683 /* No need to migrate a lockres having no locks */ 2691 /* No need to migrate a lockres having no locks */
2684 ret = dlm_is_lockres_migrateable(dlm, res, &numlocks); 2692 ret = dlm_is_lockres_migrateable(dlm, res, &numlocks, &hasrefs);
2685 if (ret >= 0 && numlocks == 0) { 2693 if (ret >= 0 && numlocks == 0 && !hasrefs) {
2686 spin_unlock(&res->spinlock); 2694 spin_unlock(&res->spinlock);
2687 goto leave; 2695 goto leave;
2688 } 2696 }
@@ -2915,6 +2923,12 @@ static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm,
2915 } 2923 }
2916 queue++; 2924 queue++;
2917 } 2925 }
2926
2927 nodenum = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
2928 if (nodenum < O2NM_MAX_NODES) {
2929 spin_unlock(&res->spinlock);
2930 return nodenum;
2931 }
2918 spin_unlock(&res->spinlock); 2932 spin_unlock(&res->spinlock);
2919 mlog(0, "have not found a suitable target yet! checking domain map\n"); 2933 mlog(0, "have not found a suitable target yet! checking domain map\n");
2920 2934
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 2211acf33d9..1d6d1d22c47 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -122,15 +122,13 @@ int __dlm_lockres_unused(struct dlm_lock_resource *res)
122void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm, 122void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
123 struct dlm_lock_resource *res) 123 struct dlm_lock_resource *res)
124{ 124{
125 mlog_entry("%.*s\n", res->lockname.len, res->lockname.name);
126
127 assert_spin_locked(&dlm->spinlock); 125 assert_spin_locked(&dlm->spinlock);
128 assert_spin_locked(&res->spinlock); 126 assert_spin_locked(&res->spinlock);
129 127
130 if (__dlm_lockres_unused(res)){ 128 if (__dlm_lockres_unused(res)){
131 if (list_empty(&res->purge)) { 129 if (list_empty(&res->purge)) {
132 mlog(0, "putting lockres %.*s:%p onto purge list\n", 130 mlog(0, "%s: Adding res %.*s to purge list\n",
133 res->lockname.len, res->lockname.name, res); 131 dlm->name, res->lockname.len, res->lockname.name);
134 132
135 res->last_used = jiffies; 133 res->last_used = jiffies;
136 dlm_lockres_get(res); 134 dlm_lockres_get(res);
@@ -138,8 +136,8 @@ void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
138 dlm->purge_count++; 136 dlm->purge_count++;
139 } 137 }
140 } else if (!list_empty(&res->purge)) { 138 } else if (!list_empty(&res->purge)) {
141 mlog(0, "removing lockres %.*s:%p from purge list, owner=%u\n", 139 mlog(0, "%s: Removing res %.*s from purge list\n",
142 res->lockname.len, res->lockname.name, res, res->owner); 140 dlm->name, res->lockname.len, res->lockname.name);
143 141
144 list_del_init(&res->purge); 142 list_del_init(&res->purge);
145 dlm_lockres_put(res); 143 dlm_lockres_put(res);
@@ -150,7 +148,6 @@ void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
150void dlm_lockres_calc_usage(struct dlm_ctxt *dlm, 148void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
151 struct dlm_lock_resource *res) 149 struct dlm_lock_resource *res)
152{ 150{
153 mlog_entry("%.*s\n", res->lockname.len, res->lockname.name);
154 spin_lock(&dlm->spinlock); 151 spin_lock(&dlm->spinlock);
155 spin_lock(&res->spinlock); 152 spin_lock(&res->spinlock);
156 153
@@ -171,9 +168,8 @@ static void dlm_purge_lockres(struct dlm_ctxt *dlm,
171 168
172 master = (res->owner == dlm->node_num); 169 master = (res->owner == dlm->node_num);
173 170
174 171 mlog(0, "%s: Purging res %.*s, master %d\n", dlm->name,
175 mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len, 172 res->lockname.len, res->lockname.name, master);
176 res->lockname.name, master);
177 173
178 if (!master) { 174 if (!master) {
179 res->state |= DLM_LOCK_RES_DROPPING_REF; 175 res->state |= DLM_LOCK_RES_DROPPING_REF;
@@ -189,27 +185,25 @@ static void dlm_purge_lockres(struct dlm_ctxt *dlm,
189 /* clear our bit from the master's refmap, ignore errors */ 185 /* clear our bit from the master's refmap, ignore errors */
190 ret = dlm_drop_lockres_ref(dlm, res); 186 ret = dlm_drop_lockres_ref(dlm, res);
191 if (ret < 0) { 187 if (ret < 0) {
192 mlog_errno(ret); 188 mlog(ML_ERROR, "%s: deref %.*s failed %d\n", dlm->name,
189 res->lockname.len, res->lockname.name, ret);
193 if (!dlm_is_host_down(ret)) 190 if (!dlm_is_host_down(ret))
194 BUG(); 191 BUG();
195 } 192 }
196 mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n",
197 dlm->name, res->lockname.len, res->lockname.name, ret);
198 spin_lock(&dlm->spinlock); 193 spin_lock(&dlm->spinlock);
199 spin_lock(&res->spinlock); 194 spin_lock(&res->spinlock);
200 } 195 }
201 196
202 if (!list_empty(&res->purge)) { 197 if (!list_empty(&res->purge)) {
203 mlog(0, "removing lockres %.*s:%p from purgelist, " 198 mlog(0, "%s: Removing res %.*s from purgelist, master %d\n",
204 "master = %d\n", res->lockname.len, res->lockname.name, 199 dlm->name, res->lockname.len, res->lockname.name, master);
205 res, master);
206 list_del_init(&res->purge); 200 list_del_init(&res->purge);
207 dlm_lockres_put(res); 201 dlm_lockres_put(res);
208 dlm->purge_count--; 202 dlm->purge_count--;
209 } 203 }
210 204
211 if (!__dlm_lockres_unused(res)) { 205 if (!__dlm_lockres_unused(res)) {
212 mlog(ML_ERROR, "found lockres %s:%.*s: in use after deref\n", 206 mlog(ML_ERROR, "%s: res %.*s in use after deref\n",
213 dlm->name, res->lockname.len, res->lockname.name); 207 dlm->name, res->lockname.len, res->lockname.name);
214 __dlm_print_one_lock_resource(res); 208 __dlm_print_one_lock_resource(res);
215 BUG(); 209 BUG();
@@ -266,10 +260,10 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
266 unused = __dlm_lockres_unused(lockres); 260 unused = __dlm_lockres_unused(lockres);
267 if (!unused || 261 if (!unused ||
268 (lockres->state & DLM_LOCK_RES_MIGRATING)) { 262 (lockres->state & DLM_LOCK_RES_MIGRATING)) {
269 mlog(0, "lockres %s:%.*s: is in use or " 263 mlog(0, "%s: res %.*s is in use or being remastered, "
270 "being remastered, used %d, state %d\n", 264 "used %d, state %d\n", dlm->name,
271 dlm->name, lockres->lockname.len, 265 lockres->lockname.len, lockres->lockname.name,
272 lockres->lockname.name, !unused, lockres->state); 266 !unused, lockres->state);
273 list_move_tail(&dlm->purge_list, &lockres->purge); 267 list_move_tail(&dlm->purge_list, &lockres->purge);
274 spin_unlock(&lockres->spinlock); 268 spin_unlock(&lockres->spinlock);
275 continue; 269 continue;
@@ -296,15 +290,12 @@ static void dlm_shuffle_lists(struct dlm_ctxt *dlm,
296 struct list_head *head; 290 struct list_head *head;
297 int can_grant = 1; 291 int can_grant = 1;
298 292
299 //mlog(0, "res->lockname.len=%d\n", res->lockname.len); 293 /*
300 //mlog(0, "res->lockname.name=%p\n", res->lockname.name); 294 * Because this function is called with the lockres
301 //mlog(0, "shuffle res %.*s\n", res->lockname.len,
302 // res->lockname.name);
303
304 /* because this function is called with the lockres
305 * spinlock, and because we know that it is not migrating/ 295 * spinlock, and because we know that it is not migrating/
306 * recovering/in-progress, it is fine to reserve asts and 296 * recovering/in-progress, it is fine to reserve asts and
307 * basts right before queueing them all throughout */ 297 * basts right before queueing them all throughout
298 */
308 assert_spin_locked(&dlm->ast_lock); 299 assert_spin_locked(&dlm->ast_lock);
309 assert_spin_locked(&res->spinlock); 300 assert_spin_locked(&res->spinlock);
310 BUG_ON((res->state & (DLM_LOCK_RES_MIGRATING| 301 BUG_ON((res->state & (DLM_LOCK_RES_MIGRATING|
@@ -314,13 +305,13 @@ static void dlm_shuffle_lists(struct dlm_ctxt *dlm,
314converting: 305converting:
315 if (list_empty(&res->converting)) 306 if (list_empty(&res->converting))
316 goto blocked; 307 goto blocked;
317 mlog(0, "res %.*s has locks on a convert queue\n", res->lockname.len, 308 mlog(0, "%s: res %.*s has locks on the convert queue\n", dlm->name,
318 res->lockname.name); 309 res->lockname.len, res->lockname.name);
319 310
320 target = list_entry(res->converting.next, struct dlm_lock, list); 311 target = list_entry(res->converting.next, struct dlm_lock, list);
321 if (target->ml.convert_type == LKM_IVMODE) { 312 if (target->ml.convert_type == LKM_IVMODE) {
322 mlog(ML_ERROR, "%.*s: converting a lock with no " 313 mlog(ML_ERROR, "%s: res %.*s converting lock to invalid mode\n",
323 "convert_type!\n", res->lockname.len, res->lockname.name); 314 dlm->name, res->lockname.len, res->lockname.name);
324 BUG(); 315 BUG();
325 } 316 }
326 head = &res->granted; 317 head = &res->granted;
@@ -365,9 +356,12 @@ converting:
365 spin_lock(&target->spinlock); 356 spin_lock(&target->spinlock);
366 BUG_ON(target->ml.highest_blocked != LKM_IVMODE); 357 BUG_ON(target->ml.highest_blocked != LKM_IVMODE);
367 358
368 mlog(0, "calling ast for converting lock: %.*s, have: %d, " 359 mlog(0, "%s: res %.*s, AST for Converting lock %u:%llu, type "
369 "granting: %d, node: %u\n", res->lockname.len, 360 "%d => %d, node %u\n", dlm->name, res->lockname.len,
370 res->lockname.name, target->ml.type, 361 res->lockname.name,
362 dlm_get_lock_cookie_node(be64_to_cpu(target->ml.cookie)),
363 dlm_get_lock_cookie_seq(be64_to_cpu(target->ml.cookie)),
364 target->ml.type,
371 target->ml.convert_type, target->ml.node); 365 target->ml.convert_type, target->ml.node);
372 366
373 target->ml.type = target->ml.convert_type; 367 target->ml.type = target->ml.convert_type;
@@ -428,11 +422,14 @@ blocked:
428 spin_lock(&target->spinlock); 422 spin_lock(&target->spinlock);
429 BUG_ON(target->ml.highest_blocked != LKM_IVMODE); 423 BUG_ON(target->ml.highest_blocked != LKM_IVMODE);
430 424
431 mlog(0, "calling ast for blocked lock: %.*s, granting: %d, " 425 mlog(0, "%s: res %.*s, AST for Blocked lock %u:%llu, type %d, "
432 "node: %u\n", res->lockname.len, res->lockname.name, 426 "node %u\n", dlm->name, res->lockname.len,
427 res->lockname.name,
428 dlm_get_lock_cookie_node(be64_to_cpu(target->ml.cookie)),
429 dlm_get_lock_cookie_seq(be64_to_cpu(target->ml.cookie)),
433 target->ml.type, target->ml.node); 430 target->ml.type, target->ml.node);
434 431
435 // target->ml.type is already correct 432 /* target->ml.type is already correct */
436 list_move_tail(&target->list, &res->granted); 433 list_move_tail(&target->list, &res->granted);
437 434
438 BUG_ON(!target->lksb); 435 BUG_ON(!target->lksb);
@@ -453,7 +450,6 @@ leave:
453/* must have NO locks when calling this with res !=NULL * */ 450/* must have NO locks when calling this with res !=NULL * */
454void dlm_kick_thread(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) 451void dlm_kick_thread(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
455{ 452{
456 mlog_entry("dlm=%p, res=%p\n", dlm, res);
457 if (res) { 453 if (res) {
458 spin_lock(&dlm->spinlock); 454 spin_lock(&dlm->spinlock);
459 spin_lock(&res->spinlock); 455 spin_lock(&res->spinlock);
@@ -466,8 +462,6 @@ void dlm_kick_thread(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
466 462
467void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) 463void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
468{ 464{
469 mlog_entry("dlm=%p, res=%p\n", dlm, res);
470
471 assert_spin_locked(&dlm->spinlock); 465 assert_spin_locked(&dlm->spinlock);
472 assert_spin_locked(&res->spinlock); 466 assert_spin_locked(&res->spinlock);
473 467
@@ -484,13 +478,16 @@ void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
484 res->state |= DLM_LOCK_RES_DIRTY; 478 res->state |= DLM_LOCK_RES_DIRTY;
485 } 479 }
486 } 480 }
481
482 mlog(0, "%s: res %.*s\n", dlm->name, res->lockname.len,
483 res->lockname.name);
487} 484}
488 485
489 486
490/* Launch the NM thread for the mounted volume */ 487/* Launch the NM thread for the mounted volume */
491int dlm_launch_thread(struct dlm_ctxt *dlm) 488int dlm_launch_thread(struct dlm_ctxt *dlm)
492{ 489{
493 mlog(0, "starting dlm thread...\n"); 490 mlog(0, "Starting dlm_thread...\n");
494 491
495 dlm->dlm_thread_task = kthread_run(dlm_thread, dlm, "dlm_thread"); 492 dlm->dlm_thread_task = kthread_run(dlm_thread, dlm, "dlm_thread");
496 if (IS_ERR(dlm->dlm_thread_task)) { 493 if (IS_ERR(dlm->dlm_thread_task)) {
@@ -505,7 +502,7 @@ int dlm_launch_thread(struct dlm_ctxt *dlm)
505void dlm_complete_thread(struct dlm_ctxt *dlm) 502void dlm_complete_thread(struct dlm_ctxt *dlm)
506{ 503{
507 if (dlm->dlm_thread_task) { 504 if (dlm->dlm_thread_task) {
508 mlog(ML_KTHREAD, "waiting for dlm thread to exit\n"); 505 mlog(ML_KTHREAD, "Waiting for dlm thread to exit\n");
509 kthread_stop(dlm->dlm_thread_task); 506 kthread_stop(dlm->dlm_thread_task);
510 dlm->dlm_thread_task = NULL; 507 dlm->dlm_thread_task = NULL;
511 } 508 }
@@ -536,7 +533,12 @@ static void dlm_flush_asts(struct dlm_ctxt *dlm)
536 /* get an extra ref on lock */ 533 /* get an extra ref on lock */
537 dlm_lock_get(lock); 534 dlm_lock_get(lock);
538 res = lock->lockres; 535 res = lock->lockres;
539 mlog(0, "delivering an ast for this lockres\n"); 536 mlog(0, "%s: res %.*s, Flush AST for lock %u:%llu, type %d, "
537 "node %u\n", dlm->name, res->lockname.len,
538 res->lockname.name,
539 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
540 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
541 lock->ml.type, lock->ml.node);
540 542
541 BUG_ON(!lock->ast_pending); 543 BUG_ON(!lock->ast_pending);
542 544
@@ -557,9 +559,9 @@ static void dlm_flush_asts(struct dlm_ctxt *dlm)
557 /* possible that another ast was queued while 559 /* possible that another ast was queued while
558 * we were delivering the last one */ 560 * we were delivering the last one */
559 if (!list_empty(&lock->ast_list)) { 561 if (!list_empty(&lock->ast_list)) {
560 mlog(0, "aha another ast got queued while " 562 mlog(0, "%s: res %.*s, AST queued while flushing last "
561 "we were finishing the last one. will " 563 "one\n", dlm->name, res->lockname.len,
562 "keep the ast_pending flag set.\n"); 564 res->lockname.name);
563 } else 565 } else
564 lock->ast_pending = 0; 566 lock->ast_pending = 0;
565 567
@@ -590,8 +592,12 @@ static void dlm_flush_asts(struct dlm_ctxt *dlm)
590 dlm_lock_put(lock); 592 dlm_lock_put(lock);
591 spin_unlock(&dlm->ast_lock); 593 spin_unlock(&dlm->ast_lock);
592 594
593 mlog(0, "delivering a bast for this lockres " 595 mlog(0, "%s: res %.*s, Flush BAST for lock %u:%llu, "
594 "(blocked = %d\n", hi); 596 "blocked %d, node %u\n",
597 dlm->name, res->lockname.len, res->lockname.name,
598 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
599 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
600 hi, lock->ml.node);
595 601
596 if (lock->ml.node != dlm->node_num) { 602 if (lock->ml.node != dlm->node_num) {
597 ret = dlm_send_proxy_bast(dlm, res, lock, hi); 603 ret = dlm_send_proxy_bast(dlm, res, lock, hi);
@@ -605,9 +611,9 @@ static void dlm_flush_asts(struct dlm_ctxt *dlm)
605 /* possible that another bast was queued while 611 /* possible that another bast was queued while
606 * we were delivering the last one */ 612 * we were delivering the last one */
607 if (!list_empty(&lock->bast_list)) { 613 if (!list_empty(&lock->bast_list)) {
608 mlog(0, "aha another bast got queued while " 614 mlog(0, "%s: res %.*s, BAST queued while flushing last "
609 "we were finishing the last one. will " 615 "one\n", dlm->name, res->lockname.len,
610 "keep the bast_pending flag set.\n"); 616 res->lockname.name);
611 } else 617 } else
612 lock->bast_pending = 0; 618 lock->bast_pending = 0;
613 619
@@ -675,11 +681,12 @@ static int dlm_thread(void *data)
675 spin_lock(&res->spinlock); 681 spin_lock(&res->spinlock);
676 if (res->owner != dlm->node_num) { 682 if (res->owner != dlm->node_num) {
677 __dlm_print_one_lock_resource(res); 683 __dlm_print_one_lock_resource(res);
678 mlog(ML_ERROR, "inprog:%s, mig:%s, reco:%s, dirty:%s\n", 684 mlog(ML_ERROR, "%s: inprog %d, mig %d, reco %d,"
679 res->state & DLM_LOCK_RES_IN_PROGRESS ? "yes" : "no", 685 " dirty %d\n", dlm->name,
680 res->state & DLM_LOCK_RES_MIGRATING ? "yes" : "no", 686 !!(res->state & DLM_LOCK_RES_IN_PROGRESS),
681 res->state & DLM_LOCK_RES_RECOVERING ? "yes" : "no", 687 !!(res->state & DLM_LOCK_RES_MIGRATING),
682 res->state & DLM_LOCK_RES_DIRTY ? "yes" : "no"); 688 !!(res->state & DLM_LOCK_RES_RECOVERING),
689 !!(res->state & DLM_LOCK_RES_DIRTY));
683 } 690 }
684 BUG_ON(res->owner != dlm->node_num); 691 BUG_ON(res->owner != dlm->node_num);
685 692
@@ -693,8 +700,8 @@ static int dlm_thread(void *data)
693 res->state &= ~DLM_LOCK_RES_DIRTY; 700 res->state &= ~DLM_LOCK_RES_DIRTY;
694 spin_unlock(&res->spinlock); 701 spin_unlock(&res->spinlock);
695 spin_unlock(&dlm->ast_lock); 702 spin_unlock(&dlm->ast_lock);
696 mlog(0, "delaying list shuffling for in-" 703 mlog(0, "%s: res %.*s, inprogress, delay list "
697 "progress lockres %.*s, state=%d\n", 704 "shuffle, state %d\n", dlm->name,
698 res->lockname.len, res->lockname.name, 705 res->lockname.len, res->lockname.name,
699 res->state); 706 res->state);
700 delay = 1; 707 delay = 1;
@@ -706,10 +713,6 @@ static int dlm_thread(void *data)
706 * spinlock and do NOT have the dlm lock. 713 * spinlock and do NOT have the dlm lock.
707 * safe to reserve/queue asts and run the lists. */ 714 * safe to reserve/queue asts and run the lists. */
708 715
709 mlog(0, "calling dlm_shuffle_lists with dlm=%s, "
710 "res=%.*s\n", dlm->name,
711 res->lockname.len, res->lockname.name);
712
713 /* called while holding lockres lock */ 716 /* called while holding lockres lock */
714 dlm_shuffle_lists(dlm, res); 717 dlm_shuffle_lists(dlm, res);
715 res->state &= ~DLM_LOCK_RES_DIRTY; 718 res->state &= ~DLM_LOCK_RES_DIRTY;
@@ -733,7 +736,8 @@ in_progress:
733 /* unlikely, but we may need to give time to 736 /* unlikely, but we may need to give time to
734 * other tasks */ 737 * other tasks */
735 if (!--n) { 738 if (!--n) {
736 mlog(0, "throttling dlm_thread\n"); 739 mlog(0, "%s: Throttling dlm thread\n",
740 dlm->name);
737 break; 741 break;
738 } 742 }
739 } 743 }
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index b2df490a19e..8c5c0eddc36 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -351,11 +351,18 @@ static struct inode *dlmfs_alloc_inode(struct super_block *sb)
351 return &ip->ip_vfs_inode; 351 return &ip->ip_vfs_inode;
352} 352}
353 353
354static void dlmfs_destroy_inode(struct inode *inode) 354static void dlmfs_i_callback(struct rcu_head *head)
355{ 355{
356 struct inode *inode = container_of(head, struct inode, i_rcu);
357 INIT_LIST_HEAD(&inode->i_dentry);
356 kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode)); 358 kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode));
357} 359}
358 360
361static void dlmfs_destroy_inode(struct inode *inode)
362{
363 call_rcu(&inode->i_rcu, dlmfs_i_callback);
364}
365
359static void dlmfs_evict_inode(struct inode *inode) 366static void dlmfs_evict_inode(struct inode *inode)
360{ 367{
361 int status; 368 int status;
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 19ad145d2af..6adafa57606 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -138,7 +138,7 @@ check_gen:
138 138
139 result = d_obtain_alias(inode); 139 result = d_obtain_alias(inode);
140 if (!IS_ERR(result)) 140 if (!IS_ERR(result))
141 result->d_op = &ocfs2_dentry_ops; 141 d_set_d_op(result, &ocfs2_dentry_ops);
142 else 142 else
143 mlog_errno(PTR_ERR(result)); 143 mlog_errno(PTR_ERR(result));
144 144
@@ -176,7 +176,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
176 176
177 parent = d_obtain_alias(ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0)); 177 parent = d_obtain_alias(ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0));
178 if (!IS_ERR(parent)) 178 if (!IS_ERR(parent))
179 parent->d_op = &ocfs2_dentry_ops; 179 d_set_d_op(parent, &ocfs2_dentry_ops);
180 180
181bail_unlock: 181bail_unlock:
182 ocfs2_inode_unlock(dir, 0); 182 ocfs2_inode_unlock(dir, 0);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 77b4c04a280..bdadbae0909 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1307,10 +1307,13 @@ bail:
1307 return err; 1307 return err;
1308} 1308}
1309 1309
1310int ocfs2_permission(struct inode *inode, int mask) 1310int ocfs2_permission(struct inode *inode, int mask, unsigned int flags)
1311{ 1311{
1312 int ret; 1312 int ret;
1313 1313
1314 if (flags & IPERM_FLAG_RCU)
1315 return -ECHILD;
1316
1314 mlog_entry_void(); 1317 mlog_entry_void();
1315 1318
1316 ret = ocfs2_inode_lock(inode, NULL, 0); 1319 ret = ocfs2_inode_lock(inode, NULL, 0);
@@ -1320,7 +1323,7 @@ int ocfs2_permission(struct inode *inode, int mask)
1320 goto out; 1323 goto out;
1321 } 1324 }
1322 1325
1323 ret = generic_permission(inode, mask, ocfs2_check_acl); 1326 ret = generic_permission(inode, mask, flags, ocfs2_check_acl);
1324 1327
1325 ocfs2_inode_unlock(inode, 0); 1328 ocfs2_inode_unlock(inode, 0);
1326out: 1329out:
@@ -2241,11 +2244,15 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
2241 2244
2242 mutex_lock(&inode->i_mutex); 2245 mutex_lock(&inode->i_mutex);
2243 2246
2247 ocfs2_iocb_clear_sem_locked(iocb);
2248
2244relock: 2249relock:
2245 /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */ 2250 /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */
2246 if (direct_io) { 2251 if (direct_io) {
2247 down_read(&inode->i_alloc_sem); 2252 down_read(&inode->i_alloc_sem);
2248 have_alloc_sem = 1; 2253 have_alloc_sem = 1;
2254 /* communicate with ocfs2_dio_end_io */
2255 ocfs2_iocb_set_sem_locked(iocb);
2249 } 2256 }
2250 2257
2251 /* 2258 /*
@@ -2382,8 +2389,10 @@ out:
2382 ocfs2_rw_unlock(inode, rw_level); 2389 ocfs2_rw_unlock(inode, rw_level);
2383 2390
2384out_sems: 2391out_sems:
2385 if (have_alloc_sem) 2392 if (have_alloc_sem) {
2386 up_read(&inode->i_alloc_sem); 2393 up_read(&inode->i_alloc_sem);
2394 ocfs2_iocb_clear_sem_locked(iocb);
2395 }
2387 2396
2388 mutex_unlock(&inode->i_mutex); 2397 mutex_unlock(&inode->i_mutex);
2389 2398
@@ -2527,6 +2536,8 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2527 goto bail; 2536 goto bail;
2528 } 2537 }
2529 2538
2539 ocfs2_iocb_clear_sem_locked(iocb);
2540
2530 /* 2541 /*
2531 * buffered reads protect themselves in ->readpage(). O_DIRECT reads 2542 * buffered reads protect themselves in ->readpage(). O_DIRECT reads
2532 * need locks to protect pending reads from racing with truncate. 2543 * need locks to protect pending reads from racing with truncate.
@@ -2534,6 +2545,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2534 if (filp->f_flags & O_DIRECT) { 2545 if (filp->f_flags & O_DIRECT) {
2535 down_read(&inode->i_alloc_sem); 2546 down_read(&inode->i_alloc_sem);
2536 have_alloc_sem = 1; 2547 have_alloc_sem = 1;
2548 ocfs2_iocb_set_sem_locked(iocb);
2537 2549
2538 ret = ocfs2_rw_lock(inode, 0); 2550 ret = ocfs2_rw_lock(inode, 0);
2539 if (ret < 0) { 2551 if (ret < 0) {
@@ -2575,8 +2587,10 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2575 } 2587 }
2576 2588
2577bail: 2589bail:
2578 if (have_alloc_sem) 2590 if (have_alloc_sem) {
2579 up_read(&inode->i_alloc_sem); 2591 up_read(&inode->i_alloc_sem);
2592 ocfs2_iocb_clear_sem_locked(iocb);
2593 }
2580 if (rw_level != -1) 2594 if (rw_level != -1)
2581 ocfs2_rw_unlock(inode, rw_level); 2595 ocfs2_rw_unlock(inode, rw_level);
2582 mlog_exit(ret); 2596 mlog_exit(ret);
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 97bf761c9e7..f5afbbef670 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -61,7 +61,7 @@ int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
61int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); 61int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
62int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, 62int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
63 struct kstat *stat); 63 struct kstat *stat);
64int ocfs2_permission(struct inode *inode, int mask); 64int ocfs2_permission(struct inode *inode, int mask, unsigned int flags);
65 65
66int ocfs2_should_update_atime(struct inode *inode, 66int ocfs2_should_update_atime(struct inode *inode,
67 struct vfsmount *vfsmnt); 67 struct vfsmount *vfsmnt);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index ff5744e1e36..30c52314445 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -147,7 +147,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
147 spin_unlock(&oi->ip_lock); 147 spin_unlock(&oi->ip_lock);
148 148
149bail_add: 149bail_add:
150 dentry->d_op = &ocfs2_dentry_ops; 150 d_set_d_op(dentry, &ocfs2_dentry_ops);
151 ret = d_splice_alias(inode, dentry); 151 ret = d_splice_alias(inode, dentry);
152 152
153 if (inode) { 153 if (inode) {
@@ -415,7 +415,7 @@ static int ocfs2_mknod(struct inode *dir,
415 mlog_errno(status); 415 mlog_errno(status);
416 goto leave; 416 goto leave;
417 } 417 }
418 dentry->d_op = &ocfs2_dentry_ops; 418 d_set_d_op(dentry, &ocfs2_dentry_ops);
419 419
420 status = ocfs2_add_entry(handle, dentry, inode, 420 status = ocfs2_add_entry(handle, dentry, inode,
421 OCFS2_I(inode)->ip_blkno, parent_fe_bh, 421 OCFS2_I(inode)->ip_blkno, parent_fe_bh,
@@ -743,7 +743,7 @@ static int ocfs2_link(struct dentry *old_dentry,
743 } 743 }
744 744
745 ihold(inode); 745 ihold(inode);
746 dentry->d_op = &ocfs2_dentry_ops; 746 d_set_d_op(dentry, &ocfs2_dentry_ops);
747 d_instantiate(dentry, inode); 747 d_instantiate(dentry, inode);
748 748
749out_commit: 749out_commit:
@@ -1017,8 +1017,11 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
1017 * An error return must mean that no cluster locks 1017 * An error return must mean that no cluster locks
1018 * were held on function exit. 1018 * were held on function exit.
1019 */ 1019 */
1020 if (oi1->ip_blkno != oi2->ip_blkno) 1020 if (oi1->ip_blkno != oi2->ip_blkno) {
1021 ocfs2_inode_unlock(inode2, 1); 1021 ocfs2_inode_unlock(inode2, 1);
1022 brelse(*bh2);
1023 *bh2 = NULL;
1024 }
1022 1025
1023 if (status != -ENOENT) 1026 if (status != -ENOENT)
1024 mlog_errno(status); 1027 mlog_errno(status);
@@ -1794,7 +1797,7 @@ static int ocfs2_symlink(struct inode *dir,
1794 mlog_errno(status); 1797 mlog_errno(status);
1795 goto bail; 1798 goto bail;
1796 } 1799 }
1797 dentry->d_op = &ocfs2_dentry_ops; 1800 d_set_d_op(dentry, &ocfs2_dentry_ops);
1798 1801
1799 status = ocfs2_add_entry(handle, dentry, inode, 1802 status = ocfs2_add_entry(handle, dentry, inode,
1800 le64_to_cpu(fe->i_blkno), parent_fe_bh, 1803 le64_to_cpu(fe->i_blkno), parent_fe_bh,
@@ -2459,7 +2462,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
2459 goto out_commit; 2462 goto out_commit;
2460 } 2463 }
2461 2464
2462 dentry->d_op = &ocfs2_dentry_ops; 2465 d_set_d_op(dentry, &ocfs2_dentry_ops);
2463 d_instantiate(dentry, inode); 2466 d_instantiate(dentry, inode);
2464 status = 0; 2467 status = 0;
2465out_commit: 2468out_commit:
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 70dd3b1798f..51cd6898e7f 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -420,6 +420,11 @@ struct ocfs2_super
420 struct inode *osb_tl_inode; 420 struct inode *osb_tl_inode;
421 struct buffer_head *osb_tl_bh; 421 struct buffer_head *osb_tl_bh;
422 struct delayed_work osb_truncate_log_wq; 422 struct delayed_work osb_truncate_log_wq;
423 /*
424 * How many clusters in our truncate log.
425 * It must be protected by osb_tl_inode->i_mutex.
426 */
427 unsigned int truncated_clusters;
423 428
424 struct ocfs2_node_map osb_recovering_orphan_dirs; 429 struct ocfs2_node_map osb_recovering_orphan_dirs;
425 unsigned int *osb_orphan_wipes; 430 unsigned int *osb_orphan_wipes;
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index c2e4f8222e2..bf2e7764920 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -350,7 +350,7 @@ enum {
350#define OCFS2_LAST_LOCAL_SYSTEM_INODE LOCAL_GROUP_QUOTA_SYSTEM_INODE 350#define OCFS2_LAST_LOCAL_SYSTEM_INODE LOCAL_GROUP_QUOTA_SYSTEM_INODE
351 NUM_SYSTEM_INODES 351 NUM_SYSTEM_INODES
352}; 352};
353#define NUM_GLOBAL_SYSTEM_INODES OCFS2_LAST_GLOBAL_SYSTEM_INODE 353#define NUM_GLOBAL_SYSTEM_INODES OCFS2_FIRST_LOCAL_SYSTEM_INODE
354#define NUM_LOCAL_SYSTEM_INODES \ 354#define NUM_LOCAL_SYSTEM_INODES \
355 (NUM_SYSTEM_INODES - OCFS2_FIRST_LOCAL_SYSTEM_INODE) 355 (NUM_SYSTEM_INODES - OCFS2_FIRST_LOCAL_SYSTEM_INODE)
356 356
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index cfeab7ce369..17ff46fa8a1 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -569,11 +569,18 @@ static struct inode *ocfs2_alloc_inode(struct super_block *sb)
569 return &oi->vfs_inode; 569 return &oi->vfs_inode;
570} 570}
571 571
572static void ocfs2_destroy_inode(struct inode *inode) 572static void ocfs2_i_callback(struct rcu_head *head)
573{ 573{
574 struct inode *inode = container_of(head, struct inode, i_rcu);
575 INIT_LIST_HEAD(&inode->i_dentry);
574 kmem_cache_free(ocfs2_inode_cachep, OCFS2_I(inode)); 576 kmem_cache_free(ocfs2_inode_cachep, OCFS2_I(inode));
575} 577}
576 578
579static void ocfs2_destroy_inode(struct inode *inode)
580{
581 call_rcu(&inode->i_rcu, ocfs2_i_callback);
582}
583
577static unsigned long long ocfs2_max_file_offset(unsigned int bbits, 584static unsigned long long ocfs2_max_file_offset(unsigned int bbits,
578 unsigned int cbits) 585 unsigned int cbits)
579{ 586{
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 911e61f348f..a2a5bff774e 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -343,11 +343,18 @@ static struct inode *openprom_alloc_inode(struct super_block *sb)
343 return &oi->vfs_inode; 343 return &oi->vfs_inode;
344} 344}
345 345
346static void openprom_destroy_inode(struct inode *inode) 346static void openprom_i_callback(struct rcu_head *head)
347{ 347{
348 struct inode *inode = container_of(head, struct inode, i_rcu);
349 INIT_LIST_HEAD(&inode->i_dentry);
348 kmem_cache_free(op_inode_cachep, OP_I(inode)); 350 kmem_cache_free(op_inode_cachep, OP_I(inode));
349} 351}
350 352
353static void openprom_destroy_inode(struct inode *inode)
354{
355 call_rcu(&inode->i_rcu, openprom_i_callback);
356}
357
351static struct inode *openprom_iget(struct super_block *sb, ino_t ino) 358static struct inode *openprom_iget(struct super_block *sb, ino_t ino)
352{ 359{
353 struct inode *inode; 360 struct inode *inode;
diff --git a/fs/pipe.c b/fs/pipe.c
index 04629f36e39..04151e2aee9 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -441,7 +441,7 @@ redo:
441 break; 441 break;
442 } 442 }
443 if (do_wakeup) { 443 if (do_wakeup) {
444 wake_up_interruptible_sync(&pipe->wait); 444 wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT);
445 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); 445 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
446 } 446 }
447 pipe_wait(pipe); 447 pipe_wait(pipe);
@@ -450,7 +450,7 @@ redo:
450 450
451 /* Signal writers asynchronously that there is more room. */ 451 /* Signal writers asynchronously that there is more room. */
452 if (do_wakeup) { 452 if (do_wakeup) {
453 wake_up_interruptible_sync(&pipe->wait); 453 wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT);
454 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); 454 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
455 } 455 }
456 if (ret > 0) 456 if (ret > 0)
@@ -612,7 +612,7 @@ redo2:
612 break; 612 break;
613 } 613 }
614 if (do_wakeup) { 614 if (do_wakeup) {
615 wake_up_interruptible_sync(&pipe->wait); 615 wake_up_interruptible_sync_poll(&pipe->wait, POLLIN);
616 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 616 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
617 do_wakeup = 0; 617 do_wakeup = 0;
618 } 618 }
@@ -623,7 +623,7 @@ redo2:
623out: 623out:
624 mutex_unlock(&inode->i_mutex); 624 mutex_unlock(&inode->i_mutex);
625 if (do_wakeup) { 625 if (do_wakeup) {
626 wake_up_interruptible_sync(&pipe->wait); 626 wake_up_interruptible_sync_poll(&pipe->wait, POLLIN);
627 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 627 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
628 } 628 }
629 if (ret > 0) 629 if (ret > 0)
@@ -715,7 +715,7 @@ pipe_release(struct inode *inode, int decr, int decw)
715 if (!pipe->readers && !pipe->writers) { 715 if (!pipe->readers && !pipe->writers) {
716 free_pipe_info(inode); 716 free_pipe_info(inode);
717 } else { 717 } else {
718 wake_up_interruptible_sync(&pipe->wait); 718 wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLOUT);
719 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 719 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
720 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); 720 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
721 } 721 }
@@ -999,12 +999,12 @@ struct file *create_write_pipe(int flags)
999 goto err; 999 goto err;
1000 1000
1001 err = -ENOMEM; 1001 err = -ENOMEM;
1002 path.dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name); 1002 path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &name);
1003 if (!path.dentry) 1003 if (!path.dentry)
1004 goto err_inode; 1004 goto err_inode;
1005 path.mnt = mntget(pipe_mnt); 1005 path.mnt = mntget(pipe_mnt);
1006 1006
1007 path.dentry->d_op = &pipefs_dentry_operations; 1007 d_set_d_op(path.dentry, &pipefs_dentry_operations);
1008 d_instantiate(path.dentry, inode); 1008 d_instantiate(path.dentry, inode);
1009 1009
1010 err = -ENFILE; 1010 err = -ENFILE;
@@ -1253,6 +1253,10 @@ out:
1253 return ret; 1253 return ret;
1254} 1254}
1255 1255
1256static const struct super_operations pipefs_ops = {
1257 .destroy_inode = free_inode_nonrcu,
1258};
1259
1256/* 1260/*
1257 * pipefs should _never_ be mounted by userland - too much of security hassle, 1261 * pipefs should _never_ be mounted by userland - too much of security hassle,
1258 * no real gain from having the whole whorehouse mounted. So we don't need 1262 * no real gain from having the whole whorehouse mounted. So we don't need
@@ -1262,7 +1266,7 @@ out:
1262static struct dentry *pipefs_mount(struct file_system_type *fs_type, 1266static struct dentry *pipefs_mount(struct file_system_type *fs_type,
1263 int flags, const char *dev_name, void *data) 1267 int flags, const char *dev_name, void *data)
1264{ 1268{
1265 return mount_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC); 1269 return mount_pseudo(fs_type, "pipe:", &pipefs_ops, PIPEFS_MAGIC);
1266} 1270}
1267 1271
1268static struct file_system_type pipe_fs_type = { 1272static struct file_system_type pipe_fs_type = {
@@ -1288,7 +1292,7 @@ static int __init init_pipe_fs(void)
1288static void __exit exit_pipe_fs(void) 1292static void __exit exit_pipe_fs(void)
1289{ 1293{
1290 unregister_filesystem(&pipe_fs_type); 1294 unregister_filesystem(&pipe_fs_type);
1291 mntput(pipe_mnt); 1295 mntput_long(pipe_mnt);
1292} 1296}
1293 1297
1294fs_initcall(init_pipe_fs); 1298fs_initcall(init_pipe_fs);
diff --git a/fs/pnode.c b/fs/pnode.c
index 8066b8dd748..d42514e3238 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -288,7 +288,7 @@ out:
288 */ 288 */
289static inline int do_refcount_check(struct vfsmount *mnt, int count) 289static inline int do_refcount_check(struct vfsmount *mnt, int count)
290{ 290{
291 int mycount = atomic_read(&mnt->mnt_count) - mnt->mnt_ghosts; 291 int mycount = mnt_get_count(mnt) - mnt->mnt_ghosts;
292 return (mycount > count); 292 return (mycount > count);
293} 293}
294 294
@@ -300,7 +300,7 @@ static inline int do_refcount_check(struct vfsmount *mnt, int count)
300 * Check if any of these mounts that **do not have submounts** 300 * Check if any of these mounts that **do not have submounts**
301 * have more references than 'refcnt'. If so return busy. 301 * have more references than 'refcnt'. If so return busy.
302 * 302 *
303 * vfsmount lock must be held for read or write 303 * vfsmount lock must be held for write
304 */ 304 */
305int propagate_mount_busy(struct vfsmount *mnt, int refcnt) 305int propagate_mount_busy(struct vfsmount *mnt, int refcnt)
306{ 306{
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index 2758e2afc51..df434c5f28f 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -10,6 +10,7 @@ proc-$(CONFIG_MMU) := mmu.o task_mmu.o
10proc-y += inode.o root.o base.o generic.o array.o \ 10proc-y += inode.o root.o base.o generic.o array.o \
11 proc_tty.o 11 proc_tty.o
12proc-y += cmdline.o 12proc-y += cmdline.o
13proc-y += consoles.o
13proc-y += cpuinfo.o 14proc-y += cpuinfo.o
14proc-y += devices.o 15proc-y += devices.o
15proc-y += interrupts.o 16proc-y += interrupts.o
diff --git a/fs/proc/array.c b/fs/proc/array.c
index fff6572676a..df2b703b9d0 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -95,7 +95,7 @@ static inline void task_name(struct seq_file *m, struct task_struct *p)
95 95
96 get_task_comm(tcomm, p); 96 get_task_comm(tcomm, p);
97 97
98 seq_printf(m, "Name:\t"); 98 seq_puts(m, "Name:\t");
99 end = m->buf + m->size; 99 end = m->buf + m->size;
100 buf = m->buf + m->count; 100 buf = m->buf + m->count;
101 name = tcomm; 101 name = tcomm;
@@ -122,7 +122,7 @@ static inline void task_name(struct seq_file *m, struct task_struct *p)
122 buf++; 122 buf++;
123 } 123 }
124 m->count = buf - m->buf; 124 m->count = buf - m->buf;
125 seq_printf(m, "\n"); 125 seq_putc(m, '\n');
126} 126}
127 127
128/* 128/*
@@ -208,7 +208,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
208 seq_printf(m, "%d ", GROUP_AT(group_info, g)); 208 seq_printf(m, "%d ", GROUP_AT(group_info, g));
209 put_cred(cred); 209 put_cred(cred);
210 210
211 seq_printf(m, "\n"); 211 seq_putc(m, '\n');
212} 212}
213 213
214static void render_sigset_t(struct seq_file *m, const char *header, 214static void render_sigset_t(struct seq_file *m, const char *header,
@@ -216,7 +216,7 @@ static void render_sigset_t(struct seq_file *m, const char *header,
216{ 216{
217 int i; 217 int i;
218 218
219 seq_printf(m, "%s", header); 219 seq_puts(m, header);
220 220
221 i = _NSIG; 221 i = _NSIG;
222 do { 222 do {
@@ -230,7 +230,7 @@ static void render_sigset_t(struct seq_file *m, const char *header,
230 seq_printf(m, "%x", x); 230 seq_printf(m, "%x", x);
231 } while (i >= 4); 231 } while (i >= 4);
232 232
233 seq_printf(m, "\n"); 233 seq_putc(m, '\n');
234} 234}
235 235
236static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign, 236static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign,
@@ -291,12 +291,12 @@ static void render_cap_t(struct seq_file *m, const char *header,
291{ 291{
292 unsigned __capi; 292 unsigned __capi;
293 293
294 seq_printf(m, "%s", header); 294 seq_puts(m, header);
295 CAP_FOR_EACH_U32(__capi) { 295 CAP_FOR_EACH_U32(__capi) {
296 seq_printf(m, "%08x", 296 seq_printf(m, "%08x",
297 a->cap[(_KERNEL_CAPABILITY_U32S-1) - __capi]); 297 a->cap[(_KERNEL_CAPABILITY_U32S-1) - __capi]);
298 } 298 }
299 seq_printf(m, "\n"); 299 seq_putc(m, '\n');
300} 300}
301 301
302static inline void task_cap(struct seq_file *m, struct task_struct *p) 302static inline void task_cap(struct seq_file *m, struct task_struct *p)
@@ -329,12 +329,12 @@ static inline void task_context_switch_counts(struct seq_file *m,
329 329
330static void task_cpus_allowed(struct seq_file *m, struct task_struct *task) 330static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
331{ 331{
332 seq_printf(m, "Cpus_allowed:\t"); 332 seq_puts(m, "Cpus_allowed:\t");
333 seq_cpumask(m, &task->cpus_allowed); 333 seq_cpumask(m, &task->cpus_allowed);
334 seq_printf(m, "\n"); 334 seq_putc(m, '\n');
335 seq_printf(m, "Cpus_allowed_list:\t"); 335 seq_puts(m, "Cpus_allowed_list:\t");
336 seq_cpumask_list(m, &task->cpus_allowed); 336 seq_cpumask_list(m, &task->cpus_allowed);
337 seq_printf(m, "\n"); 337 seq_putc(m, '\n');
338} 338}
339 339
340int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, 340int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
@@ -535,15 +535,15 @@ int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns,
535int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns, 535int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
536 struct pid *pid, struct task_struct *task) 536 struct pid *pid, struct task_struct *task)
537{ 537{
538 int size = 0, resident = 0, shared = 0, text = 0, lib = 0, data = 0; 538 unsigned long size = 0, resident = 0, shared = 0, text = 0, data = 0;
539 struct mm_struct *mm = get_task_mm(task); 539 struct mm_struct *mm = get_task_mm(task);
540 540
541 if (mm) { 541 if (mm) {
542 size = task_statm(mm, &shared, &text, &data, &resident); 542 size = task_statm(mm, &shared, &text, &data, &resident);
543 mmput(mm); 543 mmput(mm);
544 } 544 }
545 seq_printf(m, "%d %d %d %d %d %d %d\n", 545 seq_printf(m, "%lu %lu %lu %lu 0 %lu 0\n",
546 size, resident, shared, text, lib, data, 0); 546 size, resident, shared, text, data);
547 547
548 return 0; 548 return 0;
549} 549}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 182845147fe..93f1cdd5d3d 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -373,26 +373,20 @@ static int lstats_show_proc(struct seq_file *m, void *v)
373 return -ESRCH; 373 return -ESRCH;
374 seq_puts(m, "Latency Top version : v0.1\n"); 374 seq_puts(m, "Latency Top version : v0.1\n");
375 for (i = 0; i < 32; i++) { 375 for (i = 0; i < 32; i++) {
376 if (task->latency_record[i].backtrace[0]) { 376 struct latency_record *lr = &task->latency_record[i];
377 if (lr->backtrace[0]) {
377 int q; 378 int q;
378 seq_printf(m, "%i %li %li ", 379 seq_printf(m, "%i %li %li",
379 task->latency_record[i].count, 380 lr->count, lr->time, lr->max);
380 task->latency_record[i].time,
381 task->latency_record[i].max);
382 for (q = 0; q < LT_BACKTRACEDEPTH; q++) { 381 for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
383 char sym[KSYM_SYMBOL_LEN]; 382 unsigned long bt = lr->backtrace[q];
384 char *c; 383 if (!bt)
385 if (!task->latency_record[i].backtrace[q])
386 break; 384 break;
387 if (task->latency_record[i].backtrace[q] == ULONG_MAX) 385 if (bt == ULONG_MAX)
388 break; 386 break;
389 sprint_symbol(sym, task->latency_record[i].backtrace[q]); 387 seq_printf(m, " %ps", (void *)bt);
390 c = strchr(sym, '+');
391 if (c)
392 *c = 0;
393 seq_printf(m, "%s ", sym);
394 } 388 }
395 seq_printf(m, "\n"); 389 seq_putc(m, '\n');
396 } 390 }
397 391
398 } 392 }
@@ -751,14 +745,7 @@ static int proc_single_show(struct seq_file *m, void *v)
751 745
752static int proc_single_open(struct inode *inode, struct file *filp) 746static int proc_single_open(struct inode *inode, struct file *filp)
753{ 747{
754 int ret; 748 return single_open(filp, proc_single_show, inode);
755 ret = single_open(filp, proc_single_show, NULL);
756 if (!ret) {
757 struct seq_file *m = filp->private_data;
758
759 m->private = inode;
760 }
761 return ret;
762} 749}
763 750
764static const struct file_operations proc_single_file_operations = { 751static const struct file_operations proc_single_file_operations = {
@@ -1386,9 +1373,77 @@ sched_write(struct file *file, const char __user *buf,
1386 1373
1387static int sched_open(struct inode *inode, struct file *filp) 1374static int sched_open(struct inode *inode, struct file *filp)
1388{ 1375{
1376 return single_open(filp, sched_show, inode);
1377}
1378
1379static const struct file_operations proc_pid_sched_operations = {
1380 .open = sched_open,
1381 .read = seq_read,
1382 .write = sched_write,
1383 .llseek = seq_lseek,
1384 .release = single_release,
1385};
1386
1387#endif
1388
1389#ifdef CONFIG_SCHED_AUTOGROUP
1390/*
1391 * Print out autogroup related information:
1392 */
1393static int sched_autogroup_show(struct seq_file *m, void *v)
1394{
1395 struct inode *inode = m->private;
1396 struct task_struct *p;
1397
1398 p = get_proc_task(inode);
1399 if (!p)
1400 return -ESRCH;
1401 proc_sched_autogroup_show_task(p, m);
1402
1403 put_task_struct(p);
1404
1405 return 0;
1406}
1407
1408static ssize_t
1409sched_autogroup_write(struct file *file, const char __user *buf,
1410 size_t count, loff_t *offset)
1411{
1412 struct inode *inode = file->f_path.dentry->d_inode;
1413 struct task_struct *p;
1414 char buffer[PROC_NUMBUF];
1415 long nice;
1416 int err;
1417
1418 memset(buffer, 0, sizeof(buffer));
1419 if (count > sizeof(buffer) - 1)
1420 count = sizeof(buffer) - 1;
1421 if (copy_from_user(buffer, buf, count))
1422 return -EFAULT;
1423
1424 err = strict_strtol(strstrip(buffer), 0, &nice);
1425 if (err)
1426 return -EINVAL;
1427
1428 p = get_proc_task(inode);
1429 if (!p)
1430 return -ESRCH;
1431
1432 err = nice;
1433 err = proc_sched_autogroup_set_nice(p, &err);
1434 if (err)
1435 count = err;
1436
1437 put_task_struct(p);
1438
1439 return count;
1440}
1441
1442static int sched_autogroup_open(struct inode *inode, struct file *filp)
1443{
1389 int ret; 1444 int ret;
1390 1445
1391 ret = single_open(filp, sched_show, NULL); 1446 ret = single_open(filp, sched_autogroup_show, NULL);
1392 if (!ret) { 1447 if (!ret) {
1393 struct seq_file *m = filp->private_data; 1448 struct seq_file *m = filp->private_data;
1394 1449
@@ -1397,15 +1452,15 @@ static int sched_open(struct inode *inode, struct file *filp)
1397 return ret; 1452 return ret;
1398} 1453}
1399 1454
1400static const struct file_operations proc_pid_sched_operations = { 1455static const struct file_operations proc_pid_sched_autogroup_operations = {
1401 .open = sched_open, 1456 .open = sched_autogroup_open,
1402 .read = seq_read, 1457 .read = seq_read,
1403 .write = sched_write, 1458 .write = sched_autogroup_write,
1404 .llseek = seq_lseek, 1459 .llseek = seq_lseek,
1405 .release = single_release, 1460 .release = single_release,
1406}; 1461};
1407 1462
1408#endif 1463#endif /* CONFIG_SCHED_AUTOGROUP */
1409 1464
1410static ssize_t comm_write(struct file *file, const char __user *buf, 1465static ssize_t comm_write(struct file *file, const char __user *buf,
1411 size_t count, loff_t *offset) 1466 size_t count, loff_t *offset)
@@ -1454,15 +1509,7 @@ static int comm_show(struct seq_file *m, void *v)
1454 1509
1455static int comm_open(struct inode *inode, struct file *filp) 1510static int comm_open(struct inode *inode, struct file *filp)
1456{ 1511{
1457 int ret; 1512 return single_open(filp, comm_show, inode);
1458
1459 ret = single_open(filp, comm_show, NULL);
1460 if (!ret) {
1461 struct seq_file *m = filp->private_data;
1462
1463 m->private = inode;
1464 }
1465 return ret;
1466} 1513}
1467 1514
1468static const struct file_operations proc_pid_set_comm_operations = { 1515static const struct file_operations proc_pid_set_comm_operations = {
@@ -1719,10 +1766,16 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat
1719 */ 1766 */
1720static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) 1767static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
1721{ 1768{
1722 struct inode *inode = dentry->d_inode; 1769 struct inode *inode;
1723 struct task_struct *task = get_proc_task(inode); 1770 struct task_struct *task;
1724 const struct cred *cred; 1771 const struct cred *cred;
1725 1772
1773 if (nd && nd->flags & LOOKUP_RCU)
1774 return -ECHILD;
1775
1776 inode = dentry->d_inode;
1777 task = get_proc_task(inode);
1778
1726 if (task) { 1779 if (task) {
1727 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || 1780 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
1728 task_dumpable(task)) { 1781 task_dumpable(task)) {
@@ -1744,7 +1797,7 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
1744 return 0; 1797 return 0;
1745} 1798}
1746 1799
1747static int pid_delete_dentry(struct dentry * dentry) 1800static int pid_delete_dentry(const struct dentry * dentry)
1748{ 1801{
1749 /* Is the task we represent dead? 1802 /* Is the task we represent dead?
1750 * If so, then don't put the dentry on the lru list, 1803 * If so, then don't put the dentry on the lru list,
@@ -1888,12 +1941,19 @@ static int proc_fd_link(struct inode *inode, struct path *path)
1888 1941
1889static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) 1942static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
1890{ 1943{
1891 struct inode *inode = dentry->d_inode; 1944 struct inode *inode;
1892 struct task_struct *task = get_proc_task(inode); 1945 struct task_struct *task;
1893 int fd = proc_fd(inode); 1946 int fd;
1894 struct files_struct *files; 1947 struct files_struct *files;
1895 const struct cred *cred; 1948 const struct cred *cred;
1896 1949
1950 if (nd && nd->flags & LOOKUP_RCU)
1951 return -ECHILD;
1952
1953 inode = dentry->d_inode;
1954 task = get_proc_task(inode);
1955 fd = proc_fd(inode);
1956
1897 if (task) { 1957 if (task) {
1898 files = get_files_struct(task); 1958 files = get_files_struct(task);
1899 if (files) { 1959 if (files) {
@@ -1969,7 +2029,7 @@ static struct dentry *proc_fd_instantiate(struct inode *dir,
1969 inode->i_op = &proc_pid_link_inode_operations; 2029 inode->i_op = &proc_pid_link_inode_operations;
1970 inode->i_size = 64; 2030 inode->i_size = 64;
1971 ei->op.proc_get_link = proc_fd_link; 2031 ei->op.proc_get_link = proc_fd_link;
1972 dentry->d_op = &tid_fd_dentry_operations; 2032 d_set_d_op(dentry, &tid_fd_dentry_operations);
1973 d_add(dentry, inode); 2033 d_add(dentry, inode);
1974 /* Close the race of the process dying before we return the dentry */ 2034 /* Close the race of the process dying before we return the dentry */
1975 if (tid_fd_revalidate(dentry, NULL)) 2035 if (tid_fd_revalidate(dentry, NULL))
@@ -2101,11 +2161,13 @@ static const struct file_operations proc_fd_operations = {
2101 * /proc/pid/fd needs a special permission handler so that a process can still 2161 * /proc/pid/fd needs a special permission handler so that a process can still
2102 * access /proc/self/fd after it has executed a setuid(). 2162 * access /proc/self/fd after it has executed a setuid().
2103 */ 2163 */
2104static int proc_fd_permission(struct inode *inode, int mask) 2164static int proc_fd_permission(struct inode *inode, int mask, unsigned int flags)
2105{ 2165{
2106 int rv; 2166 int rv;
2107 2167
2108 rv = generic_permission(inode, mask, NULL); 2168 if (flags & IPERM_FLAG_RCU)
2169 return -ECHILD;
2170 rv = generic_permission(inode, mask, flags, NULL);
2109 if (rv == 0) 2171 if (rv == 0)
2110 return 0; 2172 return 0;
2111 if (task_pid(current) == proc_pid(inode)) 2173 if (task_pid(current) == proc_pid(inode))
@@ -2137,7 +2199,7 @@ static struct dentry *proc_fdinfo_instantiate(struct inode *dir,
2137 ei->fd = fd; 2199 ei->fd = fd;
2138 inode->i_mode = S_IFREG | S_IRUSR; 2200 inode->i_mode = S_IFREG | S_IRUSR;
2139 inode->i_fop = &proc_fdinfo_file_operations; 2201 inode->i_fop = &proc_fdinfo_file_operations;
2140 dentry->d_op = &tid_fd_dentry_operations; 2202 d_set_d_op(dentry, &tid_fd_dentry_operations);
2141 d_add(dentry, inode); 2203 d_add(dentry, inode);
2142 /* Close the race of the process dying before we return the dentry */ 2204 /* Close the race of the process dying before we return the dentry */
2143 if (tid_fd_revalidate(dentry, NULL)) 2205 if (tid_fd_revalidate(dentry, NULL))
@@ -2196,7 +2258,7 @@ static struct dentry *proc_pident_instantiate(struct inode *dir,
2196 if (p->fop) 2258 if (p->fop)
2197 inode->i_fop = p->fop; 2259 inode->i_fop = p->fop;
2198 ei->op = p->op; 2260 ei->op = p->op;
2199 dentry->d_op = &pid_dentry_operations; 2261 d_set_d_op(dentry, &pid_dentry_operations);
2200 d_add(dentry, inode); 2262 d_add(dentry, inode);
2201 /* Close the race of the process dying before we return the dentry */ 2263 /* Close the race of the process dying before we return the dentry */
2202 if (pid_revalidate(dentry, NULL)) 2264 if (pid_revalidate(dentry, NULL))
@@ -2563,8 +2625,14 @@ static const struct pid_entry proc_base_stuff[] = {
2563 */ 2625 */
2564static int proc_base_revalidate(struct dentry *dentry, struct nameidata *nd) 2626static int proc_base_revalidate(struct dentry *dentry, struct nameidata *nd)
2565{ 2627{
2566 struct inode *inode = dentry->d_inode; 2628 struct inode *inode;
2567 struct task_struct *task = get_proc_task(inode); 2629 struct task_struct *task;
2630
2631 if (nd->flags & LOOKUP_RCU)
2632 return -ECHILD;
2633
2634 inode = dentry->d_inode;
2635 task = get_proc_task(inode);
2568 if (task) { 2636 if (task) {
2569 put_task_struct(task); 2637 put_task_struct(task);
2570 return 1; 2638 return 1;
@@ -2615,7 +2683,7 @@ static struct dentry *proc_base_instantiate(struct inode *dir,
2615 if (p->fop) 2683 if (p->fop)
2616 inode->i_fop = p->fop; 2684 inode->i_fop = p->fop;
2617 ei->op = p->op; 2685 ei->op = p->op;
2618 dentry->d_op = &proc_base_dentry_operations; 2686 d_set_d_op(dentry, &proc_base_dentry_operations);
2619 d_add(dentry, inode); 2687 d_add(dentry, inode);
2620 error = NULL; 2688 error = NULL;
2621out: 2689out:
@@ -2733,6 +2801,9 @@ static const struct pid_entry tgid_base_stuff[] = {
2733#ifdef CONFIG_SCHED_DEBUG 2801#ifdef CONFIG_SCHED_DEBUG
2734 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 2802 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
2735#endif 2803#endif
2804#ifdef CONFIG_SCHED_AUTOGROUP
2805 REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations),
2806#endif
2736 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), 2807 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
2737#ifdef CONFIG_HAVE_ARCH_TRACEHOOK 2808#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
2738 INF("syscall", S_IRUSR, proc_pid_syscall), 2809 INF("syscall", S_IRUSR, proc_pid_syscall),
@@ -2926,7 +2997,7 @@ static struct dentry *proc_pid_instantiate(struct inode *dir,
2926 inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff, 2997 inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff,
2927 ARRAY_SIZE(tgid_base_stuff)); 2998 ARRAY_SIZE(tgid_base_stuff));
2928 2999
2929 dentry->d_op = &pid_dentry_operations; 3000 d_set_d_op(dentry, &pid_dentry_operations);
2930 3001
2931 d_add(dentry, inode); 3002 d_add(dentry, inode);
2932 /* Close the race of the process dying before we return the dentry */ 3003 /* Close the race of the process dying before we return the dentry */
@@ -3169,7 +3240,7 @@ static struct dentry *proc_task_instantiate(struct inode *dir,
3169 inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff, 3240 inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff,
3170 ARRAY_SIZE(tid_base_stuff)); 3241 ARRAY_SIZE(tid_base_stuff));
3171 3242
3172 dentry->d_op = &pid_dentry_operations; 3243 d_set_d_op(dentry, &pid_dentry_operations);
3173 3244
3174 d_add(dentry, inode); 3245 d_add(dentry, inode);
3175 /* Close the race of the process dying before we return the dentry */ 3246 /* Close the race of the process dying before we return the dentry */
diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c
new file mode 100644
index 00000000000..eafc22ab1fd
--- /dev/null
+++ b/fs/proc/consoles.c
@@ -0,0 +1,114 @@
1/*
2 * Copyright (c) 2010 Werner Fink, Jiri Slaby
3 *
4 * Licensed under GPLv2
5 */
6
7#include <linux/console.h>
8#include <linux/kernel.h>
9#include <linux/proc_fs.h>
10#include <linux/seq_file.h>
11#include <linux/tty_driver.h>
12
13/*
14 * This is handler for /proc/consoles
15 */
16static int show_console_dev(struct seq_file *m, void *v)
17{
18 static const struct {
19 short flag;
20 char name;
21 } con_flags[] = {
22 { CON_ENABLED, 'E' },
23 { CON_CONSDEV, 'C' },
24 { CON_BOOT, 'B' },
25 { CON_PRINTBUFFER, 'p' },
26 { CON_BRL, 'b' },
27 { CON_ANYTIME, 'a' },
28 };
29 char flags[ARRAY_SIZE(con_flags) + 1];
30 struct console *con = v;
31 unsigned int a;
32 int len;
33 dev_t dev = 0;
34
35 if (con->device) {
36 const struct tty_driver *driver;
37 int index;
38 driver = con->device(con, &index);
39 if (driver) {
40 dev = MKDEV(driver->major, driver->minor_start);
41 dev += index;
42 }
43 }
44
45 for (a = 0; a < ARRAY_SIZE(con_flags); a++)
46 flags[a] = (con->flags & con_flags[a].flag) ?
47 con_flags[a].name : ' ';
48 flags[a] = 0;
49
50 seq_printf(m, "%s%d%n", con->name, con->index, &len);
51 len = 21 - len;
52 if (len < 1)
53 len = 1;
54 seq_printf(m, "%*c%c%c%c (%s)", len, ' ', con->read ? 'R' : '-',
55 con->write ? 'W' : '-', con->unblank ? 'U' : '-',
56 flags);
57 if (dev)
58 seq_printf(m, " %4d:%d", MAJOR(dev), MINOR(dev));
59
60 seq_printf(m, "\n");
61
62 return 0;
63}
64
65static void *c_start(struct seq_file *m, loff_t *pos)
66{
67 struct console *con;
68 loff_t off = 0;
69
70 acquire_console_sem();
71 for_each_console(con)
72 if (off++ == *pos)
73 break;
74
75 return con;
76}
77
78static void *c_next(struct seq_file *m, void *v, loff_t *pos)
79{
80 struct console *con = v;
81 ++*pos;
82 return con->next;
83}
84
/* seq_file stop callback: gives back the semaphore taken in c_start(). */
static void c_stop(struct seq_file *m, void *v)
{
	release_console_sem();
}
89
90static const struct seq_operations consoles_op = {
91 .start = c_start,
92 .next = c_next,
93 .stop = c_stop,
94 .show = show_console_dev
95};
96
97static int consoles_open(struct inode *inode, struct file *file)
98{
99 return seq_open(file, &consoles_op);
100}
101
102static const struct file_operations proc_consoles_operations = {
103 .open = consoles_open,
104 .read = seq_read,
105 .llseek = seq_lseek,
106 .release = seq_release,
107};
108
109static int __init proc_consoles_init(void)
110{
111 proc_create("consoles", 0, NULL, &proc_consoles_operations);
112 return 0;
113}
114module_init(proc_consoles_init);
diff --git a/fs/proc/devices.c b/fs/proc/devices.c
index 59ee7da959c..b14347167c3 100644
--- a/fs/proc/devices.c
+++ b/fs/proc/devices.c
@@ -9,14 +9,14 @@ static int devinfo_show(struct seq_file *f, void *v)
9 9
10 if (i < CHRDEV_MAJOR_HASH_SIZE) { 10 if (i < CHRDEV_MAJOR_HASH_SIZE) {
11 if (i == 0) 11 if (i == 0)
12 seq_printf(f, "Character devices:\n"); 12 seq_puts(f, "Character devices:\n");
13 chrdev_show(f, i); 13 chrdev_show(f, i);
14 } 14 }
15#ifdef CONFIG_BLOCK 15#ifdef CONFIG_BLOCK
16 else { 16 else {
17 i -= CHRDEV_MAJOR_HASH_SIZE; 17 i -= CHRDEV_MAJOR_HASH_SIZE;
18 if (i == 0) 18 if (i == 0)
19 seq_printf(f, "\nBlock devices:\n"); 19 seq_puts(f, "\nBlock devices:\n");
20 blkdev_show(f, i); 20 blkdev_show(f, i);
21 } 21 }
22#endif 22#endif
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index dd29f033766..01e07f2a188 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -400,7 +400,7 @@ static const struct inode_operations proc_link_inode_operations = {
400 * smarter: we could keep a "volatile" flag in the 400 * smarter: we could keep a "volatile" flag in the
401 * inode to indicate which ones to keep. 401 * inode to indicate which ones to keep.
402 */ 402 */
403static int proc_delete_dentry(struct dentry * dentry) 403static int proc_delete_dentry(const struct dentry * dentry)
404{ 404{
405 return 1; 405 return 1;
406} 406}
@@ -425,13 +425,10 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
425 if (de->namelen != dentry->d_name.len) 425 if (de->namelen != dentry->d_name.len)
426 continue; 426 continue;
427 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { 427 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
428 unsigned int ino;
429
430 ino = de->low_ino;
431 pde_get(de); 428 pde_get(de);
432 spin_unlock(&proc_subdir_lock); 429 spin_unlock(&proc_subdir_lock);
433 error = -EINVAL; 430 error = -EINVAL;
434 inode = proc_get_inode(dir->i_sb, ino, de); 431 inode = proc_get_inode(dir->i_sb, de);
435 goto out_unlock; 432 goto out_unlock;
436 } 433 }
437 } 434 }
@@ -439,7 +436,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
439out_unlock: 436out_unlock:
440 437
441 if (inode) { 438 if (inode) {
442 dentry->d_op = &proc_dentry_operations; 439 d_set_d_op(dentry, &proc_dentry_operations);
443 d_add(dentry, inode); 440 d_add(dentry, inode);
444 return NULL; 441 return NULL;
445 } 442 }
@@ -768,12 +765,7 @@ EXPORT_SYMBOL(proc_create_data);
768 765
769static void free_proc_entry(struct proc_dir_entry *de) 766static void free_proc_entry(struct proc_dir_entry *de)
770{ 767{
771 unsigned int ino = de->low_ino; 768 release_inode_number(de->low_ino);
772
773 if (ino < PROC_DYNAMIC_FIRST)
774 return;
775
776 release_inode_number(ino);
777 769
778 if (S_ISLNK(de->mode)) 770 if (S_ISLNK(de->mode))
779 kfree(de->data); 771 kfree(de->data);
@@ -834,12 +826,9 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
834 826
835 wait_for_completion(de->pde_unload_completion); 827 wait_for_completion(de->pde_unload_completion);
836 828
837 goto continue_removing; 829 spin_lock(&de->pde_unload_lock);
838 } 830 }
839 spin_unlock(&de->pde_unload_lock);
840 831
841continue_removing:
842 spin_lock(&de->pde_unload_lock);
843 while (!list_empty(&de->pde_openers)) { 832 while (!list_empty(&de->pde_openers)) {
844 struct pde_opener *pdeo; 833 struct pde_opener *pdeo;
845 834
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 3ddb6068177..176ce4cda68 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -65,11 +65,18 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
65 return inode; 65 return inode;
66} 66}
67 67
68static void proc_destroy_inode(struct inode *inode) 68static void proc_i_callback(struct rcu_head *head)
69{ 69{
70 struct inode *inode = container_of(head, struct inode, i_rcu);
71 INIT_LIST_HEAD(&inode->i_dentry);
70 kmem_cache_free(proc_inode_cachep, PROC_I(inode)); 72 kmem_cache_free(proc_inode_cachep, PROC_I(inode));
71} 73}
72 74
75static void proc_destroy_inode(struct inode *inode)
76{
77 call_rcu(&inode->i_rcu, proc_i_callback);
78}
79
73static void init_once(void *foo) 80static void init_once(void *foo)
74{ 81{
75 struct proc_inode *ei = (struct proc_inode *) foo; 82 struct proc_inode *ei = (struct proc_inode *) foo;
@@ -409,12 +416,11 @@ static const struct file_operations proc_reg_file_ops_no_compat = {
409}; 416};
410#endif 417#endif
411 418
412struct inode *proc_get_inode(struct super_block *sb, unsigned int ino, 419struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
413 struct proc_dir_entry *de)
414{ 420{
415 struct inode * inode; 421 struct inode * inode;
416 422
417 inode = iget_locked(sb, ino); 423 inode = iget_locked(sb, de->low_ino);
418 if (!inode) 424 if (!inode)
419 return NULL; 425 return NULL;
420 if (inode->i_state & I_NEW) { 426 if (inode->i_state & I_NEW) {
@@ -464,7 +470,7 @@ int proc_fill_super(struct super_block *s)
464 s->s_time_gran = 1; 470 s->s_time_gran = 1;
465 471
466 pde_get(&proc_root); 472 pde_get(&proc_root);
467 root_inode = proc_get_inode(s, PROC_ROOT_INO, &proc_root); 473 root_inode = proc_get_inode(s, &proc_root);
468 if (!root_inode) 474 if (!root_inode)
469 goto out_no_root; 475 goto out_no_root;
470 root_inode->i_uid = 0; 476 root_inode->i_uid = 0;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 1f24a3eddd1..9ad561ded40 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -96,7 +96,8 @@ extern spinlock_t proc_subdir_lock;
96struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *); 96struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *);
97int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); 97int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
98unsigned long task_vsize(struct mm_struct *); 98unsigned long task_vsize(struct mm_struct *);
99int task_statm(struct mm_struct *, int *, int *, int *, int *); 99unsigned long task_statm(struct mm_struct *,
100 unsigned long *, unsigned long *, unsigned long *, unsigned long *);
100void task_mem(struct seq_file *, struct mm_struct *); 101void task_mem(struct seq_file *, struct mm_struct *);
101 102
102static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde) 103static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde)
@@ -108,7 +109,7 @@ void pde_put(struct proc_dir_entry *pde);
108 109
109extern struct vfsmount *proc_mnt; 110extern struct vfsmount *proc_mnt;
110int proc_fill_super(struct super_block *); 111int proc_fill_super(struct super_block *);
111struct inode *proc_get_inode(struct super_block *, unsigned int, struct proc_dir_entry *); 112struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
112 113
113/* 114/*
114 * These are generic /proc routines that use the internal 115 * These are generic /proc routines that use the internal
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 6f37c391468..d245cb23dd7 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -558,7 +558,7 @@ static int open_kcore(struct inode *inode, struct file *filp)
558static const struct file_operations proc_kcore_operations = { 558static const struct file_operations proc_kcore_operations = {
559 .read = read_kcore, 559 .read = read_kcore,
560 .open = open_kcore, 560 .open = open_kcore,
561 .llseek = generic_file_llseek, 561 .llseek = default_llseek,
562}; 562};
563 563
564#ifdef CONFIG_MEMORY_HOTPLUG 564#ifdef CONFIG_MEMORY_HOTPLUG
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 3b8b4566033..b06c674624e 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -40,7 +40,7 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf,
40 ppage = pfn_to_page(pfn); 40 ppage = pfn_to_page(pfn);
41 else 41 else
42 ppage = NULL; 42 ppage = NULL;
43 if (!ppage) 43 if (!ppage || PageSlab(ppage))
44 pcount = 0; 44 pcount = 0;
45 else 45 else
46 pcount = page_mapcount(ppage); 46 pcount = page_mapcount(ppage);
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index b652cb00906..09a1f92a34e 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -5,6 +5,7 @@
5#include <linux/sysctl.h> 5#include <linux/sysctl.h>
6#include <linux/proc_fs.h> 6#include <linux/proc_fs.h>
7#include <linux/security.h> 7#include <linux/security.h>
8#include <linux/namei.h>
8#include "internal.h" 9#include "internal.h"
9 10
10static const struct dentry_operations proc_sys_dentry_operations; 11static const struct dentry_operations proc_sys_dentry_operations;
@@ -120,7 +121,7 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
120 goto out; 121 goto out;
121 122
122 err = NULL; 123 err = NULL;
123 dentry->d_op = &proc_sys_dentry_operations; 124 d_set_d_op(dentry, &proc_sys_dentry_operations);
124 d_add(dentry, inode); 125 d_add(dentry, inode);
125 126
126out: 127out:
@@ -201,7 +202,7 @@ static int proc_sys_fill_cache(struct file *filp, void *dirent,
201 dput(child); 202 dput(child);
202 return -ENOMEM; 203 return -ENOMEM;
203 } else { 204 } else {
204 child->d_op = &proc_sys_dentry_operations; 205 d_set_d_op(child, &proc_sys_dentry_operations);
205 d_add(child, inode); 206 d_add(child, inode);
206 } 207 }
207 } else { 208 } else {
@@ -294,7 +295,7 @@ out:
294 return ret; 295 return ret;
295} 296}
296 297
297static int proc_sys_permission(struct inode *inode, int mask) 298static int proc_sys_permission(struct inode *inode, int mask,unsigned int flags)
298{ 299{
299 /* 300 /*
300 * sysctl entries that are not writeable, 301 * sysctl entries that are not writeable,
@@ -304,6 +305,9 @@ static int proc_sys_permission(struct inode *inode, int mask)
304 struct ctl_table *table; 305 struct ctl_table *table;
305 int error; 306 int error;
306 307
308 if (flags & IPERM_FLAG_RCU)
309 return -ECHILD;
310
307 /* Executable files are not allowed under /proc/sys/ */ 311 /* Executable files are not allowed under /proc/sys/ */
308 if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) 312 if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))
309 return -EACCES; 313 return -EACCES;
@@ -389,23 +393,30 @@ static const struct inode_operations proc_sys_dir_operations = {
389 393
390static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd) 394static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd)
391{ 395{
396 if (nd->flags & LOOKUP_RCU)
397 return -ECHILD;
392 return !PROC_I(dentry->d_inode)->sysctl->unregistering; 398 return !PROC_I(dentry->d_inode)->sysctl->unregistering;
393} 399}
394 400
395static int proc_sys_delete(struct dentry *dentry) 401static int proc_sys_delete(const struct dentry *dentry)
396{ 402{
397 return !!PROC_I(dentry->d_inode)->sysctl->unregistering; 403 return !!PROC_I(dentry->d_inode)->sysctl->unregistering;
398} 404}
399 405
400static int proc_sys_compare(struct dentry *dir, struct qstr *qstr, 406static int proc_sys_compare(const struct dentry *parent,
401 struct qstr *name) 407 const struct inode *pinode,
408 const struct dentry *dentry, const struct inode *inode,
409 unsigned int len, const char *str, const struct qstr *name)
402{ 410{
403 struct dentry *dentry = container_of(qstr, struct dentry, d_name); 411 /* Although proc doesn't have negative dentries, rcu-walk means
404 if (qstr->len != name->len) 412 * that inode here can be NULL */
413 if (!inode)
414 return 0;
415 if (name->len != len)
405 return 1; 416 return 1;
406 if (memcmp(qstr->name, name->name, name->len)) 417 if (memcmp(name->name, str, len))
407 return 1; 418 return 1;
408 return !sysctl_is_seen(PROC_I(dentry->d_inode)->sysctl); 419 return !sysctl_is_seen(PROC_I(inode)->sysctl);
409} 420}
410 421
411static const struct dentry_operations proc_sys_dentry_operations = { 422static const struct dentry_operations proc_sys_dentry_operations = {
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index 83adcc86943..cb761f01030 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -36,27 +36,27 @@ static void show_tty_range(struct seq_file *m, struct tty_driver *p,
36 } 36 }
37 switch (p->type) { 37 switch (p->type) {
38 case TTY_DRIVER_TYPE_SYSTEM: 38 case TTY_DRIVER_TYPE_SYSTEM:
39 seq_printf(m, "system"); 39 seq_puts(m, "system");
40 if (p->subtype == SYSTEM_TYPE_TTY) 40 if (p->subtype == SYSTEM_TYPE_TTY)
41 seq_printf(m, ":/dev/tty"); 41 seq_puts(m, ":/dev/tty");
42 else if (p->subtype == SYSTEM_TYPE_SYSCONS) 42 else if (p->subtype == SYSTEM_TYPE_SYSCONS)
43 seq_printf(m, ":console"); 43 seq_puts(m, ":console");
44 else if (p->subtype == SYSTEM_TYPE_CONSOLE) 44 else if (p->subtype == SYSTEM_TYPE_CONSOLE)
45 seq_printf(m, ":vtmaster"); 45 seq_puts(m, ":vtmaster");
46 break; 46 break;
47 case TTY_DRIVER_TYPE_CONSOLE: 47 case TTY_DRIVER_TYPE_CONSOLE:
48 seq_printf(m, "console"); 48 seq_puts(m, "console");
49 break; 49 break;
50 case TTY_DRIVER_TYPE_SERIAL: 50 case TTY_DRIVER_TYPE_SERIAL:
51 seq_printf(m, "serial"); 51 seq_puts(m, "serial");
52 break; 52 break;
53 case TTY_DRIVER_TYPE_PTY: 53 case TTY_DRIVER_TYPE_PTY:
54 if (p->subtype == PTY_TYPE_MASTER) 54 if (p->subtype == PTY_TYPE_MASTER)
55 seq_printf(m, "pty:master"); 55 seq_puts(m, "pty:master");
56 else if (p->subtype == PTY_TYPE_SLAVE) 56 else if (p->subtype == PTY_TYPE_SLAVE)
57 seq_printf(m, "pty:slave"); 57 seq_puts(m, "pty:slave");
58 else 58 else
59 seq_printf(m, "pty"); 59 seq_puts(m, "pty");
60 break; 60 break;
61 default: 61 default:
62 seq_printf(m, "type:%d.%d", p->type, p->subtype); 62 seq_printf(m, "type:%d.%d", p->type, p->subtype);
@@ -74,19 +74,19 @@ static int show_tty_driver(struct seq_file *m, void *v)
74 /* pseudo-drivers first */ 74 /* pseudo-drivers first */
75 seq_printf(m, "%-20s /dev/%-8s ", "/dev/tty", "tty"); 75 seq_printf(m, "%-20s /dev/%-8s ", "/dev/tty", "tty");
76 seq_printf(m, "%3d %7d ", TTYAUX_MAJOR, 0); 76 seq_printf(m, "%3d %7d ", TTYAUX_MAJOR, 0);
77 seq_printf(m, "system:/dev/tty\n"); 77 seq_puts(m, "system:/dev/tty\n");
78 seq_printf(m, "%-20s /dev/%-8s ", "/dev/console", "console"); 78 seq_printf(m, "%-20s /dev/%-8s ", "/dev/console", "console");
79 seq_printf(m, "%3d %7d ", TTYAUX_MAJOR, 1); 79 seq_printf(m, "%3d %7d ", TTYAUX_MAJOR, 1);
80 seq_printf(m, "system:console\n"); 80 seq_puts(m, "system:console\n");
81#ifdef CONFIG_UNIX98_PTYS 81#ifdef CONFIG_UNIX98_PTYS
82 seq_printf(m, "%-20s /dev/%-8s ", "/dev/ptmx", "ptmx"); 82 seq_printf(m, "%-20s /dev/%-8s ", "/dev/ptmx", "ptmx");
83 seq_printf(m, "%3d %7d ", TTYAUX_MAJOR, 2); 83 seq_printf(m, "%3d %7d ", TTYAUX_MAJOR, 2);
84 seq_printf(m, "system\n"); 84 seq_puts(m, "system\n");
85#endif 85#endif
86#ifdef CONFIG_VT 86#ifdef CONFIG_VT
87 seq_printf(m, "%-20s /dev/%-8s ", "/dev/vc/0", "vc/0"); 87 seq_printf(m, "%-20s /dev/%-8s ", "/dev/vc/0", "vc/0");
88 seq_printf(m, "%3d %7d ", TTY_MAJOR, 0); 88 seq_printf(m, "%3d %7d ", TTY_MAJOR, 0);
89 seq_printf(m, "system:vtmaster\n"); 89 seq_puts(m, "system:vtmaster\n");
90#endif 90#endif
91 } 91 }
92 92
diff --git a/fs/proc/softirqs.c b/fs/proc/softirqs.c
index 37994737c98..62604be9f58 100644
--- a/fs/proc/softirqs.c
+++ b/fs/proc/softirqs.c
@@ -10,16 +10,16 @@ static int show_softirqs(struct seq_file *p, void *v)
10{ 10{
11 int i, j; 11 int i, j;
12 12
13 seq_printf(p, " "); 13 seq_puts(p, " ");
14 for_each_possible_cpu(i) 14 for_each_possible_cpu(i)
15 seq_printf(p, "CPU%-8d", i); 15 seq_printf(p, "CPU%-8d", i);
16 seq_printf(p, "\n"); 16 seq_putc(p, '\n');
17 17
18 for (i = 0; i < NR_SOFTIRQS; i++) { 18 for (i = 0; i < NR_SOFTIRQS; i++) {
19 seq_printf(p, "%12s:", softirq_to_name[i]); 19 seq_printf(p, "%12s:", softirq_to_name[i]);
20 for_each_possible_cpu(j) 20 for_each_possible_cpu(j)
21 seq_printf(p, " %10u", kstat_softirqs_cpu(i, j)); 21 seq_printf(p, " %10u", kstat_softirqs_cpu(i, j));
22 seq_printf(p, "\n"); 22 seq_putc(p, '\n');
23 } 23 }
24 return 0; 24 return 0;
25} 25}
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index e15a19c93ba..1cffa2b8a2f 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -126,7 +126,7 @@ static int show_stat(struct seq_file *p, void *v)
126 126
127 for (i = 0; i < NR_SOFTIRQS; i++) 127 for (i = 0; i < NR_SOFTIRQS; i++)
128 seq_printf(p, " %u", per_softirq_sums[i]); 128 seq_printf(p, " %u", per_softirq_sums[i]);
129 seq_printf(p, "\n"); 129 seq_putc(p, '\n');
130 130
131 return 0; 131 return 0;
132} 132}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index c126c83b9a4..c3755bd8dd3 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -66,8 +66,9 @@ unsigned long task_vsize(struct mm_struct *mm)
66 return PAGE_SIZE * mm->total_vm; 66 return PAGE_SIZE * mm->total_vm;
67} 67}
68 68
69int task_statm(struct mm_struct *mm, int *shared, int *text, 69unsigned long task_statm(struct mm_struct *mm,
70 int *data, int *resident) 70 unsigned long *shared, unsigned long *text,
71 unsigned long *data, unsigned long *resident)
71{ 72{
72 *shared = get_mm_counter(mm, MM_FILEPAGES); 73 *shared = get_mm_counter(mm, MM_FILEPAGES);
73 *text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) 74 *text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK))
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index cb6306e6384..b535d3e5d5f 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -92,13 +92,14 @@ unsigned long task_vsize(struct mm_struct *mm)
92 return vsize; 92 return vsize;
93} 93}
94 94
95int task_statm(struct mm_struct *mm, int *shared, int *text, 95unsigned long task_statm(struct mm_struct *mm,
96 int *data, int *resident) 96 unsigned long *shared, unsigned long *text,
97 unsigned long *data, unsigned long *resident)
97{ 98{
98 struct vm_area_struct *vma; 99 struct vm_area_struct *vma;
99 struct vm_region *region; 100 struct vm_region *region;
100 struct rb_node *p; 101 struct rb_node *p;
101 int size = kobjsize(mm); 102 unsigned long size = kobjsize(mm);
102 103
103 down_read(&mm->mmap_sem); 104 down_read(&mm->mmap_sem);
104 for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) { 105 for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 2367fb3f70b..74802bc5ded 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -499,7 +499,7 @@ static int __init parse_crash_elf64_headers(void)
499 /* Do some basic Verification. */ 499 /* Do some basic Verification. */
500 if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 || 500 if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 ||
501 (ehdr.e_type != ET_CORE) || 501 (ehdr.e_type != ET_CORE) ||
502 !vmcore_elf_check_arch(&ehdr) || 502 !vmcore_elf64_check_arch(&ehdr) ||
503 ehdr.e_ident[EI_CLASS] != ELFCLASS64 || 503 ehdr.e_ident[EI_CLASS] != ELFCLASS64 ||
504 ehdr.e_ident[EI_VERSION] != EV_CURRENT || 504 ehdr.e_ident[EI_VERSION] != EV_CURRENT ||
505 ehdr.e_version != EV_CURRENT || 505 ehdr.e_version != EV_CURRENT ||
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index fcada42f1aa..e63b4171d58 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -425,11 +425,18 @@ static struct inode *qnx4_alloc_inode(struct super_block *sb)
425 return &ei->vfs_inode; 425 return &ei->vfs_inode;
426} 426}
427 427
428static void qnx4_destroy_inode(struct inode *inode) 428static void qnx4_i_callback(struct rcu_head *head)
429{ 429{
430 struct inode *inode = container_of(head, struct inode, i_rcu);
431 INIT_LIST_HEAD(&inode->i_dentry);
430 kmem_cache_free(qnx4_inode_cachep, qnx4_i(inode)); 432 kmem_cache_free(qnx4_inode_cachep, qnx4_i(inode));
431} 433}
432 434
435static void qnx4_destroy_inode(struct inode *inode)
436{
437 call_rcu(&inode->i_rcu, qnx4_i_callback);
438}
439
433static void init_once(void *foo) 440static void init_once(void *foo)
434{ 441{
435 struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo; 442 struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo;
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 0fed41e6efc..84becd3e477 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -133,16 +133,20 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock);
133EXPORT_SYMBOL(dq_data_lock); 133EXPORT_SYMBOL(dq_data_lock);
134 134
135void __quota_error(struct super_block *sb, const char *func, 135void __quota_error(struct super_block *sb, const char *func,
136 const char *fmt, ...) 136 const char *fmt, ...)
137{ 137{
138 va_list args;
139
140 if (printk_ratelimit()) { 138 if (printk_ratelimit()) {
139 va_list args;
140 struct va_format vaf;
141
141 va_start(args, fmt); 142 va_start(args, fmt);
142 printk(KERN_ERR "Quota error (device %s): %s: ", 143
143 sb->s_id, func); 144 vaf.fmt = fmt;
144 vprintk(fmt, args); 145 vaf.va = &args;
145 printk("\n"); 146
147 printk(KERN_ERR "Quota error (device %s): %s: %pV\n",
148 sb->s_id, func, &vaf);
149
146 va_end(args); 150 va_end(args);
147 } 151 }
148} 152}
diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
index 9e48874eabc..e41c1becf09 100644
--- a/fs/quota/quota_tree.c
+++ b/fs/quota/quota_tree.c
@@ -468,8 +468,8 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
468 return -ENOMEM; 468 return -ENOMEM;
469 ret = read_blk(info, *blk, buf); 469 ret = read_blk(info, *blk, buf);
470 if (ret < 0) { 470 if (ret < 0) {
471 quota_error(dquot->dq_sb, "Can't read quota data " 471 quota_error(dquot->dq_sb, "Can't read quota data block %u",
472 "block %u", blk); 472 *blk);
473 goto out_buf; 473 goto out_buf;
474 } 474 }
475 newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]); 475 newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]);
@@ -493,8 +493,9 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
493 } else { 493 } else {
494 ret = write_blk(info, *blk, buf); 494 ret = write_blk(info, *blk, buf);
495 if (ret < 0) 495 if (ret < 0)
496 quota_error(dquot->dq_sb, "Can't write quota " 496 quota_error(dquot->dq_sb,
497 "tree block %u", blk); 497 "Can't write quota tree block %u",
498 *blk);
498 } 499 }
499 } 500 }
500out_buf: 501out_buf:
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index adbc6f53851..45de98b5946 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c
@@ -586,13 +586,13 @@ void print_block(struct buffer_head *bh, ...) //int print_mode, int first, int l
586 va_list args; 586 va_list args;
587 int mode, first, last; 587 int mode, first, last;
588 588
589 va_start(args, bh);
590
591 if (!bh) { 589 if (!bh) {
592 printk("print_block: buffer is NULL\n"); 590 printk("print_block: buffer is NULL\n");
593 return; 591 return;
594 } 592 }
595 593
594 va_start(args, bh);
595
596 mode = va_arg(args, int); 596 mode = va_arg(args, int);
597 first = va_arg(args, int); 597 first = va_arg(args, int);
598 last = va_arg(args, int); 598 last = va_arg(args, int);
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index b243117b875..2575682a9ea 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -529,11 +529,18 @@ static struct inode *reiserfs_alloc_inode(struct super_block *sb)
529 return &ei->vfs_inode; 529 return &ei->vfs_inode;
530} 530}
531 531
532static void reiserfs_destroy_inode(struct inode *inode) 532static void reiserfs_i_callback(struct rcu_head *head)
533{ 533{
534 struct inode *inode = container_of(head, struct inode, i_rcu);
535 INIT_LIST_HEAD(&inode->i_dentry);
534 kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode)); 536 kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode));
535} 537}
536 538
539static void reiserfs_destroy_inode(struct inode *inode)
540{
541 call_rcu(&inode->i_rcu, reiserfs_i_callback);
542}
543
537static void init_once(void *foo) 544static void init_once(void *foo)
538{ 545{
539 struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo; 546 struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo;
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 5d04a7828e7..3cfb2e93364 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -870,11 +870,14 @@ out:
870 return err; 870 return err;
871} 871}
872 872
873static int reiserfs_check_acl(struct inode *inode, int mask) 873static int reiserfs_check_acl(struct inode *inode, int mask, unsigned int flags)
874{ 874{
875 struct posix_acl *acl; 875 struct posix_acl *acl;
876 int error = -EAGAIN; /* do regular unix permission checks by default */ 876 int error = -EAGAIN; /* do regular unix permission checks by default */
877 877
878 if (flags & IPERM_FLAG_RCU)
879 return -ECHILD;
880
878 acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS); 881 acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS);
879 882
880 if (acl) { 883 if (acl) {
@@ -951,8 +954,10 @@ static int xattr_mount_check(struct super_block *s)
951 return 0; 954 return 0;
952} 955}
953 956
954int reiserfs_permission(struct inode *inode, int mask) 957int reiserfs_permission(struct inode *inode, int mask, unsigned int flags)
955{ 958{
959 if (flags & IPERM_FLAG_RCU)
960 return -ECHILD;
956 /* 961 /*
957 * We don't do permission checks on the internal objects. 962 * We don't do permission checks on the internal objects.
958 * Permissions are determined by the "owning" object. 963 * Permissions are determined by the "owning" object.
@@ -965,13 +970,16 @@ int reiserfs_permission(struct inode *inode, int mask)
965 * Stat data v1 doesn't support ACLs. 970 * Stat data v1 doesn't support ACLs.
966 */ 971 */
967 if (get_inode_sd_version(inode) != STAT_DATA_V1) 972 if (get_inode_sd_version(inode) != STAT_DATA_V1)
968 return generic_permission(inode, mask, reiserfs_check_acl); 973 return generic_permission(inode, mask, flags,
974 reiserfs_check_acl);
969#endif 975#endif
970 return generic_permission(inode, mask, NULL); 976 return generic_permission(inode, mask, flags, NULL);
971} 977}
972 978
973static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd) 979static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd)
974{ 980{
981 if (nd->flags & LOOKUP_RCU)
982 return -ECHILD;
975 return -EPERM; 983 return -EPERM;
976} 984}
977 985
@@ -990,7 +998,7 @@ int reiserfs_lookup_privroot(struct super_block *s)
990 strlen(PRIVROOT_NAME)); 998 strlen(PRIVROOT_NAME));
991 if (!IS_ERR(dentry)) { 999 if (!IS_ERR(dentry)) {
992 REISERFS_SB(s)->priv_root = dentry; 1000 REISERFS_SB(s)->priv_root = dentry;
993 dentry->d_op = &xattr_lookup_poison_ops; 1001 d_set_d_op(dentry, &xattr_lookup_poison_ops);
994 if (dentry->d_inode) 1002 if (dentry->d_inode)
995 dentry->d_inode->i_flags |= S_PRIVATE; 1003 dentry->d_inode->i_flags |= S_PRIVATE;
996 } else 1004 } else
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 6647f90e55c..2305e3121cb 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -400,11 +400,18 @@ static struct inode *romfs_alloc_inode(struct super_block *sb)
400/* 400/*
401 * return a spent inode to the slab cache 401 * return a spent inode to the slab cache
402 */ 402 */
403static void romfs_destroy_inode(struct inode *inode) 403static void romfs_i_callback(struct rcu_head *head)
404{ 404{
405 struct inode *inode = container_of(head, struct inode, i_rcu);
406 INIT_LIST_HEAD(&inode->i_dentry);
405 kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode)); 407 kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode));
406} 408}
407 409
410static void romfs_destroy_inode(struct inode *inode)
411{
412 call_rcu(&inode->i_rcu, romfs_i_callback);
413}
414
408/* 415/*
409 * get filesystem statistics 416 * get filesystem statistics
410 */ 417 */
diff --git a/fs/select.c b/fs/select.c
index b7b10aa3086..e56560d2b08 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -306,6 +306,8 @@ static int poll_select_copy_remaining(struct timespec *end_time, void __user *p,
306 rts.tv_sec = rts.tv_nsec = 0; 306 rts.tv_sec = rts.tv_nsec = 0;
307 307
308 if (timeval) { 308 if (timeval) {
309 if (sizeof(rtv) > sizeof(rtv.tv_sec) + sizeof(rtv.tv_usec))
310 memset(&rtv, 0, sizeof(rtv));
309 rtv.tv_sec = rts.tv_sec; 311 rtv.tv_sec = rts.tv_sec;
310 rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC; 312 rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC;
311 313
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 24de30ba34c..20700b9f2b4 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -440,11 +440,18 @@ static struct inode *squashfs_alloc_inode(struct super_block *sb)
440} 440}
441 441
442 442
443static void squashfs_destroy_inode(struct inode *inode) 443static void squashfs_i_callback(struct rcu_head *head)
444{ 444{
445 struct inode *inode = container_of(head, struct inode, i_rcu);
446 INIT_LIST_HEAD(&inode->i_dentry);
445 kmem_cache_free(squashfs_inode_cachep, squashfs_i(inode)); 447 kmem_cache_free(squashfs_inode_cachep, squashfs_i(inode));
446} 448}
447 449
450static void squashfs_destroy_inode(struct inode *inode)
451{
452 call_rcu(&inode->i_rcu, squashfs_i_callback);
453}
454
448 455
449static struct file_system_type squashfs_fs_type = { 456static struct file_system_type squashfs_fs_type = {
450 .owner = THIS_MODULE, 457 .owner = THIS_MODULE,
diff --git a/fs/super.c b/fs/super.c
index ca696155cd9..823e061faa8 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -30,6 +30,7 @@
30#include <linux/idr.h> 30#include <linux/idr.h>
31#include <linux/mutex.h> 31#include <linux/mutex.h>
32#include <linux/backing-dev.h> 32#include <linux/backing-dev.h>
33#include <linux/rculist_bl.h>
33#include "internal.h" 34#include "internal.h"
34 35
35 36
@@ -71,7 +72,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
71 INIT_LIST_HEAD(&s->s_files); 72 INIT_LIST_HEAD(&s->s_files);
72#endif 73#endif
73 INIT_LIST_HEAD(&s->s_instances); 74 INIT_LIST_HEAD(&s->s_instances);
74 INIT_HLIST_HEAD(&s->s_anon); 75 INIT_HLIST_BL_HEAD(&s->s_anon);
75 INIT_LIST_HEAD(&s->s_inodes); 76 INIT_LIST_HEAD(&s->s_inodes);
76 INIT_LIST_HEAD(&s->s_dentry_lru); 77 INIT_LIST_HEAD(&s->s_dentry_lru);
77 init_rwsem(&s->s_umount); 78 init_rwsem(&s->s_umount);
@@ -1139,7 +1140,7 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
1139 return mnt; 1140 return mnt;
1140 1141
1141 err: 1142 err:
1142 mntput(mnt); 1143 mntput_long(mnt);
1143 return ERR_PTR(err); 1144 return ERR_PTR(err);
1144} 1145}
1145 1146
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 7e54bac8c4b..ea9120a830d 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -231,7 +231,7 @@ void release_sysfs_dirent(struct sysfs_dirent * sd)
231 goto repeat; 231 goto repeat;
232} 232}
233 233
234static int sysfs_dentry_delete(struct dentry *dentry) 234static int sysfs_dentry_delete(const struct dentry *dentry)
235{ 235{
236 struct sysfs_dirent *sd = dentry->d_fsdata; 236 struct sysfs_dirent *sd = dentry->d_fsdata;
237 return !!(sd->s_flags & SYSFS_FLAG_REMOVED); 237 return !!(sd->s_flags & SYSFS_FLAG_REMOVED);
@@ -239,9 +239,13 @@ static int sysfs_dentry_delete(struct dentry *dentry)
239 239
240static int sysfs_dentry_revalidate(struct dentry *dentry, struct nameidata *nd) 240static int sysfs_dentry_revalidate(struct dentry *dentry, struct nameidata *nd)
241{ 241{
242 struct sysfs_dirent *sd = dentry->d_fsdata; 242 struct sysfs_dirent *sd;
243 int is_dir; 243 int is_dir;
244 244
245 if (nd->flags & LOOKUP_RCU)
246 return -ECHILD;
247
248 sd = dentry->d_fsdata;
245 mutex_lock(&sysfs_mutex); 249 mutex_lock(&sysfs_mutex);
246 250
247 /* The sysfs dirent has been deleted */ 251 /* The sysfs dirent has been deleted */
@@ -701,7 +705,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
701 /* instantiate and hash dentry */ 705 /* instantiate and hash dentry */
702 ret = d_find_alias(inode); 706 ret = d_find_alias(inode);
703 if (!ret) { 707 if (!ret) {
704 dentry->d_op = &sysfs_dentry_ops; 708 d_set_d_op(dentry, &sysfs_dentry_ops);
705 dentry->d_fsdata = sysfs_get(sd); 709 dentry->d_fsdata = sysfs_get(sd);
706 d_add(dentry, inode); 710 d_add(dentry, inode);
707 } else { 711 } else {
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 442f34ff1af..c8769dc222d 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -165,10 +165,7 @@ int sysfs_merge_group(struct kobject *kobj,
165 struct attribute *const *attr; 165 struct attribute *const *attr;
166 int i; 166 int i;
167 167
168 if (grp) 168 dir_sd = sysfs_get_dirent(kobj->sd, NULL, grp->name);
169 dir_sd = sysfs_get_dirent(kobj->sd, NULL, grp->name);
170 else
171 dir_sd = sysfs_get(kobj->sd);
172 if (!dir_sd) 169 if (!dir_sd)
173 return -ENOENT; 170 return -ENOENT;
174 171
@@ -195,10 +192,7 @@ void sysfs_unmerge_group(struct kobject *kobj,
195 struct sysfs_dirent *dir_sd; 192 struct sysfs_dirent *dir_sd;
196 struct attribute *const *attr; 193 struct attribute *const *attr;
197 194
198 if (grp) 195 dir_sd = sysfs_get_dirent(kobj->sd, NULL, grp->name);
199 dir_sd = sysfs_get_dirent(kobj->sd, NULL, grp->name);
200 else
201 dir_sd = sysfs_get(kobj->sd);
202 if (dir_sd) { 196 if (dir_sd) {
203 for (attr = grp->attrs; *attr; ++attr) 197 for (attr = grp->attrs; *attr; ++attr)
204 sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name); 198 sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name);
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index cffb1fd8ba3..0a12eb89cd3 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -19,6 +19,7 @@
19#include <linux/errno.h> 19#include <linux/errno.h>
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/slab.h> 21#include <linux/slab.h>
22#include <linux/sysfs.h>
22#include <linux/xattr.h> 23#include <linux/xattr.h>
23#include <linux/security.h> 24#include <linux/security.h>
24#include "sysfs.h" 25#include "sysfs.h"
@@ -348,13 +349,18 @@ int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const void *ns, const cha
348 return -ENOENT; 349 return -ENOENT;
349} 350}
350 351
351int sysfs_permission(struct inode *inode, int mask) 352int sysfs_permission(struct inode *inode, int mask, unsigned int flags)
352{ 353{
353 struct sysfs_dirent *sd = inode->i_private; 354 struct sysfs_dirent *sd;
355
356 if (flags & IPERM_FLAG_RCU)
357 return -ECHILD;
358
359 sd = inode->i_private;
354 360
355 mutex_lock(&sysfs_mutex); 361 mutex_lock(&sysfs_mutex);
356 sysfs_refresh_inode(sd, inode); 362 sysfs_refresh_inode(sd, inode);
357 mutex_unlock(&sysfs_mutex); 363 mutex_unlock(&sysfs_mutex);
358 364
359 return generic_permission(inode, mask, NULL); 365 return generic_permission(inode, mask, flags, NULL);
360} 366}
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index d9be60a2e95..3d28af31d86 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -9,6 +9,7 @@
9 */ 9 */
10 10
11#include <linux/lockdep.h> 11#include <linux/lockdep.h>
12#include <linux/kobject_ns.h>
12#include <linux/fs.h> 13#include <linux/fs.h>
13 14
14struct sysfs_open_dirent; 15struct sysfs_open_dirent;
@@ -200,7 +201,7 @@ static inline void __sysfs_put(struct sysfs_dirent *sd)
200struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd); 201struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd);
201void sysfs_evict_inode(struct inode *inode); 202void sysfs_evict_inode(struct inode *inode);
202int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr); 203int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr);
203int sysfs_permission(struct inode *inode, int mask); 204int sysfs_permission(struct inode *inode, int mask, unsigned int flags);
204int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); 205int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
205int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); 206int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
206int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value, 207int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index de44d067b9e..0630eb969a2 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -333,11 +333,18 @@ static struct inode *sysv_alloc_inode(struct super_block *sb)
333 return &si->vfs_inode; 333 return &si->vfs_inode;
334} 334}
335 335
336static void sysv_destroy_inode(struct inode *inode) 336static void sysv_i_callback(struct rcu_head *head)
337{ 337{
338 struct inode *inode = container_of(head, struct inode, i_rcu);
339 INIT_LIST_HEAD(&inode->i_dentry);
338 kmem_cache_free(sysv_inode_cachep, SYSV_I(inode)); 340 kmem_cache_free(sysv_inode_cachep, SYSV_I(inode));
339} 341}
340 342
343static void sysv_destroy_inode(struct inode *inode)
344{
345 call_rcu(&inode->i_rcu, sysv_i_callback);
346}
347
341static void init_once(void *p) 348static void init_once(void *p)
342{ 349{
343 struct sysv_inode_info *si = (struct sysv_inode_info *)p; 350 struct sysv_inode_info *si = (struct sysv_inode_info *)p;
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index 11e7f7d11cd..b5e68da2db3 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -27,7 +27,8 @@ static int add_nondir(struct dentry *dentry, struct inode *inode)
27 return err; 27 return err;
28} 28}
29 29
30static int sysv_hash(struct dentry *dentry, struct qstr *qstr) 30static int sysv_hash(const struct dentry *dentry, const struct inode *inode,
31 struct qstr *qstr)
31{ 32{
32 /* Truncate the name in place, avoids having to define a compare 33 /* Truncate the name in place, avoids having to define a compare
33 function. */ 34 function. */
@@ -47,7 +48,7 @@ static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry, st
47 struct inode * inode = NULL; 48 struct inode * inode = NULL;
48 ino_t ino; 49 ino_t ino;
49 50
50 dentry->d_op = dir->i_sb->s_root->d_op; 51 d_set_d_op(dentry, dir->i_sb->s_root->d_op);
51 if (dentry->d_name.len > SYSV_NAMELEN) 52 if (dentry->d_name.len > SYSV_NAMELEN)
52 return ERR_PTR(-ENAMETOOLONG); 53 return ERR_PTR(-ENAMETOOLONG);
53 ino = sysv_inode_by_name(dentry); 54 ino = sysv_inode_by_name(dentry);
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index 3d9c62be0c1..76712aefc4a 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -346,7 +346,7 @@ static int complete_read_super(struct super_block *sb, int silent, int size)
346 if (sbi->s_forced_ro) 346 if (sbi->s_forced_ro)
347 sb->s_flags |= MS_RDONLY; 347 sb->s_flags |= MS_RDONLY;
348 if (sbi->s_truncate) 348 if (sbi->s_truncate)
349 sb->s_root->d_op = &sysv_dentry_operations; 349 d_set_d_op(sb->s_root, &sysv_dentry_operations);
350 return 1; 350 return 1;
351} 351}
352 352
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 91fac54c70e..6e11c2975dc 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -272,12 +272,20 @@ static struct inode *ubifs_alloc_inode(struct super_block *sb)
272 return &ui->vfs_inode; 272 return &ui->vfs_inode;
273}; 273};
274 274
275static void ubifs_i_callback(struct rcu_head *head)
276{
277 struct inode *inode = container_of(head, struct inode, i_rcu);
278 struct ubifs_inode *ui = ubifs_inode(inode);
279 INIT_LIST_HEAD(&inode->i_dentry);
280 kmem_cache_free(ubifs_inode_slab, ui);
281}
282
275static void ubifs_destroy_inode(struct inode *inode) 283static void ubifs_destroy_inode(struct inode *inode)
276{ 284{
277 struct ubifs_inode *ui = ubifs_inode(inode); 285 struct ubifs_inode *ui = ubifs_inode(inode);
278 286
279 kfree(ui->data); 287 kfree(ui->data);
280 kmem_cache_free(ubifs_inode_slab, inode); 288 call_rcu(&inode->i_rcu, ubifs_i_callback);
281} 289}
282 290
283/* 291/*
diff --git a/fs/udf/Kconfig b/fs/udf/Kconfig
index f8def3c8ea4..0e0e99bd6bc 100644
--- a/fs/udf/Kconfig
+++ b/fs/udf/Kconfig
@@ -1,6 +1,5 @@
1config UDF_FS 1config UDF_FS
2 tristate "UDF file system support" 2 tristate "UDF file system support"
3 depends on BKL # needs serious work to remove
4 select CRC_ITU_T 3 select CRC_ITU_T
5 help 4 help
6 This is the new file system used on some CD-ROMs and DVDs. Say Y if 5 This is the new file system used on some CD-ROMs and DVDs. Say Y if
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index b608efaa4ce..306ee39ef2c 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -157,10 +157,9 @@ static void udf_bitmap_free_blocks(struct super_block *sb,
157 udf_debug("bit %ld already set\n", bit + i); 157 udf_debug("bit %ld already set\n", bit + i);
158 udf_debug("byte=%2x\n", 158 udf_debug("byte=%2x\n",
159 ((char *)bh->b_data)[(bit + i) >> 3]); 159 ((char *)bh->b_data)[(bit + i) >> 3]);
160 } else {
161 udf_add_free_space(sb, sbi->s_partition, 1);
162 } 160 }
163 } 161 }
162 udf_add_free_space(sb, sbi->s_partition, count);
164 mark_buffer_dirty(bh); 163 mark_buffer_dirty(bh);
165 if (overflow) { 164 if (overflow) {
166 block += count; 165 block += count;
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index 51552bf5022..eb8bfe2b89a 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -30,7 +30,6 @@
30#include <linux/errno.h> 30#include <linux/errno.h>
31#include <linux/mm.h> 31#include <linux/mm.h>
32#include <linux/slab.h> 32#include <linux/slab.h>
33#include <linux/smp_lock.h>
34#include <linux/buffer_head.h> 33#include <linux/buffer_head.h>
35 34
36#include "udf_i.h" 35#include "udf_i.h"
@@ -190,18 +189,14 @@ static int udf_readdir(struct file *filp, void *dirent, filldir_t filldir)
190 struct inode *dir = filp->f_path.dentry->d_inode; 189 struct inode *dir = filp->f_path.dentry->d_inode;
191 int result; 190 int result;
192 191
193 lock_kernel();
194
195 if (filp->f_pos == 0) { 192 if (filp->f_pos == 0) {
196 if (filldir(dirent, ".", 1, filp->f_pos, dir->i_ino, DT_DIR) < 0) { 193 if (filldir(dirent, ".", 1, filp->f_pos, dir->i_ino, DT_DIR) < 0) {
197 unlock_kernel();
198 return 0; 194 return 0;
199 } 195 }
200 filp->f_pos++; 196 filp->f_pos++;
201 } 197 }
202 198
203 result = do_udf_readdir(dir, filp, filldir, dirent); 199 result = do_udf_readdir(dir, filp, filldir, dirent);
204 unlock_kernel();
205 return result; 200 return result;
206} 201}
207 202
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 66b9e7e7e4c..89c78486cbb 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -32,7 +32,6 @@
32#include <linux/string.h> /* memset */ 32#include <linux/string.h> /* memset */
33#include <linux/capability.h> 33#include <linux/capability.h>
34#include <linux/errno.h> 34#include <linux/errno.h>
35#include <linux/smp_lock.h>
36#include <linux/pagemap.h> 35#include <linux/pagemap.h>
37#include <linux/buffer_head.h> 36#include <linux/buffer_head.h>
38#include <linux/aio.h> 37#include <linux/aio.h>
@@ -114,6 +113,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
114 size_t count = iocb->ki_left; 113 size_t count = iocb->ki_left;
115 struct udf_inode_info *iinfo = UDF_I(inode); 114 struct udf_inode_info *iinfo = UDF_I(inode);
116 115
116 down_write(&iinfo->i_data_sem);
117 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { 117 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
118 if (file->f_flags & O_APPEND) 118 if (file->f_flags & O_APPEND)
119 pos = inode->i_size; 119 pos = inode->i_size;
@@ -126,6 +126,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
126 udf_expand_file_adinicb(inode, pos + count, &err); 126 udf_expand_file_adinicb(inode, pos + count, &err);
127 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { 127 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
128 udf_debug("udf_expand_adinicb: err=%d\n", err); 128 udf_debug("udf_expand_adinicb: err=%d\n", err);
129 up_write(&iinfo->i_data_sem);
129 return err; 130 return err;
130 } 131 }
131 } else { 132 } else {
@@ -135,6 +136,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
135 iinfo->i_lenAlloc = inode->i_size; 136 iinfo->i_lenAlloc = inode->i_size;
136 } 137 }
137 } 138 }
139 up_write(&iinfo->i_data_sem);
138 140
139 retval = generic_file_aio_write(iocb, iov, nr_segs, ppos); 141 retval = generic_file_aio_write(iocb, iov, nr_segs, ppos);
140 if (retval > 0) 142 if (retval > 0)
@@ -149,8 +151,6 @@ long udf_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
149 long old_block, new_block; 151 long old_block, new_block;
150 int result = -EINVAL; 152 int result = -EINVAL;
151 153
152 lock_kernel();
153
154 if (file_permission(filp, MAY_READ) != 0) { 154 if (file_permission(filp, MAY_READ) != 0) {
155 udf_debug("no permission to access inode %lu\n", inode->i_ino); 155 udf_debug("no permission to access inode %lu\n", inode->i_ino);
156 result = -EPERM; 156 result = -EPERM;
@@ -196,7 +196,6 @@ long udf_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
196 } 196 }
197 197
198out: 198out:
199 unlock_kernel();
200 return result; 199 return result;
201} 200}
202 201
@@ -204,10 +203,10 @@ static int udf_release_file(struct inode *inode, struct file *filp)
204{ 203{
205 if (filp->f_mode & FMODE_WRITE) { 204 if (filp->f_mode & FMODE_WRITE) {
206 mutex_lock(&inode->i_mutex); 205 mutex_lock(&inode->i_mutex);
207 lock_kernel(); 206 down_write(&UDF_I(inode)->i_data_sem);
208 udf_discard_prealloc(inode); 207 udf_discard_prealloc(inode);
209 udf_truncate_tail_extent(inode); 208 udf_truncate_tail_extent(inode);
210 unlock_kernel(); 209 up_write(&UDF_I(inode)->i_data_sem);
211 mutex_unlock(&inode->i_mutex); 210 mutex_unlock(&inode->i_mutex);
212 } 211 }
213 return 0; 212 return 0;
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index 75d9304d0dc..6fb7e0adcda 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -92,28 +92,19 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err)
92 return NULL; 92 return NULL;
93 } 93 }
94 94
95 mutex_lock(&sbi->s_alloc_mutex);
96 if (sbi->s_lvid_bh) { 95 if (sbi->s_lvid_bh) {
97 struct logicalVolIntegrityDesc *lvid = 96 struct logicalVolIntegrityDescImpUse *lvidiu;
98 (struct logicalVolIntegrityDesc *) 97
99 sbi->s_lvid_bh->b_data; 98 iinfo->i_unique = lvid_get_unique_id(sb);
100 struct logicalVolIntegrityDescImpUse *lvidiu = 99 mutex_lock(&sbi->s_alloc_mutex);
101 udf_sb_lvidiu(sbi); 100 lvidiu = udf_sb_lvidiu(sbi);
102 struct logicalVolHeaderDesc *lvhd;
103 uint64_t uniqueID;
104 lvhd = (struct logicalVolHeaderDesc *)
105 (lvid->logicalVolContentsUse);
106 if (S_ISDIR(mode)) 101 if (S_ISDIR(mode))
107 le32_add_cpu(&lvidiu->numDirs, 1); 102 le32_add_cpu(&lvidiu->numDirs, 1);
108 else 103 else
109 le32_add_cpu(&lvidiu->numFiles, 1); 104 le32_add_cpu(&lvidiu->numFiles, 1);
110 iinfo->i_unique = uniqueID = le64_to_cpu(lvhd->uniqueID);
111 if (!(++uniqueID & 0x00000000FFFFFFFFUL))
112 uniqueID += 16;
113 lvhd->uniqueID = cpu_to_le64(uniqueID);
114 udf_updated_lvid(sb); 105 udf_updated_lvid(sb);
106 mutex_unlock(&sbi->s_alloc_mutex);
115 } 107 }
116 mutex_unlock(&sbi->s_alloc_mutex);
117 108
118 inode_init_owner(inode, dir, mode); 109 inode_init_owner(inode, dir, mode);
119 110
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index fc48f37aa2d..c6a2e782b97 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -31,7 +31,6 @@
31 31
32#include "udfdecl.h" 32#include "udfdecl.h"
33#include <linux/mm.h> 33#include <linux/mm.h>
34#include <linux/smp_lock.h>
35#include <linux/module.h> 34#include <linux/module.h>
36#include <linux/pagemap.h> 35#include <linux/pagemap.h>
37#include <linux/buffer_head.h> 36#include <linux/buffer_head.h>
@@ -51,6 +50,7 @@ MODULE_LICENSE("GPL");
51static mode_t udf_convert_permissions(struct fileEntry *); 50static mode_t udf_convert_permissions(struct fileEntry *);
52static int udf_update_inode(struct inode *, int); 51static int udf_update_inode(struct inode *, int);
53static void udf_fill_inode(struct inode *, struct buffer_head *); 52static void udf_fill_inode(struct inode *, struct buffer_head *);
53static int udf_sync_inode(struct inode *inode);
54static int udf_alloc_i_data(struct inode *inode, size_t size); 54static int udf_alloc_i_data(struct inode *inode, size_t size);
55static struct buffer_head *inode_getblk(struct inode *, sector_t, int *, 55static struct buffer_head *inode_getblk(struct inode *, sector_t, int *,
56 sector_t *, int *); 56 sector_t *, int *);
@@ -79,9 +79,7 @@ void udf_evict_inode(struct inode *inode)
79 want_delete = 1; 79 want_delete = 1;
80 inode->i_size = 0; 80 inode->i_size = 0;
81 udf_truncate(inode); 81 udf_truncate(inode);
82 lock_kernel();
83 udf_update_inode(inode, IS_SYNC(inode)); 82 udf_update_inode(inode, IS_SYNC(inode));
84 unlock_kernel();
85 } 83 }
86 invalidate_inode_buffers(inode); 84 invalidate_inode_buffers(inode);
87 end_writeback(inode); 85 end_writeback(inode);
@@ -97,9 +95,7 @@ void udf_evict_inode(struct inode *inode)
97 kfree(iinfo->i_ext.i_data); 95 kfree(iinfo->i_ext.i_data);
98 iinfo->i_ext.i_data = NULL; 96 iinfo->i_ext.i_data = NULL;
99 if (want_delete) { 97 if (want_delete) {
100 lock_kernel();
101 udf_free_inode(inode); 98 udf_free_inode(inode);
102 unlock_kernel();
103 } 99 }
104} 100}
105 101
@@ -302,10 +298,9 @@ static int udf_get_block(struct inode *inode, sector_t block,
302 err = -EIO; 298 err = -EIO;
303 new = 0; 299 new = 0;
304 bh = NULL; 300 bh = NULL;
305
306 lock_kernel();
307
308 iinfo = UDF_I(inode); 301 iinfo = UDF_I(inode);
302
303 down_write(&iinfo->i_data_sem);
309 if (block == iinfo->i_next_alloc_block + 1) { 304 if (block == iinfo->i_next_alloc_block + 1) {
310 iinfo->i_next_alloc_block++; 305 iinfo->i_next_alloc_block++;
311 iinfo->i_next_alloc_goal++; 306 iinfo->i_next_alloc_goal++;
@@ -324,7 +319,7 @@ static int udf_get_block(struct inode *inode, sector_t block,
324 map_bh(bh_result, inode->i_sb, phys); 319 map_bh(bh_result, inode->i_sb, phys);
325 320
326abort: 321abort:
327 unlock_kernel(); 322 up_write(&iinfo->i_data_sem);
328 return err; 323 return err;
329} 324}
330 325
@@ -1022,16 +1017,16 @@ void udf_truncate(struct inode *inode)
1022 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) 1017 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1023 return; 1018 return;
1024 1019
1025 lock_kernel();
1026 iinfo = UDF_I(inode); 1020 iinfo = UDF_I(inode);
1027 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { 1021 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
1022 down_write(&iinfo->i_data_sem);
1028 if (inode->i_sb->s_blocksize < 1023 if (inode->i_sb->s_blocksize <
1029 (udf_file_entry_alloc_offset(inode) + 1024 (udf_file_entry_alloc_offset(inode) +
1030 inode->i_size)) { 1025 inode->i_size)) {
1031 udf_expand_file_adinicb(inode, inode->i_size, &err); 1026 udf_expand_file_adinicb(inode, inode->i_size, &err);
1032 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { 1027 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
1033 inode->i_size = iinfo->i_lenAlloc; 1028 inode->i_size = iinfo->i_lenAlloc;
1034 unlock_kernel(); 1029 up_write(&iinfo->i_data_sem);
1035 return; 1030 return;
1036 } else 1031 } else
1037 udf_truncate_extents(inode); 1032 udf_truncate_extents(inode);
@@ -1042,10 +1037,13 @@ void udf_truncate(struct inode *inode)
1042 offset - udf_file_entry_alloc_offset(inode)); 1037 offset - udf_file_entry_alloc_offset(inode));
1043 iinfo->i_lenAlloc = inode->i_size; 1038 iinfo->i_lenAlloc = inode->i_size;
1044 } 1039 }
1040 up_write(&iinfo->i_data_sem);
1045 } else { 1041 } else {
1046 block_truncate_page(inode->i_mapping, inode->i_size, 1042 block_truncate_page(inode->i_mapping, inode->i_size,
1047 udf_get_block); 1043 udf_get_block);
1044 down_write(&iinfo->i_data_sem);
1048 udf_truncate_extents(inode); 1045 udf_truncate_extents(inode);
1046 up_write(&iinfo->i_data_sem);
1049 } 1047 }
1050 1048
1051 inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb); 1049 inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb);
@@ -1053,7 +1051,6 @@ void udf_truncate(struct inode *inode)
1053 udf_sync_inode(inode); 1051 udf_sync_inode(inode);
1054 else 1052 else
1055 mark_inode_dirty(inode); 1053 mark_inode_dirty(inode);
1056 unlock_kernel();
1057} 1054}
1058 1055
1059static void __udf_read_inode(struct inode *inode) 1056static void __udf_read_inode(struct inode *inode)
@@ -1202,6 +1199,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
1202 return; 1199 return;
1203 } 1200 }
1204 1201
1202 read_lock(&sbi->s_cred_lock);
1205 inode->i_uid = le32_to_cpu(fe->uid); 1203 inode->i_uid = le32_to_cpu(fe->uid);
1206 if (inode->i_uid == -1 || 1204 if (inode->i_uid == -1 ||
1207 UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_IGNORE) || 1205 UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_IGNORE) ||
@@ -1214,13 +1212,6 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
1214 UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_SET)) 1212 UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_SET))
1215 inode->i_gid = UDF_SB(inode->i_sb)->s_gid; 1213 inode->i_gid = UDF_SB(inode->i_sb)->s_gid;
1216 1214
1217 inode->i_nlink = le16_to_cpu(fe->fileLinkCount);
1218 if (!inode->i_nlink)
1219 inode->i_nlink = 1;
1220
1221 inode->i_size = le64_to_cpu(fe->informationLength);
1222 iinfo->i_lenExtents = inode->i_size;
1223
1224 if (fe->icbTag.fileType != ICBTAG_FILE_TYPE_DIRECTORY && 1215 if (fe->icbTag.fileType != ICBTAG_FILE_TYPE_DIRECTORY &&
1225 sbi->s_fmode != UDF_INVALID_MODE) 1216 sbi->s_fmode != UDF_INVALID_MODE)
1226 inode->i_mode = sbi->s_fmode; 1217 inode->i_mode = sbi->s_fmode;
@@ -1230,6 +1221,14 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
1230 else 1221 else
1231 inode->i_mode = udf_convert_permissions(fe); 1222 inode->i_mode = udf_convert_permissions(fe);
1232 inode->i_mode &= ~sbi->s_umask; 1223 inode->i_mode &= ~sbi->s_umask;
1224 read_unlock(&sbi->s_cred_lock);
1225
1226 inode->i_nlink = le16_to_cpu(fe->fileLinkCount);
1227 if (!inode->i_nlink)
1228 inode->i_nlink = 1;
1229
1230 inode->i_size = le64_to_cpu(fe->informationLength);
1231 iinfo->i_lenExtents = inode->i_size;
1233 1232
1234 if (iinfo->i_efe == 0) { 1233 if (iinfo->i_efe == 0) {
1235 inode->i_blocks = le64_to_cpu(fe->logicalBlocksRecorded) << 1234 inode->i_blocks = le64_to_cpu(fe->logicalBlocksRecorded) <<
@@ -1373,16 +1372,10 @@ static mode_t udf_convert_permissions(struct fileEntry *fe)
1373 1372
1374int udf_write_inode(struct inode *inode, struct writeback_control *wbc) 1373int udf_write_inode(struct inode *inode, struct writeback_control *wbc)
1375{ 1374{
1376 int ret; 1375 return udf_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
1377
1378 lock_kernel();
1379 ret = udf_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
1380 unlock_kernel();
1381
1382 return ret;
1383} 1376}
1384 1377
1385int udf_sync_inode(struct inode *inode) 1378static int udf_sync_inode(struct inode *inode)
1386{ 1379{
1387 return udf_update_inode(inode, 1); 1380 return udf_update_inode(inode, 1);
1388} 1381}
@@ -2048,7 +2041,7 @@ long udf_block_map(struct inode *inode, sector_t block)
2048 struct extent_position epos = {}; 2041 struct extent_position epos = {};
2049 int ret; 2042 int ret;
2050 2043
2051 lock_kernel(); 2044 down_read(&UDF_I(inode)->i_data_sem);
2052 2045
2053 if (inode_bmap(inode, block, &epos, &eloc, &elen, &offset) == 2046 if (inode_bmap(inode, block, &epos, &eloc, &elen, &offset) ==
2054 (EXT_RECORDED_ALLOCATED >> 30)) 2047 (EXT_RECORDED_ALLOCATED >> 30))
@@ -2056,7 +2049,7 @@ long udf_block_map(struct inode *inode, sector_t block)
2056 else 2049 else
2057 ret = 0; 2050 ret = 0;
2058 2051
2059 unlock_kernel(); 2052 up_read(&UDF_I(inode)->i_data_sem);
2060 brelse(epos.bh); 2053 brelse(epos.bh);
2061 2054
2062 if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_VARCONV)) 2055 if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_VARCONV))
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 6d8dc02baeb..2be0f9eb86d 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -27,7 +27,6 @@
27#include <linux/errno.h> 27#include <linux/errno.h>
28#include <linux/mm.h> 28#include <linux/mm.h>
29#include <linux/slab.h> 29#include <linux/slab.h>
30#include <linux/smp_lock.h>
31#include <linux/buffer_head.h> 30#include <linux/buffer_head.h>
32#include <linux/sched.h> 31#include <linux/sched.h>
33#include <linux/crc-itu-t.h> 32#include <linux/crc-itu-t.h>
@@ -228,10 +227,8 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
228 } 227 }
229 228
230 if ((cfi->fileCharacteristics & FID_FILE_CHAR_PARENT) && 229 if ((cfi->fileCharacteristics & FID_FILE_CHAR_PARENT) &&
231 isdotdot) { 230 isdotdot)
232 brelse(epos.bh); 231 goto out_ok;
233 return fi;
234 }
235 232
236 if (!lfi) 233 if (!lfi)
237 continue; 234 continue;
@@ -263,7 +260,6 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
263 if (dentry->d_name.len > UDF_NAME_LEN - 2) 260 if (dentry->d_name.len > UDF_NAME_LEN - 2)
264 return ERR_PTR(-ENAMETOOLONG); 261 return ERR_PTR(-ENAMETOOLONG);
265 262
266 lock_kernel();
267#ifdef UDF_RECOVERY 263#ifdef UDF_RECOVERY
268 /* temporary shorthand for specifying files by inode number */ 264 /* temporary shorthand for specifying files by inode number */
269 if (!strncmp(dentry->d_name.name, ".B=", 3)) { 265 if (!strncmp(dentry->d_name.name, ".B=", 3)) {
@@ -275,7 +271,6 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
275 }; 271 };
276 inode = udf_iget(dir->i_sb, lb); 272 inode = udf_iget(dir->i_sb, lb);
277 if (!inode) { 273 if (!inode) {
278 unlock_kernel();
279 return ERR_PTR(-EACCES); 274 return ERR_PTR(-EACCES);
280 } 275 }
281 } else 276 } else
@@ -291,11 +286,9 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
291 loc = lelb_to_cpu(cfi.icb.extLocation); 286 loc = lelb_to_cpu(cfi.icb.extLocation);
292 inode = udf_iget(dir->i_sb, &loc); 287 inode = udf_iget(dir->i_sb, &loc);
293 if (!inode) { 288 if (!inode) {
294 unlock_kernel();
295 return ERR_PTR(-EACCES); 289 return ERR_PTR(-EACCES);
296 } 290 }
297 } 291 }
298 unlock_kernel();
299 292
300 return d_splice_alias(inode, dentry); 293 return d_splice_alias(inode, dentry);
301} 294}
@@ -476,15 +469,19 @@ add:
476 f_pos >> dir->i_sb->s_blocksize_bits, 1, err); 469 f_pos >> dir->i_sb->s_blocksize_bits, 1, err);
477 if (!fibh->ebh) 470 if (!fibh->ebh)
478 goto out_err; 471 goto out_err;
472 /* Extents could have been merged, invalidate our position */
473 brelse(epos.bh);
474 epos.bh = NULL;
475 epos.block = dinfo->i_location;
476 epos.offset = udf_file_entry_alloc_offset(dir);
479 477
480 if (!fibh->soffset) { 478 if (!fibh->soffset) {
481 if (udf_next_aext(dir, &epos, &eloc, &elen, 1) == 479 /* Find the freshly allocated block */
482 (EXT_RECORDED_ALLOCATED >> 30)) { 480 while (udf_next_aext(dir, &epos, &eloc, &elen, 1) ==
483 block = eloc.logicalBlockNum + ((elen - 1) >> 481 (EXT_RECORDED_ALLOCATED >> 30))
482 ;
483 block = eloc.logicalBlockNum + ((elen - 1) >>
484 dir->i_sb->s_blocksize_bits); 484 dir->i_sb->s_blocksize_bits);
485 } else
486 block++;
487
488 brelse(fibh->sbh); 485 brelse(fibh->sbh);
489 fibh->sbh = fibh->ebh; 486 fibh->sbh = fibh->ebh;
490 fi = (struct fileIdentDesc *)(fibh->sbh->b_data); 487 fi = (struct fileIdentDesc *)(fibh->sbh->b_data);
@@ -562,10 +559,8 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode,
562 int err; 559 int err;
563 struct udf_inode_info *iinfo; 560 struct udf_inode_info *iinfo;
564 561
565 lock_kernel();
566 inode = udf_new_inode(dir, mode, &err); 562 inode = udf_new_inode(dir, mode, &err);
567 if (!inode) { 563 if (!inode) {
568 unlock_kernel();
569 return err; 564 return err;
570 } 565 }
571 566
@@ -583,7 +578,6 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode,
583 inode->i_nlink--; 578 inode->i_nlink--;
584 mark_inode_dirty(inode); 579 mark_inode_dirty(inode);
585 iput(inode); 580 iput(inode);
586 unlock_kernel();
587 return err; 581 return err;
588 } 582 }
589 cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize); 583 cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize);
@@ -596,7 +590,6 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode,
596 if (fibh.sbh != fibh.ebh) 590 if (fibh.sbh != fibh.ebh)
597 brelse(fibh.ebh); 591 brelse(fibh.ebh);
598 brelse(fibh.sbh); 592 brelse(fibh.sbh);
599 unlock_kernel();
600 d_instantiate(dentry, inode); 593 d_instantiate(dentry, inode);
601 594
602 return 0; 595 return 0;
@@ -614,7 +607,6 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode,
614 if (!old_valid_dev(rdev)) 607 if (!old_valid_dev(rdev))
615 return -EINVAL; 608 return -EINVAL;
616 609
617 lock_kernel();
618 err = -EIO; 610 err = -EIO;
619 inode = udf_new_inode(dir, mode, &err); 611 inode = udf_new_inode(dir, mode, &err);
620 if (!inode) 612 if (!inode)
@@ -627,7 +619,6 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode,
627 inode->i_nlink--; 619 inode->i_nlink--;
628 mark_inode_dirty(inode); 620 mark_inode_dirty(inode);
629 iput(inode); 621 iput(inode);
630 unlock_kernel();
631 return err; 622 return err;
632 } 623 }
633 cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize); 624 cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize);
@@ -646,7 +637,6 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode,
646 err = 0; 637 err = 0;
647 638
648out: 639out:
649 unlock_kernel();
650 return err; 640 return err;
651} 641}
652 642
@@ -659,7 +649,6 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode)
659 struct udf_inode_info *dinfo = UDF_I(dir); 649 struct udf_inode_info *dinfo = UDF_I(dir);
660 struct udf_inode_info *iinfo; 650 struct udf_inode_info *iinfo;
661 651
662 lock_kernel();
663 err = -EMLINK; 652 err = -EMLINK;
664 if (dir->i_nlink >= (256 << sizeof(dir->i_nlink)) - 1) 653 if (dir->i_nlink >= (256 << sizeof(dir->i_nlink)) - 1)
665 goto out; 654 goto out;
@@ -712,7 +701,6 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode)
712 err = 0; 701 err = 0;
713 702
714out: 703out:
715 unlock_kernel();
716 return err; 704 return err;
717} 705}
718 706
@@ -794,7 +782,6 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry)
794 struct kernel_lb_addr tloc; 782 struct kernel_lb_addr tloc;
795 783
796 retval = -ENOENT; 784 retval = -ENOENT;
797 lock_kernel();
798 fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi); 785 fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi);
799 if (!fi) 786 if (!fi)
800 goto out; 787 goto out;
@@ -826,7 +813,6 @@ end_rmdir:
826 brelse(fibh.sbh); 813 brelse(fibh.sbh);
827 814
828out: 815out:
829 unlock_kernel();
830 return retval; 816 return retval;
831} 817}
832 818
@@ -840,7 +826,6 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry)
840 struct kernel_lb_addr tloc; 826 struct kernel_lb_addr tloc;
841 827
842 retval = -ENOENT; 828 retval = -ENOENT;
843 lock_kernel();
844 fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi); 829 fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi);
845 if (!fi) 830 if (!fi)
846 goto out; 831 goto out;
@@ -870,7 +855,6 @@ end_unlink:
870 brelse(fibh.sbh); 855 brelse(fibh.sbh);
871 856
872out: 857out:
873 unlock_kernel();
874 return retval; 858 return retval;
875} 859}
876 860
@@ -890,21 +874,21 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
890 int block; 874 int block;
891 unsigned char *name = NULL; 875 unsigned char *name = NULL;
892 int namelen; 876 int namelen;
893 struct buffer_head *bh;
894 struct udf_inode_info *iinfo; 877 struct udf_inode_info *iinfo;
878 struct super_block *sb = dir->i_sb;
895 879
896 lock_kernel();
897 inode = udf_new_inode(dir, S_IFLNK | S_IRWXUGO, &err); 880 inode = udf_new_inode(dir, S_IFLNK | S_IRWXUGO, &err);
898 if (!inode) 881 if (!inode)
899 goto out; 882 goto out;
900 883
884 iinfo = UDF_I(inode);
885 down_write(&iinfo->i_data_sem);
901 name = kmalloc(UDF_NAME_LEN, GFP_NOFS); 886 name = kmalloc(UDF_NAME_LEN, GFP_NOFS);
902 if (!name) { 887 if (!name) {
903 err = -ENOMEM; 888 err = -ENOMEM;
904 goto out_no_entry; 889 goto out_no_entry;
905 } 890 }
906 891
907 iinfo = UDF_I(inode);
908 inode->i_data.a_ops = &udf_symlink_aops; 892 inode->i_data.a_ops = &udf_symlink_aops;
909 inode->i_op = &udf_symlink_inode_operations; 893 inode->i_op = &udf_symlink_inode_operations;
910 894
@@ -912,7 +896,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
912 struct kernel_lb_addr eloc; 896 struct kernel_lb_addr eloc;
913 uint32_t bsize; 897 uint32_t bsize;
914 898
915 block = udf_new_block(inode->i_sb, inode, 899 block = udf_new_block(sb, inode,
916 iinfo->i_location.partitionReferenceNum, 900 iinfo->i_location.partitionReferenceNum,
917 iinfo->i_location.logicalBlockNum, &err); 901 iinfo->i_location.logicalBlockNum, &err);
918 if (!block) 902 if (!block)
@@ -923,17 +907,17 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
923 eloc.logicalBlockNum = block; 907 eloc.logicalBlockNum = block;
924 eloc.partitionReferenceNum = 908 eloc.partitionReferenceNum =
925 iinfo->i_location.partitionReferenceNum; 909 iinfo->i_location.partitionReferenceNum;
926 bsize = inode->i_sb->s_blocksize; 910 bsize = sb->s_blocksize;
927 iinfo->i_lenExtents = bsize; 911 iinfo->i_lenExtents = bsize;
928 udf_add_aext(inode, &epos, &eloc, bsize, 0); 912 udf_add_aext(inode, &epos, &eloc, bsize, 0);
929 brelse(epos.bh); 913 brelse(epos.bh);
930 914
931 block = udf_get_pblock(inode->i_sb, block, 915 block = udf_get_pblock(sb, block,
932 iinfo->i_location.partitionReferenceNum, 916 iinfo->i_location.partitionReferenceNum,
933 0); 917 0);
934 epos.bh = udf_tgetblk(inode->i_sb, block); 918 epos.bh = udf_tgetblk(sb, block);
935 lock_buffer(epos.bh); 919 lock_buffer(epos.bh);
936 memset(epos.bh->b_data, 0x00, inode->i_sb->s_blocksize); 920 memset(epos.bh->b_data, 0x00, bsize);
937 set_buffer_uptodate(epos.bh); 921 set_buffer_uptodate(epos.bh);
938 unlock_buffer(epos.bh); 922 unlock_buffer(epos.bh);
939 mark_buffer_dirty_inode(epos.bh, inode); 923 mark_buffer_dirty_inode(epos.bh, inode);
@@ -941,7 +925,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
941 } else 925 } else
942 ea = iinfo->i_ext.i_data + iinfo->i_lenEAttr; 926 ea = iinfo->i_ext.i_data + iinfo->i_lenEAttr;
943 927
944 eoffset = inode->i_sb->s_blocksize - udf_ext0_offset(inode); 928 eoffset = sb->s_blocksize - udf_ext0_offset(inode);
945 pc = (struct pathComponent *)ea; 929 pc = (struct pathComponent *)ea;
946 930
947 if (*symname == '/') { 931 if (*symname == '/') {
@@ -981,7 +965,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
981 } 965 }
982 966
983 if (pc->componentType == 5) { 967 if (pc->componentType == 5) {
984 namelen = udf_put_filename(inode->i_sb, compstart, name, 968 namelen = udf_put_filename(sb, compstart, name,
985 symname - compstart); 969 symname - compstart);
986 if (!namelen) 970 if (!namelen)
987 goto out_no_entry; 971 goto out_no_entry;
@@ -1015,27 +999,16 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
1015 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); 999 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
1016 if (!fi) 1000 if (!fi)
1017 goto out_no_entry; 1001 goto out_no_entry;
1018 cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize); 1002 cfi.icb.extLength = cpu_to_le32(sb->s_blocksize);
1019 cfi.icb.extLocation = cpu_to_lelb(iinfo->i_location); 1003 cfi.icb.extLocation = cpu_to_lelb(iinfo->i_location);
1020 bh = UDF_SB(inode->i_sb)->s_lvid_bh; 1004 if (UDF_SB(inode->i_sb)->s_lvid_bh) {
1021 if (bh) {
1022 struct logicalVolIntegrityDesc *lvid =
1023 (struct logicalVolIntegrityDesc *)bh->b_data;
1024 struct logicalVolHeaderDesc *lvhd;
1025 uint64_t uniqueID;
1026 lvhd = (struct logicalVolHeaderDesc *)
1027 lvid->logicalVolContentsUse;
1028 uniqueID = le64_to_cpu(lvhd->uniqueID);
1029 *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse = 1005 *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse =
1030 cpu_to_le32(uniqueID & 0x00000000FFFFFFFFUL); 1006 cpu_to_le32(lvid_get_unique_id(sb));
1031 if (!(++uniqueID & 0x00000000FFFFFFFFUL))
1032 uniqueID += 16;
1033 lvhd->uniqueID = cpu_to_le64(uniqueID);
1034 mark_buffer_dirty(bh);
1035 } 1007 }
1036 udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL); 1008 udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL);
1037 if (UDF_I(dir)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) 1009 if (UDF_I(dir)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
1038 mark_inode_dirty(dir); 1010 mark_inode_dirty(dir);
1011 up_write(&iinfo->i_data_sem);
1039 if (fibh.sbh != fibh.ebh) 1012 if (fibh.sbh != fibh.ebh)
1040 brelse(fibh.ebh); 1013 brelse(fibh.ebh);
1041 brelse(fibh.sbh); 1014 brelse(fibh.sbh);
@@ -1044,10 +1017,10 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
1044 1017
1045out: 1018out:
1046 kfree(name); 1019 kfree(name);
1047 unlock_kernel();
1048 return err; 1020 return err;
1049 1021
1050out_no_entry: 1022out_no_entry:
1023 up_write(&iinfo->i_data_sem);
1051 inode_dec_link_count(inode); 1024 inode_dec_link_count(inode);
1052 iput(inode); 1025 iput(inode);
1053 goto out; 1026 goto out;
@@ -1060,36 +1033,20 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
1060 struct udf_fileident_bh fibh; 1033 struct udf_fileident_bh fibh;
1061 struct fileIdentDesc cfi, *fi; 1034 struct fileIdentDesc cfi, *fi;
1062 int err; 1035 int err;
1063 struct buffer_head *bh;
1064 1036
1065 lock_kernel();
1066 if (inode->i_nlink >= (256 << sizeof(inode->i_nlink)) - 1) { 1037 if (inode->i_nlink >= (256 << sizeof(inode->i_nlink)) - 1) {
1067 unlock_kernel();
1068 return -EMLINK; 1038 return -EMLINK;
1069 } 1039 }
1070 1040
1071 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); 1041 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
1072 if (!fi) { 1042 if (!fi) {
1073 unlock_kernel();
1074 return err; 1043 return err;
1075 } 1044 }
1076 cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize); 1045 cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize);
1077 cfi.icb.extLocation = cpu_to_lelb(UDF_I(inode)->i_location); 1046 cfi.icb.extLocation = cpu_to_lelb(UDF_I(inode)->i_location);
1078 bh = UDF_SB(inode->i_sb)->s_lvid_bh; 1047 if (UDF_SB(inode->i_sb)->s_lvid_bh) {
1079 if (bh) {
1080 struct logicalVolIntegrityDesc *lvid =
1081 (struct logicalVolIntegrityDesc *)bh->b_data;
1082 struct logicalVolHeaderDesc *lvhd;
1083 uint64_t uniqueID;
1084 lvhd = (struct logicalVolHeaderDesc *)
1085 (lvid->logicalVolContentsUse);
1086 uniqueID = le64_to_cpu(lvhd->uniqueID);
1087 *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse = 1048 *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse =
1088 cpu_to_le32(uniqueID & 0x00000000FFFFFFFFUL); 1049 cpu_to_le32(lvid_get_unique_id(inode->i_sb));
1089 if (!(++uniqueID & 0x00000000FFFFFFFFUL))
1090 uniqueID += 16;
1091 lvhd->uniqueID = cpu_to_le64(uniqueID);
1092 mark_buffer_dirty(bh);
1093 } 1050 }
1094 udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL); 1051 udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL);
1095 if (UDF_I(dir)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) 1052 if (UDF_I(dir)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
@@ -1103,7 +1060,6 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
1103 mark_inode_dirty(inode); 1060 mark_inode_dirty(inode);
1104 ihold(inode); 1061 ihold(inode);
1105 d_instantiate(dentry, inode); 1062 d_instantiate(dentry, inode);
1106 unlock_kernel();
1107 1063
1108 return 0; 1064 return 0;
1109} 1065}
@@ -1124,7 +1080,6 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
1124 struct kernel_lb_addr tloc; 1080 struct kernel_lb_addr tloc;
1125 struct udf_inode_info *old_iinfo = UDF_I(old_inode); 1081 struct udf_inode_info *old_iinfo = UDF_I(old_inode);
1126 1082
1127 lock_kernel();
1128 ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi); 1083 ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi);
1129 if (ofi) { 1084 if (ofi) {
1130 if (ofibh.sbh != ofibh.ebh) 1085 if (ofibh.sbh != ofibh.ebh)
@@ -1248,7 +1203,6 @@ end_rename:
1248 brelse(nfibh.ebh); 1203 brelse(nfibh.ebh);
1249 brelse(nfibh.sbh); 1204 brelse(nfibh.sbh);
1250 } 1205 }
1251 unlock_kernel();
1252 1206
1253 return retval; 1207 return retval;
1254} 1208}
@@ -1261,7 +1215,6 @@ static struct dentry *udf_get_parent(struct dentry *child)
1261 struct fileIdentDesc cfi; 1215 struct fileIdentDesc cfi;
1262 struct udf_fileident_bh fibh; 1216 struct udf_fileident_bh fibh;
1263 1217
1264 lock_kernel();
1265 if (!udf_find_entry(child->d_inode, &dotdot, &fibh, &cfi)) 1218 if (!udf_find_entry(child->d_inode, &dotdot, &fibh, &cfi))
1266 goto out_unlock; 1219 goto out_unlock;
1267 1220
@@ -1273,11 +1226,9 @@ static struct dentry *udf_get_parent(struct dentry *child)
1273 inode = udf_iget(child->d_inode->i_sb, &tloc); 1226 inode = udf_iget(child->d_inode->i_sb, &tloc);
1274 if (!inode) 1227 if (!inode)
1275 goto out_unlock; 1228 goto out_unlock;
1276 unlock_kernel();
1277 1229
1278 return d_obtain_alias(inode); 1230 return d_obtain_alias(inode);
1279out_unlock: 1231out_unlock:
1280 unlock_kernel();
1281 return ERR_PTR(-EACCES); 1232 return ERR_PTR(-EACCES);
1282} 1233}
1283 1234
diff --git a/fs/udf/partition.c b/fs/udf/partition.c
index 745eb209be0..a71090ea0e0 100644
--- a/fs/udf/partition.c
+++ b/fs/udf/partition.c
@@ -25,6 +25,7 @@
25#include <linux/fs.h> 25#include <linux/fs.h>
26#include <linux/string.h> 26#include <linux/string.h>
27#include <linux/buffer_head.h> 27#include <linux/buffer_head.h>
28#include <linux/mutex.h>
28 29
29uint32_t udf_get_pblock(struct super_block *sb, uint32_t block, 30uint32_t udf_get_pblock(struct super_block *sb, uint32_t block,
30 uint16_t partition, uint32_t offset) 31 uint16_t partition, uint32_t offset)
@@ -159,7 +160,9 @@ int udf_relocate_blocks(struct super_block *sb, long old_block, long *new_block)
159 struct udf_sb_info *sbi = UDF_SB(sb); 160 struct udf_sb_info *sbi = UDF_SB(sb);
160 u16 reallocationTableLen; 161 u16 reallocationTableLen;
161 struct buffer_head *bh; 162 struct buffer_head *bh;
163 int ret = 0;
162 164
165 mutex_lock(&sbi->s_alloc_mutex);
163 for (i = 0; i < sbi->s_partitions; i++) { 166 for (i = 0; i < sbi->s_partitions; i++) {
164 struct udf_part_map *map = &sbi->s_partmaps[i]; 167 struct udf_part_map *map = &sbi->s_partmaps[i];
165 if (old_block > map->s_partition_root && 168 if (old_block > map->s_partition_root &&
@@ -175,8 +178,10 @@ int udf_relocate_blocks(struct super_block *sb, long old_block, long *new_block)
175 break; 178 break;
176 } 179 }
177 180
178 if (!st) 181 if (!st) {
179 return 1; 182 ret = 1;
183 goto out;
184 }
180 185
181 reallocationTableLen = 186 reallocationTableLen =
182 le16_to_cpu(st->reallocationTableLen); 187 le16_to_cpu(st->reallocationTableLen);
@@ -207,14 +212,16 @@ int udf_relocate_blocks(struct super_block *sb, long old_block, long *new_block)
207 ((old_block - 212 ((old_block -
208 map->s_partition_root) & 213 map->s_partition_root) &
209 (sdata->s_packet_len - 1)); 214 (sdata->s_packet_len - 1));
210 return 0; 215 ret = 0;
216 goto out;
211 } else if (origLoc == packet) { 217 } else if (origLoc == packet) {
212 *new_block = le32_to_cpu( 218 *new_block = le32_to_cpu(
213 entry->mappedLocation) + 219 entry->mappedLocation) +
214 ((old_block - 220 ((old_block -
215 map->s_partition_root) & 221 map->s_partition_root) &
216 (sdata->s_packet_len - 1)); 222 (sdata->s_packet_len - 1));
217 return 0; 223 ret = 0;
224 goto out;
218 } else if (origLoc > packet) 225 } else if (origLoc > packet)
219 break; 226 break;
220 } 227 }
@@ -251,20 +258,24 @@ int udf_relocate_blocks(struct super_block *sb, long old_block, long *new_block)
251 st->mapEntry[k].mappedLocation) + 258 st->mapEntry[k].mappedLocation) +
252 ((old_block - map->s_partition_root) & 259 ((old_block - map->s_partition_root) &
253 (sdata->s_packet_len - 1)); 260 (sdata->s_packet_len - 1));
254 return 0; 261 ret = 0;
262 goto out;
255 } 263 }
256 264
257 return 1; 265 ret = 1;
266 goto out;
258 } /* if old_block */ 267 } /* if old_block */
259 } 268 }
260 269
261 if (i == sbi->s_partitions) { 270 if (i == sbi->s_partitions) {
262 /* outside of partitions */ 271 /* outside of partitions */
263 /* for now, fail =) */ 272 /* for now, fail =) */
264 return 1; 273 ret = 1;
265 } 274 }
266 275
267 return 0; 276out:
277 mutex_unlock(&sbi->s_alloc_mutex);
278 return ret;
268} 279}
269 280
270static uint32_t udf_try_read_meta(struct inode *inode, uint32_t block, 281static uint32_t udf_try_read_meta(struct inode *inode, uint32_t block,
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 4a5c7c61836..7b27b063ff6 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -48,7 +48,6 @@
48#include <linux/stat.h> 48#include <linux/stat.h>
49#include <linux/cdrom.h> 49#include <linux/cdrom.h>
50#include <linux/nls.h> 50#include <linux/nls.h>
51#include <linux/smp_lock.h>
52#include <linux/buffer_head.h> 51#include <linux/buffer_head.h>
53#include <linux/vfs.h> 52#include <linux/vfs.h>
54#include <linux/vmalloc.h> 53#include <linux/vmalloc.h>
@@ -135,15 +134,23 @@ static struct inode *udf_alloc_inode(struct super_block *sb)
135 ei->i_next_alloc_block = 0; 134 ei->i_next_alloc_block = 0;
136 ei->i_next_alloc_goal = 0; 135 ei->i_next_alloc_goal = 0;
137 ei->i_strat4096 = 0; 136 ei->i_strat4096 = 0;
137 init_rwsem(&ei->i_data_sem);
138 138
139 return &ei->vfs_inode; 139 return &ei->vfs_inode;
140} 140}
141 141
142static void udf_destroy_inode(struct inode *inode) 142static void udf_i_callback(struct rcu_head *head)
143{ 143{
144 struct inode *inode = container_of(head, struct inode, i_rcu);
145 INIT_LIST_HEAD(&inode->i_dentry);
144 kmem_cache_free(udf_inode_cachep, UDF_I(inode)); 146 kmem_cache_free(udf_inode_cachep, UDF_I(inode));
145} 147}
146 148
149static void udf_destroy_inode(struct inode *inode)
150{
151 call_rcu(&inode->i_rcu, udf_i_callback);
152}
153
147static void init_once(void *foo) 154static void init_once(void *foo)
148{ 155{
149 struct udf_inode_info *ei = (struct udf_inode_info *)foo; 156 struct udf_inode_info *ei = (struct udf_inode_info *)foo;
@@ -567,13 +574,14 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
567 if (!udf_parse_options(options, &uopt, true)) 574 if (!udf_parse_options(options, &uopt, true))
568 return -EINVAL; 575 return -EINVAL;
569 576
570 lock_kernel(); 577 write_lock(&sbi->s_cred_lock);
571 sbi->s_flags = uopt.flags; 578 sbi->s_flags = uopt.flags;
572 sbi->s_uid = uopt.uid; 579 sbi->s_uid = uopt.uid;
573 sbi->s_gid = uopt.gid; 580 sbi->s_gid = uopt.gid;
574 sbi->s_umask = uopt.umask; 581 sbi->s_umask = uopt.umask;
575 sbi->s_fmode = uopt.fmode; 582 sbi->s_fmode = uopt.fmode;
576 sbi->s_dmode = uopt.dmode; 583 sbi->s_dmode = uopt.dmode;
584 write_unlock(&sbi->s_cred_lock);
577 585
578 if (sbi->s_lvid_bh) { 586 if (sbi->s_lvid_bh) {
579 int write_rev = le16_to_cpu(udf_sb_lvidiu(sbi)->minUDFWriteRev); 587 int write_rev = le16_to_cpu(udf_sb_lvidiu(sbi)->minUDFWriteRev);
@@ -590,7 +598,6 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
590 udf_open_lvid(sb); 598 udf_open_lvid(sb);
591 599
592out_unlock: 600out_unlock:
593 unlock_kernel();
594 return error; 601 return error;
595} 602}
596 603
@@ -959,9 +966,9 @@ static struct udf_bitmap *udf_sb_alloc_bitmap(struct super_block *sb, u32 index)
959 (sizeof(struct buffer_head *) * nr_groups); 966 (sizeof(struct buffer_head *) * nr_groups);
960 967
961 if (size <= PAGE_SIZE) 968 if (size <= PAGE_SIZE)
962 bitmap = kmalloc(size, GFP_KERNEL); 969 bitmap = kzalloc(size, GFP_KERNEL);
963 else 970 else
964 bitmap = vmalloc(size); /* TODO: get rid of vmalloc */ 971 bitmap = vzalloc(size); /* TODO: get rid of vzalloc */
965 972
966 if (bitmap == NULL) { 973 if (bitmap == NULL) {
967 udf_error(sb, __func__, 974 udf_error(sb, __func__,
@@ -970,7 +977,6 @@ static struct udf_bitmap *udf_sb_alloc_bitmap(struct super_block *sb, u32 index)
970 return NULL; 977 return NULL;
971 } 978 }
972 979
973 memset(bitmap, 0x00, size);
974 bitmap->s_block_bitmap = (struct buffer_head **)(bitmap + 1); 980 bitmap->s_block_bitmap = (struct buffer_head **)(bitmap + 1);
975 bitmap->s_nr_groups = nr_groups; 981 bitmap->s_nr_groups = nr_groups;
976 return bitmap; 982 return bitmap;
@@ -1774,6 +1780,8 @@ static void udf_open_lvid(struct super_block *sb)
1774 1780
1775 if (!bh) 1781 if (!bh)
1776 return; 1782 return;
1783
1784 mutex_lock(&sbi->s_alloc_mutex);
1777 lvid = (struct logicalVolIntegrityDesc *)bh->b_data; 1785 lvid = (struct logicalVolIntegrityDesc *)bh->b_data;
1778 lvidiu = udf_sb_lvidiu(sbi); 1786 lvidiu = udf_sb_lvidiu(sbi);
1779 1787
@@ -1790,6 +1798,7 @@ static void udf_open_lvid(struct super_block *sb)
1790 lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag); 1798 lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag);
1791 mark_buffer_dirty(bh); 1799 mark_buffer_dirty(bh);
1792 sbi->s_lvid_dirty = 0; 1800 sbi->s_lvid_dirty = 0;
1801 mutex_unlock(&sbi->s_alloc_mutex);
1793} 1802}
1794 1803
1795static void udf_close_lvid(struct super_block *sb) 1804static void udf_close_lvid(struct super_block *sb)
@@ -1802,6 +1811,7 @@ static void udf_close_lvid(struct super_block *sb)
1802 if (!bh) 1811 if (!bh)
1803 return; 1812 return;
1804 1813
1814 mutex_lock(&sbi->s_alloc_mutex);
1805 lvid = (struct logicalVolIntegrityDesc *)bh->b_data; 1815 lvid = (struct logicalVolIntegrityDesc *)bh->b_data;
1806 lvidiu = udf_sb_lvidiu(sbi); 1816 lvidiu = udf_sb_lvidiu(sbi);
1807 lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; 1817 lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX;
@@ -1822,6 +1832,34 @@ static void udf_close_lvid(struct super_block *sb)
1822 lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag); 1832 lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag);
1823 mark_buffer_dirty(bh); 1833 mark_buffer_dirty(bh);
1824 sbi->s_lvid_dirty = 0; 1834 sbi->s_lvid_dirty = 0;
1835 mutex_unlock(&sbi->s_alloc_mutex);
1836}
1837
1838u64 lvid_get_unique_id(struct super_block *sb)
1839{
1840 struct buffer_head *bh;
1841 struct udf_sb_info *sbi = UDF_SB(sb);
1842 struct logicalVolIntegrityDesc *lvid;
1843 struct logicalVolHeaderDesc *lvhd;
1844 u64 uniqueID;
1845 u64 ret;
1846
1847 bh = sbi->s_lvid_bh;
1848 if (!bh)
1849 return 0;
1850
1851 lvid = (struct logicalVolIntegrityDesc *)bh->b_data;
1852 lvhd = (struct logicalVolHeaderDesc *)lvid->logicalVolContentsUse;
1853
1854 mutex_lock(&sbi->s_alloc_mutex);
1855 ret = uniqueID = le64_to_cpu(lvhd->uniqueID);
1856 if (!(++uniqueID & 0xFFFFFFFF))
1857 uniqueID += 16;
1858 lvhd->uniqueID = cpu_to_le64(uniqueID);
1859 mutex_unlock(&sbi->s_alloc_mutex);
1860 mark_buffer_dirty(bh);
1861
1862 return ret;
1825} 1863}
1826 1864
1827static void udf_sb_free_bitmap(struct udf_bitmap *bitmap) 1865static void udf_sb_free_bitmap(struct udf_bitmap *bitmap)
@@ -1879,8 +1917,6 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
1879 struct kernel_lb_addr rootdir, fileset; 1917 struct kernel_lb_addr rootdir, fileset;
1880 struct udf_sb_info *sbi; 1918 struct udf_sb_info *sbi;
1881 1919
1882 lock_kernel();
1883
1884 uopt.flags = (1 << UDF_FLAG_USE_AD_IN_ICB) | (1 << UDF_FLAG_STRICT); 1920 uopt.flags = (1 << UDF_FLAG_USE_AD_IN_ICB) | (1 << UDF_FLAG_STRICT);
1885 uopt.uid = -1; 1921 uopt.uid = -1;
1886 uopt.gid = -1; 1922 uopt.gid = -1;
@@ -1889,10 +1925,8 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
1889 uopt.dmode = UDF_INVALID_MODE; 1925 uopt.dmode = UDF_INVALID_MODE;
1890 1926
1891 sbi = kzalloc(sizeof(struct udf_sb_info), GFP_KERNEL); 1927 sbi = kzalloc(sizeof(struct udf_sb_info), GFP_KERNEL);
1892 if (!sbi) { 1928 if (!sbi)
1893 unlock_kernel();
1894 return -ENOMEM; 1929 return -ENOMEM;
1895 }
1896 1930
1897 sb->s_fs_info = sbi; 1931 sb->s_fs_info = sbi;
1898 1932
@@ -1929,6 +1963,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
1929 sbi->s_fmode = uopt.fmode; 1963 sbi->s_fmode = uopt.fmode;
1930 sbi->s_dmode = uopt.dmode; 1964 sbi->s_dmode = uopt.dmode;
1931 sbi->s_nls_map = uopt.nls_map; 1965 sbi->s_nls_map = uopt.nls_map;
1966 rwlock_init(&sbi->s_cred_lock);
1932 1967
1933 if (uopt.session == 0xFFFFFFFF) 1968 if (uopt.session == 0xFFFFFFFF)
1934 sbi->s_session = udf_get_last_session(sb); 1969 sbi->s_session = udf_get_last_session(sb);
@@ -2038,7 +2073,6 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
2038 goto error_out; 2073 goto error_out;
2039 } 2074 }
2040 sb->s_maxbytes = MAX_LFS_FILESIZE; 2075 sb->s_maxbytes = MAX_LFS_FILESIZE;
2041 unlock_kernel();
2042 return 0; 2076 return 0;
2043 2077
2044error_out: 2078error_out:
@@ -2059,7 +2093,6 @@ error_out:
2059 kfree(sbi); 2093 kfree(sbi);
2060 sb->s_fs_info = NULL; 2094 sb->s_fs_info = NULL;
2061 2095
2062 unlock_kernel();
2063 return -EINVAL; 2096 return -EINVAL;
2064} 2097}
2065 2098
@@ -2098,8 +2131,6 @@ static void udf_put_super(struct super_block *sb)
2098 2131
2099 sbi = UDF_SB(sb); 2132 sbi = UDF_SB(sb);
2100 2133
2101 lock_kernel();
2102
2103 if (sbi->s_vat_inode) 2134 if (sbi->s_vat_inode)
2104 iput(sbi->s_vat_inode); 2135 iput(sbi->s_vat_inode);
2105 if (sbi->s_partitions) 2136 if (sbi->s_partitions)
@@ -2115,8 +2146,6 @@ static void udf_put_super(struct super_block *sb)
2115 kfree(sbi->s_partmaps); 2146 kfree(sbi->s_partmaps);
2116 kfree(sb->s_fs_info); 2147 kfree(sb->s_fs_info);
2117 sb->s_fs_info = NULL; 2148 sb->s_fs_info = NULL;
2118
2119 unlock_kernel();
2120} 2149}
2121 2150
2122static int udf_sync_fs(struct super_block *sb, int wait) 2151static int udf_sync_fs(struct super_block *sb, int wait)
@@ -2179,8 +2208,6 @@ static unsigned int udf_count_free_bitmap(struct super_block *sb,
2179 uint16_t ident; 2208 uint16_t ident;
2180 struct spaceBitmapDesc *bm; 2209 struct spaceBitmapDesc *bm;
2181 2210
2182 lock_kernel();
2183
2184 loc.logicalBlockNum = bitmap->s_extPosition; 2211 loc.logicalBlockNum = bitmap->s_extPosition;
2185 loc.partitionReferenceNum = UDF_SB(sb)->s_partition; 2212 loc.partitionReferenceNum = UDF_SB(sb)->s_partition;
2186 bh = udf_read_ptagged(sb, &loc, 0, &ident); 2213 bh = udf_read_ptagged(sb, &loc, 0, &ident);
@@ -2217,10 +2244,7 @@ static unsigned int udf_count_free_bitmap(struct super_block *sb,
2217 } 2244 }
2218 } 2245 }
2219 brelse(bh); 2246 brelse(bh);
2220
2221out: 2247out:
2222 unlock_kernel();
2223
2224 return accum; 2248 return accum;
2225} 2249}
2226 2250
@@ -2233,8 +2257,7 @@ static unsigned int udf_count_free_table(struct super_block *sb,
2233 int8_t etype; 2257 int8_t etype;
2234 struct extent_position epos; 2258 struct extent_position epos;
2235 2259
2236 lock_kernel(); 2260 mutex_lock(&UDF_SB(sb)->s_alloc_mutex);
2237
2238 epos.block = UDF_I(table)->i_location; 2261 epos.block = UDF_I(table)->i_location;
2239 epos.offset = sizeof(struct unallocSpaceEntry); 2262 epos.offset = sizeof(struct unallocSpaceEntry);
2240 epos.bh = NULL; 2263 epos.bh = NULL;
@@ -2243,8 +2266,7 @@ static unsigned int udf_count_free_table(struct super_block *sb,
2243 accum += (elen >> table->i_sb->s_blocksize_bits); 2266 accum += (elen >> table->i_sb->s_blocksize_bits);
2244 2267
2245 brelse(epos.bh); 2268 brelse(epos.bh);
2246 2269 mutex_unlock(&UDF_SB(sb)->s_alloc_mutex);
2247 unlock_kernel();
2248 2270
2249 return accum; 2271 return accum;
2250} 2272}
diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c
index 16064787d2b..b1d4488b0f1 100644
--- a/fs/udf/symlink.c
+++ b/fs/udf/symlink.c
@@ -27,7 +27,6 @@
27#include <linux/mm.h> 27#include <linux/mm.h>
28#include <linux/stat.h> 28#include <linux/stat.h>
29#include <linux/pagemap.h> 29#include <linux/pagemap.h>
30#include <linux/smp_lock.h>
31#include <linux/buffer_head.h> 30#include <linux/buffer_head.h>
32#include "udf_i.h" 31#include "udf_i.h"
33 32
@@ -78,13 +77,16 @@ static int udf_symlink_filler(struct file *file, struct page *page)
78 int err = -EIO; 77 int err = -EIO;
79 unsigned char *p = kmap(page); 78 unsigned char *p = kmap(page);
80 struct udf_inode_info *iinfo; 79 struct udf_inode_info *iinfo;
80 uint32_t pos;
81 81
82 lock_kernel();
83 iinfo = UDF_I(inode); 82 iinfo = UDF_I(inode);
83 pos = udf_block_map(inode, 0);
84
85 down_read(&iinfo->i_data_sem);
84 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { 86 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
85 symlink = iinfo->i_ext.i_data + iinfo->i_lenEAttr; 87 symlink = iinfo->i_ext.i_data + iinfo->i_lenEAttr;
86 } else { 88 } else {
87 bh = sb_bread(inode->i_sb, udf_block_map(inode, 0)); 89 bh = sb_bread(inode->i_sb, pos);
88 90
89 if (!bh) 91 if (!bh)
90 goto out; 92 goto out;
@@ -95,14 +97,14 @@ static int udf_symlink_filler(struct file *file, struct page *page)
95 udf_pc_to_char(inode->i_sb, symlink, inode->i_size, p); 97 udf_pc_to_char(inode->i_sb, symlink, inode->i_size, p);
96 brelse(bh); 98 brelse(bh);
97 99
98 unlock_kernel(); 100 up_read(&iinfo->i_data_sem);
99 SetPageUptodate(page); 101 SetPageUptodate(page);
100 kunmap(page); 102 kunmap(page);
101 unlock_page(page); 103 unlock_page(page);
102 return 0; 104 return 0;
103 105
104out: 106out:
105 unlock_kernel(); 107 up_read(&iinfo->i_data_sem);
106 SetPageError(page); 108 SetPageError(page);
107 kunmap(page); 109 kunmap(page);
108 unlock_page(page); 110 unlock_page(page);
diff --git a/fs/udf/udf_i.h b/fs/udf/udf_i.h
index e58d1de4107..d1bd31ea724 100644
--- a/fs/udf/udf_i.h
+++ b/fs/udf/udf_i.h
@@ -1,6 +1,18 @@
1#ifndef _UDF_I_H 1#ifndef _UDF_I_H
2#define _UDF_I_H 2#define _UDF_I_H
3 3
4/*
5 * The i_data_sem and i_mutex serve for protection of allocation information
6 * of a regular files and symlinks. This includes all extents belonging to
7 * the file/symlink, a fact whether data are in-inode or in external data
8 * blocks, preallocation, goal block information... When extents are read,
9 * i_mutex or i_data_sem must be held (for reading is enough in case of
10 * i_data_sem). When extents are changed, i_data_sem must be held for writing
11 * and also i_mutex must be held.
12 *
13 * For directories i_mutex is used for all the necessary protection.
14 */
15
4struct udf_inode_info { 16struct udf_inode_info {
5 struct timespec i_crtime; 17 struct timespec i_crtime;
6 /* Physical address of inode */ 18 /* Physical address of inode */
@@ -21,6 +33,7 @@ struct udf_inode_info {
21 struct long_ad *i_lad; 33 struct long_ad *i_lad;
22 __u8 *i_data; 34 __u8 *i_data;
23 } i_ext; 35 } i_ext;
36 struct rw_semaphore i_data_sem;
24 struct inode vfs_inode; 37 struct inode vfs_inode;
25}; 38};
26 39
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
index d113b72c276..4858c191242 100644
--- a/fs/udf/udf_sb.h
+++ b/fs/udf/udf_sb.h
@@ -2,6 +2,7 @@
2#define __LINUX_UDF_SB_H 2#define __LINUX_UDF_SB_H
3 3
4#include <linux/mutex.h> 4#include <linux/mutex.h>
5#include <linux/bitops.h>
5 6
6/* Since UDF 2.01 is ISO 13346 based... */ 7/* Since UDF 2.01 is ISO 13346 based... */
7#define UDF_SUPER_MAGIC 0x15013346 8#define UDF_SUPER_MAGIC 0x15013346
@@ -128,6 +129,8 @@ struct udf_sb_info {
128 uid_t s_uid; 129 uid_t s_uid;
129 mode_t s_fmode; 130 mode_t s_fmode;
130 mode_t s_dmode; 131 mode_t s_dmode;
132 /* Lock protecting consistency of above permission settings */
133 rwlock_t s_cred_lock;
131 134
132 /* Root Info */ 135 /* Root Info */
133 struct timespec s_record_time; 136 struct timespec s_record_time;
@@ -139,7 +142,7 @@ struct udf_sb_info {
139 __u16 s_udfrev; 142 __u16 s_udfrev;
140 143
141 /* Miscellaneous flags */ 144 /* Miscellaneous flags */
142 __u32 s_flags; 145 unsigned long s_flags;
143 146
144 /* Encoding info */ 147 /* Encoding info */
145 struct nls_table *s_nls_map; 148 struct nls_table *s_nls_map;
@@ -161,8 +164,19 @@ struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi);
161 164
162int udf_compute_nr_groups(struct super_block *sb, u32 partition); 165int udf_compute_nr_groups(struct super_block *sb, u32 partition);
163 166
164#define UDF_QUERY_FLAG(X,Y) ( UDF_SB(X)->s_flags & ( 1 << (Y) ) ) 167static inline int UDF_QUERY_FLAG(struct super_block *sb, int flag)
165#define UDF_SET_FLAG(X,Y) ( UDF_SB(X)->s_flags |= ( 1 << (Y) ) ) 168{
166#define UDF_CLEAR_FLAG(X,Y) ( UDF_SB(X)->s_flags &= ~( 1 << (Y) ) ) 169 return test_bit(flag, &UDF_SB(sb)->s_flags);
170}
171
172static inline void UDF_SET_FLAG(struct super_block *sb, int flag)
173{
174 set_bit(flag, &UDF_SB(sb)->s_flags);
175}
176
177static inline void UDF_CLEAR_FLAG(struct super_block *sb, int flag)
178{
179 clear_bit(flag, &UDF_SB(sb)->s_flags);
180}
167 181
168#endif /* __LINUX_UDF_SB_H */ 182#endif /* __LINUX_UDF_SB_H */
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 6995ab1f430..eba48209f9f 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -111,6 +111,8 @@ struct extent_position {
111}; 111};
112 112
113/* super.c */ 113/* super.c */
114
115__attribute__((format(printf, 3, 4)))
114extern void udf_warning(struct super_block *, const char *, const char *, ...); 116extern void udf_warning(struct super_block *, const char *, const char *, ...);
115static inline void udf_updated_lvid(struct super_block *sb) 117static inline void udf_updated_lvid(struct super_block *sb)
116{ 118{
@@ -123,6 +125,7 @@ static inline void udf_updated_lvid(struct super_block *sb)
123 sb->s_dirt = 1; 125 sb->s_dirt = 1;
124 UDF_SB(sb)->s_lvid_dirty = 1; 126 UDF_SB(sb)->s_lvid_dirty = 1;
125} 127}
128extern u64 lvid_get_unique_id(struct super_block *sb);
126 129
127/* namei.c */ 130/* namei.c */
128extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *, 131extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *,
@@ -133,7 +136,6 @@ extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *,
133extern long udf_ioctl(struct file *, unsigned int, unsigned long); 136extern long udf_ioctl(struct file *, unsigned int, unsigned long);
134/* inode.c */ 137/* inode.c */
135extern struct inode *udf_iget(struct super_block *, struct kernel_lb_addr *); 138extern struct inode *udf_iget(struct super_block *, struct kernel_lb_addr *);
136extern int udf_sync_inode(struct inode *);
137extern void udf_expand_file_adinicb(struct inode *, int, int *); 139extern void udf_expand_file_adinicb(struct inode *, int, int *);
138extern struct buffer_head *udf_expand_dir_adinicb(struct inode *, int *, int *); 140extern struct buffer_head *udf_expand_dir_adinicb(struct inode *, int *, int *);
139extern struct buffer_head *udf_bread(struct inode *, int, int, int *); 141extern struct buffer_head *udf_bread(struct inode *, int, int, int *);
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 2c47daed56d..2c61ac5d4e4 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1412,11 +1412,18 @@ static struct inode *ufs_alloc_inode(struct super_block *sb)
1412 return &ei->vfs_inode; 1412 return &ei->vfs_inode;
1413} 1413}
1414 1414
1415static void ufs_destroy_inode(struct inode *inode) 1415static void ufs_i_callback(struct rcu_head *head)
1416{ 1416{
1417 struct inode *inode = container_of(head, struct inode, i_rcu);
1418 INIT_LIST_HEAD(&inode->i_dentry);
1417 kmem_cache_free(ufs_inode_cachep, UFS_I(inode)); 1419 kmem_cache_free(ufs_inode_cachep, UFS_I(inode));
1418} 1420}
1419 1421
1422static void ufs_destroy_inode(struct inode *inode)
1423{
1424 call_rcu(&inode->i_rcu, ufs_i_callback);
1425}
1426
1420static void init_once(void *foo) 1427static void init_once(void *foo)
1421{ 1428{
1422 struct ufs_inode_info *ei = (struct ufs_inode_info *) foo; 1429 struct ufs_inode_info *ei = (struct ufs_inode_info *) foo;
diff --git a/fs/xfs/linux-2.6/sv.h b/fs/xfs/linux-2.6/sv.h
deleted file mode 100644
index 4dfc7c37081..00000000000
--- a/fs/xfs/linux-2.6/sv.h
+++ /dev/null
@@ -1,59 +0,0 @@
1/*
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_SUPPORT_SV_H__
19#define __XFS_SUPPORT_SV_H__
20
21#include <linux/wait.h>
22#include <linux/sched.h>
23#include <linux/spinlock.h>
24
25/*
26 * Synchronisation variables.
27 *
28 * (Parameters "pri", "svf" and "rts" are not implemented)
29 */
30
31typedef struct sv_s {
32 wait_queue_head_t waiters;
33} sv_t;
34
35static inline void _sv_wait(sv_t *sv, spinlock_t *lock)
36{
37 DECLARE_WAITQUEUE(wait, current);
38
39 add_wait_queue_exclusive(&sv->waiters, &wait);
40 __set_current_state(TASK_UNINTERRUPTIBLE);
41 spin_unlock(lock);
42
43 schedule();
44
45 remove_wait_queue(&sv->waiters, &wait);
46}
47
48#define sv_init(sv,flag,name) \
49 init_waitqueue_head(&(sv)->waiters)
50#define sv_destroy(sv) \
51 /*NOTHING*/
52#define sv_wait(sv, pri, lock, s) \
53 _sv_wait(sv, lock)
54#define sv_signal(sv) \
55 wake_up(&(sv)->waiters)
56#define sv_broadcast(sv) \
57 wake_up_all(&(sv)->waiters)
58
59#endif /* __XFS_SUPPORT_SV_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index b2771862fd3..39f4f809bb6 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -219,12 +219,13 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
219} 219}
220 220
221int 221int
222xfs_check_acl(struct inode *inode, int mask) 222xfs_check_acl(struct inode *inode, int mask, unsigned int flags)
223{ 223{
224 struct xfs_inode *ip = XFS_I(inode); 224 struct xfs_inode *ip;
225 struct posix_acl *acl; 225 struct posix_acl *acl;
226 int error = -EAGAIN; 226 int error = -EAGAIN;
227 227
228 ip = XFS_I(inode);
228 trace_xfs_check_acl(ip); 229 trace_xfs_check_acl(ip);
229 230
230 /* 231 /*
@@ -234,6 +235,12 @@ xfs_check_acl(struct inode *inode, int mask)
234 if (!XFS_IFORK_Q(ip)) 235 if (!XFS_IFORK_Q(ip))
235 return -EAGAIN; 236 return -EAGAIN;
236 237
238 if (flags & IPERM_FLAG_RCU) {
239 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
240 return -ECHILD;
241 return -EAGAIN;
242 }
243
237 acl = xfs_get_acl(inode, ACL_TYPE_ACCESS); 244 acl = xfs_get_acl(inode, ACL_TYPE_ACCESS);
238 if (IS_ERR(acl)) 245 if (IS_ERR(acl))
239 return PTR_ERR(acl); 246 return PTR_ERR(acl);
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 691f61223ed..ec7bbb5645b 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -38,15 +38,6 @@
38#include <linux/pagevec.h> 38#include <linux/pagevec.h>
39#include <linux/writeback.h> 39#include <linux/writeback.h>
40 40
41/*
42 * Types of I/O for bmap clustering and I/O completion tracking.
43 */
44enum {
45 IO_READ, /* mapping for a read */
46 IO_DELAY, /* mapping covers delalloc region */
47 IO_UNWRITTEN, /* mapping covers allocated but uninitialized data */
48 IO_NEW /* just allocated */
49};
50 41
51/* 42/*
52 * Prime number of hash buckets since address is used as the key. 43 * Prime number of hash buckets since address is used as the key.
@@ -182,9 +173,6 @@ xfs_setfilesize(
182 xfs_inode_t *ip = XFS_I(ioend->io_inode); 173 xfs_inode_t *ip = XFS_I(ioend->io_inode);
183 xfs_fsize_t isize; 174 xfs_fsize_t isize;
184 175
185 ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
186 ASSERT(ioend->io_type != IO_READ);
187
188 if (unlikely(ioend->io_error)) 176 if (unlikely(ioend->io_error))
189 return 0; 177 return 0;
190 178
@@ -244,10 +232,8 @@ xfs_end_io(
244 * We might have to update the on-disk file size after extending 232 * We might have to update the on-disk file size after extending
245 * writes. 233 * writes.
246 */ 234 */
247 if (ioend->io_type != IO_READ) { 235 error = xfs_setfilesize(ioend);
248 error = xfs_setfilesize(ioend); 236 ASSERT(!error || error == EAGAIN);
249 ASSERT(!error || error == EAGAIN);
250 }
251 237
252 /* 238 /*
253 * If we didn't complete processing of the ioend, requeue it to the 239 * If we didn't complete processing of the ioend, requeue it to the
@@ -318,14 +304,63 @@ STATIC int
318xfs_map_blocks( 304xfs_map_blocks(
319 struct inode *inode, 305 struct inode *inode,
320 loff_t offset, 306 loff_t offset,
321 ssize_t count,
322 struct xfs_bmbt_irec *imap, 307 struct xfs_bmbt_irec *imap,
323 int flags) 308 int type,
309 int nonblocking)
324{ 310{
325 int nmaps = 1; 311 struct xfs_inode *ip = XFS_I(inode);
326 int new = 0; 312 struct xfs_mount *mp = ip->i_mount;
313 ssize_t count = 1 << inode->i_blkbits;
314 xfs_fileoff_t offset_fsb, end_fsb;
315 int error = 0;
316 int bmapi_flags = XFS_BMAPI_ENTIRE;
317 int nimaps = 1;
318
319 if (XFS_FORCED_SHUTDOWN(mp))
320 return -XFS_ERROR(EIO);
321
322 if (type == IO_UNWRITTEN)
323 bmapi_flags |= XFS_BMAPI_IGSTATE;
324
325 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
326 if (nonblocking)
327 return -XFS_ERROR(EAGAIN);
328 xfs_ilock(ip, XFS_ILOCK_SHARED);
329 }
327 330
328 return -xfs_iomap(XFS_I(inode), offset, count, flags, imap, &nmaps, &new); 331 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
332 (ip->i_df.if_flags & XFS_IFEXTENTS));
333 ASSERT(offset <= mp->m_maxioffset);
334
335 if (offset + count > mp->m_maxioffset)
336 count = mp->m_maxioffset - offset;
337 end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
338 offset_fsb = XFS_B_TO_FSBT(mp, offset);
339 error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
340 bmapi_flags, NULL, 0, imap, &nimaps, NULL);
341 xfs_iunlock(ip, XFS_ILOCK_SHARED);
342
343 if (error)
344 return -XFS_ERROR(error);
345
346 if (type == IO_DELALLOC &&
347 (!nimaps || isnullstartblock(imap->br_startblock))) {
348 error = xfs_iomap_write_allocate(ip, offset, count, imap);
349 if (!error)
350 trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
351 return -XFS_ERROR(error);
352 }
353
354#ifdef DEBUG
355 if (type == IO_UNWRITTEN) {
356 ASSERT(nimaps);
357 ASSERT(imap->br_startblock != HOLESTARTBLOCK);
358 ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
359 }
360#endif
361 if (nimaps)
362 trace_xfs_map_blocks_found(ip, offset, count, type, imap);
363 return 0;
329} 364}
330 365
331STATIC int 366STATIC int
@@ -380,26 +415,18 @@ xfs_submit_ioend_bio(
380 415
381 submit_bio(wbc->sync_mode == WB_SYNC_ALL ? 416 submit_bio(wbc->sync_mode == WB_SYNC_ALL ?
382 WRITE_SYNC_PLUG : WRITE, bio); 417 WRITE_SYNC_PLUG : WRITE, bio);
383 ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP));
384 bio_put(bio);
385} 418}
386 419
387STATIC struct bio * 420STATIC struct bio *
388xfs_alloc_ioend_bio( 421xfs_alloc_ioend_bio(
389 struct buffer_head *bh) 422 struct buffer_head *bh)
390{ 423{
391 struct bio *bio;
392 int nvecs = bio_get_nr_vecs(bh->b_bdev); 424 int nvecs = bio_get_nr_vecs(bh->b_bdev);
393 425 struct bio *bio = bio_alloc(GFP_NOIO, nvecs);
394 do {
395 bio = bio_alloc(GFP_NOIO, nvecs);
396 nvecs >>= 1;
397 } while (!bio);
398 426
399 ASSERT(bio->bi_private == NULL); 427 ASSERT(bio->bi_private == NULL);
400 bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); 428 bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
401 bio->bi_bdev = bh->b_bdev; 429 bio->bi_bdev = bh->b_bdev;
402 bio_get(bio);
403 return bio; 430 return bio;
404} 431}
405 432
@@ -470,9 +497,8 @@ xfs_submit_ioend(
470 /* Pass 1 - start writeback */ 497 /* Pass 1 - start writeback */
471 do { 498 do {
472 next = ioend->io_list; 499 next = ioend->io_list;
473 for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) { 500 for (bh = ioend->io_buffer_head; bh; bh = bh->b_private)
474 xfs_start_buffer_writeback(bh); 501 xfs_start_buffer_writeback(bh);
475 }
476 } while ((ioend = next) != NULL); 502 } while ((ioend = next) != NULL);
477 503
478 /* Pass 2 - submit I/O */ 504 /* Pass 2 - submit I/O */
@@ -600,117 +626,13 @@ xfs_map_at_offset(
600 ASSERT(imap->br_startblock != HOLESTARTBLOCK); 626 ASSERT(imap->br_startblock != HOLESTARTBLOCK);
601 ASSERT(imap->br_startblock != DELAYSTARTBLOCK); 627 ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
602 628
603 lock_buffer(bh);
604 xfs_map_buffer(inode, bh, imap, offset); 629 xfs_map_buffer(inode, bh, imap, offset);
605 bh->b_bdev = xfs_find_bdev_for_inode(inode);
606 set_buffer_mapped(bh); 630 set_buffer_mapped(bh);
607 clear_buffer_delay(bh); 631 clear_buffer_delay(bh);
608 clear_buffer_unwritten(bh); 632 clear_buffer_unwritten(bh);
609} 633}
610 634
611/* 635/*
612 * Look for a page at index that is suitable for clustering.
613 */
614STATIC unsigned int
615xfs_probe_page(
616 struct page *page,
617 unsigned int pg_offset)
618{
619 struct buffer_head *bh, *head;
620 int ret = 0;
621
622 if (PageWriteback(page))
623 return 0;
624 if (!PageDirty(page))
625 return 0;
626 if (!page->mapping)
627 return 0;
628 if (!page_has_buffers(page))
629 return 0;
630
631 bh = head = page_buffers(page);
632 do {
633 if (!buffer_uptodate(bh))
634 break;
635 if (!buffer_mapped(bh))
636 break;
637 ret += bh->b_size;
638 if (ret >= pg_offset)
639 break;
640 } while ((bh = bh->b_this_page) != head);
641
642 return ret;
643}
644
645STATIC size_t
646xfs_probe_cluster(
647 struct inode *inode,
648 struct page *startpage,
649 struct buffer_head *bh,
650 struct buffer_head *head)
651{
652 struct pagevec pvec;
653 pgoff_t tindex, tlast, tloff;
654 size_t total = 0;
655 int done = 0, i;
656
657 /* First sum forwards in this page */
658 do {
659 if (!buffer_uptodate(bh) || !buffer_mapped(bh))
660 return total;
661 total += bh->b_size;
662 } while ((bh = bh->b_this_page) != head);
663
664 /* if we reached the end of the page, sum forwards in following pages */
665 tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
666 tindex = startpage->index + 1;
667
668 /* Prune this back to avoid pathological behavior */
669 tloff = min(tlast, startpage->index + 64);
670
671 pagevec_init(&pvec, 0);
672 while (!done && tindex <= tloff) {
673 unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
674
675 if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
676 break;
677
678 for (i = 0; i < pagevec_count(&pvec); i++) {
679 struct page *page = pvec.pages[i];
680 size_t pg_offset, pg_len = 0;
681
682 if (tindex == tlast) {
683 pg_offset =
684 i_size_read(inode) & (PAGE_CACHE_SIZE - 1);
685 if (!pg_offset) {
686 done = 1;
687 break;
688 }
689 } else
690 pg_offset = PAGE_CACHE_SIZE;
691
692 if (page->index == tindex && trylock_page(page)) {
693 pg_len = xfs_probe_page(page, pg_offset);
694 unlock_page(page);
695 }
696
697 if (!pg_len) {
698 done = 1;
699 break;
700 }
701
702 total += pg_len;
703 tindex++;
704 }
705
706 pagevec_release(&pvec);
707 cond_resched();
708 }
709
710 return total;
711}
712
713/*
714 * Test if a given page is suitable for writing as part of an unwritten 636 * Test if a given page is suitable for writing as part of an unwritten
715 * or delayed allocate extent. 637 * or delayed allocate extent.
716 */ 638 */
@@ -731,9 +653,9 @@ xfs_is_delayed_page(
731 if (buffer_unwritten(bh)) 653 if (buffer_unwritten(bh))
732 acceptable = (type == IO_UNWRITTEN); 654 acceptable = (type == IO_UNWRITTEN);
733 else if (buffer_delay(bh)) 655 else if (buffer_delay(bh))
734 acceptable = (type == IO_DELAY); 656 acceptable = (type == IO_DELALLOC);
735 else if (buffer_dirty(bh) && buffer_mapped(bh)) 657 else if (buffer_dirty(bh) && buffer_mapped(bh))
736 acceptable = (type == IO_NEW); 658 acceptable = (type == IO_OVERWRITE);
737 else 659 else
738 break; 660 break;
739 } while ((bh = bh->b_this_page) != head); 661 } while ((bh = bh->b_this_page) != head);
@@ -758,8 +680,7 @@ xfs_convert_page(
758 loff_t tindex, 680 loff_t tindex,
759 struct xfs_bmbt_irec *imap, 681 struct xfs_bmbt_irec *imap,
760 xfs_ioend_t **ioendp, 682 xfs_ioend_t **ioendp,
761 struct writeback_control *wbc, 683 struct writeback_control *wbc)
762 int all_bh)
763{ 684{
764 struct buffer_head *bh, *head; 685 struct buffer_head *bh, *head;
765 xfs_off_t end_offset; 686 xfs_off_t end_offset;
@@ -814,37 +735,30 @@ xfs_convert_page(
814 continue; 735 continue;
815 } 736 }
816 737
817 if (buffer_unwritten(bh) || buffer_delay(bh)) { 738 if (buffer_unwritten(bh) || buffer_delay(bh) ||
739 buffer_mapped(bh)) {
818 if (buffer_unwritten(bh)) 740 if (buffer_unwritten(bh))
819 type = IO_UNWRITTEN; 741 type = IO_UNWRITTEN;
742 else if (buffer_delay(bh))
743 type = IO_DELALLOC;
820 else 744 else
821 type = IO_DELAY; 745 type = IO_OVERWRITE;
822 746
823 if (!xfs_imap_valid(inode, imap, offset)) { 747 if (!xfs_imap_valid(inode, imap, offset)) {
824 done = 1; 748 done = 1;
825 continue; 749 continue;
826 } 750 }
827 751
828 ASSERT(imap->br_startblock != HOLESTARTBLOCK); 752 lock_buffer(bh);
829 ASSERT(imap->br_startblock != DELAYSTARTBLOCK); 753 if (type != IO_OVERWRITE)
830 754 xfs_map_at_offset(inode, bh, imap, offset);
831 xfs_map_at_offset(inode, bh, imap, offset);
832 xfs_add_to_ioend(inode, bh, offset, type, 755 xfs_add_to_ioend(inode, bh, offset, type,
833 ioendp, done); 756 ioendp, done);
834 757
835 page_dirty--; 758 page_dirty--;
836 count++; 759 count++;
837 } else { 760 } else {
838 type = IO_NEW; 761 done = 1;
839 if (buffer_mapped(bh) && all_bh) {
840 lock_buffer(bh);
841 xfs_add_to_ioend(inode, bh, offset,
842 type, ioendp, done);
843 count++;
844 page_dirty--;
845 } else {
846 done = 1;
847 }
848 } 762 }
849 } while (offset += len, (bh = bh->b_this_page) != head); 763 } while (offset += len, (bh = bh->b_this_page) != head);
850 764
@@ -876,7 +790,6 @@ xfs_cluster_write(
876 struct xfs_bmbt_irec *imap, 790 struct xfs_bmbt_irec *imap,
877 xfs_ioend_t **ioendp, 791 xfs_ioend_t **ioendp,
878 struct writeback_control *wbc, 792 struct writeback_control *wbc,
879 int all_bh,
880 pgoff_t tlast) 793 pgoff_t tlast)
881{ 794{
882 struct pagevec pvec; 795 struct pagevec pvec;
@@ -891,7 +804,7 @@ xfs_cluster_write(
891 804
892 for (i = 0; i < pagevec_count(&pvec); i++) { 805 for (i = 0; i < pagevec_count(&pvec); i++) {
893 done = xfs_convert_page(inode, pvec.pages[i], tindex++, 806 done = xfs_convert_page(inode, pvec.pages[i], tindex++,
894 imap, ioendp, wbc, all_bh); 807 imap, ioendp, wbc);
895 if (done) 808 if (done)
896 break; 809 break;
897 } 810 }
@@ -935,7 +848,7 @@ xfs_aops_discard_page(
935 struct buffer_head *bh, *head; 848 struct buffer_head *bh, *head;
936 loff_t offset = page_offset(page); 849 loff_t offset = page_offset(page);
937 850
938 if (!xfs_is_delayed_page(page, IO_DELAY)) 851 if (!xfs_is_delayed_page(page, IO_DELALLOC))
939 goto out_invalidate; 852 goto out_invalidate;
940 853
941 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 854 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
@@ -1002,10 +915,10 @@ xfs_vm_writepage(
1002 unsigned int type; 915 unsigned int type;
1003 __uint64_t end_offset; 916 __uint64_t end_offset;
1004 pgoff_t end_index, last_index; 917 pgoff_t end_index, last_index;
1005 ssize_t size, len; 918 ssize_t len;
1006 int flags, err, imap_valid = 0, uptodate = 1; 919 int err, imap_valid = 0, uptodate = 1;
1007 int count = 0; 920 int count = 0;
1008 int all_bh = 0; 921 int nonblocking = 0;
1009 922
1010 trace_xfs_writepage(inode, page, 0); 923 trace_xfs_writepage(inode, page, 0);
1011 924
@@ -1056,10 +969,14 @@ xfs_vm_writepage(
1056 969
1057 bh = head = page_buffers(page); 970 bh = head = page_buffers(page);
1058 offset = page_offset(page); 971 offset = page_offset(page);
1059 flags = BMAPI_READ; 972 type = IO_OVERWRITE;
1060 type = IO_NEW; 973
974 if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking)
975 nonblocking = 1;
1061 976
1062 do { 977 do {
978 int new_ioend = 0;
979
1063 if (offset >= end_offset) 980 if (offset >= end_offset)
1064 break; 981 break;
1065 if (!buffer_uptodate(bh)) 982 if (!buffer_uptodate(bh))
@@ -1076,90 +993,54 @@ xfs_vm_writepage(
1076 continue; 993 continue;
1077 } 994 }
1078 995
1079 if (imap_valid) 996 if (buffer_unwritten(bh)) {
1080 imap_valid = xfs_imap_valid(inode, &imap, offset); 997 if (type != IO_UNWRITTEN) {
1081
1082 if (buffer_unwritten(bh) || buffer_delay(bh)) {
1083 int new_ioend = 0;
1084
1085 /*
1086 * Make sure we don't use a read-only iomap
1087 */
1088 if (flags == BMAPI_READ)
1089 imap_valid = 0;
1090
1091 if (buffer_unwritten(bh)) {
1092 type = IO_UNWRITTEN; 998 type = IO_UNWRITTEN;
1093 flags = BMAPI_WRITE | BMAPI_IGNSTATE; 999 imap_valid = 0;
1094 } else if (buffer_delay(bh)) {
1095 type = IO_DELAY;
1096 flags = BMAPI_ALLOCATE;
1097
1098 if (wbc->sync_mode == WB_SYNC_NONE)
1099 flags |= BMAPI_TRYLOCK;
1100 }
1101
1102 if (!imap_valid) {
1103 /*
1104 * If we didn't have a valid mapping then we
1105 * need to ensure that we put the new mapping
1106 * in a new ioend structure. This needs to be
1107 * done to ensure that the ioends correctly
1108 * reflect the block mappings at io completion
1109 * for unwritten extent conversion.
1110 */
1111 new_ioend = 1;
1112 err = xfs_map_blocks(inode, offset, len,
1113 &imap, flags);
1114 if (err)
1115 goto error;
1116 imap_valid = xfs_imap_valid(inode, &imap,
1117 offset);
1118 } 1000 }
1119 if (imap_valid) { 1001 } else if (buffer_delay(bh)) {
1120 xfs_map_at_offset(inode, bh, &imap, offset); 1002 if (type != IO_DELALLOC) {
1121 xfs_add_to_ioend(inode, bh, offset, type, 1003 type = IO_DELALLOC;
1122 &ioend, new_ioend); 1004 imap_valid = 0;
1123 count++;
1124 } 1005 }
1125 } else if (buffer_uptodate(bh)) { 1006 } else if (buffer_uptodate(bh)) {
1126 /* 1007 if (type != IO_OVERWRITE) {
1127 * we got here because the buffer is already mapped. 1008 type = IO_OVERWRITE;
1128 * That means it must already have extents allocated 1009 imap_valid = 0;
1129 * underneath it. Map the extent by reading it.
1130 */
1131 if (!imap_valid || flags != BMAPI_READ) {
1132 flags = BMAPI_READ;
1133 size = xfs_probe_cluster(inode, page, bh, head);
1134 err = xfs_map_blocks(inode, offset, size,
1135 &imap, flags);
1136 if (err)
1137 goto error;
1138 imap_valid = xfs_imap_valid(inode, &imap,
1139 offset);
1140 } 1010 }
1011 } else {
1012 if (PageUptodate(page)) {
1013 ASSERT(buffer_mapped(bh));
1014 imap_valid = 0;
1015 }
1016 continue;
1017 }
1141 1018
1019 if (imap_valid)
1020 imap_valid = xfs_imap_valid(inode, &imap, offset);
1021 if (!imap_valid) {
1142 /* 1022 /*
1143 * We set the type to IO_NEW in case we are doing a 1023 * If we didn't have a valid mapping then we need to
1144 * small write at EOF that is extending the file but 1024 * put the new mapping into a separate ioend structure.
1145 * without needing an allocation. We need to update the 1025 * This ensures non-contiguous extents always have
1146 * file size on I/O completion in this case so it is 1026 * separate ioends, which is particularly important
1147 * the same case as having just allocated a new extent 1027 * for unwritten extent conversion at I/O completion
1148 * that we are writing into for the first time. 1028 * time.
1149 */ 1029 */
1150 type = IO_NEW; 1030 new_ioend = 1;
1151 if (trylock_buffer(bh)) { 1031 err = xfs_map_blocks(inode, offset, &imap, type,
1152 if (imap_valid) 1032 nonblocking);
1153 all_bh = 1; 1033 if (err)
1154 xfs_add_to_ioend(inode, bh, offset, type, 1034 goto error;
1155 &ioend, !imap_valid); 1035 imap_valid = xfs_imap_valid(inode, &imap, offset);
1156 count++; 1036 }
1157 } else { 1037 if (imap_valid) {
1158 imap_valid = 0; 1038 lock_buffer(bh);
1159 } 1039 if (type != IO_OVERWRITE)
1160 } else if (PageUptodate(page)) { 1040 xfs_map_at_offset(inode, bh, &imap, offset);
1161 ASSERT(buffer_mapped(bh)); 1041 xfs_add_to_ioend(inode, bh, offset, type, &ioend,
1162 imap_valid = 0; 1042 new_ioend);
1043 count++;
1163 } 1044 }
1164 1045
1165 if (!iohead) 1046 if (!iohead)
@@ -1188,7 +1069,7 @@ xfs_vm_writepage(
1188 end_index = last_index; 1069 end_index = last_index;
1189 1070
1190 xfs_cluster_write(inode, page->index + 1, &imap, &ioend, 1071 xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
1191 wbc, all_bh, end_index); 1072 wbc, end_index);
1192 } 1073 }
1193 1074
1194 if (iohead) 1075 if (iohead)
@@ -1257,13 +1138,19 @@ __xfs_get_blocks(
1257 int create, 1138 int create,
1258 int direct) 1139 int direct)
1259{ 1140{
1260 int flags = create ? BMAPI_WRITE : BMAPI_READ; 1141 struct xfs_inode *ip = XFS_I(inode);
1142 struct xfs_mount *mp = ip->i_mount;
1143 xfs_fileoff_t offset_fsb, end_fsb;
1144 int error = 0;
1145 int lockmode = 0;
1261 struct xfs_bmbt_irec imap; 1146 struct xfs_bmbt_irec imap;
1147 int nimaps = 1;
1262 xfs_off_t offset; 1148 xfs_off_t offset;
1263 ssize_t size; 1149 ssize_t size;
1264 int nimap = 1;
1265 int new = 0; 1150 int new = 0;
1266 int error; 1151
1152 if (XFS_FORCED_SHUTDOWN(mp))
1153 return -XFS_ERROR(EIO);
1267 1154
1268 offset = (xfs_off_t)iblock << inode->i_blkbits; 1155 offset = (xfs_off_t)iblock << inode->i_blkbits;
1269 ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); 1156 ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
@@ -1272,15 +1159,45 @@ __xfs_get_blocks(
1272 if (!create && direct && offset >= i_size_read(inode)) 1159 if (!create && direct && offset >= i_size_read(inode))
1273 return 0; 1160 return 0;
1274 1161
1275 if (direct && create) 1162 if (create) {
1276 flags |= BMAPI_DIRECT; 1163 lockmode = XFS_ILOCK_EXCL;
1164 xfs_ilock(ip, lockmode);
1165 } else {
1166 lockmode = xfs_ilock_map_shared(ip);
1167 }
1168
1169 ASSERT(offset <= mp->m_maxioffset);
1170 if (offset + size > mp->m_maxioffset)
1171 size = mp->m_maxioffset - offset;
1172 end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
1173 offset_fsb = XFS_B_TO_FSBT(mp, offset);
1277 1174
1278 error = xfs_iomap(XFS_I(inode), offset, size, flags, &imap, &nimap, 1175 error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
1279 &new); 1176 XFS_BMAPI_ENTIRE, NULL, 0, &imap, &nimaps, NULL);
1280 if (error) 1177 if (error)
1281 return -error; 1178 goto out_unlock;
1282 if (nimap == 0) 1179
1283 return 0; 1180 if (create &&
1181 (!nimaps ||
1182 (imap.br_startblock == HOLESTARTBLOCK ||
1183 imap.br_startblock == DELAYSTARTBLOCK))) {
1184 if (direct) {
1185 error = xfs_iomap_write_direct(ip, offset, size,
1186 &imap, nimaps);
1187 } else {
1188 error = xfs_iomap_write_delay(ip, offset, size, &imap);
1189 }
1190 if (error)
1191 goto out_unlock;
1192
1193 trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap);
1194 } else if (nimaps) {
1195 trace_xfs_get_blocks_found(ip, offset, size, 0, &imap);
1196 } else {
1197 trace_xfs_get_blocks_notfound(ip, offset, size);
1198 goto out_unlock;
1199 }
1200 xfs_iunlock(ip, lockmode);
1284 1201
1285 if (imap.br_startblock != HOLESTARTBLOCK && 1202 if (imap.br_startblock != HOLESTARTBLOCK &&
1286 imap.br_startblock != DELAYSTARTBLOCK) { 1203 imap.br_startblock != DELAYSTARTBLOCK) {
@@ -1347,6 +1264,10 @@ __xfs_get_blocks(
1347 } 1264 }
1348 1265
1349 return 0; 1266 return 0;
1267
1268out_unlock:
1269 xfs_iunlock(ip, lockmode);
1270 return -error;
1350} 1271}
1351 1272
1352int 1273int
@@ -1434,7 +1355,7 @@ xfs_vm_direct_IO(
1434 ssize_t ret; 1355 ssize_t ret;
1435 1356
1436 if (rw & WRITE) { 1357 if (rw & WRITE) {
1437 iocb->private = xfs_alloc_ioend(inode, IO_NEW); 1358 iocb->private = xfs_alloc_ioend(inode, IO_DIRECT);
1438 1359
1439 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, 1360 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
1440 offset, nr_segs, 1361 offset, nr_segs,
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index c5057fb6237..71f721e1a71 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -23,6 +23,22 @@ extern struct workqueue_struct *xfsconvertd_workqueue;
23extern mempool_t *xfs_ioend_pool; 23extern mempool_t *xfs_ioend_pool;
24 24
25/* 25/*
26 * Types of I/O for bmap clustering and I/O completion tracking.
27 */
28enum {
29 IO_DIRECT = 0, /* special case for direct I/O ioends */
30 IO_DELALLOC, /* mapping covers delalloc region */
31 IO_UNWRITTEN, /* mapping covers allocated but uninitialized data */
32 IO_OVERWRITE, /* mapping covers already allocated extent */
33};
34
35#define XFS_IO_TYPES \
36 { 0, "" }, \
37 { IO_DELALLOC, "delalloc" }, \
38 { IO_UNWRITTEN, "unwritten" }, \
39 { IO_OVERWRITE, "overwrite" }
40
41/*
26 * xfs_ioend struct manages large extent writes for XFS. 42 * xfs_ioend struct manages large extent writes for XFS.
27 * It can manage several multi-page bio's at once. 43 * It can manage several multi-page bio's at once.
28 */ 44 */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 4c5deb6e9e3..92f1f2acc6a 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -44,12 +44,7 @@
44 44
45static kmem_zone_t *xfs_buf_zone; 45static kmem_zone_t *xfs_buf_zone;
46STATIC int xfsbufd(void *); 46STATIC int xfsbufd(void *);
47STATIC int xfsbufd_wakeup(struct shrinker *, int, gfp_t);
48STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int); 47STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
49static struct shrinker xfs_buf_shake = {
50 .shrink = xfsbufd_wakeup,
51 .seeks = DEFAULT_SEEKS,
52};
53 48
54static struct workqueue_struct *xfslogd_workqueue; 49static struct workqueue_struct *xfslogd_workqueue;
55struct workqueue_struct *xfsdatad_workqueue; 50struct workqueue_struct *xfsdatad_workqueue;
@@ -168,8 +163,79 @@ test_page_region(
168} 163}
169 164
170/* 165/*
171 * Internal xfs_buf_t object manipulation 166 * xfs_buf_lru_add - add a buffer to the LRU.
167 *
168 * The LRU takes a new reference to the buffer so that it will only be freed
169 * once the shrinker takes the buffer off the LRU.
172 */ 170 */
171STATIC void
172xfs_buf_lru_add(
173 struct xfs_buf *bp)
174{
175 struct xfs_buftarg *btp = bp->b_target;
176
177 spin_lock(&btp->bt_lru_lock);
178 if (list_empty(&bp->b_lru)) {
179 atomic_inc(&bp->b_hold);
180 list_add_tail(&bp->b_lru, &btp->bt_lru);
181 btp->bt_lru_nr++;
182 }
183 spin_unlock(&btp->bt_lru_lock);
184}
185
186/*
187 * xfs_buf_lru_del - remove a buffer from the LRU
188 *
189 * The unlocked check is safe here because it only occurs when there are no
190 * b_lru_ref counts left on the buffer under the pag->pag_buf_lock. It is there
191 * to optimise the shrinker removing the buffer from the LRU and calling
192 * xfs_buf_free(). i.e. it removes an unnecessary round trip on the
193 * bt_lru_lock.
194 */
195STATIC void
196xfs_buf_lru_del(
197 struct xfs_buf *bp)
198{
199 struct xfs_buftarg *btp = bp->b_target;
200
201 if (list_empty(&bp->b_lru))
202 return;
203
204 spin_lock(&btp->bt_lru_lock);
205 if (!list_empty(&bp->b_lru)) {
206 list_del_init(&bp->b_lru);
207 btp->bt_lru_nr--;
208 }
209 spin_unlock(&btp->bt_lru_lock);
210}
211
212/*
213 * When we mark a buffer stale, we remove the buffer from the LRU and clear the
214 * b_lru_ref count so that the buffer is freed immediately when the buffer
215 * reference count falls to zero. If the buffer is already on the LRU, we need
216 * to remove the reference that LRU holds on the buffer.
217 *
218 * This prevents build-up of stale buffers on the LRU.
219 */
220void
221xfs_buf_stale(
222 struct xfs_buf *bp)
223{
224 bp->b_flags |= XBF_STALE;
225 atomic_set(&(bp)->b_lru_ref, 0);
226 if (!list_empty(&bp->b_lru)) {
227 struct xfs_buftarg *btp = bp->b_target;
228
229 spin_lock(&btp->bt_lru_lock);
230 if (!list_empty(&bp->b_lru)) {
231 list_del_init(&bp->b_lru);
232 btp->bt_lru_nr--;
233 atomic_dec(&bp->b_hold);
234 }
235 spin_unlock(&btp->bt_lru_lock);
236 }
237 ASSERT(atomic_read(&bp->b_hold) >= 1);
238}
173 239
174STATIC void 240STATIC void
175_xfs_buf_initialize( 241_xfs_buf_initialize(
@@ -186,7 +252,9 @@ _xfs_buf_initialize(
186 252
187 memset(bp, 0, sizeof(xfs_buf_t)); 253 memset(bp, 0, sizeof(xfs_buf_t));
188 atomic_set(&bp->b_hold, 1); 254 atomic_set(&bp->b_hold, 1);
255 atomic_set(&bp->b_lru_ref, 1);
189 init_completion(&bp->b_iowait); 256 init_completion(&bp->b_iowait);
257 INIT_LIST_HEAD(&bp->b_lru);
190 INIT_LIST_HEAD(&bp->b_list); 258 INIT_LIST_HEAD(&bp->b_list);
191 RB_CLEAR_NODE(&bp->b_rbnode); 259 RB_CLEAR_NODE(&bp->b_rbnode);
192 sema_init(&bp->b_sema, 0); /* held, no waiters */ 260 sema_init(&bp->b_sema, 0); /* held, no waiters */
@@ -262,6 +330,8 @@ xfs_buf_free(
262{ 330{
263 trace_xfs_buf_free(bp, _RET_IP_); 331 trace_xfs_buf_free(bp, _RET_IP_);
264 332
333 ASSERT(list_empty(&bp->b_lru));
334
265 if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) { 335 if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
266 uint i; 336 uint i;
267 337
@@ -337,7 +407,6 @@ _xfs_buf_lookup_pages(
337 __func__, gfp_mask); 407 __func__, gfp_mask);
338 408
339 XFS_STATS_INC(xb_page_retries); 409 XFS_STATS_INC(xb_page_retries);
340 xfsbufd_wakeup(NULL, 0, gfp_mask);
341 congestion_wait(BLK_RW_ASYNC, HZ/50); 410 congestion_wait(BLK_RW_ASYNC, HZ/50);
342 goto retry; 411 goto retry;
343 } 412 }
@@ -828,6 +897,7 @@ xfs_buf_rele(
828 897
829 if (!pag) { 898 if (!pag) {
830 ASSERT(!bp->b_relse); 899 ASSERT(!bp->b_relse);
900 ASSERT(list_empty(&bp->b_lru));
831 ASSERT(RB_EMPTY_NODE(&bp->b_rbnode)); 901 ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
832 if (atomic_dec_and_test(&bp->b_hold)) 902 if (atomic_dec_and_test(&bp->b_hold))
833 xfs_buf_free(bp); 903 xfs_buf_free(bp);
@@ -835,13 +905,19 @@ xfs_buf_rele(
835 } 905 }
836 906
837 ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode)); 907 ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
908
838 ASSERT(atomic_read(&bp->b_hold) > 0); 909 ASSERT(atomic_read(&bp->b_hold) > 0);
839 if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) { 910 if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
840 if (bp->b_relse) { 911 if (bp->b_relse) {
841 atomic_inc(&bp->b_hold); 912 atomic_inc(&bp->b_hold);
842 spin_unlock(&pag->pag_buf_lock); 913 spin_unlock(&pag->pag_buf_lock);
843 bp->b_relse(bp); 914 bp->b_relse(bp);
915 } else if (!(bp->b_flags & XBF_STALE) &&
916 atomic_read(&bp->b_lru_ref)) {
917 xfs_buf_lru_add(bp);
918 spin_unlock(&pag->pag_buf_lock);
844 } else { 919 } else {
920 xfs_buf_lru_del(bp);
845 ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q))); 921 ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
846 rb_erase(&bp->b_rbnode, &pag->pag_buf_tree); 922 rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
847 spin_unlock(&pag->pag_buf_lock); 923 spin_unlock(&pag->pag_buf_lock);
@@ -1438,51 +1514,84 @@ xfs_buf_iomove(
1438 */ 1514 */
1439 1515
1440/* 1516/*
1441 * Wait for any bufs with callbacks that have been submitted but 1517 * Wait for any bufs with callbacks that have been submitted but have not yet
1442 * have not yet returned... walk the hash list for the target. 1518 * returned. These buffers will have an elevated hold count, so wait on those
1519 * while freeing all the buffers only held by the LRU.
1443 */ 1520 */
1444void 1521void
1445xfs_wait_buftarg( 1522xfs_wait_buftarg(
1446 struct xfs_buftarg *btp) 1523 struct xfs_buftarg *btp)
1447{ 1524{
1448 struct xfs_perag *pag; 1525 struct xfs_buf *bp;
1449 uint i;
1450 1526
1451 for (i = 0; i < btp->bt_mount->m_sb.sb_agcount; i++) { 1527restart:
1452 pag = xfs_perag_get(btp->bt_mount, i); 1528 spin_lock(&btp->bt_lru_lock);
1453 spin_lock(&pag->pag_buf_lock); 1529 while (!list_empty(&btp->bt_lru)) {
1454 while (rb_first(&pag->pag_buf_tree)) { 1530 bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
1455 spin_unlock(&pag->pag_buf_lock); 1531 if (atomic_read(&bp->b_hold) > 1) {
1532 spin_unlock(&btp->bt_lru_lock);
1456 delay(100); 1533 delay(100);
1457 spin_lock(&pag->pag_buf_lock); 1534 goto restart;
1458 } 1535 }
1459 spin_unlock(&pag->pag_buf_lock); 1536 /*
1460 xfs_perag_put(pag); 1537 * clear the LRU reference count so the bufer doesn't get
1538 * ignored in xfs_buf_rele().
1539 */
1540 atomic_set(&bp->b_lru_ref, 0);
1541 spin_unlock(&btp->bt_lru_lock);
1542 xfs_buf_rele(bp);
1543 spin_lock(&btp->bt_lru_lock);
1461 } 1544 }
1545 spin_unlock(&btp->bt_lru_lock);
1462} 1546}
1463 1547
1464/* 1548int
1465 * buftarg list for delwrite queue processing 1549xfs_buftarg_shrink(
1466 */ 1550 struct shrinker *shrink,
1467static LIST_HEAD(xfs_buftarg_list); 1551 int nr_to_scan,
1468static DEFINE_SPINLOCK(xfs_buftarg_lock); 1552 gfp_t mask)
1469
1470STATIC void
1471xfs_register_buftarg(
1472 xfs_buftarg_t *btp)
1473{ 1553{
1474 spin_lock(&xfs_buftarg_lock); 1554 struct xfs_buftarg *btp = container_of(shrink,
1475 list_add(&btp->bt_list, &xfs_buftarg_list); 1555 struct xfs_buftarg, bt_shrinker);
1476 spin_unlock(&xfs_buftarg_lock); 1556 struct xfs_buf *bp;
1477} 1557 LIST_HEAD(dispose);
1478 1558
1479STATIC void 1559 if (!nr_to_scan)
1480xfs_unregister_buftarg( 1560 return btp->bt_lru_nr;
1481 xfs_buftarg_t *btp) 1561
1482{ 1562 spin_lock(&btp->bt_lru_lock);
1483 spin_lock(&xfs_buftarg_lock); 1563 while (!list_empty(&btp->bt_lru)) {
1484 list_del(&btp->bt_list); 1564 if (nr_to_scan-- <= 0)
1485 spin_unlock(&xfs_buftarg_lock); 1565 break;
1566
1567 bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
1568
1569 /*
1570 * Decrement the b_lru_ref count unless the value is already
1571 * zero. If the value is already zero, we need to reclaim the
1572 * buffer, otherwise it gets another trip through the LRU.
1573 */
1574 if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
1575 list_move_tail(&bp->b_lru, &btp->bt_lru);
1576 continue;
1577 }
1578
1579 /*
1580 * remove the buffer from the LRU now to avoid needing another
1581 * lock round trip inside xfs_buf_rele().
1582 */
1583 list_move(&bp->b_lru, &dispose);
1584 btp->bt_lru_nr--;
1585 }
1586 spin_unlock(&btp->bt_lru_lock);
1587
1588 while (!list_empty(&dispose)) {
1589 bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
1590 list_del_init(&bp->b_lru);
1591 xfs_buf_rele(bp);
1592 }
1593
1594 return btp->bt_lru_nr;
1486} 1595}
1487 1596
1488void 1597void
@@ -1490,17 +1599,14 @@ xfs_free_buftarg(
1490 struct xfs_mount *mp, 1599 struct xfs_mount *mp,
1491 struct xfs_buftarg *btp) 1600 struct xfs_buftarg *btp)
1492{ 1601{
1602 unregister_shrinker(&btp->bt_shrinker);
1603
1493 xfs_flush_buftarg(btp, 1); 1604 xfs_flush_buftarg(btp, 1);
1494 if (mp->m_flags & XFS_MOUNT_BARRIER) 1605 if (mp->m_flags & XFS_MOUNT_BARRIER)
1495 xfs_blkdev_issue_flush(btp); 1606 xfs_blkdev_issue_flush(btp);
1496 iput(btp->bt_mapping->host); 1607 iput(btp->bt_mapping->host);
1497 1608
1498 /* Unregister the buftarg first so that we don't get a
1499 * wakeup finding a non-existent task
1500 */
1501 xfs_unregister_buftarg(btp);
1502 kthread_stop(btp->bt_task); 1609 kthread_stop(btp->bt_task);
1503
1504 kmem_free(btp); 1610 kmem_free(btp);
1505} 1611}
1506 1612
@@ -1597,20 +1703,13 @@ xfs_alloc_delwrite_queue(
1597 xfs_buftarg_t *btp, 1703 xfs_buftarg_t *btp,
1598 const char *fsname) 1704 const char *fsname)
1599{ 1705{
1600 int error = 0;
1601
1602 INIT_LIST_HEAD(&btp->bt_list);
1603 INIT_LIST_HEAD(&btp->bt_delwrite_queue); 1706 INIT_LIST_HEAD(&btp->bt_delwrite_queue);
1604 spin_lock_init(&btp->bt_delwrite_lock); 1707 spin_lock_init(&btp->bt_delwrite_lock);
1605 btp->bt_flags = 0; 1708 btp->bt_flags = 0;
1606 btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname); 1709 btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname);
1607 if (IS_ERR(btp->bt_task)) { 1710 if (IS_ERR(btp->bt_task))
1608 error = PTR_ERR(btp->bt_task); 1711 return PTR_ERR(btp->bt_task);
1609 goto out_error; 1712 return 0;
1610 }
1611 xfs_register_buftarg(btp);
1612out_error:
1613 return error;
1614} 1713}
1615 1714
1616xfs_buftarg_t * 1715xfs_buftarg_t *
@@ -1627,12 +1726,17 @@ xfs_alloc_buftarg(
1627 btp->bt_mount = mp; 1726 btp->bt_mount = mp;
1628 btp->bt_dev = bdev->bd_dev; 1727 btp->bt_dev = bdev->bd_dev;
1629 btp->bt_bdev = bdev; 1728 btp->bt_bdev = bdev;
1729 INIT_LIST_HEAD(&btp->bt_lru);
1730 spin_lock_init(&btp->bt_lru_lock);
1630 if (xfs_setsize_buftarg_early(btp, bdev)) 1731 if (xfs_setsize_buftarg_early(btp, bdev))
1631 goto error; 1732 goto error;
1632 if (xfs_mapping_buftarg(btp, bdev)) 1733 if (xfs_mapping_buftarg(btp, bdev))
1633 goto error; 1734 goto error;
1634 if (xfs_alloc_delwrite_queue(btp, fsname)) 1735 if (xfs_alloc_delwrite_queue(btp, fsname))
1635 goto error; 1736 goto error;
1737 btp->bt_shrinker.shrink = xfs_buftarg_shrink;
1738 btp->bt_shrinker.seeks = DEFAULT_SEEKS;
1739 register_shrinker(&btp->bt_shrinker);
1636 return btp; 1740 return btp;
1637 1741
1638error: 1742error:
@@ -1737,27 +1841,6 @@ xfs_buf_runall_queues(
1737 flush_workqueue(queue); 1841 flush_workqueue(queue);
1738} 1842}
1739 1843
1740STATIC int
1741xfsbufd_wakeup(
1742 struct shrinker *shrink,
1743 int priority,
1744 gfp_t mask)
1745{
1746 xfs_buftarg_t *btp;
1747
1748 spin_lock(&xfs_buftarg_lock);
1749 list_for_each_entry(btp, &xfs_buftarg_list, bt_list) {
1750 if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags))
1751 continue;
1752 if (list_empty(&btp->bt_delwrite_queue))
1753 continue;
1754 set_bit(XBT_FORCE_FLUSH, &btp->bt_flags);
1755 wake_up_process(btp->bt_task);
1756 }
1757 spin_unlock(&xfs_buftarg_lock);
1758 return 0;
1759}
1760
1761/* 1844/*
1762 * Move as many buffers as specified to the supplied list 1845 * Move as many buffers as specified to the supplied list
1763 * indicating if we skipped any buffers to prevent deadlocks. 1846 * indicating if we skipped any buffers to prevent deadlocks.
@@ -1952,7 +2035,6 @@ xfs_buf_init(void)
1952 if (!xfsconvertd_workqueue) 2035 if (!xfsconvertd_workqueue)
1953 goto out_destroy_xfsdatad_workqueue; 2036 goto out_destroy_xfsdatad_workqueue;
1954 2037
1955 register_shrinker(&xfs_buf_shake);
1956 return 0; 2038 return 0;
1957 2039
1958 out_destroy_xfsdatad_workqueue: 2040 out_destroy_xfsdatad_workqueue:
@@ -1968,7 +2050,6 @@ xfs_buf_init(void)
1968void 2050void
1969xfs_buf_terminate(void) 2051xfs_buf_terminate(void)
1970{ 2052{
1971 unregister_shrinker(&xfs_buf_shake);
1972 destroy_workqueue(xfsconvertd_workqueue); 2053 destroy_workqueue(xfsconvertd_workqueue);
1973 destroy_workqueue(xfsdatad_workqueue); 2054 destroy_workqueue(xfsdatad_workqueue);
1974 destroy_workqueue(xfslogd_workqueue); 2055 destroy_workqueue(xfslogd_workqueue);
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 383a3f37cf9..a76c2428faf 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -128,10 +128,15 @@ typedef struct xfs_buftarg {
128 128
129 /* per device delwri queue */ 129 /* per device delwri queue */
130 struct task_struct *bt_task; 130 struct task_struct *bt_task;
131 struct list_head bt_list;
132 struct list_head bt_delwrite_queue; 131 struct list_head bt_delwrite_queue;
133 spinlock_t bt_delwrite_lock; 132 spinlock_t bt_delwrite_lock;
134 unsigned long bt_flags; 133 unsigned long bt_flags;
134
135 /* LRU control structures */
136 struct shrinker bt_shrinker;
137 struct list_head bt_lru;
138 spinlock_t bt_lru_lock;
139 unsigned int bt_lru_nr;
135} xfs_buftarg_t; 140} xfs_buftarg_t;
136 141
137/* 142/*
@@ -164,9 +169,11 @@ typedef struct xfs_buf {
164 xfs_off_t b_file_offset; /* offset in file */ 169 xfs_off_t b_file_offset; /* offset in file */
165 size_t b_buffer_length;/* size of buffer in bytes */ 170 size_t b_buffer_length;/* size of buffer in bytes */
166 atomic_t b_hold; /* reference count */ 171 atomic_t b_hold; /* reference count */
172 atomic_t b_lru_ref; /* lru reclaim ref count */
167 xfs_buf_flags_t b_flags; /* status flags */ 173 xfs_buf_flags_t b_flags; /* status flags */
168 struct semaphore b_sema; /* semaphore for lockables */ 174 struct semaphore b_sema; /* semaphore for lockables */
169 175
176 struct list_head b_lru; /* lru list */
170 wait_queue_head_t b_waiters; /* unpin waiters */ 177 wait_queue_head_t b_waiters; /* unpin waiters */
171 struct list_head b_list; 178 struct list_head b_list;
172 struct xfs_perag *b_pag; /* contains rbtree root */ 179 struct xfs_perag *b_pag; /* contains rbtree root */
@@ -264,7 +271,8 @@ extern void xfs_buf_terminate(void);
264#define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \ 271#define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \
265 ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED)) 272 ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED))
266 273
267#define XFS_BUF_STALE(bp) ((bp)->b_flags |= XBF_STALE) 274void xfs_buf_stale(struct xfs_buf *bp);
275#define XFS_BUF_STALE(bp) xfs_buf_stale(bp);
268#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE) 276#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE)
269#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE) 277#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE)
270#define XFS_BUF_SUPER_STALE(bp) do { \ 278#define XFS_BUF_SUPER_STALE(bp) do { \
@@ -328,9 +336,15 @@ extern void xfs_buf_terminate(void);
328#define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length) 336#define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length)
329#define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt)) 337#define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt))
330 338
331#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) do { } while (0) 339static inline void
340xfs_buf_set_ref(
341 struct xfs_buf *bp,
342 int lru_ref)
343{
344 atomic_set(&bp->b_lru_ref, lru_ref);
345}
346#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) xfs_buf_set_ref(bp, ref)
332#define XFS_BUF_SET_VTYPE(bp, type) do { } while (0) 347#define XFS_BUF_SET_VTYPE(bp, type) do { } while (0)
333#define XFS_BUF_SET_REF(bp, ref) do { } while (0)
334 348
335#define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count)) 349#define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count))
336 350
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 3764d74790e..fc0114da7fd 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -70,8 +70,16 @@ xfs_fs_encode_fh(
70 else 70 else
71 fileid_type = FILEID_INO32_GEN_PARENT; 71 fileid_type = FILEID_INO32_GEN_PARENT;
72 72
73 /* filesystem may contain 64bit inode numbers */ 73 /*
74 if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS)) 74 * If the filesystem may contain 64bit inode numbers, we need
75 * to use larger file handles that can represent them.
76 *
77 * While we only allocate inodes that do not fit into 32 bits any
78 * large enough filesystem may contain them, thus the slightly
79 * confusing looking conditional below.
80 */
81 if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS) ||
82 (XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_32BITINODES))
75 fileid_type |= XFS_FILEID_TYPE_64FLAG; 83 fileid_type |= XFS_FILEID_TYPE_64FLAG;
76 84
77 /* 85 /*
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 214ddd71ff7..09649499774 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -37,7 +37,6 @@
37 37
38#include <kmem.h> 38#include <kmem.h>
39#include <mrlock.h> 39#include <mrlock.h>
40#include <sv.h>
41#include <time.h> 40#include <time.h>
42 41
43#include <support/debug.h> 42#include <support/debug.h>
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index c115dd5e95a..a10f6416e56 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -834,8 +834,11 @@ xfsaild_wakeup(
834 struct xfs_ail *ailp, 834 struct xfs_ail *ailp,
835 xfs_lsn_t threshold_lsn) 835 xfs_lsn_t threshold_lsn)
836{ 836{
837 ailp->xa_target = threshold_lsn; 837 /* only ever move the target forwards */
838 wake_up_process(ailp->xa_task); 838 if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0) {
839 ailp->xa_target = threshold_lsn;
840 wake_up_process(ailp->xa_task);
841 }
839} 842}
840 843
841STATIC int 844STATIC int
@@ -847,8 +850,17 @@ xfsaild(
847 long tout = 0; /* milliseconds */ 850 long tout = 0; /* milliseconds */
848 851
849 while (!kthread_should_stop()) { 852 while (!kthread_should_stop()) {
850 schedule_timeout_interruptible(tout ? 853 /*
851 msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT); 854 * for short sleeps indicating congestion, don't allow us to
855 * get woken early. Otherwise all we do is bang on the AIL lock
856 * without making progress.
857 */
858 if (tout && tout <= 20)
859 __set_current_state(TASK_KILLABLE);
860 else
861 __set_current_state(TASK_INTERRUPTIBLE);
862 schedule_timeout(tout ?
863 msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
852 864
853 /* swsusp */ 865 /* swsusp */
854 try_to_freeze(); 866 try_to_freeze();
@@ -1118,6 +1130,8 @@ xfs_fs_evict_inode(
1118 */ 1130 */
1119 ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); 1131 ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
1120 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); 1132 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
1133 lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
1134 &xfs_iolock_reclaimable, "xfs_iolock_reclaimable");
1121 1135
1122 xfs_inactive(ip); 1136 xfs_inactive(ip);
1123} 1137}
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index afb0d7cfad1..a02480de975 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -53,14 +53,30 @@ xfs_inode_ag_walk_grab(
53{ 53{
54 struct inode *inode = VFS_I(ip); 54 struct inode *inode = VFS_I(ip);
55 55
56 ASSERT(rcu_read_lock_held());
57
58 /*
59 * check for stale RCU freed inode
60 *
61 * If the inode has been reallocated, it doesn't matter if it's not in
62 * the AG we are walking - we are walking for writeback, so if it
63 * passes all the "valid inode" checks and is dirty, then we'll write
64 * it back anyway. If it has been reallocated and still being
65 * initialised, the XFS_INEW check below will catch it.
66 */
67 spin_lock(&ip->i_flags_lock);
68 if (!ip->i_ino)
69 goto out_unlock_noent;
70
71 /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
72 if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
73 goto out_unlock_noent;
74 spin_unlock(&ip->i_flags_lock);
75
56 /* nothing to sync during shutdown */ 76 /* nothing to sync during shutdown */
57 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 77 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
58 return EFSCORRUPTED; 78 return EFSCORRUPTED;
59 79
60 /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
61 if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
62 return ENOENT;
63
64 /* If we can't grab the inode, it must be on its way to reclaim. */ 80 /* If we can't grab the inode, it must be on its way to reclaim. */
65 if (!igrab(inode)) 81 if (!igrab(inode))
66 return ENOENT; 82 return ENOENT;
@@ -72,6 +88,10 @@ xfs_inode_ag_walk_grab(
72 88
73 /* inode is valid */ 89 /* inode is valid */
74 return 0; 90 return 0;
91
92out_unlock_noent:
93 spin_unlock(&ip->i_flags_lock);
94 return ENOENT;
75} 95}
76 96
77STATIC int 97STATIC int
@@ -98,12 +118,12 @@ restart:
98 int error = 0; 118 int error = 0;
99 int i; 119 int i;
100 120
101 read_lock(&pag->pag_ici_lock); 121 rcu_read_lock();
102 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, 122 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
103 (void **)batch, first_index, 123 (void **)batch, first_index,
104 XFS_LOOKUP_BATCH); 124 XFS_LOOKUP_BATCH);
105 if (!nr_found) { 125 if (!nr_found) {
106 read_unlock(&pag->pag_ici_lock); 126 rcu_read_unlock();
107 break; 127 break;
108 } 128 }
109 129
@@ -118,18 +138,26 @@ restart:
118 batch[i] = NULL; 138 batch[i] = NULL;
119 139
120 /* 140 /*
121 * Update the index for the next lookup. Catch overflows 141 * Update the index for the next lookup. Catch
122 * into the next AG range which can occur if we have inodes 142 * overflows into the next AG range which can occur if
123 * in the last block of the AG and we are currently 143 * we have inodes in the last block of the AG and we
124 * pointing to the last inode. 144 * are currently pointing to the last inode.
145 *
146 * Because we may see inodes that are from the wrong AG
147 * due to RCU freeing and reallocation, only update the
148 * index if it lies in this AG. It was a race that lead
149 * us to see this inode, so another lookup from the
150 * same index will not find it again.
125 */ 151 */
152 if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno)
153 continue;
126 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); 154 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
127 if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) 155 if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
128 done = 1; 156 done = 1;
129 } 157 }
130 158
131 /* unlock now we've grabbed the inodes. */ 159 /* unlock now we've grabbed the inodes. */
132 read_unlock(&pag->pag_ici_lock); 160 rcu_read_unlock();
133 161
134 for (i = 0; i < nr_found; i++) { 162 for (i = 0; i < nr_found; i++) {
135 if (!batch[i]) 163 if (!batch[i])
@@ -592,12 +620,12 @@ xfs_inode_set_reclaim_tag(
592 struct xfs_perag *pag; 620 struct xfs_perag *pag;
593 621
594 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 622 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
595 write_lock(&pag->pag_ici_lock); 623 spin_lock(&pag->pag_ici_lock);
596 spin_lock(&ip->i_flags_lock); 624 spin_lock(&ip->i_flags_lock);
597 __xfs_inode_set_reclaim_tag(pag, ip); 625 __xfs_inode_set_reclaim_tag(pag, ip);
598 __xfs_iflags_set(ip, XFS_IRECLAIMABLE); 626 __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
599 spin_unlock(&ip->i_flags_lock); 627 spin_unlock(&ip->i_flags_lock);
600 write_unlock(&pag->pag_ici_lock); 628 spin_unlock(&pag->pag_ici_lock);
601 xfs_perag_put(pag); 629 xfs_perag_put(pag);
602} 630}
603 631
@@ -639,9 +667,14 @@ xfs_reclaim_inode_grab(
639 struct xfs_inode *ip, 667 struct xfs_inode *ip,
640 int flags) 668 int flags)
641{ 669{
670 ASSERT(rcu_read_lock_held());
671
672 /* quick check for stale RCU freed inode */
673 if (!ip->i_ino)
674 return 1;
642 675
643 /* 676 /*
644 * do some unlocked checks first to avoid unnecceary lock traffic. 677 * do some unlocked checks first to avoid unnecessary lock traffic.
645 * The first is a flush lock check, the second is an already in reclaim 678 * The first is a flush lock check, the second is an already in reclaim
646 * check. Only do these checks if we are not going to block on locks. 679 * check. Only do these checks if we are not going to block on locks.
647 */ 680 */
@@ -654,11 +687,16 @@ xfs_reclaim_inode_grab(
654 * The radix tree lock here protects a thread in xfs_iget from racing 687 * The radix tree lock here protects a thread in xfs_iget from racing
655 * with us starting reclaim on the inode. Once we have the 688 * with us starting reclaim on the inode. Once we have the
656 * XFS_IRECLAIM flag set it will not touch us. 689 * XFS_IRECLAIM flag set it will not touch us.
690 *
691 * Due to RCU lookup, we may find inodes that have been freed and only
692 * have XFS_IRECLAIM set. Indeed, we may see reallocated inodes that
693 * aren't candidates for reclaim at all, so we must check the
694 * XFS_IRECLAIMABLE is set first before proceeding to reclaim.
657 */ 695 */
658 spin_lock(&ip->i_flags_lock); 696 spin_lock(&ip->i_flags_lock);
659 ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE)); 697 if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) ||
660 if (__xfs_iflags_test(ip, XFS_IRECLAIM)) { 698 __xfs_iflags_test(ip, XFS_IRECLAIM)) {
661 /* ignore as it is already under reclaim */ 699 /* not a reclaim candidate. */
662 spin_unlock(&ip->i_flags_lock); 700 spin_unlock(&ip->i_flags_lock);
663 return 1; 701 return 1;
664 } 702 }
@@ -795,12 +833,12 @@ reclaim:
795 * added to the tree assert that it's been there before to catch 833 * added to the tree assert that it's been there before to catch
796 * problems with the inode life time early on. 834 * problems with the inode life time early on.
797 */ 835 */
798 write_lock(&pag->pag_ici_lock); 836 spin_lock(&pag->pag_ici_lock);
799 if (!radix_tree_delete(&pag->pag_ici_root, 837 if (!radix_tree_delete(&pag->pag_ici_root,
800 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino))) 838 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
801 ASSERT(0); 839 ASSERT(0);
802 __xfs_inode_clear_reclaim(pag, ip); 840 __xfs_inode_clear_reclaim(pag, ip);
803 write_unlock(&pag->pag_ici_lock); 841 spin_unlock(&pag->pag_ici_lock);
804 842
805 /* 843 /*
806 * Here we do an (almost) spurious inode lock in order to coordinate 844 * Here we do an (almost) spurious inode lock in order to coordinate
@@ -864,14 +902,14 @@ restart:
864 struct xfs_inode *batch[XFS_LOOKUP_BATCH]; 902 struct xfs_inode *batch[XFS_LOOKUP_BATCH];
865 int i; 903 int i;
866 904
867 write_lock(&pag->pag_ici_lock); 905 rcu_read_lock();
868 nr_found = radix_tree_gang_lookup_tag( 906 nr_found = radix_tree_gang_lookup_tag(
869 &pag->pag_ici_root, 907 &pag->pag_ici_root,
870 (void **)batch, first_index, 908 (void **)batch, first_index,
871 XFS_LOOKUP_BATCH, 909 XFS_LOOKUP_BATCH,
872 XFS_ICI_RECLAIM_TAG); 910 XFS_ICI_RECLAIM_TAG);
873 if (!nr_found) { 911 if (!nr_found) {
874 write_unlock(&pag->pag_ici_lock); 912 rcu_read_unlock();
875 break; 913 break;
876 } 914 }
877 915
@@ -891,14 +929,24 @@ restart:
891 * occur if we have inodes in the last block of 929 * occur if we have inodes in the last block of
892 * the AG and we are currently pointing to the 930 * the AG and we are currently pointing to the
893 * last inode. 931 * last inode.
932 *
933 * Because we may see inodes that are from the
934 * wrong AG due to RCU freeing and
935 * reallocation, only update the index if it
936 * lies in this AG. It was a race that lead us
937 * to see this inode, so another lookup from
938 * the same index will not find it again.
894 */ 939 */
940 if (XFS_INO_TO_AGNO(mp, ip->i_ino) !=
941 pag->pag_agno)
942 continue;
895 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); 943 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
896 if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) 944 if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
897 done = 1; 945 done = 1;
898 } 946 }
899 947
900 /* unlock now we've grabbed the inodes. */ 948 /* unlock now we've grabbed the inodes. */
901 write_unlock(&pag->pag_ici_lock); 949 rcu_read_unlock();
902 950
903 for (i = 0; i < nr_found; i++) { 951 for (i = 0; i < nr_found; i++) {
904 if (!batch[i]) 952 if (!batch[i])
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index acef2e98c59..647af2a2e7a 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -766,8 +766,8 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
766 __field(int, curr_res) 766 __field(int, curr_res)
767 __field(int, unit_res) 767 __field(int, unit_res)
768 __field(unsigned int, flags) 768 __field(unsigned int, flags)
769 __field(void *, reserve_headq) 769 __field(int, reserveq)
770 __field(void *, write_headq) 770 __field(int, writeq)
771 __field(int, grant_reserve_cycle) 771 __field(int, grant_reserve_cycle)
772 __field(int, grant_reserve_bytes) 772 __field(int, grant_reserve_bytes)
773 __field(int, grant_write_cycle) 773 __field(int, grant_write_cycle)
@@ -784,19 +784,21 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
784 __entry->curr_res = tic->t_curr_res; 784 __entry->curr_res = tic->t_curr_res;
785 __entry->unit_res = tic->t_unit_res; 785 __entry->unit_res = tic->t_unit_res;
786 __entry->flags = tic->t_flags; 786 __entry->flags = tic->t_flags;
787 __entry->reserve_headq = log->l_reserve_headq; 787 __entry->reserveq = list_empty(&log->l_reserveq);
788 __entry->write_headq = log->l_write_headq; 788 __entry->writeq = list_empty(&log->l_writeq);
789 __entry->grant_reserve_cycle = log->l_grant_reserve_cycle; 789 xlog_crack_grant_head(&log->l_grant_reserve_head,
790 __entry->grant_reserve_bytes = log->l_grant_reserve_bytes; 790 &__entry->grant_reserve_cycle,
791 __entry->grant_write_cycle = log->l_grant_write_cycle; 791 &__entry->grant_reserve_bytes);
792 __entry->grant_write_bytes = log->l_grant_write_bytes; 792 xlog_crack_grant_head(&log->l_grant_write_head,
793 &__entry->grant_write_cycle,
794 &__entry->grant_write_bytes);
793 __entry->curr_cycle = log->l_curr_cycle; 795 __entry->curr_cycle = log->l_curr_cycle;
794 __entry->curr_block = log->l_curr_block; 796 __entry->curr_block = log->l_curr_block;
795 __entry->tail_lsn = log->l_tail_lsn; 797 __entry->tail_lsn = atomic64_read(&log->l_tail_lsn);
796 ), 798 ),
797 TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u " 799 TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u "
798 "t_unit_res %u t_flags %s reserve_headq 0x%p " 800 "t_unit_res %u t_flags %s reserveq %s "
799 "write_headq 0x%p grant_reserve_cycle %d " 801 "writeq %s grant_reserve_cycle %d "
800 "grant_reserve_bytes %d grant_write_cycle %d " 802 "grant_reserve_bytes %d grant_write_cycle %d "
801 "grant_write_bytes %d curr_cycle %d curr_block %d " 803 "grant_write_bytes %d curr_cycle %d curr_block %d "
802 "tail_cycle %d tail_block %d", 804 "tail_cycle %d tail_block %d",
@@ -807,8 +809,8 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
807 __entry->curr_res, 809 __entry->curr_res,
808 __entry->unit_res, 810 __entry->unit_res,
809 __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS), 811 __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS),
810 __entry->reserve_headq, 812 __entry->reserveq ? "empty" : "active",
811 __entry->write_headq, 813 __entry->writeq ? "empty" : "active",
812 __entry->grant_reserve_cycle, 814 __entry->grant_reserve_cycle,
813 __entry->grant_reserve_bytes, 815 __entry->grant_reserve_bytes,
814 __entry->grant_write_cycle, 816 __entry->grant_write_cycle,
@@ -835,6 +837,7 @@ DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep1);
835DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1); 837DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1);
836DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2); 838DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2);
837DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2); 839DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2);
840DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up);
838DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter); 841DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter);
839DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit); 842DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit);
840DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error); 843DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error);
@@ -842,6 +845,7 @@ DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep1);
842DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1); 845DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1);
843DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2); 846DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2);
844DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2); 847DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2);
848DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up);
845DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter); 849DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter);
846DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit); 850DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit);
847DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub); 851DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub);
@@ -935,10 +939,10 @@ DEFINE_PAGE_EVENT(xfs_writepage);
935DEFINE_PAGE_EVENT(xfs_releasepage); 939DEFINE_PAGE_EVENT(xfs_releasepage);
936DEFINE_PAGE_EVENT(xfs_invalidatepage); 940DEFINE_PAGE_EVENT(xfs_invalidatepage);
937 941
938DECLARE_EVENT_CLASS(xfs_iomap_class, 942DECLARE_EVENT_CLASS(xfs_imap_class,
939 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, 943 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
940 int flags, struct xfs_bmbt_irec *irec), 944 int type, struct xfs_bmbt_irec *irec),
941 TP_ARGS(ip, offset, count, flags, irec), 945 TP_ARGS(ip, offset, count, type, irec),
942 TP_STRUCT__entry( 946 TP_STRUCT__entry(
943 __field(dev_t, dev) 947 __field(dev_t, dev)
944 __field(xfs_ino_t, ino) 948 __field(xfs_ino_t, ino)
@@ -946,7 +950,7 @@ DECLARE_EVENT_CLASS(xfs_iomap_class,
946 __field(loff_t, new_size) 950 __field(loff_t, new_size)
947 __field(loff_t, offset) 951 __field(loff_t, offset)
948 __field(size_t, count) 952 __field(size_t, count)
949 __field(int, flags) 953 __field(int, type)
950 __field(xfs_fileoff_t, startoff) 954 __field(xfs_fileoff_t, startoff)
951 __field(xfs_fsblock_t, startblock) 955 __field(xfs_fsblock_t, startblock)
952 __field(xfs_filblks_t, blockcount) 956 __field(xfs_filblks_t, blockcount)
@@ -958,13 +962,13 @@ DECLARE_EVENT_CLASS(xfs_iomap_class,
958 __entry->new_size = ip->i_new_size; 962 __entry->new_size = ip->i_new_size;
959 __entry->offset = offset; 963 __entry->offset = offset;
960 __entry->count = count; 964 __entry->count = count;
961 __entry->flags = flags; 965 __entry->type = type;
962 __entry->startoff = irec ? irec->br_startoff : 0; 966 __entry->startoff = irec ? irec->br_startoff : 0;
963 __entry->startblock = irec ? irec->br_startblock : 0; 967 __entry->startblock = irec ? irec->br_startblock : 0;
964 __entry->blockcount = irec ? irec->br_blockcount : 0; 968 __entry->blockcount = irec ? irec->br_blockcount : 0;
965 ), 969 ),
966 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " 970 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
967 "offset 0x%llx count %zd flags %s " 971 "offset 0x%llx count %zd type %s "
968 "startoff 0x%llx startblock %lld blockcount 0x%llx", 972 "startoff 0x%llx startblock %lld blockcount 0x%llx",
969 MAJOR(__entry->dev), MINOR(__entry->dev), 973 MAJOR(__entry->dev), MINOR(__entry->dev),
970 __entry->ino, 974 __entry->ino,
@@ -972,20 +976,21 @@ DECLARE_EVENT_CLASS(xfs_iomap_class,
972 __entry->new_size, 976 __entry->new_size,
973 __entry->offset, 977 __entry->offset,
974 __entry->count, 978 __entry->count,
975 __print_flags(__entry->flags, "|", BMAPI_FLAGS), 979 __print_symbolic(__entry->type, XFS_IO_TYPES),
976 __entry->startoff, 980 __entry->startoff,
977 (__int64_t)__entry->startblock, 981 (__int64_t)__entry->startblock,
978 __entry->blockcount) 982 __entry->blockcount)
979) 983)
980 984
981#define DEFINE_IOMAP_EVENT(name) \ 985#define DEFINE_IOMAP_EVENT(name) \
982DEFINE_EVENT(xfs_iomap_class, name, \ 986DEFINE_EVENT(xfs_imap_class, name, \
983 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \ 987 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \
984 int flags, struct xfs_bmbt_irec *irec), \ 988 int type, struct xfs_bmbt_irec *irec), \
985 TP_ARGS(ip, offset, count, flags, irec)) 989 TP_ARGS(ip, offset, count, type, irec))
986DEFINE_IOMAP_EVENT(xfs_iomap_enter); 990DEFINE_IOMAP_EVENT(xfs_map_blocks_found);
987DEFINE_IOMAP_EVENT(xfs_iomap_found); 991DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc);
988DEFINE_IOMAP_EVENT(xfs_iomap_alloc); 992DEFINE_IOMAP_EVENT(xfs_get_blocks_found);
993DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
989 994
990DECLARE_EVENT_CLASS(xfs_simple_io_class, 995DECLARE_EVENT_CLASS(xfs_simple_io_class,
991 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), 996 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
@@ -1022,6 +1027,7 @@ DEFINE_EVENT(xfs_simple_io_class, name, \
1022 TP_ARGS(ip, offset, count)) 1027 TP_ARGS(ip, offset, count))
1023DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc); 1028DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc);
1024DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); 1029DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
1030DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound);
1025 1031
1026 1032
1027TRACE_EVENT(xfs_itruncate_start, 1033TRACE_EVENT(xfs_itruncate_start,
@@ -1420,6 +1426,7 @@ DEFINE_EVENT(xfs_alloc_class, name, \
1420 TP_PROTO(struct xfs_alloc_arg *args), \ 1426 TP_PROTO(struct xfs_alloc_arg *args), \
1421 TP_ARGS(args)) 1427 TP_ARGS(args))
1422DEFINE_ALLOC_EVENT(xfs_alloc_exact_done); 1428DEFINE_ALLOC_EVENT(xfs_alloc_exact_done);
1429DEFINE_ALLOC_EVENT(xfs_alloc_exact_notfound);
1423DEFINE_ALLOC_EVENT(xfs_alloc_exact_error); 1430DEFINE_ALLOC_EVENT(xfs_alloc_exact_error);
1424DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft); 1431DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft);
1425DEFINE_ALLOC_EVENT(xfs_alloc_near_first); 1432DEFINE_ALLOC_EVENT(xfs_alloc_near_first);
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index faf8e1a83a1..d22aa310310 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -149,7 +149,6 @@ xfs_qm_dqdestroy(
149 ASSERT(list_empty(&dqp->q_freelist)); 149 ASSERT(list_empty(&dqp->q_freelist));
150 150
151 mutex_destroy(&dqp->q_qlock); 151 mutex_destroy(&dqp->q_qlock);
152 sv_destroy(&dqp->q_pinwait);
153 kmem_zone_free(xfs_Gqm->qm_dqzone, dqp); 152 kmem_zone_free(xfs_Gqm->qm_dqzone, dqp);
154 153
155 atomic_dec(&xfs_Gqm->qm_totaldquots); 154 atomic_dec(&xfs_Gqm->qm_totaldquots);
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 0135e2a669d..11dd72070cb 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -42,7 +42,7 @@ struct xfs_acl {
42#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1) 42#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1)
43 43
44#ifdef CONFIG_XFS_POSIX_ACL 44#ifdef CONFIG_XFS_POSIX_ACL
45extern int xfs_check_acl(struct inode *inode, int mask); 45extern int xfs_check_acl(struct inode *inode, int mask, unsigned int flags);
46extern struct posix_acl *xfs_get_acl(struct inode *inode, int type); 46extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
47extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl); 47extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl);
48extern int xfs_acl_chmod(struct inode *inode); 48extern int xfs_acl_chmod(struct inode *inode);
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 63c7a1a6c02..58632cc17f2 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -227,7 +227,7 @@ typedef struct xfs_perag {
227 227
228 atomic_t pagf_fstrms; /* # of filestreams active in this AG */ 228 atomic_t pagf_fstrms; /* # of filestreams active in this AG */
229 229
230 rwlock_t pag_ici_lock; /* incore inode lock */ 230 spinlock_t pag_ici_lock; /* incore inode cache lock */
231 struct radix_tree_root pag_ici_root; /* incore inode cache root */ 231 struct radix_tree_root pag_ici_root; /* incore inode cache root */
232 int pag_ici_reclaimable; /* reclaimable inodes */ 232 int pag_ici_reclaimable; /* reclaimable inodes */
233 struct mutex pag_ici_reclaim_lock; /* serialisation point */ 233 struct mutex pag_ici_reclaim_lock; /* serialisation point */
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 112abc439ca..fa8723f5870 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -577,61 +577,58 @@ xfs_alloc_ag_vextent_exact(
577 xfs_extlen_t rlen; /* length of returned extent */ 577 xfs_extlen_t rlen; /* length of returned extent */
578 578
579 ASSERT(args->alignment == 1); 579 ASSERT(args->alignment == 1);
580
580 /* 581 /*
581 * Allocate/initialize a cursor for the by-number freespace btree. 582 * Allocate/initialize a cursor for the by-number freespace btree.
582 */ 583 */
583 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 584 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
584 args->agno, XFS_BTNUM_BNO); 585 args->agno, XFS_BTNUM_BNO);
586
585 /* 587 /*
586 * Lookup bno and minlen in the btree (minlen is irrelevant, really). 588 * Lookup bno and minlen in the btree (minlen is irrelevant, really).
587 * Look for the closest free block <= bno, it must contain bno 589 * Look for the closest free block <= bno, it must contain bno
588 * if any free block does. 590 * if any free block does.
589 */ 591 */
590 if ((error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i))) 592 error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
593 if (error)
591 goto error0; 594 goto error0;
592 if (!i) { 595 if (!i)
593 /* 596 goto not_found;
594 * Didn't find it, return null. 597
595 */
596 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
597 args->agbno = NULLAGBLOCK;
598 return 0;
599 }
600 /* 598 /*
601 * Grab the freespace record. 599 * Grab the freespace record.
602 */ 600 */
603 if ((error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i))) 601 error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
602 if (error)
604 goto error0; 603 goto error0;
605 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 604 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
606 ASSERT(fbno <= args->agbno); 605 ASSERT(fbno <= args->agbno);
607 minend = args->agbno + args->minlen; 606 minend = args->agbno + args->minlen;
608 maxend = args->agbno + args->maxlen; 607 maxend = args->agbno + args->maxlen;
609 fend = fbno + flen; 608 fend = fbno + flen;
609
610 /* 610 /*
611 * Give up if the freespace isn't long enough for the minimum request. 611 * Give up if the freespace isn't long enough for the minimum request.
612 */ 612 */
613 if (fend < minend) { 613 if (fend < minend)
614 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 614 goto not_found;
615 args->agbno = NULLAGBLOCK; 615
616 return 0;
617 }
618 /* 616 /*
619 * End of extent will be smaller of the freespace end and the 617 * End of extent will be smaller of the freespace end and the
620 * maximal requested end. 618 * maximal requested end.
621 */ 619 *
622 end = XFS_AGBLOCK_MIN(fend, maxend);
623 /*
624 * Fix the length according to mod and prod if given. 620 * Fix the length according to mod and prod if given.
625 */ 621 */
622 end = XFS_AGBLOCK_MIN(fend, maxend);
626 args->len = end - args->agbno; 623 args->len = end - args->agbno;
627 xfs_alloc_fix_len(args); 624 xfs_alloc_fix_len(args);
628 if (!xfs_alloc_fix_minleft(args)) { 625 if (!xfs_alloc_fix_minleft(args))
629 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 626 goto not_found;
630 return 0; 627
631 }
632 rlen = args->len; 628 rlen = args->len;
633 ASSERT(args->agbno + rlen <= fend); 629 ASSERT(args->agbno + rlen <= fend);
634 end = args->agbno + rlen; 630 end = args->agbno + rlen;
631
635 /* 632 /*
636 * We are allocating agbno for rlen [agbno .. end] 633 * We are allocating agbno for rlen [agbno .. end]
637 * Allocate/initialize a cursor for the by-size btree. 634 * Allocate/initialize a cursor for the by-size btree.
@@ -640,16 +637,25 @@ xfs_alloc_ag_vextent_exact(
640 args->agno, XFS_BTNUM_CNT); 637 args->agno, XFS_BTNUM_CNT);
641 ASSERT(args->agbno + args->len <= 638 ASSERT(args->agbno + args->len <=
642 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); 639 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
643 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, 640 error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
644 args->agbno, args->len, XFSA_FIXUP_BNO_OK))) { 641 args->len, XFSA_FIXUP_BNO_OK);
642 if (error) {
645 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); 643 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
646 goto error0; 644 goto error0;
647 } 645 }
646
648 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 647 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
649 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 648 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
650 649
651 trace_xfs_alloc_exact_done(args);
652 args->wasfromfl = 0; 650 args->wasfromfl = 0;
651 trace_xfs_alloc_exact_done(args);
652 return 0;
653
654not_found:
655 /* Didn't find it, return null. */
656 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
657 args->agbno = NULLAGBLOCK;
658 trace_xfs_alloc_exact_notfound(args);
653 return 0; 659 return 0;
654 660
655error0: 661error0:
@@ -659,6 +665,95 @@ error0:
659} 665}
660 666
661/* 667/*
668 * Search the btree in a given direction via the search cursor and compare
669 * the records found against the good extent we've already found.
670 */
671STATIC int
672xfs_alloc_find_best_extent(
673 struct xfs_alloc_arg *args, /* allocation argument structure */
674 struct xfs_btree_cur **gcur, /* good cursor */
675 struct xfs_btree_cur **scur, /* searching cursor */
676 xfs_agblock_t gdiff, /* difference for search comparison */
677 xfs_agblock_t *sbno, /* extent found by search */
678 xfs_extlen_t *slen,
679 xfs_extlen_t *slena, /* aligned length */
680 int dir) /* 0 = search right, 1 = search left */
681{
682 xfs_agblock_t bno;
683 xfs_agblock_t new;
684 xfs_agblock_t sdiff;
685 int error;
686 int i;
687
688 /* The good extent is perfect, no need to search. */
689 if (!gdiff)
690 goto out_use_good;
691
692 /*
693 * Look until we find a better one, run out of space or run off the end.
694 */
695 do {
696 error = xfs_alloc_get_rec(*scur, sbno, slen, &i);
697 if (error)
698 goto error0;
699 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
700 xfs_alloc_compute_aligned(*sbno, *slen, args->alignment,
701 args->minlen, &bno, slena);
702
703 /*
704 * The good extent is closer than this one.
705 */
706 if (!dir) {
707 if (bno >= args->agbno + gdiff)
708 goto out_use_good;
709 } else {
710 if (bno <= args->agbno - gdiff)
711 goto out_use_good;
712 }
713
714 /*
715 * Same distance, compare length and pick the best.
716 */
717 if (*slena >= args->minlen) {
718 args->len = XFS_EXTLEN_MIN(*slena, args->maxlen);
719 xfs_alloc_fix_len(args);
720
721 sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
722 args->alignment, *sbno,
723 *slen, &new);
724
725 /*
726 * Choose closer size and invalidate other cursor.
727 */
728 if (sdiff < gdiff)
729 goto out_use_search;
730 goto out_use_good;
731 }
732
733 if (!dir)
734 error = xfs_btree_increment(*scur, 0, &i);
735 else
736 error = xfs_btree_decrement(*scur, 0, &i);
737 if (error)
738 goto error0;
739 } while (i);
740
741out_use_good:
742 xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR);
743 *scur = NULL;
744 return 0;
745
746out_use_search:
747 xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR);
748 *gcur = NULL;
749 return 0;
750
751error0:
752 /* caller invalidates cursors */
753 return error;
754}
755
756/*
662 * Allocate a variable extent near bno in the allocation group agno. 757 * Allocate a variable extent near bno in the allocation group agno.
663 * Extent's length (returned in len) will be between minlen and maxlen, 758 * Extent's length (returned in len) will be between minlen and maxlen,
664 * and of the form k * prod + mod unless there's nothing that large. 759 * and of the form k * prod + mod unless there's nothing that large.
@@ -925,203 +1020,45 @@ xfs_alloc_ag_vextent_near(
925 } 1020 }
926 } 1021 }
927 } while (bno_cur_lt || bno_cur_gt); 1022 } while (bno_cur_lt || bno_cur_gt);
1023
928 /* 1024 /*
929 * Got both cursors still active, need to find better entry. 1025 * Got both cursors still active, need to find better entry.
930 */ 1026 */
931 if (bno_cur_lt && bno_cur_gt) { 1027 if (bno_cur_lt && bno_cur_gt) {
932 /*
933 * Left side is long enough, look for a right side entry.
934 */
935 if (ltlena >= args->minlen) { 1028 if (ltlena >= args->minlen) {
936 /* 1029 /*
937 * Fix up the length. 1030 * Left side is good, look for a right side entry.
938 */ 1031 */
939 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); 1032 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
940 xfs_alloc_fix_len(args); 1033 xfs_alloc_fix_len(args);
941 rlen = args->len; 1034 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
942 ltdiff = xfs_alloc_compute_diff(args->agbno, rlen,
943 args->alignment, ltbno, ltlen, &ltnew); 1035 args->alignment, ltbno, ltlen, &ltnew);
1036
1037 error = xfs_alloc_find_best_extent(args,
1038 &bno_cur_lt, &bno_cur_gt,
1039 ltdiff, &gtbno, &gtlen, &gtlena,
1040 0 /* search right */);
1041 } else {
1042 ASSERT(gtlena >= args->minlen);
1043
944 /* 1044 /*
945 * Not perfect. 1045 * Right side is good, look for a left side entry.
946 */
947 if (ltdiff) {
948 /*
949 * Look until we find a better one, run out of
950 * space, or run off the end.
951 */
952 while (bno_cur_lt && bno_cur_gt) {
953 if ((error = xfs_alloc_get_rec(
954 bno_cur_gt, &gtbno,
955 &gtlen, &i)))
956 goto error0;
957 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
958 xfs_alloc_compute_aligned(gtbno, gtlen,
959 args->alignment, args->minlen,
960 &gtbnoa, &gtlena);
961 /*
962 * The left one is clearly better.
963 */
964 if (gtbnoa >= args->agbno + ltdiff) {
965 xfs_btree_del_cursor(
966 bno_cur_gt,
967 XFS_BTREE_NOERROR);
968 bno_cur_gt = NULL;
969 break;
970 }
971 /*
972 * If we reach a big enough entry,
973 * compare the two and pick the best.
974 */
975 if (gtlena >= args->minlen) {
976 args->len =
977 XFS_EXTLEN_MIN(gtlena,
978 args->maxlen);
979 xfs_alloc_fix_len(args);
980 rlen = args->len;
981 gtdiff = xfs_alloc_compute_diff(
982 args->agbno, rlen,
983 args->alignment,
984 gtbno, gtlen, &gtnew);
985 /*
986 * Right side is better.
987 */
988 if (gtdiff < ltdiff) {
989 xfs_btree_del_cursor(
990 bno_cur_lt,
991 XFS_BTREE_NOERROR);
992 bno_cur_lt = NULL;
993 }
994 /*
995 * Left side is better.
996 */
997 else {
998 xfs_btree_del_cursor(
999 bno_cur_gt,
1000 XFS_BTREE_NOERROR);
1001 bno_cur_gt = NULL;
1002 }
1003 break;
1004 }
1005 /*
1006 * Fell off the right end.
1007 */
1008 if ((error = xfs_btree_increment(
1009 bno_cur_gt, 0, &i)))
1010 goto error0;
1011 if (!i) {
1012 xfs_btree_del_cursor(
1013 bno_cur_gt,
1014 XFS_BTREE_NOERROR);
1015 bno_cur_gt = NULL;
1016 break;
1017 }
1018 }
1019 }
1020 /*
1021 * The left side is perfect, trash the right side.
1022 */
1023 else {
1024 xfs_btree_del_cursor(bno_cur_gt,
1025 XFS_BTREE_NOERROR);
1026 bno_cur_gt = NULL;
1027 }
1028 }
1029 /*
1030 * It's the right side that was found first, look left.
1031 */
1032 else {
1033 /*
1034 * Fix up the length.
1035 */ 1046 */
1036 args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); 1047 args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
1037 xfs_alloc_fix_len(args); 1048 xfs_alloc_fix_len(args);
1038 rlen = args->len; 1049 gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1039 gtdiff = xfs_alloc_compute_diff(args->agbno, rlen,
1040 args->alignment, gtbno, gtlen, &gtnew); 1050 args->alignment, gtbno, gtlen, &gtnew);
1041 /* 1051
1042 * Right side entry isn't perfect. 1052 error = xfs_alloc_find_best_extent(args,
1043 */ 1053 &bno_cur_gt, &bno_cur_lt,
1044 if (gtdiff) { 1054 gtdiff, &ltbno, &ltlen, &ltlena,
1045 /* 1055 1 /* search left */);
1046 * Look until we find a better one, run out of
1047 * space, or run off the end.
1048 */
1049 while (bno_cur_lt && bno_cur_gt) {
1050 if ((error = xfs_alloc_get_rec(
1051 bno_cur_lt, &ltbno,
1052 &ltlen, &i)))
1053 goto error0;
1054 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1055 xfs_alloc_compute_aligned(ltbno, ltlen,
1056 args->alignment, args->minlen,
1057 &ltbnoa, &ltlena);
1058 /*
1059 * The right one is clearly better.
1060 */
1061 if (ltbnoa <= args->agbno - gtdiff) {
1062 xfs_btree_del_cursor(
1063 bno_cur_lt,
1064 XFS_BTREE_NOERROR);
1065 bno_cur_lt = NULL;
1066 break;
1067 }
1068 /*
1069 * If we reach a big enough entry,
1070 * compare the two and pick the best.
1071 */
1072 if (ltlena >= args->minlen) {
1073 args->len = XFS_EXTLEN_MIN(
1074 ltlena, args->maxlen);
1075 xfs_alloc_fix_len(args);
1076 rlen = args->len;
1077 ltdiff = xfs_alloc_compute_diff(
1078 args->agbno, rlen,
1079 args->alignment,
1080 ltbno, ltlen, &ltnew);
1081 /*
1082 * Left side is better.
1083 */
1084 if (ltdiff < gtdiff) {
1085 xfs_btree_del_cursor(
1086 bno_cur_gt,
1087 XFS_BTREE_NOERROR);
1088 bno_cur_gt = NULL;
1089 }
1090 /*
1091 * Right side is better.
1092 */
1093 else {
1094 xfs_btree_del_cursor(
1095 bno_cur_lt,
1096 XFS_BTREE_NOERROR);
1097 bno_cur_lt = NULL;
1098 }
1099 break;
1100 }
1101 /*
1102 * Fell off the left end.
1103 */
1104 if ((error = xfs_btree_decrement(
1105 bno_cur_lt, 0, &i)))
1106 goto error0;
1107 if (!i) {
1108 xfs_btree_del_cursor(bno_cur_lt,
1109 XFS_BTREE_NOERROR);
1110 bno_cur_lt = NULL;
1111 break;
1112 }
1113 }
1114 }
1115 /*
1116 * The right side is perfect, trash the left side.
1117 */
1118 else {
1119 xfs_btree_del_cursor(bno_cur_lt,
1120 XFS_BTREE_NOERROR);
1121 bno_cur_lt = NULL;
1122 }
1123 } 1056 }
1057
1058 if (error)
1059 goto error0;
1124 } 1060 }
1061
1125 /* 1062 /*
1126 * If we couldn't get anything, give up. 1063 * If we couldn't get anything, give up.
1127 */ 1064 */
@@ -1130,6 +1067,7 @@ xfs_alloc_ag_vextent_near(
1130 args->agbno = NULLAGBLOCK; 1067 args->agbno = NULLAGBLOCK;
1131 return 0; 1068 return 0;
1132 } 1069 }
1070
1133 /* 1071 /*
1134 * At this point we have selected a freespace entry, either to the 1072 * At this point we have selected a freespace entry, either to the
1135 * left or to the right. If it's on the right, copy all the 1073 * left or to the right. If it's on the right, copy all the
@@ -1146,6 +1084,7 @@ xfs_alloc_ag_vextent_near(
1146 j = 1; 1084 j = 1;
1147 } else 1085 } else
1148 j = 0; 1086 j = 0;
1087
1149 /* 1088 /*
1150 * Fix up the length and compute the useful address. 1089 * Fix up the length and compute the useful address.
1151 */ 1090 */
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index a6cff8edcdb..71e90dc2aeb 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -637,7 +637,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
637 * It didn't all fit, so we have to sort everything on hashval. 637 * It didn't all fit, so we have to sort everything on hashval.
638 */ 638 */
639 sbsize = sf->hdr.count * sizeof(*sbuf); 639 sbsize = sf->hdr.count * sizeof(*sbuf);
640 sbp = sbuf = kmem_alloc(sbsize, KM_SLEEP); 640 sbp = sbuf = kmem_alloc(sbsize, KM_SLEEP | KM_NOFS);
641 641
642 /* 642 /*
643 * Scan the attribute list for the rest of the entries, storing 643 * Scan the attribute list for the rest of the entries, storing
@@ -2386,7 +2386,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
2386 args.dp = context->dp; 2386 args.dp = context->dp;
2387 args.whichfork = XFS_ATTR_FORK; 2387 args.whichfork = XFS_ATTR_FORK;
2388 args.valuelen = valuelen; 2388 args.valuelen = valuelen;
2389 args.value = kmem_alloc(valuelen, KM_SLEEP); 2389 args.value = kmem_alloc(valuelen, KM_SLEEP | KM_NOFS);
2390 args.rmtblkno = be32_to_cpu(name_rmt->valueblk); 2390 args.rmtblkno = be32_to_cpu(name_rmt->valueblk);
2391 args.rmtblkcnt = XFS_B_TO_FSB(args.dp->i_mount, valuelen); 2391 args.rmtblkcnt = XFS_B_TO_FSB(args.dp->i_mount, valuelen);
2392 retval = xfs_attr_rmtval_get(&args); 2392 retval = xfs_attr_rmtval_get(&args);
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 04f9cca8da7..2f9e97c128a 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -634,9 +634,8 @@ xfs_btree_read_bufl(
634 return error; 634 return error;
635 } 635 }
636 ASSERT(!bp || !XFS_BUF_GETERROR(bp)); 636 ASSERT(!bp || !XFS_BUF_GETERROR(bp));
637 if (bp != NULL) { 637 if (bp)
638 XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval); 638 XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
639 }
640 *bpp = bp; 639 *bpp = bp;
641 return 0; 640 return 0;
642} 641}
@@ -944,13 +943,13 @@ xfs_btree_set_refs(
944 switch (cur->bc_btnum) { 943 switch (cur->bc_btnum) {
945 case XFS_BTNUM_BNO: 944 case XFS_BTNUM_BNO:
946 case XFS_BTNUM_CNT: 945 case XFS_BTNUM_CNT:
947 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_ALLOC_BTREE_REF); 946 XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, XFS_ALLOC_BTREE_REF);
948 break; 947 break;
949 case XFS_BTNUM_INO: 948 case XFS_BTNUM_INO:
950 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_INOMAP, XFS_INO_BTREE_REF); 949 XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, XFS_INO_BTREE_REF);
951 break; 950 break;
952 case XFS_BTNUM_BMAP: 951 case XFS_BTNUM_BMAP:
953 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_BMAP_BTREE_REF); 952 XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, XFS_BMAP_BTREE_REF);
954 break; 953 break;
955 default: 954 default:
956 ASSERT(0); 955 ASSERT(0);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 2686d0d54c5..ed2b65f3f8b 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -142,7 +142,7 @@ xfs_buf_item_log_check(
142#endif 142#endif
143 143
144STATIC void xfs_buf_error_relse(xfs_buf_t *bp); 144STATIC void xfs_buf_error_relse(xfs_buf_t *bp);
145STATIC void xfs_buf_do_callbacks(xfs_buf_t *bp, xfs_log_item_t *lip); 145STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp);
146 146
147/* 147/*
148 * This returns the number of log iovecs needed to log the 148 * This returns the number of log iovecs needed to log the
@@ -450,7 +450,7 @@ xfs_buf_item_unpin(
450 * xfs_trans_ail_delete() drops the AIL lock. 450 * xfs_trans_ail_delete() drops the AIL lock.
451 */ 451 */
452 if (bip->bli_flags & XFS_BLI_STALE_INODE) { 452 if (bip->bli_flags & XFS_BLI_STALE_INODE) {
453 xfs_buf_do_callbacks(bp, (xfs_log_item_t *)bip); 453 xfs_buf_do_callbacks(bp);
454 XFS_BUF_SET_FSPRIVATE(bp, NULL); 454 XFS_BUF_SET_FSPRIVATE(bp, NULL);
455 XFS_BUF_CLR_IODONE_FUNC(bp); 455 XFS_BUF_CLR_IODONE_FUNC(bp);
456 } else { 456 } else {
@@ -918,15 +918,26 @@ xfs_buf_attach_iodone(
918 XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks); 918 XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks);
919} 919}
920 920
921/*
922 * We can have many callbacks on a buffer. Running the callbacks individually
923 * can cause a lot of contention on the AIL lock, so we allow for a single
924 * callback to be able to scan the remaining lip->li_bio_list for other items
925 * of the same type and callback to be processed in the first call.
926 *
927 * As a result, the loop walking the callback list below will also modify the
928 * list. It removes the first item from the list and then runs the callback.
929 * The loop then restarts from the new head of the list. This allows the
930 * callback to scan and modify the list attached to the buffer and we don't
931 * have to care about maintaining a next item pointer.
932 */
921STATIC void 933STATIC void
922xfs_buf_do_callbacks( 934xfs_buf_do_callbacks(
923 xfs_buf_t *bp, 935 struct xfs_buf *bp)
924 xfs_log_item_t *lip)
925{ 936{
926 xfs_log_item_t *nlip; 937 struct xfs_log_item *lip;
927 938
928 while (lip != NULL) { 939 while ((lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *)) != NULL) {
929 nlip = lip->li_bio_list; 940 XFS_BUF_SET_FSPRIVATE(bp, lip->li_bio_list);
930 ASSERT(lip->li_cb != NULL); 941 ASSERT(lip->li_cb != NULL);
931 /* 942 /*
932 * Clear the next pointer so we don't have any 943 * Clear the next pointer so we don't have any
@@ -936,7 +947,6 @@ xfs_buf_do_callbacks(
936 */ 947 */
937 lip->li_bio_list = NULL; 948 lip->li_bio_list = NULL;
938 lip->li_cb(bp, lip); 949 lip->li_cb(bp, lip);
939 lip = nlip;
940 } 950 }
941} 951}
942 952
@@ -970,7 +980,7 @@ xfs_buf_iodone_callbacks(
970 ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp); 980 ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp);
971 XFS_BUF_SUPER_STALE(bp); 981 XFS_BUF_SUPER_STALE(bp);
972 trace_xfs_buf_item_iodone(bp, _RET_IP_); 982 trace_xfs_buf_item_iodone(bp, _RET_IP_);
973 xfs_buf_do_callbacks(bp, lip); 983 xfs_buf_do_callbacks(bp);
974 XFS_BUF_SET_FSPRIVATE(bp, NULL); 984 XFS_BUF_SET_FSPRIVATE(bp, NULL);
975 XFS_BUF_CLR_IODONE_FUNC(bp); 985 XFS_BUF_CLR_IODONE_FUNC(bp);
976 xfs_buf_ioend(bp, 0); 986 xfs_buf_ioend(bp, 0);
@@ -1029,7 +1039,7 @@ xfs_buf_iodone_callbacks(
1029 return; 1039 return;
1030 } 1040 }
1031 1041
1032 xfs_buf_do_callbacks(bp, lip); 1042 xfs_buf_do_callbacks(bp);
1033 XFS_BUF_SET_FSPRIVATE(bp, NULL); 1043 XFS_BUF_SET_FSPRIVATE(bp, NULL);
1034 XFS_BUF_CLR_IODONE_FUNC(bp); 1044 XFS_BUF_CLR_IODONE_FUNC(bp);
1035 xfs_buf_ioend(bp, 0); 1045 xfs_buf_ioend(bp, 0);
@@ -1063,7 +1073,7 @@ xfs_buf_error_relse(
1063 * We have to unpin the pinned buffers so do the 1073 * We have to unpin the pinned buffers so do the
1064 * callbacks. 1074 * callbacks.
1065 */ 1075 */
1066 xfs_buf_do_callbacks(bp, lip); 1076 xfs_buf_do_callbacks(bp);
1067 XFS_BUF_SET_FSPRIVATE(bp, NULL); 1077 XFS_BUF_SET_FSPRIVATE(bp, NULL);
1068 XFS_BUF_CLR_IODONE_FUNC(bp); 1078 XFS_BUF_CLR_IODONE_FUNC(bp);
1069 XFS_BUF_SET_BRELSE_FUNC(bp,NULL); 1079 XFS_BUF_SET_BRELSE_FUNC(bp,NULL);
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index 0e2ed43f16c..b6ecd2061e7 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -105,17 +105,6 @@ typedef struct xfs_buf_log_item {
105 xfs_buf_log_format_t bli_format; /* in-log header */ 105 xfs_buf_log_format_t bli_format; /* in-log header */
106} xfs_buf_log_item_t; 106} xfs_buf_log_item_t;
107 107
108/*
109 * This structure is used during recovery to record the buf log
110 * items which have been canceled and should not be replayed.
111 */
112typedef struct xfs_buf_cancel {
113 xfs_daddr_t bc_blkno;
114 uint bc_len;
115 int bc_refcount;
116 struct xfs_buf_cancel *bc_next;
117} xfs_buf_cancel_t;
118
119void xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *); 108void xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *);
120void xfs_buf_item_relse(struct xfs_buf *); 109void xfs_buf_item_relse(struct xfs_buf *);
121void xfs_buf_item_log(xfs_buf_log_item_t *, uint, uint); 110void xfs_buf_item_log(xfs_buf_log_item_t *, uint, uint);
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index a55e687bf56..75f2ef60e57 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -48,6 +48,28 @@ xfs_efi_item_free(
48} 48}
49 49
50/* 50/*
51 * Freeing the efi requires that we remove it from the AIL if it has already
52 * been placed there. However, the EFI may not yet have been placed in the AIL
53 * when called by xfs_efi_release() from EFD processing due to the ordering of
54 * committed vs unpin operations in bulk insert operations. Hence the
55 * test_and_clear_bit(XFS_EFI_COMMITTED) to ensure only the last caller frees
56 * the EFI.
57 */
58STATIC void
59__xfs_efi_release(
60 struct xfs_efi_log_item *efip)
61{
62 struct xfs_ail *ailp = efip->efi_item.li_ailp;
63
64 if (!test_and_clear_bit(XFS_EFI_COMMITTED, &efip->efi_flags)) {
65 spin_lock(&ailp->xa_lock);
66 /* xfs_trans_ail_delete() drops the AIL lock. */
67 xfs_trans_ail_delete(ailp, &efip->efi_item);
68 xfs_efi_item_free(efip);
69 }
70}
71
72/*
51 * This returns the number of iovecs needed to log the given efi item. 73 * This returns the number of iovecs needed to log the given efi item.
52 * We only need 1 iovec for an efi item. It just logs the efi_log_format 74 * We only need 1 iovec for an efi item. It just logs the efi_log_format
53 * structure. 75 * structure.
@@ -74,7 +96,8 @@ xfs_efi_item_format(
74 struct xfs_efi_log_item *efip = EFI_ITEM(lip); 96 struct xfs_efi_log_item *efip = EFI_ITEM(lip);
75 uint size; 97 uint size;
76 98
77 ASSERT(efip->efi_next_extent == efip->efi_format.efi_nextents); 99 ASSERT(atomic_read(&efip->efi_next_extent) ==
100 efip->efi_format.efi_nextents);
78 101
79 efip->efi_format.efi_type = XFS_LI_EFI; 102 efip->efi_format.efi_type = XFS_LI_EFI;
80 103
@@ -99,10 +122,12 @@ xfs_efi_item_pin(
99} 122}
100 123
101/* 124/*
102 * While EFIs cannot really be pinned, the unpin operation is the 125 * While EFIs cannot really be pinned, the unpin operation is the last place at
103 * last place at which the EFI is manipulated during a transaction. 126 * which the EFI is manipulated during a transaction. If we are being asked to
104 * Here we coordinate with xfs_efi_cancel() to determine who gets to 127 * remove the EFI it's because the transaction has been cancelled and by
105 * free the EFI. 128 * definition that means the EFI cannot be in the AIL so remove it from the
129 * transaction and free it. Otherwise coordinate with xfs_efi_release() (via
130 * XFS_EFI_COMMITTED) to determine who gets to free the EFI.
106 */ 131 */
107STATIC void 132STATIC void
108xfs_efi_item_unpin( 133xfs_efi_item_unpin(
@@ -110,20 +135,14 @@ xfs_efi_item_unpin(
110 int remove) 135 int remove)
111{ 136{
112 struct xfs_efi_log_item *efip = EFI_ITEM(lip); 137 struct xfs_efi_log_item *efip = EFI_ITEM(lip);
113 struct xfs_ail *ailp = lip->li_ailp;
114
115 spin_lock(&ailp->xa_lock);
116 if (efip->efi_flags & XFS_EFI_CANCELED) {
117 if (remove)
118 xfs_trans_del_item(lip);
119 138
120 /* xfs_trans_ail_delete() drops the AIL lock. */ 139 if (remove) {
121 xfs_trans_ail_delete(ailp, lip); 140 ASSERT(!(lip->li_flags & XFS_LI_IN_AIL));
141 xfs_trans_del_item(lip);
122 xfs_efi_item_free(efip); 142 xfs_efi_item_free(efip);
123 } else { 143 return;
124 efip->efi_flags |= XFS_EFI_COMMITTED;
125 spin_unlock(&ailp->xa_lock);
126 } 144 }
145 __xfs_efi_release(efip);
127} 146}
128 147
129/* 148/*
@@ -152,16 +171,20 @@ xfs_efi_item_unlock(
152} 171}
153 172
154/* 173/*
155 * The EFI is logged only once and cannot be moved in the log, so 174 * The EFI is logged only once and cannot be moved in the log, so simply return
156 * simply return the lsn at which it's been logged. The canceled 175 * the lsn at which it's been logged. For bulk transaction committed
157 * flag is not paid any attention here. Checking for that is delayed 176 * processing, the EFI may be processed but not yet unpinned prior to the EFD
158 * until the EFI is unpinned. 177 * being processed. Set the XFS_EFI_COMMITTED flag so this case can be detected
178 * when processing the EFD.
159 */ 179 */
160STATIC xfs_lsn_t 180STATIC xfs_lsn_t
161xfs_efi_item_committed( 181xfs_efi_item_committed(
162 struct xfs_log_item *lip, 182 struct xfs_log_item *lip,
163 xfs_lsn_t lsn) 183 xfs_lsn_t lsn)
164{ 184{
185 struct xfs_efi_log_item *efip = EFI_ITEM(lip);
186
187 set_bit(XFS_EFI_COMMITTED, &efip->efi_flags);
165 return lsn; 188 return lsn;
166} 189}
167 190
@@ -230,6 +253,7 @@ xfs_efi_init(
230 xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops); 253 xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops);
231 efip->efi_format.efi_nextents = nextents; 254 efip->efi_format.efi_nextents = nextents;
232 efip->efi_format.efi_id = (__psint_t)(void*)efip; 255 efip->efi_format.efi_id = (__psint_t)(void*)efip;
256 atomic_set(&efip->efi_next_extent, 0);
233 257
234 return efip; 258 return efip;
235} 259}
@@ -289,37 +313,18 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
289} 313}
290 314
291/* 315/*
292 * This is called by the efd item code below to release references to 316 * This is called by the efd item code below to release references to the given
293 * the given efi item. Each efd calls this with the number of 317 * efi item. Each efd calls this with the number of extents that it has
294 * extents that it has logged, and when the sum of these reaches 318 * logged, and when the sum of these reaches the total number of extents logged
295 * the total number of extents logged by this efi item we can free 319 * by this efi item we can free the efi item.
296 * the efi item.
297 *
298 * Freeing the efi item requires that we remove it from the AIL.
299 * We'll use the AIL lock to protect our counters as well as
300 * the removal from the AIL.
301 */ 320 */
302void 321void
303xfs_efi_release(xfs_efi_log_item_t *efip, 322xfs_efi_release(xfs_efi_log_item_t *efip,
304 uint nextents) 323 uint nextents)
305{ 324{
306 struct xfs_ail *ailp = efip->efi_item.li_ailp; 325 ASSERT(atomic_read(&efip->efi_next_extent) >= nextents);
307 int extents_left; 326 if (atomic_sub_and_test(nextents, &efip->efi_next_extent))
308 327 __xfs_efi_release(efip);
309 ASSERT(efip->efi_next_extent > 0);
310 ASSERT(efip->efi_flags & XFS_EFI_COMMITTED);
311
312 spin_lock(&ailp->xa_lock);
313 ASSERT(efip->efi_next_extent >= nextents);
314 efip->efi_next_extent -= nextents;
315 extents_left = efip->efi_next_extent;
316 if (extents_left == 0) {
317 /* xfs_trans_ail_delete() drops the AIL lock. */
318 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip);
319 xfs_efi_item_free(efip);
320 } else {
321 spin_unlock(&ailp->xa_lock);
322 }
323} 328}
324 329
325static inline struct xfs_efd_log_item *EFD_ITEM(struct xfs_log_item *lip) 330static inline struct xfs_efd_log_item *EFD_ITEM(struct xfs_log_item *lip)
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h
index 0d22c56fdf6..375f68e4253 100644
--- a/fs/xfs/xfs_extfree_item.h
+++ b/fs/xfs/xfs_extfree_item.h
@@ -111,11 +111,10 @@ typedef struct xfs_efd_log_format_64 {
111#define XFS_EFI_MAX_FAST_EXTENTS 16 111#define XFS_EFI_MAX_FAST_EXTENTS 16
112 112
113/* 113/*
114 * Define EFI flags. 114 * Define EFI flag bits. Manipulated by set/clear/test_bit operators.
115 */ 115 */
116#define XFS_EFI_RECOVERED 0x1 116#define XFS_EFI_RECOVERED 1
117#define XFS_EFI_COMMITTED 0x2 117#define XFS_EFI_COMMITTED 2
118#define XFS_EFI_CANCELED 0x4
119 118
120/* 119/*
121 * This is the "extent free intention" log item. It is used 120 * This is the "extent free intention" log item. It is used
@@ -125,8 +124,8 @@ typedef struct xfs_efd_log_format_64 {
125 */ 124 */
126typedef struct xfs_efi_log_item { 125typedef struct xfs_efi_log_item {
127 xfs_log_item_t efi_item; 126 xfs_log_item_t efi_item;
128 uint efi_flags; /* misc flags */ 127 atomic_t efi_next_extent;
129 uint efi_next_extent; 128 unsigned long efi_flags; /* misc flags */
130 xfs_efi_log_format_t efi_format; 129 xfs_efi_log_format_t efi_format;
131} xfs_efi_log_item_t; 130} xfs_efi_log_item_t;
132 131
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index a7c116e814a..f56d30e8040 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -374,6 +374,7 @@ xfs_growfs_data_private(
374 mp->m_maxicount = icount << mp->m_sb.sb_inopblog; 374 mp->m_maxicount = icount << mp->m_sb.sb_inopblog;
375 } else 375 } else
376 mp->m_maxicount = 0; 376 mp->m_maxicount = 0;
377 xfs_set_low_space_thresholds(mp);
377 378
378 /* update secondary superblocks. */ 379 /* update secondary superblocks. */
379 for (agno = 1; agno < nagcount; agno++) { 380 for (agno = 1; agno < nagcount; agno++) {
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 0cdd26932d8..cb9b6d1469f 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -43,6 +43,17 @@
43 43
44 44
45/* 45/*
46 * Define xfs inode iolock lockdep classes. We need to ensure that all active
47 * inodes are considered the same for lockdep purposes, including inodes that
48 * are recycled through the XFS_IRECLAIMABLE state. This is the only way to
49 * guarantee the locks are considered the same when there are multiple lock
50 * initialisation sites. Also, define a reclaimable inode class so it is
51 * obvious in lockdep reports which class the report is against.
52 */
53static struct lock_class_key xfs_iolock_active;
54struct lock_class_key xfs_iolock_reclaimable;
55
56/*
46 * Allocate and initialise an xfs_inode. 57 * Allocate and initialise an xfs_inode.
47 */ 58 */
48STATIC struct xfs_inode * 59STATIC struct xfs_inode *
@@ -69,8 +80,11 @@ xfs_inode_alloc(
69 ASSERT(atomic_read(&ip->i_pincount) == 0); 80 ASSERT(atomic_read(&ip->i_pincount) == 0);
70 ASSERT(!spin_is_locked(&ip->i_flags_lock)); 81 ASSERT(!spin_is_locked(&ip->i_flags_lock));
71 ASSERT(completion_done(&ip->i_flush)); 82 ASSERT(completion_done(&ip->i_flush));
83 ASSERT(ip->i_ino == 0);
72 84
73 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); 85 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
86 lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
87 &xfs_iolock_active, "xfs_iolock_active");
74 88
75 /* initialise the xfs inode */ 89 /* initialise the xfs inode */
76 ip->i_ino = ino; 90 ip->i_ino = ino;
@@ -85,12 +99,20 @@ xfs_inode_alloc(
85 ip->i_size = 0; 99 ip->i_size = 0;
86 ip->i_new_size = 0; 100 ip->i_new_size = 0;
87 101
88 /* prevent anyone from using this yet */
89 VFS_I(ip)->i_state = I_NEW;
90
91 return ip; 102 return ip;
92} 103}
93 104
105STATIC void
106xfs_inode_free_callback(
107 struct rcu_head *head)
108{
109 struct inode *inode = container_of(head, struct inode, i_rcu);
110 struct xfs_inode *ip = XFS_I(inode);
111
112 INIT_LIST_HEAD(&inode->i_dentry);
113 kmem_zone_free(xfs_inode_zone, ip);
114}
115
94void 116void
95xfs_inode_free( 117xfs_inode_free(
96 struct xfs_inode *ip) 118 struct xfs_inode *ip)
@@ -134,7 +156,18 @@ xfs_inode_free(
134 ASSERT(!spin_is_locked(&ip->i_flags_lock)); 156 ASSERT(!spin_is_locked(&ip->i_flags_lock));
135 ASSERT(completion_done(&ip->i_flush)); 157 ASSERT(completion_done(&ip->i_flush));
136 158
137 kmem_zone_free(xfs_inode_zone, ip); 159 /*
160 * Because we use RCU freeing we need to ensure the inode always
161 * appears to be reclaimed with an invalid inode number when in the
162 * free state. The ip->i_flags_lock provides the barrier against lookup
163 * races.
164 */
165 spin_lock(&ip->i_flags_lock);
166 ip->i_flags = XFS_IRECLAIM;
167 ip->i_ino = 0;
168 spin_unlock(&ip->i_flags_lock);
169
170 call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
138} 171}
139 172
140/* 173/*
@@ -144,14 +177,29 @@ static int
144xfs_iget_cache_hit( 177xfs_iget_cache_hit(
145 struct xfs_perag *pag, 178 struct xfs_perag *pag,
146 struct xfs_inode *ip, 179 struct xfs_inode *ip,
180 xfs_ino_t ino,
147 int flags, 181 int flags,
148 int lock_flags) __releases(pag->pag_ici_lock) 182 int lock_flags) __releases(RCU)
149{ 183{
150 struct inode *inode = VFS_I(ip); 184 struct inode *inode = VFS_I(ip);
151 struct xfs_mount *mp = ip->i_mount; 185 struct xfs_mount *mp = ip->i_mount;
152 int error; 186 int error;
153 187
188 /*
189 * Check for re-use of an inode within an RCU grace period due to the
190 * radix tree nodes not being updated yet. We monitor for this by
191 * setting the inode number to zero before freeing the inode structure.
192 * If the inode has been reallocated and set up, then the inode number
193 * will not match, so check for that, too.
194 */
154 spin_lock(&ip->i_flags_lock); 195 spin_lock(&ip->i_flags_lock);
196 if (ip->i_ino != ino) {
197 trace_xfs_iget_skip(ip);
198 XFS_STATS_INC(xs_ig_frecycle);
199 error = EAGAIN;
200 goto out_error;
201 }
202
155 203
156 /* 204 /*
157 * If we are racing with another cache hit that is currently 205 * If we are racing with another cache hit that is currently
@@ -194,7 +242,7 @@ xfs_iget_cache_hit(
194 ip->i_flags |= XFS_IRECLAIM; 242 ip->i_flags |= XFS_IRECLAIM;
195 243
196 spin_unlock(&ip->i_flags_lock); 244 spin_unlock(&ip->i_flags_lock);
197 read_unlock(&pag->pag_ici_lock); 245 rcu_read_unlock();
198 246
199 error = -inode_init_always(mp->m_super, inode); 247 error = -inode_init_always(mp->m_super, inode);
200 if (error) { 248 if (error) {
@@ -202,7 +250,7 @@ xfs_iget_cache_hit(
202 * Re-initializing the inode failed, and we are in deep 250 * Re-initializing the inode failed, and we are in deep
203 * trouble. Try to re-add it to the reclaim list. 251 * trouble. Try to re-add it to the reclaim list.
204 */ 252 */
205 read_lock(&pag->pag_ici_lock); 253 rcu_read_lock();
206 spin_lock(&ip->i_flags_lock); 254 spin_lock(&ip->i_flags_lock);
207 255
208 ip->i_flags &= ~XFS_INEW; 256 ip->i_flags &= ~XFS_INEW;
@@ -212,14 +260,20 @@ xfs_iget_cache_hit(
212 goto out_error; 260 goto out_error;
213 } 261 }
214 262
215 write_lock(&pag->pag_ici_lock); 263 spin_lock(&pag->pag_ici_lock);
216 spin_lock(&ip->i_flags_lock); 264 spin_lock(&ip->i_flags_lock);
217 ip->i_flags &= ~(XFS_IRECLAIMABLE | XFS_IRECLAIM); 265 ip->i_flags &= ~(XFS_IRECLAIMABLE | XFS_IRECLAIM);
218 ip->i_flags |= XFS_INEW; 266 ip->i_flags |= XFS_INEW;
219 __xfs_inode_clear_reclaim_tag(mp, pag, ip); 267 __xfs_inode_clear_reclaim_tag(mp, pag, ip);
220 inode->i_state = I_NEW; 268 inode->i_state = I_NEW;
269
270 ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
271 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
272 lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
273 &xfs_iolock_active, "xfs_iolock_active");
274
221 spin_unlock(&ip->i_flags_lock); 275 spin_unlock(&ip->i_flags_lock);
222 write_unlock(&pag->pag_ici_lock); 276 spin_unlock(&pag->pag_ici_lock);
223 } else { 277 } else {
224 /* If the VFS inode is being torn down, pause and try again. */ 278 /* If the VFS inode is being torn down, pause and try again. */
225 if (!igrab(inode)) { 279 if (!igrab(inode)) {
@@ -230,7 +284,7 @@ xfs_iget_cache_hit(
230 284
231 /* We've got a live one. */ 285 /* We've got a live one. */
232 spin_unlock(&ip->i_flags_lock); 286 spin_unlock(&ip->i_flags_lock);
233 read_unlock(&pag->pag_ici_lock); 287 rcu_read_unlock();
234 trace_xfs_iget_hit(ip); 288 trace_xfs_iget_hit(ip);
235 } 289 }
236 290
@@ -244,7 +298,7 @@ xfs_iget_cache_hit(
244 298
245out_error: 299out_error:
246 spin_unlock(&ip->i_flags_lock); 300 spin_unlock(&ip->i_flags_lock);
247 read_unlock(&pag->pag_ici_lock); 301 rcu_read_unlock();
248 return error; 302 return error;
249} 303}
250 304
@@ -297,7 +351,7 @@ xfs_iget_cache_miss(
297 BUG(); 351 BUG();
298 } 352 }
299 353
300 write_lock(&pag->pag_ici_lock); 354 spin_lock(&pag->pag_ici_lock);
301 355
302 /* insert the new inode */ 356 /* insert the new inode */
303 error = radix_tree_insert(&pag->pag_ici_root, agino, ip); 357 error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
@@ -312,14 +366,14 @@ xfs_iget_cache_miss(
312 ip->i_udquot = ip->i_gdquot = NULL; 366 ip->i_udquot = ip->i_gdquot = NULL;
313 xfs_iflags_set(ip, XFS_INEW); 367 xfs_iflags_set(ip, XFS_INEW);
314 368
315 write_unlock(&pag->pag_ici_lock); 369 spin_unlock(&pag->pag_ici_lock);
316 radix_tree_preload_end(); 370 radix_tree_preload_end();
317 371
318 *ipp = ip; 372 *ipp = ip;
319 return 0; 373 return 0;
320 374
321out_preload_end: 375out_preload_end:
322 write_unlock(&pag->pag_ici_lock); 376 spin_unlock(&pag->pag_ici_lock);
323 radix_tree_preload_end(); 377 radix_tree_preload_end();
324 if (lock_flags) 378 if (lock_flags)
325 xfs_iunlock(ip, lock_flags); 379 xfs_iunlock(ip, lock_flags);
@@ -366,7 +420,7 @@ xfs_iget(
366 xfs_agino_t agino; 420 xfs_agino_t agino;
367 421
368 /* reject inode numbers outside existing AGs */ 422 /* reject inode numbers outside existing AGs */
369 if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) 423 if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
370 return EINVAL; 424 return EINVAL;
371 425
372 /* get the perag structure and ensure that it's inode capable */ 426 /* get the perag structure and ensure that it's inode capable */
@@ -375,15 +429,15 @@ xfs_iget(
375 429
376again: 430again:
377 error = 0; 431 error = 0;
378 read_lock(&pag->pag_ici_lock); 432 rcu_read_lock();
379 ip = radix_tree_lookup(&pag->pag_ici_root, agino); 433 ip = radix_tree_lookup(&pag->pag_ici_root, agino);
380 434
381 if (ip) { 435 if (ip) {
382 error = xfs_iget_cache_hit(pag, ip, flags, lock_flags); 436 error = xfs_iget_cache_hit(pag, ip, ino, flags, lock_flags);
383 if (error) 437 if (error)
384 goto out_error_or_again; 438 goto out_error_or_again;
385 } else { 439 } else {
386 read_unlock(&pag->pag_ici_lock); 440 rcu_read_unlock();
387 XFS_STATS_INC(xs_ig_missed); 441 XFS_STATS_INC(xs_ig_missed);
388 442
389 error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, 443 error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip,
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 108c7a085f9..be7cf625421 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -887,7 +887,7 @@ xfs_iread(
887 * around for a while. This helps to keep recently accessed 887 * around for a while. This helps to keep recently accessed
888 * meta-data in-core longer. 888 * meta-data in-core longer.
889 */ 889 */
890 XFS_BUF_SET_REF(bp, XFS_INO_REF); 890 xfs_buf_set_ref(bp, XFS_INO_REF);
891 891
892 /* 892 /*
893 * Use xfs_trans_brelse() to release the buffer containing the 893 * Use xfs_trans_brelse() to release the buffer containing the
@@ -2000,17 +2000,33 @@ xfs_ifree_cluster(
2000 */ 2000 */
2001 for (i = 0; i < ninodes; i++) { 2001 for (i = 0; i < ninodes; i++) {
2002retry: 2002retry:
2003 read_lock(&pag->pag_ici_lock); 2003 rcu_read_lock();
2004 ip = radix_tree_lookup(&pag->pag_ici_root, 2004 ip = radix_tree_lookup(&pag->pag_ici_root,
2005 XFS_INO_TO_AGINO(mp, (inum + i))); 2005 XFS_INO_TO_AGINO(mp, (inum + i)));
2006 2006
2007 /* Inode not in memory or stale, nothing to do */ 2007 /* Inode not in memory, nothing to do */
2008 if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) { 2008 if (!ip) {
2009 read_unlock(&pag->pag_ici_lock); 2009 rcu_read_unlock();
2010 continue; 2010 continue;
2011 } 2011 }
2012 2012
2013 /* 2013 /*
2014 * Because this is an RCU-protected lookup, we could
2015 * find a recently freed or even reallocated inode
2016 * during the lookup. We need to check under the
2017 * i_flags_lock for a valid inode here. Skip it if it
2018 * is not valid, the wrong inode or stale.
2019 */
2020 spin_lock(&ip->i_flags_lock);
2021 if (ip->i_ino != inum + i ||
2022 __xfs_iflags_test(ip, XFS_ISTALE)) {
2023 spin_unlock(&ip->i_flags_lock);
2024 rcu_read_unlock();
2025 continue;
2026 }
2027 spin_unlock(&ip->i_flags_lock);
2028
2029 /*
2014 * Don't try to lock/unlock the current inode, but we 2030 * Don't try to lock/unlock the current inode, but we
2015 * _cannot_ skip the other inodes that we did not find 2031 * _cannot_ skip the other inodes that we did not find
2016 * in the list attached to the buffer and are not 2032 * in the list attached to the buffer and are not
@@ -2019,11 +2035,11 @@ retry:
2019 */ 2035 */
2020 if (ip != free_ip && 2036 if (ip != free_ip &&
2021 !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { 2037 !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
2022 read_unlock(&pag->pag_ici_lock); 2038 rcu_read_unlock();
2023 delay(1); 2039 delay(1);
2024 goto retry; 2040 goto retry;
2025 } 2041 }
2026 read_unlock(&pag->pag_ici_lock); 2042 rcu_read_unlock();
2027 2043
2028 xfs_iflock(ip); 2044 xfs_iflock(ip);
2029 xfs_iflags_set(ip, XFS_ISTALE); 2045 xfs_iflags_set(ip, XFS_ISTALE);
@@ -2629,7 +2645,7 @@ xfs_iflush_cluster(
2629 2645
2630 mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); 2646 mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
2631 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; 2647 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
2632 read_lock(&pag->pag_ici_lock); 2648 rcu_read_lock();
2633 /* really need a gang lookup range call here */ 2649 /* really need a gang lookup range call here */
2634 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, 2650 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist,
2635 first_index, inodes_per_cluster); 2651 first_index, inodes_per_cluster);
@@ -2640,9 +2656,21 @@ xfs_iflush_cluster(
2640 iq = ilist[i]; 2656 iq = ilist[i];
2641 if (iq == ip) 2657 if (iq == ip)
2642 continue; 2658 continue;
2643 /* if the inode lies outside this cluster, we're done. */ 2659
2644 if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) 2660 /*
2645 break; 2661 * because this is an RCU protected lookup, we could find a
2662 * recently freed or even reallocated inode during the lookup.
2663 * We need to check under the i_flags_lock for a valid inode
2664 * here. Skip it if it is not valid or the wrong inode.
2665 */
2666 spin_lock(&ip->i_flags_lock);
2667 if (!ip->i_ino ||
2668 (XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) {
2669 spin_unlock(&ip->i_flags_lock);
2670 continue;
2671 }
2672 spin_unlock(&ip->i_flags_lock);
2673
2646 /* 2674 /*
2647 * Do an un-protected check to see if the inode is dirty and 2675 * Do an un-protected check to see if the inode is dirty and
2648 * is a candidate for flushing. These checks will be repeated 2676 * is a candidate for flushing. These checks will be repeated
@@ -2692,7 +2720,7 @@ xfs_iflush_cluster(
2692 } 2720 }
2693 2721
2694out_free: 2722out_free:
2695 read_unlock(&pag->pag_ici_lock); 2723 rcu_read_unlock();
2696 kmem_free(ilist); 2724 kmem_free(ilist);
2697out_put: 2725out_put:
2698 xfs_perag_put(pag); 2726 xfs_perag_put(pag);
@@ -2704,7 +2732,7 @@ cluster_corrupt_out:
2704 * Corruption detected in the clustering loop. Invalidate the 2732 * Corruption detected in the clustering loop. Invalidate the
2705 * inode buffer and shut down the filesystem. 2733 * inode buffer and shut down the filesystem.
2706 */ 2734 */
2707 read_unlock(&pag->pag_ici_lock); 2735 rcu_read_unlock();
2708 /* 2736 /*
2709 * Clean up the buffer. If it was B_DELWRI, just release it -- 2737 * Clean up the buffer. If it was B_DELWRI, just release it --
2710 * brelse can handle it with no problems. If not, shut down the 2738 * brelse can handle it with no problems. If not, shut down the
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index fb2ca2e4cdc..5c95fa8ec11 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -376,12 +376,13 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
376/* 376/*
377 * In-core inode flags. 377 * In-core inode flags.
378 */ 378 */
379#define XFS_IRECLAIM 0x0001 /* we have started reclaiming this inode */ 379#define XFS_IRECLAIM 0x0001 /* started reclaiming this inode */
380#define XFS_ISTALE 0x0002 /* inode has been staled */ 380#define XFS_ISTALE 0x0002 /* inode has been staled */
381#define XFS_IRECLAIMABLE 0x0004 /* inode can be reclaimed */ 381#define XFS_IRECLAIMABLE 0x0004 /* inode can be reclaimed */
382#define XFS_INEW 0x0008 /* inode has just been allocated */ 382#define XFS_INEW 0x0008 /* inode has just been allocated */
383#define XFS_IFILESTREAM 0x0010 /* inode is in a filestream directory */ 383#define XFS_IFILESTREAM 0x0010 /* inode is in a filestream directory */
384#define XFS_ITRUNCATED 0x0020 /* truncated down so flush-on-close */ 384#define XFS_ITRUNCATED 0x0020 /* truncated down so flush-on-close */
385#define XFS_IDIRTY_RELEASE 0x0040 /* dirty release already seen */
385 386
386/* 387/*
387 * Flags for inode locking. 388 * Flags for inode locking.
@@ -438,6 +439,8 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
438#define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT) 439#define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT)
439#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT) 440#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT)
440 441
442extern struct lock_class_key xfs_iolock_reclaimable;
443
441/* 444/*
442 * Flags for xfs_itruncate_start(). 445 * Flags for xfs_itruncate_start().
443 */ 446 */
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 7c8d30c453c..fd4f398bd6f 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -842,15 +842,64 @@ xfs_inode_item_destroy(
842 * flushed to disk. It is responsible for removing the inode item 842 * flushed to disk. It is responsible for removing the inode item
843 * from the AIL if it has not been re-logged, and unlocking the inode's 843 * from the AIL if it has not been re-logged, and unlocking the inode's
844 * flush lock. 844 * flush lock.
845 *
846 * To reduce AIL lock traffic as much as possible, we scan the buffer log item
847 * list for other inodes that will run this function. We remove them from the
848 * buffer list so we can process all the inode IO completions in one AIL lock
849 * traversal.
845 */ 850 */
846void 851void
847xfs_iflush_done( 852xfs_iflush_done(
848 struct xfs_buf *bp, 853 struct xfs_buf *bp,
849 struct xfs_log_item *lip) 854 struct xfs_log_item *lip)
850{ 855{
851 struct xfs_inode_log_item *iip = INODE_ITEM(lip); 856 struct xfs_inode_log_item *iip;
852 xfs_inode_t *ip = iip->ili_inode; 857 struct xfs_log_item *blip;
858 struct xfs_log_item *next;
859 struct xfs_log_item *prev;
853 struct xfs_ail *ailp = lip->li_ailp; 860 struct xfs_ail *ailp = lip->li_ailp;
861 int need_ail = 0;
862
863 /*
864 * Scan the buffer IO completions for other inodes being completed and
865 * attach them to the current inode log item.
866 */
867 blip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
868 prev = NULL;
869 while (blip != NULL) {
870 if (lip->li_cb != xfs_iflush_done) {
871 prev = blip;
872 blip = blip->li_bio_list;
873 continue;
874 }
875
876 /* remove from list */
877 next = blip->li_bio_list;
878 if (!prev) {
879 XFS_BUF_SET_FSPRIVATE(bp, next);
880 } else {
881 prev->li_bio_list = next;
882 }
883
884 /* add to current list */
885 blip->li_bio_list = lip->li_bio_list;
886 lip->li_bio_list = blip;
887
888 /*
889 * while we have the item, do the unlocked check for needing
890 * the AIL lock.
891 */
892 iip = INODE_ITEM(blip);
893 if (iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn)
894 need_ail++;
895
896 blip = next;
897 }
898
899 /* make sure we capture the state of the initial inode. */
900 iip = INODE_ITEM(lip);
901 if (iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn)
902 need_ail++;
854 903
855 /* 904 /*
856 * We only want to pull the item from the AIL if it is 905 * We only want to pull the item from the AIL if it is
@@ -861,28 +910,37 @@ xfs_iflush_done(
861 * the lock since it's cheaper, and then we recheck while 910 * the lock since it's cheaper, and then we recheck while
862 * holding the lock before removing the inode from the AIL. 911 * holding the lock before removing the inode from the AIL.
863 */ 912 */
864 if (iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn) { 913 if (need_ail) {
914 struct xfs_log_item *log_items[need_ail];
915 int i = 0;
865 spin_lock(&ailp->xa_lock); 916 spin_lock(&ailp->xa_lock);
866 if (lip->li_lsn == iip->ili_flush_lsn) { 917 for (blip = lip; blip; blip = blip->li_bio_list) {
867 /* xfs_trans_ail_delete() drops the AIL lock. */ 918 iip = INODE_ITEM(blip);
868 xfs_trans_ail_delete(ailp, lip); 919 if (iip->ili_logged &&
869 } else { 920 blip->li_lsn == iip->ili_flush_lsn) {
870 spin_unlock(&ailp->xa_lock); 921 log_items[i++] = blip;
922 }
923 ASSERT(i <= need_ail);
871 } 924 }
925 /* xfs_trans_ail_delete_bulk() drops the AIL lock. */
926 xfs_trans_ail_delete_bulk(ailp, log_items, i);
872 } 927 }
873 928
874 iip->ili_logged = 0;
875 929
876 /* 930 /*
877 * Clear the ili_last_fields bits now that we know that the 931 * clean up and unlock the flush lock now we are done. We can clear the
878 * data corresponding to them is safely on disk. 932 * ili_last_fields bits now that we know that the data corresponding to
933 * them is safely on disk.
879 */ 934 */
880 iip->ili_last_fields = 0; 935 for (blip = lip; blip; blip = next) {
936 next = blip->li_bio_list;
937 blip->li_bio_list = NULL;
881 938
882 /* 939 iip = INODE_ITEM(blip);
883 * Release the inode's flush lock since we're done with it. 940 iip->ili_logged = 0;
884 */ 941 iip->ili_last_fields = 0;
885 xfs_ifunlock(ip); 942 xfs_ifunlock(iip->ili_inode);
943 }
886} 944}
887 945
888/* 946/*
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 20576146369..55582bd6665 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -47,127 +47,8 @@
47 47
48#define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \ 48#define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \
49 << mp->m_writeio_log) 49 << mp->m_writeio_log)
50#define XFS_STRAT_WRITE_IMAPS 2
51#define XFS_WRITE_IMAPS XFS_BMAP_MAX_NMAP 50#define XFS_WRITE_IMAPS XFS_BMAP_MAX_NMAP
52 51
53STATIC int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
54 int, struct xfs_bmbt_irec *, int *);
55STATIC int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t, int,
56 struct xfs_bmbt_irec *, int *);
57STATIC int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, size_t,
58 struct xfs_bmbt_irec *, int *);
59
60int
61xfs_iomap(
62 struct xfs_inode *ip,
63 xfs_off_t offset,
64 ssize_t count,
65 int flags,
66 struct xfs_bmbt_irec *imap,
67 int *nimaps,
68 int *new)
69{
70 struct xfs_mount *mp = ip->i_mount;
71 xfs_fileoff_t offset_fsb, end_fsb;
72 int error = 0;
73 int lockmode = 0;
74 int bmapi_flags = 0;
75
76 ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
77
78 *new = 0;
79
80 if (XFS_FORCED_SHUTDOWN(mp))
81 return XFS_ERROR(EIO);
82
83 trace_xfs_iomap_enter(ip, offset, count, flags, NULL);
84
85 switch (flags & (BMAPI_READ | BMAPI_WRITE | BMAPI_ALLOCATE)) {
86 case BMAPI_READ:
87 lockmode = xfs_ilock_map_shared(ip);
88 bmapi_flags = XFS_BMAPI_ENTIRE;
89 break;
90 case BMAPI_WRITE:
91 lockmode = XFS_ILOCK_EXCL;
92 if (flags & BMAPI_IGNSTATE)
93 bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE;
94 xfs_ilock(ip, lockmode);
95 break;
96 case BMAPI_ALLOCATE:
97 lockmode = XFS_ILOCK_SHARED;
98 bmapi_flags = XFS_BMAPI_ENTIRE;
99
100 /* Attempt non-blocking lock */
101 if (flags & BMAPI_TRYLOCK) {
102 if (!xfs_ilock_nowait(ip, lockmode))
103 return XFS_ERROR(EAGAIN);
104 } else {
105 xfs_ilock(ip, lockmode);
106 }
107 break;
108 default:
109 BUG();
110 }
111
112 ASSERT(offset <= mp->m_maxioffset);
113 if ((xfs_fsize_t)offset + count > mp->m_maxioffset)
114 count = mp->m_maxioffset - offset;
115 end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
116 offset_fsb = XFS_B_TO_FSBT(mp, offset);
117
118 error = xfs_bmapi(NULL, ip, offset_fsb,
119 (xfs_filblks_t)(end_fsb - offset_fsb),
120 bmapi_flags, NULL, 0, imap,
121 nimaps, NULL);
122
123 if (error)
124 goto out;
125
126 switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)) {
127 case BMAPI_WRITE:
128 /* If we found an extent, return it */
129 if (*nimaps &&
130 (imap->br_startblock != HOLESTARTBLOCK) &&
131 (imap->br_startblock != DELAYSTARTBLOCK)) {
132 trace_xfs_iomap_found(ip, offset, count, flags, imap);
133 break;
134 }
135
136 if (flags & BMAPI_DIRECT) {
137 error = xfs_iomap_write_direct(ip, offset, count, flags,
138 imap, nimaps);
139 } else {
140 error = xfs_iomap_write_delay(ip, offset, count, flags,
141 imap, nimaps);
142 }
143 if (!error) {
144 trace_xfs_iomap_alloc(ip, offset, count, flags, imap);
145 }
146 *new = 1;
147 break;
148 case BMAPI_ALLOCATE:
149 /* If we found an extent, return it */
150 xfs_iunlock(ip, lockmode);
151 lockmode = 0;
152
153 if (*nimaps && !isnullstartblock(imap->br_startblock)) {
154 trace_xfs_iomap_found(ip, offset, count, flags, imap);
155 break;
156 }
157
158 error = xfs_iomap_write_allocate(ip, offset, count,
159 imap, nimaps);
160 break;
161 }
162
163 ASSERT(*nimaps <= 1);
164
165out:
166 if (lockmode)
167 xfs_iunlock(ip, lockmode);
168 return XFS_ERROR(error);
169}
170
171STATIC int 52STATIC int
172xfs_iomap_eof_align_last_fsb( 53xfs_iomap_eof_align_last_fsb(
173 xfs_mount_t *mp, 54 xfs_mount_t *mp,
@@ -236,14 +117,13 @@ xfs_cmn_err_fsblock_zero(
236 return EFSCORRUPTED; 117 return EFSCORRUPTED;
237} 118}
238 119
239STATIC int 120int
240xfs_iomap_write_direct( 121xfs_iomap_write_direct(
241 xfs_inode_t *ip, 122 xfs_inode_t *ip,
242 xfs_off_t offset, 123 xfs_off_t offset,
243 size_t count, 124 size_t count,
244 int flags,
245 xfs_bmbt_irec_t *imap, 125 xfs_bmbt_irec_t *imap,
246 int *nmaps) 126 int nmaps)
247{ 127{
248 xfs_mount_t *mp = ip->i_mount; 128 xfs_mount_t *mp = ip->i_mount;
249 xfs_fileoff_t offset_fsb; 129 xfs_fileoff_t offset_fsb;
@@ -279,7 +159,7 @@ xfs_iomap_write_direct(
279 if (error) 159 if (error)
280 goto error_out; 160 goto error_out;
281 } else { 161 } else {
282 if (*nmaps && (imap->br_startblock == HOLESTARTBLOCK)) 162 if (nmaps && (imap->br_startblock == HOLESTARTBLOCK))
283 last_fsb = MIN(last_fsb, (xfs_fileoff_t) 163 last_fsb = MIN(last_fsb, (xfs_fileoff_t)
284 imap->br_blockcount + 164 imap->br_blockcount +
285 imap->br_startoff); 165 imap->br_startoff);
@@ -331,7 +211,7 @@ xfs_iomap_write_direct(
331 xfs_trans_ijoin(tp, ip); 211 xfs_trans_ijoin(tp, ip);
332 212
333 bmapi_flag = XFS_BMAPI_WRITE; 213 bmapi_flag = XFS_BMAPI_WRITE;
334 if ((flags & BMAPI_DIRECT) && (offset < ip->i_size || extsz)) 214 if (offset < ip->i_size || extsz)
335 bmapi_flag |= XFS_BMAPI_PREALLOC; 215 bmapi_flag |= XFS_BMAPI_PREALLOC;
336 216
337 /* 217 /*
@@ -370,7 +250,6 @@ xfs_iomap_write_direct(
370 goto error_out; 250 goto error_out;
371 } 251 }
372 252
373 *nmaps = 1;
374 return 0; 253 return 0;
375 254
376error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ 255error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
@@ -379,7 +258,6 @@ error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
379 258
380error1: /* Just cancel transaction */ 259error1: /* Just cancel transaction */
381 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 260 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
382 *nmaps = 0; /* nothing set-up here */
383 261
384error_out: 262error_out:
385 return XFS_ERROR(error); 263 return XFS_ERROR(error);
@@ -389,6 +267,9 @@ error_out:
389 * If the caller is doing a write at the end of the file, then extend the 267 * If the caller is doing a write at the end of the file, then extend the
390 * allocation out to the file system's write iosize. We clean up any extra 268 * allocation out to the file system's write iosize. We clean up any extra
391 * space left over when the file is closed in xfs_inactive(). 269 * space left over when the file is closed in xfs_inactive().
270 *
271 * If we find we already have delalloc preallocation beyond EOF, don't do more
272 * preallocation as it is not needed.
392 */ 273 */
393STATIC int 274STATIC int
394xfs_iomap_eof_want_preallocate( 275xfs_iomap_eof_want_preallocate(
@@ -396,7 +277,6 @@ xfs_iomap_eof_want_preallocate(
396 xfs_inode_t *ip, 277 xfs_inode_t *ip,
397 xfs_off_t offset, 278 xfs_off_t offset,
398 size_t count, 279 size_t count,
399 int ioflag,
400 xfs_bmbt_irec_t *imap, 280 xfs_bmbt_irec_t *imap,
401 int nimaps, 281 int nimaps,
402 int *prealloc) 282 int *prealloc)
@@ -405,6 +285,7 @@ xfs_iomap_eof_want_preallocate(
405 xfs_filblks_t count_fsb; 285 xfs_filblks_t count_fsb;
406 xfs_fsblock_t firstblock; 286 xfs_fsblock_t firstblock;
407 int n, error, imaps; 287 int n, error, imaps;
288 int found_delalloc = 0;
408 289
409 *prealloc = 0; 290 *prealloc = 0;
410 if ((offset + count) <= ip->i_size) 291 if ((offset + count) <= ip->i_size)
@@ -429,20 +310,66 @@ xfs_iomap_eof_want_preallocate(
429 return 0; 310 return 0;
430 start_fsb += imap[n].br_blockcount; 311 start_fsb += imap[n].br_blockcount;
431 count_fsb -= imap[n].br_blockcount; 312 count_fsb -= imap[n].br_blockcount;
313
314 if (imap[n].br_startblock == DELAYSTARTBLOCK)
315 found_delalloc = 1;
432 } 316 }
433 } 317 }
434 *prealloc = 1; 318 if (!found_delalloc)
319 *prealloc = 1;
435 return 0; 320 return 0;
436} 321}
437 322
438STATIC int 323/*
324 * If we don't have a user specified preallocation size, dynamically increase
325 * the preallocation size as the size of the file grows. Cap the maximum size
326 * at a single extent or less if the filesystem is near full. The closer the
327 * filesystem is to full, the smaller the maximum preallocation.
328 */
329STATIC xfs_fsblock_t
330xfs_iomap_prealloc_size(
331 struct xfs_mount *mp,
332 struct xfs_inode *ip)
333{
334 xfs_fsblock_t alloc_blocks = 0;
335
336 if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
337 int shift = 0;
338 int64_t freesp;
339
340 alloc_blocks = XFS_B_TO_FSB(mp, ip->i_size);
341 alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN,
342 rounddown_pow_of_two(alloc_blocks));
343
344 xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
345 freesp = mp->m_sb.sb_fdblocks;
346 if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) {
347 shift = 2;
348 if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT])
349 shift++;
350 if (freesp < mp->m_low_space[XFS_LOWSP_3_PCNT])
351 shift++;
352 if (freesp < mp->m_low_space[XFS_LOWSP_2_PCNT])
353 shift++;
354 if (freesp < mp->m_low_space[XFS_LOWSP_1_PCNT])
355 shift++;
356 }
357 if (shift)
358 alloc_blocks >>= shift;
359 }
360
361 if (alloc_blocks < mp->m_writeio_blocks)
362 alloc_blocks = mp->m_writeio_blocks;
363
364 return alloc_blocks;
365}
366
367int
439xfs_iomap_write_delay( 368xfs_iomap_write_delay(
440 xfs_inode_t *ip, 369 xfs_inode_t *ip,
441 xfs_off_t offset, 370 xfs_off_t offset,
442 size_t count, 371 size_t count,
443 int ioflag, 372 xfs_bmbt_irec_t *ret_imap)
444 xfs_bmbt_irec_t *ret_imap,
445 int *nmaps)
446{ 373{
447 xfs_mount_t *mp = ip->i_mount; 374 xfs_mount_t *mp = ip->i_mount;
448 xfs_fileoff_t offset_fsb; 375 xfs_fileoff_t offset_fsb;
@@ -469,16 +396,19 @@ xfs_iomap_write_delay(
469 extsz = xfs_get_extsz_hint(ip); 396 extsz = xfs_get_extsz_hint(ip);
470 offset_fsb = XFS_B_TO_FSBT(mp, offset); 397 offset_fsb = XFS_B_TO_FSBT(mp, offset);
471 398
399
472 error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count, 400 error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count,
473 ioflag, imap, XFS_WRITE_IMAPS, &prealloc); 401 imap, XFS_WRITE_IMAPS, &prealloc);
474 if (error) 402 if (error)
475 return error; 403 return error;
476 404
477retry: 405retry:
478 if (prealloc) { 406 if (prealloc) {
407 xfs_fsblock_t alloc_blocks = xfs_iomap_prealloc_size(mp, ip);
408
479 aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1)); 409 aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
480 ioalign = XFS_B_TO_FSBT(mp, aligned_offset); 410 ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
481 last_fsb = ioalign + mp->m_writeio_blocks; 411 last_fsb = ioalign + alloc_blocks;
482 } else { 412 } else {
483 last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); 413 last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
484 } 414 }
@@ -496,22 +426,31 @@ retry:
496 XFS_BMAPI_DELAY | XFS_BMAPI_WRITE | 426 XFS_BMAPI_DELAY | XFS_BMAPI_WRITE |
497 XFS_BMAPI_ENTIRE, &firstblock, 1, imap, 427 XFS_BMAPI_ENTIRE, &firstblock, 1, imap,
498 &nimaps, NULL); 428 &nimaps, NULL);
499 if (error && (error != ENOSPC)) 429 switch (error) {
430 case 0:
431 case ENOSPC:
432 case EDQUOT:
433 break;
434 default:
500 return XFS_ERROR(error); 435 return XFS_ERROR(error);
436 }
501 437
502 /* 438 /*
503 * If bmapi returned us nothing, and if we didn't get back EDQUOT, 439 * If bmapi returned us nothing, we got either ENOSPC or EDQUOT. For
504 * then we must have run out of space - flush all other inodes with 440 * ENOSPC, flush all other inodes with delalloc blocks to free up
505 * delalloc blocks and retry without EOF preallocation. 441 * some of the excess reserved metadata space. For both cases, retry
442 * without EOF preallocation.
506 */ 443 */
507 if (nimaps == 0) { 444 if (nimaps == 0) {
508 trace_xfs_delalloc_enospc(ip, offset, count); 445 trace_xfs_delalloc_enospc(ip, offset, count);
509 if (flushed) 446 if (flushed)
510 return XFS_ERROR(ENOSPC); 447 return XFS_ERROR(error ? error : ENOSPC);
511 448
512 xfs_iunlock(ip, XFS_ILOCK_EXCL); 449 if (error == ENOSPC) {
513 xfs_flush_inodes(ip); 450 xfs_iunlock(ip, XFS_ILOCK_EXCL);
514 xfs_ilock(ip, XFS_ILOCK_EXCL); 451 xfs_flush_inodes(ip);
452 xfs_ilock(ip, XFS_ILOCK_EXCL);
453 }
515 454
516 flushed = 1; 455 flushed = 1;
517 error = 0; 456 error = 0;
@@ -523,8 +462,6 @@ retry:
523 return xfs_cmn_err_fsblock_zero(ip, &imap[0]); 462 return xfs_cmn_err_fsblock_zero(ip, &imap[0]);
524 463
525 *ret_imap = imap[0]; 464 *ret_imap = imap[0];
526 *nmaps = 1;
527
528 return 0; 465 return 0;
529} 466}
530 467
@@ -538,13 +475,12 @@ retry:
538 * We no longer bother to look at the incoming map - all we have to 475 * We no longer bother to look at the incoming map - all we have to
539 * guarantee is that whatever we allocate fills the required range. 476 * guarantee is that whatever we allocate fills the required range.
540 */ 477 */
541STATIC int 478int
542xfs_iomap_write_allocate( 479xfs_iomap_write_allocate(
543 xfs_inode_t *ip, 480 xfs_inode_t *ip,
544 xfs_off_t offset, 481 xfs_off_t offset,
545 size_t count, 482 size_t count,
546 xfs_bmbt_irec_t *imap, 483 xfs_bmbt_irec_t *imap)
547 int *retmap)
548{ 484{
549 xfs_mount_t *mp = ip->i_mount; 485 xfs_mount_t *mp = ip->i_mount;
550 xfs_fileoff_t offset_fsb, last_block; 486 xfs_fileoff_t offset_fsb, last_block;
@@ -557,8 +493,6 @@ xfs_iomap_write_allocate(
557 int error = 0; 493 int error = 0;
558 int nres; 494 int nres;
559 495
560 *retmap = 0;
561
562 /* 496 /*
563 * Make sure that the dquots are there. 497 * Make sure that the dquots are there.
564 */ 498 */
@@ -680,7 +614,6 @@ xfs_iomap_write_allocate(
680 if ((offset_fsb >= imap->br_startoff) && 614 if ((offset_fsb >= imap->br_startoff) &&
681 (offset_fsb < (imap->br_startoff + 615 (offset_fsb < (imap->br_startoff +
682 imap->br_blockcount))) { 616 imap->br_blockcount))) {
683 *retmap = 1;
684 XFS_STATS_INC(xs_xstrat_quick); 617 XFS_STATS_INC(xs_xstrat_quick);
685 return 0; 618 return 0;
686 } 619 }
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 7748a430f50..80615760959 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -18,30 +18,15 @@
18#ifndef __XFS_IOMAP_H__ 18#ifndef __XFS_IOMAP_H__
19#define __XFS_IOMAP_H__ 19#define __XFS_IOMAP_H__
20 20
21/* base extent manipulation calls */
22#define BMAPI_READ (1 << 0) /* read extents */
23#define BMAPI_WRITE (1 << 1) /* create extents */
24#define BMAPI_ALLOCATE (1 << 2) /* delayed allocate to real extents */
25
26/* modifiers */
27#define BMAPI_IGNSTATE (1 << 4) /* ignore unwritten state on read */
28#define BMAPI_DIRECT (1 << 5) /* direct instead of buffered write */
29#define BMAPI_MMA (1 << 6) /* allocate for mmap write */
30#define BMAPI_TRYLOCK (1 << 7) /* non-blocking request */
31
32#define BMAPI_FLAGS \
33 { BMAPI_READ, "READ" }, \
34 { BMAPI_WRITE, "WRITE" }, \
35 { BMAPI_ALLOCATE, "ALLOCATE" }, \
36 { BMAPI_IGNSTATE, "IGNSTATE" }, \
37 { BMAPI_DIRECT, "DIRECT" }, \
38 { BMAPI_TRYLOCK, "TRYLOCK" }
39
40struct xfs_inode; 21struct xfs_inode;
41struct xfs_bmbt_irec; 22struct xfs_bmbt_irec;
42 23
43extern int xfs_iomap(struct xfs_inode *, xfs_off_t, ssize_t, int, 24extern int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
44 struct xfs_bmbt_irec *, int *, int *); 25 struct xfs_bmbt_irec *, int);
26extern int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t,
27 struct xfs_bmbt_irec *);
28extern int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, size_t,
29 struct xfs_bmbt_irec *);
45extern int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, size_t); 30extern int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, size_t);
46 31
47#endif /* __XFS_IOMAP_H__*/ 32#endif /* __XFS_IOMAP_H__*/
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index cee4ab9f8a9..0bf24b11d0c 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -47,7 +47,7 @@ STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp,
47 xfs_buftarg_t *log_target, 47 xfs_buftarg_t *log_target,
48 xfs_daddr_t blk_offset, 48 xfs_daddr_t blk_offset,
49 int num_bblks); 49 int num_bblks);
50STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes); 50STATIC int xlog_space_left(struct log *log, atomic64_t *head);
51STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog); 51STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog);
52STATIC void xlog_dealloc_log(xlog_t *log); 52STATIC void xlog_dealloc_log(xlog_t *log);
53 53
@@ -70,7 +70,7 @@ STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog);
70/* local functions to manipulate grant head */ 70/* local functions to manipulate grant head */
71STATIC int xlog_grant_log_space(xlog_t *log, 71STATIC int xlog_grant_log_space(xlog_t *log,
72 xlog_ticket_t *xtic); 72 xlog_ticket_t *xtic);
73STATIC void xlog_grant_push_ail(xfs_mount_t *mp, 73STATIC void xlog_grant_push_ail(struct log *log,
74 int need_bytes); 74 int need_bytes);
75STATIC void xlog_regrant_reserve_log_space(xlog_t *log, 75STATIC void xlog_regrant_reserve_log_space(xlog_t *log,
76 xlog_ticket_t *ticket); 76 xlog_ticket_t *ticket);
@@ -81,98 +81,73 @@ STATIC void xlog_ungrant_log_space(xlog_t *log,
81 81
82#if defined(DEBUG) 82#if defined(DEBUG)
83STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr); 83STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr);
84STATIC void xlog_verify_grant_head(xlog_t *log, int equals); 84STATIC void xlog_verify_grant_tail(struct log *log);
85STATIC void xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog, 85STATIC void xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog,
86 int count, boolean_t syncing); 86 int count, boolean_t syncing);
87STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog, 87STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog,
88 xfs_lsn_t tail_lsn); 88 xfs_lsn_t tail_lsn);
89#else 89#else
90#define xlog_verify_dest_ptr(a,b) 90#define xlog_verify_dest_ptr(a,b)
91#define xlog_verify_grant_head(a,b) 91#define xlog_verify_grant_tail(a)
92#define xlog_verify_iclog(a,b,c,d) 92#define xlog_verify_iclog(a,b,c,d)
93#define xlog_verify_tail_lsn(a,b,c) 93#define xlog_verify_tail_lsn(a,b,c)
94#endif 94#endif
95 95
96STATIC int xlog_iclogs_empty(xlog_t *log); 96STATIC int xlog_iclogs_empty(xlog_t *log);
97 97
98
99static void 98static void
100xlog_ins_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic) 99xlog_grant_sub_space(
100 struct log *log,
101 atomic64_t *head,
102 int bytes)
101{ 103{
102 if (*qp) { 104 int64_t head_val = atomic64_read(head);
103 tic->t_next = (*qp); 105 int64_t new, old;
104 tic->t_prev = (*qp)->t_prev;
105 (*qp)->t_prev->t_next = tic;
106 (*qp)->t_prev = tic;
107 } else {
108 tic->t_prev = tic->t_next = tic;
109 *qp = tic;
110 }
111 106
112 tic->t_flags |= XLOG_TIC_IN_Q; 107 do {
113} 108 int cycle, space;
114 109
115static void 110 xlog_crack_grant_head_val(head_val, &cycle, &space);
116xlog_del_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic)
117{
118 if (tic == tic->t_next) {
119 *qp = NULL;
120 } else {
121 *qp = tic->t_next;
122 tic->t_next->t_prev = tic->t_prev;
123 tic->t_prev->t_next = tic->t_next;
124 }
125 111
126 tic->t_next = tic->t_prev = NULL; 112 space -= bytes;
127 tic->t_flags &= ~XLOG_TIC_IN_Q; 113 if (space < 0) {
114 space += log->l_logsize;
115 cycle--;
116 }
117
118 old = head_val;
119 new = xlog_assign_grant_head_val(cycle, space);
120 head_val = atomic64_cmpxchg(head, old, new);
121 } while (head_val != old);
128} 122}
129 123
130static void 124static void
131xlog_grant_sub_space(struct log *log, int bytes) 125xlog_grant_add_space(
126 struct log *log,
127 atomic64_t *head,
128 int bytes)
132{ 129{
133 log->l_grant_write_bytes -= bytes; 130 int64_t head_val = atomic64_read(head);
134 if (log->l_grant_write_bytes < 0) { 131 int64_t new, old;
135 log->l_grant_write_bytes += log->l_logsize;
136 log->l_grant_write_cycle--;
137 }
138
139 log->l_grant_reserve_bytes -= bytes;
140 if ((log)->l_grant_reserve_bytes < 0) {
141 log->l_grant_reserve_bytes += log->l_logsize;
142 log->l_grant_reserve_cycle--;
143 }
144 132
145} 133 do {
134 int tmp;
135 int cycle, space;
146 136
147static void 137 xlog_crack_grant_head_val(head_val, &cycle, &space);
148xlog_grant_add_space_write(struct log *log, int bytes)
149{
150 int tmp = log->l_logsize - log->l_grant_write_bytes;
151 if (tmp > bytes)
152 log->l_grant_write_bytes += bytes;
153 else {
154 log->l_grant_write_cycle++;
155 log->l_grant_write_bytes = bytes - tmp;
156 }
157}
158 138
159static void 139 tmp = log->l_logsize - space;
160xlog_grant_add_space_reserve(struct log *log, int bytes) 140 if (tmp > bytes)
161{ 141 space += bytes;
162 int tmp = log->l_logsize - log->l_grant_reserve_bytes; 142 else {
163 if (tmp > bytes) 143 space = bytes - tmp;
164 log->l_grant_reserve_bytes += bytes; 144 cycle++;
165 else { 145 }
166 log->l_grant_reserve_cycle++;
167 log->l_grant_reserve_bytes = bytes - tmp;
168 }
169}
170 146
171static inline void 147 old = head_val;
172xlog_grant_add_space(struct log *log, int bytes) 148 new = xlog_assign_grant_head_val(cycle, space);
173{ 149 head_val = atomic64_cmpxchg(head, old, new);
174 xlog_grant_add_space_write(log, bytes); 150 } while (head_val != old);
175 xlog_grant_add_space_reserve(log, bytes);
176} 151}
177 152
178static void 153static void
@@ -355,7 +330,7 @@ xfs_log_reserve(
355 330
356 trace_xfs_log_reserve(log, internal_ticket); 331 trace_xfs_log_reserve(log, internal_ticket);
357 332
358 xlog_grant_push_ail(mp, internal_ticket->t_unit_res); 333 xlog_grant_push_ail(log, internal_ticket->t_unit_res);
359 retval = xlog_regrant_write_log_space(log, internal_ticket); 334 retval = xlog_regrant_write_log_space(log, internal_ticket);
360 } else { 335 } else {
361 /* may sleep if need to allocate more tickets */ 336 /* may sleep if need to allocate more tickets */
@@ -369,7 +344,7 @@ xfs_log_reserve(
369 344
370 trace_xfs_log_reserve(log, internal_ticket); 345 trace_xfs_log_reserve(log, internal_ticket);
371 346
372 xlog_grant_push_ail(mp, 347 xlog_grant_push_ail(log,
373 (internal_ticket->t_unit_res * 348 (internal_ticket->t_unit_res *
374 internal_ticket->t_cnt)); 349 internal_ticket->t_cnt));
375 retval = xlog_grant_log_space(log, internal_ticket); 350 retval = xlog_grant_log_space(log, internal_ticket);
@@ -584,8 +559,8 @@ xfs_log_unmount_write(xfs_mount_t *mp)
584 if (!(iclog->ic_state == XLOG_STATE_ACTIVE || 559 if (!(iclog->ic_state == XLOG_STATE_ACTIVE ||
585 iclog->ic_state == XLOG_STATE_DIRTY)) { 560 iclog->ic_state == XLOG_STATE_DIRTY)) {
586 if (!XLOG_FORCED_SHUTDOWN(log)) { 561 if (!XLOG_FORCED_SHUTDOWN(log)) {
587 sv_wait(&iclog->ic_force_wait, PMEM, 562 xlog_wait(&iclog->ic_force_wait,
588 &log->l_icloglock, s); 563 &log->l_icloglock);
589 } else { 564 } else {
590 spin_unlock(&log->l_icloglock); 565 spin_unlock(&log->l_icloglock);
591 } 566 }
@@ -625,8 +600,8 @@ xfs_log_unmount_write(xfs_mount_t *mp)
625 || iclog->ic_state == XLOG_STATE_DIRTY 600 || iclog->ic_state == XLOG_STATE_DIRTY
626 || iclog->ic_state == XLOG_STATE_IOERROR) ) { 601 || iclog->ic_state == XLOG_STATE_IOERROR) ) {
627 602
628 sv_wait(&iclog->ic_force_wait, PMEM, 603 xlog_wait(&iclog->ic_force_wait,
629 &log->l_icloglock, s); 604 &log->l_icloglock);
630 } else { 605 } else {
631 spin_unlock(&log->l_icloglock); 606 spin_unlock(&log->l_icloglock);
632 } 607 }
@@ -703,55 +678,46 @@ xfs_log_move_tail(xfs_mount_t *mp,
703{ 678{
704 xlog_ticket_t *tic; 679 xlog_ticket_t *tic;
705 xlog_t *log = mp->m_log; 680 xlog_t *log = mp->m_log;
706 int need_bytes, free_bytes, cycle, bytes; 681 int need_bytes, free_bytes;
707 682
708 if (XLOG_FORCED_SHUTDOWN(log)) 683 if (XLOG_FORCED_SHUTDOWN(log))
709 return; 684 return;
710 685
711 if (tail_lsn == 0) { 686 if (tail_lsn == 0)
712 /* needed since sync_lsn is 64 bits */ 687 tail_lsn = atomic64_read(&log->l_last_sync_lsn);
713 spin_lock(&log->l_icloglock);
714 tail_lsn = log->l_last_sync_lsn;
715 spin_unlock(&log->l_icloglock);
716 }
717
718 spin_lock(&log->l_grant_lock);
719 688
720 /* Also an invalid lsn. 1 implies that we aren't passing in a valid 689 /* tail_lsn == 1 implies that we weren't passed a valid value. */
721 * tail_lsn. 690 if (tail_lsn != 1)
722 */ 691 atomic64_set(&log->l_tail_lsn, tail_lsn);
723 if (tail_lsn != 1) {
724 log->l_tail_lsn = tail_lsn;
725 }
726 692
727 if ((tic = log->l_write_headq)) { 693 if (!list_empty_careful(&log->l_writeq)) {
728#ifdef DEBUG 694#ifdef DEBUG
729 if (log->l_flags & XLOG_ACTIVE_RECOVERY) 695 if (log->l_flags & XLOG_ACTIVE_RECOVERY)
730 panic("Recovery problem"); 696 panic("Recovery problem");
731#endif 697#endif
732 cycle = log->l_grant_write_cycle; 698 spin_lock(&log->l_grant_write_lock);
733 bytes = log->l_grant_write_bytes; 699 free_bytes = xlog_space_left(log, &log->l_grant_write_head);
734 free_bytes = xlog_space_left(log, cycle, bytes); 700 list_for_each_entry(tic, &log->l_writeq, t_queue) {
735 do {
736 ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV); 701 ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV);
737 702
738 if (free_bytes < tic->t_unit_res && tail_lsn != 1) 703 if (free_bytes < tic->t_unit_res && tail_lsn != 1)
739 break; 704 break;
740 tail_lsn = 0; 705 tail_lsn = 0;
741 free_bytes -= tic->t_unit_res; 706 free_bytes -= tic->t_unit_res;
742 sv_signal(&tic->t_wait); 707 trace_xfs_log_regrant_write_wake_up(log, tic);
743 tic = tic->t_next; 708 wake_up(&tic->t_wait);
744 } while (tic != log->l_write_headq); 709 }
710 spin_unlock(&log->l_grant_write_lock);
745 } 711 }
746 if ((tic = log->l_reserve_headq)) { 712
713 if (!list_empty_careful(&log->l_reserveq)) {
747#ifdef DEBUG 714#ifdef DEBUG
748 if (log->l_flags & XLOG_ACTIVE_RECOVERY) 715 if (log->l_flags & XLOG_ACTIVE_RECOVERY)
749 panic("Recovery problem"); 716 panic("Recovery problem");
750#endif 717#endif
751 cycle = log->l_grant_reserve_cycle; 718 spin_lock(&log->l_grant_reserve_lock);
752 bytes = log->l_grant_reserve_bytes; 719 free_bytes = xlog_space_left(log, &log->l_grant_reserve_head);
753 free_bytes = xlog_space_left(log, cycle, bytes); 720 list_for_each_entry(tic, &log->l_reserveq, t_queue) {
754 do {
755 if (tic->t_flags & XLOG_TIC_PERM_RESERV) 721 if (tic->t_flags & XLOG_TIC_PERM_RESERV)
756 need_bytes = tic->t_unit_res*tic->t_cnt; 722 need_bytes = tic->t_unit_res*tic->t_cnt;
757 else 723 else
@@ -760,12 +726,12 @@ xfs_log_move_tail(xfs_mount_t *mp,
760 break; 726 break;
761 tail_lsn = 0; 727 tail_lsn = 0;
762 free_bytes -= need_bytes; 728 free_bytes -= need_bytes;
763 sv_signal(&tic->t_wait); 729 trace_xfs_log_grant_wake_up(log, tic);
764 tic = tic->t_next; 730 wake_up(&tic->t_wait);
765 } while (tic != log->l_reserve_headq); 731 }
732 spin_unlock(&log->l_grant_reserve_lock);
766 } 733 }
767 spin_unlock(&log->l_grant_lock); 734}
768} /* xfs_log_move_tail */
769 735
770/* 736/*
771 * Determine if we have a transaction that has gone to disk 737 * Determine if we have a transaction that has gone to disk
@@ -831,23 +797,19 @@ xfs_log_need_covered(xfs_mount_t *mp)
831 * We may be holding the log iclog lock upon entering this routine. 797 * We may be holding the log iclog lock upon entering this routine.
832 */ 798 */
833xfs_lsn_t 799xfs_lsn_t
834xlog_assign_tail_lsn(xfs_mount_t *mp) 800xlog_assign_tail_lsn(
801 struct xfs_mount *mp)
835{ 802{
836 xfs_lsn_t tail_lsn; 803 xfs_lsn_t tail_lsn;
837 xlog_t *log = mp->m_log; 804 struct log *log = mp->m_log;
838 805
839 tail_lsn = xfs_trans_ail_tail(mp->m_ail); 806 tail_lsn = xfs_trans_ail_tail(mp->m_ail);
840 spin_lock(&log->l_grant_lock); 807 if (!tail_lsn)
841 if (tail_lsn != 0) { 808 tail_lsn = atomic64_read(&log->l_last_sync_lsn);
842 log->l_tail_lsn = tail_lsn;
843 } else {
844 tail_lsn = log->l_tail_lsn = log->l_last_sync_lsn;
845 }
846 spin_unlock(&log->l_grant_lock);
847 809
810 atomic64_set(&log->l_tail_lsn, tail_lsn);
848 return tail_lsn; 811 return tail_lsn;
849} /* xlog_assign_tail_lsn */ 812}
850
851 813
852/* 814/*
853 * Return the space in the log between the tail and the head. The head 815 * Return the space in the log between the tail and the head. The head
@@ -864,21 +826,26 @@ xlog_assign_tail_lsn(xfs_mount_t *mp)
864 * result is that we return the size of the log as the amount of space left. 826 * result is that we return the size of the log as the amount of space left.
865 */ 827 */
866STATIC int 828STATIC int
867xlog_space_left(xlog_t *log, int cycle, int bytes) 829xlog_space_left(
868{ 830 struct log *log,
869 int free_bytes; 831 atomic64_t *head)
870 int tail_bytes; 832{
871 int tail_cycle; 833 int free_bytes;
872 834 int tail_bytes;
873 tail_bytes = BBTOB(BLOCK_LSN(log->l_tail_lsn)); 835 int tail_cycle;
874 tail_cycle = CYCLE_LSN(log->l_tail_lsn); 836 int head_cycle;
875 if ((tail_cycle == cycle) && (bytes >= tail_bytes)) { 837 int head_bytes;
876 free_bytes = log->l_logsize - (bytes - tail_bytes); 838
877 } else if ((tail_cycle + 1) < cycle) { 839 xlog_crack_grant_head(head, &head_cycle, &head_bytes);
840 xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_bytes);
841 tail_bytes = BBTOB(tail_bytes);
842 if (tail_cycle == head_cycle && head_bytes >= tail_bytes)
843 free_bytes = log->l_logsize - (head_bytes - tail_bytes);
844 else if (tail_cycle + 1 < head_cycle)
878 return 0; 845 return 0;
879 } else if (tail_cycle < cycle) { 846 else if (tail_cycle < head_cycle) {
880 ASSERT(tail_cycle == (cycle - 1)); 847 ASSERT(tail_cycle == (head_cycle - 1));
881 free_bytes = tail_bytes - bytes; 848 free_bytes = tail_bytes - head_bytes;
882 } else { 849 } else {
883 /* 850 /*
884 * The reservation head is behind the tail. 851 * The reservation head is behind the tail.
@@ -889,12 +856,12 @@ xlog_space_left(xlog_t *log, int cycle, int bytes)
889 "xlog_space_left: head behind tail\n" 856 "xlog_space_left: head behind tail\n"
890 " tail_cycle = %d, tail_bytes = %d\n" 857 " tail_cycle = %d, tail_bytes = %d\n"
891 " GH cycle = %d, GH bytes = %d", 858 " GH cycle = %d, GH bytes = %d",
892 tail_cycle, tail_bytes, cycle, bytes); 859 tail_cycle, tail_bytes, head_cycle, head_bytes);
893 ASSERT(0); 860 ASSERT(0);
894 free_bytes = log->l_logsize; 861 free_bytes = log->l_logsize;
895 } 862 }
896 return free_bytes; 863 return free_bytes;
897} /* xlog_space_left */ 864}
898 865
899 866
900/* 867/*
@@ -1047,12 +1014,16 @@ xlog_alloc_log(xfs_mount_t *mp,
1047 log->l_flags |= XLOG_ACTIVE_RECOVERY; 1014 log->l_flags |= XLOG_ACTIVE_RECOVERY;
1048 1015
1049 log->l_prev_block = -1; 1016 log->l_prev_block = -1;
1050 log->l_tail_lsn = xlog_assign_lsn(1, 0);
1051 /* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */ 1017 /* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */
1052 log->l_last_sync_lsn = log->l_tail_lsn; 1018 xlog_assign_atomic_lsn(&log->l_tail_lsn, 1, 0);
1019 xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1, 0);
1053 log->l_curr_cycle = 1; /* 0 is bad since this is initial value */ 1020 log->l_curr_cycle = 1; /* 0 is bad since this is initial value */
1054 log->l_grant_reserve_cycle = 1; 1021 xlog_assign_grant_head(&log->l_grant_reserve_head, 1, 0);
1055 log->l_grant_write_cycle = 1; 1022 xlog_assign_grant_head(&log->l_grant_write_head, 1, 0);
1023 INIT_LIST_HEAD(&log->l_reserveq);
1024 INIT_LIST_HEAD(&log->l_writeq);
1025 spin_lock_init(&log->l_grant_reserve_lock);
1026 spin_lock_init(&log->l_grant_write_lock);
1056 1027
1057 error = EFSCORRUPTED; 1028 error = EFSCORRUPTED;
1058 if (xfs_sb_version_hassector(&mp->m_sb)) { 1029 if (xfs_sb_version_hassector(&mp->m_sb)) {
@@ -1094,8 +1065,7 @@ xlog_alloc_log(xfs_mount_t *mp,
1094 log->l_xbuf = bp; 1065 log->l_xbuf = bp;
1095 1066
1096 spin_lock_init(&log->l_icloglock); 1067 spin_lock_init(&log->l_icloglock);
1097 spin_lock_init(&log->l_grant_lock); 1068 init_waitqueue_head(&log->l_flush_wait);
1098 sv_init(&log->l_flush_wait, 0, "flush_wait");
1099 1069
1100 /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ 1070 /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
1101 ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); 1071 ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0);
@@ -1151,8 +1121,8 @@ xlog_alloc_log(xfs_mount_t *mp,
1151 1121
1152 ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp)); 1122 ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp));
1153 ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0); 1123 ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0);
1154 sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force"); 1124 init_waitqueue_head(&iclog->ic_force_wait);
1155 sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write"); 1125 init_waitqueue_head(&iclog->ic_write_wait);
1156 1126
1157 iclogp = &iclog->ic_next; 1127 iclogp = &iclog->ic_next;
1158 } 1128 }
@@ -1167,15 +1137,11 @@ xlog_alloc_log(xfs_mount_t *mp,
1167out_free_iclog: 1137out_free_iclog:
1168 for (iclog = log->l_iclog; iclog; iclog = prev_iclog) { 1138 for (iclog = log->l_iclog; iclog; iclog = prev_iclog) {
1169 prev_iclog = iclog->ic_next; 1139 prev_iclog = iclog->ic_next;
1170 if (iclog->ic_bp) { 1140 if (iclog->ic_bp)
1171 sv_destroy(&iclog->ic_force_wait);
1172 sv_destroy(&iclog->ic_write_wait);
1173 xfs_buf_free(iclog->ic_bp); 1141 xfs_buf_free(iclog->ic_bp);
1174 }
1175 kmem_free(iclog); 1142 kmem_free(iclog);
1176 } 1143 }
1177 spinlock_destroy(&log->l_icloglock); 1144 spinlock_destroy(&log->l_icloglock);
1178 spinlock_destroy(&log->l_grant_lock);
1179 xfs_buf_free(log->l_xbuf); 1145 xfs_buf_free(log->l_xbuf);
1180out_free_log: 1146out_free_log:
1181 kmem_free(log); 1147 kmem_free(log);
@@ -1223,61 +1189,60 @@ xlog_commit_record(
1223 * water mark. In this manner, we would be creating a low water mark. 1189 * water mark. In this manner, we would be creating a low water mark.
1224 */ 1190 */
1225STATIC void 1191STATIC void
1226xlog_grant_push_ail(xfs_mount_t *mp, 1192xlog_grant_push_ail(
1227 int need_bytes) 1193 struct log *log,
1194 int need_bytes)
1228{ 1195{
1229 xlog_t *log = mp->m_log; /* pointer to the log */ 1196 xfs_lsn_t threshold_lsn = 0;
1230 xfs_lsn_t tail_lsn; /* lsn of the log tail */ 1197 xfs_lsn_t last_sync_lsn;
1231 xfs_lsn_t threshold_lsn = 0; /* lsn we'd like to be at */ 1198 int free_blocks;
1232 int free_blocks; /* free blocks left to write to */ 1199 int free_bytes;
1233 int free_bytes; /* free bytes left to write to */ 1200 int threshold_block;
1234 int threshold_block; /* block in lsn we'd like to be at */ 1201 int threshold_cycle;
1235 int threshold_cycle; /* lsn cycle we'd like to be at */ 1202 int free_threshold;
1236 int free_threshold; 1203
1237 1204 ASSERT(BTOBB(need_bytes) < log->l_logBBsize);
1238 ASSERT(BTOBB(need_bytes) < log->l_logBBsize); 1205
1239 1206 free_bytes = xlog_space_left(log, &log->l_grant_reserve_head);
1240 spin_lock(&log->l_grant_lock); 1207 free_blocks = BTOBBT(free_bytes);
1241 free_bytes = xlog_space_left(log, 1208
1242 log->l_grant_reserve_cycle, 1209 /*
1243 log->l_grant_reserve_bytes); 1210 * Set the threshold for the minimum number of free blocks in the
1244 tail_lsn = log->l_tail_lsn; 1211 * log to the maximum of what the caller needs, one quarter of the
1245 free_blocks = BTOBBT(free_bytes); 1212 * log, and 256 blocks.
1246 1213 */
1247 /* 1214 free_threshold = BTOBB(need_bytes);
1248 * Set the threshold for the minimum number of free blocks in the 1215 free_threshold = MAX(free_threshold, (log->l_logBBsize >> 2));
1249 * log to the maximum of what the caller needs, one quarter of the 1216 free_threshold = MAX(free_threshold, 256);
1250 * log, and 256 blocks. 1217 if (free_blocks >= free_threshold)
1251 */ 1218 return;
1252 free_threshold = BTOBB(need_bytes); 1219
1253 free_threshold = MAX(free_threshold, (log->l_logBBsize >> 2)); 1220 xlog_crack_atomic_lsn(&log->l_tail_lsn, &threshold_cycle,
1254 free_threshold = MAX(free_threshold, 256); 1221 &threshold_block);
1255 if (free_blocks < free_threshold) { 1222 threshold_block += free_threshold;
1256 threshold_block = BLOCK_LSN(tail_lsn) + free_threshold;
1257 threshold_cycle = CYCLE_LSN(tail_lsn);
1258 if (threshold_block >= log->l_logBBsize) { 1223 if (threshold_block >= log->l_logBBsize) {
1259 threshold_block -= log->l_logBBsize; 1224 threshold_block -= log->l_logBBsize;
1260 threshold_cycle += 1; 1225 threshold_cycle += 1;
1261 } 1226 }
1262 threshold_lsn = xlog_assign_lsn(threshold_cycle, threshold_block); 1227 threshold_lsn = xlog_assign_lsn(threshold_cycle,
1228 threshold_block);
1229 /*
1230 * Don't pass in an lsn greater than the lsn of the last
1231 * log record known to be on disk. Use a snapshot of the last sync lsn
1232 * so that it doesn't change between the compare and the set.
1233 */
1234 last_sync_lsn = atomic64_read(&log->l_last_sync_lsn);
1235 if (XFS_LSN_CMP(threshold_lsn, last_sync_lsn) > 0)
1236 threshold_lsn = last_sync_lsn;
1263 1237
1264 /* Don't pass in an lsn greater than the lsn of the last 1238 /*
1265 * log record known to be on disk. 1239 * Get the transaction layer to kick the dirty buffers out to
1240 * disk asynchronously. No point in trying to do this if
1241 * the filesystem is shutting down.
1266 */ 1242 */
1267 if (XFS_LSN_CMP(threshold_lsn, log->l_last_sync_lsn) > 0) 1243 if (!XLOG_FORCED_SHUTDOWN(log))
1268 threshold_lsn = log->l_last_sync_lsn; 1244 xfs_trans_ail_push(log->l_ailp, threshold_lsn);
1269 } 1245}
1270 spin_unlock(&log->l_grant_lock);
1271
1272 /*
1273 * Get the transaction layer to kick the dirty buffers out to
1274 * disk asynchronously. No point in trying to do this if
1275 * the filesystem is shutting down.
1276 */
1277 if (threshold_lsn &&
1278 !XLOG_FORCED_SHUTDOWN(log))
1279 xfs_trans_ail_push(log->l_ailp, threshold_lsn);
1280} /* xlog_grant_push_ail */
1281 1246
1282/* 1247/*
1283 * The bdstrat callback function for log bufs. This gives us a central 1248 * The bdstrat callback function for log bufs. This gives us a central
@@ -1372,9 +1337,8 @@ xlog_sync(xlog_t *log,
1372 roundoff < BBTOB(1))); 1337 roundoff < BBTOB(1)));
1373 1338
1374 /* move grant heads by roundoff in sync */ 1339 /* move grant heads by roundoff in sync */
1375 spin_lock(&log->l_grant_lock); 1340 xlog_grant_add_space(log, &log->l_grant_reserve_head, roundoff);
1376 xlog_grant_add_space(log, roundoff); 1341 xlog_grant_add_space(log, &log->l_grant_write_head, roundoff);
1377 spin_unlock(&log->l_grant_lock);
1378 1342
1379 /* put cycle number in every block */ 1343 /* put cycle number in every block */
1380 xlog_pack_data(log, iclog, roundoff); 1344 xlog_pack_data(log, iclog, roundoff);
@@ -1489,15 +1453,12 @@ xlog_dealloc_log(xlog_t *log)
1489 1453
1490 iclog = log->l_iclog; 1454 iclog = log->l_iclog;
1491 for (i=0; i<log->l_iclog_bufs; i++) { 1455 for (i=0; i<log->l_iclog_bufs; i++) {
1492 sv_destroy(&iclog->ic_force_wait);
1493 sv_destroy(&iclog->ic_write_wait);
1494 xfs_buf_free(iclog->ic_bp); 1456 xfs_buf_free(iclog->ic_bp);
1495 next_iclog = iclog->ic_next; 1457 next_iclog = iclog->ic_next;
1496 kmem_free(iclog); 1458 kmem_free(iclog);
1497 iclog = next_iclog; 1459 iclog = next_iclog;
1498 } 1460 }
1499 spinlock_destroy(&log->l_icloglock); 1461 spinlock_destroy(&log->l_icloglock);
1500 spinlock_destroy(&log->l_grant_lock);
1501 1462
1502 xfs_buf_free(log->l_xbuf); 1463 xfs_buf_free(log->l_xbuf);
1503 log->l_mp->m_log = NULL; 1464 log->l_mp->m_log = NULL;
@@ -2232,7 +2193,7 @@ xlog_state_do_callback(
2232 lowest_lsn = xlog_get_lowest_lsn(log); 2193 lowest_lsn = xlog_get_lowest_lsn(log);
2233 if (lowest_lsn && 2194 if (lowest_lsn &&
2234 XFS_LSN_CMP(lowest_lsn, 2195 XFS_LSN_CMP(lowest_lsn,
2235 be64_to_cpu(iclog->ic_header.h_lsn)) < 0) { 2196 be64_to_cpu(iclog->ic_header.h_lsn)) < 0) {
2236 iclog = iclog->ic_next; 2197 iclog = iclog->ic_next;
2237 continue; /* Leave this iclog for 2198 continue; /* Leave this iclog for
2238 * another thread */ 2199 * another thread */
@@ -2240,23 +2201,21 @@ xlog_state_do_callback(
2240 2201
2241 iclog->ic_state = XLOG_STATE_CALLBACK; 2202 iclog->ic_state = XLOG_STATE_CALLBACK;
2242 2203
2243 spin_unlock(&log->l_icloglock);
2244 2204
2245 /* l_last_sync_lsn field protected by 2205 /*
2246 * l_grant_lock. Don't worry about iclog's lsn. 2206 * update the last_sync_lsn before we drop the
2247 * No one else can be here except us. 2207 * icloglock to ensure we are the only one that
2208 * can update it.
2248 */ 2209 */
2249 spin_lock(&log->l_grant_lock); 2210 ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn),
2250 ASSERT(XFS_LSN_CMP(log->l_last_sync_lsn, 2211 be64_to_cpu(iclog->ic_header.h_lsn)) <= 0);
2251 be64_to_cpu(iclog->ic_header.h_lsn)) <= 0); 2212 atomic64_set(&log->l_last_sync_lsn,
2252 log->l_last_sync_lsn = 2213 be64_to_cpu(iclog->ic_header.h_lsn));
2253 be64_to_cpu(iclog->ic_header.h_lsn);
2254 spin_unlock(&log->l_grant_lock);
2255 2214
2256 } else { 2215 } else
2257 spin_unlock(&log->l_icloglock);
2258 ioerrors++; 2216 ioerrors++;
2259 } 2217
2218 spin_unlock(&log->l_icloglock);
2260 2219
2261 /* 2220 /*
2262 * Keep processing entries in the callback list until 2221 * Keep processing entries in the callback list until
@@ -2297,7 +2256,7 @@ xlog_state_do_callback(
2297 xlog_state_clean_log(log); 2256 xlog_state_clean_log(log);
2298 2257
2299 /* wake up threads waiting in xfs_log_force() */ 2258 /* wake up threads waiting in xfs_log_force() */
2300 sv_broadcast(&iclog->ic_force_wait); 2259 wake_up_all(&iclog->ic_force_wait);
2301 2260
2302 iclog = iclog->ic_next; 2261 iclog = iclog->ic_next;
2303 } while (first_iclog != iclog); 2262 } while (first_iclog != iclog);
@@ -2344,7 +2303,7 @@ xlog_state_do_callback(
2344 spin_unlock(&log->l_icloglock); 2303 spin_unlock(&log->l_icloglock);
2345 2304
2346 if (wake) 2305 if (wake)
2347 sv_broadcast(&log->l_flush_wait); 2306 wake_up_all(&log->l_flush_wait);
2348} 2307}
2349 2308
2350 2309
@@ -2395,7 +2354,7 @@ xlog_state_done_syncing(
2395 * iclog buffer, we wake them all, one will get to do the 2354 * iclog buffer, we wake them all, one will get to do the
2396 * I/O, the others get to wait for the result. 2355 * I/O, the others get to wait for the result.
2397 */ 2356 */
2398 sv_broadcast(&iclog->ic_write_wait); 2357 wake_up_all(&iclog->ic_write_wait);
2399 spin_unlock(&log->l_icloglock); 2358 spin_unlock(&log->l_icloglock);
2400 xlog_state_do_callback(log, aborted, iclog); /* also cleans log */ 2359 xlog_state_do_callback(log, aborted, iclog); /* also cleans log */
2401} /* xlog_state_done_syncing */ 2360} /* xlog_state_done_syncing */
@@ -2444,7 +2403,7 @@ restart:
2444 XFS_STATS_INC(xs_log_noiclogs); 2403 XFS_STATS_INC(xs_log_noiclogs);
2445 2404
2446 /* Wait for log writes to have flushed */ 2405 /* Wait for log writes to have flushed */
2447 sv_wait(&log->l_flush_wait, 0, &log->l_icloglock, 0); 2406 xlog_wait(&log->l_flush_wait, &log->l_icloglock);
2448 goto restart; 2407 goto restart;
2449 } 2408 }
2450 2409
@@ -2527,6 +2486,18 @@ restart:
2527 * 2486 *
2528 * Once a ticket gets put onto the reserveq, it will only return after 2487 * Once a ticket gets put onto the reserveq, it will only return after
2529 * the needed reservation is satisfied. 2488 * the needed reservation is satisfied.
2489 *
2490 * This function is structured so that it has a lock free fast path. This is
2491 * necessary because every new transaction reservation will come through this
2492 * path. Hence any lock will be globally hot if we take it unconditionally on
2493 * every pass.
2494 *
2495 * As tickets are only ever moved on and off the reserveq under the
2496 * l_grant_reserve_lock, we only need to take that lock if we are going
2497 * to add the ticket to the queue and sleep. We can avoid taking the lock if the
2498 * ticket was never added to the reserveq because the t_queue list head will be
2499 * empty and we hold the only reference to it so it can safely be checked
2500 * unlocked.
2530 */ 2501 */
2531STATIC int 2502STATIC int
2532xlog_grant_log_space(xlog_t *log, 2503xlog_grant_log_space(xlog_t *log,
@@ -2534,24 +2505,27 @@ xlog_grant_log_space(xlog_t *log,
2534{ 2505{
2535 int free_bytes; 2506 int free_bytes;
2536 int need_bytes; 2507 int need_bytes;
2537#ifdef DEBUG
2538 xfs_lsn_t tail_lsn;
2539#endif
2540
2541 2508
2542#ifdef DEBUG 2509#ifdef DEBUG
2543 if (log->l_flags & XLOG_ACTIVE_RECOVERY) 2510 if (log->l_flags & XLOG_ACTIVE_RECOVERY)
2544 panic("grant Recovery problem"); 2511 panic("grant Recovery problem");
2545#endif 2512#endif
2546 2513
2547 /* Is there space or do we need to sleep? */
2548 spin_lock(&log->l_grant_lock);
2549
2550 trace_xfs_log_grant_enter(log, tic); 2514 trace_xfs_log_grant_enter(log, tic);
2551 2515
2516 need_bytes = tic->t_unit_res;
2517 if (tic->t_flags & XFS_LOG_PERM_RESERV)
2518 need_bytes *= tic->t_ocnt;
2519
2552 /* something is already sleeping; insert new transaction at end */ 2520 /* something is already sleeping; insert new transaction at end */
2553 if (log->l_reserve_headq) { 2521 if (!list_empty_careful(&log->l_reserveq)) {
2554 xlog_ins_ticketq(&log->l_reserve_headq, tic); 2522 spin_lock(&log->l_grant_reserve_lock);
2523 /* recheck the queue now we are locked */
2524 if (list_empty(&log->l_reserveq)) {
2525 spin_unlock(&log->l_grant_reserve_lock);
2526 goto redo;
2527 }
2528 list_add_tail(&tic->t_queue, &log->l_reserveq);
2555 2529
2556 trace_xfs_log_grant_sleep1(log, tic); 2530 trace_xfs_log_grant_sleep1(log, tic);
2557 2531
@@ -2563,72 +2537,57 @@ xlog_grant_log_space(xlog_t *log,
2563 goto error_return; 2537 goto error_return;
2564 2538
2565 XFS_STATS_INC(xs_sleep_logspace); 2539 XFS_STATS_INC(xs_sleep_logspace);
2566 sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); 2540 xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock);
2541
2567 /* 2542 /*
2568 * If we got an error, and the filesystem is shutting down, 2543 * If we got an error, and the filesystem is shutting down,
2569 * we'll catch it down below. So just continue... 2544 * we'll catch it down below. So just continue...
2570 */ 2545 */
2571 trace_xfs_log_grant_wake1(log, tic); 2546 trace_xfs_log_grant_wake1(log, tic);
2572 spin_lock(&log->l_grant_lock);
2573 } 2547 }
2574 if (tic->t_flags & XFS_LOG_PERM_RESERV)
2575 need_bytes = tic->t_unit_res*tic->t_ocnt;
2576 else
2577 need_bytes = tic->t_unit_res;
2578 2548
2579redo: 2549redo:
2580 if (XLOG_FORCED_SHUTDOWN(log)) 2550 if (XLOG_FORCED_SHUTDOWN(log))
2581 goto error_return; 2551 goto error_return_unlocked;
2582 2552
2583 free_bytes = xlog_space_left(log, log->l_grant_reserve_cycle, 2553 free_bytes = xlog_space_left(log, &log->l_grant_reserve_head);
2584 log->l_grant_reserve_bytes);
2585 if (free_bytes < need_bytes) { 2554 if (free_bytes < need_bytes) {
2586 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) 2555 spin_lock(&log->l_grant_reserve_lock);
2587 xlog_ins_ticketq(&log->l_reserve_headq, tic); 2556 if (list_empty(&tic->t_queue))
2557 list_add_tail(&tic->t_queue, &log->l_reserveq);
2588 2558
2589 trace_xfs_log_grant_sleep2(log, tic); 2559 trace_xfs_log_grant_sleep2(log, tic);
2590 2560
2591 spin_unlock(&log->l_grant_lock);
2592 xlog_grant_push_ail(log->l_mp, need_bytes);
2593 spin_lock(&log->l_grant_lock);
2594
2595 XFS_STATS_INC(xs_sleep_logspace);
2596 sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
2597
2598 spin_lock(&log->l_grant_lock);
2599 if (XLOG_FORCED_SHUTDOWN(log)) 2561 if (XLOG_FORCED_SHUTDOWN(log))
2600 goto error_return; 2562 goto error_return;
2601 2563
2602 trace_xfs_log_grant_wake2(log, tic); 2564 xlog_grant_push_ail(log, need_bytes);
2565
2566 XFS_STATS_INC(xs_sleep_logspace);
2567 xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock);
2603 2568
2569 trace_xfs_log_grant_wake2(log, tic);
2604 goto redo; 2570 goto redo;
2605 } else if (tic->t_flags & XLOG_TIC_IN_Q) 2571 }
2606 xlog_del_ticketq(&log->l_reserve_headq, tic);
2607 2572
2608 /* we've got enough space */ 2573 if (!list_empty(&tic->t_queue)) {
2609 xlog_grant_add_space(log, need_bytes); 2574 spin_lock(&log->l_grant_reserve_lock);
2610#ifdef DEBUG 2575 list_del_init(&tic->t_queue);
2611 tail_lsn = log->l_tail_lsn; 2576 spin_unlock(&log->l_grant_reserve_lock);
2612 /*
2613 * Check to make sure the grant write head didn't just over lap the
2614 * tail. If the cycles are the same, we can't be overlapping.
2615 * Otherwise, make sure that the cycles differ by exactly one and
2616 * check the byte count.
2617 */
2618 if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) {
2619 ASSERT(log->l_grant_write_cycle-1 == CYCLE_LSN(tail_lsn));
2620 ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn)));
2621 } 2577 }
2622#endif 2578
2579 /* we've got enough space */
2580 xlog_grant_add_space(log, &log->l_grant_reserve_head, need_bytes);
2581 xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
2623 trace_xfs_log_grant_exit(log, tic); 2582 trace_xfs_log_grant_exit(log, tic);
2624 xlog_verify_grant_head(log, 1); 2583 xlog_verify_grant_tail(log);
2625 spin_unlock(&log->l_grant_lock);
2626 return 0; 2584 return 0;
2627 2585
2628 error_return: 2586error_return_unlocked:
2629 if (tic->t_flags & XLOG_TIC_IN_Q) 2587 spin_lock(&log->l_grant_reserve_lock);
2630 xlog_del_ticketq(&log->l_reserve_headq, tic); 2588error_return:
2631 2589 list_del_init(&tic->t_queue);
2590 spin_unlock(&log->l_grant_reserve_lock);
2632 trace_xfs_log_grant_error(log, tic); 2591 trace_xfs_log_grant_error(log, tic);
2633 2592
2634 /* 2593 /*
@@ -2638,7 +2597,6 @@ redo:
2638 */ 2597 */
2639 tic->t_curr_res = 0; 2598 tic->t_curr_res = 0;
2640 tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ 2599 tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
2641 spin_unlock(&log->l_grant_lock);
2642 return XFS_ERROR(EIO); 2600 return XFS_ERROR(EIO);
2643} /* xlog_grant_log_space */ 2601} /* xlog_grant_log_space */
2644 2602
@@ -2646,17 +2604,14 @@ redo:
2646/* 2604/*
2647 * Replenish the byte reservation required by moving the grant write head. 2605 * Replenish the byte reservation required by moving the grant write head.
2648 * 2606 *
2649 * 2607 * Similar to xlog_grant_log_space, the function is structured to have a lock
2608 * free fast path.
2650 */ 2609 */
2651STATIC int 2610STATIC int
2652xlog_regrant_write_log_space(xlog_t *log, 2611xlog_regrant_write_log_space(xlog_t *log,
2653 xlog_ticket_t *tic) 2612 xlog_ticket_t *tic)
2654{ 2613{
2655 int free_bytes, need_bytes; 2614 int free_bytes, need_bytes;
2656 xlog_ticket_t *ntic;
2657#ifdef DEBUG
2658 xfs_lsn_t tail_lsn;
2659#endif
2660 2615
2661 tic->t_curr_res = tic->t_unit_res; 2616 tic->t_curr_res = tic->t_unit_res;
2662 xlog_tic_reset_res(tic); 2617 xlog_tic_reset_res(tic);
@@ -2669,12 +2624,9 @@ xlog_regrant_write_log_space(xlog_t *log,
2669 panic("regrant Recovery problem"); 2624 panic("regrant Recovery problem");
2670#endif 2625#endif
2671 2626
2672 spin_lock(&log->l_grant_lock);
2673
2674 trace_xfs_log_regrant_write_enter(log, tic); 2627 trace_xfs_log_regrant_write_enter(log, tic);
2675
2676 if (XLOG_FORCED_SHUTDOWN(log)) 2628 if (XLOG_FORCED_SHUTDOWN(log))
2677 goto error_return; 2629 goto error_return_unlocked;
2678 2630
2679 /* If there are other waiters on the queue then give them a 2631 /* If there are other waiters on the queue then give them a
2680 * chance at logspace before us. Wake up the first waiters, 2632 * chance at logspace before us. Wake up the first waiters,
@@ -2683,92 +2635,76 @@ xlog_regrant_write_log_space(xlog_t *log,
2683 * this transaction. 2635 * this transaction.
2684 */ 2636 */
2685 need_bytes = tic->t_unit_res; 2637 need_bytes = tic->t_unit_res;
2686 if ((ntic = log->l_write_headq)) { 2638 if (!list_empty_careful(&log->l_writeq)) {
2687 free_bytes = xlog_space_left(log, log->l_grant_write_cycle, 2639 struct xlog_ticket *ntic;
2688 log->l_grant_write_bytes); 2640
2689 do { 2641 spin_lock(&log->l_grant_write_lock);
2642 free_bytes = xlog_space_left(log, &log->l_grant_write_head);
2643 list_for_each_entry(ntic, &log->l_writeq, t_queue) {
2690 ASSERT(ntic->t_flags & XLOG_TIC_PERM_RESERV); 2644 ASSERT(ntic->t_flags & XLOG_TIC_PERM_RESERV);
2691 2645
2692 if (free_bytes < ntic->t_unit_res) 2646 if (free_bytes < ntic->t_unit_res)
2693 break; 2647 break;
2694 free_bytes -= ntic->t_unit_res; 2648 free_bytes -= ntic->t_unit_res;
2695 sv_signal(&ntic->t_wait); 2649 wake_up(&ntic->t_wait);
2696 ntic = ntic->t_next; 2650 }
2697 } while (ntic != log->l_write_headq);
2698
2699 if (ntic != log->l_write_headq) {
2700 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
2701 xlog_ins_ticketq(&log->l_write_headq, tic);
2702 2651
2652 if (ntic != list_first_entry(&log->l_writeq,
2653 struct xlog_ticket, t_queue)) {
2654 if (list_empty(&tic->t_queue))
2655 list_add_tail(&tic->t_queue, &log->l_writeq);
2703 trace_xfs_log_regrant_write_sleep1(log, tic); 2656 trace_xfs_log_regrant_write_sleep1(log, tic);
2704 2657
2705 spin_unlock(&log->l_grant_lock); 2658 xlog_grant_push_ail(log, need_bytes);
2706 xlog_grant_push_ail(log->l_mp, need_bytes);
2707 spin_lock(&log->l_grant_lock);
2708 2659
2709 XFS_STATS_INC(xs_sleep_logspace); 2660 XFS_STATS_INC(xs_sleep_logspace);
2710 sv_wait(&tic->t_wait, PINOD|PLTWAIT, 2661 xlog_wait(&tic->t_wait, &log->l_grant_write_lock);
2711 &log->l_grant_lock, s);
2712
2713 /* If we're shutting down, this tic is already
2714 * off the queue */
2715 spin_lock(&log->l_grant_lock);
2716 if (XLOG_FORCED_SHUTDOWN(log))
2717 goto error_return;
2718
2719 trace_xfs_log_regrant_write_wake1(log, tic); 2662 trace_xfs_log_regrant_write_wake1(log, tic);
2720 } 2663 } else
2664 spin_unlock(&log->l_grant_write_lock);
2721 } 2665 }
2722 2666
2723redo: 2667redo:
2724 if (XLOG_FORCED_SHUTDOWN(log)) 2668 if (XLOG_FORCED_SHUTDOWN(log))
2725 goto error_return; 2669 goto error_return_unlocked;
2726 2670
2727 free_bytes = xlog_space_left(log, log->l_grant_write_cycle, 2671 free_bytes = xlog_space_left(log, &log->l_grant_write_head);
2728 log->l_grant_write_bytes);
2729 if (free_bytes < need_bytes) { 2672 if (free_bytes < need_bytes) {
2730 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) 2673 spin_lock(&log->l_grant_write_lock);
2731 xlog_ins_ticketq(&log->l_write_headq, tic); 2674 if (list_empty(&tic->t_queue))
2732 spin_unlock(&log->l_grant_lock); 2675 list_add_tail(&tic->t_queue, &log->l_writeq);
2733 xlog_grant_push_ail(log->l_mp, need_bytes);
2734 spin_lock(&log->l_grant_lock);
2735
2736 XFS_STATS_INC(xs_sleep_logspace);
2737 trace_xfs_log_regrant_write_sleep2(log, tic);
2738
2739 sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
2740 2676
2741 /* If we're shutting down, this tic is already off the queue */
2742 spin_lock(&log->l_grant_lock);
2743 if (XLOG_FORCED_SHUTDOWN(log)) 2677 if (XLOG_FORCED_SHUTDOWN(log))
2744 goto error_return; 2678 goto error_return;
2745 2679
2680 xlog_grant_push_ail(log, need_bytes);
2681
2682 XFS_STATS_INC(xs_sleep_logspace);
2683 trace_xfs_log_regrant_write_sleep2(log, tic);
2684 xlog_wait(&tic->t_wait, &log->l_grant_write_lock);
2685
2746 trace_xfs_log_regrant_write_wake2(log, tic); 2686 trace_xfs_log_regrant_write_wake2(log, tic);
2747 goto redo; 2687 goto redo;
2748 } else if (tic->t_flags & XLOG_TIC_IN_Q) 2688 }
2749 xlog_del_ticketq(&log->l_write_headq, tic);
2750 2689
2751 /* we've got enough space */ 2690 if (!list_empty(&tic->t_queue)) {
2752 xlog_grant_add_space_write(log, need_bytes); 2691 spin_lock(&log->l_grant_write_lock);
2753#ifdef DEBUG 2692 list_del_init(&tic->t_queue);
2754 tail_lsn = log->l_tail_lsn; 2693 spin_unlock(&log->l_grant_write_lock);
2755 if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) {
2756 ASSERT(log->l_grant_write_cycle-1 == CYCLE_LSN(tail_lsn));
2757 ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn)));
2758 } 2694 }
2759#endif
2760 2695
2696 /* we've got enough space */
2697 xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
2761 trace_xfs_log_regrant_write_exit(log, tic); 2698 trace_xfs_log_regrant_write_exit(log, tic);
2762 2699 xlog_verify_grant_tail(log);
2763 xlog_verify_grant_head(log, 1);
2764 spin_unlock(&log->l_grant_lock);
2765 return 0; 2700 return 0;
2766 2701
2767 2702
2703 error_return_unlocked:
2704 spin_lock(&log->l_grant_write_lock);
2768 error_return: 2705 error_return:
2769 if (tic->t_flags & XLOG_TIC_IN_Q) 2706 list_del_init(&tic->t_queue);
2770 xlog_del_ticketq(&log->l_reserve_headq, tic); 2707 spin_unlock(&log->l_grant_write_lock);
2771
2772 trace_xfs_log_regrant_write_error(log, tic); 2708 trace_xfs_log_regrant_write_error(log, tic);
2773 2709
2774 /* 2710 /*
@@ -2778,7 +2714,6 @@ redo:
2778 */ 2714 */
2779 tic->t_curr_res = 0; 2715 tic->t_curr_res = 0;
2780 tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ 2716 tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
2781 spin_unlock(&log->l_grant_lock);
2782 return XFS_ERROR(EIO); 2717 return XFS_ERROR(EIO);
2783} /* xlog_regrant_write_log_space */ 2718} /* xlog_regrant_write_log_space */
2784 2719
@@ -2799,27 +2734,24 @@ xlog_regrant_reserve_log_space(xlog_t *log,
2799 if (ticket->t_cnt > 0) 2734 if (ticket->t_cnt > 0)
2800 ticket->t_cnt--; 2735 ticket->t_cnt--;
2801 2736
2802 spin_lock(&log->l_grant_lock); 2737 xlog_grant_sub_space(log, &log->l_grant_reserve_head,
2803 xlog_grant_sub_space(log, ticket->t_curr_res); 2738 ticket->t_curr_res);
2739 xlog_grant_sub_space(log, &log->l_grant_write_head,
2740 ticket->t_curr_res);
2804 ticket->t_curr_res = ticket->t_unit_res; 2741 ticket->t_curr_res = ticket->t_unit_res;
2805 xlog_tic_reset_res(ticket); 2742 xlog_tic_reset_res(ticket);
2806 2743
2807 trace_xfs_log_regrant_reserve_sub(log, ticket); 2744 trace_xfs_log_regrant_reserve_sub(log, ticket);
2808 2745
2809 xlog_verify_grant_head(log, 1);
2810
2811 /* just return if we still have some of the pre-reserved space */ 2746 /* just return if we still have some of the pre-reserved space */
2812 if (ticket->t_cnt > 0) { 2747 if (ticket->t_cnt > 0)
2813 spin_unlock(&log->l_grant_lock);
2814 return; 2748 return;
2815 }
2816 2749
2817 xlog_grant_add_space_reserve(log, ticket->t_unit_res); 2750 xlog_grant_add_space(log, &log->l_grant_reserve_head,
2751 ticket->t_unit_res);
2818 2752
2819 trace_xfs_log_regrant_reserve_exit(log, ticket); 2753 trace_xfs_log_regrant_reserve_exit(log, ticket);
2820 2754
2821 xlog_verify_grant_head(log, 0);
2822 spin_unlock(&log->l_grant_lock);
2823 ticket->t_curr_res = ticket->t_unit_res; 2755 ticket->t_curr_res = ticket->t_unit_res;
2824 xlog_tic_reset_res(ticket); 2756 xlog_tic_reset_res(ticket);
2825} /* xlog_regrant_reserve_log_space */ 2757} /* xlog_regrant_reserve_log_space */
@@ -2843,28 +2775,29 @@ STATIC void
2843xlog_ungrant_log_space(xlog_t *log, 2775xlog_ungrant_log_space(xlog_t *log,
2844 xlog_ticket_t *ticket) 2776 xlog_ticket_t *ticket)
2845{ 2777{
2778 int bytes;
2779
2846 if (ticket->t_cnt > 0) 2780 if (ticket->t_cnt > 0)
2847 ticket->t_cnt--; 2781 ticket->t_cnt--;
2848 2782
2849 spin_lock(&log->l_grant_lock);
2850 trace_xfs_log_ungrant_enter(log, ticket); 2783 trace_xfs_log_ungrant_enter(log, ticket);
2851
2852 xlog_grant_sub_space(log, ticket->t_curr_res);
2853
2854 trace_xfs_log_ungrant_sub(log, ticket); 2784 trace_xfs_log_ungrant_sub(log, ticket);
2855 2785
2856 /* If this is a permanent reservation ticket, we may be able to free 2786 /*
2787 * If this is a permanent reservation ticket, we may be able to free
2857 * up more space based on the remaining count. 2788 * up more space based on the remaining count.
2858 */ 2789 */
2790 bytes = ticket->t_curr_res;
2859 if (ticket->t_cnt > 0) { 2791 if (ticket->t_cnt > 0) {
2860 ASSERT(ticket->t_flags & XLOG_TIC_PERM_RESERV); 2792 ASSERT(ticket->t_flags & XLOG_TIC_PERM_RESERV);
2861 xlog_grant_sub_space(log, ticket->t_unit_res*ticket->t_cnt); 2793 bytes += ticket->t_unit_res*ticket->t_cnt;
2862 } 2794 }
2863 2795
2796 xlog_grant_sub_space(log, &log->l_grant_reserve_head, bytes);
2797 xlog_grant_sub_space(log, &log->l_grant_write_head, bytes);
2798
2864 trace_xfs_log_ungrant_exit(log, ticket); 2799 trace_xfs_log_ungrant_exit(log, ticket);
2865 2800
2866 xlog_verify_grant_head(log, 1);
2867 spin_unlock(&log->l_grant_lock);
2868 xfs_log_move_tail(log->l_mp, 1); 2801 xfs_log_move_tail(log->l_mp, 1);
2869} /* xlog_ungrant_log_space */ 2802} /* xlog_ungrant_log_space */
2870 2803
@@ -2901,11 +2834,11 @@ xlog_state_release_iclog(
2901 2834
2902 if (iclog->ic_state == XLOG_STATE_WANT_SYNC) { 2835 if (iclog->ic_state == XLOG_STATE_WANT_SYNC) {
2903 /* update tail before writing to iclog */ 2836 /* update tail before writing to iclog */
2904 xlog_assign_tail_lsn(log->l_mp); 2837 xfs_lsn_t tail_lsn = xlog_assign_tail_lsn(log->l_mp);
2905 sync++; 2838 sync++;
2906 iclog->ic_state = XLOG_STATE_SYNCING; 2839 iclog->ic_state = XLOG_STATE_SYNCING;
2907 iclog->ic_header.h_tail_lsn = cpu_to_be64(log->l_tail_lsn); 2840 iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
2908 xlog_verify_tail_lsn(log, iclog, log->l_tail_lsn); 2841 xlog_verify_tail_lsn(log, iclog, tail_lsn);
2909 /* cycle incremented when incrementing curr_block */ 2842 /* cycle incremented when incrementing curr_block */
2910 } 2843 }
2911 spin_unlock(&log->l_icloglock); 2844 spin_unlock(&log->l_icloglock);
@@ -3088,7 +3021,7 @@ maybe_sleep:
3088 return XFS_ERROR(EIO); 3021 return XFS_ERROR(EIO);
3089 } 3022 }
3090 XFS_STATS_INC(xs_log_force_sleep); 3023 XFS_STATS_INC(xs_log_force_sleep);
3091 sv_wait(&iclog->ic_force_wait, PINOD, &log->l_icloglock, s); 3024 xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
3092 /* 3025 /*
3093 * No need to grab the log lock here since we're 3026 * No need to grab the log lock here since we're
3094 * only deciding whether or not to return EIO 3027 * only deciding whether or not to return EIO
@@ -3206,8 +3139,8 @@ try_again:
3206 3139
3207 XFS_STATS_INC(xs_log_force_sleep); 3140 XFS_STATS_INC(xs_log_force_sleep);
3208 3141
3209 sv_wait(&iclog->ic_prev->ic_write_wait, 3142 xlog_wait(&iclog->ic_prev->ic_write_wait,
3210 PSWP, &log->l_icloglock, s); 3143 &log->l_icloglock);
3211 if (log_flushed) 3144 if (log_flushed)
3212 *log_flushed = 1; 3145 *log_flushed = 1;
3213 already_slept = 1; 3146 already_slept = 1;
@@ -3235,7 +3168,7 @@ try_again:
3235 return XFS_ERROR(EIO); 3168 return XFS_ERROR(EIO);
3236 } 3169 }
3237 XFS_STATS_INC(xs_log_force_sleep); 3170 XFS_STATS_INC(xs_log_force_sleep);
3238 sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s); 3171 xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
3239 /* 3172 /*
3240 * No need to grab the log lock here since we're 3173 * No need to grab the log lock here since we're
3241 * only deciding whether or not to return EIO 3174 * only deciding whether or not to return EIO
@@ -3310,10 +3243,8 @@ xfs_log_ticket_put(
3310 xlog_ticket_t *ticket) 3243 xlog_ticket_t *ticket)
3311{ 3244{
3312 ASSERT(atomic_read(&ticket->t_ref) > 0); 3245 ASSERT(atomic_read(&ticket->t_ref) > 0);
3313 if (atomic_dec_and_test(&ticket->t_ref)) { 3246 if (atomic_dec_and_test(&ticket->t_ref))
3314 sv_destroy(&ticket->t_wait);
3315 kmem_zone_free(xfs_log_ticket_zone, ticket); 3247 kmem_zone_free(xfs_log_ticket_zone, ticket);
3316 }
3317} 3248}
3318 3249
3319xlog_ticket_t * 3250xlog_ticket_t *
@@ -3435,6 +3366,7 @@ xlog_ticket_alloc(
3435 } 3366 }
3436 3367
3437 atomic_set(&tic->t_ref, 1); 3368 atomic_set(&tic->t_ref, 1);
3369 INIT_LIST_HEAD(&tic->t_queue);
3438 tic->t_unit_res = unit_bytes; 3370 tic->t_unit_res = unit_bytes;
3439 tic->t_curr_res = unit_bytes; 3371 tic->t_curr_res = unit_bytes;
3440 tic->t_cnt = cnt; 3372 tic->t_cnt = cnt;
@@ -3445,7 +3377,7 @@ xlog_ticket_alloc(
3445 tic->t_trans_type = 0; 3377 tic->t_trans_type = 0;
3446 if (xflags & XFS_LOG_PERM_RESERV) 3378 if (xflags & XFS_LOG_PERM_RESERV)
3447 tic->t_flags |= XLOG_TIC_PERM_RESERV; 3379 tic->t_flags |= XLOG_TIC_PERM_RESERV;
3448 sv_init(&tic->t_wait, SV_DEFAULT, "logtick"); 3380 init_waitqueue_head(&tic->t_wait);
3449 3381
3450 xlog_tic_reset_res(tic); 3382 xlog_tic_reset_res(tic);
3451 3383
@@ -3484,18 +3416,25 @@ xlog_verify_dest_ptr(
3484} 3416}
3485 3417
3486STATIC void 3418STATIC void
3487xlog_verify_grant_head(xlog_t *log, int equals) 3419xlog_verify_grant_tail(
3420 struct log *log)
3488{ 3421{
3489 if (log->l_grant_reserve_cycle == log->l_grant_write_cycle) { 3422 int tail_cycle, tail_blocks;
3490 if (equals) 3423 int cycle, space;
3491 ASSERT(log->l_grant_reserve_bytes >= log->l_grant_write_bytes); 3424
3492 else 3425 /*
3493 ASSERT(log->l_grant_reserve_bytes > log->l_grant_write_bytes); 3426 * Check to make sure the grant write head didn't just overlap the
3494 } else { 3427 * tail. If the cycles are the same, we can't be overlapping.
3495 ASSERT(log->l_grant_reserve_cycle-1 == log->l_grant_write_cycle); 3428 * Otherwise, make sure that the cycles differ by exactly one and
3496 ASSERT(log->l_grant_write_bytes >= log->l_grant_reserve_bytes); 3429 * check the byte count.
3497 } 3430 */
3498} /* xlog_verify_grant_head */ 3431 xlog_crack_grant_head(&log->l_grant_write_head, &cycle, &space);
3432 xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks);
3433 if (tail_cycle != cycle) {
3434 ASSERT(cycle - 1 == tail_cycle);
3435 ASSERT(space <= BBTOB(tail_blocks));
3436 }
3437}
3499 3438
3500/* check if it will fit */ 3439/* check if it will fit */
3501STATIC void 3440STATIC void
@@ -3716,12 +3655,10 @@ xfs_log_force_umount(
3716 xlog_cil_force(log); 3655 xlog_cil_force(log);
3717 3656
3718 /* 3657 /*
3719 * We must hold both the GRANT lock and the LOG lock, 3658 * mark the filesystem and the log as in a shutdown state and wake
3720 * before we mark the filesystem SHUTDOWN and wake 3659 * everybody up to tell them the bad news.
3721 * everybody up to tell the bad news.
3722 */ 3660 */
3723 spin_lock(&log->l_icloglock); 3661 spin_lock(&log->l_icloglock);
3724 spin_lock(&log->l_grant_lock);
3725 mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN; 3662 mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN;
3726 if (mp->m_sb_bp) 3663 if (mp->m_sb_bp)
3727 XFS_BUF_DONE(mp->m_sb_bp); 3664 XFS_BUF_DONE(mp->m_sb_bp);
@@ -3742,27 +3679,21 @@ xfs_log_force_umount(
3742 spin_unlock(&log->l_icloglock); 3679 spin_unlock(&log->l_icloglock);
3743 3680
3744 /* 3681 /*
3745 * We don't want anybody waiting for log reservations 3682 * We don't want anybody waiting for log reservations after this. That
3746 * after this. That means we have to wake up everybody 3683 * means we have to wake up everybody queued up on reserveq as well as
3747 * queued up on reserve_headq as well as write_headq. 3684 * writeq. In addition, we make sure in xlog_{re}grant_log_space that
3748 * In addition, we make sure in xlog_{re}grant_log_space 3685 * we don't enqueue anything once the SHUTDOWN flag is set, and this
3749 * that we don't enqueue anything once the SHUTDOWN flag 3686 * action is protected by the grant locks.
3750 * is set, and this action is protected by the GRANTLOCK.
3751 */ 3687 */
3752 if ((tic = log->l_reserve_headq)) { 3688 spin_lock(&log->l_grant_reserve_lock);
3753 do { 3689 list_for_each_entry(tic, &log->l_reserveq, t_queue)
3754 sv_signal(&tic->t_wait); 3690 wake_up(&tic->t_wait);
3755 tic = tic->t_next; 3691 spin_unlock(&log->l_grant_reserve_lock);
3756 } while (tic != log->l_reserve_headq); 3692
3757 } 3693 spin_lock(&log->l_grant_write_lock);
3758 3694 list_for_each_entry(tic, &log->l_writeq, t_queue)
3759 if ((tic = log->l_write_headq)) { 3695 wake_up(&tic->t_wait);
3760 do { 3696 spin_unlock(&log->l_grant_write_lock);
3761 sv_signal(&tic->t_wait);
3762 tic = tic->t_next;
3763 } while (tic != log->l_write_headq);
3764 }
3765 spin_unlock(&log->l_grant_lock);
3766 3697
3767 if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { 3698 if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) {
3768 ASSERT(!logerror); 3699 ASSERT(!logerror);
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 23d6ceb5e97..9dc8125d04e 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -61,7 +61,7 @@ xlog_cil_init(
61 INIT_LIST_HEAD(&cil->xc_committing); 61 INIT_LIST_HEAD(&cil->xc_committing);
62 spin_lock_init(&cil->xc_cil_lock); 62 spin_lock_init(&cil->xc_cil_lock);
63 init_rwsem(&cil->xc_ctx_lock); 63 init_rwsem(&cil->xc_ctx_lock);
64 sv_init(&cil->xc_commit_wait, SV_DEFAULT, "cilwait"); 64 init_waitqueue_head(&cil->xc_commit_wait);
65 65
66 INIT_LIST_HEAD(&ctx->committing); 66 INIT_LIST_HEAD(&ctx->committing);
67 INIT_LIST_HEAD(&ctx->busy_extents); 67 INIT_LIST_HEAD(&ctx->busy_extents);
@@ -361,15 +361,10 @@ xlog_cil_committed(
361 int abort) 361 int abort)
362{ 362{
363 struct xfs_cil_ctx *ctx = args; 363 struct xfs_cil_ctx *ctx = args;
364 struct xfs_log_vec *lv;
365 int abortflag = abort ? XFS_LI_ABORTED : 0;
366 struct xfs_busy_extent *busyp, *n; 364 struct xfs_busy_extent *busyp, *n;
367 365
368 /* unpin all the log items */ 366 xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain,
369 for (lv = ctx->lv_chain; lv; lv = lv->lv_next ) { 367 ctx->start_lsn, abort);
370 xfs_trans_item_committed(lv->lv_item, ctx->start_lsn,
371 abortflag);
372 }
373 368
374 list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list) 369 list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list)
375 xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp); 370 xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp);
@@ -568,7 +563,7 @@ restart:
568 * It is still being pushed! Wait for the push to 563 * It is still being pushed! Wait for the push to
569 * complete, then start again from the beginning. 564 * complete, then start again from the beginning.
570 */ 565 */
571 sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0); 566 xlog_wait(&cil->xc_commit_wait, &cil->xc_cil_lock);
572 goto restart; 567 goto restart;
573 } 568 }
574 } 569 }
@@ -592,7 +587,7 @@ restart:
592 */ 587 */
593 spin_lock(&cil->xc_cil_lock); 588 spin_lock(&cil->xc_cil_lock);
594 ctx->commit_lsn = commit_lsn; 589 ctx->commit_lsn = commit_lsn;
595 sv_broadcast(&cil->xc_commit_wait); 590 wake_up_all(&cil->xc_commit_wait);
596 spin_unlock(&cil->xc_cil_lock); 591 spin_unlock(&cil->xc_cil_lock);
597 592
598 /* release the hounds! */ 593 /* release the hounds! */
@@ -757,7 +752,7 @@ restart:
757 * It is still being pushed! Wait for the push to 752 * It is still being pushed! Wait for the push to
758 * complete, then start again from the beginning. 753 * complete, then start again from the beginning.
759 */ 754 */
760 sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0); 755 xlog_wait(&cil->xc_commit_wait, &cil->xc_cil_lock);
761 goto restart; 756 goto restart;
762 } 757 }
763 if (ctx->sequence != sequence) 758 if (ctx->sequence != sequence)
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index edcdfe01617..d5f8be8f4bf 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -21,7 +21,6 @@
21struct xfs_buf; 21struct xfs_buf;
22struct log; 22struct log;
23struct xlog_ticket; 23struct xlog_ticket;
24struct xfs_buf_cancel;
25struct xfs_mount; 24struct xfs_mount;
26 25
27/* 26/*
@@ -54,7 +53,6 @@ struct xfs_mount;
54 BTOBB(XLOG_MAX_ICLOGS << (xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? \ 53 BTOBB(XLOG_MAX_ICLOGS << (xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? \
55 XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT)) 54 XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT))
56 55
57
58static inline xfs_lsn_t xlog_assign_lsn(uint cycle, uint block) 56static inline xfs_lsn_t xlog_assign_lsn(uint cycle, uint block)
59{ 57{
60 return ((xfs_lsn_t)cycle << 32) | block; 58 return ((xfs_lsn_t)cycle << 32) | block;
@@ -133,12 +131,10 @@ static inline uint xlog_get_client_id(__be32 i)
133 */ 131 */
134#define XLOG_TIC_INITED 0x1 /* has been initialized */ 132#define XLOG_TIC_INITED 0x1 /* has been initialized */
135#define XLOG_TIC_PERM_RESERV 0x2 /* permanent reservation */ 133#define XLOG_TIC_PERM_RESERV 0x2 /* permanent reservation */
136#define XLOG_TIC_IN_Q 0x4
137 134
138#define XLOG_TIC_FLAGS \ 135#define XLOG_TIC_FLAGS \
139 { XLOG_TIC_INITED, "XLOG_TIC_INITED" }, \ 136 { XLOG_TIC_INITED, "XLOG_TIC_INITED" }, \
140 { XLOG_TIC_PERM_RESERV, "XLOG_TIC_PERM_RESERV" }, \ 137 { XLOG_TIC_PERM_RESERV, "XLOG_TIC_PERM_RESERV" }
141 { XLOG_TIC_IN_Q, "XLOG_TIC_IN_Q" }
142 138
143#endif /* __KERNEL__ */ 139#endif /* __KERNEL__ */
144 140
@@ -244,9 +240,8 @@ typedef struct xlog_res {
244} xlog_res_t; 240} xlog_res_t;
245 241
246typedef struct xlog_ticket { 242typedef struct xlog_ticket {
247 sv_t t_wait; /* ticket wait queue : 20 */ 243 wait_queue_head_t t_wait; /* ticket wait queue */
248 struct xlog_ticket *t_next; /* :4|8 */ 244 struct list_head t_queue; /* reserve/write queue */
249 struct xlog_ticket *t_prev; /* :4|8 */
250 xlog_tid_t t_tid; /* transaction identifier : 4 */ 245 xlog_tid_t t_tid; /* transaction identifier : 4 */
251 atomic_t t_ref; /* ticket reference count : 4 */ 246 atomic_t t_ref; /* ticket reference count : 4 */
252 int t_curr_res; /* current reservation in bytes : 4 */ 247 int t_curr_res; /* current reservation in bytes : 4 */
@@ -353,8 +348,8 @@ typedef union xlog_in_core2 {
353 * and move everything else out to subsequent cachelines. 348 * and move everything else out to subsequent cachelines.
354 */ 349 */
355typedef struct xlog_in_core { 350typedef struct xlog_in_core {
356 sv_t ic_force_wait; 351 wait_queue_head_t ic_force_wait;
357 sv_t ic_write_wait; 352 wait_queue_head_t ic_write_wait;
358 struct xlog_in_core *ic_next; 353 struct xlog_in_core *ic_next;
359 struct xlog_in_core *ic_prev; 354 struct xlog_in_core *ic_prev;
360 struct xfs_buf *ic_bp; 355 struct xfs_buf *ic_bp;
@@ -421,7 +416,7 @@ struct xfs_cil {
421 struct xfs_cil_ctx *xc_ctx; 416 struct xfs_cil_ctx *xc_ctx;
422 struct rw_semaphore xc_ctx_lock; 417 struct rw_semaphore xc_ctx_lock;
423 struct list_head xc_committing; 418 struct list_head xc_committing;
424 sv_t xc_commit_wait; 419 wait_queue_head_t xc_commit_wait;
425 xfs_lsn_t xc_current_sequence; 420 xfs_lsn_t xc_current_sequence;
426}; 421};
427 422
@@ -491,7 +486,7 @@ typedef struct log {
491 struct xfs_buftarg *l_targ; /* buftarg of log */ 486 struct xfs_buftarg *l_targ; /* buftarg of log */
492 uint l_flags; 487 uint l_flags;
493 uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */ 488 uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */
494 struct xfs_buf_cancel **l_buf_cancel_table; 489 struct list_head *l_buf_cancel_table;
495 int l_iclog_hsize; /* size of iclog header */ 490 int l_iclog_hsize; /* size of iclog header */
496 int l_iclog_heads; /* # of iclog header sectors */ 491 int l_iclog_heads; /* # of iclog header sectors */
497 uint l_sectBBsize; /* sector size in BBs (2^n) */ 492 uint l_sectBBsize; /* sector size in BBs (2^n) */
@@ -503,29 +498,40 @@ typedef struct log {
503 int l_logBBsize; /* size of log in BB chunks */ 498 int l_logBBsize; /* size of log in BB chunks */
504 499
505 /* The following block of fields are changed while holding icloglock */ 500 /* The following block of fields are changed while holding icloglock */
506 sv_t l_flush_wait ____cacheline_aligned_in_smp; 501 wait_queue_head_t l_flush_wait ____cacheline_aligned_in_smp;
507 /* waiting for iclog flush */ 502 /* waiting for iclog flush */
508 int l_covered_state;/* state of "covering disk 503 int l_covered_state;/* state of "covering disk
509 * log entries" */ 504 * log entries" */
510 xlog_in_core_t *l_iclog; /* head log queue */ 505 xlog_in_core_t *l_iclog; /* head log queue */
511 spinlock_t l_icloglock; /* grab to change iclog state */ 506 spinlock_t l_icloglock; /* grab to change iclog state */
512 xfs_lsn_t l_tail_lsn; /* lsn of 1st LR with unflushed
513 * buffers */
514 xfs_lsn_t l_last_sync_lsn;/* lsn of last LR on disk */
515 int l_curr_cycle; /* Cycle number of log writes */ 507 int l_curr_cycle; /* Cycle number of log writes */
516 int l_prev_cycle; /* Cycle number before last 508 int l_prev_cycle; /* Cycle number before last
517 * block increment */ 509 * block increment */
518 int l_curr_block; /* current logical log block */ 510 int l_curr_block; /* current logical log block */
519 int l_prev_block; /* previous logical log block */ 511 int l_prev_block; /* previous logical log block */
520 512
521 /* The following block of fields are changed while holding grant_lock */ 513 /*
522 spinlock_t l_grant_lock ____cacheline_aligned_in_smp; 514 * l_last_sync_lsn and l_tail_lsn are atomics so they can be set and
523 xlog_ticket_t *l_reserve_headq; 515 * read without needing to hold specific locks. To avoid operations
524 xlog_ticket_t *l_write_headq; 516 * contending with other hot objects, place each of them on a separate
525 int l_grant_reserve_cycle; 517 * cacheline.
526 int l_grant_reserve_bytes; 518 */
527 int l_grant_write_cycle; 519 /* lsn of last LR on disk */
528 int l_grant_write_bytes; 520 atomic64_t l_last_sync_lsn ____cacheline_aligned_in_smp;
521 /* lsn of 1st LR with unflushed buffers */
522 atomic64_t l_tail_lsn ____cacheline_aligned_in_smp;
523
524 /*
525 * ticket grant locks, queues and accounting have their own cachelines
526 * as these are quite hot and can be operated on concurrently.
527 */
528 spinlock_t l_grant_reserve_lock ____cacheline_aligned_in_smp;
529 struct list_head l_reserveq;
530 atomic64_t l_grant_reserve_head;
531
532 spinlock_t l_grant_write_lock ____cacheline_aligned_in_smp;
533 struct list_head l_writeq;
534 atomic64_t l_grant_write_head;
529 535
530 /* The following field are used for debugging; need to hold icloglock */ 536 /* The following field are used for debugging; need to hold icloglock */
531#ifdef DEBUG 537#ifdef DEBUG
@@ -534,6 +540,9 @@ typedef struct log {
534 540
535} xlog_t; 541} xlog_t;
536 542
543#define XLOG_BUF_CANCEL_BUCKET(log, blkno) \
544 ((log)->l_buf_cancel_table + ((__uint64_t)blkno % XLOG_BC_TABLE_SIZE))
545
537#define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) 546#define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR)
538 547
539/* common routines */ 548/* common routines */
@@ -562,6 +571,61 @@ int xlog_write(struct log *log, struct xfs_log_vec *log_vector,
562 xlog_in_core_t **commit_iclog, uint flags); 571 xlog_in_core_t **commit_iclog, uint flags);
563 572
564/* 573/*
574 * When we crack an atomic LSN, we sample it first so that the value will not
575 * change while we are cracking it into the component values. This means we
576 * will always get consistent component values to work from. This should always
577 * be used to sample and crack LSNs that are stored and updated in atomic
578 * variables.
579 */
580static inline void
581xlog_crack_atomic_lsn(atomic64_t *lsn, uint *cycle, uint *block)
582{
583 xfs_lsn_t val = atomic64_read(lsn);
584
585 *cycle = CYCLE_LSN(val);
586 *block = BLOCK_LSN(val);
587}
588
589/*
590 * Calculate and assign a value to an atomic LSN variable from component pieces.
591 */
592static inline void
593xlog_assign_atomic_lsn(atomic64_t *lsn, uint cycle, uint block)
594{
595 atomic64_set(lsn, xlog_assign_lsn(cycle, block));
596}
597
598/*
599 * When we crack the grant head, we sample it first so that the value will not
600 * change while we are cracking it into the component values. This means we
601 * will always get consistent component values to work from.
602 */
603static inline void
604xlog_crack_grant_head_val(int64_t val, int *cycle, int *space)
605{
606 *cycle = val >> 32;
607 *space = val & 0xffffffff;
608}
609
610static inline void
611xlog_crack_grant_head(atomic64_t *head, int *cycle, int *space)
612{
613 xlog_crack_grant_head_val(atomic64_read(head), cycle, space);
614}
615
616static inline int64_t
617xlog_assign_grant_head_val(int cycle, int space)
618{
619 return ((int64_t)cycle << 32) | space;
620}
621
622static inline void
623xlog_assign_grant_head(atomic64_t *head, int cycle, int space)
624{
625 atomic64_set(head, xlog_assign_grant_head_val(cycle, space));
626}
627
628/*
565 * Committed Item List interfaces 629 * Committed Item List interfaces
566 */ 630 */
567int xlog_cil_init(struct log *log); 631int xlog_cil_init(struct log *log);
@@ -585,6 +649,21 @@ xlog_cil_force(struct log *log)
585 */ 649 */
586#define XLOG_UNMOUNT_REC_TYPE (-1U) 650#define XLOG_UNMOUNT_REC_TYPE (-1U)
587 651
652/*
653 * Wrapper function for waiting on a wait queue serialised against wakeups
654 * by a spinlock. This matches the semantics of all the wait queues used in the
655 * log code.
656 */
657static inline void xlog_wait(wait_queue_head_t *wq, spinlock_t *lock)
658{
659 DECLARE_WAITQUEUE(wait, current);
660
661 add_wait_queue_exclusive(wq, &wait);
662 __set_current_state(TASK_UNINTERRUPTIBLE);
663 spin_unlock(lock);
664 schedule();
665 remove_wait_queue(wq, &wait);
666}
588#endif /* __KERNEL__ */ 667#endif /* __KERNEL__ */
589 668
590#endif /* __XFS_LOG_PRIV_H__ */ 669#endif /* __XFS_LOG_PRIV_H__ */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 966d3f97458..204d8e5fa7f 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -53,6 +53,17 @@ STATIC void xlog_recover_check_summary(xlog_t *);
53#endif 53#endif
54 54
55/* 55/*
56 * This structure is used during recovery to record the buf log items which
57 * have been canceled and should not be replayed.
58 */
59struct xfs_buf_cancel {
60 xfs_daddr_t bc_blkno;
61 uint bc_len;
62 int bc_refcount;
63 struct list_head bc_list;
64};
65
66/*
56 * Sector aligned buffer routines for buffer create/read/write/access 67 * Sector aligned buffer routines for buffer create/read/write/access
57 */ 68 */
58 69
@@ -925,12 +936,12 @@ xlog_find_tail(
925 log->l_curr_cycle = be32_to_cpu(rhead->h_cycle); 936 log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
926 if (found == 2) 937 if (found == 2)
927 log->l_curr_cycle++; 938 log->l_curr_cycle++;
928 log->l_tail_lsn = be64_to_cpu(rhead->h_tail_lsn); 939 atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
929 log->l_last_sync_lsn = be64_to_cpu(rhead->h_lsn); 940 atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
930 log->l_grant_reserve_cycle = log->l_curr_cycle; 941 xlog_assign_grant_head(&log->l_grant_reserve_head, log->l_curr_cycle,
931 log->l_grant_reserve_bytes = BBTOB(log->l_curr_block); 942 BBTOB(log->l_curr_block));
932 log->l_grant_write_cycle = log->l_curr_cycle; 943 xlog_assign_grant_head(&log->l_grant_write_head, log->l_curr_cycle,
933 log->l_grant_write_bytes = BBTOB(log->l_curr_block); 944 BBTOB(log->l_curr_block));
934 945
935 /* 946 /*
936 * Look for unmount record. If we find it, then we know there 947 * Look for unmount record. If we find it, then we know there
@@ -960,7 +971,7 @@ xlog_find_tail(
960 } 971 }
961 after_umount_blk = (i + hblks + (int) 972 after_umount_blk = (i + hblks + (int)
962 BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize; 973 BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize;
963 tail_lsn = log->l_tail_lsn; 974 tail_lsn = atomic64_read(&log->l_tail_lsn);
964 if (*head_blk == after_umount_blk && 975 if (*head_blk == after_umount_blk &&
965 be32_to_cpu(rhead->h_num_logops) == 1) { 976 be32_to_cpu(rhead->h_num_logops) == 1) {
966 umount_data_blk = (i + hblks) % log->l_logBBsize; 977 umount_data_blk = (i + hblks) % log->l_logBBsize;
@@ -975,12 +986,10 @@ xlog_find_tail(
975 * log records will point recovery to after the 986 * log records will point recovery to after the
976 * current unmount record. 987 * current unmount record.
977 */ 988 */
978 log->l_tail_lsn = 989 xlog_assign_atomic_lsn(&log->l_tail_lsn,
979 xlog_assign_lsn(log->l_curr_cycle, 990 log->l_curr_cycle, after_umount_blk);
980 after_umount_blk); 991 xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
981 log->l_last_sync_lsn = 992 log->l_curr_cycle, after_umount_blk);
982 xlog_assign_lsn(log->l_curr_cycle,
983 after_umount_blk);
984 *tail_blk = after_umount_blk; 993 *tail_blk = after_umount_blk;
985 994
986 /* 995 /*
@@ -1605,82 +1614,45 @@ xlog_recover_reorder_trans(
1605 * record in the table to tell us how many times we expect to see this 1614 * record in the table to tell us how many times we expect to see this
1606 * record during the second pass. 1615 * record during the second pass.
1607 */ 1616 */
1608STATIC void 1617STATIC int
1609xlog_recover_do_buffer_pass1( 1618xlog_recover_buffer_pass1(
1610 xlog_t *log, 1619 struct log *log,
1611 xfs_buf_log_format_t *buf_f) 1620 xlog_recover_item_t *item)
1612{ 1621{
1613 xfs_buf_cancel_t *bcp; 1622 xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr;
1614 xfs_buf_cancel_t *nextp; 1623 struct list_head *bucket;
1615 xfs_buf_cancel_t *prevp; 1624 struct xfs_buf_cancel *bcp;
1616 xfs_buf_cancel_t **bucket;
1617 xfs_daddr_t blkno = 0;
1618 uint len = 0;
1619 ushort flags = 0;
1620
1621 switch (buf_f->blf_type) {
1622 case XFS_LI_BUF:
1623 blkno = buf_f->blf_blkno;
1624 len = buf_f->blf_len;
1625 flags = buf_f->blf_flags;
1626 break;
1627 }
1628 1625
1629 /* 1626 /*
1630 * If this isn't a cancel buffer item, then just return. 1627 * If this isn't a cancel buffer item, then just return.
1631 */ 1628 */
1632 if (!(flags & XFS_BLF_CANCEL)) { 1629 if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) {
1633 trace_xfs_log_recover_buf_not_cancel(log, buf_f); 1630 trace_xfs_log_recover_buf_not_cancel(log, buf_f);
1634 return; 1631 return 0;
1635 }
1636
1637 /*
1638 * Insert an xfs_buf_cancel record into the hash table of
1639 * them. If there is already an identical record, bump
1640 * its reference count.
1641 */
1642 bucket = &log->l_buf_cancel_table[(__uint64_t)blkno %
1643 XLOG_BC_TABLE_SIZE];
1644 /*
1645 * If the hash bucket is empty then just insert a new record into
1646 * the bucket.
1647 */
1648 if (*bucket == NULL) {
1649 bcp = (xfs_buf_cancel_t *)kmem_alloc(sizeof(xfs_buf_cancel_t),
1650 KM_SLEEP);
1651 bcp->bc_blkno = blkno;
1652 bcp->bc_len = len;
1653 bcp->bc_refcount = 1;
1654 bcp->bc_next = NULL;
1655 *bucket = bcp;
1656 return;
1657 } 1632 }
1658 1633
1659 /* 1634 /*
1660 * The hash bucket is not empty, so search for duplicates of our 1635 * Insert an xfs_buf_cancel record into the hash table of them.
1661 * record. If we find one them just bump its refcount. If not 1636 * If there is already an identical record, bump its reference count.
1662 * then add us at the end of the list.
1663 */ 1637 */
1664 prevp = NULL; 1638 bucket = XLOG_BUF_CANCEL_BUCKET(log, buf_f->blf_blkno);
1665 nextp = *bucket; 1639 list_for_each_entry(bcp, bucket, bc_list) {
1666 while (nextp != NULL) { 1640 if (bcp->bc_blkno == buf_f->blf_blkno &&
1667 if (nextp->bc_blkno == blkno && nextp->bc_len == len) { 1641 bcp->bc_len == buf_f->blf_len) {
1668 nextp->bc_refcount++; 1642 bcp->bc_refcount++;
1669 trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f); 1643 trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f);
1670 return; 1644 return 0;
1671 } 1645 }
1672 prevp = nextp; 1646 }
1673 nextp = nextp->bc_next; 1647
1674 } 1648 bcp = kmem_alloc(sizeof(struct xfs_buf_cancel), KM_SLEEP);
1675 ASSERT(prevp != NULL); 1649 bcp->bc_blkno = buf_f->blf_blkno;
1676 bcp = (xfs_buf_cancel_t *)kmem_alloc(sizeof(xfs_buf_cancel_t), 1650 bcp->bc_len = buf_f->blf_len;
1677 KM_SLEEP);
1678 bcp->bc_blkno = blkno;
1679 bcp->bc_len = len;
1680 bcp->bc_refcount = 1; 1651 bcp->bc_refcount = 1;
1681 bcp->bc_next = NULL; 1652 list_add_tail(&bcp->bc_list, bucket);
1682 prevp->bc_next = bcp; 1653
1683 trace_xfs_log_recover_buf_cancel_add(log, buf_f); 1654 trace_xfs_log_recover_buf_cancel_add(log, buf_f);
1655 return 0;
1684} 1656}
1685 1657
1686/* 1658/*
@@ -1698,14 +1670,13 @@ xlog_recover_do_buffer_pass1(
1698 */ 1670 */
1699STATIC int 1671STATIC int
1700xlog_check_buffer_cancelled( 1672xlog_check_buffer_cancelled(
1701 xlog_t *log, 1673 struct log *log,
1702 xfs_daddr_t blkno, 1674 xfs_daddr_t blkno,
1703 uint len, 1675 uint len,
1704 ushort flags) 1676 ushort flags)
1705{ 1677{
1706 xfs_buf_cancel_t *bcp; 1678 struct list_head *bucket;
1707 xfs_buf_cancel_t *prevp; 1679 struct xfs_buf_cancel *bcp;
1708 xfs_buf_cancel_t **bucket;
1709 1680
1710 if (log->l_buf_cancel_table == NULL) { 1681 if (log->l_buf_cancel_table == NULL) {
1711 /* 1682 /*
@@ -1716,128 +1687,70 @@ xlog_check_buffer_cancelled(
1716 return 0; 1687 return 0;
1717 } 1688 }
1718 1689
1719 bucket = &log->l_buf_cancel_table[(__uint64_t)blkno %
1720 XLOG_BC_TABLE_SIZE];
1721 bcp = *bucket;
1722 if (bcp == NULL) {
1723 /*
1724 * There is no corresponding entry in the table built
1725 * in pass one, so this buffer has not been cancelled.
1726 */
1727 ASSERT(!(flags & XFS_BLF_CANCEL));
1728 return 0;
1729 }
1730
1731 /* 1690 /*
1732 * Search for an entry in the buffer cancel table that 1691 * Search for an entry in the cancel table that matches our buffer.
1733 * matches our buffer.
1734 */ 1692 */
1735 prevp = NULL; 1693 bucket = XLOG_BUF_CANCEL_BUCKET(log, blkno);
1736 while (bcp != NULL) { 1694 list_for_each_entry(bcp, bucket, bc_list) {
1737 if (bcp->bc_blkno == blkno && bcp->bc_len == len) { 1695 if (bcp->bc_blkno == blkno && bcp->bc_len == len)
1738 /* 1696 goto found;
1739 * We've go a match, so return 1 so that the
1740 * recovery of this buffer is cancelled.
1741 * If this buffer is actually a buffer cancel
1742 * log item, then decrement the refcount on the
1743 * one in the table and remove it if this is the
1744 * last reference.
1745 */
1746 if (flags & XFS_BLF_CANCEL) {
1747 bcp->bc_refcount--;
1748 if (bcp->bc_refcount == 0) {
1749 if (prevp == NULL) {
1750 *bucket = bcp->bc_next;
1751 } else {
1752 prevp->bc_next = bcp->bc_next;
1753 }
1754 kmem_free(bcp);
1755 }
1756 }
1757 return 1;
1758 }
1759 prevp = bcp;
1760 bcp = bcp->bc_next;
1761 } 1697 }
1698
1762 /* 1699 /*
1763 * We didn't find a corresponding entry in the table, so 1700 * We didn't find a corresponding entry in the table, so return 0 so
1764 * return 0 so that the buffer is NOT cancelled. 1701 * that the buffer is NOT cancelled.
1765 */ 1702 */
1766 ASSERT(!(flags & XFS_BLF_CANCEL)); 1703 ASSERT(!(flags & XFS_BLF_CANCEL));
1767 return 0; 1704 return 0;
1768}
1769 1705
1770STATIC int 1706found:
1771xlog_recover_do_buffer_pass2( 1707 /*
1772 xlog_t *log, 1708 * We've got a match, so return 1 so that the recovery of this buffer
1773 xfs_buf_log_format_t *buf_f) 1709 * is cancelled. If this buffer is actually a buffer cancel log
1774{ 1710 * item, then decrement the refcount on the one in the table and
1775 xfs_daddr_t blkno = 0; 1711 * remove it if this is the last reference.
1776 ushort flags = 0; 1712 */
1777 uint len = 0; 1713 if (flags & XFS_BLF_CANCEL) {
1778 1714 if (--bcp->bc_refcount == 0) {
1779 switch (buf_f->blf_type) { 1715 list_del(&bcp->bc_list);
1780 case XFS_LI_BUF: 1716 kmem_free(bcp);
1781 blkno = buf_f->blf_blkno; 1717 }
1782 flags = buf_f->blf_flags;
1783 len = buf_f->blf_len;
1784 break;
1785 } 1718 }
1786 1719 return 1;
1787 return xlog_check_buffer_cancelled(log, blkno, len, flags);
1788} 1720}
1789 1721
1790/* 1722/*
1791 * Perform recovery for a buffer full of inodes. In these buffers, 1723 * Perform recovery for a buffer full of inodes. In these buffers, the only
1792 * the only data which should be recovered is that which corresponds 1724 * data which should be recovered is that which corresponds to the
1793 * to the di_next_unlinked pointers in the on disk inode structures. 1725 * di_next_unlinked pointers in the on disk inode structures. The rest of the
1794 * The rest of the data for the inodes is always logged through the 1726 * data for the inodes is always logged through the inodes themselves rather
1795 * inodes themselves rather than the inode buffer and is recovered 1727 * than the inode buffer and is recovered in xlog_recover_inode_pass2().
1796 * in xlog_recover_do_inode_trans().
1797 * 1728 *
1798 * The only time when buffers full of inodes are fully recovered is 1729 * The only time when buffers full of inodes are fully recovered is when the
1799 * when the buffer is full of newly allocated inodes. In this case 1730 * buffer is full of newly allocated inodes. In this case the buffer will
1800 * the buffer will not be marked as an inode buffer and so will be 1731 * not be marked as an inode buffer and so will be sent to
1801 * sent to xlog_recover_do_reg_buffer() below during recovery. 1732 * xlog_recover_do_reg_buffer() below during recovery.
1802 */ 1733 */
1803STATIC int 1734STATIC int
1804xlog_recover_do_inode_buffer( 1735xlog_recover_do_inode_buffer(
1805 xfs_mount_t *mp, 1736 struct xfs_mount *mp,
1806 xlog_recover_item_t *item, 1737 xlog_recover_item_t *item,
1807 xfs_buf_t *bp, 1738 struct xfs_buf *bp,
1808 xfs_buf_log_format_t *buf_f) 1739 xfs_buf_log_format_t *buf_f)
1809{ 1740{
1810 int i; 1741 int i;
1811 int item_index; 1742 int item_index = 0;
1812 int bit; 1743 int bit = 0;
1813 int nbits; 1744 int nbits = 0;
1814 int reg_buf_offset; 1745 int reg_buf_offset = 0;
1815 int reg_buf_bytes; 1746 int reg_buf_bytes = 0;
1816 int next_unlinked_offset; 1747 int next_unlinked_offset;
1817 int inodes_per_buf; 1748 int inodes_per_buf;
1818 xfs_agino_t *logged_nextp; 1749 xfs_agino_t *logged_nextp;
1819 xfs_agino_t *buffer_nextp; 1750 xfs_agino_t *buffer_nextp;
1820 unsigned int *data_map = NULL;
1821 unsigned int map_size = 0;
1822 1751
1823 trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f); 1752 trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f);
1824 1753
1825 switch (buf_f->blf_type) {
1826 case XFS_LI_BUF:
1827 data_map = buf_f->blf_data_map;
1828 map_size = buf_f->blf_map_size;
1829 break;
1830 }
1831 /*
1832 * Set the variables corresponding to the current region to
1833 * 0 so that we'll initialize them on the first pass through
1834 * the loop.
1835 */
1836 reg_buf_offset = 0;
1837 reg_buf_bytes = 0;
1838 bit = 0;
1839 nbits = 0;
1840 item_index = 0;
1841 inodes_per_buf = XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog; 1754 inodes_per_buf = XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog;
1842 for (i = 0; i < inodes_per_buf; i++) { 1755 for (i = 0; i < inodes_per_buf; i++) {
1843 next_unlinked_offset = (i * mp->m_sb.sb_inodesize) + 1756 next_unlinked_offset = (i * mp->m_sb.sb_inodesize) +
@@ -1852,18 +1765,18 @@ xlog_recover_do_inode_buffer(
1852 * the current di_next_unlinked field. 1765 * the current di_next_unlinked field.
1853 */ 1766 */
1854 bit += nbits; 1767 bit += nbits;
1855 bit = xfs_next_bit(data_map, map_size, bit); 1768 bit = xfs_next_bit(buf_f->blf_data_map,
1769 buf_f->blf_map_size, bit);
1856 1770
1857 /* 1771 /*
1858 * If there are no more logged regions in the 1772 * If there are no more logged regions in the
1859 * buffer, then we're done. 1773 * buffer, then we're done.
1860 */ 1774 */
1861 if (bit == -1) { 1775 if (bit == -1)
1862 return 0; 1776 return 0;
1863 }
1864 1777
1865 nbits = xfs_contig_bits(data_map, map_size, 1778 nbits = xfs_contig_bits(buf_f->blf_data_map,
1866 bit); 1779 buf_f->blf_map_size, bit);
1867 ASSERT(nbits > 0); 1780 ASSERT(nbits > 0);
1868 reg_buf_offset = bit << XFS_BLF_SHIFT; 1781 reg_buf_offset = bit << XFS_BLF_SHIFT;
1869 reg_buf_bytes = nbits << XFS_BLF_SHIFT; 1782 reg_buf_bytes = nbits << XFS_BLF_SHIFT;
@@ -1875,9 +1788,8 @@ xlog_recover_do_inode_buffer(
1875 * di_next_unlinked field, then move on to the next 1788 * di_next_unlinked field, then move on to the next
1876 * di_next_unlinked field. 1789 * di_next_unlinked field.
1877 */ 1790 */
1878 if (next_unlinked_offset < reg_buf_offset) { 1791 if (next_unlinked_offset < reg_buf_offset)
1879 continue; 1792 continue;
1880 }
1881 1793
1882 ASSERT(item->ri_buf[item_index].i_addr != NULL); 1794 ASSERT(item->ri_buf[item_index].i_addr != NULL);
1883 ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0); 1795 ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0);
@@ -1913,36 +1825,29 @@ xlog_recover_do_inode_buffer(
1913 * given buffer. The bitmap in the buf log format structure indicates 1825 * given buffer. The bitmap in the buf log format structure indicates
1914 * where to place the logged data. 1826 * where to place the logged data.
1915 */ 1827 */
1916/*ARGSUSED*/
1917STATIC void 1828STATIC void
1918xlog_recover_do_reg_buffer( 1829xlog_recover_do_reg_buffer(
1919 struct xfs_mount *mp, 1830 struct xfs_mount *mp,
1920 xlog_recover_item_t *item, 1831 xlog_recover_item_t *item,
1921 xfs_buf_t *bp, 1832 struct xfs_buf *bp,
1922 xfs_buf_log_format_t *buf_f) 1833 xfs_buf_log_format_t *buf_f)
1923{ 1834{
1924 int i; 1835 int i;
1925 int bit; 1836 int bit;
1926 int nbits; 1837 int nbits;
1927 unsigned int *data_map = NULL;
1928 unsigned int map_size = 0;
1929 int error; 1838 int error;
1930 1839
1931 trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f); 1840 trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f);
1932 1841
1933 switch (buf_f->blf_type) {
1934 case XFS_LI_BUF:
1935 data_map = buf_f->blf_data_map;
1936 map_size = buf_f->blf_map_size;
1937 break;
1938 }
1939 bit = 0; 1842 bit = 0;
1940 i = 1; /* 0 is the buf format structure */ 1843 i = 1; /* 0 is the buf format structure */
1941 while (1) { 1844 while (1) {
1942 bit = xfs_next_bit(data_map, map_size, bit); 1845 bit = xfs_next_bit(buf_f->blf_data_map,
1846 buf_f->blf_map_size, bit);
1943 if (bit == -1) 1847 if (bit == -1)
1944 break; 1848 break;
1945 nbits = xfs_contig_bits(data_map, map_size, bit); 1849 nbits = xfs_contig_bits(buf_f->blf_data_map,
1850 buf_f->blf_map_size, bit);
1946 ASSERT(nbits > 0); 1851 ASSERT(nbits > 0);
1947 ASSERT(item->ri_buf[i].i_addr != NULL); 1852 ASSERT(item->ri_buf[i].i_addr != NULL);
1948 ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0); 1853 ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0);
@@ -2176,77 +2081,46 @@ xlog_recover_do_dquot_buffer(
2176 * for more details on the implementation of the table of cancel records. 2081 * for more details on the implementation of the table of cancel records.
2177 */ 2082 */
2178STATIC int 2083STATIC int
2179xlog_recover_do_buffer_trans( 2084xlog_recover_buffer_pass2(
2180 xlog_t *log, 2085 xlog_t *log,
2181 xlog_recover_item_t *item, 2086 xlog_recover_item_t *item)
2182 int pass)
2183{ 2087{
2184 xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; 2088 xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr;
2185 xfs_mount_t *mp; 2089 xfs_mount_t *mp = log->l_mp;
2186 xfs_buf_t *bp; 2090 xfs_buf_t *bp;
2187 int error; 2091 int error;
2188 int cancel;
2189 xfs_daddr_t blkno;
2190 int len;
2191 ushort flags;
2192 uint buf_flags; 2092 uint buf_flags;
2193 2093
2194 if (pass == XLOG_RECOVER_PASS1) { 2094 /*
2195 /* 2095 * In this pass we only want to recover all the buffers which have
2196 * In this pass we're only looking for buf items 2096 * not been cancelled and are not cancellation buffers themselves.
2197 * with the XFS_BLF_CANCEL bit set. 2097 */
2198 */ 2098 if (xlog_check_buffer_cancelled(log, buf_f->blf_blkno,
2199 xlog_recover_do_buffer_pass1(log, buf_f); 2099 buf_f->blf_len, buf_f->blf_flags)) {
2100 trace_xfs_log_recover_buf_cancel(log, buf_f);
2200 return 0; 2101 return 0;
2201 } else {
2202 /*
2203 * In this pass we want to recover all the buffers
2204 * which have not been cancelled and are not
2205 * cancellation buffers themselves. The routine
2206 * we call here will tell us whether or not to
2207 * continue with the replay of this buffer.
2208 */
2209 cancel = xlog_recover_do_buffer_pass2(log, buf_f);
2210 if (cancel) {
2211 trace_xfs_log_recover_buf_cancel(log, buf_f);
2212 return 0;
2213 }
2214 } 2102 }
2103
2215 trace_xfs_log_recover_buf_recover(log, buf_f); 2104 trace_xfs_log_recover_buf_recover(log, buf_f);
2216 switch (buf_f->blf_type) {
2217 case XFS_LI_BUF:
2218 blkno = buf_f->blf_blkno;
2219 len = buf_f->blf_len;
2220 flags = buf_f->blf_flags;
2221 break;
2222 default:
2223 xfs_fs_cmn_err(CE_ALERT, log->l_mp,
2224 "xfs_log_recover: unknown buffer type 0x%x, logdev %s",
2225 buf_f->blf_type, log->l_mp->m_logname ?
2226 log->l_mp->m_logname : "internal");
2227 XFS_ERROR_REPORT("xlog_recover_do_buffer_trans",
2228 XFS_ERRLEVEL_LOW, log->l_mp);
2229 return XFS_ERROR(EFSCORRUPTED);
2230 }
2231 2105
2232 mp = log->l_mp;
2233 buf_flags = XBF_LOCK; 2106 buf_flags = XBF_LOCK;
2234 if (!(flags & XFS_BLF_INODE_BUF)) 2107 if (!(buf_f->blf_flags & XFS_BLF_INODE_BUF))
2235 buf_flags |= XBF_MAPPED; 2108 buf_flags |= XBF_MAPPED;
2236 2109
2237 bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, buf_flags); 2110 bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len,
2111 buf_flags);
2238 if (XFS_BUF_ISERROR(bp)) { 2112 if (XFS_BUF_ISERROR(bp)) {
2239 xfs_ioerror_alert("xlog_recover_do..(read#1)", log->l_mp, 2113 xfs_ioerror_alert("xlog_recover_do..(read#1)", mp,
2240 bp, blkno); 2114 bp, buf_f->blf_blkno);
2241 error = XFS_BUF_GETERROR(bp); 2115 error = XFS_BUF_GETERROR(bp);
2242 xfs_buf_relse(bp); 2116 xfs_buf_relse(bp);
2243 return error; 2117 return error;
2244 } 2118 }
2245 2119
2246 error = 0; 2120 error = 0;
2247 if (flags & XFS_BLF_INODE_BUF) { 2121 if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
2248 error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); 2122 error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f);
2249 } else if (flags & 2123 } else if (buf_f->blf_flags &
2250 (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { 2124 (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
2251 xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); 2125 xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
2252 } else { 2126 } else {
@@ -2286,16 +2160,14 @@ xlog_recover_do_buffer_trans(
2286} 2160}
2287 2161
2288STATIC int 2162STATIC int
2289xlog_recover_do_inode_trans( 2163xlog_recover_inode_pass2(
2290 xlog_t *log, 2164 xlog_t *log,
2291 xlog_recover_item_t *item, 2165 xlog_recover_item_t *item)
2292 int pass)
2293{ 2166{
2294 xfs_inode_log_format_t *in_f; 2167 xfs_inode_log_format_t *in_f;
2295 xfs_mount_t *mp; 2168 xfs_mount_t *mp = log->l_mp;
2296 xfs_buf_t *bp; 2169 xfs_buf_t *bp;
2297 xfs_dinode_t *dip; 2170 xfs_dinode_t *dip;
2298 xfs_ino_t ino;
2299 int len; 2171 int len;
2300 xfs_caddr_t src; 2172 xfs_caddr_t src;
2301 xfs_caddr_t dest; 2173 xfs_caddr_t dest;
@@ -2305,10 +2177,6 @@ xlog_recover_do_inode_trans(
2305 xfs_icdinode_t *dicp; 2177 xfs_icdinode_t *dicp;
2306 int need_free = 0; 2178 int need_free = 0;
2307 2179
2308 if (pass == XLOG_RECOVER_PASS1) {
2309 return 0;
2310 }
2311
2312 if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) { 2180 if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) {
2313 in_f = item->ri_buf[0].i_addr; 2181 in_f = item->ri_buf[0].i_addr;
2314 } else { 2182 } else {
@@ -2318,8 +2186,6 @@ xlog_recover_do_inode_trans(
2318 if (error) 2186 if (error)
2319 goto error; 2187 goto error;
2320 } 2188 }
2321 ino = in_f->ilf_ino;
2322 mp = log->l_mp;
2323 2189
2324 /* 2190 /*
2325 * Inode buffers can be freed, look out for it, 2191 * Inode buffers can be freed, look out for it,
@@ -2354,8 +2220,8 @@ xlog_recover_do_inode_trans(
2354 xfs_buf_relse(bp); 2220 xfs_buf_relse(bp);
2355 xfs_fs_cmn_err(CE_ALERT, mp, 2221 xfs_fs_cmn_err(CE_ALERT, mp,
2356 "xfs_inode_recover: Bad inode magic number, dino ptr = 0x%p, dino bp = 0x%p, ino = %Ld", 2222 "xfs_inode_recover: Bad inode magic number, dino ptr = 0x%p, dino bp = 0x%p, ino = %Ld",
2357 dip, bp, ino); 2223 dip, bp, in_f->ilf_ino);
2358 XFS_ERROR_REPORT("xlog_recover_do_inode_trans(1)", 2224 XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)",
2359 XFS_ERRLEVEL_LOW, mp); 2225 XFS_ERRLEVEL_LOW, mp);
2360 error = EFSCORRUPTED; 2226 error = EFSCORRUPTED;
2361 goto error; 2227 goto error;
@@ -2365,8 +2231,8 @@ xlog_recover_do_inode_trans(
2365 xfs_buf_relse(bp); 2231 xfs_buf_relse(bp);
2366 xfs_fs_cmn_err(CE_ALERT, mp, 2232 xfs_fs_cmn_err(CE_ALERT, mp,
2367 "xfs_inode_recover: Bad inode log record, rec ptr 0x%p, ino %Ld", 2233 "xfs_inode_recover: Bad inode log record, rec ptr 0x%p, ino %Ld",
2368 item, ino); 2234 item, in_f->ilf_ino);
2369 XFS_ERROR_REPORT("xlog_recover_do_inode_trans(2)", 2235 XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)",
2370 XFS_ERRLEVEL_LOW, mp); 2236 XFS_ERRLEVEL_LOW, mp);
2371 error = EFSCORRUPTED; 2237 error = EFSCORRUPTED;
2372 goto error; 2238 goto error;
@@ -2394,12 +2260,12 @@ xlog_recover_do_inode_trans(
2394 if (unlikely((dicp->di_mode & S_IFMT) == S_IFREG)) { 2260 if (unlikely((dicp->di_mode & S_IFMT) == S_IFREG)) {
2395 if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && 2261 if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
2396 (dicp->di_format != XFS_DINODE_FMT_BTREE)) { 2262 (dicp->di_format != XFS_DINODE_FMT_BTREE)) {
2397 XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(3)", 2263 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)",
2398 XFS_ERRLEVEL_LOW, mp, dicp); 2264 XFS_ERRLEVEL_LOW, mp, dicp);
2399 xfs_buf_relse(bp); 2265 xfs_buf_relse(bp);
2400 xfs_fs_cmn_err(CE_ALERT, mp, 2266 xfs_fs_cmn_err(CE_ALERT, mp,
2401 "xfs_inode_recover: Bad regular inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", 2267 "xfs_inode_recover: Bad regular inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
2402 item, dip, bp, ino); 2268 item, dip, bp, in_f->ilf_ino);
2403 error = EFSCORRUPTED; 2269 error = EFSCORRUPTED;
2404 goto error; 2270 goto error;
2405 } 2271 }
@@ -2407,40 +2273,40 @@ xlog_recover_do_inode_trans(
2407 if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && 2273 if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
2408 (dicp->di_format != XFS_DINODE_FMT_BTREE) && 2274 (dicp->di_format != XFS_DINODE_FMT_BTREE) &&
2409 (dicp->di_format != XFS_DINODE_FMT_LOCAL)) { 2275 (dicp->di_format != XFS_DINODE_FMT_LOCAL)) {
2410 XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(4)", 2276 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)",
2411 XFS_ERRLEVEL_LOW, mp, dicp); 2277 XFS_ERRLEVEL_LOW, mp, dicp);
2412 xfs_buf_relse(bp); 2278 xfs_buf_relse(bp);
2413 xfs_fs_cmn_err(CE_ALERT, mp, 2279 xfs_fs_cmn_err(CE_ALERT, mp,
2414 "xfs_inode_recover: Bad dir inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", 2280 "xfs_inode_recover: Bad dir inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
2415 item, dip, bp, ino); 2281 item, dip, bp, in_f->ilf_ino);
2416 error = EFSCORRUPTED; 2282 error = EFSCORRUPTED;
2417 goto error; 2283 goto error;
2418 } 2284 }
2419 } 2285 }
2420 if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){ 2286 if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){
2421 XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(5)", 2287 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
2422 XFS_ERRLEVEL_LOW, mp, dicp); 2288 XFS_ERRLEVEL_LOW, mp, dicp);
2423 xfs_buf_relse(bp); 2289 xfs_buf_relse(bp);
2424 xfs_fs_cmn_err(CE_ALERT, mp, 2290 xfs_fs_cmn_err(CE_ALERT, mp,
2425 "xfs_inode_recover: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld", 2291 "xfs_inode_recover: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld",
2426 item, dip, bp, ino, 2292 item, dip, bp, in_f->ilf_ino,
2427 dicp->di_nextents + dicp->di_anextents, 2293 dicp->di_nextents + dicp->di_anextents,
2428 dicp->di_nblocks); 2294 dicp->di_nblocks);
2429 error = EFSCORRUPTED; 2295 error = EFSCORRUPTED;
2430 goto error; 2296 goto error;
2431 } 2297 }
2432 if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) { 2298 if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) {
2433 XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(6)", 2299 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)",
2434 XFS_ERRLEVEL_LOW, mp, dicp); 2300 XFS_ERRLEVEL_LOW, mp, dicp);
2435 xfs_buf_relse(bp); 2301 xfs_buf_relse(bp);
2436 xfs_fs_cmn_err(CE_ALERT, mp, 2302 xfs_fs_cmn_err(CE_ALERT, mp,
2437 "xfs_inode_recover: Bad inode log rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, forkoff 0x%x", 2303 "xfs_inode_recover: Bad inode log rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, forkoff 0x%x",
2438 item, dip, bp, ino, dicp->di_forkoff); 2304 item, dip, bp, in_f->ilf_ino, dicp->di_forkoff);
2439 error = EFSCORRUPTED; 2305 error = EFSCORRUPTED;
2440 goto error; 2306 goto error;
2441 } 2307 }
2442 if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) { 2308 if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) {
2443 XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(7)", 2309 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
2444 XFS_ERRLEVEL_LOW, mp, dicp); 2310 XFS_ERRLEVEL_LOW, mp, dicp);
2445 xfs_buf_relse(bp); 2311 xfs_buf_relse(bp);
2446 xfs_fs_cmn_err(CE_ALERT, mp, 2312 xfs_fs_cmn_err(CE_ALERT, mp,
@@ -2532,7 +2398,7 @@ xlog_recover_do_inode_trans(
2532 break; 2398 break;
2533 2399
2534 default: 2400 default:
2535 xlog_warn("XFS: xlog_recover_do_inode_trans: Invalid flag"); 2401 xlog_warn("XFS: xlog_recover_inode_pass2: Invalid flag");
2536 ASSERT(0); 2402 ASSERT(0);
2537 xfs_buf_relse(bp); 2403 xfs_buf_relse(bp);
2538 error = EIO; 2404 error = EIO;
@@ -2556,18 +2422,11 @@ error:
2556 * of that type. 2422 * of that type.
2557 */ 2423 */
2558STATIC int 2424STATIC int
2559xlog_recover_do_quotaoff_trans( 2425xlog_recover_quotaoff_pass1(
2560 xlog_t *log, 2426 xlog_t *log,
2561 xlog_recover_item_t *item, 2427 xlog_recover_item_t *item)
2562 int pass)
2563{ 2428{
2564 xfs_qoff_logformat_t *qoff_f; 2429 xfs_qoff_logformat_t *qoff_f = item->ri_buf[0].i_addr;
2565
2566 if (pass == XLOG_RECOVER_PASS2) {
2567 return (0);
2568 }
2569
2570 qoff_f = item->ri_buf[0].i_addr;
2571 ASSERT(qoff_f); 2430 ASSERT(qoff_f);
2572 2431
2573 /* 2432 /*
@@ -2588,22 +2447,17 @@ xlog_recover_do_quotaoff_trans(
2588 * Recover a dquot record 2447 * Recover a dquot record
2589 */ 2448 */
2590STATIC int 2449STATIC int
2591xlog_recover_do_dquot_trans( 2450xlog_recover_dquot_pass2(
2592 xlog_t *log, 2451 xlog_t *log,
2593 xlog_recover_item_t *item, 2452 xlog_recover_item_t *item)
2594 int pass)
2595{ 2453{
2596 xfs_mount_t *mp; 2454 xfs_mount_t *mp = log->l_mp;
2597 xfs_buf_t *bp; 2455 xfs_buf_t *bp;
2598 struct xfs_disk_dquot *ddq, *recddq; 2456 struct xfs_disk_dquot *ddq, *recddq;
2599 int error; 2457 int error;
2600 xfs_dq_logformat_t *dq_f; 2458 xfs_dq_logformat_t *dq_f;
2601 uint type; 2459 uint type;
2602 2460
2603 if (pass == XLOG_RECOVER_PASS1) {
2604 return 0;
2605 }
2606 mp = log->l_mp;
2607 2461
2608 /* 2462 /*
2609 * Filesystems are required to send in quota flags at mount time. 2463 * Filesystems are required to send in quota flags at mount time.
@@ -2647,7 +2501,7 @@ xlog_recover_do_dquot_trans(
2647 if ((error = xfs_qm_dqcheck(recddq, 2501 if ((error = xfs_qm_dqcheck(recddq,
2648 dq_f->qlf_id, 2502 dq_f->qlf_id,
2649 0, XFS_QMOPT_DOWARN, 2503 0, XFS_QMOPT_DOWARN,
2650 "xlog_recover_do_dquot_trans (log copy)"))) { 2504 "xlog_recover_dquot_pass2 (log copy)"))) {
2651 return XFS_ERROR(EIO); 2505 return XFS_ERROR(EIO);
2652 } 2506 }
2653 ASSERT(dq_f->qlf_len == 1); 2507 ASSERT(dq_f->qlf_len == 1);
@@ -2670,7 +2524,7 @@ xlog_recover_do_dquot_trans(
2670 * minimal initialization then. 2524 * minimal initialization then.
2671 */ 2525 */
2672 if (xfs_qm_dqcheck(ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, 2526 if (xfs_qm_dqcheck(ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN,
2673 "xlog_recover_do_dquot_trans")) { 2527 "xlog_recover_dquot_pass2")) {
2674 xfs_buf_relse(bp); 2528 xfs_buf_relse(bp);
2675 return XFS_ERROR(EIO); 2529 return XFS_ERROR(EIO);
2676 } 2530 }
@@ -2693,38 +2547,31 @@ xlog_recover_do_dquot_trans(
2693 * LSN. 2547 * LSN.
2694 */ 2548 */
2695STATIC int 2549STATIC int
2696xlog_recover_do_efi_trans( 2550xlog_recover_efi_pass2(
2697 xlog_t *log, 2551 xlog_t *log,
2698 xlog_recover_item_t *item, 2552 xlog_recover_item_t *item,
2699 xfs_lsn_t lsn, 2553 xfs_lsn_t lsn)
2700 int pass)
2701{ 2554{
2702 int error; 2555 int error;
2703 xfs_mount_t *mp; 2556 xfs_mount_t *mp = log->l_mp;
2704 xfs_efi_log_item_t *efip; 2557 xfs_efi_log_item_t *efip;
2705 xfs_efi_log_format_t *efi_formatp; 2558 xfs_efi_log_format_t *efi_formatp;
2706 2559
2707 if (pass == XLOG_RECOVER_PASS1) {
2708 return 0;
2709 }
2710
2711 efi_formatp = item->ri_buf[0].i_addr; 2560 efi_formatp = item->ri_buf[0].i_addr;
2712 2561
2713 mp = log->l_mp;
2714 efip = xfs_efi_init(mp, efi_formatp->efi_nextents); 2562 efip = xfs_efi_init(mp, efi_formatp->efi_nextents);
2715 if ((error = xfs_efi_copy_format(&(item->ri_buf[0]), 2563 if ((error = xfs_efi_copy_format(&(item->ri_buf[0]),
2716 &(efip->efi_format)))) { 2564 &(efip->efi_format)))) {
2717 xfs_efi_item_free(efip); 2565 xfs_efi_item_free(efip);
2718 return error; 2566 return error;
2719 } 2567 }
2720 efip->efi_next_extent = efi_formatp->efi_nextents; 2568 atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents);
2721 efip->efi_flags |= XFS_EFI_COMMITTED;
2722 2569
2723 spin_lock(&log->l_ailp->xa_lock); 2570 spin_lock(&log->l_ailp->xa_lock);
2724 /* 2571 /*
2725 * xfs_trans_ail_update() drops the AIL lock. 2572 * xfs_trans_ail_update() drops the AIL lock.
2726 */ 2573 */
2727 xfs_trans_ail_update(log->l_ailp, (xfs_log_item_t *)efip, lsn); 2574 xfs_trans_ail_update(log->l_ailp, &efip->efi_item, lsn);
2728 return 0; 2575 return 0;
2729} 2576}
2730 2577
@@ -2737,11 +2584,10 @@ xlog_recover_do_efi_trans(
2737 * efd format structure. If we find it, we remove the efi from the 2584 * efd format structure. If we find it, we remove the efi from the
2738 * AIL and free it. 2585 * AIL and free it.
2739 */ 2586 */
2740STATIC void 2587STATIC int
2741xlog_recover_do_efd_trans( 2588xlog_recover_efd_pass2(
2742 xlog_t *log, 2589 xlog_t *log,
2743 xlog_recover_item_t *item, 2590 xlog_recover_item_t *item)
2744 int pass)
2745{ 2591{
2746 xfs_efd_log_format_t *efd_formatp; 2592 xfs_efd_log_format_t *efd_formatp;
2747 xfs_efi_log_item_t *efip = NULL; 2593 xfs_efi_log_item_t *efip = NULL;
@@ -2750,10 +2596,6 @@ xlog_recover_do_efd_trans(
2750 struct xfs_ail_cursor cur; 2596 struct xfs_ail_cursor cur;
2751 struct xfs_ail *ailp = log->l_ailp; 2597 struct xfs_ail *ailp = log->l_ailp;
2752 2598
2753 if (pass == XLOG_RECOVER_PASS1) {
2754 return;
2755 }
2756
2757 efd_formatp = item->ri_buf[0].i_addr; 2599 efd_formatp = item->ri_buf[0].i_addr;
2758 ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) + 2600 ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) +
2759 ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) || 2601 ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) ||
@@ -2785,62 +2627,6 @@ xlog_recover_do_efd_trans(
2785 } 2627 }
2786 xfs_trans_ail_cursor_done(ailp, &cur); 2628 xfs_trans_ail_cursor_done(ailp, &cur);
2787 spin_unlock(&ailp->xa_lock); 2629 spin_unlock(&ailp->xa_lock);
2788}
2789
2790/*
2791 * Perform the transaction
2792 *
2793 * If the transaction modifies a buffer or inode, do it now. Otherwise,
2794 * EFIs and EFDs get queued up by adding entries into the AIL for them.
2795 */
2796STATIC int
2797xlog_recover_do_trans(
2798 xlog_t *log,
2799 xlog_recover_t *trans,
2800 int pass)
2801{
2802 int error = 0;
2803 xlog_recover_item_t *item;
2804
2805 error = xlog_recover_reorder_trans(log, trans, pass);
2806 if (error)
2807 return error;
2808
2809 list_for_each_entry(item, &trans->r_itemq, ri_list) {
2810 trace_xfs_log_recover_item_recover(log, trans, item, pass);
2811 switch (ITEM_TYPE(item)) {
2812 case XFS_LI_BUF:
2813 error = xlog_recover_do_buffer_trans(log, item, pass);
2814 break;
2815 case XFS_LI_INODE:
2816 error = xlog_recover_do_inode_trans(log, item, pass);
2817 break;
2818 case XFS_LI_EFI:
2819 error = xlog_recover_do_efi_trans(log, item,
2820 trans->r_lsn, pass);
2821 break;
2822 case XFS_LI_EFD:
2823 xlog_recover_do_efd_trans(log, item, pass);
2824 error = 0;
2825 break;
2826 case XFS_LI_DQUOT:
2827 error = xlog_recover_do_dquot_trans(log, item, pass);
2828 break;
2829 case XFS_LI_QUOTAOFF:
2830 error = xlog_recover_do_quotaoff_trans(log, item,
2831 pass);
2832 break;
2833 default:
2834 xlog_warn(
2835 "XFS: invalid item type (%d) xlog_recover_do_trans", ITEM_TYPE(item));
2836 ASSERT(0);
2837 error = XFS_ERROR(EIO);
2838 break;
2839 }
2840
2841 if (error)
2842 return error;
2843 }
2844 2630
2845 return 0; 2631 return 0;
2846} 2632}
@@ -2852,7 +2638,7 @@ xlog_recover_do_trans(
2852 */ 2638 */
2853STATIC void 2639STATIC void
2854xlog_recover_free_trans( 2640xlog_recover_free_trans(
2855 xlog_recover_t *trans) 2641 struct xlog_recover *trans)
2856{ 2642{
2857 xlog_recover_item_t *item, *n; 2643 xlog_recover_item_t *item, *n;
2858 int i; 2644 int i;
@@ -2871,17 +2657,95 @@ xlog_recover_free_trans(
2871} 2657}
2872 2658
2873STATIC int 2659STATIC int
2660xlog_recover_commit_pass1(
2661 struct log *log,
2662 struct xlog_recover *trans,
2663 xlog_recover_item_t *item)
2664{
2665 trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS1);
2666
2667 switch (ITEM_TYPE(item)) {
2668 case XFS_LI_BUF:
2669 return xlog_recover_buffer_pass1(log, item);
2670 case XFS_LI_QUOTAOFF:
2671 return xlog_recover_quotaoff_pass1(log, item);
2672 case XFS_LI_INODE:
2673 case XFS_LI_EFI:
2674 case XFS_LI_EFD:
2675 case XFS_LI_DQUOT:
2676 /* nothing to do in pass 1 */
2677 return 0;
2678 default:
2679 xlog_warn(
2680 "XFS: invalid item type (%d) xlog_recover_commit_pass1",
2681 ITEM_TYPE(item));
2682 ASSERT(0);
2683 return XFS_ERROR(EIO);
2684 }
2685}
2686
2687STATIC int
2688xlog_recover_commit_pass2(
2689 struct log *log,
2690 struct xlog_recover *trans,
2691 xlog_recover_item_t *item)
2692{
2693 trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2);
2694
2695 switch (ITEM_TYPE(item)) {
2696 case XFS_LI_BUF:
2697 return xlog_recover_buffer_pass2(log, item);
2698 case XFS_LI_INODE:
2699 return xlog_recover_inode_pass2(log, item);
2700 case XFS_LI_EFI:
2701 return xlog_recover_efi_pass2(log, item, trans->r_lsn);
2702 case XFS_LI_EFD:
2703 return xlog_recover_efd_pass2(log, item);
2704 case XFS_LI_DQUOT:
2705 return xlog_recover_dquot_pass2(log, item);
2706 case XFS_LI_QUOTAOFF:
2707 /* nothing to do in pass2 */
2708 return 0;
2709 default:
2710 xlog_warn(
2711 "XFS: invalid item type (%d) xlog_recover_commit_pass2",
2712 ITEM_TYPE(item));
2713 ASSERT(0);
2714 return XFS_ERROR(EIO);
2715 }
2716}
2717
2718/*
2719 * Perform the transaction.
2720 *
2721 * If the transaction modifies a buffer or inode, do it now. Otherwise,
2722 * EFIs and EFDs get queued up by adding entries into the AIL for them.
2723 */
2724STATIC int
2874xlog_recover_commit_trans( 2725xlog_recover_commit_trans(
2875 xlog_t *log, 2726 struct log *log,
2876 xlog_recover_t *trans, 2727 struct xlog_recover *trans,
2877 int pass) 2728 int pass)
2878{ 2729{
2879 int error; 2730 int error = 0;
2731 xlog_recover_item_t *item;
2880 2732
2881 hlist_del(&trans->r_list); 2733 hlist_del(&trans->r_list);
2882 if ((error = xlog_recover_do_trans(log, trans, pass))) 2734
2735 error = xlog_recover_reorder_trans(log, trans, pass);
2736 if (error)
2883 return error; 2737 return error;
2884 xlog_recover_free_trans(trans); /* no error */ 2738
2739 list_for_each_entry(item, &trans->r_itemq, ri_list) {
2740 if (pass == XLOG_RECOVER_PASS1)
2741 error = xlog_recover_commit_pass1(log, trans, item);
2742 else
2743 error = xlog_recover_commit_pass2(log, trans, item);
2744 if (error)
2745 return error;
2746 }
2747
2748 xlog_recover_free_trans(trans);
2885 return 0; 2749 return 0;
2886} 2750}
2887 2751
@@ -3011,7 +2875,7 @@ xlog_recover_process_efi(
3011 xfs_extent_t *extp; 2875 xfs_extent_t *extp;
3012 xfs_fsblock_t startblock_fsb; 2876 xfs_fsblock_t startblock_fsb;
3013 2877
3014 ASSERT(!(efip->efi_flags & XFS_EFI_RECOVERED)); 2878 ASSERT(!test_bit(XFS_EFI_RECOVERED, &efip->efi_flags));
3015 2879
3016 /* 2880 /*
3017 * First check the validity of the extents described by the 2881 * First check the validity of the extents described by the
@@ -3050,7 +2914,7 @@ xlog_recover_process_efi(
3050 extp->ext_len); 2914 extp->ext_len);
3051 } 2915 }
3052 2916
3053 efip->efi_flags |= XFS_EFI_RECOVERED; 2917 set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
3054 error = xfs_trans_commit(tp, 0); 2918 error = xfs_trans_commit(tp, 0);
3055 return error; 2919 return error;
3056 2920
@@ -3107,7 +2971,7 @@ xlog_recover_process_efis(
3107 * Skip EFIs that we've already processed. 2971 * Skip EFIs that we've already processed.
3108 */ 2972 */
3109 efip = (xfs_efi_log_item_t *)lip; 2973 efip = (xfs_efi_log_item_t *)lip;
3110 if (efip->efi_flags & XFS_EFI_RECOVERED) { 2974 if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)) {
3111 lip = xfs_trans_ail_cursor_next(ailp, &cur); 2975 lip = xfs_trans_ail_cursor_next(ailp, &cur);
3112 continue; 2976 continue;
3113 } 2977 }
@@ -3724,7 +3588,7 @@ xlog_do_log_recovery(
3724 xfs_daddr_t head_blk, 3588 xfs_daddr_t head_blk,
3725 xfs_daddr_t tail_blk) 3589 xfs_daddr_t tail_blk)
3726{ 3590{
3727 int error; 3591 int error, i;
3728 3592
3729 ASSERT(head_blk != tail_blk); 3593 ASSERT(head_blk != tail_blk);
3730 3594
@@ -3732,10 +3596,12 @@ xlog_do_log_recovery(
3732 * First do a pass to find all of the cancelled buf log items. 3596 * First do a pass to find all of the cancelled buf log items.
3733 * Store them in the buf_cancel_table for use in the second pass. 3597 * Store them in the buf_cancel_table for use in the second pass.
3734 */ 3598 */
3735 log->l_buf_cancel_table = 3599 log->l_buf_cancel_table = kmem_zalloc(XLOG_BC_TABLE_SIZE *
3736 (xfs_buf_cancel_t **)kmem_zalloc(XLOG_BC_TABLE_SIZE * 3600 sizeof(struct list_head),
3737 sizeof(xfs_buf_cancel_t*),
3738 KM_SLEEP); 3601 KM_SLEEP);
3602 for (i = 0; i < XLOG_BC_TABLE_SIZE; i++)
3603 INIT_LIST_HEAD(&log->l_buf_cancel_table[i]);
3604
3739 error = xlog_do_recovery_pass(log, head_blk, tail_blk, 3605 error = xlog_do_recovery_pass(log, head_blk, tail_blk,
3740 XLOG_RECOVER_PASS1); 3606 XLOG_RECOVER_PASS1);
3741 if (error != 0) { 3607 if (error != 0) {
@@ -3754,7 +3620,7 @@ xlog_do_log_recovery(
3754 int i; 3620 int i;
3755 3621
3756 for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) 3622 for (i = 0; i < XLOG_BC_TABLE_SIZE; i++)
3757 ASSERT(log->l_buf_cancel_table[i] == NULL); 3623 ASSERT(list_empty(&log->l_buf_cancel_table[i]));
3758 } 3624 }
3759#endif /* DEBUG */ 3625#endif /* DEBUG */
3760 3626
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 19e9dfa1c25..d447aef84bc 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -472,7 +472,7 @@ xfs_initialize_perag(
472 goto out_unwind; 472 goto out_unwind;
473 pag->pag_agno = index; 473 pag->pag_agno = index;
474 pag->pag_mount = mp; 474 pag->pag_mount = mp;
475 rwlock_init(&pag->pag_ici_lock); 475 spin_lock_init(&pag->pag_ici_lock);
476 mutex_init(&pag->pag_ici_reclaim_lock); 476 mutex_init(&pag->pag_ici_reclaim_lock);
477 INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); 477 INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
478 spin_lock_init(&pag->pag_buf_lock); 478 spin_lock_init(&pag->pag_buf_lock);
@@ -975,6 +975,24 @@ xfs_set_rw_sizes(xfs_mount_t *mp)
975} 975}
976 976
977/* 977/*
978 * precalculate the low space thresholds for dynamic speculative preallocation.
979 */
980void
981xfs_set_low_space_thresholds(
982 struct xfs_mount *mp)
983{
984 int i;
985
986 for (i = 0; i < XFS_LOWSP_MAX; i++) {
987 __uint64_t space = mp->m_sb.sb_dblocks;
988
989 do_div(space, 100);
990 mp->m_low_space[i] = space * (i + 1);
991 }
992}
993
994
995/*
978 * Set whether we're using inode alignment. 996 * Set whether we're using inode alignment.
979 */ 997 */
980STATIC void 998STATIC void
@@ -1196,6 +1214,9 @@ xfs_mountfs(
1196 */ 1214 */
1197 xfs_set_rw_sizes(mp); 1215 xfs_set_rw_sizes(mp);
1198 1216
1217 /* set the low space thresholds for dynamic preallocation */
1218 xfs_set_low_space_thresholds(mp);
1219
1199 /* 1220 /*
1200 * Set the inode cluster size. 1221 * Set the inode cluster size.
1201 * This may still be overridden by the file system 1222 * This may still be overridden by the file system
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 5861b498074..a62e8971539 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -103,6 +103,16 @@ extern int xfs_icsb_modify_counters(struct xfs_mount *, xfs_sb_field_t,
103 xfs_mod_incore_sb(mp, field, delta, rsvd) 103 xfs_mod_incore_sb(mp, field, delta, rsvd)
104#endif 104#endif
105 105
106/* dynamic preallocation free space thresholds, 5% down to 1% */
107enum {
108 XFS_LOWSP_1_PCNT = 0,
109 XFS_LOWSP_2_PCNT,
110 XFS_LOWSP_3_PCNT,
111 XFS_LOWSP_4_PCNT,
112 XFS_LOWSP_5_PCNT,
113 XFS_LOWSP_MAX,
114};
115
106typedef struct xfs_mount { 116typedef struct xfs_mount {
107 struct super_block *m_super; 117 struct super_block *m_super;
108 xfs_tid_t m_tid; /* next unused tid for fs */ 118 xfs_tid_t m_tid; /* next unused tid for fs */
@@ -202,6 +212,8 @@ typedef struct xfs_mount {
202 __int64_t m_update_flags; /* sb flags we need to update 212 __int64_t m_update_flags; /* sb flags we need to update
203 on the next remount,rw */ 213 on the next remount,rw */
204 struct shrinker m_inode_shrink; /* inode reclaim shrinker */ 214 struct shrinker m_inode_shrink; /* inode reclaim shrinker */
215 int64_t m_low_space[XFS_LOWSP_MAX];
216 /* low free space thresholds */
205} xfs_mount_t; 217} xfs_mount_t;
206 218
207/* 219/*
@@ -379,6 +391,8 @@ extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t);
379 391
380extern int xfs_dev_is_read_only(struct xfs_mount *, char *); 392extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
381 393
394extern void xfs_set_low_space_thresholds(struct xfs_mount *);
395
382#endif /* __KERNEL__ */ 396#endif /* __KERNEL__ */
383 397
384extern void xfs_mod_sb(struct xfs_trans *, __int64_t); 398extern void xfs_mod_sb(struct xfs_trans *, __int64_t);
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index 45ce15dc5b2..edfa178bafb 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -408,7 +408,7 @@ xfs_mru_cache_flush(
408 spin_lock(&mru->lock); 408 spin_lock(&mru->lock);
409 if (mru->queued) { 409 if (mru->queued) {
410 spin_unlock(&mru->lock); 410 spin_unlock(&mru->lock);
411 cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work); 411 cancel_delayed_work_sync(&mru->work);
412 spin_lock(&mru->lock); 412 spin_lock(&mru->lock);
413 } 413 }
414 414
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index f6d956b7711..f80a067a465 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1350,7 +1350,7 @@ xfs_trans_fill_vecs(
1350 * they could be immediately flushed and we'd have to race with the flusher 1350 * they could be immediately flushed and we'd have to race with the flusher
1351 * trying to pull the item from the AIL as we add it. 1351 * trying to pull the item from the AIL as we add it.
1352 */ 1352 */
1353void 1353static void
1354xfs_trans_item_committed( 1354xfs_trans_item_committed(
1355 struct xfs_log_item *lip, 1355 struct xfs_log_item *lip,
1356 xfs_lsn_t commit_lsn, 1356 xfs_lsn_t commit_lsn,
@@ -1425,6 +1425,83 @@ xfs_trans_committed(
1425 xfs_trans_free(tp); 1425 xfs_trans_free(tp);
1426} 1426}
1427 1427
1428static inline void
1429xfs_log_item_batch_insert(
1430 struct xfs_ail *ailp,
1431 struct xfs_log_item **log_items,
1432 int nr_items,
1433 xfs_lsn_t commit_lsn)
1434{
1435 int i;
1436
1437 spin_lock(&ailp->xa_lock);
1438 /* xfs_trans_ail_update_bulk drops ailp->xa_lock */
1439 xfs_trans_ail_update_bulk(ailp, log_items, nr_items, commit_lsn);
1440
1441 for (i = 0; i < nr_items; i++)
1442 IOP_UNPIN(log_items[i], 0);
1443}
1444
1445/*
1446 * Bulk operation version of xfs_trans_committed that takes a log vector of
1447 * items to insert into the AIL. This uses bulk AIL insertion techniques to
1448 * minimise lock traffic.
1449 */
1450void
1451xfs_trans_committed_bulk(
1452 struct xfs_ail *ailp,
1453 struct xfs_log_vec *log_vector,
1454 xfs_lsn_t commit_lsn,
1455 int aborted)
1456{
1457#define LOG_ITEM_BATCH_SIZE 32
1458 struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE];
1459 struct xfs_log_vec *lv;
1460 int i = 0;
1461
1462 /* unpin all the log items */
1463 for (lv = log_vector; lv; lv = lv->lv_next ) {
1464 struct xfs_log_item *lip = lv->lv_item;
1465 xfs_lsn_t item_lsn;
1466
1467 if (aborted)
1468 lip->li_flags |= XFS_LI_ABORTED;
1469 item_lsn = IOP_COMMITTED(lip, commit_lsn);
1470
1471 /* item_lsn of -1 means the item was freed */
1472 if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
1473 continue;
1474
1475 if (item_lsn != commit_lsn) {
1476
1477 /*
1478 * Not a bulk update option due to unusual item_lsn.
1479 * Push into AIL immediately, rechecking the lsn once
1480 * we have the ail lock. Then unpin the item.
1481 */
1482 spin_lock(&ailp->xa_lock);
1483 if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0)
1484 xfs_trans_ail_update(ailp, lip, item_lsn);
1485 else
1486 spin_unlock(&ailp->xa_lock);
1487 IOP_UNPIN(lip, 0);
1488 continue;
1489 }
1490
1491 /* Item is a candidate for bulk AIL insert. */
1492 log_items[i++] = lv->lv_item;
1493 if (i >= LOG_ITEM_BATCH_SIZE) {
1494 xfs_log_item_batch_insert(ailp, log_items,
1495 LOG_ITEM_BATCH_SIZE, commit_lsn);
1496 i = 0;
1497 }
1498 }
1499
1500 /* make sure we insert the remainder! */
1501 if (i)
1502 xfs_log_item_batch_insert(ailp, log_items, i, commit_lsn);
1503}
1504
1428/* 1505/*
1429 * Called from the trans_commit code when we notice that 1506 * Called from the trans_commit code when we notice that
1430 * the filesystem is in the middle of a forced shutdown. 1507 * the filesystem is in the middle of a forced shutdown.
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 246286b77a8..c2042b736b8 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -294,8 +294,8 @@ struct xfs_log_item_desc {
294#define XFS_ALLOC_BTREE_REF 2 294#define XFS_ALLOC_BTREE_REF 2
295#define XFS_BMAP_BTREE_REF 2 295#define XFS_BMAP_BTREE_REF 2
296#define XFS_DIR_BTREE_REF 2 296#define XFS_DIR_BTREE_REF 2
297#define XFS_INO_REF 2
297#define XFS_ATTR_BTREE_REF 1 298#define XFS_ATTR_BTREE_REF 1
298#define XFS_INO_REF 1
299#define XFS_DQUOT_REF 1 299#define XFS_DQUOT_REF 1
300 300
301#ifdef __KERNEL__ 301#ifdef __KERNEL__
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index dc9069568ff..c5bbbc45db9 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -28,8 +28,8 @@
28#include "xfs_trans_priv.h" 28#include "xfs_trans_priv.h"
29#include "xfs_error.h" 29#include "xfs_error.h"
30 30
31STATIC void xfs_ail_insert(struct xfs_ail *, xfs_log_item_t *); 31STATIC void xfs_ail_splice(struct xfs_ail *, struct list_head *, xfs_lsn_t);
32STATIC xfs_log_item_t * xfs_ail_delete(struct xfs_ail *, xfs_log_item_t *); 32STATIC void xfs_ail_delete(struct xfs_ail *, xfs_log_item_t *);
33STATIC xfs_log_item_t * xfs_ail_min(struct xfs_ail *); 33STATIC xfs_log_item_t * xfs_ail_min(struct xfs_ail *);
34STATIC xfs_log_item_t * xfs_ail_next(struct xfs_ail *, xfs_log_item_t *); 34STATIC xfs_log_item_t * xfs_ail_next(struct xfs_ail *, xfs_log_item_t *);
35 35
@@ -449,129 +449,152 @@ xfs_trans_unlocked_item(
449 xfs_log_move_tail(ailp->xa_mount, 1); 449 xfs_log_move_tail(ailp->xa_mount, 1);
450} /* xfs_trans_unlocked_item */ 450} /* xfs_trans_unlocked_item */
451 451
452
453/* 452/*
454 * Update the position of the item in the AIL with the new 453 * xfs_trans_ail_update_bulk - bulk AIL insertion operation.
455 * lsn. If it is not yet in the AIL, add it. Otherwise, move 454 *
456 * it to its new position by removing it and re-adding it. 455 * @xfs_trans_ail_update_bulk takes an array of log items that all need to be
456 * positioned at the same LSN in the AIL. If an item is not in the AIL, it will
457 * be added. Otherwise, it will be repositioned by removing it and re-adding
458 * it to the AIL. If we move the first item in the AIL, update the log tail to
459 * match the new minimum LSN in the AIL.
457 * 460 *
458 * Wakeup anyone with an lsn less than the item's lsn. If the item 461 * This function executes the update operations on all the items in the
459 * we move in the AIL is the minimum one, update the tail lsn in the 462 * array under a single hold of the AIL lock, which the caller must already
460 * log manager. 463 * have taken. Once we are under the AIL lock, we need to check each log
464 * item LSN to confirm it needs to be moved forward in the AIL.
461 * 465 *
462 * This function must be called with the AIL lock held. The lock 466 * To optimise the insert operation, we delete all the items from the AIL in
463 * is dropped before returning. 467 * the first pass, moving them into a temporary list, then splice the temporary
468 * list into the correct position in the AIL. This avoids needing to do an
469 * insert operation on every item.
470 *
471 * This function must be called with the AIL lock held. The lock is dropped
472 * before returning.
464 */ 473 */
465void 474void
466xfs_trans_ail_update( 475xfs_trans_ail_update_bulk(
467 struct xfs_ail *ailp, 476 struct xfs_ail *ailp,
468 xfs_log_item_t *lip, 477 struct xfs_log_item **log_items,
469 xfs_lsn_t lsn) __releases(ailp->xa_lock) 478 int nr_items,
479 xfs_lsn_t lsn) __releases(ailp->xa_lock)
470{ 480{
471 xfs_log_item_t *dlip = NULL; 481 xfs_log_item_t *mlip;
472 xfs_log_item_t *mlip; /* ptr to minimum lip */
473 xfs_lsn_t tail_lsn; 482 xfs_lsn_t tail_lsn;
483 int mlip_changed = 0;
484 int i;
485 LIST_HEAD(tmp);
474 486
475 mlip = xfs_ail_min(ailp); 487 mlip = xfs_ail_min(ailp);
476 488
477 if (lip->li_flags & XFS_LI_IN_AIL) { 489 for (i = 0; i < nr_items; i++) {
478 dlip = xfs_ail_delete(ailp, lip); 490 struct xfs_log_item *lip = log_items[i];
479 ASSERT(dlip == lip); 491 if (lip->li_flags & XFS_LI_IN_AIL) {
480 xfs_trans_ail_cursor_clear(ailp, dlip); 492 /* check if we really need to move the item */
481 } else { 493 if (XFS_LSN_CMP(lsn, lip->li_lsn) <= 0)
482 lip->li_flags |= XFS_LI_IN_AIL; 494 continue;
495
496 xfs_ail_delete(ailp, lip);
497 if (mlip == lip)
498 mlip_changed = 1;
499 } else {
500 lip->li_flags |= XFS_LI_IN_AIL;
501 }
502 lip->li_lsn = lsn;
503 list_add(&lip->li_ail, &tmp);
483 } 504 }
484 505
485 lip->li_lsn = lsn; 506 xfs_ail_splice(ailp, &tmp, lsn);
486 xfs_ail_insert(ailp, lip);
487 507
488 if (mlip == dlip) { 508 if (!mlip_changed) {
489 mlip = xfs_ail_min(ailp);
490 /*
491 * It is not safe to access mlip after the AIL lock is
492 * dropped, so we must get a copy of li_lsn before we do
493 * so. This is especially important on 32-bit platforms
494 * where accessing and updating 64-bit values like li_lsn
495 * is not atomic.
496 */
497 tail_lsn = mlip->li_lsn;
498 spin_unlock(&ailp->xa_lock);
499 xfs_log_move_tail(ailp->xa_mount, tail_lsn);
500 } else {
501 spin_unlock(&ailp->xa_lock); 509 spin_unlock(&ailp->xa_lock);
510 return;
502 } 511 }
503 512
504 513 /*
505} /* xfs_trans_update_ail */ 514 * It is not safe to access mlip after the AIL lock is dropped, so we
515 * must get a copy of li_lsn before we do so. This is especially
516 * important on 32-bit platforms where accessing and updating 64-bit
517 * values like li_lsn is not atomic.
518 */
519 mlip = xfs_ail_min(ailp);
520 tail_lsn = mlip->li_lsn;
521 spin_unlock(&ailp->xa_lock);
522 xfs_log_move_tail(ailp->xa_mount, tail_lsn);
523}
506 524
507/* 525/*
508 * Delete the given item from the AIL. It must already be in 526 * xfs_trans_ail_delete_bulk - remove multiple log items from the AIL
509 * the AIL.
510 * 527 *
511 * Wakeup anyone with an lsn less than item's lsn. If the item 528 * @xfs_trans_ail_delete_bulk takes an array of log items that all need to be
512 * we delete in the AIL is the minimum one, update the tail lsn in the 529 * removed from the AIL. The caller is already holding the AIL lock, and has done
513 * log manager. 530 * all the checks necessary to ensure the items passed in via @log_items are
531 * ready for deletion. This includes checking that the items are in the AIL.
514 * 532 *
515 * Clear the IN_AIL flag from the item, reset its lsn to 0, and 533 * For each log item to be removed, unlink it from the AIL, clear the IN_AIL
516 * bump the AIL's generation count to indicate that the tree 534 * flag from the item and reset the item's lsn to 0. If we remove the first
517 * has changed. 535 * item in the AIL, update the log tail to match the new minimum LSN in the
536 * AIL.
518 * 537 *
519 * This function must be called with the AIL lock held. The lock 538 * This function will not drop the AIL lock until all items are removed from
520 * is dropped before returning. 539 * the AIL to minimise the amount of lock traffic on the AIL. This does not
540 * greatly increase the AIL hold time, but does significantly reduce the amount
541 * of traffic on the lock, especially during IO completion.
542 *
543 * This function must be called with the AIL lock held. The lock is dropped
544 * before returning.
521 */ 545 */
522void 546void
523xfs_trans_ail_delete( 547xfs_trans_ail_delete_bulk(
524 struct xfs_ail *ailp, 548 struct xfs_ail *ailp,
525 xfs_log_item_t *lip) __releases(ailp->xa_lock) 549 struct xfs_log_item **log_items,
550 int nr_items) __releases(ailp->xa_lock)
526{ 551{
527 xfs_log_item_t *dlip;
528 xfs_log_item_t *mlip; 552 xfs_log_item_t *mlip;
529 xfs_lsn_t tail_lsn; 553 xfs_lsn_t tail_lsn;
554 int mlip_changed = 0;
555 int i;
530 556
531 if (lip->li_flags & XFS_LI_IN_AIL) { 557 mlip = xfs_ail_min(ailp);
532 mlip = xfs_ail_min(ailp);
533 dlip = xfs_ail_delete(ailp, lip);
534 ASSERT(dlip == lip);
535 xfs_trans_ail_cursor_clear(ailp, dlip);
536
537 558
538 lip->li_flags &= ~XFS_LI_IN_AIL; 559 for (i = 0; i < nr_items; i++) {
539 lip->li_lsn = 0; 560 struct xfs_log_item *lip = log_items[i];
561 if (!(lip->li_flags & XFS_LI_IN_AIL)) {
562 struct xfs_mount *mp = ailp->xa_mount;
540 563
541 if (mlip == dlip) {
542 mlip = xfs_ail_min(ailp);
543 /*
544 * It is not safe to access mlip after the AIL lock
545 * is dropped, so we must get a copy of li_lsn
546 * before we do so. This is especially important
547 * on 32-bit platforms where accessing and updating
548 * 64-bit values like li_lsn is not atomic.
549 */
550 tail_lsn = mlip ? mlip->li_lsn : 0;
551 spin_unlock(&ailp->xa_lock);
552 xfs_log_move_tail(ailp->xa_mount, tail_lsn);
553 } else {
554 spin_unlock(&ailp->xa_lock); 564 spin_unlock(&ailp->xa_lock);
565 if (!XFS_FORCED_SHUTDOWN(mp)) {
566 xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp,
567 "%s: attempting to delete a log item that is not in the AIL",
568 __func__);
569 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
570 }
571 return;
555 } 572 }
573
574 xfs_ail_delete(ailp, lip);
575 lip->li_flags &= ~XFS_LI_IN_AIL;
576 lip->li_lsn = 0;
577 if (mlip == lip)
578 mlip_changed = 1;
556 } 579 }
557 else {
558 /*
559 * If the file system is not being shutdown, we are in
560 * serious trouble if we get to this stage.
561 */
562 struct xfs_mount *mp = ailp->xa_mount;
563 580
581 if (!mlip_changed) {
564 spin_unlock(&ailp->xa_lock); 582 spin_unlock(&ailp->xa_lock);
565 if (!XFS_FORCED_SHUTDOWN(mp)) { 583 return;
566 xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp,
567 "%s: attempting to delete a log item that is not in the AIL",
568 __func__);
569 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
570 }
571 } 584 }
572}
573
574 585
586 /*
587 * It is not safe to access mlip after the AIL lock is dropped, so we
588 * must get a copy of li_lsn before we do so. This is especially
589 * important on 32-bit platforms where accessing and updating 64-bit
590 * values like li_lsn is not atomic. It is possible we've emptied the
591 * AIL here, so if that is the case, pass an LSN of 0 to the tail move.
592 */
593 mlip = xfs_ail_min(ailp);
594 tail_lsn = mlip ? mlip->li_lsn : 0;
595 spin_unlock(&ailp->xa_lock);
596 xfs_log_move_tail(ailp->xa_mount, tail_lsn);
597}
575 598
576/* 599/*
577 * The active item list (AIL) is a doubly linked list of log 600 * The active item list (AIL) is a doubly linked list of log
@@ -623,16 +646,13 @@ xfs_trans_ail_destroy(
623} 646}
624 647
625/* 648/*
626 * Insert the given log item into the AIL. 649 * splice the log item list into the AIL at the given LSN.
627 * We almost always insert at the end of the list, so on inserts
628 * we search from the end of the list to find where the
629 * new item belongs.
630 */ 650 */
631STATIC void 651STATIC void
632xfs_ail_insert( 652xfs_ail_splice(
633 struct xfs_ail *ailp, 653 struct xfs_ail *ailp,
634 xfs_log_item_t *lip) 654 struct list_head *list,
635/* ARGSUSED */ 655 xfs_lsn_t lsn)
636{ 656{
637 xfs_log_item_t *next_lip; 657 xfs_log_item_t *next_lip;
638 658
@@ -640,39 +660,33 @@ xfs_ail_insert(
640 * If the list is empty, just insert the item. 660 * If the list is empty, just insert the item.
641 */ 661 */
642 if (list_empty(&ailp->xa_ail)) { 662 if (list_empty(&ailp->xa_ail)) {
643 list_add(&lip->li_ail, &ailp->xa_ail); 663 list_splice(list, &ailp->xa_ail);
644 return; 664 return;
645 } 665 }
646 666
647 list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) { 667 list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) {
648 if (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0) 668 if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0)
649 break; 669 break;
650 } 670 }
651 671
652 ASSERT((&next_lip->li_ail == &ailp->xa_ail) || 672 ASSERT((&next_lip->li_ail == &ailp->xa_ail) ||
653 (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0)); 673 (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0));
654
655 list_add(&lip->li_ail, &next_lip->li_ail);
656 674
657 xfs_ail_check(ailp, lip); 675 list_splice_init(list, &next_lip->li_ail);
658 return; 676 return;
659} 677}
660 678
661/* 679/*
662 * Delete the given item from the AIL. Return a pointer to the item. 680 * Delete the given item from the AIL. Return a pointer to the item.
663 */ 681 */
664/*ARGSUSED*/ 682STATIC void
665STATIC xfs_log_item_t *
666xfs_ail_delete( 683xfs_ail_delete(
667 struct xfs_ail *ailp, 684 struct xfs_ail *ailp,
668 xfs_log_item_t *lip) 685 xfs_log_item_t *lip)
669/* ARGSUSED */
670{ 686{
671 xfs_ail_check(ailp, lip); 687 xfs_ail_check(ailp, lip);
672
673 list_del(&lip->li_ail); 688 list_del(&lip->li_ail);
674 689 xfs_trans_ail_cursor_clear(ailp, lip);
675 return lip;
676} 690}
677 691
678/* 692/*
@@ -682,7 +696,6 @@ xfs_ail_delete(
682STATIC xfs_log_item_t * 696STATIC xfs_log_item_t *
683xfs_ail_min( 697xfs_ail_min(
684 struct xfs_ail *ailp) 698 struct xfs_ail *ailp)
685/* ARGSUSED */
686{ 699{
687 if (list_empty(&ailp->xa_ail)) 700 if (list_empty(&ailp->xa_ail))
688 return NULL; 701 return NULL;
@@ -699,7 +712,6 @@ STATIC xfs_log_item_t *
699xfs_ail_next( 712xfs_ail_next(
700 struct xfs_ail *ailp, 713 struct xfs_ail *ailp,
701 xfs_log_item_t *lip) 714 xfs_log_item_t *lip)
702/* ARGSUSED */
703{ 715{
704 if (lip->li_ail.next == &ailp->xa_ail) 716 if (lip->li_ail.next == &ailp->xa_ail)
705 return NULL; 717 return NULL;
diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c
index f783d5e9fa7..f7590f5bade 100644
--- a/fs/xfs/xfs_trans_extfree.c
+++ b/fs/xfs/xfs_trans_extfree.c
@@ -69,12 +69,16 @@ xfs_trans_log_efi_extent(xfs_trans_t *tp,
69 tp->t_flags |= XFS_TRANS_DIRTY; 69 tp->t_flags |= XFS_TRANS_DIRTY;
70 efip->efi_item.li_desc->lid_flags |= XFS_LID_DIRTY; 70 efip->efi_item.li_desc->lid_flags |= XFS_LID_DIRTY;
71 71
72 next_extent = efip->efi_next_extent; 72 /*
73 * atomic_inc_return gives us the value after the increment;
74 * we want to use it as an array index so we need to subtract 1 from
75 * it.
76 */
77 next_extent = atomic_inc_return(&efip->efi_next_extent) - 1;
73 ASSERT(next_extent < efip->efi_format.efi_nextents); 78 ASSERT(next_extent < efip->efi_format.efi_nextents);
74 extp = &(efip->efi_format.efi_extents[next_extent]); 79 extp = &(efip->efi_format.efi_extents[next_extent]);
75 extp->ext_start = start_block; 80 extp->ext_start = start_block;
76 extp->ext_len = ext_len; 81 extp->ext_len = ext_len;
77 efip->efi_next_extent++;
78} 82}
79 83
80 84
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 62da86c90de..35162c238fa 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -22,15 +22,17 @@ struct xfs_log_item;
22struct xfs_log_item_desc; 22struct xfs_log_item_desc;
23struct xfs_mount; 23struct xfs_mount;
24struct xfs_trans; 24struct xfs_trans;
25struct xfs_ail;
26struct xfs_log_vec;
25 27
26void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *); 28void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *);
27void xfs_trans_del_item(struct xfs_log_item *); 29void xfs_trans_del_item(struct xfs_log_item *);
28void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn, 30void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn,
29 int flags); 31 int flags);
30void xfs_trans_item_committed(struct xfs_log_item *lip,
31 xfs_lsn_t commit_lsn, int aborted);
32void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp); 32void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp);
33 33
34void xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv,
35 xfs_lsn_t commit_lsn, int aborted);
34/* 36/*
35 * AIL traversal cursor. 37 * AIL traversal cursor.
36 * 38 *
@@ -73,12 +75,29 @@ struct xfs_ail {
73/* 75/*
74 * From xfs_trans_ail.c 76 * From xfs_trans_ail.c
75 */ 77 */
76void xfs_trans_ail_update(struct xfs_ail *ailp, 78void xfs_trans_ail_update_bulk(struct xfs_ail *ailp,
77 struct xfs_log_item *lip, xfs_lsn_t lsn) 79 struct xfs_log_item **log_items, int nr_items,
78 __releases(ailp->xa_lock); 80 xfs_lsn_t lsn) __releases(ailp->xa_lock);
79void xfs_trans_ail_delete(struct xfs_ail *ailp, 81static inline void
80 struct xfs_log_item *lip) 82xfs_trans_ail_update(
81 __releases(ailp->xa_lock); 83 struct xfs_ail *ailp,
84 struct xfs_log_item *lip,
85 xfs_lsn_t lsn) __releases(ailp->xa_lock)
86{
87 xfs_trans_ail_update_bulk(ailp, &lip, 1, lsn);
88}
89
90void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp,
91 struct xfs_log_item **log_items, int nr_items)
92 __releases(ailp->xa_lock);
93static inline void
94xfs_trans_ail_delete(
95 struct xfs_ail *ailp,
96 xfs_log_item_t *lip) __releases(ailp->xa_lock)
97{
98 xfs_trans_ail_delete_bulk(ailp, &lip, 1);
99}
100
82void xfs_trans_ail_push(struct xfs_ail *, xfs_lsn_t); 101void xfs_trans_ail_push(struct xfs_ail *, xfs_lsn_t);
83void xfs_trans_unlocked_item(struct xfs_ail *, 102void xfs_trans_unlocked_item(struct xfs_ail *,
84 xfs_log_item_t *); 103 xfs_log_item_t *);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 8e4a63c4151..d8e6f8cd6f0 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -964,29 +964,48 @@ xfs_release(
964 xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE); 964 xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE);
965 } 965 }
966 966
967 if (ip->i_d.di_nlink != 0) { 967 if (ip->i_d.di_nlink == 0)
968 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && 968 return 0;
969 ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
970 ip->i_delayed_blks > 0)) &&
971 (ip->i_df.if_flags & XFS_IFEXTENTS)) &&
972 (!(ip->i_d.di_flags &
973 (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
974 969
975 /* 970 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
976 * If we can't get the iolock just skip truncating 971 ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
977 * the blocks past EOF because we could deadlock 972 ip->i_delayed_blks > 0)) &&
978 * with the mmap_sem otherwise. We'll get another 973 (ip->i_df.if_flags & XFS_IFEXTENTS)) &&
979 * chance to drop them once the last reference to 974 (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
980 * the inode is dropped, so we'll never leak blocks
981 * permanently.
982 */
983 error = xfs_free_eofblocks(mp, ip,
984 XFS_FREE_EOF_TRYLOCK);
985 if (error)
986 return error;
987 }
988 }
989 975
976 /*
977 * If we can't get the iolock just skip truncating the blocks
978 * past EOF because we could deadlock with the mmap_sem
979 * otherwise. We'll get another chance to drop them once the
980 * last reference to the inode is dropped, so we'll never leak
981 * blocks permanently.
982 *
983 * Further, check if the inode is being opened, written and
984 * closed frequently and we have delayed allocation blocks
985 * outstanding (e.g. streaming writes from the NFS server),
986 * truncating the blocks past EOF will cause fragmentation to
987 * occur.
988 *
989 * In this case don't do the truncation, either, but we have to
990 * be careful how we detect this case. Blocks beyond EOF show
991 * up as i_delayed_blks even when the inode is clean, so we
992 * need to truncate them away first before checking for a dirty
993 * release. Hence on the first dirty close we will still remove
994 * the speculative allocation, but after that we will leave it
995 * in place.
996 */
997 if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
998 return 0;
999
1000 error = xfs_free_eofblocks(mp, ip,
1001 XFS_FREE_EOF_TRYLOCK);
1002 if (error)
1003 return error;
1004
1005 /* delalloc blocks after truncation means it really is dirty */
1006 if (ip->i_delayed_blks)
1007 xfs_iflags_set(ip, XFS_IDIRTY_RELEASE);
1008 }
990 return 0; 1009 return 0;
991} 1010}
992 1011