aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/v9fs.c16
-rw-r--r--fs/9p/vfs_super.c5
-rw-r--r--fs/Kconfig5
-rw-r--r--fs/Makefile1
-rw-r--r--fs/adfs/super.c3
-rw-r--r--fs/affs/super.c7
-rw-r--r--fs/afs/file.c2
-rw-r--r--fs/afs/fsclient.c8
-rw-r--r--fs/afs/internal.h2
-rw-r--r--fs/afs/mntpt.c4
-rw-r--r--fs/afs/rxrpc.c3
-rw-r--r--fs/afs/super.c7
-rw-r--r--fs/aio.c121
-rw-r--r--fs/anon_inodes.c109
-rw-r--r--fs/attr.c2
-rw-r--r--fs/autofs4/init.c6
-rw-r--r--fs/autofs4/inode.c10
-rw-r--r--fs/bad_inode.c2
-rw-r--r--fs/befs/linuxvfs.c3
-rw-r--r--fs/bfs/inode.c3
-rw-r--r--fs/binfmt_aout.c18
-rw-r--r--fs/binfmt_elf.c37
-rw-r--r--fs/binfmt_elf_fdpic.c6
-rw-r--r--fs/binfmt_em86.c3
-rw-r--r--fs/binfmt_flat.c6
-rw-r--r--fs/binfmt_misc.c10
-rw-r--r--fs/binfmt_script.c3
-rw-r--r--fs/binfmt_som.c4
-rw-r--r--fs/bio-integrity.c10
-rw-r--r--fs/bio.c2
-rw-r--r--fs/block_dev.c21
-rw-r--r--fs/btrfs/backref.c8
-rw-r--r--fs/btrfs/check-integrity.c1
-rw-r--r--fs/btrfs/compression.c12
-rw-r--r--fs/btrfs/extent_io.c16
-rw-r--r--fs/btrfs/file-item.c4
-rw-r--r--fs/btrfs/free-space-cache.c2
-rw-r--r--fs/btrfs/inode.c26
-rw-r--r--fs/btrfs/lzo.c4
-rw-r--r--fs/btrfs/reada.c2
-rw-r--r--fs/btrfs/scrub.c8
-rw-r--r--fs/btrfs/super.c8
-rw-r--r--fs/btrfs/zlib.c4
-rw-r--r--fs/buffer.c2
-rw-r--r--fs/cachefiles/namei.c3
-rw-r--r--fs/ceph/super.c3
-rw-r--r--fs/cifs/README6
-rw-r--r--fs/cifs/cifs_debug.c3
-rw-r--r--fs/cifs/cifsacl.c1
-rw-r--r--fs/cifs/cifsfs.c32
-rw-r--r--fs/cifs/cifsglob.h47
-rw-r--r--fs/cifs/cifsproto.h9
-rw-r--r--fs/cifs/cifssmb.c18
-rw-r--r--fs/cifs/connect.c44
-rw-r--r--fs/cifs/dir.c26
-rw-r--r--fs/cifs/file.c74
-rw-r--r--fs/cifs/inode.c28
-rw-r--r--fs/cifs/misc.c19
-rw-r--r--fs/cifs/transport.c78
-rw-r--r--fs/cifs/xattr.c6
-rw-r--r--fs/coda/inode.c6
-rw-r--r--fs/compat.c1
-rw-r--r--fs/compat_ioctl.c3
-rw-r--r--fs/configfs/configfs_internal.h7
-rw-r--r--fs/configfs/dir.c72
-rw-r--r--fs/configfs/inode.c62
-rw-r--r--fs/configfs/mount.c16
-rw-r--r--fs/configfs/symlink.c12
-rw-r--r--fs/cramfs/inode.c12
-rw-r--r--fs/dcache.c93
-rw-r--r--fs/dcookies.c2
-rw-r--r--fs/debugfs/file.c2
-rw-r--r--fs/debugfs/inode.c149
-rw-r--r--fs/devpts/inode.c88
-rw-r--r--fs/dlm/dir.c17
-rw-r--r--fs/dlm/lock.c8
-rw-r--r--fs/dlm/lock.h3
-rw-r--r--fs/dlm/lowcomms.c24
-rw-r--r--fs/ecryptfs/file.c9
-rw-r--r--fs/ecryptfs/main.c19
-rw-r--r--fs/ecryptfs/super.c1
-rw-r--r--fs/efs/super.c3
-rw-r--r--fs/eventfd.c2
-rw-r--r--fs/eventpoll.c49
-rw-r--r--fs/exec.c46
-rw-r--r--fs/exofs/dir.c4
-rw-r--r--fs/exofs/namei.c13
-rw-r--r--fs/exofs/super.c4
-rw-r--r--fs/ext2/dir.c4
-rw-r--r--fs/ext2/namei.c13
-rw-r--r--fs/ext2/super.c4
-rw-r--r--fs/ext3/super.c3
-rw-r--r--fs/ext4/super.c8
-rw-r--r--fs/fat/inode.c8
-rw-r--r--fs/fat/namei_vfat.c83
-rw-r--r--fs/file.c2
-rw-r--r--fs/file_table.c3
-rw-r--r--fs/freevxfs/vxfs_super.c3
-rw-r--r--fs/fs-writeback.c4
-rw-r--r--fs/fs_struct.c31
-rw-r--r--fs/fuse/dev.c4
-rw-r--r--fs/fuse/file.c4
-rw-r--r--fs/fuse/inode.c9
-rw-r--r--fs/gfs2/aops.c12
-rw-r--r--fs/gfs2/bmap.c4
-rw-r--r--fs/gfs2/file.c15
-rw-r--r--fs/gfs2/glock.c210
-rw-r--r--fs/gfs2/incore.h50
-rw-r--r--fs/gfs2/inode.c4
-rw-r--r--fs/gfs2/lock_dlm.c123
-rw-r--r--fs/gfs2/log.c244
-rw-r--r--fs/gfs2/log.h5
-rw-r--r--fs/gfs2/lops.c103
-rw-r--r--fs/gfs2/main.c18
-rw-r--r--fs/gfs2/ops_fstype.c12
-rw-r--r--fs/gfs2/quota.c6
-rw-r--r--fs/gfs2/rgrp.c189
-rw-r--r--fs/gfs2/rgrp.h10
-rw-r--r--fs/gfs2/super.c3
-rw-r--r--fs/gfs2/trace_gfs2.h60
-rw-r--r--fs/gfs2/util.c1
-rw-r--r--fs/gfs2/util.h3
-rw-r--r--fs/gfs2/xattr.c4
-rw-r--r--fs/hfs/super.c6
-rw-r--r--fs/hfsplus/hfsplus_fs.h5
-rw-r--r--fs/hfsplus/hfsplus_raw.h2
-rw-r--r--fs/hfsplus/inode.c2
-rw-r--r--fs/hfsplus/ioctl.c34
-rw-r--r--fs/hfsplus/super.c17
-rw-r--r--fs/hostfs/hostfs.h3
-rw-r--r--fs/hostfs/hostfs_kern.c9
-rw-r--r--fs/hostfs/hostfs_user.c4
-rw-r--r--fs/hpfs/super.c6
-rw-r--r--fs/hppfs/hppfs.c9
-rw-r--r--fs/hugetlbfs/inode.c151
-rw-r--r--fs/inode.c32
-rw-r--r--fs/ioctl.c2
-rw-r--r--fs/isofs/inode.c3
-rw-r--r--fs/jbd/journal.c14
-rw-r--r--fs/jbd/transaction.c4
-rw-r--r--fs/jbd2/commit.c4
-rw-r--r--fs/jbd2/journal.c14
-rw-r--r--fs/jbd2/transaction.c4
-rw-r--r--fs/jffs2/compr.c2
-rw-r--r--fs/jffs2/fs.c6
-rw-r--r--fs/jfs/namei.c13
-rw-r--r--fs/jfs/super.c12
-rw-r--r--fs/libfs.c10
-rw-r--r--fs/lockd/clnt4xdr.c2
-rw-r--r--fs/lockd/clntlock.c3
-rw-r--r--fs/lockd/clntxdr.c8
-rw-r--r--fs/lockd/host.c42
-rw-r--r--fs/lockd/mon.c21
-rw-r--r--fs/lockd/netns.h12
-rw-r--r--fs/lockd/svc.c117
-rw-r--r--fs/lockd/svclock.c59
-rw-r--r--fs/logfs/dir.c21
-rw-r--r--fs/logfs/readwrite.c38
-rw-r--r--fs/logfs/segment.c4
-rw-r--r--fs/logfs/super.c12
-rw-r--r--fs/minix/dir.c4
-rw-r--r--fs/minix/inode.c38
-rw-r--r--fs/minix/minix.h1
-rw-r--r--fs/minix/namei.c14
-rw-r--r--fs/mpage.c2
-rw-r--r--fs/namei.c218
-rw-r--r--fs/ncpfs/inode.c6
-rw-r--r--fs/nfs/Kconfig29
-rw-r--r--fs/nfs/blocklayout/blocklayout.c161
-rw-r--r--fs/nfs/blocklayout/blocklayout.h11
-rw-r--r--fs/nfs/blocklayout/blocklayoutdev.c46
-rw-r--r--fs/nfs/blocklayout/blocklayoutdm.c33
-rw-r--r--fs/nfs/blocklayout/extents.c2
-rw-r--r--fs/nfs/cache_lib.c61
-rw-r--r--fs/nfs/cache_lib.h10
-rw-r--r--fs/nfs/callback.c19
-rw-r--r--fs/nfs/callback.h3
-rw-r--r--fs/nfs/callback_proc.c99
-rw-r--r--fs/nfs/callback_xdr.c21
-rw-r--r--fs/nfs/client.c247
-rw-r--r--fs/nfs/delegation.c68
-rw-r--r--fs/nfs/delegation.h4
-rw-r--r--fs/nfs/dir.c35
-rw-r--r--fs/nfs/direct.c6
-rw-r--r--fs/nfs/dns_resolve.c130
-rw-r--r--fs/nfs/dns_resolve.h14
-rw-r--r--fs/nfs/file.c2
-rw-r--r--fs/nfs/fscache.c2
-rw-r--r--fs/nfs/getroot.c6
-rw-r--r--fs/nfs/idmap.c734
-rw-r--r--fs/nfs/inode.c119
-rw-r--r--fs/nfs/internal.h15
-rw-r--r--fs/nfs/mount_clnt.c16
-rw-r--r--fs/nfs/namespace.c5
-rw-r--r--fs/nfs/netns.h27
-rw-r--r--fs/nfs/nfs2xdr.c2
-rw-r--r--fs/nfs/nfs3acl.c2
-rw-r--r--fs/nfs/nfs3proc.c24
-rw-r--r--fs/nfs/nfs3xdr.c4
-rw-r--r--fs/nfs/nfs4_fs.h58
-rw-r--r--fs/nfs/nfs4filelayout.c272
-rw-r--r--fs/nfs/nfs4filelayout.h7
-rw-r--r--fs/nfs/nfs4filelayoutdev.c90
-rw-r--r--fs/nfs/nfs4namespace.c10
-rw-r--r--fs/nfs/nfs4proc.c563
-rw-r--r--fs/nfs/nfs4state.c355
-rw-r--r--fs/nfs/nfs4xdr.c697
-rw-r--r--fs/nfs/nfsroot.c2
-rw-r--r--fs/nfs/objlayout/objio_osd.c54
-rw-r--r--fs/nfs/objlayout/objlayout.c142
-rw-r--r--fs/nfs/objlayout/objlayout.h2
-rw-r--r--fs/nfs/pagelist.c92
-rw-r--r--fs/nfs/pnfs.c46
-rw-r--r--fs/nfs/pnfs.h98
-rw-r--r--fs/nfs/pnfs_dev.c4
-rw-r--r--fs/nfs/proc.c24
-rw-r--r--fs/nfs/read.c14
-rw-r--r--fs/nfs/super.c167
-rw-r--r--fs/nfs/sysctl.c2
-rw-r--r--fs/nfs/unlink.c45
-rw-r--r--fs/nfs/write.c213
-rw-r--r--fs/nfsd/fault_inject.c2
-rw-r--r--fs/nfsd/nfs4callback.c8
-rw-r--r--fs/nfsd/nfs4state.c2
-rw-r--r--fs/nfsd/nfsctl.c6
-rw-r--r--fs/nfsd/nfssvc.c4
-rw-r--r--fs/nfsd/stats.c5
-rw-r--r--fs/nfsd/vfs.c11
-rw-r--r--fs/nilfs2/cpfile.c94
-rw-r--r--fs/nilfs2/dat.c38
-rw-r--r--fs/nilfs2/dir.c4
-rw-r--r--fs/nilfs2/ifile.c4
-rw-r--r--fs/nilfs2/mdt.c4
-rw-r--r--fs/nilfs2/namei.c11
-rw-r--r--fs/nilfs2/page.c8
-rw-r--r--fs/nilfs2/recovery.c4
-rw-r--r--fs/nilfs2/segbuf.c4
-rw-r--r--fs/nilfs2/sufile.c68
-rw-r--r--fs/nilfs2/super.c4
-rw-r--r--fs/nilfs2/the_nilfs.c7
-rw-r--r--fs/notify/notification.c3
-rw-r--r--fs/ntfs/aops.c20
-rw-r--r--fs/ntfs/attrib.c20
-rw-r--r--fs/ntfs/file.c16
-rw-r--r--fs/ntfs/layout.h4
-rw-r--r--fs/ntfs/super.c17
-rw-r--r--fs/ocfs2/aops.c16
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c14
-rw-r--r--fs/ocfs2/super.c51
-rw-r--r--fs/omfs/inode.c6
-rw-r--r--fs/openpromfs/inode.c3
-rw-r--r--fs/pipe.c9
-rw-r--r--fs/posix_acl.c2
-rw-r--r--fs/proc/array.c119
-rw-r--r--fs/proc/base.c15
-rw-r--r--fs/proc/inode.c16
-rw-r--r--fs/proc/internal.h12
-rw-r--r--fs/proc/kcore.c8
-rw-r--r--fs/proc/namespaces.c2
-rw-r--r--fs/proc/page.c2
-rw-r--r--fs/proc/proc_sysctl.c1276
-rw-r--r--fs/proc/stat.c62
-rw-r--r--fs/proc/task_mmu.c357
-rw-r--r--fs/proc/task_nommu.c69
-rw-r--r--fs/proc/vmcore.c23
-rw-r--r--fs/pstore/inode.c25
-rw-r--r--fs/pstore/platform.c30
-rw-r--r--fs/qnx4/inode.c88
-rw-r--r--fs/qnx4/namei.c9
-rw-r--r--fs/qnx4/qnx4.h2
-rw-r--r--fs/qnx6/Kconfig26
-rw-r--r--fs/qnx6/Makefile7
-rw-r--r--fs/qnx6/README8
-rw-r--r--fs/qnx6/dir.c291
-rw-r--r--fs/qnx6/inode.c698
-rw-r--r--fs/qnx6/namei.c42
-rw-r--r--fs/qnx6/qnx6.h135
-rw-r--r--fs/qnx6/super_mmi.c150
-rw-r--r--fs/quota/dquot.c1
-rw-r--r--fs/quota/quota.c3
-rw-r--r--fs/ramfs/inode.c30
-rw-r--r--fs/read_write.c2
-rw-r--r--fs/readdir.c2
-rw-r--r--fs/reiserfs/acl.h76
-rw-r--r--fs/reiserfs/bitmap.c4
-rw-r--r--fs/reiserfs/dir.c2
-rw-r--r--fs/reiserfs/do_balan.c2
-rw-r--r--fs/reiserfs/file.c6
-rw-r--r--fs/reiserfs/fix_node.c2
-rw-r--r--fs/reiserfs/hashes.c2
-rw-r--r--fs/reiserfs/ibalance.c2
-rw-r--r--fs/reiserfs/inode.c6
-rw-r--r--fs/reiserfs/ioctl.c2
-rw-r--r--fs/reiserfs/item_ops.c2
-rw-r--r--fs/reiserfs/journal.c2
-rw-r--r--fs/reiserfs/lbalance.c4
-rw-r--r--fs/reiserfs/lock.c2
-rw-r--r--fs/reiserfs/namei.c6
-rw-r--r--fs/reiserfs/objectid.c3
-rw-r--r--fs/reiserfs/prints.c4
-rw-r--r--fs/reiserfs/procfs.c3
-rw-r--r--fs/reiserfs/reiserfs.h2923
-rw-r--r--fs/reiserfs/resize.c3
-rw-r--r--fs/reiserfs/stree.c6
-rw-r--r--fs/reiserfs/super.c12
-rw-r--r--fs/reiserfs/tail_conversion.c6
-rw-r--r--fs/reiserfs/xattr.c6
-rw-r--r--fs/reiserfs/xattr.h122
-rw-r--r--fs/reiserfs/xattr_acl.c6
-rw-r--r--fs/reiserfs/xattr_security.c4
-rw-r--r--fs/reiserfs/xattr_trusted.c4
-rw-r--r--fs/reiserfs/xattr_user.c4
-rw-r--r--fs/romfs/super.c6
-rw-r--r--fs/select.c42
-rw-r--r--fs/seq_file.c114
-rw-r--r--fs/splice.c9
-rw-r--r--fs/squashfs/file.c8
-rw-r--r--fs/squashfs/super.c3
-rw-r--r--fs/squashfs/symlink.c4
-rw-r--r--fs/stack.c2
-rw-r--r--fs/stat.c4
-rw-r--r--fs/statfs.c2
-rw-r--r--fs/super.c5
-rw-r--r--fs/sync.c2
-rw-r--r--fs/sysfs/dir.c224
-rw-r--r--fs/sysfs/inode.c11
-rw-r--r--fs/sysfs/mount.c5
-rw-r--r--fs/sysfs/sysfs.h17
-rw-r--r--fs/sysv/namei.c12
-rw-r--r--fs/sysv/super.c27
-rw-r--r--fs/sysv/sysv.h1
-rw-r--r--fs/ubifs/debug.c410
-rw-r--r--fs/ubifs/debug.h3
-rw-r--r--fs/ubifs/dir.c18
-rw-r--r--fs/ubifs/file.c4
-rw-r--r--fs/ubifs/recovery.c3
-rw-r--r--fs/ubifs/sb.c19
-rw-r--r--fs/ubifs/super.c6
-rw-r--r--fs/ubifs/ubifs.h11
-rw-r--r--fs/udf/file.c4
-rw-r--r--fs/udf/namei.c13
-rw-r--r--fs/udf/super.c6
-rw-r--r--fs/ufs/namei.c14
-rw-r--r--fs/ufs/super.c7
-rw-r--r--fs/xattr.c2
-rw-r--r--fs/xattr_acl.c2
-rw-r--r--fs/xfs/Makefile3
-rw-r--r--fs/xfs/xfs_aops.c183
-rw-r--r--fs/xfs/xfs_aops.h4
-rw-r--r--fs/xfs/xfs_bmap.c13
-rw-r--r--fs/xfs/xfs_buf.c17
-rw-r--r--fs/xfs/xfs_dfrag.c24
-rw-r--r--fs/xfs/xfs_dir2_block.c1
-rw-r--r--fs/xfs/xfs_dquot.c418
-rw-r--r--fs/xfs/xfs_dquot.h49
-rw-r--r--fs/xfs/xfs_file.c84
-rw-r--r--fs/xfs/xfs_iget.c41
-rw-r--r--fs/xfs/xfs_inode.c94
-rw-r--r--fs/xfs/xfs_inode.h23
-rw-r--r--fs/xfs/xfs_inode_item.c297
-rw-r--r--fs/xfs/xfs_inode_item.h16
-rw-r--r--fs/xfs/xfs_ioctl.c14
-rw-r--r--fs/xfs/xfs_ioctl32.c2
-rw-r--r--fs/xfs/xfs_iomap.c19
-rw-r--r--fs/xfs/xfs_iops.c71
-rw-r--r--fs/xfs/xfs_itable.c21
-rw-r--r--fs/xfs/xfs_log.c612
-rw-r--r--fs/xfs/xfs_log.h16
-rw-r--r--fs/xfs/xfs_log_priv.h28
-rw-r--r--fs/xfs/xfs_log_recover.c6
-rw-r--r--fs/xfs/xfs_mount.c8
-rw-r--r--fs/xfs/xfs_mount.h5
-rw-r--r--fs/xfs/xfs_qm.c628
-rw-r--r--fs/xfs/xfs_qm.h49
-rw-r--r--fs/xfs/xfs_qm_bhv.c42
-rw-r--r--fs/xfs/xfs_qm_stats.c105
-rw-r--r--fs/xfs/xfs_qm_stats.h53
-rw-r--r--fs/xfs/xfs_qm_syscalls.c130
-rw-r--r--fs/xfs/xfs_quota.h2
-rw-r--r--fs/xfs/xfs_quota_priv.h11
-rw-r--r--fs/xfs/xfs_rename.c11
-rw-r--r--fs/xfs/xfs_sb.h1
-rw-r--r--fs/xfs/xfs_stats.c99
-rw-r--r--fs/xfs/xfs_stats.h10
-rw-r--r--fs/xfs/xfs_super.c171
-rw-r--r--fs/xfs/xfs_super.h8
-rw-r--r--fs/xfs/xfs_sync.c46
-rw-r--r--fs/xfs/xfs_sync.h2
-rw-r--r--fs/xfs/xfs_trace.h28
-rw-r--r--fs/xfs/xfs_trans.c31
-rw-r--r--fs/xfs/xfs_trans_ail.c83
-rw-r--r--fs/xfs/xfs_trans_buf.c25
-rw-r--r--fs/xfs/xfs_trans_dquot.c21
-rw-r--r--fs/xfs/xfs_trans_inode.c8
-rw-r--r--fs/xfs/xfs_trans_priv.h3
-rw-r--r--fs/xfs/xfs_utils.c2
-rw-r--r--fs/xfs/xfs_vnode.h1
-rw-r--r--fs/xfs/xfs_vnodeops.c16
-rw-r--r--fs/xfs/xfs_vnodeops.h3
399 files changed, 13844 insertions, 6862 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 1964f98e74be..b85efa773949 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -594,21 +594,21 @@ static int __init init_v9fs(void)
594 int err; 594 int err;
595 pr_info("Installing v9fs 9p2000 file system support\n"); 595 pr_info("Installing v9fs 9p2000 file system support\n");
596 /* TODO: Setup list of registered trasnport modules */ 596 /* TODO: Setup list of registered trasnport modules */
597 err = register_filesystem(&v9fs_fs_type);
598 if (err < 0) {
599 pr_err("Failed to register filesystem\n");
600 return err;
601 }
602 597
603 err = v9fs_cache_register(); 598 err = v9fs_cache_register();
604 if (err < 0) { 599 if (err < 0) {
605 pr_err("Failed to register v9fs for caching\n"); 600 pr_err("Failed to register v9fs for caching\n");
606 goto out_fs_unreg; 601 return err;
607 } 602 }
608 603
609 err = v9fs_sysfs_init(); 604 err = v9fs_sysfs_init();
610 if (err < 0) { 605 if (err < 0) {
611 pr_err("Failed to register with sysfs\n"); 606 pr_err("Failed to register with sysfs\n");
607 goto out_cache;
608 }
609 err = register_filesystem(&v9fs_fs_type);
610 if (err < 0) {
611 pr_err("Failed to register filesystem\n");
612 goto out_sysfs_cleanup; 612 goto out_sysfs_cleanup;
613 } 613 }
614 614
@@ -617,8 +617,8 @@ static int __init init_v9fs(void)
617out_sysfs_cleanup: 617out_sysfs_cleanup:
618 v9fs_sysfs_cleanup(); 618 v9fs_sysfs_cleanup();
619 619
620out_fs_unreg: 620out_cache:
621 unregister_filesystem(&v9fs_fs_type); 621 v9fs_cache_unregister();
622 622
623 return err; 623 return err;
624} 624}
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 7b0cd87b07c2..8c92a9ba8330 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -155,9 +155,8 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
155 goto release_sb; 155 goto release_sb;
156 } 156 }
157 157
158 root = d_alloc_root(inode); 158 root = d_make_root(inode);
159 if (!root) { 159 if (!root) {
160 iput(inode);
161 retval = -ENOMEM; 160 retval = -ENOMEM;
162 goto release_sb; 161 goto release_sb;
163 } 162 }
@@ -260,7 +259,7 @@ static int v9fs_statfs(struct dentry *dentry, struct kstatfs *buf)
260 if (v9fs_proto_dotl(v9ses)) { 259 if (v9fs_proto_dotl(v9ses)) {
261 res = p9_client_statfs(fid, &rs); 260 res = p9_client_statfs(fid, &rs);
262 if (res == 0) { 261 if (res == 0) {
263 buf->f_type = V9FS_MAGIC; 262 buf->f_type = rs.type;
264 buf->f_bsize = rs.bsize; 263 buf->f_bsize = rs.bsize;
265 buf->f_blocks = rs.blocks; 264 buf->f_blocks = rs.blocks;
266 buf->f_bfree = rs.bfree; 265 buf->f_bfree = rs.bfree;
diff --git a/fs/Kconfig b/fs/Kconfig
index d621f02a3f9e..f95ae3a027f3 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -4,6 +4,10 @@
4 4
5menu "File systems" 5menu "File systems"
6 6
7# Use unaligned word dcache accesses
8config DCACHE_WORD_ACCESS
9 bool
10
7if BLOCK 11if BLOCK
8 12
9source "fs/ext2/Kconfig" 13source "fs/ext2/Kconfig"
@@ -210,6 +214,7 @@ source "fs/minix/Kconfig"
210source "fs/omfs/Kconfig" 214source "fs/omfs/Kconfig"
211source "fs/hpfs/Kconfig" 215source "fs/hpfs/Kconfig"
212source "fs/qnx4/Kconfig" 216source "fs/qnx4/Kconfig"
217source "fs/qnx6/Kconfig"
213source "fs/romfs/Kconfig" 218source "fs/romfs/Kconfig"
214source "fs/pstore/Kconfig" 219source "fs/pstore/Kconfig"
215source "fs/sysv/Kconfig" 220source "fs/sysv/Kconfig"
diff --git a/fs/Makefile b/fs/Makefile
index 93804d4d66e1..2fb977934673 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -102,6 +102,7 @@ obj-$(CONFIG_UBIFS_FS) += ubifs/
102obj-$(CONFIG_AFFS_FS) += affs/ 102obj-$(CONFIG_AFFS_FS) += affs/
103obj-$(CONFIG_ROMFS_FS) += romfs/ 103obj-$(CONFIG_ROMFS_FS) += romfs/
104obj-$(CONFIG_QNX4FS_FS) += qnx4/ 104obj-$(CONFIG_QNX4FS_FS) += qnx4/
105obj-$(CONFIG_QNX6FS_FS) += qnx6/
105obj-$(CONFIG_AUTOFS4_FS) += autofs4/ 106obj-$(CONFIG_AUTOFS4_FS) += autofs4/
106obj-$(CONFIG_ADFS_FS) += adfs/ 107obj-$(CONFIG_ADFS_FS) += adfs/
107obj-$(CONFIG_FUSE_FS) += fuse/ 108obj-$(CONFIG_FUSE_FS) += fuse/
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 8e3b36ace305..06fdcc9382c4 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -483,10 +483,9 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
483 483
484 sb->s_d_op = &adfs_dentry_operations; 484 sb->s_d_op = &adfs_dentry_operations;
485 root = adfs_iget(sb, &root_obj); 485 root = adfs_iget(sb, &root_obj);
486 sb->s_root = d_alloc_root(root); 486 sb->s_root = d_make_root(root);
487 if (!sb->s_root) { 487 if (!sb->s_root) {
488 int i; 488 int i;
489 iput(root);
490 for (i = 0; i < asb->s_map_size; i++) 489 for (i = 0; i < asb->s_map_size; i++)
491 brelse(asb->s_map[i].dm_bh); 490 brelse(asb->s_map[i].dm_bh);
492 kfree(asb->s_map); 491 kfree(asb->s_map);
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 8ba73fed7964..0782653a05a2 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -473,7 +473,7 @@ got_root:
473 root_inode = affs_iget(sb, root_block); 473 root_inode = affs_iget(sb, root_block);
474 if (IS_ERR(root_inode)) { 474 if (IS_ERR(root_inode)) {
475 ret = PTR_ERR(root_inode); 475 ret = PTR_ERR(root_inode);
476 goto out_error_noinode; 476 goto out_error;
477 } 477 }
478 478
479 if (AFFS_SB(sb)->s_flags & SF_INTL) 479 if (AFFS_SB(sb)->s_flags & SF_INTL)
@@ -481,7 +481,7 @@ got_root:
481 else 481 else
482 sb->s_d_op = &affs_dentry_operations; 482 sb->s_d_op = &affs_dentry_operations;
483 483
484 sb->s_root = d_alloc_root(root_inode); 484 sb->s_root = d_make_root(root_inode);
485 if (!sb->s_root) { 485 if (!sb->s_root) {
486 printk(KERN_ERR "AFFS: Get root inode failed\n"); 486 printk(KERN_ERR "AFFS: Get root inode failed\n");
487 goto out_error; 487 goto out_error;
@@ -494,9 +494,6 @@ got_root:
494 * Begin the cascaded cleanup ... 494 * Begin the cascaded cleanup ...
495 */ 495 */
496out_error: 496out_error:
497 if (root_inode)
498 iput(root_inode);
499out_error_noinode:
500 kfree(sbi->s_bitmap); 497 kfree(sbi->s_bitmap);
501 affs_brelse(root_bh); 498 affs_brelse(root_bh);
502 kfree(sbi->s_prefix); 499 kfree(sbi->s_prefix);
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 14d89fa58fee..8f6e9234d565 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -251,7 +251,7 @@ static int afs_readpages(struct file *file, struct address_space *mapping,
251 ASSERT(key != NULL); 251 ASSERT(key != NULL);
252 252
253 vnode = AFS_FS_I(mapping->host); 253 vnode = AFS_FS_I(mapping->host);
254 if (vnode->flags & AFS_VNODE_DELETED) { 254 if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
255 _leave(" = -ESTALE"); 255 _leave(" = -ESTALE");
256 return -ESTALE; 256 return -ESTALE;
257 } 257 }
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 2f213d109c21..b960ff05ea0b 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -365,10 +365,10 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
365 _debug("extract data"); 365 _debug("extract data");
366 if (call->count > 0) { 366 if (call->count > 0) {
367 page = call->reply3; 367 page = call->reply3;
368 buffer = kmap_atomic(page, KM_USER0); 368 buffer = kmap_atomic(page);
369 ret = afs_extract_data(call, skb, last, buffer, 369 ret = afs_extract_data(call, skb, last, buffer,
370 call->count); 370 call->count);
371 kunmap_atomic(buffer, KM_USER0); 371 kunmap_atomic(buffer);
372 switch (ret) { 372 switch (ret) {
373 case 0: break; 373 case 0: break;
374 case -EAGAIN: return 0; 374 case -EAGAIN: return 0;
@@ -411,9 +411,9 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
411 if (call->count < PAGE_SIZE) { 411 if (call->count < PAGE_SIZE) {
412 _debug("clear"); 412 _debug("clear");
413 page = call->reply3; 413 page = call->reply3;
414 buffer = kmap_atomic(page, KM_USER0); 414 buffer = kmap_atomic(page);
415 memset(buffer + call->count, 0, PAGE_SIZE - call->count); 415 memset(buffer + call->count, 0, PAGE_SIZE - call->count);
416 kunmap_atomic(buffer, KM_USER0); 416 kunmap_atomic(buffer);
417 } 417 }
418 418
419 _leave(" = 0 [done]"); 419 _leave(" = 0 [done]");
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index d2b0888126d4..a306bb6d88d9 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -109,7 +109,7 @@ struct afs_call {
109 unsigned reply_size; /* current size of reply */ 109 unsigned reply_size; /* current size of reply */
110 unsigned first_offset; /* offset into mapping[first] */ 110 unsigned first_offset; /* offset into mapping[first] */
111 unsigned last_to; /* amount of mapping[last] */ 111 unsigned last_to; /* amount of mapping[last] */
112 unsigned short offset; /* offset into received data store */ 112 unsigned offset; /* offset into received data store */
113 unsigned char unmarshall; /* unmarshalling phase */ 113 unsigned char unmarshall; /* unmarshalling phase */
114 bool incoming; /* T if incoming call */ 114 bool incoming; /* T if incoming call */
115 bool send_pages; /* T if data from mapping should be sent */ 115 bool send_pages; /* T if data from mapping should be sent */
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 8f4ce2658b7d..298cf8919ec7 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -200,9 +200,9 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
200 if (PageError(page)) 200 if (PageError(page))
201 goto error; 201 goto error;
202 202
203 buf = kmap_atomic(page, KM_USER0); 203 buf = kmap_atomic(page);
204 memcpy(devname, buf, size); 204 memcpy(devname, buf, size);
205 kunmap_atomic(buf, KM_USER0); 205 kunmap_atomic(buf);
206 page_cache_release(page); 206 page_cache_release(page);
207 page = NULL; 207 page = NULL;
208 } 208 }
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index e45a323aebb4..8ad8c2a0703a 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -314,6 +314,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
314 struct msghdr msg; 314 struct msghdr msg;
315 struct kvec iov[1]; 315 struct kvec iov[1];
316 int ret; 316 int ret;
317 struct sk_buff *skb;
317 318
318 _enter("%x,{%d},", addr->s_addr, ntohs(call->port)); 319 _enter("%x,{%d},", addr->s_addr, ntohs(call->port));
319 320
@@ -380,6 +381,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
380 381
381error_do_abort: 382error_do_abort:
382 rxrpc_kernel_abort_call(rxcall, RX_USER_ABORT); 383 rxrpc_kernel_abort_call(rxcall, RX_USER_ABORT);
384 while ((skb = skb_dequeue(&call->rx_queue)))
385 afs_free_skb(skb);
383 rxrpc_kernel_end_call(rxcall); 386 rxrpc_kernel_end_call(rxcall);
384 call->rxcall = NULL; 387 call->rxcall = NULL;
385error_kill_call: 388error_kill_call:
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 983ec59fc80d..f02b31e7e648 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -301,7 +301,6 @@ static int afs_fill_super(struct super_block *sb,
301{ 301{
302 struct afs_super_info *as = sb->s_fs_info; 302 struct afs_super_info *as = sb->s_fs_info;
303 struct afs_fid fid; 303 struct afs_fid fid;
304 struct dentry *root = NULL;
305 struct inode *inode = NULL; 304 struct inode *inode = NULL;
306 int ret; 305 int ret;
307 306
@@ -327,18 +326,16 @@ static int afs_fill_super(struct super_block *sb,
327 set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags); 326 set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags);
328 327
329 ret = -ENOMEM; 328 ret = -ENOMEM;
330 root = d_alloc_root(inode); 329 sb->s_root = d_make_root(inode);
331 if (!root) 330 if (!sb->s_root)
332 goto error; 331 goto error;
333 332
334 sb->s_d_op = &afs_fs_dentry_operations; 333 sb->s_d_op = &afs_fs_dentry_operations;
335 sb->s_root = root;
336 334
337 _leave(" = 0"); 335 _leave(" = 0");
338 return 0; 336 return 0;
339 337
340error: 338error:
341 iput(inode);
342 _leave(" = %d", ret); 339 _leave(" = %d", ret);
343 return ret; 340 return ret;
344} 341}
diff --git a/fs/aio.c b/fs/aio.c
index 969beb0e2231..4f71627264fd 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -13,7 +13,7 @@
13#include <linux/errno.h> 13#include <linux/errno.h>
14#include <linux/time.h> 14#include <linux/time.h>
15#include <linux/aio_abi.h> 15#include <linux/aio_abi.h>
16#include <linux/module.h> 16#include <linux/export.h>
17#include <linux/syscalls.h> 17#include <linux/syscalls.h>
18#include <linux/backing-dev.h> 18#include <linux/backing-dev.h>
19#include <linux/uio.h> 19#include <linux/uio.h>
@@ -160,7 +160,7 @@ static int aio_setup_ring(struct kioctx *ctx)
160 160
161 info->nr = nr_events; /* trusted copy */ 161 info->nr = nr_events; /* trusted copy */
162 162
163 ring = kmap_atomic(info->ring_pages[0], KM_USER0); 163 ring = kmap_atomic(info->ring_pages[0]);
164 ring->nr = nr_events; /* user copy */ 164 ring->nr = nr_events; /* user copy */
165 ring->id = ctx->user_id; 165 ring->id = ctx->user_id;
166 ring->head = ring->tail = 0; 166 ring->head = ring->tail = 0;
@@ -168,47 +168,38 @@ static int aio_setup_ring(struct kioctx *ctx)
168 ring->compat_features = AIO_RING_COMPAT_FEATURES; 168 ring->compat_features = AIO_RING_COMPAT_FEATURES;
169 ring->incompat_features = AIO_RING_INCOMPAT_FEATURES; 169 ring->incompat_features = AIO_RING_INCOMPAT_FEATURES;
170 ring->header_length = sizeof(struct aio_ring); 170 ring->header_length = sizeof(struct aio_ring);
171 kunmap_atomic(ring, KM_USER0); 171 kunmap_atomic(ring);
172 172
173 return 0; 173 return 0;
174} 174}
175 175
176 176
177/* aio_ring_event: returns a pointer to the event at the given index from 177/* aio_ring_event: returns a pointer to the event at the given index from
178 * kmap_atomic(, km). Release the pointer with put_aio_ring_event(); 178 * kmap_atomic(). Release the pointer with put_aio_ring_event();
179 */ 179 */
180#define AIO_EVENTS_PER_PAGE (PAGE_SIZE / sizeof(struct io_event)) 180#define AIO_EVENTS_PER_PAGE (PAGE_SIZE / sizeof(struct io_event))
181#define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event)) 181#define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
182#define AIO_EVENTS_OFFSET (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE) 182#define AIO_EVENTS_OFFSET (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
183 183
184#define aio_ring_event(info, nr, km) ({ \ 184#define aio_ring_event(info, nr) ({ \
185 unsigned pos = (nr) + AIO_EVENTS_OFFSET; \ 185 unsigned pos = (nr) + AIO_EVENTS_OFFSET; \
186 struct io_event *__event; \ 186 struct io_event *__event; \
187 __event = kmap_atomic( \ 187 __event = kmap_atomic( \
188 (info)->ring_pages[pos / AIO_EVENTS_PER_PAGE], km); \ 188 (info)->ring_pages[pos / AIO_EVENTS_PER_PAGE]); \
189 __event += pos % AIO_EVENTS_PER_PAGE; \ 189 __event += pos % AIO_EVENTS_PER_PAGE; \
190 __event; \ 190 __event; \
191}) 191})
192 192
193#define put_aio_ring_event(event, km) do { \ 193#define put_aio_ring_event(event) do { \
194 struct io_event *__event = (event); \ 194 struct io_event *__event = (event); \
195 (void)__event; \ 195 (void)__event; \
196 kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK), km); \ 196 kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK)); \
197} while(0) 197} while(0)
198 198
199static void ctx_rcu_free(struct rcu_head *head) 199static void ctx_rcu_free(struct rcu_head *head)
200{ 200{
201 struct kioctx *ctx = container_of(head, struct kioctx, rcu_head); 201 struct kioctx *ctx = container_of(head, struct kioctx, rcu_head);
202 unsigned nr_events = ctx->max_reqs;
203
204 kmem_cache_free(kioctx_cachep, ctx); 202 kmem_cache_free(kioctx_cachep, ctx);
205
206 if (nr_events) {
207 spin_lock(&aio_nr_lock);
208 BUG_ON(aio_nr - nr_events > aio_nr);
209 aio_nr -= nr_events;
210 spin_unlock(&aio_nr_lock);
211 }
212} 203}
213 204
214/* __put_ioctx 205/* __put_ioctx
@@ -217,23 +208,23 @@ static void ctx_rcu_free(struct rcu_head *head)
217 */ 208 */
218static void __put_ioctx(struct kioctx *ctx) 209static void __put_ioctx(struct kioctx *ctx)
219{ 210{
211 unsigned nr_events = ctx->max_reqs;
220 BUG_ON(ctx->reqs_active); 212 BUG_ON(ctx->reqs_active);
221 213
222 cancel_delayed_work(&ctx->wq); 214 cancel_delayed_work_sync(&ctx->wq);
223 cancel_work_sync(&ctx->wq.work);
224 aio_free_ring(ctx); 215 aio_free_ring(ctx);
225 mmdrop(ctx->mm); 216 mmdrop(ctx->mm);
226 ctx->mm = NULL; 217 ctx->mm = NULL;
218 if (nr_events) {
219 spin_lock(&aio_nr_lock);
220 BUG_ON(aio_nr - nr_events > aio_nr);
221 aio_nr -= nr_events;
222 spin_unlock(&aio_nr_lock);
223 }
227 pr_debug("__put_ioctx: freeing %p\n", ctx); 224 pr_debug("__put_ioctx: freeing %p\n", ctx);
228 call_rcu(&ctx->rcu_head, ctx_rcu_free); 225 call_rcu(&ctx->rcu_head, ctx_rcu_free);
229} 226}
230 227
231static inline void get_ioctx(struct kioctx *kioctx)
232{
233 BUG_ON(atomic_read(&kioctx->users) <= 0);
234 atomic_inc(&kioctx->users);
235}
236
237static inline int try_get_ioctx(struct kioctx *kioctx) 228static inline int try_get_ioctx(struct kioctx *kioctx)
238{ 229{
239 return atomic_inc_not_zero(&kioctx->users); 230 return atomic_inc_not_zero(&kioctx->users);
@@ -253,7 +244,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
253{ 244{
254 struct mm_struct *mm; 245 struct mm_struct *mm;
255 struct kioctx *ctx; 246 struct kioctx *ctx;
256 int did_sync = 0; 247 int err = -ENOMEM;
257 248
258 /* Prevent overflows */ 249 /* Prevent overflows */
259 if ((nr_events > (0x10000000U / sizeof(struct io_event))) || 250 if ((nr_events > (0x10000000U / sizeof(struct io_event))) ||
@@ -262,7 +253,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
262 return ERR_PTR(-EINVAL); 253 return ERR_PTR(-EINVAL);
263 } 254 }
264 255
265 if ((unsigned long)nr_events > aio_max_nr) 256 if (!nr_events || (unsigned long)nr_events > aio_max_nr)
266 return ERR_PTR(-EAGAIN); 257 return ERR_PTR(-EAGAIN);
267 258
268 ctx = kmem_cache_zalloc(kioctx_cachep, GFP_KERNEL); 259 ctx = kmem_cache_zalloc(kioctx_cachep, GFP_KERNEL);
@@ -273,7 +264,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
273 mm = ctx->mm = current->mm; 264 mm = ctx->mm = current->mm;
274 atomic_inc(&mm->mm_count); 265 atomic_inc(&mm->mm_count);
275 266
276 atomic_set(&ctx->users, 1); 267 atomic_set(&ctx->users, 2);
277 spin_lock_init(&ctx->ctx_lock); 268 spin_lock_init(&ctx->ctx_lock);
278 spin_lock_init(&ctx->ring_info.ring_lock); 269 spin_lock_init(&ctx->ring_info.ring_lock);
279 init_waitqueue_head(&ctx->wait); 270 init_waitqueue_head(&ctx->wait);
@@ -286,25 +277,14 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
286 goto out_freectx; 277 goto out_freectx;
287 278
288 /* limit the number of system wide aios */ 279 /* limit the number of system wide aios */
289 do { 280 spin_lock(&aio_nr_lock);
290 spin_lock_bh(&aio_nr_lock); 281 if (aio_nr + nr_events > aio_max_nr ||
291 if (aio_nr + nr_events > aio_max_nr || 282 aio_nr + nr_events < aio_nr) {
292 aio_nr + nr_events < aio_nr) 283 spin_unlock(&aio_nr_lock);
293 ctx->max_reqs = 0;
294 else
295 aio_nr += ctx->max_reqs;
296 spin_unlock_bh(&aio_nr_lock);
297 if (ctx->max_reqs || did_sync)
298 break;
299
300 /* wait for rcu callbacks to have completed before giving up */
301 synchronize_rcu();
302 did_sync = 1;
303 ctx->max_reqs = nr_events;
304 } while (1);
305
306 if (ctx->max_reqs == 0)
307 goto out_cleanup; 284 goto out_cleanup;
285 }
286 aio_nr += ctx->max_reqs;
287 spin_unlock(&aio_nr_lock);
308 288
309 /* now link into global list. */ 289 /* now link into global list. */
310 spin_lock(&mm->ioctx_lock); 290 spin_lock(&mm->ioctx_lock);
@@ -316,16 +296,13 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
316 return ctx; 296 return ctx;
317 297
318out_cleanup: 298out_cleanup:
319 __put_ioctx(ctx); 299 err = -EAGAIN;
320 return ERR_PTR(-EAGAIN); 300 aio_free_ring(ctx);
321
322out_freectx: 301out_freectx:
323 mmdrop(mm); 302 mmdrop(mm);
324 kmem_cache_free(kioctx_cachep, ctx); 303 kmem_cache_free(kioctx_cachep, ctx);
325 ctx = ERR_PTR(-ENOMEM); 304 dprintk("aio: error allocating ioctx %d\n", err);
326 305 return ERR_PTR(err);
327 dprintk("aio: error allocating ioctx %p\n", ctx);
328 return ctx;
329} 306}
330 307
331/* aio_cancel_all 308/* aio_cancel_all
@@ -413,10 +390,6 @@ void exit_aio(struct mm_struct *mm)
413 aio_cancel_all(ctx); 390 aio_cancel_all(ctx);
414 391
415 wait_for_all_aios(ctx); 392 wait_for_all_aios(ctx);
416 /*
417 * Ensure we don't leave the ctx on the aio_wq
418 */
419 cancel_work_sync(&ctx->wq.work);
420 393
421 if (1 != atomic_read(&ctx->users)) 394 if (1 != atomic_read(&ctx->users))
422 printk(KERN_DEBUG 395 printk(KERN_DEBUG
@@ -490,6 +463,8 @@ static void kiocb_batch_free(struct kioctx *ctx, struct kiocb_batch *batch)
490 kmem_cache_free(kiocb_cachep, req); 463 kmem_cache_free(kiocb_cachep, req);
491 ctx->reqs_active--; 464 ctx->reqs_active--;
492 } 465 }
466 if (unlikely(!ctx->reqs_active && ctx->dead))
467 wake_up_all(&ctx->wait);
493 spin_unlock_irq(&ctx->ctx_lock); 468 spin_unlock_irq(&ctx->ctx_lock);
494} 469}
495 470
@@ -607,11 +582,16 @@ static void aio_fput_routine(struct work_struct *data)
607 fput(req->ki_filp); 582 fput(req->ki_filp);
608 583
609 /* Link the iocb into the context's free list */ 584 /* Link the iocb into the context's free list */
585 rcu_read_lock();
610 spin_lock_irq(&ctx->ctx_lock); 586 spin_lock_irq(&ctx->ctx_lock);
611 really_put_req(ctx, req); 587 really_put_req(ctx, req);
588 /*
589 * at that point ctx might've been killed, but actual
590 * freeing is RCU'd
591 */
612 spin_unlock_irq(&ctx->ctx_lock); 592 spin_unlock_irq(&ctx->ctx_lock);
593 rcu_read_unlock();
613 594
614 put_ioctx(ctx);
615 spin_lock_irq(&fput_lock); 595 spin_lock_irq(&fput_lock);
616 } 596 }
617 spin_unlock_irq(&fput_lock); 597 spin_unlock_irq(&fput_lock);
@@ -642,7 +622,6 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
642 * this function will be executed w/out any aio kthread wakeup. 622 * this function will be executed w/out any aio kthread wakeup.
643 */ 623 */
644 if (unlikely(!fput_atomic(req->ki_filp))) { 624 if (unlikely(!fput_atomic(req->ki_filp))) {
645 get_ioctx(ctx);
646 spin_lock(&fput_lock); 625 spin_lock(&fput_lock);
647 list_add(&req->ki_list, &fput_head); 626 list_add(&req->ki_list, &fput_head);
648 spin_unlock(&fput_lock); 627 spin_unlock(&fput_lock);
@@ -920,7 +899,7 @@ static void aio_kick_handler(struct work_struct *work)
920 unuse_mm(mm); 899 unuse_mm(mm);
921 set_fs(oldfs); 900 set_fs(oldfs);
922 /* 901 /*
923 * we're in a worker thread already, don't use queue_delayed_work, 902 * we're in a worker thread already; no point using non-zero delay
924 */ 903 */
925 if (requeue) 904 if (requeue)
926 queue_delayed_work(aio_wq, &ctx->wq, 0); 905 queue_delayed_work(aio_wq, &ctx->wq, 0);
@@ -1019,10 +998,10 @@ int aio_complete(struct kiocb *iocb, long res, long res2)
1019 if (kiocbIsCancelled(iocb)) 998 if (kiocbIsCancelled(iocb))
1020 goto put_rq; 999 goto put_rq;
1021 1000
1022 ring = kmap_atomic(info->ring_pages[0], KM_IRQ1); 1001 ring = kmap_atomic(info->ring_pages[0]);
1023 1002
1024 tail = info->tail; 1003 tail = info->tail;
1025 event = aio_ring_event(info, tail, KM_IRQ0); 1004 event = aio_ring_event(info, tail);
1026 if (++tail >= info->nr) 1005 if (++tail >= info->nr)
1027 tail = 0; 1006 tail = 0;
1028 1007
@@ -1043,8 +1022,8 @@ int aio_complete(struct kiocb *iocb, long res, long res2)
1043 info->tail = tail; 1022 info->tail = tail;
1044 ring->tail = tail; 1023 ring->tail = tail;
1045 1024
1046 put_aio_ring_event(event, KM_IRQ0); 1025 put_aio_ring_event(event);
1047 kunmap_atomic(ring, KM_IRQ1); 1026 kunmap_atomic(ring);
1048 1027
1049 pr_debug("added to ring %p at [%lu]\n", iocb, tail); 1028 pr_debug("added to ring %p at [%lu]\n", iocb, tail);
1050 1029
@@ -1089,7 +1068,7 @@ static int aio_read_evt(struct kioctx *ioctx, struct io_event *ent)
1089 unsigned long head; 1068 unsigned long head;
1090 int ret = 0; 1069 int ret = 0;
1091 1070
1092 ring = kmap_atomic(info->ring_pages[0], KM_USER0); 1071 ring = kmap_atomic(info->ring_pages[0]);
1093 dprintk("in aio_read_evt h%lu t%lu m%lu\n", 1072 dprintk("in aio_read_evt h%lu t%lu m%lu\n",
1094 (unsigned long)ring->head, (unsigned long)ring->tail, 1073 (unsigned long)ring->head, (unsigned long)ring->tail,
1095 (unsigned long)ring->nr); 1074 (unsigned long)ring->nr);
@@ -1101,18 +1080,18 @@ static int aio_read_evt(struct kioctx *ioctx, struct io_event *ent)
1101 1080
1102 head = ring->head % info->nr; 1081 head = ring->head % info->nr;
1103 if (head != ring->tail) { 1082 if (head != ring->tail) {
1104 struct io_event *evp = aio_ring_event(info, head, KM_USER1); 1083 struct io_event *evp = aio_ring_event(info, head);
1105 *ent = *evp; 1084 *ent = *evp;
1106 head = (head + 1) % info->nr; 1085 head = (head + 1) % info->nr;
1107 smp_mb(); /* finish reading the event before updatng the head */ 1086 smp_mb(); /* finish reading the event before updatng the head */
1108 ring->head = head; 1087 ring->head = head;
1109 ret = 1; 1088 ret = 1;
1110 put_aio_ring_event(evp, KM_USER1); 1089 put_aio_ring_event(evp);
1111 } 1090 }
1112 spin_unlock(&info->ring_lock); 1091 spin_unlock(&info->ring_lock);
1113 1092
1114out: 1093out:
1115 kunmap_atomic(ring, KM_USER0); 1094 kunmap_atomic(ring);
1116 dprintk("leaving aio_read_evt: %d h%lu t%lu\n", ret, 1095 dprintk("leaving aio_read_evt: %d h%lu t%lu\n", ret,
1117 (unsigned long)ring->head, (unsigned long)ring->tail); 1096 (unsigned long)ring->head, (unsigned long)ring->tail);
1118 return ret; 1097 return ret;
@@ -1336,10 +1315,10 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
1336 ret = PTR_ERR(ioctx); 1315 ret = PTR_ERR(ioctx);
1337 if (!IS_ERR(ioctx)) { 1316 if (!IS_ERR(ioctx)) {
1338 ret = put_user(ioctx->user_id, ctxp); 1317 ret = put_user(ioctx->user_id, ctxp);
1339 if (!ret) 1318 if (!ret) {
1319 put_ioctx(ioctx);
1340 return 0; 1320 return 0;
1341 1321 }
1342 get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */
1343 io_destroy(ioctx); 1322 io_destroy(ioctx);
1344 } 1323 }
1345 1324
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index f11e43ed907d..28d39fb84ae3 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -39,19 +39,6 @@ static const struct dentry_operations anon_inodefs_dentry_operations = {
39 .d_dname = anon_inodefs_dname, 39 .d_dname = anon_inodefs_dname,
40}; 40};
41 41
42static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type,
43 int flags, const char *dev_name, void *data)
44{
45 return mount_pseudo(fs_type, "anon_inode:", NULL,
46 &anon_inodefs_dentry_operations, ANON_INODE_FS_MAGIC);
47}
48
49static struct file_system_type anon_inode_fs_type = {
50 .name = "anon_inodefs",
51 .mount = anon_inodefs_mount,
52 .kill_sb = kill_anon_super,
53};
54
55/* 42/*
56 * nop .set_page_dirty method so that people can use .page_mkwrite on 43 * nop .set_page_dirty method so that people can use .page_mkwrite on
57 * anon inodes. 44 * anon inodes.
@@ -65,6 +52,62 @@ static const struct address_space_operations anon_aops = {
65 .set_page_dirty = anon_set_page_dirty, 52 .set_page_dirty = anon_set_page_dirty,
66}; 53};
67 54
55/*
56 * A single inode exists for all anon_inode files. Contrary to pipes,
57 * anon_inode inodes have no associated per-instance data, so we need
58 * only allocate one of them.
59 */
60static struct inode *anon_inode_mkinode(struct super_block *s)
61{
62 struct inode *inode = new_inode_pseudo(s);
63
64 if (!inode)
65 return ERR_PTR(-ENOMEM);
66
67 inode->i_ino = get_next_ino();
68 inode->i_fop = &anon_inode_fops;
69
70 inode->i_mapping->a_ops = &anon_aops;
71
72 /*
73 * Mark the inode dirty from the very beginning,
74 * that way it will never be moved to the dirty
75 * list because mark_inode_dirty() will think
76 * that it already _is_ on the dirty list.
77 */
78 inode->i_state = I_DIRTY;
79 inode->i_mode = S_IRUSR | S_IWUSR;
80 inode->i_uid = current_fsuid();
81 inode->i_gid = current_fsgid();
82 inode->i_flags |= S_PRIVATE;
83 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
84 return inode;
85}
86
87static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type,
88 int flags, const char *dev_name, void *data)
89{
90 struct dentry *root;
91 root = mount_pseudo(fs_type, "anon_inode:", NULL,
92 &anon_inodefs_dentry_operations, ANON_INODE_FS_MAGIC);
93 if (!IS_ERR(root)) {
94 struct super_block *s = root->d_sb;
95 anon_inode_inode = anon_inode_mkinode(s);
96 if (IS_ERR(anon_inode_inode)) {
97 dput(root);
98 deactivate_locked_super(s);
99 root = ERR_CAST(anon_inode_inode);
100 }
101 }
102 return root;
103}
104
105static struct file_system_type anon_inode_fs_type = {
106 .name = "anon_inodefs",
107 .mount = anon_inodefs_mount,
108 .kill_sb = kill_anon_super,
109};
110
68/** 111/**
69 * anon_inode_getfile - creates a new file instance by hooking it up to an 112 * anon_inode_getfile - creates a new file instance by hooking it up to an
70 * anonymous inode, and a dentry that describe the "class" 113 * anonymous inode, and a dentry that describe the "class"
@@ -180,38 +223,6 @@ err_put_unused_fd:
180} 223}
181EXPORT_SYMBOL_GPL(anon_inode_getfd); 224EXPORT_SYMBOL_GPL(anon_inode_getfd);
182 225
183/*
184 * A single inode exists for all anon_inode files. Contrary to pipes,
185 * anon_inode inodes have no associated per-instance data, so we need
186 * only allocate one of them.
187 */
188static struct inode *anon_inode_mkinode(void)
189{
190 struct inode *inode = new_inode_pseudo(anon_inode_mnt->mnt_sb);
191
192 if (!inode)
193 return ERR_PTR(-ENOMEM);
194
195 inode->i_ino = get_next_ino();
196 inode->i_fop = &anon_inode_fops;
197
198 inode->i_mapping->a_ops = &anon_aops;
199
200 /*
201 * Mark the inode dirty from the very beginning,
202 * that way it will never be moved to the dirty
203 * list because mark_inode_dirty() will think
204 * that it already _is_ on the dirty list.
205 */
206 inode->i_state = I_DIRTY;
207 inode->i_mode = S_IRUSR | S_IWUSR;
208 inode->i_uid = current_fsuid();
209 inode->i_gid = current_fsgid();
210 inode->i_flags |= S_PRIVATE;
211 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
212 return inode;
213}
214
215static int __init anon_inode_init(void) 226static int __init anon_inode_init(void)
216{ 227{
217 int error; 228 int error;
@@ -224,16 +235,8 @@ static int __init anon_inode_init(void)
224 error = PTR_ERR(anon_inode_mnt); 235 error = PTR_ERR(anon_inode_mnt);
225 goto err_unregister_filesystem; 236 goto err_unregister_filesystem;
226 } 237 }
227 anon_inode_inode = anon_inode_mkinode();
228 if (IS_ERR(anon_inode_inode)) {
229 error = PTR_ERR(anon_inode_inode);
230 goto err_mntput;
231 }
232
233 return 0; 238 return 0;
234 239
235err_mntput:
236 kern_unmount(anon_inode_mnt);
237err_unregister_filesystem: 240err_unregister_filesystem:
238 unregister_filesystem(&anon_inode_fs_type); 241 unregister_filesystem(&anon_inode_fs_type);
239err_exit: 242err_exit:
diff --git a/fs/attr.c b/fs/attr.c
index 95053ad8abcc..73f69a6ce9ed 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -5,7 +5,7 @@
5 * changes by Thomas Schoebel-Theuer 5 * changes by Thomas Schoebel-Theuer
6 */ 6 */
7 7
8#include <linux/module.h> 8#include <linux/export.h>
9#include <linux/time.h> 9#include <linux/time.h>
10#include <linux/mm.h> 10#include <linux/mm.h>
11#include <linux/string.h> 11#include <linux/string.h>
diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c
index c038727b4050..cddc74b9cdb2 100644
--- a/fs/autofs4/init.c
+++ b/fs/autofs4/init.c
@@ -31,11 +31,11 @@ static int __init init_autofs4_fs(void)
31{ 31{
32 int err; 32 int err;
33 33
34 autofs_dev_ioctl_init();
35
34 err = register_filesystem(&autofs_fs_type); 36 err = register_filesystem(&autofs_fs_type);
35 if (err) 37 if (err)
36 return err; 38 autofs_dev_ioctl_exit();
37
38 autofs_dev_ioctl_init();
39 39
40 return err; 40 return err;
41} 41}
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 06858d955120..d8dc002e9cc3 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -247,12 +247,9 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
247 if (!ino) 247 if (!ino)
248 goto fail_free; 248 goto fail_free;
249 root_inode = autofs4_get_inode(s, S_IFDIR | 0755); 249 root_inode = autofs4_get_inode(s, S_IFDIR | 0755);
250 if (!root_inode) 250 root = d_make_root(root_inode);
251 goto fail_ino;
252
253 root = d_alloc_root(root_inode);
254 if (!root) 251 if (!root)
255 goto fail_iput; 252 goto fail_ino;
256 pipe = NULL; 253 pipe = NULL;
257 254
258 root->d_fsdata = ino; 255 root->d_fsdata = ino;
@@ -317,9 +314,6 @@ fail_fput:
317fail_dput: 314fail_dput:
318 dput(root); 315 dput(root);
319 goto fail_free; 316 goto fail_free;
320fail_iput:
321 printk("autofs: get root dentry failed\n");
322 iput(root_inode);
323fail_ino: 317fail_ino:
324 kfree(ino); 318 kfree(ino);
325fail_free: 319fail_free:
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 22e9a78872ff..37268c5bb98b 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -9,7 +9,7 @@
9 */ 9 */
10 10
11#include <linux/fs.h> 11#include <linux/fs.h>
12#include <linux/module.h> 12#include <linux/export.h>
13#include <linux/stat.h> 13#include <linux/stat.h>
14#include <linux/time.h> 14#include <linux/time.h>
15#include <linux/namei.h> 15#include <linux/namei.h>
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 6e6d536767fe..e18da23d42b5 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -852,9 +852,8 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
852 ret = PTR_ERR(root); 852 ret = PTR_ERR(root);
853 goto unacquire_priv_sbp; 853 goto unacquire_priv_sbp;
854 } 854 }
855 sb->s_root = d_alloc_root(root); 855 sb->s_root = d_make_root(root);
856 if (!sb->s_root) { 856 if (!sb->s_root) {
857 iput(root);
858 befs_error(sb, "get root inode failed"); 857 befs_error(sb, "get root inode failed");
859 goto unacquire_priv_sbp; 858 goto unacquire_priv_sbp;
860 } 859 }
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index b0391bc402b1..e23dc7c8b884 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -367,9 +367,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
367 ret = PTR_ERR(inode); 367 ret = PTR_ERR(inode);
368 goto out2; 368 goto out2;
369 } 369 }
370 s->s_root = d_alloc_root(inode); 370 s->s_root = d_make_root(inode);
371 if (!s->s_root) { 371 if (!s->s_root) {
372 iput(inode);
373 ret = -ENOMEM; 372 ret = -ENOMEM;
374 goto out2; 373 goto out2;
375 } 374 }
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index a6395bdb26ae..4d5e6d26578c 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -259,8 +259,14 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
259 current->mm->free_area_cache = current->mm->mmap_base; 259 current->mm->free_area_cache = current->mm->mmap_base;
260 current->mm->cached_hole_size = 0; 260 current->mm->cached_hole_size = 0;
261 261
262 retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT);
263 if (retval < 0) {
264 /* Someone check-me: is this error path enough? */
265 send_sig(SIGKILL, current, 0);
266 return retval;
267 }
268
262 install_exec_creds(bprm); 269 install_exec_creds(bprm);
263 current->flags &= ~PF_FORKNOEXEC;
264 270
265 if (N_MAGIC(ex) == OMAGIC) { 271 if (N_MAGIC(ex) == OMAGIC) {
266 unsigned long text_addr, map_size; 272 unsigned long text_addr, map_size;
@@ -352,13 +358,6 @@ beyond_if:
352 return retval; 358 return retval;
353 } 359 }
354 360
355 retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT);
356 if (retval < 0) {
357 /* Someone check-me: is this error path enough? */
358 send_sig(SIGKILL, current, 0);
359 return retval;
360 }
361
362 current->mm->start_stack = 361 current->mm->start_stack =
363 (unsigned long) create_aout_tables((char __user *) bprm->p, bprm); 362 (unsigned long) create_aout_tables((char __user *) bprm->p, bprm);
364#ifdef __alpha__ 363#ifdef __alpha__
@@ -454,7 +453,8 @@ out:
454 453
455static int __init init_aout_binfmt(void) 454static int __init init_aout_binfmt(void)
456{ 455{
457 return register_binfmt(&aout_format); 456 register_binfmt(&aout_format);
457 return 0;
458} 458}
459 459
460static void __exit exit_aout_binfmt(void) 460static void __exit exit_aout_binfmt(void)
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index bcb884e2d613..504b6eee50a9 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -712,7 +712,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
712 goto out_free_dentry; 712 goto out_free_dentry;
713 713
714 /* OK, This is the point of no return */ 714 /* OK, This is the point of no return */
715 current->flags &= ~PF_FORKNOEXEC;
716 current->mm->def_flags = def_flags; 715 current->mm->def_flags = def_flags;
717 716
718 /* Do this immediately, since STACK_TOP as used in setup_arg_pages 717 /* Do this immediately, since STACK_TOP as used in setup_arg_pages
@@ -934,7 +933,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
934#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */ 933#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
935 934
936 install_exec_creds(bprm); 935 install_exec_creds(bprm);
937 current->flags &= ~PF_FORKNOEXEC;
938 retval = create_elf_tables(bprm, &loc->elf_ex, 936 retval = create_elf_tables(bprm, &loc->elf_ex,
939 load_addr, interp_load_addr); 937 load_addr, interp_load_addr);
940 if (retval < 0) { 938 if (retval < 0) {
@@ -1095,6 +1093,29 @@ out:
1095 */ 1093 */
1096 1094
1097/* 1095/*
1096 * The purpose of always_dump_vma() is to make sure that special kernel mappings
1097 * that are useful for post-mortem analysis are included in every core dump.
1098 * In that way we ensure that the core dump is fully interpretable later
1099 * without matching up the same kernel and hardware config to see what PC values
1100 * meant. These special mappings include - vDSO, vsyscall, and other
1101 * architecture specific mappings
1102 */
1103static bool always_dump_vma(struct vm_area_struct *vma)
1104{
1105 /* Any vsyscall mappings? */
1106 if (vma == get_gate_vma(vma->vm_mm))
1107 return true;
1108 /*
1109 * arch_vma_name() returns non-NULL for special architecture mappings,
1110 * such as vDSO sections.
1111 */
1112 if (arch_vma_name(vma))
1113 return true;
1114
1115 return false;
1116}
1117
1118/*
1098 * Decide what to dump of a segment, part, all or none. 1119 * Decide what to dump of a segment, part, all or none.
1099 */ 1120 */
1100static unsigned long vma_dump_size(struct vm_area_struct *vma, 1121static unsigned long vma_dump_size(struct vm_area_struct *vma,
@@ -1102,10 +1123,13 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma,
1102{ 1123{
1103#define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type)) 1124#define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type))
1104 1125
1105 /* The vma can be set up to tell us the answer directly. */ 1126 /* always dump the vdso and vsyscall sections */
1106 if (vma->vm_flags & VM_ALWAYSDUMP) 1127 if (always_dump_vma(vma))
1107 goto whole; 1128 goto whole;
1108 1129
1130 if (vma->vm_flags & VM_NODUMP)
1131 return 0;
1132
1109 /* Hugetlb memory check */ 1133 /* Hugetlb memory check */
1110 if (vma->vm_flags & VM_HUGETLB) { 1134 if (vma->vm_flags & VM_HUGETLB) {
1111 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED)) 1135 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
@@ -1421,7 +1445,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
1421 for (i = 1; i < view->n; ++i) { 1445 for (i = 1; i < view->n; ++i) {
1422 const struct user_regset *regset = &view->regsets[i]; 1446 const struct user_regset *regset = &view->regsets[i];
1423 do_thread_regset_writeback(t->task, regset); 1447 do_thread_regset_writeback(t->task, regset);
1424 if (regset->core_note_type && 1448 if (regset->core_note_type && regset->get &&
1425 (!regset->active || regset->active(t->task, regset))) { 1449 (!regset->active || regset->active(t->task, regset))) {
1426 int ret; 1450 int ret;
1427 size_t size = regset->n * regset->size; 1451 size_t size = regset->n * regset->size;
@@ -2077,7 +2101,8 @@ out:
2077 2101
2078static int __init init_elf_binfmt(void) 2102static int __init init_elf_binfmt(void)
2079{ 2103{
2080 return register_binfmt(&elf_format); 2104 register_binfmt(&elf_format);
2105 return 0;
2081} 2106}
2082 2107
2083static void __exit exit_elf_binfmt(void) 2108static void __exit exit_elf_binfmt(void)
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 30745f459faf..c64bf5ee2df4 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -91,7 +91,8 @@ static struct linux_binfmt elf_fdpic_format = {
91 91
92static int __init init_elf_fdpic_binfmt(void) 92static int __init init_elf_fdpic_binfmt(void)
93{ 93{
94 return register_binfmt(&elf_fdpic_format); 94 register_binfmt(&elf_fdpic_format);
95 return 0;
95} 96}
96 97
97static void __exit exit_elf_fdpic_binfmt(void) 98static void __exit exit_elf_fdpic_binfmt(void)
@@ -334,8 +335,6 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
334 current->mm->context.exec_fdpic_loadmap = 0; 335 current->mm->context.exec_fdpic_loadmap = 0;
335 current->mm->context.interp_fdpic_loadmap = 0; 336 current->mm->context.interp_fdpic_loadmap = 0;
336 337
337 current->flags &= ~PF_FORKNOEXEC;
338
339#ifdef CONFIG_MMU 338#ifdef CONFIG_MMU
340 elf_fdpic_arch_lay_out_mm(&exec_params, 339 elf_fdpic_arch_lay_out_mm(&exec_params,
341 &interp_params, 340 &interp_params,
@@ -413,7 +412,6 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
413#endif 412#endif
414 413
415 install_exec_creds(bprm); 414 install_exec_creds(bprm);
416 current->flags &= ~PF_FORKNOEXEC;
417 if (create_elf_fdpic_tables(bprm, current->mm, 415 if (create_elf_fdpic_tables(bprm, current->mm,
418 &exec_params, &interp_params) < 0) 416 &exec_params, &interp_params) < 0)
419 goto error_kill; 417 goto error_kill;
diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c
index b8e8b0acf9bd..2790c7e1912e 100644
--- a/fs/binfmt_em86.c
+++ b/fs/binfmt_em86.c
@@ -100,7 +100,8 @@ static struct linux_binfmt em86_format = {
100 100
101static int __init init_em86_binfmt(void) 101static int __init init_em86_binfmt(void)
102{ 102{
103 return register_binfmt(&em86_format); 103 register_binfmt(&em86_format);
104 return 0;
104} 105}
105 106
106static void __exit exit_em86_binfmt(void) 107static void __exit exit_em86_binfmt(void)
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 1bffbe0ed778..5979027451b3 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -15,7 +15,7 @@
15 * JAN/99 -- coded full program relocation (gerg@snapgear.com) 15 * JAN/99 -- coded full program relocation (gerg@snapgear.com)
16 */ 16 */
17 17
18#include <linux/module.h> 18#include <linux/export.h>
19#include <linux/kernel.h> 19#include <linux/kernel.h>
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/mm.h> 21#include <linux/mm.h>
@@ -902,7 +902,6 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
902 libinfo.lib_list[j].start_data:UNLOADED_LIB; 902 libinfo.lib_list[j].start_data:UNLOADED_LIB;
903 903
904 install_exec_creds(bprm); 904 install_exec_creds(bprm);
905 current->flags &= ~PF_FORKNOEXEC;
906 905
907 set_binfmt(&flat_format); 906 set_binfmt(&flat_format);
908 907
@@ -950,7 +949,8 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
950 949
951static int __init init_flat_binfmt(void) 950static int __init init_flat_binfmt(void)
952{ 951{
953 return register_binfmt(&flat_format); 952 register_binfmt(&flat_format);
953 return 0;
954} 954}
955 955
956/****************************************************************************/ 956/****************************************************************************/
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index a9198dfd5f85..613aa0618235 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -19,6 +19,7 @@
19#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/init.h> 20#include <linux/init.h>
21#include <linux/sched.h> 21#include <linux/sched.h>
22#include <linux/magic.h>
22#include <linux/binfmts.h> 23#include <linux/binfmts.h>
23#include <linux/slab.h> 24#include <linux/slab.h>
24#include <linux/ctype.h> 25#include <linux/ctype.h>
@@ -699,7 +700,7 @@ static int bm_fill_super(struct super_block * sb, void * data, int silent)
699 [3] = {"register", &bm_register_operations, S_IWUSR}, 700 [3] = {"register", &bm_register_operations, S_IWUSR},
700 /* last one */ {""} 701 /* last one */ {""}
701 }; 702 };
702 int err = simple_fill_super(sb, 0x42494e4d, bm_files); 703 int err = simple_fill_super(sb, BINFMTFS_MAGIC, bm_files);
703 if (!err) 704 if (!err)
704 sb->s_op = &s_ops; 705 sb->s_op = &s_ops;
705 return err; 706 return err;
@@ -726,11 +727,8 @@ static struct file_system_type bm_fs_type = {
726static int __init init_misc_binfmt(void) 727static int __init init_misc_binfmt(void)
727{ 728{
728 int err = register_filesystem(&bm_fs_type); 729 int err = register_filesystem(&bm_fs_type);
729 if (!err) { 730 if (!err)
730 err = insert_binfmt(&misc_format); 731 insert_binfmt(&misc_format);
731 if (err)
732 unregister_filesystem(&bm_fs_type);
733 }
734 return err; 732 return err;
735} 733}
736 734
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index 396a9884591f..d3b8c1f63155 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -105,7 +105,8 @@ static struct linux_binfmt script_format = {
105 105
106static int __init init_script_binfmt(void) 106static int __init init_script_binfmt(void)
107{ 107{
108 return register_binfmt(&script_format); 108 register_binfmt(&script_format);
109 return 0;
109} 110}
110 111
111static void __exit exit_script_binfmt(void) 112static void __exit exit_script_binfmt(void)
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index cc8560f6c9b0..e4fc746629a7 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -225,7 +225,6 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs)
225 goto out_free; 225 goto out_free;
226 226
227 /* OK, This is the point of no return */ 227 /* OK, This is the point of no return */
228 current->flags &= ~PF_FORKNOEXEC;
229 current->personality = PER_HPUX; 228 current->personality = PER_HPUX;
230 setup_new_exec(bprm); 229 setup_new_exec(bprm);
231 230
@@ -289,7 +288,8 @@ static int load_som_library(struct file *f)
289 288
290static int __init init_som_binfmt(void) 289static int __init init_som_binfmt(void)
291{ 290{
292 return register_binfmt(&som_format); 291 register_binfmt(&som_format);
292 return 0;
293} 293}
294 294
295static void __exit exit_som_binfmt(void) 295static void __exit exit_som_binfmt(void)
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index c2183f3917cd..e85c04b9f61c 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -357,7 +357,7 @@ static void bio_integrity_generate(struct bio *bio)
357 bix.sector_size = bi->sector_size; 357 bix.sector_size = bi->sector_size;
358 358
359 bio_for_each_segment(bv, bio, i) { 359 bio_for_each_segment(bv, bio, i) {
360 void *kaddr = kmap_atomic(bv->bv_page, KM_USER0); 360 void *kaddr = kmap_atomic(bv->bv_page);
361 bix.data_buf = kaddr + bv->bv_offset; 361 bix.data_buf = kaddr + bv->bv_offset;
362 bix.data_size = bv->bv_len; 362 bix.data_size = bv->bv_len;
363 bix.prot_buf = prot_buf; 363 bix.prot_buf = prot_buf;
@@ -371,7 +371,7 @@ static void bio_integrity_generate(struct bio *bio)
371 total += sectors * bi->tuple_size; 371 total += sectors * bi->tuple_size;
372 BUG_ON(total > bio->bi_integrity->bip_size); 372 BUG_ON(total > bio->bi_integrity->bip_size);
373 373
374 kunmap_atomic(kaddr, KM_USER0); 374 kunmap_atomic(kaddr);
375 } 375 }
376} 376}
377 377
@@ -498,7 +498,7 @@ static int bio_integrity_verify(struct bio *bio)
498 bix.sector_size = bi->sector_size; 498 bix.sector_size = bi->sector_size;
499 499
500 bio_for_each_segment(bv, bio, i) { 500 bio_for_each_segment(bv, bio, i) {
501 void *kaddr = kmap_atomic(bv->bv_page, KM_USER0); 501 void *kaddr = kmap_atomic(bv->bv_page);
502 bix.data_buf = kaddr + bv->bv_offset; 502 bix.data_buf = kaddr + bv->bv_offset;
503 bix.data_size = bv->bv_len; 503 bix.data_size = bv->bv_len;
504 bix.prot_buf = prot_buf; 504 bix.prot_buf = prot_buf;
@@ -507,7 +507,7 @@ static int bio_integrity_verify(struct bio *bio)
507 ret = bi->verify_fn(&bix); 507 ret = bi->verify_fn(&bix);
508 508
509 if (ret) { 509 if (ret) {
510 kunmap_atomic(kaddr, KM_USER0); 510 kunmap_atomic(kaddr);
511 return ret; 511 return ret;
512 } 512 }
513 513
@@ -517,7 +517,7 @@ static int bio_integrity_verify(struct bio *bio)
517 total += sectors * bi->tuple_size; 517 total += sectors * bi->tuple_size;
518 BUG_ON(total > bio->bi_integrity->bip_size); 518 BUG_ON(total > bio->bi_integrity->bip_size);
519 519
520 kunmap_atomic(kaddr, KM_USER0); 520 kunmap_atomic(kaddr);
521 } 521 }
522 522
523 return ret; 523 return ret;
diff --git a/fs/bio.c b/fs/bio.c
index b980ecde026a..e453924036e9 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -22,7 +22,7 @@
22#include <linux/slab.h> 22#include <linux/slab.h>
23#include <linux/init.h> 23#include <linux/init.h>
24#include <linux/kernel.h> 24#include <linux/kernel.h>
25#include <linux/module.h> 25#include <linux/export.h>
26#include <linux/mempool.h> 26#include <linux/mempool.h>
27#include <linux/workqueue.h> 27#include <linux/workqueue.h>
28#include <scsi/sg.h> /* for struct sg_iovec */ 28#include <scsi/sg.h> /* for struct sg_iovec */
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 0e575d1304b4..e08f6a20a5bb 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -16,6 +16,7 @@
16#include <linux/blkdev.h> 16#include <linux/blkdev.h>
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/blkpg.h> 18#include <linux/blkpg.h>
19#include <linux/magic.h>
19#include <linux/buffer_head.h> 20#include <linux/buffer_head.h>
20#include <linux/swap.h> 21#include <linux/swap.h>
21#include <linux/pagevec.h> 22#include <linux/pagevec.h>
@@ -109,7 +110,7 @@ void invalidate_bdev(struct block_device *bdev)
109 /* 99% of the time, we don't need to flush the cleancache on the bdev. 110 /* 99% of the time, we don't need to flush the cleancache on the bdev.
110 * But, for the strange corners, lets be cautious 111 * But, for the strange corners, lets be cautious
111 */ 112 */
112 cleancache_flush_inode(mapping); 113 cleancache_invalidate_inode(mapping);
113} 114}
114EXPORT_SYMBOL(invalidate_bdev); 115EXPORT_SYMBOL(invalidate_bdev);
115 116
@@ -506,7 +507,7 @@ static const struct super_operations bdev_sops = {
506static struct dentry *bd_mount(struct file_system_type *fs_type, 507static struct dentry *bd_mount(struct file_system_type *fs_type,
507 int flags, const char *dev_name, void *data) 508 int flags, const char *dev_name, void *data)
508{ 509{
509 return mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, 0x62646576); 510 return mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC);
510} 511}
511 512
512static struct file_system_type bd_type = { 513static struct file_system_type bd_type = {
@@ -1183,8 +1184,12 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1183 * The latter is necessary to prevent ghost 1184 * The latter is necessary to prevent ghost
1184 * partitions on a removed medium. 1185 * partitions on a removed medium.
1185 */ 1186 */
1186 if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM)) 1187 if (bdev->bd_invalidated) {
1187 rescan_partitions(disk, bdev); 1188 if (!ret)
1189 rescan_partitions(disk, bdev);
1190 else if (ret == -ENOMEDIUM)
1191 invalidate_partitions(disk, bdev);
1192 }
1188 if (ret) 1193 if (ret)
1189 goto out_clear; 1194 goto out_clear;
1190 } else { 1195 } else {
@@ -1214,8 +1219,12 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1214 if (bdev->bd_disk->fops->open) 1219 if (bdev->bd_disk->fops->open)
1215 ret = bdev->bd_disk->fops->open(bdev, mode); 1220 ret = bdev->bd_disk->fops->open(bdev, mode);
1216 /* the same as first opener case, read comment there */ 1221 /* the same as first opener case, read comment there */
1217 if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM)) 1222 if (bdev->bd_invalidated) {
1218 rescan_partitions(bdev->bd_disk, bdev); 1223 if (!ret)
1224 rescan_partitions(bdev->bd_disk, bdev);
1225 else if (ret == -ENOMEDIUM)
1226 invalidate_partitions(bdev->bd_disk, bdev);
1227 }
1219 if (ret) 1228 if (ret)
1220 goto out_unlock_bdev; 1229 goto out_unlock_bdev;
1221 } 1230 }
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 98f6bf10bbd4..0436c12da8c2 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -583,7 +583,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
583 struct btrfs_path *path; 583 struct btrfs_path *path;
584 struct btrfs_key info_key = { 0 }; 584 struct btrfs_key info_key = { 0 };
585 struct btrfs_delayed_ref_root *delayed_refs = NULL; 585 struct btrfs_delayed_ref_root *delayed_refs = NULL;
586 struct btrfs_delayed_ref_head *head = NULL; 586 struct btrfs_delayed_ref_head *head;
587 int info_level = 0; 587 int info_level = 0;
588 int ret; 588 int ret;
589 struct list_head prefs_delayed; 589 struct list_head prefs_delayed;
@@ -607,6 +607,8 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
607 * at a specified point in time 607 * at a specified point in time
608 */ 608 */
609again: 609again:
610 head = NULL;
611
610 ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0); 612 ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
611 if (ret < 0) 613 if (ret < 0)
612 goto out; 614 goto out;
@@ -635,8 +637,10 @@ again:
635 goto again; 637 goto again;
636 } 638 }
637 ret = __add_delayed_refs(head, seq, &info_key, &prefs_delayed); 639 ret = __add_delayed_refs(head, seq, &info_key, &prefs_delayed);
638 if (ret) 640 if (ret) {
641 spin_unlock(&delayed_refs->lock);
639 goto out; 642 goto out;
643 }
640 } 644 }
641 spin_unlock(&delayed_refs->lock); 645 spin_unlock(&delayed_refs->lock);
642 646
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index d986824bb2b4..c053e90f2006 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -89,7 +89,6 @@
89#include "disk-io.h" 89#include "disk-io.h"
90#include "transaction.h" 90#include "transaction.h"
91#include "extent_io.h" 91#include "extent_io.h"
92#include "disk-io.h"
93#include "volumes.h" 92#include "volumes.h"
94#include "print-tree.h" 93#include "print-tree.h"
95#include "locking.h" 94#include "locking.h"
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index d02c27cd14c7..b805afb37fa8 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -120,10 +120,10 @@ static int check_compressed_csum(struct inode *inode,
120 page = cb->compressed_pages[i]; 120 page = cb->compressed_pages[i];
121 csum = ~(u32)0; 121 csum = ~(u32)0;
122 122
123 kaddr = kmap_atomic(page, KM_USER0); 123 kaddr = kmap_atomic(page);
124 csum = btrfs_csum_data(root, kaddr, csum, PAGE_CACHE_SIZE); 124 csum = btrfs_csum_data(root, kaddr, csum, PAGE_CACHE_SIZE);
125 btrfs_csum_final(csum, (char *)&csum); 125 btrfs_csum_final(csum, (char *)&csum);
126 kunmap_atomic(kaddr, KM_USER0); 126 kunmap_atomic(kaddr);
127 127
128 if (csum != *cb_sum) { 128 if (csum != *cb_sum) {
129 printk(KERN_INFO "btrfs csum failed ino %llu " 129 printk(KERN_INFO "btrfs csum failed ino %llu "
@@ -521,10 +521,10 @@ static noinline int add_ra_bio_pages(struct inode *inode,
521 if (zero_offset) { 521 if (zero_offset) {
522 int zeros; 522 int zeros;
523 zeros = PAGE_CACHE_SIZE - zero_offset; 523 zeros = PAGE_CACHE_SIZE - zero_offset;
524 userpage = kmap_atomic(page, KM_USER0); 524 userpage = kmap_atomic(page);
525 memset(userpage + zero_offset, 0, zeros); 525 memset(userpage + zero_offset, 0, zeros);
526 flush_dcache_page(page); 526 flush_dcache_page(page);
527 kunmap_atomic(userpage, KM_USER0); 527 kunmap_atomic(userpage);
528 } 528 }
529 } 529 }
530 530
@@ -993,9 +993,9 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
993 bytes = min(PAGE_CACHE_SIZE - *pg_offset, 993 bytes = min(PAGE_CACHE_SIZE - *pg_offset,
994 PAGE_CACHE_SIZE - buf_offset); 994 PAGE_CACHE_SIZE - buf_offset);
995 bytes = min(bytes, working_bytes); 995 bytes = min(bytes, working_bytes);
996 kaddr = kmap_atomic(page_out, KM_USER0); 996 kaddr = kmap_atomic(page_out);
997 memcpy(kaddr + *pg_offset, buf + buf_offset, bytes); 997 memcpy(kaddr + *pg_offset, buf + buf_offset, bytes);
998 kunmap_atomic(kaddr, KM_USER0); 998 kunmap_atomic(kaddr);
999 flush_dcache_page(page_out); 999 flush_dcache_page(page_out);
1000 1000
1001 *pg_offset += bytes; 1001 *pg_offset += bytes;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a55fbe6252de..2862454bcdb3 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2546,10 +2546,10 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2546 2546
2547 if (zero_offset) { 2547 if (zero_offset) {
2548 iosize = PAGE_CACHE_SIZE - zero_offset; 2548 iosize = PAGE_CACHE_SIZE - zero_offset;
2549 userpage = kmap_atomic(page, KM_USER0); 2549 userpage = kmap_atomic(page);
2550 memset(userpage + zero_offset, 0, iosize); 2550 memset(userpage + zero_offset, 0, iosize);
2551 flush_dcache_page(page); 2551 flush_dcache_page(page);
2552 kunmap_atomic(userpage, KM_USER0); 2552 kunmap_atomic(userpage);
2553 } 2553 }
2554 } 2554 }
2555 while (cur <= end) { 2555 while (cur <= end) {
@@ -2558,10 +2558,10 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2558 struct extent_state *cached = NULL; 2558 struct extent_state *cached = NULL;
2559 2559
2560 iosize = PAGE_CACHE_SIZE - pg_offset; 2560 iosize = PAGE_CACHE_SIZE - pg_offset;
2561 userpage = kmap_atomic(page, KM_USER0); 2561 userpage = kmap_atomic(page);
2562 memset(userpage + pg_offset, 0, iosize); 2562 memset(userpage + pg_offset, 0, iosize);
2563 flush_dcache_page(page); 2563 flush_dcache_page(page);
2564 kunmap_atomic(userpage, KM_USER0); 2564 kunmap_atomic(userpage);
2565 set_extent_uptodate(tree, cur, cur + iosize - 1, 2565 set_extent_uptodate(tree, cur, cur + iosize - 1,
2566 &cached, GFP_NOFS); 2566 &cached, GFP_NOFS);
2567 unlock_extent_cached(tree, cur, cur + iosize - 1, 2567 unlock_extent_cached(tree, cur, cur + iosize - 1,
@@ -2607,10 +2607,10 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2607 char *userpage; 2607 char *userpage;
2608 struct extent_state *cached = NULL; 2608 struct extent_state *cached = NULL;
2609 2609
2610 userpage = kmap_atomic(page, KM_USER0); 2610 userpage = kmap_atomic(page);
2611 memset(userpage + pg_offset, 0, iosize); 2611 memset(userpage + pg_offset, 0, iosize);
2612 flush_dcache_page(page); 2612 flush_dcache_page(page);
2613 kunmap_atomic(userpage, KM_USER0); 2613 kunmap_atomic(userpage);
2614 2614
2615 set_extent_uptodate(tree, cur, cur + iosize - 1, 2615 set_extent_uptodate(tree, cur, cur + iosize - 1,
2616 &cached, GFP_NOFS); 2616 &cached, GFP_NOFS);
@@ -2756,10 +2756,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2756 if (page->index == end_index) { 2756 if (page->index == end_index) {
2757 char *userpage; 2757 char *userpage;
2758 2758
2759 userpage = kmap_atomic(page, KM_USER0); 2759 userpage = kmap_atomic(page);
2760 memset(userpage + pg_offset, 0, 2760 memset(userpage + pg_offset, 0,
2761 PAGE_CACHE_SIZE - pg_offset); 2761 PAGE_CACHE_SIZE - pg_offset);
2762 kunmap_atomic(userpage, KM_USER0); 2762 kunmap_atomic(userpage);
2763 flush_dcache_page(page); 2763 flush_dcache_page(page);
2764 } 2764 }
2765 pg_offset = 0; 2765 pg_offset = 0;
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index c7fb3a4247d3..078b4fd54500 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -447,13 +447,13 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
447 sums->bytenr = ordered->start; 447 sums->bytenr = ordered->start;
448 } 448 }
449 449
450 data = kmap_atomic(bvec->bv_page, KM_USER0); 450 data = kmap_atomic(bvec->bv_page);
451 sector_sum->sum = ~(u32)0; 451 sector_sum->sum = ~(u32)0;
452 sector_sum->sum = btrfs_csum_data(root, 452 sector_sum->sum = btrfs_csum_data(root,
453 data + bvec->bv_offset, 453 data + bvec->bv_offset,
454 sector_sum->sum, 454 sector_sum->sum,
455 bvec->bv_len); 455 bvec->bv_len);
456 kunmap_atomic(data, KM_USER0); 456 kunmap_atomic(data);
457 btrfs_csum_final(sector_sum->sum, 457 btrfs_csum_final(sector_sum->sum,
458 (char *)&sector_sum->sum); 458 (char *)&sector_sum->sum);
459 sector_sum->bytenr = disk_bytenr; 459 sector_sum->bytenr = disk_bytenr;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 710ea380c7ed..b02e379b14c7 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1068,7 +1068,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
1068 spin_unlock(&block_group->lock); 1068 spin_unlock(&block_group->lock);
1069 ret = 0; 1069 ret = 0;
1070#ifdef DEBUG 1070#ifdef DEBUG
1071 printk(KERN_ERR "btrfs: failed to write free space cace " 1071 printk(KERN_ERR "btrfs: failed to write free space cache "
1072 "for block group %llu\n", block_group->key.objectid); 1072 "for block group %llu\n", block_group->key.objectid);
1073#endif 1073#endif
1074 } 1074 }
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 892b34785ccc..3a0b5c1f9d31 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -173,9 +173,9 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
173 cur_size = min_t(unsigned long, compressed_size, 173 cur_size = min_t(unsigned long, compressed_size,
174 PAGE_CACHE_SIZE); 174 PAGE_CACHE_SIZE);
175 175
176 kaddr = kmap_atomic(cpage, KM_USER0); 176 kaddr = kmap_atomic(cpage);
177 write_extent_buffer(leaf, kaddr, ptr, cur_size); 177 write_extent_buffer(leaf, kaddr, ptr, cur_size);
178 kunmap_atomic(kaddr, KM_USER0); 178 kunmap_atomic(kaddr);
179 179
180 i++; 180 i++;
181 ptr += cur_size; 181 ptr += cur_size;
@@ -187,10 +187,10 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
187 page = find_get_page(inode->i_mapping, 187 page = find_get_page(inode->i_mapping,
188 start >> PAGE_CACHE_SHIFT); 188 start >> PAGE_CACHE_SHIFT);
189 btrfs_set_file_extent_compression(leaf, ei, 0); 189 btrfs_set_file_extent_compression(leaf, ei, 0);
190 kaddr = kmap_atomic(page, KM_USER0); 190 kaddr = kmap_atomic(page);
191 offset = start & (PAGE_CACHE_SIZE - 1); 191 offset = start & (PAGE_CACHE_SIZE - 1);
192 write_extent_buffer(leaf, kaddr + offset, ptr, size); 192 write_extent_buffer(leaf, kaddr + offset, ptr, size);
193 kunmap_atomic(kaddr, KM_USER0); 193 kunmap_atomic(kaddr);
194 page_cache_release(page); 194 page_cache_release(page);
195 } 195 }
196 btrfs_mark_buffer_dirty(leaf); 196 btrfs_mark_buffer_dirty(leaf);
@@ -422,10 +422,10 @@ again:
422 * sending it down to disk 422 * sending it down to disk
423 */ 423 */
424 if (offset) { 424 if (offset) {
425 kaddr = kmap_atomic(page, KM_USER0); 425 kaddr = kmap_atomic(page);
426 memset(kaddr + offset, 0, 426 memset(kaddr + offset, 0,
427 PAGE_CACHE_SIZE - offset); 427 PAGE_CACHE_SIZE - offset);
428 kunmap_atomic(kaddr, KM_USER0); 428 kunmap_atomic(kaddr);
429 } 429 }
430 will_compress = 1; 430 will_compress = 1;
431 } 431 }
@@ -1873,7 +1873,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
1873 } else { 1873 } else {
1874 ret = get_state_private(io_tree, start, &private); 1874 ret = get_state_private(io_tree, start, &private);
1875 } 1875 }
1876 kaddr = kmap_atomic(page, KM_USER0); 1876 kaddr = kmap_atomic(page);
1877 if (ret) 1877 if (ret)
1878 goto zeroit; 1878 goto zeroit;
1879 1879
@@ -1882,7 +1882,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
1882 if (csum != private) 1882 if (csum != private)
1883 goto zeroit; 1883 goto zeroit;
1884 1884
1885 kunmap_atomic(kaddr, KM_USER0); 1885 kunmap_atomic(kaddr);
1886good: 1886good:
1887 return 0; 1887 return 0;
1888 1888
@@ -1894,7 +1894,7 @@ zeroit:
1894 (unsigned long long)private); 1894 (unsigned long long)private);
1895 memset(kaddr + offset, 1, end - start + 1); 1895 memset(kaddr + offset, 1, end - start + 1);
1896 flush_dcache_page(page); 1896 flush_dcache_page(page);
1897 kunmap_atomic(kaddr, KM_USER0); 1897 kunmap_atomic(kaddr);
1898 if (private == 0) 1898 if (private == 0)
1899 return 0; 1899 return 0;
1900 return -EIO; 1900 return -EIO;
@@ -4937,12 +4937,12 @@ static noinline int uncompress_inline(struct btrfs_path *path,
4937 ret = btrfs_decompress(compress_type, tmp, page, 4937 ret = btrfs_decompress(compress_type, tmp, page,
4938 extent_offset, inline_size, max_size); 4938 extent_offset, inline_size, max_size);
4939 if (ret) { 4939 if (ret) {
4940 char *kaddr = kmap_atomic(page, KM_USER0); 4940 char *kaddr = kmap_atomic(page);
4941 unsigned long copy_size = min_t(u64, 4941 unsigned long copy_size = min_t(u64,
4942 PAGE_CACHE_SIZE - pg_offset, 4942 PAGE_CACHE_SIZE - pg_offset,
4943 max_size - extent_offset); 4943 max_size - extent_offset);
4944 memset(kaddr + pg_offset, 0, copy_size); 4944 memset(kaddr + pg_offset, 0, copy_size);
4945 kunmap_atomic(kaddr, KM_USER0); 4945 kunmap_atomic(kaddr);
4946 } 4946 }
4947 kfree(tmp); 4947 kfree(tmp);
4948 return 0; 4948 return 0;
@@ -5719,11 +5719,11 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
5719 unsigned long flags; 5719 unsigned long flags;
5720 5720
5721 local_irq_save(flags); 5721 local_irq_save(flags);
5722 kaddr = kmap_atomic(page, KM_IRQ0); 5722 kaddr = kmap_atomic(page);
5723 csum = btrfs_csum_data(root, kaddr + bvec->bv_offset, 5723 csum = btrfs_csum_data(root, kaddr + bvec->bv_offset,
5724 csum, bvec->bv_len); 5724 csum, bvec->bv_len);
5725 btrfs_csum_final(csum, (char *)&csum); 5725 btrfs_csum_final(csum, (char *)&csum);
5726 kunmap_atomic(kaddr, KM_IRQ0); 5726 kunmap_atomic(kaddr);
5727 local_irq_restore(flags); 5727 local_irq_restore(flags);
5728 5728
5729 flush_dcache_page(bvec->bv_page); 5729 flush_dcache_page(bvec->bv_page);
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index a178f5ebea78..743b86fa4fcb 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -411,9 +411,9 @@ static int lzo_decompress(struct list_head *ws, unsigned char *data_in,
411 411
412 bytes = min_t(unsigned long, destlen, out_len - start_byte); 412 bytes = min_t(unsigned long, destlen, out_len - start_byte);
413 413
414 kaddr = kmap_atomic(dest_page, KM_USER0); 414 kaddr = kmap_atomic(dest_page);
415 memcpy(kaddr, workspace->buf + start_byte, bytes); 415 memcpy(kaddr, workspace->buf + start_byte, bytes);
416 kunmap_atomic(kaddr, KM_USER0); 416 kunmap_atomic(kaddr);
417out: 417out:
418 return ret; 418 return ret;
419} 419}
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 2373b39a132b..22db04550f6a 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -305,7 +305,7 @@ again:
305 305
306 spin_lock(&fs_info->reada_lock); 306 spin_lock(&fs_info->reada_lock);
307 ret = radix_tree_insert(&dev->reada_zones, 307 ret = radix_tree_insert(&dev->reada_zones,
308 (unsigned long)zone->end >> PAGE_CACHE_SHIFT, 308 (unsigned long)(zone->end >> PAGE_CACHE_SHIFT),
309 zone); 309 zone);
310 spin_unlock(&fs_info->reada_lock); 310 spin_unlock(&fs_info->reada_lock);
311 311
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index abc0fbffa510..390e7102b0ff 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -591,7 +591,7 @@ static int scrub_fixup_check(struct scrub_bio *sbio, int ix)
591 u64 flags = sbio->spag[ix].flags; 591 u64 flags = sbio->spag[ix].flags;
592 592
593 page = sbio->bio->bi_io_vec[ix].bv_page; 593 page = sbio->bio->bi_io_vec[ix].bv_page;
594 buffer = kmap_atomic(page, KM_USER0); 594 buffer = kmap_atomic(page);
595 if (flags & BTRFS_EXTENT_FLAG_DATA) { 595 if (flags & BTRFS_EXTENT_FLAG_DATA) {
596 ret = scrub_checksum_data(sbio->sdev, 596 ret = scrub_checksum_data(sbio->sdev,
597 sbio->spag + ix, buffer); 597 sbio->spag + ix, buffer);
@@ -603,7 +603,7 @@ static int scrub_fixup_check(struct scrub_bio *sbio, int ix)
603 } else { 603 } else {
604 WARN_ON(1); 604 WARN_ON(1);
605 } 605 }
606 kunmap_atomic(buffer, KM_USER0); 606 kunmap_atomic(buffer);
607 607
608 return ret; 608 return ret;
609} 609}
@@ -792,7 +792,7 @@ static void scrub_checksum(struct btrfs_work *work)
792 } 792 }
793 for (i = 0; i < sbio->count; ++i) { 793 for (i = 0; i < sbio->count; ++i) {
794 page = sbio->bio->bi_io_vec[i].bv_page; 794 page = sbio->bio->bi_io_vec[i].bv_page;
795 buffer = kmap_atomic(page, KM_USER0); 795 buffer = kmap_atomic(page);
796 flags = sbio->spag[i].flags; 796 flags = sbio->spag[i].flags;
797 logical = sbio->logical + i * PAGE_SIZE; 797 logical = sbio->logical + i * PAGE_SIZE;
798 ret = 0; 798 ret = 0;
@@ -807,7 +807,7 @@ static void scrub_checksum(struct btrfs_work *work)
807 } else { 807 } else {
808 WARN_ON(1); 808 WARN_ON(1);
809 } 809 }
810 kunmap_atomic(buffer, KM_USER0); 810 kunmap_atomic(buffer);
811 if (ret) { 811 if (ret) {
812 ret = scrub_recheck_error(sbio, i); 812 ret = scrub_recheck_error(sbio, i);
813 if (!ret) { 813 if (!ret) {
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 3ce97b217cbe..81df3fec6a6d 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -629,7 +629,6 @@ static int btrfs_fill_super(struct super_block *sb,
629 void *data, int silent) 629 void *data, int silent)
630{ 630{
631 struct inode *inode; 631 struct inode *inode;
632 struct dentry *root_dentry;
633 struct btrfs_fs_info *fs_info = btrfs_sb(sb); 632 struct btrfs_fs_info *fs_info = btrfs_sb(sb);
634 struct btrfs_key key; 633 struct btrfs_key key;
635 int err; 634 int err;
@@ -660,15 +659,12 @@ static int btrfs_fill_super(struct super_block *sb,
660 goto fail_close; 659 goto fail_close;
661 } 660 }
662 661
663 root_dentry = d_alloc_root(inode); 662 sb->s_root = d_make_root(inode);
664 if (!root_dentry) { 663 if (!sb->s_root) {
665 iput(inode);
666 err = -ENOMEM; 664 err = -ENOMEM;
667 goto fail_close; 665 goto fail_close;
668 } 666 }
669 667
670 sb->s_root = root_dentry;
671
672 save_mount_options(sb, data); 668 save_mount_options(sb, data);
673 cleancache_init_fs(sb); 669 cleancache_init_fs(sb);
674 sb->s_flags |= MS_ACTIVE; 670 sb->s_flags |= MS_ACTIVE;
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index faccd47c6c46..92c20654cc55 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -370,9 +370,9 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in,
370 PAGE_CACHE_SIZE - buf_offset); 370 PAGE_CACHE_SIZE - buf_offset);
371 bytes = min(bytes, bytes_left); 371 bytes = min(bytes, bytes_left);
372 372
373 kaddr = kmap_atomic(dest_page, KM_USER0); 373 kaddr = kmap_atomic(dest_page);
374 memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes); 374 memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes);
375 kunmap_atomic(kaddr, KM_USER0); 375 kunmap_atomic(kaddr);
376 376
377 pg_offset += bytes; 377 pg_offset += bytes;
378 bytes_left -= bytes; 378 bytes_left -= bytes;
diff --git a/fs/buffer.c b/fs/buffer.c
index 1a30db77af32..70e2017edd70 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -29,7 +29,7 @@
29#include <linux/file.h> 29#include <linux/file.h>
30#include <linux/quotaops.h> 30#include <linux/quotaops.h>
31#include <linux/highmem.h> 31#include <linux/highmem.h>
32#include <linux/module.h> 32#include <linux/export.h>
33#include <linux/writeback.h> 33#include <linux/writeback.h>
34#include <linux/hash.h> 34#include <linux/hash.h>
35#include <linux/suspend.h> 35#include <linux/suspend.h>
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index a0358c2189cb..7f0771d3894e 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -646,7 +646,8 @@ lookup_again:
646 * (this is used to keep track of culling, and atimes are only 646 * (this is used to keep track of culling, and atimes are only
647 * updated by read, write and readdir but not lookup or 647 * updated by read, write and readdir but not lookup or
648 * open) */ 648 * open) */
649 touch_atime(cache->mnt, next); 649 path.dentry = next;
650 touch_atime(&path);
650 } 651 }
651 652
652 /* open a file interface onto a data file */ 653 /* open a file interface onto a data file */
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 00de2c9568cd..256f85221926 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -655,9 +655,8 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
655 dout("open_root_inode success\n"); 655 dout("open_root_inode success\n");
656 if (ceph_ino(inode) == CEPH_INO_ROOT && 656 if (ceph_ino(inode) == CEPH_INO_ROOT &&
657 fsc->sb->s_root == NULL) { 657 fsc->sb->s_root == NULL) {
658 root = d_alloc_root(inode); 658 root = d_make_root(inode);
659 if (!root) { 659 if (!root) {
660 iput(inode);
661 root = ERR_PTR(-ENOMEM); 660 root = ERR_PTR(-ENOMEM);
662 goto out; 661 goto out;
663 } 662 }
diff --git a/fs/cifs/README b/fs/cifs/README
index 895da1dc1550..b7d782bab797 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -753,10 +753,6 @@ module loading or during the runtime by using the interface
753 753
754i.e. echo "value" > /sys/module/cifs/parameters/<param> 754i.e. echo "value" > /sys/module/cifs/parameters/<param>
755 755
7561. echo_retries - The number of echo attempts before giving up and 7561. enable_oplocks - Enable or disable oplocks. Oplocks are enabled by default.
757 reconnecting to the server. The default is 5. The value 0
758 means never reconnect.
759
7602. enable_oplocks - Enable or disable oplocks. Oplocks are enabled by default.
761 [Y/y/1]. To disable use any of [N/n/0]. 757 [Y/y/1]. To disable use any of [N/n/0].
762 758
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 24b3dfc05282..573b899b5a5d 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -171,8 +171,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
171 seq_printf(m, "TCP status: %d\n\tLocal Users To " 171 seq_printf(m, "TCP status: %d\n\tLocal Users To "
172 "Server: %d SecMode: 0x%x Req On Wire: %d", 172 "Server: %d SecMode: 0x%x Req On Wire: %d",
173 server->tcpStatus, server->srv_count, 173 server->tcpStatus, server->srv_count,
174 server->sec_mode, 174 server->sec_mode, in_flight(server));
175 atomic_read(&server->inFlight));
176 175
177#ifdef CONFIG_CIFS_STATS2 176#ifdef CONFIG_CIFS_STATS2
178 seq_printf(m, " In Send: %d In MaxReq Wait: %d", 177 seq_printf(m, " In Send: %d In MaxReq Wait: %d",
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index c1b254487388..3cc1b251ca08 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -556,6 +556,7 @@ init_cifs_idmap(void)
556 556
557 /* instruct request_key() to use this special keyring as a cache for 557 /* instruct request_key() to use this special keyring as a cache for
558 * the results it looks up */ 558 * the results it looks up */
559 set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags);
559 cred->thread_keyring = keyring; 560 cred->thread_keyring = keyring;
560 cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; 561 cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
561 root_cred = cred; 562 root_cred = cred;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index b1fd382d1952..eee522c56ef0 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -76,12 +76,7 @@ MODULE_PARM_DESC(cifs_min_small, "Small network buffers in pool. Default: 30 "
76unsigned int cifs_max_pending = CIFS_MAX_REQ; 76unsigned int cifs_max_pending = CIFS_MAX_REQ;
77module_param(cifs_max_pending, int, 0444); 77module_param(cifs_max_pending, int, 0444);
78MODULE_PARM_DESC(cifs_max_pending, "Simultaneous requests to server. " 78MODULE_PARM_DESC(cifs_max_pending, "Simultaneous requests to server. "
79 "Default: 50 Range: 2 to 256"); 79 "Default: 32767 Range: 2 to 32767.");
80unsigned short echo_retries = 5;
81module_param(echo_retries, ushort, 0644);
82MODULE_PARM_DESC(echo_retries, "Number of echo attempts before giving up and "
83 "reconnecting server. Default: 5. 0 means "
84 "never reconnect.");
85module_param(enable_oplocks, bool, 0644); 80module_param(enable_oplocks, bool, 0644);
86MODULE_PARM_DESC(enable_oplocks, "Enable or disable oplocks (bool). Default:" 81MODULE_PARM_DESC(enable_oplocks, "Enable or disable oplocks (bool). Default:"
87 "y/Y/1"); 82 "y/Y/1");
@@ -119,12 +114,10 @@ cifs_read_super(struct super_block *sb)
119 114
120 if (IS_ERR(inode)) { 115 if (IS_ERR(inode)) {
121 rc = PTR_ERR(inode); 116 rc = PTR_ERR(inode);
122 inode = NULL;
123 goto out_no_root; 117 goto out_no_root;
124 } 118 }
125 119
126 sb->s_root = d_alloc_root(inode); 120 sb->s_root = d_make_root(inode);
127
128 if (!sb->s_root) { 121 if (!sb->s_root) {
129 rc = -ENOMEM; 122 rc = -ENOMEM;
130 goto out_no_root; 123 goto out_no_root;
@@ -147,9 +140,6 @@ cifs_read_super(struct super_block *sb)
147 140
148out_no_root: 141out_no_root:
149 cERROR(1, "cifs_read_super: get root inode failed"); 142 cERROR(1, "cifs_read_super: get root inode failed");
150 if (inode)
151 iput(inode);
152
153 return rc; 143 return rc;
154} 144}
155 145
@@ -1116,9 +1106,9 @@ init_cifs(void)
1116 if (cifs_max_pending < 2) { 1106 if (cifs_max_pending < 2) {
1117 cifs_max_pending = 2; 1107 cifs_max_pending = 2;
1118 cFYI(1, "cifs_max_pending set to min of 2"); 1108 cFYI(1, "cifs_max_pending set to min of 2");
1119 } else if (cifs_max_pending > 256) { 1109 } else if (cifs_max_pending > CIFS_MAX_REQ) {
1120 cifs_max_pending = 256; 1110 cifs_max_pending = CIFS_MAX_REQ;
1121 cFYI(1, "cifs_max_pending set to max of 256"); 1111 cFYI(1, "cifs_max_pending set to max of %u", CIFS_MAX_REQ);
1122 } 1112 }
1123 1113
1124 rc = cifs_fscache_register(); 1114 rc = cifs_fscache_register();
@@ -1180,11 +1170,8 @@ static void __exit
1180exit_cifs(void) 1170exit_cifs(void)
1181{ 1171{
1182 cFYI(DBG2, "exit_cifs"); 1172 cFYI(DBG2, "exit_cifs");
1183 cifs_proc_clean(); 1173 unregister_filesystem(&cifs_fs_type);
1184 cifs_fscache_unregister();
1185#ifdef CONFIG_CIFS_DFS_UPCALL
1186 cifs_dfs_release_automount_timer(); 1174 cifs_dfs_release_automount_timer();
1187#endif
1188#ifdef CONFIG_CIFS_ACL 1175#ifdef CONFIG_CIFS_ACL
1189 cifs_destroy_idmaptrees(); 1176 cifs_destroy_idmaptrees();
1190 exit_cifs_idmap(); 1177 exit_cifs_idmap();
@@ -1192,10 +1179,11 @@ exit_cifs(void)
1192#ifdef CONFIG_CIFS_UPCALL 1179#ifdef CONFIG_CIFS_UPCALL
1193 unregister_key_type(&cifs_spnego_key_type); 1180 unregister_key_type(&cifs_spnego_key_type);
1194#endif 1181#endif
1195 unregister_filesystem(&cifs_fs_type);
1196 cifs_destroy_inodecache();
1197 cifs_destroy_mids();
1198 cifs_destroy_request_bufs(); 1182 cifs_destroy_request_bufs();
1183 cifs_destroy_mids();
1184 cifs_destroy_inodecache();
1185 cifs_fscache_unregister();
1186 cifs_proc_clean();
1199} 1187}
1200 1188
1201MODULE_AUTHOR("Steve French <sfrench@us.ibm.com>"); 1189MODULE_AUTHOR("Steve French <sfrench@us.ibm.com>");
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 76e7d8b6da17..339ebe3ebc0d 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -55,14 +55,9 @@
55 55
56/* 56/*
57 * MAX_REQ is the maximum number of requests that WE will send 57 * MAX_REQ is the maximum number of requests that WE will send
58 * on one socket concurrently. It also matches the most common 58 * on one socket concurrently.
59 * value of max multiplex returned by servers. We may
60 * eventually want to use the negotiated value (in case
61 * future servers can handle more) when we are more confident that
62 * we will not have problems oveloading the socket with pending
63 * write data.
64 */ 59 */
65#define CIFS_MAX_REQ 50 60#define CIFS_MAX_REQ 32767
66 61
67#define RFC1001_NAME_LEN 15 62#define RFC1001_NAME_LEN 15
68#define RFC1001_NAME_LEN_WITH_NULL (RFC1001_NAME_LEN + 1) 63#define RFC1001_NAME_LEN_WITH_NULL (RFC1001_NAME_LEN + 1)
@@ -255,7 +250,9 @@ struct TCP_Server_Info {
255 bool noblocksnd; /* use blocking sendmsg */ 250 bool noblocksnd; /* use blocking sendmsg */
256 bool noautotune; /* do not autotune send buf sizes */ 251 bool noautotune; /* do not autotune send buf sizes */
257 bool tcp_nodelay; 252 bool tcp_nodelay;
258 atomic_t inFlight; /* number of requests on the wire to server */ 253 int credits; /* send no more requests at once */
254 unsigned int in_flight; /* number of requests on the wire to server */
255 spinlock_t req_lock; /* protect the two values above */
259 struct mutex srv_mutex; 256 struct mutex srv_mutex;
260 struct task_struct *tsk; 257 struct task_struct *tsk;
261 char server_GUID[16]; 258 char server_GUID[16];
@@ -263,6 +260,7 @@ struct TCP_Server_Info {
263 bool session_estab; /* mark when very first sess is established */ 260 bool session_estab; /* mark when very first sess is established */
264 u16 dialect; /* dialect index that server chose */ 261 u16 dialect; /* dialect index that server chose */
265 enum securityEnum secType; 262 enum securityEnum secType;
263 bool oplocks:1; /* enable oplocks */
266 unsigned int maxReq; /* Clients should submit no more */ 264 unsigned int maxReq; /* Clients should submit no more */
267 /* than maxReq distinct unanswered SMBs to the server when using */ 265 /* than maxReq distinct unanswered SMBs to the server when using */
268 /* multiplexed reads or writes */ 266 /* multiplexed reads or writes */
@@ -307,6 +305,36 @@ struct TCP_Server_Info {
307#endif 305#endif
308}; 306};
309 307
308static inline unsigned int
309in_flight(struct TCP_Server_Info *server)
310{
311 unsigned int num;
312 spin_lock(&server->req_lock);
313 num = server->in_flight;
314 spin_unlock(&server->req_lock);
315 return num;
316}
317
318static inline int*
319get_credits_field(struct TCP_Server_Info *server)
320{
321 /*
322 * This will change to switch statement when we reserve slots for echos
323 * and oplock breaks.
324 */
325 return &server->credits;
326}
327
328static inline bool
329has_credits(struct TCP_Server_Info *server, int *credits)
330{
331 int num;
332 spin_lock(&server->req_lock);
333 num = *credits;
334 spin_unlock(&server->req_lock);
335 return num > 0;
336}
337
310/* 338/*
311 * Macros to allow the TCP_Server_Info->net field and related code to drop out 339 * Macros to allow the TCP_Server_Info->net field and related code to drop out
312 * when CONFIG_NET_NS isn't set. 340 * when CONFIG_NET_NS isn't set.
@@ -1010,9 +1038,6 @@ GLOBAL_EXTERN unsigned int cifs_min_rcv; /* min size of big ntwrk buf pool */
1010GLOBAL_EXTERN unsigned int cifs_min_small; /* min size of small buf pool */ 1038GLOBAL_EXTERN unsigned int cifs_min_small; /* min size of small buf pool */
1011GLOBAL_EXTERN unsigned int cifs_max_pending; /* MAX requests at once to server*/ 1039GLOBAL_EXTERN unsigned int cifs_max_pending; /* MAX requests at once to server*/
1012 1040
1013/* reconnect after this many failed echo attempts */
1014GLOBAL_EXTERN unsigned short echo_retries;
1015
1016#ifdef CONFIG_CIFS_ACL 1041#ifdef CONFIG_CIFS_ACL
1017GLOBAL_EXTERN struct rb_root uidtree; 1042GLOBAL_EXTERN struct rb_root uidtree;
1018GLOBAL_EXTERN struct rb_root gidtree; 1043GLOBAL_EXTERN struct rb_root gidtree;
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 6f4e243e0f62..503e73d8bdb7 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -88,6 +88,9 @@ extern int SendReceiveBlockingLock(const unsigned int xid,
88 struct smb_hdr *in_buf , 88 struct smb_hdr *in_buf ,
89 struct smb_hdr *out_buf, 89 struct smb_hdr *out_buf,
90 int *bytes_returned); 90 int *bytes_returned);
91extern void cifs_add_credits(struct TCP_Server_Info *server,
92 const unsigned int add);
93extern void cifs_set_credits(struct TCP_Server_Info *server, const int val);
91extern int checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length); 94extern int checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length);
92extern bool is_valid_oplock_break(struct smb_hdr *smb, 95extern bool is_valid_oplock_break(struct smb_hdr *smb,
93 struct TCP_Server_Info *); 96 struct TCP_Server_Info *);
@@ -168,7 +171,13 @@ extern struct smb_vol *cifs_get_volume_info(char *mount_data,
168 const char *devname); 171 const char *devname);
169extern int cifs_mount(struct cifs_sb_info *, struct smb_vol *); 172extern int cifs_mount(struct cifs_sb_info *, struct smb_vol *);
170extern void cifs_umount(struct cifs_sb_info *); 173extern void cifs_umount(struct cifs_sb_info *);
174
175#if IS_ENABLED(CONFIG_CIFS_DFS_UPCALL)
171extern void cifs_dfs_release_automount_timer(void); 176extern void cifs_dfs_release_automount_timer(void);
177#else /* ! IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) */
178#define cifs_dfs_release_automount_timer() do { } while (0)
179#endif /* ! IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) */
180
172void cifs_proc_init(void); 181void cifs_proc_init(void);
173void cifs_proc_clean(void); 182void cifs_proc_clean(void);
174 183
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 8b7794c31591..70aac35c398f 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -458,7 +458,10 @@ CIFSSMBNegotiate(unsigned int xid, struct cifs_ses *ses)
458 goto neg_err_exit; 458 goto neg_err_exit;
459 } 459 }
460 server->sec_mode = (__u8)le16_to_cpu(rsp->SecurityMode); 460 server->sec_mode = (__u8)le16_to_cpu(rsp->SecurityMode);
461 server->maxReq = le16_to_cpu(rsp->MaxMpxCount); 461 server->maxReq = min_t(unsigned int,
462 le16_to_cpu(rsp->MaxMpxCount),
463 cifs_max_pending);
464 cifs_set_credits(server, server->maxReq);
462 server->maxBuf = le16_to_cpu(rsp->MaxBufSize); 465 server->maxBuf = le16_to_cpu(rsp->MaxBufSize);
463 server->max_vcs = le16_to_cpu(rsp->MaxNumberVcs); 466 server->max_vcs = le16_to_cpu(rsp->MaxNumberVcs);
464 /* even though we do not use raw we might as well set this 467 /* even though we do not use raw we might as well set this
@@ -564,7 +567,9 @@ CIFSSMBNegotiate(unsigned int xid, struct cifs_ses *ses)
564 567
565 /* one byte, so no need to convert this or EncryptionKeyLen from 568 /* one byte, so no need to convert this or EncryptionKeyLen from
566 little endian */ 569 little endian */
567 server->maxReq = le16_to_cpu(pSMBr->MaxMpxCount); 570 server->maxReq = min_t(unsigned int, le16_to_cpu(pSMBr->MaxMpxCount),
571 cifs_max_pending);
572 cifs_set_credits(server, server->maxReq);
568 /* probably no need to store and check maxvcs */ 573 /* probably no need to store and check maxvcs */
569 server->maxBuf = le32_to_cpu(pSMBr->MaxBufferSize); 574 server->maxBuf = le32_to_cpu(pSMBr->MaxBufferSize);
570 server->max_rw = le32_to_cpu(pSMBr->MaxRawSize); 575 server->max_rw = le32_to_cpu(pSMBr->MaxRawSize);
@@ -716,8 +721,7 @@ cifs_echo_callback(struct mid_q_entry *mid)
716 struct TCP_Server_Info *server = mid->callback_data; 721 struct TCP_Server_Info *server = mid->callback_data;
717 722
718 DeleteMidQEntry(mid); 723 DeleteMidQEntry(mid);
719 atomic_dec(&server->inFlight); 724 cifs_add_credits(server, 1);
720 wake_up(&server->request_q);
721} 725}
722 726
723int 727int
@@ -1669,8 +1673,7 @@ cifs_readv_callback(struct mid_q_entry *mid)
1669 1673
1670 queue_work(system_nrt_wq, &rdata->work); 1674 queue_work(system_nrt_wq, &rdata->work);
1671 DeleteMidQEntry(mid); 1675 DeleteMidQEntry(mid);
1672 atomic_dec(&server->inFlight); 1676 cifs_add_credits(server, 1);
1673 wake_up(&server->request_q);
1674} 1677}
1675 1678
1676/* cifs_async_readv - send an async write, and set up mid to handle result */ 1679/* cifs_async_readv - send an async write, and set up mid to handle result */
@@ -2110,8 +2113,7 @@ cifs_writev_callback(struct mid_q_entry *mid)
2110 2113
2111 queue_work(system_nrt_wq, &wdata->work); 2114 queue_work(system_nrt_wq, &wdata->work);
2112 DeleteMidQEntry(mid); 2115 DeleteMidQEntry(mid);
2113 atomic_dec(&tcon->ses->server->inFlight); 2116 cifs_add_credits(tcon->ses->server, 1);
2114 wake_up(&tcon->ses->server->request_q);
2115} 2117}
2116 2118
2117/* cifs_async_writev - send an async write, and set up mid to handle result */ 2119/* cifs_async_writev - send an async write, and set up mid to handle result */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 602f77c304c9..5560e1d5e54b 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -373,12 +373,22 @@ allocate_buffers(struct TCP_Server_Info *server)
373static bool 373static bool
374server_unresponsive(struct TCP_Server_Info *server) 374server_unresponsive(struct TCP_Server_Info *server)
375{ 375{
376 if (echo_retries > 0 && server->tcpStatus == CifsGood && 376 /*
377 time_after(jiffies, server->lstrp + 377 * We need to wait 2 echo intervals to make sure we handle such
378 (echo_retries * SMB_ECHO_INTERVAL))) { 378 * situations right:
379 * 1s client sends a normal SMB request
380 * 2s client gets a response
381 * 30s echo workqueue job pops, and decides we got a response recently
382 * and don't need to send another
383 * ...
384 * 65s kernel_recvmsg times out, and we see that we haven't gotten
385 * a response in >60s.
386 */
387 if (server->tcpStatus == CifsGood &&
388 time_after(jiffies, server->lstrp + 2 * SMB_ECHO_INTERVAL)) {
379 cERROR(1, "Server %s has not responded in %d seconds. " 389 cERROR(1, "Server %s has not responded in %d seconds. "
380 "Reconnecting...", server->hostname, 390 "Reconnecting...", server->hostname,
381 (echo_retries * SMB_ECHO_INTERVAL / HZ)); 391 (2 * SMB_ECHO_INTERVAL) / HZ);
382 cifs_reconnect(server); 392 cifs_reconnect(server);
383 wake_up(&server->response_q); 393 wake_up(&server->response_q);
384 return true; 394 return true;
@@ -642,19 +652,11 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
642 spin_unlock(&GlobalMid_Lock); 652 spin_unlock(&GlobalMid_Lock);
643 wake_up_all(&server->response_q); 653 wake_up_all(&server->response_q);
644 654
645 /* 655 /* check if we have blocked requests that need to free */
646 * Check if we have blocked requests that need to free. Note that 656 spin_lock(&server->req_lock);
647 * cifs_max_pending is normally 50, but can be set at module install 657 if (server->credits <= 0)
648 * time to as little as two. 658 server->credits = 1;
649 */ 659 spin_unlock(&server->req_lock);
650 spin_lock(&GlobalMid_Lock);
651 if (atomic_read(&server->inFlight) >= cifs_max_pending)
652 atomic_set(&server->inFlight, cifs_max_pending - 1);
653 /*
654 * We do not want to set the max_pending too low or we could end up
655 * with the counter going negative.
656 */
657 spin_unlock(&GlobalMid_Lock);
658 /* 660 /*
659 * Although there should not be any requests blocked on this queue it 661 * Although there should not be any requests blocked on this queue it
660 * can not hurt to be paranoid and try to wake up requests that may 662 * can not hurt to be paranoid and try to wake up requests that may
@@ -1909,7 +1911,8 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1909 tcp_ses->noblocksnd = volume_info->noblocksnd; 1911 tcp_ses->noblocksnd = volume_info->noblocksnd;
1910 tcp_ses->noautotune = volume_info->noautotune; 1912 tcp_ses->noautotune = volume_info->noautotune;
1911 tcp_ses->tcp_nodelay = volume_info->sockopt_tcp_nodelay; 1913 tcp_ses->tcp_nodelay = volume_info->sockopt_tcp_nodelay;
1912 atomic_set(&tcp_ses->inFlight, 0); 1914 tcp_ses->in_flight = 0;
1915 tcp_ses->credits = 1;
1913 init_waitqueue_head(&tcp_ses->response_q); 1916 init_waitqueue_head(&tcp_ses->response_q);
1914 init_waitqueue_head(&tcp_ses->request_q); 1917 init_waitqueue_head(&tcp_ses->request_q);
1915 INIT_LIST_HEAD(&tcp_ses->pending_mid_q); 1918 INIT_LIST_HEAD(&tcp_ses->pending_mid_q);
@@ -3371,7 +3374,7 @@ cifs_ra_pages(struct cifs_sb_info *cifs_sb)
3371int 3374int
3372cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) 3375cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info)
3373{ 3376{
3374 int rc = 0; 3377 int rc;
3375 int xid; 3378 int xid;
3376 struct cifs_ses *pSesInfo; 3379 struct cifs_ses *pSesInfo;
3377 struct cifs_tcon *tcon; 3380 struct cifs_tcon *tcon;
@@ -3398,6 +3401,7 @@ try_mount_again:
3398 FreeXid(xid); 3401 FreeXid(xid);
3399 } 3402 }
3400#endif 3403#endif
3404 rc = 0;
3401 tcon = NULL; 3405 tcon = NULL;
3402 pSesInfo = NULL; 3406 pSesInfo = NULL;
3403 srvTcp = NULL; 3407 srvTcp = NULL;
@@ -3759,9 +3763,11 @@ int cifs_negotiate_protocol(unsigned int xid, struct cifs_ses *ses)
3759 if (server->maxBuf != 0) 3763 if (server->maxBuf != 0)
3760 return 0; 3764 return 0;
3761 3765
3766 cifs_set_credits(server, 1);
3762 rc = CIFSSMBNegotiate(xid, ses); 3767 rc = CIFSSMBNegotiate(xid, ses);
3763 if (rc == -EAGAIN) { 3768 if (rc == -EAGAIN) {
3764 /* retry only once on 1st time connection */ 3769 /* retry only once on 1st time connection */
3770 cifs_set_credits(server, 1);
3765 rc = CIFSSMBNegotiate(xid, ses); 3771 rc = CIFSSMBNegotiate(xid, ses);
3766 if (rc == -EAGAIN) 3772 if (rc == -EAGAIN)
3767 rc = -EHOSTDOWN; 3773 rc = -EHOSTDOWN;
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 63a196b97d50..d172c8ed9017 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -171,7 +171,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
171 } 171 }
172 tcon = tlink_tcon(tlink); 172 tcon = tlink_tcon(tlink);
173 173
174 if (enable_oplocks) 174 if (tcon->ses->server->oplocks)
175 oplock = REQ_OPLOCK; 175 oplock = REQ_OPLOCK;
176 176
177 if (nd) 177 if (nd)
@@ -492,7 +492,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
492{ 492{
493 int xid; 493 int xid;
494 int rc = 0; /* to get around spurious gcc warning, set to zero here */ 494 int rc = 0; /* to get around spurious gcc warning, set to zero here */
495 __u32 oplock = enable_oplocks ? REQ_OPLOCK : 0; 495 __u32 oplock;
496 __u16 fileHandle = 0; 496 __u16 fileHandle = 0;
497 bool posix_open = false; 497 bool posix_open = false;
498 struct cifs_sb_info *cifs_sb; 498 struct cifs_sb_info *cifs_sb;
@@ -518,6 +518,8 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
518 } 518 }
519 pTcon = tlink_tcon(tlink); 519 pTcon = tlink_tcon(tlink);
520 520
521 oplock = pTcon->ses->server->oplocks ? REQ_OPLOCK : 0;
522
521 /* 523 /*
522 * Don't allow the separator character in a path component. 524 * Don't allow the separator character in a path component.
523 * The VFS will not allow "/", but "\" is allowed by posix. 525 * The VFS will not allow "/", but "\" is allowed by posix.
@@ -584,10 +586,26 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
584 * If either that or op not supported returned, follow 586 * If either that or op not supported returned, follow
585 * the normal lookup. 587 * the normal lookup.
586 */ 588 */
587 if ((rc == 0) || (rc == -ENOENT)) 589 switch (rc) {
590 case 0:
591 /*
592 * The server may allow us to open things like
593 * FIFOs, but the client isn't set up to deal
594 * with that. If it's not a regular file, just
595 * close it and proceed as if it were a normal
596 * lookup.
597 */
598 if (newInode && !S_ISREG(newInode->i_mode)) {
599 CIFSSMBClose(xid, pTcon, fileHandle);
600 break;
601 }
602 case -ENOENT:
588 posix_open = true; 603 posix_open = true;
589 else if ((rc == -EINVAL) || (rc != -EOPNOTSUPP)) 604 case -EOPNOTSUPP:
605 break;
606 default:
590 pTcon->broken_posix_open = true; 607 pTcon->broken_posix_open = true;
608 }
591 } 609 }
592 if (!posix_open) 610 if (!posix_open)
593 rc = cifs_get_inode_info_unix(&newInode, full_path, 611 rc = cifs_get_inode_info_unix(&newInode, full_path,
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 4dd9283885e7..159fcc56dc2d 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -380,7 +380,7 @@ int cifs_open(struct inode *inode, struct file *file)
380 cFYI(1, "inode = 0x%p file flags are 0x%x for %s", 380 cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
381 inode, file->f_flags, full_path); 381 inode, file->f_flags, full_path);
382 382
383 if (enable_oplocks) 383 if (tcon->ses->server->oplocks)
384 oplock = REQ_OPLOCK; 384 oplock = REQ_OPLOCK;
385 else 385 else
386 oplock = 0; 386 oplock = 0;
@@ -505,7 +505,7 @@ static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush)
505 cFYI(1, "inode = 0x%p file flags 0x%x for %s", 505 cFYI(1, "inode = 0x%p file flags 0x%x for %s",
506 inode, pCifsFile->f_flags, full_path); 506 inode, pCifsFile->f_flags, full_path);
507 507
508 if (enable_oplocks) 508 if (tcon->ses->server->oplocks)
509 oplock = REQ_OPLOCK; 509 oplock = REQ_OPLOCK;
510 else 510 else
511 oplock = 0; 511 oplock = 0;
@@ -920,16 +920,26 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
920 for (lockp = &inode->i_flock; *lockp != NULL; \ 920 for (lockp = &inode->i_flock; *lockp != NULL; \
921 lockp = &(*lockp)->fl_next) 921 lockp = &(*lockp)->fl_next)
922 922
923struct lock_to_push {
924 struct list_head llist;
925 __u64 offset;
926 __u64 length;
927 __u32 pid;
928 __u16 netfid;
929 __u8 type;
930};
931
923static int 932static int
924cifs_push_posix_locks(struct cifsFileInfo *cfile) 933cifs_push_posix_locks(struct cifsFileInfo *cfile)
925{ 934{
926 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); 935 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
927 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 936 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
928 struct file_lock *flock, **before; 937 struct file_lock *flock, **before;
929 struct cifsLockInfo *lck, *tmp; 938 unsigned int count = 0, i = 0;
930 int rc = 0, xid, type; 939 int rc = 0, xid, type;
940 struct list_head locks_to_send, *el;
941 struct lock_to_push *lck, *tmp;
931 __u64 length; 942 __u64 length;
932 struct list_head locks_to_send;
933 943
934 xid = GetXid(); 944 xid = GetXid();
935 945
@@ -940,29 +950,56 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
940 return rc; 950 return rc;
941 } 951 }
942 952
953 lock_flocks();
954 cifs_for_each_lock(cfile->dentry->d_inode, before) {
955 if ((*before)->fl_flags & FL_POSIX)
956 count++;
957 }
958 unlock_flocks();
959
943 INIT_LIST_HEAD(&locks_to_send); 960 INIT_LIST_HEAD(&locks_to_send);
944 961
962 /*
963 * Allocating count locks is enough because no FL_POSIX locks can be
964 * added to the list while we are holding cinode->lock_mutex that
965 * protects locking operations of this inode.
966 */
967 for (; i < count; i++) {
968 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
969 if (!lck) {
970 rc = -ENOMEM;
971 goto err_out;
972 }
973 list_add_tail(&lck->llist, &locks_to_send);
974 }
975
976 el = locks_to_send.next;
945 lock_flocks(); 977 lock_flocks();
946 cifs_for_each_lock(cfile->dentry->d_inode, before) { 978 cifs_for_each_lock(cfile->dentry->d_inode, before) {
947 flock = *before; 979 flock = *before;
980 if ((flock->fl_flags & FL_POSIX) == 0)
981 continue;
982 if (el == &locks_to_send) {
983 /*
984 * The list ended. We don't have enough allocated
985 * structures - something is really wrong.
986 */
987 cERROR(1, "Can't push all brlocks!");
988 break;
989 }
948 length = 1 + flock->fl_end - flock->fl_start; 990 length = 1 + flock->fl_end - flock->fl_start;
949 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK) 991 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
950 type = CIFS_RDLCK; 992 type = CIFS_RDLCK;
951 else 993 else
952 type = CIFS_WRLCK; 994 type = CIFS_WRLCK;
953 995 lck = list_entry(el, struct lock_to_push, llist);
954 lck = cifs_lock_init(flock->fl_start, length, type,
955 cfile->netfid);
956 if (!lck) {
957 rc = -ENOMEM;
958 goto send_locks;
959 }
960 lck->pid = flock->fl_pid; 996 lck->pid = flock->fl_pid;
961 997 lck->netfid = cfile->netfid;
962 list_add_tail(&lck->llist, &locks_to_send); 998 lck->length = length;
999 lck->type = type;
1000 lck->offset = flock->fl_start;
1001 el = el->next;
963 } 1002 }
964
965send_locks:
966 unlock_flocks(); 1003 unlock_flocks();
967 1004
968 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) { 1005 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
@@ -979,11 +1016,18 @@ send_locks:
979 kfree(lck); 1016 kfree(lck);
980 } 1017 }
981 1018
1019out:
982 cinode->can_cache_brlcks = false; 1020 cinode->can_cache_brlcks = false;
983 mutex_unlock(&cinode->lock_mutex); 1021 mutex_unlock(&cinode->lock_mutex);
984 1022
985 FreeXid(xid); 1023 FreeXid(xid);
986 return rc; 1024 return rc;
1025err_out:
1026 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1027 list_del(&lck->llist);
1028 kfree(lck);
1029 }
1030 goto out;
987} 1031}
988 1032
989static int 1033static int
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index a5f54b7d9822..745da3d0653e 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -534,6 +534,11 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
534 if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { 534 if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
535 fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode; 535 fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode;
536 fattr->cf_dtype = DT_DIR; 536 fattr->cf_dtype = DT_DIR;
537 /*
538 * Server can return wrong NumberOfLinks value for directories
539 * when Unix extensions are disabled - fake it.
540 */
541 fattr->cf_nlink = 2;
537 } else { 542 } else {
538 fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode; 543 fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode;
539 fattr->cf_dtype = DT_REG; 544 fattr->cf_dtype = DT_REG;
@@ -541,9 +546,9 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
541 /* clear write bits if ATTR_READONLY is set */ 546 /* clear write bits if ATTR_READONLY is set */
542 if (fattr->cf_cifsattrs & ATTR_READONLY) 547 if (fattr->cf_cifsattrs & ATTR_READONLY)
543 fattr->cf_mode &= ~(S_IWUGO); 548 fattr->cf_mode &= ~(S_IWUGO);
544 }
545 549
546 fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks); 550 fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks);
551 }
547 552
548 fattr->cf_uid = cifs_sb->mnt_uid; 553 fattr->cf_uid = cifs_sb->mnt_uid;
549 fattr->cf_gid = cifs_sb->mnt_gid; 554 fattr->cf_gid = cifs_sb->mnt_gid;
@@ -1322,7 +1327,6 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode)
1322 } 1327 }
1323/*BB check (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID ) to see if need 1328/*BB check (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID ) to see if need
1324 to set uid/gid */ 1329 to set uid/gid */
1325 inc_nlink(inode);
1326 1330
1327 cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb); 1331 cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb);
1328 cifs_fill_uniqueid(inode->i_sb, &fattr); 1332 cifs_fill_uniqueid(inode->i_sb, &fattr);
@@ -1355,7 +1359,6 @@ mkdir_retry_old:
1355 d_drop(direntry); 1359 d_drop(direntry);
1356 } else { 1360 } else {
1357mkdir_get_info: 1361mkdir_get_info:
1358 inc_nlink(inode);
1359 if (pTcon->unix_ext) 1362 if (pTcon->unix_ext)
1360 rc = cifs_get_inode_info_unix(&newinode, full_path, 1363 rc = cifs_get_inode_info_unix(&newinode, full_path,
1361 inode->i_sb, xid); 1364 inode->i_sb, xid);
@@ -1436,6 +1439,11 @@ mkdir_get_info:
1436 } 1439 }
1437 } 1440 }
1438mkdir_out: 1441mkdir_out:
1442 /*
1443 * Force revalidate to get parent dir info when needed since cached
1444 * attributes are invalid now.
1445 */
1446 CIFS_I(inode)->time = 0;
1439 kfree(full_path); 1447 kfree(full_path);
1440 FreeXid(xid); 1448 FreeXid(xid);
1441 cifs_put_tlink(tlink); 1449 cifs_put_tlink(tlink);
@@ -1475,7 +1483,6 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
1475 cifs_put_tlink(tlink); 1483 cifs_put_tlink(tlink);
1476 1484
1477 if (!rc) { 1485 if (!rc) {
1478 drop_nlink(inode);
1479 spin_lock(&direntry->d_inode->i_lock); 1486 spin_lock(&direntry->d_inode->i_lock);
1480 i_size_write(direntry->d_inode, 0); 1487 i_size_write(direntry->d_inode, 0);
1481 clear_nlink(direntry->d_inode); 1488 clear_nlink(direntry->d_inode);
@@ -1483,12 +1490,15 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
1483 } 1490 }
1484 1491
1485 cifsInode = CIFS_I(direntry->d_inode); 1492 cifsInode = CIFS_I(direntry->d_inode);
1486 cifsInode->time = 0; /* force revalidate to go get info when 1493 /* force revalidate to go get info when needed */
1487 needed */ 1494 cifsInode->time = 0;
1488 1495
1489 cifsInode = CIFS_I(inode); 1496 cifsInode = CIFS_I(inode);
1490 cifsInode->time = 0; /* force revalidate to get parent dir info 1497 /*
1491 since cached search results now invalid */ 1498 * Force revalidate to get parent dir info when needed since cached
1499 * attributes are invalid now.
1500 */
1501 cifsInode->time = 0;
1492 1502
1493 direntry->d_inode->i_ctime = inode->i_ctime = inode->i_mtime = 1503 direntry->d_inode->i_ctime = inode->i_ctime = inode->i_mtime =
1494 current_fs_time(inode->i_sb); 1504 current_fs_time(inode->i_sb);
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 703ef5c6fdb1..c273c12de98e 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -690,3 +690,22 @@ backup_cred(struct cifs_sb_info *cifs_sb)
690 690
691 return false; 691 return false;
692} 692}
693
694void
695cifs_add_credits(struct TCP_Server_Info *server, const unsigned int add)
696{
697 spin_lock(&server->req_lock);
698 server->credits += add;
699 server->in_flight--;
700 spin_unlock(&server->req_lock);
701 wake_up(&server->request_q);
702}
703
704void
705cifs_set_credits(struct TCP_Server_Info *server, const int val)
706{
707 spin_lock(&server->req_lock);
708 server->credits = val;
709 server->oplocks = val > 1 ? enable_oplocks : false;
710 spin_unlock(&server->req_lock);
711}
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 0cc9584f5889..310918b6fcb4 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -254,44 +254,60 @@ smb_send(struct TCP_Server_Info *server, struct smb_hdr *smb_buffer,
254 return smb_sendv(server, &iov, 1); 254 return smb_sendv(server, &iov, 1);
255} 255}
256 256
257static int wait_for_free_request(struct TCP_Server_Info *server, 257static int
258 const int long_op) 258wait_for_free_credits(struct TCP_Server_Info *server, const int optype,
259 int *credits)
259{ 260{
260 if (long_op == CIFS_ASYNC_OP) { 261 int rc;
262
263 spin_lock(&server->req_lock);
264 if (optype == CIFS_ASYNC_OP) {
261 /* oplock breaks must not be held up */ 265 /* oplock breaks must not be held up */
262 atomic_inc(&server->inFlight); 266 server->in_flight++;
267 *credits -= 1;
268 spin_unlock(&server->req_lock);
263 return 0; 269 return 0;
264 } 270 }
265 271
266 spin_lock(&GlobalMid_Lock);
267 while (1) { 272 while (1) {
268 if (atomic_read(&server->inFlight) >= cifs_max_pending) { 273 if (*credits <= 0) {
269 spin_unlock(&GlobalMid_Lock); 274 spin_unlock(&server->req_lock);
270 cifs_num_waiters_inc(server); 275 cifs_num_waiters_inc(server);
271 wait_event(server->request_q, 276 rc = wait_event_killable(server->request_q,
272 atomic_read(&server->inFlight) 277 has_credits(server, credits));
273 < cifs_max_pending);
274 cifs_num_waiters_dec(server); 278 cifs_num_waiters_dec(server);
275 spin_lock(&GlobalMid_Lock); 279 if (rc)
280 return rc;
281 spin_lock(&server->req_lock);
276 } else { 282 } else {
277 if (server->tcpStatus == CifsExiting) { 283 if (server->tcpStatus == CifsExiting) {
278 spin_unlock(&GlobalMid_Lock); 284 spin_unlock(&server->req_lock);
279 return -ENOENT; 285 return -ENOENT;
280 } 286 }
281 287
282 /* can not count locking commands against total 288 /*
283 as they are allowed to block on server */ 289 * Can not count locking commands against total
290 * as they are allowed to block on server.
291 */
284 292
285 /* update # of requests on the wire to server */ 293 /* update # of requests on the wire to server */
286 if (long_op != CIFS_BLOCKING_OP) 294 if (optype != CIFS_BLOCKING_OP) {
287 atomic_inc(&server->inFlight); 295 *credits -= 1;
288 spin_unlock(&GlobalMid_Lock); 296 server->in_flight++;
297 }
298 spin_unlock(&server->req_lock);
289 break; 299 break;
290 } 300 }
291 } 301 }
292 return 0; 302 return 0;
293} 303}
294 304
305static int
306wait_for_free_request(struct TCP_Server_Info *server, const int optype)
307{
308 return wait_for_free_credits(server, optype, get_credits_field(server));
309}
310
295static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf, 311static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf,
296 struct mid_q_entry **ppmidQ) 312 struct mid_q_entry **ppmidQ)
297{ 313{
@@ -359,7 +375,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov,
359 mid = AllocMidQEntry(hdr, server); 375 mid = AllocMidQEntry(hdr, server);
360 if (mid == NULL) { 376 if (mid == NULL) {
361 mutex_unlock(&server->srv_mutex); 377 mutex_unlock(&server->srv_mutex);
362 atomic_dec(&server->inFlight); 378 cifs_add_credits(server, 1);
363 wake_up(&server->request_q); 379 wake_up(&server->request_q);
364 return -ENOMEM; 380 return -ENOMEM;
365 } 381 }
@@ -392,7 +408,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov,
392 return rc; 408 return rc;
393out_err: 409out_err:
394 delete_mid(mid); 410 delete_mid(mid);
395 atomic_dec(&server->inFlight); 411 cifs_add_credits(server, 1);
396 wake_up(&server->request_q); 412 wake_up(&server->request_q);
397 return rc; 413 return rc;
398} 414}
@@ -564,8 +580,7 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
564 mutex_unlock(&ses->server->srv_mutex); 580 mutex_unlock(&ses->server->srv_mutex);
565 cifs_small_buf_release(in_buf); 581 cifs_small_buf_release(in_buf);
566 /* Update # of requests on wire to server */ 582 /* Update # of requests on wire to server */
567 atomic_dec(&ses->server->inFlight); 583 cifs_add_credits(ses->server, 1);
568 wake_up(&ses->server->request_q);
569 return rc; 584 return rc;
570 } 585 }
571 rc = cifs_sign_smb2(iov, n_vec, ses->server, &midQ->sequence_number); 586 rc = cifs_sign_smb2(iov, n_vec, ses->server, &midQ->sequence_number);
@@ -601,8 +616,7 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
601 midQ->callback = DeleteMidQEntry; 616 midQ->callback = DeleteMidQEntry;
602 spin_unlock(&GlobalMid_Lock); 617 spin_unlock(&GlobalMid_Lock);
603 cifs_small_buf_release(in_buf); 618 cifs_small_buf_release(in_buf);
604 atomic_dec(&ses->server->inFlight); 619 cifs_add_credits(ses->server, 1);
605 wake_up(&ses->server->request_q);
606 return rc; 620 return rc;
607 } 621 }
608 spin_unlock(&GlobalMid_Lock); 622 spin_unlock(&GlobalMid_Lock);
@@ -612,8 +626,7 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
612 626
613 rc = cifs_sync_mid_result(midQ, ses->server); 627 rc = cifs_sync_mid_result(midQ, ses->server);
614 if (rc != 0) { 628 if (rc != 0) {
615 atomic_dec(&ses->server->inFlight); 629 cifs_add_credits(ses->server, 1);
616 wake_up(&ses->server->request_q);
617 return rc; 630 return rc;
618 } 631 }
619 632
@@ -637,8 +650,7 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
637 midQ->resp_buf = NULL; 650 midQ->resp_buf = NULL;
638out: 651out:
639 delete_mid(midQ); 652 delete_mid(midQ);
640 atomic_dec(&ses->server->inFlight); 653 cifs_add_credits(ses->server, 1);
641 wake_up(&ses->server->request_q);
642 654
643 return rc; 655 return rc;
644} 656}
@@ -688,8 +700,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses,
688 if (rc) { 700 if (rc) {
689 mutex_unlock(&ses->server->srv_mutex); 701 mutex_unlock(&ses->server->srv_mutex);
690 /* Update # of requests on wire to server */ 702 /* Update # of requests on wire to server */
691 atomic_dec(&ses->server->inFlight); 703 cifs_add_credits(ses->server, 1);
692 wake_up(&ses->server->request_q);
693 return rc; 704 return rc;
694 } 705 }
695 706
@@ -721,8 +732,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses,
721 /* no longer considered to be "in-flight" */ 732 /* no longer considered to be "in-flight" */
722 midQ->callback = DeleteMidQEntry; 733 midQ->callback = DeleteMidQEntry;
723 spin_unlock(&GlobalMid_Lock); 734 spin_unlock(&GlobalMid_Lock);
724 atomic_dec(&ses->server->inFlight); 735 cifs_add_credits(ses->server, 1);
725 wake_up(&ses->server->request_q);
726 return rc; 736 return rc;
727 } 737 }
728 spin_unlock(&GlobalMid_Lock); 738 spin_unlock(&GlobalMid_Lock);
@@ -730,8 +740,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses,
730 740
731 rc = cifs_sync_mid_result(midQ, ses->server); 741 rc = cifs_sync_mid_result(midQ, ses->server);
732 if (rc != 0) { 742 if (rc != 0) {
733 atomic_dec(&ses->server->inFlight); 743 cifs_add_credits(ses->server, 1);
734 wake_up(&ses->server->request_q);
735 return rc; 744 return rc;
736 } 745 }
737 746
@@ -747,8 +756,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses,
747 rc = cifs_check_receive(midQ, ses->server, 0); 756 rc = cifs_check_receive(midQ, ses->server, 0);
748out: 757out:
749 delete_mid(midQ); 758 delete_mid(midQ);
750 atomic_dec(&ses->server->inFlight); 759 cifs_add_credits(ses->server, 1);
751 wake_up(&ses->server->request_q);
752 760
753 return rc; 761 return rc;
754} 762}
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 45f07c46f3ed..10d92cf57ab6 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -105,7 +105,6 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
105 struct cifs_tcon *pTcon; 105 struct cifs_tcon *pTcon;
106 struct super_block *sb; 106 struct super_block *sb;
107 char *full_path; 107 char *full_path;
108 struct cifs_ntsd *pacl;
109 108
110 if (direntry == NULL) 109 if (direntry == NULL)
111 return -EIO; 110 return -EIO;
@@ -164,23 +163,24 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
164 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 163 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
165 } else if (strncmp(ea_name, CIFS_XATTR_CIFS_ACL, 164 } else if (strncmp(ea_name, CIFS_XATTR_CIFS_ACL,
166 strlen(CIFS_XATTR_CIFS_ACL)) == 0) { 165 strlen(CIFS_XATTR_CIFS_ACL)) == 0) {
166#ifdef CONFIG_CIFS_ACL
167 struct cifs_ntsd *pacl;
167 pacl = kmalloc(value_size, GFP_KERNEL); 168 pacl = kmalloc(value_size, GFP_KERNEL);
168 if (!pacl) { 169 if (!pacl) {
169 cFYI(1, "%s: Can't allocate memory for ACL", 170 cFYI(1, "%s: Can't allocate memory for ACL",
170 __func__); 171 __func__);
171 rc = -ENOMEM; 172 rc = -ENOMEM;
172 } else { 173 } else {
173#ifdef CONFIG_CIFS_ACL
174 memcpy(pacl, ea_value, value_size); 174 memcpy(pacl, ea_value, value_size);
175 rc = set_cifs_acl(pacl, value_size, 175 rc = set_cifs_acl(pacl, value_size,
176 direntry->d_inode, full_path, CIFS_ACL_DACL); 176 direntry->d_inode, full_path, CIFS_ACL_DACL);
177 if (rc == 0) /* force revalidate of the inode */ 177 if (rc == 0) /* force revalidate of the inode */
178 CIFS_I(direntry->d_inode)->time = 0; 178 CIFS_I(direntry->d_inode)->time = 0;
179 kfree(pacl); 179 kfree(pacl);
180 }
180#else 181#else
181 cFYI(1, "Set CIFS ACL not supported yet"); 182 cFYI(1, "Set CIFS ACL not supported yet");
182#endif /* CONFIG_CIFS_ACL */ 183#endif /* CONFIG_CIFS_ACL */
183 }
184 } else { 184 } else {
185 int temp; 185 int temp;
186 temp = strncmp(ea_name, POSIX_ACL_XATTR_ACCESS, 186 temp = strncmp(ea_name, POSIX_ACL_XATTR_ACCESS,
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 5e2e1b3f068d..05156c17b551 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -208,13 +208,12 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
208 if (IS_ERR(root)) { 208 if (IS_ERR(root)) {
209 error = PTR_ERR(root); 209 error = PTR_ERR(root);
210 printk("Failure of coda_cnode_make for root: error %d\n", error); 210 printk("Failure of coda_cnode_make for root: error %d\n", error);
211 root = NULL;
212 goto error; 211 goto error;
213 } 212 }
214 213
215 printk("coda_read_super: rootinode is %ld dev %s\n", 214 printk("coda_read_super: rootinode is %ld dev %s\n",
216 root->i_ino, root->i_sb->s_id); 215 root->i_ino, root->i_sb->s_id);
217 sb->s_root = d_alloc_root(root); 216 sb->s_root = d_make_root(root);
218 if (!sb->s_root) { 217 if (!sb->s_root) {
219 error = -EINVAL; 218 error = -EINVAL;
220 goto error; 219 goto error;
@@ -222,9 +221,6 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
222 return 0; 221 return 0;
223 222
224error: 223error:
225 if (root)
226 iput(root);
227
228 mutex_lock(&vc->vc_mutex); 224 mutex_lock(&vc->vc_mutex);
229 bdi_destroy(&vc->bdi); 225 bdi_destroy(&vc->bdi);
230 vc->vc_sb = NULL; 226 vc->vc_sb = NULL;
diff --git a/fs/compat.c b/fs/compat.c
index 07880bae28a9..14483a715bbb 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -33,7 +33,6 @@
33#include <linux/nfs4_mount.h> 33#include <linux/nfs4_mount.h>
34#include <linux/syscalls.h> 34#include <linux/syscalls.h>
35#include <linux/ctype.h> 35#include <linux/ctype.h>
36#include <linux/module.h>
37#include <linux/dirent.h> 36#include <linux/dirent.h>
38#include <linux/fsnotify.h> 37#include <linux/fsnotify.h>
39#include <linux/highuid.h> 38#include <linux/highuid.h>
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index a26bea10e81b..debdfe0fc809 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -34,7 +34,7 @@
34#include <linux/fs.h> 34#include <linux/fs.h>
35#include <linux/file.h> 35#include <linux/file.h>
36#include <linux/ppp_defs.h> 36#include <linux/ppp_defs.h>
37#include <linux/if_ppp.h> 37#include <linux/ppp-ioctl.h>
38#include <linux/if_pppox.h> 38#include <linux/if_pppox.h>
39#include <linux/mtio.h> 39#include <linux/mtio.h>
40#include <linux/auto_fs.h> 40#include <linux/auto_fs.h>
@@ -49,7 +49,6 @@
49#include <linux/elevator.h> 49#include <linux/elevator.h>
50#include <linux/rtc.h> 50#include <linux/rtc.h>
51#include <linux/pci.h> 51#include <linux/pci.h>
52#include <linux/module.h>
53#include <linux/serial.h> 52#include <linux/serial.h>
54#include <linux/if_tun.h> 53#include <linux/if_tun.h>
55#include <linux/ctype.h> 54#include <linux/ctype.h>
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index ede857d20a04..b5f0a3b91f18 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -58,12 +58,11 @@ struct configfs_dirent {
58extern struct mutex configfs_symlink_mutex; 58extern struct mutex configfs_symlink_mutex;
59extern spinlock_t configfs_dirent_lock; 59extern spinlock_t configfs_dirent_lock;
60 60
61extern struct vfsmount * configfs_mount;
62extern struct kmem_cache *configfs_dir_cachep; 61extern struct kmem_cache *configfs_dir_cachep;
63 62
64extern int configfs_is_root(struct config_item *item); 63extern int configfs_is_root(struct config_item *item);
65 64
66extern struct inode * configfs_new_inode(umode_t mode, struct configfs_dirent *); 65extern struct inode * configfs_new_inode(umode_t mode, struct configfs_dirent *, struct super_block *);
67extern int configfs_create(struct dentry *, umode_t mode, int (*init)(struct inode *)); 66extern int configfs_create(struct dentry *, umode_t mode, int (*init)(struct inode *));
68extern int configfs_inode_init(void); 67extern int configfs_inode_init(void);
69extern void configfs_inode_exit(void); 68extern void configfs_inode_exit(void);
@@ -80,15 +79,15 @@ extern const unsigned char * configfs_get_name(struct configfs_dirent *sd);
80extern void configfs_drop_dentry(struct configfs_dirent *sd, struct dentry *parent); 79extern void configfs_drop_dentry(struct configfs_dirent *sd, struct dentry *parent);
81extern int configfs_setattr(struct dentry *dentry, struct iattr *iattr); 80extern int configfs_setattr(struct dentry *dentry, struct iattr *iattr);
82 81
83extern int configfs_pin_fs(void); 82extern struct dentry *configfs_pin_fs(void);
84extern void configfs_release_fs(void); 83extern void configfs_release_fs(void);
85 84
86extern struct rw_semaphore configfs_rename_sem; 85extern struct rw_semaphore configfs_rename_sem;
87extern struct super_block * configfs_sb;
88extern const struct file_operations configfs_dir_operations; 86extern const struct file_operations configfs_dir_operations;
89extern const struct file_operations configfs_file_operations; 87extern const struct file_operations configfs_file_operations;
90extern const struct file_operations bin_fops; 88extern const struct file_operations bin_fops;
91extern const struct inode_operations configfs_dir_inode_operations; 89extern const struct inode_operations configfs_dir_inode_operations;
90extern const struct inode_operations configfs_root_inode_operations;
92extern const struct inode_operations configfs_symlink_inode_operations; 91extern const struct inode_operations configfs_symlink_inode_operations;
93extern const struct dentry_operations configfs_dentry_ops; 92extern const struct dentry_operations configfs_dentry_ops;
94 93
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 5ddd7ebd9dcd..7e6c52d8a207 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -264,11 +264,13 @@ static int init_symlink(struct inode * inode)
264 return 0; 264 return 0;
265} 265}
266 266
267static int create_dir(struct config_item * k, struct dentry * p, 267static int create_dir(struct config_item *k, struct dentry *d)
268 struct dentry * d)
269{ 268{
270 int error; 269 int error;
271 umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; 270 umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
271 struct dentry *p = d->d_parent;
272
273 BUG_ON(!k);
272 274
273 error = configfs_dirent_exists(p->d_fsdata, d->d_name.name); 275 error = configfs_dirent_exists(p->d_fsdata, d->d_name.name);
274 if (!error) 276 if (!error)
@@ -304,19 +306,7 @@ static int create_dir(struct config_item * k, struct dentry * p,
304 306
305static int configfs_create_dir(struct config_item * item, struct dentry *dentry) 307static int configfs_create_dir(struct config_item * item, struct dentry *dentry)
306{ 308{
307 struct dentry * parent; 309 int error = create_dir(item, dentry);
308 int error = 0;
309
310 BUG_ON(!item);
311
312 if (item->ci_parent)
313 parent = item->ci_parent->ci_dentry;
314 else if (configfs_mount)
315 parent = configfs_mount->mnt_root;
316 else
317 return -EFAULT;
318
319 error = create_dir(item,parent,dentry);
320 if (!error) 310 if (!error)
321 item->ci_dentry = dentry; 311 item->ci_dentry = dentry;
322 return error; 312 return error;
@@ -1079,23 +1069,24 @@ int configfs_depend_item(struct configfs_subsystem *subsys,
1079 int ret; 1069 int ret;
1080 struct configfs_dirent *p, *root_sd, *subsys_sd = NULL; 1070 struct configfs_dirent *p, *root_sd, *subsys_sd = NULL;
1081 struct config_item *s_item = &subsys->su_group.cg_item; 1071 struct config_item *s_item = &subsys->su_group.cg_item;
1072 struct dentry *root;
1082 1073
1083 /* 1074 /*
1084 * Pin the configfs filesystem. This means we can safely access 1075 * Pin the configfs filesystem. This means we can safely access
1085 * the root of the configfs filesystem. 1076 * the root of the configfs filesystem.
1086 */ 1077 */
1087 ret = configfs_pin_fs(); 1078 root = configfs_pin_fs();
1088 if (ret) 1079 if (IS_ERR(root))
1089 return ret; 1080 return PTR_ERR(root);
1090 1081
1091 /* 1082 /*
1092 * Next, lock the root directory. We're going to check that the 1083 * Next, lock the root directory. We're going to check that the
1093 * subsystem is really registered, and so we need to lock out 1084 * subsystem is really registered, and so we need to lock out
1094 * configfs_[un]register_subsystem(). 1085 * configfs_[un]register_subsystem().
1095 */ 1086 */
1096 mutex_lock(&configfs_sb->s_root->d_inode->i_mutex); 1087 mutex_lock(&root->d_inode->i_mutex);
1097 1088
1098 root_sd = configfs_sb->s_root->d_fsdata; 1089 root_sd = root->d_fsdata;
1099 1090
1100 list_for_each_entry(p, &root_sd->s_children, s_sibling) { 1091 list_for_each_entry(p, &root_sd->s_children, s_sibling) {
1101 if (p->s_type & CONFIGFS_DIR) { 1092 if (p->s_type & CONFIGFS_DIR) {
@@ -1129,7 +1120,7 @@ int configfs_depend_item(struct configfs_subsystem *subsys,
1129out_unlock_dirent_lock: 1120out_unlock_dirent_lock:
1130 spin_unlock(&configfs_dirent_lock); 1121 spin_unlock(&configfs_dirent_lock);
1131out_unlock_fs: 1122out_unlock_fs:
1132 mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex); 1123 mutex_unlock(&root->d_inode->i_mutex);
1133 1124
1134 /* 1125 /*
1135 * If we succeeded, the fs is pinned via other methods. If not, 1126 * If we succeeded, the fs is pinned via other methods. If not,
@@ -1183,11 +1174,6 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
1183 struct module *subsys_owner = NULL, *new_item_owner = NULL; 1174 struct module *subsys_owner = NULL, *new_item_owner = NULL;
1184 char *name; 1175 char *name;
1185 1176
1186 if (dentry->d_parent == configfs_sb->s_root) {
1187 ret = -EPERM;
1188 goto out;
1189 }
1190
1191 sd = dentry->d_parent->d_fsdata; 1177 sd = dentry->d_parent->d_fsdata;
1192 1178
1193 /* 1179 /*
@@ -1359,9 +1345,6 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1359 struct module *subsys_owner = NULL, *dead_item_owner = NULL; 1345 struct module *subsys_owner = NULL, *dead_item_owner = NULL;
1360 int ret; 1346 int ret;
1361 1347
1362 if (dentry->d_parent == configfs_sb->s_root)
1363 return -EPERM;
1364
1365 sd = dentry->d_fsdata; 1348 sd = dentry->d_fsdata;
1366 if (sd->s_type & CONFIGFS_USET_DEFAULT) 1349 if (sd->s_type & CONFIGFS_USET_DEFAULT)
1367 return -EPERM; 1350 return -EPERM;
@@ -1459,6 +1442,11 @@ const struct inode_operations configfs_dir_inode_operations = {
1459 .setattr = configfs_setattr, 1442 .setattr = configfs_setattr,
1460}; 1443};
1461 1444
1445const struct inode_operations configfs_root_inode_operations = {
1446 .lookup = configfs_lookup,
1447 .setattr = configfs_setattr,
1448};
1449
1462#if 0 1450#if 0
1463int configfs_rename_dir(struct config_item * item, const char *new_name) 1451int configfs_rename_dir(struct config_item * item, const char *new_name)
1464{ 1452{
@@ -1546,6 +1534,7 @@ static inline unsigned char dt_type(struct configfs_dirent *sd)
1546static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir) 1534static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
1547{ 1535{
1548 struct dentry *dentry = filp->f_path.dentry; 1536 struct dentry *dentry = filp->f_path.dentry;
1537 struct super_block *sb = dentry->d_sb;
1549 struct configfs_dirent * parent_sd = dentry->d_fsdata; 1538 struct configfs_dirent * parent_sd = dentry->d_fsdata;
1550 struct configfs_dirent *cursor = filp->private_data; 1539 struct configfs_dirent *cursor = filp->private_data;
1551 struct list_head *p, *q = &cursor->s_sibling; 1540 struct list_head *p, *q = &cursor->s_sibling;
@@ -1608,7 +1597,7 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir
1608 ino = inode->i_ino; 1597 ino = inode->i_ino;
1609 spin_unlock(&configfs_dirent_lock); 1598 spin_unlock(&configfs_dirent_lock);
1610 if (!inode) 1599 if (!inode)
1611 ino = iunique(configfs_sb, 2); 1600 ino = iunique(sb, 2);
1612 1601
1613 if (filldir(dirent, name, len, filp->f_pos, ino, 1602 if (filldir(dirent, name, len, filp->f_pos, ino,
1614 dt_type(next)) < 0) 1603 dt_type(next)) < 0)
@@ -1680,27 +1669,27 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
1680 struct config_group *group = &subsys->su_group; 1669 struct config_group *group = &subsys->su_group;
1681 struct qstr name; 1670 struct qstr name;
1682 struct dentry *dentry; 1671 struct dentry *dentry;
1672 struct dentry *root;
1683 struct configfs_dirent *sd; 1673 struct configfs_dirent *sd;
1684 1674
1685 err = configfs_pin_fs(); 1675 root = configfs_pin_fs();
1686 if (err) 1676 if (IS_ERR(root))
1687 return err; 1677 return PTR_ERR(root);
1688 1678
1689 if (!group->cg_item.ci_name) 1679 if (!group->cg_item.ci_name)
1690 group->cg_item.ci_name = group->cg_item.ci_namebuf; 1680 group->cg_item.ci_name = group->cg_item.ci_namebuf;
1691 1681
1692 sd = configfs_sb->s_root->d_fsdata; 1682 sd = root->d_fsdata;
1693 link_group(to_config_group(sd->s_element), group); 1683 link_group(to_config_group(sd->s_element), group);
1694 1684
1695 mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex, 1685 mutex_lock_nested(&root->d_inode->i_mutex, I_MUTEX_PARENT);
1696 I_MUTEX_PARENT);
1697 1686
1698 name.name = group->cg_item.ci_name; 1687 name.name = group->cg_item.ci_name;
1699 name.len = strlen(name.name); 1688 name.len = strlen(name.name);
1700 name.hash = full_name_hash(name.name, name.len); 1689 name.hash = full_name_hash(name.name, name.len);
1701 1690
1702 err = -ENOMEM; 1691 err = -ENOMEM;
1703 dentry = d_alloc(configfs_sb->s_root, &name); 1692 dentry = d_alloc(root, &name);
1704 if (dentry) { 1693 if (dentry) {
1705 d_add(dentry, NULL); 1694 d_add(dentry, NULL);
1706 1695
@@ -1717,7 +1706,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
1717 } 1706 }
1718 } 1707 }
1719 1708
1720 mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex); 1709 mutex_unlock(&root->d_inode->i_mutex);
1721 1710
1722 if (err) { 1711 if (err) {
1723 unlink_group(group); 1712 unlink_group(group);
@@ -1731,13 +1720,14 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
1731{ 1720{
1732 struct config_group *group = &subsys->su_group; 1721 struct config_group *group = &subsys->su_group;
1733 struct dentry *dentry = group->cg_item.ci_dentry; 1722 struct dentry *dentry = group->cg_item.ci_dentry;
1723 struct dentry *root = dentry->d_sb->s_root;
1734 1724
1735 if (dentry->d_parent != configfs_sb->s_root) { 1725 if (dentry->d_parent != root) {
1736 printk(KERN_ERR "configfs: Tried to unregister non-subsystem!\n"); 1726 printk(KERN_ERR "configfs: Tried to unregister non-subsystem!\n");
1737 return; 1727 return;
1738 } 1728 }
1739 1729
1740 mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex, 1730 mutex_lock_nested(&root->d_inode->i_mutex,
1741 I_MUTEX_PARENT); 1731 I_MUTEX_PARENT);
1742 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); 1732 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
1743 mutex_lock(&configfs_symlink_mutex); 1733 mutex_lock(&configfs_symlink_mutex);
@@ -1754,7 +1744,7 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
1754 1744
1755 d_delete(dentry); 1745 d_delete(dentry);
1756 1746
1757 mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex); 1747 mutex_unlock(&root->d_inode->i_mutex);
1758 1748
1759 dput(dentry); 1749 dput(dentry);
1760 1750
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 3ee36d418863..0074362d9f7f 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -44,8 +44,6 @@
44static struct lock_class_key default_group_class[MAX_LOCK_DEPTH]; 44static struct lock_class_key default_group_class[MAX_LOCK_DEPTH];
45#endif 45#endif
46 46
47extern struct super_block * configfs_sb;
48
49static const struct address_space_operations configfs_aops = { 47static const struct address_space_operations configfs_aops = {
50 .readpage = simple_readpage, 48 .readpage = simple_readpage,
51 .write_begin = simple_write_begin, 49 .write_begin = simple_write_begin,
@@ -132,9 +130,10 @@ static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
132 inode->i_ctime = iattr->ia_ctime; 130 inode->i_ctime = iattr->ia_ctime;
133} 131}
134 132
135struct inode *configfs_new_inode(umode_t mode, struct configfs_dirent * sd) 133struct inode *configfs_new_inode(umode_t mode, struct configfs_dirent *sd,
134 struct super_block *s)
136{ 135{
137 struct inode * inode = new_inode(configfs_sb); 136 struct inode * inode = new_inode(s);
138 if (inode) { 137 if (inode) {
139 inode->i_ino = get_next_ino(); 138 inode->i_ino = get_next_ino();
140 inode->i_mapping->a_ops = &configfs_aops; 139 inode->i_mapping->a_ops = &configfs_aops;
@@ -188,36 +187,35 @@ static void configfs_set_inode_lock_class(struct configfs_dirent *sd,
188int configfs_create(struct dentry * dentry, umode_t mode, int (*init)(struct inode *)) 187int configfs_create(struct dentry * dentry, umode_t mode, int (*init)(struct inode *))
189{ 188{
190 int error = 0; 189 int error = 0;
191 struct inode * inode = NULL; 190 struct inode *inode = NULL;
192 if (dentry) { 191 struct configfs_dirent *sd;
193 if (!dentry->d_inode) { 192 struct inode *p_inode;
194 struct configfs_dirent *sd = dentry->d_fsdata; 193
195 if ((inode = configfs_new_inode(mode, sd))) { 194 if (!dentry)
196 if (dentry->d_parent && dentry->d_parent->d_inode) { 195 return -ENOENT;
197 struct inode *p_inode = dentry->d_parent->d_inode; 196
198 p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME; 197 if (dentry->d_inode)
199 } 198 return -EEXIST;
200 configfs_set_inode_lock_class(sd, inode);
201 goto Proceed;
202 }
203 else
204 error = -ENOMEM;
205 } else
206 error = -EEXIST;
207 } else
208 error = -ENOENT;
209 goto Done;
210 199
211 Proceed: 200 sd = dentry->d_fsdata;
212 if (init) 201 inode = configfs_new_inode(mode, sd, dentry->d_sb);
202 if (!inode)
203 return -ENOMEM;
204
205 p_inode = dentry->d_parent->d_inode;
206 p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME;
207 configfs_set_inode_lock_class(sd, inode);
208
209 if (init) {
213 error = init(inode); 210 error = init(inode);
214 if (!error) { 211 if (error) {
215 d_instantiate(dentry, inode); 212 iput(inode);
216 if (S_ISDIR(mode) || S_ISLNK(mode)) 213 return error;
217 dget(dentry); /* pin link and directory dentries in core */ 214 }
218 } else 215 }
219 iput(inode); 216 d_instantiate(dentry, inode);
220 Done: 217 if (S_ISDIR(mode) || S_ISLNK(mode))
218 dget(dentry); /* pin link and directory dentries in core */
221 return error; 219 return error;
222} 220}
223 221
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index 276e15cafd58..aee0a7ebbd8e 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -37,8 +37,7 @@
37/* Random magic number */ 37/* Random magic number */
38#define CONFIGFS_MAGIC 0x62656570 38#define CONFIGFS_MAGIC 0x62656570
39 39
40struct vfsmount * configfs_mount = NULL; 40static struct vfsmount *configfs_mount = NULL;
41struct super_block * configfs_sb = NULL;
42struct kmem_cache *configfs_dir_cachep; 41struct kmem_cache *configfs_dir_cachep;
43static int configfs_mnt_count = 0; 42static int configfs_mnt_count = 0;
44 43
@@ -77,12 +76,11 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent)
77 sb->s_magic = CONFIGFS_MAGIC; 76 sb->s_magic = CONFIGFS_MAGIC;
78 sb->s_op = &configfs_ops; 77 sb->s_op = &configfs_ops;
79 sb->s_time_gran = 1; 78 sb->s_time_gran = 1;
80 configfs_sb = sb;
81 79
82 inode = configfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, 80 inode = configfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
83 &configfs_root); 81 &configfs_root, sb);
84 if (inode) { 82 if (inode) {
85 inode->i_op = &configfs_dir_inode_operations; 83 inode->i_op = &configfs_root_inode_operations;
86 inode->i_fop = &configfs_dir_operations; 84 inode->i_fop = &configfs_dir_operations;
87 /* directory inodes start off with i_nlink == 2 (for "." entry) */ 85 /* directory inodes start off with i_nlink == 2 (for "." entry) */
88 inc_nlink(inode); 86 inc_nlink(inode);
@@ -91,10 +89,9 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent)
91 return -ENOMEM; 89 return -ENOMEM;
92 } 90 }
93 91
94 root = d_alloc_root(inode); 92 root = d_make_root(inode);
95 if (!root) { 93 if (!root) {
96 pr_debug("%s: could not get root dentry!\n",__func__); 94 pr_debug("%s: could not get root dentry!\n",__func__);
97 iput(inode);
98 return -ENOMEM; 95 return -ENOMEM;
99 } 96 }
100 config_group_init(&configfs_root_group); 97 config_group_init(&configfs_root_group);
@@ -118,10 +115,11 @@ static struct file_system_type configfs_fs_type = {
118 .kill_sb = kill_litter_super, 115 .kill_sb = kill_litter_super,
119}; 116};
120 117
121int configfs_pin_fs(void) 118struct dentry *configfs_pin_fs(void)
122{ 119{
123 return simple_pin_fs(&configfs_fs_type, &configfs_mount, 120 int err = simple_pin_fs(&configfs_fs_type, &configfs_mount,
124 &configfs_mnt_count); 121 &configfs_mnt_count);
122 return err ? ERR_PTR(err) : configfs_mount->mnt_root;
125} 123}
126 124
127void configfs_release_fs(void) 125void configfs_release_fs(void)
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index 0f3eb41d9201..cc9f2546ea4a 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -110,13 +110,13 @@ out:
110 110
111 111
112static int get_target(const char *symname, struct path *path, 112static int get_target(const char *symname, struct path *path,
113 struct config_item **target) 113 struct config_item **target, struct super_block *sb)
114{ 114{
115 int ret; 115 int ret;
116 116
117 ret = kern_path(symname, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, path); 117 ret = kern_path(symname, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, path);
118 if (!ret) { 118 if (!ret) {
119 if (path->dentry->d_sb == configfs_sb) { 119 if (path->dentry->d_sb == sb) {
120 *target = configfs_get_config_item(path->dentry); 120 *target = configfs_get_config_item(path->dentry);
121 if (!*target) { 121 if (!*target) {
122 ret = -ENOENT; 122 ret = -ENOENT;
@@ -141,10 +141,6 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
141 struct config_item *target_item = NULL; 141 struct config_item *target_item = NULL;
142 struct config_item_type *type; 142 struct config_item_type *type;
143 143
144 ret = -EPERM; /* What lack-of-symlink returns */
145 if (dentry->d_parent == configfs_sb->s_root)
146 goto out;
147
148 sd = dentry->d_parent->d_fsdata; 144 sd = dentry->d_parent->d_fsdata;
149 /* 145 /*
150 * Fake invisibility if dir belongs to a group/default groups hierarchy 146 * Fake invisibility if dir belongs to a group/default groups hierarchy
@@ -162,7 +158,7 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
162 !type->ct_item_ops->allow_link) 158 !type->ct_item_ops->allow_link)
163 goto out_put; 159 goto out_put;
164 160
165 ret = get_target(symname, &path, &target_item); 161 ret = get_target(symname, &path, &target_item, dentry->d_sb);
166 if (ret) 162 if (ret)
167 goto out_put; 163 goto out_put;
168 164
@@ -198,8 +194,6 @@ int configfs_unlink(struct inode *dir, struct dentry *dentry)
198 if (!(sd->s_type & CONFIGFS_ITEM_LINK)) 194 if (!(sd->s_type & CONFIGFS_ITEM_LINK))
199 goto out; 195 goto out;
200 196
201 BUG_ON(dentry->d_parent == configfs_sb->s_root);
202
203 sl = sd->s_element; 197 sl = sd->s_element;
204 198
205 parent_item = configfs_get_config_item(dentry->d_parent); 199 parent_item = configfs_get_config_item(dentry->d_parent);
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index a2ee8f9f5a38..d013c46402ed 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -257,10 +257,10 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
257 257
258 /* Do sanity checks on the superblock */ 258 /* Do sanity checks on the superblock */
259 if (super.magic != CRAMFS_MAGIC) { 259 if (super.magic != CRAMFS_MAGIC) {
260 /* check for wrong endianess */ 260 /* check for wrong endianness */
261 if (super.magic == CRAMFS_MAGIC_WEND) { 261 if (super.magic == CRAMFS_MAGIC_WEND) {
262 if (!silent) 262 if (!silent)
263 printk(KERN_ERR "cramfs: wrong endianess\n"); 263 printk(KERN_ERR "cramfs: wrong endianness\n");
264 goto out; 264 goto out;
265 } 265 }
266 266
@@ -270,7 +270,7 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
270 mutex_unlock(&read_mutex); 270 mutex_unlock(&read_mutex);
271 if (super.magic != CRAMFS_MAGIC) { 271 if (super.magic != CRAMFS_MAGIC) {
272 if (super.magic == CRAMFS_MAGIC_WEND && !silent) 272 if (super.magic == CRAMFS_MAGIC_WEND && !silent)
273 printk(KERN_ERR "cramfs: wrong endianess\n"); 273 printk(KERN_ERR "cramfs: wrong endianness\n");
274 else if (!silent) 274 else if (!silent)
275 printk(KERN_ERR "cramfs: wrong magic\n"); 275 printk(KERN_ERR "cramfs: wrong magic\n");
276 goto out; 276 goto out;
@@ -318,11 +318,9 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
318 root = get_cramfs_inode(sb, &super.root, 0); 318 root = get_cramfs_inode(sb, &super.root, 0);
319 if (IS_ERR(root)) 319 if (IS_ERR(root))
320 goto out; 320 goto out;
321 sb->s_root = d_alloc_root(root); 321 sb->s_root = d_make_root(root);
322 if (!sb->s_root) { 322 if (!sb->s_root)
323 iput(root);
324 goto out; 323 goto out;
325 }
326 return 0; 324 return 0;
327out: 325out:
328 kfree(sbi); 326 kfree(sbi);
diff --git a/fs/dcache.c b/fs/dcache.c
index fe19ac13f75f..b60ddc41d783 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -23,7 +23,7 @@
23#include <linux/init.h> 23#include <linux/init.h>
24#include <linux/hash.h> 24#include <linux/hash.h>
25#include <linux/cache.h> 25#include <linux/cache.h>
26#include <linux/module.h> 26#include <linux/export.h>
27#include <linux/mount.h> 27#include <linux/mount.h>
28#include <linux/file.h> 28#include <linux/file.h>
29#include <asm/uaccess.h> 29#include <asm/uaccess.h>
@@ -104,11 +104,11 @@ static unsigned int d_hash_shift __read_mostly;
104 104
105static struct hlist_bl_head *dentry_hashtable __read_mostly; 105static struct hlist_bl_head *dentry_hashtable __read_mostly;
106 106
107static inline struct hlist_bl_head *d_hash(struct dentry *parent, 107static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
108 unsigned long hash) 108 unsigned int hash)
109{ 109{
110 hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES; 110 hash += (unsigned long) parent / L1_CACHE_BYTES;
111 hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS); 111 hash = hash + (hash >> D_HASHBITS);
112 return dentry_hashtable + (hash & D_HASHMASK); 112 return dentry_hashtable + (hash & D_HASHMASK);
113} 113}
114 114
@@ -137,6 +137,49 @@ int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
137} 137}
138#endif 138#endif
139 139
140/*
141 * Compare 2 name strings, return 0 if they match, otherwise non-zero.
142 * The strings are both count bytes long, and count is non-zero.
143 */
144static inline int dentry_cmp(const unsigned char *cs, size_t scount,
145 const unsigned char *ct, size_t tcount)
146{
147#ifdef CONFIG_DCACHE_WORD_ACCESS
148 unsigned long a,b,mask;
149
150 if (unlikely(scount != tcount))
151 return 1;
152
153 for (;;) {
154 a = *(unsigned long *)cs;
155 b = *(unsigned long *)ct;
156 if (tcount < sizeof(unsigned long))
157 break;
158 if (unlikely(a != b))
159 return 1;
160 cs += sizeof(unsigned long);
161 ct += sizeof(unsigned long);
162 tcount -= sizeof(unsigned long);
163 if (!tcount)
164 return 0;
165 }
166 mask = ~(~0ul << tcount*8);
167 return unlikely(!!((a ^ b) & mask));
168#else
169 if (scount != tcount)
170 return 1;
171
172 do {
173 if (*cs != *ct)
174 return 1;
175 cs++;
176 ct++;
177 tcount--;
178 } while (tcount);
179 return 0;
180#endif
181}
182
140static void __d_free(struct rcu_head *head) 183static void __d_free(struct rcu_head *head)
141{ 184{
142 struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); 185 struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
@@ -1423,30 +1466,6 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
1423 1466
1424EXPORT_SYMBOL(d_instantiate_unique); 1467EXPORT_SYMBOL(d_instantiate_unique);
1425 1468
1426/**
1427 * d_alloc_root - allocate root dentry
1428 * @root_inode: inode to allocate the root for
1429 *
1430 * Allocate a root ("/") dentry for the inode given. The inode is
1431 * instantiated and returned. %NULL is returned if there is insufficient
1432 * memory or the inode passed is %NULL.
1433 */
1434
1435struct dentry * d_alloc_root(struct inode * root_inode)
1436{
1437 struct dentry *res = NULL;
1438
1439 if (root_inode) {
1440 static const struct qstr name = { .name = "/", .len = 1 };
1441
1442 res = __d_alloc(root_inode->i_sb, &name);
1443 if (res)
1444 d_instantiate(res, root_inode);
1445 }
1446 return res;
1447}
1448EXPORT_SYMBOL(d_alloc_root);
1449
1450struct dentry *d_make_root(struct inode *root_inode) 1469struct dentry *d_make_root(struct inode *root_inode)
1451{ 1470{
1452 struct dentry *res = NULL; 1471 struct dentry *res = NULL;
@@ -1694,7 +1713,7 @@ EXPORT_SYMBOL(d_add_ci);
1694 * __d_lookup_rcu - search for a dentry (racy, store-free) 1713 * __d_lookup_rcu - search for a dentry (racy, store-free)
1695 * @parent: parent dentry 1714 * @parent: parent dentry
1696 * @name: qstr of name we wish to find 1715 * @name: qstr of name we wish to find
1697 * @seq: returns d_seq value at the point where the dentry was found 1716 * @seqp: returns d_seq value at the point where the dentry was found
1698 * @inode: returns dentry->d_inode when the inode was found valid. 1717 * @inode: returns dentry->d_inode when the inode was found valid.
1699 * Returns: dentry, or NULL 1718 * Returns: dentry, or NULL
1700 * 1719 *
@@ -1717,8 +1736,9 @@ EXPORT_SYMBOL(d_add_ci);
1717 * child is looked up. Thus, an interlocking stepping of sequence lock checks 1736 * child is looked up. Thus, an interlocking stepping of sequence lock checks
1718 * is formed, giving integrity down the path walk. 1737 * is formed, giving integrity down the path walk.
1719 */ 1738 */
1720struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name, 1739struct dentry *__d_lookup_rcu(const struct dentry *parent,
1721 unsigned *seq, struct inode **inode) 1740 const struct qstr *name,
1741 unsigned *seqp, struct inode **inode)
1722{ 1742{
1723 unsigned int len = name->len; 1743 unsigned int len = name->len;
1724 unsigned int hash = name->hash; 1744 unsigned int hash = name->hash;
@@ -1748,6 +1768,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
1748 * See Documentation/filesystems/path-lookup.txt for more details. 1768 * See Documentation/filesystems/path-lookup.txt for more details.
1749 */ 1769 */
1750 hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { 1770 hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
1771 unsigned seq;
1751 struct inode *i; 1772 struct inode *i;
1752 const char *tname; 1773 const char *tname;
1753 int tlen; 1774 int tlen;
@@ -1756,7 +1777,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
1756 continue; 1777 continue;
1757 1778
1758seqretry: 1779seqretry:
1759 *seq = read_seqcount_begin(&dentry->d_seq); 1780 seq = read_seqcount_begin(&dentry->d_seq);
1760 if (dentry->d_parent != parent) 1781 if (dentry->d_parent != parent)
1761 continue; 1782 continue;
1762 if (d_unhashed(dentry)) 1783 if (d_unhashed(dentry))
@@ -1771,7 +1792,7 @@ seqretry:
1771 * edge of memory when walking. If we could load this 1792 * edge of memory when walking. If we could load this
1772 * atomically some other way, we could drop this check. 1793 * atomically some other way, we could drop this check.
1773 */ 1794 */
1774 if (read_seqcount_retry(&dentry->d_seq, *seq)) 1795 if (read_seqcount_retry(&dentry->d_seq, seq))
1775 goto seqretry; 1796 goto seqretry;
1776 if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) { 1797 if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
1777 if (parent->d_op->d_compare(parent, *inode, 1798 if (parent->d_op->d_compare(parent, *inode,
@@ -1788,6 +1809,7 @@ seqretry:
1788 * order to do anything useful with the returned dentry 1809 * order to do anything useful with the returned dentry
1789 * anyway. 1810 * anyway.
1790 */ 1811 */
1812 *seqp = seq;
1791 *inode = i; 1813 *inode = i;
1792 return dentry; 1814 return dentry;
1793 } 1815 }
@@ -2382,6 +2404,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
2382 if (d_ancestor(alias, dentry)) { 2404 if (d_ancestor(alias, dentry)) {
2383 /* Check for loops */ 2405 /* Check for loops */
2384 actual = ERR_PTR(-ELOOP); 2406 actual = ERR_PTR(-ELOOP);
2407 spin_unlock(&inode->i_lock);
2385 } else if (IS_ROOT(alias)) { 2408 } else if (IS_ROOT(alias)) {
2386 /* Is this an anonymous mountpoint that we 2409 /* Is this an anonymous mountpoint that we
2387 * could splice into our tree? */ 2410 * could splice into our tree? */
@@ -2391,7 +2414,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
2391 goto found; 2414 goto found;
2392 } else { 2415 } else {
2393 /* Nope, but we must(!) avoid directory 2416 /* Nope, but we must(!) avoid directory
2394 * aliasing */ 2417 * aliasing. This drops inode->i_lock */
2395 actual = __d_unalias(inode, dentry, alias); 2418 actual = __d_unalias(inode, dentry, alias);
2396 } 2419 }
2397 write_sequnlock(&rename_lock); 2420 write_sequnlock(&rename_lock);
diff --git a/fs/dcookies.c b/fs/dcookies.c
index dda0dc702d1b..17c779967828 100644
--- a/fs/dcookies.c
+++ b/fs/dcookies.c
@@ -13,7 +13,7 @@
13 */ 13 */
14 14
15#include <linux/syscalls.h> 15#include <linux/syscalls.h>
16#include <linux/module.h> 16#include <linux/export.h>
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/list.h> 18#include <linux/list.h>
19#include <linux/mount.h> 19#include <linux/mount.h>
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index ef023eef0464..21e93605161c 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -611,7 +611,7 @@ static const struct file_operations fops_regset32 = {
611 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling 611 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
612 * code. 612 * code.
613 */ 613 */
614struct dentry *debugfs_create_regset32(const char *name, mode_t mode, 614struct dentry *debugfs_create_regset32(const char *name, umode_t mode,
615 struct dentry *parent, 615 struct dentry *parent,
616 struct debugfs_regset32 *regset) 616 struct debugfs_regset32 *regset)
617{ 617{
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 956d5ddddf6e..b80bc846a15a 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -23,9 +23,13 @@
23#include <linux/debugfs.h> 23#include <linux/debugfs.h>
24#include <linux/fsnotify.h> 24#include <linux/fsnotify.h>
25#include <linux/string.h> 25#include <linux/string.h>
26#include <linux/seq_file.h>
27#include <linux/parser.h>
26#include <linux/magic.h> 28#include <linux/magic.h>
27#include <linux/slab.h> 29#include <linux/slab.h>
28 30
31#define DEBUGFS_DEFAULT_MODE 0755
32
29static struct vfsmount *debugfs_mount; 33static struct vfsmount *debugfs_mount;
30static int debugfs_mount_count; 34static int debugfs_mount_count;
31static bool debugfs_registered; 35static bool debugfs_registered;
@@ -125,11 +129,154 @@ static inline int debugfs_positive(struct dentry *dentry)
125 return dentry->d_inode && !d_unhashed(dentry); 129 return dentry->d_inode && !d_unhashed(dentry);
126} 130}
127 131
132struct debugfs_mount_opts {
133 uid_t uid;
134 gid_t gid;
135 umode_t mode;
136};
137
138enum {
139 Opt_uid,
140 Opt_gid,
141 Opt_mode,
142 Opt_err
143};
144
145static const match_table_t tokens = {
146 {Opt_uid, "uid=%u"},
147 {Opt_gid, "gid=%u"},
148 {Opt_mode, "mode=%o"},
149 {Opt_err, NULL}
150};
151
152struct debugfs_fs_info {
153 struct debugfs_mount_opts mount_opts;
154};
155
156static int debugfs_parse_options(char *data, struct debugfs_mount_opts *opts)
157{
158 substring_t args[MAX_OPT_ARGS];
159 int option;
160 int token;
161 char *p;
162
163 opts->mode = DEBUGFS_DEFAULT_MODE;
164
165 while ((p = strsep(&data, ",")) != NULL) {
166 if (!*p)
167 continue;
168
169 token = match_token(p, tokens, args);
170 switch (token) {
171 case Opt_uid:
172 if (match_int(&args[0], &option))
173 return -EINVAL;
174 opts->uid = option;
175 break;
176 case Opt_gid:
177 if (match_octal(&args[0], &option))
178 return -EINVAL;
179 opts->gid = option;
180 break;
181 case Opt_mode:
182 if (match_octal(&args[0], &option))
183 return -EINVAL;
184 opts->mode = option & S_IALLUGO;
185 break;
186 /*
187 * We might like to report bad mount options here;
188 * but traditionally debugfs has ignored all mount options
189 */
190 }
191 }
192
193 return 0;
194}
195
196static int debugfs_apply_options(struct super_block *sb)
197{
198 struct debugfs_fs_info *fsi = sb->s_fs_info;
199 struct inode *inode = sb->s_root->d_inode;
200 struct debugfs_mount_opts *opts = &fsi->mount_opts;
201
202 inode->i_mode &= ~S_IALLUGO;
203 inode->i_mode |= opts->mode;
204
205 inode->i_uid = opts->uid;
206 inode->i_gid = opts->gid;
207
208 return 0;
209}
210
211static int debugfs_remount(struct super_block *sb, int *flags, char *data)
212{
213 int err;
214 struct debugfs_fs_info *fsi = sb->s_fs_info;
215
216 err = debugfs_parse_options(data, &fsi->mount_opts);
217 if (err)
218 goto fail;
219
220 debugfs_apply_options(sb);
221
222fail:
223 return err;
224}
225
226static int debugfs_show_options(struct seq_file *m, struct dentry *root)
227{
228 struct debugfs_fs_info *fsi = root->d_sb->s_fs_info;
229 struct debugfs_mount_opts *opts = &fsi->mount_opts;
230
231 if (opts->uid != 0)
232 seq_printf(m, ",uid=%u", opts->uid);
233 if (opts->gid != 0)
234 seq_printf(m, ",gid=%u", opts->gid);
235 if (opts->mode != DEBUGFS_DEFAULT_MODE)
236 seq_printf(m, ",mode=%o", opts->mode);
237
238 return 0;
239}
240
241static const struct super_operations debugfs_super_operations = {
242 .statfs = simple_statfs,
243 .remount_fs = debugfs_remount,
244 .show_options = debugfs_show_options,
245};
246
128static int debug_fill_super(struct super_block *sb, void *data, int silent) 247static int debug_fill_super(struct super_block *sb, void *data, int silent)
129{ 248{
130 static struct tree_descr debug_files[] = {{""}}; 249 static struct tree_descr debug_files[] = {{""}};
250 struct debugfs_fs_info *fsi;
251 int err;
252
253 save_mount_options(sb, data);
254
255 fsi = kzalloc(sizeof(struct debugfs_fs_info), GFP_KERNEL);
256 sb->s_fs_info = fsi;
257 if (!fsi) {
258 err = -ENOMEM;
259 goto fail;
260 }
261
262 err = debugfs_parse_options(data, &fsi->mount_opts);
263 if (err)
264 goto fail;
265
266 err = simple_fill_super(sb, DEBUGFS_MAGIC, debug_files);
267 if (err)
268 goto fail;
269
270 sb->s_op = &debugfs_super_operations;
271
272 debugfs_apply_options(sb);
273
274 return 0;
131 275
132 return simple_fill_super(sb, DEBUGFS_MAGIC, debug_files); 276fail:
277 kfree(fsi);
278 sb->s_fs_info = NULL;
279 return err;
133} 280}
134 281
135static struct dentry *debug_mount(struct file_system_type *fs_type, 282static struct dentry *debug_mount(struct file_system_type *fs_type,
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index c4e2a58a2e82..10f5e0b484db 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -36,7 +36,61 @@
36#define DEVPTS_DEFAULT_PTMX_MODE 0000 36#define DEVPTS_DEFAULT_PTMX_MODE 0000
37#define PTMX_MINOR 2 37#define PTMX_MINOR 2
38 38
39extern int pty_limit; /* Config limit on Unix98 ptys */ 39/*
40 * sysctl support for setting limits on the number of Unix98 ptys allocated.
41 * Otherwise one can eat up all kernel memory by opening /dev/ptmx repeatedly.
42 */
43static int pty_limit = NR_UNIX98_PTY_DEFAULT;
44static int pty_reserve = NR_UNIX98_PTY_RESERVE;
45static int pty_limit_min;
46static int pty_limit_max = INT_MAX;
47static int pty_count;
48
49static struct ctl_table pty_table[] = {
50 {
51 .procname = "max",
52 .maxlen = sizeof(int),
53 .mode = 0644,
54 .data = &pty_limit,
55 .proc_handler = proc_dointvec_minmax,
56 .extra1 = &pty_limit_min,
57 .extra2 = &pty_limit_max,
58 }, {
59 .procname = "reserve",
60 .maxlen = sizeof(int),
61 .mode = 0644,
62 .data = &pty_reserve,
63 .proc_handler = proc_dointvec_minmax,
64 .extra1 = &pty_limit_min,
65 .extra2 = &pty_limit_max,
66 }, {
67 .procname = "nr",
68 .maxlen = sizeof(int),
69 .mode = 0444,
70 .data = &pty_count,
71 .proc_handler = proc_dointvec,
72 },
73 {}
74};
75
76static struct ctl_table pty_kern_table[] = {
77 {
78 .procname = "pty",
79 .mode = 0555,
80 .child = pty_table,
81 },
82 {}
83};
84
85static struct ctl_table pty_root_table[] = {
86 {
87 .procname = "kernel",
88 .mode = 0555,
89 .child = pty_kern_table,
90 },
91 {}
92};
93
40static DEFINE_MUTEX(allocated_ptys_lock); 94static DEFINE_MUTEX(allocated_ptys_lock);
41 95
42static struct vfsmount *devpts_mnt; 96static struct vfsmount *devpts_mnt;
@@ -49,10 +103,11 @@ struct pts_mount_opts {
49 umode_t mode; 103 umode_t mode;
50 umode_t ptmxmode; 104 umode_t ptmxmode;
51 int newinstance; 105 int newinstance;
106 int max;
52}; 107};
53 108
54enum { 109enum {
55 Opt_uid, Opt_gid, Opt_mode, Opt_ptmxmode, Opt_newinstance, 110 Opt_uid, Opt_gid, Opt_mode, Opt_ptmxmode, Opt_newinstance, Opt_max,
56 Opt_err 111 Opt_err
57}; 112};
58 113
@@ -63,6 +118,7 @@ static const match_table_t tokens = {
63#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES 118#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
64 {Opt_ptmxmode, "ptmxmode=%o"}, 119 {Opt_ptmxmode, "ptmxmode=%o"},
65 {Opt_newinstance, "newinstance"}, 120 {Opt_newinstance, "newinstance"},
121 {Opt_max, "max=%d"},
66#endif 122#endif
67 {Opt_err, NULL} 123 {Opt_err, NULL}
68}; 124};
@@ -109,6 +165,7 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts)
109 opts->gid = 0; 165 opts->gid = 0;
110 opts->mode = DEVPTS_DEFAULT_MODE; 166 opts->mode = DEVPTS_DEFAULT_MODE;
111 opts->ptmxmode = DEVPTS_DEFAULT_PTMX_MODE; 167 opts->ptmxmode = DEVPTS_DEFAULT_PTMX_MODE;
168 opts->max = NR_UNIX98_PTY_MAX;
112 169
113 /* newinstance makes sense only on initial mount */ 170 /* newinstance makes sense only on initial mount */
114 if (op == PARSE_MOUNT) 171 if (op == PARSE_MOUNT)
@@ -152,6 +209,12 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts)
152 if (op == PARSE_MOUNT) 209 if (op == PARSE_MOUNT)
153 opts->newinstance = 1; 210 opts->newinstance = 1;
154 break; 211 break;
212 case Opt_max:
213 if (match_int(&args[0], &option) ||
214 option < 0 || option > NR_UNIX98_PTY_MAX)
215 return -EINVAL;
216 opts->max = option;
217 break;
155#endif 218#endif
156 default: 219 default:
157 printk(KERN_ERR "devpts: called with bogus options\n"); 220 printk(KERN_ERR "devpts: called with bogus options\n");
@@ -258,6 +321,8 @@ static int devpts_show_options(struct seq_file *seq, struct dentry *root)
258 seq_printf(seq, ",mode=%03o", opts->mode); 321 seq_printf(seq, ",mode=%03o", opts->mode);
259#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES 322#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
260 seq_printf(seq, ",ptmxmode=%03o", opts->ptmxmode); 323 seq_printf(seq, ",ptmxmode=%03o", opts->ptmxmode);
324 if (opts->max < NR_UNIX98_PTY_MAX)
325 seq_printf(seq, ",max=%d", opts->max);
261#endif 326#endif
262 327
263 return 0; 328 return 0;
@@ -309,12 +374,11 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
309 inode->i_fop = &simple_dir_operations; 374 inode->i_fop = &simple_dir_operations;
310 set_nlink(inode, 2); 375 set_nlink(inode, 2);
311 376
312 s->s_root = d_alloc_root(inode); 377 s->s_root = d_make_root(inode);
313 if (s->s_root) 378 if (s->s_root)
314 return 0; 379 return 0;
315 380
316 printk(KERN_ERR "devpts: get root dentry failed\n"); 381 printk(KERN_ERR "devpts: get root dentry failed\n");
317 iput(inode);
318 382
319fail: 383fail:
320 return -ENOMEM; 384 return -ENOMEM;
@@ -438,6 +502,12 @@ retry:
438 return -ENOMEM; 502 return -ENOMEM;
439 503
440 mutex_lock(&allocated_ptys_lock); 504 mutex_lock(&allocated_ptys_lock);
505 if (pty_count >= pty_limit -
506 (fsi->mount_opts.newinstance ? pty_reserve : 0)) {
507 mutex_unlock(&allocated_ptys_lock);
508 return -ENOSPC;
509 }
510
441 ida_ret = ida_get_new(&fsi->allocated_ptys, &index); 511 ida_ret = ida_get_new(&fsi->allocated_ptys, &index);
442 if (ida_ret < 0) { 512 if (ida_ret < 0) {
443 mutex_unlock(&allocated_ptys_lock); 513 mutex_unlock(&allocated_ptys_lock);
@@ -446,11 +516,12 @@ retry:
446 return -EIO; 516 return -EIO;
447 } 517 }
448 518
449 if (index >= pty_limit) { 519 if (index >= fsi->mount_opts.max) {
450 ida_remove(&fsi->allocated_ptys, index); 520 ida_remove(&fsi->allocated_ptys, index);
451 mutex_unlock(&allocated_ptys_lock); 521 mutex_unlock(&allocated_ptys_lock);
452 return -EIO; 522 return -ENOSPC;
453 } 523 }
524 pty_count++;
454 mutex_unlock(&allocated_ptys_lock); 525 mutex_unlock(&allocated_ptys_lock);
455 return index; 526 return index;
456} 527}
@@ -462,6 +533,7 @@ void devpts_kill_index(struct inode *ptmx_inode, int idx)
462 533
463 mutex_lock(&allocated_ptys_lock); 534 mutex_lock(&allocated_ptys_lock);
464 ida_remove(&fsi->allocated_ptys, idx); 535 ida_remove(&fsi->allocated_ptys, idx);
536 pty_count--;
465 mutex_unlock(&allocated_ptys_lock); 537 mutex_unlock(&allocated_ptys_lock);
466} 538}
467 539
@@ -558,11 +630,15 @@ void devpts_pty_kill(struct tty_struct *tty)
558static int __init init_devpts_fs(void) 630static int __init init_devpts_fs(void)
559{ 631{
560 int err = register_filesystem(&devpts_fs_type); 632 int err = register_filesystem(&devpts_fs_type);
633 struct ctl_table_header *table;
634
561 if (!err) { 635 if (!err) {
636 table = register_sysctl_table(pty_root_table);
562 devpts_mnt = kern_mount(&devpts_fs_type); 637 devpts_mnt = kern_mount(&devpts_fs_type);
563 if (IS_ERR(devpts_mnt)) { 638 if (IS_ERR(devpts_mnt)) {
564 err = PTR_ERR(devpts_mnt); 639 err = PTR_ERR(devpts_mnt);
565 unregister_filesystem(&devpts_fs_type); 640 unregister_filesystem(&devpts_fs_type);
641 unregister_sysctl_table(table);
566 } 642 }
567 } 643 }
568 return err; 644 return err;
diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c
index 83641574b016..dc5eb598b81f 100644
--- a/fs/dlm/dir.c
+++ b/fs/dlm/dir.c
@@ -351,11 +351,28 @@ int dlm_dir_lookup(struct dlm_ls *ls, int nodeid, char *name, int namelen,
351static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len) 351static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len)
352{ 352{
353 struct dlm_rsb *r; 353 struct dlm_rsb *r;
354 uint32_t hash, bucket;
355 int rv;
356
357 hash = jhash(name, len, 0);
358 bucket = hash & (ls->ls_rsbtbl_size - 1);
359
360 spin_lock(&ls->ls_rsbtbl[bucket].lock);
361 rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].keep, name, len, 0, &r);
362 if (rv)
363 rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].toss,
364 name, len, 0, &r);
365 spin_unlock(&ls->ls_rsbtbl[bucket].lock);
366
367 if (!rv)
368 return r;
354 369
355 down_read(&ls->ls_root_sem); 370 down_read(&ls->ls_root_sem);
356 list_for_each_entry(r, &ls->ls_root_list, res_root_list) { 371 list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
357 if (len == r->res_length && !memcmp(name, r->res_name, len)) { 372 if (len == r->res_length && !memcmp(name, r->res_name, len)) {
358 up_read(&ls->ls_root_sem); 373 up_read(&ls->ls_root_sem);
374 log_error(ls, "find_rsb_root revert to root_list %s",
375 r->res_name);
359 return r; 376 return r;
360 } 377 }
361 } 378 }
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index d47183043c59..fa5c07d51dcc 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -411,8 +411,8 @@ static int rsb_cmp(struct dlm_rsb *r, const char *name, int nlen)
411 return memcmp(r->res_name, maxname, DLM_RESNAME_MAXLEN); 411 return memcmp(r->res_name, maxname, DLM_RESNAME_MAXLEN);
412} 412}
413 413
414static int search_rsb_tree(struct rb_root *tree, char *name, int len, 414int dlm_search_rsb_tree(struct rb_root *tree, char *name, int len,
415 unsigned int flags, struct dlm_rsb **r_ret) 415 unsigned int flags, struct dlm_rsb **r_ret)
416{ 416{
417 struct rb_node *node = tree->rb_node; 417 struct rb_node *node = tree->rb_node;
418 struct dlm_rsb *r; 418 struct dlm_rsb *r;
@@ -474,12 +474,12 @@ static int _search_rsb(struct dlm_ls *ls, char *name, int len, int b,
474 struct dlm_rsb *r; 474 struct dlm_rsb *r;
475 int error; 475 int error;
476 476
477 error = search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, flags, &r); 477 error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, flags, &r);
478 if (!error) { 478 if (!error) {
479 kref_get(&r->res_ref); 479 kref_get(&r->res_ref);
480 goto out; 480 goto out;
481 } 481 }
482 error = search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, flags, &r); 482 error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, flags, &r);
483 if (error) 483 if (error)
484 goto out; 484 goto out;
485 485
diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h
index 265017a7c3e7..1a255307f6ff 100644
--- a/fs/dlm/lock.h
+++ b/fs/dlm/lock.h
@@ -28,6 +28,9 @@ void dlm_scan_waiters(struct dlm_ls *ls);
28void dlm_scan_timeout(struct dlm_ls *ls); 28void dlm_scan_timeout(struct dlm_ls *ls);
29void dlm_adjust_timeouts(struct dlm_ls *ls); 29void dlm_adjust_timeouts(struct dlm_ls *ls);
30 30
31int dlm_search_rsb_tree(struct rb_root *tree, char *name, int len,
32 unsigned int flags, struct dlm_rsb **r_ret);
33
31int dlm_purge_locks(struct dlm_ls *ls); 34int dlm_purge_locks(struct dlm_ls *ls);
32void dlm_purge_mstcpy_locks(struct dlm_rsb *r); 35void dlm_purge_mstcpy_locks(struct dlm_rsb *r);
33void dlm_grant_after_purge(struct dlm_ls *ls); 36void dlm_grant_after_purge(struct dlm_ls *ls);
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 0b3109ee4257..133ef6dc7cb7 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -52,6 +52,7 @@
52#include <linux/mutex.h> 52#include <linux/mutex.h>
53#include <linux/sctp.h> 53#include <linux/sctp.h>
54#include <linux/slab.h> 54#include <linux/slab.h>
55#include <net/sctp/sctp.h>
55#include <net/sctp/user.h> 56#include <net/sctp/user.h>
56#include <net/ipv6.h> 57#include <net/ipv6.h>
57 58
@@ -474,9 +475,6 @@ static void process_sctp_notification(struct connection *con,
474 int prim_len, ret; 475 int prim_len, ret;
475 int addr_len; 476 int addr_len;
476 struct connection *new_con; 477 struct connection *new_con;
477 sctp_peeloff_arg_t parg;
478 int parglen = sizeof(parg);
479 int err;
480 478
481 /* 479 /*
482 * We get this before any data for an association. 480 * We get this before any data for an association.
@@ -525,23 +523,19 @@ static void process_sctp_notification(struct connection *con,
525 return; 523 return;
526 524
527 /* Peel off a new sock */ 525 /* Peel off a new sock */
528 parg.associd = sn->sn_assoc_change.sac_assoc_id; 526 sctp_lock_sock(con->sock->sk);
529 ret = kernel_getsockopt(con->sock, IPPROTO_SCTP, 527 ret = sctp_do_peeloff(con->sock->sk,
530 SCTP_SOCKOPT_PEELOFF, 528 sn->sn_assoc_change.sac_assoc_id,
531 (void *)&parg, &parglen); 529 &new_con->sock);
530 sctp_release_sock(con->sock->sk);
532 if (ret < 0) { 531 if (ret < 0) {
533 log_print("Can't peel off a socket for " 532 log_print("Can't peel off a socket for "
534 "connection %d to node %d: err=%d", 533 "connection %d to node %d: err=%d",
535 parg.associd, nodeid, ret); 534 (int)sn->sn_assoc_change.sac_assoc_id,
536 return; 535 nodeid, ret);
537 }
538 new_con->sock = sockfd_lookup(parg.sd, &err);
539 if (!new_con->sock) {
540 log_print("sockfd_lookup error %d", err);
541 return; 536 return;
542 } 537 }
543 add_sock(new_con->sock, new_con); 538 add_sock(new_con->sock, new_con);
544 sockfd_put(new_con->sock);
545 539
546 log_print("connecting to %d sctp association %d", 540 log_print("connecting to %d sctp association %d",
547 nodeid, (int)sn->sn_assoc_change.sac_assoc_id); 541 nodeid, (int)sn->sn_assoc_change.sac_assoc_id);
@@ -1082,7 +1076,7 @@ static void init_local(void)
1082 int i; 1076 int i;
1083 1077
1084 dlm_local_count = 0; 1078 dlm_local_count = 0;
1085 for (i = 0; i < DLM_MAX_ADDR_COUNT - 1; i++) { 1079 for (i = 0; i < DLM_MAX_ADDR_COUNT; i++) {
1086 if (dlm_our_addr(&sas, i)) 1080 if (dlm_our_addr(&sas, i))
1087 break; 1081 break;
1088 1082
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index d3f95f941c47..2b17f2f9b121 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -48,8 +48,7 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb,
48 unsigned long nr_segs, loff_t pos) 48 unsigned long nr_segs, loff_t pos)
49{ 49{
50 ssize_t rc; 50 ssize_t rc;
51 struct dentry *lower_dentry; 51 struct path lower;
52 struct vfsmount *lower_vfsmount;
53 struct file *file = iocb->ki_filp; 52 struct file *file = iocb->ki_filp;
54 53
55 rc = generic_file_aio_read(iocb, iov, nr_segs, pos); 54 rc = generic_file_aio_read(iocb, iov, nr_segs, pos);
@@ -60,9 +59,9 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb,
60 if (-EIOCBQUEUED == rc) 59 if (-EIOCBQUEUED == rc)
61 rc = wait_on_sync_kiocb(iocb); 60 rc = wait_on_sync_kiocb(iocb);
62 if (rc >= 0) { 61 if (rc >= 0) {
63 lower_dentry = ecryptfs_dentry_to_lower(file->f_path.dentry); 62 lower.dentry = ecryptfs_dentry_to_lower(file->f_path.dentry);
64 lower_vfsmount = ecryptfs_dentry_to_lower_mnt(file->f_path.dentry); 63 lower.mnt = ecryptfs_dentry_to_lower_mnt(file->f_path.dentry);
65 touch_atime(lower_vfsmount, lower_dentry); 64 touch_atime(&lower);
66 } 65 }
67 return rc; 66 return rc;
68} 67}
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index b4a6befb1216..68954937a071 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -550,9 +550,8 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
550 if (IS_ERR(inode)) 550 if (IS_ERR(inode))
551 goto out_free; 551 goto out_free;
552 552
553 s->s_root = d_alloc_root(inode); 553 s->s_root = d_make_root(inode);
554 if (!s->s_root) { 554 if (!s->s_root) {
555 iput(inode);
556 rc = -ENOMEM; 555 rc = -ENOMEM;
557 goto out_free; 556 goto out_free;
558 } 557 }
@@ -795,15 +794,10 @@ static int __init ecryptfs_init(void)
795 "Failed to allocate one or more kmem_cache objects\n"); 794 "Failed to allocate one or more kmem_cache objects\n");
796 goto out; 795 goto out;
797 } 796 }
798 rc = register_filesystem(&ecryptfs_fs_type);
799 if (rc) {
800 printk(KERN_ERR "Failed to register filesystem\n");
801 goto out_free_kmem_caches;
802 }
803 rc = do_sysfs_registration(); 797 rc = do_sysfs_registration();
804 if (rc) { 798 if (rc) {
805 printk(KERN_ERR "sysfs registration failed\n"); 799 printk(KERN_ERR "sysfs registration failed\n");
806 goto out_unregister_filesystem; 800 goto out_free_kmem_caches;
807 } 801 }
808 rc = ecryptfs_init_kthread(); 802 rc = ecryptfs_init_kthread();
809 if (rc) { 803 if (rc) {
@@ -824,19 +818,24 @@ static int __init ecryptfs_init(void)
824 "rc = [%d]\n", rc); 818 "rc = [%d]\n", rc);
825 goto out_release_messaging; 819 goto out_release_messaging;
826 } 820 }
821 rc = register_filesystem(&ecryptfs_fs_type);
822 if (rc) {
823 printk(KERN_ERR "Failed to register filesystem\n");
824 goto out_destroy_crypto;
825 }
827 if (ecryptfs_verbosity > 0) 826 if (ecryptfs_verbosity > 0)
828 printk(KERN_CRIT "eCryptfs verbosity set to %d. Secret values " 827 printk(KERN_CRIT "eCryptfs verbosity set to %d. Secret values "
829 "will be written to the syslog!\n", ecryptfs_verbosity); 828 "will be written to the syslog!\n", ecryptfs_verbosity);
830 829
831 goto out; 830 goto out;
831out_destroy_crypto:
832 ecryptfs_destroy_crypto();
832out_release_messaging: 833out_release_messaging:
833 ecryptfs_release_messaging(); 834 ecryptfs_release_messaging();
834out_destroy_kthread: 835out_destroy_kthread:
835 ecryptfs_destroy_kthread(); 836 ecryptfs_destroy_kthread();
836out_do_sysfs_unregistration: 837out_do_sysfs_unregistration:
837 do_sysfs_unregistration(); 838 do_sysfs_unregistration();
838out_unregister_filesystem:
839 unregister_filesystem(&ecryptfs_fs_type);
840out_free_kmem_caches: 839out_free_kmem_caches:
841 ecryptfs_free_kmem_caches(); 840 ecryptfs_free_kmem_caches();
842out: 841out:
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index cf152823bbf4..2dd946b636d2 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -184,7 +184,6 @@ static int ecryptfs_show_options(struct seq_file *m, struct dentry *root)
184const struct super_operations ecryptfs_sops = { 184const struct super_operations ecryptfs_sops = {
185 .alloc_inode = ecryptfs_alloc_inode, 185 .alloc_inode = ecryptfs_alloc_inode,
186 .destroy_inode = ecryptfs_destroy_inode, 186 .destroy_inode = ecryptfs_destroy_inode,
187 .drop_inode = generic_drop_inode,
188 .statfs = ecryptfs_statfs, 187 .statfs = ecryptfs_statfs,
189 .remount_fs = NULL, 188 .remount_fs = NULL,
190 .evict_inode = ecryptfs_evict_inode, 189 .evict_inode = ecryptfs_evict_inode,
diff --git a/fs/efs/super.c b/fs/efs/super.c
index 981106429a9f..e755ec746c69 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -317,10 +317,9 @@ static int efs_fill_super(struct super_block *s, void *d, int silent)
317 goto out_no_fs; 317 goto out_no_fs;
318 } 318 }
319 319
320 s->s_root = d_alloc_root(root); 320 s->s_root = d_make_root(root);
321 if (!(s->s_root)) { 321 if (!(s->s_root)) {
322 printk(KERN_ERR "EFS: get root dentry failed\n"); 322 printk(KERN_ERR "EFS: get root dentry failed\n");
323 iput(root);
324 ret = -ENOMEM; 323 ret = -ENOMEM;
325 goto out_no_fs; 324 goto out_no_fs;
326 } 325 }
diff --git a/fs/eventfd.c b/fs/eventfd.c
index d9a591773919..dba15fecf23e 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -16,7 +16,7 @@
16#include <linux/spinlock.h> 16#include <linux/spinlock.h>
17#include <linux/anon_inodes.h> 17#include <linux/anon_inodes.h>
18#include <linux/syscalls.h> 18#include <linux/syscalls.h>
19#include <linux/module.h> 19#include <linux/export.h>
20#include <linux/kref.h> 20#include <linux/kref.h>
21#include <linux/eventfd.h> 21#include <linux/eventfd.h>
22 22
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index ea54cdef04dd..629e9ed99d0f 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -427,6 +427,31 @@ out_unlock:
427 return error; 427 return error;
428} 428}
429 429
430/*
431 * As described in commit 0ccf831cb lockdep: annotate epoll
432 * the use of wait queues used by epoll is done in a very controlled
433 * manner. Wake ups can nest inside each other, but are never done
434 * with the same locking. For example:
435 *
436 * dfd = socket(...);
437 * efd1 = epoll_create();
438 * efd2 = epoll_create();
439 * epoll_ctl(efd1, EPOLL_CTL_ADD, dfd, ...);
440 * epoll_ctl(efd2, EPOLL_CTL_ADD, efd1, ...);
441 *
442 * When a packet arrives to the device underneath "dfd", the net code will
443 * issue a wake_up() on its poll wake list. Epoll (efd1) has installed a
444 * callback wakeup entry on that queue, and the wake_up() performed by the
445 * "dfd" net code will end up in ep_poll_callback(). At this point epoll
446 * (efd1) notices that it may have some event ready, so it needs to wake up
447 * the waiters on its poll wait list (efd2). So it calls ep_poll_safewake()
448 * that ends up in another wake_up(), after having checked about the
449 * recursion constraints. That are, no more than EP_MAX_POLLWAKE_NESTS, to
450 * avoid stack blasting.
451 *
452 * When CONFIG_DEBUG_LOCK_ALLOC is enabled, make sure lockdep can handle
453 * this special case of epoll.
454 */
430#ifdef CONFIG_DEBUG_LOCK_ALLOC 455#ifdef CONFIG_DEBUG_LOCK_ALLOC
431static inline void ep_wake_up_nested(wait_queue_head_t *wqueue, 456static inline void ep_wake_up_nested(wait_queue_head_t *wqueue,
432 unsigned long events, int subclass) 457 unsigned long events, int subclass)
@@ -699,9 +724,12 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
699 void *priv) 724 void *priv)
700{ 725{
701 struct epitem *epi, *tmp; 726 struct epitem *epi, *tmp;
727 poll_table pt;
702 728
729 init_poll_funcptr(&pt, NULL);
703 list_for_each_entry_safe(epi, tmp, head, rdllink) { 730 list_for_each_entry_safe(epi, tmp, head, rdllink) {
704 if (epi->ffd.file->f_op->poll(epi->ffd.file, NULL) & 731 pt._key = epi->event.events;
732 if (epi->ffd.file->f_op->poll(epi->ffd.file, &pt) &
705 epi->event.events) 733 epi->event.events)
706 return POLLIN | POLLRDNORM; 734 return POLLIN | POLLRDNORM;
707 else { 735 else {
@@ -988,6 +1016,10 @@ static int path_count[PATH_ARR_SIZE];
988 1016
989static int path_count_inc(int nests) 1017static int path_count_inc(int nests)
990{ 1018{
1019 /* Allow an arbitrary number of depth 1 paths */
1020 if (nests == 0)
1021 return 0;
1022
991 if (++path_count[nests] > path_limits[nests]) 1023 if (++path_count[nests] > path_limits[nests])
992 return -1; 1024 return -1;
993 return 0; 1025 return 0;
@@ -1045,13 +1077,11 @@ static int reverse_path_check_proc(void *priv, void *cookie, int call_nests)
1045 */ 1077 */
1046static int reverse_path_check(void) 1078static int reverse_path_check(void)
1047{ 1079{
1048 int length = 0;
1049 int error = 0; 1080 int error = 0;
1050 struct file *current_file; 1081 struct file *current_file;
1051 1082
1052 /* let's call this for all tfiles */ 1083 /* let's call this for all tfiles */
1053 list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) { 1084 list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) {
1054 length++;
1055 path_count_init(); 1085 path_count_init();
1056 error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS, 1086 error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
1057 reverse_path_check_proc, current_file, 1087 reverse_path_check_proc, current_file,
@@ -1093,6 +1123,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
1093 /* Initialize the poll table using the queue callback */ 1123 /* Initialize the poll table using the queue callback */
1094 epq.epi = epi; 1124 epq.epi = epi;
1095 init_poll_funcptr(&epq.pt, ep_ptable_queue_proc); 1125 init_poll_funcptr(&epq.pt, ep_ptable_queue_proc);
1126 epq.pt._key = event->events;
1096 1127
1097 /* 1128 /*
1098 * Attach the item to the poll hooks and get current event bits. 1129 * Attach the item to the poll hooks and get current event bits.
@@ -1187,6 +1218,9 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
1187{ 1218{
1188 int pwake = 0; 1219 int pwake = 0;
1189 unsigned int revents; 1220 unsigned int revents;
1221 poll_table pt;
1222
1223 init_poll_funcptr(&pt, NULL);
1190 1224
1191 /* 1225 /*
1192 * Set the new event interest mask before calling f_op->poll(); 1226 * Set the new event interest mask before calling f_op->poll();
@@ -1194,13 +1228,14 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
1194 * f_op->poll() call and the new event set registering. 1228 * f_op->poll() call and the new event set registering.
1195 */ 1229 */
1196 epi->event.events = event->events; 1230 epi->event.events = event->events;
1231 pt._key = event->events;
1197 epi->event.data = event->data; /* protected by mtx */ 1232 epi->event.data = event->data; /* protected by mtx */
1198 1233
1199 /* 1234 /*
1200 * Get current event bits. We can safely use the file* here because 1235 * Get current event bits. We can safely use the file* here because
1201 * its usage count has been increased by the caller of this function. 1236 * its usage count has been increased by the caller of this function.
1202 */ 1237 */
1203 revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL); 1238 revents = epi->ffd.file->f_op->poll(epi->ffd.file, &pt);
1204 1239
1205 /* 1240 /*
1206 * If the item is "hot" and it is not registered inside the ready 1241 * If the item is "hot" and it is not registered inside the ready
@@ -1235,6 +1270,9 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
1235 unsigned int revents; 1270 unsigned int revents;
1236 struct epitem *epi; 1271 struct epitem *epi;
1237 struct epoll_event __user *uevent; 1272 struct epoll_event __user *uevent;
1273 poll_table pt;
1274
1275 init_poll_funcptr(&pt, NULL);
1238 1276
1239 /* 1277 /*
1240 * We can loop without lock because we are passed a task private list. 1278 * We can loop without lock because we are passed a task private list.
@@ -1247,7 +1285,8 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
1247 1285
1248 list_del_init(&epi->rdllink); 1286 list_del_init(&epi->rdllink);
1249 1287
1250 revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL) & 1288 pt._key = epi->event.events;
1289 revents = epi->ffd.file->f_op->poll(epi->ffd.file, &pt) &
1251 epi->event.events; 1290 epi->event.events;
1252 1291
1253 /* 1292 /*
diff --git a/fs/exec.c b/fs/exec.c
index 92ce83a11e90..23559c227d9c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -63,6 +63,8 @@
63#include <trace/events/task.h> 63#include <trace/events/task.h>
64#include "internal.h" 64#include "internal.h"
65 65
66#include <trace/events/sched.h>
67
66int core_uses_pid; 68int core_uses_pid;
67char core_pattern[CORENAME_MAX_SIZE] = "core"; 69char core_pattern[CORENAME_MAX_SIZE] = "core";
68unsigned int core_pipe_limit; 70unsigned int core_pipe_limit;
@@ -79,15 +81,13 @@ static atomic_t call_count = ATOMIC_INIT(1);
79static LIST_HEAD(formats); 81static LIST_HEAD(formats);
80static DEFINE_RWLOCK(binfmt_lock); 82static DEFINE_RWLOCK(binfmt_lock);
81 83
82int __register_binfmt(struct linux_binfmt * fmt, int insert) 84void __register_binfmt(struct linux_binfmt * fmt, int insert)
83{ 85{
84 if (!fmt) 86 BUG_ON(!fmt);
85 return -EINVAL;
86 write_lock(&binfmt_lock); 87 write_lock(&binfmt_lock);
87 insert ? list_add(&fmt->lh, &formats) : 88 insert ? list_add(&fmt->lh, &formats) :
88 list_add_tail(&fmt->lh, &formats); 89 list_add_tail(&fmt->lh, &formats);
89 write_unlock(&binfmt_lock); 90 write_unlock(&binfmt_lock);
90 return 0;
91} 91}
92 92
93EXPORT_SYMBOL(__register_binfmt); 93EXPORT_SYMBOL(__register_binfmt);
@@ -822,7 +822,7 @@ static int exec_mmap(struct mm_struct *mm)
822 /* Notify parent that we're no longer interested in the old VM */ 822 /* Notify parent that we're no longer interested in the old VM */
823 tsk = current; 823 tsk = current;
824 old_mm = current->mm; 824 old_mm = current->mm;
825 sync_mm_rss(tsk, old_mm); 825 sync_mm_rss(old_mm);
826 mm_release(tsk, old_mm); 826 mm_release(tsk, old_mm);
827 827
828 if (old_mm) { 828 if (old_mm) {
@@ -848,6 +848,7 @@ static int exec_mmap(struct mm_struct *mm)
848 if (old_mm) { 848 if (old_mm) {
849 up_read(&old_mm->mmap_sem); 849 up_read(&old_mm->mmap_sem);
850 BUG_ON(active_mm != old_mm); 850 BUG_ON(active_mm != old_mm);
851 setmax_mm_hiwater_rss(&tsk->signal->maxrss, old_mm);
851 mm_update_next_owner(old_mm); 852 mm_update_next_owner(old_mm);
852 mmput(old_mm); 853 mmput(old_mm);
853 return 0; 854 return 0;
@@ -975,8 +976,8 @@ static int de_thread(struct task_struct *tsk)
975 sig->notify_count = 0; 976 sig->notify_count = 0;
976 977
977no_thread_group: 978no_thread_group:
978 if (current->mm) 979 /* we have changed execution domain */
979 setmax_mm_hiwater_rss(&sig->maxrss, current->mm); 980 tsk->exit_signal = SIGCHLD;
980 981
981 exit_itimers(sig); 982 exit_itimers(sig);
982 flush_itimer_signals(); 983 flush_itimer_signals();
@@ -1112,7 +1113,7 @@ int flush_old_exec(struct linux_binprm * bprm)
1112 bprm->mm = NULL; /* We're using it now */ 1113 bprm->mm = NULL; /* We're using it now */
1113 1114
1114 set_fs(USER_DS); 1115 set_fs(USER_DS);
1115 current->flags &= ~(PF_RANDOMIZE | PF_KTHREAD); 1116 current->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD);
1116 flush_thread(); 1117 flush_thread();
1117 current->personality &= ~bprm->per_clear; 1118 current->personality &= ~bprm->per_clear;
1118 1119
@@ -1339,13 +1340,13 @@ int remove_arg_zero(struct linux_binprm *bprm)
1339 ret = -EFAULT; 1340 ret = -EFAULT;
1340 goto out; 1341 goto out;
1341 } 1342 }
1342 kaddr = kmap_atomic(page, KM_USER0); 1343 kaddr = kmap_atomic(page);
1343 1344
1344 for (; offset < PAGE_SIZE && kaddr[offset]; 1345 for (; offset < PAGE_SIZE && kaddr[offset];
1345 offset++, bprm->p++) 1346 offset++, bprm->p++)
1346 ; 1347 ;
1347 1348
1348 kunmap_atomic(kaddr, KM_USER0); 1349 kunmap_atomic(kaddr);
1349 put_arg_page(page); 1350 put_arg_page(page);
1350 1351
1351 if (offset == PAGE_SIZE) 1352 if (offset == PAGE_SIZE)
@@ -1402,9 +1403,10 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
1402 */ 1403 */
1403 bprm->recursion_depth = depth; 1404 bprm->recursion_depth = depth;
1404 if (retval >= 0) { 1405 if (retval >= 0) {
1405 if (depth == 0) 1406 if (depth == 0) {
1406 ptrace_event(PTRACE_EVENT_EXEC, 1407 trace_sched_process_exec(current, old_pid, bprm);
1407 old_pid); 1408 ptrace_event(PTRACE_EVENT_EXEC, old_pid);
1409 }
1408 put_binfmt(fmt); 1410 put_binfmt(fmt);
1409 allow_write_access(bprm->file); 1411 allow_write_access(bprm->file);
1410 if (bprm->file) 1412 if (bprm->file)
@@ -1915,7 +1917,6 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
1915{ 1917{
1916 struct task_struct *tsk = current; 1918 struct task_struct *tsk = current;
1917 struct mm_struct *mm = tsk->mm; 1919 struct mm_struct *mm = tsk->mm;
1918 struct completion *vfork_done;
1919 int core_waiters = -EBUSY; 1920 int core_waiters = -EBUSY;
1920 1921
1921 init_completion(&core_state->startup); 1922 init_completion(&core_state->startup);
@@ -1927,22 +1928,9 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
1927 core_waiters = zap_threads(tsk, mm, core_state, exit_code); 1928 core_waiters = zap_threads(tsk, mm, core_state, exit_code);
1928 up_write(&mm->mmap_sem); 1929 up_write(&mm->mmap_sem);
1929 1930
1930 if (unlikely(core_waiters < 0)) 1931 if (core_waiters > 0)
1931 goto fail;
1932
1933 /*
1934 * Make sure nobody is waiting for us to release the VM,
1935 * otherwise we can deadlock when we wait on each other
1936 */
1937 vfork_done = tsk->vfork_done;
1938 if (vfork_done) {
1939 tsk->vfork_done = NULL;
1940 complete(vfork_done);
1941 }
1942
1943 if (core_waiters)
1944 wait_for_completion(&core_state->startup); 1932 wait_for_completion(&core_state->startup);
1945fail: 1933
1946 return core_waiters; 1934 return core_waiters;
1947} 1935}
1948 1936
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c
index 80405836ba6e..c61e62ac231c 100644
--- a/fs/exofs/dir.c
+++ b/fs/exofs/dir.c
@@ -597,7 +597,7 @@ int exofs_make_empty(struct inode *inode, struct inode *parent)
597 goto fail; 597 goto fail;
598 } 598 }
599 599
600 kaddr = kmap_atomic(page, KM_USER0); 600 kaddr = kmap_atomic(page);
601 de = (struct exofs_dir_entry *)kaddr; 601 de = (struct exofs_dir_entry *)kaddr;
602 de->name_len = 1; 602 de->name_len = 1;
603 de->rec_len = cpu_to_le16(EXOFS_DIR_REC_LEN(1)); 603 de->rec_len = cpu_to_le16(EXOFS_DIR_REC_LEN(1));
@@ -611,7 +611,7 @@ int exofs_make_empty(struct inode *inode, struct inode *parent)
611 de->inode_no = cpu_to_le64(parent->i_ino); 611 de->inode_no = cpu_to_le64(parent->i_ino);
612 memcpy(de->name, PARENT_DIR, sizeof(PARENT_DIR)); 612 memcpy(de->name, PARENT_DIR, sizeof(PARENT_DIR));
613 exofs_set_de_type(de, inode); 613 exofs_set_de_type(de, inode);
614 kunmap_atomic(kaddr, KM_USER0); 614 kunmap_atomic(kaddr);
615 err = exofs_commit_chunk(page, 0, chunk_size); 615 err = exofs_commit_chunk(page, 0, chunk_size);
616fail: 616fail:
617 page_cache_release(page); 617 page_cache_release(page);
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index 9dbf0c301030..fc7161d6bf6b 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -143,9 +143,6 @@ static int exofs_link(struct dentry *old_dentry, struct inode *dir,
143{ 143{
144 struct inode *inode = old_dentry->d_inode; 144 struct inode *inode = old_dentry->d_inode;
145 145
146 if (inode->i_nlink >= EXOFS_LINK_MAX)
147 return -EMLINK;
148
149 inode->i_ctime = CURRENT_TIME; 146 inode->i_ctime = CURRENT_TIME;
150 inode_inc_link_count(inode); 147 inode_inc_link_count(inode);
151 ihold(inode); 148 ihold(inode);
@@ -156,10 +153,7 @@ static int exofs_link(struct dentry *old_dentry, struct inode *dir,
156static int exofs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) 153static int exofs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
157{ 154{
158 struct inode *inode; 155 struct inode *inode;
159 int err = -EMLINK; 156 int err;
160
161 if (dir->i_nlink >= EXOFS_LINK_MAX)
162 goto out;
163 157
164 inode_inc_link_count(dir); 158 inode_inc_link_count(dir);
165 159
@@ -275,11 +269,6 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
275 if (err) 269 if (err)
276 goto out_dir; 270 goto out_dir;
277 } else { 271 } else {
278 if (dir_de) {
279 err = -EMLINK;
280 if (new_dir->i_nlink >= EXOFS_LINK_MAX)
281 goto out_dir;
282 }
283 err = exofs_add_link(new_dentry, old_inode); 272 err = exofs_add_link(new_dentry, old_inode);
284 if (err) 273 if (err)
285 goto out_dir; 274 goto out_dir;
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index d22cd168c6ee..7f2b590a36b7 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -754,6 +754,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
754 sb->s_blocksize = EXOFS_BLKSIZE; 754 sb->s_blocksize = EXOFS_BLKSIZE;
755 sb->s_blocksize_bits = EXOFS_BLKSHIFT; 755 sb->s_blocksize_bits = EXOFS_BLKSHIFT;
756 sb->s_maxbytes = MAX_LFS_FILESIZE; 756 sb->s_maxbytes = MAX_LFS_FILESIZE;
757 sb->s_max_links = EXOFS_LINK_MAX;
757 atomic_set(&sbi->s_curr_pending, 0); 758 atomic_set(&sbi->s_curr_pending, 0);
758 sb->s_bdev = NULL; 759 sb->s_bdev = NULL;
759 sb->s_dev = 0; 760 sb->s_dev = 0;
@@ -818,9 +819,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
818 ret = PTR_ERR(root); 819 ret = PTR_ERR(root);
819 goto free_sbi; 820 goto free_sbi;
820 } 821 }
821 sb->s_root = d_alloc_root(root); 822 sb->s_root = d_make_root(root);
822 if (!sb->s_root) { 823 if (!sb->s_root) {
823 iput(root);
824 EXOFS_ERR("ERROR: get root inode failed\n"); 824 EXOFS_ERR("ERROR: get root inode failed\n");
825 ret = -ENOMEM; 825 ret = -ENOMEM;
826 goto free_sbi; 826 goto free_sbi;
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index d37df352d324..0f4f5c929257 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -645,7 +645,7 @@ int ext2_make_empty(struct inode *inode, struct inode *parent)
645 unlock_page(page); 645 unlock_page(page);
646 goto fail; 646 goto fail;
647 } 647 }
648 kaddr = kmap_atomic(page, KM_USER0); 648 kaddr = kmap_atomic(page);
649 memset(kaddr, 0, chunk_size); 649 memset(kaddr, 0, chunk_size);
650 de = (struct ext2_dir_entry_2 *)kaddr; 650 de = (struct ext2_dir_entry_2 *)kaddr;
651 de->name_len = 1; 651 de->name_len = 1;
@@ -660,7 +660,7 @@ int ext2_make_empty(struct inode *inode, struct inode *parent)
660 de->inode = cpu_to_le32(parent->i_ino); 660 de->inode = cpu_to_le32(parent->i_ino);
661 memcpy (de->name, "..\0", 4); 661 memcpy (de->name, "..\0", 4);
662 ext2_set_de_type (de, inode); 662 ext2_set_de_type (de, inode);
663 kunmap_atomic(kaddr, KM_USER0); 663 kunmap_atomic(kaddr);
664 err = ext2_commit_chunk(page, 0, chunk_size); 664 err = ext2_commit_chunk(page, 0, chunk_size);
665fail: 665fail:
666 page_cache_release(page); 666 page_cache_release(page);
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 080419814bae..dffb86536285 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -195,9 +195,6 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir,
195 struct inode *inode = old_dentry->d_inode; 195 struct inode *inode = old_dentry->d_inode;
196 int err; 196 int err;
197 197
198 if (inode->i_nlink >= EXT2_LINK_MAX)
199 return -EMLINK;
200
201 dquot_initialize(dir); 198 dquot_initialize(dir);
202 199
203 inode->i_ctime = CURRENT_TIME_SEC; 200 inode->i_ctime = CURRENT_TIME_SEC;
@@ -217,10 +214,7 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir,
217static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) 214static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
218{ 215{
219 struct inode * inode; 216 struct inode * inode;
220 int err = -EMLINK; 217 int err;
221
222 if (dir->i_nlink >= EXT2_LINK_MAX)
223 goto out;
224 218
225 dquot_initialize(dir); 219 dquot_initialize(dir);
226 220
@@ -346,11 +340,6 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
346 drop_nlink(new_inode); 340 drop_nlink(new_inode);
347 inode_dec_link_count(new_inode); 341 inode_dec_link_count(new_inode);
348 } else { 342 } else {
349 if (dir_de) {
350 err = -EMLINK;
351 if (new_dir->i_nlink >= EXT2_LINK_MAX)
352 goto out_dir;
353 }
354 err = ext2_add_link(new_dentry, old_inode); 343 err = ext2_add_link(new_dentry, old_inode);
355 if (err) 344 if (err)
356 goto out_dir; 345 goto out_dir;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 0090595beb28..e1025c7a437a 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -919,6 +919,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
919 } 919 }
920 920
921 sb->s_maxbytes = ext2_max_size(sb->s_blocksize_bits); 921 sb->s_maxbytes = ext2_max_size(sb->s_blocksize_bits);
922 sb->s_max_links = EXT2_LINK_MAX;
922 923
923 if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV) { 924 if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV) {
924 sbi->s_inode_size = EXT2_GOOD_OLD_INODE_SIZE; 925 sbi->s_inode_size = EXT2_GOOD_OLD_INODE_SIZE;
@@ -1087,9 +1088,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
1087 goto failed_mount3; 1088 goto failed_mount3;
1088 } 1089 }
1089 1090
1090 sb->s_root = d_alloc_root(root); 1091 sb->s_root = d_make_root(root);
1091 if (!sb->s_root) { 1092 if (!sb->s_root) {
1092 iput(root);
1093 ext2_msg(sb, KERN_ERR, "error: get root inode failed"); 1093 ext2_msg(sb, KERN_ERR, "error: get root inode failed");
1094 ret = -ENOMEM; 1094 ret = -ENOMEM;
1095 goto failed_mount3; 1095 goto failed_mount3;
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 726c7ef6cdf1..e0b45b93327b 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2046,10 +2046,9 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
2046 ext3_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck"); 2046 ext3_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck");
2047 goto failed_mount3; 2047 goto failed_mount3;
2048 } 2048 }
2049 sb->s_root = d_alloc_root(root); 2049 sb->s_root = d_make_root(root);
2050 if (!sb->s_root) { 2050 if (!sb->s_root) {
2051 ext3_msg(sb, KERN_ERR, "error: get root dentry failed"); 2051 ext3_msg(sb, KERN_ERR, "error: get root dentry failed");
2052 iput(root);
2053 ret = -ENOMEM; 2052 ret = -ENOMEM;
2054 goto failed_mount3; 2053 goto failed_mount3;
2055 } 2054 }
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 502c61fd7392..933900909ed0 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3735,9 +3735,8 @@ no_journal:
3735 iput(root); 3735 iput(root);
3736 goto failed_mount4; 3736 goto failed_mount4;
3737 } 3737 }
3738 sb->s_root = d_alloc_root(root); 3738 sb->s_root = d_make_root(root);
3739 if (!sb->s_root) { 3739 if (!sb->s_root) {
3740 iput(root);
3741 ext4_msg(sb, KERN_ERR, "get root dentry failed"); 3740 ext4_msg(sb, KERN_ERR, "get root dentry failed");
3742 ret = -ENOMEM; 3741 ret = -ENOMEM;
3743 goto failed_mount4; 3742 goto failed_mount4;
@@ -5056,6 +5055,9 @@ static int __init ext4_init_fs(void)
5056{ 5055{
5057 int i, err; 5056 int i, err;
5058 5057
5058 ext4_li_info = NULL;
5059 mutex_init(&ext4_li_mtx);
5060
5059 ext4_check_flag_values(); 5061 ext4_check_flag_values();
5060 5062
5061 for (i = 0; i < EXT4_WQ_HASH_SZ; i++) { 5063 for (i = 0; i < EXT4_WQ_HASH_SZ; i++) {
@@ -5094,8 +5096,6 @@ static int __init ext4_init_fs(void)
5094 if (err) 5096 if (err)
5095 goto out; 5097 goto out;
5096 5098
5097 ext4_li_info = NULL;
5098 mutex_init(&ext4_li_mtx);
5099 return 0; 5099 return 0;
5100out: 5100out:
5101 unregister_as_ext2(); 5101 unregister_as_ext2();
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 3ab841054d53..21687e31acc0 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1496,11 +1496,13 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
1496 root_inode->i_ino = MSDOS_ROOT_INO; 1496 root_inode->i_ino = MSDOS_ROOT_INO;
1497 root_inode->i_version = 1; 1497 root_inode->i_version = 1;
1498 error = fat_read_root(root_inode); 1498 error = fat_read_root(root_inode);
1499 if (error < 0) 1499 if (error < 0) {
1500 iput(root_inode);
1500 goto out_fail; 1501 goto out_fail;
1502 }
1501 error = -ENOMEM; 1503 error = -ENOMEM;
1502 insert_inode_hash(root_inode); 1504 insert_inode_hash(root_inode);
1503 sb->s_root = d_alloc_root(root_inode); 1505 sb->s_root = d_make_root(root_inode);
1504 if (!sb->s_root) { 1506 if (!sb->s_root) {
1505 fat_msg(sb, KERN_ERR, "get root inode failed"); 1507 fat_msg(sb, KERN_ERR, "get root inode failed");
1506 goto out_fail; 1508 goto out_fail;
@@ -1516,8 +1518,6 @@ out_invalid:
1516out_fail: 1518out_fail:
1517 if (fat_inode) 1519 if (fat_inode)
1518 iput(fat_inode); 1520 iput(fat_inode);
1519 if (root_inode)
1520 iput(root_inode);
1521 unload_nls(sbi->nls_io); 1521 unload_nls(sbi->nls_io);
1522 unload_nls(sbi->nls_disk); 1522 unload_nls(sbi->nls_disk);
1523 if (sbi->options.iocharset != fat_default_iocharset) 1523 if (sbi->options.iocharset != fat_default_iocharset)
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index a81eb2367d39..98ae804f5273 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -521,57 +521,46 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname,
521 521
522 op = &outname[*outlen * sizeof(wchar_t)]; 522 op = &outname[*outlen * sizeof(wchar_t)];
523 } else { 523 } else {
524 if (nls) { 524 for (i = 0, ip = name, op = outname, *outlen = 0;
525 for (i = 0, ip = name, op = outname, *outlen = 0; 525 i < len && *outlen < FAT_LFN_LEN;
526 i < len && *outlen <= FAT_LFN_LEN; 526 *outlen += 1) {
527 *outlen += 1) 527 if (escape && (*ip == ':')) {
528 { 528 if (i > len - 5)
529 if (escape && (*ip == ':')) { 529 return -EINVAL;
530 if (i > len - 5) 530 ec = 0;
531 return -EINVAL; 531 for (k = 1; k < 5; k++) {
532 ec = 0; 532 nc = ip[k];
533 for (k = 1; k < 5; k++) { 533 ec <<= 4;
534 nc = ip[k]; 534 if (nc >= '0' && nc <= '9') {
535 ec <<= 4; 535 ec |= nc - '0';
536 if (nc >= '0' && nc <= '9') { 536 continue;
537 ec |= nc - '0';
538 continue;
539 }
540 if (nc >= 'a' && nc <= 'f') {
541 ec |= nc - ('a' - 10);
542 continue;
543 }
544 if (nc >= 'A' && nc <= 'F') {
545 ec |= nc - ('A' - 10);
546 continue;
547 }
548 return -EINVAL;
549 } 537 }
550 *op++ = ec & 0xFF; 538 if (nc >= 'a' && nc <= 'f') {
551 *op++ = ec >> 8; 539 ec |= nc - ('a' - 10);
552 ip += 5; 540 continue;
553 i += 5; 541 }
554 } else { 542 if (nc >= 'A' && nc <= 'F') {
555 if ((charlen = nls->char2uni(ip, len - i, (wchar_t *)op)) < 0) 543 ec |= nc - ('A' - 10);
556 return -EINVAL; 544 continue;
557 ip += charlen; 545 }
558 i += charlen; 546 return -EINVAL;
559 op += 2;
560 } 547 }
548 *op++ = ec & 0xFF;
549 *op++ = ec >> 8;
550 ip += 5;
551 i += 5;
552 } else {
553 charlen = nls->char2uni(ip, len - i,
554 (wchar_t *)op);
555 if (charlen < 0)
556 return -EINVAL;
557 ip += charlen;
558 i += charlen;
559 op += 2;
561 } 560 }
562 if (i < len)
563 return -ENAMETOOLONG;
564 } else {
565 for (i = 0, ip = name, op = outname, *outlen = 0;
566 i < len && *outlen <= FAT_LFN_LEN;
567 i++, *outlen += 1)
568 {
569 *op++ = *ip++;
570 *op++ = 0;
571 }
572 if (i < len)
573 return -ENAMETOOLONG;
574 } 561 }
562 if (i < len)
563 return -ENAMETOOLONG;
575 } 564 }
576 565
577 *longlen = *outlen; 566 *longlen = *outlen;
diff --git a/fs/file.c b/fs/file.c
index 4c6992d8f3ba..3c426de7203a 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -6,7 +6,7 @@
6 * Manage the dynamic fd arrays in the process files_struct. 6 * Manage the dynamic fd arrays in the process files_struct.
7 */ 7 */
8 8
9#include <linux/module.h> 9#include <linux/export.h>
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/mm.h> 11#include <linux/mm.h>
12#include <linux/mmzone.h> 12#include <linux/mmzone.h>
diff --git a/fs/file_table.c b/fs/file_table.c
index 20002e39754d..70f2a0fd6aec 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -204,7 +204,7 @@ EXPORT_SYMBOL(alloc_file);
204 * to write to @file, along with access to write through 204 * to write to @file, along with access to write through
205 * its vfsmount. 205 * its vfsmount.
206 */ 206 */
207void drop_file_write_access(struct file *file) 207static void drop_file_write_access(struct file *file)
208{ 208{
209 struct vfsmount *mnt = file->f_path.mnt; 209 struct vfsmount *mnt = file->f_path.mnt;
210 struct dentry *dentry = file->f_path.dentry; 210 struct dentry *dentry = file->f_path.dentry;
@@ -219,7 +219,6 @@ void drop_file_write_access(struct file *file)
219 mnt_drop_write(mnt); 219 mnt_drop_write(mnt);
220 file_release_write(file); 220 file_release_write(file);
221} 221}
222EXPORT_SYMBOL_GPL(drop_file_write_access);
223 222
224/* the real guts of fput() - releasing the last reference to file 223/* the real guts of fput() - releasing the last reference to file
225 */ 224 */
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index 9d1c99558389..d4fabd26084e 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -224,9 +224,8 @@ static int vxfs_fill_super(struct super_block *sbp, void *dp, int silent)
224 ret = PTR_ERR(root); 224 ret = PTR_ERR(root);
225 goto out; 225 goto out;
226 } 226 }
227 sbp->s_root = d_alloc_root(root); 227 sbp->s_root = d_make_root(root);
228 if (!sbp->s_root) { 228 if (!sbp->s_root) {
229 iput(root);
230 printk(KERN_WARNING "vxfs: unable to get root dentry.\n"); 229 printk(KERN_WARNING "vxfs: unable to get root dentry.\n");
231 goto out_free_ilist; 230 goto out_free_ilist;
232 } 231 }
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 5b4a9362d5aa..236972b752f5 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -14,7 +14,7 @@
14 */ 14 */
15 15
16#include <linux/kernel.h> 16#include <linux/kernel.h>
17#include <linux/module.h> 17#include <linux/export.h>
18#include <linux/spinlock.h> 18#include <linux/spinlock.h>
19#include <linux/slab.h> 19#include <linux/slab.h>
20#include <linux/sched.h> 20#include <linux/sched.h>
@@ -1284,7 +1284,7 @@ int writeback_inodes_sb_if_idle(struct super_block *sb, enum wb_reason reason)
1284EXPORT_SYMBOL(writeback_inodes_sb_if_idle); 1284EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
1285 1285
1286/** 1286/**
1287 * writeback_inodes_sb_if_idle - start writeback if none underway 1287 * writeback_inodes_sb_nr_if_idle - start writeback if none underway
1288 * @sb: the superblock 1288 * @sb: the superblock
1289 * @nr: the number of pages to write 1289 * @nr: the number of pages to write
1290 * @reason: reason why some writeback work was initiated 1290 * @reason: reason why some writeback work was initiated
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index 78b519c13536..e159e682ad4c 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -1,4 +1,4 @@
1#include <linux/module.h> 1#include <linux/export.h>
2#include <linux/sched.h> 2#include <linux/sched.h>
3#include <linux/fs.h> 3#include <linux/fs.h>
4#include <linux/path.h> 4#include <linux/path.h>
@@ -26,11 +26,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path)
26{ 26{
27 struct path old_root; 27 struct path old_root;
28 28
29 path_get_longterm(path);
29 spin_lock(&fs->lock); 30 spin_lock(&fs->lock);
30 write_seqcount_begin(&fs->seq); 31 write_seqcount_begin(&fs->seq);
31 old_root = fs->root; 32 old_root = fs->root;
32 fs->root = *path; 33 fs->root = *path;
33 path_get_longterm(path);
34 write_seqcount_end(&fs->seq); 34 write_seqcount_end(&fs->seq);
35 spin_unlock(&fs->lock); 35 spin_unlock(&fs->lock);
36 if (old_root.dentry) 36 if (old_root.dentry)
@@ -45,11 +45,11 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path)
45{ 45{
46 struct path old_pwd; 46 struct path old_pwd;
47 47
48 path_get_longterm(path);
48 spin_lock(&fs->lock); 49 spin_lock(&fs->lock);
49 write_seqcount_begin(&fs->seq); 50 write_seqcount_begin(&fs->seq);
50 old_pwd = fs->pwd; 51 old_pwd = fs->pwd;
51 fs->pwd = *path; 52 fs->pwd = *path;
52 path_get_longterm(path);
53 write_seqcount_end(&fs->seq); 53 write_seqcount_end(&fs->seq);
54 spin_unlock(&fs->lock); 54 spin_unlock(&fs->lock);
55 55
@@ -57,6 +57,14 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path)
57 path_put_longterm(&old_pwd); 57 path_put_longterm(&old_pwd);
58} 58}
59 59
60static inline int replace_path(struct path *p, const struct path *old, const struct path *new)
61{
62 if (likely(p->dentry != old->dentry || p->mnt != old->mnt))
63 return 0;
64 *p = *new;
65 return 1;
66}
67
60void chroot_fs_refs(struct path *old_root, struct path *new_root) 68void chroot_fs_refs(struct path *old_root, struct path *new_root)
61{ 69{
62 struct task_struct *g, *p; 70 struct task_struct *g, *p;
@@ -68,21 +76,16 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
68 task_lock(p); 76 task_lock(p);
69 fs = p->fs; 77 fs = p->fs;
70 if (fs) { 78 if (fs) {
79 int hits = 0;
71 spin_lock(&fs->lock); 80 spin_lock(&fs->lock);
72 write_seqcount_begin(&fs->seq); 81 write_seqcount_begin(&fs->seq);
73 if (fs->root.dentry == old_root->dentry 82 hits += replace_path(&fs->root, old_root, new_root);
74 && fs->root.mnt == old_root->mnt) { 83 hits += replace_path(&fs->pwd, old_root, new_root);
75 path_get_longterm(new_root); 84 write_seqcount_end(&fs->seq);
76 fs->root = *new_root; 85 while (hits--) {
77 count++; 86 count++;
78 }
79 if (fs->pwd.dentry == old_root->dentry
80 && fs->pwd.mnt == old_root->mnt) {
81 path_get_longterm(new_root); 87 path_get_longterm(new_root);
82 fs->pwd = *new_root;
83 count++;
84 } 88 }
85 write_seqcount_end(&fs->seq);
86 spin_unlock(&fs->lock); 89 spin_unlock(&fs->lock);
87 } 90 }
88 task_unlock(p); 91 task_unlock(p);
@@ -107,10 +110,8 @@ void exit_fs(struct task_struct *tsk)
107 int kill; 110 int kill;
108 task_lock(tsk); 111 task_lock(tsk);
109 spin_lock(&fs->lock); 112 spin_lock(&fs->lock);
110 write_seqcount_begin(&fs->seq);
111 tsk->fs = NULL; 113 tsk->fs = NULL;
112 kill = !--fs->users; 114 kill = !--fs->users;
113 write_seqcount_end(&fs->seq);
114 spin_unlock(&fs->lock); 115 spin_unlock(&fs->lock);
115 task_unlock(tsk); 116 task_unlock(tsk);
116 if (kill) 117 if (kill)
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 5f3368ab0fa9..7df2b5e8fbe1 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -838,10 +838,10 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
838 } 838 }
839 } 839 }
840 if (page) { 840 if (page) {
841 void *mapaddr = kmap_atomic(page, KM_USER0); 841 void *mapaddr = kmap_atomic(page);
842 void *buf = mapaddr + offset; 842 void *buf = mapaddr + offset;
843 offset += fuse_copy_do(cs, &buf, &count); 843 offset += fuse_copy_do(cs, &buf, &count);
844 kunmap_atomic(mapaddr, KM_USER0); 844 kunmap_atomic(mapaddr);
845 } else 845 } else
846 offset += fuse_copy_do(cs, NULL, &count); 846 offset += fuse_copy_do(cs, NULL, &count);
847 } 847 }
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 4a199fd93fbd..a841868bf9ce 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1887,11 +1887,11 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
1887 in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV) 1887 in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV)
1888 goto out; 1888 goto out;
1889 1889
1890 vaddr = kmap_atomic(pages[0], KM_USER0); 1890 vaddr = kmap_atomic(pages[0]);
1891 err = fuse_copy_ioctl_iovec(fc, iov_page, vaddr, 1891 err = fuse_copy_ioctl_iovec(fc, iov_page, vaddr,
1892 transferred, in_iovs + out_iovs, 1892 transferred, in_iovs + out_iovs,
1893 (flags & FUSE_IOCTL_COMPAT) != 0); 1893 (flags & FUSE_IOCTL_COMPAT) != 0);
1894 kunmap_atomic(vaddr, KM_USER0); 1894 kunmap_atomic(vaddr);
1895 if (err) 1895 if (err)
1896 goto out; 1896 goto out;
1897 1897
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 64cf8d07393e..4aec5995867e 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -988,14 +988,9 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
988 988
989 err = -ENOMEM; 989 err = -ENOMEM;
990 root = fuse_get_root_inode(sb, d.rootmode); 990 root = fuse_get_root_inode(sb, d.rootmode);
991 if (!root) 991 root_dentry = d_make_root(root);
992 if (!root_dentry)
992 goto err_put_conn; 993 goto err_put_conn;
993
994 root_dentry = d_alloc_root(root);
995 if (!root_dentry) {
996 iput(root);
997 goto err_put_conn;
998 }
999 /* only now - we want root dentry with NULL ->d_op */ 994 /* only now - we want root dentry with NULL ->d_op */
1000 sb->s_d_op = &fuse_dentry_operations; 995 sb->s_d_op = &fuse_dentry_operations;
1001 996
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 501e5cba09b3..38b7a74a0f91 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -434,12 +434,12 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
434 if (error) 434 if (error)
435 return error; 435 return error;
436 436
437 kaddr = kmap_atomic(page, KM_USER0); 437 kaddr = kmap_atomic(page);
438 if (dsize > (dibh->b_size - sizeof(struct gfs2_dinode))) 438 if (dsize > (dibh->b_size - sizeof(struct gfs2_dinode)))
439 dsize = (dibh->b_size - sizeof(struct gfs2_dinode)); 439 dsize = (dibh->b_size - sizeof(struct gfs2_dinode));
440 memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize); 440 memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
441 memset(kaddr + dsize, 0, PAGE_CACHE_SIZE - dsize); 441 memset(kaddr + dsize, 0, PAGE_CACHE_SIZE - dsize);
442 kunmap_atomic(kaddr, KM_USER0); 442 kunmap_atomic(kaddr);
443 flush_dcache_page(page); 443 flush_dcache_page(page);
444 brelse(dibh); 444 brelse(dibh);
445 SetPageUptodate(page); 445 SetPageUptodate(page);
@@ -542,9 +542,9 @@ int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state,
542 page = read_cache_page(mapping, index, __gfs2_readpage, NULL); 542 page = read_cache_page(mapping, index, __gfs2_readpage, NULL);
543 if (IS_ERR(page)) 543 if (IS_ERR(page))
544 return PTR_ERR(page); 544 return PTR_ERR(page);
545 p = kmap_atomic(page, KM_USER0); 545 p = kmap_atomic(page);
546 memcpy(buf + copied, p + offset, amt); 546 memcpy(buf + copied, p + offset, amt);
547 kunmap_atomic(p, KM_USER0); 547 kunmap_atomic(p);
548 mark_page_accessed(page); 548 mark_page_accessed(page);
549 page_cache_release(page); 549 page_cache_release(page);
550 copied += amt; 550 copied += amt;
@@ -788,11 +788,11 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
788 unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode); 788 unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode);
789 789
790 BUG_ON((pos + len) > (dibh->b_size - sizeof(struct gfs2_dinode))); 790 BUG_ON((pos + len) > (dibh->b_size - sizeof(struct gfs2_dinode)));
791 kaddr = kmap_atomic(page, KM_USER0); 791 kaddr = kmap_atomic(page);
792 memcpy(buf + pos, kaddr + pos, copied); 792 memcpy(buf + pos, kaddr + pos, copied);
793 memset(kaddr + pos + copied, 0, len - copied); 793 memset(kaddr + pos + copied, 0, len - copied);
794 flush_dcache_page(page); 794 flush_dcache_page(page);
795 kunmap_atomic(kaddr, KM_USER0); 795 kunmap_atomic(kaddr);
796 796
797 if (!PageUptodate(page)) 797 if (!PageUptodate(page))
798 SetPageUptodate(page); 798 SetPageUptodate(page);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 14a704015970..197c5c47e577 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -60,7 +60,7 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
60 int release = 0; 60 int release = 0;
61 61
62 if (!page || page->index) { 62 if (!page || page->index) {
63 page = grab_cache_page(inode->i_mapping, 0); 63 page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
64 if (!page) 64 if (!page)
65 return -ENOMEM; 65 return -ENOMEM;
66 release = 1; 66 release = 1;
@@ -930,7 +930,7 @@ static int gfs2_block_truncate_page(struct address_space *mapping, loff_t from)
930 struct page *page; 930 struct page *page;
931 int err; 931 int err;
932 932
933 page = grab_cache_page(mapping, index); 933 page = find_or_create_page(mapping, index, GFP_NOFS);
934 if (!page) 934 if (!page)
935 return 0; 935 return 0;
936 936
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index c5fb3597f696..76834587a8a4 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -313,6 +313,8 @@ static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
313 return gfs2_get_flags(filp, (u32 __user *)arg); 313 return gfs2_get_flags(filp, (u32 __user *)arg);
314 case FS_IOC_SETFLAGS: 314 case FS_IOC_SETFLAGS:
315 return gfs2_set_flags(filp, (u32 __user *)arg); 315 return gfs2_set_flags(filp, (u32 __user *)arg);
316 case FITRIM:
317 return gfs2_fitrim(filp, (void __user *)arg);
316 } 318 }
317 return -ENOTTY; 319 return -ENOTTY;
318} 320}
@@ -674,6 +676,7 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
674 struct gfs2_inode *ip = GFS2_I(inode); 676 struct gfs2_inode *ip = GFS2_I(inode);
675 struct buffer_head *dibh; 677 struct buffer_head *dibh;
676 int error; 678 int error;
679 loff_t size = len;
677 unsigned int nr_blks; 680 unsigned int nr_blks;
678 sector_t lblock = offset >> inode->i_blkbits; 681 sector_t lblock = offset >> inode->i_blkbits;
679 682
@@ -707,8 +710,8 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
707 goto out; 710 goto out;
708 } 711 }
709 } 712 }
710 if (offset + len > inode->i_size && !(mode & FALLOC_FL_KEEP_SIZE)) 713 if (offset + size > inode->i_size && !(mode & FALLOC_FL_KEEP_SIZE))
711 i_size_write(inode, offset + len); 714 i_size_write(inode, offset + size);
712 715
713 mark_inode_dirty(inode); 716 mark_inode_dirty(inode);
714 717
@@ -777,12 +780,14 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
777 if (unlikely(error)) 780 if (unlikely(error))
778 goto out_uninit; 781 goto out_uninit;
779 782
780 if (!gfs2_write_alloc_required(ip, offset, len))
781 goto out_unlock;
782
783 while (len > 0) { 783 while (len > 0) {
784 if (len < bytes) 784 if (len < bytes)
785 bytes = len; 785 bytes = len;
786 if (!gfs2_write_alloc_required(ip, offset, bytes)) {
787 len -= bytes;
788 offset += bytes;
789 continue;
790 }
786 qa = gfs2_qadata_get(ip); 791 qa = gfs2_qadata_get(ip);
787 if (!qa) { 792 if (!qa) {
788 error = -ENOMEM; 793 error = -ENOMEM;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 351a3e797789..dab2526071cc 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -29,6 +29,7 @@
29#include <linux/rcupdate.h> 29#include <linux/rcupdate.h>
30#include <linux/rculist_bl.h> 30#include <linux/rculist_bl.h>
31#include <linux/bit_spinlock.h> 31#include <linux/bit_spinlock.h>
32#include <linux/percpu.h>
32 33
33#include "gfs2.h" 34#include "gfs2.h"
34#include "incore.h" 35#include "incore.h"
@@ -543,6 +544,11 @@ __acquires(&gl->gl_spin)
543 do_error(gl, 0); /* Fail queued try locks */ 544 do_error(gl, 0); /* Fail queued try locks */
544 } 545 }
545 gl->gl_req = target; 546 gl->gl_req = target;
547 set_bit(GLF_BLOCKING, &gl->gl_flags);
548 if ((gl->gl_req == LM_ST_UNLOCKED) ||
549 (gl->gl_state == LM_ST_EXCLUSIVE) ||
550 (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB)))
551 clear_bit(GLF_BLOCKING, &gl->gl_flags);
546 spin_unlock(&gl->gl_spin); 552 spin_unlock(&gl->gl_spin);
547 if (glops->go_xmote_th) 553 if (glops->go_xmote_th)
548 glops->go_xmote_th(gl); 554 glops->go_xmote_th(gl);
@@ -744,6 +750,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
744 return -ENOMEM; 750 return -ENOMEM;
745 751
746 atomic_inc(&sdp->sd_glock_disposal); 752 atomic_inc(&sdp->sd_glock_disposal);
753 gl->gl_sbd = sdp;
747 gl->gl_flags = 0; 754 gl->gl_flags = 0;
748 gl->gl_name = name; 755 gl->gl_name = name;
749 atomic_set(&gl->gl_ref, 1); 756 atomic_set(&gl->gl_ref, 1);
@@ -752,12 +759,17 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
752 gl->gl_demote_state = LM_ST_EXCLUSIVE; 759 gl->gl_demote_state = LM_ST_EXCLUSIVE;
753 gl->gl_hash = hash; 760 gl->gl_hash = hash;
754 gl->gl_ops = glops; 761 gl->gl_ops = glops;
755 snprintf(gl->gl_strname, GDLM_STRNAME_BYTES, "%8x%16llx", name.ln_type, (unsigned long long)number); 762 gl->gl_dstamp = ktime_set(0, 0);
763 preempt_disable();
764 /* We use the global stats to estimate the initial per-glock stats */
765 gl->gl_stats = this_cpu_ptr(sdp->sd_lkstats)->lkstats[glops->go_type];
766 preempt_enable();
767 gl->gl_stats.stats[GFS2_LKS_DCOUNT] = 0;
768 gl->gl_stats.stats[GFS2_LKS_QCOUNT] = 0;
756 memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb)); 769 memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb));
757 gl->gl_lksb.sb_lvbptr = gl->gl_lvb; 770 gl->gl_lksb.sb_lvbptr = gl->gl_lvb;
758 gl->gl_tchange = jiffies; 771 gl->gl_tchange = jiffies;
759 gl->gl_object = NULL; 772 gl->gl_object = NULL;
760 gl->gl_sbd = sdp;
761 gl->gl_hold_time = GL_GLOCK_DFT_HOLD; 773 gl->gl_hold_time = GL_GLOCK_DFT_HOLD;
762 INIT_DELAYED_WORK(&gl->gl_work, glock_work_func); 774 INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
763 INIT_WORK(&gl->gl_delete, delete_work_func); 775 INIT_WORK(&gl->gl_delete, delete_work_func);
@@ -999,6 +1011,8 @@ fail:
999 } 1011 }
1000 set_bit(GLF_QUEUED, &gl->gl_flags); 1012 set_bit(GLF_QUEUED, &gl->gl_flags);
1001 trace_gfs2_glock_queue(gh, 1); 1013 trace_gfs2_glock_queue(gh, 1);
1014 gfs2_glstats_inc(gl, GFS2_LKS_QCOUNT);
1015 gfs2_sbstats_inc(gl, GFS2_LKS_QCOUNT);
1002 if (likely(insert_pt == NULL)) { 1016 if (likely(insert_pt == NULL)) {
1003 list_add_tail(&gh->gh_list, &gl->gl_holders); 1017 list_add_tail(&gh->gh_list, &gl->gl_holders);
1004 if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY)) 1018 if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY))
@@ -1658,6 +1672,8 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
1658 *p++ = 'L'; 1672 *p++ = 'L';
1659 if (gl->gl_object) 1673 if (gl->gl_object)
1660 *p++ = 'o'; 1674 *p++ = 'o';
1675 if (test_bit(GLF_BLOCKING, gflags))
1676 *p++ = 'b';
1661 *p = 0; 1677 *p = 0;
1662 return buf; 1678 return buf;
1663} 1679}
@@ -1714,8 +1730,78 @@ out:
1714 return error; 1730 return error;
1715} 1731}
1716 1732
1733static int gfs2_glstats_seq_show(struct seq_file *seq, void *iter_ptr)
1734{
1735 struct gfs2_glock *gl = iter_ptr;
1736
1737 seq_printf(seq, "G: n:%u/%llx rtt:%lld/%lld rttb:%lld/%lld irt:%lld/%lld dcnt: %lld qcnt: %lld\n",
1738 gl->gl_name.ln_type,
1739 (unsigned long long)gl->gl_name.ln_number,
1740 (long long)gl->gl_stats.stats[GFS2_LKS_SRTT],
1741 (long long)gl->gl_stats.stats[GFS2_LKS_SRTTVAR],
1742 (long long)gl->gl_stats.stats[GFS2_LKS_SRTTB],
1743 (long long)gl->gl_stats.stats[GFS2_LKS_SRTTVARB],
1744 (long long)gl->gl_stats.stats[GFS2_LKS_SIRT],
1745 (long long)gl->gl_stats.stats[GFS2_LKS_SIRTVAR],
1746 (long long)gl->gl_stats.stats[GFS2_LKS_DCOUNT],
1747 (long long)gl->gl_stats.stats[GFS2_LKS_QCOUNT]);
1748 return 0;
1749}
1750
1751static const char *gfs2_gltype[] = {
1752 "type",
1753 "reserved",
1754 "nondisk",
1755 "inode",
1756 "rgrp",
1757 "meta",
1758 "iopen",
1759 "flock",
1760 "plock",
1761 "quota",
1762 "journal",
1763};
1764
1765static const char *gfs2_stype[] = {
1766 [GFS2_LKS_SRTT] = "srtt",
1767 [GFS2_LKS_SRTTVAR] = "srttvar",
1768 [GFS2_LKS_SRTTB] = "srttb",
1769 [GFS2_LKS_SRTTVARB] = "srttvarb",
1770 [GFS2_LKS_SIRT] = "sirt",
1771 [GFS2_LKS_SIRTVAR] = "sirtvar",
1772 [GFS2_LKS_DCOUNT] = "dlm",
1773 [GFS2_LKS_QCOUNT] = "queue",
1774};
1775
1776#define GFS2_NR_SBSTATS (ARRAY_SIZE(gfs2_gltype) * ARRAY_SIZE(gfs2_stype))
1777
1778static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr)
1779{
1780 struct gfs2_glock_iter *gi = seq->private;
1781 struct gfs2_sbd *sdp = gi->sdp;
1782 unsigned index = gi->hash >> 3;
1783 unsigned subindex = gi->hash & 0x07;
1784 s64 value;
1785 int i;
1786
1787 if (index == 0 && subindex != 0)
1788 return 0;
1717 1789
1790 seq_printf(seq, "%-10s %8s:", gfs2_gltype[index],
1791 (index == 0) ? "cpu": gfs2_stype[subindex]);
1718 1792
1793 for_each_possible_cpu(i) {
1794 const struct gfs2_pcpu_lkstats *lkstats = per_cpu_ptr(sdp->sd_lkstats, i);
1795 if (index == 0) {
1796 value = i;
1797 } else {
1798 value = lkstats->lkstats[index - 1].stats[subindex];
1799 }
1800 seq_printf(seq, " %15lld", (long long)value);
1801 }
1802 seq_putc(seq, '\n');
1803 return 0;
1804}
1719 1805
1720int __init gfs2_glock_init(void) 1806int __init gfs2_glock_init(void)
1721{ 1807{
@@ -1828,6 +1914,35 @@ static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
1828 return dump_glock(seq, iter_ptr); 1914 return dump_glock(seq, iter_ptr);
1829} 1915}
1830 1916
1917static void *gfs2_sbstats_seq_start(struct seq_file *seq, loff_t *pos)
1918{
1919 struct gfs2_glock_iter *gi = seq->private;
1920
1921 gi->hash = *pos;
1922 if (*pos >= GFS2_NR_SBSTATS)
1923 return NULL;
1924 preempt_disable();
1925 return SEQ_START_TOKEN;
1926}
1927
1928static void *gfs2_sbstats_seq_next(struct seq_file *seq, void *iter_ptr,
1929 loff_t *pos)
1930{
1931 struct gfs2_glock_iter *gi = seq->private;
1932 (*pos)++;
1933 gi->hash++;
1934 if (gi->hash >= GFS2_NR_SBSTATS) {
1935 preempt_enable();
1936 return NULL;
1937 }
1938 return SEQ_START_TOKEN;
1939}
1940
1941static void gfs2_sbstats_seq_stop(struct seq_file *seq, void *iter_ptr)
1942{
1943 preempt_enable();
1944}
1945
1831static const struct seq_operations gfs2_glock_seq_ops = { 1946static const struct seq_operations gfs2_glock_seq_ops = {
1832 .start = gfs2_glock_seq_start, 1947 .start = gfs2_glock_seq_start,
1833 .next = gfs2_glock_seq_next, 1948 .next = gfs2_glock_seq_next,
@@ -1835,7 +1950,21 @@ static const struct seq_operations gfs2_glock_seq_ops = {
1835 .show = gfs2_glock_seq_show, 1950 .show = gfs2_glock_seq_show,
1836}; 1951};
1837 1952
1838static int gfs2_debugfs_open(struct inode *inode, struct file *file) 1953static const struct seq_operations gfs2_glstats_seq_ops = {
1954 .start = gfs2_glock_seq_start,
1955 .next = gfs2_glock_seq_next,
1956 .stop = gfs2_glock_seq_stop,
1957 .show = gfs2_glstats_seq_show,
1958};
1959
1960static const struct seq_operations gfs2_sbstats_seq_ops = {
1961 .start = gfs2_sbstats_seq_start,
1962 .next = gfs2_sbstats_seq_next,
1963 .stop = gfs2_sbstats_seq_stop,
1964 .show = gfs2_sbstats_seq_show,
1965};
1966
1967static int gfs2_glocks_open(struct inode *inode, struct file *file)
1839{ 1968{
1840 int ret = seq_open_private(file, &gfs2_glock_seq_ops, 1969 int ret = seq_open_private(file, &gfs2_glock_seq_ops,
1841 sizeof(struct gfs2_glock_iter)); 1970 sizeof(struct gfs2_glock_iter));
@@ -1847,9 +1976,49 @@ static int gfs2_debugfs_open(struct inode *inode, struct file *file)
1847 return ret; 1976 return ret;
1848} 1977}
1849 1978
1850static const struct file_operations gfs2_debug_fops = { 1979static int gfs2_glstats_open(struct inode *inode, struct file *file)
1980{
1981 int ret = seq_open_private(file, &gfs2_glstats_seq_ops,
1982 sizeof(struct gfs2_glock_iter));
1983 if (ret == 0) {
1984 struct seq_file *seq = file->private_data;
1985 struct gfs2_glock_iter *gi = seq->private;
1986 gi->sdp = inode->i_private;
1987 }
1988 return ret;
1989}
1990
1991static int gfs2_sbstats_open(struct inode *inode, struct file *file)
1992{
1993 int ret = seq_open_private(file, &gfs2_sbstats_seq_ops,
1994 sizeof(struct gfs2_glock_iter));
1995 if (ret == 0) {
1996 struct seq_file *seq = file->private_data;
1997 struct gfs2_glock_iter *gi = seq->private;
1998 gi->sdp = inode->i_private;
1999 }
2000 return ret;
2001}
2002
2003static const struct file_operations gfs2_glocks_fops = {
2004 .owner = THIS_MODULE,
2005 .open = gfs2_glocks_open,
2006 .read = seq_read,
2007 .llseek = seq_lseek,
2008 .release = seq_release_private,
2009};
2010
2011static const struct file_operations gfs2_glstats_fops = {
1851 .owner = THIS_MODULE, 2012 .owner = THIS_MODULE,
1852 .open = gfs2_debugfs_open, 2013 .open = gfs2_glstats_open,
2014 .read = seq_read,
2015 .llseek = seq_lseek,
2016 .release = seq_release_private,
2017};
2018
2019static const struct file_operations gfs2_sbstats_fops = {
2020 .owner = THIS_MODULE,
2021 .open = gfs2_sbstats_open,
1853 .read = seq_read, 2022 .read = seq_read,
1854 .llseek = seq_lseek, 2023 .llseek = seq_lseek,
1855 .release = seq_release_private, 2024 .release = seq_release_private,
@@ -1863,20 +2032,45 @@ int gfs2_create_debugfs_file(struct gfs2_sbd *sdp)
1863 sdp->debugfs_dentry_glocks = debugfs_create_file("glocks", 2032 sdp->debugfs_dentry_glocks = debugfs_create_file("glocks",
1864 S_IFREG | S_IRUGO, 2033 S_IFREG | S_IRUGO,
1865 sdp->debugfs_dir, sdp, 2034 sdp->debugfs_dir, sdp,
1866 &gfs2_debug_fops); 2035 &gfs2_glocks_fops);
1867 if (!sdp->debugfs_dentry_glocks) 2036 if (!sdp->debugfs_dentry_glocks)
1868 return -ENOMEM; 2037 goto fail;
2038
2039 sdp->debugfs_dentry_glstats = debugfs_create_file("glstats",
2040 S_IFREG | S_IRUGO,
2041 sdp->debugfs_dir, sdp,
2042 &gfs2_glstats_fops);
2043 if (!sdp->debugfs_dentry_glstats)
2044 goto fail;
2045
2046 sdp->debugfs_dentry_sbstats = debugfs_create_file("sbstats",
2047 S_IFREG | S_IRUGO,
2048 sdp->debugfs_dir, sdp,
2049 &gfs2_sbstats_fops);
2050 if (!sdp->debugfs_dentry_sbstats)
2051 goto fail;
1869 2052
1870 return 0; 2053 return 0;
2054fail:
2055 gfs2_delete_debugfs_file(sdp);
2056 return -ENOMEM;
1871} 2057}
1872 2058
1873void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp) 2059void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp)
1874{ 2060{
1875 if (sdp && sdp->debugfs_dir) { 2061 if (sdp->debugfs_dir) {
1876 if (sdp->debugfs_dentry_glocks) { 2062 if (sdp->debugfs_dentry_glocks) {
1877 debugfs_remove(sdp->debugfs_dentry_glocks); 2063 debugfs_remove(sdp->debugfs_dentry_glocks);
1878 sdp->debugfs_dentry_glocks = NULL; 2064 sdp->debugfs_dentry_glocks = NULL;
1879 } 2065 }
2066 if (sdp->debugfs_dentry_glstats) {
2067 debugfs_remove(sdp->debugfs_dentry_glstats);
2068 sdp->debugfs_dentry_glstats = NULL;
2069 }
2070 if (sdp->debugfs_dentry_sbstats) {
2071 debugfs_remove(sdp->debugfs_dentry_sbstats);
2072 sdp->debugfs_dentry_sbstats = NULL;
2073 }
1880 debugfs_remove(sdp->debugfs_dir); 2074 debugfs_remove(sdp->debugfs_dir);
1881 sdp->debugfs_dir = NULL; 2075 sdp->debugfs_dir = NULL;
1882 } 2076 }
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 97742a7ea9cc..47d0bda5ac2b 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -19,6 +19,8 @@
19#include <linux/rculist_bl.h> 19#include <linux/rculist_bl.h>
20#include <linux/completion.h> 20#include <linux/completion.h>
21#include <linux/rbtree.h> 21#include <linux/rbtree.h>
22#include <linux/ktime.h>
23#include <linux/percpu.h>
22 24
23#define DIO_WAIT 0x00000010 25#define DIO_WAIT 0x00000010
24#define DIO_METADATA 0x00000020 26#define DIO_METADATA 0x00000020
@@ -205,6 +207,22 @@ struct gfs2_glock_operations {
205}; 207};
206 208
207enum { 209enum {
210 GFS2_LKS_SRTT = 0, /* Non blocking smoothed round trip time */
211 GFS2_LKS_SRTTVAR = 1, /* Non blocking smoothed variance */
212 GFS2_LKS_SRTTB = 2, /* Blocking smoothed round trip time */
213 GFS2_LKS_SRTTVARB = 3, /* Blocking smoothed variance */
214 GFS2_LKS_SIRT = 4, /* Smoothed Inter-request time */
215 GFS2_LKS_SIRTVAR = 5, /* Smoothed Inter-request variance */
216 GFS2_LKS_DCOUNT = 6, /* Count of dlm requests */
217 GFS2_LKS_QCOUNT = 7, /* Count of gfs2_holder queues */
218 GFS2_NR_LKSTATS
219};
220
221struct gfs2_lkstats {
222 s64 stats[GFS2_NR_LKSTATS];
223};
224
225enum {
208 /* States */ 226 /* States */
209 HIF_HOLDER = 6, /* Set for gh that "holds" the glock */ 227 HIF_HOLDER = 6, /* Set for gh that "holds" the glock */
210 HIF_FIRST = 7, 228 HIF_FIRST = 7,
@@ -238,10 +256,12 @@ enum {
238 GLF_QUEUED = 12, 256 GLF_QUEUED = 12,
239 GLF_LRU = 13, 257 GLF_LRU = 13,
240 GLF_OBJECT = 14, /* Used only for tracing */ 258 GLF_OBJECT = 14, /* Used only for tracing */
259 GLF_BLOCKING = 15,
241}; 260};
242 261
243struct gfs2_glock { 262struct gfs2_glock {
244 struct hlist_bl_node gl_list; 263 struct hlist_bl_node gl_list;
264 struct gfs2_sbd *gl_sbd;
245 unsigned long gl_flags; /* GLF_... */ 265 unsigned long gl_flags; /* GLF_... */
246 struct lm_lockname gl_name; 266 struct lm_lockname gl_name;
247 atomic_t gl_ref; 267 atomic_t gl_ref;
@@ -261,16 +281,14 @@ struct gfs2_glock {
261 struct list_head gl_holders; 281 struct list_head gl_holders;
262 282
263 const struct gfs2_glock_operations *gl_ops; 283 const struct gfs2_glock_operations *gl_ops;
264 char gl_strname[GDLM_STRNAME_BYTES]; 284 ktime_t gl_dstamp;
285 struct gfs2_lkstats gl_stats;
265 struct dlm_lksb gl_lksb; 286 struct dlm_lksb gl_lksb;
266 char gl_lvb[32]; 287 char gl_lvb[32];
267 unsigned long gl_tchange; 288 unsigned long gl_tchange;
268 void *gl_object; 289 void *gl_object;
269 290
270 struct list_head gl_lru; 291 struct list_head gl_lru;
271
272 struct gfs2_sbd *gl_sbd;
273
274 struct list_head gl_ail_list; 292 struct list_head gl_ail_list;
275 atomic_t gl_ail_count; 293 atomic_t gl_ail_count;
276 atomic_t gl_revokes; 294 atomic_t gl_revokes;
@@ -560,8 +578,14 @@ struct lm_lockstruct {
560 uint32_t *ls_recover_result; /* result of last jid recovery */ 578 uint32_t *ls_recover_result; /* result of last jid recovery */
561}; 579};
562 580
581struct gfs2_pcpu_lkstats {
582 /* One struct for each glock type */
583 struct gfs2_lkstats lkstats[10];
584};
585
563struct gfs2_sbd { 586struct gfs2_sbd {
564 struct super_block *sd_vfs; 587 struct super_block *sd_vfs;
588 struct gfs2_pcpu_lkstats __percpu *sd_lkstats;
565 struct kobject sd_kobj; 589 struct kobject sd_kobj;
566 unsigned long sd_flags; /* SDF_... */ 590 unsigned long sd_flags; /* SDF_... */
567 struct gfs2_sb_host sd_sb; 591 struct gfs2_sb_host sd_sb;
@@ -620,7 +644,6 @@ struct gfs2_sbd {
620 644
621 int sd_rindex_uptodate; 645 int sd_rindex_uptodate;
622 spinlock_t sd_rindex_spin; 646 spinlock_t sd_rindex_spin;
623 struct mutex sd_rindex_mutex;
624 struct rb_root sd_rindex_tree; 647 struct rb_root sd_rindex_tree;
625 unsigned int sd_rgrps; 648 unsigned int sd_rgrps;
626 unsigned int sd_max_rg_data; 649 unsigned int sd_max_rg_data;
@@ -725,8 +748,23 @@ struct gfs2_sbd {
725 748
726 unsigned long sd_last_warning; 749 unsigned long sd_last_warning;
727 struct dentry *debugfs_dir; /* debugfs directory */ 750 struct dentry *debugfs_dir; /* debugfs directory */
728 struct dentry *debugfs_dentry_glocks; /* for debugfs */ 751 struct dentry *debugfs_dentry_glocks;
752 struct dentry *debugfs_dentry_glstats;
753 struct dentry *debugfs_dentry_sbstats;
729}; 754};
730 755
756static inline void gfs2_glstats_inc(struct gfs2_glock *gl, int which)
757{
758 gl->gl_stats.stats[which]++;
759}
760
761static inline void gfs2_sbstats_inc(const struct gfs2_glock *gl, int which)
762{
763 const struct gfs2_sbd *sdp = gl->gl_sbd;
764 preempt_disable();
765 this_cpu_ptr(sdp->sd_lkstats)->lkstats[gl->gl_name.ln_type].stats[which]++;
766 preempt_enable();
767}
768
731#endif /* __INCORE_DOT_H__ */ 769#endif /* __INCORE_DOT_H__ */
732 770
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 56987460cdae..c98a60ee6dfd 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1036,7 +1036,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
1036 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); 1036 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
1037 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); 1037 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
1038 1038
1039 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); 1039 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1);
1040 if (!rgd) 1040 if (!rgd)
1041 goto out_inodes; 1041 goto out_inodes;
1042 1042
@@ -1255,7 +1255,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
1255 * this is the case of the target file already existing 1255 * this is the case of the target file already existing
1256 * so we unlink before doing the rename 1256 * so we unlink before doing the rename
1257 */ 1257 */
1258 nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr); 1258 nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr, 1);
1259 if (nrgd) 1259 if (nrgd)
1260 gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++); 1260 gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
1261 } 1261 }
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 8944d1e32ab5..f8411bd1b805 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -18,14 +18,106 @@
18#include "glock.h" 18#include "glock.h"
19#include "util.h" 19#include "util.h"
20#include "sys.h" 20#include "sys.h"
21#include "trace_gfs2.h"
21 22
22extern struct workqueue_struct *gfs2_control_wq; 23extern struct workqueue_struct *gfs2_control_wq;
23 24
25/**
26 * gfs2_update_stats - Update time based stats
27 * @mv: Pointer to mean/variance structure to update
28 * @sample: New data to include
29 *
30 * @delta is the difference between the current rtt sample and the
31 * running average srtt. We add 1/8 of that to the srtt in order to
32 * update the current srtt estimate. The varience estimate is a bit
33 * more complicated. We subtract the abs value of the @delta from
34 * the current variance estimate and add 1/4 of that to the running
35 * total.
36 *
37 * Note that the index points at the array entry containing the smoothed
38 * mean value, and the variance is always in the following entry
39 *
40 * Reference: TCP/IP Illustrated, vol 2, p. 831,832
41 * All times are in units of integer nanoseconds. Unlike the TCP/IP case,
42 * they are not scaled fixed point.
43 */
44
45static inline void gfs2_update_stats(struct gfs2_lkstats *s, unsigned index,
46 s64 sample)
47{
48 s64 delta = sample - s->stats[index];
49 s->stats[index] += (delta >> 3);
50 index++;
51 s->stats[index] += ((abs64(delta) - s->stats[index]) >> 2);
52}
53
54/**
55 * gfs2_update_reply_times - Update locking statistics
56 * @gl: The glock to update
57 *
58 * This assumes that gl->gl_dstamp has been set earlier.
59 *
60 * The rtt (lock round trip time) is an estimate of the time
61 * taken to perform a dlm lock request. We update it on each
62 * reply from the dlm.
63 *
64 * The blocking flag is set on the glock for all dlm requests
65 * which may potentially block due to lock requests from other nodes.
66 * DLM requests where the current lock state is exclusive, the
67 * requested state is null (or unlocked) or where the TRY or
68 * TRY_1CB flags are set are classified as non-blocking. All
69 * other DLM requests are counted as (potentially) blocking.
70 */
71static inline void gfs2_update_reply_times(struct gfs2_glock *gl)
72{
73 struct gfs2_pcpu_lkstats *lks;
74 const unsigned gltype = gl->gl_name.ln_type;
75 unsigned index = test_bit(GLF_BLOCKING, &gl->gl_flags) ?
76 GFS2_LKS_SRTTB : GFS2_LKS_SRTT;
77 s64 rtt;
78
79 preempt_disable();
80 rtt = ktime_to_ns(ktime_sub(ktime_get_real(), gl->gl_dstamp));
81 lks = this_cpu_ptr(gl->gl_sbd->sd_lkstats);
82 gfs2_update_stats(&gl->gl_stats, index, rtt); /* Local */
83 gfs2_update_stats(&lks->lkstats[gltype], index, rtt); /* Global */
84 preempt_enable();
85
86 trace_gfs2_glock_lock_time(gl, rtt);
87}
88
89/**
90 * gfs2_update_request_times - Update locking statistics
91 * @gl: The glock to update
92 *
93 * The irt (lock inter-request times) measures the average time
94 * between requests to the dlm. It is updated immediately before
95 * each dlm call.
96 */
97
98static inline void gfs2_update_request_times(struct gfs2_glock *gl)
99{
100 struct gfs2_pcpu_lkstats *lks;
101 const unsigned gltype = gl->gl_name.ln_type;
102 ktime_t dstamp;
103 s64 irt;
104
105 preempt_disable();
106 dstamp = gl->gl_dstamp;
107 gl->gl_dstamp = ktime_get_real();
108 irt = ktime_to_ns(ktime_sub(gl->gl_dstamp, dstamp));
109 lks = this_cpu_ptr(gl->gl_sbd->sd_lkstats);
110 gfs2_update_stats(&gl->gl_stats, GFS2_LKS_SIRT, irt); /* Local */
111 gfs2_update_stats(&lks->lkstats[gltype], GFS2_LKS_SIRT, irt); /* Global */
112 preempt_enable();
113}
114
24static void gdlm_ast(void *arg) 115static void gdlm_ast(void *arg)
25{ 116{
26 struct gfs2_glock *gl = arg; 117 struct gfs2_glock *gl = arg;
27 unsigned ret = gl->gl_state; 118 unsigned ret = gl->gl_state;
28 119
120 gfs2_update_reply_times(gl);
29 BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED); 121 BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED);
30 122
31 if (gl->gl_lksb.sb_flags & DLM_SBF_VALNOTVALID) 123 if (gl->gl_lksb.sb_flags & DLM_SBF_VALNOTVALID)
@@ -111,7 +203,7 @@ static int make_mode(const unsigned int lmstate)
111static u32 make_flags(const u32 lkid, const unsigned int gfs_flags, 203static u32 make_flags(const u32 lkid, const unsigned int gfs_flags,
112 const int req) 204 const int req)
113{ 205{
114 u32 lkf = 0; 206 u32 lkf = DLM_LKF_VALBLK;
115 207
116 if (gfs_flags & LM_FLAG_TRY) 208 if (gfs_flags & LM_FLAG_TRY)
117 lkf |= DLM_LKF_NOQUEUE; 209 lkf |= DLM_LKF_NOQUEUE;
@@ -138,26 +230,43 @@ static u32 make_flags(const u32 lkid, const unsigned int gfs_flags,
138 if (lkid != 0) 230 if (lkid != 0)
139 lkf |= DLM_LKF_CONVERT; 231 lkf |= DLM_LKF_CONVERT;
140 232
141 lkf |= DLM_LKF_VALBLK;
142
143 return lkf; 233 return lkf;
144} 234}
145 235
236static void gfs2_reverse_hex(char *c, u64 value)
237{
238 while (value) {
239 *c-- = hex_asc[value & 0x0f];
240 value >>= 4;
241 }
242}
243
146static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state, 244static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state,
147 unsigned int flags) 245 unsigned int flags)
148{ 246{
149 struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct; 247 struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
150 int req; 248 int req;
151 u32 lkf; 249 u32 lkf;
250 char strname[GDLM_STRNAME_BYTES] = "";
152 251
153 req = make_mode(req_state); 252 req = make_mode(req_state);
154 lkf = make_flags(gl->gl_lksb.sb_lkid, flags, req); 253 lkf = make_flags(gl->gl_lksb.sb_lkid, flags, req);
155 254 gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT);
255 gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
256 if (gl->gl_lksb.sb_lkid) {
257 gfs2_update_request_times(gl);
258 } else {
259 memset(strname, ' ', GDLM_STRNAME_BYTES - 1);
260 strname[GDLM_STRNAME_BYTES - 1] = '\0';
261 gfs2_reverse_hex(strname + 7, gl->gl_name.ln_type);
262 gfs2_reverse_hex(strname + 23, gl->gl_name.ln_number);
263 gl->gl_dstamp = ktime_get_real();
264 }
156 /* 265 /*
157 * Submit the actual lock request. 266 * Submit the actual lock request.
158 */ 267 */
159 268
160 return dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, gl->gl_strname, 269 return dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, strname,
161 GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast); 270 GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
162} 271}
163 272
@@ -172,6 +281,10 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
172 return; 281 return;
173 } 282 }
174 283
284 clear_bit(GLF_BLOCKING, &gl->gl_flags);
285 gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT);
286 gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
287 gfs2_update_request_times(gl);
175 error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK, 288 error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK,
176 NULL, gl); 289 NULL, gl);
177 if (error) { 290 if (error) {
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 756fae9eaf8f..4752eadc7f6e 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -19,6 +19,7 @@
19#include <linux/freezer.h> 19#include <linux/freezer.h>
20#include <linux/bio.h> 20#include <linux/bio.h>
21#include <linux/writeback.h> 21#include <linux/writeback.h>
22#include <linux/list_sort.h>
22 23
23#include "gfs2.h" 24#include "gfs2.h"
24#include "incore.h" 25#include "incore.h"
@@ -358,7 +359,7 @@ retry:
358 return 0; 359 return 0;
359} 360}
360 361
361static u64 log_bmap(struct gfs2_sbd *sdp, unsigned int lbn) 362u64 gfs2_log_bmap(struct gfs2_sbd *sdp, unsigned int lbn)
362{ 363{
363 struct gfs2_journal_extent *je; 364 struct gfs2_journal_extent *je;
364 365
@@ -467,8 +468,8 @@ static unsigned int current_tail(struct gfs2_sbd *sdp)
467 468
468void gfs2_log_incr_head(struct gfs2_sbd *sdp) 469void gfs2_log_incr_head(struct gfs2_sbd *sdp)
469{ 470{
470 if (sdp->sd_log_flush_head == sdp->sd_log_tail) 471 BUG_ON((sdp->sd_log_flush_head == sdp->sd_log_tail) &&
471 BUG_ON(sdp->sd_log_flush_head != sdp->sd_log_head); 472 (sdp->sd_log_flush_head != sdp->sd_log_head));
472 473
473 if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks) { 474 if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks) {
474 sdp->sd_log_flush_head = 0; 475 sdp->sd_log_flush_head = 0;
@@ -476,99 +477,6 @@ void gfs2_log_incr_head(struct gfs2_sbd *sdp)
476 } 477 }
477} 478}
478 479
479/**
480 * gfs2_log_write_endio - End of I/O for a log buffer
481 * @bh: The buffer head
482 * @uptodate: I/O Status
483 *
484 */
485
486static void gfs2_log_write_endio(struct buffer_head *bh, int uptodate)
487{
488 struct gfs2_sbd *sdp = bh->b_private;
489 bh->b_private = NULL;
490
491 end_buffer_write_sync(bh, uptodate);
492 if (atomic_dec_and_test(&sdp->sd_log_in_flight))
493 wake_up(&sdp->sd_log_flush_wait);
494}
495
496/**
497 * gfs2_log_get_buf - Get and initialize a buffer to use for log control data
498 * @sdp: The GFS2 superblock
499 *
500 * Returns: the buffer_head
501 */
502
503struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp)
504{
505 u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head);
506 struct buffer_head *bh;
507
508 bh = sb_getblk(sdp->sd_vfs, blkno);
509 lock_buffer(bh);
510 memset(bh->b_data, 0, bh->b_size);
511 set_buffer_uptodate(bh);
512 clear_buffer_dirty(bh);
513 gfs2_log_incr_head(sdp);
514 atomic_inc(&sdp->sd_log_in_flight);
515 bh->b_private = sdp;
516 bh->b_end_io = gfs2_log_write_endio;
517
518 return bh;
519}
520
521/**
522 * gfs2_fake_write_endio -
523 * @bh: The buffer head
524 * @uptodate: The I/O Status
525 *
526 */
527
528static void gfs2_fake_write_endio(struct buffer_head *bh, int uptodate)
529{
530 struct buffer_head *real_bh = bh->b_private;
531 struct gfs2_bufdata *bd = real_bh->b_private;
532 struct gfs2_sbd *sdp = bd->bd_gl->gl_sbd;
533
534 end_buffer_write_sync(bh, uptodate);
535 free_buffer_head(bh);
536 unlock_buffer(real_bh);
537 brelse(real_bh);
538 if (atomic_dec_and_test(&sdp->sd_log_in_flight))
539 wake_up(&sdp->sd_log_flush_wait);
540}
541
542/**
543 * gfs2_log_fake_buf - Build a fake buffer head to write metadata buffer to log
544 * @sdp: the filesystem
545 * @data: the data the buffer_head should point to
546 *
547 * Returns: the log buffer descriptor
548 */
549
550struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
551 struct buffer_head *real)
552{
553 u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head);
554 struct buffer_head *bh;
555
556 bh = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL);
557 atomic_set(&bh->b_count, 1);
558 bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate) | (1 << BH_Lock);
559 set_bh_page(bh, real->b_page, bh_offset(real));
560 bh->b_blocknr = blkno;
561 bh->b_size = sdp->sd_sb.sb_bsize;
562 bh->b_bdev = sdp->sd_vfs->s_bdev;
563 bh->b_private = real;
564 bh->b_end_io = gfs2_fake_write_endio;
565
566 gfs2_log_incr_head(sdp);
567 atomic_inc(&sdp->sd_log_in_flight);
568
569 return bh;
570}
571
572static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail) 480static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
573{ 481{
574 unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail); 482 unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail);
@@ -583,66 +491,8 @@ static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
583 sdp->sd_log_tail = new_tail; 491 sdp->sd_log_tail = new_tail;
584} 492}
585 493
586/**
587 * log_write_header - Get and initialize a journal header buffer
588 * @sdp: The GFS2 superblock
589 *
590 * Returns: the initialized log buffer descriptor
591 */
592 494
593static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) 495static void log_flush_wait(struct gfs2_sbd *sdp)
594{
595 u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head);
596 struct buffer_head *bh;
597 struct gfs2_log_header *lh;
598 unsigned int tail;
599 u32 hash;
600
601 bh = sb_getblk(sdp->sd_vfs, blkno);
602 lock_buffer(bh);
603 memset(bh->b_data, 0, bh->b_size);
604 set_buffer_uptodate(bh);
605 clear_buffer_dirty(bh);
606
607 gfs2_ail1_empty(sdp);
608 tail = current_tail(sdp);
609
610 lh = (struct gfs2_log_header *)bh->b_data;
611 memset(lh, 0, sizeof(struct gfs2_log_header));
612 lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
613 lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
614 lh->lh_header.__pad0 = cpu_to_be64(0);
615 lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
616 lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
617 lh->lh_sequence = cpu_to_be64(sdp->sd_log_sequence++);
618 lh->lh_flags = cpu_to_be32(flags);
619 lh->lh_tail = cpu_to_be32(tail);
620 lh->lh_blkno = cpu_to_be32(sdp->sd_log_flush_head);
621 hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header));
622 lh->lh_hash = cpu_to_be32(hash);
623
624 bh->b_end_io = end_buffer_write_sync;
625 get_bh(bh);
626 if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
627 submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh);
628 else
629 submit_bh(WRITE_FLUSH_FUA | REQ_META, bh);
630 wait_on_buffer(bh);
631
632 if (!buffer_uptodate(bh))
633 gfs2_io_error_bh(sdp, bh);
634 brelse(bh);
635
636 if (sdp->sd_log_tail != tail)
637 log_pull_tail(sdp, tail);
638 else
639 gfs2_assert_withdraw(sdp, !pull);
640
641 sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
642 gfs2_log_incr_head(sdp);
643}
644
645static void log_flush_commit(struct gfs2_sbd *sdp)
646{ 496{
647 DEFINE_WAIT(wait); 497 DEFINE_WAIT(wait);
648 498
@@ -655,8 +505,20 @@ static void log_flush_commit(struct gfs2_sbd *sdp)
655 } while(atomic_read(&sdp->sd_log_in_flight)); 505 } while(atomic_read(&sdp->sd_log_in_flight));
656 finish_wait(&sdp->sd_log_flush_wait, &wait); 506 finish_wait(&sdp->sd_log_flush_wait, &wait);
657 } 507 }
508}
509
510static int bd_cmp(void *priv, struct list_head *a, struct list_head *b)
511{
512 struct gfs2_bufdata *bda, *bdb;
658 513
659 log_write_header(sdp, 0, 0); 514 bda = list_entry(a, struct gfs2_bufdata, bd_le.le_list);
515 bdb = list_entry(b, struct gfs2_bufdata, bd_le.le_list);
516
517 if (bda->bd_bh->b_blocknr < bdb->bd_bh->b_blocknr)
518 return -1;
519 if (bda->bd_bh->b_blocknr > bdb->bd_bh->b_blocknr)
520 return 1;
521 return 0;
660} 522}
661 523
662static void gfs2_ordered_write(struct gfs2_sbd *sdp) 524static void gfs2_ordered_write(struct gfs2_sbd *sdp)
@@ -666,6 +528,7 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp)
666 LIST_HEAD(written); 528 LIST_HEAD(written);
667 529
668 gfs2_log_lock(sdp); 530 gfs2_log_lock(sdp);
531 list_sort(NULL, &sdp->sd_log_le_ordered, &bd_cmp);
669 while (!list_empty(&sdp->sd_log_le_ordered)) { 532 while (!list_empty(&sdp->sd_log_le_ordered)) {
670 bd = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_bufdata, bd_le.le_list); 533 bd = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_bufdata, bd_le.le_list);
671 list_move(&bd->bd_le.le_list, &written); 534 list_move(&bd->bd_le.le_list, &written);
@@ -711,6 +574,68 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
711} 574}
712 575
713/** 576/**
577 * log_write_header - Get and initialize a journal header buffer
578 * @sdp: The GFS2 superblock
579 *
580 * Returns: the initialized log buffer descriptor
581 */
582
583static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
584{
585 u64 blkno = gfs2_log_bmap(sdp, sdp->sd_log_flush_head);
586 struct buffer_head *bh;
587 struct gfs2_log_header *lh;
588 unsigned int tail;
589 u32 hash;
590
591 bh = sb_getblk(sdp->sd_vfs, blkno);
592 lock_buffer(bh);
593 memset(bh->b_data, 0, bh->b_size);
594 set_buffer_uptodate(bh);
595 clear_buffer_dirty(bh);
596
597 gfs2_ail1_empty(sdp);
598 tail = current_tail(sdp);
599
600 lh = (struct gfs2_log_header *)bh->b_data;
601 memset(lh, 0, sizeof(struct gfs2_log_header));
602 lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
603 lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
604 lh->lh_header.__pad0 = cpu_to_be64(0);
605 lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
606 lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
607 lh->lh_sequence = cpu_to_be64(sdp->sd_log_sequence++);
608 lh->lh_flags = cpu_to_be32(flags);
609 lh->lh_tail = cpu_to_be32(tail);
610 lh->lh_blkno = cpu_to_be32(sdp->sd_log_flush_head);
611 hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header));
612 lh->lh_hash = cpu_to_be32(hash);
613
614 bh->b_end_io = end_buffer_write_sync;
615 get_bh(bh);
616 if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) {
617 gfs2_ordered_wait(sdp);
618 log_flush_wait(sdp);
619 submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh);
620 } else {
621 submit_bh(WRITE_FLUSH_FUA | REQ_META, bh);
622 }
623 wait_on_buffer(bh);
624
625 if (!buffer_uptodate(bh))
626 gfs2_io_error_bh(sdp, bh);
627 brelse(bh);
628
629 if (sdp->sd_log_tail != tail)
630 log_pull_tail(sdp, tail);
631 else
632 gfs2_assert_withdraw(sdp, !pull);
633
634 sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
635 gfs2_log_incr_head(sdp);
636}
637
638/**
714 * gfs2_log_flush - flush incore transaction(s) 639 * gfs2_log_flush - flush incore transaction(s)
715 * @sdp: the filesystem 640 * @sdp: the filesystem
716 * @gl: The glock structure to flush. If NULL, flush the whole incore log 641 * @gl: The glock structure to flush. If NULL, flush the whole incore log
@@ -753,11 +678,10 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
753 678
754 gfs2_ordered_write(sdp); 679 gfs2_ordered_write(sdp);
755 lops_before_commit(sdp); 680 lops_before_commit(sdp);
756 gfs2_ordered_wait(sdp);
757 681
758 if (sdp->sd_log_head != sdp->sd_log_flush_head) 682 if (sdp->sd_log_head != sdp->sd_log_flush_head) {
759 log_flush_commit(sdp); 683 log_write_header(sdp, 0, 0);
760 else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){ 684 } else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){
761 gfs2_log_lock(sdp); 685 gfs2_log_lock(sdp);
762 atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ 686 atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
763 trace_gfs2_log_blocks(sdp, -1); 687 trace_gfs2_log_blocks(sdp, -1);
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index ab0621698b73..ff07454b582c 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -53,10 +53,7 @@ extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
53 53
54extern int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks); 54extern int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks);
55extern void gfs2_log_incr_head(struct gfs2_sbd *sdp); 55extern void gfs2_log_incr_head(struct gfs2_sbd *sdp);
56 56extern u64 gfs2_log_bmap(struct gfs2_sbd *sdp, unsigned int lbn);
57extern struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp);
58extern struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
59 struct buffer_head *real);
60extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl); 57extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl);
61extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); 58extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
62extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd); 59extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 0301be655b12..6b1efb594d90 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -12,6 +12,7 @@
12#include <linux/spinlock.h> 12#include <linux/spinlock.h>
13#include <linux/completion.h> 13#include <linux/completion.h>
14#include <linux/buffer_head.h> 14#include <linux/buffer_head.h>
15#include <linux/mempool.h>
15#include <linux/gfs2_ondisk.h> 16#include <linux/gfs2_ondisk.h>
16#include <linux/bio.h> 17#include <linux/bio.h>
17#include <linux/fs.h> 18#include <linux/fs.h>
@@ -76,7 +77,7 @@ static void maybe_release_space(struct gfs2_bufdata *bd)
76 if (bi->bi_clone == 0) 77 if (bi->bi_clone == 0)
77 return; 78 return;
78 if (sdp->sd_args.ar_discard) 79 if (sdp->sd_args.ar_discard)
79 gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi); 80 gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi, 1, NULL);
80 memcpy(bi->bi_clone + bi->bi_offset, 81 memcpy(bi->bi_clone + bi->bi_offset,
81 bd->bd_bh->b_data + bi->bi_offset, bi->bi_len); 82 bd->bd_bh->b_data + bi->bi_offset, bi->bi_len);
82 clear_bit(GBF_FULL, &bi->bi_flags); 83 clear_bit(GBF_FULL, &bi->bi_flags);
@@ -143,6 +144,98 @@ static inline __be64 *bh_ptr_end(struct buffer_head *bh)
143 return (__force __be64 *)(bh->b_data + bh->b_size); 144 return (__force __be64 *)(bh->b_data + bh->b_size);
144} 145}
145 146
147/**
148 * gfs2_log_write_endio - End of I/O for a log buffer
149 * @bh: The buffer head
150 * @uptodate: I/O Status
151 *
152 */
153
154static void gfs2_log_write_endio(struct buffer_head *bh, int uptodate)
155{
156 struct gfs2_sbd *sdp = bh->b_private;
157 bh->b_private = NULL;
158
159 end_buffer_write_sync(bh, uptodate);
160 if (atomic_dec_and_test(&sdp->sd_log_in_flight))
161 wake_up(&sdp->sd_log_flush_wait);
162}
163
164/**
165 * gfs2_log_get_buf - Get and initialize a buffer to use for log control data
166 * @sdp: The GFS2 superblock
167 *
168 * tReturns: the buffer_head
169 */
170
171static struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp)
172{
173 u64 blkno = gfs2_log_bmap(sdp, sdp->sd_log_flush_head);
174 struct buffer_head *bh;
175
176 bh = sb_getblk(sdp->sd_vfs, blkno);
177 lock_buffer(bh);
178 memset(bh->b_data, 0, bh->b_size);
179 set_buffer_uptodate(bh);
180 clear_buffer_dirty(bh);
181 gfs2_log_incr_head(sdp);
182 atomic_inc(&sdp->sd_log_in_flight);
183 bh->b_private = sdp;
184 bh->b_end_io = gfs2_log_write_endio;
185
186 return bh;
187}
188
189/**
190 * gfs2_fake_write_endio -
191 * @bh: The buffer head
192 * @uptodate: The I/O Status
193 *
194 */
195
196static void gfs2_fake_write_endio(struct buffer_head *bh, int uptodate)
197{
198 struct buffer_head *real_bh = bh->b_private;
199 struct gfs2_bufdata *bd = real_bh->b_private;
200 struct gfs2_sbd *sdp = bd->bd_gl->gl_sbd;
201
202 end_buffer_write_sync(bh, uptodate);
203 mempool_free(bh, gfs2_bh_pool);
204 unlock_buffer(real_bh);
205 brelse(real_bh);
206 if (atomic_dec_and_test(&sdp->sd_log_in_flight))
207 wake_up(&sdp->sd_log_flush_wait);
208}
209
210/**
211 * gfs2_log_fake_buf - Build a fake buffer head to write metadata buffer to log
212 * @sdp: the filesystem
213 * @data: the data the buffer_head should point to
214 *
215 * Returns: the log buffer descriptor
216 */
217
218static struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
219 struct buffer_head *real)
220{
221 u64 blkno = gfs2_log_bmap(sdp, sdp->sd_log_flush_head);
222 struct buffer_head *bh;
223
224 bh = mempool_alloc(gfs2_bh_pool, GFP_NOFS);
225 atomic_set(&bh->b_count, 1);
226 bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate) | (1 << BH_Lock);
227 set_bh_page(bh, real->b_page, bh_offset(real));
228 bh->b_blocknr = blkno;
229 bh->b_size = sdp->sd_sb.sb_bsize;
230 bh->b_bdev = sdp->sd_vfs->s_bdev;
231 bh->b_private = real;
232 bh->b_end_io = gfs2_fake_write_endio;
233
234 gfs2_log_incr_head(sdp);
235 atomic_inc(&sdp->sd_log_in_flight);
236
237 return bh;
238}
146 239
147static struct buffer_head *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type) 240static struct buffer_head *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type)
148{ 241{
@@ -553,11 +646,11 @@ static void gfs2_check_magic(struct buffer_head *bh)
553 __be32 *ptr; 646 __be32 *ptr;
554 647
555 clear_buffer_escaped(bh); 648 clear_buffer_escaped(bh);
556 kaddr = kmap_atomic(bh->b_page, KM_USER0); 649 kaddr = kmap_atomic(bh->b_page);
557 ptr = kaddr + bh_offset(bh); 650 ptr = kaddr + bh_offset(bh);
558 if (*ptr == cpu_to_be32(GFS2_MAGIC)) 651 if (*ptr == cpu_to_be32(GFS2_MAGIC))
559 set_buffer_escaped(bh); 652 set_buffer_escaped(bh);
560 kunmap_atomic(kaddr, KM_USER0); 653 kunmap_atomic(kaddr);
561} 654}
562 655
563static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh, 656static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh,
@@ -594,10 +687,10 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh,
594 if (buffer_escaped(bd->bd_bh)) { 687 if (buffer_escaped(bd->bd_bh)) {
595 void *kaddr; 688 void *kaddr;
596 bh1 = gfs2_log_get_buf(sdp); 689 bh1 = gfs2_log_get_buf(sdp);
597 kaddr = kmap_atomic(bd->bd_bh->b_page, KM_USER0); 690 kaddr = kmap_atomic(bd->bd_bh->b_page);
598 memcpy(bh1->b_data, kaddr + bh_offset(bd->bd_bh), 691 memcpy(bh1->b_data, kaddr + bh_offset(bd->bd_bh),
599 bh1->b_size); 692 bh1->b_size);
600 kunmap_atomic(kaddr, KM_USER0); 693 kunmap_atomic(kaddr);
601 *(__be32 *)bh1->b_data = 0; 694 *(__be32 *)bh1->b_data = 0;
602 clear_buffer_escaped(bd->bd_bh); 695 clear_buffer_escaped(bd->bd_bh);
603 unlock_buffer(bd->bd_bh); 696 unlock_buffer(bd->bd_bh);
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index a8d9bcd0e19c..754426b1e52c 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -17,6 +17,7 @@
17#include <linux/rcupdate.h> 17#include <linux/rcupdate.h>
18#include <linux/rculist_bl.h> 18#include <linux/rculist_bl.h>
19#include <linux/atomic.h> 19#include <linux/atomic.h>
20#include <linux/mempool.h>
20 21
21#include "gfs2.h" 22#include "gfs2.h"
22#include "incore.h" 23#include "incore.h"
@@ -69,6 +70,16 @@ static void gfs2_init_gl_aspace_once(void *foo)
69 address_space_init_once(mapping); 70 address_space_init_once(mapping);
70} 71}
71 72
73static void *gfs2_bh_alloc(gfp_t mask, void *data)
74{
75 return alloc_buffer_head(mask);
76}
77
78static void gfs2_bh_free(void *ptr, void *data)
79{
80 return free_buffer_head(ptr);
81}
82
72/** 83/**
73 * init_gfs2_fs - Register GFS2 as a filesystem 84 * init_gfs2_fs - Register GFS2 as a filesystem
74 * 85 *
@@ -151,6 +162,10 @@ static int __init init_gfs2_fs(void)
151 gfs2_control_wq = alloc_workqueue("gfs2_control", 162 gfs2_control_wq = alloc_workqueue("gfs2_control",
152 WQ_NON_REENTRANT | WQ_UNBOUND | WQ_FREEZABLE, 0); 163 WQ_NON_REENTRANT | WQ_UNBOUND | WQ_FREEZABLE, 0);
153 if (!gfs2_control_wq) 164 if (!gfs2_control_wq)
165 goto fail_recovery;
166
167 gfs2_bh_pool = mempool_create(1024, gfs2_bh_alloc, gfs2_bh_free, NULL);
168 if (!gfs2_bh_pool)
154 goto fail_control; 169 goto fail_control;
155 170
156 gfs2_register_debugfs(); 171 gfs2_register_debugfs();
@@ -160,6 +175,8 @@ static int __init init_gfs2_fs(void)
160 return 0; 175 return 0;
161 176
162fail_control: 177fail_control:
178 destroy_workqueue(gfs2_control_wq);
179fail_recovery:
163 destroy_workqueue(gfs_recovery_wq); 180 destroy_workqueue(gfs_recovery_wq);
164fail_wq: 181fail_wq:
165 unregister_filesystem(&gfs2meta_fs_type); 182 unregister_filesystem(&gfs2meta_fs_type);
@@ -208,6 +225,7 @@ static void __exit exit_gfs2_fs(void)
208 225
209 rcu_barrier(); 226 rcu_barrier();
210 227
228 mempool_destroy(gfs2_bh_pool);
211 kmem_cache_destroy(gfs2_quotad_cachep); 229 kmem_cache_destroy(gfs2_quotad_cachep);
212 kmem_cache_destroy(gfs2_rgrpd_cachep); 230 kmem_cache_destroy(gfs2_rgrpd_cachep);
213 kmem_cache_destroy(gfs2_bufdata_cachep); 231 kmem_cache_destroy(gfs2_bufdata_cachep);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 24f609c9ef91..6f3a18f9e176 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -68,6 +68,12 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
68 68
69 sb->s_fs_info = sdp; 69 sb->s_fs_info = sdp;
70 sdp->sd_vfs = sb; 70 sdp->sd_vfs = sb;
71 sdp->sd_lkstats = alloc_percpu(struct gfs2_pcpu_lkstats);
72 if (!sdp->sd_lkstats) {
73 kfree(sdp);
74 return NULL;
75 }
76
71 set_bit(SDF_NOJOURNALID, &sdp->sd_flags); 77 set_bit(SDF_NOJOURNALID, &sdp->sd_flags);
72 gfs2_tune_init(&sdp->sd_tune); 78 gfs2_tune_init(&sdp->sd_tune);
73 79
@@ -77,7 +83,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
77 spin_lock_init(&sdp->sd_statfs_spin); 83 spin_lock_init(&sdp->sd_statfs_spin);
78 84
79 spin_lock_init(&sdp->sd_rindex_spin); 85 spin_lock_init(&sdp->sd_rindex_spin);
80 mutex_init(&sdp->sd_rindex_mutex);
81 sdp->sd_rindex_tree.rb_node = NULL; 86 sdp->sd_rindex_tree.rb_node = NULL;
82 87
83 INIT_LIST_HEAD(&sdp->sd_jindex_list); 88 INIT_LIST_HEAD(&sdp->sd_jindex_list);
@@ -431,10 +436,9 @@ static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr,
431 fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode)); 436 fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode));
432 return PTR_ERR(inode); 437 return PTR_ERR(inode);
433 } 438 }
434 dentry = d_alloc_root(inode); 439 dentry = d_make_root(inode);
435 if (!dentry) { 440 if (!dentry) {
436 fs_err(sdp, "can't alloc %s dentry\n", name); 441 fs_err(sdp, "can't alloc %s dentry\n", name);
437 iput(inode);
438 return -ENOMEM; 442 return -ENOMEM;
439 } 443 }
440 *dptr = dentry; 444 *dptr = dentry;
@@ -1221,6 +1225,7 @@ fail_sys:
1221 gfs2_sys_fs_del(sdp); 1225 gfs2_sys_fs_del(sdp);
1222fail: 1226fail:
1223 gfs2_delete_debugfs_file(sdp); 1227 gfs2_delete_debugfs_file(sdp);
1228 free_percpu(sdp->sd_lkstats);
1224 kfree(sdp); 1229 kfree(sdp);
1225 sb->s_fs_info = NULL; 1230 sb->s_fs_info = NULL;
1226 return error; 1231 return error;
@@ -1393,6 +1398,7 @@ static void gfs2_kill_sb(struct super_block *sb)
1393 shrink_dcache_sb(sb); 1398 shrink_dcache_sb(sb);
1394 kill_block_super(sb); 1399 kill_block_super(sb);
1395 gfs2_delete_debugfs_file(sdp); 1400 gfs2_delete_debugfs_file(sdp);
1401 free_percpu(sdp->sd_lkstats);
1396 kfree(sdp); 1402 kfree(sdp);
1397} 1403}
1398 1404
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index a45b21b03915..6019da3dcaed 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -681,7 +681,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
681 ptr = qp; 681 ptr = qp;
682 nbytes = sizeof(struct gfs2_quota); 682 nbytes = sizeof(struct gfs2_quota);
683get_a_page: 683get_a_page:
684 page = grab_cache_page(mapping, index); 684 page = find_or_create_page(mapping, index, GFP_NOFS);
685 if (!page) 685 if (!page)
686 return -ENOMEM; 686 return -ENOMEM;
687 687
@@ -720,12 +720,12 @@ get_a_page:
720 720
721 gfs2_trans_add_bh(ip->i_gl, bh, 0); 721 gfs2_trans_add_bh(ip->i_gl, bh, 0);
722 722
723 kaddr = kmap_atomic(page, KM_USER0); 723 kaddr = kmap_atomic(page);
724 if (offset + sizeof(struct gfs2_quota) > PAGE_CACHE_SIZE) 724 if (offset + sizeof(struct gfs2_quota) > PAGE_CACHE_SIZE)
725 nbytes = PAGE_CACHE_SIZE - offset; 725 nbytes = PAGE_CACHE_SIZE - offset;
726 memcpy(kaddr + offset, ptr, nbytes); 726 memcpy(kaddr + offset, ptr, nbytes);
727 flush_dcache_page(page); 727 flush_dcache_page(page);
728 kunmap_atomic(kaddr, KM_USER0); 728 kunmap_atomic(kaddr);
729 unlock_page(page); 729 unlock_page(page);
730 page_cache_release(page); 730 page_cache_release(page);
731 731
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 49ada95209d0..19bde40b4864 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -327,23 +327,34 @@ static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
327 * Returns: The resource group, or NULL if not found 327 * Returns: The resource group, or NULL if not found
328 */ 328 */
329 329
330struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk) 330struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact)
331{ 331{
332 struct rb_node **newn; 332 struct rb_node *n, *next;
333 struct gfs2_rgrpd *cur; 333 struct gfs2_rgrpd *cur;
334 334
335 if (gfs2_rindex_update(sdp))
336 return NULL;
337
335 spin_lock(&sdp->sd_rindex_spin); 338 spin_lock(&sdp->sd_rindex_spin);
336 newn = &sdp->sd_rindex_tree.rb_node; 339 n = sdp->sd_rindex_tree.rb_node;
337 while (*newn) { 340 while (n) {
338 cur = rb_entry(*newn, struct gfs2_rgrpd, rd_node); 341 cur = rb_entry(n, struct gfs2_rgrpd, rd_node);
342 next = NULL;
339 if (blk < cur->rd_addr) 343 if (blk < cur->rd_addr)
340 newn = &((*newn)->rb_left); 344 next = n->rb_left;
341 else if (blk >= cur->rd_data0 + cur->rd_data) 345 else if (blk >= cur->rd_data0 + cur->rd_data)
342 newn = &((*newn)->rb_right); 346 next = n->rb_right;
343 else { 347 if (next == NULL) {
344 spin_unlock(&sdp->sd_rindex_spin); 348 spin_unlock(&sdp->sd_rindex_spin);
349 if (exact) {
350 if (blk < cur->rd_addr)
351 return NULL;
352 if (blk >= cur->rd_data0 + cur->rd_data)
353 return NULL;
354 }
345 return cur; 355 return cur;
346 } 356 }
357 n = next;
347 } 358 }
348 spin_unlock(&sdp->sd_rindex_spin); 359 spin_unlock(&sdp->sd_rindex_spin);
349 360
@@ -532,7 +543,6 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp)
532 struct file_ra_state ra_state; 543 struct file_ra_state ra_state;
533 int error, rgrps; 544 int error, rgrps;
534 545
535 mutex_lock(&sdp->sd_rindex_mutex);
536 file_ra_state_init(&ra_state, inode->i_mapping); 546 file_ra_state_init(&ra_state, inode->i_mapping);
537 for (rgrps = 0;; rgrps++) { 547 for (rgrps = 0;; rgrps++) {
538 loff_t pos = rgrps * sizeof(struct gfs2_rindex); 548 loff_t pos = rgrps * sizeof(struct gfs2_rindex);
@@ -545,11 +555,10 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp)
545 break; 555 break;
546 total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data); 556 total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);
547 } 557 }
548 mutex_unlock(&sdp->sd_rindex_mutex);
549 return total_data; 558 return total_data;
550} 559}
551 560
552static void rgd_insert(struct gfs2_rgrpd *rgd) 561static int rgd_insert(struct gfs2_rgrpd *rgd)
553{ 562{
554 struct gfs2_sbd *sdp = rgd->rd_sbd; 563 struct gfs2_sbd *sdp = rgd->rd_sbd;
555 struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL; 564 struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL;
@@ -565,11 +574,13 @@ static void rgd_insert(struct gfs2_rgrpd *rgd)
565 else if (rgd->rd_addr > cur->rd_addr) 574 else if (rgd->rd_addr > cur->rd_addr)
566 newn = &((*newn)->rb_right); 575 newn = &((*newn)->rb_right);
567 else 576 else
568 return; 577 return -EEXIST;
569 } 578 }
570 579
571 rb_link_node(&rgd->rd_node, parent, newn); 580 rb_link_node(&rgd->rd_node, parent, newn);
572 rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree); 581 rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree);
582 sdp->sd_rgrps++;
583 return 0;
573} 584}
574 585
575/** 586/**
@@ -623,10 +634,12 @@ static int read_rindex_entry(struct gfs2_inode *ip,
623 if (rgd->rd_data > sdp->sd_max_rg_data) 634 if (rgd->rd_data > sdp->sd_max_rg_data)
624 sdp->sd_max_rg_data = rgd->rd_data; 635 sdp->sd_max_rg_data = rgd->rd_data;
625 spin_lock(&sdp->sd_rindex_spin); 636 spin_lock(&sdp->sd_rindex_spin);
626 rgd_insert(rgd); 637 error = rgd_insert(rgd);
627 sdp->sd_rgrps++;
628 spin_unlock(&sdp->sd_rindex_spin); 638 spin_unlock(&sdp->sd_rindex_spin);
629 return error; 639 if (!error)
640 return 0;
641
642 error = 0; /* someone else read in the rgrp; free it and ignore it */
630 643
631fail: 644fail:
632 kfree(rgd->rd_bits); 645 kfree(rgd->rd_bits);
@@ -687,7 +700,6 @@ int gfs2_rindex_update(struct gfs2_sbd *sdp)
687 700
688 /* Read new copy from disk if we don't have the latest */ 701 /* Read new copy from disk if we don't have the latest */
689 if (!sdp->sd_rindex_uptodate) { 702 if (!sdp->sd_rindex_uptodate) {
690 mutex_lock(&sdp->sd_rindex_mutex);
691 if (!gfs2_glock_is_locked_by_me(gl)) { 703 if (!gfs2_glock_is_locked_by_me(gl)) {
692 error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh); 704 error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh);
693 if (error) 705 if (error)
@@ -698,10 +710,8 @@ int gfs2_rindex_update(struct gfs2_sbd *sdp)
698 error = gfs2_ri_update(ip); 710 error = gfs2_ri_update(ip);
699 if (unlock_required) 711 if (unlock_required)
700 gfs2_glock_dq_uninit(&ri_gh); 712 gfs2_glock_dq_uninit(&ri_gh);
701 mutex_unlock(&sdp->sd_rindex_mutex);
702 } 713 }
703 714
704
705 return error; 715 return error;
706} 716}
707 717
@@ -810,9 +820,9 @@ void gfs2_rgrp_go_unlock(struct gfs2_holder *gh)
810 820
811} 821}
812 822
813void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, 823int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
814 struct buffer_head *bh, 824 struct buffer_head *bh,
815 const struct gfs2_bitmap *bi) 825 const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed)
816{ 826{
817 struct super_block *sb = sdp->sd_vfs; 827 struct super_block *sb = sdp->sd_vfs;
818 struct block_device *bdev = sb->s_bdev; 828 struct block_device *bdev = sb->s_bdev;
@@ -823,11 +833,19 @@ void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
823 sector_t nr_sects = 0; 833 sector_t nr_sects = 0;
824 int rv; 834 int rv;
825 unsigned int x; 835 unsigned int x;
836 u32 trimmed = 0;
837 u8 diff;
826 838
827 for (x = 0; x < bi->bi_len; x++) { 839 for (x = 0; x < bi->bi_len; x++) {
828 const u8 *orig = bh->b_data + bi->bi_offset + x; 840 const u8 *clone = bi->bi_clone ? bi->bi_clone : bi->bi_bh->b_data;
829 const u8 *clone = bi->bi_clone + bi->bi_offset + x; 841 clone += bi->bi_offset;
830 u8 diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1)); 842 clone += x;
843 if (bh) {
844 const u8 *orig = bh->b_data + bi->bi_offset + x;
845 diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1));
846 } else {
847 diff = ~(*clone | (*clone >> 1));
848 }
831 diff &= 0x55; 849 diff &= 0x55;
832 if (diff == 0) 850 if (diff == 0)
833 continue; 851 continue;
@@ -838,11 +856,14 @@ void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
838 if (nr_sects == 0) 856 if (nr_sects == 0)
839 goto start_new_extent; 857 goto start_new_extent;
840 if ((start + nr_sects) != blk) { 858 if ((start + nr_sects) != blk) {
841 rv = blkdev_issue_discard(bdev, start, 859 if (nr_sects >= minlen) {
842 nr_sects, GFP_NOFS, 860 rv = blkdev_issue_discard(bdev,
843 0); 861 start, nr_sects,
844 if (rv) 862 GFP_NOFS, 0);
845 goto fail; 863 if (rv)
864 goto fail;
865 trimmed += nr_sects;
866 }
846 nr_sects = 0; 867 nr_sects = 0;
847start_new_extent: 868start_new_extent:
848 start = blk; 869 start = blk;
@@ -853,15 +874,104 @@ start_new_extent:
853 blk += sects_per_blk; 874 blk += sects_per_blk;
854 } 875 }
855 } 876 }
856 if (nr_sects) { 877 if (nr_sects >= minlen) {
857 rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, 0); 878 rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, 0);
858 if (rv) 879 if (rv)
859 goto fail; 880 goto fail;
881 trimmed += nr_sects;
860 } 882 }
861 return; 883 if (ptrimmed)
884 *ptrimmed = trimmed;
885 return 0;
886
862fail: 887fail:
863 fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv); 888 if (sdp->sd_args.ar_discard)
889 fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv);
864 sdp->sd_args.ar_discard = 0; 890 sdp->sd_args.ar_discard = 0;
891 return -EIO;
892}
893
894/**
895 * gfs2_fitrim - Generate discard requests for unused bits of the filesystem
896 * @filp: Any file on the filesystem
897 * @argp: Pointer to the arguments (also used to pass result)
898 *
899 * Returns: 0 on success, otherwise error code
900 */
901
902int gfs2_fitrim(struct file *filp, void __user *argp)
903{
904 struct inode *inode = filp->f_dentry->d_inode;
905 struct gfs2_sbd *sdp = GFS2_SB(inode);
906 struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev);
907 struct buffer_head *bh;
908 struct gfs2_rgrpd *rgd;
909 struct gfs2_rgrpd *rgd_end;
910 struct gfs2_holder gh;
911 struct fstrim_range r;
912 int ret = 0;
913 u64 amt;
914 u64 trimmed = 0;
915 unsigned int x;
916
917 if (!capable(CAP_SYS_ADMIN))
918 return -EPERM;
919
920 if (!blk_queue_discard(q))
921 return -EOPNOTSUPP;
922
923 if (argp == NULL) {
924 r.start = 0;
925 r.len = ULLONG_MAX;
926 r.minlen = 0;
927 } else if (copy_from_user(&r, argp, sizeof(r)))
928 return -EFAULT;
929
930 rgd = gfs2_blk2rgrpd(sdp, r.start, 0);
931 rgd_end = gfs2_blk2rgrpd(sdp, r.start + r.len, 0);
932
933 while (1) {
934
935 ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
936 if (ret)
937 goto out;
938
939 if (!(rgd->rd_flags & GFS2_RGF_TRIMMED)) {
940 /* Trim each bitmap in the rgrp */
941 for (x = 0; x < rgd->rd_length; x++) {
942 struct gfs2_bitmap *bi = rgd->rd_bits + x;
943 ret = gfs2_rgrp_send_discards(sdp, rgd->rd_data0, NULL, bi, r.minlen, &amt);
944 if (ret) {
945 gfs2_glock_dq_uninit(&gh);
946 goto out;
947 }
948 trimmed += amt;
949 }
950
951 /* Mark rgrp as having been trimmed */
952 ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0);
953 if (ret == 0) {
954 bh = rgd->rd_bits[0].bi_bh;
955 rgd->rd_flags |= GFS2_RGF_TRIMMED;
956 gfs2_trans_add_bh(rgd->rd_gl, bh, 1);
957 gfs2_rgrp_out(rgd, bh->b_data);
958 gfs2_trans_end(sdp);
959 }
960 }
961 gfs2_glock_dq_uninit(&gh);
962
963 if (rgd == rgd_end)
964 break;
965
966 rgd = gfs2_rgrpd_get_next(rgd);
967 }
968
969out:
970 r.len = trimmed << 9;
971 if (argp && copy_to_user(argp, &r, sizeof(r)))
972 return -EFAULT;
973
974 return ret;
865} 975}
866 976
867/** 977/**
@@ -1008,7 +1118,7 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
1008 if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) 1118 if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal))
1009 rgd = begin = ip->i_rgd; 1119 rgd = begin = ip->i_rgd;
1010 else 1120 else
1011 rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal); 1121 rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
1012 1122
1013 if (rgd == NULL) 1123 if (rgd == NULL)
1014 return -EBADSLT; 1124 return -EBADSLT;
@@ -1293,7 +1403,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
1293 u32 length, rgrp_blk, buf_blk; 1403 u32 length, rgrp_blk, buf_blk;
1294 unsigned int buf; 1404 unsigned int buf;
1295 1405
1296 rgd = gfs2_blk2rgrpd(sdp, bstart); 1406 rgd = gfs2_blk2rgrpd(sdp, bstart, 1);
1297 if (!rgd) { 1407 if (!rgd) {
1298 if (gfs2_consist(sdp)) 1408 if (gfs2_consist(sdp))
1299 fs_err(sdp, "block = %llu\n", (unsigned long long)bstart); 1409 fs_err(sdp, "block = %llu\n", (unsigned long long)bstart);
@@ -1474,7 +1584,7 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
1474 return; 1584 return;
1475 trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE); 1585 trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE);
1476 rgd->rd_free += blen; 1586 rgd->rd_free += blen;
1477 1587 rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
1478 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 1588 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1479 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 1589 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1480 1590
@@ -1560,14 +1670,9 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
1560{ 1670{
1561 struct gfs2_rgrpd *rgd; 1671 struct gfs2_rgrpd *rgd;
1562 struct gfs2_holder rgd_gh; 1672 struct gfs2_holder rgd_gh;
1563 int error; 1673 int error = -EINVAL;
1564
1565 error = gfs2_rindex_update(sdp);
1566 if (error)
1567 return error;
1568 1674
1569 error = -EINVAL; 1675 rgd = gfs2_blk2rgrpd(sdp, no_addr, 1);
1570 rgd = gfs2_blk2rgrpd(sdp, no_addr);
1571 if (!rgd) 1676 if (!rgd)
1572 goto fail; 1677 goto fail;
1573 1678
@@ -1610,7 +1715,7 @@ void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
1610 if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, block)) 1715 if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, block))
1611 rgd = ip->i_rgd; 1716 rgd = ip->i_rgd;
1612 else 1717 else
1613 rgd = gfs2_blk2rgrpd(sdp, block); 1718 rgd = gfs2_blk2rgrpd(sdp, block, 1);
1614 if (!rgd) { 1719 if (!rgd) {
1615 fs_err(sdp, "rlist_add: no rgrp for block %llu\n", (unsigned long long)block); 1720 fs_err(sdp, "rlist_add: no rgrp for block %llu\n", (unsigned long long)block);
1616 return; 1721 return;
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index ceec9106cdf4..b4b10f4de25f 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -11,6 +11,7 @@
11#define __RGRP_DOT_H__ 11#define __RGRP_DOT_H__
12 12
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/uaccess.h>
14 15
15struct gfs2_rgrpd; 16struct gfs2_rgrpd;
16struct gfs2_sbd; 17struct gfs2_sbd;
@@ -18,7 +19,7 @@ struct gfs2_holder;
18 19
19extern void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd); 20extern void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd);
20 21
21extern struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk); 22extern struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact);
22extern struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp); 23extern struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp);
23extern struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd); 24extern struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd);
24 25
@@ -62,8 +63,9 @@ extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state);
62extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); 63extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
63extern u64 gfs2_ri_total(struct gfs2_sbd *sdp); 64extern u64 gfs2_ri_total(struct gfs2_sbd *sdp);
64extern int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl); 65extern int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl);
65extern void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, 66extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
66 struct buffer_head *bh, 67 struct buffer_head *bh,
67 const struct gfs2_bitmap *bi); 68 const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed);
69extern int gfs2_fitrim(struct file *filp, void __user *argp);
68 70
69#endif /* __RGRP_DOT_H__ */ 71#endif /* __RGRP_DOT_H__ */
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 4553ce515f62..6172fa77ad59 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1417,7 +1417,7 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
1417 if (error) 1417 if (error)
1418 goto out; 1418 goto out;
1419 1419
1420 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); 1420 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1);
1421 if (!rgd) { 1421 if (!rgd) {
1422 gfs2_consist_inode(ip); 1422 gfs2_consist_inode(ip);
1423 error = -EIO; 1423 error = -EIO;
@@ -1557,6 +1557,7 @@ out:
1557 end_writeback(inode); 1557 end_writeback(inode);
1558 gfs2_dir_hash_inval(ip); 1558 gfs2_dir_hash_inval(ip);
1559 ip->i_gl->gl_object = NULL; 1559 ip->i_gl->gl_object = NULL;
1560 flush_delayed_work_sync(&ip->i_gl->gl_work);
1560 gfs2_glock_add_to_lru(ip->i_gl); 1561 gfs2_glock_add_to_lru(ip->i_gl);
1561 gfs2_glock_put(ip->i_gl); 1562 gfs2_glock_put(ip->i_gl);
1562 ip->i_gl = NULL; 1563 ip->i_gl = NULL;
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index 5d07609ec57d..dfa89cd75534 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -11,6 +11,7 @@
11#include <linux/dlmconstants.h> 11#include <linux/dlmconstants.h>
12#include <linux/gfs2_ondisk.h> 12#include <linux/gfs2_ondisk.h>
13#include <linux/writeback.h> 13#include <linux/writeback.h>
14#include <linux/ktime.h>
14#include "incore.h" 15#include "incore.h"
15#include "glock.h" 16#include "glock.h"
16 17
@@ -43,7 +44,8 @@
43 {(1UL << GLF_FROZEN), "F" }, \ 44 {(1UL << GLF_FROZEN), "F" }, \
44 {(1UL << GLF_QUEUED), "q" }, \ 45 {(1UL << GLF_QUEUED), "q" }, \
45 {(1UL << GLF_LRU), "L" }, \ 46 {(1UL << GLF_LRU), "L" }, \
46 {(1UL << GLF_OBJECT), "o" }) 47 {(1UL << GLF_OBJECT), "o" }, \
48 {(1UL << GLF_BLOCKING), "b" })
47 49
48#ifndef NUMPTY 50#ifndef NUMPTY
49#define NUMPTY 51#define NUMPTY
@@ -236,6 +238,62 @@ TRACE_EVENT(gfs2_glock_queue,
236 glock_trace_name(__entry->state)) 238 glock_trace_name(__entry->state))
237); 239);
238 240
241/* DLM sends a reply to GFS2 */
242TRACE_EVENT(gfs2_glock_lock_time,
243
244 TP_PROTO(const struct gfs2_glock *gl, s64 tdiff),
245
246 TP_ARGS(gl, tdiff),
247
248 TP_STRUCT__entry(
249 __field( dev_t, dev )
250 __field( u64, glnum )
251 __field( u32, gltype )
252 __field( int, status )
253 __field( char, flags )
254 __field( s64, tdiff )
255 __field( s64, srtt )
256 __field( s64, srttvar )
257 __field( s64, srttb )
258 __field( s64, srttvarb )
259 __field( s64, sirt )
260 __field( s64, sirtvar )
261 __field( s64, dcount )
262 __field( s64, qcount )
263 ),
264
265 TP_fast_assign(
266 __entry->dev = gl->gl_sbd->sd_vfs->s_dev;
267 __entry->glnum = gl->gl_name.ln_number;
268 __entry->gltype = gl->gl_name.ln_type;
269 __entry->status = gl->gl_lksb.sb_status;
270 __entry->flags = gl->gl_lksb.sb_flags;
271 __entry->tdiff = tdiff;
272 __entry->srtt = gl->gl_stats.stats[GFS2_LKS_SRTT];
273 __entry->srttvar = gl->gl_stats.stats[GFS2_LKS_SRTTVAR];
274 __entry->srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB];
275 __entry->srttvarb = gl->gl_stats.stats[GFS2_LKS_SRTTVARB];
276 __entry->sirt = gl->gl_stats.stats[GFS2_LKS_SIRT];
277 __entry->sirtvar = gl->gl_stats.stats[GFS2_LKS_SIRTVAR];
278 __entry->dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT];
279 __entry->qcount = gl->gl_stats.stats[GFS2_LKS_QCOUNT];
280 ),
281
282 TP_printk("%u,%u glock %d:%lld status:%d flags:%02x tdiff:%lld srtt:%lld/%lld srttb:%lld/%lld sirt:%lld/%lld dcnt:%lld qcnt:%lld",
283 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype,
284 (unsigned long long)__entry->glnum,
285 __entry->status, __entry->flags,
286 (long long)__entry->tdiff,
287 (long long)__entry->srtt,
288 (long long)__entry->srttvar,
289 (long long)__entry->srttb,
290 (long long)__entry->srttvarb,
291 (long long)__entry->sirt,
292 (long long)__entry->sirtvar,
293 (long long)__entry->dcount,
294 (long long)__entry->qcount)
295);
296
239/* Section 2 - Log/journal 297/* Section 2 - Log/journal
240 * 298 *
241 * Objectives: 299 * Objectives:
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 53511291fe36..9e7765e8e7b0 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -25,6 +25,7 @@ struct kmem_cache *gfs2_inode_cachep __read_mostly;
25struct kmem_cache *gfs2_bufdata_cachep __read_mostly; 25struct kmem_cache *gfs2_bufdata_cachep __read_mostly;
26struct kmem_cache *gfs2_rgrpd_cachep __read_mostly; 26struct kmem_cache *gfs2_rgrpd_cachep __read_mostly;
27struct kmem_cache *gfs2_quotad_cachep __read_mostly; 27struct kmem_cache *gfs2_quotad_cachep __read_mostly;
28mempool_t *gfs2_bh_pool __read_mostly;
28 29
29void gfs2_assert_i(struct gfs2_sbd *sdp) 30void gfs2_assert_i(struct gfs2_sbd *sdp)
30{ 31{
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index b432e04600de..a4ce76c67dbb 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -10,6 +10,8 @@
10#ifndef __UTIL_DOT_H__ 10#ifndef __UTIL_DOT_H__
11#define __UTIL_DOT_H__ 11#define __UTIL_DOT_H__
12 12
13#include <linux/mempool.h>
14
13#include "incore.h" 15#include "incore.h"
14 16
15#define fs_printk(level, fs, fmt, arg...) \ 17#define fs_printk(level, fs, fmt, arg...) \
@@ -150,6 +152,7 @@ extern struct kmem_cache *gfs2_inode_cachep;
150extern struct kmem_cache *gfs2_bufdata_cachep; 152extern struct kmem_cache *gfs2_bufdata_cachep;
151extern struct kmem_cache *gfs2_rgrpd_cachep; 153extern struct kmem_cache *gfs2_rgrpd_cachep;
152extern struct kmem_cache *gfs2_quotad_cachep; 154extern struct kmem_cache *gfs2_quotad_cachep;
155extern mempool_t *gfs2_bh_pool;
153 156
154static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt, 157static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt,
155 unsigned int *p) 158 unsigned int *p)
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index e9636591b5d5..2e5ba425cae7 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -251,7 +251,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
251 if (!blks) 251 if (!blks)
252 return 0; 252 return 0;
253 253
254 rgd = gfs2_blk2rgrpd(sdp, bn); 254 rgd = gfs2_blk2rgrpd(sdp, bn, 1);
255 if (!rgd) { 255 if (!rgd) {
256 gfs2_consist_inode(ip); 256 gfs2_consist_inode(ip);
257 return -EIO; 257 return -EIO;
@@ -1439,7 +1439,7 @@ static int ea_dealloc_block(struct gfs2_inode *ip)
1439 struct gfs2_holder gh; 1439 struct gfs2_holder gh;
1440 int error; 1440 int error;
1441 1441
1442 rgd = gfs2_blk2rgrpd(sdp, ip->i_eattr); 1442 rgd = gfs2_blk2rgrpd(sdp, ip->i_eattr, 1);
1443 if (!rgd) { 1443 if (!rgd) {
1444 gfs2_consist_inode(ip); 1444 gfs2_consist_inode(ip);
1445 return -EIO; 1445 return -EIO;
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 8137fb3e6780..7b4c537d6e13 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -430,15 +430,13 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
430 430
431 sb->s_d_op = &hfs_dentry_operations; 431 sb->s_d_op = &hfs_dentry_operations;
432 res = -ENOMEM; 432 res = -ENOMEM;
433 sb->s_root = d_alloc_root(root_inode); 433 sb->s_root = d_make_root(root_inode);
434 if (!sb->s_root) 434 if (!sb->s_root)
435 goto bail_iput; 435 goto bail_no_root;
436 436
437 /* everything's okay */ 437 /* everything's okay */
438 return 0; 438 return 0;
439 439
440bail_iput:
441 iput(root_inode);
442bail_no_root: 440bail_no_root:
443 printk(KERN_ERR "hfs: get root inode failed.\n"); 441 printk(KERN_ERR "hfs: get root inode failed.\n");
444bail: 442bail:
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 21a5b7fc6db4..4e75ac646fea 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -317,6 +317,11 @@ static inline unsigned short hfsplus_min_io_size(struct super_block *sb)
317 317
318 318
319/* 319/*
320 * hfs+-specific ioctl for making the filesystem bootable
321 */
322#define HFSPLUS_IOC_BLESS _IO('h', 0x80)
323
324/*
320 * Functions in any *.c used in other files 325 * Functions in any *.c used in other files
321 */ 326 */
322 327
diff --git a/fs/hfsplus/hfsplus_raw.h b/fs/hfsplus/hfsplus_raw.h
index 927cdd6d5bf5..921967e5abb1 100644
--- a/fs/hfsplus/hfsplus_raw.h
+++ b/fs/hfsplus/hfsplus_raw.h
@@ -117,7 +117,7 @@ struct hfsplus_vh {
117 __be32 write_count; 117 __be32 write_count;
118 __be64 encodings_bmp; 118 __be64 encodings_bmp;
119 119
120 u8 finder_info[32]; 120 u32 finder_info[8];
121 121
122 struct hfsplus_fork_raw alloc_file; 122 struct hfsplus_fork_raw alloc_file;
123 struct hfsplus_fork_raw ext_file; 123 struct hfsplus_fork_raw ext_file;
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 6643b242bdd7..82b69ee4dacc 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -193,6 +193,7 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir,
193 mutex_init(&hip->extents_lock); 193 mutex_init(&hip->extents_lock);
194 hip->extent_state = 0; 194 hip->extent_state = 0;
195 hip->flags = 0; 195 hip->flags = 0;
196 hip->userflags = 0;
196 set_bit(HFSPLUS_I_RSRC, &hip->flags); 197 set_bit(HFSPLUS_I_RSRC, &hip->flags);
197 198
198 err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); 199 err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
@@ -400,6 +401,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, umode_t mode)
400 atomic_set(&hip->opencnt, 0); 401 atomic_set(&hip->opencnt, 0);
401 hip->extent_state = 0; 402 hip->extent_state = 0;
402 hip->flags = 0; 403 hip->flags = 0;
404 hip->userflags = 0;
403 memset(hip->first_extents, 0, sizeof(hfsplus_extent_rec)); 405 memset(hip->first_extents, 0, sizeof(hfsplus_extent_rec));
404 memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); 406 memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec));
405 hip->alloc_blocks = 0; 407 hip->alloc_blocks = 0;
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index f66c7655b3f7..c640ba57074b 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -20,6 +20,38 @@
20#include <asm/uaccess.h> 20#include <asm/uaccess.h>
21#include "hfsplus_fs.h" 21#include "hfsplus_fs.h"
22 22
23/*
24 * "Blessing" an HFS+ filesystem writes metadata to the superblock informing
25 * the platform firmware which file to boot from
26 */
27static int hfsplus_ioctl_bless(struct file *file, int __user *user_flags)
28{
29 struct dentry *dentry = file->f_path.dentry;
30 struct inode *inode = dentry->d_inode;
31 struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb);
32 struct hfsplus_vh *vh = sbi->s_vhdr;
33 struct hfsplus_vh *bvh = sbi->s_backup_vhdr;
34
35 if (!capable(CAP_SYS_ADMIN))
36 return -EPERM;
37
38 mutex_lock(&sbi->vh_mutex);
39
40 /* Directory containing the bootable system */
41 vh->finder_info[0] = bvh->finder_info[0] =
42 cpu_to_be32(parent_ino(dentry));
43
44 /* Bootloader */
45 vh->finder_info[1] = bvh->finder_info[1] = cpu_to_be32(inode->i_ino);
46
47 /* Per spec, the OS X system folder - same as finder_info[0] here */
48 vh->finder_info[5] = bvh->finder_info[5] =
49 cpu_to_be32(parent_ino(dentry));
50
51 mutex_unlock(&sbi->vh_mutex);
52 return 0;
53}
54
23static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags) 55static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags)
24{ 56{
25 struct inode *inode = file->f_path.dentry->d_inode; 57 struct inode *inode = file->f_path.dentry->d_inode;
@@ -108,6 +140,8 @@ long hfsplus_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
108 return hfsplus_ioctl_getflags(file, argp); 140 return hfsplus_ioctl_getflags(file, argp);
109 case HFSPLUS_IOC_EXT2_SETFLAGS: 141 case HFSPLUS_IOC_EXT2_SETFLAGS:
110 return hfsplus_ioctl_setflags(file, argp); 142 return hfsplus_ioctl_setflags(file, argp);
143 case HFSPLUS_IOC_BLESS:
144 return hfsplus_ioctl_bless(file, argp);
111 default: 145 default:
112 return -ENOTTY; 146 return -ENOTTY;
113 } 147 }
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 427682ca9e48..ceb1c281eefb 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -465,6 +465,13 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
465 goto out_put_alloc_file; 465 goto out_put_alloc_file;
466 } 466 }
467 467
468 sb->s_d_op = &hfsplus_dentry_operations;
469 sb->s_root = d_make_root(root);
470 if (!sb->s_root) {
471 err = -ENOMEM;
472 goto out_put_alloc_file;
473 }
474
468 str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; 475 str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1;
469 str.name = HFSP_HIDDENDIR_NAME; 476 str.name = HFSP_HIDDENDIR_NAME;
470 err = hfs_find_init(sbi->cat_tree, &fd); 477 err = hfs_find_init(sbi->cat_tree, &fd);
@@ -515,13 +522,6 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
515 } 522 }
516 } 523 }
517 524
518 sb->s_d_op = &hfsplus_dentry_operations;
519 sb->s_root = d_alloc_root(root);
520 if (!sb->s_root) {
521 err = -ENOMEM;
522 goto out_put_hidden_dir;
523 }
524
525 unload_nls(sbi->nls); 525 unload_nls(sbi->nls);
526 sbi->nls = nls; 526 sbi->nls = nls;
527 return 0; 527 return 0;
@@ -529,7 +529,8 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
529out_put_hidden_dir: 529out_put_hidden_dir:
530 iput(sbi->hidden_dir); 530 iput(sbi->hidden_dir);
531out_put_root: 531out_put_root:
532 iput(root); 532 dput(sb->s_root);
533 sb->s_root = NULL;
533out_put_alloc_file: 534out_put_alloc_file:
534 iput(sbi->alloc_file); 535 iput(sbi->alloc_file);
535out_close_cat_tree: 536out_close_cat_tree:
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index 3cbfa93cd782..1fe731337f07 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -67,7 +67,8 @@ extern int access_file(char *path, int r, int w, int x);
67extern int open_file(char *path, int r, int w, int append); 67extern int open_file(char *path, int r, int w, int append);
68extern void *open_dir(char *path, int *err_out); 68extern void *open_dir(char *path, int *err_out);
69extern char *read_dir(void *stream, unsigned long long *pos, 69extern char *read_dir(void *stream, unsigned long long *pos,
70 unsigned long long *ino_out, int *len_out); 70 unsigned long long *ino_out, int *len_out,
71 unsigned int *type_out);
71extern void close_file(void *stream); 72extern void close_file(void *stream);
72extern int replace_file(int oldfd, int fd); 73extern int replace_file(int oldfd, int fd);
73extern void close_dir(void *stream); 74extern void close_dir(void *stream);
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index e130bd46d671..07c516bfea76 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -283,6 +283,7 @@ int hostfs_readdir(struct file *file, void *ent, filldir_t filldir)
283 char *name; 283 char *name;
284 unsigned long long next, ino; 284 unsigned long long next, ino;
285 int error, len; 285 int error, len;
286 unsigned int type;
286 287
287 name = dentry_name(file->f_path.dentry); 288 name = dentry_name(file->f_path.dentry);
288 if (name == NULL) 289 if (name == NULL)
@@ -292,9 +293,9 @@ int hostfs_readdir(struct file *file, void *ent, filldir_t filldir)
292 if (dir == NULL) 293 if (dir == NULL)
293 return -error; 294 return -error;
294 next = file->f_pos; 295 next = file->f_pos;
295 while ((name = read_dir(dir, &next, &ino, &len)) != NULL) { 296 while ((name = read_dir(dir, &next, &ino, &len, &type)) != NULL) {
296 error = (*filldir)(ent, name, len, file->f_pos, 297 error = (*filldir)(ent, name, len, file->f_pos,
297 ino, DT_UNKNOWN); 298 ino, type);
298 if (error) break; 299 if (error) break;
299 file->f_pos = next; 300 file->f_pos = next;
300 } 301 }
@@ -966,9 +967,9 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
966 } 967 }
967 968
968 err = -ENOMEM; 969 err = -ENOMEM;
969 sb->s_root = d_alloc_root(root_inode); 970 sb->s_root = d_make_root(root_inode);
970 if (sb->s_root == NULL) 971 if (sb->s_root == NULL)
971 goto out_put; 972 goto out;
972 973
973 return 0; 974 return 0;
974 975
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index dd7bc38a3825..a74ad0d371c2 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -98,7 +98,8 @@ void *open_dir(char *path, int *err_out)
98} 98}
99 99
100char *read_dir(void *stream, unsigned long long *pos, 100char *read_dir(void *stream, unsigned long long *pos,
101 unsigned long long *ino_out, int *len_out) 101 unsigned long long *ino_out, int *len_out,
102 unsigned int *type_out)
102{ 103{
103 DIR *dir = stream; 104 DIR *dir = stream;
104 struct dirent *ent; 105 struct dirent *ent;
@@ -109,6 +110,7 @@ char *read_dir(void *stream, unsigned long long *pos,
109 return NULL; 110 return NULL;
110 *len_out = strlen(ent->d_name); 111 *len_out = strlen(ent->d_name);
111 *ino_out = ent->d_ino; 112 *ino_out = ent->d_ino;
113 *type_out = ent->d_type;
112 *pos = telldir(dir); 114 *pos = telldir(dir);
113 return ent->d_name; 115 return ent->d_name;
114} 116}
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 3690467c944e..54f6eccb79d9 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -625,11 +625,9 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
625 hpfs_init_inode(root); 625 hpfs_init_inode(root);
626 hpfs_read_inode(root); 626 hpfs_read_inode(root);
627 unlock_new_inode(root); 627 unlock_new_inode(root);
628 s->s_root = d_alloc_root(root); 628 s->s_root = d_make_root(root);
629 if (!s->s_root) { 629 if (!s->s_root)
630 iput(root);
631 goto bail0; 630 goto bail0;
632 }
633 631
634 /* 632 /*
635 * find the root directory's . pointer & finish filling in the inode 633 * find the root directory's . pointer & finish filling in the inode
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index d92f4ce80925..a80e45a690ac 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -726,17 +726,12 @@ static int hppfs_fill_super(struct super_block *sb, void *d, int silent)
726 726
727 err = -ENOMEM; 727 err = -ENOMEM;
728 root_inode = get_inode(sb, dget(proc_mnt->mnt_root)); 728 root_inode = get_inode(sb, dget(proc_mnt->mnt_root));
729 if (!root_inode) 729 sb->s_root = d_make_root(root_inode);
730 goto out_mntput;
731
732 sb->s_root = d_alloc_root(root_inode);
733 if (!sb->s_root) 730 if (!sb->s_root)
734 goto out_iput; 731 goto out_mntput;
735 732
736 return 0; 733 return 0;
737 734
738 out_iput:
739 iput(root_inode);
740 out_mntput: 735 out_mntput:
741 mntput(proc_mnt); 736 mntput(proc_mnt);
742 out: 737 out:
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 1e85a7ac0217..ea251749d9d5 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -41,6 +41,25 @@ const struct file_operations hugetlbfs_file_operations;
41static const struct inode_operations hugetlbfs_dir_inode_operations; 41static const struct inode_operations hugetlbfs_dir_inode_operations;
42static const struct inode_operations hugetlbfs_inode_operations; 42static const struct inode_operations hugetlbfs_inode_operations;
43 43
44struct hugetlbfs_config {
45 uid_t uid;
46 gid_t gid;
47 umode_t mode;
48 long nr_blocks;
49 long nr_inodes;
50 struct hstate *hstate;
51};
52
53struct hugetlbfs_inode_info {
54 struct shared_policy policy;
55 struct inode vfs_inode;
56};
57
58static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
59{
60 return container_of(inode, struct hugetlbfs_inode_info, vfs_inode);
61}
62
44static struct backing_dev_info hugetlbfs_backing_dev_info = { 63static struct backing_dev_info hugetlbfs_backing_dev_info = {
45 .name = "hugetlbfs", 64 .name = "hugetlbfs",
46 .ra_pages = 0, /* No readahead */ 65 .ra_pages = 0, /* No readahead */
@@ -154,10 +173,12 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
154 return addr; 173 return addr;
155 } 174 }
156 175
157 start_addr = mm->free_area_cache; 176 if (len > mm->cached_hole_size)
158 177 start_addr = mm->free_area_cache;
159 if (len <= mm->cached_hole_size) 178 else {
160 start_addr = TASK_UNMAPPED_BASE; 179 start_addr = TASK_UNMAPPED_BASE;
180 mm->cached_hole_size = 0;
181 }
161 182
162full_search: 183full_search:
163 addr = ALIGN(start_addr, huge_page_size(h)); 184 addr = ALIGN(start_addr, huge_page_size(h));
@@ -171,13 +192,18 @@ full_search:
171 */ 192 */
172 if (start_addr != TASK_UNMAPPED_BASE) { 193 if (start_addr != TASK_UNMAPPED_BASE) {
173 start_addr = TASK_UNMAPPED_BASE; 194 start_addr = TASK_UNMAPPED_BASE;
195 mm->cached_hole_size = 0;
174 goto full_search; 196 goto full_search;
175 } 197 }
176 return -ENOMEM; 198 return -ENOMEM;
177 } 199 }
178 200
179 if (!vma || addr + len <= vma->vm_start) 201 if (!vma || addr + len <= vma->vm_start) {
202 mm->free_area_cache = addr + len;
180 return addr; 203 return addr;
204 }
205 if (addr + mm->cached_hole_size < vma->vm_start)
206 mm->cached_hole_size = vma->vm_start - addr;
181 addr = ALIGN(vma->vm_end, huge_page_size(h)); 207 addr = ALIGN(vma->vm_end, huge_page_size(h));
182 } 208 }
183} 209}
@@ -238,17 +264,10 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
238 loff_t isize; 264 loff_t isize;
239 ssize_t retval = 0; 265 ssize_t retval = 0;
240 266
241 mutex_lock(&inode->i_mutex);
242
243 /* validate length */ 267 /* validate length */
244 if (len == 0) 268 if (len == 0)
245 goto out; 269 goto out;
246 270
247 isize = i_size_read(inode);
248 if (!isize)
249 goto out;
250
251 end_index = (isize - 1) >> huge_page_shift(h);
252 for (;;) { 271 for (;;) {
253 struct page *page; 272 struct page *page;
254 unsigned long nr, ret; 273 unsigned long nr, ret;
@@ -256,18 +275,21 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
256 275
257 /* nr is the maximum number of bytes to copy from this page */ 276 /* nr is the maximum number of bytes to copy from this page */
258 nr = huge_page_size(h); 277 nr = huge_page_size(h);
278 isize = i_size_read(inode);
279 if (!isize)
280 goto out;
281 end_index = (isize - 1) >> huge_page_shift(h);
259 if (index >= end_index) { 282 if (index >= end_index) {
260 if (index > end_index) 283 if (index > end_index)
261 goto out; 284 goto out;
262 nr = ((isize - 1) & ~huge_page_mask(h)) + 1; 285 nr = ((isize - 1) & ~huge_page_mask(h)) + 1;
263 if (nr <= offset) { 286 if (nr <= offset)
264 goto out; 287 goto out;
265 }
266 } 288 }
267 nr = nr - offset; 289 nr = nr - offset;
268 290
269 /* Find the page */ 291 /* Find the page */
270 page = find_get_page(mapping, index); 292 page = find_lock_page(mapping, index);
271 if (unlikely(page == NULL)) { 293 if (unlikely(page == NULL)) {
272 /* 294 /*
273 * We have a HOLE, zero out the user-buffer for the 295 * We have a HOLE, zero out the user-buffer for the
@@ -279,17 +301,18 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
279 else 301 else
280 ra = 0; 302 ra = 0;
281 } else { 303 } else {
304 unlock_page(page);
305
282 /* 306 /*
283 * We have the page, copy it to user space buffer. 307 * We have the page, copy it to user space buffer.
284 */ 308 */
285 ra = hugetlbfs_read_actor(page, offset, buf, len, nr); 309 ra = hugetlbfs_read_actor(page, offset, buf, len, nr);
286 ret = ra; 310 ret = ra;
311 page_cache_release(page);
287 } 312 }
288 if (ra < 0) { 313 if (ra < 0) {
289 if (retval == 0) 314 if (retval == 0)
290 retval = ra; 315 retval = ra;
291 if (page)
292 page_cache_release(page);
293 goto out; 316 goto out;
294 } 317 }
295 318
@@ -299,16 +322,12 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
299 index += offset >> huge_page_shift(h); 322 index += offset >> huge_page_shift(h);
300 offset &= ~huge_page_mask(h); 323 offset &= ~huge_page_mask(h);
301 324
302 if (page)
303 page_cache_release(page);
304
305 /* short read or no more work */ 325 /* short read or no more work */
306 if ((ret != nr) || (len == 0)) 326 if ((ret != nr) || (len == 0))
307 break; 327 break;
308 } 328 }
309out: 329out:
310 *ppos = ((loff_t)index << huge_page_shift(h)) + offset; 330 *ppos = ((loff_t)index << huge_page_shift(h)) + offset;
311 mutex_unlock(&inode->i_mutex);
312 return retval; 331 return retval;
313} 332}
314 333
@@ -607,9 +626,15 @@ static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
607 spin_lock(&sbinfo->stat_lock); 626 spin_lock(&sbinfo->stat_lock);
608 /* If no limits set, just report 0 for max/free/used 627 /* If no limits set, just report 0 for max/free/used
609 * blocks, like simple_statfs() */ 628 * blocks, like simple_statfs() */
610 if (sbinfo->max_blocks >= 0) { 629 if (sbinfo->spool) {
611 buf->f_blocks = sbinfo->max_blocks; 630 long free_pages;
612 buf->f_bavail = buf->f_bfree = sbinfo->free_blocks; 631
632 spin_lock(&sbinfo->spool->lock);
633 buf->f_blocks = sbinfo->spool->max_hpages;
634 free_pages = sbinfo->spool->max_hpages
635 - sbinfo->spool->used_hpages;
636 buf->f_bavail = buf->f_bfree = free_pages;
637 spin_unlock(&sbinfo->spool->lock);
613 buf->f_files = sbinfo->max_inodes; 638 buf->f_files = sbinfo->max_inodes;
614 buf->f_ffree = sbinfo->free_inodes; 639 buf->f_ffree = sbinfo->free_inodes;
615 } 640 }
@@ -625,6 +650,10 @@ static void hugetlbfs_put_super(struct super_block *sb)
625 650
626 if (sbi) { 651 if (sbi) {
627 sb->s_fs_info = NULL; 652 sb->s_fs_info = NULL;
653
654 if (sbi->spool)
655 hugepage_put_subpool(sbi->spool);
656
628 kfree(sbi); 657 kfree(sbi);
629 } 658 }
630} 659}
@@ -831,8 +860,6 @@ bad_val:
831static int 860static int
832hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) 861hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
833{ 862{
834 struct inode * inode;
835 struct dentry * root;
836 int ret; 863 int ret;
837 struct hugetlbfs_config config; 864 struct hugetlbfs_config config;
838 struct hugetlbfs_sb_info *sbinfo; 865 struct hugetlbfs_sb_info *sbinfo;
@@ -855,60 +882,31 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
855 sb->s_fs_info = sbinfo; 882 sb->s_fs_info = sbinfo;
856 sbinfo->hstate = config.hstate; 883 sbinfo->hstate = config.hstate;
857 spin_lock_init(&sbinfo->stat_lock); 884 spin_lock_init(&sbinfo->stat_lock);
858 sbinfo->max_blocks = config.nr_blocks;
859 sbinfo->free_blocks = config.nr_blocks;
860 sbinfo->max_inodes = config.nr_inodes; 885 sbinfo->max_inodes = config.nr_inodes;
861 sbinfo->free_inodes = config.nr_inodes; 886 sbinfo->free_inodes = config.nr_inodes;
887 sbinfo->spool = NULL;
888 if (config.nr_blocks != -1) {
889 sbinfo->spool = hugepage_new_subpool(config.nr_blocks);
890 if (!sbinfo->spool)
891 goto out_free;
892 }
862 sb->s_maxbytes = MAX_LFS_FILESIZE; 893 sb->s_maxbytes = MAX_LFS_FILESIZE;
863 sb->s_blocksize = huge_page_size(config.hstate); 894 sb->s_blocksize = huge_page_size(config.hstate);
864 sb->s_blocksize_bits = huge_page_shift(config.hstate); 895 sb->s_blocksize_bits = huge_page_shift(config.hstate);
865 sb->s_magic = HUGETLBFS_MAGIC; 896 sb->s_magic = HUGETLBFS_MAGIC;
866 sb->s_op = &hugetlbfs_ops; 897 sb->s_op = &hugetlbfs_ops;
867 sb->s_time_gran = 1; 898 sb->s_time_gran = 1;
868 inode = hugetlbfs_get_root(sb, &config); 899 sb->s_root = d_make_root(hugetlbfs_get_root(sb, &config));
869 if (!inode) 900 if (!sb->s_root)
870 goto out_free;
871
872 root = d_alloc_root(inode);
873 if (!root) {
874 iput(inode);
875 goto out_free; 901 goto out_free;
876 }
877 sb->s_root = root;
878 return 0; 902 return 0;
879out_free: 903out_free:
904 if (sbinfo->spool)
905 kfree(sbinfo->spool);
880 kfree(sbinfo); 906 kfree(sbinfo);
881 return -ENOMEM; 907 return -ENOMEM;
882} 908}
883 909
884int hugetlb_get_quota(struct address_space *mapping, long delta)
885{
886 int ret = 0;
887 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb);
888
889 if (sbinfo->free_blocks > -1) {
890 spin_lock(&sbinfo->stat_lock);
891 if (sbinfo->free_blocks - delta >= 0)
892 sbinfo->free_blocks -= delta;
893 else
894 ret = -ENOMEM;
895 spin_unlock(&sbinfo->stat_lock);
896 }
897
898 return ret;
899}
900
901void hugetlb_put_quota(struct address_space *mapping, long delta)
902{
903 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb);
904
905 if (sbinfo->free_blocks > -1) {
906 spin_lock(&sbinfo->stat_lock);
907 sbinfo->free_blocks += delta;
908 spin_unlock(&sbinfo->stat_lock);
909 }
910}
911
912static struct dentry *hugetlbfs_mount(struct file_system_type *fs_type, 910static struct dentry *hugetlbfs_mount(struct file_system_type *fs_type,
913 int flags, const char *dev_name, void *data) 911 int flags, const char *dev_name, void *data)
914{ 912{
@@ -928,8 +926,8 @@ static int can_do_hugetlb_shm(void)
928 return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group); 926 return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group);
929} 927}
930 928
931struct file *hugetlb_file_setup(const char *name, size_t size, 929struct file *hugetlb_file_setup(const char *name, unsigned long addr,
932 vm_flags_t acctflag, 930 size_t size, vm_flags_t acctflag,
933 struct user_struct **user, int creat_flags) 931 struct user_struct **user, int creat_flags)
934{ 932{
935 int error = -ENOMEM; 933 int error = -ENOMEM;
@@ -938,6 +936,8 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
938 struct path path; 936 struct path path;
939 struct dentry *root; 937 struct dentry *root;
940 struct qstr quick_string; 938 struct qstr quick_string;
939 struct hstate *hstate;
940 unsigned long num_pages;
941 941
942 *user = NULL; 942 *user = NULL;
943 if (!hugetlbfs_vfsmount) 943 if (!hugetlbfs_vfsmount)
@@ -946,7 +946,11 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
946 if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) { 946 if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) {
947 *user = current_user(); 947 *user = current_user();
948 if (user_shm_lock(size, *user)) { 948 if (user_shm_lock(size, *user)) {
949 printk_once(KERN_WARNING "Using mlock ulimits for SHM_HUGETLB is deprecated\n"); 949 task_lock(current);
950 printk_once(KERN_WARNING
951 "%s (%d): Using mlock ulimits for SHM_HUGETLB is deprecated\n",
952 current->comm, current->pid);
953 task_unlock(current);
950 } else { 954 } else {
951 *user = NULL; 955 *user = NULL;
952 return ERR_PTR(-EPERM); 956 return ERR_PTR(-EPERM);
@@ -967,10 +971,12 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
967 if (!inode) 971 if (!inode)
968 goto out_dentry; 972 goto out_dentry;
969 973
974 hstate = hstate_inode(inode);
975 size += addr & ~huge_page_mask(hstate);
976 num_pages = ALIGN(size, huge_page_size(hstate)) >>
977 huge_page_shift(hstate);
970 error = -ENOMEM; 978 error = -ENOMEM;
971 if (hugetlb_reserve_pages(inode, 0, 979 if (hugetlb_reserve_pages(inode, 0, num_pages, NULL, acctflag))
972 size >> huge_page_shift(hstate_inode(inode)), NULL,
973 acctflag))
974 goto out_inode; 980 goto out_inode;
975 981
976 d_instantiate(path.dentry, inode); 982 d_instantiate(path.dentry, inode);
@@ -1006,6 +1012,7 @@ static int __init init_hugetlbfs_fs(void)
1006 if (error) 1012 if (error)
1007 return error; 1013 return error;
1008 1014
1015 error = -ENOMEM;
1009 hugetlbfs_inode_cachep = kmem_cache_create("hugetlbfs_inode_cache", 1016 hugetlbfs_inode_cachep = kmem_cache_create("hugetlbfs_inode_cache",
1010 sizeof(struct hugetlbfs_inode_info), 1017 sizeof(struct hugetlbfs_inode_info),
1011 0, 0, init_once); 1018 0, 0, init_once);
@@ -1024,10 +1031,10 @@ static int __init init_hugetlbfs_fs(void)
1024 } 1031 }
1025 1032
1026 error = PTR_ERR(vfsmount); 1033 error = PTR_ERR(vfsmount);
1034 unregister_filesystem(&hugetlbfs_fs_type);
1027 1035
1028 out: 1036 out:
1029 if (error) 1037 kmem_cache_destroy(hugetlbfs_inode_cachep);
1030 kmem_cache_destroy(hugetlbfs_inode_cachep);
1031 out2: 1038 out2:
1032 bdi_destroy(&hugetlbfs_backing_dev_info); 1039 bdi_destroy(&hugetlbfs_backing_dev_info);
1033 return error; 1040 return error;
diff --git a/fs/inode.c b/fs/inode.c
index d3ebdbe723d0..9f4f5fecc096 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2,29 +2,19 @@
2 * (C) 1997 Linus Torvalds 2 * (C) 1997 Linus Torvalds
3 * (C) 1999 Andrea Arcangeli <andrea@suse.de> (dynamic inode allocation) 3 * (C) 1999 Andrea Arcangeli <andrea@suse.de> (dynamic inode allocation)
4 */ 4 */
5#include <linux/export.h>
5#include <linux/fs.h> 6#include <linux/fs.h>
6#include <linux/mm.h> 7#include <linux/mm.h>
7#include <linux/dcache.h>
8#include <linux/init.h>
9#include <linux/slab.h>
10#include <linux/writeback.h>
11#include <linux/module.h>
12#include <linux/backing-dev.h> 8#include <linux/backing-dev.h>
13#include <linux/wait.h>
14#include <linux/rwsem.h>
15#include <linux/hash.h> 9#include <linux/hash.h>
16#include <linux/swap.h> 10#include <linux/swap.h>
17#include <linux/security.h> 11#include <linux/security.h>
18#include <linux/pagemap.h>
19#include <linux/cdev.h> 12#include <linux/cdev.h>
20#include <linux/bootmem.h> 13#include <linux/bootmem.h>
21#include <linux/fsnotify.h> 14#include <linux/fsnotify.h>
22#include <linux/mount.h> 15#include <linux/mount.h>
23#include <linux/async.h>
24#include <linux/posix_acl.h> 16#include <linux/posix_acl.h>
25#include <linux/prefetch.h> 17#include <linux/prefetch.h>
26#include <linux/ima.h>
27#include <linux/cred.h>
28#include <linux/buffer_head.h> /* for inode_has_buffers */ 18#include <linux/buffer_head.h> /* for inode_has_buffers */
29#include <linux/ratelimit.h> 19#include <linux/ratelimit.h>
30#include "internal.h" 20#include "internal.h"
@@ -938,8 +928,7 @@ void lockdep_annotate_inode_mutex_key(struct inode *inode)
938 struct file_system_type *type = inode->i_sb->s_type; 928 struct file_system_type *type = inode->i_sb->s_type;
939 929
940 /* Set new key only if filesystem hasn't already changed it */ 930 /* Set new key only if filesystem hasn't already changed it */
941 if (!lockdep_match_class(&inode->i_mutex, 931 if (lockdep_match_class(&inode->i_mutex, &type->i_mutex_key)) {
942 &type->i_mutex_key)) {
943 /* 932 /*
944 * ensure nobody is actually holding i_mutex 933 * ensure nobody is actually holding i_mutex
945 */ 934 */
@@ -966,6 +955,7 @@ void unlock_new_inode(struct inode *inode)
966 spin_lock(&inode->i_lock); 955 spin_lock(&inode->i_lock);
967 WARN_ON(!(inode->i_state & I_NEW)); 956 WARN_ON(!(inode->i_state & I_NEW));
968 inode->i_state &= ~I_NEW; 957 inode->i_state &= ~I_NEW;
958 smp_mb();
969 wake_up_bit(&inode->i_state, __I_NEW); 959 wake_up_bit(&inode->i_state, __I_NEW);
970 spin_unlock(&inode->i_lock); 960 spin_unlock(&inode->i_lock);
971} 961}
@@ -1369,17 +1359,6 @@ int generic_delete_inode(struct inode *inode)
1369EXPORT_SYMBOL(generic_delete_inode); 1359EXPORT_SYMBOL(generic_delete_inode);
1370 1360
1371/* 1361/*
1372 * Normal UNIX filesystem behaviour: delete the
1373 * inode when the usage count drops to zero, and
1374 * i_nlink is zero.
1375 */
1376int generic_drop_inode(struct inode *inode)
1377{
1378 return !inode->i_nlink || inode_unhashed(inode);
1379}
1380EXPORT_SYMBOL_GPL(generic_drop_inode);
1381
1382/*
1383 * Called when we're dropping the last reference 1362 * Called when we're dropping the last reference
1384 * to an inode. 1363 * to an inode.
1385 * 1364 *
@@ -1510,9 +1489,10 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
1510 * This function automatically handles read only file systems and media, 1489 * This function automatically handles read only file systems and media,
1511 * as well as the "noatime" flag and inode specific "noatime" markers. 1490 * as well as the "noatime" flag and inode specific "noatime" markers.
1512 */ 1491 */
1513void touch_atime(struct vfsmount *mnt, struct dentry *dentry) 1492void touch_atime(struct path *path)
1514{ 1493{
1515 struct inode *inode = dentry->d_inode; 1494 struct vfsmount *mnt = path->mnt;
1495 struct inode *inode = path->dentry->d_inode;
1516 struct timespec now; 1496 struct timespec now;
1517 1497
1518 if (inode->i_flags & S_NOATIME) 1498 if (inode->i_flags & S_NOATIME)
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 066836e81848..29167bebe874 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -10,7 +10,7 @@
10#include <linux/file.h> 10#include <linux/file.h>
11#include <linux/fs.h> 11#include <linux/fs.h>
12#include <linux/security.h> 12#include <linux/security.h>
13#include <linux/module.h> 13#include <linux/export.h>
14#include <linux/uaccess.h> 14#include <linux/uaccess.h>
15#include <linux/writeback.h> 15#include <linux/writeback.h>
16#include <linux/buffer_head.h> 16#include <linux/buffer_head.h>
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index bd62c76fb5df..29037c365ba4 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -947,9 +947,8 @@ root_found:
947 s->s_d_op = &isofs_dentry_ops[table]; 947 s->s_d_op = &isofs_dentry_ops[table];
948 948
949 /* get the root dentry */ 949 /* get the root dentry */
950 s->s_root = d_alloc_root(inode); 950 s->s_root = d_make_root(inode);
951 if (!(s->s_root)) { 951 if (!(s->s_root)) {
952 iput(inode);
953 error = -ENOMEM; 952 error = -ENOMEM;
954 goto out_no_inode; 953 goto out_no_inode;
955 } 954 }
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 59c09f9541b5..0971e9217808 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -129,6 +129,8 @@ static int kjournald(void *arg)
129 setup_timer(&journal->j_commit_timer, commit_timeout, 129 setup_timer(&journal->j_commit_timer, commit_timeout,
130 (unsigned long)current); 130 (unsigned long)current);
131 131
132 set_freezable();
133
132 /* Record that the journal thread is running */ 134 /* Record that the journal thread is running */
133 journal->j_task = current; 135 journal->j_task = current;
134 wake_up(&journal->j_wait_done_commit); 136 wake_up(&journal->j_wait_done_commit);
@@ -328,7 +330,7 @@ repeat:
328 new_offset = offset_in_page(jh2bh(jh_in)->b_data); 330 new_offset = offset_in_page(jh2bh(jh_in)->b_data);
329 } 331 }
330 332
331 mapped_data = kmap_atomic(new_page, KM_USER0); 333 mapped_data = kmap_atomic(new_page);
332 /* 334 /*
333 * Check for escaping 335 * Check for escaping
334 */ 336 */
@@ -337,7 +339,7 @@ repeat:
337 need_copy_out = 1; 339 need_copy_out = 1;
338 do_escape = 1; 340 do_escape = 1;
339 } 341 }
340 kunmap_atomic(mapped_data, KM_USER0); 342 kunmap_atomic(mapped_data);
341 343
342 /* 344 /*
343 * Do we need to do a data copy? 345 * Do we need to do a data copy?
@@ -354,9 +356,9 @@ repeat:
354 } 356 }
355 357
356 jh_in->b_frozen_data = tmp; 358 jh_in->b_frozen_data = tmp;
357 mapped_data = kmap_atomic(new_page, KM_USER0); 359 mapped_data = kmap_atomic(new_page);
358 memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size); 360 memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size);
359 kunmap_atomic(mapped_data, KM_USER0); 361 kunmap_atomic(mapped_data);
360 362
361 new_page = virt_to_page(tmp); 363 new_page = virt_to_page(tmp);
362 new_offset = offset_in_page(tmp); 364 new_offset = offset_in_page(tmp);
@@ -368,9 +370,9 @@ repeat:
368 * copying, we can finally do so. 370 * copying, we can finally do so.
369 */ 371 */
370 if (do_escape) { 372 if (do_escape) {
371 mapped_data = kmap_atomic(new_page, KM_USER0); 373 mapped_data = kmap_atomic(new_page);
372 *((unsigned int *)(mapped_data + new_offset)) = 0; 374 *((unsigned int *)(mapped_data + new_offset)) = 0;
373 kunmap_atomic(mapped_data, KM_USER0); 375 kunmap_atomic(mapped_data);
374 } 376 }
375 377
376 set_bh_page(new_bh, new_page, new_offset); 378 set_bh_page(new_bh, new_page, new_offset);
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 7fce94b04bc3..b2a7e5244e39 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -718,9 +718,9 @@ done:
718 "Possible IO failure.\n"); 718 "Possible IO failure.\n");
719 page = jh2bh(jh)->b_page; 719 page = jh2bh(jh)->b_page;
720 offset = offset_in_page(jh2bh(jh)->b_data); 720 offset = offset_in_page(jh2bh(jh)->b_data);
721 source = kmap_atomic(page, KM_USER0); 721 source = kmap_atomic(page);
722 memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size); 722 memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
723 kunmap_atomic(source, KM_USER0); 723 kunmap_atomic(source);
724 } 724 }
725 jbd_unlock_bh_state(bh); 725 jbd_unlock_bh_state(bh);
726 726
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 5069b8475150..c067a8cae63b 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -286,10 +286,10 @@ static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh)
286 char *addr; 286 char *addr;
287 __u32 checksum; 287 __u32 checksum;
288 288
289 addr = kmap_atomic(page, KM_USER0); 289 addr = kmap_atomic(page);
290 checksum = crc32_be(crc32_sum, 290 checksum = crc32_be(crc32_sum,
291 (void *)(addr + offset_in_page(bh->b_data)), bh->b_size); 291 (void *)(addr + offset_in_page(bh->b_data)), bh->b_size);
292 kunmap_atomic(addr, KM_USER0); 292 kunmap_atomic(addr);
293 293
294 return checksum; 294 return checksum;
295} 295}
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index c0a5f9f1b127..839377e3d624 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -139,6 +139,8 @@ static int kjournald2(void *arg)
139 setup_timer(&journal->j_commit_timer, commit_timeout, 139 setup_timer(&journal->j_commit_timer, commit_timeout,
140 (unsigned long)current); 140 (unsigned long)current);
141 141
142 set_freezable();
143
142 /* Record that the journal thread is running */ 144 /* Record that the journal thread is running */
143 journal->j_task = current; 145 journal->j_task = current;
144 wake_up(&journal->j_wait_done_commit); 146 wake_up(&journal->j_wait_done_commit);
@@ -345,7 +347,7 @@ repeat:
345 new_offset = offset_in_page(jh2bh(jh_in)->b_data); 347 new_offset = offset_in_page(jh2bh(jh_in)->b_data);
346 } 348 }
347 349
348 mapped_data = kmap_atomic(new_page, KM_USER0); 350 mapped_data = kmap_atomic(new_page);
349 /* 351 /*
350 * Fire data frozen trigger if data already wasn't frozen. Do this 352 * Fire data frozen trigger if data already wasn't frozen. Do this
351 * before checking for escaping, as the trigger may modify the magic 353 * before checking for escaping, as the trigger may modify the magic
@@ -364,7 +366,7 @@ repeat:
364 need_copy_out = 1; 366 need_copy_out = 1;
365 do_escape = 1; 367 do_escape = 1;
366 } 368 }
367 kunmap_atomic(mapped_data, KM_USER0); 369 kunmap_atomic(mapped_data);
368 370
369 /* 371 /*
370 * Do we need to do a data copy? 372 * Do we need to do a data copy?
@@ -385,9 +387,9 @@ repeat:
385 } 387 }
386 388
387 jh_in->b_frozen_data = tmp; 389 jh_in->b_frozen_data = tmp;
388 mapped_data = kmap_atomic(new_page, KM_USER0); 390 mapped_data = kmap_atomic(new_page);
389 memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size); 391 memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size);
390 kunmap_atomic(mapped_data, KM_USER0); 392 kunmap_atomic(mapped_data);
391 393
392 new_page = virt_to_page(tmp); 394 new_page = virt_to_page(tmp);
393 new_offset = offset_in_page(tmp); 395 new_offset = offset_in_page(tmp);
@@ -406,9 +408,9 @@ repeat:
406 * copying, we can finally do so. 408 * copying, we can finally do so.
407 */ 409 */
408 if (do_escape) { 410 if (do_escape) {
409 mapped_data = kmap_atomic(new_page, KM_USER0); 411 mapped_data = kmap_atomic(new_page);
410 *((unsigned int *)(mapped_data + new_offset)) = 0; 412 *((unsigned int *)(mapped_data + new_offset)) = 0;
411 kunmap_atomic(mapped_data, KM_USER0); 413 kunmap_atomic(mapped_data);
412 } 414 }
413 415
414 set_bh_page(new_bh, new_page, new_offset); 416 set_bh_page(new_bh, new_page, new_offset);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 35ae096bed5d..e5aba56e1fd5 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -783,12 +783,12 @@ done:
783 "Possible IO failure.\n"); 783 "Possible IO failure.\n");
784 page = jh2bh(jh)->b_page; 784 page = jh2bh(jh)->b_page;
785 offset = offset_in_page(jh2bh(jh)->b_data); 785 offset = offset_in_page(jh2bh(jh)->b_data);
786 source = kmap_atomic(page, KM_USER0); 786 source = kmap_atomic(page);
787 /* Fire data frozen trigger just before we copy the data */ 787 /* Fire data frozen trigger just before we copy the data */
788 jbd2_buffer_frozen_trigger(jh, source + offset, 788 jbd2_buffer_frozen_trigger(jh, source + offset,
789 jh->b_triggers); 789 jh->b_triggers);
790 memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size); 790 memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
791 kunmap_atomic(source, KM_USER0); 791 kunmap_atomic(source);
792 792
793 /* 793 /*
794 * Now that the frozen data is saved off, we need to store 794 * Now that the frozen data is saved off, we need to store
diff --git a/fs/jffs2/compr.c b/fs/jffs2/compr.c
index 5b6c9d1a2fb9..96ed3c9ec3fc 100644
--- a/fs/jffs2/compr.c
+++ b/fs/jffs2/compr.c
@@ -340,7 +340,7 @@ int jffs2_unregister_compressor(struct jffs2_compressor *comp)
340 340
341 if (comp->usecount) { 341 if (comp->usecount) {
342 spin_unlock(&jffs2_compressor_list_lock); 342 spin_unlock(&jffs2_compressor_list_lock);
343 printk(KERN_WARNING "JFFS2: Compressor modul is in use. Unregister failed.\n"); 343 printk(KERN_WARNING "JFFS2: Compressor module is in use. Unregister failed.\n");
344 return -1; 344 return -1;
345 } 345 }
346 list_del(&comp->list); 346 list_del(&comp->list);
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 2e0123867cb1..c0d5c9d770da 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -561,9 +561,9 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
561 ret = -ENOMEM; 561 ret = -ENOMEM;
562 562
563 D1(printk(KERN_DEBUG "jffs2_do_fill_super(): d_alloc_root()\n")); 563 D1(printk(KERN_DEBUG "jffs2_do_fill_super(): d_alloc_root()\n"));
564 sb->s_root = d_alloc_root(root_i); 564 sb->s_root = d_make_root(root_i);
565 if (!sb->s_root) 565 if (!sb->s_root)
566 goto out_root_i; 566 goto out_root;
567 567
568 sb->s_maxbytes = 0xFFFFFFFF; 568 sb->s_maxbytes = 0xFFFFFFFF;
569 sb->s_blocksize = PAGE_CACHE_SIZE; 569 sb->s_blocksize = PAGE_CACHE_SIZE;
@@ -573,8 +573,6 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
573 jffs2_start_garbage_collect_thread(c); 573 jffs2_start_garbage_collect_thread(c);
574 return 0; 574 return 0;
575 575
576 out_root_i:
577 iput(root_i);
578out_root: 576out_root:
579 jffs2_free_ino_caches(c); 577 jffs2_free_ino_caches(c);
580 jffs2_free_raw_node_refs(c); 578 jffs2_free_raw_node_refs(c);
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 5f7c160ea64f..07c91ca6017d 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -220,12 +220,6 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
220 220
221 dquot_initialize(dip); 221 dquot_initialize(dip);
222 222
223 /* link count overflow on parent directory ? */
224 if (dip->i_nlink == JFS_LINK_MAX) {
225 rc = -EMLINK;
226 goto out1;
227 }
228
229 /* 223 /*
230 * search parent directory for entry/freespace 224 * search parent directory for entry/freespace
231 * (dtSearch() returns parent directory page pinned) 225 * (dtSearch() returns parent directory page pinned)
@@ -806,9 +800,6 @@ static int jfs_link(struct dentry *old_dentry,
806 jfs_info("jfs_link: %s %s", old_dentry->d_name.name, 800 jfs_info("jfs_link: %s %s", old_dentry->d_name.name,
807 dentry->d_name.name); 801 dentry->d_name.name);
808 802
809 if (ip->i_nlink == JFS_LINK_MAX)
810 return -EMLINK;
811
812 dquot_initialize(dir); 803 dquot_initialize(dir);
813 804
814 tid = txBegin(ip->i_sb, 0); 805 tid = txBegin(ip->i_sb, 0);
@@ -1138,10 +1129,6 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1138 rc = -ENOTEMPTY; 1129 rc = -ENOTEMPTY;
1139 goto out3; 1130 goto out3;
1140 } 1131 }
1141 } else if ((new_dir != old_dir) &&
1142 (new_dir->i_nlink == JFS_LINK_MAX)) {
1143 rc = -EMLINK;
1144 goto out3;
1145 } 1132 }
1146 } else if (new_ip) { 1133 } else if (new_ip) {
1147 IWRITE_LOCK(new_ip, RDWRLOCK_NORMAL); 1134 IWRITE_LOCK(new_ip, RDWRLOCK_NORMAL);
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 682bca642f38..4a82950f412f 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -441,6 +441,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
441 return -ENOMEM; 441 return -ENOMEM;
442 442
443 sb->s_fs_info = sbi; 443 sb->s_fs_info = sbi;
444 sb->s_max_links = JFS_LINK_MAX;
444 sbi->sb = sb; 445 sbi->sb = sb;
445 sbi->uid = sbi->gid = sbi->umask = -1; 446 sbi->uid = sbi->gid = sbi->umask = -1;
446 447
@@ -521,7 +522,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
521 ret = PTR_ERR(inode); 522 ret = PTR_ERR(inode);
522 goto out_no_rw; 523 goto out_no_rw;
523 } 524 }
524 sb->s_root = d_alloc_root(inode); 525 sb->s_root = d_make_root(inode);
525 if (!sb->s_root) 526 if (!sb->s_root)
526 goto out_no_root; 527 goto out_no_root;
527 528
@@ -539,7 +540,6 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
539 540
540out_no_root: 541out_no_root:
541 jfs_err("jfs_read_super: get root dentry failed"); 542 jfs_err("jfs_read_super: get root dentry failed");
542 iput(inode);
543 543
544out_no_rw: 544out_no_rw:
545 rc = jfs_umount(sb); 545 rc = jfs_umount(sb);
@@ -860,8 +860,14 @@ static int __init init_jfs_fs(void)
860 jfs_proc_init(); 860 jfs_proc_init();
861#endif 861#endif
862 862
863 return register_filesystem(&jfs_fs_type); 863 rc = register_filesystem(&jfs_fs_type);
864 if (!rc)
865 return 0;
864 866
867#ifdef PROC_FS_JFS
868 jfs_proc_clean();
869#endif
870 kthread_stop(jfsSyncThread);
865kill_committask: 871kill_committask:
866 for (i = 0; i < commit_threads; i++) 872 for (i = 0; i < commit_threads; i++)
867 kthread_stop(jfsCommitThread[i]); 873 kthread_stop(jfsCommitThread[i]);
diff --git a/fs/libfs.c b/fs/libfs.c
index 5b2dbb3ba4fc..4a0d1f06da57 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -3,7 +3,7 @@
3 * Library for filesystems writers. 3 * Library for filesystems writers.
4 */ 4 */
5 5
6#include <linux/module.h> 6#include <linux/export.h>
7#include <linux/pagemap.h> 7#include <linux/pagemap.h>
8#include <linux/slab.h> 8#include <linux/slab.h>
9#include <linux/mount.h> 9#include <linux/mount.h>
@@ -491,11 +491,9 @@ int simple_fill_super(struct super_block *s, unsigned long magic,
491 inode->i_op = &simple_dir_inode_operations; 491 inode->i_op = &simple_dir_inode_operations;
492 inode->i_fop = &simple_dir_operations; 492 inode->i_fop = &simple_dir_operations;
493 set_nlink(inode, 2); 493 set_nlink(inode, 2);
494 root = d_alloc_root(inode); 494 root = d_make_root(inode);
495 if (!root) { 495 if (!root)
496 iput(inode);
497 return -ENOMEM; 496 return -ENOMEM;
498 }
499 for (i = 0; !files->name || files->name[0]; i++, files++) { 497 for (i = 0; !files->name || files->name[0]; i++, files++) {
500 if (!files->name) 498 if (!files->name)
501 continue; 499 continue;
@@ -536,7 +534,7 @@ int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *c
536 spin_lock(&pin_fs_lock); 534 spin_lock(&pin_fs_lock);
537 if (unlikely(!*mount)) { 535 if (unlikely(!*mount)) {
538 spin_unlock(&pin_fs_lock); 536 spin_unlock(&pin_fs_lock);
539 mnt = vfs_kern_mount(type, 0, type->name, NULL); 537 mnt = vfs_kern_mount(type, MS_KERNMOUNT, type->name, NULL);
540 if (IS_ERR(mnt)) 538 if (IS_ERR(mnt))
541 return PTR_ERR(mnt); 539 return PTR_ERR(mnt);
542 spin_lock(&pin_fs_lock); 540 spin_lock(&pin_fs_lock);
diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
index f848b52c67b1..3ddcbb1c0a43 100644
--- a/fs/lockd/clnt4xdr.c
+++ b/fs/lockd/clnt4xdr.c
@@ -598,7 +598,7 @@ static struct rpc_procinfo nlm4_procedures[] = {
598 PROC(GRANTED_RES, res, norep), 598 PROC(GRANTED_RES, res, norep),
599}; 599};
600 600
601struct rpc_version nlm_version4 = { 601const struct rpc_version nlm_version4 = {
602 .number = 4, 602 .number = 4,
603 .nrprocs = ARRAY_SIZE(nlm4_procedures), 603 .nrprocs = ARRAY_SIZE(nlm4_procedures),
604 .procs = nlm4_procedures, 604 .procs = nlm4_procedures,
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 8d4ea8351e3d..ba1dc2eebd1e 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -62,7 +62,8 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init)
62 62
63 host = nlmclnt_lookup_host(nlm_init->address, nlm_init->addrlen, 63 host = nlmclnt_lookup_host(nlm_init->address, nlm_init->addrlen,
64 nlm_init->protocol, nlm_version, 64 nlm_init->protocol, nlm_version,
65 nlm_init->hostname, nlm_init->noresvport); 65 nlm_init->hostname, nlm_init->noresvport,
66 nlm_init->net);
66 if (host == NULL) { 67 if (host == NULL) {
67 lockd_down(); 68 lockd_down();
68 return ERR_PTR(-ENOLCK); 69 return ERR_PTR(-ENOLCK);
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index 180ac34feb9a..3d35e3e80c1c 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c
@@ -596,19 +596,19 @@ static struct rpc_procinfo nlm_procedures[] = {
596 PROC(GRANTED_RES, res, norep), 596 PROC(GRANTED_RES, res, norep),
597}; 597};
598 598
599static struct rpc_version nlm_version1 = { 599static const struct rpc_version nlm_version1 = {
600 .number = 1, 600 .number = 1,
601 .nrprocs = ARRAY_SIZE(nlm_procedures), 601 .nrprocs = ARRAY_SIZE(nlm_procedures),
602 .procs = nlm_procedures, 602 .procs = nlm_procedures,
603}; 603};
604 604
605static struct rpc_version nlm_version3 = { 605static const struct rpc_version nlm_version3 = {
606 .number = 3, 606 .number = 3,
607 .nrprocs = ARRAY_SIZE(nlm_procedures), 607 .nrprocs = ARRAY_SIZE(nlm_procedures),
608 .procs = nlm_procedures, 608 .procs = nlm_procedures,
609}; 609};
610 610
611static struct rpc_version *nlm_versions[] = { 611static const struct rpc_version *nlm_versions[] = {
612 [1] = &nlm_version1, 612 [1] = &nlm_version1,
613 [3] = &nlm_version3, 613 [3] = &nlm_version3,
614#ifdef CONFIG_LOCKD_V4 614#ifdef CONFIG_LOCKD_V4
@@ -618,7 +618,7 @@ static struct rpc_version *nlm_versions[] = {
618 618
619static struct rpc_stat nlm_rpc_stats; 619static struct rpc_stat nlm_rpc_stats;
620 620
621struct rpc_program nlm_program = { 621const struct rpc_program nlm_program = {
622 .name = "lockd", 622 .name = "lockd",
623 .number = NLM_PROGRAM, 623 .number = NLM_PROGRAM,
624 .nrvers = ARRAY_SIZE(nlm_versions), 624 .nrvers = ARRAY_SIZE(nlm_versions),
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 6f29836ec0cb..eb75ca7c2d6e 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -17,6 +17,8 @@
17#include <linux/lockd/lockd.h> 17#include <linux/lockd/lockd.h>
18#include <linux/mutex.h> 18#include <linux/mutex.h>
19 19
20#include <linux/sunrpc/svc_xprt.h>
21
20#include <net/ipv6.h> 22#include <net/ipv6.h>
21 23
22#define NLMDBG_FACILITY NLMDBG_HOSTCACHE 24#define NLMDBG_FACILITY NLMDBG_HOSTCACHE
@@ -54,6 +56,7 @@ struct nlm_lookup_host_info {
54 const char *hostname; /* remote's hostname */ 56 const char *hostname; /* remote's hostname */
55 const size_t hostname_len; /* it's length */ 57 const size_t hostname_len; /* it's length */
56 const int noresvport; /* use non-priv port */ 58 const int noresvport; /* use non-priv port */
59 struct net *net; /* network namespace to bind */
57}; 60};
58 61
59/* 62/*
@@ -155,6 +158,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni,
155 INIT_LIST_HEAD(&host->h_reclaim); 158 INIT_LIST_HEAD(&host->h_reclaim);
156 host->h_nsmhandle = nsm; 159 host->h_nsmhandle = nsm;
157 host->h_addrbuf = nsm->sm_addrbuf; 160 host->h_addrbuf = nsm->sm_addrbuf;
161 host->net = ni->net;
158 162
159out: 163out:
160 return host; 164 return host;
@@ -206,7 +210,8 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
206 const unsigned short protocol, 210 const unsigned short protocol,
207 const u32 version, 211 const u32 version,
208 const char *hostname, 212 const char *hostname,
209 int noresvport) 213 int noresvport,
214 struct net *net)
210{ 215{
211 struct nlm_lookup_host_info ni = { 216 struct nlm_lookup_host_info ni = {
212 .server = 0, 217 .server = 0,
@@ -217,6 +222,7 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
217 .hostname = hostname, 222 .hostname = hostname,
218 .hostname_len = strlen(hostname), 223 .hostname_len = strlen(hostname),
219 .noresvport = noresvport, 224 .noresvport = noresvport,
225 .net = net,
220 }; 226 };
221 struct hlist_head *chain; 227 struct hlist_head *chain;
222 struct hlist_node *pos; 228 struct hlist_node *pos;
@@ -231,6 +237,8 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
231 237
232 chain = &nlm_client_hosts[nlm_hash_address(sap)]; 238 chain = &nlm_client_hosts[nlm_hash_address(sap)];
233 hlist_for_each_entry(host, pos, chain, h_hash) { 239 hlist_for_each_entry(host, pos, chain, h_hash) {
240 if (host->net != net)
241 continue;
234 if (!rpc_cmp_addr(nlm_addr(host), sap)) 242 if (!rpc_cmp_addr(nlm_addr(host), sap))
235 continue; 243 continue;
236 244
@@ -318,6 +326,7 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
318 struct nsm_handle *nsm = NULL; 326 struct nsm_handle *nsm = NULL;
319 struct sockaddr *src_sap = svc_daddr(rqstp); 327 struct sockaddr *src_sap = svc_daddr(rqstp);
320 size_t src_len = rqstp->rq_daddrlen; 328 size_t src_len = rqstp->rq_daddrlen;
329 struct net *net = rqstp->rq_xprt->xpt_net;
321 struct nlm_lookup_host_info ni = { 330 struct nlm_lookup_host_info ni = {
322 .server = 1, 331 .server = 1,
323 .sap = svc_addr(rqstp), 332 .sap = svc_addr(rqstp),
@@ -326,6 +335,7 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
326 .version = rqstp->rq_vers, 335 .version = rqstp->rq_vers,
327 .hostname = hostname, 336 .hostname = hostname,
328 .hostname_len = hostname_len, 337 .hostname_len = hostname_len,
338 .net = net,
329 }; 339 };
330 340
331 dprintk("lockd: %s(host='%*s', vers=%u, proto=%s)\n", __func__, 341 dprintk("lockd: %s(host='%*s', vers=%u, proto=%s)\n", __func__,
@@ -339,6 +349,8 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
339 349
340 chain = &nlm_server_hosts[nlm_hash_address(ni.sap)]; 350 chain = &nlm_server_hosts[nlm_hash_address(ni.sap)];
341 hlist_for_each_entry(host, pos, chain, h_hash) { 351 hlist_for_each_entry(host, pos, chain, h_hash) {
352 if (host->net != net)
353 continue;
342 if (!rpc_cmp_addr(nlm_addr(host), ni.sap)) 354 if (!rpc_cmp_addr(nlm_addr(host), ni.sap))
343 continue; 355 continue;
344 356
@@ -431,7 +443,7 @@ nlm_bind_host(struct nlm_host *host)
431 .to_retries = 5U, 443 .to_retries = 5U,
432 }; 444 };
433 struct rpc_create_args args = { 445 struct rpc_create_args args = {
434 .net = &init_net, 446 .net = host->net,
435 .protocol = host->h_proto, 447 .protocol = host->h_proto,
436 .address = nlm_addr(host), 448 .address = nlm_addr(host),
437 .addrsize = host->h_addrlen, 449 .addrsize = host->h_addrlen,
@@ -553,12 +565,8 @@ void nlm_host_rebooted(const struct nlm_reboot *info)
553 nsm_release(nsm); 565 nsm_release(nsm);
554} 566}
555 567
556/*
557 * Shut down the hosts module.
558 * Note that this routine is called only at server shutdown time.
559 */
560void 568void
561nlm_shutdown_hosts(void) 569nlm_shutdown_hosts_net(struct net *net)
562{ 570{
563 struct hlist_head *chain; 571 struct hlist_head *chain;
564 struct hlist_node *pos; 572 struct hlist_node *pos;
@@ -570,6 +578,8 @@ nlm_shutdown_hosts(void)
570 /* First, make all hosts eligible for gc */ 578 /* First, make all hosts eligible for gc */
571 dprintk("lockd: nuking all hosts...\n"); 579 dprintk("lockd: nuking all hosts...\n");
572 for_each_host(host, pos, chain, nlm_server_hosts) { 580 for_each_host(host, pos, chain, nlm_server_hosts) {
581 if (net && host->net != net)
582 continue;
573 host->h_expires = jiffies - 1; 583 host->h_expires = jiffies - 1;
574 if (host->h_rpcclnt) { 584 if (host->h_rpcclnt) {
575 rpc_shutdown_client(host->h_rpcclnt); 585 rpc_shutdown_client(host->h_rpcclnt);
@@ -580,15 +590,29 @@ nlm_shutdown_hosts(void)
580 /* Then, perform a garbage collection pass */ 590 /* Then, perform a garbage collection pass */
581 nlm_gc_hosts(); 591 nlm_gc_hosts();
582 mutex_unlock(&nlm_host_mutex); 592 mutex_unlock(&nlm_host_mutex);
593}
594
595/*
596 * Shut down the hosts module.
597 * Note that this routine is called only at server shutdown time.
598 */
599void
600nlm_shutdown_hosts(void)
601{
602 struct hlist_head *chain;
603 struct hlist_node *pos;
604 struct nlm_host *host;
605
606 nlm_shutdown_hosts_net(NULL);
583 607
584 /* complain if any hosts are left */ 608 /* complain if any hosts are left */
585 if (nrhosts != 0) { 609 if (nrhosts != 0) {
586 printk(KERN_WARNING "lockd: couldn't shutdown host module!\n"); 610 printk(KERN_WARNING "lockd: couldn't shutdown host module!\n");
587 dprintk("lockd: %lu hosts left:\n", nrhosts); 611 dprintk("lockd: %lu hosts left:\n", nrhosts);
588 for_each_host(host, pos, chain, nlm_server_hosts) { 612 for_each_host(host, pos, chain, nlm_server_hosts) {
589 dprintk(" %s (cnt %d use %d exp %ld)\n", 613 dprintk(" %s (cnt %d use %d exp %ld net %p)\n",
590 host->h_name, atomic_read(&host->h_count), 614 host->h_name, atomic_read(&host->h_count),
591 host->h_inuse, host->h_expires); 615 host->h_inuse, host->h_expires, host->net);
592 } 616 }
593 } 617 }
594} 618}
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 65ba36b80a9e..7ef14b3c5bee 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -47,7 +47,7 @@ struct nsm_res {
47 u32 state; 47 u32 state;
48}; 48};
49 49
50static struct rpc_program nsm_program; 50static const struct rpc_program nsm_program;
51static LIST_HEAD(nsm_handles); 51static LIST_HEAD(nsm_handles);
52static DEFINE_SPINLOCK(nsm_lock); 52static DEFINE_SPINLOCK(nsm_lock);
53 53
@@ -62,14 +62,14 @@ static inline struct sockaddr *nsm_addr(const struct nsm_handle *nsm)
62 return (struct sockaddr *)&nsm->sm_addr; 62 return (struct sockaddr *)&nsm->sm_addr;
63} 63}
64 64
65static struct rpc_clnt *nsm_create(void) 65static struct rpc_clnt *nsm_create(struct net *net)
66{ 66{
67 struct sockaddr_in sin = { 67 struct sockaddr_in sin = {
68 .sin_family = AF_INET, 68 .sin_family = AF_INET,
69 .sin_addr.s_addr = htonl(INADDR_LOOPBACK), 69 .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
70 }; 70 };
71 struct rpc_create_args args = { 71 struct rpc_create_args args = {
72 .net = &init_net, 72 .net = net,
73 .protocol = XPRT_TRANSPORT_UDP, 73 .protocol = XPRT_TRANSPORT_UDP,
74 .address = (struct sockaddr *)&sin, 74 .address = (struct sockaddr *)&sin,
75 .addrsize = sizeof(sin), 75 .addrsize = sizeof(sin),
@@ -83,7 +83,8 @@ static struct rpc_clnt *nsm_create(void)
83 return rpc_create(&args); 83 return rpc_create(&args);
84} 84}
85 85
86static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res) 86static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res,
87 struct net *net)
87{ 88{
88 struct rpc_clnt *clnt; 89 struct rpc_clnt *clnt;
89 int status; 90 int status;
@@ -99,7 +100,7 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res)
99 .rpc_resp = res, 100 .rpc_resp = res,
100 }; 101 };
101 102
102 clnt = nsm_create(); 103 clnt = nsm_create(net);
103 if (IS_ERR(clnt)) { 104 if (IS_ERR(clnt)) {
104 status = PTR_ERR(clnt); 105 status = PTR_ERR(clnt);
105 dprintk("lockd: failed to create NSM upcall transport, " 106 dprintk("lockd: failed to create NSM upcall transport, "
@@ -149,7 +150,7 @@ int nsm_monitor(const struct nlm_host *host)
149 */ 150 */
150 nsm->sm_mon_name = nsm_use_hostnames ? nsm->sm_name : nsm->sm_addrbuf; 151 nsm->sm_mon_name = nsm_use_hostnames ? nsm->sm_name : nsm->sm_addrbuf;
151 152
152 status = nsm_mon_unmon(nsm, NSMPROC_MON, &res); 153 status = nsm_mon_unmon(nsm, NSMPROC_MON, &res, host->net);
153 if (unlikely(res.status != 0)) 154 if (unlikely(res.status != 0))
154 status = -EIO; 155 status = -EIO;
155 if (unlikely(status < 0)) { 156 if (unlikely(status < 0)) {
@@ -183,7 +184,7 @@ void nsm_unmonitor(const struct nlm_host *host)
183 && nsm->sm_monitored && !nsm->sm_sticky) { 184 && nsm->sm_monitored && !nsm->sm_sticky) {
184 dprintk("lockd: nsm_unmonitor(%s)\n", nsm->sm_name); 185 dprintk("lockd: nsm_unmonitor(%s)\n", nsm->sm_name);
185 186
186 status = nsm_mon_unmon(nsm, NSMPROC_UNMON, &res); 187 status = nsm_mon_unmon(nsm, NSMPROC_UNMON, &res, host->net);
187 if (res.status != 0) 188 if (res.status != 0)
188 status = -EIO; 189 status = -EIO;
189 if (status < 0) 190 if (status < 0)
@@ -534,19 +535,19 @@ static struct rpc_procinfo nsm_procedures[] = {
534 }, 535 },
535}; 536};
536 537
537static struct rpc_version nsm_version1 = { 538static const struct rpc_version nsm_version1 = {
538 .number = 1, 539 .number = 1,
539 .nrprocs = ARRAY_SIZE(nsm_procedures), 540 .nrprocs = ARRAY_SIZE(nsm_procedures),
540 .procs = nsm_procedures 541 .procs = nsm_procedures
541}; 542};
542 543
543static struct rpc_version * nsm_version[] = { 544static const struct rpc_version *nsm_version[] = {
544 [1] = &nsm_version1, 545 [1] = &nsm_version1,
545}; 546};
546 547
547static struct rpc_stat nsm_stats; 548static struct rpc_stat nsm_stats;
548 549
549static struct rpc_program nsm_program = { 550static const struct rpc_program nsm_program = {
550 .name = "statd", 551 .name = "statd",
551 .number = NSM_PROGRAM, 552 .number = NSM_PROGRAM,
552 .nrvers = ARRAY_SIZE(nsm_version), 553 .nrvers = ARRAY_SIZE(nsm_version),
diff --git a/fs/lockd/netns.h b/fs/lockd/netns.h
new file mode 100644
index 000000000000..ce227e0fbc5c
--- /dev/null
+++ b/fs/lockd/netns.h
@@ -0,0 +1,12 @@
1#ifndef __LOCKD_NETNS_H__
2#define __LOCKD_NETNS_H__
3
4#include <net/netns/generic.h>
5
6struct lockd_net {
7 unsigned int nlmsvc_users;
8};
9
10extern int lockd_net_id;
11
12#endif
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index c061b9aa7ddb..2774e1013b34 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -35,6 +35,8 @@
35#include <linux/lockd/lockd.h> 35#include <linux/lockd/lockd.h>
36#include <linux/nfs.h> 36#include <linux/nfs.h>
37 37
38#include "netns.h"
39
38#define NLMDBG_FACILITY NLMDBG_SVC 40#define NLMDBG_FACILITY NLMDBG_SVC
39#define LOCKD_BUFSIZE (1024 + NLMSVC_XDRSIZE) 41#define LOCKD_BUFSIZE (1024 + NLMSVC_XDRSIZE)
40#define ALLOWED_SIGS (sigmask(SIGKILL)) 42#define ALLOWED_SIGS (sigmask(SIGKILL))
@@ -50,6 +52,8 @@ static struct task_struct *nlmsvc_task;
50static struct svc_rqst *nlmsvc_rqst; 52static struct svc_rqst *nlmsvc_rqst;
51unsigned long nlmsvc_timeout; 53unsigned long nlmsvc_timeout;
52 54
55int lockd_net_id;
56
53/* 57/*
54 * These can be set at insmod time (useful for NFS as root filesystem), 58 * These can be set at insmod time (useful for NFS as root filesystem),
55 * and also changed through the sysctl interface. -- Jamie Lokier, Aug 2003 59 * and also changed through the sysctl interface. -- Jamie Lokier, Aug 2003
@@ -189,27 +193,29 @@ lockd(void *vrqstp)
189} 193}
190 194
191static int create_lockd_listener(struct svc_serv *serv, const char *name, 195static int create_lockd_listener(struct svc_serv *serv, const char *name,
192 const int family, const unsigned short port) 196 struct net *net, const int family,
197 const unsigned short port)
193{ 198{
194 struct svc_xprt *xprt; 199 struct svc_xprt *xprt;
195 200
196 xprt = svc_find_xprt(serv, name, family, 0); 201 xprt = svc_find_xprt(serv, name, net, family, 0);
197 if (xprt == NULL) 202 if (xprt == NULL)
198 return svc_create_xprt(serv, name, &init_net, family, port, 203 return svc_create_xprt(serv, name, net, family, port,
199 SVC_SOCK_DEFAULTS); 204 SVC_SOCK_DEFAULTS);
200 svc_xprt_put(xprt); 205 svc_xprt_put(xprt);
201 return 0; 206 return 0;
202} 207}
203 208
204static int create_lockd_family(struct svc_serv *serv, const int family) 209static int create_lockd_family(struct svc_serv *serv, struct net *net,
210 const int family)
205{ 211{
206 int err; 212 int err;
207 213
208 err = create_lockd_listener(serv, "udp", family, nlm_udpport); 214 err = create_lockd_listener(serv, "udp", net, family, nlm_udpport);
209 if (err < 0) 215 if (err < 0)
210 return err; 216 return err;
211 217
212 return create_lockd_listener(serv, "tcp", family, nlm_tcpport); 218 return create_lockd_listener(serv, "tcp", net, family, nlm_tcpport);
213} 219}
214 220
215/* 221/*
@@ -222,16 +228,16 @@ static int create_lockd_family(struct svc_serv *serv, const int family)
222 * Returns zero if all listeners are available; otherwise a 228 * Returns zero if all listeners are available; otherwise a
223 * negative errno value is returned. 229 * negative errno value is returned.
224 */ 230 */
225static int make_socks(struct svc_serv *serv) 231static int make_socks(struct svc_serv *serv, struct net *net)
226{ 232{
227 static int warned; 233 static int warned;
228 int err; 234 int err;
229 235
230 err = create_lockd_family(serv, PF_INET); 236 err = create_lockd_family(serv, net, PF_INET);
231 if (err < 0) 237 if (err < 0)
232 goto out_err; 238 goto out_err;
233 239
234 err = create_lockd_family(serv, PF_INET6); 240 err = create_lockd_family(serv, net, PF_INET6);
235 if (err < 0 && err != -EAFNOSUPPORT) 241 if (err < 0 && err != -EAFNOSUPPORT)
236 goto out_err; 242 goto out_err;
237 243
@@ -245,6 +251,47 @@ out_err:
245 return err; 251 return err;
246} 252}
247 253
254static int lockd_up_net(struct net *net)
255{
256 struct lockd_net *ln = net_generic(net, lockd_net_id);
257 struct svc_serv *serv = nlmsvc_rqst->rq_server;
258 int error;
259
260 if (ln->nlmsvc_users)
261 return 0;
262
263 error = svc_rpcb_setup(serv, net);
264 if (error)
265 goto err_rpcb;
266
267 error = make_socks(serv, net);
268 if (error < 0)
269 goto err_socks;
270 return 0;
271
272err_socks:
273 svc_rpcb_cleanup(serv, net);
274err_rpcb:
275 return error;
276}
277
278static void lockd_down_net(struct net *net)
279{
280 struct lockd_net *ln = net_generic(net, lockd_net_id);
281 struct svc_serv *serv = nlmsvc_rqst->rq_server;
282
283 if (ln->nlmsvc_users) {
284 if (--ln->nlmsvc_users == 0) {
285 nlm_shutdown_hosts_net(net);
286 svc_shutdown_net(serv, net);
287 }
288 } else {
289 printk(KERN_ERR "lockd_down_net: no users! task=%p, net=%p\n",
290 nlmsvc_task, net);
291 BUG();
292 }
293}
294
248/* 295/*
249 * Bring up the lockd process if it's not already up. 296 * Bring up the lockd process if it's not already up.
250 */ 297 */
@@ -252,13 +299,16 @@ int lockd_up(void)
252{ 299{
253 struct svc_serv *serv; 300 struct svc_serv *serv;
254 int error = 0; 301 int error = 0;
302 struct net *net = current->nsproxy->net_ns;
255 303
256 mutex_lock(&nlmsvc_mutex); 304 mutex_lock(&nlmsvc_mutex);
257 /* 305 /*
258 * Check whether we're already up and running. 306 * Check whether we're already up and running.
259 */ 307 */
260 if (nlmsvc_rqst) 308 if (nlmsvc_rqst) {
309 error = lockd_up_net(net);
261 goto out; 310 goto out;
311 }
262 312
263 /* 313 /*
264 * Sanity check: if there's no pid, 314 * Sanity check: if there's no pid,
@@ -275,7 +325,7 @@ int lockd_up(void)
275 goto out; 325 goto out;
276 } 326 }
277 327
278 error = make_socks(serv); 328 error = make_socks(serv, net);
279 if (error < 0) 329 if (error < 0)
280 goto destroy_and_out; 330 goto destroy_and_out;
281 331
@@ -313,8 +363,12 @@ int lockd_up(void)
313destroy_and_out: 363destroy_and_out:
314 svc_destroy(serv); 364 svc_destroy(serv);
315out: 365out:
316 if (!error) 366 if (!error) {
367 struct lockd_net *ln = net_generic(net, lockd_net_id);
368
369 ln->nlmsvc_users++;
317 nlmsvc_users++; 370 nlmsvc_users++;
371 }
318 mutex_unlock(&nlmsvc_mutex); 372 mutex_unlock(&nlmsvc_mutex);
319 return error; 373 return error;
320} 374}
@@ -328,8 +382,10 @@ lockd_down(void)
328{ 382{
329 mutex_lock(&nlmsvc_mutex); 383 mutex_lock(&nlmsvc_mutex);
330 if (nlmsvc_users) { 384 if (nlmsvc_users) {
331 if (--nlmsvc_users) 385 if (--nlmsvc_users) {
386 lockd_down_net(current->nsproxy->net_ns);
332 goto out; 387 goto out;
388 }
333 } else { 389 } else {
334 printk(KERN_ERR "lockd_down: no users! task=%p\n", 390 printk(KERN_ERR "lockd_down: no users! task=%p\n",
335 nlmsvc_task); 391 nlmsvc_task);
@@ -497,24 +553,55 @@ module_param_call(nlm_tcpport, param_set_port, param_get_int,
497module_param(nsm_use_hostnames, bool, 0644); 553module_param(nsm_use_hostnames, bool, 0644);
498module_param(nlm_max_connections, uint, 0644); 554module_param(nlm_max_connections, uint, 0644);
499 555
556static int lockd_init_net(struct net *net)
557{
558 return 0;
559}
560
561static void lockd_exit_net(struct net *net)
562{
563}
564
565static struct pernet_operations lockd_net_ops = {
566 .init = lockd_init_net,
567 .exit = lockd_exit_net,
568 .id = &lockd_net_id,
569 .size = sizeof(struct lockd_net),
570};
571
572
500/* 573/*
501 * Initialising and terminating the module. 574 * Initialising and terminating the module.
502 */ 575 */
503 576
504static int __init init_nlm(void) 577static int __init init_nlm(void)
505{ 578{
579 int err;
580
506#ifdef CONFIG_SYSCTL 581#ifdef CONFIG_SYSCTL
582 err = -ENOMEM;
507 nlm_sysctl_table = register_sysctl_table(nlm_sysctl_root); 583 nlm_sysctl_table = register_sysctl_table(nlm_sysctl_root);
508 return nlm_sysctl_table ? 0 : -ENOMEM; 584 if (nlm_sysctl_table == NULL)
509#else 585 goto err_sysctl;
586#endif
587 err = register_pernet_subsys(&lockd_net_ops);
588 if (err)
589 goto err_pernet;
510 return 0; 590 return 0;
591
592err_pernet:
593#ifdef CONFIG_SYSCTL
594 unregister_sysctl_table(nlm_sysctl_table);
511#endif 595#endif
596err_sysctl:
597 return err;
512} 598}
513 599
514static void __exit exit_nlm(void) 600static void __exit exit_nlm(void)
515{ 601{
516 /* FIXME: delete all NLM clients */ 602 /* FIXME: delete all NLM clients */
517 nlm_shutdown_hosts(); 603 nlm_shutdown_hosts();
604 unregister_pernet_subsys(&lockd_net_ops);
518#ifdef CONFIG_SYSCTL 605#ifdef CONFIG_SYSCTL
519 unregister_sysctl_table(nlm_sysctl_table); 606 unregister_sysctl_table(nlm_sysctl_table);
520#endif 607#endif
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index f0179c3745d2..e46353f41a42 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -46,7 +46,6 @@ static void nlmsvc_remove_block(struct nlm_block *block);
46static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock); 46static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock);
47static void nlmsvc_freegrantargs(struct nlm_rqst *call); 47static void nlmsvc_freegrantargs(struct nlm_rqst *call);
48static const struct rpc_call_ops nlmsvc_grant_ops; 48static const struct rpc_call_ops nlmsvc_grant_ops;
49static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie);
50 49
51/* 50/*
52 * The list of blocked locks to retry 51 * The list of blocked locks to retry
@@ -54,6 +53,35 @@ static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie);
54static LIST_HEAD(nlm_blocked); 53static LIST_HEAD(nlm_blocked);
55static DEFINE_SPINLOCK(nlm_blocked_lock); 54static DEFINE_SPINLOCK(nlm_blocked_lock);
56 55
56#ifdef LOCKD_DEBUG
57static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie)
58{
59 /*
60 * We can get away with a static buffer because we're only
61 * called with BKL held.
62 */
63 static char buf[2*NLM_MAXCOOKIELEN+1];
64 unsigned int i, len = sizeof(buf);
65 char *p = buf;
66
67 len--; /* allow for trailing \0 */
68 if (len < 3)
69 return "???";
70 for (i = 0 ; i < cookie->len ; i++) {
71 if (len < 2) {
72 strcpy(p-3, "...");
73 break;
74 }
75 sprintf(p, "%02x", cookie->data[i]);
76 p += 2;
77 len -= 2;
78 }
79 *p = '\0';
80
81 return buf;
82}
83#endif
84
57/* 85/*
58 * Insert a blocked lock into the global list 86 * Insert a blocked lock into the global list
59 */ 87 */
@@ -935,32 +963,3 @@ nlmsvc_retry_blocked(void)
935 963
936 return timeout; 964 return timeout;
937} 965}
938
939#ifdef RPC_DEBUG
940static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie)
941{
942 /*
943 * We can get away with a static buffer because we're only
944 * called with BKL held.
945 */
946 static char buf[2*NLM_MAXCOOKIELEN+1];
947 unsigned int i, len = sizeof(buf);
948 char *p = buf;
949
950 len--; /* allow for trailing \0 */
951 if (len < 3)
952 return "???";
953 for (i = 0 ; i < cookie->len ; i++) {
954 if (len < 2) {
955 strcpy(p-3, "...");
956 break;
957 }
958 sprintf(p, "%02x", cookie->data[i]);
959 p += 2;
960 len -= 2;
961 }
962 *p = '\0';
963
964 return buf;
965}
966#endif
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 3de7a32cadbe..bea5d1b9954b 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -177,17 +177,17 @@ static struct page *logfs_get_dd_page(struct inode *dir, struct dentry *dentry)
177 (filler_t *)logfs_readpage, NULL); 177 (filler_t *)logfs_readpage, NULL);
178 if (IS_ERR(page)) 178 if (IS_ERR(page))
179 return page; 179 return page;
180 dd = kmap_atomic(page, KM_USER0); 180 dd = kmap_atomic(page);
181 BUG_ON(dd->namelen == 0); 181 BUG_ON(dd->namelen == 0);
182 182
183 if (name->len != be16_to_cpu(dd->namelen) || 183 if (name->len != be16_to_cpu(dd->namelen) ||
184 memcmp(name->name, dd->name, name->len)) { 184 memcmp(name->name, dd->name, name->len)) {
185 kunmap_atomic(dd, KM_USER0); 185 kunmap_atomic(dd);
186 page_cache_release(page); 186 page_cache_release(page);
187 continue; 187 continue;
188 } 188 }
189 189
190 kunmap_atomic(dd, KM_USER0); 190 kunmap_atomic(dd);
191 return page; 191 return page;
192 } 192 }
193 return NULL; 193 return NULL;
@@ -365,9 +365,9 @@ static struct dentry *logfs_lookup(struct inode *dir, struct dentry *dentry,
365 return NULL; 365 return NULL;
366 } 366 }
367 index = page->index; 367 index = page->index;
368 dd = kmap_atomic(page, KM_USER0); 368 dd = kmap_atomic(page);
369 ino = be64_to_cpu(dd->ino); 369 ino = be64_to_cpu(dd->ino);
370 kunmap_atomic(dd, KM_USER0); 370 kunmap_atomic(dd);
371 page_cache_release(page); 371 page_cache_release(page);
372 372
373 inode = logfs_iget(dir->i_sb, ino); 373 inode = logfs_iget(dir->i_sb, ino);
@@ -402,12 +402,12 @@ static int logfs_write_dir(struct inode *dir, struct dentry *dentry,
402 if (!page) 402 if (!page)
403 return -ENOMEM; 403 return -ENOMEM;
404 404
405 dd = kmap_atomic(page, KM_USER0); 405 dd = kmap_atomic(page);
406 memset(dd, 0, sizeof(*dd)); 406 memset(dd, 0, sizeof(*dd));
407 dd->ino = cpu_to_be64(inode->i_ino); 407 dd->ino = cpu_to_be64(inode->i_ino);
408 dd->type = logfs_type(inode); 408 dd->type = logfs_type(inode);
409 logfs_set_name(dd, &dentry->d_name); 409 logfs_set_name(dd, &dentry->d_name);
410 kunmap_atomic(dd, KM_USER0); 410 kunmap_atomic(dd);
411 411
412 err = logfs_write_buf(dir, page, WF_LOCK); 412 err = logfs_write_buf(dir, page, WF_LOCK);
413 unlock_page(page); 413 unlock_page(page);
@@ -558,9 +558,6 @@ static int logfs_link(struct dentry *old_dentry, struct inode *dir,
558{ 558{
559 struct inode *inode = old_dentry->d_inode; 559 struct inode *inode = old_dentry->d_inode;
560 560
561 if (inode->i_nlink >= LOGFS_LINK_MAX)
562 return -EMLINK;
563
564 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; 561 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
565 ihold(inode); 562 ihold(inode);
566 inc_nlink(inode); 563 inc_nlink(inode);
@@ -579,9 +576,9 @@ static int logfs_get_dd(struct inode *dir, struct dentry *dentry,
579 if (IS_ERR(page)) 576 if (IS_ERR(page))
580 return PTR_ERR(page); 577 return PTR_ERR(page);
581 *pos = page->index; 578 *pos = page->index;
582 map = kmap_atomic(page, KM_USER0); 579 map = kmap_atomic(page);
583 memcpy(dd, map, sizeof(*dd)); 580 memcpy(dd, map, sizeof(*dd));
584 kunmap_atomic(map, KM_USER0); 581 kunmap_atomic(map);
585 page_cache_release(page); 582 page_cache_release(page);
586 return 0; 583 return 0;
587} 584}
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 4153e65b0148..e3ab5e5a904c 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -517,9 +517,9 @@ static int indirect_write_alias(struct super_block *sb,
517 517
518 ino = page->mapping->host->i_ino; 518 ino = page->mapping->host->i_ino;
519 logfs_unpack_index(page->index, &bix, &level); 519 logfs_unpack_index(page->index, &bix, &level);
520 child = kmap_atomic(page, KM_USER0); 520 child = kmap_atomic(page);
521 val = child[pos]; 521 val = child[pos];
522 kunmap_atomic(child, KM_USER0); 522 kunmap_atomic(child);
523 err = write_one_alias(sb, ino, bix, level, pos, val); 523 err = write_one_alias(sb, ino, bix, level, pos, val);
524 if (err) 524 if (err)
525 return err; 525 return err;
@@ -673,9 +673,9 @@ static void alloc_indirect_block(struct inode *inode, struct page *page,
673 alloc_data_block(inode, page); 673 alloc_data_block(inode, page);
674 674
675 block = logfs_block(page); 675 block = logfs_block(page);
676 array = kmap_atomic(page, KM_USER0); 676 array = kmap_atomic(page);
677 initialize_block_counters(page, block, array, page_is_empty); 677 initialize_block_counters(page, block, array, page_is_empty);
678 kunmap_atomic(array, KM_USER0); 678 kunmap_atomic(array);
679} 679}
680 680
681static void block_set_pointer(struct page *page, int index, u64 ptr) 681static void block_set_pointer(struct page *page, int index, u64 ptr)
@@ -685,10 +685,10 @@ static void block_set_pointer(struct page *page, int index, u64 ptr)
685 u64 oldptr; 685 u64 oldptr;
686 686
687 BUG_ON(!block); 687 BUG_ON(!block);
688 array = kmap_atomic(page, KM_USER0); 688 array = kmap_atomic(page);
689 oldptr = be64_to_cpu(array[index]); 689 oldptr = be64_to_cpu(array[index]);
690 array[index] = cpu_to_be64(ptr); 690 array[index] = cpu_to_be64(ptr);
691 kunmap_atomic(array, KM_USER0); 691 kunmap_atomic(array);
692 SetPageUptodate(page); 692 SetPageUptodate(page);
693 693
694 block->full += !!(ptr & LOGFS_FULLY_POPULATED) 694 block->full += !!(ptr & LOGFS_FULLY_POPULATED)
@@ -701,9 +701,9 @@ static u64 block_get_pointer(struct page *page, int index)
701 __be64 *block; 701 __be64 *block;
702 u64 ptr; 702 u64 ptr;
703 703
704 block = kmap_atomic(page, KM_USER0); 704 block = kmap_atomic(page);
705 ptr = be64_to_cpu(block[index]); 705 ptr = be64_to_cpu(block[index]);
706 kunmap_atomic(block, KM_USER0); 706 kunmap_atomic(block);
707 return ptr; 707 return ptr;
708} 708}
709 709
@@ -850,7 +850,7 @@ static u64 seek_holedata_loop(struct inode *inode, u64 bix, int data)
850 } 850 }
851 851
852 slot = get_bits(bix, SUBLEVEL(level)); 852 slot = get_bits(bix, SUBLEVEL(level));
853 rblock = kmap_atomic(page, KM_USER0); 853 rblock = kmap_atomic(page);
854 while (slot < LOGFS_BLOCK_FACTOR) { 854 while (slot < LOGFS_BLOCK_FACTOR) {
855 if (data && (rblock[slot] != 0)) 855 if (data && (rblock[slot] != 0))
856 break; 856 break;
@@ -861,12 +861,12 @@ static u64 seek_holedata_loop(struct inode *inode, u64 bix, int data)
861 bix &= ~(increment - 1); 861 bix &= ~(increment - 1);
862 } 862 }
863 if (slot >= LOGFS_BLOCK_FACTOR) { 863 if (slot >= LOGFS_BLOCK_FACTOR) {
864 kunmap_atomic(rblock, KM_USER0); 864 kunmap_atomic(rblock);
865 logfs_put_read_page(page); 865 logfs_put_read_page(page);
866 return bix; 866 return bix;
867 } 867 }
868 bofs = be64_to_cpu(rblock[slot]); 868 bofs = be64_to_cpu(rblock[slot]);
869 kunmap_atomic(rblock, KM_USER0); 869 kunmap_atomic(rblock);
870 logfs_put_read_page(page); 870 logfs_put_read_page(page);
871 if (!bofs) { 871 if (!bofs) {
872 BUG_ON(data); 872 BUG_ON(data);
@@ -1961,9 +1961,9 @@ int logfs_read_inode(struct inode *inode)
1961 if (IS_ERR(page)) 1961 if (IS_ERR(page))
1962 return PTR_ERR(page); 1962 return PTR_ERR(page);
1963 1963
1964 di = kmap_atomic(page, KM_USER0); 1964 di = kmap_atomic(page);
1965 logfs_disk_to_inode(di, inode); 1965 logfs_disk_to_inode(di, inode);
1966 kunmap_atomic(di, KM_USER0); 1966 kunmap_atomic(di);
1967 move_page_to_inode(inode, page); 1967 move_page_to_inode(inode, page);
1968 page_cache_release(page); 1968 page_cache_release(page);
1969 return 0; 1969 return 0;
@@ -1982,9 +1982,9 @@ static struct page *inode_to_page(struct inode *inode)
1982 if (!page) 1982 if (!page)
1983 return NULL; 1983 return NULL;
1984 1984
1985 di = kmap_atomic(page, KM_USER0); 1985 di = kmap_atomic(page);
1986 logfs_inode_to_disk(inode, di); 1986 logfs_inode_to_disk(inode, di);
1987 kunmap_atomic(di, KM_USER0); 1987 kunmap_atomic(di);
1988 move_inode_to_page(page, inode); 1988 move_inode_to_page(page, inode);
1989 return page; 1989 return page;
1990} 1990}
@@ -2041,13 +2041,13 @@ static void logfs_mod_segment_entry(struct super_block *sb, u32 segno,
2041 2041
2042 if (write) 2042 if (write)
2043 alloc_indirect_block(inode, page, 0); 2043 alloc_indirect_block(inode, page, 0);
2044 se = kmap_atomic(page, KM_USER0); 2044 se = kmap_atomic(page);
2045 change_se(se + child_no, arg); 2045 change_se(se + child_no, arg);
2046 if (write) { 2046 if (write) {
2047 logfs_set_alias(sb, logfs_block(page), child_no); 2047 logfs_set_alias(sb, logfs_block(page), child_no);
2048 BUG_ON((int)be32_to_cpu(se[child_no].valid) > super->s_segsize); 2048 BUG_ON((int)be32_to_cpu(se[child_no].valid) > super->s_segsize);
2049 } 2049 }
2050 kunmap_atomic(se, KM_USER0); 2050 kunmap_atomic(se);
2051 2051
2052 logfs_put_write_page(page); 2052 logfs_put_write_page(page);
2053} 2053}
@@ -2245,10 +2245,10 @@ int logfs_inode_write(struct inode *inode, const void *buf, size_t count,
2245 if (!page) 2245 if (!page)
2246 return -ENOMEM; 2246 return -ENOMEM;
2247 2247
2248 pagebuf = kmap_atomic(page, KM_USER0); 2248 pagebuf = kmap_atomic(page);
2249 memcpy(pagebuf, buf, count); 2249 memcpy(pagebuf, buf, count);
2250 flush_dcache_page(page); 2250 flush_dcache_page(page);
2251 kunmap_atomic(pagebuf, KM_USER0); 2251 kunmap_atomic(pagebuf);
2252 2252
2253 if (i_size_read(inode) < pos + LOGFS_BLOCKSIZE) 2253 if (i_size_read(inode) < pos + LOGFS_BLOCKSIZE)
2254 i_size_write(inode, pos + LOGFS_BLOCKSIZE); 2254 i_size_write(inode, pos + LOGFS_BLOCKSIZE);
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
index ab798ed1cc88..e28d090c98d6 100644
--- a/fs/logfs/segment.c
+++ b/fs/logfs/segment.c
@@ -543,9 +543,9 @@ void move_page_to_btree(struct page *page)
543 BUG_ON(!item); /* mempool empty */ 543 BUG_ON(!item); /* mempool empty */
544 memset(item, 0, sizeof(*item)); 544 memset(item, 0, sizeof(*item));
545 545
546 child = kmap_atomic(page, KM_USER0); 546 child = kmap_atomic(page);
547 item->val = child[pos]; 547 item->val = child[pos];
548 kunmap_atomic(child, KM_USER0); 548 kunmap_atomic(child);
549 item->child_no = pos; 549 item->child_no = pos;
550 list_add(&item->list, &block->item_list); 550 list_add(&item->list, &block->item_list);
551 } 551 }
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index c9ee7f5d1caf..97bca623d893 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -315,11 +315,9 @@ static int logfs_get_sb_final(struct super_block *sb)
315 if (IS_ERR(rootdir)) 315 if (IS_ERR(rootdir))
316 goto fail; 316 goto fail;
317 317
318 sb->s_root = d_alloc_root(rootdir); 318 sb->s_root = d_make_root(rootdir);
319 if (!sb->s_root) { 319 if (!sb->s_root)
320 iput(rootdir);
321 goto fail; 320 goto fail;
322 }
323 321
324 /* at that point we know that ->put_super() will be called */ 322 /* at that point we know that ->put_super() will be called */
325 super->s_erase_page = alloc_pages(GFP_KERNEL, 0); 323 super->s_erase_page = alloc_pages(GFP_KERNEL, 0);
@@ -542,6 +540,7 @@ static struct dentry *logfs_get_sb_device(struct logfs_super *super,
542 * the filesystem incompatible with 32bit systems. 540 * the filesystem incompatible with 32bit systems.
543 */ 541 */
544 sb->s_maxbytes = (1ull << 43) - 1; 542 sb->s_maxbytes = (1ull << 43) - 1;
543 sb->s_max_links = LOGFS_LINK_MAX;
545 sb->s_op = &logfs_super_operations; 544 sb->s_op = &logfs_super_operations;
546 sb->s_flags = flags | MS_NOATIME; 545 sb->s_flags = flags | MS_NOATIME;
547 546
@@ -627,7 +626,10 @@ static int __init logfs_init(void)
627 if (ret) 626 if (ret)
628 goto out2; 627 goto out2;
629 628
630 return register_filesystem(&logfs_fs_type); 629 ret = register_filesystem(&logfs_fs_type);
630 if (!ret)
631 return 0;
632 logfs_destroy_inode_cache();
631out2: 633out2:
632 logfs_compr_exit(); 634 logfs_compr_exit();
633out1: 635out1:
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index 085a9262c692..685b2d981b87 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -335,7 +335,7 @@ int minix_make_empty(struct inode *inode, struct inode *dir)
335 goto fail; 335 goto fail;
336 } 336 }
337 337
338 kaddr = kmap_atomic(page, KM_USER0); 338 kaddr = kmap_atomic(page);
339 memset(kaddr, 0, PAGE_CACHE_SIZE); 339 memset(kaddr, 0, PAGE_CACHE_SIZE);
340 340
341 if (sbi->s_version == MINIX_V3) { 341 if (sbi->s_version == MINIX_V3) {
@@ -355,7 +355,7 @@ int minix_make_empty(struct inode *inode, struct inode *dir)
355 de->inode = dir->i_ino; 355 de->inode = dir->i_ino;
356 strcpy(de->name, ".."); 356 strcpy(de->name, "..");
357 } 357 }
358 kunmap_atomic(kaddr, KM_USER0); 358 kunmap_atomic(kaddr);
359 359
360 err = dir_commit_chunk(page, 0, 2 * sbi->s_dirsize); 360 err = dir_commit_chunk(page, 0, 2 * sbi->s_dirsize);
361fail: 361fail:
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index fa8b612b8ce2..fcb05d2c6b5f 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -190,24 +190,24 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
190 sbi->s_version = MINIX_V1; 190 sbi->s_version = MINIX_V1;
191 sbi->s_dirsize = 16; 191 sbi->s_dirsize = 16;
192 sbi->s_namelen = 14; 192 sbi->s_namelen = 14;
193 sbi->s_link_max = MINIX_LINK_MAX; 193 s->s_max_links = MINIX_LINK_MAX;
194 } else if (s->s_magic == MINIX_SUPER_MAGIC2) { 194 } else if (s->s_magic == MINIX_SUPER_MAGIC2) {
195 sbi->s_version = MINIX_V1; 195 sbi->s_version = MINIX_V1;
196 sbi->s_dirsize = 32; 196 sbi->s_dirsize = 32;
197 sbi->s_namelen = 30; 197 sbi->s_namelen = 30;
198 sbi->s_link_max = MINIX_LINK_MAX; 198 s->s_max_links = MINIX_LINK_MAX;
199 } else if (s->s_magic == MINIX2_SUPER_MAGIC) { 199 } else if (s->s_magic == MINIX2_SUPER_MAGIC) {
200 sbi->s_version = MINIX_V2; 200 sbi->s_version = MINIX_V2;
201 sbi->s_nzones = ms->s_zones; 201 sbi->s_nzones = ms->s_zones;
202 sbi->s_dirsize = 16; 202 sbi->s_dirsize = 16;
203 sbi->s_namelen = 14; 203 sbi->s_namelen = 14;
204 sbi->s_link_max = MINIX2_LINK_MAX; 204 s->s_max_links = MINIX2_LINK_MAX;
205 } else if (s->s_magic == MINIX2_SUPER_MAGIC2) { 205 } else if (s->s_magic == MINIX2_SUPER_MAGIC2) {
206 sbi->s_version = MINIX_V2; 206 sbi->s_version = MINIX_V2;
207 sbi->s_nzones = ms->s_zones; 207 sbi->s_nzones = ms->s_zones;
208 sbi->s_dirsize = 32; 208 sbi->s_dirsize = 32;
209 sbi->s_namelen = 30; 209 sbi->s_namelen = 30;
210 sbi->s_link_max = MINIX2_LINK_MAX; 210 s->s_max_links = MINIX2_LINK_MAX;
211 } else if ( *(__u16 *)(bh->b_data + 24) == MINIX3_SUPER_MAGIC) { 211 } else if ( *(__u16 *)(bh->b_data + 24) == MINIX3_SUPER_MAGIC) {
212 m3s = (struct minix3_super_block *) bh->b_data; 212 m3s = (struct minix3_super_block *) bh->b_data;
213 s->s_magic = m3s->s_magic; 213 s->s_magic = m3s->s_magic;
@@ -221,9 +221,9 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
221 sbi->s_dirsize = 64; 221 sbi->s_dirsize = 64;
222 sbi->s_namelen = 60; 222 sbi->s_namelen = 60;
223 sbi->s_version = MINIX_V3; 223 sbi->s_version = MINIX_V3;
224 sbi->s_link_max = MINIX2_LINK_MAX;
225 sbi->s_mount_state = MINIX_VALID_FS; 224 sbi->s_mount_state = MINIX_VALID_FS;
226 sb_set_blocksize(s, m3s->s_blocksize); 225 sb_set_blocksize(s, m3s->s_blocksize);
226 s->s_max_links = MINIX2_LINK_MAX;
227 } else 227 } else
228 goto out_no_fs; 228 goto out_no_fs;
229 229
@@ -254,14 +254,6 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
254 minix_set_bit(0,sbi->s_imap[0]->b_data); 254 minix_set_bit(0,sbi->s_imap[0]->b_data);
255 minix_set_bit(0,sbi->s_zmap[0]->b_data); 255 minix_set_bit(0,sbi->s_zmap[0]->b_data);
256 256
257 /* set up enough so that it can read an inode */
258 s->s_op = &minix_sops;
259 root_inode = minix_iget(s, MINIX_ROOT_INO);
260 if (IS_ERR(root_inode)) {
261 ret = PTR_ERR(root_inode);
262 goto out_no_root;
263 }
264
265 /* Apparently minix can create filesystems that allocate more blocks for 257 /* Apparently minix can create filesystems that allocate more blocks for
266 * the bitmaps than needed. We simply ignore that, but verify it didn't 258 * the bitmaps than needed. We simply ignore that, but verify it didn't
267 * create one with not enough blocks and bail out if so. 259 * create one with not enough blocks and bail out if so.
@@ -270,7 +262,7 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
270 if (sbi->s_imap_blocks < block) { 262 if (sbi->s_imap_blocks < block) {
271 printk("MINIX-fs: file system does not have enough " 263 printk("MINIX-fs: file system does not have enough "
272 "imap blocks allocated. Refusing to mount\n"); 264 "imap blocks allocated. Refusing to mount\n");
273 goto out_iput; 265 goto out_no_bitmap;
274 } 266 }
275 267
276 block = minix_blocks_needed( 268 block = minix_blocks_needed(
@@ -279,13 +271,21 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
279 if (sbi->s_zmap_blocks < block) { 271 if (sbi->s_zmap_blocks < block) {
280 printk("MINIX-fs: file system does not have enough " 272 printk("MINIX-fs: file system does not have enough "
281 "zmap blocks allocated. Refusing to mount.\n"); 273 "zmap blocks allocated. Refusing to mount.\n");
282 goto out_iput; 274 goto out_no_bitmap;
275 }
276
277 /* set up enough so that it can read an inode */
278 s->s_op = &minix_sops;
279 root_inode = minix_iget(s, MINIX_ROOT_INO);
280 if (IS_ERR(root_inode)) {
281 ret = PTR_ERR(root_inode);
282 goto out_no_root;
283 } 283 }
284 284
285 ret = -ENOMEM; 285 ret = -ENOMEM;
286 s->s_root = d_alloc_root(root_inode); 286 s->s_root = d_make_root(root_inode);
287 if (!s->s_root) 287 if (!s->s_root)
288 goto out_iput; 288 goto out_no_root;
289 289
290 if (!(s->s_flags & MS_RDONLY)) { 290 if (!(s->s_flags & MS_RDONLY)) {
291 if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */ 291 if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */
@@ -301,10 +301,6 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
301 301
302 return 0; 302 return 0;
303 303
304out_iput:
305 iput(root_inode);
306 goto out_freemap;
307
308out_no_root: 304out_no_root:
309 if (!silent) 305 if (!silent)
310 printk("MINIX-fs: get root inode failed\n"); 306 printk("MINIX-fs: get root inode failed\n");
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index c889ef0aa571..1ebd11854622 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -34,7 +34,6 @@ struct minix_sb_info {
34 unsigned long s_max_size; 34 unsigned long s_max_size;
35 int s_dirsize; 35 int s_dirsize;
36 int s_namelen; 36 int s_namelen;
37 int s_link_max;
38 struct buffer_head ** s_imap; 37 struct buffer_head ** s_imap;
39 struct buffer_head ** s_zmap; 38 struct buffer_head ** s_zmap;
40 struct buffer_head * s_sbh; 39 struct buffer_head * s_sbh;
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index 2f76e38c2065..2d0ee1786305 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -94,9 +94,6 @@ static int minix_link(struct dentry * old_dentry, struct inode * dir,
94{ 94{
95 struct inode *inode = old_dentry->d_inode; 95 struct inode *inode = old_dentry->d_inode;
96 96
97 if (inode->i_nlink >= minix_sb(inode->i_sb)->s_link_max)
98 return -EMLINK;
99
100 inode->i_ctime = CURRENT_TIME_SEC; 97 inode->i_ctime = CURRENT_TIME_SEC;
101 inode_inc_link_count(inode); 98 inode_inc_link_count(inode);
102 ihold(inode); 99 ihold(inode);
@@ -106,10 +103,7 @@ static int minix_link(struct dentry * old_dentry, struct inode * dir,
106static int minix_mkdir(struct inode * dir, struct dentry *dentry, umode_t mode) 103static int minix_mkdir(struct inode * dir, struct dentry *dentry, umode_t mode)
107{ 104{
108 struct inode * inode; 105 struct inode * inode;
109 int err = -EMLINK; 106 int err;
110
111 if (dir->i_nlink >= minix_sb(dir->i_sb)->s_link_max)
112 goto out;
113 107
114 inode_inc_link_count(dir); 108 inode_inc_link_count(dir);
115 109
@@ -181,7 +175,6 @@ static int minix_rmdir(struct inode * dir, struct dentry *dentry)
181static int minix_rename(struct inode * old_dir, struct dentry *old_dentry, 175static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
182 struct inode * new_dir, struct dentry *new_dentry) 176 struct inode * new_dir, struct dentry *new_dentry)
183{ 177{
184 struct minix_sb_info * info = minix_sb(old_dir->i_sb);
185 struct inode * old_inode = old_dentry->d_inode; 178 struct inode * old_inode = old_dentry->d_inode;
186 struct inode * new_inode = new_dentry->d_inode; 179 struct inode * new_inode = new_dentry->d_inode;
187 struct page * dir_page = NULL; 180 struct page * dir_page = NULL;
@@ -219,11 +212,6 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
219 drop_nlink(new_inode); 212 drop_nlink(new_inode);
220 inode_dec_link_count(new_inode); 213 inode_dec_link_count(new_inode);
221 } else { 214 } else {
222 if (dir_de) {
223 err = -EMLINK;
224 if (new_dir->i_nlink >= info->s_link_max)
225 goto out_dir;
226 }
227 err = minix_add_link(new_dentry, old_inode); 215 err = minix_add_link(new_dentry, old_inode);
228 if (err) 216 if (err)
229 goto out_dir; 217 goto out_dir;
diff --git a/fs/mpage.c b/fs/mpage.c
index 643e9f55ef29..0face1c4d4c6 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -13,7 +13,7 @@
13 */ 13 */
14 14
15#include <linux/kernel.h> 15#include <linux/kernel.h>
16#include <linux/module.h> 16#include <linux/export.h>
17#include <linux/mm.h> 17#include <linux/mm.h>
18#include <linux/kdev_t.h> 18#include <linux/kdev_t.h>
19#include <linux/gfp.h> 19#include <linux/gfp.h>
diff --git a/fs/namei.c b/fs/namei.c
index a780ea515c47..e615ff37e27d 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -15,7 +15,7 @@
15 */ 15 */
16 16
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/module.h> 18#include <linux/export.h>
19#include <linux/slab.h> 19#include <linux/slab.h>
20#include <linux/fs.h> 20#include <linux/fs.h>
21#include <linux/namei.h> 21#include <linux/namei.h>
@@ -161,7 +161,7 @@ static char *getname_flags(const char __user *filename, int flags, int *empty)
161 161
162char *getname(const char __user * filename) 162char *getname(const char __user * filename)
163{ 163{
164 return getname_flags(filename, 0, 0); 164 return getname_flags(filename, 0, NULL);
165} 165}
166 166
167#ifdef CONFIG_AUDITSYSCALL 167#ifdef CONFIG_AUDITSYSCALL
@@ -642,7 +642,7 @@ follow_link(struct path *link, struct nameidata *nd, void **p)
642 cond_resched(); 642 cond_resched();
643 current->total_link_count++; 643 current->total_link_count++;
644 644
645 touch_atime(link->mnt, dentry); 645 touch_atime(link);
646 nd_set_link(nd, NULL); 646 nd_set_link(nd, NULL);
647 647
648 error = security_inode_follow_link(link->dentry, nd); 648 error = security_inode_follow_link(link->dentry, nd);
@@ -1375,6 +1375,157 @@ static inline int can_lookup(struct inode *inode)
1375} 1375}
1376 1376
1377/* 1377/*
1378 * We can do the critical dentry name comparison and hashing
1379 * operations one word at a time, but we are limited to:
1380 *
1381 * - Architectures with fast unaligned word accesses. We could
1382 * do a "get_unaligned()" if this helps and is sufficiently
1383 * fast.
1384 *
1385 * - Little-endian machines (so that we can generate the mask
1386 * of low bytes efficiently). Again, we *could* do a byte
1387 * swapping load on big-endian architectures if that is not
1388 * expensive enough to make the optimization worthless.
1389 *
1390 * - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we
1391 * do not trap on the (extremely unlikely) case of a page
1392 * crossing operation.
1393 *
1394 * - Furthermore, we need an efficient 64-bit compile for the
1395 * 64-bit case in order to generate the "number of bytes in
1396 * the final mask". Again, that could be replaced with a
1397 * efficient population count instruction or similar.
1398 */
1399#ifdef CONFIG_DCACHE_WORD_ACCESS
1400
1401#ifdef CONFIG_64BIT
1402
1403/*
1404 * Jan Achrenius on G+: microoptimized version of
1405 * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
1406 * that works for the bytemasks without having to
1407 * mask them first.
1408 */
1409static inline long count_masked_bytes(unsigned long mask)
1410{
1411 return mask*0x0001020304050608ul >> 56;
1412}
1413
1414static inline unsigned int fold_hash(unsigned long hash)
1415{
1416 hash += hash >> (8*sizeof(int));
1417 return hash;
1418}
1419
1420#else /* 32-bit case */
1421
1422/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
1423static inline long count_masked_bytes(long mask)
1424{
1425 /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
1426 long a = (0x0ff0001+mask) >> 23;
1427 /* Fix the 1 for 00 case */
1428 return a & mask;
1429}
1430
1431#define fold_hash(x) (x)
1432
1433#endif
1434
1435unsigned int full_name_hash(const unsigned char *name, unsigned int len)
1436{
1437 unsigned long a, mask;
1438 unsigned long hash = 0;
1439
1440 for (;;) {
1441 a = *(unsigned long *)name;
1442 if (len < sizeof(unsigned long))
1443 break;
1444 hash += a;
1445 hash *= 9;
1446 name += sizeof(unsigned long);
1447 len -= sizeof(unsigned long);
1448 if (!len)
1449 goto done;
1450 }
1451 mask = ~(~0ul << len*8);
1452 hash += mask & a;
1453done:
1454 return fold_hash(hash);
1455}
1456EXPORT_SYMBOL(full_name_hash);
1457
1458#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x))
1459#define ONEBYTES REPEAT_BYTE(0x01)
1460#define SLASHBYTES REPEAT_BYTE('/')
1461#define HIGHBITS REPEAT_BYTE(0x80)
1462
1463/* Return the high bit set in the first byte that is a zero */
1464static inline unsigned long has_zero(unsigned long a)
1465{
1466 return ((a - ONEBYTES) & ~a) & HIGHBITS;
1467}
1468
1469/*
1470 * Calculate the length and hash of the path component, and
1471 * return the length of the component;
1472 */
1473static inline unsigned long hash_name(const char *name, unsigned int *hashp)
1474{
1475 unsigned long a, mask, hash, len;
1476
1477 hash = a = 0;
1478 len = -sizeof(unsigned long);
1479 do {
1480 hash = (hash + a) * 9;
1481 len += sizeof(unsigned long);
1482 a = *(unsigned long *)(name+len);
1483 /* Do we have any NUL or '/' bytes in this word? */
1484 mask = has_zero(a) | has_zero(a ^ SLASHBYTES);
1485 } while (!mask);
1486
1487 /* The mask *below* the first high bit set */
1488 mask = (mask - 1) & ~mask;
1489 mask >>= 7;
1490 hash += a & mask;
1491 *hashp = fold_hash(hash);
1492
1493 return len + count_masked_bytes(mask);
1494}
1495
1496#else
1497
1498unsigned int full_name_hash(const unsigned char *name, unsigned int len)
1499{
1500 unsigned long hash = init_name_hash();
1501 while (len--)
1502 hash = partial_name_hash(*name++, hash);
1503 return end_name_hash(hash);
1504}
1505EXPORT_SYMBOL(full_name_hash);
1506
1507/*
1508 * We know there's a real path component here of at least
1509 * one character.
1510 */
1511static inline unsigned long hash_name(const char *name, unsigned int *hashp)
1512{
1513 unsigned long hash = init_name_hash();
1514 unsigned long len = 0, c;
1515
1516 c = (unsigned char)*name;
1517 do {
1518 len++;
1519 hash = partial_name_hash(c, hash);
1520 c = (unsigned char)name[len];
1521 } while (c && c != '/');
1522 *hashp = end_name_hash(hash);
1523 return len;
1524}
1525
1526#endif
1527
1528/*
1378 * Name resolution. 1529 * Name resolution.
1379 * This is the basic name resolution function, turning a pathname into 1530 * This is the basic name resolution function, turning a pathname into
1380 * the final dentry. We expect 'base' to be positive and a directory. 1531 * the final dentry. We expect 'base' to be positive and a directory.
@@ -1394,31 +1545,22 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1394 1545
1395 /* At this point we know we have a real path component. */ 1546 /* At this point we know we have a real path component. */
1396 for(;;) { 1547 for(;;) {
1397 unsigned long hash;
1398 struct qstr this; 1548 struct qstr this;
1399 unsigned int c; 1549 long len;
1400 int type; 1550 int type;
1401 1551
1402 err = may_lookup(nd); 1552 err = may_lookup(nd);
1403 if (err) 1553 if (err)
1404 break; 1554 break;
1405 1555
1556 len = hash_name(name, &this.hash);
1406 this.name = name; 1557 this.name = name;
1407 c = *(const unsigned char *)name; 1558 this.len = len;
1408
1409 hash = init_name_hash();
1410 do {
1411 name++;
1412 hash = partial_name_hash(c, hash);
1413 c = *(const unsigned char *)name;
1414 } while (c && (c != '/'));
1415 this.len = name - (const char *) this.name;
1416 this.hash = end_name_hash(hash);
1417 1559
1418 type = LAST_NORM; 1560 type = LAST_NORM;
1419 if (this.name[0] == '.') switch (this.len) { 1561 if (name[0] == '.') switch (len) {
1420 case 2: 1562 case 2:
1421 if (this.name[1] == '.') { 1563 if (name[1] == '.') {
1422 type = LAST_DOTDOT; 1564 type = LAST_DOTDOT;
1423 nd->flags |= LOOKUP_JUMPED; 1565 nd->flags |= LOOKUP_JUMPED;
1424 } 1566 }
@@ -1437,12 +1579,18 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1437 } 1579 }
1438 } 1580 }
1439 1581
1440 /* remove trailing slashes? */ 1582 if (!name[len])
1441 if (!c)
1442 goto last_component; 1583 goto last_component;
1443 while (*++name == '/'); 1584 /*
1444 if (!*name) 1585 * If it wasn't NUL, we know it was '/'. Skip that
1586 * slash, and continue until no more slashes.
1587 */
1588 do {
1589 len++;
1590 } while (unlikely(name[len] == '/'));
1591 if (!name[len])
1445 goto last_component; 1592 goto last_component;
1593 name += len;
1446 1594
1447 err = walk_component(nd, &next, &this, type, LOOKUP_FOLLOW); 1595 err = walk_component(nd, &next, &this, type, LOOKUP_FOLLOW);
1448 if (err < 0) 1596 if (err < 0)
@@ -1775,24 +1923,21 @@ static struct dentry *lookup_hash(struct nameidata *nd)
1775struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) 1923struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
1776{ 1924{
1777 struct qstr this; 1925 struct qstr this;
1778 unsigned long hash;
1779 unsigned int c; 1926 unsigned int c;
1780 1927
1781 WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex)); 1928 WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
1782 1929
1783 this.name = name; 1930 this.name = name;
1784 this.len = len; 1931 this.len = len;
1932 this.hash = full_name_hash(name, len);
1785 if (!len) 1933 if (!len)
1786 return ERR_PTR(-EACCES); 1934 return ERR_PTR(-EACCES);
1787 1935
1788 hash = init_name_hash();
1789 while (len--) { 1936 while (len--) {
1790 c = *(const unsigned char *)name++; 1937 c = *(const unsigned char *)name++;
1791 if (c == '/' || c == '\0') 1938 if (c == '/' || c == '\0')
1792 return ERR_PTR(-EACCES); 1939 return ERR_PTR(-EACCES);
1793 hash = partial_name_hash(c, hash);
1794 } 1940 }
1795 this.hash = end_name_hash(hash);
1796 /* 1941 /*
1797 * See if the low-level filesystem might want 1942 * See if the low-level filesystem might want
1798 * to use its own hash.. 1943 * to use its own hash..
@@ -1827,7 +1972,7 @@ int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
1827int user_path_at(int dfd, const char __user *name, unsigned flags, 1972int user_path_at(int dfd, const char __user *name, unsigned flags,
1828 struct path *path) 1973 struct path *path)
1829{ 1974{
1830 return user_path_at_empty(dfd, name, flags, path, 0); 1975 return user_path_at_empty(dfd, name, flags, path, NULL);
1831} 1976}
1832 1977
1833static int user_path_parent(int dfd, const char __user *path, 1978static int user_path_parent(int dfd, const char __user *path,
@@ -2140,7 +2285,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
2140 /* sayonara */ 2285 /* sayonara */
2141 error = complete_walk(nd); 2286 error = complete_walk(nd);
2142 if (error) 2287 if (error)
2143 return ERR_PTR(-ECHILD); 2288 return ERR_PTR(error);
2144 2289
2145 error = -ENOTDIR; 2290 error = -ENOTDIR;
2146 if (nd->flags & LOOKUP_DIRECTORY) { 2291 if (nd->flags & LOOKUP_DIRECTORY) {
@@ -2239,7 +2384,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
2239 /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */ 2384 /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */
2240 error = complete_walk(nd); 2385 error = complete_walk(nd);
2241 if (error) 2386 if (error)
2242 goto exit; 2387 return ERR_PTR(error);
2243 error = -EISDIR; 2388 error = -EISDIR;
2244 if (S_ISDIR(nd->inode->i_mode)) 2389 if (S_ISDIR(nd->inode->i_mode))
2245 goto exit; 2390 goto exit;
@@ -2547,6 +2692,7 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d
2547int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) 2692int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
2548{ 2693{
2549 int error = may_create(dir, dentry); 2694 int error = may_create(dir, dentry);
2695 unsigned max_links = dir->i_sb->s_max_links;
2550 2696
2551 if (error) 2697 if (error)
2552 return error; 2698 return error;
@@ -2559,6 +2705,9 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
2559 if (error) 2705 if (error)
2560 return error; 2706 return error;
2561 2707
2708 if (max_links && dir->i_nlink >= max_links)
2709 return -EMLINK;
2710
2562 error = dir->i_op->mkdir(dir, dentry, mode); 2711 error = dir->i_op->mkdir(dir, dentry, mode);
2563 if (!error) 2712 if (!error)
2564 fsnotify_mkdir(dir, dentry); 2713 fsnotify_mkdir(dir, dentry);
@@ -2889,6 +3038,7 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn
2889int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) 3038int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
2890{ 3039{
2891 struct inode *inode = old_dentry->d_inode; 3040 struct inode *inode = old_dentry->d_inode;
3041 unsigned max_links = dir->i_sb->s_max_links;
2892 int error; 3042 int error;
2893 3043
2894 if (!inode) 3044 if (!inode)
@@ -2919,6 +3069,8 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
2919 /* Make sure we don't allow creating hardlink to an unlinked file */ 3069 /* Make sure we don't allow creating hardlink to an unlinked file */
2920 if (inode->i_nlink == 0) 3070 if (inode->i_nlink == 0)
2921 error = -ENOENT; 3071 error = -ENOENT;
3072 else if (max_links && inode->i_nlink >= max_links)
3073 error = -EMLINK;
2922 else 3074 else
2923 error = dir->i_op->link(old_dentry, dir, new_dentry); 3075 error = dir->i_op->link(old_dentry, dir, new_dentry);
2924 mutex_unlock(&inode->i_mutex); 3076 mutex_unlock(&inode->i_mutex);
@@ -3028,6 +3180,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
3028{ 3180{
3029 int error = 0; 3181 int error = 0;
3030 struct inode *target = new_dentry->d_inode; 3182 struct inode *target = new_dentry->d_inode;
3183 unsigned max_links = new_dir->i_sb->s_max_links;
3031 3184
3032 /* 3185 /*
3033 * If we are going to change the parent - check write permissions, 3186 * If we are going to change the parent - check write permissions,
@@ -3051,6 +3204,11 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
3051 if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry)) 3204 if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry))
3052 goto out; 3205 goto out;
3053 3206
3207 error = -EMLINK;
3208 if (max_links && !target && new_dir != old_dir &&
3209 new_dir->i_nlink >= max_links)
3210 goto out;
3211
3054 if (target) 3212 if (target)
3055 shrink_dcache_parent(new_dentry); 3213 shrink_dcache_parent(new_dentry);
3056 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 3214 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
@@ -3349,9 +3507,9 @@ retry:
3349 if (err) 3507 if (err)
3350 goto fail; 3508 goto fail;
3351 3509
3352 kaddr = kmap_atomic(page, KM_USER0); 3510 kaddr = kmap_atomic(page);
3353 memcpy(kaddr, symname, len-1); 3511 memcpy(kaddr, symname, len-1);
3354 kunmap_atomic(kaddr, KM_USER0); 3512 kunmap_atomic(kaddr);
3355 3513
3356 err = pagecache_write_end(NULL, mapping, 0, len-1, len-1, 3514 err = pagecache_write_end(NULL, mapping, 0, len-1, len-1,
3357 page, fsdata); 3515 page, fsdata);
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 3d1e34f8a68e..49df0e7f8379 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -716,13 +716,11 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
716 if (!root_inode) 716 if (!root_inode)
717 goto out_disconnect; 717 goto out_disconnect;
718 DPRINTK("ncp_fill_super: root vol=%d\n", NCP_FINFO(root_inode)->volNumber); 718 DPRINTK("ncp_fill_super: root vol=%d\n", NCP_FINFO(root_inode)->volNumber);
719 sb->s_root = d_alloc_root(root_inode); 719 sb->s_root = d_make_root(root_inode);
720 if (!sb->s_root) 720 if (!sb->s_root)
721 goto out_no_root; 721 goto out_disconnect;
722 return 0; 722 return 0;
723 723
724out_no_root:
725 iput(root_inode);
726out_disconnect: 724out_disconnect:
727 ncp_lock_server(server); 725 ncp_lock_server(server);
728 ncp_disconnect(server); 726 ncp_disconnect(server);
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index dbcd82126aed..2a0e6c599147 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -64,6 +64,7 @@ config NFS_V4
64 bool "NFS client support for NFS version 4" 64 bool "NFS client support for NFS version 4"
65 depends on NFS_FS 65 depends on NFS_FS
66 select SUNRPC_GSS 66 select SUNRPC_GSS
67 select KEYS
67 help 68 help
68 This option enables support for version 4 of the NFS protocol 69 This option enables support for version 4 of the NFS protocol
69 (RFC 3530) in the kernel's NFS client. 70 (RFC 3530) in the kernel's NFS client.
@@ -98,6 +99,18 @@ config PNFS_OBJLAYOUT
98 depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD 99 depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD
99 default m 100 default m
100 101
102config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN
103 string "NFSv4.1 Implementation ID Domain"
104 depends on NFS_V4_1
105 default "kernel.org"
106 help
107 This option defines the domain portion of the implementation ID that
108 may be sent in the NFS exchange_id operation. The value must be in
109 the format of a DNS domain name and should be set to the DNS domain
110 name of the distribution.
111 If the NFS client is unchanged from the upstream kernel, this
112 option should be set to the default "kernel.org".
113
101config ROOT_NFS 114config ROOT_NFS
102 bool "Root file system on NFS" 115 bool "Root file system on NFS"
103 depends on NFS_FS=y && IP_PNP 116 depends on NFS_FS=y && IP_PNP
@@ -130,16 +143,10 @@ config NFS_USE_KERNEL_DNS
130 bool 143 bool
131 depends on NFS_V4 && !NFS_USE_LEGACY_DNS 144 depends on NFS_V4 && !NFS_USE_LEGACY_DNS
132 select DNS_RESOLVER 145 select DNS_RESOLVER
133 select KEYS
134 default y 146 default y
135 147
136config NFS_USE_NEW_IDMAPPER 148config NFS_DEBUG
137 bool "Use the new idmapper upcall routine" 149 bool
138 depends on NFS_V4 && KEYS 150 depends on NFS_FS && SUNRPC_DEBUG
139 help 151 select CRC32
140 Say Y here if you want NFS to use the new idmapper upcall functions. 152 default y
141 You will need /sbin/request-key (usually provided by the keyutils
142 package). For details, read
143 <file:Documentation/filesystems/nfs/idmapper.txt>.
144
145 If you are unsure, say N.
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 48cfac31f64c..9c94297bb70e 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -46,9 +46,6 @@ MODULE_LICENSE("GPL");
46MODULE_AUTHOR("Andy Adamson <andros@citi.umich.edu>"); 46MODULE_AUTHOR("Andy Adamson <andros@citi.umich.edu>");
47MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver"); 47MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver");
48 48
49struct dentry *bl_device_pipe;
50wait_queue_head_t bl_wq;
51
52static void print_page(struct page *page) 49static void print_page(struct page *page)
53{ 50{
54 dprintk("PRINTPAGE page %p\n", page); 51 dprintk("PRINTPAGE page %p\n", page);
@@ -236,12 +233,11 @@ bl_read_pagelist(struct nfs_read_data *rdata)
236 sector_t isect, extent_length = 0; 233 sector_t isect, extent_length = 0;
237 struct parallel_io *par; 234 struct parallel_io *par;
238 loff_t f_offset = rdata->args.offset; 235 loff_t f_offset = rdata->args.offset;
239 size_t count = rdata->args.count;
240 struct page **pages = rdata->args.pages; 236 struct page **pages = rdata->args.pages;
241 int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT; 237 int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;
242 238
243 dprintk("%s enter nr_pages %u offset %lld count %Zd\n", __func__, 239 dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
244 rdata->npages, f_offset, count); 240 rdata->npages, f_offset, (unsigned int)rdata->args.count);
245 241
246 par = alloc_parallel(rdata); 242 par = alloc_parallel(rdata);
247 if (!par) 243 if (!par)
@@ -1025,10 +1021,128 @@ static const struct rpc_pipe_ops bl_upcall_ops = {
1025 .destroy_msg = bl_pipe_destroy_msg, 1021 .destroy_msg = bl_pipe_destroy_msg,
1026}; 1022};
1027 1023
1024static struct dentry *nfs4blocklayout_register_sb(struct super_block *sb,
1025 struct rpc_pipe *pipe)
1026{
1027 struct dentry *dir, *dentry;
1028
1029 dir = rpc_d_lookup_sb(sb, NFS_PIPE_DIRNAME);
1030 if (dir == NULL)
1031 return ERR_PTR(-ENOENT);
1032 dentry = rpc_mkpipe_dentry(dir, "blocklayout", NULL, pipe);
1033 dput(dir);
1034 return dentry;
1035}
1036
1037static void nfs4blocklayout_unregister_sb(struct super_block *sb,
1038 struct rpc_pipe *pipe)
1039{
1040 if (pipe->dentry)
1041 rpc_unlink(pipe->dentry);
1042}
1043
1044static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event,
1045 void *ptr)
1046{
1047 struct super_block *sb = ptr;
1048 struct net *net = sb->s_fs_info;
1049 struct nfs_net *nn = net_generic(net, nfs_net_id);
1050 struct dentry *dentry;
1051 int ret = 0;
1052
1053 if (!try_module_get(THIS_MODULE))
1054 return 0;
1055
1056 if (nn->bl_device_pipe == NULL) {
1057 module_put(THIS_MODULE);
1058 return 0;
1059 }
1060
1061 switch (event) {
1062 case RPC_PIPEFS_MOUNT:
1063 dentry = nfs4blocklayout_register_sb(sb, nn->bl_device_pipe);
1064 if (IS_ERR(dentry)) {
1065 ret = PTR_ERR(dentry);
1066 break;
1067 }
1068 nn->bl_device_pipe->dentry = dentry;
1069 break;
1070 case RPC_PIPEFS_UMOUNT:
1071 if (nn->bl_device_pipe->dentry)
1072 nfs4blocklayout_unregister_sb(sb, nn->bl_device_pipe);
1073 break;
1074 default:
1075 ret = -ENOTSUPP;
1076 break;
1077 }
1078 module_put(THIS_MODULE);
1079 return ret;
1080}
1081
1082static struct notifier_block nfs4blocklayout_block = {
1083 .notifier_call = rpc_pipefs_event,
1084};
1085
1086static struct dentry *nfs4blocklayout_register_net(struct net *net,
1087 struct rpc_pipe *pipe)
1088{
1089 struct super_block *pipefs_sb;
1090 struct dentry *dentry;
1091
1092 pipefs_sb = rpc_get_sb_net(net);
1093 if (!pipefs_sb)
1094 return NULL;
1095 dentry = nfs4blocklayout_register_sb(pipefs_sb, pipe);
1096 rpc_put_sb_net(net);
1097 return dentry;
1098}
1099
1100static void nfs4blocklayout_unregister_net(struct net *net,
1101 struct rpc_pipe *pipe)
1102{
1103 struct super_block *pipefs_sb;
1104
1105 pipefs_sb = rpc_get_sb_net(net);
1106 if (pipefs_sb) {
1107 nfs4blocklayout_unregister_sb(pipefs_sb, pipe);
1108 rpc_put_sb_net(net);
1109 }
1110}
1111
1112static int nfs4blocklayout_net_init(struct net *net)
1113{
1114 struct nfs_net *nn = net_generic(net, nfs_net_id);
1115 struct dentry *dentry;
1116
1117 init_waitqueue_head(&nn->bl_wq);
1118 nn->bl_device_pipe = rpc_mkpipe_data(&bl_upcall_ops, 0);
1119 if (IS_ERR(nn->bl_device_pipe))
1120 return PTR_ERR(nn->bl_device_pipe);
1121 dentry = nfs4blocklayout_register_net(net, nn->bl_device_pipe);
1122 if (IS_ERR(dentry)) {
1123 rpc_destroy_pipe_data(nn->bl_device_pipe);
1124 return PTR_ERR(dentry);
1125 }
1126 nn->bl_device_pipe->dentry = dentry;
1127 return 0;
1128}
1129
1130static void nfs4blocklayout_net_exit(struct net *net)
1131{
1132 struct nfs_net *nn = net_generic(net, nfs_net_id);
1133
1134 nfs4blocklayout_unregister_net(net, nn->bl_device_pipe);
1135 rpc_destroy_pipe_data(nn->bl_device_pipe);
1136 nn->bl_device_pipe = NULL;
1137}
1138
1139static struct pernet_operations nfs4blocklayout_net_ops = {
1140 .init = nfs4blocklayout_net_init,
1141 .exit = nfs4blocklayout_net_exit,
1142};
1143
1028static int __init nfs4blocklayout_init(void) 1144static int __init nfs4blocklayout_init(void)
1029{ 1145{
1030 struct vfsmount *mnt;
1031 struct path path;
1032 int ret; 1146 int ret;
1033 1147
1034 dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__); 1148 dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__);
@@ -1037,32 +1151,17 @@ static int __init nfs4blocklayout_init(void)
1037 if (ret) 1151 if (ret)
1038 goto out; 1152 goto out;
1039 1153
1040 init_waitqueue_head(&bl_wq); 1154 ret = rpc_pipefs_notifier_register(&nfs4blocklayout_block);
1041 1155 if (ret)
1042 mnt = rpc_get_mount();
1043 if (IS_ERR(mnt)) {
1044 ret = PTR_ERR(mnt);
1045 goto out_remove; 1156 goto out_remove;
1046 } 1157 ret = register_pernet_subsys(&nfs4blocklayout_net_ops);
1047
1048 ret = vfs_path_lookup(mnt->mnt_root,
1049 mnt,
1050 NFS_PIPE_DIRNAME, 0, &path);
1051 if (ret) 1158 if (ret)
1052 goto out_putrpc; 1159 goto out_notifier;
1053
1054 bl_device_pipe = rpc_mkpipe(path.dentry, "blocklayout", NULL,
1055 &bl_upcall_ops, 0);
1056 path_put(&path);
1057 if (IS_ERR(bl_device_pipe)) {
1058 ret = PTR_ERR(bl_device_pipe);
1059 goto out_putrpc;
1060 }
1061out: 1160out:
1062 return ret; 1161 return ret;
1063 1162
1064out_putrpc: 1163out_notifier:
1065 rpc_put_mount(); 1164 rpc_pipefs_notifier_unregister(&nfs4blocklayout_block);
1066out_remove: 1165out_remove:
1067 pnfs_unregister_layoutdriver(&blocklayout_type); 1166 pnfs_unregister_layoutdriver(&blocklayout_type);
1068 return ret; 1167 return ret;
@@ -1073,9 +1172,9 @@ static void __exit nfs4blocklayout_exit(void)
1073 dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n", 1172 dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n",
1074 __func__); 1173 __func__);
1075 1174
1175 rpc_pipefs_notifier_unregister(&nfs4blocklayout_block);
1176 unregister_pernet_subsys(&nfs4blocklayout_net_ops);
1076 pnfs_unregister_layoutdriver(&blocklayout_type); 1177 pnfs_unregister_layoutdriver(&blocklayout_type);
1077 rpc_unlink(bl_device_pipe);
1078 rpc_put_mount();
1079} 1178}
1080 1179
1081MODULE_ALIAS("nfs-layouttype4-3"); 1180MODULE_ALIAS("nfs-layouttype4-3");
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index e31a2df28e70..03350690118e 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -37,6 +37,7 @@
37#include <linux/sunrpc/rpc_pipe_fs.h> 37#include <linux/sunrpc/rpc_pipe_fs.h>
38 38
39#include "../pnfs.h" 39#include "../pnfs.h"
40#include "../netns.h"
40 41
41#define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> SECTOR_SHIFT) 42#define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> SECTOR_SHIFT)
42#define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT) 43#define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT)
@@ -50,6 +51,7 @@ struct pnfs_block_dev {
50 struct list_head bm_node; 51 struct list_head bm_node;
51 struct nfs4_deviceid bm_mdevid; /* associated devid */ 52 struct nfs4_deviceid bm_mdevid; /* associated devid */
52 struct block_device *bm_mdev; /* meta device itself */ 53 struct block_device *bm_mdev; /* meta device itself */
54 struct net *net;
53}; 55};
54 56
55enum exstate4 { 57enum exstate4 {
@@ -151,9 +153,9 @@ BLK_LSEG2EXT(struct pnfs_layout_segment *lseg)
151 return BLK_LO2EXT(lseg->pls_layout); 153 return BLK_LO2EXT(lseg->pls_layout);
152} 154}
153 155
154struct bl_dev_msg { 156struct bl_pipe_msg {
155 int32_t status; 157 struct rpc_pipe_msg msg;
156 uint32_t major, minor; 158 wait_queue_head_t *bl_wq;
157}; 159};
158 160
159struct bl_msg_hdr { 161struct bl_msg_hdr {
@@ -161,9 +163,6 @@ struct bl_msg_hdr {
161 u16 totallen; /* length of entire message, including hdr itself */ 163 u16 totallen; /* length of entire message, including hdr itself */
162}; 164};
163 165
164extern struct dentry *bl_device_pipe;
165extern wait_queue_head_t bl_wq;
166
167#define BL_DEVICE_UMOUNT 0x0 /* Umount--delete devices */ 166#define BL_DEVICE_UMOUNT 0x0 /* Umount--delete devices */
168#define BL_DEVICE_MOUNT 0x1 /* Mount--create devices*/ 167#define BL_DEVICE_MOUNT 0x1 /* Mount--create devices*/
169#define BL_DEVICE_REQUEST_INIT 0x0 /* Start request */ 168#define BL_DEVICE_REQUEST_INIT 0x0 /* Start request */
diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c
index d08ba9107fde..a5c88a554d92 100644
--- a/fs/nfs/blocklayout/blocklayoutdev.c
+++ b/fs/nfs/blocklayout/blocklayoutdev.c
@@ -46,7 +46,7 @@ static int decode_sector_number(__be32 **rp, sector_t *sp)
46 46
47 *rp = xdr_decode_hyper(*rp, &s); 47 *rp = xdr_decode_hyper(*rp, &s);
48 if (s & 0x1ff) { 48 if (s & 0x1ff) {
49 printk(KERN_WARNING "%s: sector not aligned\n", __func__); 49 printk(KERN_WARNING "NFS: %s: sector not aligned\n", __func__);
50 return -1; 50 return -1;
51 } 51 }
52 *sp = s >> SECTOR_SHIFT; 52 *sp = s >> SECTOR_SHIFT;
@@ -79,27 +79,30 @@ int nfs4_blkdev_put(struct block_device *bdev)
79 return blkdev_put(bdev, FMODE_READ); 79 return blkdev_put(bdev, FMODE_READ);
80} 80}
81 81
82static struct bl_dev_msg bl_mount_reply;
83
84ssize_t bl_pipe_downcall(struct file *filp, const char __user *src, 82ssize_t bl_pipe_downcall(struct file *filp, const char __user *src,
85 size_t mlen) 83 size_t mlen)
86{ 84{
85 struct nfs_net *nn = net_generic(filp->f_dentry->d_sb->s_fs_info,
86 nfs_net_id);
87
87 if (mlen != sizeof (struct bl_dev_msg)) 88 if (mlen != sizeof (struct bl_dev_msg))
88 return -EINVAL; 89 return -EINVAL;
89 90
90 if (copy_from_user(&bl_mount_reply, src, mlen) != 0) 91 if (copy_from_user(&nn->bl_mount_reply, src, mlen) != 0)
91 return -EFAULT; 92 return -EFAULT;
92 93
93 wake_up(&bl_wq); 94 wake_up(&nn->bl_wq);
94 95
95 return mlen; 96 return mlen;
96} 97}
97 98
98void bl_pipe_destroy_msg(struct rpc_pipe_msg *msg) 99void bl_pipe_destroy_msg(struct rpc_pipe_msg *msg)
99{ 100{
101 struct bl_pipe_msg *bl_pipe_msg = container_of(msg, struct bl_pipe_msg, msg);
102
100 if (msg->errno >= 0) 103 if (msg->errno >= 0)
101 return; 104 return;
102 wake_up(&bl_wq); 105 wake_up(bl_pipe_msg->bl_wq);
103} 106}
104 107
105/* 108/*
@@ -111,29 +114,33 @@ nfs4_blk_decode_device(struct nfs_server *server,
111{ 114{
112 struct pnfs_block_dev *rv; 115 struct pnfs_block_dev *rv;
113 struct block_device *bd = NULL; 116 struct block_device *bd = NULL;
114 struct rpc_pipe_msg msg; 117 struct bl_pipe_msg bl_pipe_msg;
118 struct rpc_pipe_msg *msg = &bl_pipe_msg.msg;
115 struct bl_msg_hdr bl_msg = { 119 struct bl_msg_hdr bl_msg = {
116 .type = BL_DEVICE_MOUNT, 120 .type = BL_DEVICE_MOUNT,
117 .totallen = dev->mincount, 121 .totallen = dev->mincount,
118 }; 122 };
119 uint8_t *dataptr; 123 uint8_t *dataptr;
120 DECLARE_WAITQUEUE(wq, current); 124 DECLARE_WAITQUEUE(wq, current);
121 struct bl_dev_msg *reply = &bl_mount_reply;
122 int offset, len, i, rc; 125 int offset, len, i, rc;
126 struct net *net = server->nfs_client->net;
127 struct nfs_net *nn = net_generic(net, nfs_net_id);
128 struct bl_dev_msg *reply = &nn->bl_mount_reply;
123 129
124 dprintk("%s CREATING PIPEFS MESSAGE\n", __func__); 130 dprintk("%s CREATING PIPEFS MESSAGE\n", __func__);
125 dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data, 131 dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data,
126 dev->mincount); 132 dev->mincount);
127 133
128 memset(&msg, 0, sizeof(msg)); 134 bl_pipe_msg.bl_wq = &nn->bl_wq;
129 msg.data = kzalloc(sizeof(bl_msg) + dev->mincount, GFP_NOFS); 135 memset(msg, 0, sizeof(*msg));
130 if (!msg.data) { 136 msg->data = kzalloc(sizeof(bl_msg) + dev->mincount, GFP_NOFS);
137 if (!msg->data) {
131 rv = ERR_PTR(-ENOMEM); 138 rv = ERR_PTR(-ENOMEM);
132 goto out; 139 goto out;
133 } 140 }
134 141
135 memcpy(msg.data, &bl_msg, sizeof(bl_msg)); 142 memcpy(msg->data, &bl_msg, sizeof(bl_msg));
136 dataptr = (uint8_t *) msg.data; 143 dataptr = (uint8_t *) msg->data;
137 len = dev->mincount; 144 len = dev->mincount;
138 offset = sizeof(bl_msg); 145 offset = sizeof(bl_msg);
139 for (i = 0; len > 0; i++) { 146 for (i = 0; len > 0; i++) {
@@ -142,13 +149,13 @@ nfs4_blk_decode_device(struct nfs_server *server,
142 len -= PAGE_CACHE_SIZE; 149 len -= PAGE_CACHE_SIZE;
143 offset += PAGE_CACHE_SIZE; 150 offset += PAGE_CACHE_SIZE;
144 } 151 }
145 msg.len = sizeof(bl_msg) + dev->mincount; 152 msg->len = sizeof(bl_msg) + dev->mincount;
146 153
147 dprintk("%s CALLING USERSPACE DAEMON\n", __func__); 154 dprintk("%s CALLING USERSPACE DAEMON\n", __func__);
148 add_wait_queue(&bl_wq, &wq); 155 add_wait_queue(&nn->bl_wq, &wq);
149 rc = rpc_queue_upcall(bl_device_pipe->d_inode, &msg); 156 rc = rpc_queue_upcall(nn->bl_device_pipe, msg);
150 if (rc < 0) { 157 if (rc < 0) {
151 remove_wait_queue(&bl_wq, &wq); 158 remove_wait_queue(&nn->bl_wq, &wq);
152 rv = ERR_PTR(rc); 159 rv = ERR_PTR(rc);
153 goto out; 160 goto out;
154 } 161 }
@@ -156,7 +163,7 @@ nfs4_blk_decode_device(struct nfs_server *server,
156 set_current_state(TASK_UNINTERRUPTIBLE); 163 set_current_state(TASK_UNINTERRUPTIBLE);
157 schedule(); 164 schedule();
158 __set_current_state(TASK_RUNNING); 165 __set_current_state(TASK_RUNNING);
159 remove_wait_queue(&bl_wq, &wq); 166 remove_wait_queue(&nn->bl_wq, &wq);
160 167
161 if (reply->status != BL_DEVICE_REQUEST_PROC) { 168 if (reply->status != BL_DEVICE_REQUEST_PROC) {
162 dprintk("%s failed to open device: %d\n", 169 dprintk("%s failed to open device: %d\n",
@@ -181,13 +188,14 @@ nfs4_blk_decode_device(struct nfs_server *server,
181 188
182 rv->bm_mdev = bd; 189 rv->bm_mdev = bd;
183 memcpy(&rv->bm_mdevid, &dev->dev_id, sizeof(struct nfs4_deviceid)); 190 memcpy(&rv->bm_mdevid, &dev->dev_id, sizeof(struct nfs4_deviceid));
191 rv->net = net;
184 dprintk("%s Created device %s with bd_block_size %u\n", 192 dprintk("%s Created device %s with bd_block_size %u\n",
185 __func__, 193 __func__,
186 bd->bd_disk->disk_name, 194 bd->bd_disk->disk_name,
187 bd->bd_block_size); 195 bd->bd_block_size);
188 196
189out: 197out:
190 kfree(msg.data); 198 kfree(msg->data);
191 return rv; 199 return rv;
192} 200}
193 201
diff --git a/fs/nfs/blocklayout/blocklayoutdm.c b/fs/nfs/blocklayout/blocklayoutdm.c
index d055c7558073..737d839bc17b 100644
--- a/fs/nfs/blocklayout/blocklayoutdm.c
+++ b/fs/nfs/blocklayout/blocklayoutdm.c
@@ -38,9 +38,10 @@
38 38
39#define NFSDBG_FACILITY NFSDBG_PNFS_LD 39#define NFSDBG_FACILITY NFSDBG_PNFS_LD
40 40
41static void dev_remove(dev_t dev) 41static void dev_remove(struct net *net, dev_t dev)
42{ 42{
43 struct rpc_pipe_msg msg; 43 struct bl_pipe_msg bl_pipe_msg;
44 struct rpc_pipe_msg *msg = &bl_pipe_msg.msg;
44 struct bl_dev_msg bl_umount_request; 45 struct bl_dev_msg bl_umount_request;
45 struct bl_msg_hdr bl_msg = { 46 struct bl_msg_hdr bl_msg = {
46 .type = BL_DEVICE_UMOUNT, 47 .type = BL_DEVICE_UMOUNT,
@@ -48,36 +49,38 @@ static void dev_remove(dev_t dev)
48 }; 49 };
49 uint8_t *dataptr; 50 uint8_t *dataptr;
50 DECLARE_WAITQUEUE(wq, current); 51 DECLARE_WAITQUEUE(wq, current);
52 struct nfs_net *nn = net_generic(net, nfs_net_id);
51 53
52 dprintk("Entering %s\n", __func__); 54 dprintk("Entering %s\n", __func__);
53 55
54 memset(&msg, 0, sizeof(msg)); 56 bl_pipe_msg.bl_wq = &nn->bl_wq;
55 msg.data = kzalloc(1 + sizeof(bl_umount_request), GFP_NOFS); 57 memset(msg, 0, sizeof(*msg));
56 if (!msg.data) 58 msg->data = kzalloc(1 + sizeof(bl_umount_request), GFP_NOFS);
59 if (!msg->data)
57 goto out; 60 goto out;
58 61
59 memset(&bl_umount_request, 0, sizeof(bl_umount_request)); 62 memset(&bl_umount_request, 0, sizeof(bl_umount_request));
60 bl_umount_request.major = MAJOR(dev); 63 bl_umount_request.major = MAJOR(dev);
61 bl_umount_request.minor = MINOR(dev); 64 bl_umount_request.minor = MINOR(dev);
62 65
63 memcpy(msg.data, &bl_msg, sizeof(bl_msg)); 66 memcpy(msg->data, &bl_msg, sizeof(bl_msg));
64 dataptr = (uint8_t *) msg.data; 67 dataptr = (uint8_t *) msg->data;
65 memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request)); 68 memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request));
66 msg.len = sizeof(bl_msg) + bl_msg.totallen; 69 msg->len = sizeof(bl_msg) + bl_msg.totallen;
67 70
68 add_wait_queue(&bl_wq, &wq); 71 add_wait_queue(&nn->bl_wq, &wq);
69 if (rpc_queue_upcall(bl_device_pipe->d_inode, &msg) < 0) { 72 if (rpc_queue_upcall(nn->bl_device_pipe, msg) < 0) {
70 remove_wait_queue(&bl_wq, &wq); 73 remove_wait_queue(&nn->bl_wq, &wq);
71 goto out; 74 goto out;
72 } 75 }
73 76
74 set_current_state(TASK_UNINTERRUPTIBLE); 77 set_current_state(TASK_UNINTERRUPTIBLE);
75 schedule(); 78 schedule();
76 __set_current_state(TASK_RUNNING); 79 __set_current_state(TASK_RUNNING);
77 remove_wait_queue(&bl_wq, &wq); 80 remove_wait_queue(&nn->bl_wq, &wq);
78 81
79out: 82out:
80 kfree(msg.data); 83 kfree(msg->data);
81} 84}
82 85
83/* 86/*
@@ -90,10 +93,10 @@ static void nfs4_blk_metadev_release(struct pnfs_block_dev *bdev)
90 dprintk("%s Releasing\n", __func__); 93 dprintk("%s Releasing\n", __func__);
91 rv = nfs4_blkdev_put(bdev->bm_mdev); 94 rv = nfs4_blkdev_put(bdev->bm_mdev);
92 if (rv) 95 if (rv)
93 printk(KERN_ERR "%s nfs4_blkdev_put returns %d\n", 96 printk(KERN_ERR "NFS: %s nfs4_blkdev_put returns %d\n",
94 __func__, rv); 97 __func__, rv);
95 98
96 dev_remove(bdev->bm_mdev->bd_dev); 99 dev_remove(bdev->net, bdev->bm_mdev->bd_dev);
97} 100}
98 101
99void bl_free_block_dev(struct pnfs_block_dev *bdev) 102void bl_free_block_dev(struct pnfs_block_dev *bdev)
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
index 1abac09f7cd5..1f9a6032796b 100644
--- a/fs/nfs/blocklayout/extents.c
+++ b/fs/nfs/blocklayout/extents.c
@@ -147,7 +147,7 @@ static int _preload_range(struct pnfs_inval_markings *marks,
147 count = (int)(end - start) / (int)tree->mtt_step_size; 147 count = (int)(end - start) / (int)tree->mtt_step_size;
148 148
149 /* Pre-malloc what memory we might need */ 149 /* Pre-malloc what memory we might need */
150 storage = kmalloc(sizeof(*storage) * count, GFP_NOFS); 150 storage = kcalloc(count, sizeof(*storage), GFP_NOFS);
151 if (!storage) 151 if (!storage)
152 return -ENOMEM; 152 return -ENOMEM;
153 for (i = 0; i < count; i++) { 153 for (i = 0; i < count; i++) {
diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c
index c98b439332fc..dded26368111 100644
--- a/fs/nfs/cache_lib.c
+++ b/fs/nfs/cache_lib.c
@@ -13,6 +13,7 @@
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/sunrpc/cache.h> 14#include <linux/sunrpc/cache.h>
15#include <linux/sunrpc/rpc_pipe_fs.h> 15#include <linux/sunrpc/rpc_pipe_fs.h>
16#include <net/net_namespace.h>
16 17
17#include "cache_lib.h" 18#include "cache_lib.h"
18 19
@@ -111,30 +112,54 @@ int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq)
111 return 0; 112 return 0;
112} 113}
113 114
114int nfs_cache_register(struct cache_detail *cd) 115int nfs_cache_register_sb(struct super_block *sb, struct cache_detail *cd)
115{ 116{
116 struct vfsmount *mnt;
117 struct path path;
118 int ret; 117 int ret;
118 struct dentry *dir;
119 119
120 mnt = rpc_get_mount(); 120 dir = rpc_d_lookup_sb(sb, "cache");
121 if (IS_ERR(mnt)) 121 BUG_ON(dir == NULL);
122 return PTR_ERR(mnt); 122 ret = sunrpc_cache_register_pipefs(dir, cd->name, 0600, cd);
123 ret = vfs_path_lookup(mnt->mnt_root, mnt, "/cache", 0, &path); 123 dput(dir);
124 if (ret)
125 goto err;
126 ret = sunrpc_cache_register_pipefs(path.dentry, cd->name, 0600, cd);
127 path_put(&path);
128 if (!ret)
129 return ret;
130err:
131 rpc_put_mount();
132 return ret; 124 return ret;
133} 125}
134 126
135void nfs_cache_unregister(struct cache_detail *cd) 127int nfs_cache_register_net(struct net *net, struct cache_detail *cd)
136{ 128{
137 sunrpc_cache_unregister_pipefs(cd); 129 struct super_block *pipefs_sb;
138 rpc_put_mount(); 130 int ret = 0;
131
132 pipefs_sb = rpc_get_sb_net(net);
133 if (pipefs_sb) {
134 ret = nfs_cache_register_sb(pipefs_sb, cd);
135 rpc_put_sb_net(net);
136 }
137 return ret;
138}
139
140void nfs_cache_unregister_sb(struct super_block *sb, struct cache_detail *cd)
141{
142 if (cd->u.pipefs.dir)
143 sunrpc_cache_unregister_pipefs(cd);
144}
145
146void nfs_cache_unregister_net(struct net *net, struct cache_detail *cd)
147{
148 struct super_block *pipefs_sb;
149
150 pipefs_sb = rpc_get_sb_net(net);
151 if (pipefs_sb) {
152 nfs_cache_unregister_sb(pipefs_sb, cd);
153 rpc_put_sb_net(net);
154 }
155}
156
157void nfs_cache_init(struct cache_detail *cd)
158{
159 sunrpc_init_cache_detail(cd);
139} 160}
140 161
162void nfs_cache_destroy(struct cache_detail *cd)
163{
164 sunrpc_destroy_cache_detail(cd);
165}
diff --git a/fs/nfs/cache_lib.h b/fs/nfs/cache_lib.h
index 7cf6cafcc007..317db95e37f8 100644
--- a/fs/nfs/cache_lib.h
+++ b/fs/nfs/cache_lib.h
@@ -23,5 +23,11 @@ extern struct nfs_cache_defer_req *nfs_cache_defer_req_alloc(void);
23extern void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq); 23extern void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq);
24extern int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq); 24extern int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq);
25 25
26extern int nfs_cache_register(struct cache_detail *cd); 26extern void nfs_cache_init(struct cache_detail *cd);
27extern void nfs_cache_unregister(struct cache_detail *cd); 27extern void nfs_cache_destroy(struct cache_detail *cd);
28extern int nfs_cache_register_net(struct net *net, struct cache_detail *cd);
29extern void nfs_cache_unregister_net(struct net *net, struct cache_detail *cd);
30extern int nfs_cache_register_sb(struct super_block *sb,
31 struct cache_detail *cd);
32extern void nfs_cache_unregister_sb(struct super_block *sb,
33 struct cache_detail *cd);
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 516f3375e067..eb95f5091c1a 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -85,7 +85,7 @@ nfs4_callback_svc(void *vrqstp)
85 } 85 }
86 if (err < 0) { 86 if (err < 0) {
87 if (err != preverr) { 87 if (err != preverr) {
88 printk(KERN_WARNING "%s: unexpected error " 88 printk(KERN_WARNING "NFS: %s: unexpected error "
89 "from svc_recv (%d)\n", __func__, err); 89 "from svc_recv (%d)\n", __func__, err);
90 preverr = err; 90 preverr = err;
91 } 91 }
@@ -101,12 +101,12 @@ nfs4_callback_svc(void *vrqstp)
101/* 101/*
102 * Prepare to bring up the NFSv4 callback service 102 * Prepare to bring up the NFSv4 callback service
103 */ 103 */
104struct svc_rqst * 104static struct svc_rqst *
105nfs4_callback_up(struct svc_serv *serv) 105nfs4_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt)
106{ 106{
107 int ret; 107 int ret;
108 108
109 ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET, 109 ret = svc_create_xprt(serv, "tcp", xprt->xprt_net, PF_INET,
110 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); 110 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
111 if (ret <= 0) 111 if (ret <= 0)
112 goto out_err; 112 goto out_err;
@@ -114,7 +114,7 @@ nfs4_callback_up(struct svc_serv *serv)
114 dprintk("NFS: Callback listener port = %u (af %u)\n", 114 dprintk("NFS: Callback listener port = %u (af %u)\n",
115 nfs_callback_tcpport, PF_INET); 115 nfs_callback_tcpport, PF_INET);
116 116
117 ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET6, 117 ret = svc_create_xprt(serv, "tcp", xprt->xprt_net, PF_INET6,
118 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); 118 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
119 if (ret > 0) { 119 if (ret > 0) {
120 nfs_callback_tcpport6 = ret; 120 nfs_callback_tcpport6 = ret;
@@ -172,7 +172,7 @@ nfs41_callback_svc(void *vrqstp)
172/* 172/*
173 * Bring up the NFSv4.1 callback service 173 * Bring up the NFSv4.1 callback service
174 */ 174 */
175struct svc_rqst * 175static struct svc_rqst *
176nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) 176nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt)
177{ 177{
178 struct svc_rqst *rqstp; 178 struct svc_rqst *rqstp;
@@ -183,7 +183,7 @@ nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt)
183 * fore channel connection. 183 * fore channel connection.
184 * Returns the input port (0) and sets the svc_serv bc_xprt on success 184 * Returns the input port (0) and sets the svc_serv bc_xprt on success
185 */ 185 */
186 ret = svc_create_xprt(serv, "tcp-bc", &init_net, PF_INET, 0, 186 ret = svc_create_xprt(serv, "tcp-bc", xprt->xprt_net, PF_INET, 0,
187 SVC_SOCK_ANONYMOUS); 187 SVC_SOCK_ANONYMOUS);
188 if (ret < 0) { 188 if (ret < 0) {
189 rqstp = ERR_PTR(ret); 189 rqstp = ERR_PTR(ret);
@@ -269,7 +269,7 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt)
269 serv, xprt, &rqstp, &callback_svc); 269 serv, xprt, &rqstp, &callback_svc);
270 if (!minorversion_setup) { 270 if (!minorversion_setup) {
271 /* v4.0 callback setup */ 271 /* v4.0 callback setup */
272 rqstp = nfs4_callback_up(serv); 272 rqstp = nfs4_callback_up(serv, xprt);
273 callback_svc = nfs4_callback_svc; 273 callback_svc = nfs4_callback_svc;
274 } 274 }
275 275
@@ -332,7 +332,6 @@ void nfs_callback_down(int minorversion)
332int 332int
333check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp) 333check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp)
334{ 334{
335 struct rpc_clnt *r = clp->cl_rpcclient;
336 char *p = svc_gss_principal(rqstp); 335 char *p = svc_gss_principal(rqstp);
337 336
338 if (rqstp->rq_authop->flavour != RPC_AUTH_GSS) 337 if (rqstp->rq_authop->flavour != RPC_AUTH_GSS)
@@ -353,7 +352,7 @@ check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp)
353 if (memcmp(p, "nfs@", 4) != 0) 352 if (memcmp(p, "nfs@", 4) != 0)
354 return 0; 353 return 0;
355 p += 4; 354 p += 4;
356 if (strcmp(p, r->cl_server) != 0) 355 if (strcmp(p, clp->cl_hostname) != 0)
357 return 0; 356 return 0;
358 return 1; 357 return 1;
359} 358}
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index c89d3b9e483c..a5527c90a5aa 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -38,7 +38,8 @@ enum nfs4_callback_opnum {
38struct cb_process_state { 38struct cb_process_state {
39 __be32 drc_status; 39 __be32 drc_status;
40 struct nfs_client *clp; 40 struct nfs_client *clp;
41 int slotid; 41 u32 slotid;
42 struct net *net;
42}; 43};
43 44
44struct cb_compound_hdr_arg { 45struct cb_compound_hdr_arg {
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 54cea8ad5a76..1b5d809a105e 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -8,6 +8,7 @@
8#include <linux/nfs4.h> 8#include <linux/nfs4.h>
9#include <linux/nfs_fs.h> 9#include <linux/nfs_fs.h>
10#include <linux/slab.h> 10#include <linux/slab.h>
11#include <linux/rcupdate.h>
11#include "nfs4_fs.h" 12#include "nfs4_fs.h"
12#include "callback.h" 13#include "callback.h"
13#include "delegation.h" 14#include "delegation.h"
@@ -33,7 +34,7 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
33 res->bitmap[0] = res->bitmap[1] = 0; 34 res->bitmap[0] = res->bitmap[1] = 0;
34 res->status = htonl(NFS4ERR_BADHANDLE); 35 res->status = htonl(NFS4ERR_BADHANDLE);
35 36
36 dprintk("NFS: GETATTR callback request from %s\n", 37 dprintk_rcu("NFS: GETATTR callback request from %s\n",
37 rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR)); 38 rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR));
38 39
39 inode = nfs_delegation_find_inode(cps->clp, &args->fh); 40 inode = nfs_delegation_find_inode(cps->clp, &args->fh);
@@ -73,7 +74,7 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
73 if (!cps->clp) /* Always set for v4.0. Set in cb_sequence for v4.1 */ 74 if (!cps->clp) /* Always set for v4.0. Set in cb_sequence for v4.1 */
74 goto out; 75 goto out;
75 76
76 dprintk("NFS: RECALL callback request from %s\n", 77 dprintk_rcu("NFS: RECALL callback request from %s\n",
77 rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR)); 78 rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR));
78 79
79 res = htonl(NFS4ERR_BADHANDLE); 80 res = htonl(NFS4ERR_BADHANDLE);
@@ -86,8 +87,7 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
86 res = 0; 87 res = 0;
87 break; 88 break;
88 case -ENOENT: 89 case -ENOENT:
89 if (res != 0) 90 res = htonl(NFS4ERR_BAD_STATEID);
90 res = htonl(NFS4ERR_BAD_STATEID);
91 break; 91 break;
92 default: 92 default:
93 res = htonl(NFS4ERR_RESOURCE); 93 res = htonl(NFS4ERR_RESOURCE);
@@ -98,52 +98,64 @@ out:
98 return res; 98 return res;
99} 99}
100 100
101int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
102{
103 if (delegation == NULL || memcmp(delegation->stateid.data, stateid->data,
104 sizeof(delegation->stateid.data)) != 0)
105 return 0;
106 return 1;
107}
108
109#if defined(CONFIG_NFS_V4_1) 101#if defined(CONFIG_NFS_V4_1)
110 102
111static u32 initiate_file_draining(struct nfs_client *clp, 103/*
112 struct cb_layoutrecallargs *args) 104 * Lookup a layout by filehandle.
105 *
106 * Note: gets a refcount on the layout hdr and on its respective inode.
107 * Caller must put the layout hdr and the inode.
108 *
109 * TODO: keep track of all layouts (and delegations) in a hash table
110 * hashed by filehandle.
111 */
112static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp, struct nfs_fh *fh)
113{ 113{
114 struct nfs_server *server; 114 struct nfs_server *server;
115 struct pnfs_layout_hdr *lo;
116 struct inode *ino; 115 struct inode *ino;
117 bool found = false; 116 struct pnfs_layout_hdr *lo;
118 u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
119 LIST_HEAD(free_me_list);
120 117
121 spin_lock(&clp->cl_lock);
122 rcu_read_lock();
123 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { 118 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
124 list_for_each_entry(lo, &server->layouts, plh_layouts) { 119 list_for_each_entry(lo, &server->layouts, plh_layouts) {
125 if (nfs_compare_fh(&args->cbl_fh, 120 if (nfs_compare_fh(fh, &NFS_I(lo->plh_inode)->fh))
126 &NFS_I(lo->plh_inode)->fh))
127 continue; 121 continue;
128 ino = igrab(lo->plh_inode); 122 ino = igrab(lo->plh_inode);
129 if (!ino) 123 if (!ino)
130 continue; 124 continue;
131 found = true;
132 /* Without this, layout can be freed as soon
133 * as we release cl_lock.
134 */
135 get_layout_hdr(lo); 125 get_layout_hdr(lo);
136 break; 126 return lo;
137 } 127 }
138 if (found)
139 break;
140 } 128 }
129
130 return NULL;
131}
132
133static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp, struct nfs_fh *fh)
134{
135 struct pnfs_layout_hdr *lo;
136
137 spin_lock(&clp->cl_lock);
138 rcu_read_lock();
139 lo = get_layout_by_fh_locked(clp, fh);
141 rcu_read_unlock(); 140 rcu_read_unlock();
142 spin_unlock(&clp->cl_lock); 141 spin_unlock(&clp->cl_lock);
143 142
144 if (!found) 143 return lo;
144}
145
146static u32 initiate_file_draining(struct nfs_client *clp,
147 struct cb_layoutrecallargs *args)
148{
149 struct inode *ino;
150 struct pnfs_layout_hdr *lo;
151 u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
152 LIST_HEAD(free_me_list);
153
154 lo = get_layout_by_fh(clp, &args->cbl_fh);
155 if (!lo)
145 return NFS4ERR_NOMATCHING_LAYOUT; 156 return NFS4ERR_NOMATCHING_LAYOUT;
146 157
158 ino = lo->plh_inode;
147 spin_lock(&ino->i_lock); 159 spin_lock(&ino->i_lock);
148 if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || 160 if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
149 mark_matching_lsegs_invalid(lo, &free_me_list, 161 mark_matching_lsegs_invalid(lo, &free_me_list,
@@ -213,17 +225,13 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
213static u32 do_callback_layoutrecall(struct nfs_client *clp, 225static u32 do_callback_layoutrecall(struct nfs_client *clp,
214 struct cb_layoutrecallargs *args) 226 struct cb_layoutrecallargs *args)
215{ 227{
216 u32 res = NFS4ERR_DELAY; 228 u32 res;
217 229
218 dprintk("%s enter, type=%i\n", __func__, args->cbl_recall_type); 230 dprintk("%s enter, type=%i\n", __func__, args->cbl_recall_type);
219 if (test_and_set_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state))
220 goto out;
221 if (args->cbl_recall_type == RETURN_FILE) 231 if (args->cbl_recall_type == RETURN_FILE)
222 res = initiate_file_draining(clp, args); 232 res = initiate_file_draining(clp, args);
223 else 233 else
224 res = initiate_bulk_draining(clp, args); 234 res = initiate_bulk_draining(clp, args);
225 clear_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state);
226out:
227 dprintk("%s returning %i\n", __func__, res); 235 dprintk("%s returning %i\n", __func__, res);
228 return res; 236 return res;
229 237
@@ -303,21 +311,6 @@ out:
303 return res; 311 return res;
304} 312}
305 313
306int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
307{
308 if (delegation == NULL)
309 return 0;
310
311 if (stateid->stateid.seqid != 0)
312 return 0;
313 if (memcmp(&delegation->stateid.stateid.other,
314 &stateid->stateid.other,
315 NFS4_STATEID_OTHER_SIZE))
316 return 0;
317
318 return 1;
319}
320
321/* 314/*
322 * Validate the sequenceID sent by the server. 315 * Validate the sequenceID sent by the server.
323 * Return success if the sequenceID is one more than what we last saw on 316 * Return success if the sequenceID is one more than what we last saw on
@@ -441,7 +434,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
441 int i; 434 int i;
442 __be32 status = htonl(NFS4ERR_BADSESSION); 435 __be32 status = htonl(NFS4ERR_BADSESSION);
443 436
444 clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid); 437 clp = nfs4_find_client_sessionid(cps->net, args->csa_addr, &args->csa_sessionid);
445 if (clp == NULL) 438 if (clp == NULL)
446 goto out; 439 goto out;
447 440
@@ -517,7 +510,7 @@ __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy,
517 if (!cps->clp) /* set in cb_sequence */ 510 if (!cps->clp) /* set in cb_sequence */
518 goto out; 511 goto out;
519 512
520 dprintk("NFS: RECALL_ANY callback request from %s\n", 513 dprintk_rcu("NFS: RECALL_ANY callback request from %s\n",
521 rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR)); 514 rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR));
522 515
523 status = cpu_to_be32(NFS4ERR_INVAL); 516 status = cpu_to_be32(NFS4ERR_INVAL);
@@ -552,7 +545,7 @@ __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy,
552 if (!cps->clp) /* set in cb_sequence */ 545 if (!cps->clp) /* set in cb_sequence */
553 goto out; 546 goto out;
554 547
555 dprintk("NFS: CB_RECALL_SLOT request from %s target max slots %d\n", 548 dprintk_rcu("NFS: CB_RECALL_SLOT request from %s target max slots %d\n",
556 rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR), 549 rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR),
557 args->crsa_target_max_slots); 550 args->crsa_target_max_slots);
558 551
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index d50b2742f23b..95bfc243992c 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -9,6 +9,8 @@
9#include <linux/sunrpc/svc.h> 9#include <linux/sunrpc/svc.h>
10#include <linux/nfs4.h> 10#include <linux/nfs4.h>
11#include <linux/nfs_fs.h> 11#include <linux/nfs_fs.h>
12#include <linux/ratelimit.h>
13#include <linux/printk.h>
12#include <linux/slab.h> 14#include <linux/slab.h>
13#include <linux/sunrpc/bc_xprt.h> 15#include <linux/sunrpc/bc_xprt.h>
14#include "nfs4_fs.h" 16#include "nfs4_fs.h"
@@ -73,7 +75,7 @@ static __be32 *read_buf(struct xdr_stream *xdr, int nbytes)
73 75
74 p = xdr_inline_decode(xdr, nbytes); 76 p = xdr_inline_decode(xdr, nbytes);
75 if (unlikely(p == NULL)) 77 if (unlikely(p == NULL))
76 printk(KERN_WARNING "NFSv4 callback reply buffer overflowed!\n"); 78 printk(KERN_WARNING "NFS: NFSv4 callback reply buffer overflowed!\n");
77 return p; 79 return p;
78} 80}
79 81
@@ -138,10 +140,10 @@ static __be32 decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
138{ 140{
139 __be32 *p; 141 __be32 *p;
140 142
141 p = read_buf(xdr, 16); 143 p = read_buf(xdr, NFS4_STATEID_SIZE);
142 if (unlikely(p == NULL)) 144 if (unlikely(p == NULL))
143 return htonl(NFS4ERR_RESOURCE); 145 return htonl(NFS4ERR_RESOURCE);
144 memcpy(stateid->data, p, 16); 146 memcpy(stateid, p, NFS4_STATEID_SIZE);
145 return 0; 147 return 0;
146} 148}
147 149
@@ -155,7 +157,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound
155 return status; 157 return status;
156 /* We do not like overly long tags! */ 158 /* We do not like overly long tags! */
157 if (hdr->taglen > CB_OP_TAGLEN_MAXSZ - 12) { 159 if (hdr->taglen > CB_OP_TAGLEN_MAXSZ - 12) {
158 printk("NFSv4 CALLBACK %s: client sent tag of length %u\n", 160 printk("NFS: NFSv4 CALLBACK %s: client sent tag of length %u\n",
159 __func__, hdr->taglen); 161 __func__, hdr->taglen);
160 return htonl(NFS4ERR_RESOURCE); 162 return htonl(NFS4ERR_RESOURCE);
161 } 163 }
@@ -167,7 +169,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound
167 if (hdr->minorversion <= 1) { 169 if (hdr->minorversion <= 1) {
168 hdr->cb_ident = ntohl(*p++); /* ignored by v4.1 */ 170 hdr->cb_ident = ntohl(*p++); /* ignored by v4.1 */
169 } else { 171 } else {
170 printk(KERN_WARNING "%s: NFSv4 server callback with " 172 pr_warn_ratelimited("NFS: %s: NFSv4 server callback with "
171 "illegal minor version %u!\n", 173 "illegal minor version %u!\n",
172 __func__, hdr->minorversion); 174 __func__, hdr->minorversion);
173 return htonl(NFS4ERR_MINOR_VERS_MISMATCH); 175 return htonl(NFS4ERR_MINOR_VERS_MISMATCH);
@@ -759,14 +761,14 @@ static void nfs4_callback_free_slot(struct nfs4_session *session)
759 * Let the state manager know callback processing done. 761 * Let the state manager know callback processing done.
760 * A single slot, so highest used slotid is either 0 or -1 762 * A single slot, so highest used slotid is either 0 or -1
761 */ 763 */
762 tbl->highest_used_slotid = -1; 764 tbl->highest_used_slotid = NFS4_NO_SLOT;
763 nfs4_check_drain_bc_complete(session); 765 nfs4_check_drain_bc_complete(session);
764 spin_unlock(&tbl->slot_tbl_lock); 766 spin_unlock(&tbl->slot_tbl_lock);
765} 767}
766 768
767static void nfs4_cb_free_slot(struct cb_process_state *cps) 769static void nfs4_cb_free_slot(struct cb_process_state *cps)
768{ 770{
769 if (cps->slotid != -1) 771 if (cps->slotid != NFS4_NO_SLOT)
770 nfs4_callback_free_slot(cps->clp->cl_session); 772 nfs4_callback_free_slot(cps->clp->cl_session);
771} 773}
772 774
@@ -860,7 +862,8 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
860 struct cb_process_state cps = { 862 struct cb_process_state cps = {
861 .drc_status = 0, 863 .drc_status = 0,
862 .clp = NULL, 864 .clp = NULL,
863 .slotid = -1, 865 .slotid = NFS4_NO_SLOT,
866 .net = rqstp->rq_xprt->xpt_net,
864 }; 867 };
865 unsigned int nops = 0; 868 unsigned int nops = 0;
866 869
@@ -876,7 +879,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
876 return rpc_garbage_args; 879 return rpc_garbage_args;
877 880
878 if (hdr_arg.minorversion == 0) { 881 if (hdr_arg.minorversion == 0) {
879 cps.clp = nfs4_find_client_ident(hdr_arg.cb_ident); 882 cps.clp = nfs4_find_client_ident(rqstp->rq_xprt->xpt_net, hdr_arg.cb_ident);
880 if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp)) 883 if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp))
881 return rpc_drop_reply; 884 return rpc_drop_reply;
882 } 885 }
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 31778f74357d..4a108a0a2a60 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -36,9 +36,12 @@
36#include <linux/inet.h> 36#include <linux/inet.h>
37#include <linux/in6.h> 37#include <linux/in6.h>
38#include <linux/slab.h> 38#include <linux/slab.h>
39#include <linux/idr.h>
39#include <net/ipv6.h> 40#include <net/ipv6.h>
40#include <linux/nfs_xdr.h> 41#include <linux/nfs_xdr.h>
41#include <linux/sunrpc/bc_xprt.h> 42#include <linux/sunrpc/bc_xprt.h>
43#include <linux/nsproxy.h>
44#include <linux/pid_namespace.h>
42 45
43#include <asm/system.h> 46#include <asm/system.h>
44 47
@@ -49,15 +52,12 @@
49#include "internal.h" 52#include "internal.h"
50#include "fscache.h" 53#include "fscache.h"
51#include "pnfs.h" 54#include "pnfs.h"
55#include "netns.h"
52 56
53#define NFSDBG_FACILITY NFSDBG_CLIENT 57#define NFSDBG_FACILITY NFSDBG_CLIENT
54 58
55static DEFINE_SPINLOCK(nfs_client_lock);
56static LIST_HEAD(nfs_client_list);
57static LIST_HEAD(nfs_volume_list);
58static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq); 59static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
59#ifdef CONFIG_NFS_V4 60#ifdef CONFIG_NFS_V4
60static DEFINE_IDR(cb_ident_idr); /* Protected by nfs_client_lock */
61 61
62/* 62/*
63 * Get a unique NFSv4.0 callback identifier which will be used 63 * Get a unique NFSv4.0 callback identifier which will be used
@@ -66,15 +66,16 @@ static DEFINE_IDR(cb_ident_idr); /* Protected by nfs_client_lock */
66static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion) 66static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion)
67{ 67{
68 int ret = 0; 68 int ret = 0;
69 struct nfs_net *nn = net_generic(clp->net, nfs_net_id);
69 70
70 if (clp->rpc_ops->version != 4 || minorversion != 0) 71 if (clp->rpc_ops->version != 4 || minorversion != 0)
71 return ret; 72 return ret;
72retry: 73retry:
73 if (!idr_pre_get(&cb_ident_idr, GFP_KERNEL)) 74 if (!idr_pre_get(&nn->cb_ident_idr, GFP_KERNEL))
74 return -ENOMEM; 75 return -ENOMEM;
75 spin_lock(&nfs_client_lock); 76 spin_lock(&nn->nfs_client_lock);
76 ret = idr_get_new(&cb_ident_idr, clp, &clp->cl_cb_ident); 77 ret = idr_get_new(&nn->cb_ident_idr, clp, &clp->cl_cb_ident);
77 spin_unlock(&nfs_client_lock); 78 spin_unlock(&nn->nfs_client_lock);
78 if (ret == -EAGAIN) 79 if (ret == -EAGAIN)
79 goto retry; 80 goto retry;
80 return ret; 81 return ret;
@@ -89,7 +90,7 @@ static bool nfs4_disable_idmapping = true;
89/* 90/*
90 * RPC cruft for NFS 91 * RPC cruft for NFS
91 */ 92 */
92static struct rpc_version *nfs_version[5] = { 93static const struct rpc_version *nfs_version[5] = {
93 [2] = &nfs_version2, 94 [2] = &nfs_version2,
94#ifdef CONFIG_NFS_V3 95#ifdef CONFIG_NFS_V3
95 [3] = &nfs_version3, 96 [3] = &nfs_version3,
@@ -99,7 +100,7 @@ static struct rpc_version *nfs_version[5] = {
99#endif 100#endif
100}; 101};
101 102
102struct rpc_program nfs_program = { 103const struct rpc_program nfs_program = {
103 .name = "nfs", 104 .name = "nfs",
104 .number = NFS_PROGRAM, 105 .number = NFS_PROGRAM,
105 .nrvers = ARRAY_SIZE(nfs_version), 106 .nrvers = ARRAY_SIZE(nfs_version),
@@ -115,11 +116,11 @@ struct rpc_stat nfs_rpcstat = {
115 116
116#ifdef CONFIG_NFS_V3_ACL 117#ifdef CONFIG_NFS_V3_ACL
117static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; 118static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program };
118static struct rpc_version * nfsacl_version[] = { 119static const struct rpc_version *nfsacl_version[] = {
119 [3] = &nfsacl_version3, 120 [3] = &nfsacl_version3,
120}; 121};
121 122
122struct rpc_program nfsacl_program = { 123const struct rpc_program nfsacl_program = {
123 .name = "nfsacl", 124 .name = "nfsacl",
124 .number = NFS_ACL_PROGRAM, 125 .number = NFS_ACL_PROGRAM,
125 .nrvers = ARRAY_SIZE(nfsacl_version), 126 .nrvers = ARRAY_SIZE(nfsacl_version),
@@ -135,6 +136,7 @@ struct nfs_client_initdata {
135 const struct nfs_rpc_ops *rpc_ops; 136 const struct nfs_rpc_ops *rpc_ops;
136 int proto; 137 int proto;
137 u32 minorversion; 138 u32 minorversion;
139 struct net *net;
138}; 140};
139 141
140/* 142/*
@@ -171,6 +173,7 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
171 clp->cl_rpcclient = ERR_PTR(-EINVAL); 173 clp->cl_rpcclient = ERR_PTR(-EINVAL);
172 174
173 clp->cl_proto = cl_init->proto; 175 clp->cl_proto = cl_init->proto;
176 clp->net = get_net(cl_init->net);
174 177
175#ifdef CONFIG_NFS_V4 178#ifdef CONFIG_NFS_V4
176 err = nfs_get_cb_ident_idr(clp, cl_init->minorversion); 179 err = nfs_get_cb_ident_idr(clp, cl_init->minorversion);
@@ -202,8 +205,11 @@ error_0:
202#ifdef CONFIG_NFS_V4_1 205#ifdef CONFIG_NFS_V4_1
203static void nfs4_shutdown_session(struct nfs_client *clp) 206static void nfs4_shutdown_session(struct nfs_client *clp)
204{ 207{
205 if (nfs4_has_session(clp)) 208 if (nfs4_has_session(clp)) {
209 nfs4_deviceid_purge_client(clp);
206 nfs4_destroy_session(clp->cl_session); 210 nfs4_destroy_session(clp->cl_session);
211 }
212
207} 213}
208#else /* CONFIG_NFS_V4_1 */ 214#else /* CONFIG_NFS_V4_1 */
209static void nfs4_shutdown_session(struct nfs_client *clp) 215static void nfs4_shutdown_session(struct nfs_client *clp)
@@ -233,16 +239,20 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
233} 239}
234 240
235/* idr_remove_all is not needed as all id's are removed by nfs_put_client */ 241/* idr_remove_all is not needed as all id's are removed by nfs_put_client */
236void nfs_cleanup_cb_ident_idr(void) 242void nfs_cleanup_cb_ident_idr(struct net *net)
237{ 243{
238 idr_destroy(&cb_ident_idr); 244 struct nfs_net *nn = net_generic(net, nfs_net_id);
245
246 idr_destroy(&nn->cb_ident_idr);
239} 247}
240 248
241/* nfs_client_lock held */ 249/* nfs_client_lock held */
242static void nfs_cb_idr_remove_locked(struct nfs_client *clp) 250static void nfs_cb_idr_remove_locked(struct nfs_client *clp)
243{ 251{
252 struct nfs_net *nn = net_generic(clp->net, nfs_net_id);
253
244 if (clp->cl_cb_ident) 254 if (clp->cl_cb_ident)
245 idr_remove(&cb_ident_idr, clp->cl_cb_ident); 255 idr_remove(&nn->cb_ident_idr, clp->cl_cb_ident);
246} 256}
247 257
248static void pnfs_init_server(struct nfs_server *server) 258static void pnfs_init_server(struct nfs_server *server)
@@ -260,7 +270,7 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
260{ 270{
261} 271}
262 272
263void nfs_cleanup_cb_ident_idr(void) 273void nfs_cleanup_cb_ident_idr(struct net *net)
264{ 274{
265} 275}
266 276
@@ -292,10 +302,10 @@ static void nfs_free_client(struct nfs_client *clp)
292 if (clp->cl_machine_cred != NULL) 302 if (clp->cl_machine_cred != NULL)
293 put_rpccred(clp->cl_machine_cred); 303 put_rpccred(clp->cl_machine_cred);
294 304
295 nfs4_deviceid_purge_client(clp); 305 put_net(clp->net);
296
297 kfree(clp->cl_hostname); 306 kfree(clp->cl_hostname);
298 kfree(clp->server_scope); 307 kfree(clp->server_scope);
308 kfree(clp->impl_id);
299 kfree(clp); 309 kfree(clp);
300 310
301 dprintk("<-- nfs_free_client()\n"); 311 dprintk("<-- nfs_free_client()\n");
@@ -306,15 +316,18 @@ static void nfs_free_client(struct nfs_client *clp)
306 */ 316 */
307void nfs_put_client(struct nfs_client *clp) 317void nfs_put_client(struct nfs_client *clp)
308{ 318{
319 struct nfs_net *nn;
320
309 if (!clp) 321 if (!clp)
310 return; 322 return;
311 323
312 dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count)); 324 dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count));
325 nn = net_generic(clp->net, nfs_net_id);
313 326
314 if (atomic_dec_and_lock(&clp->cl_count, &nfs_client_lock)) { 327 if (atomic_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) {
315 list_del(&clp->cl_share_link); 328 list_del(&clp->cl_share_link);
316 nfs_cb_idr_remove_locked(clp); 329 nfs_cb_idr_remove_locked(clp);
317 spin_unlock(&nfs_client_lock); 330 spin_unlock(&nn->nfs_client_lock);
318 331
319 BUG_ON(!list_empty(&clp->cl_superblocks)); 332 BUG_ON(!list_empty(&clp->cl_superblocks));
320 333
@@ -392,6 +405,7 @@ static int nfs_sockaddr_cmp_ip4(const struct sockaddr *sa1,
392 (sin1->sin_port == sin2->sin_port); 405 (sin1->sin_port == sin2->sin_port);
393} 406}
394 407
408#if defined(CONFIG_NFS_V4_1)
395/* 409/*
396 * Test if two socket addresses represent the same actual socket, 410 * Test if two socket addresses represent the same actual socket,
397 * by comparing (only) relevant fields, excluding the port number. 411 * by comparing (only) relevant fields, excluding the port number.
@@ -410,6 +424,7 @@ static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
410 } 424 }
411 return 0; 425 return 0;
412} 426}
427#endif /* CONFIG_NFS_V4_1 */
413 428
414/* 429/*
415 * Test if two socket addresses represent the same actual socket, 430 * Test if two socket addresses represent the same actual socket,
@@ -430,10 +445,10 @@ static int nfs_sockaddr_cmp(const struct sockaddr *sa1,
430 return 0; 445 return 0;
431} 446}
432 447
448#if defined(CONFIG_NFS_V4_1)
433/* Common match routine for v4.0 and v4.1 callback services */ 449/* Common match routine for v4.0 and v4.1 callback services */
434bool 450static bool nfs4_cb_match_client(const struct sockaddr *addr,
435nfs4_cb_match_client(const struct sockaddr *addr, struct nfs_client *clp, 451 struct nfs_client *clp, u32 minorversion)
436 u32 minorversion)
437{ 452{
438 struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; 453 struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
439 454
@@ -453,6 +468,7 @@ nfs4_cb_match_client(const struct sockaddr *addr, struct nfs_client *clp,
453 468
454 return true; 469 return true;
455} 470}
471#endif /* CONFIG_NFS_V4_1 */
456 472
457/* 473/*
458 * Find an nfs_client on the list that matches the initialisation data 474 * Find an nfs_client on the list that matches the initialisation data
@@ -462,8 +478,9 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
462{ 478{
463 struct nfs_client *clp; 479 struct nfs_client *clp;
464 const struct sockaddr *sap = data->addr; 480 const struct sockaddr *sap = data->addr;
481 struct nfs_net *nn = net_generic(data->net, nfs_net_id);
465 482
466 list_for_each_entry(clp, &nfs_client_list, cl_share_link) { 483 list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
467 const struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; 484 const struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
468 /* Don't match clients that failed to initialise properly */ 485 /* Don't match clients that failed to initialise properly */
469 if (clp->cl_cons_state < 0) 486 if (clp->cl_cons_state < 0)
@@ -501,13 +518,14 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
501{ 518{
502 struct nfs_client *clp, *new = NULL; 519 struct nfs_client *clp, *new = NULL;
503 int error; 520 int error;
521 struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id);
504 522
505 dprintk("--> nfs_get_client(%s,v%u)\n", 523 dprintk("--> nfs_get_client(%s,v%u)\n",
506 cl_init->hostname ?: "", cl_init->rpc_ops->version); 524 cl_init->hostname ?: "", cl_init->rpc_ops->version);
507 525
508 /* see if the client already exists */ 526 /* see if the client already exists */
509 do { 527 do {
510 spin_lock(&nfs_client_lock); 528 spin_lock(&nn->nfs_client_lock);
511 529
512 clp = nfs_match_client(cl_init); 530 clp = nfs_match_client(cl_init);
513 if (clp) 531 if (clp)
@@ -515,7 +533,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
515 if (new) 533 if (new)
516 goto install_client; 534 goto install_client;
517 535
518 spin_unlock(&nfs_client_lock); 536 spin_unlock(&nn->nfs_client_lock);
519 537
520 new = nfs_alloc_client(cl_init); 538 new = nfs_alloc_client(cl_init);
521 } while (!IS_ERR(new)); 539 } while (!IS_ERR(new));
@@ -526,8 +544,8 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
526 /* install a new client and return with it unready */ 544 /* install a new client and return with it unready */
527install_client: 545install_client:
528 clp = new; 546 clp = new;
529 list_add(&clp->cl_share_link, &nfs_client_list); 547 list_add(&clp->cl_share_link, &nn->nfs_client_list);
530 spin_unlock(&nfs_client_lock); 548 spin_unlock(&nn->nfs_client_lock);
531 549
532 error = cl_init->rpc_ops->init_client(clp, timeparms, ip_addr, 550 error = cl_init->rpc_ops->init_client(clp, timeparms, ip_addr,
533 authflavour, noresvport); 551 authflavour, noresvport);
@@ -542,7 +560,7 @@ install_client:
542 * - make sure it's ready before returning 560 * - make sure it's ready before returning
543 */ 561 */
544found_client: 562found_client:
545 spin_unlock(&nfs_client_lock); 563 spin_unlock(&nn->nfs_client_lock);
546 564
547 if (new) 565 if (new)
548 nfs_free_client(new); 566 nfs_free_client(new);
@@ -642,7 +660,7 @@ static int nfs_create_rpc_client(struct nfs_client *clp,
642{ 660{
643 struct rpc_clnt *clnt = NULL; 661 struct rpc_clnt *clnt = NULL;
644 struct rpc_create_args args = { 662 struct rpc_create_args args = {
645 .net = &init_net, 663 .net = clp->net,
646 .protocol = clp->cl_proto, 664 .protocol = clp->cl_proto,
647 .address = (struct sockaddr *)&clp->cl_addr, 665 .address = (struct sockaddr *)&clp->cl_addr,
648 .addrsize = clp->cl_addrlen, 666 .addrsize = clp->cl_addrlen,
@@ -696,6 +714,7 @@ static int nfs_start_lockd(struct nfs_server *server)
696 .nfs_version = clp->rpc_ops->version, 714 .nfs_version = clp->rpc_ops->version,
697 .noresvport = server->flags & NFS_MOUNT_NORESVPORT ? 715 .noresvport = server->flags & NFS_MOUNT_NORESVPORT ?
698 1 : 0, 716 1 : 0,
717 .net = clp->net,
699 }; 718 };
700 719
701 if (nlm_init.nfs_version > 3) 720 if (nlm_init.nfs_version > 3)
@@ -831,6 +850,7 @@ static int nfs_init_server(struct nfs_server *server,
831 .addrlen = data->nfs_server.addrlen, 850 .addrlen = data->nfs_server.addrlen,
832 .rpc_ops = &nfs_v2_clientops, 851 .rpc_ops = &nfs_v2_clientops,
833 .proto = data->nfs_server.protocol, 852 .proto = data->nfs_server.protocol,
853 .net = data->net,
834 }; 854 };
835 struct rpc_timeout timeparms; 855 struct rpc_timeout timeparms;
836 struct nfs_client *clp; 856 struct nfs_client *clp;
@@ -1029,25 +1049,30 @@ static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_serve
1029static void nfs_server_insert_lists(struct nfs_server *server) 1049static void nfs_server_insert_lists(struct nfs_server *server)
1030{ 1050{
1031 struct nfs_client *clp = server->nfs_client; 1051 struct nfs_client *clp = server->nfs_client;
1052 struct nfs_net *nn = net_generic(clp->net, nfs_net_id);
1032 1053
1033 spin_lock(&nfs_client_lock); 1054 spin_lock(&nn->nfs_client_lock);
1034 list_add_tail_rcu(&server->client_link, &clp->cl_superblocks); 1055 list_add_tail_rcu(&server->client_link, &clp->cl_superblocks);
1035 list_add_tail(&server->master_link, &nfs_volume_list); 1056 list_add_tail(&server->master_link, &nn->nfs_volume_list);
1036 clear_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state); 1057 clear_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state);
1037 spin_unlock(&nfs_client_lock); 1058 spin_unlock(&nn->nfs_client_lock);
1038 1059
1039} 1060}
1040 1061
1041static void nfs_server_remove_lists(struct nfs_server *server) 1062static void nfs_server_remove_lists(struct nfs_server *server)
1042{ 1063{
1043 struct nfs_client *clp = server->nfs_client; 1064 struct nfs_client *clp = server->nfs_client;
1065 struct nfs_net *nn;
1044 1066
1045 spin_lock(&nfs_client_lock); 1067 if (clp == NULL)
1068 return;
1069 nn = net_generic(clp->net, nfs_net_id);
1070 spin_lock(&nn->nfs_client_lock);
1046 list_del_rcu(&server->client_link); 1071 list_del_rcu(&server->client_link);
1047 if (clp && list_empty(&clp->cl_superblocks)) 1072 if (list_empty(&clp->cl_superblocks))
1048 set_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state); 1073 set_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state);
1049 list_del(&server->master_link); 1074 list_del(&server->master_link);
1050 spin_unlock(&nfs_client_lock); 1075 spin_unlock(&nn->nfs_client_lock);
1051 1076
1052 synchronize_rcu(); 1077 synchronize_rcu();
1053} 1078}
@@ -1086,6 +1111,8 @@ static struct nfs_server *nfs_alloc_server(void)
1086 return NULL; 1111 return NULL;
1087 } 1112 }
1088 1113
1114 ida_init(&server->openowner_id);
1115 ida_init(&server->lockowner_id);
1089 pnfs_init_server(server); 1116 pnfs_init_server(server);
1090 1117
1091 return server; 1118 return server;
@@ -1111,6 +1138,8 @@ void nfs_free_server(struct nfs_server *server)
1111 1138
1112 nfs_put_client(server->nfs_client); 1139 nfs_put_client(server->nfs_client);
1113 1140
1141 ida_destroy(&server->lockowner_id);
1142 ida_destroy(&server->openowner_id);
1114 nfs_free_iostats(server->io_stats); 1143 nfs_free_iostats(server->io_stats);
1115 bdi_destroy(&server->backing_dev_info); 1144 bdi_destroy(&server->backing_dev_info);
1116 kfree(server); 1145 kfree(server);
@@ -1189,45 +1218,19 @@ error:
1189/* 1218/*
1190 * NFSv4.0 callback thread helper 1219 * NFSv4.0 callback thread helper
1191 * 1220 *
1192 * Find a client by IP address, protocol version, and minorversion
1193 *
1194 * Called from the pg_authenticate method. The callback identifier
1195 * is not used as it has not been decoded.
1196 *
1197 * Returns NULL if no such client
1198 */
1199struct nfs_client *
1200nfs4_find_client_no_ident(const struct sockaddr *addr)
1201{
1202 struct nfs_client *clp;
1203
1204 spin_lock(&nfs_client_lock);
1205 list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
1206 if (nfs4_cb_match_client(addr, clp, 0) == false)
1207 continue;
1208 atomic_inc(&clp->cl_count);
1209 spin_unlock(&nfs_client_lock);
1210 return clp;
1211 }
1212 spin_unlock(&nfs_client_lock);
1213 return NULL;
1214}
1215
1216/*
1217 * NFSv4.0 callback thread helper
1218 *
1219 * Find a client by callback identifier 1221 * Find a client by callback identifier
1220 */ 1222 */
1221struct nfs_client * 1223struct nfs_client *
1222nfs4_find_client_ident(int cb_ident) 1224nfs4_find_client_ident(struct net *net, int cb_ident)
1223{ 1225{
1224 struct nfs_client *clp; 1226 struct nfs_client *clp;
1227 struct nfs_net *nn = net_generic(net, nfs_net_id);
1225 1228
1226 spin_lock(&nfs_client_lock); 1229 spin_lock(&nn->nfs_client_lock);
1227 clp = idr_find(&cb_ident_idr, cb_ident); 1230 clp = idr_find(&nn->cb_ident_idr, cb_ident);
1228 if (clp) 1231 if (clp)
1229 atomic_inc(&clp->cl_count); 1232 atomic_inc(&clp->cl_count);
1230 spin_unlock(&nfs_client_lock); 1233 spin_unlock(&nn->nfs_client_lock);
1231 return clp; 1234 return clp;
1232} 1235}
1233 1236
@@ -1240,13 +1243,14 @@ nfs4_find_client_ident(int cb_ident)
1240 * Returns NULL if no such client 1243 * Returns NULL if no such client
1241 */ 1244 */
1242struct nfs_client * 1245struct nfs_client *
1243nfs4_find_client_sessionid(const struct sockaddr *addr, 1246nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr,
1244 struct nfs4_sessionid *sid) 1247 struct nfs4_sessionid *sid)
1245{ 1248{
1246 struct nfs_client *clp; 1249 struct nfs_client *clp;
1250 struct nfs_net *nn = net_generic(net, nfs_net_id);
1247 1251
1248 spin_lock(&nfs_client_lock); 1252 spin_lock(&nn->nfs_client_lock);
1249 list_for_each_entry(clp, &nfs_client_list, cl_share_link) { 1253 list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
1250 if (nfs4_cb_match_client(addr, clp, 1) == false) 1254 if (nfs4_cb_match_client(addr, clp, 1) == false)
1251 continue; 1255 continue;
1252 1256
@@ -1259,17 +1263,17 @@ nfs4_find_client_sessionid(const struct sockaddr *addr,
1259 continue; 1263 continue;
1260 1264
1261 atomic_inc(&clp->cl_count); 1265 atomic_inc(&clp->cl_count);
1262 spin_unlock(&nfs_client_lock); 1266 spin_unlock(&nn->nfs_client_lock);
1263 return clp; 1267 return clp;
1264 } 1268 }
1265 spin_unlock(&nfs_client_lock); 1269 spin_unlock(&nn->nfs_client_lock);
1266 return NULL; 1270 return NULL;
1267} 1271}
1268 1272
1269#else /* CONFIG_NFS_V4_1 */ 1273#else /* CONFIG_NFS_V4_1 */
1270 1274
1271struct nfs_client * 1275struct nfs_client *
1272nfs4_find_client_sessionid(const struct sockaddr *addr, 1276nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr,
1273 struct nfs4_sessionid *sid) 1277 struct nfs4_sessionid *sid)
1274{ 1278{
1275 return NULL; 1279 return NULL;
@@ -1284,16 +1288,18 @@ static int nfs4_init_callback(struct nfs_client *clp)
1284 int error; 1288 int error;
1285 1289
1286 if (clp->rpc_ops->version == 4) { 1290 if (clp->rpc_ops->version == 4) {
1291 struct rpc_xprt *xprt;
1292
1293 xprt = rcu_dereference_raw(clp->cl_rpcclient->cl_xprt);
1294
1287 if (nfs4_has_session(clp)) { 1295 if (nfs4_has_session(clp)) {
1288 error = xprt_setup_backchannel( 1296 error = xprt_setup_backchannel(xprt,
1289 clp->cl_rpcclient->cl_xprt,
1290 NFS41_BC_MIN_CALLBACKS); 1297 NFS41_BC_MIN_CALLBACKS);
1291 if (error < 0) 1298 if (error < 0)
1292 return error; 1299 return error;
1293 } 1300 }
1294 1301
1295 error = nfs_callback_up(clp->cl_mvops->minor_version, 1302 error = nfs_callback_up(clp->cl_mvops->minor_version, xprt);
1296 clp->cl_rpcclient->cl_xprt);
1297 if (error < 0) { 1303 if (error < 0) {
1298 dprintk("%s: failed to start callback. Error = %d\n", 1304 dprintk("%s: failed to start callback. Error = %d\n",
1299 __func__, error); 1305 __func__, error);
@@ -1344,6 +1350,7 @@ int nfs4_init_client(struct nfs_client *clp,
1344 rpc_authflavor_t authflavour, 1350 rpc_authflavor_t authflavour,
1345 int noresvport) 1351 int noresvport)
1346{ 1352{
1353 char buf[INET6_ADDRSTRLEN + 1];
1347 int error; 1354 int error;
1348 1355
1349 if (clp->cl_cons_state == NFS_CS_READY) { 1356 if (clp->cl_cons_state == NFS_CS_READY) {
@@ -1359,6 +1366,20 @@ int nfs4_init_client(struct nfs_client *clp,
1359 1, noresvport); 1366 1, noresvport);
1360 if (error < 0) 1367 if (error < 0)
1361 goto error; 1368 goto error;
1369
1370 /* If no clientaddr= option was specified, find a usable cb address */
1371 if (ip_addr == NULL) {
1372 struct sockaddr_storage cb_addr;
1373 struct sockaddr *sap = (struct sockaddr *)&cb_addr;
1374
1375 error = rpc_localaddr(clp->cl_rpcclient, sap, sizeof(cb_addr));
1376 if (error < 0)
1377 goto error;
1378 error = rpc_ntop(sap, buf, sizeof(buf));
1379 if (error < 0)
1380 goto error;
1381 ip_addr = (const char *)buf;
1382 }
1362 strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr)); 1383 strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr));
1363 1384
1364 error = nfs_idmap_new(clp); 1385 error = nfs_idmap_new(clp);
@@ -1393,7 +1414,7 @@ static int nfs4_set_client(struct nfs_server *server,
1393 const char *ip_addr, 1414 const char *ip_addr,
1394 rpc_authflavor_t authflavour, 1415 rpc_authflavor_t authflavour,
1395 int proto, const struct rpc_timeout *timeparms, 1416 int proto, const struct rpc_timeout *timeparms,
1396 u32 minorversion) 1417 u32 minorversion, struct net *net)
1397{ 1418{
1398 struct nfs_client_initdata cl_init = { 1419 struct nfs_client_initdata cl_init = {
1399 .hostname = hostname, 1420 .hostname = hostname,
@@ -1402,6 +1423,7 @@ static int nfs4_set_client(struct nfs_server *server,
1402 .rpc_ops = &nfs_v4_clientops, 1423 .rpc_ops = &nfs_v4_clientops,
1403 .proto = proto, 1424 .proto = proto,
1404 .minorversion = minorversion, 1425 .minorversion = minorversion,
1426 .net = net,
1405 }; 1427 };
1406 struct nfs_client *clp; 1428 struct nfs_client *clp;
1407 int error; 1429 int error;
@@ -1453,6 +1475,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
1453 .rpc_ops = &nfs_v4_clientops, 1475 .rpc_ops = &nfs_v4_clientops,
1454 .proto = ds_proto, 1476 .proto = ds_proto,
1455 .minorversion = mds_clp->cl_minorversion, 1477 .minorversion = mds_clp->cl_minorversion,
1478 .net = mds_clp->net,
1456 }; 1479 };
1457 struct rpc_timeout ds_timeout = { 1480 struct rpc_timeout ds_timeout = {
1458 .to_initval = 15 * HZ, 1481 .to_initval = 15 * HZ,
@@ -1580,7 +1603,8 @@ static int nfs4_init_server(struct nfs_server *server,
1580 data->auth_flavors[0], 1603 data->auth_flavors[0],
1581 data->nfs_server.protocol, 1604 data->nfs_server.protocol,
1582 &timeparms, 1605 &timeparms,
1583 data->minorversion); 1606 data->minorversion,
1607 data->net);
1584 if (error < 0) 1608 if (error < 0)
1585 goto error; 1609 goto error;
1586 1610
@@ -1675,9 +1699,10 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
1675 data->addrlen, 1699 data->addrlen,
1676 parent_client->cl_ipaddr, 1700 parent_client->cl_ipaddr,
1677 data->authflavor, 1701 data->authflavor,
1678 parent_server->client->cl_xprt->prot, 1702 rpc_protocol(parent_server->client),
1679 parent_server->client->cl_timeout, 1703 parent_server->client->cl_timeout,
1680 parent_client->cl_mvops->minor_version); 1704 parent_client->cl_mvops->minor_version,
1705 parent_client->net);
1681 if (error < 0) 1706 if (error < 0)
1682 goto error; 1707 goto error;
1683 1708
@@ -1770,6 +1795,18 @@ out_free_server:
1770 return ERR_PTR(error); 1795 return ERR_PTR(error);
1771} 1796}
1772 1797
1798void nfs_clients_init(struct net *net)
1799{
1800 struct nfs_net *nn = net_generic(net, nfs_net_id);
1801
1802 INIT_LIST_HEAD(&nn->nfs_client_list);
1803 INIT_LIST_HEAD(&nn->nfs_volume_list);
1804#ifdef CONFIG_NFS_V4
1805 idr_init(&nn->cb_ident_idr);
1806#endif
1807 spin_lock_init(&nn->nfs_client_lock);
1808}
1809
1773#ifdef CONFIG_PROC_FS 1810#ifdef CONFIG_PROC_FS
1774static struct proc_dir_entry *proc_fs_nfs; 1811static struct proc_dir_entry *proc_fs_nfs;
1775 1812
@@ -1823,13 +1860,15 @@ static int nfs_server_list_open(struct inode *inode, struct file *file)
1823{ 1860{
1824 struct seq_file *m; 1861 struct seq_file *m;
1825 int ret; 1862 int ret;
1863 struct pid_namespace *pid_ns = file->f_dentry->d_sb->s_fs_info;
1864 struct net *net = pid_ns->child_reaper->nsproxy->net_ns;
1826 1865
1827 ret = seq_open(file, &nfs_server_list_ops); 1866 ret = seq_open(file, &nfs_server_list_ops);
1828 if (ret < 0) 1867 if (ret < 0)
1829 return ret; 1868 return ret;
1830 1869
1831 m = file->private_data; 1870 m = file->private_data;
1832 m->private = PDE(inode)->data; 1871 m->private = net;
1833 1872
1834 return 0; 1873 return 0;
1835} 1874}
@@ -1839,9 +1878,11 @@ static int nfs_server_list_open(struct inode *inode, struct file *file)
1839 */ 1878 */
1840static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) 1879static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos)
1841{ 1880{
1881 struct nfs_net *nn = net_generic(m->private, nfs_net_id);
1882
1842 /* lock the list against modification */ 1883 /* lock the list against modification */
1843 spin_lock(&nfs_client_lock); 1884 spin_lock(&nn->nfs_client_lock);
1844 return seq_list_start_head(&nfs_client_list, *_pos); 1885 return seq_list_start_head(&nn->nfs_client_list, *_pos);
1845} 1886}
1846 1887
1847/* 1888/*
@@ -1849,7 +1890,9 @@ static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos)
1849 */ 1890 */
1850static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos) 1891static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos)
1851{ 1892{
1852 return seq_list_next(v, &nfs_client_list, pos); 1893 struct nfs_net *nn = net_generic(p->private, nfs_net_id);
1894
1895 return seq_list_next(v, &nn->nfs_client_list, pos);
1853} 1896}
1854 1897
1855/* 1898/*
@@ -1857,7 +1900,9 @@ static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos)
1857 */ 1900 */
1858static void nfs_server_list_stop(struct seq_file *p, void *v) 1901static void nfs_server_list_stop(struct seq_file *p, void *v)
1859{ 1902{
1860 spin_unlock(&nfs_client_lock); 1903 struct nfs_net *nn = net_generic(p->private, nfs_net_id);
1904
1905 spin_unlock(&nn->nfs_client_lock);
1861} 1906}
1862 1907
1863/* 1908/*
@@ -1866,9 +1911,10 @@ static void nfs_server_list_stop(struct seq_file *p, void *v)
1866static int nfs_server_list_show(struct seq_file *m, void *v) 1911static int nfs_server_list_show(struct seq_file *m, void *v)
1867{ 1912{
1868 struct nfs_client *clp; 1913 struct nfs_client *clp;
1914 struct nfs_net *nn = net_generic(m->private, nfs_net_id);
1869 1915
1870 /* display header on line 1 */ 1916 /* display header on line 1 */
1871 if (v == &nfs_client_list) { 1917 if (v == &nn->nfs_client_list) {
1872 seq_puts(m, "NV SERVER PORT USE HOSTNAME\n"); 1918 seq_puts(m, "NV SERVER PORT USE HOSTNAME\n");
1873 return 0; 1919 return 0;
1874 } 1920 }
@@ -1880,12 +1926,14 @@ static int nfs_server_list_show(struct seq_file *m, void *v)
1880 if (clp->cl_cons_state != NFS_CS_READY) 1926 if (clp->cl_cons_state != NFS_CS_READY)
1881 return 0; 1927 return 0;
1882 1928
1929 rcu_read_lock();
1883 seq_printf(m, "v%u %s %s %3d %s\n", 1930 seq_printf(m, "v%u %s %s %3d %s\n",
1884 clp->rpc_ops->version, 1931 clp->rpc_ops->version,
1885 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), 1932 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR),
1886 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT), 1933 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT),
1887 atomic_read(&clp->cl_count), 1934 atomic_read(&clp->cl_count),
1888 clp->cl_hostname); 1935 clp->cl_hostname);
1936 rcu_read_unlock();
1889 1937
1890 return 0; 1938 return 0;
1891} 1939}
@@ -1897,13 +1945,15 @@ static int nfs_volume_list_open(struct inode *inode, struct file *file)
1897{ 1945{
1898 struct seq_file *m; 1946 struct seq_file *m;
1899 int ret; 1947 int ret;
1948 struct pid_namespace *pid_ns = file->f_dentry->d_sb->s_fs_info;
1949 struct net *net = pid_ns->child_reaper->nsproxy->net_ns;
1900 1950
1901 ret = seq_open(file, &nfs_volume_list_ops); 1951 ret = seq_open(file, &nfs_volume_list_ops);
1902 if (ret < 0) 1952 if (ret < 0)
1903 return ret; 1953 return ret;
1904 1954
1905 m = file->private_data; 1955 m = file->private_data;
1906 m->private = PDE(inode)->data; 1956 m->private = net;
1907 1957
1908 return 0; 1958 return 0;
1909} 1959}
@@ -1913,9 +1963,11 @@ static int nfs_volume_list_open(struct inode *inode, struct file *file)
1913 */ 1963 */
1914static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) 1964static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos)
1915{ 1965{
1966 struct nfs_net *nn = net_generic(m->private, nfs_net_id);
1967
1916 /* lock the list against modification */ 1968 /* lock the list against modification */
1917 spin_lock(&nfs_client_lock); 1969 spin_lock(&nn->nfs_client_lock);
1918 return seq_list_start_head(&nfs_volume_list, *_pos); 1970 return seq_list_start_head(&nn->nfs_volume_list, *_pos);
1919} 1971}
1920 1972
1921/* 1973/*
@@ -1923,7 +1975,9 @@ static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos)
1923 */ 1975 */
1924static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos) 1976static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos)
1925{ 1977{
1926 return seq_list_next(v, &nfs_volume_list, pos); 1978 struct nfs_net *nn = net_generic(p->private, nfs_net_id);
1979
1980 return seq_list_next(v, &nn->nfs_volume_list, pos);
1927} 1981}
1928 1982
1929/* 1983/*
@@ -1931,7 +1985,9 @@ static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos)
1931 */ 1985 */
1932static void nfs_volume_list_stop(struct seq_file *p, void *v) 1986static void nfs_volume_list_stop(struct seq_file *p, void *v)
1933{ 1987{
1934 spin_unlock(&nfs_client_lock); 1988 struct nfs_net *nn = net_generic(p->private, nfs_net_id);
1989
1990 spin_unlock(&nn->nfs_client_lock);
1935} 1991}
1936 1992
1937/* 1993/*
@@ -1942,9 +1998,10 @@ static int nfs_volume_list_show(struct seq_file *m, void *v)
1942 struct nfs_server *server; 1998 struct nfs_server *server;
1943 struct nfs_client *clp; 1999 struct nfs_client *clp;
1944 char dev[8], fsid[17]; 2000 char dev[8], fsid[17];
2001 struct nfs_net *nn = net_generic(m->private, nfs_net_id);
1945 2002
1946 /* display header on line 1 */ 2003 /* display header on line 1 */
1947 if (v == &nfs_volume_list) { 2004 if (v == &nn->nfs_volume_list) {
1948 seq_puts(m, "NV SERVER PORT DEV FSID FSC\n"); 2005 seq_puts(m, "NV SERVER PORT DEV FSID FSC\n");
1949 return 0; 2006 return 0;
1950 } 2007 }
@@ -1959,6 +2016,7 @@ static int nfs_volume_list_show(struct seq_file *m, void *v)
1959 (unsigned long long) server->fsid.major, 2016 (unsigned long long) server->fsid.major,
1960 (unsigned long long) server->fsid.minor); 2017 (unsigned long long) server->fsid.minor);
1961 2018
2019 rcu_read_lock();
1962 seq_printf(m, "v%u %s %s %-7s %-17s %s\n", 2020 seq_printf(m, "v%u %s %s %-7s %-17s %s\n",
1963 clp->rpc_ops->version, 2021 clp->rpc_ops->version,
1964 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), 2022 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR),
@@ -1966,6 +2024,7 @@ static int nfs_volume_list_show(struct seq_file *m, void *v)
1966 dev, 2024 dev,
1967 fsid, 2025 fsid,
1968 nfs_server_fscache_state(server)); 2026 nfs_server_fscache_state(server));
2027 rcu_read_unlock();
1969 2028
1970 return 0; 2029 return 0;
1971} 2030}
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 7f2654069806..89af1d269274 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -105,7 +105,7 @@ again:
105 continue; 105 continue;
106 if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) 106 if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
107 continue; 107 continue;
108 if (memcmp(state->stateid.data, stateid->data, sizeof(state->stateid.data)) != 0) 108 if (!nfs4_stateid_match(&state->stateid, stateid))
109 continue; 109 continue;
110 get_nfs_open_context(ctx); 110 get_nfs_open_context(ctx);
111 spin_unlock(&inode->i_lock); 111 spin_unlock(&inode->i_lock);
@@ -139,8 +139,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred,
139 if (delegation != NULL) { 139 if (delegation != NULL) {
140 spin_lock(&delegation->lock); 140 spin_lock(&delegation->lock);
141 if (delegation->inode != NULL) { 141 if (delegation->inode != NULL) {
142 memcpy(delegation->stateid.data, res->delegation.data, 142 nfs4_stateid_copy(&delegation->stateid, &res->delegation);
143 sizeof(delegation->stateid.data));
144 delegation->type = res->delegation_type; 143 delegation->type = res->delegation_type;
145 delegation->maxsize = res->maxsize; 144 delegation->maxsize = res->maxsize;
146 oldcred = delegation->cred; 145 oldcred = delegation->cred;
@@ -236,8 +235,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
236 delegation = kmalloc(sizeof(*delegation), GFP_NOFS); 235 delegation = kmalloc(sizeof(*delegation), GFP_NOFS);
237 if (delegation == NULL) 236 if (delegation == NULL)
238 return -ENOMEM; 237 return -ENOMEM;
239 memcpy(delegation->stateid.data, res->delegation.data, 238 nfs4_stateid_copy(&delegation->stateid, &res->delegation);
240 sizeof(delegation->stateid.data));
241 delegation->type = res->delegation_type; 239 delegation->type = res->delegation_type;
242 delegation->maxsize = res->maxsize; 240 delegation->maxsize = res->maxsize;
243 delegation->change_attr = inode->i_version; 241 delegation->change_attr = inode->i_version;
@@ -250,19 +248,22 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
250 old_delegation = rcu_dereference_protected(nfsi->delegation, 248 old_delegation = rcu_dereference_protected(nfsi->delegation,
251 lockdep_is_held(&clp->cl_lock)); 249 lockdep_is_held(&clp->cl_lock));
252 if (old_delegation != NULL) { 250 if (old_delegation != NULL) {
253 if (memcmp(&delegation->stateid, &old_delegation->stateid, 251 if (nfs4_stateid_match(&delegation->stateid,
254 sizeof(old_delegation->stateid)) == 0 && 252 &old_delegation->stateid) &&
255 delegation->type == old_delegation->type) { 253 delegation->type == old_delegation->type) {
256 goto out; 254 goto out;
257 } 255 }
258 /* 256 /*
259 * Deal with broken servers that hand out two 257 * Deal with broken servers that hand out two
260 * delegations for the same file. 258 * delegations for the same file.
259 * Allow for upgrades to a WRITE delegation, but
260 * nothing else.
261 */ 261 */
262 dfprintk(FILE, "%s: server %s handed out " 262 dfprintk(FILE, "%s: server %s handed out "
263 "a duplicate delegation!\n", 263 "a duplicate delegation!\n",
264 __func__, clp->cl_hostname); 264 __func__, clp->cl_hostname);
265 if (delegation->type <= old_delegation->type) { 265 if (delegation->type == old_delegation->type ||
266 !(delegation->type & FMODE_WRITE)) {
266 freeme = delegation; 267 freeme = delegation;
267 delegation = NULL; 268 delegation = NULL;
268 goto out; 269 goto out;
@@ -455,17 +456,24 @@ static void nfs_client_mark_return_all_delegation_types(struct nfs_client *clp,
455 rcu_read_unlock(); 456 rcu_read_unlock();
456} 457}
457 458
458static void nfs_client_mark_return_all_delegations(struct nfs_client *clp)
459{
460 nfs_client_mark_return_all_delegation_types(clp, FMODE_READ|FMODE_WRITE);
461}
462
463static void nfs_delegation_run_state_manager(struct nfs_client *clp) 459static void nfs_delegation_run_state_manager(struct nfs_client *clp)
464{ 460{
465 if (test_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) 461 if (test_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state))
466 nfs4_schedule_state_manager(clp); 462 nfs4_schedule_state_manager(clp);
467} 463}
468 464
465void nfs_remove_bad_delegation(struct inode *inode)
466{
467 struct nfs_delegation *delegation;
468
469 delegation = nfs_detach_delegation(NFS_I(inode), NFS_SERVER(inode));
470 if (delegation) {
471 nfs_inode_find_state_and_recover(inode, &delegation->stateid);
472 nfs_free_delegation(delegation);
473 }
474}
475EXPORT_SYMBOL_GPL(nfs_remove_bad_delegation);
476
469/** 477/**
470 * nfs_expire_all_delegation_types 478 * nfs_expire_all_delegation_types
471 * @clp: client to process 479 * @clp: client to process
@@ -488,18 +496,6 @@ void nfs_expire_all_delegations(struct nfs_client *clp)
488 nfs_expire_all_delegation_types(clp, FMODE_READ|FMODE_WRITE); 496 nfs_expire_all_delegation_types(clp, FMODE_READ|FMODE_WRITE);
489} 497}
490 498
491/**
492 * nfs_handle_cb_pathdown - return all delegations after NFS4ERR_CB_PATH_DOWN
493 * @clp: client to process
494 *
495 */
496void nfs_handle_cb_pathdown(struct nfs_client *clp)
497{
498 if (clp == NULL)
499 return;
500 nfs_client_mark_return_all_delegations(clp);
501}
502
503static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server) 499static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server)
504{ 500{
505 struct nfs_delegation *delegation; 501 struct nfs_delegation *delegation;
@@ -531,7 +527,7 @@ void nfs_expire_unreferenced_delegations(struct nfs_client *clp)
531/** 527/**
532 * nfs_async_inode_return_delegation - asynchronously return a delegation 528 * nfs_async_inode_return_delegation - asynchronously return a delegation
533 * @inode: inode to process 529 * @inode: inode to process
534 * @stateid: state ID information from CB_RECALL arguments 530 * @stateid: state ID information
535 * 531 *
536 * Returns zero on success, or a negative errno value. 532 * Returns zero on success, or a negative errno value.
537 */ 533 */
@@ -545,7 +541,7 @@ int nfs_async_inode_return_delegation(struct inode *inode,
545 rcu_read_lock(); 541 rcu_read_lock();
546 delegation = rcu_dereference(NFS_I(inode)->delegation); 542 delegation = rcu_dereference(NFS_I(inode)->delegation);
547 543
548 if (!clp->cl_mvops->validate_stateid(delegation, stateid)) { 544 if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid)) {
549 rcu_read_unlock(); 545 rcu_read_unlock();
550 return -ENOENT; 546 return -ENOENT;
551 } 547 }
@@ -684,21 +680,25 @@ int nfs_delegations_present(struct nfs_client *clp)
684 * nfs4_copy_delegation_stateid - Copy inode's state ID information 680 * nfs4_copy_delegation_stateid - Copy inode's state ID information
685 * @dst: stateid data structure to fill in 681 * @dst: stateid data structure to fill in
686 * @inode: inode to check 682 * @inode: inode to check
683 * @flags: delegation type requirement
687 * 684 *
688 * Returns one and fills in "dst->data" * if inode had a delegation, 685 * Returns "true" and fills in "dst->data" * if inode had a delegation,
689 * otherwise zero is returned. 686 * otherwise "false" is returned.
690 */ 687 */
691int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode) 688bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode,
689 fmode_t flags)
692{ 690{
693 struct nfs_inode *nfsi = NFS_I(inode); 691 struct nfs_inode *nfsi = NFS_I(inode);
694 struct nfs_delegation *delegation; 692 struct nfs_delegation *delegation;
695 int ret = 0; 693 bool ret;
696 694
695 flags &= FMODE_READ|FMODE_WRITE;
697 rcu_read_lock(); 696 rcu_read_lock();
698 delegation = rcu_dereference(nfsi->delegation); 697 delegation = rcu_dereference(nfsi->delegation);
699 if (delegation != NULL) { 698 ret = (delegation != NULL && (delegation->type & flags) == flags);
700 memcpy(dst->data, delegation->stateid.data, sizeof(dst->data)); 699 if (ret) {
701 ret = 1; 700 nfs4_stateid_copy(dst, &delegation->stateid);
701 nfs_mark_delegation_referenced(delegation);
702 } 702 }
703 rcu_read_unlock(); 703 rcu_read_unlock();
704 return ret; 704 return ret;
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index d9322e490c56..cd6a7a8dadae 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -42,9 +42,9 @@ void nfs_super_return_all_delegations(struct super_block *sb);
42void nfs_expire_all_delegations(struct nfs_client *clp); 42void nfs_expire_all_delegations(struct nfs_client *clp);
43void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags); 43void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags);
44void nfs_expire_unreferenced_delegations(struct nfs_client *clp); 44void nfs_expire_unreferenced_delegations(struct nfs_client *clp);
45void nfs_handle_cb_pathdown(struct nfs_client *clp);
46int nfs_client_return_marked_delegations(struct nfs_client *clp); 45int nfs_client_return_marked_delegations(struct nfs_client *clp);
47int nfs_delegations_present(struct nfs_client *clp); 46int nfs_delegations_present(struct nfs_client *clp);
47void nfs_remove_bad_delegation(struct inode *inode);
48 48
49void nfs_delegation_mark_reclaim(struct nfs_client *clp); 49void nfs_delegation_mark_reclaim(struct nfs_client *clp);
50void nfs_delegation_reap_unclaimed(struct nfs_client *clp); 50void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
@@ -53,7 +53,7 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
53int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync); 53int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync);
54int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid); 54int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid);
55int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl); 55int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl);
56int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode); 56bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_t flags);
57 57
58void nfs_mark_delegation_referenced(struct nfs_delegation *delegation); 58void nfs_mark_delegation_referenced(struct nfs_delegation *delegation);
59int nfs_have_delegation(struct inode *inode, fmode_t flags); 59int nfs_have_delegation(struct inode *inode, fmode_t flags);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index fd9a872fada0..4aaf0316d76a 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -207,7 +207,7 @@ struct nfs_cache_array_entry {
207}; 207};
208 208
209struct nfs_cache_array { 209struct nfs_cache_array {
210 unsigned int size; 210 int size;
211 int eof_index; 211 int eof_index;
212 u64 last_cookie; 212 u64 last_cookie;
213 struct nfs_cache_array_entry array[0]; 213 struct nfs_cache_array_entry array[0];
@@ -260,10 +260,10 @@ void nfs_readdir_clear_array(struct page *page)
260 struct nfs_cache_array *array; 260 struct nfs_cache_array *array;
261 int i; 261 int i;
262 262
263 array = kmap_atomic(page, KM_USER0); 263 array = kmap_atomic(page);
264 for (i = 0; i < array->size; i++) 264 for (i = 0; i < array->size; i++)
265 kfree(array->array[i].string.name); 265 kfree(array->array[i].string.name);
266 kunmap_atomic(array, KM_USER0); 266 kunmap_atomic(array);
267} 267}
268 268
269/* 269/*
@@ -1429,6 +1429,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
1429 } 1429 }
1430 1430
1431 open_flags = nd->intent.open.flags; 1431 open_flags = nd->intent.open.flags;
1432 attr.ia_valid = 0;
1432 1433
1433 ctx = create_nfs_open_context(dentry, open_flags); 1434 ctx = create_nfs_open_context(dentry, open_flags);
1434 res = ERR_CAST(ctx); 1435 res = ERR_CAST(ctx);
@@ -1437,11 +1438,14 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
1437 1438
1438 if (nd->flags & LOOKUP_CREATE) { 1439 if (nd->flags & LOOKUP_CREATE) {
1439 attr.ia_mode = nd->intent.open.create_mode; 1440 attr.ia_mode = nd->intent.open.create_mode;
1440 attr.ia_valid = ATTR_MODE; 1441 attr.ia_valid |= ATTR_MODE;
1441 attr.ia_mode &= ~current_umask(); 1442 attr.ia_mode &= ~current_umask();
1442 } else { 1443 } else
1443 open_flags &= ~(O_EXCL | O_CREAT); 1444 open_flags &= ~(O_EXCL | O_CREAT);
1444 attr.ia_valid = 0; 1445
1446 if (open_flags & O_TRUNC) {
1447 attr.ia_valid |= ATTR_SIZE;
1448 attr.ia_size = 0;
1445 } 1449 }
1446 1450
1447 /* Open the file on the server */ 1451 /* Open the file on the server */
@@ -1495,6 +1499,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
1495 struct inode *inode; 1499 struct inode *inode;
1496 struct inode *dir; 1500 struct inode *dir;
1497 struct nfs_open_context *ctx; 1501 struct nfs_open_context *ctx;
1502 struct iattr attr;
1498 int openflags, ret = 0; 1503 int openflags, ret = 0;
1499 1504
1500 if (nd->flags & LOOKUP_RCU) 1505 if (nd->flags & LOOKUP_RCU)
@@ -1523,19 +1528,27 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
1523 /* We cannot do exclusive creation on a positive dentry */ 1528 /* We cannot do exclusive creation on a positive dentry */
1524 if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) 1529 if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
1525 goto no_open_dput; 1530 goto no_open_dput;
1526 /* We can't create new files, or truncate existing ones here */ 1531 /* We can't create new files here */
1527 openflags &= ~(O_CREAT|O_EXCL|O_TRUNC); 1532 openflags &= ~(O_CREAT|O_EXCL);
1528 1533
1529 ctx = create_nfs_open_context(dentry, openflags); 1534 ctx = create_nfs_open_context(dentry, openflags);
1530 ret = PTR_ERR(ctx); 1535 ret = PTR_ERR(ctx);
1531 if (IS_ERR(ctx)) 1536 if (IS_ERR(ctx))
1532 goto out; 1537 goto out;
1538
1539 attr.ia_valid = 0;
1540 if (openflags & O_TRUNC) {
1541 attr.ia_valid |= ATTR_SIZE;
1542 attr.ia_size = 0;
1543 nfs_wb_all(inode);
1544 }
1545
1533 /* 1546 /*
1534 * Note: we're not holding inode->i_mutex and so may be racing with 1547 * Note: we're not holding inode->i_mutex and so may be racing with
1535 * operations that change the directory. We therefore save the 1548 * operations that change the directory. We therefore save the
1536 * change attribute *before* we do the RPC call. 1549 * change attribute *before* we do the RPC call.
1537 */ 1550 */
1538 inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, NULL); 1551 inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr);
1539 if (IS_ERR(inode)) { 1552 if (IS_ERR(inode)) {
1540 ret = PTR_ERR(inode); 1553 ret = PTR_ERR(inode);
1541 switch (ret) { 1554 switch (ret) {
@@ -1870,11 +1883,11 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
1870 if (!page) 1883 if (!page)
1871 return -ENOMEM; 1884 return -ENOMEM;
1872 1885
1873 kaddr = kmap_atomic(page, KM_USER0); 1886 kaddr = kmap_atomic(page);
1874 memcpy(kaddr, symname, pathlen); 1887 memcpy(kaddr, symname, pathlen);
1875 if (pathlen < PAGE_SIZE) 1888 if (pathlen < PAGE_SIZE)
1876 memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen); 1889 memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen);
1877 kunmap_atomic(kaddr, KM_USER0); 1890 kunmap_atomic(kaddr);
1878 1891
1879 error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr); 1892 error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr);
1880 if (error != 0) { 1893 if (error != 0) {
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 1940f1a56a5f..9c7f66ac6cc2 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -265,9 +265,7 @@ static void nfs_direct_read_release(void *calldata)
265} 265}
266 266
267static const struct rpc_call_ops nfs_read_direct_ops = { 267static const struct rpc_call_ops nfs_read_direct_ops = {
268#if defined(CONFIG_NFS_V4_1)
269 .rpc_call_prepare = nfs_read_prepare, 268 .rpc_call_prepare = nfs_read_prepare,
270#endif /* CONFIG_NFS_V4_1 */
271 .rpc_call_done = nfs_direct_read_result, 269 .rpc_call_done = nfs_direct_read_result,
272 .rpc_release = nfs_direct_read_release, 270 .rpc_release = nfs_direct_read_release,
273}; 271};
@@ -554,9 +552,7 @@ static void nfs_direct_commit_release(void *calldata)
554} 552}
555 553
556static const struct rpc_call_ops nfs_commit_direct_ops = { 554static const struct rpc_call_ops nfs_commit_direct_ops = {
557#if defined(CONFIG_NFS_V4_1)
558 .rpc_call_prepare = nfs_write_prepare, 555 .rpc_call_prepare = nfs_write_prepare,
559#endif /* CONFIG_NFS_V4_1 */
560 .rpc_call_done = nfs_direct_commit_result, 556 .rpc_call_done = nfs_direct_commit_result,
561 .rpc_release = nfs_direct_commit_release, 557 .rpc_release = nfs_direct_commit_release,
562}; 558};
@@ -696,9 +692,7 @@ out_unlock:
696} 692}
697 693
698static const struct rpc_call_ops nfs_write_direct_ops = { 694static const struct rpc_call_ops nfs_write_direct_ops = {
699#if defined(CONFIG_NFS_V4_1)
700 .rpc_call_prepare = nfs_write_prepare, 695 .rpc_call_prepare = nfs_write_prepare,
701#endif /* CONFIG_NFS_V4_1 */
702 .rpc_call_done = nfs_direct_write_result, 696 .rpc_call_done = nfs_direct_write_result,
703 .rpc_release = nfs_direct_write_release, 697 .rpc_release = nfs_direct_write_release,
704}; 698};
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index a6e711ad130f..b3924b8a6000 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -10,8 +10,9 @@
10 10
11#include <linux/sunrpc/clnt.h> 11#include <linux/sunrpc/clnt.h>
12#include <linux/dns_resolver.h> 12#include <linux/dns_resolver.h>
13#include "dns_resolve.h"
13 14
14ssize_t nfs_dns_resolve_name(char *name, size_t namelen, 15ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen,
15 struct sockaddr *sa, size_t salen) 16 struct sockaddr *sa, size_t salen)
16{ 17{
17 ssize_t ret; 18 ssize_t ret;
@@ -20,7 +21,7 @@ ssize_t nfs_dns_resolve_name(char *name, size_t namelen,
20 21
21 ip_len = dns_query(NULL, name, namelen, NULL, &ip_addr, NULL); 22 ip_len = dns_query(NULL, name, namelen, NULL, &ip_addr, NULL);
22 if (ip_len > 0) 23 if (ip_len > 0)
23 ret = rpc_pton(ip_addr, ip_len, sa, salen); 24 ret = rpc_pton(net, ip_addr, ip_len, sa, salen);
24 else 25 else
25 ret = -ESRCH; 26 ret = -ESRCH;
26 kfree(ip_addr); 27 kfree(ip_addr);
@@ -40,15 +41,15 @@ ssize_t nfs_dns_resolve_name(char *name, size_t namelen,
40#include <linux/sunrpc/clnt.h> 41#include <linux/sunrpc/clnt.h>
41#include <linux/sunrpc/cache.h> 42#include <linux/sunrpc/cache.h>
42#include <linux/sunrpc/svcauth.h> 43#include <linux/sunrpc/svcauth.h>
44#include <linux/sunrpc/rpc_pipe_fs.h>
43 45
44#include "dns_resolve.h" 46#include "dns_resolve.h"
45#include "cache_lib.h" 47#include "cache_lib.h"
48#include "netns.h"
46 49
47#define NFS_DNS_HASHBITS 4 50#define NFS_DNS_HASHBITS 4
48#define NFS_DNS_HASHTBL_SIZE (1 << NFS_DNS_HASHBITS) 51#define NFS_DNS_HASHTBL_SIZE (1 << NFS_DNS_HASHBITS)
49 52
50static struct cache_head *nfs_dns_table[NFS_DNS_HASHTBL_SIZE];
51
52struct nfs_dns_ent { 53struct nfs_dns_ent {
53 struct cache_head h; 54 struct cache_head h;
54 55
@@ -224,7 +225,7 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen)
224 len = qword_get(&buf, buf1, sizeof(buf1)); 225 len = qword_get(&buf, buf1, sizeof(buf1));
225 if (len <= 0) 226 if (len <= 0)
226 goto out; 227 goto out;
227 key.addrlen = rpc_pton(buf1, len, 228 key.addrlen = rpc_pton(cd->net, buf1, len,
228 (struct sockaddr *)&key.addr, 229 (struct sockaddr *)&key.addr,
229 sizeof(key.addr)); 230 sizeof(key.addr));
230 231
@@ -259,21 +260,6 @@ out:
259 return ret; 260 return ret;
260} 261}
261 262
262static struct cache_detail nfs_dns_resolve = {
263 .owner = THIS_MODULE,
264 .hash_size = NFS_DNS_HASHTBL_SIZE,
265 .hash_table = nfs_dns_table,
266 .name = "dns_resolve",
267 .cache_put = nfs_dns_ent_put,
268 .cache_upcall = nfs_dns_upcall,
269 .cache_parse = nfs_dns_parse,
270 .cache_show = nfs_dns_show,
271 .match = nfs_dns_match,
272 .init = nfs_dns_ent_init,
273 .update = nfs_dns_ent_update,
274 .alloc = nfs_dns_ent_alloc,
275};
276
277static int do_cache_lookup(struct cache_detail *cd, 263static int do_cache_lookup(struct cache_detail *cd,
278 struct nfs_dns_ent *key, 264 struct nfs_dns_ent *key,
279 struct nfs_dns_ent **item, 265 struct nfs_dns_ent **item,
@@ -336,8 +322,8 @@ out:
336 return ret; 322 return ret;
337} 323}
338 324
339ssize_t nfs_dns_resolve_name(char *name, size_t namelen, 325ssize_t nfs_dns_resolve_name(struct net *net, char *name,
340 struct sockaddr *sa, size_t salen) 326 size_t namelen, struct sockaddr *sa, size_t salen)
341{ 327{
342 struct nfs_dns_ent key = { 328 struct nfs_dns_ent key = {
343 .hostname = name, 329 .hostname = name,
@@ -345,28 +331,118 @@ ssize_t nfs_dns_resolve_name(char *name, size_t namelen,
345 }; 331 };
346 struct nfs_dns_ent *item = NULL; 332 struct nfs_dns_ent *item = NULL;
347 ssize_t ret; 333 ssize_t ret;
334 struct nfs_net *nn = net_generic(net, nfs_net_id);
348 335
349 ret = do_cache_lookup_wait(&nfs_dns_resolve, &key, &item); 336 ret = do_cache_lookup_wait(nn->nfs_dns_resolve, &key, &item);
350 if (ret == 0) { 337 if (ret == 0) {
351 if (salen >= item->addrlen) { 338 if (salen >= item->addrlen) {
352 memcpy(sa, &item->addr, item->addrlen); 339 memcpy(sa, &item->addr, item->addrlen);
353 ret = item->addrlen; 340 ret = item->addrlen;
354 } else 341 } else
355 ret = -EOVERFLOW; 342 ret = -EOVERFLOW;
356 cache_put(&item->h, &nfs_dns_resolve); 343 cache_put(&item->h, nn->nfs_dns_resolve);
357 } else if (ret == -ENOENT) 344 } else if (ret == -ENOENT)
358 ret = -ESRCH; 345 ret = -ESRCH;
359 return ret; 346 return ret;
360} 347}
361 348
349int nfs_dns_resolver_cache_init(struct net *net)
350{
351 int err = -ENOMEM;
352 struct nfs_net *nn = net_generic(net, nfs_net_id);
353 struct cache_detail *cd;
354 struct cache_head **tbl;
355
356 cd = kzalloc(sizeof(struct cache_detail), GFP_KERNEL);
357 if (cd == NULL)
358 goto err_cd;
359
360 tbl = kzalloc(NFS_DNS_HASHTBL_SIZE * sizeof(struct cache_head *),
361 GFP_KERNEL);
362 if (tbl == NULL)
363 goto err_tbl;
364
365 cd->owner = THIS_MODULE,
366 cd->hash_size = NFS_DNS_HASHTBL_SIZE,
367 cd->hash_table = tbl,
368 cd->name = "dns_resolve",
369 cd->cache_put = nfs_dns_ent_put,
370 cd->cache_upcall = nfs_dns_upcall,
371 cd->cache_parse = nfs_dns_parse,
372 cd->cache_show = nfs_dns_show,
373 cd->match = nfs_dns_match,
374 cd->init = nfs_dns_ent_init,
375 cd->update = nfs_dns_ent_update,
376 cd->alloc = nfs_dns_ent_alloc,
377
378 nfs_cache_init(cd);
379 err = nfs_cache_register_net(net, cd);
380 if (err)
381 goto err_reg;
382 nn->nfs_dns_resolve = cd;
383 return 0;
384
385err_reg:
386 nfs_cache_destroy(cd);
387 kfree(cd->hash_table);
388err_tbl:
389 kfree(cd);
390err_cd:
391 return err;
392}
393
394void nfs_dns_resolver_cache_destroy(struct net *net)
395{
396 struct nfs_net *nn = net_generic(net, nfs_net_id);
397 struct cache_detail *cd = nn->nfs_dns_resolve;
398
399 nfs_cache_unregister_net(net, cd);
400 nfs_cache_destroy(cd);
401 kfree(cd->hash_table);
402 kfree(cd);
403}
404
405static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event,
406 void *ptr)
407{
408 struct super_block *sb = ptr;
409 struct net *net = sb->s_fs_info;
410 struct nfs_net *nn = net_generic(net, nfs_net_id);
411 struct cache_detail *cd = nn->nfs_dns_resolve;
412 int ret = 0;
413
414 if (cd == NULL)
415 return 0;
416
417 if (!try_module_get(THIS_MODULE))
418 return 0;
419
420 switch (event) {
421 case RPC_PIPEFS_MOUNT:
422 ret = nfs_cache_register_sb(sb, cd);
423 break;
424 case RPC_PIPEFS_UMOUNT:
425 nfs_cache_unregister_sb(sb, cd);
426 break;
427 default:
428 ret = -ENOTSUPP;
429 break;
430 }
431 module_put(THIS_MODULE);
432 return ret;
433}
434
435static struct notifier_block nfs_dns_resolver_block = {
436 .notifier_call = rpc_pipefs_event,
437};
438
362int nfs_dns_resolver_init(void) 439int nfs_dns_resolver_init(void)
363{ 440{
364 return nfs_cache_register(&nfs_dns_resolve); 441 return rpc_pipefs_notifier_register(&nfs_dns_resolver_block);
365} 442}
366 443
367void nfs_dns_resolver_destroy(void) 444void nfs_dns_resolver_destroy(void)
368{ 445{
369 nfs_cache_unregister(&nfs_dns_resolve); 446 rpc_pipefs_notifier_unregister(&nfs_dns_resolver_block);
370} 447}
371
372#endif 448#endif
diff --git a/fs/nfs/dns_resolve.h b/fs/nfs/dns_resolve.h
index 199bb5543a91..2e4f596d2923 100644
--- a/fs/nfs/dns_resolve.h
+++ b/fs/nfs/dns_resolve.h
@@ -15,12 +15,22 @@ static inline int nfs_dns_resolver_init(void)
15 15
16static inline void nfs_dns_resolver_destroy(void) 16static inline void nfs_dns_resolver_destroy(void)
17{} 17{}
18
19static inline int nfs_dns_resolver_cache_init(struct net *net)
20{
21 return 0;
22}
23
24static inline void nfs_dns_resolver_cache_destroy(struct net *net)
25{}
18#else 26#else
19extern int nfs_dns_resolver_init(void); 27extern int nfs_dns_resolver_init(void);
20extern void nfs_dns_resolver_destroy(void); 28extern void nfs_dns_resolver_destroy(void);
29extern int nfs_dns_resolver_cache_init(struct net *net);
30extern void nfs_dns_resolver_cache_destroy(struct net *net);
21#endif 31#endif
22 32
23extern ssize_t nfs_dns_resolve_name(char *name, size_t namelen, 33extern ssize_t nfs_dns_resolve_name(struct net *net, char *name,
24 struct sockaddr *sa, size_t salen); 34 size_t namelen, struct sockaddr *sa, size_t salen);
25 35
26#endif 36#endif
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index c43a452f7da2..4fdaaa63cf1c 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -530,6 +530,8 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
530 if (mapping != dentry->d_inode->i_mapping) 530 if (mapping != dentry->d_inode->i_mapping)
531 goto out_unlock; 531 goto out_unlock;
532 532
533 wait_on_page_writeback(page);
534
533 pagelen = nfs_page_length(page); 535 pagelen = nfs_page_length(page);
534 if (pagelen == 0) 536 if (pagelen == 0)
535 goto out_unlock; 537 goto out_unlock;
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index 419119c371bf..ae65c16b3670 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -327,7 +327,7 @@ void nfs_fscache_reset_inode_cookie(struct inode *inode)
327{ 327{
328 struct nfs_inode *nfsi = NFS_I(inode); 328 struct nfs_inode *nfsi = NFS_I(inode);
329 struct nfs_server *nfss = NFS_SERVER(inode); 329 struct nfs_server *nfss = NFS_SERVER(inode);
330 struct fscache_cookie *old = nfsi->fscache; 330 NFS_IFDEBUG(struct fscache_cookie *old = nfsi->fscache);
331 331
332 nfs_fscache_inode_lock(inode); 332 nfs_fscache_inode_lock(inode);
333 if (nfsi->fscache) { 333 if (nfsi->fscache) {
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index dcb61548887f..801d6d830787 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -49,11 +49,9 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
49{ 49{
50 /* The mntroot acts as the dummy root dentry for this superblock */ 50 /* The mntroot acts as the dummy root dentry for this superblock */
51 if (sb->s_root == NULL) { 51 if (sb->s_root == NULL) {
52 sb->s_root = d_alloc_root(inode); 52 sb->s_root = d_make_root(inode);
53 if (sb->s_root == NULL) { 53 if (sb->s_root == NULL)
54 iput(inode);
55 return -ENOMEM; 54 return -ENOMEM;
56 }
57 ihold(inode); 55 ihold(inode);
58 /* 56 /*
59 * Ensure that this dentry is invisible to d_find_alias(). 57 * Ensure that this dentry is invisible to d_find_alias().
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 2c05f1991e1e..b7f348bb618b 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -34,11 +34,29 @@
34 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 */ 35 */
36#include <linux/types.h> 36#include <linux/types.h>
37#include <linux/string.h> 37#include <linux/parser.h>
38#include <linux/kernel.h> 38#include <linux/fs.h>
39#include <linux/slab.h>
40#include <linux/nfs_idmap.h> 39#include <linux/nfs_idmap.h>
40#include <net/net_namespace.h>
41#include <linux/sunrpc/rpc_pipe_fs.h>
41#include <linux/nfs_fs.h> 42#include <linux/nfs_fs.h>
43#include <linux/nfs_fs_sb.h>
44#include <linux/key.h>
45#include <linux/keyctl.h>
46#include <linux/key-type.h>
47#include <keys/user-type.h>
48#include <linux/module.h>
49
50#include "internal.h"
51#include "netns.h"
52
53#define NFS_UINT_MAXLEN 11
54
55/* Default cache timeout is 10 minutes */
56unsigned int nfs_idmap_cache_timeout = 600;
57static const struct cred *id_resolver_cache;
58static struct key_type key_type_id_resolver_legacy;
59
42 60
43/** 61/**
44 * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields 62 * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields
@@ -142,24 +160,7 @@ static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen)
142 return snprintf(buf, buflen, "%u", id); 160 return snprintf(buf, buflen, "%u", id);
143} 161}
144 162
145#ifdef CONFIG_NFS_USE_NEW_IDMAPPER 163static struct key_type key_type_id_resolver = {
146
147#include <linux/cred.h>
148#include <linux/sunrpc/sched.h>
149#include <linux/nfs4.h>
150#include <linux/nfs_fs_sb.h>
151#include <linux/keyctl.h>
152#include <linux/key-type.h>
153#include <linux/rcupdate.h>
154#include <linux/err.h>
155
156#include <keys/user-type.h>
157
158#define NFS_UINT_MAXLEN 11
159
160const struct cred *id_resolver_cache;
161
162struct key_type key_type_id_resolver = {
163 .name = "id_resolver", 164 .name = "id_resolver",
164 .instantiate = user_instantiate, 165 .instantiate = user_instantiate,
165 .match = user_match, 166 .match = user_match,
@@ -169,13 +170,14 @@ struct key_type key_type_id_resolver = {
169 .read = user_read, 170 .read = user_read,
170}; 171};
171 172
172int nfs_idmap_init(void) 173static int nfs_idmap_init_keyring(void)
173{ 174{
174 struct cred *cred; 175 struct cred *cred;
175 struct key *keyring; 176 struct key *keyring;
176 int ret = 0; 177 int ret = 0;
177 178
178 printk(KERN_NOTICE "Registering the %s key type\n", key_type_id_resolver.name); 179 printk(KERN_NOTICE "NFS: Registering the %s key type\n",
180 key_type_id_resolver.name);
179 181
180 cred = prepare_kernel_cred(NULL); 182 cred = prepare_kernel_cred(NULL);
181 if (!cred) 183 if (!cred)
@@ -198,6 +200,7 @@ int nfs_idmap_init(void)
198 if (ret < 0) 200 if (ret < 0)
199 goto failed_put_key; 201 goto failed_put_key;
200 202
203 set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags);
201 cred->thread_keyring = keyring; 204 cred->thread_keyring = keyring;
202 cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; 205 cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
203 id_resolver_cache = cred; 206 id_resolver_cache = cred;
@@ -210,7 +213,7 @@ failed_put_cred:
210 return ret; 213 return ret;
211} 214}
212 215
213void nfs_idmap_quit(void) 216static void nfs_idmap_quit_keyring(void)
214{ 217{
215 key_revoke(id_resolver_cache->thread_keyring); 218 key_revoke(id_resolver_cache->thread_keyring);
216 unregister_key_type(&key_type_id_resolver); 219 unregister_key_type(&key_type_id_resolver);
@@ -245,8 +248,10 @@ static ssize_t nfs_idmap_get_desc(const char *name, size_t namelen,
245 return desclen; 248 return desclen;
246} 249}
247 250
248static ssize_t nfs_idmap_request_key(const char *name, size_t namelen, 251static ssize_t nfs_idmap_request_key(struct key_type *key_type,
249 const char *type, void *data, size_t data_size) 252 const char *name, size_t namelen,
253 const char *type, void *data,
254 size_t data_size, struct idmap *idmap)
250{ 255{
251 const struct cred *saved_cred; 256 const struct cred *saved_cred;
252 struct key *rkey; 257 struct key *rkey;
@@ -259,8 +264,12 @@ static ssize_t nfs_idmap_request_key(const char *name, size_t namelen,
259 goto out; 264 goto out;
260 265
261 saved_cred = override_creds(id_resolver_cache); 266 saved_cred = override_creds(id_resolver_cache);
262 rkey = request_key(&key_type_id_resolver, desc, ""); 267 if (idmap)
268 rkey = request_key_with_auxdata(key_type, desc, "", 0, idmap);
269 else
270 rkey = request_key(&key_type_id_resolver, desc, "");
263 revert_creds(saved_cred); 271 revert_creds(saved_cred);
272
264 kfree(desc); 273 kfree(desc);
265 if (IS_ERR(rkey)) { 274 if (IS_ERR(rkey)) {
266 ret = PTR_ERR(rkey); 275 ret = PTR_ERR(rkey);
@@ -293,31 +302,46 @@ out:
293 return ret; 302 return ret;
294} 303}
295 304
305static ssize_t nfs_idmap_get_key(const char *name, size_t namelen,
306 const char *type, void *data,
307 size_t data_size, struct idmap *idmap)
308{
309 ssize_t ret = nfs_idmap_request_key(&key_type_id_resolver,
310 name, namelen, type, data,
311 data_size, NULL);
312 if (ret < 0) {
313 ret = nfs_idmap_request_key(&key_type_id_resolver_legacy,
314 name, namelen, type, data,
315 data_size, idmap);
316 }
317 return ret;
318}
296 319
297/* ID -> Name */ 320/* ID -> Name */
298static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf, size_t buflen) 321static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf,
322 size_t buflen, struct idmap *idmap)
299{ 323{
300 char id_str[NFS_UINT_MAXLEN]; 324 char id_str[NFS_UINT_MAXLEN];
301 int id_len; 325 int id_len;
302 ssize_t ret; 326 ssize_t ret;
303 327
304 id_len = snprintf(id_str, sizeof(id_str), "%u", id); 328 id_len = snprintf(id_str, sizeof(id_str), "%u", id);
305 ret = nfs_idmap_request_key(id_str, id_len, type, buf, buflen); 329 ret = nfs_idmap_get_key(id_str, id_len, type, buf, buflen, idmap);
306 if (ret < 0) 330 if (ret < 0)
307 return -EINVAL; 331 return -EINVAL;
308 return ret; 332 return ret;
309} 333}
310 334
311/* Name -> ID */ 335/* Name -> ID */
312static int nfs_idmap_lookup_id(const char *name, size_t namelen, 336static int nfs_idmap_lookup_id(const char *name, size_t namelen, const char *type,
313 const char *type, __u32 *id) 337 __u32 *id, struct idmap *idmap)
314{ 338{
315 char id_str[NFS_UINT_MAXLEN]; 339 char id_str[NFS_UINT_MAXLEN];
316 long id_long; 340 long id_long;
317 ssize_t data_size; 341 ssize_t data_size;
318 int ret = 0; 342 int ret = 0;
319 343
320 data_size = nfs_idmap_request_key(name, namelen, type, id_str, NFS_UINT_MAXLEN); 344 data_size = nfs_idmap_get_key(name, namelen, type, id_str, NFS_UINT_MAXLEN, idmap);
321 if (data_size <= 0) { 345 if (data_size <= 0) {
322 ret = -EINVAL; 346 ret = -EINVAL;
323 } else { 347 } else {
@@ -327,114 +351,103 @@ static int nfs_idmap_lookup_id(const char *name, size_t namelen,
327 return ret; 351 return ret;
328} 352}
329 353
330int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) 354/* idmap classic begins here */
331{ 355module_param(nfs_idmap_cache_timeout, int, 0644);
332 if (nfs_map_string_to_numeric(name, namelen, uid))
333 return 0;
334 return nfs_idmap_lookup_id(name, namelen, "uid", uid);
335}
336
337int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid)
338{
339 if (nfs_map_string_to_numeric(name, namelen, gid))
340 return 0;
341 return nfs_idmap_lookup_id(name, namelen, "gid", gid);
342}
343
344int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
345{
346 int ret = -EINVAL;
347
348 if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
349 ret = nfs_idmap_lookup_name(uid, "user", buf, buflen);
350 if (ret < 0)
351 ret = nfs_map_numeric_to_string(uid, buf, buflen);
352 return ret;
353}
354int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen)
355{
356 int ret = -EINVAL;
357 356
358 if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) 357struct idmap {
359 ret = nfs_idmap_lookup_name(gid, "group", buf, buflen); 358 struct rpc_pipe *idmap_pipe;
360 if (ret < 0) 359 struct key_construction *idmap_key_cons;
361 ret = nfs_map_numeric_to_string(gid, buf, buflen);
362 return ret;
363}
364
365#else /* CONFIG_NFS_USE_NEW_IDMAPPER not defined */
366
367#include <linux/module.h>
368#include <linux/mutex.h>
369#include <linux/init.h>
370#include <linux/socket.h>
371#include <linux/in.h>
372#include <linux/sched.h>
373#include <linux/sunrpc/clnt.h>
374#include <linux/workqueue.h>
375#include <linux/sunrpc/rpc_pipe_fs.h>
376
377#include <linux/nfs_fs.h>
378
379#include "nfs4_fs.h"
380
381#define IDMAP_HASH_SZ 128
382
383/* Default cache timeout is 10 minutes */
384unsigned int nfs_idmap_cache_timeout = 600 * HZ;
385
386static int param_set_idmap_timeout(const char *val, struct kernel_param *kp)
387{
388 char *endp;
389 int num = simple_strtol(val, &endp, 0);
390 int jif = num * HZ;
391 if (endp == val || *endp || num < 0 || jif < num)
392 return -EINVAL;
393 *((int *)kp->arg) = jif;
394 return 0;
395}
396
397module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int,
398 &nfs_idmap_cache_timeout, 0644);
399
400struct idmap_hashent {
401 unsigned long ih_expires;
402 __u32 ih_id;
403 size_t ih_namelen;
404 char ih_name[IDMAP_NAMESZ];
405}; 360};
406 361
407struct idmap_hashtable { 362enum {
408 __u8 h_type; 363 Opt_find_uid, Opt_find_gid, Opt_find_user, Opt_find_group, Opt_find_err
409 struct idmap_hashent h_entries[IDMAP_HASH_SZ];
410}; 364};
411 365
412struct idmap { 366static const match_table_t nfs_idmap_tokens = {
413 struct dentry *idmap_dentry; 367 { Opt_find_uid, "uid:%s" },
414 wait_queue_head_t idmap_wq; 368 { Opt_find_gid, "gid:%s" },
415 struct idmap_msg idmap_im; 369 { Opt_find_user, "user:%s" },
416 struct mutex idmap_lock; /* Serializes upcalls */ 370 { Opt_find_group, "group:%s" },
417 struct mutex idmap_im_lock; /* Protects the hashtable */ 371 { Opt_find_err, NULL }
418 struct idmap_hashtable idmap_user_hash;
419 struct idmap_hashtable idmap_group_hash;
420}; 372};
421 373
374static int nfs_idmap_legacy_upcall(struct key_construction *, const char *, void *);
422static ssize_t idmap_pipe_downcall(struct file *, const char __user *, 375static ssize_t idmap_pipe_downcall(struct file *, const char __user *,
423 size_t); 376 size_t);
424static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); 377static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *);
425 378
426static unsigned int fnvhash32(const void *, size_t);
427
428static const struct rpc_pipe_ops idmap_upcall_ops = { 379static const struct rpc_pipe_ops idmap_upcall_ops = {
429 .upcall = rpc_pipe_generic_upcall, 380 .upcall = rpc_pipe_generic_upcall,
430 .downcall = idmap_pipe_downcall, 381 .downcall = idmap_pipe_downcall,
431 .destroy_msg = idmap_pipe_destroy_msg, 382 .destroy_msg = idmap_pipe_destroy_msg,
432}; 383};
433 384
385static struct key_type key_type_id_resolver_legacy = {
386 .name = "id_resolver",
387 .instantiate = user_instantiate,
388 .match = user_match,
389 .revoke = user_revoke,
390 .destroy = user_destroy,
391 .describe = user_describe,
392 .read = user_read,
393 .request_key = nfs_idmap_legacy_upcall,
394};
395
396static void __nfs_idmap_unregister(struct rpc_pipe *pipe)
397{
398 if (pipe->dentry)
399 rpc_unlink(pipe->dentry);
400}
401
402static int __nfs_idmap_register(struct dentry *dir,
403 struct idmap *idmap,
404 struct rpc_pipe *pipe)
405{
406 struct dentry *dentry;
407
408 dentry = rpc_mkpipe_dentry(dir, "idmap", idmap, pipe);
409 if (IS_ERR(dentry))
410 return PTR_ERR(dentry);
411 pipe->dentry = dentry;
412 return 0;
413}
414
415static void nfs_idmap_unregister(struct nfs_client *clp,
416 struct rpc_pipe *pipe)
417{
418 struct net *net = clp->net;
419 struct super_block *pipefs_sb;
420
421 pipefs_sb = rpc_get_sb_net(net);
422 if (pipefs_sb) {
423 __nfs_idmap_unregister(pipe);
424 rpc_put_sb_net(net);
425 }
426}
427
428static int nfs_idmap_register(struct nfs_client *clp,
429 struct idmap *idmap,
430 struct rpc_pipe *pipe)
431{
432 struct net *net = clp->net;
433 struct super_block *pipefs_sb;
434 int err = 0;
435
436 pipefs_sb = rpc_get_sb_net(net);
437 if (pipefs_sb) {
438 if (clp->cl_rpcclient->cl_dentry)
439 err = __nfs_idmap_register(clp->cl_rpcclient->cl_dentry,
440 idmap, pipe);
441 rpc_put_sb_net(net);
442 }
443 return err;
444}
445
434int 446int
435nfs_idmap_new(struct nfs_client *clp) 447nfs_idmap_new(struct nfs_client *clp)
436{ 448{
437 struct idmap *idmap; 449 struct idmap *idmap;
450 struct rpc_pipe *pipe;
438 int error; 451 int error;
439 452
440 BUG_ON(clp->cl_idmap != NULL); 453 BUG_ON(clp->cl_idmap != NULL);
@@ -443,19 +456,19 @@ nfs_idmap_new(struct nfs_client *clp)
443 if (idmap == NULL) 456 if (idmap == NULL)
444 return -ENOMEM; 457 return -ENOMEM;
445 458
446 idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_path.dentry, 459 pipe = rpc_mkpipe_data(&idmap_upcall_ops, 0);
447 "idmap", idmap, &idmap_upcall_ops, 0); 460 if (IS_ERR(pipe)) {
448 if (IS_ERR(idmap->idmap_dentry)) { 461 error = PTR_ERR(pipe);
449 error = PTR_ERR(idmap->idmap_dentry);
450 kfree(idmap); 462 kfree(idmap);
451 return error; 463 return error;
452 } 464 }
453 465 error = nfs_idmap_register(clp, idmap, pipe);
454 mutex_init(&idmap->idmap_lock); 466 if (error) {
455 mutex_init(&idmap->idmap_im_lock); 467 rpc_destroy_pipe_data(pipe);
456 init_waitqueue_head(&idmap->idmap_wq); 468 kfree(idmap);
457 idmap->idmap_user_hash.h_type = IDMAP_TYPE_USER; 469 return error;
458 idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP; 470 }
471 idmap->idmap_pipe = pipe;
459 472
460 clp->cl_idmap = idmap; 473 clp->cl_idmap = idmap;
461 return 0; 474 return 0;
@@ -468,211 +481,220 @@ nfs_idmap_delete(struct nfs_client *clp)
468 481
469 if (!idmap) 482 if (!idmap)
470 return; 483 return;
471 rpc_unlink(idmap->idmap_dentry); 484 nfs_idmap_unregister(clp, idmap->idmap_pipe);
485 rpc_destroy_pipe_data(idmap->idmap_pipe);
472 clp->cl_idmap = NULL; 486 clp->cl_idmap = NULL;
473 kfree(idmap); 487 kfree(idmap);
474} 488}
475 489
476/* 490static int __rpc_pipefs_event(struct nfs_client *clp, unsigned long event,
477 * Helper routines for manipulating the hashtable 491 struct super_block *sb)
478 */
479static inline struct idmap_hashent *
480idmap_name_hash(struct idmap_hashtable* h, const char *name, size_t len)
481{
482 return &h->h_entries[fnvhash32(name, len) % IDMAP_HASH_SZ];
483}
484
485static struct idmap_hashent *
486idmap_lookup_name(struct idmap_hashtable *h, const char *name, size_t len)
487{ 492{
488 struct idmap_hashent *he = idmap_name_hash(h, name, len); 493 int err = 0;
489 494
490 if (he->ih_namelen != len || memcmp(he->ih_name, name, len) != 0) 495 switch (event) {
491 return NULL; 496 case RPC_PIPEFS_MOUNT:
492 if (time_after(jiffies, he->ih_expires)) 497 BUG_ON(clp->cl_rpcclient->cl_dentry == NULL);
493 return NULL; 498 err = __nfs_idmap_register(clp->cl_rpcclient->cl_dentry,
494 return he; 499 clp->cl_idmap,
500 clp->cl_idmap->idmap_pipe);
501 break;
502 case RPC_PIPEFS_UMOUNT:
503 if (clp->cl_idmap->idmap_pipe) {
504 struct dentry *parent;
505
506 parent = clp->cl_idmap->idmap_pipe->dentry->d_parent;
507 __nfs_idmap_unregister(clp->cl_idmap->idmap_pipe);
508 /*
509 * Note: This is a dirty hack. SUNRPC hook has been
510 * called already but simple_rmdir() call for the
511 * directory returned with error because of idmap pipe
512 * inside. Thus now we have to remove this directory
513 * here.
514 */
515 if (rpc_rmdir(parent))
516 printk(KERN_ERR "NFS: %s: failed to remove "
517 "clnt dir!\n", __func__);
518 }
519 break;
520 default:
521 printk(KERN_ERR "NFS: %s: unknown event: %ld\n", __func__,
522 event);
523 return -ENOTSUPP;
524 }
525 return err;
526}
527
528static struct nfs_client *nfs_get_client_for_event(struct net *net, int event)
529{
530 struct nfs_net *nn = net_generic(net, nfs_net_id);
531 struct dentry *cl_dentry;
532 struct nfs_client *clp;
533
534 spin_lock(&nn->nfs_client_lock);
535 list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
536 if (clp->rpc_ops != &nfs_v4_clientops)
537 continue;
538 cl_dentry = clp->cl_idmap->idmap_pipe->dentry;
539 if (((event == RPC_PIPEFS_MOUNT) && cl_dentry) ||
540 ((event == RPC_PIPEFS_UMOUNT) && !cl_dentry))
541 continue;
542 atomic_inc(&clp->cl_count);
543 spin_unlock(&nn->nfs_client_lock);
544 return clp;
545 }
546 spin_unlock(&nn->nfs_client_lock);
547 return NULL;
495} 548}
496 549
497static inline struct idmap_hashent * 550static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event,
498idmap_id_hash(struct idmap_hashtable* h, __u32 id) 551 void *ptr)
499{ 552{
500 return &h->h_entries[fnvhash32(&id, sizeof(id)) % IDMAP_HASH_SZ]; 553 struct super_block *sb = ptr;
501} 554 struct nfs_client *clp;
555 int error = 0;
502 556
503static struct idmap_hashent * 557 while ((clp = nfs_get_client_for_event(sb->s_fs_info, event))) {
504idmap_lookup_id(struct idmap_hashtable *h, __u32 id) 558 error = __rpc_pipefs_event(clp, event, sb);
505{ 559 nfs_put_client(clp);
506 struct idmap_hashent *he = idmap_id_hash(h, id); 560 if (error)
507 if (he->ih_id != id || he->ih_namelen == 0) 561 break;
508 return NULL; 562 }
509 if (time_after(jiffies, he->ih_expires)) 563 return error;
510 return NULL;
511 return he;
512} 564}
513 565
514/* 566#define PIPEFS_NFS_PRIO 1
515 * Routines for allocating new entries in the hashtable. 567
516 * For now, we just have 1 entry per bucket, so it's all 568static struct notifier_block nfs_idmap_block = {
517 * pretty trivial. 569 .notifier_call = rpc_pipefs_event,
518 */ 570 .priority = SUNRPC_PIPEFS_NFS_PRIO,
519static inline struct idmap_hashent * 571};
520idmap_alloc_name(struct idmap_hashtable *h, char *name, size_t len)
521{
522 return idmap_name_hash(h, name, len);
523}
524 572
525static inline struct idmap_hashent * 573int nfs_idmap_init(void)
526idmap_alloc_id(struct idmap_hashtable *h, __u32 id)
527{ 574{
528 return idmap_id_hash(h, id); 575 int ret;
576 ret = nfs_idmap_init_keyring();
577 if (ret != 0)
578 goto out;
579 ret = rpc_pipefs_notifier_register(&nfs_idmap_block);
580 if (ret != 0)
581 nfs_idmap_quit_keyring();
582out:
583 return ret;
529} 584}
530 585
531static void 586void nfs_idmap_quit(void)
532idmap_update_entry(struct idmap_hashent *he, const char *name,
533 size_t namelen, __u32 id)
534{ 587{
535 he->ih_id = id; 588 rpc_pipefs_notifier_unregister(&nfs_idmap_block);
536 memcpy(he->ih_name, name, namelen); 589 nfs_idmap_quit_keyring();
537 he->ih_name[namelen] = '\0';
538 he->ih_namelen = namelen;
539 he->ih_expires = jiffies + nfs_idmap_cache_timeout;
540} 590}
541 591
542/* 592static int nfs_idmap_prepare_message(char *desc, struct idmap_msg *im,
543 * Name -> ID 593 struct rpc_pipe_msg *msg)
544 */
545static int
546nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h,
547 const char *name, size_t namelen, __u32 *id)
548{ 594{
549 struct rpc_pipe_msg msg; 595 substring_t substr;
550 struct idmap_msg *im; 596 int token, ret;
551 struct idmap_hashent *he;
552 DECLARE_WAITQUEUE(wq, current);
553 int ret = -EIO;
554
555 im = &idmap->idmap_im;
556
557 /*
558 * String sanity checks
559 * Note that the userland daemon expects NUL terminated strings
560 */
561 for (;;) {
562 if (namelen == 0)
563 return -EINVAL;
564 if (name[namelen-1] != '\0')
565 break;
566 namelen--;
567 }
568 if (namelen >= IDMAP_NAMESZ)
569 return -EINVAL;
570 597
571 mutex_lock(&idmap->idmap_lock); 598 memset(im, 0, sizeof(*im));
572 mutex_lock(&idmap->idmap_im_lock); 599 memset(msg, 0, sizeof(*msg));
573
574 he = idmap_lookup_name(h, name, namelen);
575 if (he != NULL) {
576 *id = he->ih_id;
577 ret = 0;
578 goto out;
579 }
580 600
581 memset(im, 0, sizeof(*im)); 601 im->im_type = IDMAP_TYPE_GROUP;
582 memcpy(im->im_name, name, namelen); 602 token = match_token(desc, nfs_idmap_tokens, &substr);
583 603
584 im->im_type = h->h_type; 604 switch (token) {
585 im->im_conv = IDMAP_CONV_NAMETOID; 605 case Opt_find_uid:
606 im->im_type = IDMAP_TYPE_USER;
607 case Opt_find_gid:
608 im->im_conv = IDMAP_CONV_NAMETOID;
609 ret = match_strlcpy(im->im_name, &substr, IDMAP_NAMESZ);
610 break;
586 611
587 memset(&msg, 0, sizeof(msg)); 612 case Opt_find_user:
588 msg.data = im; 613 im->im_type = IDMAP_TYPE_USER;
589 msg.len = sizeof(*im); 614 case Opt_find_group:
615 im->im_conv = IDMAP_CONV_IDTONAME;
616 ret = match_int(&substr, &im->im_id);
617 break;
590 618
591 add_wait_queue(&idmap->idmap_wq, &wq); 619 default:
592 if (rpc_queue_upcall(idmap->idmap_dentry->d_inode, &msg) < 0) { 620 ret = -EINVAL;
593 remove_wait_queue(&idmap->idmap_wq, &wq);
594 goto out; 621 goto out;
595 } 622 }
596 623
597 set_current_state(TASK_UNINTERRUPTIBLE); 624 msg->data = im;
598 mutex_unlock(&idmap->idmap_im_lock); 625 msg->len = sizeof(struct idmap_msg);
599 schedule();
600 __set_current_state(TASK_RUNNING);
601 remove_wait_queue(&idmap->idmap_wq, &wq);
602 mutex_lock(&idmap->idmap_im_lock);
603 626
604 if (im->im_status & IDMAP_STATUS_SUCCESS) { 627out:
605 *id = im->im_id;
606 ret = 0;
607 }
608
609 out:
610 memset(im, 0, sizeof(*im));
611 mutex_unlock(&idmap->idmap_im_lock);
612 mutex_unlock(&idmap->idmap_lock);
613 return ret; 628 return ret;
614} 629}
615 630
616/* 631static int nfs_idmap_legacy_upcall(struct key_construction *cons,
617 * ID -> Name 632 const char *op,
618 */ 633 void *aux)
619static int
620nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
621 __u32 id, char *name)
622{ 634{
623 struct rpc_pipe_msg msg; 635 struct rpc_pipe_msg *msg;
624 struct idmap_msg *im; 636 struct idmap_msg *im;
625 struct idmap_hashent *he; 637 struct idmap *idmap = (struct idmap *)aux;
626 DECLARE_WAITQUEUE(wq, current); 638 struct key *key = cons->key;
627 int ret = -EIO; 639 int ret;
628 unsigned int len;
629
630 im = &idmap->idmap_im;
631 640
632 mutex_lock(&idmap->idmap_lock); 641 /* msg and im are freed in idmap_pipe_destroy_msg */
633 mutex_lock(&idmap->idmap_im_lock); 642 msg = kmalloc(sizeof(*msg), GFP_KERNEL);
643 if (IS_ERR(msg)) {
644 ret = PTR_ERR(msg);
645 goto out0;
646 }
634 647
635 he = idmap_lookup_id(h, id); 648 im = kmalloc(sizeof(*im), GFP_KERNEL);
636 if (he) { 649 if (IS_ERR(im)) {
637 memcpy(name, he->ih_name, he->ih_namelen); 650 ret = PTR_ERR(im);
638 ret = he->ih_namelen; 651 goto out1;
639 goto out;
640 } 652 }
641 653
642 memset(im, 0, sizeof(*im)); 654 ret = nfs_idmap_prepare_message(key->description, im, msg);
643 im->im_type = h->h_type; 655 if (ret < 0)
644 im->im_conv = IDMAP_CONV_IDTONAME; 656 goto out2;
645 im->im_id = id;
646 657
647 memset(&msg, 0, sizeof(msg)); 658 idmap->idmap_key_cons = cons;
648 msg.data = im;
649 msg.len = sizeof(*im);
650 659
651 add_wait_queue(&idmap->idmap_wq, &wq); 660 ret = rpc_queue_upcall(idmap->idmap_pipe, msg);
661 if (ret < 0)
662 goto out2;
652 663
653 if (rpc_queue_upcall(idmap->idmap_dentry->d_inode, &msg) < 0) { 664 return ret;
654 remove_wait_queue(&idmap->idmap_wq, &wq); 665
655 goto out; 666out2:
656 } 667 kfree(im);
668out1:
669 kfree(msg);
670out0:
671 key_revoke(cons->key);
672 key_revoke(cons->authkey);
673 return ret;
674}
675
676static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data)
677{
678 return key_instantiate_and_link(key, data, strlen(data) + 1,
679 id_resolver_cache->thread_keyring,
680 authkey);
681}
657 682
658 set_current_state(TASK_UNINTERRUPTIBLE); 683static int nfs_idmap_read_message(struct idmap_msg *im, struct key *key, struct key *authkey)
659 mutex_unlock(&idmap->idmap_im_lock); 684{
660 schedule(); 685 char id_str[NFS_UINT_MAXLEN];
661 __set_current_state(TASK_RUNNING); 686 int ret = -EINVAL;
662 remove_wait_queue(&idmap->idmap_wq, &wq); 687
663 mutex_lock(&idmap->idmap_im_lock); 688 switch (im->im_conv) {
664 689 case IDMAP_CONV_NAMETOID:
665 if (im->im_status & IDMAP_STATUS_SUCCESS) { 690 sprintf(id_str, "%d", im->im_id);
666 if ((len = strnlen(im->im_name, IDMAP_NAMESZ)) == 0) 691 ret = nfs_idmap_instantiate(key, authkey, id_str);
667 goto out; 692 break;
668 memcpy(name, im->im_name, len); 693 case IDMAP_CONV_IDTONAME:
669 ret = len; 694 ret = nfs_idmap_instantiate(key, authkey, im->im_name);
695 break;
670 } 696 }
671 697
672 out:
673 memset(im, 0, sizeof(*im));
674 mutex_unlock(&idmap->idmap_im_lock);
675 mutex_unlock(&idmap->idmap_lock);
676 return ret; 698 return ret;
677} 699}
678 700
@@ -681,115 +703,51 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
681{ 703{
682 struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode); 704 struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode);
683 struct idmap *idmap = (struct idmap *)rpci->private; 705 struct idmap *idmap = (struct idmap *)rpci->private;
684 struct idmap_msg im_in, *im = &idmap->idmap_im; 706 struct key_construction *cons = idmap->idmap_key_cons;
685 struct idmap_hashtable *h; 707 struct idmap_msg im;
686 struct idmap_hashent *he = NULL;
687 size_t namelen_in; 708 size_t namelen_in;
688 int ret; 709 int ret;
689 710
690 if (mlen != sizeof(im_in)) 711 if (mlen != sizeof(im)) {
691 return -ENOSPC; 712 ret = -ENOSPC;
692
693 if (copy_from_user(&im_in, src, mlen) != 0)
694 return -EFAULT;
695
696 mutex_lock(&idmap->idmap_im_lock);
697
698 ret = mlen;
699 im->im_status = im_in.im_status;
700 /* If we got an error, terminate now, and wake up pending upcalls */
701 if (!(im_in.im_status & IDMAP_STATUS_SUCCESS)) {
702 wake_up(&idmap->idmap_wq);
703 goto out; 713 goto out;
704 } 714 }
705 715
706 /* Sanity checking of strings */ 716 if (copy_from_user(&im, src, mlen) != 0) {
707 ret = -EINVAL; 717 ret = -EFAULT;
708 namelen_in = strnlen(im_in.im_name, IDMAP_NAMESZ);
709 if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ)
710 goto out; 718 goto out;
719 }
711 720
712 switch (im_in.im_type) { 721 if (!(im.im_status & IDMAP_STATUS_SUCCESS)) {
713 case IDMAP_TYPE_USER: 722 ret = mlen;
714 h = &idmap->idmap_user_hash; 723 complete_request_key(idmap->idmap_key_cons, -ENOKEY);
715 break; 724 goto out_incomplete;
716 case IDMAP_TYPE_GROUP:
717 h = &idmap->idmap_group_hash;
718 break;
719 default:
720 goto out;
721 } 725 }
722 726
723 switch (im_in.im_conv) { 727 namelen_in = strnlen(im.im_name, IDMAP_NAMESZ);
724 case IDMAP_CONV_IDTONAME: 728 if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ) {
725 /* Did we match the current upcall? */ 729 ret = -EINVAL;
726 if (im->im_conv == IDMAP_CONV_IDTONAME
727 && im->im_type == im_in.im_type
728 && im->im_id == im_in.im_id) {
729 /* Yes: copy string, including the terminating '\0' */
730 memcpy(im->im_name, im_in.im_name, namelen_in);
731 im->im_name[namelen_in] = '\0';
732 wake_up(&idmap->idmap_wq);
733 }
734 he = idmap_alloc_id(h, im_in.im_id);
735 break;
736 case IDMAP_CONV_NAMETOID:
737 /* Did we match the current upcall? */
738 if (im->im_conv == IDMAP_CONV_NAMETOID
739 && im->im_type == im_in.im_type
740 && strnlen(im->im_name, IDMAP_NAMESZ) == namelen_in
741 && memcmp(im->im_name, im_in.im_name, namelen_in) == 0) {
742 im->im_id = im_in.im_id;
743 wake_up(&idmap->idmap_wq);
744 }
745 he = idmap_alloc_name(h, im_in.im_name, namelen_in);
746 break;
747 default:
748 goto out; 730 goto out;
749 } 731 }
750 732
751 /* If the entry is valid, also copy it to the cache */ 733 ret = nfs_idmap_read_message(&im, cons->key, cons->authkey);
752 if (he != NULL) 734 if (ret >= 0) {
753 idmap_update_entry(he, im_in.im_name, namelen_in, im_in.im_id); 735 key_set_timeout(cons->key, nfs_idmap_cache_timeout);
754 ret = mlen; 736 ret = mlen;
737 }
738
755out: 739out:
756 mutex_unlock(&idmap->idmap_im_lock); 740 complete_request_key(idmap->idmap_key_cons, ret);
741out_incomplete:
757 return ret; 742 return ret;
758} 743}
759 744
760static void 745static void
761idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg) 746idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg)
762{ 747{
763 struct idmap_msg *im = msg->data; 748 /* Free memory allocated in nfs_idmap_legacy_upcall() */
764 struct idmap *idmap = container_of(im, struct idmap, idmap_im); 749 kfree(msg->data);
765 750 kfree(msg);
766 if (msg->errno >= 0)
767 return;
768 mutex_lock(&idmap->idmap_im_lock);
769 im->im_status = IDMAP_STATUS_LOOKUPFAIL;
770 wake_up(&idmap->idmap_wq);
771 mutex_unlock(&idmap->idmap_im_lock);
772}
773
774/*
775 * Fowler/Noll/Vo hash
776 * http://www.isthe.com/chongo/tech/comp/fnv/
777 */
778
779#define FNV_P_32 ((unsigned int)0x01000193) /* 16777619 */
780#define FNV_1_32 ((unsigned int)0x811c9dc5) /* 2166136261 */
781
782static unsigned int fnvhash32(const void *buf, size_t buflen)
783{
784 const unsigned char *p, *end = (const unsigned char *)buf + buflen;
785 unsigned int hash = FNV_1_32;
786
787 for (p = buf; p < end; p++) {
788 hash *= FNV_P_32;
789 hash ^= (unsigned int)*p;
790 }
791
792 return hash;
793} 751}
794 752
795int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) 753int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
@@ -798,16 +756,16 @@ int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_
798 756
799 if (nfs_map_string_to_numeric(name, namelen, uid)) 757 if (nfs_map_string_to_numeric(name, namelen, uid))
800 return 0; 758 return 0;
801 return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid); 759 return nfs_idmap_lookup_id(name, namelen, "uid", uid, idmap);
802} 760}
803 761
804int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) 762int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid)
805{ 763{
806 struct idmap *idmap = server->nfs_client->cl_idmap; 764 struct idmap *idmap = server->nfs_client->cl_idmap;
807 765
808 if (nfs_map_string_to_numeric(name, namelen, uid)) 766 if (nfs_map_string_to_numeric(name, namelen, gid))
809 return 0; 767 return 0;
810 return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid); 768 return nfs_idmap_lookup_id(name, namelen, "gid", gid, idmap);
811} 769}
812 770
813int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen) 771int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
@@ -816,21 +774,19 @@ int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, s
816 int ret = -EINVAL; 774 int ret = -EINVAL;
817 775
818 if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) 776 if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
819 ret = nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf); 777 ret = nfs_idmap_lookup_name(uid, "user", buf, buflen, idmap);
820 if (ret < 0) 778 if (ret < 0)
821 ret = nfs_map_numeric_to_string(uid, buf, buflen); 779 ret = nfs_map_numeric_to_string(uid, buf, buflen);
822 return ret; 780 return ret;
823} 781}
824int nfs_map_gid_to_group(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen) 782int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen)
825{ 783{
826 struct idmap *idmap = server->nfs_client->cl_idmap; 784 struct idmap *idmap = server->nfs_client->cl_idmap;
827 int ret = -EINVAL; 785 int ret = -EINVAL;
828 786
829 if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) 787 if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
830 ret = nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf); 788 ret = nfs_idmap_lookup_name(gid, "group", buf, buflen, idmap);
831 if (ret < 0) 789 if (ret < 0)
832 ret = nfs_map_numeric_to_string(uid, buf, buflen); 790 ret = nfs_map_numeric_to_string(gid, buf, buflen);
833 return ret; 791 return ret;
834} 792}
835
836#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index f649fba8c384..7bb4d13c1cd5 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -39,6 +39,7 @@
39#include <linux/slab.h> 39#include <linux/slab.h>
40#include <linux/compat.h> 40#include <linux/compat.h>
41#include <linux/freezer.h> 41#include <linux/freezer.h>
42#include <linux/crc32.h>
42 43
43#include <asm/system.h> 44#include <asm/system.h>
44#include <asm/uaccess.h> 45#include <asm/uaccess.h>
@@ -51,6 +52,7 @@
51#include "fscache.h" 52#include "fscache.h"
52#include "dns_resolve.h" 53#include "dns_resolve.h"
53#include "pnfs.h" 54#include "pnfs.h"
55#include "netns.h"
54 56
55#define NFSDBG_FACILITY NFSDBG_VFS 57#define NFSDBG_FACILITY NFSDBG_VFS
56 58
@@ -388,9 +390,10 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
388 unlock_new_inode(inode); 390 unlock_new_inode(inode);
389 } else 391 } else
390 nfs_refresh_inode(inode, fattr); 392 nfs_refresh_inode(inode, fattr);
391 dprintk("NFS: nfs_fhget(%s/%Ld ct=%d)\n", 393 dprintk("NFS: nfs_fhget(%s/%Ld fh_crc=0x%08x ct=%d)\n",
392 inode->i_sb->s_id, 394 inode->i_sb->s_id,
393 (long long)NFS_FILEID(inode), 395 (long long)NFS_FILEID(inode),
396 nfs_display_fhandle_hash(fh),
394 atomic_read(&inode->i_count)); 397 atomic_read(&inode->i_count));
395 398
396out: 399out:
@@ -401,7 +404,7 @@ out_no_inode:
401 goto out; 404 goto out;
402} 405}
403 406
404#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE) 407#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE|ATTR_OPEN)
405 408
406int 409int
407nfs_setattr(struct dentry *dentry, struct iattr *attr) 410nfs_setattr(struct dentry *dentry, struct iattr *attr)
@@ -423,7 +426,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
423 426
424 /* Optimization: if the end result is no change, don't RPC */ 427 /* Optimization: if the end result is no change, don't RPC */
425 attr->ia_valid &= NFS_VALID_ATTRS; 428 attr->ia_valid &= NFS_VALID_ATTRS;
426 if ((attr->ia_valid & ~ATTR_FILE) == 0) 429 if ((attr->ia_valid & ~(ATTR_FILE|ATTR_OPEN)) == 0)
427 return 0; 430 return 0;
428 431
429 /* Write all dirty data */ 432 /* Write all dirty data */
@@ -1044,6 +1047,67 @@ struct nfs_fh *nfs_alloc_fhandle(void)
1044 return fh; 1047 return fh;
1045} 1048}
1046 1049
1050#ifdef NFS_DEBUG
1051/*
1052 * _nfs_display_fhandle_hash - calculate the crc32 hash for the filehandle
1053 * in the same way that wireshark does
1054 *
1055 * @fh: file handle
1056 *
1057 * For debugging only.
1058 */
1059u32 _nfs_display_fhandle_hash(const struct nfs_fh *fh)
1060{
1061 /* wireshark uses 32-bit AUTODIN crc and does a bitwise
1062 * not on the result */
1063 return ~crc32(0xFFFFFFFF, &fh->data[0], fh->size);
1064}
1065
1066/*
1067 * _nfs_display_fhandle - display an NFS file handle on the console
1068 *
1069 * @fh: file handle to display
1070 * @caption: display caption
1071 *
1072 * For debugging only.
1073 */
1074void _nfs_display_fhandle(const struct nfs_fh *fh, const char *caption)
1075{
1076 unsigned short i;
1077
1078 if (fh == NULL || fh->size == 0) {
1079 printk(KERN_DEFAULT "%s at %p is empty\n", caption, fh);
1080 return;
1081 }
1082
1083 printk(KERN_DEFAULT "%s at %p is %u bytes, crc: 0x%08x:\n",
1084 caption, fh, fh->size, _nfs_display_fhandle_hash(fh));
1085 for (i = 0; i < fh->size; i += 16) {
1086 __be32 *pos = (__be32 *)&fh->data[i];
1087
1088 switch ((fh->size - i - 1) >> 2) {
1089 case 0:
1090 printk(KERN_DEFAULT " %08x\n",
1091 be32_to_cpup(pos));
1092 break;
1093 case 1:
1094 printk(KERN_DEFAULT " %08x %08x\n",
1095 be32_to_cpup(pos), be32_to_cpup(pos + 1));
1096 break;
1097 case 2:
1098 printk(KERN_DEFAULT " %08x %08x %08x\n",
1099 be32_to_cpup(pos), be32_to_cpup(pos + 1),
1100 be32_to_cpup(pos + 2));
1101 break;
1102 default:
1103 printk(KERN_DEFAULT " %08x %08x %08x %08x\n",
1104 be32_to_cpup(pos), be32_to_cpup(pos + 1),
1105 be32_to_cpup(pos + 2), be32_to_cpup(pos + 3));
1106 }
1107 }
1108}
1109#endif
1110
1047/** 1111/**
1048 * nfs_inode_attrs_need_update - check if the inode attributes need updating 1112 * nfs_inode_attrs_need_update - check if the inode attributes need updating
1049 * @inode - pointer to inode 1113 * @inode - pointer to inode
@@ -1211,8 +1275,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1211 unsigned long now = jiffies; 1275 unsigned long now = jiffies;
1212 unsigned long save_cache_validity; 1276 unsigned long save_cache_validity;
1213 1277
1214 dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", 1278 dfprintk(VFS, "NFS: %s(%s/%ld fh_crc=0x%08x ct=%d info=0x%x)\n",
1215 __func__, inode->i_sb->s_id, inode->i_ino, 1279 __func__, inode->i_sb->s_id, inode->i_ino,
1280 nfs_display_fhandle_hash(NFS_FH(inode)),
1216 atomic_read(&inode->i_count), fattr->valid); 1281 atomic_read(&inode->i_count), fattr->valid);
1217 1282
1218 if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) 1283 if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid)
@@ -1406,7 +1471,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1406 /* 1471 /*
1407 * Big trouble! The inode has become a different object. 1472 * Big trouble! The inode has become a different object.
1408 */ 1473 */
1409 printk(KERN_DEBUG "%s: inode %ld mode changed, %07o to %07o\n", 1474 printk(KERN_DEBUG "NFS: %s: inode %ld mode changed, %07o to %07o\n",
1410 __func__, inode->i_ino, inode->i_mode, fattr->mode); 1475 __func__, inode->i_ino, inode->i_mode, fattr->mode);
1411 out_err: 1476 out_err:
1412 /* 1477 /*
@@ -1495,7 +1560,7 @@ static void init_once(void *foo)
1495 INIT_LIST_HEAD(&nfsi->open_files); 1560 INIT_LIST_HEAD(&nfsi->open_files);
1496 INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); 1561 INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
1497 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); 1562 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
1498 INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); 1563 INIT_LIST_HEAD(&nfsi->commit_list);
1499 nfsi->npages = 0; 1564 nfsi->npages = 0;
1500 nfsi->ncommit = 0; 1565 nfsi->ncommit = 0;
1501 atomic_set(&nfsi->silly_count, 1); 1566 atomic_set(&nfsi->silly_count, 1);
@@ -1552,6 +1617,28 @@ static void nfsiod_stop(void)
1552 destroy_workqueue(wq); 1617 destroy_workqueue(wq);
1553} 1618}
1554 1619
1620int nfs_net_id;
1621EXPORT_SYMBOL_GPL(nfs_net_id);
1622
1623static int nfs_net_init(struct net *net)
1624{
1625 nfs_clients_init(net);
1626 return nfs_dns_resolver_cache_init(net);
1627}
1628
1629static void nfs_net_exit(struct net *net)
1630{
1631 nfs_dns_resolver_cache_destroy(net);
1632 nfs_cleanup_cb_ident_idr(net);
1633}
1634
1635static struct pernet_operations nfs_net_ops = {
1636 .init = nfs_net_init,
1637 .exit = nfs_net_exit,
1638 .id = &nfs_net_id,
1639 .size = sizeof(struct nfs_net),
1640};
1641
1555/* 1642/*
1556 * Initialize NFS 1643 * Initialize NFS
1557 */ 1644 */
@@ -1561,10 +1648,14 @@ static int __init init_nfs_fs(void)
1561 1648
1562 err = nfs_idmap_init(); 1649 err = nfs_idmap_init();
1563 if (err < 0) 1650 if (err < 0)
1564 goto out9; 1651 goto out10;
1565 1652
1566 err = nfs_dns_resolver_init(); 1653 err = nfs_dns_resolver_init();
1567 if (err < 0) 1654 if (err < 0)
1655 goto out9;
1656
1657 err = register_pernet_subsys(&nfs_net_ops);
1658 if (err < 0)
1568 goto out8; 1659 goto out8;
1569 1660
1570 err = nfs_fscache_register(); 1661 err = nfs_fscache_register();
@@ -1600,14 +1691,14 @@ static int __init init_nfs_fs(void)
1600 goto out0; 1691 goto out0;
1601 1692
1602#ifdef CONFIG_PROC_FS 1693#ifdef CONFIG_PROC_FS
1603 rpc_proc_register(&nfs_rpcstat); 1694 rpc_proc_register(&init_net, &nfs_rpcstat);
1604#endif 1695#endif
1605 if ((err = register_nfs_fs()) != 0) 1696 if ((err = register_nfs_fs()) != 0)
1606 goto out; 1697 goto out;
1607 return 0; 1698 return 0;
1608out: 1699out:
1609#ifdef CONFIG_PROC_FS 1700#ifdef CONFIG_PROC_FS
1610 rpc_proc_unregister("nfs"); 1701 rpc_proc_unregister(&init_net, "nfs");
1611#endif 1702#endif
1612 nfs_destroy_directcache(); 1703 nfs_destroy_directcache();
1613out0: 1704out0:
@@ -1625,10 +1716,12 @@ out5:
1625out6: 1716out6:
1626 nfs_fscache_unregister(); 1717 nfs_fscache_unregister();
1627out7: 1718out7:
1628 nfs_dns_resolver_destroy(); 1719 unregister_pernet_subsys(&nfs_net_ops);
1629out8: 1720out8:
1630 nfs_idmap_quit(); 1721 nfs_dns_resolver_destroy();
1631out9: 1722out9:
1723 nfs_idmap_quit();
1724out10:
1632 return err; 1725 return err;
1633} 1726}
1634 1727
@@ -1640,12 +1733,12 @@ static void __exit exit_nfs_fs(void)
1640 nfs_destroy_inodecache(); 1733 nfs_destroy_inodecache();
1641 nfs_destroy_nfspagecache(); 1734 nfs_destroy_nfspagecache();
1642 nfs_fscache_unregister(); 1735 nfs_fscache_unregister();
1736 unregister_pernet_subsys(&nfs_net_ops);
1643 nfs_dns_resolver_destroy(); 1737 nfs_dns_resolver_destroy();
1644 nfs_idmap_quit(); 1738 nfs_idmap_quit();
1645#ifdef CONFIG_PROC_FS 1739#ifdef CONFIG_PROC_FS
1646 rpc_proc_unregister("nfs"); 1740 rpc_proc_unregister(&init_net, "nfs");
1647#endif 1741#endif
1648 nfs_cleanup_cb_ident_idr();
1649 unregister_nfs_fs(); 1742 unregister_nfs_fs();
1650 nfs_fs_proc_exit(); 1743 nfs_fs_proc_exit();
1651 nfsiod_stop(); 1744 nfsiod_stop();
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 8102db9b926c..2476dc69365f 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -123,6 +123,7 @@ struct nfs_parsed_mount_data {
123 } nfs_server; 123 } nfs_server;
124 124
125 struct security_mnt_opts lsm_opts; 125 struct security_mnt_opts lsm_opts;
126 struct net *net;
126}; 127};
127 128
128/* mount_clnt.c */ 129/* mount_clnt.c */
@@ -137,20 +138,22 @@ struct nfs_mount_request {
137 int noresvport; 138 int noresvport;
138 unsigned int *auth_flav_len; 139 unsigned int *auth_flav_len;
139 rpc_authflavor_t *auth_flavs; 140 rpc_authflavor_t *auth_flavs;
141 struct net *net;
140}; 142};
141 143
142extern int nfs_mount(struct nfs_mount_request *info); 144extern int nfs_mount(struct nfs_mount_request *info);
143extern void nfs_umount(const struct nfs_mount_request *info); 145extern void nfs_umount(const struct nfs_mount_request *info);
144 146
145/* client.c */ 147/* client.c */
146extern struct rpc_program nfs_program; 148extern const struct rpc_program nfs_program;
149extern void nfs_clients_init(struct net *net);
147 150
148extern void nfs_cleanup_cb_ident_idr(void); 151extern void nfs_cleanup_cb_ident_idr(struct net *);
149extern void nfs_put_client(struct nfs_client *); 152extern void nfs_put_client(struct nfs_client *);
150extern struct nfs_client *nfs4_find_client_no_ident(const struct sockaddr *); 153extern struct nfs_client *nfs4_find_client_ident(struct net *, int);
151extern struct nfs_client *nfs4_find_client_ident(int);
152extern struct nfs_client * 154extern struct nfs_client *
153nfs4_find_client_sessionid(const struct sockaddr *, struct nfs4_sessionid *); 155nfs4_find_client_sessionid(struct net *, const struct sockaddr *,
156 struct nfs4_sessionid *);
154extern struct nfs_server *nfs_create_server( 157extern struct nfs_server *nfs_create_server(
155 const struct nfs_parsed_mount_data *, 158 const struct nfs_parsed_mount_data *,
156 struct nfs_fh *); 159 struct nfs_fh *);
@@ -329,6 +332,8 @@ void nfs_retry_commit(struct list_head *page_list,
329void nfs_commit_clear_lock(struct nfs_inode *nfsi); 332void nfs_commit_clear_lock(struct nfs_inode *nfsi);
330void nfs_commitdata_release(void *data); 333void nfs_commitdata_release(void *data);
331void nfs_commit_release_pages(struct nfs_write_data *data); 334void nfs_commit_release_pages(struct nfs_write_data *data);
335void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head);
336void nfs_request_remove_commit_list(struct nfs_page *req);
332 337
333#ifdef CONFIG_MIGRATION 338#ifdef CONFIG_MIGRATION
334extern int nfs_migrate_page(struct address_space *, 339extern int nfs_migrate_page(struct address_space *,
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index d4c2d6b7507e..8e65c7f1f87c 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -16,7 +16,7 @@
16#include <linux/nfs_fs.h> 16#include <linux/nfs_fs.h>
17#include "internal.h" 17#include "internal.h"
18 18
19#ifdef RPC_DEBUG 19#ifdef NFS_DEBUG
20# define NFSDBG_FACILITY NFSDBG_MOUNT 20# define NFSDBG_FACILITY NFSDBG_MOUNT
21#endif 21#endif
22 22
@@ -67,7 +67,7 @@ enum {
67 MOUNTPROC3_EXPORT = 5, 67 MOUNTPROC3_EXPORT = 5,
68}; 68};
69 69
70static struct rpc_program mnt_program; 70static const struct rpc_program mnt_program;
71 71
72/* 72/*
73 * Defined by OpenGroup XNFS Version 3W, chapter 8 73 * Defined by OpenGroup XNFS Version 3W, chapter 8
@@ -153,7 +153,7 @@ int nfs_mount(struct nfs_mount_request *info)
153 .rpc_resp = &result, 153 .rpc_resp = &result,
154 }; 154 };
155 struct rpc_create_args args = { 155 struct rpc_create_args args = {
156 .net = &init_net, 156 .net = info->net,
157 .protocol = info->protocol, 157 .protocol = info->protocol,
158 .address = info->sap, 158 .address = info->sap,
159 .addrsize = info->salen, 159 .addrsize = info->salen,
@@ -225,7 +225,7 @@ void nfs_umount(const struct nfs_mount_request *info)
225 .to_retries = 2, 225 .to_retries = 2,
226 }; 226 };
227 struct rpc_create_args args = { 227 struct rpc_create_args args = {
228 .net = &init_net, 228 .net = info->net,
229 .protocol = IPPROTO_UDP, 229 .protocol = IPPROTO_UDP,
230 .address = info->sap, 230 .address = info->sap,
231 .addrsize = info->salen, 231 .addrsize = info->salen,
@@ -488,19 +488,19 @@ static struct rpc_procinfo mnt3_procedures[] = {
488}; 488};
489 489
490 490
491static struct rpc_version mnt_version1 = { 491static const struct rpc_version mnt_version1 = {
492 .number = 1, 492 .number = 1,
493 .nrprocs = ARRAY_SIZE(mnt_procedures), 493 .nrprocs = ARRAY_SIZE(mnt_procedures),
494 .procs = mnt_procedures, 494 .procs = mnt_procedures,
495}; 495};
496 496
497static struct rpc_version mnt_version3 = { 497static const struct rpc_version mnt_version3 = {
498 .number = 3, 498 .number = 3,
499 .nrprocs = ARRAY_SIZE(mnt3_procedures), 499 .nrprocs = ARRAY_SIZE(mnt3_procedures),
500 .procs = mnt3_procedures, 500 .procs = mnt3_procedures,
501}; 501};
502 502
503static struct rpc_version *mnt_version[] = { 503static const struct rpc_version *mnt_version[] = {
504 NULL, 504 NULL,
505 &mnt_version1, 505 &mnt_version1,
506 NULL, 506 NULL,
@@ -509,7 +509,7 @@ static struct rpc_version *mnt_version[] = {
509 509
510static struct rpc_stat mnt_stats; 510static struct rpc_stat mnt_stats;
511 511
512static struct rpc_program mnt_program = { 512static const struct rpc_program mnt_program = {
513 .name = "mount", 513 .name = "mount",
514 .number = NFS_MNT_PROGRAM, 514 .number = NFS_MNT_PROGRAM,
515 .nrvers = ARRAY_SIZE(mnt_version), 515 .nrvers = ARRAY_SIZE(mnt_version),
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 8102391bb374..1807866bb3ab 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -276,7 +276,10 @@ out:
276 nfs_free_fattr(fattr); 276 nfs_free_fattr(fattr);
277 nfs_free_fhandle(fh); 277 nfs_free_fhandle(fh);
278out_nofree: 278out_nofree:
279 dprintk("<-- nfs_follow_mountpoint() = %p\n", mnt); 279 if (IS_ERR(mnt))
280 dprintk("<-- %s(): error %ld\n", __func__, PTR_ERR(mnt));
281 else
282 dprintk("<-- %s() = %p\n", __func__, mnt);
280 return mnt; 283 return mnt;
281} 284}
282 285
diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h
new file mode 100644
index 000000000000..aa14ec303e94
--- /dev/null
+++ b/fs/nfs/netns.h
@@ -0,0 +1,27 @@
1#ifndef __NFS_NETNS_H__
2#define __NFS_NETNS_H__
3
4#include <net/net_namespace.h>
5#include <net/netns/generic.h>
6
7struct bl_dev_msg {
8 int32_t status;
9 uint32_t major, minor;
10};
11
12struct nfs_net {
13 struct cache_detail *nfs_dns_resolve;
14 struct rpc_pipe *bl_device_pipe;
15 struct bl_dev_msg bl_mount_reply;
16 wait_queue_head_t bl_wq;
17 struct list_head nfs_client_list;
18 struct list_head nfs_volume_list;
19#ifdef CONFIG_NFS_V4
20 struct idr cb_ident_idr; /* Protected by nfs_client_lock */
21#endif
22 spinlock_t nfs_client_lock;
23};
24
25extern int nfs_net_id;
26
27#endif
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 792cb13a4304..1f56000fabbd 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -1150,7 +1150,7 @@ struct rpc_procinfo nfs_procedures[] = {
1150 PROC(STATFS, fhandle, statfsres, 0), 1150 PROC(STATFS, fhandle, statfsres, 0),
1151}; 1151};
1152 1152
1153struct rpc_version nfs_version2 = { 1153const struct rpc_version nfs_version2 = {
1154 .number = 2, 1154 .number = 2,
1155 .nrprocs = ARRAY_SIZE(nfs_procedures), 1155 .nrprocs = ARRAY_SIZE(nfs_procedures),
1156 .procs = nfs_procedures 1156 .procs = nfs_procedures
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 7ef23979896d..e4498dc351a8 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -192,7 +192,7 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
192 .pages = pages, 192 .pages = pages,
193 }; 193 };
194 struct nfs3_getaclres res = { 194 struct nfs3_getaclres res = {
195 0 195 NULL,
196 }; 196 };
197 struct rpc_message msg = { 197 struct rpc_message msg = {
198 .rpc_argp = &args, 198 .rpc_argp = &args,
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 91943953a370..5242eae6711a 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -428,6 +428,11 @@ nfs3_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
428 msg->rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE]; 428 msg->rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE];
429} 429}
430 430
431static void nfs3_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data)
432{
433 rpc_call_start(task);
434}
435
431static int 436static int
432nfs3_proc_unlink_done(struct rpc_task *task, struct inode *dir) 437nfs3_proc_unlink_done(struct rpc_task *task, struct inode *dir)
433{ 438{
@@ -445,6 +450,11 @@ nfs3_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
445 msg->rpc_proc = &nfs3_procedures[NFS3PROC_RENAME]; 450 msg->rpc_proc = &nfs3_procedures[NFS3PROC_RENAME];
446} 451}
447 452
453static void nfs3_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data)
454{
455 rpc_call_start(task);
456}
457
448static int 458static int
449nfs3_proc_rename_done(struct rpc_task *task, struct inode *old_dir, 459nfs3_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
450 struct inode *new_dir) 460 struct inode *new_dir)
@@ -814,6 +824,11 @@ static void nfs3_proc_read_setup(struct nfs_read_data *data, struct rpc_message
814 msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ]; 824 msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ];
815} 825}
816 826
827static void nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
828{
829 rpc_call_start(task);
830}
831
817static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) 832static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
818{ 833{
819 if (nfs3_async_handle_jukebox(task, data->inode)) 834 if (nfs3_async_handle_jukebox(task, data->inode))
@@ -828,6 +843,11 @@ static void nfs3_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
828 msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE]; 843 msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE];
829} 844}
830 845
846static void nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
847{
848 rpc_call_start(task);
849}
850
831static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data) 851static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data)
832{ 852{
833 if (nfs3_async_handle_jukebox(task, data->inode)) 853 if (nfs3_async_handle_jukebox(task, data->inode))
@@ -864,9 +884,11 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
864 .create = nfs3_proc_create, 884 .create = nfs3_proc_create,
865 .remove = nfs3_proc_remove, 885 .remove = nfs3_proc_remove,
866 .unlink_setup = nfs3_proc_unlink_setup, 886 .unlink_setup = nfs3_proc_unlink_setup,
887 .unlink_rpc_prepare = nfs3_proc_unlink_rpc_prepare,
867 .unlink_done = nfs3_proc_unlink_done, 888 .unlink_done = nfs3_proc_unlink_done,
868 .rename = nfs3_proc_rename, 889 .rename = nfs3_proc_rename,
869 .rename_setup = nfs3_proc_rename_setup, 890 .rename_setup = nfs3_proc_rename_setup,
891 .rename_rpc_prepare = nfs3_proc_rename_rpc_prepare,
870 .rename_done = nfs3_proc_rename_done, 892 .rename_done = nfs3_proc_rename_done,
871 .link = nfs3_proc_link, 893 .link = nfs3_proc_link,
872 .symlink = nfs3_proc_symlink, 894 .symlink = nfs3_proc_symlink,
@@ -879,8 +901,10 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
879 .pathconf = nfs3_proc_pathconf, 901 .pathconf = nfs3_proc_pathconf,
880 .decode_dirent = nfs3_decode_dirent, 902 .decode_dirent = nfs3_decode_dirent,
881 .read_setup = nfs3_proc_read_setup, 903 .read_setup = nfs3_proc_read_setup,
904 .read_rpc_prepare = nfs3_proc_read_rpc_prepare,
882 .read_done = nfs3_read_done, 905 .read_done = nfs3_read_done,
883 .write_setup = nfs3_proc_write_setup, 906 .write_setup = nfs3_proc_write_setup,
907 .write_rpc_prepare = nfs3_proc_write_rpc_prepare,
884 .write_done = nfs3_write_done, 908 .write_done = nfs3_write_done,
885 .commit_setup = nfs3_proc_commit_setup, 909 .commit_setup = nfs3_proc_commit_setup,
886 .commit_done = nfs3_commit_done, 910 .commit_done = nfs3_commit_done,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 183c6b123d0f..a77cc9a3ce55 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -2461,7 +2461,7 @@ struct rpc_procinfo nfs3_procedures[] = {
2461 PROC(COMMIT, commit, commit, 5), 2461 PROC(COMMIT, commit, commit, 5),
2462}; 2462};
2463 2463
2464struct rpc_version nfs_version3 = { 2464const struct rpc_version nfs_version3 = {
2465 .number = 3, 2465 .number = 3,
2466 .nrprocs = ARRAY_SIZE(nfs3_procedures), 2466 .nrprocs = ARRAY_SIZE(nfs3_procedures),
2467 .procs = nfs3_procedures 2467 .procs = nfs3_procedures
@@ -2489,7 +2489,7 @@ static struct rpc_procinfo nfs3_acl_procedures[] = {
2489 }, 2489 },
2490}; 2490};
2491 2491
2492struct rpc_version nfsacl_version3 = { 2492const struct rpc_version nfsacl_version3 = {
2493 .number = 3, 2493 .number = 3,
2494 .nrprocs = sizeof(nfs3_acl_procedures)/ 2494 .nrprocs = sizeof(nfs3_acl_procedures)/
2495 sizeof(nfs3_acl_procedures[0]), 2495 sizeof(nfs3_acl_procedures[0]),
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 4d7d0aedc101..97ecc863dd76 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -20,7 +20,6 @@ enum nfs4_client_state {
20 NFS4CLNT_RECLAIM_REBOOT, 20 NFS4CLNT_RECLAIM_REBOOT,
21 NFS4CLNT_RECLAIM_NOGRACE, 21 NFS4CLNT_RECLAIM_NOGRACE,
22 NFS4CLNT_DELEGRETURN, 22 NFS4CLNT_DELEGRETURN,
23 NFS4CLNT_LAYOUTRECALL,
24 NFS4CLNT_SESSION_RESET, 23 NFS4CLNT_SESSION_RESET,
25 NFS4CLNT_RECALL_SLOT, 24 NFS4CLNT_RECALL_SLOT,
26 NFS4CLNT_LEASE_CONFIRM, 25 NFS4CLNT_LEASE_CONFIRM,
@@ -44,7 +43,7 @@ struct nfs4_minor_version_ops {
44 struct nfs4_sequence_args *args, 43 struct nfs4_sequence_args *args,
45 struct nfs4_sequence_res *res, 44 struct nfs4_sequence_res *res,
46 int cache_reply); 45 int cache_reply);
47 int (*validate_stateid)(struct nfs_delegation *, 46 bool (*match_stateid)(const nfs4_stateid *,
48 const nfs4_stateid *); 47 const nfs4_stateid *);
49 int (*find_root_sec)(struct nfs_server *, struct nfs_fh *, 48 int (*find_root_sec)(struct nfs_server *, struct nfs_fh *,
50 struct nfs_fsinfo *); 49 struct nfs_fsinfo *);
@@ -53,26 +52,25 @@ struct nfs4_minor_version_ops {
53 const struct nfs4_state_maintenance_ops *state_renewal_ops; 52 const struct nfs4_state_maintenance_ops *state_renewal_ops;
54}; 53};
55 54
56/* 55struct nfs_unique_id {
57 * struct rpc_sequence ensures that RPC calls are sent in the exact 56 struct rb_node rb_node;
58 * order that they appear on the list. 57 __u64 id;
59 */
60struct rpc_sequence {
61 struct rpc_wait_queue wait; /* RPC call delay queue */
62 spinlock_t lock; /* Protects the list */
63 struct list_head list; /* Defines sequence of RPC calls */
64}; 58};
65 59
66#define NFS_SEQID_CONFIRMED 1 60#define NFS_SEQID_CONFIRMED 1
67struct nfs_seqid_counter { 61struct nfs_seqid_counter {
68 struct rpc_sequence *sequence; 62 int owner_id;
69 int flags; 63 int flags;
70 u32 counter; 64 u32 counter;
65 spinlock_t lock; /* Protects the list */
66 struct list_head list; /* Defines sequence of RPC calls */
67 struct rpc_wait_queue wait; /* RPC call delay queue */
71}; 68};
72 69
73struct nfs_seqid { 70struct nfs_seqid {
74 struct nfs_seqid_counter *sequence; 71 struct nfs_seqid_counter *sequence;
75 struct list_head list; 72 struct list_head list;
73 struct rpc_task *task;
76}; 74};
77 75
78static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status) 76static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status)
@@ -81,18 +79,12 @@ static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status
81 seqid->flags |= NFS_SEQID_CONFIRMED; 79 seqid->flags |= NFS_SEQID_CONFIRMED;
82} 80}
83 81
84struct nfs_unique_id {
85 struct rb_node rb_node;
86 __u64 id;
87};
88
89/* 82/*
90 * NFS4 state_owners and lock_owners are simply labels for ordered 83 * NFS4 state_owners and lock_owners are simply labels for ordered
91 * sequences of RPC calls. Their sole purpose is to provide once-only 84 * sequences of RPC calls. Their sole purpose is to provide once-only
92 * semantics by allowing the server to identify replayed requests. 85 * semantics by allowing the server to identify replayed requests.
93 */ 86 */
94struct nfs4_state_owner { 87struct nfs4_state_owner {
95 struct nfs_unique_id so_owner_id;
96 struct nfs_server *so_server; 88 struct nfs_server *so_server;
97 struct list_head so_lru; 89 struct list_head so_lru;
98 unsigned long so_expires; 90 unsigned long so_expires;
@@ -105,7 +97,6 @@ struct nfs4_state_owner {
105 unsigned long so_flags; 97 unsigned long so_flags;
106 struct list_head so_states; 98 struct list_head so_states;
107 struct nfs_seqid_counter so_seqid; 99 struct nfs_seqid_counter so_seqid;
108 struct rpc_sequence so_sequence;
109}; 100};
110 101
111enum { 102enum {
@@ -146,8 +137,6 @@ struct nfs4_lock_state {
146#define NFS_LOCK_INITIALIZED 1 137#define NFS_LOCK_INITIALIZED 1
147 int ls_flags; 138 int ls_flags;
148 struct nfs_seqid_counter ls_seqid; 139 struct nfs_seqid_counter ls_seqid;
149 struct rpc_sequence ls_sequence;
150 struct nfs_unique_id ls_id;
151 nfs4_stateid ls_stateid; 140 nfs4_stateid ls_stateid;
152 atomic_t ls_count; 141 atomic_t ls_count;
153 struct nfs4_lock_owner ls_owner; 142 struct nfs4_lock_owner ls_owner;
@@ -193,6 +182,7 @@ struct nfs4_exception {
193 long timeout; 182 long timeout;
194 int retry; 183 int retry;
195 struct nfs4_state *state; 184 struct nfs4_state *state;
185 struct inode *inode;
196}; 186};
197 187
198struct nfs4_state_recovery_ops { 188struct nfs4_state_recovery_ops {
@@ -224,7 +214,7 @@ extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, boo
224extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); 214extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
225extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, 215extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
226 struct nfs4_fs_locations *fs_locations, struct page *page); 216 struct nfs4_fs_locations *fs_locations, struct page *page);
227extern void nfs4_release_lockowner(const struct nfs4_lock_state *); 217extern int nfs4_release_lockowner(struct nfs4_lock_state *);
228extern const struct xattr_handler *nfs4_xattr_handlers[]; 218extern const struct xattr_handler *nfs4_xattr_handlers[];
229 219
230#if defined(CONFIG_NFS_V4_1) 220#if defined(CONFIG_NFS_V4_1)
@@ -233,12 +223,13 @@ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *ser
233 return server->nfs_client->cl_session; 223 return server->nfs_client->cl_session;
234} 224}
235 225
226extern bool nfs4_set_task_privileged(struct rpc_task *task, void *dummy);
236extern int nfs4_setup_sequence(const struct nfs_server *server, 227extern int nfs4_setup_sequence(const struct nfs_server *server,
237 struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, 228 struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
238 int cache_reply, struct rpc_task *task); 229 struct rpc_task *task);
239extern int nfs41_setup_sequence(struct nfs4_session *session, 230extern int nfs41_setup_sequence(struct nfs4_session *session,
240 struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, 231 struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
241 int cache_reply, struct rpc_task *task); 232 struct rpc_task *task);
242extern void nfs4_destroy_session(struct nfs4_session *session); 233extern void nfs4_destroy_session(struct nfs4_session *session);
243extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp); 234extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp);
244extern int nfs4_proc_create_session(struct nfs_client *); 235extern int nfs4_proc_create_session(struct nfs_client *);
@@ -269,7 +260,7 @@ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *ser
269 260
270static inline int nfs4_setup_sequence(const struct nfs_server *server, 261static inline int nfs4_setup_sequence(const struct nfs_server *server,
271 struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, 262 struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
272 int cache_reply, struct rpc_task *task) 263 struct rpc_task *task)
273{ 264{
274 return 0; 265 return 0;
275} 266}
@@ -319,7 +310,7 @@ static inline void nfs4_schedule_session_recovery(struct nfs4_session *session)
319} 310}
320#endif /* CONFIG_NFS_V4_1 */ 311#endif /* CONFIG_NFS_V4_1 */
321 312
322extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); 313extern struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *, gfp_t);
323extern void nfs4_put_state_owner(struct nfs4_state_owner *); 314extern void nfs4_put_state_owner(struct nfs4_state_owner *);
324extern void nfs4_purge_state_owners(struct nfs_server *); 315extern void nfs4_purge_state_owners(struct nfs_server *);
325extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); 316extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
@@ -327,6 +318,8 @@ extern void nfs4_put_open_state(struct nfs4_state *);
327extern void nfs4_close_state(struct nfs4_state *, fmode_t); 318extern void nfs4_close_state(struct nfs4_state *, fmode_t);
328extern void nfs4_close_sync(struct nfs4_state *, fmode_t); 319extern void nfs4_close_sync(struct nfs4_state *, fmode_t);
329extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); 320extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
321extern void nfs_inode_find_state_and_recover(struct inode *inode,
322 const nfs4_stateid *stateid);
330extern void nfs4_schedule_lease_recovery(struct nfs_client *); 323extern void nfs4_schedule_lease_recovery(struct nfs_client *);
331extern void nfs4_schedule_state_manager(struct nfs_client *); 324extern void nfs4_schedule_state_manager(struct nfs_client *);
332extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp); 325extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp);
@@ -337,7 +330,8 @@ extern void nfs41_handle_server_scope(struct nfs_client *,
337 struct server_scope **); 330 struct server_scope **);
338extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); 331extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
339extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); 332extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
340extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t); 333extern void nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *,
334 fmode_t, fl_owner_t, pid_t);
341 335
342extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask); 336extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask);
343extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task); 337extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task);
@@ -346,6 +340,8 @@ extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid);
346extern void nfs_release_seqid(struct nfs_seqid *seqid); 340extern void nfs_release_seqid(struct nfs_seqid *seqid);
347extern void nfs_free_seqid(struct nfs_seqid *seqid); 341extern void nfs_free_seqid(struct nfs_seqid *seqid);
348 342
343extern void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp);
344
349extern const nfs4_stateid zero_stateid; 345extern const nfs4_stateid zero_stateid;
350 346
351/* nfs4xdr.c */ 347/* nfs4xdr.c */
@@ -357,6 +353,16 @@ struct nfs4_mount_data;
357extern struct svc_version nfs4_callback_version1; 353extern struct svc_version nfs4_callback_version1;
358extern struct svc_version nfs4_callback_version4; 354extern struct svc_version nfs4_callback_version4;
359 355
356static inline void nfs4_stateid_copy(nfs4_stateid *dst, const nfs4_stateid *src)
357{
358 memcpy(dst, src, sizeof(*dst));
359}
360
361static inline bool nfs4_stateid_match(const nfs4_stateid *dst, const nfs4_stateid *src)
362{
363 return memcmp(dst, src, sizeof(*dst)) == 0;
364}
365
360#else 366#else
361 367
362#define nfs4_close_state(a, b) do { } while (0) 368#define nfs4_close_state(a, b) do { } while (0)
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 71ec08617e23..634c0bcb4fd6 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -33,7 +33,10 @@
33#include <linux/nfs_page.h> 33#include <linux/nfs_page.h>
34#include <linux/module.h> 34#include <linux/module.h>
35 35
36#include <linux/sunrpc/metrics.h>
37
36#include "internal.h" 38#include "internal.h"
39#include "delegation.h"
37#include "nfs4filelayout.h" 40#include "nfs4filelayout.h"
38 41
39#define NFSDBG_FACILITY NFSDBG_PNFS_LD 42#define NFSDBG_FACILITY NFSDBG_PNFS_LD
@@ -84,12 +87,27 @@ static int filelayout_async_handle_error(struct rpc_task *task,
84 struct nfs_client *clp, 87 struct nfs_client *clp,
85 int *reset) 88 int *reset)
86{ 89{
90 struct nfs_server *mds_server = NFS_SERVER(state->inode);
91 struct nfs_client *mds_client = mds_server->nfs_client;
92
87 if (task->tk_status >= 0) 93 if (task->tk_status >= 0)
88 return 0; 94 return 0;
89
90 *reset = 0; 95 *reset = 0;
91 96
92 switch (task->tk_status) { 97 switch (task->tk_status) {
98 /* MDS state errors */
99 case -NFS4ERR_DELEG_REVOKED:
100 case -NFS4ERR_ADMIN_REVOKED:
101 case -NFS4ERR_BAD_STATEID:
102 nfs_remove_bad_delegation(state->inode);
103 case -NFS4ERR_OPENMODE:
104 nfs4_schedule_stateid_recovery(mds_server, state);
105 goto wait_on_recovery;
106 case -NFS4ERR_EXPIRED:
107 nfs4_schedule_stateid_recovery(mds_server, state);
108 nfs4_schedule_lease_recovery(mds_client);
109 goto wait_on_recovery;
110 /* DS session errors */
93 case -NFS4ERR_BADSESSION: 111 case -NFS4ERR_BADSESSION:
94 case -NFS4ERR_BADSLOT: 112 case -NFS4ERR_BADSLOT:
95 case -NFS4ERR_BAD_HIGH_SLOT: 113 case -NFS4ERR_BAD_HIGH_SLOT:
@@ -115,8 +133,14 @@ static int filelayout_async_handle_error(struct rpc_task *task,
115 *reset = 1; 133 *reset = 1;
116 break; 134 break;
117 } 135 }
136out:
118 task->tk_status = 0; 137 task->tk_status = 0;
119 return -EAGAIN; 138 return -EAGAIN;
139wait_on_recovery:
140 rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL);
141 if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0)
142 rpc_wake_up_queued_task(&mds_client->cl_rpcwaitq, task);
143 goto out;
120} 144}
121 145
122/* NFS_PROTO call done callback routines */ 146/* NFS_PROTO call done callback routines */
@@ -173,7 +197,7 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)
173 197
174 if (nfs41_setup_sequence(rdata->ds_clp->cl_session, 198 if (nfs41_setup_sequence(rdata->ds_clp->cl_session,
175 &rdata->args.seq_args, &rdata->res.seq_res, 199 &rdata->args.seq_args, &rdata->res.seq_res,
176 0, task)) 200 task))
177 return; 201 return;
178 202
179 rpc_call_start(task); 203 rpc_call_start(task);
@@ -189,10 +213,18 @@ static void filelayout_read_call_done(struct rpc_task *task, void *data)
189 rdata->mds_ops->rpc_call_done(task, data); 213 rdata->mds_ops->rpc_call_done(task, data);
190} 214}
191 215
216static void filelayout_read_count_stats(struct rpc_task *task, void *data)
217{
218 struct nfs_read_data *rdata = (struct nfs_read_data *)data;
219
220 rpc_count_iostats(task, NFS_SERVER(rdata->inode)->client->cl_metrics);
221}
222
192static void filelayout_read_release(void *data) 223static void filelayout_read_release(void *data)
193{ 224{
194 struct nfs_read_data *rdata = (struct nfs_read_data *)data; 225 struct nfs_read_data *rdata = (struct nfs_read_data *)data;
195 226
227 put_lseg(rdata->lseg);
196 rdata->mds_ops->rpc_release(data); 228 rdata->mds_ops->rpc_release(data);
197} 229}
198 230
@@ -254,7 +286,7 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data)
254 286
255 if (nfs41_setup_sequence(wdata->ds_clp->cl_session, 287 if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
256 &wdata->args.seq_args, &wdata->res.seq_res, 288 &wdata->args.seq_args, &wdata->res.seq_res,
257 0, task)) 289 task))
258 return; 290 return;
259 291
260 rpc_call_start(task); 292 rpc_call_start(task);
@@ -268,10 +300,18 @@ static void filelayout_write_call_done(struct rpc_task *task, void *data)
268 wdata->mds_ops->rpc_call_done(task, data); 300 wdata->mds_ops->rpc_call_done(task, data);
269} 301}
270 302
303static void filelayout_write_count_stats(struct rpc_task *task, void *data)
304{
305 struct nfs_write_data *wdata = (struct nfs_write_data *)data;
306
307 rpc_count_iostats(task, NFS_SERVER(wdata->inode)->client->cl_metrics);
308}
309
271static void filelayout_write_release(void *data) 310static void filelayout_write_release(void *data)
272{ 311{
273 struct nfs_write_data *wdata = (struct nfs_write_data *)data; 312 struct nfs_write_data *wdata = (struct nfs_write_data *)data;
274 313
314 put_lseg(wdata->lseg);
275 wdata->mds_ops->rpc_release(data); 315 wdata->mds_ops->rpc_release(data);
276} 316}
277 317
@@ -282,24 +322,28 @@ static void filelayout_commit_release(void *data)
282 nfs_commit_release_pages(wdata); 322 nfs_commit_release_pages(wdata);
283 if (atomic_dec_and_test(&NFS_I(wdata->inode)->commits_outstanding)) 323 if (atomic_dec_and_test(&NFS_I(wdata->inode)->commits_outstanding))
284 nfs_commit_clear_lock(NFS_I(wdata->inode)); 324 nfs_commit_clear_lock(NFS_I(wdata->inode));
325 put_lseg(wdata->lseg);
285 nfs_commitdata_release(wdata); 326 nfs_commitdata_release(wdata);
286} 327}
287 328
288struct rpc_call_ops filelayout_read_call_ops = { 329static const struct rpc_call_ops filelayout_read_call_ops = {
289 .rpc_call_prepare = filelayout_read_prepare, 330 .rpc_call_prepare = filelayout_read_prepare,
290 .rpc_call_done = filelayout_read_call_done, 331 .rpc_call_done = filelayout_read_call_done,
332 .rpc_count_stats = filelayout_read_count_stats,
291 .rpc_release = filelayout_read_release, 333 .rpc_release = filelayout_read_release,
292}; 334};
293 335
294struct rpc_call_ops filelayout_write_call_ops = { 336static const struct rpc_call_ops filelayout_write_call_ops = {
295 .rpc_call_prepare = filelayout_write_prepare, 337 .rpc_call_prepare = filelayout_write_prepare,
296 .rpc_call_done = filelayout_write_call_done, 338 .rpc_call_done = filelayout_write_call_done,
339 .rpc_count_stats = filelayout_write_count_stats,
297 .rpc_release = filelayout_write_release, 340 .rpc_release = filelayout_write_release,
298}; 341};
299 342
300struct rpc_call_ops filelayout_commit_call_ops = { 343static const struct rpc_call_ops filelayout_commit_call_ops = {
301 .rpc_call_prepare = filelayout_write_prepare, 344 .rpc_call_prepare = filelayout_write_prepare,
302 .rpc_call_done = filelayout_write_call_done, 345 .rpc_call_done = filelayout_write_call_done,
346 .rpc_count_stats = filelayout_write_count_stats,
303 .rpc_release = filelayout_commit_release, 347 .rpc_release = filelayout_commit_release,
304}; 348};
305 349
@@ -367,7 +411,8 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
367 idx = nfs4_fl_calc_ds_index(lseg, j); 411 idx = nfs4_fl_calc_ds_index(lseg, j);
368 ds = nfs4_fl_prepare_ds(lseg, idx); 412 ds = nfs4_fl_prepare_ds(lseg, idx);
369 if (!ds) { 413 if (!ds) {
370 printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__); 414 printk(KERN_ERR "NFS: %s: prepare_ds failed, use MDS\n",
415 __func__);
371 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); 416 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
372 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); 417 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
373 return PNFS_NOT_ATTEMPTED; 418 return PNFS_NOT_ATTEMPTED;
@@ -575,7 +620,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
575 goto out_err_free; 620 goto out_err_free;
576 fl->fh_array[i]->size = be32_to_cpup(p++); 621 fl->fh_array[i]->size = be32_to_cpup(p++);
577 if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) { 622 if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) {
578 printk(KERN_ERR "Too big fh %d received %d\n", 623 printk(KERN_ERR "NFS: Too big fh %d received %d\n",
579 i, fl->fh_array[i]->size); 624 i, fl->fh_array[i]->size);
580 goto out_err_free; 625 goto out_err_free;
581 } 626 }
@@ -640,14 +685,16 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
640 int size = (fl->stripe_type == STRIPE_SPARSE) ? 685 int size = (fl->stripe_type == STRIPE_SPARSE) ?
641 fl->dsaddr->ds_num : fl->dsaddr->stripe_count; 686 fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
642 687
643 fl->commit_buckets = kcalloc(size, sizeof(struct list_head), gfp_flags); 688 fl->commit_buckets = kcalloc(size, sizeof(struct nfs4_fl_commit_bucket), gfp_flags);
644 if (!fl->commit_buckets) { 689 if (!fl->commit_buckets) {
645 filelayout_free_lseg(&fl->generic_hdr); 690 filelayout_free_lseg(&fl->generic_hdr);
646 return NULL; 691 return NULL;
647 } 692 }
648 fl->number_of_buckets = size; 693 fl->number_of_buckets = size;
649 for (i = 0; i < size; i++) 694 for (i = 0; i < size; i++) {
650 INIT_LIST_HEAD(&fl->commit_buckets[i]); 695 INIT_LIST_HEAD(&fl->commit_buckets[i].written);
696 INIT_LIST_HEAD(&fl->commit_buckets[i].committing);
697 }
651 } 698 }
652 return &fl->generic_hdr; 699 return &fl->generic_hdr;
653} 700}
@@ -679,7 +726,7 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
679 return (p_stripe == r_stripe); 726 return (p_stripe == r_stripe);
680} 727}
681 728
682void 729static void
683filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, 730filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
684 struct nfs_page *req) 731 struct nfs_page *req)
685{ 732{
@@ -696,7 +743,7 @@ filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
696 nfs_pageio_reset_read_mds(pgio); 743 nfs_pageio_reset_read_mds(pgio);
697} 744}
698 745
699void 746static void
700filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, 747filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
701 struct nfs_page *req) 748 struct nfs_page *req)
702{ 749{
@@ -725,11 +772,6 @@ static const struct nfs_pageio_ops filelayout_pg_write_ops = {
725 .pg_doio = pnfs_generic_pg_writepages, 772 .pg_doio = pnfs_generic_pg_writepages,
726}; 773};
727 774
728static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg)
729{
730 return !FILELAYOUT_LSEG(lseg)->commit_through_mds;
731}
732
733static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j) 775static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
734{ 776{
735 if (fl->stripe_type == STRIPE_SPARSE) 777 if (fl->stripe_type == STRIPE_SPARSE)
@@ -738,13 +780,49 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
738 return j; 780 return j;
739} 781}
740 782
741struct list_head *filelayout_choose_commit_list(struct nfs_page *req) 783/* The generic layer is about to remove the req from the commit list.
784 * If this will make the bucket empty, it will need to put the lseg reference.
785 */
786static void
787filelayout_clear_request_commit(struct nfs_page *req)
788{
789 struct pnfs_layout_segment *freeme = NULL;
790 struct inode *inode = req->wb_context->dentry->d_inode;
791
792 spin_lock(&inode->i_lock);
793 if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags))
794 goto out;
795 if (list_is_singular(&req->wb_list)) {
796 struct inode *inode = req->wb_context->dentry->d_inode;
797 struct pnfs_layout_segment *lseg;
798
799 /* From here we can find the bucket, but for the moment,
800 * since there is only one relevant lseg...
801 */
802 list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
803 if (lseg->pls_range.iomode == IOMODE_RW) {
804 freeme = lseg;
805 break;
806 }
807 }
808 }
809out:
810 nfs_request_remove_commit_list(req);
811 spin_unlock(&inode->i_lock);
812 put_lseg(freeme);
813}
814
815static struct list_head *
816filelayout_choose_commit_list(struct nfs_page *req,
817 struct pnfs_layout_segment *lseg)
742{ 818{
743 struct pnfs_layout_segment *lseg = req->wb_commit_lseg;
744 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); 819 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
745 u32 i, j; 820 u32 i, j;
746 struct list_head *list; 821 struct list_head *list;
747 822
823 if (fl->commit_through_mds)
824 return &NFS_I(req->wb_context->dentry->d_inode)->commit_list;
825
748 /* Note that we are calling nfs4_fl_calc_j_index on each page 826 /* Note that we are calling nfs4_fl_calc_j_index on each page
749 * that ends up being committed to a data server. An attractive 827 * that ends up being committed to a data server. An attractive
750 * alternative is to add a field to nfs_write_data and nfs_page 828 * alternative is to add a field to nfs_write_data and nfs_page
@@ -754,14 +832,30 @@ struct list_head *filelayout_choose_commit_list(struct nfs_page *req)
754 j = nfs4_fl_calc_j_index(lseg, 832 j = nfs4_fl_calc_j_index(lseg,
755 (loff_t)req->wb_index << PAGE_CACHE_SHIFT); 833 (loff_t)req->wb_index << PAGE_CACHE_SHIFT);
756 i = select_bucket_index(fl, j); 834 i = select_bucket_index(fl, j);
757 list = &fl->commit_buckets[i]; 835 list = &fl->commit_buckets[i].written;
758 if (list_empty(list)) { 836 if (list_empty(list)) {
759 /* Non-empty buckets hold a reference on the lseg */ 837 /* Non-empty buckets hold a reference on the lseg. That ref
838 * is normally transferred to the COMMIT call and released
839 * there. It could also be released if the last req is pulled
840 * off due to a rewrite, in which case it will be done in
841 * filelayout_remove_commit_req
842 */
760 get_lseg(lseg); 843 get_lseg(lseg);
761 } 844 }
845 set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
762 return list; 846 return list;
763} 847}
764 848
849static void
850filelayout_mark_request_commit(struct nfs_page *req,
851 struct pnfs_layout_segment *lseg)
852{
853 struct list_head *list;
854
855 list = filelayout_choose_commit_list(req, lseg);
856 nfs_request_add_commit_list(req, list);
857}
858
765static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i) 859static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
766{ 860{
767 struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); 861 struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
@@ -797,11 +891,12 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how)
797 idx = calc_ds_index_from_commit(lseg, data->ds_commit_index); 891 idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
798 ds = nfs4_fl_prepare_ds(lseg, idx); 892 ds = nfs4_fl_prepare_ds(lseg, idx);
799 if (!ds) { 893 if (!ds) {
800 printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__); 894 printk(KERN_ERR "NFS: %s: prepare_ds failed, use MDS\n",
895 __func__);
801 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); 896 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
802 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); 897 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
803 prepare_to_resend_writes(data); 898 prepare_to_resend_writes(data);
804 data->mds_ops->rpc_release(data); 899 filelayout_commit_release(data);
805 return -EAGAIN; 900 return -EAGAIN;
806 } 901 }
807 dprintk("%s ino %lu, how %d\n", __func__, data->inode->i_ino, how); 902 dprintk("%s ino %lu, how %d\n", __func__, data->inode->i_ino, how);
@@ -817,24 +912,87 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how)
817/* 912/*
818 * This is only useful while we are using whole file layouts. 913 * This is only useful while we are using whole file layouts.
819 */ 914 */
820static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode) 915static struct pnfs_layout_segment *
916find_only_write_lseg_locked(struct inode *inode)
821{ 917{
822 struct pnfs_layout_segment *lseg, *rv = NULL; 918 struct pnfs_layout_segment *lseg;
823 919
824 spin_lock(&inode->i_lock);
825 list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) 920 list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list)
826 if (lseg->pls_range.iomode == IOMODE_RW) 921 if (lseg->pls_range.iomode == IOMODE_RW)
827 rv = get_lseg(lseg); 922 return lseg;
923 return NULL;
924}
925
926static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode)
927{
928 struct pnfs_layout_segment *rv;
929
930 spin_lock(&inode->i_lock);
931 rv = find_only_write_lseg_locked(inode);
932 if (rv)
933 get_lseg(rv);
828 spin_unlock(&inode->i_lock); 934 spin_unlock(&inode->i_lock);
829 return rv; 935 return rv;
830} 936}
831 937
832static int alloc_ds_commits(struct inode *inode, struct list_head *list) 938static int
939filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max,
940 spinlock_t *lock)
941{
942 struct list_head *src = &bucket->written;
943 struct list_head *dst = &bucket->committing;
944 struct nfs_page *req, *tmp;
945 int ret = 0;
946
947 list_for_each_entry_safe(req, tmp, src, wb_list) {
948 if (!nfs_lock_request(req))
949 continue;
950 if (cond_resched_lock(lock))
951 list_safe_reset_next(req, tmp, wb_list);
952 nfs_request_remove_commit_list(req);
953 clear_bit(PG_COMMIT_TO_DS, &req->wb_flags);
954 nfs_list_add_request(req, dst);
955 ret++;
956 if (ret == max)
957 break;
958 }
959 return ret;
960}
961
962/* Move reqs from written to committing lists, returning count of number moved.
963 * Note called with i_lock held.
964 */
965static int filelayout_scan_commit_lists(struct inode *inode, int max,
966 spinlock_t *lock)
967{
968 struct pnfs_layout_segment *lseg;
969 struct nfs4_filelayout_segment *fl;
970 int i, rv = 0, cnt;
971
972 lseg = find_only_write_lseg_locked(inode);
973 if (!lseg)
974 goto out_done;
975 fl = FILELAYOUT_LSEG(lseg);
976 if (fl->commit_through_mds)
977 goto out_done;
978 for (i = 0; i < fl->number_of_buckets && max != 0; i++) {
979 cnt = filelayout_scan_ds_commit_list(&fl->commit_buckets[i],
980 max, lock);
981 max -= cnt;
982 rv += cnt;
983 }
984out_done:
985 return rv;
986}
987
988static unsigned int
989alloc_ds_commits(struct inode *inode, struct list_head *list)
833{ 990{
834 struct pnfs_layout_segment *lseg; 991 struct pnfs_layout_segment *lseg;
835 struct nfs4_filelayout_segment *fl; 992 struct nfs4_filelayout_segment *fl;
836 struct nfs_write_data *data; 993 struct nfs_write_data *data;
837 int i, j; 994 int i, j;
995 unsigned int nreq = 0;
838 996
839 /* Won't need this when non-whole file layout segments are supported 997 /* Won't need this when non-whole file layout segments are supported
840 * instead we will use a pnfs_layout_hdr structure */ 998 * instead we will use a pnfs_layout_hdr structure */
@@ -843,28 +1001,27 @@ static int alloc_ds_commits(struct inode *inode, struct list_head *list)
843 return 0; 1001 return 0;
844 fl = FILELAYOUT_LSEG(lseg); 1002 fl = FILELAYOUT_LSEG(lseg);
845 for (i = 0; i < fl->number_of_buckets; i++) { 1003 for (i = 0; i < fl->number_of_buckets; i++) {
846 if (list_empty(&fl->commit_buckets[i])) 1004 if (list_empty(&fl->commit_buckets[i].committing))
847 continue; 1005 continue;
848 data = nfs_commitdata_alloc(); 1006 data = nfs_commitdata_alloc();
849 if (!data) 1007 if (!data)
850 goto out_bad; 1008 break;
851 data->ds_commit_index = i; 1009 data->ds_commit_index = i;
852 data->lseg = lseg; 1010 data->lseg = lseg;
853 list_add(&data->pages, list); 1011 list_add(&data->pages, list);
1012 nreq++;
854 } 1013 }
855 put_lseg(lseg);
856 return 0;
857 1014
858out_bad: 1015 /* Clean up on error */
859 for (j = i; j < fl->number_of_buckets; j++) { 1016 for (j = i; j < fl->number_of_buckets; j++) {
860 if (list_empty(&fl->commit_buckets[i])) 1017 if (list_empty(&fl->commit_buckets[i].committing))
861 continue; 1018 continue;
862 nfs_retry_commit(&fl->commit_buckets[i], lseg); 1019 nfs_retry_commit(&fl->commit_buckets[i].committing, lseg);
863 put_lseg(lseg); /* associated with emptying bucket */ 1020 put_lseg(lseg); /* associated with emptying bucket */
864 } 1021 }
865 put_lseg(lseg); 1022 put_lseg(lseg);
866 /* Caller will clean up entries put on list */ 1023 /* Caller will clean up entries put on list */
867 return -ENOMEM; 1024 return nreq;
868} 1025}
869 1026
870/* This follows nfs_commit_list pretty closely */ 1027/* This follows nfs_commit_list pretty closely */
@@ -874,40 +1031,40 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
874{ 1031{
875 struct nfs_write_data *data, *tmp; 1032 struct nfs_write_data *data, *tmp;
876 LIST_HEAD(list); 1033 LIST_HEAD(list);
1034 unsigned int nreq = 0;
877 1035
878 if (!list_empty(mds_pages)) { 1036 if (!list_empty(mds_pages)) {
879 data = nfs_commitdata_alloc(); 1037 data = nfs_commitdata_alloc();
880 if (!data) 1038 if (data != NULL) {
881 goto out_bad; 1039 data->lseg = NULL;
882 data->lseg = NULL; 1040 list_add(&data->pages, &list);
883 list_add(&data->pages, &list); 1041 nreq++;
1042 } else
1043 nfs_retry_commit(mds_pages, NULL);
884 } 1044 }
885 1045
886 if (alloc_ds_commits(inode, &list)) 1046 nreq += alloc_ds_commits(inode, &list);
887 goto out_bad; 1047
1048 if (nreq == 0) {
1049 nfs_commit_clear_lock(NFS_I(inode));
1050 goto out;
1051 }
1052
1053 atomic_add(nreq, &NFS_I(inode)->commits_outstanding);
888 1054
889 list_for_each_entry_safe(data, tmp, &list, pages) { 1055 list_for_each_entry_safe(data, tmp, &list, pages) {
890 list_del_init(&data->pages); 1056 list_del_init(&data->pages);
891 atomic_inc(&NFS_I(inode)->commits_outstanding);
892 if (!data->lseg) { 1057 if (!data->lseg) {
893 nfs_init_commit(data, mds_pages, NULL); 1058 nfs_init_commit(data, mds_pages, NULL);
894 nfs_initiate_commit(data, NFS_CLIENT(inode), 1059 nfs_initiate_commit(data, NFS_CLIENT(inode),
895 data->mds_ops, how); 1060 data->mds_ops, how);
896 } else { 1061 } else {
897 nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index], data->lseg); 1062 nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index].committing, data->lseg);
898 filelayout_initiate_commit(data, how); 1063 filelayout_initiate_commit(data, how);
899 } 1064 }
900 } 1065 }
901 return 0; 1066out:
902 out_bad: 1067 return PNFS_ATTEMPTED;
903 list_for_each_entry_safe(data, tmp, &list, pages) {
904 nfs_retry_commit(&data->pages, data->lseg);
905 list_del_init(&data->pages);
906 nfs_commit_free(data);
907 }
908 nfs_retry_commit(mds_pages, NULL);
909 nfs_commit_clear_lock(NFS_I(inode));
910 return -ENOMEM;
911} 1068}
912 1069
913static void 1070static void
@@ -924,8 +1081,9 @@ static struct pnfs_layoutdriver_type filelayout_type = {
924 .free_lseg = filelayout_free_lseg, 1081 .free_lseg = filelayout_free_lseg,
925 .pg_read_ops = &filelayout_pg_read_ops, 1082 .pg_read_ops = &filelayout_pg_read_ops,
926 .pg_write_ops = &filelayout_pg_write_ops, 1083 .pg_write_ops = &filelayout_pg_write_ops,
927 .mark_pnfs_commit = filelayout_mark_pnfs_commit, 1084 .mark_request_commit = filelayout_mark_request_commit,
928 .choose_commit_list = filelayout_choose_commit_list, 1085 .clear_request_commit = filelayout_clear_request_commit,
1086 .scan_commit_lists = filelayout_scan_commit_lists,
929 .commit_pagelist = filelayout_commit_pagelist, 1087 .commit_pagelist = filelayout_commit_pagelist,
930 .read_pagelist = filelayout_read_pagelist, 1088 .read_pagelist = filelayout_read_pagelist,
931 .write_pagelist = filelayout_write_pagelist, 1089 .write_pagelist = filelayout_write_pagelist,
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index 2e42284253fa..21190bb1f5e3 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -74,6 +74,11 @@ struct nfs4_file_layout_dsaddr {
74 struct nfs4_pnfs_ds *ds_list[1]; 74 struct nfs4_pnfs_ds *ds_list[1];
75}; 75};
76 76
77struct nfs4_fl_commit_bucket {
78 struct list_head written;
79 struct list_head committing;
80};
81
77struct nfs4_filelayout_segment { 82struct nfs4_filelayout_segment {
78 struct pnfs_layout_segment generic_hdr; 83 struct pnfs_layout_segment generic_hdr;
79 u32 stripe_type; 84 u32 stripe_type;
@@ -84,7 +89,7 @@ struct nfs4_filelayout_segment {
84 struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ 89 struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */
85 unsigned int num_fh; 90 unsigned int num_fh;
86 struct nfs_fh **fh_array; 91 struct nfs_fh **fh_array;
87 struct list_head *commit_buckets; /* Sort commits to ds */ 92 struct nfs4_fl_commit_bucket *commit_buckets; /* Sort commits to ds */
88 int number_of_buckets; 93 int number_of_buckets;
89}; 94};
90 95
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index 8ae91908f5aa..a866bbd2890a 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -45,7 +45,7 @@
45 * - incremented when a device id maps a data server already in the cache. 45 * - incremented when a device id maps a data server already in the cache.
46 * - decremented when deviceid is removed from the cache. 46 * - decremented when deviceid is removed from the cache.
47 */ 47 */
48DEFINE_SPINLOCK(nfs4_ds_cache_lock); 48static DEFINE_SPINLOCK(nfs4_ds_cache_lock);
49static LIST_HEAD(nfs4_data_server_cache); 49static LIST_HEAD(nfs4_data_server_cache);
50 50
51/* Debug routines */ 51/* Debug routines */
@@ -108,58 +108,40 @@ same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2)
108 return false; 108 return false;
109} 109}
110 110
111/* 111static bool
112 * Lookup DS by addresses. The first matching address returns true. 112_same_data_server_addrs_locked(const struct list_head *dsaddrs1,
113 * nfs4_ds_cache_lock is held 113 const struct list_head *dsaddrs2)
114 */
115static struct nfs4_pnfs_ds *
116_data_server_lookup_locked(struct list_head *dsaddrs)
117{ 114{
118 struct nfs4_pnfs_ds *ds;
119 struct nfs4_pnfs_ds_addr *da1, *da2; 115 struct nfs4_pnfs_ds_addr *da1, *da2;
120 116
121 list_for_each_entry(da1, dsaddrs, da_node) { 117 /* step through both lists, comparing as we go */
122 list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) { 118 for (da1 = list_first_entry(dsaddrs1, typeof(*da1), da_node),
123 list_for_each_entry(da2, &ds->ds_addrs, da_node) { 119 da2 = list_first_entry(dsaddrs2, typeof(*da2), da_node);
124 if (same_sockaddr( 120 da1 != NULL && da2 != NULL;
125 (struct sockaddr *)&da1->da_addr, 121 da1 = list_entry(da1->da_node.next, typeof(*da1), da_node),
126 (struct sockaddr *)&da2->da_addr)) 122 da2 = list_entry(da2->da_node.next, typeof(*da2), da_node)) {
127 return ds; 123 if (!same_sockaddr((struct sockaddr *)&da1->da_addr,
128 } 124 (struct sockaddr *)&da2->da_addr))
129 } 125 return false;
130 } 126 }
131 return NULL; 127 if (da1 == NULL && da2 == NULL)
128 return true;
129
130 return false;
132} 131}
133 132
134/* 133/*
135 * Compare two lists of addresses. 134 * Lookup DS by addresses. nfs4_ds_cache_lock is held
136 */ 135 */
137static bool 136static struct nfs4_pnfs_ds *
138_data_server_match_all_addrs_locked(struct list_head *dsaddrs1, 137_data_server_lookup_locked(const struct list_head *dsaddrs)
139 struct list_head *dsaddrs2)
140{ 138{
141 struct nfs4_pnfs_ds_addr *da1, *da2; 139 struct nfs4_pnfs_ds *ds;
142 size_t count1 = 0,
143 count2 = 0;
144
145 list_for_each_entry(da1, dsaddrs1, da_node)
146 count1++;
147
148 list_for_each_entry(da2, dsaddrs2, da_node) {
149 bool found = false;
150 count2++;
151 list_for_each_entry(da1, dsaddrs1, da_node) {
152 if (same_sockaddr((struct sockaddr *)&da1->da_addr,
153 (struct sockaddr *)&da2->da_addr)) {
154 found = true;
155 break;
156 }
157 }
158 if (!found)
159 return false;
160 }
161 140
162 return (count1 == count2); 141 list_for_each_entry(ds, &nfs4_data_server_cache, ds_node)
142 if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs))
143 return ds;
144 return NULL;
163} 145}
164 146
165/* 147/*
@@ -356,11 +338,6 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
356 dprintk("%s add new data server %s\n", __func__, 338 dprintk("%s add new data server %s\n", __func__,
357 ds->ds_remotestr); 339 ds->ds_remotestr);
358 } else { 340 } else {
359 if (!_data_server_match_all_addrs_locked(&tmp_ds->ds_addrs,
360 dsaddrs)) {
361 dprintk("%s: multipath address mismatch: %s != %s",
362 __func__, tmp_ds->ds_remotestr, remotestr);
363 }
364 kfree(remotestr); 341 kfree(remotestr);
365 kfree(ds); 342 kfree(ds);
366 atomic_inc(&tmp_ds->ds_count); 343 atomic_inc(&tmp_ds->ds_count);
@@ -378,7 +355,7 @@ out:
378 * Currently only supports ipv4, ipv6 and one multi-path address. 355 * Currently only supports ipv4, ipv6 and one multi-path address.
379 */ 356 */
380static struct nfs4_pnfs_ds_addr * 357static struct nfs4_pnfs_ds_addr *
381decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags) 358decode_ds_addr(struct net *net, struct xdr_stream *streamp, gfp_t gfp_flags)
382{ 359{
383 struct nfs4_pnfs_ds_addr *da = NULL; 360 struct nfs4_pnfs_ds_addr *da = NULL;
384 char *buf, *portstr; 361 char *buf, *portstr;
@@ -457,7 +434,7 @@ decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags)
457 434
458 INIT_LIST_HEAD(&da->da_node); 435 INIT_LIST_HEAD(&da->da_node);
459 436
460 if (!rpc_pton(buf, portstr-buf, (struct sockaddr *)&da->da_addr, 437 if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr,
461 sizeof(da->da_addr))) { 438 sizeof(da->da_addr))) {
462 dprintk("%s: error parsing address %s\n", __func__, buf); 439 dprintk("%s: error parsing address %s\n", __func__, buf);
463 goto out_free_da; 440 goto out_free_da;
@@ -554,7 +531,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
554 cnt = be32_to_cpup(p); 531 cnt = be32_to_cpup(p);
555 dprintk("%s stripe count %d\n", __func__, cnt); 532 dprintk("%s stripe count %d\n", __func__, cnt);
556 if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) { 533 if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) {
557 printk(KERN_WARNING "%s: stripe count %d greater than " 534 printk(KERN_WARNING "NFS: %s: stripe count %d greater than "
558 "supported maximum %d\n", __func__, 535 "supported maximum %d\n", __func__,
559 cnt, NFS4_PNFS_MAX_STRIPE_CNT); 536 cnt, NFS4_PNFS_MAX_STRIPE_CNT);
560 goto out_err_free_scratch; 537 goto out_err_free_scratch;
@@ -585,7 +562,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
585 num = be32_to_cpup(p); 562 num = be32_to_cpup(p);
586 dprintk("%s ds_num %u\n", __func__, num); 563 dprintk("%s ds_num %u\n", __func__, num);
587 if (num > NFS4_PNFS_MAX_MULTI_CNT) { 564 if (num > NFS4_PNFS_MAX_MULTI_CNT) {
588 printk(KERN_WARNING "%s: multipath count %d greater than " 565 printk(KERN_WARNING "NFS: %s: multipath count %d greater than "
589 "supported maximum %d\n", __func__, 566 "supported maximum %d\n", __func__,
590 num, NFS4_PNFS_MAX_MULTI_CNT); 567 num, NFS4_PNFS_MAX_MULTI_CNT);
591 goto out_err_free_stripe_indices; 568 goto out_err_free_stripe_indices;
@@ -593,7 +570,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
593 570
594 /* validate stripe indices are all < num */ 571 /* validate stripe indices are all < num */
595 if (max_stripe_index >= num) { 572 if (max_stripe_index >= num) {
596 printk(KERN_WARNING "%s: stripe index %u >= num ds %u\n", 573 printk(KERN_WARNING "NFS: %s: stripe index %u >= num ds %u\n",
597 __func__, max_stripe_index, num); 574 __func__, max_stripe_index, num);
598 goto out_err_free_stripe_indices; 575 goto out_err_free_stripe_indices;
599 } 576 }
@@ -625,7 +602,8 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
625 602
626 mp_count = be32_to_cpup(p); /* multipath count */ 603 mp_count = be32_to_cpup(p); /* multipath count */
627 for (j = 0; j < mp_count; j++) { 604 for (j = 0; j < mp_count; j++) {
628 da = decode_ds_addr(&stream, gfp_flags); 605 da = decode_ds_addr(NFS_SERVER(ino)->nfs_client->net,
606 &stream, gfp_flags);
629 if (da) 607 if (da)
630 list_add_tail(&da->da_node, &dsaddrs); 608 list_add_tail(&da->da_node, &dsaddrs);
631 } 609 }
@@ -686,7 +664,7 @@ decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_fl
686 664
687 new = decode_device(inode, dev, gfp_flags); 665 new = decode_device(inode, dev, gfp_flags);
688 if (!new) { 666 if (!new) {
689 printk(KERN_WARNING "%s: Could not decode or add device\n", 667 printk(KERN_WARNING "NFS: %s: Could not decode or add device\n",
690 __func__); 668 __func__);
691 return NULL; 669 return NULL;
692 } 670 }
@@ -835,7 +813,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
835 struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx]; 813 struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx];
836 814
837 if (ds == NULL) { 815 if (ds == NULL) {
838 printk(KERN_ERR "%s: No data server for offset index %d\n", 816 printk(KERN_ERR "NFS: %s: No data server for offset index %d\n",
839 __func__, ds_idx); 817 __func__, ds_idx);
840 return NULL; 818 return NULL;
841 } 819 }
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index bb80c49b6533..9c8eca315f43 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -94,13 +94,14 @@ static int nfs4_validate_fspath(struct dentry *dentry,
94} 94}
95 95
96static size_t nfs_parse_server_name(char *string, size_t len, 96static size_t nfs_parse_server_name(char *string, size_t len,
97 struct sockaddr *sa, size_t salen) 97 struct sockaddr *sa, size_t salen, struct nfs_server *server)
98{ 98{
99 struct net *net = rpc_net_ns(server->client);
99 ssize_t ret; 100 ssize_t ret;
100 101
101 ret = rpc_pton(string, len, sa, salen); 102 ret = rpc_pton(net, string, len, sa, salen);
102 if (ret == 0) { 103 if (ret == 0) {
103 ret = nfs_dns_resolve_name(string, len, sa, salen); 104 ret = nfs_dns_resolve_name(net, string, len, sa, salen);
104 if (ret < 0) 105 if (ret < 0)
105 ret = 0; 106 ret = 0;
106 } 107 }
@@ -137,7 +138,8 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
137 continue; 138 continue;
138 139
139 mountdata->addrlen = nfs_parse_server_name(buf->data, buf->len, 140 mountdata->addrlen = nfs_parse_server_name(buf->data, buf->len,
140 mountdata->addr, addr_bufsize); 141 mountdata->addr, addr_bufsize,
142 NFS_SB(mountdata->sb));
141 if (mountdata->addrlen == 0) 143 if (mountdata->addrlen == 0)
142 continue; 144 continue;
143 145
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index ec9f6ef6c5dd..e809d2305ebf 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -72,18 +72,21 @@
72 72
73#define NFS4_MAX_LOOP_ON_RECOVER (10) 73#define NFS4_MAX_LOOP_ON_RECOVER (10)
74 74
75static unsigned short max_session_slots = NFS4_DEF_SLOT_TABLE_SIZE;
76
75struct nfs4_opendata; 77struct nfs4_opendata;
76static int _nfs4_proc_open(struct nfs4_opendata *data); 78static int _nfs4_proc_open(struct nfs4_opendata *data);
77static int _nfs4_recover_proc_open(struct nfs4_opendata *data); 79static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
78static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); 80static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
79static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); 81static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
82static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr);
80static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); 83static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
81static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, 84static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
82 struct nfs_fattr *fattr, struct iattr *sattr, 85 struct nfs_fattr *fattr, struct iattr *sattr,
83 struct nfs4_state *state); 86 struct nfs4_state *state);
84#ifdef CONFIG_NFS_V4_1 87#ifdef CONFIG_NFS_V4_1
85static int nfs41_test_stateid(struct nfs_server *, struct nfs4_state *); 88static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *);
86static int nfs41_free_stateid(struct nfs_server *, struct nfs4_state *); 89static int nfs41_free_stateid(struct nfs_server *, nfs4_stateid *);
87#endif 90#endif
88/* Prevent leaks of NFSv4 errors into userland */ 91/* Prevent leaks of NFSv4 errors into userland */
89static int nfs4_map_errors(int err) 92static int nfs4_map_errors(int err)
@@ -193,7 +196,7 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
193 * when talking to the server, we always send cookie 0 196 * when talking to the server, we always send cookie 0
194 * instead of 1 or 2. 197 * instead of 1 or 2.
195 */ 198 */
196 start = p = kmap_atomic(*readdir->pages, KM_USER0); 199 start = p = kmap_atomic(*readdir->pages);
197 200
198 if (cookie == 0) { 201 if (cookie == 0) {
199 *p++ = xdr_one; /* next */ 202 *p++ = xdr_one; /* next */
@@ -221,7 +224,7 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
221 224
222 readdir->pgbase = (char *)p - (char *)start; 225 readdir->pgbase = (char *)p - (char *)start;
223 readdir->count -= readdir->pgbase; 226 readdir->count -= readdir->pgbase;
224 kunmap_atomic(start, KM_USER0); 227 kunmap_atomic(start);
225} 228}
226 229
227static int nfs4_wait_clnt_recover(struct nfs_client *clp) 230static int nfs4_wait_clnt_recover(struct nfs_client *clp)
@@ -259,15 +262,28 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
259{ 262{
260 struct nfs_client *clp = server->nfs_client; 263 struct nfs_client *clp = server->nfs_client;
261 struct nfs4_state *state = exception->state; 264 struct nfs4_state *state = exception->state;
265 struct inode *inode = exception->inode;
262 int ret = errorcode; 266 int ret = errorcode;
263 267
264 exception->retry = 0; 268 exception->retry = 0;
265 switch(errorcode) { 269 switch(errorcode) {
266 case 0: 270 case 0:
267 return 0; 271 return 0;
272 case -NFS4ERR_OPENMODE:
273 if (nfs_have_delegation(inode, FMODE_READ)) {
274 nfs_inode_return_delegation(inode);
275 exception->retry = 1;
276 return 0;
277 }
278 if (state == NULL)
279 break;
280 nfs4_schedule_stateid_recovery(server, state);
281 goto wait_on_recovery;
282 case -NFS4ERR_DELEG_REVOKED:
268 case -NFS4ERR_ADMIN_REVOKED: 283 case -NFS4ERR_ADMIN_REVOKED:
269 case -NFS4ERR_BAD_STATEID: 284 case -NFS4ERR_BAD_STATEID:
270 case -NFS4ERR_OPENMODE: 285 if (state != NULL)
286 nfs_remove_bad_delegation(state->inode);
271 if (state == NULL) 287 if (state == NULL)
272 break; 288 break;
273 nfs4_schedule_stateid_recovery(server, state); 289 nfs4_schedule_stateid_recovery(server, state);
@@ -360,16 +376,14 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp
360 * When updating highest_used_slotid there may be "holes" in the bitmap 376 * When updating highest_used_slotid there may be "holes" in the bitmap
361 * so we need to scan down from highest_used_slotid to 0 looking for the now 377 * so we need to scan down from highest_used_slotid to 0 looking for the now
362 * highest slotid in use. 378 * highest slotid in use.
363 * If none found, highest_used_slotid is set to -1. 379 * If none found, highest_used_slotid is set to NFS4_NO_SLOT.
364 * 380 *
365 * Must be called while holding tbl->slot_tbl_lock 381 * Must be called while holding tbl->slot_tbl_lock
366 */ 382 */
367static void 383static void
368nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid) 384nfs4_free_slot(struct nfs4_slot_table *tbl, u32 slotid)
369{ 385{
370 int slotid = free_slotid; 386 BUG_ON(slotid >= NFS4_MAX_SLOT_TABLE);
371
372 BUG_ON(slotid < 0 || slotid >= NFS4_MAX_SLOT_TABLE);
373 /* clear used bit in bitmap */ 387 /* clear used bit in bitmap */
374 __clear_bit(slotid, tbl->used_slots); 388 __clear_bit(slotid, tbl->used_slots);
375 389
@@ -379,10 +393,16 @@ nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid)
379 if (slotid < tbl->max_slots) 393 if (slotid < tbl->max_slots)
380 tbl->highest_used_slotid = slotid; 394 tbl->highest_used_slotid = slotid;
381 else 395 else
382 tbl->highest_used_slotid = -1; 396 tbl->highest_used_slotid = NFS4_NO_SLOT;
383 } 397 }
384 dprintk("%s: free_slotid %u highest_used_slotid %d\n", __func__, 398 dprintk("%s: slotid %u highest_used_slotid %d\n", __func__,
385 free_slotid, tbl->highest_used_slotid); 399 slotid, tbl->highest_used_slotid);
400}
401
402bool nfs4_set_task_privileged(struct rpc_task *task, void *dummy)
403{
404 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
405 return true;
386} 406}
387 407
388/* 408/*
@@ -390,16 +410,13 @@ nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid)
390 */ 410 */
391static void nfs4_check_drain_fc_complete(struct nfs4_session *ses) 411static void nfs4_check_drain_fc_complete(struct nfs4_session *ses)
392{ 412{
393 struct rpc_task *task;
394
395 if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state)) { 413 if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state)) {
396 task = rpc_wake_up_next(&ses->fc_slot_table.slot_tbl_waitq); 414 rpc_wake_up_first(&ses->fc_slot_table.slot_tbl_waitq,
397 if (task) 415 nfs4_set_task_privileged, NULL);
398 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
399 return; 416 return;
400 } 417 }
401 418
402 if (ses->fc_slot_table.highest_used_slotid != -1) 419 if (ses->fc_slot_table.highest_used_slotid != NFS4_NO_SLOT)
403 return; 420 return;
404 421
405 dprintk("%s COMPLETE: Session Fore Channel Drained\n", __func__); 422 dprintk("%s COMPLETE: Session Fore Channel Drained\n", __func__);
@@ -412,7 +429,7 @@ static void nfs4_check_drain_fc_complete(struct nfs4_session *ses)
412void nfs4_check_drain_bc_complete(struct nfs4_session *ses) 429void nfs4_check_drain_bc_complete(struct nfs4_session *ses)
413{ 430{
414 if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state) || 431 if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state) ||
415 ses->bc_slot_table.highest_used_slotid != -1) 432 ses->bc_slot_table.highest_used_slotid != NFS4_NO_SLOT)
416 return; 433 return;
417 dprintk("%s COMPLETE: Session Back Channel Drained\n", __func__); 434 dprintk("%s COMPLETE: Session Back Channel Drained\n", __func__);
418 complete(&ses->bc_slot_table.complete); 435 complete(&ses->bc_slot_table.complete);
@@ -507,25 +524,25 @@ static int nfs4_sequence_done(struct rpc_task *task,
507 * nfs4_find_slot looks for an unset bit in the used_slots bitmap. 524 * nfs4_find_slot looks for an unset bit in the used_slots bitmap.
508 * If found, we mark the slot as used, update the highest_used_slotid, 525 * If found, we mark the slot as used, update the highest_used_slotid,
509 * and respectively set up the sequence operation args. 526 * and respectively set up the sequence operation args.
510 * The slot number is returned if found, or NFS4_MAX_SLOT_TABLE otherwise. 527 * The slot number is returned if found, or NFS4_NO_SLOT otherwise.
511 * 528 *
512 * Note: must be called with under the slot_tbl_lock. 529 * Note: must be called with under the slot_tbl_lock.
513 */ 530 */
514static u8 531static u32
515nfs4_find_slot(struct nfs4_slot_table *tbl) 532nfs4_find_slot(struct nfs4_slot_table *tbl)
516{ 533{
517 int slotid; 534 u32 slotid;
518 u8 ret_id = NFS4_MAX_SLOT_TABLE; 535 u32 ret_id = NFS4_NO_SLOT;
519 BUILD_BUG_ON((u8)NFS4_MAX_SLOT_TABLE != (int)NFS4_MAX_SLOT_TABLE);
520 536
521 dprintk("--> %s used_slots=%04lx highest_used=%d max_slots=%d\n", 537 dprintk("--> %s used_slots=%04lx highest_used=%u max_slots=%u\n",
522 __func__, tbl->used_slots[0], tbl->highest_used_slotid, 538 __func__, tbl->used_slots[0], tbl->highest_used_slotid,
523 tbl->max_slots); 539 tbl->max_slots);
524 slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slots); 540 slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slots);
525 if (slotid >= tbl->max_slots) 541 if (slotid >= tbl->max_slots)
526 goto out; 542 goto out;
527 __set_bit(slotid, tbl->used_slots); 543 __set_bit(slotid, tbl->used_slots);
528 if (slotid > tbl->highest_used_slotid) 544 if (slotid > tbl->highest_used_slotid ||
545 tbl->highest_used_slotid == NFS4_NO_SLOT)
529 tbl->highest_used_slotid = slotid; 546 tbl->highest_used_slotid = slotid;
530 ret_id = slotid; 547 ret_id = slotid;
531out: 548out:
@@ -534,15 +551,25 @@ out:
534 return ret_id; 551 return ret_id;
535} 552}
536 553
554static void nfs41_init_sequence(struct nfs4_sequence_args *args,
555 struct nfs4_sequence_res *res, int cache_reply)
556{
557 args->sa_session = NULL;
558 args->sa_cache_this = 0;
559 if (cache_reply)
560 args->sa_cache_this = 1;
561 res->sr_session = NULL;
562 res->sr_slot = NULL;
563}
564
537int nfs41_setup_sequence(struct nfs4_session *session, 565int nfs41_setup_sequence(struct nfs4_session *session,
538 struct nfs4_sequence_args *args, 566 struct nfs4_sequence_args *args,
539 struct nfs4_sequence_res *res, 567 struct nfs4_sequence_res *res,
540 int cache_reply,
541 struct rpc_task *task) 568 struct rpc_task *task)
542{ 569{
543 struct nfs4_slot *slot; 570 struct nfs4_slot *slot;
544 struct nfs4_slot_table *tbl; 571 struct nfs4_slot_table *tbl;
545 u8 slotid; 572 u32 slotid;
546 573
547 dprintk("--> %s\n", __func__); 574 dprintk("--> %s\n", __func__);
548 /* slot already allocated? */ 575 /* slot already allocated? */
@@ -570,7 +597,7 @@ int nfs41_setup_sequence(struct nfs4_session *session,
570 } 597 }
571 598
572 slotid = nfs4_find_slot(tbl); 599 slotid = nfs4_find_slot(tbl);
573 if (slotid == NFS4_MAX_SLOT_TABLE) { 600 if (slotid == NFS4_NO_SLOT) {
574 rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); 601 rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
575 spin_unlock(&tbl->slot_tbl_lock); 602 spin_unlock(&tbl->slot_tbl_lock);
576 dprintk("<-- %s: no free slots\n", __func__); 603 dprintk("<-- %s: no free slots\n", __func__);
@@ -582,7 +609,6 @@ int nfs41_setup_sequence(struct nfs4_session *session,
582 slot = tbl->slots + slotid; 609 slot = tbl->slots + slotid;
583 args->sa_session = session; 610 args->sa_session = session;
584 args->sa_slotid = slotid; 611 args->sa_slotid = slotid;
585 args->sa_cache_this = cache_reply;
586 612
587 dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr); 613 dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr);
588 614
@@ -602,24 +628,19 @@ EXPORT_SYMBOL_GPL(nfs41_setup_sequence);
602int nfs4_setup_sequence(const struct nfs_server *server, 628int nfs4_setup_sequence(const struct nfs_server *server,
603 struct nfs4_sequence_args *args, 629 struct nfs4_sequence_args *args,
604 struct nfs4_sequence_res *res, 630 struct nfs4_sequence_res *res,
605 int cache_reply,
606 struct rpc_task *task) 631 struct rpc_task *task)
607{ 632{
608 struct nfs4_session *session = nfs4_get_session(server); 633 struct nfs4_session *session = nfs4_get_session(server);
609 int ret = 0; 634 int ret = 0;
610 635
611 if (session == NULL) { 636 if (session == NULL)
612 args->sa_session = NULL;
613 res->sr_session = NULL;
614 goto out; 637 goto out;
615 }
616 638
617 dprintk("--> %s clp %p session %p sr_slot %td\n", 639 dprintk("--> %s clp %p session %p sr_slot %td\n",
618 __func__, session->clp, session, res->sr_slot ? 640 __func__, session->clp, session, res->sr_slot ?
619 res->sr_slot - session->fc_slot_table.slots : -1); 641 res->sr_slot - session->fc_slot_table.slots : -1);
620 642
621 ret = nfs41_setup_sequence(session, args, res, cache_reply, 643 ret = nfs41_setup_sequence(session, args, res, task);
622 task);
623out: 644out:
624 dprintk("<-- %s status=%d\n", __func__, ret); 645 dprintk("<-- %s status=%d\n", __func__, ret);
625 return ret; 646 return ret;
@@ -629,7 +650,6 @@ struct nfs41_call_sync_data {
629 const struct nfs_server *seq_server; 650 const struct nfs_server *seq_server;
630 struct nfs4_sequence_args *seq_args; 651 struct nfs4_sequence_args *seq_args;
631 struct nfs4_sequence_res *seq_res; 652 struct nfs4_sequence_res *seq_res;
632 int cache_reply;
633}; 653};
634 654
635static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata) 655static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata)
@@ -639,7 +659,7 @@ static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata)
639 dprintk("--> %s data->seq_server %p\n", __func__, data->seq_server); 659 dprintk("--> %s data->seq_server %p\n", __func__, data->seq_server);
640 660
641 if (nfs4_setup_sequence(data->seq_server, data->seq_args, 661 if (nfs4_setup_sequence(data->seq_server, data->seq_args,
642 data->seq_res, data->cache_reply, task)) 662 data->seq_res, task))
643 return; 663 return;
644 rpc_call_start(task); 664 rpc_call_start(task);
645} 665}
@@ -657,12 +677,12 @@ static void nfs41_call_sync_done(struct rpc_task *task, void *calldata)
657 nfs41_sequence_done(task, data->seq_res); 677 nfs41_sequence_done(task, data->seq_res);
658} 678}
659 679
660struct rpc_call_ops nfs41_call_sync_ops = { 680static const struct rpc_call_ops nfs41_call_sync_ops = {
661 .rpc_call_prepare = nfs41_call_sync_prepare, 681 .rpc_call_prepare = nfs41_call_sync_prepare,
662 .rpc_call_done = nfs41_call_sync_done, 682 .rpc_call_done = nfs41_call_sync_done,
663}; 683};
664 684
665struct rpc_call_ops nfs41_call_priv_sync_ops = { 685static const struct rpc_call_ops nfs41_call_priv_sync_ops = {
666 .rpc_call_prepare = nfs41_call_priv_sync_prepare, 686 .rpc_call_prepare = nfs41_call_priv_sync_prepare,
667 .rpc_call_done = nfs41_call_sync_done, 687 .rpc_call_done = nfs41_call_sync_done,
668}; 688};
@@ -672,7 +692,6 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
672 struct rpc_message *msg, 692 struct rpc_message *msg,
673 struct nfs4_sequence_args *args, 693 struct nfs4_sequence_args *args,
674 struct nfs4_sequence_res *res, 694 struct nfs4_sequence_res *res,
675 int cache_reply,
676 int privileged) 695 int privileged)
677{ 696{
678 int ret; 697 int ret;
@@ -681,7 +700,6 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
681 .seq_server = server, 700 .seq_server = server,
682 .seq_args = args, 701 .seq_args = args,
683 .seq_res = res, 702 .seq_res = res,
684 .cache_reply = cache_reply,
685 }; 703 };
686 struct rpc_task_setup task_setup = { 704 struct rpc_task_setup task_setup = {
687 .rpc_client = clnt, 705 .rpc_client = clnt,
@@ -690,7 +708,6 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
690 .callback_data = &data 708 .callback_data = &data
691 }; 709 };
692 710
693 res->sr_slot = NULL;
694 if (privileged) 711 if (privileged)
695 task_setup.callback_ops = &nfs41_call_priv_sync_ops; 712 task_setup.callback_ops = &nfs41_call_priv_sync_ops;
696 task = rpc_run_task(&task_setup); 713 task = rpc_run_task(&task_setup);
@@ -710,10 +727,17 @@ int _nfs4_call_sync_session(struct rpc_clnt *clnt,
710 struct nfs4_sequence_res *res, 727 struct nfs4_sequence_res *res,
711 int cache_reply) 728 int cache_reply)
712{ 729{
713 return nfs4_call_sync_sequence(clnt, server, msg, args, res, cache_reply, 0); 730 nfs41_init_sequence(args, res, cache_reply);
731 return nfs4_call_sync_sequence(clnt, server, msg, args, res, 0);
714} 732}
715 733
716#else 734#else
735static inline
736void nfs41_init_sequence(struct nfs4_sequence_args *args,
737 struct nfs4_sequence_res *res, int cache_reply)
738{
739}
740
717static int nfs4_sequence_done(struct rpc_task *task, 741static int nfs4_sequence_done(struct rpc_task *task,
718 struct nfs4_sequence_res *res) 742 struct nfs4_sequence_res *res)
719{ 743{
@@ -728,7 +752,7 @@ int _nfs4_call_sync(struct rpc_clnt *clnt,
728 struct nfs4_sequence_res *res, 752 struct nfs4_sequence_res *res,
729 int cache_reply) 753 int cache_reply)
730{ 754{
731 args->sa_session = res->sr_session = NULL; 755 nfs41_init_sequence(args, res, cache_reply);
732 return rpc_call_sync(clnt, msg, 0); 756 return rpc_call_sync(clnt, msg, 0);
733} 757}
734 758
@@ -815,20 +839,22 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
815 p->o_arg.open_flags = flags; 839 p->o_arg.open_flags = flags;
816 p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); 840 p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE);
817 p->o_arg.clientid = server->nfs_client->cl_clientid; 841 p->o_arg.clientid = server->nfs_client->cl_clientid;
818 p->o_arg.id = sp->so_owner_id.id; 842 p->o_arg.id = sp->so_seqid.owner_id;
819 p->o_arg.name = &dentry->d_name; 843 p->o_arg.name = &dentry->d_name;
820 p->o_arg.server = server; 844 p->o_arg.server = server;
821 p->o_arg.bitmask = server->attr_bitmask; 845 p->o_arg.bitmask = server->attr_bitmask;
822 p->o_arg.dir_bitmask = server->cache_consistency_bitmask; 846 p->o_arg.dir_bitmask = server->cache_consistency_bitmask;
823 p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; 847 p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
824 if (flags & O_CREAT) { 848 if (attrs != NULL && attrs->ia_valid != 0) {
825 u32 *s; 849 __be32 verf[2];
826 850
827 p->o_arg.u.attrs = &p->attrs; 851 p->o_arg.u.attrs = &p->attrs;
828 memcpy(&p->attrs, attrs, sizeof(p->attrs)); 852 memcpy(&p->attrs, attrs, sizeof(p->attrs));
829 s = (u32 *) p->o_arg.u.verifier.data; 853
830 s[0] = jiffies; 854 verf[0] = jiffies;
831 s[1] = current->pid; 855 verf[1] = current->pid;
856 memcpy(p->o_arg.u.verifier.data, verf,
857 sizeof(p->o_arg.u.verifier.data));
832 } 858 }
833 p->c_arg.fh = &p->o_res.fh; 859 p->c_arg.fh = &p->o_res.fh;
834 p->c_arg.stateid = &p->o_res.stateid; 860 p->c_arg.stateid = &p->o_res.stateid;
@@ -878,7 +904,7 @@ static int can_open_cached(struct nfs4_state *state, fmode_t mode, int open_mode
878{ 904{
879 int ret = 0; 905 int ret = 0;
880 906
881 if (open_mode & O_EXCL) 907 if (open_mode & (O_EXCL|O_TRUNC))
882 goto out; 908 goto out;
883 switch (mode & (FMODE_READ|FMODE_WRITE)) { 909 switch (mode & (FMODE_READ|FMODE_WRITE)) {
884 case FMODE_READ: 910 case FMODE_READ:
@@ -927,8 +953,8 @@ static void update_open_stateflags(struct nfs4_state *state, fmode_t fmode)
927static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode) 953static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
928{ 954{
929 if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) 955 if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
930 memcpy(state->stateid.data, stateid->data, sizeof(state->stateid.data)); 956 nfs4_stateid_copy(&state->stateid, stateid);
931 memcpy(state->open_stateid.data, stateid->data, sizeof(state->open_stateid.data)); 957 nfs4_stateid_copy(&state->open_stateid, stateid);
932 switch (fmode) { 958 switch (fmode) {
933 case FMODE_READ: 959 case FMODE_READ:
934 set_bit(NFS_O_RDONLY_STATE, &state->flags); 960 set_bit(NFS_O_RDONLY_STATE, &state->flags);
@@ -956,7 +982,7 @@ static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_s
956 */ 982 */
957 write_seqlock(&state->seqlock); 983 write_seqlock(&state->seqlock);
958 if (deleg_stateid != NULL) { 984 if (deleg_stateid != NULL) {
959 memcpy(state->stateid.data, deleg_stateid->data, sizeof(state->stateid.data)); 985 nfs4_stateid_copy(&state->stateid, deleg_stateid);
960 set_bit(NFS_DELEGATED_STATE, &state->flags); 986 set_bit(NFS_DELEGATED_STATE, &state->flags);
961 } 987 }
962 if (open_stateid != NULL) 988 if (open_stateid != NULL)
@@ -987,7 +1013,7 @@ static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stat
987 1013
988 if (delegation == NULL) 1014 if (delegation == NULL)
989 delegation = &deleg_cur->stateid; 1015 delegation = &deleg_cur->stateid;
990 else if (memcmp(deleg_cur->stateid.data, delegation->data, NFS4_STATEID_SIZE) != 0) 1016 else if (!nfs4_stateid_match(&deleg_cur->stateid, delegation))
991 goto no_delegation_unlock; 1017 goto no_delegation_unlock;
992 1018
993 nfs_mark_delegation_referenced(deleg_cur); 1019 nfs_mark_delegation_referenced(deleg_cur);
@@ -1026,7 +1052,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
1026 struct nfs4_state *state = opendata->state; 1052 struct nfs4_state *state = opendata->state;
1027 struct nfs_inode *nfsi = NFS_I(state->inode); 1053 struct nfs_inode *nfsi = NFS_I(state->inode);
1028 struct nfs_delegation *delegation; 1054 struct nfs_delegation *delegation;
1029 int open_mode = opendata->o_arg.open_flags & O_EXCL; 1055 int open_mode = opendata->o_arg.open_flags & (O_EXCL|O_TRUNC);
1030 fmode_t fmode = opendata->o_arg.fmode; 1056 fmode_t fmode = opendata->o_arg.fmode;
1031 nfs4_stateid stateid; 1057 nfs4_stateid stateid;
1032 int ret = -EAGAIN; 1058 int ret = -EAGAIN;
@@ -1048,7 +1074,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
1048 break; 1074 break;
1049 } 1075 }
1050 /* Save the delegation */ 1076 /* Save the delegation */
1051 memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data)); 1077 nfs4_stateid_copy(&stateid, &delegation->stateid);
1052 rcu_read_unlock(); 1078 rcu_read_unlock();
1053 ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode); 1079 ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode);
1054 if (ret != 0) 1080 if (ret != 0)
@@ -1090,6 +1116,7 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data
1090 if (state == NULL) 1116 if (state == NULL)
1091 goto err_put_inode; 1117 goto err_put_inode;
1092 if (data->o_res.delegation_type != 0) { 1118 if (data->o_res.delegation_type != 0) {
1119 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
1093 int delegation_flags = 0; 1120 int delegation_flags = 0;
1094 1121
1095 rcu_read_lock(); 1122 rcu_read_lock();
@@ -1101,7 +1128,7 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data
1101 pr_err_ratelimited("NFS: Broken NFSv4 server %s is " 1128 pr_err_ratelimited("NFS: Broken NFSv4 server %s is "
1102 "returning a delegation for " 1129 "returning a delegation for "
1103 "OPEN(CLAIM_DELEGATE_CUR)\n", 1130 "OPEN(CLAIM_DELEGATE_CUR)\n",
1104 NFS_CLIENT(inode)->cl_server); 1131 clp->cl_hostname);
1105 } else if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0) 1132 } else if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0)
1106 nfs_inode_set_delegation(state->inode, 1133 nfs_inode_set_delegation(state->inode,
1107 data->owner->so_cred, 1134 data->owner->so_cred,
@@ -1210,10 +1237,10 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
1210 * Check if we need to update the current stateid. 1237 * Check if we need to update the current stateid.
1211 */ 1238 */
1212 if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0 && 1239 if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0 &&
1213 memcmp(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)) != 0) { 1240 !nfs4_stateid_match(&state->stateid, &state->open_stateid)) {
1214 write_seqlock(&state->seqlock); 1241 write_seqlock(&state->seqlock);
1215 if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) 1242 if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
1216 memcpy(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)); 1243 nfs4_stateid_copy(&state->stateid, &state->open_stateid);
1217 write_sequnlock(&state->seqlock); 1244 write_sequnlock(&state->seqlock);
1218 } 1245 }
1219 return 0; 1246 return 0;
@@ -1282,8 +1309,7 @@ static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs
1282 if (IS_ERR(opendata)) 1309 if (IS_ERR(opendata))
1283 return PTR_ERR(opendata); 1310 return PTR_ERR(opendata);
1284 opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR; 1311 opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR;
1285 memcpy(opendata->o_arg.u.delegation.data, stateid->data, 1312 nfs4_stateid_copy(&opendata->o_arg.u.delegation, stateid);
1286 sizeof(opendata->o_arg.u.delegation.data));
1287 ret = nfs4_open_recover(opendata, state); 1313 ret = nfs4_open_recover(opendata, state);
1288 nfs4_opendata_put(opendata); 1314 nfs4_opendata_put(opendata);
1289 return ret; 1315 return ret;
@@ -1319,8 +1345,11 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
1319 * The show must go on: exit, but mark the 1345 * The show must go on: exit, but mark the
1320 * stateid as needing recovery. 1346 * stateid as needing recovery.
1321 */ 1347 */
1348 case -NFS4ERR_DELEG_REVOKED:
1322 case -NFS4ERR_ADMIN_REVOKED: 1349 case -NFS4ERR_ADMIN_REVOKED:
1323 case -NFS4ERR_BAD_STATEID: 1350 case -NFS4ERR_BAD_STATEID:
1351 nfs_inode_find_state_and_recover(state->inode,
1352 stateid);
1324 nfs4_schedule_stateid_recovery(server, state); 1353 nfs4_schedule_stateid_recovery(server, state);
1325 case -EKEYEXPIRED: 1354 case -EKEYEXPIRED:
1326 /* 1355 /*
@@ -1345,8 +1374,7 @@ static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
1345 1374
1346 data->rpc_status = task->tk_status; 1375 data->rpc_status = task->tk_status;
1347 if (data->rpc_status == 0) { 1376 if (data->rpc_status == 0) {
1348 memcpy(data->o_res.stateid.data, data->c_res.stateid.data, 1377 nfs4_stateid_copy(&data->o_res.stateid, &data->c_res.stateid);
1349 sizeof(data->o_res.stateid.data));
1350 nfs_confirm_seqid(&data->owner->so_seqid, 0); 1378 nfs_confirm_seqid(&data->owner->so_seqid, 0);
1351 renew_lease(data->o_res.server, data->timestamp); 1379 renew_lease(data->o_res.server, data->timestamp);
1352 data->rpc_done = 1; 1380 data->rpc_done = 1;
@@ -1440,7 +1468,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
1440 rcu_read_unlock(); 1468 rcu_read_unlock();
1441 } 1469 }
1442 /* Update sequence id. */ 1470 /* Update sequence id. */
1443 data->o_arg.id = sp->so_owner_id.id; 1471 data->o_arg.id = sp->so_seqid.owner_id;
1444 data->o_arg.clientid = sp->so_server->nfs_client->cl_clientid; 1472 data->o_arg.clientid = sp->so_server->nfs_client->cl_clientid;
1445 if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) { 1473 if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) {
1446 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; 1474 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
@@ -1449,7 +1477,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
1449 data->timestamp = jiffies; 1477 data->timestamp = jiffies;
1450 if (nfs4_setup_sequence(data->o_arg.server, 1478 if (nfs4_setup_sequence(data->o_arg.server,
1451 &data->o_arg.seq_args, 1479 &data->o_arg.seq_args,
1452 &data->o_res.seq_res, 1, task)) 1480 &data->o_res.seq_res, task))
1453 return; 1481 return;
1454 rpc_call_start(task); 1482 rpc_call_start(task);
1455 return; 1483 return;
@@ -1551,6 +1579,7 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover)
1551 }; 1579 };
1552 int status; 1580 int status;
1553 1581
1582 nfs41_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1);
1554 kref_get(&data->kref); 1583 kref_get(&data->kref);
1555 data->rpc_done = 0; 1584 data->rpc_done = 0;
1556 data->rpc_status = 0; 1585 data->rpc_status = 0;
@@ -1712,15 +1741,32 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta
1712} 1741}
1713 1742
1714#if defined(CONFIG_NFS_V4_1) 1743#if defined(CONFIG_NFS_V4_1)
1715static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) 1744static int nfs41_check_expired_stateid(struct nfs4_state *state, nfs4_stateid *stateid, unsigned int flags)
1716{ 1745{
1717 int status; 1746 int status = NFS_OK;
1718 struct nfs_server *server = NFS_SERVER(state->inode); 1747 struct nfs_server *server = NFS_SERVER(state->inode);
1719 1748
1720 status = nfs41_test_stateid(server, state); 1749 if (state->flags & flags) {
1721 if (status == NFS_OK) 1750 status = nfs41_test_stateid(server, stateid);
1722 return 0; 1751 if (status != NFS_OK) {
1723 nfs41_free_stateid(server, state); 1752 nfs41_free_stateid(server, stateid);
1753 state->flags &= ~flags;
1754 }
1755 }
1756 return status;
1757}
1758
1759static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state)
1760{
1761 int deleg_status, open_status;
1762 int deleg_flags = 1 << NFS_DELEGATED_STATE;
1763 int open_flags = (1 << NFS_O_RDONLY_STATE) | (1 << NFS_O_WRONLY_STATE) | (1 << NFS_O_RDWR_STATE);
1764
1765 deleg_status = nfs41_check_expired_stateid(state, &state->stateid, deleg_flags);
1766 open_status = nfs41_check_expired_stateid(state, &state->open_stateid, open_flags);
1767
1768 if ((deleg_status == NFS_OK) && (open_status == NFS_OK))
1769 return NFS_OK;
1724 return nfs4_open_expired(sp, state); 1770 return nfs4_open_expired(sp, state);
1725} 1771}
1726#endif 1772#endif
@@ -1754,7 +1800,8 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode
1754 1800
1755 /* Protect against reboot recovery conflicts */ 1801 /* Protect against reboot recovery conflicts */
1756 status = -ENOMEM; 1802 status = -ENOMEM;
1757 if (!(sp = nfs4_get_state_owner(server, cred))) { 1803 sp = nfs4_get_state_owner(server, cred, GFP_KERNEL);
1804 if (sp == NULL) {
1758 dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n"); 1805 dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n");
1759 goto out_err; 1806 goto out_err;
1760 } 1807 }
@@ -1829,7 +1876,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
1829 * the user though... 1876 * the user though...
1830 */ 1877 */
1831 if (status == -NFS4ERR_BAD_SEQID) { 1878 if (status == -NFS4ERR_BAD_SEQID) {
1832 printk(KERN_WARNING "NFS: v4 server %s " 1879 pr_warn_ratelimited("NFS: v4 server %s "
1833 " returned a bad sequence-id error!\n", 1880 " returned a bad sequence-id error!\n",
1834 NFS_SERVER(dir)->nfs_client->cl_hostname); 1881 NFS_SERVER(dir)->nfs_client->cl_hostname);
1835 exception.retry = 1; 1882 exception.retry = 1;
@@ -1882,12 +1929,14 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
1882 1929
1883 nfs_fattr_init(fattr); 1930 nfs_fattr_init(fattr);
1884 1931
1885 if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) { 1932 if (state != NULL) {
1933 nfs4_select_rw_stateid(&arg.stateid, state, FMODE_WRITE,
1934 current->files, current->tgid);
1935 } else if (nfs4_copy_delegation_stateid(&arg.stateid, inode,
1936 FMODE_WRITE)) {
1886 /* Use that stateid */ 1937 /* Use that stateid */
1887 } else if (state != NULL) {
1888 nfs4_copy_stateid(&arg.stateid, state, current->files, current->tgid);
1889 } else 1938 } else
1890 memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); 1939 nfs4_stateid_copy(&arg.stateid, &zero_stateid);
1891 1940
1892 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); 1941 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
1893 if (status == 0 && state != NULL) 1942 if (status == 0 && state != NULL)
@@ -1900,7 +1949,10 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
1900 struct nfs4_state *state) 1949 struct nfs4_state *state)
1901{ 1950{
1902 struct nfs_server *server = NFS_SERVER(inode); 1951 struct nfs_server *server = NFS_SERVER(inode);
1903 struct nfs4_exception exception = { }; 1952 struct nfs4_exception exception = {
1953 .state = state,
1954 .inode = inode,
1955 };
1904 int err; 1956 int err;
1905 do { 1957 do {
1906 err = nfs4_handle_exception(server, 1958 err = nfs4_handle_exception(server,
@@ -1954,6 +2006,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
1954 struct nfs4_state *state = calldata->state; 2006 struct nfs4_state *state = calldata->state;
1955 struct nfs_server *server = NFS_SERVER(calldata->inode); 2007 struct nfs_server *server = NFS_SERVER(calldata->inode);
1956 2008
2009 dprintk("%s: begin!\n", __func__);
1957 if (!nfs4_sequence_done(task, &calldata->res.seq_res)) 2010 if (!nfs4_sequence_done(task, &calldata->res.seq_res))
1958 return; 2011 return;
1959 /* hmm. we are done with the inode, and in the process of freeing 2012 /* hmm. we are done with the inode, and in the process of freeing
@@ -1981,6 +2034,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
1981 } 2034 }
1982 nfs_release_seqid(calldata->arg.seqid); 2035 nfs_release_seqid(calldata->arg.seqid);
1983 nfs_refresh_inode(calldata->inode, calldata->res.fattr); 2036 nfs_refresh_inode(calldata->inode, calldata->res.fattr);
2037 dprintk("%s: done, ret = %d!\n", __func__, task->tk_status);
1984} 2038}
1985 2039
1986static void nfs4_close_prepare(struct rpc_task *task, void *data) 2040static void nfs4_close_prepare(struct rpc_task *task, void *data)
@@ -1989,6 +2043,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
1989 struct nfs4_state *state = calldata->state; 2043 struct nfs4_state *state = calldata->state;
1990 int call_close = 0; 2044 int call_close = 0;
1991 2045
2046 dprintk("%s: begin!\n", __func__);
1992 if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) 2047 if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
1993 return; 2048 return;
1994 2049
@@ -2013,7 +2068,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
2013 if (!call_close) { 2068 if (!call_close) {
2014 /* Note: exit _without_ calling nfs4_close_done */ 2069 /* Note: exit _without_ calling nfs4_close_done */
2015 task->tk_action = NULL; 2070 task->tk_action = NULL;
2016 return; 2071 goto out;
2017 } 2072 }
2018 2073
2019 if (calldata->arg.fmode == 0) { 2074 if (calldata->arg.fmode == 0) {
@@ -2022,17 +2077,20 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
2022 pnfs_roc_drain(calldata->inode, &calldata->roc_barrier)) { 2077 pnfs_roc_drain(calldata->inode, &calldata->roc_barrier)) {
2023 rpc_sleep_on(&NFS_SERVER(calldata->inode)->roc_rpcwaitq, 2078 rpc_sleep_on(&NFS_SERVER(calldata->inode)->roc_rpcwaitq,
2024 task, NULL); 2079 task, NULL);
2025 return; 2080 goto out;
2026 } 2081 }
2027 } 2082 }
2028 2083
2029 nfs_fattr_init(calldata->res.fattr); 2084 nfs_fattr_init(calldata->res.fattr);
2030 calldata->timestamp = jiffies; 2085 calldata->timestamp = jiffies;
2031 if (nfs4_setup_sequence(NFS_SERVER(calldata->inode), 2086 if (nfs4_setup_sequence(NFS_SERVER(calldata->inode),
2032 &calldata->arg.seq_args, &calldata->res.seq_res, 2087 &calldata->arg.seq_args,
2033 1, task)) 2088 &calldata->res.seq_res,
2034 return; 2089 task))
2090 goto out;
2035 rpc_call_start(task); 2091 rpc_call_start(task);
2092out:
2093 dprintk("%s: done!\n", __func__);
2036} 2094}
2037 2095
2038static const struct rpc_call_ops nfs4_close_ops = { 2096static const struct rpc_call_ops nfs4_close_ops = {
@@ -2074,6 +2132,7 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc)
2074 calldata = kzalloc(sizeof(*calldata), gfp_mask); 2132 calldata = kzalloc(sizeof(*calldata), gfp_mask);
2075 if (calldata == NULL) 2133 if (calldata == NULL)
2076 goto out; 2134 goto out;
2135 nfs41_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 1);
2077 calldata->inode = state->inode; 2136 calldata->inode = state->inode;
2078 calldata->state = state; 2137 calldata->state = state;
2079 calldata->arg.fh = NFS_FH(state->inode); 2138 calldata->arg.fh = NFS_FH(state->inode);
@@ -2182,6 +2241,7 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
2182 server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE; 2241 server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE;
2183 server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; 2242 server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
2184 server->acl_bitmask = res.acl_bitmask; 2243 server->acl_bitmask = res.acl_bitmask;
2244 server->fh_expire_type = res.fh_expire_type;
2185 } 2245 }
2186 2246
2187 return status; 2247 return status;
@@ -2303,7 +2363,6 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
2303 return nfs4_map_errors(status); 2363 return nfs4_map_errors(status);
2304} 2364}
2305 2365
2306static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr);
2307/* 2366/*
2308 * Get locations and (maybe) other attributes of a referral. 2367 * Get locations and (maybe) other attributes of a referral.
2309 * Note that we'll actually follow the referral later when 2368 * Note that we'll actually follow the referral later when
@@ -2420,6 +2479,10 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
2420 } 2479 }
2421 } 2480 }
2422 2481
2482 /* Deal with open(O_TRUNC) */
2483 if (sattr->ia_valid & ATTR_OPEN)
2484 sattr->ia_valid &= ~(ATTR_MTIME|ATTR_CTIME|ATTR_OPEN);
2485
2423 status = nfs4_do_setattr(inode, cred, fattr, sattr, state); 2486 status = nfs4_do_setattr(inode, cred, fattr, sattr, state);
2424 if (status == 0) 2487 if (status == 0)
2425 nfs_setattr_update_inode(inode, sattr); 2488 nfs_setattr_update_inode(inode, sattr);
@@ -2494,7 +2557,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
2494 struct nfs_server *server = NFS_SERVER(inode); 2557 struct nfs_server *server = NFS_SERVER(inode);
2495 struct nfs4_accessargs args = { 2558 struct nfs4_accessargs args = {
2496 .fh = NFS_FH(inode), 2559 .fh = NFS_FH(inode),
2497 .bitmask = server->attr_bitmask, 2560 .bitmask = server->cache_consistency_bitmask,
2498 }; 2561 };
2499 struct nfs4_accessres res = { 2562 struct nfs4_accessres res = {
2500 .server = server, 2563 .server = server,
@@ -2712,8 +2775,18 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
2712 2775
2713 args->bitmask = server->cache_consistency_bitmask; 2776 args->bitmask = server->cache_consistency_bitmask;
2714 res->server = server; 2777 res->server = server;
2715 res->seq_res.sr_slot = NULL;
2716 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; 2778 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE];
2779 nfs41_init_sequence(&args->seq_args, &res->seq_res, 1);
2780}
2781
2782static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data)
2783{
2784 if (nfs4_setup_sequence(NFS_SERVER(data->dir),
2785 &data->args.seq_args,
2786 &data->res.seq_res,
2787 task))
2788 return;
2789 rpc_call_start(task);
2717} 2790}
2718 2791
2719static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir) 2792static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
@@ -2738,6 +2811,17 @@ static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
2738 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME]; 2811 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME];
2739 arg->bitmask = server->attr_bitmask; 2812 arg->bitmask = server->attr_bitmask;
2740 res->server = server; 2813 res->server = server;
2814 nfs41_init_sequence(&arg->seq_args, &res->seq_res, 1);
2815}
2816
2817static void nfs4_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data)
2818{
2819 if (nfs4_setup_sequence(NFS_SERVER(data->old_dir),
2820 &data->args.seq_args,
2821 &data->res.seq_res,
2822 task))
2823 return;
2824 rpc_call_start(task);
2741} 2825}
2742 2826
2743static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir, 2827static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
@@ -3232,6 +3316,17 @@ static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message
3232 data->timestamp = jiffies; 3316 data->timestamp = jiffies;
3233 data->read_done_cb = nfs4_read_done_cb; 3317 data->read_done_cb = nfs4_read_done_cb;
3234 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; 3318 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
3319 nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
3320}
3321
3322static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
3323{
3324 if (nfs4_setup_sequence(NFS_SERVER(data->inode),
3325 &data->args.seq_args,
3326 &data->res.seq_res,
3327 task))
3328 return;
3329 rpc_call_start(task);
3235} 3330}
3236 3331
3237/* Reset the the nfs_read_data to send the read to the MDS. */ 3332/* Reset the the nfs_read_data to send the read to the MDS. */
@@ -3305,6 +3400,17 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
3305 data->timestamp = jiffies; 3400 data->timestamp = jiffies;
3306 3401
3307 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; 3402 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE];
3403 nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
3404}
3405
3406static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
3407{
3408 if (nfs4_setup_sequence(NFS_SERVER(data->inode),
3409 &data->args.seq_args,
3410 &data->res.seq_res,
3411 task))
3412 return;
3413 rpc_call_start(task);
3308} 3414}
3309 3415
3310static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *data) 3416static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *data)
@@ -3339,6 +3445,7 @@ static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_messa
3339 data->write_done_cb = nfs4_commit_done_cb; 3445 data->write_done_cb = nfs4_commit_done_cb;
3340 data->res.server = server; 3446 data->res.server = server;
3341 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; 3447 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
3448 nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
3342} 3449}
3343 3450
3344struct nfs4_renewdata { 3451struct nfs4_renewdata {
@@ -3714,8 +3821,11 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3714 if (task->tk_status >= 0) 3821 if (task->tk_status >= 0)
3715 return 0; 3822 return 0;
3716 switch(task->tk_status) { 3823 switch(task->tk_status) {
3824 case -NFS4ERR_DELEG_REVOKED:
3717 case -NFS4ERR_ADMIN_REVOKED: 3825 case -NFS4ERR_ADMIN_REVOKED:
3718 case -NFS4ERR_BAD_STATEID: 3826 case -NFS4ERR_BAD_STATEID:
3827 if (state != NULL)
3828 nfs_remove_bad_delegation(state->inode);
3719 case -NFS4ERR_OPENMODE: 3829 case -NFS4ERR_OPENMODE:
3720 if (state == NULL) 3830 if (state == NULL)
3721 break; 3831 break;
@@ -3764,6 +3874,16 @@ wait_on_recovery:
3764 return -EAGAIN; 3874 return -EAGAIN;
3765} 3875}
3766 3876
3877static void nfs4_construct_boot_verifier(struct nfs_client *clp,
3878 nfs4_verifier *bootverf)
3879{
3880 __be32 verf[2];
3881
3882 verf[0] = htonl((u32)clp->cl_boot_time.tv_sec);
3883 verf[1] = htonl((u32)clp->cl_boot_time.tv_nsec);
3884 memcpy(bootverf->data, verf, sizeof(bootverf->data));
3885}
3886
3767int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, 3887int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
3768 unsigned short port, struct rpc_cred *cred, 3888 unsigned short port, struct rpc_cred *cred,
3769 struct nfs4_setclientid_res *res) 3889 struct nfs4_setclientid_res *res)
@@ -3780,15 +3900,13 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
3780 .rpc_resp = res, 3900 .rpc_resp = res,
3781 .rpc_cred = cred, 3901 .rpc_cred = cred,
3782 }; 3902 };
3783 __be32 *p;
3784 int loop = 0; 3903 int loop = 0;
3785 int status; 3904 int status;
3786 3905
3787 p = (__be32*)sc_verifier.data; 3906 nfs4_construct_boot_verifier(clp, &sc_verifier);
3788 *p++ = htonl((u32)clp->cl_boot_time.tv_sec);
3789 *p = htonl((u32)clp->cl_boot_time.tv_nsec);
3790 3907
3791 for(;;) { 3908 for(;;) {
3909 rcu_read_lock();
3792 setclientid.sc_name_len = scnprintf(setclientid.sc_name, 3910 setclientid.sc_name_len = scnprintf(setclientid.sc_name,
3793 sizeof(setclientid.sc_name), "%s/%s %s %s %u", 3911 sizeof(setclientid.sc_name), "%s/%s %s %s %u",
3794 clp->cl_ipaddr, 3912 clp->cl_ipaddr,
@@ -3805,6 +3923,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
3805 setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr, 3923 setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr,
3806 sizeof(setclientid.sc_uaddr), "%s.%u.%u", 3924 sizeof(setclientid.sc_uaddr), "%s.%u.%u",
3807 clp->cl_ipaddr, port >> 8, port & 255); 3925 clp->cl_ipaddr, port >> 8, port & 255);
3926 rcu_read_unlock();
3808 3927
3809 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); 3928 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
3810 if (status != -NFS4ERR_CLID_INUSE) 3929 if (status != -NFS4ERR_CLID_INUSE)
@@ -3891,7 +4010,7 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data)
3891 4010
3892 if (nfs4_setup_sequence(d_data->res.server, 4011 if (nfs4_setup_sequence(d_data->res.server,
3893 &d_data->args.seq_args, 4012 &d_data->args.seq_args,
3894 &d_data->res.seq_res, 1, task)) 4013 &d_data->res.seq_res, task))
3895 return; 4014 return;
3896 rpc_call_start(task); 4015 rpc_call_start(task);
3897} 4016}
@@ -3925,11 +4044,12 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
3925 data = kzalloc(sizeof(*data), GFP_NOFS); 4044 data = kzalloc(sizeof(*data), GFP_NOFS);
3926 if (data == NULL) 4045 if (data == NULL)
3927 return -ENOMEM; 4046 return -ENOMEM;
4047 nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
3928 data->args.fhandle = &data->fh; 4048 data->args.fhandle = &data->fh;
3929 data->args.stateid = &data->stateid; 4049 data->args.stateid = &data->stateid;
3930 data->args.bitmask = server->attr_bitmask; 4050 data->args.bitmask = server->attr_bitmask;
3931 nfs_copy_fh(&data->fh, NFS_FH(inode)); 4051 nfs_copy_fh(&data->fh, NFS_FH(inode));
3932 memcpy(&data->stateid, stateid, sizeof(data->stateid)); 4052 nfs4_stateid_copy(&data->stateid, stateid);
3933 data->res.fattr = &data->fattr; 4053 data->res.fattr = &data->fattr;
3934 data->res.server = server; 4054 data->res.server = server;
3935 nfs_fattr_init(data->res.fattr); 4055 nfs_fattr_init(data->res.fattr);
@@ -4016,7 +4136,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
4016 if (status != 0) 4136 if (status != 0)
4017 goto out; 4137 goto out;
4018 lsp = request->fl_u.nfs4_fl.owner; 4138 lsp = request->fl_u.nfs4_fl.owner;
4019 arg.lock_owner.id = lsp->ls_id.id; 4139 arg.lock_owner.id = lsp->ls_seqid.owner_id;
4020 arg.lock_owner.s_dev = server->s_dev; 4140 arg.lock_owner.s_dev = server->s_dev;
4021 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); 4141 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
4022 switch (status) { 4142 switch (status) {
@@ -4112,9 +4232,8 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
4112 return; 4232 return;
4113 switch (task->tk_status) { 4233 switch (task->tk_status) {
4114 case 0: 4234 case 0:
4115 memcpy(calldata->lsp->ls_stateid.data, 4235 nfs4_stateid_copy(&calldata->lsp->ls_stateid,
4116 calldata->res.stateid.data, 4236 &calldata->res.stateid);
4117 sizeof(calldata->lsp->ls_stateid.data));
4118 renew_lease(calldata->server, calldata->timestamp); 4237 renew_lease(calldata->server, calldata->timestamp);
4119 break; 4238 break;
4120 case -NFS4ERR_BAD_STATEID: 4239 case -NFS4ERR_BAD_STATEID:
@@ -4142,7 +4261,7 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
4142 calldata->timestamp = jiffies; 4261 calldata->timestamp = jiffies;
4143 if (nfs4_setup_sequence(calldata->server, 4262 if (nfs4_setup_sequence(calldata->server,
4144 &calldata->arg.seq_args, 4263 &calldata->arg.seq_args,
4145 &calldata->res.seq_res, 1, task)) 4264 &calldata->res.seq_res, task))
4146 return; 4265 return;
4147 rpc_call_start(task); 4266 rpc_call_start(task);
4148} 4267}
@@ -4182,6 +4301,7 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
4182 return ERR_PTR(-ENOMEM); 4301 return ERR_PTR(-ENOMEM);
4183 } 4302 }
4184 4303
4304 nfs41_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1);
4185 msg.rpc_argp = &data->arg; 4305 msg.rpc_argp = &data->arg;
4186 msg.rpc_resp = &data->res; 4306 msg.rpc_resp = &data->res;
4187 task_setup_data.callback_data = data; 4307 task_setup_data.callback_data = data;
@@ -4261,7 +4381,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
4261 goto out_free_seqid; 4381 goto out_free_seqid;
4262 p->arg.lock_stateid = &lsp->ls_stateid; 4382 p->arg.lock_stateid = &lsp->ls_stateid;
4263 p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; 4383 p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
4264 p->arg.lock_owner.id = lsp->ls_id.id; 4384 p->arg.lock_owner.id = lsp->ls_seqid.owner_id;
4265 p->arg.lock_owner.s_dev = server->s_dev; 4385 p->arg.lock_owner.s_dev = server->s_dev;
4266 p->res.lock_seqid = p->arg.lock_seqid; 4386 p->res.lock_seqid = p->arg.lock_seqid;
4267 p->lsp = lsp; 4387 p->lsp = lsp;
@@ -4297,7 +4417,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
4297 data->timestamp = jiffies; 4417 data->timestamp = jiffies;
4298 if (nfs4_setup_sequence(data->server, 4418 if (nfs4_setup_sequence(data->server,
4299 &data->arg.seq_args, 4419 &data->arg.seq_args,
4300 &data->res.seq_res, 1, task)) 4420 &data->res.seq_res, task))
4301 return; 4421 return;
4302 rpc_call_start(task); 4422 rpc_call_start(task);
4303 dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status); 4423 dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status);
@@ -4326,8 +4446,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
4326 goto out; 4446 goto out;
4327 } 4447 }
4328 if (data->rpc_status == 0) { 4448 if (data->rpc_status == 0) {
4329 memcpy(data->lsp->ls_stateid.data, data->res.stateid.data, 4449 nfs4_stateid_copy(&data->lsp->ls_stateid, &data->res.stateid);
4330 sizeof(data->lsp->ls_stateid.data));
4331 data->lsp->ls_flags |= NFS_LOCK_INITIALIZED; 4450 data->lsp->ls_flags |= NFS_LOCK_INITIALIZED;
4332 renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp); 4451 renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp);
4333 } 4452 }
@@ -4415,6 +4534,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
4415 data->arg.reclaim = NFS_LOCK_RECLAIM; 4534 data->arg.reclaim = NFS_LOCK_RECLAIM;
4416 task_setup_data.callback_ops = &nfs4_recover_lock_ops; 4535 task_setup_data.callback_ops = &nfs4_recover_lock_ops;
4417 } 4536 }
4537 nfs41_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1);
4418 msg.rpc_argp = &data->arg; 4538 msg.rpc_argp = &data->arg;
4419 msg.rpc_resp = &data->res; 4539 msg.rpc_resp = &data->res;
4420 task_setup_data.callback_data = data; 4540 task_setup_data.callback_data = data;
@@ -4479,15 +4599,34 @@ out:
4479} 4599}
4480 4600
4481#if defined(CONFIG_NFS_V4_1) 4601#if defined(CONFIG_NFS_V4_1)
4482static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *request) 4602static int nfs41_check_expired_locks(struct nfs4_state *state)
4483{ 4603{
4484 int status; 4604 int status, ret = NFS_OK;
4605 struct nfs4_lock_state *lsp;
4485 struct nfs_server *server = NFS_SERVER(state->inode); 4606 struct nfs_server *server = NFS_SERVER(state->inode);
4486 4607
4487 status = nfs41_test_stateid(server, state); 4608 list_for_each_entry(lsp, &state->lock_states, ls_locks) {
4609 if (lsp->ls_flags & NFS_LOCK_INITIALIZED) {
4610 status = nfs41_test_stateid(server, &lsp->ls_stateid);
4611 if (status != NFS_OK) {
4612 nfs41_free_stateid(server, &lsp->ls_stateid);
4613 lsp->ls_flags &= ~NFS_LOCK_INITIALIZED;
4614 ret = status;
4615 }
4616 }
4617 };
4618
4619 return ret;
4620}
4621
4622static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *request)
4623{
4624 int status = NFS_OK;
4625
4626 if (test_bit(LK_STATE_IN_USE, &state->flags))
4627 status = nfs41_check_expired_locks(state);
4488 if (status == NFS_OK) 4628 if (status == NFS_OK)
4489 return 0; 4629 return status;
4490 nfs41_free_stateid(server, state);
4491 return nfs4_lock_expired(state, request); 4630 return nfs4_lock_expired(state, request);
4492} 4631}
4493#endif 4632#endif
@@ -4523,7 +4662,8 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
4523 /* Note: we always want to sleep here! */ 4662 /* Note: we always want to sleep here! */
4524 request->fl_flags = fl_flags | FL_SLEEP; 4663 request->fl_flags = fl_flags | FL_SLEEP;
4525 if (do_vfs_lock(request->fl_file, request) < 0) 4664 if (do_vfs_lock(request->fl_file, request) < 0)
4526 printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __func__); 4665 printk(KERN_WARNING "NFS: %s: VFS is out of sync with lock "
4666 "manager!\n", __func__);
4527out_unlock: 4667out_unlock:
4528 up_read(&nfsi->rwsem); 4668 up_read(&nfsi->rwsem);
4529out: 4669out:
@@ -4533,7 +4673,9 @@ out:
4533 4673
4534static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) 4674static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
4535{ 4675{
4536 struct nfs4_exception exception = { }; 4676 struct nfs4_exception exception = {
4677 .state = state,
4678 };
4537 int err; 4679 int err;
4538 4680
4539 do { 4681 do {
@@ -4603,8 +4745,8 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4603 err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW); 4745 err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW);
4604 switch (err) { 4746 switch (err) {
4605 default: 4747 default:
4606 printk(KERN_ERR "%s: unhandled error %d.\n", 4748 printk(KERN_ERR "NFS: %s: unhandled error "
4607 __func__, err); 4749 "%d.\n", __func__, err);
4608 case 0: 4750 case 0:
4609 case -ESTALE: 4751 case -ESTALE:
4610 goto out; 4752 goto out;
@@ -4626,6 +4768,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4626 * The show must go on: exit, but mark the 4768 * The show must go on: exit, but mark the
4627 * stateid as needing recovery. 4769 * stateid as needing recovery.
4628 */ 4770 */
4771 case -NFS4ERR_DELEG_REVOKED:
4629 case -NFS4ERR_ADMIN_REVOKED: 4772 case -NFS4ERR_ADMIN_REVOKED:
4630 case -NFS4ERR_BAD_STATEID: 4773 case -NFS4ERR_BAD_STATEID:
4631 case -NFS4ERR_OPENMODE: 4774 case -NFS4ERR_OPENMODE:
@@ -4655,33 +4798,44 @@ out:
4655 return err; 4798 return err;
4656} 4799}
4657 4800
4801struct nfs_release_lockowner_data {
4802 struct nfs4_lock_state *lsp;
4803 struct nfs_server *server;
4804 struct nfs_release_lockowner_args args;
4805};
4806
4658static void nfs4_release_lockowner_release(void *calldata) 4807static void nfs4_release_lockowner_release(void *calldata)
4659{ 4808{
4809 struct nfs_release_lockowner_data *data = calldata;
4810 nfs4_free_lock_state(data->server, data->lsp);
4660 kfree(calldata); 4811 kfree(calldata);
4661} 4812}
4662 4813
4663const struct rpc_call_ops nfs4_release_lockowner_ops = { 4814static const struct rpc_call_ops nfs4_release_lockowner_ops = {
4664 .rpc_release = nfs4_release_lockowner_release, 4815 .rpc_release = nfs4_release_lockowner_release,
4665}; 4816};
4666 4817
4667void nfs4_release_lockowner(const struct nfs4_lock_state *lsp) 4818int nfs4_release_lockowner(struct nfs4_lock_state *lsp)
4668{ 4819{
4669 struct nfs_server *server = lsp->ls_state->owner->so_server; 4820 struct nfs_server *server = lsp->ls_state->owner->so_server;
4670 struct nfs_release_lockowner_args *args; 4821 struct nfs_release_lockowner_data *data;
4671 struct rpc_message msg = { 4822 struct rpc_message msg = {
4672 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RELEASE_LOCKOWNER], 4823 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RELEASE_LOCKOWNER],
4673 }; 4824 };
4674 4825
4675 if (server->nfs_client->cl_mvops->minor_version != 0) 4826 if (server->nfs_client->cl_mvops->minor_version != 0)
4676 return; 4827 return -EINVAL;
4677 args = kmalloc(sizeof(*args), GFP_NOFS); 4828 data = kmalloc(sizeof(*data), GFP_NOFS);
4678 if (!args) 4829 if (!data)
4679 return; 4830 return -ENOMEM;
4680 args->lock_owner.clientid = server->nfs_client->cl_clientid; 4831 data->lsp = lsp;
4681 args->lock_owner.id = lsp->ls_id.id; 4832 data->server = server;
4682 args->lock_owner.s_dev = server->s_dev; 4833 data->args.lock_owner.clientid = server->nfs_client->cl_clientid;
4683 msg.rpc_argp = args; 4834 data->args.lock_owner.id = lsp->ls_seqid.owner_id;
4684 rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, args); 4835 data->args.lock_owner.s_dev = server->s_dev;
4836 msg.rpc_argp = &data->args;
4837 rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data);
4838 return 0;
4685} 4839}
4686 4840
4687#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl" 4841#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
@@ -4727,11 +4881,11 @@ static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr)
4727 if (!(((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) || 4881 if (!(((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) ||
4728 (fattr->valid & NFS_ATTR_FATTR_FILEID)) && 4882 (fattr->valid & NFS_ATTR_FATTR_FILEID)) &&
4729 (fattr->valid & NFS_ATTR_FATTR_FSID) && 4883 (fattr->valid & NFS_ATTR_FATTR_FSID) &&
4730 (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL))) 4884 (fattr->valid & NFS_ATTR_FATTR_V4_LOCATIONS)))
4731 return; 4885 return;
4732 4886
4733 fattr->valid |= NFS_ATTR_FATTR_TYPE | NFS_ATTR_FATTR_MODE | 4887 fattr->valid |= NFS_ATTR_FATTR_TYPE | NFS_ATTR_FATTR_MODE |
4734 NFS_ATTR_FATTR_NLINK; 4888 NFS_ATTR_FATTR_NLINK | NFS_ATTR_FATTR_V4_REFERRAL;
4735 fattr->mode = S_IFDIR | S_IRUGO | S_IXUGO; 4889 fattr->mode = S_IFDIR | S_IRUGO | S_IXUGO;
4736 fattr->nlink = 2; 4890 fattr->nlink = 2;
4737} 4891}
@@ -4798,7 +4952,8 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct
4798 return status; 4952 return status;
4799} 4953}
4800 4954
4801int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors) 4955static int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name,
4956 struct nfs4_secinfo_flavors *flavors)
4802{ 4957{
4803 struct nfs4_exception exception = { }; 4958 struct nfs4_exception exception = { };
4804 int err; 4959 int err;
@@ -4852,6 +5007,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
4852{ 5007{
4853 nfs4_verifier verifier; 5008 nfs4_verifier verifier;
4854 struct nfs41_exchange_id_args args = { 5009 struct nfs41_exchange_id_args args = {
5010 .verifier = &verifier,
4855 .client = clp, 5011 .client = clp,
4856 .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER, 5012 .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER,
4857 }; 5013 };
@@ -4865,15 +5021,11 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
4865 .rpc_resp = &res, 5021 .rpc_resp = &res,
4866 .rpc_cred = cred, 5022 .rpc_cred = cred,
4867 }; 5023 };
4868 __be32 *p;
4869 5024
4870 dprintk("--> %s\n", __func__); 5025 dprintk("--> %s\n", __func__);
4871 BUG_ON(clp == NULL); 5026 BUG_ON(clp == NULL);
4872 5027
4873 p = (u32 *)verifier.data; 5028 nfs4_construct_boot_verifier(clp, &verifier);
4874 *p++ = htonl((u32)clp->cl_boot_time.tv_sec);
4875 *p = htonl((u32)clp->cl_boot_time.tv_nsec);
4876 args.verifier = &verifier;
4877 5029
4878 args.id_len = scnprintf(args.id, sizeof(args.id), 5030 args.id_len = scnprintf(args.id, sizeof(args.id),
4879 "%s/%s.%s/%u", 5031 "%s/%s.%s/%u",
@@ -4888,11 +5040,24 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
4888 goto out; 5040 goto out;
4889 } 5041 }
4890 5042
5043 res.impl_id = kzalloc(sizeof(struct nfs41_impl_id), GFP_KERNEL);
5044 if (unlikely(!res.impl_id)) {
5045 status = -ENOMEM;
5046 goto out_server_scope;
5047 }
5048
4891 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); 5049 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
4892 if (!status) 5050 if (!status)
4893 status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags); 5051 status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags);
4894 5052
4895 if (!status) { 5053 if (!status) {
5054 /* use the most recent implementation id */
5055 kfree(clp->impl_id);
5056 clp->impl_id = res.impl_id;
5057 } else
5058 kfree(res.impl_id);
5059
5060 if (!status) {
4896 if (clp->server_scope && 5061 if (clp->server_scope &&
4897 !nfs41_same_server_scope(clp->server_scope, 5062 !nfs41_same_server_scope(clp->server_scope,
4898 res.server_scope)) { 5063 res.server_scope)) {
@@ -4908,8 +5073,16 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
4908 goto out; 5073 goto out;
4909 } 5074 }
4910 } 5075 }
5076
5077out_server_scope:
4911 kfree(res.server_scope); 5078 kfree(res.server_scope);
4912out: 5079out:
5080 if (clp->impl_id)
5081 dprintk("%s: Server Implementation ID: "
5082 "domain: %s, name: %s, date: %llu,%u\n",
5083 __func__, clp->impl_id->domain, clp->impl_id->name,
5084 clp->impl_id->date.seconds,
5085 clp->impl_id->date.nseconds);
4913 dprintk("<-- %s status= %d\n", __func__, status); 5086 dprintk("<-- %s status= %d\n", __func__, status);
4914 return status; 5087 return status;
4915} 5088}
@@ -4933,7 +5106,7 @@ static void nfs4_get_lease_time_prepare(struct rpc_task *task,
4933 since we're invoked within one */ 5106 since we're invoked within one */
4934 ret = nfs41_setup_sequence(data->clp->cl_session, 5107 ret = nfs41_setup_sequence(data->clp->cl_session,
4935 &data->args->la_seq_args, 5108 &data->args->la_seq_args,
4936 &data->res->lr_seq_res, 0, task); 5109 &data->res->lr_seq_res, task);
4937 5110
4938 BUG_ON(ret == -EAGAIN); 5111 BUG_ON(ret == -EAGAIN);
4939 rpc_call_start(task); 5112 rpc_call_start(task);
@@ -4966,7 +5139,7 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata)
4966 dprintk("<-- %s\n", __func__); 5139 dprintk("<-- %s\n", __func__);
4967} 5140}
4968 5141
4969struct rpc_call_ops nfs4_get_lease_time_ops = { 5142static const struct rpc_call_ops nfs4_get_lease_time_ops = {
4970 .rpc_call_prepare = nfs4_get_lease_time_prepare, 5143 .rpc_call_prepare = nfs4_get_lease_time_prepare,
4971 .rpc_call_done = nfs4_get_lease_time_done, 5144 .rpc_call_done = nfs4_get_lease_time_done,
4972}; 5145};
@@ -4997,6 +5170,7 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
4997 }; 5170 };
4998 int status; 5171 int status;
4999 5172
5173 nfs41_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0);
5000 dprintk("--> %s\n", __func__); 5174 dprintk("--> %s\n", __func__);
5001 task = rpc_run_task(&task_setup); 5175 task = rpc_run_task(&task_setup);
5002 5176
@@ -5113,13 +5287,13 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
5113 return NULL; 5287 return NULL;
5114 5288
5115 tbl = &session->fc_slot_table; 5289 tbl = &session->fc_slot_table;
5116 tbl->highest_used_slotid = -1; 5290 tbl->highest_used_slotid = NFS4_NO_SLOT;
5117 spin_lock_init(&tbl->slot_tbl_lock); 5291 spin_lock_init(&tbl->slot_tbl_lock);
5118 rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table"); 5292 rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table");
5119 init_completion(&tbl->complete); 5293 init_completion(&tbl->complete);
5120 5294
5121 tbl = &session->bc_slot_table; 5295 tbl = &session->bc_slot_table;
5122 tbl->highest_used_slotid = -1; 5296 tbl->highest_used_slotid = NFS4_NO_SLOT;
5123 spin_lock_init(&tbl->slot_tbl_lock); 5297 spin_lock_init(&tbl->slot_tbl_lock);
5124 rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table"); 5298 rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table");
5125 init_completion(&tbl->complete); 5299 init_completion(&tbl->complete);
@@ -5132,11 +5306,16 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
5132 5306
5133void nfs4_destroy_session(struct nfs4_session *session) 5307void nfs4_destroy_session(struct nfs4_session *session)
5134{ 5308{
5309 struct rpc_xprt *xprt;
5310
5135 nfs4_proc_destroy_session(session); 5311 nfs4_proc_destroy_session(session);
5312
5313 rcu_read_lock();
5314 xprt = rcu_dereference(session->clp->cl_rpcclient->cl_xprt);
5315 rcu_read_unlock();
5136 dprintk("%s Destroy backchannel for xprt %p\n", 5316 dprintk("%s Destroy backchannel for xprt %p\n",
5137 __func__, session->clp->cl_rpcclient->cl_xprt); 5317 __func__, xprt);
5138 xprt_destroy_backchannel(session->clp->cl_rpcclient->cl_xprt, 5318 xprt_destroy_backchannel(xprt, NFS41_BC_MIN_CALLBACKS);
5139 NFS41_BC_MIN_CALLBACKS);
5140 nfs4_destroy_slot_tables(session); 5319 nfs4_destroy_slot_tables(session);
5141 kfree(session); 5320 kfree(session);
5142} 5321}
@@ -5164,7 +5343,7 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args)
5164 args->fc_attrs.max_rqst_sz = mxrqst_sz; 5343 args->fc_attrs.max_rqst_sz = mxrqst_sz;
5165 args->fc_attrs.max_resp_sz = mxresp_sz; 5344 args->fc_attrs.max_resp_sz = mxresp_sz;
5166 args->fc_attrs.max_ops = NFS4_MAX_OPS; 5345 args->fc_attrs.max_ops = NFS4_MAX_OPS;
5167 args->fc_attrs.max_reqs = session->clp->cl_rpcclient->cl_xprt->max_reqs; 5346 args->fc_attrs.max_reqs = max_session_slots;
5168 5347
5169 dprintk("%s: Fore Channel : max_rqst_sz=%u max_resp_sz=%u " 5348 dprintk("%s: Fore Channel : max_rqst_sz=%u max_resp_sz=%u "
5170 "max_ops=%u max_reqs=%u\n", 5349 "max_ops=%u max_reqs=%u\n",
@@ -5204,6 +5383,8 @@ static int nfs4_verify_fore_channel_attrs(struct nfs41_create_session_args *args
5204 return -EINVAL; 5383 return -EINVAL;
5205 if (rcvd->max_reqs == 0) 5384 if (rcvd->max_reqs == 0)
5206 return -EINVAL; 5385 return -EINVAL;
5386 if (rcvd->max_reqs > NFS4_MAX_SLOT_TABLE)
5387 rcvd->max_reqs = NFS4_MAX_SLOT_TABLE;
5207 return 0; 5388 return 0;
5208} 5389}
5209 5390
@@ -5219,9 +5400,9 @@ static int nfs4_verify_back_channel_attrs(struct nfs41_create_session_args *args
5219 if (rcvd->max_resp_sz_cached > sent->max_resp_sz_cached) 5400 if (rcvd->max_resp_sz_cached > sent->max_resp_sz_cached)
5220 return -EINVAL; 5401 return -EINVAL;
5221 /* These would render the backchannel useless: */ 5402 /* These would render the backchannel useless: */
5222 if (rcvd->max_ops == 0) 5403 if (rcvd->max_ops != sent->max_ops)
5223 return -EINVAL; 5404 return -EINVAL;
5224 if (rcvd->max_reqs == 0) 5405 if (rcvd->max_reqs != sent->max_reqs)
5225 return -EINVAL; 5406 return -EINVAL;
5226 return 0; 5407 return 0;
5227} 5408}
@@ -5324,7 +5505,7 @@ int nfs4_proc_destroy_session(struct nfs4_session *session)
5324 5505
5325 if (status) 5506 if (status)
5326 printk(KERN_WARNING 5507 printk(KERN_WARNING
5327 "Got error %d from the server on DESTROY_SESSION. " 5508 "NFS: Got error %d from the server on DESTROY_SESSION. "
5328 "Session has been destroyed regardless...\n", status); 5509 "Session has been destroyed regardless...\n", status);
5329 5510
5330 dprintk("<-- nfs4_proc_destroy_session\n"); 5511 dprintk("<-- nfs4_proc_destroy_session\n");
@@ -5447,7 +5628,7 @@ static void nfs41_sequence_prepare(struct rpc_task *task, void *data)
5447 args = task->tk_msg.rpc_argp; 5628 args = task->tk_msg.rpc_argp;
5448 res = task->tk_msg.rpc_resp; 5629 res = task->tk_msg.rpc_resp;
5449 5630
5450 if (nfs41_setup_sequence(clp->cl_session, args, res, 0, task)) 5631 if (nfs41_setup_sequence(clp->cl_session, args, res, task))
5451 return; 5632 return;
5452 rpc_call_start(task); 5633 rpc_call_start(task);
5453} 5634}
@@ -5479,6 +5660,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_
5479 nfs_put_client(clp); 5660 nfs_put_client(clp);
5480 return ERR_PTR(-ENOMEM); 5661 return ERR_PTR(-ENOMEM);
5481 } 5662 }
5663 nfs41_init_sequence(&calldata->args, &calldata->res, 0);
5482 msg.rpc_argp = &calldata->args; 5664 msg.rpc_argp = &calldata->args;
5483 msg.rpc_resp = &calldata->res; 5665 msg.rpc_resp = &calldata->res;
5484 calldata->clp = clp; 5666 calldata->clp = clp;
@@ -5540,7 +5722,7 @@ static void nfs4_reclaim_complete_prepare(struct rpc_task *task, void *data)
5540 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); 5722 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
5541 if (nfs41_setup_sequence(calldata->clp->cl_session, 5723 if (nfs41_setup_sequence(calldata->clp->cl_session,
5542 &calldata->arg.seq_args, 5724 &calldata->arg.seq_args,
5543 &calldata->res.seq_res, 0, task)) 5725 &calldata->res.seq_res, task))
5544 return; 5726 return;
5545 5727
5546 rpc_call_start(task); 5728 rpc_call_start(task);
@@ -5619,6 +5801,7 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
5619 calldata->clp = clp; 5801 calldata->clp = clp;
5620 calldata->arg.one_fs = 0; 5802 calldata->arg.one_fs = 0;
5621 5803
5804 nfs41_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 0);
5622 msg.rpc_argp = &calldata->arg; 5805 msg.rpc_argp = &calldata->arg;
5623 msg.rpc_resp = &calldata->res; 5806 msg.rpc_resp = &calldata->res;
5624 task_setup_data.callback_data = calldata; 5807 task_setup_data.callback_data = calldata;
@@ -5650,7 +5833,7 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata)
5650 * to be no way to prevent it completely. 5833 * to be no way to prevent it completely.
5651 */ 5834 */
5652 if (nfs4_setup_sequence(server, &lgp->args.seq_args, 5835 if (nfs4_setup_sequence(server, &lgp->args.seq_args,
5653 &lgp->res.seq_res, 0, task)) 5836 &lgp->res.seq_res, task))
5654 return; 5837 return;
5655 if (pnfs_choose_layoutget_stateid(&lgp->args.stateid, 5838 if (pnfs_choose_layoutget_stateid(&lgp->args.stateid,
5656 NFS_I(lgp->args.inode)->layout, 5839 NFS_I(lgp->args.inode)->layout,
@@ -5725,6 +5908,7 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
5725 5908
5726 lgp->res.layoutp = &lgp->args.layout; 5909 lgp->res.layoutp = &lgp->args.layout;
5727 lgp->res.seq_res.sr_slot = NULL; 5910 lgp->res.seq_res.sr_slot = NULL;
5911 nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0);
5728 task = rpc_run_task(&task_setup_data); 5912 task = rpc_run_task(&task_setup_data);
5729 if (IS_ERR(task)) 5913 if (IS_ERR(task))
5730 return PTR_ERR(task); 5914 return PTR_ERR(task);
@@ -5745,7 +5929,7 @@ nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata)
5745 5929
5746 dprintk("--> %s\n", __func__); 5930 dprintk("--> %s\n", __func__);
5747 if (nfs41_setup_sequence(lrp->clp->cl_session, &lrp->args.seq_args, 5931 if (nfs41_setup_sequence(lrp->clp->cl_session, &lrp->args.seq_args,
5748 &lrp->res.seq_res, 0, task)) 5932 &lrp->res.seq_res, task))
5749 return; 5933 return;
5750 rpc_call_start(task); 5934 rpc_call_start(task);
5751} 5935}
@@ -5811,6 +5995,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp)
5811 int status; 5995 int status;
5812 5996
5813 dprintk("--> %s\n", __func__); 5997 dprintk("--> %s\n", __func__);
5998 nfs41_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1);
5814 task = rpc_run_task(&task_setup_data); 5999 task = rpc_run_task(&task_setup_data);
5815 if (IS_ERR(task)) 6000 if (IS_ERR(task))
5816 return PTR_ERR(task); 6001 return PTR_ERR(task);
@@ -5911,7 +6096,7 @@ static void nfs4_layoutcommit_prepare(struct rpc_task *task, void *calldata)
5911 struct nfs_server *server = NFS_SERVER(data->args.inode); 6096 struct nfs_server *server = NFS_SERVER(data->args.inode);
5912 6097
5913 if (nfs4_setup_sequence(server, &data->args.seq_args, 6098 if (nfs4_setup_sequence(server, &data->args.seq_args,
5914 &data->res.seq_res, 1, task)) 6099 &data->res.seq_res, task))
5915 return; 6100 return;
5916 rpc_call_start(task); 6101 rpc_call_start(task);
5917} 6102}
@@ -5998,6 +6183,7 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync)
5998 data->args.lastbytewritten, 6183 data->args.lastbytewritten,
5999 data->args.inode->i_ino); 6184 data->args.inode->i_ino);
6000 6185
6186 nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
6001 task = rpc_run_task(&task_setup_data); 6187 task = rpc_run_task(&task_setup_data);
6002 if (IS_ERR(task)) 6188 if (IS_ERR(task))
6003 return PTR_ERR(task); 6189 return PTR_ERR(task);
@@ -6091,11 +6277,12 @@ out_freepage:
6091out: 6277out:
6092 return err; 6278 return err;
6093} 6279}
6094static int _nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state) 6280
6281static int _nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid)
6095{ 6282{
6096 int status; 6283 int status;
6097 struct nfs41_test_stateid_args args = { 6284 struct nfs41_test_stateid_args args = {
6098 .stateid = &state->stateid, 6285 .stateid = stateid,
6099 }; 6286 };
6100 struct nfs41_test_stateid_res res; 6287 struct nfs41_test_stateid_res res;
6101 struct rpc_message msg = { 6288 struct rpc_message msg = {
@@ -6103,28 +6290,31 @@ static int _nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *sta
6103 .rpc_argp = &args, 6290 .rpc_argp = &args,
6104 .rpc_resp = &res, 6291 .rpc_resp = &res,
6105 }; 6292 };
6106 args.seq_args.sa_session = res.seq_res.sr_session = NULL; 6293
6107 status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1); 6294 nfs41_init_sequence(&args.seq_args, &res.seq_res, 0);
6295 status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 1);
6296
6297 if (status == NFS_OK)
6298 return res.status;
6108 return status; 6299 return status;
6109} 6300}
6110 6301
6111static int nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state) 6302static int nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid)
6112{ 6303{
6113 struct nfs4_exception exception = { }; 6304 struct nfs4_exception exception = { };
6114 int err; 6305 int err;
6115 do { 6306 do {
6116 err = nfs4_handle_exception(server, 6307 err = nfs4_handle_exception(server,
6117 _nfs41_test_stateid(server, state), 6308 _nfs41_test_stateid(server, stateid),
6118 &exception); 6309 &exception);
6119 } while (exception.retry); 6310 } while (exception.retry);
6120 return err; 6311 return err;
6121} 6312}
6122 6313
6123static int _nfs4_free_stateid(struct nfs_server *server, struct nfs4_state *state) 6314static int _nfs4_free_stateid(struct nfs_server *server, nfs4_stateid *stateid)
6124{ 6315{
6125 int status;
6126 struct nfs41_free_stateid_args args = { 6316 struct nfs41_free_stateid_args args = {
6127 .stateid = &state->stateid, 6317 .stateid = stateid,
6128 }; 6318 };
6129 struct nfs41_free_stateid_res res; 6319 struct nfs41_free_stateid_res res;
6130 struct rpc_message msg = { 6320 struct rpc_message msg = {
@@ -6133,25 +6323,46 @@ static int _nfs4_free_stateid(struct nfs_server *server, struct nfs4_state *stat
6133 .rpc_resp = &res, 6323 .rpc_resp = &res,
6134 }; 6324 };
6135 6325
6136 args.seq_args.sa_session = res.seq_res.sr_session = NULL; 6326 nfs41_init_sequence(&args.seq_args, &res.seq_res, 0);
6137 status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1); 6327 return nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 1);
6138 return status;
6139} 6328}
6140 6329
6141static int nfs41_free_stateid(struct nfs_server *server, struct nfs4_state *state) 6330static int nfs41_free_stateid(struct nfs_server *server, nfs4_stateid *stateid)
6142{ 6331{
6143 struct nfs4_exception exception = { }; 6332 struct nfs4_exception exception = { };
6144 int err; 6333 int err;
6145 do { 6334 do {
6146 err = nfs4_handle_exception(server, 6335 err = nfs4_handle_exception(server,
6147 _nfs4_free_stateid(server, state), 6336 _nfs4_free_stateid(server, stateid),
6148 &exception); 6337 &exception);
6149 } while (exception.retry); 6338 } while (exception.retry);
6150 return err; 6339 return err;
6151} 6340}
6341
6342static bool nfs41_match_stateid(const nfs4_stateid *s1,
6343 const nfs4_stateid *s2)
6344{
6345 if (memcmp(s1->other, s2->other, sizeof(s1->other)) != 0)
6346 return false;
6347
6348 if (s1->seqid == s2->seqid)
6349 return true;
6350 if (s1->seqid == 0 || s2->seqid == 0)
6351 return true;
6352
6353 return false;
6354}
6355
6152#endif /* CONFIG_NFS_V4_1 */ 6356#endif /* CONFIG_NFS_V4_1 */
6153 6357
6154struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { 6358static bool nfs4_match_stateid(const nfs4_stateid *s1,
6359 const nfs4_stateid *s2)
6360{
6361 return nfs4_stateid_match(s1, s2);
6362}
6363
6364
6365static const struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
6155 .owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT, 6366 .owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT,
6156 .state_flag_bit = NFS_STATE_RECLAIM_REBOOT, 6367 .state_flag_bit = NFS_STATE_RECLAIM_REBOOT,
6157 .recover_open = nfs4_open_reclaim, 6368 .recover_open = nfs4_open_reclaim,
@@ -6161,7 +6372,7 @@ struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
6161}; 6372};
6162 6373
6163#if defined(CONFIG_NFS_V4_1) 6374#if defined(CONFIG_NFS_V4_1)
6164struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = { 6375static const struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = {
6165 .owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT, 6376 .owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT,
6166 .state_flag_bit = NFS_STATE_RECLAIM_REBOOT, 6377 .state_flag_bit = NFS_STATE_RECLAIM_REBOOT,
6167 .recover_open = nfs4_open_reclaim, 6378 .recover_open = nfs4_open_reclaim,
@@ -6172,7 +6383,7 @@ struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = {
6172}; 6383};
6173#endif /* CONFIG_NFS_V4_1 */ 6384#endif /* CONFIG_NFS_V4_1 */
6174 6385
6175struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = { 6386static const struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = {
6176 .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, 6387 .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE,
6177 .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, 6388 .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE,
6178 .recover_open = nfs4_open_expired, 6389 .recover_open = nfs4_open_expired,
@@ -6182,7 +6393,7 @@ struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = {
6182}; 6393};
6183 6394
6184#if defined(CONFIG_NFS_V4_1) 6395#if defined(CONFIG_NFS_V4_1)
6185struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = { 6396static const struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = {
6186 .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, 6397 .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE,
6187 .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, 6398 .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE,
6188 .recover_open = nfs41_open_expired, 6399 .recover_open = nfs41_open_expired,
@@ -6192,14 +6403,14 @@ struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = {
6192}; 6403};
6193#endif /* CONFIG_NFS_V4_1 */ 6404#endif /* CONFIG_NFS_V4_1 */
6194 6405
6195struct nfs4_state_maintenance_ops nfs40_state_renewal_ops = { 6406static const struct nfs4_state_maintenance_ops nfs40_state_renewal_ops = {
6196 .sched_state_renewal = nfs4_proc_async_renew, 6407 .sched_state_renewal = nfs4_proc_async_renew,
6197 .get_state_renewal_cred_locked = nfs4_get_renew_cred_locked, 6408 .get_state_renewal_cred_locked = nfs4_get_renew_cred_locked,
6198 .renew_lease = nfs4_proc_renew, 6409 .renew_lease = nfs4_proc_renew,
6199}; 6410};
6200 6411
6201#if defined(CONFIG_NFS_V4_1) 6412#if defined(CONFIG_NFS_V4_1)
6202struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = { 6413static const struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = {
6203 .sched_state_renewal = nfs41_proc_async_sequence, 6414 .sched_state_renewal = nfs41_proc_async_sequence,
6204 .get_state_renewal_cred_locked = nfs4_get_machine_cred_locked, 6415 .get_state_renewal_cred_locked = nfs4_get_machine_cred_locked,
6205 .renew_lease = nfs4_proc_sequence, 6416 .renew_lease = nfs4_proc_sequence,
@@ -6209,7 +6420,7 @@ struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = {
6209static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { 6420static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = {
6210 .minor_version = 0, 6421 .minor_version = 0,
6211 .call_sync = _nfs4_call_sync, 6422 .call_sync = _nfs4_call_sync,
6212 .validate_stateid = nfs4_validate_delegation_stateid, 6423 .match_stateid = nfs4_match_stateid,
6213 .find_root_sec = nfs4_find_root_sec, 6424 .find_root_sec = nfs4_find_root_sec,
6214 .reboot_recovery_ops = &nfs40_reboot_recovery_ops, 6425 .reboot_recovery_ops = &nfs40_reboot_recovery_ops,
6215 .nograce_recovery_ops = &nfs40_nograce_recovery_ops, 6426 .nograce_recovery_ops = &nfs40_nograce_recovery_ops,
@@ -6220,7 +6431,7 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = {
6220static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { 6431static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = {
6221 .minor_version = 1, 6432 .minor_version = 1,
6222 .call_sync = _nfs4_call_sync_session, 6433 .call_sync = _nfs4_call_sync_session,
6223 .validate_stateid = nfs41_validate_delegation_stateid, 6434 .match_stateid = nfs41_match_stateid,
6224 .find_root_sec = nfs41_find_root_sec, 6435 .find_root_sec = nfs41_find_root_sec,
6225 .reboot_recovery_ops = &nfs41_reboot_recovery_ops, 6436 .reboot_recovery_ops = &nfs41_reboot_recovery_ops,
6226 .nograce_recovery_ops = &nfs41_nograce_recovery_ops, 6437 .nograce_recovery_ops = &nfs41_nograce_recovery_ops,
@@ -6260,9 +6471,11 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
6260 .create = nfs4_proc_create, 6471 .create = nfs4_proc_create,
6261 .remove = nfs4_proc_remove, 6472 .remove = nfs4_proc_remove,
6262 .unlink_setup = nfs4_proc_unlink_setup, 6473 .unlink_setup = nfs4_proc_unlink_setup,
6474 .unlink_rpc_prepare = nfs4_proc_unlink_rpc_prepare,
6263 .unlink_done = nfs4_proc_unlink_done, 6475 .unlink_done = nfs4_proc_unlink_done,
6264 .rename = nfs4_proc_rename, 6476 .rename = nfs4_proc_rename,
6265 .rename_setup = nfs4_proc_rename_setup, 6477 .rename_setup = nfs4_proc_rename_setup,
6478 .rename_rpc_prepare = nfs4_proc_rename_rpc_prepare,
6266 .rename_done = nfs4_proc_rename_done, 6479 .rename_done = nfs4_proc_rename_done,
6267 .link = nfs4_proc_link, 6480 .link = nfs4_proc_link,
6268 .symlink = nfs4_proc_symlink, 6481 .symlink = nfs4_proc_symlink,
@@ -6276,8 +6489,10 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
6276 .set_capabilities = nfs4_server_capabilities, 6489 .set_capabilities = nfs4_server_capabilities,
6277 .decode_dirent = nfs4_decode_dirent, 6490 .decode_dirent = nfs4_decode_dirent,
6278 .read_setup = nfs4_proc_read_setup, 6491 .read_setup = nfs4_proc_read_setup,
6492 .read_rpc_prepare = nfs4_proc_read_rpc_prepare,
6279 .read_done = nfs4_read_done, 6493 .read_done = nfs4_read_done,
6280 .write_setup = nfs4_proc_write_setup, 6494 .write_setup = nfs4_proc_write_setup,
6495 .write_rpc_prepare = nfs4_proc_write_rpc_prepare,
6281 .write_done = nfs4_write_done, 6496 .write_done = nfs4_write_done,
6282 .commit_setup = nfs4_proc_commit_setup, 6497 .commit_setup = nfs4_proc_commit_setup,
6283 .commit_done = nfs4_commit_done, 6498 .commit_done = nfs4_commit_done,
@@ -6301,6 +6516,10 @@ const struct xattr_handler *nfs4_xattr_handlers[] = {
6301 NULL 6516 NULL
6302}; 6517};
6303 6518
6519module_param(max_session_slots, ushort, 0644);
6520MODULE_PARM_DESC(max_session_slots, "Maximum number of outstanding NFSv4.1 "
6521 "requests the client will negotiate");
6522
6304/* 6523/*
6305 * Local variables: 6524 * Local variables:
6306 * c-basic-offset: 8 6525 * c-basic-offset: 8
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 45392032e7bd..0f43414eb25a 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -146,6 +146,11 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp)
146 struct rpc_cred *cred = NULL; 146 struct rpc_cred *cred = NULL;
147 struct nfs_server *server; 147 struct nfs_server *server;
148 148
149 /* Use machine credentials if available */
150 cred = nfs4_get_machine_cred_locked(clp);
151 if (cred != NULL)
152 goto out;
153
149 rcu_read_lock(); 154 rcu_read_lock();
150 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { 155 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
151 cred = nfs4_get_renew_cred_server_locked(server); 156 cred = nfs4_get_renew_cred_server_locked(server);
@@ -153,6 +158,8 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp)
153 break; 158 break;
154 } 159 }
155 rcu_read_unlock(); 160 rcu_read_unlock();
161
162out:
156 return cred; 163 return cred;
157} 164}
158 165
@@ -190,30 +197,29 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp)
190static void nfs4_end_drain_session(struct nfs_client *clp) 197static void nfs4_end_drain_session(struct nfs_client *clp)
191{ 198{
192 struct nfs4_session *ses = clp->cl_session; 199 struct nfs4_session *ses = clp->cl_session;
200 struct nfs4_slot_table *tbl;
193 int max_slots; 201 int max_slots;
194 202
195 if (ses == NULL) 203 if (ses == NULL)
196 return; 204 return;
205 tbl = &ses->fc_slot_table;
197 if (test_and_clear_bit(NFS4_SESSION_DRAINING, &ses->session_state)) { 206 if (test_and_clear_bit(NFS4_SESSION_DRAINING, &ses->session_state)) {
198 spin_lock(&ses->fc_slot_table.slot_tbl_lock); 207 spin_lock(&tbl->slot_tbl_lock);
199 max_slots = ses->fc_slot_table.max_slots; 208 max_slots = tbl->max_slots;
200 while (max_slots--) { 209 while (max_slots--) {
201 struct rpc_task *task; 210 if (rpc_wake_up_first(&tbl->slot_tbl_waitq,
202 211 nfs4_set_task_privileged,
203 task = rpc_wake_up_next(&ses->fc_slot_table. 212 NULL) == NULL)
204 slot_tbl_waitq);
205 if (!task)
206 break; 213 break;
207 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
208 } 214 }
209 spin_unlock(&ses->fc_slot_table.slot_tbl_lock); 215 spin_unlock(&tbl->slot_tbl_lock);
210 } 216 }
211} 217}
212 218
213static int nfs4_wait_on_slot_tbl(struct nfs4_slot_table *tbl) 219static int nfs4_wait_on_slot_tbl(struct nfs4_slot_table *tbl)
214{ 220{
215 spin_lock(&tbl->slot_tbl_lock); 221 spin_lock(&tbl->slot_tbl_lock);
216 if (tbl->highest_used_slotid != -1) { 222 if (tbl->highest_used_slotid != NFS4_NO_SLOT) {
217 INIT_COMPLETION(tbl->complete); 223 INIT_COMPLETION(tbl->complete);
218 spin_unlock(&tbl->slot_tbl_lock); 224 spin_unlock(&tbl->slot_tbl_lock);
219 return wait_for_completion_interruptible(&tbl->complete); 225 return wait_for_completion_interruptible(&tbl->complete);
@@ -317,62 +323,6 @@ out:
317 return cred; 323 return cred;
318} 324}
319 325
320static void nfs_alloc_unique_id_locked(struct rb_root *root,
321 struct nfs_unique_id *new,
322 __u64 minval, int maxbits)
323{
324 struct rb_node **p, *parent;
325 struct nfs_unique_id *pos;
326 __u64 mask = ~0ULL;
327
328 if (maxbits < 64)
329 mask = (1ULL << maxbits) - 1ULL;
330
331 /* Ensure distribution is more or less flat */
332 get_random_bytes(&new->id, sizeof(new->id));
333 new->id &= mask;
334 if (new->id < minval)
335 new->id += minval;
336retry:
337 p = &root->rb_node;
338 parent = NULL;
339
340 while (*p != NULL) {
341 parent = *p;
342 pos = rb_entry(parent, struct nfs_unique_id, rb_node);
343
344 if (new->id < pos->id)
345 p = &(*p)->rb_left;
346 else if (new->id > pos->id)
347 p = &(*p)->rb_right;
348 else
349 goto id_exists;
350 }
351 rb_link_node(&new->rb_node, parent, p);
352 rb_insert_color(&new->rb_node, root);
353 return;
354id_exists:
355 for (;;) {
356 new->id++;
357 if (new->id < minval || (new->id & mask) != new->id) {
358 new->id = minval;
359 break;
360 }
361 parent = rb_next(parent);
362 if (parent == NULL)
363 break;
364 pos = rb_entry(parent, struct nfs_unique_id, rb_node);
365 if (new->id < pos->id)
366 break;
367 }
368 goto retry;
369}
370
371static void nfs_free_unique_id(struct rb_root *root, struct nfs_unique_id *id)
372{
373 rb_erase(&id->rb_node, root);
374}
375
376static struct nfs4_state_owner * 326static struct nfs4_state_owner *
377nfs4_find_state_owner_locked(struct nfs_server *server, struct rpc_cred *cred) 327nfs4_find_state_owner_locked(struct nfs_server *server, struct rpc_cred *cred)
378{ 328{
@@ -405,6 +355,7 @@ nfs4_insert_state_owner_locked(struct nfs4_state_owner *new)
405 struct rb_node **p = &server->state_owners.rb_node, 355 struct rb_node **p = &server->state_owners.rb_node,
406 *parent = NULL; 356 *parent = NULL;
407 struct nfs4_state_owner *sp; 357 struct nfs4_state_owner *sp;
358 int err;
408 359
409 while (*p != NULL) { 360 while (*p != NULL) {
410 parent = *p; 361 parent = *p;
@@ -421,8 +372,9 @@ nfs4_insert_state_owner_locked(struct nfs4_state_owner *new)
421 return sp; 372 return sp;
422 } 373 }
423 } 374 }
424 nfs_alloc_unique_id_locked(&server->openowner_id, 375 err = ida_get_new(&server->openowner_id, &new->so_seqid.owner_id);
425 &new->so_owner_id, 1, 64); 376 if (err)
377 return ERR_PTR(err);
426 rb_link_node(&new->so_server_node, parent, p); 378 rb_link_node(&new->so_server_node, parent, p);
427 rb_insert_color(&new->so_server_node, &server->state_owners); 379 rb_insert_color(&new->so_server_node, &server->state_owners);
428 return new; 380 return new;
@@ -435,7 +387,23 @@ nfs4_remove_state_owner_locked(struct nfs4_state_owner *sp)
435 387
436 if (!RB_EMPTY_NODE(&sp->so_server_node)) 388 if (!RB_EMPTY_NODE(&sp->so_server_node))
437 rb_erase(&sp->so_server_node, &server->state_owners); 389 rb_erase(&sp->so_server_node, &server->state_owners);
438 nfs_free_unique_id(&server->openowner_id, &sp->so_owner_id); 390 ida_remove(&server->openowner_id, sp->so_seqid.owner_id);
391}
392
393static void
394nfs4_init_seqid_counter(struct nfs_seqid_counter *sc)
395{
396 sc->flags = 0;
397 sc->counter = 0;
398 spin_lock_init(&sc->lock);
399 INIT_LIST_HEAD(&sc->list);
400 rpc_init_wait_queue(&sc->wait, "Seqid_waitqueue");
401}
402
403static void
404nfs4_destroy_seqid_counter(struct nfs_seqid_counter *sc)
405{
406 rpc_destroy_wait_queue(&sc->wait);
439} 407}
440 408
441/* 409/*
@@ -444,19 +412,20 @@ nfs4_remove_state_owner_locked(struct nfs4_state_owner *sp)
444 * 412 *
445 */ 413 */
446static struct nfs4_state_owner * 414static struct nfs4_state_owner *
447nfs4_alloc_state_owner(void) 415nfs4_alloc_state_owner(struct nfs_server *server,
416 struct rpc_cred *cred,
417 gfp_t gfp_flags)
448{ 418{
449 struct nfs4_state_owner *sp; 419 struct nfs4_state_owner *sp;
450 420
451 sp = kzalloc(sizeof(*sp),GFP_NOFS); 421 sp = kzalloc(sizeof(*sp), gfp_flags);
452 if (!sp) 422 if (!sp)
453 return NULL; 423 return NULL;
424 sp->so_server = server;
425 sp->so_cred = get_rpccred(cred);
454 spin_lock_init(&sp->so_lock); 426 spin_lock_init(&sp->so_lock);
455 INIT_LIST_HEAD(&sp->so_states); 427 INIT_LIST_HEAD(&sp->so_states);
456 rpc_init_wait_queue(&sp->so_sequence.wait, "Seqid_waitqueue"); 428 nfs4_init_seqid_counter(&sp->so_seqid);
457 sp->so_seqid.sequence = &sp->so_sequence;
458 spin_lock_init(&sp->so_sequence.lock);
459 INIT_LIST_HEAD(&sp->so_sequence.list);
460 atomic_set(&sp->so_count, 1); 429 atomic_set(&sp->so_count, 1);
461 INIT_LIST_HEAD(&sp->so_lru); 430 INIT_LIST_HEAD(&sp->so_lru);
462 return sp; 431 return sp;
@@ -478,7 +447,7 @@ nfs4_drop_state_owner(struct nfs4_state_owner *sp)
478 447
479static void nfs4_free_state_owner(struct nfs4_state_owner *sp) 448static void nfs4_free_state_owner(struct nfs4_state_owner *sp)
480{ 449{
481 rpc_destroy_wait_queue(&sp->so_sequence.wait); 450 nfs4_destroy_seqid_counter(&sp->so_seqid);
482 put_rpccred(sp->so_cred); 451 put_rpccred(sp->so_cred);
483 kfree(sp); 452 kfree(sp);
484} 453}
@@ -516,7 +485,8 @@ static void nfs4_gc_state_owners(struct nfs_server *server)
516 * Returns a pointer to an instantiated nfs4_state_owner struct, or NULL. 485 * Returns a pointer to an instantiated nfs4_state_owner struct, or NULL.
517 */ 486 */
518struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, 487struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server,
519 struct rpc_cred *cred) 488 struct rpc_cred *cred,
489 gfp_t gfp_flags)
520{ 490{
521 struct nfs_client *clp = server->nfs_client; 491 struct nfs_client *clp = server->nfs_client;
522 struct nfs4_state_owner *sp, *new; 492 struct nfs4_state_owner *sp, *new;
@@ -526,20 +496,18 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server,
526 spin_unlock(&clp->cl_lock); 496 spin_unlock(&clp->cl_lock);
527 if (sp != NULL) 497 if (sp != NULL)
528 goto out; 498 goto out;
529 new = nfs4_alloc_state_owner(); 499 new = nfs4_alloc_state_owner(server, cred, gfp_flags);
530 if (new == NULL) 500 if (new == NULL)
531 goto out; 501 goto out;
532 new->so_server = server; 502 do {
533 new->so_cred = cred; 503 if (ida_pre_get(&server->openowner_id, gfp_flags) == 0)
534 spin_lock(&clp->cl_lock); 504 break;
535 sp = nfs4_insert_state_owner_locked(new); 505 spin_lock(&clp->cl_lock);
536 spin_unlock(&clp->cl_lock); 506 sp = nfs4_insert_state_owner_locked(new);
537 if (sp == new) 507 spin_unlock(&clp->cl_lock);
538 get_rpccred(cred); 508 } while (sp == ERR_PTR(-EAGAIN));
539 else { 509 if (sp != new)
540 rpc_destroy_wait_queue(&new->so_sequence.wait); 510 nfs4_free_state_owner(new);
541 kfree(new);
542 }
543out: 511out:
544 nfs4_gc_state_owners(server); 512 nfs4_gc_state_owners(server);
545 return sp; 513 return sp;
@@ -795,15 +763,11 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
795{ 763{
796 struct nfs4_lock_state *lsp; 764 struct nfs4_lock_state *lsp;
797 struct nfs_server *server = state->owner->so_server; 765 struct nfs_server *server = state->owner->so_server;
798 struct nfs_client *clp = server->nfs_client;
799 766
800 lsp = kzalloc(sizeof(*lsp), GFP_NOFS); 767 lsp = kzalloc(sizeof(*lsp), GFP_NOFS);
801 if (lsp == NULL) 768 if (lsp == NULL)
802 return NULL; 769 return NULL;
803 rpc_init_wait_queue(&lsp->ls_sequence.wait, "lock_seqid_waitqueue"); 770 nfs4_init_seqid_counter(&lsp->ls_seqid);
804 spin_lock_init(&lsp->ls_sequence.lock);
805 INIT_LIST_HEAD(&lsp->ls_sequence.list);
806 lsp->ls_seqid.sequence = &lsp->ls_sequence;
807 atomic_set(&lsp->ls_count, 1); 771 atomic_set(&lsp->ls_count, 1);
808 lsp->ls_state = state; 772 lsp->ls_state = state;
809 lsp->ls_owner.lo_type = type; 773 lsp->ls_owner.lo_type = type;
@@ -815,25 +779,22 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
815 lsp->ls_owner.lo_u.posix_owner = fl_owner; 779 lsp->ls_owner.lo_u.posix_owner = fl_owner;
816 break; 780 break;
817 default: 781 default:
818 kfree(lsp); 782 goto out_free;
819 return NULL;
820 } 783 }
821 spin_lock(&clp->cl_lock); 784 lsp->ls_seqid.owner_id = ida_simple_get(&server->lockowner_id, 0, 0, GFP_NOFS);
822 nfs_alloc_unique_id_locked(&server->lockowner_id, &lsp->ls_id, 1, 64); 785 if (lsp->ls_seqid.owner_id < 0)
823 spin_unlock(&clp->cl_lock); 786 goto out_free;
824 INIT_LIST_HEAD(&lsp->ls_locks); 787 INIT_LIST_HEAD(&lsp->ls_locks);
825 return lsp; 788 return lsp;
789out_free:
790 kfree(lsp);
791 return NULL;
826} 792}
827 793
828static void nfs4_free_lock_state(struct nfs4_lock_state *lsp) 794void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp)
829{ 795{
830 struct nfs_server *server = lsp->ls_state->owner->so_server; 796 ida_simple_remove(&server->lockowner_id, lsp->ls_seqid.owner_id);
831 struct nfs_client *clp = server->nfs_client; 797 nfs4_destroy_seqid_counter(&lsp->ls_seqid);
832
833 spin_lock(&clp->cl_lock);
834 nfs_free_unique_id(&server->lockowner_id, &lsp->ls_id);
835 spin_unlock(&clp->cl_lock);
836 rpc_destroy_wait_queue(&lsp->ls_sequence.wait);
837 kfree(lsp); 798 kfree(lsp);
838} 799}
839 800
@@ -865,7 +826,7 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_
865 } 826 }
866 spin_unlock(&state->state_lock); 827 spin_unlock(&state->state_lock);
867 if (new != NULL) 828 if (new != NULL)
868 nfs4_free_lock_state(new); 829 nfs4_free_lock_state(state->owner->so_server, new);
869 return lsp; 830 return lsp;
870} 831}
871 832
@@ -886,9 +847,11 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
886 if (list_empty(&state->lock_states)) 847 if (list_empty(&state->lock_states))
887 clear_bit(LK_STATE_IN_USE, &state->flags); 848 clear_bit(LK_STATE_IN_USE, &state->flags);
888 spin_unlock(&state->state_lock); 849 spin_unlock(&state->state_lock);
889 if (lsp->ls_flags & NFS_LOCK_INITIALIZED) 850 if (lsp->ls_flags & NFS_LOCK_INITIALIZED) {
890 nfs4_release_lockowner(lsp); 851 if (nfs4_release_lockowner(lsp) == 0)
891 nfs4_free_lock_state(lsp); 852 return;
853 }
854 nfs4_free_lock_state(lsp->ls_state->owner->so_server, lsp);
892} 855}
893 856
894static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src) 857static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
@@ -918,7 +881,8 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
918 if (fl->fl_flags & FL_POSIX) 881 if (fl->fl_flags & FL_POSIX)
919 lsp = nfs4_get_lock_state(state, fl->fl_owner, 0, NFS4_POSIX_LOCK_TYPE); 882 lsp = nfs4_get_lock_state(state, fl->fl_owner, 0, NFS4_POSIX_LOCK_TYPE);
920 else if (fl->fl_flags & FL_FLOCK) 883 else if (fl->fl_flags & FL_FLOCK)
921 lsp = nfs4_get_lock_state(state, 0, fl->fl_pid, NFS4_FLOCK_LOCK_TYPE); 884 lsp = nfs4_get_lock_state(state, NULL, fl->fl_pid,
885 NFS4_FLOCK_LOCK_TYPE);
922 else 886 else
923 return -EINVAL; 887 return -EINVAL;
924 if (lsp == NULL) 888 if (lsp == NULL)
@@ -928,28 +892,49 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
928 return 0; 892 return 0;
929} 893}
930 894
931/* 895static bool nfs4_copy_lock_stateid(nfs4_stateid *dst, struct nfs4_state *state,
932 * Byte-range lock aware utility to initialize the stateid of read/write 896 fl_owner_t fl_owner, pid_t fl_pid)
933 * requests.
934 */
935void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid)
936{ 897{
937 struct nfs4_lock_state *lsp; 898 struct nfs4_lock_state *lsp;
938 int seq; 899 bool ret = false;
939 900
940 do {
941 seq = read_seqbegin(&state->seqlock);
942 memcpy(dst, &state->stateid, sizeof(*dst));
943 } while (read_seqretry(&state->seqlock, seq));
944 if (test_bit(LK_STATE_IN_USE, &state->flags) == 0) 901 if (test_bit(LK_STATE_IN_USE, &state->flags) == 0)
945 return; 902 goto out;
946 903
947 spin_lock(&state->state_lock); 904 spin_lock(&state->state_lock);
948 lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE); 905 lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE);
949 if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) 906 if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) {
950 memcpy(dst, &lsp->ls_stateid, sizeof(*dst)); 907 nfs4_stateid_copy(dst, &lsp->ls_stateid);
908 ret = true;
909 }
951 spin_unlock(&state->state_lock); 910 spin_unlock(&state->state_lock);
952 nfs4_put_lock_state(lsp); 911 nfs4_put_lock_state(lsp);
912out:
913 return ret;
914}
915
916static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
917{
918 int seq;
919
920 do {
921 seq = read_seqbegin(&state->seqlock);
922 nfs4_stateid_copy(dst, &state->stateid);
923 } while (read_seqretry(&state->seqlock, seq));
924}
925
926/*
927 * Byte-range lock aware utility to initialize the stateid of read/write
928 * requests.
929 */
930void nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state,
931 fmode_t fmode, fl_owner_t fl_owner, pid_t fl_pid)
932{
933 if (nfs4_copy_delegation_stateid(dst, state->inode, fmode))
934 return;
935 if (nfs4_copy_lock_stateid(dst, state, fl_owner, fl_pid))
936 return;
937 nfs4_copy_open_stateid(dst, state);
953} 938}
954 939
955struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask) 940struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask)
@@ -960,20 +945,28 @@ struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_m
960 if (new != NULL) { 945 if (new != NULL) {
961 new->sequence = counter; 946 new->sequence = counter;
962 INIT_LIST_HEAD(&new->list); 947 INIT_LIST_HEAD(&new->list);
948 new->task = NULL;
963 } 949 }
964 return new; 950 return new;
965} 951}
966 952
967void nfs_release_seqid(struct nfs_seqid *seqid) 953void nfs_release_seqid(struct nfs_seqid *seqid)
968{ 954{
969 if (!list_empty(&seqid->list)) { 955 struct nfs_seqid_counter *sequence;
970 struct rpc_sequence *sequence = seqid->sequence->sequence;
971 956
972 spin_lock(&sequence->lock); 957 if (list_empty(&seqid->list))
973 list_del_init(&seqid->list); 958 return;
974 spin_unlock(&sequence->lock); 959 sequence = seqid->sequence;
975 rpc_wake_up(&sequence->wait); 960 spin_lock(&sequence->lock);
961 list_del_init(&seqid->list);
962 if (!list_empty(&sequence->list)) {
963 struct nfs_seqid *next;
964
965 next = list_first_entry(&sequence->list,
966 struct nfs_seqid, list);
967 rpc_wake_up_queued_task(&sequence->wait, next->task);
976 } 968 }
969 spin_unlock(&sequence->lock);
977} 970}
978 971
979void nfs_free_seqid(struct nfs_seqid *seqid) 972void nfs_free_seqid(struct nfs_seqid *seqid)
@@ -989,14 +982,14 @@ void nfs_free_seqid(struct nfs_seqid *seqid)
989 */ 982 */
990static void nfs_increment_seqid(int status, struct nfs_seqid *seqid) 983static void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
991{ 984{
992 BUG_ON(list_first_entry(&seqid->sequence->sequence->list, struct nfs_seqid, list) != seqid); 985 BUG_ON(list_first_entry(&seqid->sequence->list, struct nfs_seqid, list) != seqid);
993 switch (status) { 986 switch (status) {
994 case 0: 987 case 0:
995 break; 988 break;
996 case -NFS4ERR_BAD_SEQID: 989 case -NFS4ERR_BAD_SEQID:
997 if (seqid->sequence->flags & NFS_SEQID_CONFIRMED) 990 if (seqid->sequence->flags & NFS_SEQID_CONFIRMED)
998 return; 991 return;
999 printk(KERN_WARNING "NFS: v4 server returned a bad" 992 pr_warn_ratelimited("NFS: v4 server returned a bad"
1000 " sequence-id error on an" 993 " sequence-id error on an"
1001 " unconfirmed sequence %p!\n", 994 " unconfirmed sequence %p!\n",
1002 seqid->sequence); 995 seqid->sequence);
@@ -1040,10 +1033,11 @@ void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid)
1040 1033
1041int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task) 1034int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task)
1042{ 1035{
1043 struct rpc_sequence *sequence = seqid->sequence->sequence; 1036 struct nfs_seqid_counter *sequence = seqid->sequence;
1044 int status = 0; 1037 int status = 0;
1045 1038
1046 spin_lock(&sequence->lock); 1039 spin_lock(&sequence->lock);
1040 seqid->task = task;
1047 if (list_empty(&seqid->list)) 1041 if (list_empty(&seqid->list))
1048 list_add_tail(&seqid->list, &sequence->list); 1042 list_add_tail(&seqid->list, &sequence->list);
1049 if (list_first_entry(&sequence->list, struct nfs_seqid, list) == seqid) 1043 if (list_first_entry(&sequence->list, struct nfs_seqid, list) == seqid)
@@ -1072,19 +1066,28 @@ static void nfs4_clear_state_manager_bit(struct nfs_client *clp)
1072void nfs4_schedule_state_manager(struct nfs_client *clp) 1066void nfs4_schedule_state_manager(struct nfs_client *clp)
1073{ 1067{
1074 struct task_struct *task; 1068 struct task_struct *task;
1069 char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1];
1075 1070
1076 if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) 1071 if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
1077 return; 1072 return;
1078 __module_get(THIS_MODULE); 1073 __module_get(THIS_MODULE);
1079 atomic_inc(&clp->cl_count); 1074 atomic_inc(&clp->cl_count);
1080 task = kthread_run(nfs4_run_state_manager, clp, "%s-manager", 1075
1081 rpc_peeraddr2str(clp->cl_rpcclient, 1076 /* The rcu_read_lock() is not strictly necessary, as the state
1082 RPC_DISPLAY_ADDR)); 1077 * manager is the only thread that ever changes the rpc_xprt
1083 if (!IS_ERR(task)) 1078 * after it's initialized. At this point, we're single threaded. */
1084 return; 1079 rcu_read_lock();
1085 nfs4_clear_state_manager_bit(clp); 1080 snprintf(buf, sizeof(buf), "%s-manager",
1086 nfs_put_client(clp); 1081 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
1087 module_put(THIS_MODULE); 1082 rcu_read_unlock();
1083 task = kthread_run(nfs4_run_state_manager, clp, buf);
1084 if (IS_ERR(task)) {
1085 printk(KERN_ERR "%s: kthread_run: %ld\n",
1086 __func__, PTR_ERR(task));
1087 nfs4_clear_state_manager_bit(clp);
1088 nfs_put_client(clp);
1089 module_put(THIS_MODULE);
1090 }
1088} 1091}
1089 1092
1090/* 1093/*
@@ -1098,10 +1101,25 @@ void nfs4_schedule_lease_recovery(struct nfs_client *clp)
1098 set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); 1101 set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
1099 nfs4_schedule_state_manager(clp); 1102 nfs4_schedule_state_manager(clp);
1100} 1103}
1104EXPORT_SYMBOL_GPL(nfs4_schedule_lease_recovery);
1105
1106/*
1107 * nfs40_handle_cb_pathdown - return all delegations after NFS4ERR_CB_PATH_DOWN
1108 * @clp: client to process
1109 *
1110 * Set the NFS4CLNT_LEASE_EXPIRED state in order to force a
1111 * resend of the SETCLIENTID and hence re-establish the
1112 * callback channel. Then return all existing delegations.
1113 */
1114static void nfs40_handle_cb_pathdown(struct nfs_client *clp)
1115{
1116 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1117 nfs_expire_all_delegations(clp);
1118}
1101 1119
1102void nfs4_schedule_path_down_recovery(struct nfs_client *clp) 1120void nfs4_schedule_path_down_recovery(struct nfs_client *clp)
1103{ 1121{
1104 nfs_handle_cb_pathdown(clp); 1122 nfs40_handle_cb_pathdown(clp);
1105 nfs4_schedule_state_manager(clp); 1123 nfs4_schedule_state_manager(clp);
1106} 1124}
1107 1125
@@ -1132,11 +1150,37 @@ void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4
1132{ 1150{
1133 struct nfs_client *clp = server->nfs_client; 1151 struct nfs_client *clp = server->nfs_client;
1134 1152
1135 if (test_and_clear_bit(NFS_DELEGATED_STATE, &state->flags))
1136 nfs_async_inode_return_delegation(state->inode, &state->stateid);
1137 nfs4_state_mark_reclaim_nograce(clp, state); 1153 nfs4_state_mark_reclaim_nograce(clp, state);
1138 nfs4_schedule_state_manager(clp); 1154 nfs4_schedule_state_manager(clp);
1139} 1155}
1156EXPORT_SYMBOL_GPL(nfs4_schedule_stateid_recovery);
1157
1158void nfs_inode_find_state_and_recover(struct inode *inode,
1159 const nfs4_stateid *stateid)
1160{
1161 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
1162 struct nfs_inode *nfsi = NFS_I(inode);
1163 struct nfs_open_context *ctx;
1164 struct nfs4_state *state;
1165 bool found = false;
1166
1167 spin_lock(&inode->i_lock);
1168 list_for_each_entry(ctx, &nfsi->open_files, list) {
1169 state = ctx->state;
1170 if (state == NULL)
1171 continue;
1172 if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
1173 continue;
1174 if (!nfs4_stateid_match(&state->stateid, stateid))
1175 continue;
1176 nfs4_state_mark_reclaim_nograce(clp, state);
1177 found = true;
1178 }
1179 spin_unlock(&inode->i_lock);
1180 if (found)
1181 nfs4_schedule_state_manager(clp);
1182}
1183
1140 1184
1141static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops) 1185static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops)
1142{ 1186{
@@ -1175,8 +1219,8 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
1175 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 1219 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1176 goto out; 1220 goto out;
1177 default: 1221 default:
1178 printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n", 1222 printk(KERN_ERR "NFS: %s: unhandled error %d. "
1179 __func__, status); 1223 "Zeroing state\n", __func__, status);
1180 case -ENOMEM: 1224 case -ENOMEM:
1181 case -NFS4ERR_DENIED: 1225 case -NFS4ERR_DENIED:
1182 case -NFS4ERR_RECLAIM_BAD: 1226 case -NFS4ERR_RECLAIM_BAD:
@@ -1222,8 +1266,9 @@ restart:
1222 spin_lock(&state->state_lock); 1266 spin_lock(&state->state_lock);
1223 list_for_each_entry(lock, &state->lock_states, ls_locks) { 1267 list_for_each_entry(lock, &state->lock_states, ls_locks) {
1224 if (!(lock->ls_flags & NFS_LOCK_INITIALIZED)) 1268 if (!(lock->ls_flags & NFS_LOCK_INITIALIZED))
1225 printk("%s: Lock reclaim failed!\n", 1269 pr_warn_ratelimited("NFS: "
1226 __func__); 1270 "%s: Lock reclaim "
1271 "failed!\n", __func__);
1227 } 1272 }
1228 spin_unlock(&state->state_lock); 1273 spin_unlock(&state->state_lock);
1229 nfs4_put_open_state(state); 1274 nfs4_put_open_state(state);
@@ -1232,8 +1277,8 @@ restart:
1232 } 1277 }
1233 switch (status) { 1278 switch (status) {
1234 default: 1279 default:
1235 printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n", 1280 printk(KERN_ERR "NFS: %s: unhandled error %d. "
1236 __func__, status); 1281 "Zeroing state\n", __func__, status);
1237 case -ENOENT: 1282 case -ENOENT:
1238 case -ENOMEM: 1283 case -ENOMEM:
1239 case -ESTALE: 1284 case -ESTALE:
@@ -1241,8 +1286,8 @@ restart:
1241 * Open state on this file cannot be recovered 1286 * Open state on this file cannot be recovered
1242 * All we can do is revert to using the zero stateid. 1287 * All we can do is revert to using the zero stateid.
1243 */ 1288 */
1244 memset(state->stateid.data, 0, 1289 memset(&state->stateid, 0,
1245 sizeof(state->stateid.data)); 1290 sizeof(state->stateid));
1246 /* Mark the file as being 'closed' */ 1291 /* Mark the file as being 'closed' */
1247 state->state = 0; 1292 state->state = 0;
1248 break; 1293 break;
@@ -1420,7 +1465,7 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
1420 case 0: 1465 case 0:
1421 break; 1466 break;
1422 case -NFS4ERR_CB_PATH_DOWN: 1467 case -NFS4ERR_CB_PATH_DOWN:
1423 nfs_handle_cb_pathdown(clp); 1468 nfs40_handle_cb_pathdown(clp);
1424 break; 1469 break;
1425 case -NFS4ERR_NO_GRACE: 1470 case -NFS4ERR_NO_GRACE:
1426 nfs4_state_end_reclaim_reboot(clp); 1471 nfs4_state_end_reclaim_reboot(clp);
@@ -1801,7 +1846,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
1801 } while (atomic_read(&clp->cl_count) > 1); 1846 } while (atomic_read(&clp->cl_count) > 1);
1802 return; 1847 return;
1803out_error: 1848out_error:
1804 printk(KERN_WARNING "Error: state manager failed on NFSv4 server %s" 1849 pr_warn_ratelimited("NFS: state manager failed on NFSv4 server %s"
1805 " with error %d\n", clp->cl_hostname, -status); 1850 " with error %d\n", clp->cl_hostname, -status);
1806 nfs4_end_drain_session(clp); 1851 nfs4_end_drain_session(clp);
1807 nfs4_clear_state_manager_bit(clp); 1852 nfs4_clear_state_manager_bit(clp);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 33bd8d0f745d..c74fdb114b48 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -44,6 +44,8 @@
44#include <linux/pagemap.h> 44#include <linux/pagemap.h>
45#include <linux/proc_fs.h> 45#include <linux/proc_fs.h>
46#include <linux/kdev_t.h> 46#include <linux/kdev_t.h>
47#include <linux/module.h>
48#include <linux/utsname.h>
47#include <linux/sunrpc/clnt.h> 49#include <linux/sunrpc/clnt.h>
48#include <linux/sunrpc/msg_prot.h> 50#include <linux/sunrpc/msg_prot.h>
49#include <linux/sunrpc/gss_api.h> 51#include <linux/sunrpc/gss_api.h>
@@ -271,7 +273,12 @@ static int nfs4_stat_to_errno(int);
271 1 /* flags */ + \ 273 1 /* flags */ + \
272 1 /* spa_how */ + \ 274 1 /* spa_how */ + \
273 0 /* SP4_NONE (for now) */ + \ 275 0 /* SP4_NONE (for now) */ + \
274 1 /* zero implemetation id array */) 276 1 /* implementation id array of size 1 */ + \
277 1 /* nii_domain */ + \
278 XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \
279 1 /* nii_name */ + \
280 XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \
281 3 /* nii_date */)
275#define decode_exchange_id_maxsz (op_decode_hdr_maxsz + \ 282#define decode_exchange_id_maxsz (op_decode_hdr_maxsz + \
276 2 /* eir_clientid */ + \ 283 2 /* eir_clientid */ + \
277 1 /* eir_sequenceid */ + \ 284 1 /* eir_sequenceid */ + \
@@ -284,7 +291,11 @@ static int nfs4_stat_to_errno(int);
284 /* eir_server_scope<> */ \ 291 /* eir_server_scope<> */ \
285 XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 + \ 292 XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 + \
286 1 /* eir_server_impl_id array length */ + \ 293 1 /* eir_server_impl_id array length */ + \
287 0 /* ignored eir_server_impl_id contents */) 294 1 /* nii_domain */ + \
295 XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \
296 1 /* nii_name */ + \
297 XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \
298 3 /* nii_date */)
288#define encode_channel_attrs_maxsz (6 + 1 /* ca_rdma_ird.len (0) */) 299#define encode_channel_attrs_maxsz (6 + 1 /* ca_rdma_ird.len (0) */)
289#define decode_channel_attrs_maxsz (6 + \ 300#define decode_channel_attrs_maxsz (6 + \
290 1 /* ca_rdma_ird.len */ + \ 301 1 /* ca_rdma_ird.len */ + \
@@ -838,6 +849,12 @@ const u32 nfs41_maxread_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
838 XDR_UNIT); 849 XDR_UNIT);
839#endif /* CONFIG_NFS_V4_1 */ 850#endif /* CONFIG_NFS_V4_1 */
840 851
852static unsigned short send_implementation_id = 1;
853
854module_param(send_implementation_id, ushort, 0644);
855MODULE_PARM_DESC(send_implementation_id,
856 "Send implementation ID with NFSv4.1 exchange_id");
857
841static const umode_t nfs_type2fmt[] = { 858static const umode_t nfs_type2fmt[] = {
842 [NF4BAD] = 0, 859 [NF4BAD] = 0,
843 [NF4REG] = S_IFREG, 860 [NF4REG] = S_IFREG,
@@ -868,15 +885,44 @@ static __be32 *reserve_space(struct xdr_stream *xdr, size_t nbytes)
868 return p; 885 return p;
869} 886}
870 887
888static void encode_opaque_fixed(struct xdr_stream *xdr, const void *buf, size_t len)
889{
890 __be32 *p;
891
892 p = xdr_reserve_space(xdr, len);
893 xdr_encode_opaque_fixed(p, buf, len);
894}
895
871static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) 896static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
872{ 897{
873 __be32 *p; 898 __be32 *p;
874 899
875 p = xdr_reserve_space(xdr, 4 + len); 900 p = reserve_space(xdr, 4 + len);
876 BUG_ON(p == NULL);
877 xdr_encode_opaque(p, str, len); 901 xdr_encode_opaque(p, str, len);
878} 902}
879 903
904static void encode_uint32(struct xdr_stream *xdr, u32 n)
905{
906 __be32 *p;
907
908 p = reserve_space(xdr, 4);
909 *p = cpu_to_be32(n);
910}
911
912static void encode_uint64(struct xdr_stream *xdr, u64 n)
913{
914 __be32 *p;
915
916 p = reserve_space(xdr, 8);
917 xdr_encode_hyper(p, n);
918}
919
920static void encode_nfs4_seqid(struct xdr_stream *xdr,
921 const struct nfs_seqid *seqid)
922{
923 encode_uint32(xdr, seqid->sequence->counter);
924}
925
880static void encode_compound_hdr(struct xdr_stream *xdr, 926static void encode_compound_hdr(struct xdr_stream *xdr,
881 struct rpc_rqst *req, 927 struct rpc_rqst *req,
882 struct compound_hdr *hdr) 928 struct compound_hdr *hdr)
@@ -889,28 +935,37 @@ static void encode_compound_hdr(struct xdr_stream *xdr,
889 * but this is not required as a MUST for the server to do so. */ 935 * but this is not required as a MUST for the server to do so. */
890 hdr->replen = RPC_REPHDRSIZE + auth->au_rslack + 3 + hdr->taglen; 936 hdr->replen = RPC_REPHDRSIZE + auth->au_rslack + 3 + hdr->taglen;
891 937
892 dprintk("encode_compound: tag=%.*s\n", (int)hdr->taglen, hdr->tag);
893 BUG_ON(hdr->taglen > NFS4_MAXTAGLEN); 938 BUG_ON(hdr->taglen > NFS4_MAXTAGLEN);
894 p = reserve_space(xdr, 4 + hdr->taglen + 8); 939 encode_string(xdr, hdr->taglen, hdr->tag);
895 p = xdr_encode_opaque(p, hdr->tag, hdr->taglen); 940 p = reserve_space(xdr, 8);
896 *p++ = cpu_to_be32(hdr->minorversion); 941 *p++ = cpu_to_be32(hdr->minorversion);
897 hdr->nops_p = p; 942 hdr->nops_p = p;
898 *p = cpu_to_be32(hdr->nops); 943 *p = cpu_to_be32(hdr->nops);
899} 944}
900 945
946static void encode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 op,
947 uint32_t replen,
948 struct compound_hdr *hdr)
949{
950 encode_uint32(xdr, op);
951 hdr->nops++;
952 hdr->replen += replen;
953}
954
901static void encode_nops(struct compound_hdr *hdr) 955static void encode_nops(struct compound_hdr *hdr)
902{ 956{
903 BUG_ON(hdr->nops > NFS4_MAX_OPS); 957 BUG_ON(hdr->nops > NFS4_MAX_OPS);
904 *hdr->nops_p = htonl(hdr->nops); 958 *hdr->nops_p = htonl(hdr->nops);
905} 959}
906 960
907static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *verf) 961static void encode_nfs4_stateid(struct xdr_stream *xdr, const nfs4_stateid *stateid)
908{ 962{
909 __be32 *p; 963 encode_opaque_fixed(xdr, stateid, NFS4_STATEID_SIZE);
964}
910 965
911 p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); 966static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *verf)
912 BUG_ON(p == NULL); 967{
913 xdr_encode_opaque_fixed(p, verf->data, NFS4_VERIFIER_SIZE); 968 encode_opaque_fixed(xdr, verf->data, NFS4_VERIFIER_SIZE);
914} 969}
915 970
916static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const struct nfs_server *server) 971static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const struct nfs_server *server)
@@ -1023,7 +1078,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
1023 * Now we backfill the bitmap and the attribute buffer length. 1078 * Now we backfill the bitmap and the attribute buffer length.
1024 */ 1079 */
1025 if (len != ((char *)p - (char *)q) + 4) { 1080 if (len != ((char *)p - (char *)q) + 4) {
1026 printk(KERN_ERR "nfs: Attr length error, %u != %Zu\n", 1081 printk(KERN_ERR "NFS: Attr length error, %u != %Zu\n",
1027 len, ((char *)p - (char *)q) + 4); 1082 len, ((char *)p - (char *)q) + 4);
1028 BUG(); 1083 BUG();
1029 } 1084 }
@@ -1037,46 +1092,33 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
1037 1092
1038static void encode_access(struct xdr_stream *xdr, u32 access, struct compound_hdr *hdr) 1093static void encode_access(struct xdr_stream *xdr, u32 access, struct compound_hdr *hdr)
1039{ 1094{
1040 __be32 *p; 1095 encode_op_hdr(xdr, OP_ACCESS, decode_access_maxsz, hdr);
1041 1096 encode_uint32(xdr, access);
1042 p = reserve_space(xdr, 8);
1043 *p++ = cpu_to_be32(OP_ACCESS);
1044 *p = cpu_to_be32(access);
1045 hdr->nops++;
1046 hdr->replen += decode_access_maxsz;
1047} 1097}
1048 1098
1049static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr) 1099static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr)
1050{ 1100{
1051 __be32 *p; 1101 encode_op_hdr(xdr, OP_CLOSE, decode_close_maxsz, hdr);
1052 1102 encode_nfs4_seqid(xdr, arg->seqid);
1053 p = reserve_space(xdr, 8+NFS4_STATEID_SIZE); 1103 encode_nfs4_stateid(xdr, arg->stateid);
1054 *p++ = cpu_to_be32(OP_CLOSE);
1055 *p++ = cpu_to_be32(arg->seqid->sequence->counter);
1056 xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
1057 hdr->nops++;
1058 hdr->replen += decode_close_maxsz;
1059} 1104}
1060 1105
1061static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) 1106static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr)
1062{ 1107{
1063 __be32 *p; 1108 __be32 *p;
1064 1109
1065 p = reserve_space(xdr, 16); 1110 encode_op_hdr(xdr, OP_COMMIT, decode_commit_maxsz, hdr);
1066 *p++ = cpu_to_be32(OP_COMMIT); 1111 p = reserve_space(xdr, 12);
1067 p = xdr_encode_hyper(p, args->offset); 1112 p = xdr_encode_hyper(p, args->offset);
1068 *p = cpu_to_be32(args->count); 1113 *p = cpu_to_be32(args->count);
1069 hdr->nops++;
1070 hdr->replen += decode_commit_maxsz;
1071} 1114}
1072 1115
1073static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *create, struct compound_hdr *hdr) 1116static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *create, struct compound_hdr *hdr)
1074{ 1117{
1075 __be32 *p; 1118 __be32 *p;
1076 1119
1077 p = reserve_space(xdr, 8); 1120 encode_op_hdr(xdr, OP_CREATE, decode_create_maxsz, hdr);
1078 *p++ = cpu_to_be32(OP_CREATE); 1121 encode_uint32(xdr, create->ftype);
1079 *p = cpu_to_be32(create->ftype);
1080 1122
1081 switch (create->ftype) { 1123 switch (create->ftype) {
1082 case NF4LNK: 1124 case NF4LNK:
@@ -1096,9 +1138,6 @@ static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *
1096 } 1138 }
1097 1139
1098 encode_string(xdr, create->name->len, create->name->name); 1140 encode_string(xdr, create->name->len, create->name->name);
1099 hdr->nops++;
1100 hdr->replen += decode_create_maxsz;
1101
1102 encode_attrs(xdr, create->attrs, create->server); 1141 encode_attrs(xdr, create->attrs, create->server);
1103} 1142}
1104 1143
@@ -1106,25 +1145,21 @@ static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct c
1106{ 1145{
1107 __be32 *p; 1146 __be32 *p;
1108 1147
1109 p = reserve_space(xdr, 12); 1148 encode_op_hdr(xdr, OP_GETATTR, decode_getattr_maxsz, hdr);
1110 *p++ = cpu_to_be32(OP_GETATTR); 1149 p = reserve_space(xdr, 8);
1111 *p++ = cpu_to_be32(1); 1150 *p++ = cpu_to_be32(1);
1112 *p = cpu_to_be32(bitmap); 1151 *p = cpu_to_be32(bitmap);
1113 hdr->nops++;
1114 hdr->replen += decode_getattr_maxsz;
1115} 1152}
1116 1153
1117static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1, struct compound_hdr *hdr) 1154static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1, struct compound_hdr *hdr)
1118{ 1155{
1119 __be32 *p; 1156 __be32 *p;
1120 1157
1121 p = reserve_space(xdr, 16); 1158 encode_op_hdr(xdr, OP_GETATTR, decode_getattr_maxsz, hdr);
1122 *p++ = cpu_to_be32(OP_GETATTR); 1159 p = reserve_space(xdr, 12);
1123 *p++ = cpu_to_be32(2); 1160 *p++ = cpu_to_be32(2);
1124 *p++ = cpu_to_be32(bm0); 1161 *p++ = cpu_to_be32(bm0);
1125 *p = cpu_to_be32(bm1); 1162 *p = cpu_to_be32(bm1);
1126 hdr->nops++;
1127 hdr->replen += decode_getattr_maxsz;
1128} 1163}
1129 1164
1130static void 1165static void
@@ -1134,8 +1169,7 @@ encode_getattr_three(struct xdr_stream *xdr,
1134{ 1169{
1135 __be32 *p; 1170 __be32 *p;
1136 1171
1137 p = reserve_space(xdr, 4); 1172 encode_op_hdr(xdr, OP_GETATTR, decode_getattr_maxsz, hdr);
1138 *p = cpu_to_be32(OP_GETATTR);
1139 if (bm2) { 1173 if (bm2) {
1140 p = reserve_space(xdr, 16); 1174 p = reserve_space(xdr, 16);
1141 *p++ = cpu_to_be32(3); 1175 *p++ = cpu_to_be32(3);
@@ -1152,8 +1186,6 @@ encode_getattr_three(struct xdr_stream *xdr,
1152 *p++ = cpu_to_be32(1); 1186 *p++ = cpu_to_be32(1);
1153 *p = cpu_to_be32(bm0); 1187 *p = cpu_to_be32(bm0);
1154 } 1188 }
1155 hdr->nops++;
1156 hdr->replen += decode_getattr_maxsz;
1157} 1189}
1158 1190
1159static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) 1191static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
@@ -1179,23 +1211,13 @@ static void encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask, stru
1179 1211
1180static void encode_getfh(struct xdr_stream *xdr, struct compound_hdr *hdr) 1212static void encode_getfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1181{ 1213{
1182 __be32 *p; 1214 encode_op_hdr(xdr, OP_GETFH, decode_getfh_maxsz, hdr);
1183
1184 p = reserve_space(xdr, 4);
1185 *p = cpu_to_be32(OP_GETFH);
1186 hdr->nops++;
1187 hdr->replen += decode_getfh_maxsz;
1188} 1215}
1189 1216
1190static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) 1217static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
1191{ 1218{
1192 __be32 *p; 1219 encode_op_hdr(xdr, OP_LINK, decode_link_maxsz, hdr);
1193 1220 encode_string(xdr, name->len, name->name);
1194 p = reserve_space(xdr, 8 + name->len);
1195 *p++ = cpu_to_be32(OP_LINK);
1196 xdr_encode_opaque(p, name->name, name->len);
1197 hdr->nops++;
1198 hdr->replen += decode_link_maxsz;
1199} 1221}
1200 1222
1201static inline int nfs4_lock_type(struct file_lock *fl, int block) 1223static inline int nfs4_lock_type(struct file_lock *fl, int block)
@@ -1232,79 +1254,60 @@ static void encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args
1232{ 1254{
1233 __be32 *p; 1255 __be32 *p;
1234 1256
1235 p = reserve_space(xdr, 32); 1257 encode_op_hdr(xdr, OP_LOCK, decode_lock_maxsz, hdr);
1236 *p++ = cpu_to_be32(OP_LOCK); 1258 p = reserve_space(xdr, 28);
1237 *p++ = cpu_to_be32(nfs4_lock_type(args->fl, args->block)); 1259 *p++ = cpu_to_be32(nfs4_lock_type(args->fl, args->block));
1238 *p++ = cpu_to_be32(args->reclaim); 1260 *p++ = cpu_to_be32(args->reclaim);
1239 p = xdr_encode_hyper(p, args->fl->fl_start); 1261 p = xdr_encode_hyper(p, args->fl->fl_start);
1240 p = xdr_encode_hyper(p, nfs4_lock_length(args->fl)); 1262 p = xdr_encode_hyper(p, nfs4_lock_length(args->fl));
1241 *p = cpu_to_be32(args->new_lock_owner); 1263 *p = cpu_to_be32(args->new_lock_owner);
1242 if (args->new_lock_owner){ 1264 if (args->new_lock_owner){
1243 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4); 1265 encode_nfs4_seqid(xdr, args->open_seqid);
1244 *p++ = cpu_to_be32(args->open_seqid->sequence->counter); 1266 encode_nfs4_stateid(xdr, args->open_stateid);
1245 p = xdr_encode_opaque_fixed(p, args->open_stateid->data, NFS4_STATEID_SIZE); 1267 encode_nfs4_seqid(xdr, args->lock_seqid);
1246 *p++ = cpu_to_be32(args->lock_seqid->sequence->counter);
1247 encode_lockowner(xdr, &args->lock_owner); 1268 encode_lockowner(xdr, &args->lock_owner);
1248 } 1269 }
1249 else { 1270 else {
1250 p = reserve_space(xdr, NFS4_STATEID_SIZE+4); 1271 encode_nfs4_stateid(xdr, args->lock_stateid);
1251 p = xdr_encode_opaque_fixed(p, args->lock_stateid->data, NFS4_STATEID_SIZE); 1272 encode_nfs4_seqid(xdr, args->lock_seqid);
1252 *p = cpu_to_be32(args->lock_seqid->sequence->counter);
1253 } 1273 }
1254 hdr->nops++;
1255 hdr->replen += decode_lock_maxsz;
1256} 1274}
1257 1275
1258static void encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *args, struct compound_hdr *hdr) 1276static void encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *args, struct compound_hdr *hdr)
1259{ 1277{
1260 __be32 *p; 1278 __be32 *p;
1261 1279
1262 p = reserve_space(xdr, 24); 1280 encode_op_hdr(xdr, OP_LOCKT, decode_lockt_maxsz, hdr);
1263 *p++ = cpu_to_be32(OP_LOCKT); 1281 p = reserve_space(xdr, 20);
1264 *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0)); 1282 *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0));
1265 p = xdr_encode_hyper(p, args->fl->fl_start); 1283 p = xdr_encode_hyper(p, args->fl->fl_start);
1266 p = xdr_encode_hyper(p, nfs4_lock_length(args->fl)); 1284 p = xdr_encode_hyper(p, nfs4_lock_length(args->fl));
1267 encode_lockowner(xdr, &args->lock_owner); 1285 encode_lockowner(xdr, &args->lock_owner);
1268 hdr->nops++;
1269 hdr->replen += decode_lockt_maxsz;
1270} 1286}
1271 1287
1272static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *args, struct compound_hdr *hdr) 1288static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *args, struct compound_hdr *hdr)
1273{ 1289{
1274 __be32 *p; 1290 __be32 *p;
1275 1291
1276 p = reserve_space(xdr, 12+NFS4_STATEID_SIZE+16); 1292 encode_op_hdr(xdr, OP_LOCKU, decode_locku_maxsz, hdr);
1277 *p++ = cpu_to_be32(OP_LOCKU); 1293 encode_uint32(xdr, nfs4_lock_type(args->fl, 0));
1278 *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0)); 1294 encode_nfs4_seqid(xdr, args->seqid);
1279 *p++ = cpu_to_be32(args->seqid->sequence->counter); 1295 encode_nfs4_stateid(xdr, args->stateid);
1280 p = xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE); 1296 p = reserve_space(xdr, 16);
1281 p = xdr_encode_hyper(p, args->fl->fl_start); 1297 p = xdr_encode_hyper(p, args->fl->fl_start);
1282 xdr_encode_hyper(p, nfs4_lock_length(args->fl)); 1298 xdr_encode_hyper(p, nfs4_lock_length(args->fl));
1283 hdr->nops++;
1284 hdr->replen += decode_locku_maxsz;
1285} 1299}
1286 1300
1287static void encode_release_lockowner(struct xdr_stream *xdr, const struct nfs_lowner *lowner, struct compound_hdr *hdr) 1301static void encode_release_lockowner(struct xdr_stream *xdr, const struct nfs_lowner *lowner, struct compound_hdr *hdr)
1288{ 1302{
1289 __be32 *p; 1303 encode_op_hdr(xdr, OP_RELEASE_LOCKOWNER, decode_release_lockowner_maxsz, hdr);
1290
1291 p = reserve_space(xdr, 4);
1292 *p = cpu_to_be32(OP_RELEASE_LOCKOWNER);
1293 encode_lockowner(xdr, lowner); 1304 encode_lockowner(xdr, lowner);
1294 hdr->nops++;
1295 hdr->replen += decode_release_lockowner_maxsz;
1296} 1305}
1297 1306
1298static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) 1307static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
1299{ 1308{
1300 int len = name->len; 1309 encode_op_hdr(xdr, OP_LOOKUP, decode_lookup_maxsz, hdr);
1301 __be32 *p; 1310 encode_string(xdr, name->len, name->name);
1302
1303 p = reserve_space(xdr, 8 + len);
1304 *p++ = cpu_to_be32(OP_LOOKUP);
1305 xdr_encode_opaque(p, name->name, len);
1306 hdr->nops++;
1307 hdr->replen += decode_lookup_maxsz;
1308} 1311}
1309 1312
1310static void encode_share_access(struct xdr_stream *xdr, fmode_t fmode) 1313static void encode_share_access(struct xdr_stream *xdr, fmode_t fmode)
@@ -1335,9 +1338,7 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
1335 * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4, 1338 * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4,
1336 * owner 4 = 32 1339 * owner 4 = 32
1337 */ 1340 */
1338 p = reserve_space(xdr, 8); 1341 encode_nfs4_seqid(xdr, arg->seqid);
1339 *p++ = cpu_to_be32(OP_OPEN);
1340 *p = cpu_to_be32(arg->seqid->sequence->counter);
1341 encode_share_access(xdr, arg->fmode); 1342 encode_share_access(xdr, arg->fmode);
1342 p = reserve_space(xdr, 32); 1343 p = reserve_space(xdr, 32);
1343 p = xdr_encode_hyper(p, arg->clientid); 1344 p = xdr_encode_hyper(p, arg->clientid);
@@ -1437,14 +1438,15 @@ static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struc
1437{ 1438{
1438 __be32 *p; 1439 __be32 *p;
1439 1440
1440 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE); 1441 p = reserve_space(xdr, 4);
1441 *p++ = cpu_to_be32(NFS4_OPEN_CLAIM_DELEGATE_CUR); 1442 *p = cpu_to_be32(NFS4_OPEN_CLAIM_DELEGATE_CUR);
1442 xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE); 1443 encode_nfs4_stateid(xdr, stateid);
1443 encode_string(xdr, name->len, name->name); 1444 encode_string(xdr, name->len, name->name);
1444} 1445}
1445 1446
1446static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg, struct compound_hdr *hdr) 1447static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg, struct compound_hdr *hdr)
1447{ 1448{
1449 encode_op_hdr(xdr, OP_OPEN, decode_open_maxsz, hdr);
1448 encode_openhdr(xdr, arg); 1450 encode_openhdr(xdr, arg);
1449 encode_opentype(xdr, arg); 1451 encode_opentype(xdr, arg);
1450 switch (arg->claim) { 1452 switch (arg->claim) {
@@ -1460,88 +1462,64 @@ static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg,
1460 default: 1462 default:
1461 BUG(); 1463 BUG();
1462 } 1464 }
1463 hdr->nops++;
1464 hdr->replen += decode_open_maxsz;
1465} 1465}
1466 1466
1467static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_confirmargs *arg, struct compound_hdr *hdr) 1467static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_confirmargs *arg, struct compound_hdr *hdr)
1468{ 1468{
1469 __be32 *p; 1469 encode_op_hdr(xdr, OP_OPEN_CONFIRM, decode_open_confirm_maxsz, hdr);
1470 1470 encode_nfs4_stateid(xdr, arg->stateid);
1471 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4); 1471 encode_nfs4_seqid(xdr, arg->seqid);
1472 *p++ = cpu_to_be32(OP_OPEN_CONFIRM);
1473 p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
1474 *p = cpu_to_be32(arg->seqid->sequence->counter);
1475 hdr->nops++;
1476 hdr->replen += decode_open_confirm_maxsz;
1477} 1472}
1478 1473
1479static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr) 1474static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr)
1480{ 1475{
1481 __be32 *p; 1476 encode_op_hdr(xdr, OP_OPEN_DOWNGRADE, decode_open_downgrade_maxsz, hdr);
1482 1477 encode_nfs4_stateid(xdr, arg->stateid);
1483 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4); 1478 encode_nfs4_seqid(xdr, arg->seqid);
1484 *p++ = cpu_to_be32(OP_OPEN_DOWNGRADE);
1485 p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
1486 *p = cpu_to_be32(arg->seqid->sequence->counter);
1487 encode_share_access(xdr, arg->fmode); 1479 encode_share_access(xdr, arg->fmode);
1488 hdr->nops++;
1489 hdr->replen += decode_open_downgrade_maxsz;
1490} 1480}
1491 1481
1492static void 1482static void
1493encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh, struct compound_hdr *hdr) 1483encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh, struct compound_hdr *hdr)
1494{ 1484{
1495 int len = fh->size; 1485 encode_op_hdr(xdr, OP_PUTFH, decode_putfh_maxsz, hdr);
1496 __be32 *p; 1486 encode_string(xdr, fh->size, fh->data);
1497
1498 p = reserve_space(xdr, 8 + len);
1499 *p++ = cpu_to_be32(OP_PUTFH);
1500 xdr_encode_opaque(p, fh->data, len);
1501 hdr->nops++;
1502 hdr->replen += decode_putfh_maxsz;
1503} 1487}
1504 1488
1505static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr) 1489static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1506{ 1490{
1507 __be32 *p; 1491 encode_op_hdr(xdr, OP_PUTROOTFH, decode_putrootfh_maxsz, hdr);
1508
1509 p = reserve_space(xdr, 4);
1510 *p = cpu_to_be32(OP_PUTROOTFH);
1511 hdr->nops++;
1512 hdr->replen += decode_putrootfh_maxsz;
1513} 1492}
1514 1493
1515static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx, int zero_seqid) 1494static void encode_open_stateid(struct xdr_stream *xdr,
1495 const struct nfs_open_context *ctx,
1496 const struct nfs_lock_context *l_ctx,
1497 fmode_t fmode,
1498 int zero_seqid)
1516{ 1499{
1517 nfs4_stateid stateid; 1500 nfs4_stateid stateid;
1518 __be32 *p;
1519 1501
1520 p = reserve_space(xdr, NFS4_STATEID_SIZE);
1521 if (ctx->state != NULL) { 1502 if (ctx->state != NULL) {
1522 nfs4_copy_stateid(&stateid, ctx->state, l_ctx->lockowner, l_ctx->pid); 1503 nfs4_select_rw_stateid(&stateid, ctx->state,
1504 fmode, l_ctx->lockowner, l_ctx->pid);
1523 if (zero_seqid) 1505 if (zero_seqid)
1524 stateid.stateid.seqid = 0; 1506 stateid.seqid = 0;
1525 xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE); 1507 encode_nfs4_stateid(xdr, &stateid);
1526 } else 1508 } else
1527 xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE); 1509 encode_nfs4_stateid(xdr, &zero_stateid);
1528} 1510}
1529 1511
1530static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr) 1512static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr)
1531{ 1513{
1532 __be32 *p; 1514 __be32 *p;
1533 1515
1534 p = reserve_space(xdr, 4); 1516 encode_op_hdr(xdr, OP_READ, decode_read_maxsz, hdr);
1535 *p = cpu_to_be32(OP_READ); 1517 encode_open_stateid(xdr, args->context, args->lock_context,
1536 1518 FMODE_READ, hdr->minorversion);
1537 encode_stateid(xdr, args->context, args->lock_context,
1538 hdr->minorversion);
1539 1519
1540 p = reserve_space(xdr, 12); 1520 p = reserve_space(xdr, 12);
1541 p = xdr_encode_hyper(p, args->offset); 1521 p = xdr_encode_hyper(p, args->offset);
1542 *p = cpu_to_be32(args->count); 1522 *p = cpu_to_be32(args->count);
1543 hdr->nops++;
1544 hdr->replen += decode_read_maxsz;
1545} 1523}
1546 1524
1547static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr) 1525static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr)
@@ -1551,7 +1529,7 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
1551 FATTR4_WORD1_MOUNTED_ON_FILEID, 1529 FATTR4_WORD1_MOUNTED_ON_FILEID,
1552 }; 1530 };
1553 uint32_t dircount = readdir->count >> 1; 1531 uint32_t dircount = readdir->count >> 1;
1554 __be32 *p; 1532 __be32 *p, verf[2];
1555 1533
1556 if (readdir->plus) { 1534 if (readdir->plus) {
1557 attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE| 1535 attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE|
@@ -1566,80 +1544,54 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
1566 if (!(readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)) 1544 if (!(readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID))
1567 attrs[0] |= FATTR4_WORD0_FILEID; 1545 attrs[0] |= FATTR4_WORD0_FILEID;
1568 1546
1569 p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20); 1547 encode_op_hdr(xdr, OP_READDIR, decode_readdir_maxsz, hdr);
1570 *p++ = cpu_to_be32(OP_READDIR); 1548 encode_uint64(xdr, readdir->cookie);
1571 p = xdr_encode_hyper(p, readdir->cookie); 1549 encode_nfs4_verifier(xdr, &readdir->verifier);
1572 p = xdr_encode_opaque_fixed(p, readdir->verifier.data, NFS4_VERIFIER_SIZE); 1550 p = reserve_space(xdr, 20);
1573 *p++ = cpu_to_be32(dircount); 1551 *p++ = cpu_to_be32(dircount);
1574 *p++ = cpu_to_be32(readdir->count); 1552 *p++ = cpu_to_be32(readdir->count);
1575 *p++ = cpu_to_be32(2); 1553 *p++ = cpu_to_be32(2);
1576 1554
1577 *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]); 1555 *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]);
1578 *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]); 1556 *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]);
1579 hdr->nops++; 1557 memcpy(verf, readdir->verifier.data, sizeof(verf));
1580 hdr->replen += decode_readdir_maxsz;
1581 dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n", 1558 dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n",
1582 __func__, 1559 __func__,
1583 (unsigned long long)readdir->cookie, 1560 (unsigned long long)readdir->cookie,
1584 ((u32 *)readdir->verifier.data)[0], 1561 verf[0], verf[1],
1585 ((u32 *)readdir->verifier.data)[1],
1586 attrs[0] & readdir->bitmask[0], 1562 attrs[0] & readdir->bitmask[0],
1587 attrs[1] & readdir->bitmask[1]); 1563 attrs[1] & readdir->bitmask[1]);
1588} 1564}
1589 1565
1590static void encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req, struct compound_hdr *hdr) 1566static void encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req, struct compound_hdr *hdr)
1591{ 1567{
1592 __be32 *p; 1568 encode_op_hdr(xdr, OP_READLINK, decode_readlink_maxsz, hdr);
1593
1594 p = reserve_space(xdr, 4);
1595 *p = cpu_to_be32(OP_READLINK);
1596 hdr->nops++;
1597 hdr->replen += decode_readlink_maxsz;
1598} 1569}
1599 1570
1600static void encode_remove(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) 1571static void encode_remove(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
1601{ 1572{
1602 __be32 *p; 1573 encode_op_hdr(xdr, OP_REMOVE, decode_remove_maxsz, hdr);
1603 1574 encode_string(xdr, name->len, name->name);
1604 p = reserve_space(xdr, 8 + name->len);
1605 *p++ = cpu_to_be32(OP_REMOVE);
1606 xdr_encode_opaque(p, name->name, name->len);
1607 hdr->nops++;
1608 hdr->replen += decode_remove_maxsz;
1609} 1575}
1610 1576
1611static void encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, const struct qstr *newname, struct compound_hdr *hdr) 1577static void encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, const struct qstr *newname, struct compound_hdr *hdr)
1612{ 1578{
1613 __be32 *p; 1579 encode_op_hdr(xdr, OP_RENAME, decode_rename_maxsz, hdr);
1614
1615 p = reserve_space(xdr, 4);
1616 *p = cpu_to_be32(OP_RENAME);
1617 encode_string(xdr, oldname->len, oldname->name); 1580 encode_string(xdr, oldname->len, oldname->name);
1618 encode_string(xdr, newname->len, newname->name); 1581 encode_string(xdr, newname->len, newname->name);
1619 hdr->nops++;
1620 hdr->replen += decode_rename_maxsz;
1621} 1582}
1622 1583
1623static void encode_renew(struct xdr_stream *xdr, const struct nfs_client *client_stateid, struct compound_hdr *hdr) 1584static void encode_renew(struct xdr_stream *xdr, clientid4 clid,
1585 struct compound_hdr *hdr)
1624{ 1586{
1625 __be32 *p; 1587 encode_op_hdr(xdr, OP_RENEW, decode_renew_maxsz, hdr);
1626 1588 encode_uint64(xdr, clid);
1627 p = reserve_space(xdr, 12);
1628 *p++ = cpu_to_be32(OP_RENEW);
1629 xdr_encode_hyper(p, client_stateid->cl_clientid);
1630 hdr->nops++;
1631 hdr->replen += decode_renew_maxsz;
1632} 1589}
1633 1590
1634static void 1591static void
1635encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr) 1592encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1636{ 1593{
1637 __be32 *p; 1594 encode_op_hdr(xdr, OP_RESTOREFH, decode_restorefh_maxsz, hdr);
1638
1639 p = reserve_space(xdr, 4);
1640 *p = cpu_to_be32(OP_RESTOREFH);
1641 hdr->nops++;
1642 hdr->replen += decode_restorefh_maxsz;
1643} 1595}
1644 1596
1645static void 1597static void
@@ -1647,9 +1599,8 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun
1647{ 1599{
1648 __be32 *p; 1600 __be32 *p;
1649 1601
1650 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE); 1602 encode_op_hdr(xdr, OP_SETATTR, decode_setacl_maxsz, hdr);
1651 *p++ = cpu_to_be32(OP_SETATTR); 1603 encode_nfs4_stateid(xdr, &zero_stateid);
1652 xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
1653 p = reserve_space(xdr, 2*4); 1604 p = reserve_space(xdr, 2*4);
1654 *p++ = cpu_to_be32(1); 1605 *p++ = cpu_to_be32(1);
1655 *p = cpu_to_be32(FATTR4_WORD0_ACL); 1606 *p = cpu_to_be32(FATTR4_WORD0_ACL);
@@ -1657,30 +1608,18 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun
1657 p = reserve_space(xdr, 4); 1608 p = reserve_space(xdr, 4);
1658 *p = cpu_to_be32(arg->acl_len); 1609 *p = cpu_to_be32(arg->acl_len);
1659 xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len); 1610 xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len);
1660 hdr->nops++;
1661 hdr->replen += decode_setacl_maxsz;
1662} 1611}
1663 1612
1664static void 1613static void
1665encode_savefh(struct xdr_stream *xdr, struct compound_hdr *hdr) 1614encode_savefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1666{ 1615{
1667 __be32 *p; 1616 encode_op_hdr(xdr, OP_SAVEFH, decode_savefh_maxsz, hdr);
1668
1669 p = reserve_space(xdr, 4);
1670 *p = cpu_to_be32(OP_SAVEFH);
1671 hdr->nops++;
1672 hdr->replen += decode_savefh_maxsz;
1673} 1617}
1674 1618
1675static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs *arg, const struct nfs_server *server, struct compound_hdr *hdr) 1619static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs *arg, const struct nfs_server *server, struct compound_hdr *hdr)
1676{ 1620{
1677 __be32 *p; 1621 encode_op_hdr(xdr, OP_SETATTR, decode_setattr_maxsz, hdr);
1678 1622 encode_nfs4_stateid(xdr, &arg->stateid);
1679 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
1680 *p++ = cpu_to_be32(OP_SETATTR);
1681 xdr_encode_opaque_fixed(p, arg->stateid.data, NFS4_STATEID_SIZE);
1682 hdr->nops++;
1683 hdr->replen += decode_setattr_maxsz;
1684 encode_attrs(xdr, arg->iap, server); 1623 encode_attrs(xdr, arg->iap, server);
1685} 1624}
1686 1625
@@ -1688,9 +1627,8 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie
1688{ 1627{
1689 __be32 *p; 1628 __be32 *p;
1690 1629
1691 p = reserve_space(xdr, 4 + NFS4_VERIFIER_SIZE); 1630 encode_op_hdr(xdr, OP_SETCLIENTID, decode_setclientid_maxsz, hdr);
1692 *p++ = cpu_to_be32(OP_SETCLIENTID); 1631 encode_nfs4_verifier(xdr, setclientid->sc_verifier);
1693 xdr_encode_opaque_fixed(p, setclientid->sc_verifier->data, NFS4_VERIFIER_SIZE);
1694 1632
1695 encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name); 1633 encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name);
1696 p = reserve_space(xdr, 4); 1634 p = reserve_space(xdr, 4);
@@ -1699,31 +1637,23 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie
1699 encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr); 1637 encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr);
1700 p = reserve_space(xdr, 4); 1638 p = reserve_space(xdr, 4);
1701 *p = cpu_to_be32(setclientid->sc_cb_ident); 1639 *p = cpu_to_be32(setclientid->sc_cb_ident);
1702 hdr->nops++;
1703 hdr->replen += decode_setclientid_maxsz;
1704} 1640}
1705 1641
1706static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_setclientid_res *arg, struct compound_hdr *hdr) 1642static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_setclientid_res *arg, struct compound_hdr *hdr)
1707{ 1643{
1708 __be32 *p; 1644 encode_op_hdr(xdr, OP_SETCLIENTID_CONFIRM,
1709 1645 decode_setclientid_confirm_maxsz, hdr);
1710 p = reserve_space(xdr, 12 + NFS4_VERIFIER_SIZE); 1646 encode_uint64(xdr, arg->clientid);
1711 *p++ = cpu_to_be32(OP_SETCLIENTID_CONFIRM); 1647 encode_nfs4_verifier(xdr, &arg->confirm);
1712 p = xdr_encode_hyper(p, arg->clientid);
1713 xdr_encode_opaque_fixed(p, arg->confirm.data, NFS4_VERIFIER_SIZE);
1714 hdr->nops++;
1715 hdr->replen += decode_setclientid_confirm_maxsz;
1716} 1648}
1717 1649
1718static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) 1650static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr)
1719{ 1651{
1720 __be32 *p; 1652 __be32 *p;
1721 1653
1722 p = reserve_space(xdr, 4); 1654 encode_op_hdr(xdr, OP_WRITE, decode_write_maxsz, hdr);
1723 *p = cpu_to_be32(OP_WRITE); 1655 encode_open_stateid(xdr, args->context, args->lock_context,
1724 1656 FMODE_WRITE, hdr->minorversion);
1725 encode_stateid(xdr, args->context, args->lock_context,
1726 hdr->minorversion);
1727 1657
1728 p = reserve_space(xdr, 16); 1658 p = reserve_space(xdr, 16);
1729 p = xdr_encode_hyper(p, args->offset); 1659 p = xdr_encode_hyper(p, args->offset);
@@ -1731,32 +1661,18 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg
1731 *p = cpu_to_be32(args->count); 1661 *p = cpu_to_be32(args->count);
1732 1662
1733 xdr_write_pages(xdr, args->pages, args->pgbase, args->count); 1663 xdr_write_pages(xdr, args->pages, args->pgbase, args->count);
1734 hdr->nops++;
1735 hdr->replen += decode_write_maxsz;
1736} 1664}
1737 1665
1738static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *stateid, struct compound_hdr *hdr) 1666static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *stateid, struct compound_hdr *hdr)
1739{ 1667{
1740 __be32 *p; 1668 encode_op_hdr(xdr, OP_DELEGRETURN, decode_delegreturn_maxsz, hdr);
1741 1669 encode_nfs4_stateid(xdr, stateid);
1742 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
1743
1744 *p++ = cpu_to_be32(OP_DELEGRETURN);
1745 xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE);
1746 hdr->nops++;
1747 hdr->replen += decode_delegreturn_maxsz;
1748} 1670}
1749 1671
1750static void encode_secinfo(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) 1672static void encode_secinfo(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
1751{ 1673{
1752 int len = name->len; 1674 encode_op_hdr(xdr, OP_SECINFO, decode_secinfo_maxsz, hdr);
1753 __be32 *p; 1675 encode_string(xdr, name->len, name->name);
1754
1755 p = reserve_space(xdr, 8 + len);
1756 *p++ = cpu_to_be32(OP_SECINFO);
1757 xdr_encode_opaque(p, name->name, len);
1758 hdr->nops++;
1759 hdr->replen += decode_secinfo_maxsz;
1760} 1676}
1761 1677
1762#if defined(CONFIG_NFS_V4_1) 1678#if defined(CONFIG_NFS_V4_1)
@@ -1766,19 +1682,39 @@ static void encode_exchange_id(struct xdr_stream *xdr,
1766 struct compound_hdr *hdr) 1682 struct compound_hdr *hdr)
1767{ 1683{
1768 __be32 *p; 1684 __be32 *p;
1685 char impl_name[NFS4_OPAQUE_LIMIT];
1686 int len = 0;
1769 1687
1770 p = reserve_space(xdr, 4 + sizeof(args->verifier->data)); 1688 encode_op_hdr(xdr, OP_EXCHANGE_ID, decode_exchange_id_maxsz, hdr);
1771 *p++ = cpu_to_be32(OP_EXCHANGE_ID); 1689 encode_nfs4_verifier(xdr, args->verifier);
1772 xdr_encode_opaque_fixed(p, args->verifier->data, sizeof(args->verifier->data));
1773 1690
1774 encode_string(xdr, args->id_len, args->id); 1691 encode_string(xdr, args->id_len, args->id);
1775 1692
1776 p = reserve_space(xdr, 12); 1693 p = reserve_space(xdr, 12);
1777 *p++ = cpu_to_be32(args->flags); 1694 *p++ = cpu_to_be32(args->flags);
1778 *p++ = cpu_to_be32(0); /* zero length state_protect4_a */ 1695 *p++ = cpu_to_be32(0); /* zero length state_protect4_a */
1779 *p = cpu_to_be32(0); /* zero length implementation id array */ 1696
1780 hdr->nops++; 1697 if (send_implementation_id &&
1781 hdr->replen += decode_exchange_id_maxsz; 1698 sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN) > 1 &&
1699 sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN)
1700 <= NFS4_OPAQUE_LIMIT + 1)
1701 len = snprintf(impl_name, sizeof(impl_name), "%s %s %s %s",
1702 utsname()->sysname, utsname()->release,
1703 utsname()->version, utsname()->machine);
1704
1705 if (len > 0) {
1706 *p = cpu_to_be32(1); /* implementation id array length=1 */
1707
1708 encode_string(xdr,
1709 sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN) - 1,
1710 CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN);
1711 encode_string(xdr, len, impl_name);
1712 /* just send zeros for nii_date - the date is in nii_name */
1713 p = reserve_space(xdr, 12);
1714 p = xdr_encode_hyper(p, 0);
1715 *p = cpu_to_be32(0);
1716 } else
1717 *p = cpu_to_be32(0); /* implementation id array length=0 */
1782} 1718}
1783 1719
1784static void encode_create_session(struct xdr_stream *xdr, 1720static void encode_create_session(struct xdr_stream *xdr,
@@ -1801,8 +1737,8 @@ static void encode_create_session(struct xdr_stream *xdr,
1801 len = scnprintf(machine_name, sizeof(machine_name), "%s", 1737 len = scnprintf(machine_name, sizeof(machine_name), "%s",
1802 clp->cl_ipaddr); 1738 clp->cl_ipaddr);
1803 1739
1804 p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12); 1740 encode_op_hdr(xdr, OP_CREATE_SESSION, decode_create_session_maxsz, hdr);
1805 *p++ = cpu_to_be32(OP_CREATE_SESSION); 1741 p = reserve_space(xdr, 16 + 2*28 + 20 + len + 12);
1806 p = xdr_encode_hyper(p, clp->cl_clientid); 1742 p = xdr_encode_hyper(p, clp->cl_clientid);
1807 *p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */ 1743 *p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */
1808 *p++ = cpu_to_be32(args->flags); /*flags */ 1744 *p++ = cpu_to_be32(args->flags); /*flags */
@@ -1835,33 +1771,22 @@ static void encode_create_session(struct xdr_stream *xdr,
1835 *p++ = cpu_to_be32(0); /* UID */ 1771 *p++ = cpu_to_be32(0); /* UID */
1836 *p++ = cpu_to_be32(0); /* GID */ 1772 *p++ = cpu_to_be32(0); /* GID */
1837 *p = cpu_to_be32(0); /* No more gids */ 1773 *p = cpu_to_be32(0); /* No more gids */
1838 hdr->nops++;
1839 hdr->replen += decode_create_session_maxsz;
1840} 1774}
1841 1775
1842static void encode_destroy_session(struct xdr_stream *xdr, 1776static void encode_destroy_session(struct xdr_stream *xdr,
1843 struct nfs4_session *session, 1777 struct nfs4_session *session,
1844 struct compound_hdr *hdr) 1778 struct compound_hdr *hdr)
1845{ 1779{
1846 __be32 *p; 1780 encode_op_hdr(xdr, OP_DESTROY_SESSION, decode_destroy_session_maxsz, hdr);
1847 p = reserve_space(xdr, 4 + NFS4_MAX_SESSIONID_LEN); 1781 encode_opaque_fixed(xdr, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
1848 *p++ = cpu_to_be32(OP_DESTROY_SESSION);
1849 xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
1850 hdr->nops++;
1851 hdr->replen += decode_destroy_session_maxsz;
1852} 1782}
1853 1783
1854static void encode_reclaim_complete(struct xdr_stream *xdr, 1784static void encode_reclaim_complete(struct xdr_stream *xdr,
1855 struct nfs41_reclaim_complete_args *args, 1785 struct nfs41_reclaim_complete_args *args,
1856 struct compound_hdr *hdr) 1786 struct compound_hdr *hdr)
1857{ 1787{
1858 __be32 *p; 1788 encode_op_hdr(xdr, OP_RECLAIM_COMPLETE, decode_reclaim_complete_maxsz, hdr);
1859 1789 encode_uint32(xdr, args->one_fs);
1860 p = reserve_space(xdr, 8);
1861 *p++ = cpu_to_be32(OP_RECLAIM_COMPLETE);
1862 *p++ = cpu_to_be32(args->one_fs);
1863 hdr->nops++;
1864 hdr->replen += decode_reclaim_complete_maxsz;
1865} 1790}
1866#endif /* CONFIG_NFS_V4_1 */ 1791#endif /* CONFIG_NFS_V4_1 */
1867 1792
@@ -1883,8 +1808,7 @@ static void encode_sequence(struct xdr_stream *xdr,
1883 WARN_ON(args->sa_slotid == NFS4_MAX_SLOT_TABLE); 1808 WARN_ON(args->sa_slotid == NFS4_MAX_SLOT_TABLE);
1884 slot = tp->slots + args->sa_slotid; 1809 slot = tp->slots + args->sa_slotid;
1885 1810
1886 p = reserve_space(xdr, 4 + NFS4_MAX_SESSIONID_LEN + 16); 1811 encode_op_hdr(xdr, OP_SEQUENCE, decode_sequence_maxsz, hdr);
1887 *p++ = cpu_to_be32(OP_SEQUENCE);
1888 1812
1889 /* 1813 /*
1890 * Sessionid + seqid + slotid + max slotid + cache_this 1814 * Sessionid + seqid + slotid + max slotid + cache_this
@@ -1898,13 +1822,12 @@ static void encode_sequence(struct xdr_stream *xdr,
1898 ((u32 *)session->sess_id.data)[3], 1822 ((u32 *)session->sess_id.data)[3],
1899 slot->seq_nr, args->sa_slotid, 1823 slot->seq_nr, args->sa_slotid,
1900 tp->highest_used_slotid, args->sa_cache_this); 1824 tp->highest_used_slotid, args->sa_cache_this);
1825 p = reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 16);
1901 p = xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN); 1826 p = xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
1902 *p++ = cpu_to_be32(slot->seq_nr); 1827 *p++ = cpu_to_be32(slot->seq_nr);
1903 *p++ = cpu_to_be32(args->sa_slotid); 1828 *p++ = cpu_to_be32(args->sa_slotid);
1904 *p++ = cpu_to_be32(tp->highest_used_slotid); 1829 *p++ = cpu_to_be32(tp->highest_used_slotid);
1905 *p = cpu_to_be32(args->sa_cache_this); 1830 *p = cpu_to_be32(args->sa_cache_this);
1906 hdr->nops++;
1907 hdr->replen += decode_sequence_maxsz;
1908#endif /* CONFIG_NFS_V4_1 */ 1831#endif /* CONFIG_NFS_V4_1 */
1909} 1832}
1910 1833
@@ -1919,14 +1842,12 @@ encode_getdevicelist(struct xdr_stream *xdr,
1919 .data = "dummmmmy", 1842 .data = "dummmmmy",
1920 }; 1843 };
1921 1844
1922 p = reserve_space(xdr, 20); 1845 encode_op_hdr(xdr, OP_GETDEVICELIST, decode_getdevicelist_maxsz, hdr);
1923 *p++ = cpu_to_be32(OP_GETDEVICELIST); 1846 p = reserve_space(xdr, 16);
1924 *p++ = cpu_to_be32(args->layoutclass); 1847 *p++ = cpu_to_be32(args->layoutclass);
1925 *p++ = cpu_to_be32(NFS4_PNFS_GETDEVLIST_MAXNUM); 1848 *p++ = cpu_to_be32(NFS4_PNFS_GETDEVLIST_MAXNUM);
1926 xdr_encode_hyper(p, 0ULL); /* cookie */ 1849 xdr_encode_hyper(p, 0ULL); /* cookie */
1927 encode_nfs4_verifier(xdr, &dummy); 1850 encode_nfs4_verifier(xdr, &dummy);
1928 hdr->nops++;
1929 hdr->replen += decode_getdevicelist_maxsz;
1930} 1851}
1931 1852
1932static void 1853static void
@@ -1936,15 +1857,13 @@ encode_getdeviceinfo(struct xdr_stream *xdr,
1936{ 1857{
1937 __be32 *p; 1858 __be32 *p;
1938 1859
1939 p = reserve_space(xdr, 16 + NFS4_DEVICEID4_SIZE); 1860 encode_op_hdr(xdr, OP_GETDEVICEINFO, decode_getdeviceinfo_maxsz, hdr);
1940 *p++ = cpu_to_be32(OP_GETDEVICEINFO); 1861 p = reserve_space(xdr, 12 + NFS4_DEVICEID4_SIZE);
1941 p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data, 1862 p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data,
1942 NFS4_DEVICEID4_SIZE); 1863 NFS4_DEVICEID4_SIZE);
1943 *p++ = cpu_to_be32(args->pdev->layout_type); 1864 *p++ = cpu_to_be32(args->pdev->layout_type);
1944 *p++ = cpu_to_be32(args->pdev->pglen); /* gdia_maxcount */ 1865 *p++ = cpu_to_be32(args->pdev->pglen); /* gdia_maxcount */
1945 *p++ = cpu_to_be32(0); /* bitmap length 0 */ 1866 *p++ = cpu_to_be32(0); /* bitmap length 0 */
1946 hdr->nops++;
1947 hdr->replen += decode_getdeviceinfo_maxsz;
1948} 1867}
1949 1868
1950static void 1869static void
@@ -1954,16 +1873,16 @@ encode_layoutget(struct xdr_stream *xdr,
1954{ 1873{
1955 __be32 *p; 1874 __be32 *p;
1956 1875
1957 p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE); 1876 encode_op_hdr(xdr, OP_LAYOUTGET, decode_layoutget_maxsz, hdr);
1958 *p++ = cpu_to_be32(OP_LAYOUTGET); 1877 p = reserve_space(xdr, 36);
1959 *p++ = cpu_to_be32(0); /* Signal layout available */ 1878 *p++ = cpu_to_be32(0); /* Signal layout available */
1960 *p++ = cpu_to_be32(args->type); 1879 *p++ = cpu_to_be32(args->type);
1961 *p++ = cpu_to_be32(args->range.iomode); 1880 *p++ = cpu_to_be32(args->range.iomode);
1962 p = xdr_encode_hyper(p, args->range.offset); 1881 p = xdr_encode_hyper(p, args->range.offset);
1963 p = xdr_encode_hyper(p, args->range.length); 1882 p = xdr_encode_hyper(p, args->range.length);
1964 p = xdr_encode_hyper(p, args->minlength); 1883 p = xdr_encode_hyper(p, args->minlength);
1965 p = xdr_encode_opaque_fixed(p, &args->stateid.data, NFS4_STATEID_SIZE); 1884 encode_nfs4_stateid(xdr, &args->stateid);
1966 *p = cpu_to_be32(args->maxcount); 1885 encode_uint32(xdr, args->maxcount);
1967 1886
1968 dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n", 1887 dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n",
1969 __func__, 1888 __func__,
@@ -1972,8 +1891,6 @@ encode_layoutget(struct xdr_stream *xdr,
1972 (unsigned long)args->range.offset, 1891 (unsigned long)args->range.offset,
1973 (unsigned long)args->range.length, 1892 (unsigned long)args->range.length,
1974 args->maxcount); 1893 args->maxcount);
1975 hdr->nops++;
1976 hdr->replen += decode_layoutget_maxsz;
1977} 1894}
1978 1895
1979static int 1896static int
@@ -1987,13 +1904,14 @@ encode_layoutcommit(struct xdr_stream *xdr,
1987 dprintk("%s: lbw: %llu type: %d\n", __func__, args->lastbytewritten, 1904 dprintk("%s: lbw: %llu type: %d\n", __func__, args->lastbytewritten,
1988 NFS_SERVER(args->inode)->pnfs_curr_ld->id); 1905 NFS_SERVER(args->inode)->pnfs_curr_ld->id);
1989 1906
1990 p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE); 1907 encode_op_hdr(xdr, OP_LAYOUTCOMMIT, decode_layoutcommit_maxsz, hdr);
1991 *p++ = cpu_to_be32(OP_LAYOUTCOMMIT); 1908 p = reserve_space(xdr, 20);
1992 /* Only whole file layouts */ 1909 /* Only whole file layouts */
1993 p = xdr_encode_hyper(p, 0); /* offset */ 1910 p = xdr_encode_hyper(p, 0); /* offset */
1994 p = xdr_encode_hyper(p, args->lastbytewritten + 1); /* length */ 1911 p = xdr_encode_hyper(p, args->lastbytewritten + 1); /* length */
1995 *p++ = cpu_to_be32(0); /* reclaim */ 1912 *p = cpu_to_be32(0); /* reclaim */
1996 p = xdr_encode_opaque_fixed(p, args->stateid.data, NFS4_STATEID_SIZE); 1913 encode_nfs4_stateid(xdr, &args->stateid);
1914 p = reserve_space(xdr, 20);
1997 *p++ = cpu_to_be32(1); /* newoffset = TRUE */ 1915 *p++ = cpu_to_be32(1); /* newoffset = TRUE */
1998 p = xdr_encode_hyper(p, args->lastbytewritten); 1916 p = xdr_encode_hyper(p, args->lastbytewritten);
1999 *p++ = cpu_to_be32(0); /* Never send time_modify_changed */ 1917 *p++ = cpu_to_be32(0); /* Never send time_modify_changed */
@@ -2002,13 +1920,9 @@ encode_layoutcommit(struct xdr_stream *xdr,
2002 if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit) 1920 if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit)
2003 NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit( 1921 NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit(
2004 NFS_I(inode)->layout, xdr, args); 1922 NFS_I(inode)->layout, xdr, args);
2005 else { 1923 else
2006 p = reserve_space(xdr, 4); 1924 encode_uint32(xdr, 0); /* no layout-type payload */
2007 *p = cpu_to_be32(0); /* no layout-type payload */
2008 }
2009 1925
2010 hdr->nops++;
2011 hdr->replen += decode_layoutcommit_maxsz;
2012 return 0; 1926 return 0;
2013} 1927}
2014 1928
@@ -2019,27 +1933,23 @@ encode_layoutreturn(struct xdr_stream *xdr,
2019{ 1933{
2020 __be32 *p; 1934 __be32 *p;
2021 1935
2022 p = reserve_space(xdr, 20); 1936 encode_op_hdr(xdr, OP_LAYOUTRETURN, decode_layoutreturn_maxsz, hdr);
2023 *p++ = cpu_to_be32(OP_LAYOUTRETURN); 1937 p = reserve_space(xdr, 16);
2024 *p++ = cpu_to_be32(0); /* reclaim. always 0 for now */ 1938 *p++ = cpu_to_be32(0); /* reclaim. always 0 for now */
2025 *p++ = cpu_to_be32(args->layout_type); 1939 *p++ = cpu_to_be32(args->layout_type);
2026 *p++ = cpu_to_be32(IOMODE_ANY); 1940 *p++ = cpu_to_be32(IOMODE_ANY);
2027 *p = cpu_to_be32(RETURN_FILE); 1941 *p = cpu_to_be32(RETURN_FILE);
2028 p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE); 1942 p = reserve_space(xdr, 16);
2029 p = xdr_encode_hyper(p, 0); 1943 p = xdr_encode_hyper(p, 0);
2030 p = xdr_encode_hyper(p, NFS4_MAX_UINT64); 1944 p = xdr_encode_hyper(p, NFS4_MAX_UINT64);
2031 spin_lock(&args->inode->i_lock); 1945 spin_lock(&args->inode->i_lock);
2032 xdr_encode_opaque_fixed(p, &args->stateid.data, NFS4_STATEID_SIZE); 1946 encode_nfs4_stateid(xdr, &args->stateid);
2033 spin_unlock(&args->inode->i_lock); 1947 spin_unlock(&args->inode->i_lock);
2034 if (NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn) { 1948 if (NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn) {
2035 NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn( 1949 NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn(
2036 NFS_I(args->inode)->layout, xdr, args); 1950 NFS_I(args->inode)->layout, xdr, args);
2037 } else { 1951 } else
2038 p = reserve_space(xdr, 4); 1952 encode_uint32(xdr, 0);
2039 *p = cpu_to_be32(0);
2040 }
2041 hdr->nops++;
2042 hdr->replen += decode_layoutreturn_maxsz;
2043} 1953}
2044 1954
2045static int 1955static int
@@ -2047,12 +1957,8 @@ encode_secinfo_no_name(struct xdr_stream *xdr,
2047 const struct nfs41_secinfo_no_name_args *args, 1957 const struct nfs41_secinfo_no_name_args *args,
2048 struct compound_hdr *hdr) 1958 struct compound_hdr *hdr)
2049{ 1959{
2050 __be32 *p; 1960 encode_op_hdr(xdr, OP_SECINFO_NO_NAME, decode_secinfo_no_name_maxsz, hdr);
2051 p = reserve_space(xdr, 8); 1961 encode_uint32(xdr, args->style);
2052 *p++ = cpu_to_be32(OP_SECINFO_NO_NAME);
2053 *p++ = cpu_to_be32(args->style);
2054 hdr->nops++;
2055 hdr->replen += decode_secinfo_no_name_maxsz;
2056 return 0; 1962 return 0;
2057} 1963}
2058 1964
@@ -2060,26 +1966,17 @@ static void encode_test_stateid(struct xdr_stream *xdr,
2060 struct nfs41_test_stateid_args *args, 1966 struct nfs41_test_stateid_args *args,
2061 struct compound_hdr *hdr) 1967 struct compound_hdr *hdr)
2062{ 1968{
2063 __be32 *p; 1969 encode_op_hdr(xdr, OP_TEST_STATEID, decode_test_stateid_maxsz, hdr);
2064 1970 encode_uint32(xdr, 1);
2065 p = reserve_space(xdr, 8 + NFS4_STATEID_SIZE); 1971 encode_nfs4_stateid(xdr, args->stateid);
2066 *p++ = cpu_to_be32(OP_TEST_STATEID);
2067 *p++ = cpu_to_be32(1);
2068 xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE);
2069 hdr->nops++;
2070 hdr->replen += decode_test_stateid_maxsz;
2071} 1972}
2072 1973
2073static void encode_free_stateid(struct xdr_stream *xdr, 1974static void encode_free_stateid(struct xdr_stream *xdr,
2074 struct nfs41_free_stateid_args *args, 1975 struct nfs41_free_stateid_args *args,
2075 struct compound_hdr *hdr) 1976 struct compound_hdr *hdr)
2076{ 1977{
2077 __be32 *p; 1978 encode_op_hdr(xdr, OP_FREE_STATEID, decode_free_stateid_maxsz, hdr);
2078 p = reserve_space(xdr, 4 + NFS4_STATEID_SIZE); 1979 encode_nfs4_stateid(xdr, args->stateid);
2079 *p++ = cpu_to_be32(OP_FREE_STATEID);
2080 xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE);
2081 hdr->nops++;
2082 hdr->replen += decode_free_stateid_maxsz;
2083} 1980}
2084#endif /* CONFIG_NFS_V4_1 */ 1981#endif /* CONFIG_NFS_V4_1 */
2085 1982
@@ -2633,6 +2530,7 @@ static void nfs4_xdr_enc_server_caps(struct rpc_rqst *req,
2633 encode_sequence(xdr, &args->seq_args, &hdr); 2530 encode_sequence(xdr, &args->seq_args, &hdr);
2634 encode_putfh(xdr, args->fhandle, &hdr); 2531 encode_putfh(xdr, args->fhandle, &hdr);
2635 encode_getattr_one(xdr, FATTR4_WORD0_SUPPORTED_ATTRS| 2532 encode_getattr_one(xdr, FATTR4_WORD0_SUPPORTED_ATTRS|
2533 FATTR4_WORD0_FH_EXPIRE_TYPE|
2636 FATTR4_WORD0_LINK_SUPPORT| 2534 FATTR4_WORD0_LINK_SUPPORT|
2637 FATTR4_WORD0_SYMLINK_SUPPORT| 2535 FATTR4_WORD0_SYMLINK_SUPPORT|
2638 FATTR4_WORD0_ACLSUPPORT, &hdr); 2536 FATTR4_WORD0_ACLSUPPORT, &hdr);
@@ -2650,7 +2548,7 @@ static void nfs4_xdr_enc_renew(struct rpc_rqst *req, struct xdr_stream *xdr,
2650 }; 2548 };
2651 2549
2652 encode_compound_hdr(xdr, req, &hdr); 2550 encode_compound_hdr(xdr, req, &hdr);
2653 encode_renew(xdr, clp, &hdr); 2551 encode_renew(xdr, clp->cl_clientid, &hdr);
2654 encode_nops(&hdr); 2552 encode_nops(&hdr);
2655} 2553}
2656 2554
@@ -3180,6 +3078,28 @@ out_overflow:
3180 return -EIO; 3078 return -EIO;
3181} 3079}
3182 3080
3081static int decode_attr_fh_expire_type(struct xdr_stream *xdr,
3082 uint32_t *bitmap, uint32_t *type)
3083{
3084 __be32 *p;
3085
3086 *type = 0;
3087 if (unlikely(bitmap[0] & (FATTR4_WORD0_FH_EXPIRE_TYPE - 1U)))
3088 return -EIO;
3089 if (likely(bitmap[0] & FATTR4_WORD0_FH_EXPIRE_TYPE)) {
3090 p = xdr_inline_decode(xdr, 4);
3091 if (unlikely(!p))
3092 goto out_overflow;
3093 *type = be32_to_cpup(p);
3094 bitmap[0] &= ~FATTR4_WORD0_FH_EXPIRE_TYPE;
3095 }
3096 dprintk("%s: expire type=0x%x\n", __func__, *type);
3097 return 0;
3098out_overflow:
3099 print_overflow_msg(__func__, xdr);
3100 return -EIO;
3101}
3102
3183static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change) 3103static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change)
3184{ 3104{
3185 __be32 *p; 3105 __be32 *p;
@@ -3513,16 +3433,17 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path)
3513 n = be32_to_cpup(p); 3433 n = be32_to_cpup(p);
3514 if (n == 0) 3434 if (n == 0)
3515 goto root_path; 3435 goto root_path;
3516 dprintk("path "); 3436 dprintk("pathname4: ");
3517 path->ncomponents = 0; 3437 path->ncomponents = 0;
3518 while (path->ncomponents < n) { 3438 while (path->ncomponents < n) {
3519 struct nfs4_string *component = &path->components[path->ncomponents]; 3439 struct nfs4_string *component = &path->components[path->ncomponents];
3520 status = decode_opaque_inline(xdr, &component->len, &component->data); 3440 status = decode_opaque_inline(xdr, &component->len, &component->data);
3521 if (unlikely(status != 0)) 3441 if (unlikely(status != 0))
3522 goto out_eio; 3442 goto out_eio;
3523 if (path->ncomponents != n) 3443 ifdebug (XDR)
3524 dprintk("/"); 3444 pr_cont("%s%.*s ",
3525 dprintk("%s", component->data); 3445 (path->ncomponents != n ? "/ " : ""),
3446 component->len, component->data);
3526 if (path->ncomponents < NFS4_PATHNAME_MAXCOMPONENTS) 3447 if (path->ncomponents < NFS4_PATHNAME_MAXCOMPONENTS)
3527 path->ncomponents++; 3448 path->ncomponents++;
3528 else { 3449 else {
@@ -3531,14 +3452,13 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path)
3531 } 3452 }
3532 } 3453 }
3533out: 3454out:
3534 dprintk("\n");
3535 return status; 3455 return status;
3536root_path: 3456root_path:
3537/* a root pathname is sent as a zero component4 */ 3457/* a root pathname is sent as a zero component4 */
3538 path->ncomponents = 1; 3458 path->ncomponents = 1;
3539 path->components[0].len=0; 3459 path->components[0].len=0;
3540 path->components[0].data=NULL; 3460 path->components[0].data=NULL;
3541 dprintk("path /\n"); 3461 dprintk("pathname4: /\n");
3542 goto out; 3462 goto out;
3543out_eio: 3463out_eio:
3544 dprintk(" status %d", status); 3464 dprintk(" status %d", status);
@@ -3560,7 +3480,11 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
3560 status = 0; 3480 status = 0;
3561 if (unlikely(!(bitmap[0] & FATTR4_WORD0_FS_LOCATIONS))) 3481 if (unlikely(!(bitmap[0] & FATTR4_WORD0_FS_LOCATIONS)))
3562 goto out; 3482 goto out;
3563 dprintk("%s: fsroot ", __func__); 3483 status = -EIO;
3484 /* Ignore borken servers that return unrequested attrs */
3485 if (unlikely(res == NULL))
3486 goto out;
3487 dprintk("%s: fsroot:\n", __func__);
3564 status = decode_pathname(xdr, &res->fs_path); 3488 status = decode_pathname(xdr, &res->fs_path);
3565 if (unlikely(status != 0)) 3489 if (unlikely(status != 0))
3566 goto out; 3490 goto out;
@@ -3581,7 +3505,7 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
3581 m = be32_to_cpup(p); 3505 m = be32_to_cpup(p);
3582 3506
3583 loc->nservers = 0; 3507 loc->nservers = 0;
3584 dprintk("%s: servers ", __func__); 3508 dprintk("%s: servers:\n", __func__);
3585 while (loc->nservers < m) { 3509 while (loc->nservers < m) {
3586 struct nfs4_string *server = &loc->servers[loc->nservers]; 3510 struct nfs4_string *server = &loc->servers[loc->nservers];
3587 status = decode_opaque_inline(xdr, &server->len, &server->data); 3511 status = decode_opaque_inline(xdr, &server->len, &server->data);
@@ -3613,7 +3537,7 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
3613 res->nlocations++; 3537 res->nlocations++;
3614 } 3538 }
3615 if (res->nlocations != 0) 3539 if (res->nlocations != 0)
3616 status = NFS_ATTR_FATTR_V4_REFERRAL; 3540 status = NFS_ATTR_FATTR_V4_LOCATIONS;
3617out: 3541out:
3618 dprintk("%s: fs_locations done, error = %d\n", __func__, status); 3542 dprintk("%s: fs_locations done, error = %d\n", __func__, status);
3619 return status; 3543 return status;
@@ -4157,7 +4081,7 @@ static int decode_opaque_fixed(struct xdr_stream *xdr, void *buf, size_t len)
4157 4081
4158static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid) 4082static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
4159{ 4083{
4160 return decode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE); 4084 return decode_opaque_fixed(xdr, stateid, NFS4_STATEID_SIZE);
4161} 4085}
4162 4086
4163static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res) 4087static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
@@ -4174,7 +4098,7 @@ static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
4174 4098
4175static int decode_verifier(struct xdr_stream *xdr, void *verifier) 4099static int decode_verifier(struct xdr_stream *xdr, void *verifier)
4176{ 4100{
4177 return decode_opaque_fixed(xdr, verifier, 8); 4101 return decode_opaque_fixed(xdr, verifier, NFS4_VERIFIER_SIZE);
4178} 4102}
4179 4103
4180static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res) 4104static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res)
@@ -4224,6 +4148,9 @@ static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_re
4224 goto xdr_error; 4148 goto xdr_error;
4225 if ((status = decode_attr_supported(xdr, bitmap, res->attr_bitmask)) != 0) 4149 if ((status = decode_attr_supported(xdr, bitmap, res->attr_bitmask)) != 0)
4226 goto xdr_error; 4150 goto xdr_error;
4151 if ((status = decode_attr_fh_expire_type(xdr, bitmap,
4152 &res->fh_expire_type)) != 0)
4153 goto xdr_error;
4227 if ((status = decode_attr_link_support(xdr, bitmap, &res->has_links)) != 0) 4154 if ((status = decode_attr_link_support(xdr, bitmap, &res->has_links)) != 0)
4228 goto xdr_error; 4155 goto xdr_error;
4229 if ((status = decode_attr_symlink_support(xdr, bitmap, &res->has_symlinks)) != 0) 4156 if ((status = decode_attr_symlink_support(xdr, bitmap, &res->has_symlinks)) != 0)
@@ -4294,6 +4221,7 @@ xdr_error:
4294 4221
4295static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, 4222static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
4296 struct nfs_fattr *fattr, struct nfs_fh *fh, 4223 struct nfs_fattr *fattr, struct nfs_fh *fh,
4224 struct nfs4_fs_locations *fs_loc,
4297 const struct nfs_server *server) 4225 const struct nfs_server *server)
4298{ 4226{
4299 int status; 4227 int status;
@@ -4341,9 +4269,7 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
4341 goto xdr_error; 4269 goto xdr_error;
4342 fattr->valid |= status; 4270 fattr->valid |= status;
4343 4271
4344 status = decode_attr_fs_locations(xdr, bitmap, container_of(fattr, 4272 status = decode_attr_fs_locations(xdr, bitmap, fs_loc);
4345 struct nfs4_fs_locations,
4346 fattr));
4347 if (status < 0) 4273 if (status < 0)
4348 goto xdr_error; 4274 goto xdr_error;
4349 fattr->valid |= status; 4275 fattr->valid |= status;
@@ -4407,7 +4333,8 @@ xdr_error:
4407} 4333}
4408 4334
4409static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fattr, 4335static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fattr,
4410 struct nfs_fh *fh, const struct nfs_server *server) 4336 struct nfs_fh *fh, struct nfs4_fs_locations *fs_loc,
4337 const struct nfs_server *server)
4411{ 4338{
4412 __be32 *savep; 4339 __be32 *savep;
4413 uint32_t attrlen, 4340 uint32_t attrlen,
@@ -4426,7 +4353,7 @@ static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fat
4426 if (status < 0) 4353 if (status < 0)
4427 goto xdr_error; 4354 goto xdr_error;
4428 4355
4429 status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, server); 4356 status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, fs_loc, server);
4430 if (status < 0) 4357 if (status < 0)
4431 goto xdr_error; 4358 goto xdr_error;
4432 4359
@@ -4439,7 +4366,7 @@ xdr_error:
4439static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, 4366static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
4440 const struct nfs_server *server) 4367 const struct nfs_server *server)
4441{ 4368{
4442 return decode_getfattr_generic(xdr, fattr, NULL, server); 4369 return decode_getfattr_generic(xdr, fattr, NULL, NULL, server);
4443} 4370}
4444 4371
4445/* 4372/*
@@ -4463,8 +4390,8 @@ static int decode_first_pnfs_layout_type(struct xdr_stream *xdr,
4463 return 0; 4390 return 0;
4464 } 4391 }
4465 if (num > 1) 4392 if (num > 1)
4466 printk(KERN_INFO "%s: Warning: Multiple pNFS layout drivers " 4393 printk(KERN_INFO "NFS: %s: Warning: Multiple pNFS layout "
4467 "per filesystem not supported\n", __func__); 4394 "drivers per filesystem not supported\n", __func__);
4468 4395
4469 /* Decode and set first layout type, move xdr->p past unused types */ 4396 /* Decode and set first layout type, move xdr->p past unused types */
4470 p = xdr_inline_decode(xdr, num * 4); 4397 p = xdr_inline_decode(xdr, num * 4);
@@ -4863,17 +4790,16 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
4863 size_t hdrlen; 4790 size_t hdrlen;
4864 u32 recvd, pglen = rcvbuf->page_len; 4791 u32 recvd, pglen = rcvbuf->page_len;
4865 int status; 4792 int status;
4793 __be32 verf[2];
4866 4794
4867 status = decode_op_hdr(xdr, OP_READDIR); 4795 status = decode_op_hdr(xdr, OP_READDIR);
4868 if (!status) 4796 if (!status)
4869 status = decode_verifier(xdr, readdir->verifier.data); 4797 status = decode_verifier(xdr, readdir->verifier.data);
4870 if (unlikely(status)) 4798 if (unlikely(status))
4871 return status; 4799 return status;
4800 memcpy(verf, readdir->verifier.data, sizeof(verf));
4872 dprintk("%s: verifier = %08x:%08x\n", 4801 dprintk("%s: verifier = %08x:%08x\n",
4873 __func__, 4802 __func__, verf[0], verf[1]);
4874 ((u32 *)readdir->verifier.data)[0],
4875 ((u32 *)readdir->verifier.data)[1]);
4876
4877 4803
4878 hdrlen = (char *) xdr->p - (char *) iov->iov_base; 4804 hdrlen = (char *) xdr->p - (char *) iov->iov_base;
4879 recvd = rcvbuf->len - hdrlen; 4805 recvd = rcvbuf->len - hdrlen;
@@ -5120,7 +5046,7 @@ static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res)
5120 goto out_overflow; 5046 goto out_overflow;
5121 res->count = be32_to_cpup(p++); 5047 res->count = be32_to_cpup(p++);
5122 res->verf->committed = be32_to_cpup(p++); 5048 res->verf->committed = be32_to_cpup(p++);
5123 memcpy(res->verf->verifier, p, 8); 5049 memcpy(res->verf->verifier, p, NFS4_VERIFIER_SIZE);
5124 return 0; 5050 return 0;
5125out_overflow: 5051out_overflow:
5126 print_overflow_msg(__func__, xdr); 5052 print_overflow_msg(__func__, xdr);
@@ -5214,6 +5140,7 @@ static int decode_exchange_id(struct xdr_stream *xdr,
5214 char *dummy_str; 5140 char *dummy_str;
5215 int status; 5141 int status;
5216 struct nfs_client *clp = res->client; 5142 struct nfs_client *clp = res->client;
5143 uint32_t impl_id_count;
5217 5144
5218 status = decode_op_hdr(xdr, OP_EXCHANGE_ID); 5145 status = decode_op_hdr(xdr, OP_EXCHANGE_ID);
5219 if (status) 5146 if (status)
@@ -5255,11 +5182,38 @@ static int decode_exchange_id(struct xdr_stream *xdr,
5255 memcpy(res->server_scope->server_scope, dummy_str, dummy); 5182 memcpy(res->server_scope->server_scope, dummy_str, dummy);
5256 res->server_scope->server_scope_sz = dummy; 5183 res->server_scope->server_scope_sz = dummy;
5257 5184
5258 /* Throw away Implementation id array */ 5185 /* Implementation Id */
5259 status = decode_opaque_inline(xdr, &dummy, &dummy_str); 5186 p = xdr_inline_decode(xdr, 4);
5260 if (unlikely(status)) 5187 if (unlikely(!p))
5261 return status; 5188 goto out_overflow;
5189 impl_id_count = be32_to_cpup(p++);
5190
5191 if (impl_id_count) {
5192 /* nii_domain */
5193 status = decode_opaque_inline(xdr, &dummy, &dummy_str);
5194 if (unlikely(status))
5195 return status;
5196 if (unlikely(dummy > NFS4_OPAQUE_LIMIT))
5197 return -EIO;
5198 memcpy(res->impl_id->domain, dummy_str, dummy);
5262 5199
5200 /* nii_name */
5201 status = decode_opaque_inline(xdr, &dummy, &dummy_str);
5202 if (unlikely(status))
5203 return status;
5204 if (unlikely(dummy > NFS4_OPAQUE_LIMIT))
5205 return -EIO;
5206 memcpy(res->impl_id->name, dummy_str, dummy);
5207
5208 /* nii_date */
5209 p = xdr_inline_decode(xdr, 12);
5210 if (unlikely(!p))
5211 goto out_overflow;
5212 p = xdr_decode_hyper(p, &res->impl_id->date.seconds);
5213 res->impl_id->date.nseconds = be32_to_cpup(p);
5214
5215 /* if there's more than one entry, ignore the rest */
5216 }
5263 return 0; 5217 return 0;
5264out_overflow: 5218out_overflow:
5265 print_overflow_msg(__func__, xdr); 5219 print_overflow_msg(__func__, xdr);
@@ -5285,8 +5239,8 @@ static int decode_chan_attrs(struct xdr_stream *xdr,
5285 attrs->max_reqs = be32_to_cpup(p++); 5239 attrs->max_reqs = be32_to_cpup(p++);
5286 nr_attrs = be32_to_cpup(p); 5240 nr_attrs = be32_to_cpup(p);
5287 if (unlikely(nr_attrs > 1)) { 5241 if (unlikely(nr_attrs > 1)) {
5288 printk(KERN_WARNING "%s: Invalid rdma channel attrs count %u\n", 5242 printk(KERN_WARNING "NFS: %s: Invalid rdma channel attrs "
5289 __func__, nr_attrs); 5243 "count %u\n", __func__, nr_attrs);
5290 return -EINVAL; 5244 return -EINVAL;
5291 } 5245 }
5292 if (nr_attrs == 1) { 5246 if (nr_attrs == 1) {
@@ -5436,14 +5390,14 @@ static int decode_getdevicelist(struct xdr_stream *xdr,
5436 p += 2; 5390 p += 2;
5437 5391
5438 /* Read verifier */ 5392 /* Read verifier */
5439 p = xdr_decode_opaque_fixed(p, verftemp.verifier, 8); 5393 p = xdr_decode_opaque_fixed(p, verftemp.verifier, NFS4_VERIFIER_SIZE);
5440 5394
5441 res->num_devs = be32_to_cpup(p); 5395 res->num_devs = be32_to_cpup(p);
5442 5396
5443 dprintk("%s: num_dev %d\n", __func__, res->num_devs); 5397 dprintk("%s: num_dev %d\n", __func__, res->num_devs);
5444 5398
5445 if (res->num_devs > NFS4_PNFS_GETDEVLIST_MAXNUM) { 5399 if (res->num_devs > NFS4_PNFS_GETDEVLIST_MAXNUM) {
5446 printk(KERN_ERR "%s too many result dev_num %u\n", 5400 printk(KERN_ERR "NFS: %s too many result dev_num %u\n",
5447 __func__, res->num_devs); 5401 __func__, res->num_devs);
5448 return -EIO; 5402 return -EIO;
5449 } 5403 }
@@ -5537,11 +5491,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
5537 status = decode_op_hdr(xdr, OP_LAYOUTGET); 5491 status = decode_op_hdr(xdr, OP_LAYOUTGET);
5538 if (status) 5492 if (status)
5539 return status; 5493 return status;
5540 p = xdr_inline_decode(xdr, 8 + NFS4_STATEID_SIZE); 5494 p = xdr_inline_decode(xdr, 4);
5495 if (unlikely(!p))
5496 goto out_overflow;
5497 res->return_on_close = be32_to_cpup(p);
5498 decode_stateid(xdr, &res->stateid);
5499 p = xdr_inline_decode(xdr, 4);
5541 if (unlikely(!p)) 5500 if (unlikely(!p))
5542 goto out_overflow; 5501 goto out_overflow;
5543 res->return_on_close = be32_to_cpup(p++);
5544 p = xdr_decode_opaque_fixed(p, res->stateid.data, NFS4_STATEID_SIZE);
5545 layout_count = be32_to_cpup(p); 5502 layout_count = be32_to_cpup(p);
5546 if (!layout_count) { 5503 if (!layout_count) {
5547 dprintk("%s: server responded with empty layout array\n", 5504 dprintk("%s: server responded with empty layout array\n",
@@ -5666,7 +5623,8 @@ static int decode_test_stateid(struct xdr_stream *xdr,
5666 if (unlikely(!p)) 5623 if (unlikely(!p))
5667 goto out_overflow; 5624 goto out_overflow;
5668 res->status = be32_to_cpup(p++); 5625 res->status = be32_to_cpup(p++);
5669 return res->status; 5626
5627 return status;
5670out_overflow: 5628out_overflow:
5671 print_overflow_msg(__func__, xdr); 5629 print_overflow_msg(__func__, xdr);
5672out: 5630out:
@@ -6583,8 +6541,9 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req,
6583 if (status) 6541 if (status)
6584 goto out; 6542 goto out;
6585 xdr_enter_page(xdr, PAGE_SIZE); 6543 xdr_enter_page(xdr, PAGE_SIZE);
6586 status = decode_getfattr(xdr, &res->fs_locations->fattr, 6544 status = decode_getfattr_generic(xdr, &res->fs_locations->fattr,
6587 res->fs_locations->server); 6545 NULL, res->fs_locations,
6546 res->fs_locations->server);
6588out: 6547out:
6589 return status; 6548 return status;
6590} 6549}
@@ -6964,7 +6923,7 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
6964 goto out_overflow; 6923 goto out_overflow;
6965 6924
6966 if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, 6925 if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh,
6967 entry->server) < 0) 6926 NULL, entry->server) < 0)
6968 goto out_overflow; 6927 goto out_overflow;
6969 if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) 6928 if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID)
6970 entry->ino = entry->fattr->mounted_on_fileid; 6929 entry->ino = entry->fattr->mounted_on_fileid;
@@ -7112,7 +7071,7 @@ struct rpc_procinfo nfs4_procedures[] = {
7112#endif /* CONFIG_NFS_V4_1 */ 7071#endif /* CONFIG_NFS_V4_1 */
7113}; 7072};
7114 7073
7115struct rpc_version nfs_version4 = { 7074const struct rpc_version nfs_version4 = {
7116 .number = 4, 7075 .number = 4,
7117 .nrprocs = ARRAY_SIZE(nfs4_procedures), 7076 .nrprocs = ARRAY_SIZE(nfs4_procedures),
7118 .procs = nfs4_procedures 7077 .procs = nfs4_procedures
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index c4744e1d513c..cd3c910d2d12 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -104,7 +104,7 @@ static char nfs_export_path[NFS_MAXPATHLEN + 1] __initdata = "";
104/* server:export path string passed to super.c */ 104/* server:export path string passed to super.c */
105static char nfs_root_device[NFS_MAXPATHLEN + 1] __initdata = ""; 105static char nfs_root_device[NFS_MAXPATHLEN + 1] __initdata = "";
106 106
107#ifdef RPC_DEBUG 107#ifdef NFS_DEBUG
108/* 108/*
109 * When the "nfsrootdebug" kernel command line option is specified, 109 * When the "nfsrootdebug" kernel command line option is specified,
110 * enable debugging messages for NFSROOT. 110 * enable debugging messages for NFSROOT.
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 55d01280a609..4bff4a3dab46 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -137,6 +137,7 @@ static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay,
137 struct objio_dev_ent *ode; 137 struct objio_dev_ent *ode;
138 struct osd_dev *od; 138 struct osd_dev *od;
139 struct osd_dev_info odi; 139 struct osd_dev_info odi;
140 bool retry_flag = true;
140 int err; 141 int err;
141 142
142 ode = _dev_list_find(NFS_SERVER(pnfslay->plh_inode), d_id); 143 ode = _dev_list_find(NFS_SERVER(pnfslay->plh_inode), d_id);
@@ -171,10 +172,18 @@ static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay,
171 goto out; 172 goto out;
172 } 173 }
173 174
175retry_lookup:
174 od = osduld_info_lookup(&odi); 176 od = osduld_info_lookup(&odi);
175 if (unlikely(IS_ERR(od))) { 177 if (unlikely(IS_ERR(od))) {
176 err = PTR_ERR(od); 178 err = PTR_ERR(od);
177 dprintk("%s: osduld_info_lookup => %d\n", __func__, err); 179 dprintk("%s: osduld_info_lookup => %d\n", __func__, err);
180 if (err == -ENODEV && retry_flag) {
181 err = objlayout_autologin(deviceaddr);
182 if (likely(!err)) {
183 retry_flag = false;
184 goto retry_lookup;
185 }
186 }
178 goto out; 187 goto out;
179 } 188 }
180 189
@@ -205,25 +214,36 @@ static void copy_single_comp(struct ore_components *oc, unsigned c,
205int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags, 214int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags,
206 struct objio_segment **pseg) 215 struct objio_segment **pseg)
207{ 216{
208 struct __alloc_objio_segment { 217/* This is the in memory structure of the objio_segment
209 struct objio_segment olseg; 218 *
210 struct ore_dev *ods[numdevs]; 219 * struct __alloc_objio_segment {
211 struct ore_comp comps[numdevs]; 220 * struct objio_segment olseg;
212 } *aolseg; 221 * struct ore_dev *ods[numdevs];
213 222 * struct ore_comp comps[numdevs];
214 aolseg = kzalloc(sizeof(*aolseg), gfp_flags); 223 * } *aolseg;
215 if (unlikely(!aolseg)) { 224 * NOTE: The code as above compiles and runs perfectly. It is elegant,
225 * type safe and compact. At some Past time Linus has decided he does not
226 * like variable length arrays, For the sake of this principal we uglify
227 * the code as below.
228 */
229 struct objio_segment *lseg;
230 size_t lseg_size = sizeof(*lseg) +
231 numdevs * sizeof(lseg->oc.ods[0]) +
232 numdevs * sizeof(*lseg->oc.comps);
233
234 lseg = kzalloc(lseg_size, gfp_flags);
235 if (unlikely(!lseg)) {
216 dprintk("%s: Faild allocation numdevs=%d size=%zd\n", __func__, 236 dprintk("%s: Faild allocation numdevs=%d size=%zd\n", __func__,
217 numdevs, sizeof(*aolseg)); 237 numdevs, lseg_size);
218 return -ENOMEM; 238 return -ENOMEM;
219 } 239 }
220 240
221 aolseg->olseg.oc.numdevs = numdevs; 241 lseg->oc.numdevs = numdevs;
222 aolseg->olseg.oc.single_comp = EC_MULTPLE_COMPS; 242 lseg->oc.single_comp = EC_MULTPLE_COMPS;
223 aolseg->olseg.oc.comps = aolseg->comps; 243 lseg->oc.ods = (void *)(lseg + 1);
224 aolseg->olseg.oc.ods = aolseg->ods; 244 lseg->oc.comps = (void *)(lseg->oc.ods + numdevs);
225 245
226 *pseg = &aolseg->olseg; 246 *pseg = lseg;
227 return 0; 247 return 0;
228} 248}
229 249
@@ -582,10 +602,10 @@ objlayout_init(void)
582 602
583 if (ret) 603 if (ret)
584 printk(KERN_INFO 604 printk(KERN_INFO
585 "%s: Registering OSD pNFS Layout Driver failed: error=%d\n", 605 "NFS: %s: Registering OSD pNFS Layout Driver failed: error=%d\n",
586 __func__, ret); 606 __func__, ret);
587 else 607 else
588 printk(KERN_INFO "%s: Registered OSD pNFS Layout Driver\n", 608 printk(KERN_INFO "NFS: %s: Registered OSD pNFS Layout Driver\n",
589 __func__); 609 __func__);
590 return ret; 610 return ret;
591} 611}
@@ -594,7 +614,7 @@ static void __exit
594objlayout_exit(void) 614objlayout_exit(void)
595{ 615{
596 pnfs_unregister_layoutdriver(&objlayout_type); 616 pnfs_unregister_layoutdriver(&objlayout_type);
597 printk(KERN_INFO "%s: Unregistered OSD pNFS Layout Driver\n", 617 printk(KERN_INFO "NFS: %s: Unregistered OSD pNFS Layout Driver\n",
598 __func__); 618 __func__);
599} 619}
600 620
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index b3c29039f5b8..8d45f1c318ce 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -37,6 +37,9 @@
37 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 37 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 */ 38 */
39 39
40#include <linux/kmod.h>
41#include <linux/moduleparam.h>
42#include <linux/ratelimit.h>
40#include <scsi/osd_initiator.h> 43#include <scsi/osd_initiator.h>
41#include "objlayout.h" 44#include "objlayout.h"
42 45
@@ -156,7 +159,7 @@ last_byte_offset(u64 start, u64 len)
156 return end > start ? end - 1 : NFS4_MAX_UINT64; 159 return end > start ? end - 1 : NFS4_MAX_UINT64;
157} 160}
158 161
159void _fix_verify_io_params(struct pnfs_layout_segment *lseg, 162static void _fix_verify_io_params(struct pnfs_layout_segment *lseg,
160 struct page ***p_pages, unsigned *p_pgbase, 163 struct page ***p_pages, unsigned *p_pgbase,
161 u64 offset, unsigned long count) 164 u64 offset, unsigned long count)
162{ 165{
@@ -490,9 +493,9 @@ encode_accumulated_error(struct objlayout *objlay, __be32 *p)
490 if (!ioerr->oer_errno) 493 if (!ioerr->oer_errno)
491 continue; 494 continue;
492 495
493 printk(KERN_ERR "%s: err[%d]: errno=%d is_write=%d " 496 printk(KERN_ERR "NFS: %s: err[%d]: errno=%d "
494 "dev(%llx:%llx) par=0x%llx obj=0x%llx " 497 "is_write=%d dev(%llx:%llx) par=0x%llx "
495 "offset=0x%llx length=0x%llx\n", 498 "obj=0x%llx offset=0x%llx length=0x%llx\n",
496 __func__, i, ioerr->oer_errno, 499 __func__, i, ioerr->oer_errno,
497 ioerr->oer_iswrite, 500 ioerr->oer_iswrite,
498 _DEVID_LO(&ioerr->oer_component.oid_device_id), 501 _DEVID_LO(&ioerr->oer_component.oid_device_id),
@@ -651,3 +654,134 @@ void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr)
651 __free_page(odi->page); 654 __free_page(odi->page);
652 kfree(odi); 655 kfree(odi);
653} 656}
657
658enum {
659 OBJLAYOUT_MAX_URI_LEN = 256, OBJLAYOUT_MAX_OSDNAME_LEN = 64,
660 OBJLAYOUT_MAX_SYSID_HEX_LEN = OSD_SYSTEMID_LEN * 2 + 1,
661 OSD_LOGIN_UPCALL_PATHLEN = 256
662};
663
664static char osd_login_prog[OSD_LOGIN_UPCALL_PATHLEN] = "/sbin/osd_login";
665
666module_param_string(osd_login_prog, osd_login_prog, sizeof(osd_login_prog),
667 0600);
668MODULE_PARM_DESC(osd_login_prog, "Path to the osd_login upcall program");
669
670struct __auto_login {
671 char uri[OBJLAYOUT_MAX_URI_LEN];
672 char osdname[OBJLAYOUT_MAX_OSDNAME_LEN];
673 char systemid_hex[OBJLAYOUT_MAX_SYSID_HEX_LEN];
674};
675
676static int __objlayout_upcall(struct __auto_login *login)
677{
678 static char *envp[] = { "HOME=/",
679 "TERM=linux",
680 "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
681 NULL
682 };
683 char *argv[8];
684 int ret;
685
686 if (unlikely(!osd_login_prog[0])) {
687 dprintk("%s: osd_login_prog is disabled\n", __func__);
688 return -EACCES;
689 }
690
691 dprintk("%s uri: %s\n", __func__, login->uri);
692 dprintk("%s osdname %s\n", __func__, login->osdname);
693 dprintk("%s systemid_hex %s\n", __func__, login->systemid_hex);
694
695 argv[0] = (char *)osd_login_prog;
696 argv[1] = "-u";
697 argv[2] = login->uri;
698 argv[3] = "-o";
699 argv[4] = login->osdname;
700 argv[5] = "-s";
701 argv[6] = login->systemid_hex;
702 argv[7] = NULL;
703
704 ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
705 /*
706 * Disable the upcall mechanism if we're getting an ENOENT or
707 * EACCES error. The admin can re-enable it on the fly by using
708 * sysfs to set the objlayoutdriver.osd_login_prog module parameter once
709 * the problem has been fixed.
710 */
711 if (ret == -ENOENT || ret == -EACCES) {
712 printk(KERN_ERR "PNFS-OBJ: %s was not found please set "
713 "objlayoutdriver.osd_login_prog kernel parameter!\n",
714 osd_login_prog);
715 osd_login_prog[0] = '\0';
716 }
717 dprintk("%s %s return value: %d\n", __func__, osd_login_prog, ret);
718
719 return ret;
720}
721
722/* Assume dest is all zeros */
723static void __copy_nfsS_and_zero_terminate(struct nfs4_string s,
724 char *dest, int max_len,
725 const char *var_name)
726{
727 if (!s.len)
728 return;
729
730 if (s.len >= max_len) {
731 pr_warn_ratelimited(
732 "objlayout_autologin: %s: s.len(%d) >= max_len(%d)",
733 var_name, s.len, max_len);
734 s.len = max_len - 1; /* space for null terminator */
735 }
736
737 memcpy(dest, s.data, s.len);
738}
739
740/* Assume sysid is all zeros */
741static void _sysid_2_hex(struct nfs4_string s,
742 char sysid[OBJLAYOUT_MAX_SYSID_HEX_LEN])
743{
744 int i;
745 char *cur;
746
747 if (!s.len)
748 return;
749
750 if (s.len != OSD_SYSTEMID_LEN) {
751 pr_warn_ratelimited(
752 "objlayout_autologin: systemid_len(%d) != OSD_SYSTEMID_LEN",
753 s.len);
754 if (s.len > OSD_SYSTEMID_LEN)
755 s.len = OSD_SYSTEMID_LEN;
756 }
757
758 cur = sysid;
759 for (i = 0; i < s.len; i++)
760 cur = hex_byte_pack(cur, s.data[i]);
761}
762
763int objlayout_autologin(struct pnfs_osd_deviceaddr *deviceaddr)
764{
765 int rc;
766 struct __auto_login login;
767
768 if (!deviceaddr->oda_targetaddr.ota_netaddr.r_addr.len)
769 return -ENODEV;
770
771 memset(&login, 0, sizeof(login));
772 __copy_nfsS_and_zero_terminate(
773 deviceaddr->oda_targetaddr.ota_netaddr.r_addr,
774 login.uri, sizeof(login.uri), "URI");
775
776 __copy_nfsS_and_zero_terminate(
777 deviceaddr->oda_osdname,
778 login.osdname, sizeof(login.osdname), "OSDNAME");
779
780 _sysid_2_hex(deviceaddr->oda_systemid, login.systemid_hex);
781
782 rc = __objlayout_upcall(&login);
783 if (rc > 0) /* script returns positive values */
784 rc = -ENODEV;
785
786 return rc;
787}
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h
index 8ec34727ed21..880ba086be94 100644
--- a/fs/nfs/objlayout/objlayout.h
+++ b/fs/nfs/objlayout/objlayout.h
@@ -184,4 +184,6 @@ extern void objlayout_encode_layoutreturn(
184 struct xdr_stream *, 184 struct xdr_stream *,
185 const struct nfs4_layoutreturn_args *); 185 const struct nfs4_layoutreturn_args *);
186 186
187extern int objlayout_autologin(struct pnfs_osd_deviceaddr *deviceaddr);
188
187#endif /* _OBJLAYOUT_H */ 189#endif /* _OBJLAYOUT_H */
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 5668f7c54c41..d21fceaa9f62 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -13,6 +13,7 @@
13#include <linux/file.h> 13#include <linux/file.h>
14#include <linux/sched.h> 14#include <linux/sched.h>
15#include <linux/sunrpc/clnt.h> 15#include <linux/sunrpc/clnt.h>
16#include <linux/nfs.h>
16#include <linux/nfs3.h> 17#include <linux/nfs3.h>
17#include <linux/nfs4.h> 18#include <linux/nfs4.h>
18#include <linux/nfs_page.h> 19#include <linux/nfs_page.h>
@@ -106,36 +107,6 @@ void nfs_unlock_request(struct nfs_page *req)
106 nfs_release_request(req); 107 nfs_release_request(req);
107} 108}
108 109
109/**
110 * nfs_set_page_tag_locked - Tag a request as locked
111 * @req:
112 */
113int nfs_set_page_tag_locked(struct nfs_page *req)
114{
115 if (!nfs_lock_request_dontget(req))
116 return 0;
117 if (test_bit(PG_MAPPED, &req->wb_flags))
118 radix_tree_tag_set(&NFS_I(req->wb_context->dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
119 return 1;
120}
121
122/**
123 * nfs_clear_page_tag_locked - Clear request tag and wake up sleepers
124 */
125void nfs_clear_page_tag_locked(struct nfs_page *req)
126{
127 if (test_bit(PG_MAPPED, &req->wb_flags)) {
128 struct inode *inode = req->wb_context->dentry->d_inode;
129 struct nfs_inode *nfsi = NFS_I(inode);
130
131 spin_lock(&inode->i_lock);
132 radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
133 nfs_unlock_request(req);
134 spin_unlock(&inode->i_lock);
135 } else
136 nfs_unlock_request(req);
137}
138
139/* 110/*
140 * nfs_clear_request - Free up all resources allocated to the request 111 * nfs_clear_request - Free up all resources allocated to the request
141 * @req: 112 * @req:
@@ -425,67 +396,6 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
425 } 396 }
426} 397}
427 398
428#define NFS_SCAN_MAXENTRIES 16
429/**
430 * nfs_scan_list - Scan a list for matching requests
431 * @nfsi: NFS inode
432 * @dst: Destination list
433 * @idx_start: lower bound of page->index to scan
434 * @npages: idx_start + npages sets the upper bound to scan.
435 * @tag: tag to scan for
436 *
437 * Moves elements from one of the inode request lists.
438 * If the number of requests is set to 0, the entire address_space
439 * starting at index idx_start, is scanned.
440 * The requests are *not* checked to ensure that they form a contiguous set.
441 * You must be holding the inode's i_lock when calling this function
442 */
443int nfs_scan_list(struct nfs_inode *nfsi,
444 struct list_head *dst, pgoff_t idx_start,
445 unsigned int npages, int tag)
446{
447 struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
448 struct nfs_page *req;
449 pgoff_t idx_end;
450 int found, i;
451 int res;
452 struct list_head *list;
453
454 res = 0;
455 if (npages == 0)
456 idx_end = ~0;
457 else
458 idx_end = idx_start + npages - 1;
459
460 for (;;) {
461 found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree,
462 (void **)&pgvec[0], idx_start,
463 NFS_SCAN_MAXENTRIES, tag);
464 if (found <= 0)
465 break;
466 for (i = 0; i < found; i++) {
467 req = pgvec[i];
468 if (req->wb_index > idx_end)
469 goto out;
470 idx_start = req->wb_index + 1;
471 if (nfs_set_page_tag_locked(req)) {
472 kref_get(&req->wb_kref);
473 radix_tree_tag_clear(&nfsi->nfs_page_tree,
474 req->wb_index, tag);
475 list = pnfs_choose_commit_list(req, dst);
476 nfs_list_add_request(req, list);
477 res++;
478 if (res == INT_MAX)
479 goto out;
480 }
481 }
482 /* for latency reduction */
483 cond_resched_lock(&nfsi->vfs_inode.i_lock);
484 }
485out:
486 return res;
487}
488
489int __init nfs_init_nfspagecache(void) 399int __init nfs_init_nfspagecache(void)
490{ 400{
491 nfs_page_cachep = kmem_cache_create("nfs_page", 401 nfs_page_cachep = kmem_cache_create("nfs_page",
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 17149a490065..b5d451586943 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -101,8 +101,8 @@ set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
101 goto out_no_driver; 101 goto out_no_driver;
102 if (!(server->nfs_client->cl_exchange_flags & 102 if (!(server->nfs_client->cl_exchange_flags &
103 (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) { 103 (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
104 printk(KERN_ERR "%s: id %u cl_exchange_flags 0x%x\n", __func__, 104 printk(KERN_ERR "NFS: %s: id %u cl_exchange_flags 0x%x\n",
105 id, server->nfs_client->cl_exchange_flags); 105 __func__, id, server->nfs_client->cl_exchange_flags);
106 goto out_no_driver; 106 goto out_no_driver;
107 } 107 }
108 ld_type = find_pnfs_driver(id); 108 ld_type = find_pnfs_driver(id);
@@ -122,8 +122,8 @@ set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
122 server->pnfs_curr_ld = ld_type; 122 server->pnfs_curr_ld = ld_type;
123 if (ld_type->set_layoutdriver 123 if (ld_type->set_layoutdriver
124 && ld_type->set_layoutdriver(server, mntfh)) { 124 && ld_type->set_layoutdriver(server, mntfh)) {
125 printk(KERN_ERR "%s: Error initializing pNFS layout driver %u.\n", 125 printk(KERN_ERR "NFS: %s: Error initializing pNFS layout "
126 __func__, id); 126 "driver %u.\n", __func__, id);
127 module_put(ld_type->owner); 127 module_put(ld_type->owner);
128 goto out_no_driver; 128 goto out_no_driver;
129 } 129 }
@@ -143,11 +143,11 @@ pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
143 struct pnfs_layoutdriver_type *tmp; 143 struct pnfs_layoutdriver_type *tmp;
144 144
145 if (ld_type->id == 0) { 145 if (ld_type->id == 0) {
146 printk(KERN_ERR "%s id 0 is reserved\n", __func__); 146 printk(KERN_ERR "NFS: %s id 0 is reserved\n", __func__);
147 return status; 147 return status;
148 } 148 }
149 if (!ld_type->alloc_lseg || !ld_type->free_lseg) { 149 if (!ld_type->alloc_lseg || !ld_type->free_lseg) {
150 printk(KERN_ERR "%s Layout driver must provide " 150 printk(KERN_ERR "NFS: %s Layout driver must provide "
151 "alloc_lseg and free_lseg.\n", __func__); 151 "alloc_lseg and free_lseg.\n", __func__);
152 return status; 152 return status;
153 } 153 }
@@ -160,7 +160,7 @@ pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
160 dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id, 160 dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id,
161 ld_type->name); 161 ld_type->name);
162 } else { 162 } else {
163 printk(KERN_ERR "%s Module with id %d already loaded!\n", 163 printk(KERN_ERR "NFS: %s Module with id %d already loaded!\n",
164 __func__, ld_type->id); 164 __func__, ld_type->id);
165 } 165 }
166 spin_unlock(&pnfs_spinlock); 166 spin_unlock(&pnfs_spinlock);
@@ -496,12 +496,12 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
496{ 496{
497 u32 oldseq, newseq; 497 u32 oldseq, newseq;
498 498
499 oldseq = be32_to_cpu(lo->plh_stateid.stateid.seqid); 499 oldseq = be32_to_cpu(lo->plh_stateid.seqid);
500 newseq = be32_to_cpu(new->stateid.seqid); 500 newseq = be32_to_cpu(new->seqid);
501 if ((int)(newseq - oldseq) > 0) { 501 if ((int)(newseq - oldseq) > 0) {
502 memcpy(&lo->plh_stateid, &new->stateid, sizeof(new->stateid)); 502 nfs4_stateid_copy(&lo->plh_stateid, new);
503 if (update_barrier) { 503 if (update_barrier) {
504 u32 new_barrier = be32_to_cpu(new->stateid.seqid); 504 u32 new_barrier = be32_to_cpu(new->seqid);
505 505
506 if ((int)(new_barrier - lo->plh_barrier)) 506 if ((int)(new_barrier - lo->plh_barrier))
507 lo->plh_barrier = new_barrier; 507 lo->plh_barrier = new_barrier;
@@ -525,7 +525,7 @@ pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid,
525 int lget) 525 int lget)
526{ 526{
527 if ((stateid) && 527 if ((stateid) &&
528 (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0) 528 (int)(lo->plh_barrier - be32_to_cpu(stateid->seqid)) >= 0)
529 return true; 529 return true;
530 return lo->plh_block_lgets || 530 return lo->plh_block_lgets ||
531 test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags) || 531 test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags) ||
@@ -549,11 +549,10 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
549 549
550 do { 550 do {
551 seq = read_seqbegin(&open_state->seqlock); 551 seq = read_seqbegin(&open_state->seqlock);
552 memcpy(dst->data, open_state->stateid.data, 552 nfs4_stateid_copy(dst, &open_state->stateid);
553 sizeof(open_state->stateid.data));
554 } while (read_seqretry(&open_state->seqlock, seq)); 553 } while (read_seqretry(&open_state->seqlock, seq));
555 } else 554 } else
556 memcpy(dst->data, lo->plh_stateid.data, sizeof(lo->plh_stateid.data)); 555 nfs4_stateid_copy(dst, &lo->plh_stateid);
557 spin_unlock(&lo->plh_inode->i_lock); 556 spin_unlock(&lo->plh_inode->i_lock);
558 dprintk("<-- %s\n", __func__); 557 dprintk("<-- %s\n", __func__);
559 return status; 558 return status;
@@ -590,7 +589,7 @@ send_layoutget(struct pnfs_layout_hdr *lo,
590 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; 589 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
591 max_pages = max_resp_sz >> PAGE_SHIFT; 590 max_pages = max_resp_sz >> PAGE_SHIFT;
592 591
593 pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags); 592 pages = kcalloc(max_pages, sizeof(struct page *), gfp_flags);
594 if (!pages) 593 if (!pages)
595 goto out_err_free; 594 goto out_err_free;
596 595
@@ -760,7 +759,7 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier)
760 } 759 }
761 if (!found) { 760 if (!found) {
762 struct pnfs_layout_hdr *lo = nfsi->layout; 761 struct pnfs_layout_hdr *lo = nfsi->layout;
763 u32 current_seqid = be32_to_cpu(lo->plh_stateid.stateid.seqid); 762 u32 current_seqid = be32_to_cpu(lo->plh_stateid.seqid);
764 763
765 /* Since close does not return a layout stateid for use as 764 /* Since close does not return a layout stateid for use as
766 * a barrier, we choose the worst-case barrier. 765 * a barrier, we choose the worst-case barrier.
@@ -966,8 +965,7 @@ pnfs_update_layout(struct inode *ino,
966 } 965 }
967 966
968 /* Do we even need to bother with this? */ 967 /* Do we even need to bother with this? */
969 if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) || 968 if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
970 test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
971 dprintk("%s matches recall, use MDS\n", __func__); 969 dprintk("%s matches recall, use MDS\n", __func__);
972 goto out_unlock; 970 goto out_unlock;
973 } 971 }
@@ -1032,7 +1030,6 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
1032 struct nfs4_layoutget_res *res = &lgp->res; 1030 struct nfs4_layoutget_res *res = &lgp->res;
1033 struct pnfs_layout_segment *lseg; 1031 struct pnfs_layout_segment *lseg;
1034 struct inode *ino = lo->plh_inode; 1032 struct inode *ino = lo->plh_inode;
1035 struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
1036 int status = 0; 1033 int status = 0;
1037 1034
1038 /* Inject layout blob into I/O device driver */ 1035 /* Inject layout blob into I/O device driver */
@@ -1048,8 +1045,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
1048 } 1045 }
1049 1046
1050 spin_lock(&ino->i_lock); 1047 spin_lock(&ino->i_lock);
1051 if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) || 1048 if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
1052 test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
1053 dprintk("%s forget reply due to recall\n", __func__); 1049 dprintk("%s forget reply due to recall\n", __func__);
1054 goto out_forget_reply; 1050 goto out_forget_reply;
1055 } 1051 }
@@ -1214,6 +1210,7 @@ void pnfs_ld_write_done(struct nfs_write_data *data)
1214 } 1210 }
1215 data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages); 1211 data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages);
1216 } 1212 }
1213 put_lseg(data->lseg);
1217 data->mds_ops->rpc_release(data); 1214 data->mds_ops->rpc_release(data);
1218} 1215}
1219EXPORT_SYMBOL_GPL(pnfs_ld_write_done); 1216EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
@@ -1227,6 +1224,7 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
1227 nfs_list_add_request(data->req, &desc->pg_list); 1224 nfs_list_add_request(data->req, &desc->pg_list);
1228 nfs_pageio_reset_write_mds(desc); 1225 nfs_pageio_reset_write_mds(desc);
1229 desc->pg_recoalesce = 1; 1226 desc->pg_recoalesce = 1;
1227 put_lseg(data->lseg);
1230 nfs_writedata_release(data); 1228 nfs_writedata_release(data);
1231} 1229}
1232 1230
@@ -1327,6 +1325,7 @@ void pnfs_ld_read_done(struct nfs_read_data *data)
1327 data->mds_ops->rpc_call_done(&data->task, data); 1325 data->mds_ops->rpc_call_done(&data->task, data);
1328 } else 1326 } else
1329 pnfs_ld_handle_read_error(data); 1327 pnfs_ld_handle_read_error(data);
1328 put_lseg(data->lseg);
1330 data->mds_ops->rpc_release(data); 1329 data->mds_ops->rpc_release(data);
1331} 1330}
1332EXPORT_SYMBOL_GPL(pnfs_ld_read_done); 1331EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
@@ -1530,8 +1529,7 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
1530 end_pos = nfsi->layout->plh_lwb; 1529 end_pos = nfsi->layout->plh_lwb;
1531 nfsi->layout->plh_lwb = 0; 1530 nfsi->layout->plh_lwb = 0;
1532 1531
1533 memcpy(&data->args.stateid.data, nfsi->layout->plh_stateid.data, 1532 nfs4_stateid_copy(&data->args.stateid, &nfsi->layout->plh_stateid);
1534 sizeof(nfsi->layout->plh_stateid.data));
1535 spin_unlock(&inode->i_lock); 1533 spin_unlock(&inode->i_lock);
1536 1534
1537 data->args.inode = inode; 1535 data->args.inode = inode;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 53d593a0a4f2..442ebf68eeec 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -94,11 +94,10 @@ struct pnfs_layoutdriver_type {
94 const struct nfs_pageio_ops *pg_read_ops; 94 const struct nfs_pageio_ops *pg_read_ops;
95 const struct nfs_pageio_ops *pg_write_ops; 95 const struct nfs_pageio_ops *pg_write_ops;
96 96
97 /* Returns true if layoutdriver wants to divert this request to 97 void (*mark_request_commit) (struct nfs_page *req,
98 * driver's commit routine. 98 struct pnfs_layout_segment *lseg);
99 */ 99 void (*clear_request_commit) (struct nfs_page *req);
100 bool (*mark_pnfs_commit)(struct pnfs_layout_segment *lseg); 100 int (*scan_commit_lists) (struct inode *inode, int max, spinlock_t *lock);
101 struct list_head * (*choose_commit_list) (struct nfs_page *req);
102 int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how); 101 int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how);
103 102
104 /* 103 /*
@@ -229,7 +228,6 @@ struct nfs4_deviceid_node {
229 atomic_t ref; 228 atomic_t ref;
230}; 229};
231 230
232void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id);
233struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); 231struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
234void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); 232void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
235void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, 233void nfs4_init_deviceid_node(struct nfs4_deviceid_node *,
@@ -262,20 +260,6 @@ static inline int pnfs_enabled_sb(struct nfs_server *nfss)
262 return nfss->pnfs_curr_ld != NULL; 260 return nfss->pnfs_curr_ld != NULL;
263} 261}
264 262
265static inline void
266pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
267{
268 if (lseg) {
269 struct pnfs_layoutdriver_type *ld;
270
271 ld = NFS_SERVER(req->wb_page->mapping->host)->pnfs_curr_ld;
272 if (ld->mark_pnfs_commit && ld->mark_pnfs_commit(lseg)) {
273 set_bit(PG_PNFS_COMMIT, &req->wb_flags);
274 req->wb_commit_lseg = get_lseg(lseg);
275 }
276 }
277}
278
279static inline int 263static inline int
280pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) 264pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how)
281{ 265{
@@ -284,27 +268,42 @@ pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how)
284 return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how); 268 return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how);
285} 269}
286 270
287static inline struct list_head * 271static inline bool
288pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds) 272pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
289{ 273{
290 struct list_head *rv; 274 struct inode *inode = req->wb_context->dentry->d_inode;
275 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
291 276
292 if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags)) { 277 if (lseg == NULL || ld->mark_request_commit == NULL)
293 struct inode *inode = req->wb_commit_lseg->pls_layout->plh_inode; 278 return false;
279 ld->mark_request_commit(req, lseg);
280 return true;
281}
294 282
295 set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags); 283static inline bool
296 rv = NFS_SERVER(inode)->pnfs_curr_ld->choose_commit_list(req); 284pnfs_clear_request_commit(struct nfs_page *req)
297 /* matched by ref taken when PG_PNFS_COMMIT is set */ 285{
298 put_lseg(req->wb_commit_lseg); 286 struct inode *inode = req->wb_context->dentry->d_inode;
299 } else 287 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
300 rv = mds; 288
301 return rv; 289 if (ld == NULL || ld->clear_request_commit == NULL)
290 return false;
291 ld->clear_request_commit(req);
292 return true;
302} 293}
303 294
304static inline void pnfs_clear_request_commit(struct nfs_page *req) 295static inline int
296pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock)
305{ 297{
306 if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags)) 298 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
307 put_lseg(req->wb_commit_lseg); 299 int ret;
300
301 if (ld == NULL || ld->scan_commit_lists == NULL)
302 return 0;
303 ret = ld->scan_commit_lists(inode, max, lock);
304 if (ret != 0)
305 set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags);
306 return ret;
308} 307}
309 308
310/* Should the pNFS client commit and return the layout upon a setattr */ 309/* Should the pNFS client commit and return the layout upon a setattr */
@@ -328,6 +327,13 @@ static inline int pnfs_return_layout(struct inode *ino)
328 return 0; 327 return 0;
329} 328}
330 329
330#ifdef NFS_DEBUG
331void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id);
332#else
333static inline void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id)
334{
335}
336#endif /* NFS_DEBUG */
331#else /* CONFIG_NFS_V4_1 */ 337#else /* CONFIG_NFS_V4_1 */
332 338
333static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) 339static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
@@ -400,35 +406,35 @@ static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, st
400 return false; 406 return false;
401} 407}
402 408
403static inline void
404pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
405{
406}
407
408static inline int 409static inline int
409pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) 410pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how)
410{ 411{
411 return PNFS_NOT_ATTEMPTED; 412 return PNFS_NOT_ATTEMPTED;
412} 413}
413 414
414static inline struct list_head * 415static inline bool
415pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds) 416pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
416{ 417{
417 return mds; 418 return false;
418} 419}
419 420
420static inline void pnfs_clear_request_commit(struct nfs_page *req) 421static inline bool
422pnfs_clear_request_commit(struct nfs_page *req)
421{ 423{
424 return false;
422} 425}
423 426
424static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) 427static inline int
428pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock)
425{ 429{
426 return 0; 430 return 0;
427} 431}
428 432
429static inline void nfs4_deviceid_purge_client(struct nfs_client *ncl) 433static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
430{ 434{
435 return 0;
431} 436}
437
432#endif /* CONFIG_NFS_V4_1 */ 438#endif /* CONFIG_NFS_V4_1 */
433 439
434#endif /* FS_NFS_PNFS_H */ 440#endif /* FS_NFS_PNFS_H */
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index 4f359d2a26eb..73f701f1f4d3 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -43,6 +43,7 @@
43static struct hlist_head nfs4_deviceid_cache[NFS4_DEVICE_ID_HASH_SIZE]; 43static struct hlist_head nfs4_deviceid_cache[NFS4_DEVICE_ID_HASH_SIZE];
44static DEFINE_SPINLOCK(nfs4_deviceid_lock); 44static DEFINE_SPINLOCK(nfs4_deviceid_lock);
45 45
46#ifdef NFS_DEBUG
46void 47void
47nfs4_print_deviceid(const struct nfs4_deviceid *id) 48nfs4_print_deviceid(const struct nfs4_deviceid *id)
48{ 49{
@@ -52,6 +53,7 @@ nfs4_print_deviceid(const struct nfs4_deviceid *id)
52 p[0], p[1], p[2], p[3]); 53 p[0], p[1], p[2], p[3]);
53} 54}
54EXPORT_SYMBOL_GPL(nfs4_print_deviceid); 55EXPORT_SYMBOL_GPL(nfs4_print_deviceid);
56#endif
55 57
56static inline u32 58static inline u32
57nfs4_deviceid_hash(const struct nfs4_deviceid *id) 59nfs4_deviceid_hash(const struct nfs4_deviceid *id)
@@ -92,7 +94,7 @@ _lookup_deviceid(const struct pnfs_layoutdriver_type *ld,
92 * @clp nfs_client associated with deviceid 94 * @clp nfs_client associated with deviceid
93 * @id deviceid to look up 95 * @id deviceid to look up
94 */ 96 */
95struct nfs4_deviceid_node * 97static struct nfs4_deviceid_node *
96_find_get_deviceid(const struct pnfs_layoutdriver_type *ld, 98_find_get_deviceid(const struct pnfs_layoutdriver_type *ld,
97 const struct nfs_client *clp, const struct nfs4_deviceid *id, 99 const struct nfs_client *clp, const struct nfs4_deviceid *id,
98 long hash) 100 long hash)
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 0c672588fe5a..b63b6f4d14fb 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -358,6 +358,11 @@ nfs_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
358 msg->rpc_proc = &nfs_procedures[NFSPROC_REMOVE]; 358 msg->rpc_proc = &nfs_procedures[NFSPROC_REMOVE];
359} 359}
360 360
361static void nfs_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data)
362{
363 rpc_call_start(task);
364}
365
361static int nfs_proc_unlink_done(struct rpc_task *task, struct inode *dir) 366static int nfs_proc_unlink_done(struct rpc_task *task, struct inode *dir)
362{ 367{
363 if (nfs_async_handle_expired_key(task)) 368 if (nfs_async_handle_expired_key(task))
@@ -372,6 +377,11 @@ nfs_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
372 msg->rpc_proc = &nfs_procedures[NFSPROC_RENAME]; 377 msg->rpc_proc = &nfs_procedures[NFSPROC_RENAME];
373} 378}
374 379
380static void nfs_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data)
381{
382 rpc_call_start(task);
383}
384
375static int 385static int
376nfs_proc_rename_done(struct rpc_task *task, struct inode *old_dir, 386nfs_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
377 struct inode *new_dir) 387 struct inode *new_dir)
@@ -651,6 +661,11 @@ static void nfs_proc_read_setup(struct nfs_read_data *data, struct rpc_message *
651 msg->rpc_proc = &nfs_procedures[NFSPROC_READ]; 661 msg->rpc_proc = &nfs_procedures[NFSPROC_READ];
652} 662}
653 663
664static void nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
665{
666 rpc_call_start(task);
667}
668
654static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) 669static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
655{ 670{
656 if (nfs_async_handle_expired_key(task)) 671 if (nfs_async_handle_expired_key(task))
@@ -668,6 +683,11 @@ static void nfs_proc_write_setup(struct nfs_write_data *data, struct rpc_message
668 msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE]; 683 msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE];
669} 684}
670 685
686static void nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
687{
688 rpc_call_start(task);
689}
690
671static void 691static void
672nfs_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) 692nfs_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg)
673{ 693{
@@ -721,9 +741,11 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
721 .create = nfs_proc_create, 741 .create = nfs_proc_create,
722 .remove = nfs_proc_remove, 742 .remove = nfs_proc_remove,
723 .unlink_setup = nfs_proc_unlink_setup, 743 .unlink_setup = nfs_proc_unlink_setup,
744 .unlink_rpc_prepare = nfs_proc_unlink_rpc_prepare,
724 .unlink_done = nfs_proc_unlink_done, 745 .unlink_done = nfs_proc_unlink_done,
725 .rename = nfs_proc_rename, 746 .rename = nfs_proc_rename,
726 .rename_setup = nfs_proc_rename_setup, 747 .rename_setup = nfs_proc_rename_setup,
748 .rename_rpc_prepare = nfs_proc_rename_rpc_prepare,
727 .rename_done = nfs_proc_rename_done, 749 .rename_done = nfs_proc_rename_done,
728 .link = nfs_proc_link, 750 .link = nfs_proc_link,
729 .symlink = nfs_proc_symlink, 751 .symlink = nfs_proc_symlink,
@@ -736,8 +758,10 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
736 .pathconf = nfs_proc_pathconf, 758 .pathconf = nfs_proc_pathconf,
737 .decode_dirent = nfs2_decode_dirent, 759 .decode_dirent = nfs2_decode_dirent,
738 .read_setup = nfs_proc_read_setup, 760 .read_setup = nfs_proc_read_setup,
761 .read_rpc_prepare = nfs_proc_read_rpc_prepare,
739 .read_done = nfs_read_done, 762 .read_done = nfs_read_done,
740 .write_setup = nfs_proc_write_setup, 763 .write_setup = nfs_proc_write_setup,
764 .write_rpc_prepare = nfs_proc_write_rpc_prepare,
741 .write_done = nfs_write_done, 765 .write_done = nfs_write_done,
742 .commit_setup = nfs_proc_commit_setup, 766 .commit_setup = nfs_proc_commit_setup,
743 .lock = nfs_proc_lock, 767 .lock = nfs_proc_lock,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index cfa175c223dc..cc1f758a7ee1 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -66,7 +66,6 @@ void nfs_readdata_free(struct nfs_read_data *p)
66 66
67void nfs_readdata_release(struct nfs_read_data *rdata) 67void nfs_readdata_release(struct nfs_read_data *rdata)
68{ 68{
69 put_lseg(rdata->lseg);
70 put_nfs_open_context(rdata->args.context); 69 put_nfs_open_context(rdata->args.context);
71 nfs_readdata_free(rdata); 70 nfs_readdata_free(rdata);
72} 71}
@@ -465,23 +464,14 @@ static void nfs_readpage_release_partial(void *calldata)
465 nfs_readdata_release(calldata); 464 nfs_readdata_release(calldata);
466} 465}
467 466
468#if defined(CONFIG_NFS_V4_1)
469void nfs_read_prepare(struct rpc_task *task, void *calldata) 467void nfs_read_prepare(struct rpc_task *task, void *calldata)
470{ 468{
471 struct nfs_read_data *data = calldata; 469 struct nfs_read_data *data = calldata;
472 470 NFS_PROTO(data->inode)->read_rpc_prepare(task, data);
473 if (nfs4_setup_sequence(NFS_SERVER(data->inode),
474 &data->args.seq_args, &data->res.seq_res,
475 0, task))
476 return;
477 rpc_call_start(task);
478} 471}
479#endif /* CONFIG_NFS_V4_1 */
480 472
481static const struct rpc_call_ops nfs_read_partial_ops = { 473static const struct rpc_call_ops nfs_read_partial_ops = {
482#if defined(CONFIG_NFS_V4_1)
483 .rpc_call_prepare = nfs_read_prepare, 474 .rpc_call_prepare = nfs_read_prepare,
484#endif /* CONFIG_NFS_V4_1 */
485 .rpc_call_done = nfs_readpage_result_partial, 475 .rpc_call_done = nfs_readpage_result_partial,
486 .rpc_release = nfs_readpage_release_partial, 476 .rpc_release = nfs_readpage_release_partial,
487}; 477};
@@ -545,9 +535,7 @@ static void nfs_readpage_release_full(void *calldata)
545} 535}
546 536
547static const struct rpc_call_ops nfs_read_full_ops = { 537static const struct rpc_call_ops nfs_read_full_ops = {
548#if defined(CONFIG_NFS_V4_1)
549 .rpc_call_prepare = nfs_read_prepare, 538 .rpc_call_prepare = nfs_read_prepare,
550#endif /* CONFIG_NFS_V4_1 */
551 .rpc_call_done = nfs_readpage_result_full, 539 .rpc_call_done = nfs_readpage_result_full,
552 .rpc_release = nfs_readpage_release_full, 540 .rpc_release = nfs_readpage_release_full,
553}; 541};
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 3dfa4f112c0a..ccc4cdb1efe9 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -52,6 +52,8 @@
52#include <linux/nfs_xdr.h> 52#include <linux/nfs_xdr.h>
53#include <linux/magic.h> 53#include <linux/magic.h>
54#include <linux/parser.h> 54#include <linux/parser.h>
55#include <linux/nsproxy.h>
56#include <linux/rcupdate.h>
55 57
56#include <asm/system.h> 58#include <asm/system.h>
57#include <asm/uaccess.h> 59#include <asm/uaccess.h>
@@ -79,7 +81,6 @@ enum {
79 Opt_cto, Opt_nocto, 81 Opt_cto, Opt_nocto,
80 Opt_ac, Opt_noac, 82 Opt_ac, Opt_noac,
81 Opt_lock, Opt_nolock, 83 Opt_lock, Opt_nolock,
82 Opt_v2, Opt_v3, Opt_v4,
83 Opt_udp, Opt_tcp, Opt_rdma, 84 Opt_udp, Opt_tcp, Opt_rdma,
84 Opt_acl, Opt_noacl, 85 Opt_acl, Opt_noacl,
85 Opt_rdirplus, Opt_nordirplus, 86 Opt_rdirplus, Opt_nordirplus,
@@ -97,10 +98,10 @@ enum {
97 Opt_namelen, 98 Opt_namelen,
98 Opt_mountport, 99 Opt_mountport,
99 Opt_mountvers, 100 Opt_mountvers,
100 Opt_nfsvers,
101 Opt_minorversion, 101 Opt_minorversion,
102 102
103 /* Mount options that take string arguments */ 103 /* Mount options that take string arguments */
104 Opt_nfsvers,
104 Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, 105 Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost,
105 Opt_addr, Opt_mountaddr, Opt_clientaddr, 106 Opt_addr, Opt_mountaddr, Opt_clientaddr,
106 Opt_lookupcache, 107 Opt_lookupcache,
@@ -132,9 +133,6 @@ static const match_table_t nfs_mount_option_tokens = {
132 { Opt_noac, "noac" }, 133 { Opt_noac, "noac" },
133 { Opt_lock, "lock" }, 134 { Opt_lock, "lock" },
134 { Opt_nolock, "nolock" }, 135 { Opt_nolock, "nolock" },
135 { Opt_v2, "v2" },
136 { Opt_v3, "v3" },
137 { Opt_v4, "v4" },
138 { Opt_udp, "udp" }, 136 { Opt_udp, "udp" },
139 { Opt_tcp, "tcp" }, 137 { Opt_tcp, "tcp" },
140 { Opt_rdma, "rdma" }, 138 { Opt_rdma, "rdma" },
@@ -163,9 +161,10 @@ static const match_table_t nfs_mount_option_tokens = {
163 { Opt_namelen, "namlen=%s" }, 161 { Opt_namelen, "namlen=%s" },
164 { Opt_mountport, "mountport=%s" }, 162 { Opt_mountport, "mountport=%s" },
165 { Opt_mountvers, "mountvers=%s" }, 163 { Opt_mountvers, "mountvers=%s" },
164 { Opt_minorversion, "minorversion=%s" },
165
166 { Opt_nfsvers, "nfsvers=%s" }, 166 { Opt_nfsvers, "nfsvers=%s" },
167 { Opt_nfsvers, "vers=%s" }, 167 { Opt_nfsvers, "vers=%s" },
168 { Opt_minorversion, "minorversion=%s" },
169 168
170 { Opt_sec, "sec=%s" }, 169 { Opt_sec, "sec=%s" },
171 { Opt_proto, "proto=%s" }, 170 { Opt_proto, "proto=%s" },
@@ -179,6 +178,9 @@ static const match_table_t nfs_mount_option_tokens = {
179 { Opt_fscache_uniq, "fsc=%s" }, 178 { Opt_fscache_uniq, "fsc=%s" },
180 { Opt_local_lock, "local_lock=%s" }, 179 { Opt_local_lock, "local_lock=%s" },
181 180
181 /* The following needs to be listed after all other options */
182 { Opt_nfsvers, "v%s" },
183
182 { Opt_err, NULL } 184 { Opt_err, NULL }
183}; 185};
184 186
@@ -259,6 +261,22 @@ static match_table_t nfs_local_lock_tokens = {
259 { Opt_local_lock_err, NULL } 261 { Opt_local_lock_err, NULL }
260}; 262};
261 263
264enum {
265 Opt_vers_2, Opt_vers_3, Opt_vers_4, Opt_vers_4_0,
266 Opt_vers_4_1,
267
268 Opt_vers_err
269};
270
271static match_table_t nfs_vers_tokens = {
272 { Opt_vers_2, "2" },
273 { Opt_vers_3, "3" },
274 { Opt_vers_4, "4" },
275 { Opt_vers_4_0, "4.0" },
276 { Opt_vers_4_1, "4.1" },
277
278 { Opt_vers_err, NULL }
279};
262 280
263static void nfs_umount_begin(struct super_block *); 281static void nfs_umount_begin(struct super_block *);
264static int nfs_statfs(struct dentry *, struct kstatfs *); 282static int nfs_statfs(struct dentry *, struct kstatfs *);
@@ -620,7 +638,6 @@ static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss,
620 struct nfs_client *clp = nfss->nfs_client; 638 struct nfs_client *clp = nfss->nfs_client;
621 639
622 seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr); 640 seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr);
623 seq_printf(m, ",minorversion=%u", clp->cl_minorversion);
624} 641}
625#else 642#else
626static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss, 643static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss,
@@ -629,6 +646,15 @@ static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss,
629} 646}
630#endif 647#endif
631 648
649static void nfs_show_nfs_version(struct seq_file *m,
650 unsigned int version,
651 unsigned int minorversion)
652{
653 seq_printf(m, ",vers=%u", version);
654 if (version == 4)
655 seq_printf(m, ".%u", minorversion);
656}
657
632/* 658/*
633 * Describe the mount options in force on this server representation 659 * Describe the mount options in force on this server representation
634 */ 660 */
@@ -656,7 +682,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
656 u32 version = clp->rpc_ops->version; 682 u32 version = clp->rpc_ops->version;
657 int local_flock, local_fcntl; 683 int local_flock, local_fcntl;
658 684
659 seq_printf(m, ",vers=%u", version); 685 nfs_show_nfs_version(m, version, clp->cl_minorversion);
660 seq_printf(m, ",rsize=%u", nfss->rsize); 686 seq_printf(m, ",rsize=%u", nfss->rsize);
661 seq_printf(m, ",wsize=%u", nfss->wsize); 687 seq_printf(m, ",wsize=%u", nfss->wsize);
662 if (nfss->bsize != 0) 688 if (nfss->bsize != 0)
@@ -676,8 +702,10 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
676 else 702 else
677 seq_puts(m, nfs_infop->nostr); 703 seq_puts(m, nfs_infop->nostr);
678 } 704 }
705 rcu_read_lock();
679 seq_printf(m, ",proto=%s", 706 seq_printf(m, ",proto=%s",
680 rpc_peeraddr2str(nfss->client, RPC_DISPLAY_NETID)); 707 rpc_peeraddr2str(nfss->client, RPC_DISPLAY_NETID));
708 rcu_read_unlock();
681 if (version == 4) { 709 if (version == 4) {
682 if (nfss->port != NFS_PORT) 710 if (nfss->port != NFS_PORT)
683 seq_printf(m, ",port=%u", nfss->port); 711 seq_printf(m, ",port=%u", nfss->port);
@@ -726,9 +754,11 @@ static int nfs_show_options(struct seq_file *m, struct dentry *root)
726 754
727 nfs_show_mount_options(m, nfss, 0); 755 nfs_show_mount_options(m, nfss, 0);
728 756
757 rcu_read_lock();
729 seq_printf(m, ",addr=%s", 758 seq_printf(m, ",addr=%s",
730 rpc_peeraddr2str(nfss->nfs_client->cl_rpcclient, 759 rpc_peeraddr2str(nfss->nfs_client->cl_rpcclient,
731 RPC_DISPLAY_ADDR)); 760 RPC_DISPLAY_ADDR));
761 rcu_read_unlock();
732 762
733 return 0; 763 return 0;
734} 764}
@@ -745,7 +775,6 @@ static void show_sessions(struct seq_file *m, struct nfs_server *server) {}
745#endif 775#endif
746#endif 776#endif
747 777
748#ifdef CONFIG_NFS_V4
749#ifdef CONFIG_NFS_V4_1 778#ifdef CONFIG_NFS_V4_1
750static void show_pnfs(struct seq_file *m, struct nfs_server *server) 779static void show_pnfs(struct seq_file *m, struct nfs_server *server)
751{ 780{
@@ -755,9 +784,26 @@ static void show_pnfs(struct seq_file *m, struct nfs_server *server)
755 else 784 else
756 seq_printf(m, "not configured"); 785 seq_printf(m, "not configured");
757} 786}
787
788static void show_implementation_id(struct seq_file *m, struct nfs_server *nfss)
789{
790 if (nfss->nfs_client && nfss->nfs_client->impl_id) {
791 struct nfs41_impl_id *impl_id = nfss->nfs_client->impl_id;
792 seq_printf(m, "\n\timpl_id:\tname='%s',domain='%s',"
793 "date='%llu,%u'",
794 impl_id->name, impl_id->domain,
795 impl_id->date.seconds, impl_id->date.nseconds);
796 }
797}
758#else 798#else
759static void show_pnfs(struct seq_file *m, struct nfs_server *server) {} 799#ifdef CONFIG_NFS_V4
800static void show_pnfs(struct seq_file *m, struct nfs_server *server)
801{
802}
760#endif 803#endif
804static void show_implementation_id(struct seq_file *m, struct nfs_server *nfss)
805{
806}
761#endif 807#endif
762 808
763static int nfs_show_devname(struct seq_file *m, struct dentry *root) 809static int nfs_show_devname(struct seq_file *m, struct dentry *root)
@@ -806,6 +852,8 @@ static int nfs_show_stats(struct seq_file *m, struct dentry *root)
806 852
807 seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ); 853 seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ);
808 854
855 show_implementation_id(m, nfss);
856
809 seq_printf(m, "\n\tcaps:\t"); 857 seq_printf(m, "\n\tcaps:\t");
810 seq_printf(m, "caps=0x%x", nfss->caps); 858 seq_printf(m, "caps=0x%x", nfss->caps);
811 seq_printf(m, ",wtmult=%u", nfss->wtmult); 859 seq_printf(m, ",wtmult=%u", nfss->wtmult);
@@ -908,6 +956,7 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int ve
908 data->auth_flavor_len = 1; 956 data->auth_flavor_len = 1;
909 data->version = version; 957 data->version = version;
910 data->minorversion = 0; 958 data->minorversion = 0;
959 data->net = current->nsproxy->net_ns;
911 security_init_mnt_opts(&data->lsm_opts); 960 security_init_mnt_opts(&data->lsm_opts);
912 } 961 }
913 return data; 962 return data;
@@ -1052,6 +1101,40 @@ static int nfs_parse_security_flavors(char *value,
1052 return 1; 1101 return 1;
1053} 1102}
1054 1103
1104static int nfs_parse_version_string(char *string,
1105 struct nfs_parsed_mount_data *mnt,
1106 substring_t *args)
1107{
1108 mnt->flags &= ~NFS_MOUNT_VER3;
1109 switch (match_token(string, nfs_vers_tokens, args)) {
1110 case Opt_vers_2:
1111 mnt->version = 2;
1112 break;
1113 case Opt_vers_3:
1114 mnt->flags |= NFS_MOUNT_VER3;
1115 mnt->version = 3;
1116 break;
1117 case Opt_vers_4:
1118 /* Backward compatibility option. In future,
1119 * the mount program should always supply
1120 * a NFSv4 minor version number.
1121 */
1122 mnt->version = 4;
1123 break;
1124 case Opt_vers_4_0:
1125 mnt->version = 4;
1126 mnt->minorversion = 0;
1127 break;
1128 case Opt_vers_4_1:
1129 mnt->version = 4;
1130 mnt->minorversion = 1;
1131 break;
1132 default:
1133 return 0;
1134 }
1135 return 1;
1136}
1137
1055static int nfs_get_option_str(substring_t args[], char **option) 1138static int nfs_get_option_str(substring_t args[], char **option)
1056{ 1139{
1057 kfree(*option); 1140 kfree(*option);
@@ -1157,18 +1240,6 @@ static int nfs_parse_mount_options(char *raw,
1157 mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK | 1240 mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK |
1158 NFS_MOUNT_LOCAL_FCNTL); 1241 NFS_MOUNT_LOCAL_FCNTL);
1159 break; 1242 break;
1160 case Opt_v2:
1161 mnt->flags &= ~NFS_MOUNT_VER3;
1162 mnt->version = 2;
1163 break;
1164 case Opt_v3:
1165 mnt->flags |= NFS_MOUNT_VER3;
1166 mnt->version = 3;
1167 break;
1168 case Opt_v4:
1169 mnt->flags &= ~NFS_MOUNT_VER3;
1170 mnt->version = 4;
1171 break;
1172 case Opt_udp: 1243 case Opt_udp:
1173 mnt->flags &= ~NFS_MOUNT_TCP; 1244 mnt->flags &= ~NFS_MOUNT_TCP;
1174 mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; 1245 mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
@@ -1295,26 +1366,6 @@ static int nfs_parse_mount_options(char *raw,
1295 goto out_invalid_value; 1366 goto out_invalid_value;
1296 mnt->mount_server.version = option; 1367 mnt->mount_server.version = option;
1297 break; 1368 break;
1298 case Opt_nfsvers:
1299 if (nfs_get_option_ul(args, &option))
1300 goto out_invalid_value;
1301 switch (option) {
1302 case NFS2_VERSION:
1303 mnt->flags &= ~NFS_MOUNT_VER3;
1304 mnt->version = 2;
1305 break;
1306 case NFS3_VERSION:
1307 mnt->flags |= NFS_MOUNT_VER3;
1308 mnt->version = 3;
1309 break;
1310 case NFS4_VERSION:
1311 mnt->flags &= ~NFS_MOUNT_VER3;
1312 mnt->version = 4;
1313 break;
1314 default:
1315 goto out_invalid_value;
1316 }
1317 break;
1318 case Opt_minorversion: 1369 case Opt_minorversion:
1319 if (nfs_get_option_ul(args, &option)) 1370 if (nfs_get_option_ul(args, &option))
1320 goto out_invalid_value; 1371 goto out_invalid_value;
@@ -1326,6 +1377,15 @@ static int nfs_parse_mount_options(char *raw,
1326 /* 1377 /*
1327 * options that take text values 1378 * options that take text values
1328 */ 1379 */
1380 case Opt_nfsvers:
1381 string = match_strdup(args);
1382 if (string == NULL)
1383 goto out_nomem;
1384 rc = nfs_parse_version_string(string, mnt, args);
1385 kfree(string);
1386 if (!rc)
1387 goto out_invalid_value;
1388 break;
1329 case Opt_sec: 1389 case Opt_sec:
1330 string = match_strdup(args); 1390 string = match_strdup(args);
1331 if (string == NULL) 1391 if (string == NULL)
@@ -1405,7 +1465,7 @@ static int nfs_parse_mount_options(char *raw,
1405 if (string == NULL) 1465 if (string == NULL)
1406 goto out_nomem; 1466 goto out_nomem;
1407 mnt->nfs_server.addrlen = 1467 mnt->nfs_server.addrlen =
1408 rpc_pton(string, strlen(string), 1468 rpc_pton(mnt->net, string, strlen(string),
1409 (struct sockaddr *) 1469 (struct sockaddr *)
1410 &mnt->nfs_server.address, 1470 &mnt->nfs_server.address,
1411 sizeof(mnt->nfs_server.address)); 1471 sizeof(mnt->nfs_server.address));
@@ -1427,7 +1487,7 @@ static int nfs_parse_mount_options(char *raw,
1427 if (string == NULL) 1487 if (string == NULL)
1428 goto out_nomem; 1488 goto out_nomem;
1429 mnt->mount_server.addrlen = 1489 mnt->mount_server.addrlen =
1430 rpc_pton(string, strlen(string), 1490 rpc_pton(mnt->net, string, strlen(string),
1431 (struct sockaddr *) 1491 (struct sockaddr *)
1432 &mnt->mount_server.address, 1492 &mnt->mount_server.address,
1433 sizeof(mnt->mount_server.address)); 1493 sizeof(mnt->mount_server.address));
@@ -1516,6 +1576,9 @@ static int nfs_parse_mount_options(char *raw,
1516 if (!sloppy && invalid_option) 1576 if (!sloppy && invalid_option)
1517 return 0; 1577 return 0;
1518 1578
1579 if (mnt->minorversion && mnt->version != 4)
1580 goto out_minorversion_mismatch;
1581
1519 /* 1582 /*
1520 * verify that any proto=/mountproto= options match the address 1583 * verify that any proto=/mountproto= options match the address
1521 * familiies in the addr=/mountaddr= options. 1584 * familiies in the addr=/mountaddr= options.
@@ -1549,6 +1612,10 @@ out_invalid_address:
1549out_invalid_value: 1612out_invalid_value:
1550 printk(KERN_INFO "NFS: bad mount option value specified: %s\n", p); 1613 printk(KERN_INFO "NFS: bad mount option value specified: %s\n", p);
1551 return 0; 1614 return 0;
1615out_minorversion_mismatch:
1616 printk(KERN_INFO "NFS: mount option vers=%u does not support "
1617 "minorversion=%u\n", mnt->version, mnt->minorversion);
1618 return 0;
1552out_nomem: 1619out_nomem:
1553 printk(KERN_INFO "NFS: not enough memory to parse option\n"); 1620 printk(KERN_INFO "NFS: not enough memory to parse option\n");
1554 return 0; 1621 return 0;
@@ -1622,6 +1689,7 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
1622 .noresvport = args->flags & NFS_MOUNT_NORESVPORT, 1689 .noresvport = args->flags & NFS_MOUNT_NORESVPORT,
1623 .auth_flav_len = &server_authlist_len, 1690 .auth_flav_len = &server_authlist_len,
1624 .auth_flavs = server_authlist, 1691 .auth_flavs = server_authlist,
1692 .net = args->net,
1625 }; 1693 };
1626 int status; 1694 int status;
1627 1695
@@ -2047,7 +2115,7 @@ static inline void nfs_initialise_sb(struct super_block *sb)
2047 2115
2048 /* We probably want something more informative here */ 2116 /* We probably want something more informative here */
2049 snprintf(sb->s_id, sizeof(sb->s_id), 2117 snprintf(sb->s_id, sizeof(sb->s_id),
2050 "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev)); 2118 "%u:%u", MAJOR(sb->s_dev), MINOR(sb->s_dev));
2051 2119
2052 if (sb->s_blocksize == 0) 2120 if (sb->s_blocksize == 0)
2053 sb->s_blocksize = nfs_block_bits(server->wsize, 2121 sb->s_blocksize = nfs_block_bits(server->wsize,
@@ -2499,12 +2567,6 @@ static int nfs4_validate_text_mount_data(void *options,
2499 return -EINVAL; 2567 return -EINVAL;
2500 } 2568 }
2501 2569
2502 if (args->client_address == NULL) {
2503 dfprintk(MOUNT,
2504 "NFS4: mount program didn't pass callback address\n");
2505 return -EINVAL;
2506 }
2507
2508 return nfs_parse_devname(dev_name, 2570 return nfs_parse_devname(dev_name,
2509 &args->nfs_server.hostname, 2571 &args->nfs_server.hostname,
2510 NFS4_MAXNAMLEN, 2572 NFS4_MAXNAMLEN,
@@ -2663,8 +2725,7 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags,
2663 if (!s->s_root) { 2725 if (!s->s_root) {
2664 /* initial superblock/root creation */ 2726 /* initial superblock/root creation */
2665 nfs4_fill_super(s); 2727 nfs4_fill_super(s);
2666 nfs_fscache_get_super_cookie( 2728 nfs_fscache_get_super_cookie(s, data->fscache_uniq, NULL);
2667 s, data ? data->fscache_uniq : NULL, NULL);
2668 } 2729 }
2669 2730
2670 mntroot = nfs4_get_root(s, mntfh, dev_name); 2731 mntroot = nfs4_get_root(s, mntfh, dev_name);
diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c
index 978aaeb8a093..ad4d2e787b20 100644
--- a/fs/nfs/sysctl.c
+++ b/fs/nfs/sysctl.c
@@ -32,7 +32,6 @@ static ctl_table nfs_cb_sysctls[] = {
32 .extra1 = (int *)&nfs_set_port_min, 32 .extra1 = (int *)&nfs_set_port_min,
33 .extra2 = (int *)&nfs_set_port_max, 33 .extra2 = (int *)&nfs_set_port_max,
34 }, 34 },
35#ifndef CONFIG_NFS_USE_NEW_IDMAPPER
36 { 35 {
37 .procname = "idmap_cache_timeout", 36 .procname = "idmap_cache_timeout",
38 .data = &nfs_idmap_cache_timeout, 37 .data = &nfs_idmap_cache_timeout,
@@ -40,7 +39,6 @@ static ctl_table nfs_cb_sysctls[] = {
40 .mode = 0644, 39 .mode = 0644,
41 .proc_handler = proc_dointvec_jiffies, 40 .proc_handler = proc_dointvec_jiffies,
42 }, 41 },
43#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */
44#endif 42#endif
45 { 43 {
46 .procname = "nfs_mountpoint_timeout", 44 .procname = "nfs_mountpoint_timeout",
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 4f9319a2e567..3210a03342f9 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -20,15 +20,6 @@
20#include "iostat.h" 20#include "iostat.h"
21#include "delegation.h" 21#include "delegation.h"
22 22
23struct nfs_unlinkdata {
24 struct hlist_node list;
25 struct nfs_removeargs args;
26 struct nfs_removeres res;
27 struct inode *dir;
28 struct rpc_cred *cred;
29 struct nfs_fattr dir_attr;
30};
31
32/** 23/**
33 * nfs_free_unlinkdata - release data from a sillydelete operation. 24 * nfs_free_unlinkdata - release data from a sillydelete operation.
34 * @data: pointer to unlink structure. 25 * @data: pointer to unlink structure.
@@ -107,25 +98,16 @@ static void nfs_async_unlink_release(void *calldata)
107 nfs_sb_deactive(sb); 98 nfs_sb_deactive(sb);
108} 99}
109 100
110#if defined(CONFIG_NFS_V4_1) 101static void nfs_unlink_prepare(struct rpc_task *task, void *calldata)
111void nfs_unlink_prepare(struct rpc_task *task, void *calldata)
112{ 102{
113 struct nfs_unlinkdata *data = calldata; 103 struct nfs_unlinkdata *data = calldata;
114 struct nfs_server *server = NFS_SERVER(data->dir); 104 NFS_PROTO(data->dir)->unlink_rpc_prepare(task, data);
115
116 if (nfs4_setup_sequence(server, &data->args.seq_args,
117 &data->res.seq_res, 1, task))
118 return;
119 rpc_call_start(task);
120} 105}
121#endif /* CONFIG_NFS_V4_1 */
122 106
123static const struct rpc_call_ops nfs_unlink_ops = { 107static const struct rpc_call_ops nfs_unlink_ops = {
124 .rpc_call_done = nfs_async_unlink_done, 108 .rpc_call_done = nfs_async_unlink_done,
125 .rpc_release = nfs_async_unlink_release, 109 .rpc_release = nfs_async_unlink_release,
126#if defined(CONFIG_NFS_V4_1)
127 .rpc_call_prepare = nfs_unlink_prepare, 110 .rpc_call_prepare = nfs_unlink_prepare,
128#endif /* CONFIG_NFS_V4_1 */
129}; 111};
130 112
131static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct nfs_unlinkdata *data) 113static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct nfs_unlinkdata *data)
@@ -341,18 +323,6 @@ nfs_cancel_async_unlink(struct dentry *dentry)
341 spin_unlock(&dentry->d_lock); 323 spin_unlock(&dentry->d_lock);
342} 324}
343 325
344struct nfs_renamedata {
345 struct nfs_renameargs args;
346 struct nfs_renameres res;
347 struct rpc_cred *cred;
348 struct inode *old_dir;
349 struct dentry *old_dentry;
350 struct nfs_fattr old_fattr;
351 struct inode *new_dir;
352 struct dentry *new_dentry;
353 struct nfs_fattr new_fattr;
354};
355
356/** 326/**
357 * nfs_async_rename_done - Sillyrename post-processing 327 * nfs_async_rename_done - Sillyrename post-processing
358 * @task: rpc_task of the sillyrename 328 * @task: rpc_task of the sillyrename
@@ -403,25 +373,16 @@ static void nfs_async_rename_release(void *calldata)
403 kfree(data); 373 kfree(data);
404} 374}
405 375
406#if defined(CONFIG_NFS_V4_1)
407static void nfs_rename_prepare(struct rpc_task *task, void *calldata) 376static void nfs_rename_prepare(struct rpc_task *task, void *calldata)
408{ 377{
409 struct nfs_renamedata *data = calldata; 378 struct nfs_renamedata *data = calldata;
410 struct nfs_server *server = NFS_SERVER(data->old_dir); 379 NFS_PROTO(data->old_dir)->rename_rpc_prepare(task, data);
411
412 if (nfs4_setup_sequence(server, &data->args.seq_args,
413 &data->res.seq_res, 1, task))
414 return;
415 rpc_call_start(task);
416} 380}
417#endif /* CONFIG_NFS_V4_1 */
418 381
419static const struct rpc_call_ops nfs_rename_ops = { 382static const struct rpc_call_ops nfs_rename_ops = {
420 .rpc_call_done = nfs_async_rename_done, 383 .rpc_call_done = nfs_async_rename_done,
421 .rpc_release = nfs_async_rename_release, 384 .rpc_release = nfs_async_rename_release,
422#if defined(CONFIG_NFS_V4_1)
423 .rpc_call_prepare = nfs_rename_prepare, 385 .rpc_call_prepare = nfs_rename_prepare,
424#endif /* CONFIG_NFS_V4_1 */
425}; 386};
426 387
427/** 388/**
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 834f0fe96f89..2c68818f68ac 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -100,7 +100,6 @@ void nfs_writedata_free(struct nfs_write_data *p)
100 100
101void nfs_writedata_release(struct nfs_write_data *wdata) 101void nfs_writedata_release(struct nfs_write_data *wdata)
102{ 102{
103 put_lseg(wdata->lseg);
104 put_nfs_open_context(wdata->args.context); 103 put_nfs_open_context(wdata->args.context);
105 nfs_writedata_free(wdata); 104 nfs_writedata_free(wdata);
106} 105}
@@ -236,10 +235,10 @@ static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblo
236 req = nfs_page_find_request_locked(page); 235 req = nfs_page_find_request_locked(page);
237 if (req == NULL) 236 if (req == NULL)
238 break; 237 break;
239 if (nfs_set_page_tag_locked(req)) 238 if (nfs_lock_request_dontget(req))
240 break; 239 break;
241 /* Note: If we hold the page lock, as is the case in nfs_writepage, 240 /* Note: If we hold the page lock, as is the case in nfs_writepage,
242 * then the call to nfs_set_page_tag_locked() will always 241 * then the call to nfs_lock_request_dontget() will always
243 * succeed provided that someone hasn't already marked the 242 * succeed provided that someone hasn't already marked the
244 * request as dirty (in which case we don't care). 243 * request as dirty (in which case we don't care).
245 */ 244 */
@@ -375,21 +374,14 @@ out_err:
375/* 374/*
376 * Insert a write request into an inode 375 * Insert a write request into an inode
377 */ 376 */
378static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) 377static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
379{ 378{
380 struct nfs_inode *nfsi = NFS_I(inode); 379 struct nfs_inode *nfsi = NFS_I(inode);
381 int error;
382
383 error = radix_tree_preload(GFP_NOFS);
384 if (error != 0)
385 goto out;
386 380
387 /* Lock the request! */ 381 /* Lock the request! */
388 nfs_lock_request_dontget(req); 382 nfs_lock_request_dontget(req);
389 383
390 spin_lock(&inode->i_lock); 384 spin_lock(&inode->i_lock);
391 error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req);
392 BUG_ON(error);
393 if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE)) 385 if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE))
394 inode->i_version++; 386 inode->i_version++;
395 set_bit(PG_MAPPED, &req->wb_flags); 387 set_bit(PG_MAPPED, &req->wb_flags);
@@ -397,12 +389,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
397 set_page_private(req->wb_page, (unsigned long)req); 389 set_page_private(req->wb_page, (unsigned long)req);
398 nfsi->npages++; 390 nfsi->npages++;
399 kref_get(&req->wb_kref); 391 kref_get(&req->wb_kref);
400 radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
401 NFS_PAGE_TAG_LOCKED);
402 spin_unlock(&inode->i_lock); 392 spin_unlock(&inode->i_lock);
403 radix_tree_preload_end();
404out:
405 return error;
406} 393}
407 394
408/* 395/*
@@ -419,7 +406,6 @@ static void nfs_inode_remove_request(struct nfs_page *req)
419 set_page_private(req->wb_page, 0); 406 set_page_private(req->wb_page, 0);
420 ClearPagePrivate(req->wb_page); 407 ClearPagePrivate(req->wb_page);
421 clear_bit(PG_MAPPED, &req->wb_flags); 408 clear_bit(PG_MAPPED, &req->wb_flags);
422 radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
423 nfsi->npages--; 409 nfsi->npages--;
424 spin_unlock(&inode->i_lock); 410 spin_unlock(&inode->i_lock);
425 nfs_release_request(req); 411 nfs_release_request(req);
@@ -432,39 +418,90 @@ nfs_mark_request_dirty(struct nfs_page *req)
432} 418}
433 419
434#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 420#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
435/* 421/**
436 * Add a request to the inode's commit list. 422 * nfs_request_add_commit_list - add request to a commit list
423 * @req: pointer to a struct nfs_page
424 * @head: commit list head
425 *
426 * This sets the PG_CLEAN bit, updates the inode global count of
427 * number of outstanding requests requiring a commit as well as
428 * the MM page stats.
429 *
430 * The caller must _not_ hold the inode->i_lock, but must be
431 * holding the nfs_page lock.
437 */ 432 */
438static void 433void
439nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) 434nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head)
440{ 435{
441 struct inode *inode = req->wb_context->dentry->d_inode; 436 struct inode *inode = req->wb_context->dentry->d_inode;
442 struct nfs_inode *nfsi = NFS_I(inode);
443 437
444 spin_lock(&inode->i_lock);
445 set_bit(PG_CLEAN, &(req)->wb_flags); 438 set_bit(PG_CLEAN, &(req)->wb_flags);
446 radix_tree_tag_set(&nfsi->nfs_page_tree, 439 spin_lock(&inode->i_lock);
447 req->wb_index, 440 nfs_list_add_request(req, head);
448 NFS_PAGE_TAG_COMMIT); 441 NFS_I(inode)->ncommit++;
449 nfsi->ncommit++;
450 spin_unlock(&inode->i_lock); 442 spin_unlock(&inode->i_lock);
451 pnfs_mark_request_commit(req, lseg);
452 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); 443 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
453 inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); 444 inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE);
454 __mark_inode_dirty(inode, I_DIRTY_DATASYNC); 445 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
455} 446}
447EXPORT_SYMBOL_GPL(nfs_request_add_commit_list);
456 448
457static int 449/**
450 * nfs_request_remove_commit_list - Remove request from a commit list
451 * @req: pointer to a nfs_page
452 *
453 * This clears the PG_CLEAN bit, and updates the inode global count of
454 * number of outstanding requests requiring a commit
455 * It does not update the MM page stats.
456 *
457 * The caller _must_ hold the inode->i_lock and the nfs_page lock.
458 */
459void
460nfs_request_remove_commit_list(struct nfs_page *req)
461{
462 struct inode *inode = req->wb_context->dentry->d_inode;
463
464 if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags))
465 return;
466 nfs_list_remove_request(req);
467 NFS_I(inode)->ncommit--;
468}
469EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list);
470
471
472/*
473 * Add a request to the inode's commit list.
474 */
475static void
476nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
477{
478 struct inode *inode = req->wb_context->dentry->d_inode;
479
480 if (pnfs_mark_request_commit(req, lseg))
481 return;
482 nfs_request_add_commit_list(req, &NFS_I(inode)->commit_list);
483}
484
485static void
486nfs_clear_page_commit(struct page *page)
487{
488 dec_zone_page_state(page, NR_UNSTABLE_NFS);
489 dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE);
490}
491
492static void
458nfs_clear_request_commit(struct nfs_page *req) 493nfs_clear_request_commit(struct nfs_page *req)
459{ 494{
460 struct page *page = req->wb_page; 495 if (test_bit(PG_CLEAN, &req->wb_flags)) {
496 struct inode *inode = req->wb_context->dentry->d_inode;
461 497
462 if (test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) { 498 if (!pnfs_clear_request_commit(req)) {
463 dec_zone_page_state(page, NR_UNSTABLE_NFS); 499 spin_lock(&inode->i_lock);
464 dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE); 500 nfs_request_remove_commit_list(req);
465 return 1; 501 spin_unlock(&inode->i_lock);
502 }
503 nfs_clear_page_commit(req->wb_page);
466 } 504 }
467 return 0;
468} 505}
469 506
470static inline 507static inline
@@ -491,15 +528,14 @@ int nfs_reschedule_unstable_write(struct nfs_page *req,
491 return 0; 528 return 0;
492} 529}
493#else 530#else
494static inline void 531static void
495nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) 532nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
496{ 533{
497} 534}
498 535
499static inline int 536static void
500nfs_clear_request_commit(struct nfs_page *req) 537nfs_clear_request_commit(struct nfs_page *req)
501{ 538{
502 return 0;
503} 539}
504 540
505static inline 541static inline
@@ -520,46 +556,65 @@ int nfs_reschedule_unstable_write(struct nfs_page *req,
520static int 556static int
521nfs_need_commit(struct nfs_inode *nfsi) 557nfs_need_commit(struct nfs_inode *nfsi)
522{ 558{
523 return radix_tree_tagged(&nfsi->nfs_page_tree, NFS_PAGE_TAG_COMMIT); 559 return nfsi->ncommit > 0;
560}
561
562/* i_lock held by caller */
563static int
564nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max,
565 spinlock_t *lock)
566{
567 struct nfs_page *req, *tmp;
568 int ret = 0;
569
570 list_for_each_entry_safe(req, tmp, src, wb_list) {
571 if (!nfs_lock_request(req))
572 continue;
573 if (cond_resched_lock(lock))
574 list_safe_reset_next(req, tmp, wb_list);
575 nfs_request_remove_commit_list(req);
576 nfs_list_add_request(req, dst);
577 ret++;
578 if (ret == max)
579 break;
580 }
581 return ret;
524} 582}
525 583
526/* 584/*
527 * nfs_scan_commit - Scan an inode for commit requests 585 * nfs_scan_commit - Scan an inode for commit requests
528 * @inode: NFS inode to scan 586 * @inode: NFS inode to scan
529 * @dst: destination list 587 * @dst: destination list
530 * @idx_start: lower bound of page->index to scan.
531 * @npages: idx_start + npages sets the upper bound to scan.
532 * 588 *
533 * Moves requests from the inode's 'commit' request list. 589 * Moves requests from the inode's 'commit' request list.
534 * The requests are *not* checked to ensure that they form a contiguous set. 590 * The requests are *not* checked to ensure that they form a contiguous set.
535 */ 591 */
536static int 592static int
537nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages) 593nfs_scan_commit(struct inode *inode, struct list_head *dst)
538{ 594{
539 struct nfs_inode *nfsi = NFS_I(inode); 595 struct nfs_inode *nfsi = NFS_I(inode);
540 int ret; 596 int ret = 0;
541
542 if (!nfs_need_commit(nfsi))
543 return 0;
544 597
545 spin_lock(&inode->i_lock); 598 spin_lock(&inode->i_lock);
546 ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT); 599 if (nfsi->ncommit > 0) {
547 if (ret > 0) 600 const int max = INT_MAX;
548 nfsi->ncommit -= ret;
549 spin_unlock(&inode->i_lock);
550
551 if (nfs_need_commit(NFS_I(inode)))
552 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
553 601
602 ret = nfs_scan_commit_list(&nfsi->commit_list, dst, max,
603 &inode->i_lock);
604 ret += pnfs_scan_commit_lists(inode, max - ret,
605 &inode->i_lock);
606 }
607 spin_unlock(&inode->i_lock);
554 return ret; 608 return ret;
555} 609}
610
556#else 611#else
557static inline int nfs_need_commit(struct nfs_inode *nfsi) 612static inline int nfs_need_commit(struct nfs_inode *nfsi)
558{ 613{
559 return 0; 614 return 0;
560} 615}
561 616
562static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages) 617static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst)
563{ 618{
564 return 0; 619 return 0;
565} 620}
@@ -604,7 +659,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
604 || end < req->wb_offset) 659 || end < req->wb_offset)
605 goto out_flushme; 660 goto out_flushme;
606 661
607 if (nfs_set_page_tag_locked(req)) 662 if (nfs_lock_request_dontget(req))
608 break; 663 break;
609 664
610 /* The request is locked, so wait and then retry */ 665 /* The request is locked, so wait and then retry */
@@ -616,13 +671,6 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
616 spin_lock(&inode->i_lock); 671 spin_lock(&inode->i_lock);
617 } 672 }
618 673
619 if (nfs_clear_request_commit(req) &&
620 radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree,
621 req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL) {
622 NFS_I(inode)->ncommit--;
623 pnfs_clear_request_commit(req);
624 }
625
626 /* Okay, the request matches. Update the region */ 674 /* Okay, the request matches. Update the region */
627 if (offset < req->wb_offset) { 675 if (offset < req->wb_offset) {
628 req->wb_offset = offset; 676 req->wb_offset = offset;
@@ -634,6 +682,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
634 req->wb_bytes = rqend - req->wb_offset; 682 req->wb_bytes = rqend - req->wb_offset;
635out_unlock: 683out_unlock:
636 spin_unlock(&inode->i_lock); 684 spin_unlock(&inode->i_lock);
685 nfs_clear_request_commit(req);
637 return req; 686 return req;
638out_flushme: 687out_flushme:
639 spin_unlock(&inode->i_lock); 688 spin_unlock(&inode->i_lock);
@@ -655,7 +704,6 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
655{ 704{
656 struct inode *inode = page->mapping->host; 705 struct inode *inode = page->mapping->host;
657 struct nfs_page *req; 706 struct nfs_page *req;
658 int error;
659 707
660 req = nfs_try_to_update_request(inode, page, offset, bytes); 708 req = nfs_try_to_update_request(inode, page, offset, bytes);
661 if (req != NULL) 709 if (req != NULL)
@@ -663,11 +711,7 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
663 req = nfs_create_request(ctx, inode, page, offset, bytes); 711 req = nfs_create_request(ctx, inode, page, offset, bytes);
664 if (IS_ERR(req)) 712 if (IS_ERR(req))
665 goto out; 713 goto out;
666 error = nfs_inode_add_request(inode, req); 714 nfs_inode_add_request(inode, req);
667 if (error != 0) {
668 nfs_release_request(req);
669 req = ERR_PTR(error);
670 }
671out: 715out:
672 return req; 716 return req;
673} 717}
@@ -684,7 +728,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
684 nfs_grow_file(page, offset, count); 728 nfs_grow_file(page, offset, count);
685 nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); 729 nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
686 nfs_mark_request_dirty(req); 730 nfs_mark_request_dirty(req);
687 nfs_clear_page_tag_locked(req); 731 nfs_unlock_request(req);
688 return 0; 732 return 0;
689} 733}
690 734
@@ -777,7 +821,7 @@ static void nfs_writepage_release(struct nfs_page *req,
777 821
778 if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, data)) 822 if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, data))
779 nfs_inode_remove_request(req); 823 nfs_inode_remove_request(req);
780 nfs_clear_page_tag_locked(req); 824 nfs_unlock_request(req);
781 nfs_end_page_writeback(page); 825 nfs_end_page_writeback(page);
782} 826}
783 827
@@ -925,7 +969,7 @@ static void nfs_redirty_request(struct nfs_page *req)
925 struct page *page = req->wb_page; 969 struct page *page = req->wb_page;
926 970
927 nfs_mark_request_dirty(req); 971 nfs_mark_request_dirty(req);
928 nfs_clear_page_tag_locked(req); 972 nfs_unlock_request(req);
929 nfs_end_page_writeback(page); 973 nfs_end_page_writeback(page);
930} 974}
931 975
@@ -1128,23 +1172,14 @@ out:
1128 nfs_writedata_release(calldata); 1172 nfs_writedata_release(calldata);
1129} 1173}
1130 1174
1131#if defined(CONFIG_NFS_V4_1)
1132void nfs_write_prepare(struct rpc_task *task, void *calldata) 1175void nfs_write_prepare(struct rpc_task *task, void *calldata)
1133{ 1176{
1134 struct nfs_write_data *data = calldata; 1177 struct nfs_write_data *data = calldata;
1135 1178 NFS_PROTO(data->inode)->write_rpc_prepare(task, data);
1136 if (nfs4_setup_sequence(NFS_SERVER(data->inode),
1137 &data->args.seq_args,
1138 &data->res.seq_res, 1, task))
1139 return;
1140 rpc_call_start(task);
1141} 1179}
1142#endif /* CONFIG_NFS_V4_1 */
1143 1180
1144static const struct rpc_call_ops nfs_write_partial_ops = { 1181static const struct rpc_call_ops nfs_write_partial_ops = {
1145#if defined(CONFIG_NFS_V4_1)
1146 .rpc_call_prepare = nfs_write_prepare, 1182 .rpc_call_prepare = nfs_write_prepare,
1147#endif /* CONFIG_NFS_V4_1 */
1148 .rpc_call_done = nfs_writeback_done_partial, 1183 .rpc_call_done = nfs_writeback_done_partial,
1149 .rpc_release = nfs_writeback_release_partial, 1184 .rpc_release = nfs_writeback_release_partial,
1150}; 1185};
@@ -1199,16 +1234,14 @@ static void nfs_writeback_release_full(void *calldata)
1199remove_request: 1234remove_request:
1200 nfs_inode_remove_request(req); 1235 nfs_inode_remove_request(req);
1201 next: 1236 next:
1202 nfs_clear_page_tag_locked(req); 1237 nfs_unlock_request(req);
1203 nfs_end_page_writeback(page); 1238 nfs_end_page_writeback(page);
1204 } 1239 }
1205 nfs_writedata_release(calldata); 1240 nfs_writedata_release(calldata);
1206} 1241}
1207 1242
1208static const struct rpc_call_ops nfs_write_full_ops = { 1243static const struct rpc_call_ops nfs_write_full_ops = {
1209#if defined(CONFIG_NFS_V4_1)
1210 .rpc_call_prepare = nfs_write_prepare, 1244 .rpc_call_prepare = nfs_write_prepare,
1211#endif /* CONFIG_NFS_V4_1 */
1212 .rpc_call_done = nfs_writeback_done_full, 1245 .rpc_call_done = nfs_writeback_done_full,
1213 .rpc_release = nfs_writeback_release_full, 1246 .rpc_release = nfs_writeback_release_full,
1214}; 1247};
@@ -1325,7 +1358,6 @@ void nfs_commitdata_release(void *data)
1325{ 1358{
1326 struct nfs_write_data *wdata = data; 1359 struct nfs_write_data *wdata = data;
1327 1360
1328 put_lseg(wdata->lseg);
1329 put_nfs_open_context(wdata->args.context); 1361 put_nfs_open_context(wdata->args.context);
1330 nfs_commit_free(wdata); 1362 nfs_commit_free(wdata);
1331} 1363}
@@ -1411,7 +1443,7 @@ void nfs_retry_commit(struct list_head *page_list,
1411 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); 1443 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
1412 dec_bdi_stat(req->wb_page->mapping->backing_dev_info, 1444 dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
1413 BDI_RECLAIMABLE); 1445 BDI_RECLAIMABLE);
1414 nfs_clear_page_tag_locked(req); 1446 nfs_unlock_request(req);
1415 } 1447 }
1416} 1448}
1417EXPORT_SYMBOL_GPL(nfs_retry_commit); 1449EXPORT_SYMBOL_GPL(nfs_retry_commit);
@@ -1460,7 +1492,7 @@ void nfs_commit_release_pages(struct nfs_write_data *data)
1460 while (!list_empty(&data->pages)) { 1492 while (!list_empty(&data->pages)) {
1461 req = nfs_list_entry(data->pages.next); 1493 req = nfs_list_entry(data->pages.next);
1462 nfs_list_remove_request(req); 1494 nfs_list_remove_request(req);
1463 nfs_clear_request_commit(req); 1495 nfs_clear_page_commit(req->wb_page);
1464 1496
1465 dprintk("NFS: commit (%s/%lld %d@%lld)", 1497 dprintk("NFS: commit (%s/%lld %d@%lld)",
1466 req->wb_context->dentry->d_sb->s_id, 1498 req->wb_context->dentry->d_sb->s_id,
@@ -1486,7 +1518,7 @@ void nfs_commit_release_pages(struct nfs_write_data *data)
1486 dprintk(" mismatch\n"); 1518 dprintk(" mismatch\n");
1487 nfs_mark_request_dirty(req); 1519 nfs_mark_request_dirty(req);
1488 next: 1520 next:
1489 nfs_clear_page_tag_locked(req); 1521 nfs_unlock_request(req);
1490 } 1522 }
1491} 1523}
1492EXPORT_SYMBOL_GPL(nfs_commit_release_pages); 1524EXPORT_SYMBOL_GPL(nfs_commit_release_pages);
@@ -1501,9 +1533,7 @@ static void nfs_commit_release(void *calldata)
1501} 1533}
1502 1534
1503static const struct rpc_call_ops nfs_commit_ops = { 1535static const struct rpc_call_ops nfs_commit_ops = {
1504#if defined(CONFIG_NFS_V4_1)
1505 .rpc_call_prepare = nfs_write_prepare, 1536 .rpc_call_prepare = nfs_write_prepare,
1506#endif /* CONFIG_NFS_V4_1 */
1507 .rpc_call_done = nfs_commit_done, 1537 .rpc_call_done = nfs_commit_done,
1508 .rpc_release = nfs_commit_release, 1538 .rpc_release = nfs_commit_release,
1509}; 1539};
@@ -1517,7 +1547,7 @@ int nfs_commit_inode(struct inode *inode, int how)
1517 res = nfs_commit_set_lock(NFS_I(inode), may_wait); 1547 res = nfs_commit_set_lock(NFS_I(inode), may_wait);
1518 if (res <= 0) 1548 if (res <= 0)
1519 goto out_mark_dirty; 1549 goto out_mark_dirty;
1520 res = nfs_scan_commit(inode, &head, 0, 0); 1550 res = nfs_scan_commit(inode, &head);
1521 if (res) { 1551 if (res) {
1522 int error; 1552 int error;
1523 1553
@@ -1635,6 +1665,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
1635 if (req == NULL) 1665 if (req == NULL)
1636 break; 1666 break;
1637 if (nfs_lock_request_dontget(req)) { 1667 if (nfs_lock_request_dontget(req)) {
1668 nfs_clear_request_commit(req);
1638 nfs_inode_remove_request(req); 1669 nfs_inode_remove_request(req);
1639 /* 1670 /*
1640 * In case nfs_inode_remove_request has marked the 1671 * In case nfs_inode_remove_request has marked the
diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c
index ce7f0758d84c..9559ce468732 100644
--- a/fs/nfsd/fault_inject.c
+++ b/fs/nfsd/fault_inject.c
@@ -72,7 +72,7 @@ int nfsd_fault_inject_init(void)
72{ 72{
73 unsigned int i; 73 unsigned int i;
74 struct nfsd_fault_inject_op *op; 74 struct nfsd_fault_inject_op *op;
75 mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; 75 umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
76 76
77 debug_dir = debugfs_create_dir("nfsd", NULL); 77 debug_dir = debugfs_create_dir("nfsd", NULL);
78 if (!debug_dir) 78 if (!debug_dir)
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 6f3ebb48b12f..0e262f32ac41 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -605,24 +605,24 @@ static struct rpc_version nfs_cb_version4 = {
605 .procs = nfs4_cb_procedures 605 .procs = nfs4_cb_procedures
606}; 606};
607 607
608static struct rpc_version *nfs_cb_version[] = { 608static const struct rpc_version *nfs_cb_version[] = {
609 &nfs_cb_version4, 609 &nfs_cb_version4,
610}; 610};
611 611
612static struct rpc_program cb_program; 612static const struct rpc_program cb_program;
613 613
614static struct rpc_stat cb_stats = { 614static struct rpc_stat cb_stats = {
615 .program = &cb_program 615 .program = &cb_program
616}; 616};
617 617
618#define NFS4_CALLBACK 0x40000000 618#define NFS4_CALLBACK 0x40000000
619static struct rpc_program cb_program = { 619static const struct rpc_program cb_program = {
620 .name = "nfs4_cb", 620 .name = "nfs4_cb",
621 .number = NFS4_CALLBACK, 621 .number = NFS4_CALLBACK,
622 .nrvers = ARRAY_SIZE(nfs_cb_version), 622 .nrvers = ARRAY_SIZE(nfs_cb_version),
623 .version = nfs_cb_version, 623 .version = nfs_cb_version,
624 .stats = &cb_stats, 624 .stats = &cb_stats,
625 .pipe_dir_name = "/nfsd4_cb", 625 .pipe_dir_name = "nfsd4_cb",
626}; 626};
627 627
628static int max_cb_time(void) 628static int max_cb_time(void)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index e8c98f009670..c5cddd659429 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1308,7 +1308,7 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_r
1308 else 1308 else
1309 goto out_err; 1309 goto out_err;
1310 1310
1311 conn->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val, 1311 conn->cb_addrlen = rpc_uaddr2sockaddr(&init_net, se->se_callback_addr_val,
1312 se->se_callback_addr_len, 1312 se->se_callback_addr_len,
1313 (struct sockaddr *)&conn->cb_addr, 1313 (struct sockaddr *)&conn->cb_addr,
1314 sizeof(conn->cb_addr)); 1314 sizeof(conn->cb_addr));
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 748eda93ce59..64c24af8d7ea 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -223,7 +223,7 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size)
223 if (qword_get(&buf, fo_path, size) < 0) 223 if (qword_get(&buf, fo_path, size) < 0)
224 return -EINVAL; 224 return -EINVAL;
225 225
226 if (rpc_pton(fo_path, size, sap, salen) == 0) 226 if (rpc_pton(&init_net, fo_path, size, sap, salen) == 0)
227 return -EINVAL; 227 return -EINVAL;
228 228
229 return nlmsvc_unlock_all_by_ip(sap); 229 return nlmsvc_unlock_all_by_ip(sap);
@@ -722,7 +722,7 @@ static ssize_t __write_ports_addxprt(char *buf)
722 nfsd_serv->sv_nrthreads--; 722 nfsd_serv->sv_nrthreads--;
723 return 0; 723 return 0;
724out_close: 724out_close:
725 xprt = svc_find_xprt(nfsd_serv, transport, PF_INET, port); 725 xprt = svc_find_xprt(nfsd_serv, transport, &init_net, PF_INET, port);
726 if (xprt != NULL) { 726 if (xprt != NULL) {
727 svc_close_xprt(xprt); 727 svc_close_xprt(xprt);
728 svc_xprt_put(xprt); 728 svc_xprt_put(xprt);
@@ -748,7 +748,7 @@ static ssize_t __write_ports_delxprt(char *buf)
748 if (port < 1 || port > USHRT_MAX || nfsd_serv == NULL) 748 if (port < 1 || port > USHRT_MAX || nfsd_serv == NULL)
749 return -EINVAL; 749 return -EINVAL;
750 750
751 xprt = svc_find_xprt(nfsd_serv, transport, AF_UNSPEC, port); 751 xprt = svc_find_xprt(nfsd_serv, transport, &init_net, AF_UNSPEC, port);
752 if (xprt == NULL) 752 if (xprt == NULL)
753 return -ENOTCONN; 753 return -ENOTCONN;
754 754
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index eda7d7e55e05..fce472f5f39e 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -251,13 +251,13 @@ static void nfsd_shutdown(void)
251 nfsd_up = false; 251 nfsd_up = false;
252} 252}
253 253
254static void nfsd_last_thread(struct svc_serv *serv) 254static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
255{ 255{
256 /* When last nfsd thread exits we need to do some clean-up */ 256 /* When last nfsd thread exits we need to do some clean-up */
257 nfsd_serv = NULL; 257 nfsd_serv = NULL;
258 nfsd_shutdown(); 258 nfsd_shutdown();
259 259
260 svc_rpcb_cleanup(serv); 260 svc_rpcb_cleanup(serv, net);
261 261
262 printk(KERN_WARNING "nfsd: last server has exited, flushing export " 262 printk(KERN_WARNING "nfsd: last server has exited, flushing export "
263 "cache\n"); 263 "cache\n");
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index a2e2402b2afb..6d4521feb6e3 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -25,6 +25,7 @@
25#include <linux/module.h> 25#include <linux/module.h>
26#include <linux/sunrpc/stats.h> 26#include <linux/sunrpc/stats.h>
27#include <linux/nfsd/stats.h> 27#include <linux/nfsd/stats.h>
28#include <net/net_namespace.h>
28 29
29#include "nfsd.h" 30#include "nfsd.h"
30 31
@@ -94,11 +95,11 @@ static const struct file_operations nfsd_proc_fops = {
94void 95void
95nfsd_stat_init(void) 96nfsd_stat_init(void)
96{ 97{
97 svc_proc_register(&nfsd_svcstats, &nfsd_proc_fops); 98 svc_proc_register(&init_net, &nfsd_svcstats, &nfsd_proc_fops);
98} 99}
99 100
100void 101void
101nfsd_stat_shutdown(void) 102nfsd_stat_shutdown(void)
102{ 103{
103 svc_proc_unregister("nfsd"); 104 svc_proc_unregister(&init_net, "nfsd");
104} 105}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index edf6d3ed8777..e59f71d0cf73 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1541,30 +1541,31 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1541__be32 1541__be32
1542nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp) 1542nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp)
1543{ 1543{
1544 struct dentry *dentry;
1545 struct inode *inode; 1544 struct inode *inode;
1546 mm_segment_t oldfs; 1545 mm_segment_t oldfs;
1547 __be32 err; 1546 __be32 err;
1548 int host_err; 1547 int host_err;
1548 struct path path;
1549 1549
1550 err = fh_verify(rqstp, fhp, S_IFLNK, NFSD_MAY_NOP); 1550 err = fh_verify(rqstp, fhp, S_IFLNK, NFSD_MAY_NOP);
1551 if (err) 1551 if (err)
1552 goto out; 1552 goto out;
1553 1553
1554 dentry = fhp->fh_dentry; 1554 path.mnt = fhp->fh_export->ex_path.mnt;
1555 inode = dentry->d_inode; 1555 path.dentry = fhp->fh_dentry;
1556 inode = path.dentry->d_inode;
1556 1557
1557 err = nfserr_inval; 1558 err = nfserr_inval;
1558 if (!inode->i_op->readlink) 1559 if (!inode->i_op->readlink)
1559 goto out; 1560 goto out;
1560 1561
1561 touch_atime(fhp->fh_export->ex_path.mnt, dentry); 1562 touch_atime(&path);
1562 /* N.B. Why does this call need a get_fs()?? 1563 /* N.B. Why does this call need a get_fs()??
1563 * Remove the set_fs and watch the fireworks:-) --okir 1564 * Remove the set_fs and watch the fireworks:-) --okir
1564 */ 1565 */
1565 1566
1566 oldfs = get_fs(); set_fs(KERNEL_DS); 1567 oldfs = get_fs(); set_fs(KERNEL_DS);
1567 host_err = inode->i_op->readlink(dentry, buf, *lenp); 1568 host_err = inode->i_op->readlink(path.dentry, buf, *lenp);
1568 set_fs(oldfs); 1569 set_fs(oldfs);
1569 1570
1570 if (host_err < 0) 1571 if (host_err < 0)
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index c9b342c8b503..dab5c4c6dfaf 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -218,11 +218,11 @@ int nilfs_cpfile_get_checkpoint(struct inode *cpfile,
218 kaddr, 1); 218 kaddr, 1);
219 mark_buffer_dirty(cp_bh); 219 mark_buffer_dirty(cp_bh);
220 220
221 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 221 kaddr = kmap_atomic(header_bh->b_page);
222 header = nilfs_cpfile_block_get_header(cpfile, header_bh, 222 header = nilfs_cpfile_block_get_header(cpfile, header_bh,
223 kaddr); 223 kaddr);
224 le64_add_cpu(&header->ch_ncheckpoints, 1); 224 le64_add_cpu(&header->ch_ncheckpoints, 1);
225 kunmap_atomic(kaddr, KM_USER0); 225 kunmap_atomic(kaddr);
226 mark_buffer_dirty(header_bh); 226 mark_buffer_dirty(header_bh);
227 nilfs_mdt_mark_dirty(cpfile); 227 nilfs_mdt_mark_dirty(cpfile);
228 } 228 }
@@ -313,7 +313,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
313 continue; 313 continue;
314 } 314 }
315 315
316 kaddr = kmap_atomic(cp_bh->b_page, KM_USER0); 316 kaddr = kmap_atomic(cp_bh->b_page);
317 cp = nilfs_cpfile_block_get_checkpoint( 317 cp = nilfs_cpfile_block_get_checkpoint(
318 cpfile, cno, cp_bh, kaddr); 318 cpfile, cno, cp_bh, kaddr);
319 nicps = 0; 319 nicps = 0;
@@ -334,7 +334,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
334 cpfile, cp_bh, kaddr, nicps); 334 cpfile, cp_bh, kaddr, nicps);
335 if (count == 0) { 335 if (count == 0) {
336 /* make hole */ 336 /* make hole */
337 kunmap_atomic(kaddr, KM_USER0); 337 kunmap_atomic(kaddr);
338 brelse(cp_bh); 338 brelse(cp_bh);
339 ret = 339 ret =
340 nilfs_cpfile_delete_checkpoint_block( 340 nilfs_cpfile_delete_checkpoint_block(
@@ -349,18 +349,18 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
349 } 349 }
350 } 350 }
351 351
352 kunmap_atomic(kaddr, KM_USER0); 352 kunmap_atomic(kaddr);
353 brelse(cp_bh); 353 brelse(cp_bh);
354 } 354 }
355 355
356 if (tnicps > 0) { 356 if (tnicps > 0) {
357 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 357 kaddr = kmap_atomic(header_bh->b_page);
358 header = nilfs_cpfile_block_get_header(cpfile, header_bh, 358 header = nilfs_cpfile_block_get_header(cpfile, header_bh,
359 kaddr); 359 kaddr);
360 le64_add_cpu(&header->ch_ncheckpoints, -(u64)tnicps); 360 le64_add_cpu(&header->ch_ncheckpoints, -(u64)tnicps);
361 mark_buffer_dirty(header_bh); 361 mark_buffer_dirty(header_bh);
362 nilfs_mdt_mark_dirty(cpfile); 362 nilfs_mdt_mark_dirty(cpfile);
363 kunmap_atomic(kaddr, KM_USER0); 363 kunmap_atomic(kaddr);
364 } 364 }
365 365
366 brelse(header_bh); 366 brelse(header_bh);
@@ -408,7 +408,7 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop,
408 continue; /* skip hole */ 408 continue; /* skip hole */
409 } 409 }
410 410
411 kaddr = kmap_atomic(bh->b_page, KM_USER0); 411 kaddr = kmap_atomic(bh->b_page);
412 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr); 412 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr);
413 for (i = 0; i < ncps && n < nci; i++, cp = (void *)cp + cpsz) { 413 for (i = 0; i < ncps && n < nci; i++, cp = (void *)cp + cpsz) {
414 if (!nilfs_checkpoint_invalid(cp)) { 414 if (!nilfs_checkpoint_invalid(cp)) {
@@ -418,7 +418,7 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop,
418 n++; 418 n++;
419 } 419 }
420 } 420 }
421 kunmap_atomic(kaddr, KM_USER0); 421 kunmap_atomic(kaddr);
422 brelse(bh); 422 brelse(bh);
423 } 423 }
424 424
@@ -451,10 +451,10 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
451 ret = nilfs_cpfile_get_header_block(cpfile, &bh); 451 ret = nilfs_cpfile_get_header_block(cpfile, &bh);
452 if (ret < 0) 452 if (ret < 0)
453 goto out; 453 goto out;
454 kaddr = kmap_atomic(bh->b_page, KM_USER0); 454 kaddr = kmap_atomic(bh->b_page);
455 header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr); 455 header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr);
456 curr = le64_to_cpu(header->ch_snapshot_list.ssl_next); 456 curr = le64_to_cpu(header->ch_snapshot_list.ssl_next);
457 kunmap_atomic(kaddr, KM_USER0); 457 kunmap_atomic(kaddr);
458 brelse(bh); 458 brelse(bh);
459 if (curr == 0) { 459 if (curr == 0) {
460 ret = 0; 460 ret = 0;
@@ -472,7 +472,7 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
472 ret = 0; /* No snapshots (started from a hole block) */ 472 ret = 0; /* No snapshots (started from a hole block) */
473 goto out; 473 goto out;
474 } 474 }
475 kaddr = kmap_atomic(bh->b_page, KM_USER0); 475 kaddr = kmap_atomic(bh->b_page);
476 while (n < nci) { 476 while (n < nci) {
477 cp = nilfs_cpfile_block_get_checkpoint(cpfile, curr, bh, kaddr); 477 cp = nilfs_cpfile_block_get_checkpoint(cpfile, curr, bh, kaddr);
478 curr = ~(__u64)0; /* Terminator */ 478 curr = ~(__u64)0; /* Terminator */
@@ -488,7 +488,7 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
488 488
489 next_blkoff = nilfs_cpfile_get_blkoff(cpfile, next); 489 next_blkoff = nilfs_cpfile_get_blkoff(cpfile, next);
490 if (curr_blkoff != next_blkoff) { 490 if (curr_blkoff != next_blkoff) {
491 kunmap_atomic(kaddr, KM_USER0); 491 kunmap_atomic(kaddr);
492 brelse(bh); 492 brelse(bh);
493 ret = nilfs_cpfile_get_checkpoint_block(cpfile, next, 493 ret = nilfs_cpfile_get_checkpoint_block(cpfile, next,
494 0, &bh); 494 0, &bh);
@@ -496,12 +496,12 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
496 WARN_ON(ret == -ENOENT); 496 WARN_ON(ret == -ENOENT);
497 goto out; 497 goto out;
498 } 498 }
499 kaddr = kmap_atomic(bh->b_page, KM_USER0); 499 kaddr = kmap_atomic(bh->b_page);
500 } 500 }
501 curr = next; 501 curr = next;
502 curr_blkoff = next_blkoff; 502 curr_blkoff = next_blkoff;
503 } 503 }
504 kunmap_atomic(kaddr, KM_USER0); 504 kunmap_atomic(kaddr);
505 brelse(bh); 505 brelse(bh);
506 *cnop = curr; 506 *cnop = curr;
507 ret = n; 507 ret = n;
@@ -592,24 +592,24 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
592 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); 592 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
593 if (ret < 0) 593 if (ret < 0)
594 goto out_sem; 594 goto out_sem;
595 kaddr = kmap_atomic(cp_bh->b_page, KM_USER0); 595 kaddr = kmap_atomic(cp_bh->b_page);
596 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); 596 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
597 if (nilfs_checkpoint_invalid(cp)) { 597 if (nilfs_checkpoint_invalid(cp)) {
598 ret = -ENOENT; 598 ret = -ENOENT;
599 kunmap_atomic(kaddr, KM_USER0); 599 kunmap_atomic(kaddr);
600 goto out_cp; 600 goto out_cp;
601 } 601 }
602 if (nilfs_checkpoint_snapshot(cp)) { 602 if (nilfs_checkpoint_snapshot(cp)) {
603 ret = 0; 603 ret = 0;
604 kunmap_atomic(kaddr, KM_USER0); 604 kunmap_atomic(kaddr);
605 goto out_cp; 605 goto out_cp;
606 } 606 }
607 kunmap_atomic(kaddr, KM_USER0); 607 kunmap_atomic(kaddr);
608 608
609 ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); 609 ret = nilfs_cpfile_get_header_block(cpfile, &header_bh);
610 if (ret < 0) 610 if (ret < 0)
611 goto out_cp; 611 goto out_cp;
612 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 612 kaddr = kmap_atomic(header_bh->b_page);
613 header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); 613 header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr);
614 list = &header->ch_snapshot_list; 614 list = &header->ch_snapshot_list;
615 curr_bh = header_bh; 615 curr_bh = header_bh;
@@ -621,13 +621,13 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
621 prev_blkoff = nilfs_cpfile_get_blkoff(cpfile, prev); 621 prev_blkoff = nilfs_cpfile_get_blkoff(cpfile, prev);
622 curr = prev; 622 curr = prev;
623 if (curr_blkoff != prev_blkoff) { 623 if (curr_blkoff != prev_blkoff) {
624 kunmap_atomic(kaddr, KM_USER0); 624 kunmap_atomic(kaddr);
625 brelse(curr_bh); 625 brelse(curr_bh);
626 ret = nilfs_cpfile_get_checkpoint_block(cpfile, curr, 626 ret = nilfs_cpfile_get_checkpoint_block(cpfile, curr,
627 0, &curr_bh); 627 0, &curr_bh);
628 if (ret < 0) 628 if (ret < 0)
629 goto out_header; 629 goto out_header;
630 kaddr = kmap_atomic(curr_bh->b_page, KM_USER0); 630 kaddr = kmap_atomic(curr_bh->b_page);
631 } 631 }
632 curr_blkoff = prev_blkoff; 632 curr_blkoff = prev_blkoff;
633 cp = nilfs_cpfile_block_get_checkpoint( 633 cp = nilfs_cpfile_block_get_checkpoint(
@@ -635,7 +635,7 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
635 list = &cp->cp_snapshot_list; 635 list = &cp->cp_snapshot_list;
636 prev = le64_to_cpu(list->ssl_prev); 636 prev = le64_to_cpu(list->ssl_prev);
637 } 637 }
638 kunmap_atomic(kaddr, KM_USER0); 638 kunmap_atomic(kaddr);
639 639
640 if (prev != 0) { 640 if (prev != 0) {
641 ret = nilfs_cpfile_get_checkpoint_block(cpfile, prev, 0, 641 ret = nilfs_cpfile_get_checkpoint_block(cpfile, prev, 0,
@@ -647,29 +647,29 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
647 get_bh(prev_bh); 647 get_bh(prev_bh);
648 } 648 }
649 649
650 kaddr = kmap_atomic(curr_bh->b_page, KM_USER0); 650 kaddr = kmap_atomic(curr_bh->b_page);
651 list = nilfs_cpfile_block_get_snapshot_list( 651 list = nilfs_cpfile_block_get_snapshot_list(
652 cpfile, curr, curr_bh, kaddr); 652 cpfile, curr, curr_bh, kaddr);
653 list->ssl_prev = cpu_to_le64(cno); 653 list->ssl_prev = cpu_to_le64(cno);
654 kunmap_atomic(kaddr, KM_USER0); 654 kunmap_atomic(kaddr);
655 655
656 kaddr = kmap_atomic(cp_bh->b_page, KM_USER0); 656 kaddr = kmap_atomic(cp_bh->b_page);
657 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); 657 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
658 cp->cp_snapshot_list.ssl_next = cpu_to_le64(curr); 658 cp->cp_snapshot_list.ssl_next = cpu_to_le64(curr);
659 cp->cp_snapshot_list.ssl_prev = cpu_to_le64(prev); 659 cp->cp_snapshot_list.ssl_prev = cpu_to_le64(prev);
660 nilfs_checkpoint_set_snapshot(cp); 660 nilfs_checkpoint_set_snapshot(cp);
661 kunmap_atomic(kaddr, KM_USER0); 661 kunmap_atomic(kaddr);
662 662
663 kaddr = kmap_atomic(prev_bh->b_page, KM_USER0); 663 kaddr = kmap_atomic(prev_bh->b_page);
664 list = nilfs_cpfile_block_get_snapshot_list( 664 list = nilfs_cpfile_block_get_snapshot_list(
665 cpfile, prev, prev_bh, kaddr); 665 cpfile, prev, prev_bh, kaddr);
666 list->ssl_next = cpu_to_le64(cno); 666 list->ssl_next = cpu_to_le64(cno);
667 kunmap_atomic(kaddr, KM_USER0); 667 kunmap_atomic(kaddr);
668 668
669 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 669 kaddr = kmap_atomic(header_bh->b_page);
670 header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); 670 header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr);
671 le64_add_cpu(&header->ch_nsnapshots, 1); 671 le64_add_cpu(&header->ch_nsnapshots, 1);
672 kunmap_atomic(kaddr, KM_USER0); 672 kunmap_atomic(kaddr);
673 673
674 mark_buffer_dirty(prev_bh); 674 mark_buffer_dirty(prev_bh);
675 mark_buffer_dirty(curr_bh); 675 mark_buffer_dirty(curr_bh);
@@ -710,23 +710,23 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno)
710 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); 710 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
711 if (ret < 0) 711 if (ret < 0)
712 goto out_sem; 712 goto out_sem;
713 kaddr = kmap_atomic(cp_bh->b_page, KM_USER0); 713 kaddr = kmap_atomic(cp_bh->b_page);
714 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); 714 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
715 if (nilfs_checkpoint_invalid(cp)) { 715 if (nilfs_checkpoint_invalid(cp)) {
716 ret = -ENOENT; 716 ret = -ENOENT;
717 kunmap_atomic(kaddr, KM_USER0); 717 kunmap_atomic(kaddr);
718 goto out_cp; 718 goto out_cp;
719 } 719 }
720 if (!nilfs_checkpoint_snapshot(cp)) { 720 if (!nilfs_checkpoint_snapshot(cp)) {
721 ret = 0; 721 ret = 0;
722 kunmap_atomic(kaddr, KM_USER0); 722 kunmap_atomic(kaddr);
723 goto out_cp; 723 goto out_cp;
724 } 724 }
725 725
726 list = &cp->cp_snapshot_list; 726 list = &cp->cp_snapshot_list;
727 next = le64_to_cpu(list->ssl_next); 727 next = le64_to_cpu(list->ssl_next);
728 prev = le64_to_cpu(list->ssl_prev); 728 prev = le64_to_cpu(list->ssl_prev);
729 kunmap_atomic(kaddr, KM_USER0); 729 kunmap_atomic(kaddr);
730 730
731 ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); 731 ret = nilfs_cpfile_get_header_block(cpfile, &header_bh);
732 if (ret < 0) 732 if (ret < 0)
@@ -750,29 +750,29 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno)
750 get_bh(prev_bh); 750 get_bh(prev_bh);
751 } 751 }
752 752
753 kaddr = kmap_atomic(next_bh->b_page, KM_USER0); 753 kaddr = kmap_atomic(next_bh->b_page);
754 list = nilfs_cpfile_block_get_snapshot_list( 754 list = nilfs_cpfile_block_get_snapshot_list(
755 cpfile, next, next_bh, kaddr); 755 cpfile, next, next_bh, kaddr);
756 list->ssl_prev = cpu_to_le64(prev); 756 list->ssl_prev = cpu_to_le64(prev);
757 kunmap_atomic(kaddr, KM_USER0); 757 kunmap_atomic(kaddr);
758 758
759 kaddr = kmap_atomic(prev_bh->b_page, KM_USER0); 759 kaddr = kmap_atomic(prev_bh->b_page);
760 list = nilfs_cpfile_block_get_snapshot_list( 760 list = nilfs_cpfile_block_get_snapshot_list(
761 cpfile, prev, prev_bh, kaddr); 761 cpfile, prev, prev_bh, kaddr);
762 list->ssl_next = cpu_to_le64(next); 762 list->ssl_next = cpu_to_le64(next);
763 kunmap_atomic(kaddr, KM_USER0); 763 kunmap_atomic(kaddr);
764 764
765 kaddr = kmap_atomic(cp_bh->b_page, KM_USER0); 765 kaddr = kmap_atomic(cp_bh->b_page);
766 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); 766 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
767 cp->cp_snapshot_list.ssl_next = cpu_to_le64(0); 767 cp->cp_snapshot_list.ssl_next = cpu_to_le64(0);
768 cp->cp_snapshot_list.ssl_prev = cpu_to_le64(0); 768 cp->cp_snapshot_list.ssl_prev = cpu_to_le64(0);
769 nilfs_checkpoint_clear_snapshot(cp); 769 nilfs_checkpoint_clear_snapshot(cp);
770 kunmap_atomic(kaddr, KM_USER0); 770 kunmap_atomic(kaddr);
771 771
772 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 772 kaddr = kmap_atomic(header_bh->b_page);
773 header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); 773 header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr);
774 le64_add_cpu(&header->ch_nsnapshots, -1); 774 le64_add_cpu(&header->ch_nsnapshots, -1);
775 kunmap_atomic(kaddr, KM_USER0); 775 kunmap_atomic(kaddr);
776 776
777 mark_buffer_dirty(next_bh); 777 mark_buffer_dirty(next_bh);
778 mark_buffer_dirty(prev_bh); 778 mark_buffer_dirty(prev_bh);
@@ -829,13 +829,13 @@ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno)
829 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh); 829 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh);
830 if (ret < 0) 830 if (ret < 0)
831 goto out; 831 goto out;
832 kaddr = kmap_atomic(bh->b_page, KM_USER0); 832 kaddr = kmap_atomic(bh->b_page);
833 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr); 833 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr);
834 if (nilfs_checkpoint_invalid(cp)) 834 if (nilfs_checkpoint_invalid(cp))
835 ret = -ENOENT; 835 ret = -ENOENT;
836 else 836 else
837 ret = nilfs_checkpoint_snapshot(cp); 837 ret = nilfs_checkpoint_snapshot(cp);
838 kunmap_atomic(kaddr, KM_USER0); 838 kunmap_atomic(kaddr);
839 brelse(bh); 839 brelse(bh);
840 840
841 out: 841 out:
@@ -912,12 +912,12 @@ int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat)
912 ret = nilfs_cpfile_get_header_block(cpfile, &bh); 912 ret = nilfs_cpfile_get_header_block(cpfile, &bh);
913 if (ret < 0) 913 if (ret < 0)
914 goto out_sem; 914 goto out_sem;
915 kaddr = kmap_atomic(bh->b_page, KM_USER0); 915 kaddr = kmap_atomic(bh->b_page);
916 header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr); 916 header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr);
917 cpstat->cs_cno = nilfs_mdt_cno(cpfile); 917 cpstat->cs_cno = nilfs_mdt_cno(cpfile);
918 cpstat->cs_ncps = le64_to_cpu(header->ch_ncheckpoints); 918 cpstat->cs_ncps = le64_to_cpu(header->ch_ncheckpoints);
919 cpstat->cs_nsss = le64_to_cpu(header->ch_nsnapshots); 919 cpstat->cs_nsss = le64_to_cpu(header->ch_nsnapshots);
920 kunmap_atomic(kaddr, KM_USER0); 920 kunmap_atomic(kaddr);
921 brelse(bh); 921 brelse(bh);
922 922
923 out_sem: 923 out_sem:
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index fcc2f869af16..b5c13f3576b9 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -85,13 +85,13 @@ void nilfs_dat_commit_alloc(struct inode *dat, struct nilfs_palloc_req *req)
85 struct nilfs_dat_entry *entry; 85 struct nilfs_dat_entry *entry;
86 void *kaddr; 86 void *kaddr;
87 87
88 kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); 88 kaddr = kmap_atomic(req->pr_entry_bh->b_page);
89 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, 89 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
90 req->pr_entry_bh, kaddr); 90 req->pr_entry_bh, kaddr);
91 entry->de_start = cpu_to_le64(NILFS_CNO_MIN); 91 entry->de_start = cpu_to_le64(NILFS_CNO_MIN);
92 entry->de_end = cpu_to_le64(NILFS_CNO_MAX); 92 entry->de_end = cpu_to_le64(NILFS_CNO_MAX);
93 entry->de_blocknr = cpu_to_le64(0); 93 entry->de_blocknr = cpu_to_le64(0);
94 kunmap_atomic(kaddr, KM_USER0); 94 kunmap_atomic(kaddr);
95 95
96 nilfs_palloc_commit_alloc_entry(dat, req); 96 nilfs_palloc_commit_alloc_entry(dat, req);
97 nilfs_dat_commit_entry(dat, req); 97 nilfs_dat_commit_entry(dat, req);
@@ -109,13 +109,13 @@ static void nilfs_dat_commit_free(struct inode *dat,
109 struct nilfs_dat_entry *entry; 109 struct nilfs_dat_entry *entry;
110 void *kaddr; 110 void *kaddr;
111 111
112 kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); 112 kaddr = kmap_atomic(req->pr_entry_bh->b_page);
113 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, 113 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
114 req->pr_entry_bh, kaddr); 114 req->pr_entry_bh, kaddr);
115 entry->de_start = cpu_to_le64(NILFS_CNO_MIN); 115 entry->de_start = cpu_to_le64(NILFS_CNO_MIN);
116 entry->de_end = cpu_to_le64(NILFS_CNO_MIN); 116 entry->de_end = cpu_to_le64(NILFS_CNO_MIN);
117 entry->de_blocknr = cpu_to_le64(0); 117 entry->de_blocknr = cpu_to_le64(0);
118 kunmap_atomic(kaddr, KM_USER0); 118 kunmap_atomic(kaddr);
119 119
120 nilfs_dat_commit_entry(dat, req); 120 nilfs_dat_commit_entry(dat, req);
121 nilfs_palloc_commit_free_entry(dat, req); 121 nilfs_palloc_commit_free_entry(dat, req);
@@ -136,12 +136,12 @@ void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req,
136 struct nilfs_dat_entry *entry; 136 struct nilfs_dat_entry *entry;
137 void *kaddr; 137 void *kaddr;
138 138
139 kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); 139 kaddr = kmap_atomic(req->pr_entry_bh->b_page);
140 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, 140 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
141 req->pr_entry_bh, kaddr); 141 req->pr_entry_bh, kaddr);
142 entry->de_start = cpu_to_le64(nilfs_mdt_cno(dat)); 142 entry->de_start = cpu_to_le64(nilfs_mdt_cno(dat));
143 entry->de_blocknr = cpu_to_le64(blocknr); 143 entry->de_blocknr = cpu_to_le64(blocknr);
144 kunmap_atomic(kaddr, KM_USER0); 144 kunmap_atomic(kaddr);
145 145
146 nilfs_dat_commit_entry(dat, req); 146 nilfs_dat_commit_entry(dat, req);
147} 147}
@@ -160,12 +160,12 @@ int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req)
160 return ret; 160 return ret;
161 } 161 }
162 162
163 kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); 163 kaddr = kmap_atomic(req->pr_entry_bh->b_page);
164 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, 164 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
165 req->pr_entry_bh, kaddr); 165 req->pr_entry_bh, kaddr);
166 start = le64_to_cpu(entry->de_start); 166 start = le64_to_cpu(entry->de_start);
167 blocknr = le64_to_cpu(entry->de_blocknr); 167 blocknr = le64_to_cpu(entry->de_blocknr);
168 kunmap_atomic(kaddr, KM_USER0); 168 kunmap_atomic(kaddr);
169 169
170 if (blocknr == 0) { 170 if (blocknr == 0) {
171 ret = nilfs_palloc_prepare_free_entry(dat, req); 171 ret = nilfs_palloc_prepare_free_entry(dat, req);
@@ -186,7 +186,7 @@ void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req,
186 sector_t blocknr; 186 sector_t blocknr;
187 void *kaddr; 187 void *kaddr;
188 188
189 kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); 189 kaddr = kmap_atomic(req->pr_entry_bh->b_page);
190 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, 190 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
191 req->pr_entry_bh, kaddr); 191 req->pr_entry_bh, kaddr);
192 end = start = le64_to_cpu(entry->de_start); 192 end = start = le64_to_cpu(entry->de_start);
@@ -196,7 +196,7 @@ void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req,
196 } 196 }
197 entry->de_end = cpu_to_le64(end); 197 entry->de_end = cpu_to_le64(end);
198 blocknr = le64_to_cpu(entry->de_blocknr); 198 blocknr = le64_to_cpu(entry->de_blocknr);
199 kunmap_atomic(kaddr, KM_USER0); 199 kunmap_atomic(kaddr);
200 200
201 if (blocknr == 0) 201 if (blocknr == 0)
202 nilfs_dat_commit_free(dat, req); 202 nilfs_dat_commit_free(dat, req);
@@ -211,12 +211,12 @@ void nilfs_dat_abort_end(struct inode *dat, struct nilfs_palloc_req *req)
211 sector_t blocknr; 211 sector_t blocknr;
212 void *kaddr; 212 void *kaddr;
213 213
214 kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); 214 kaddr = kmap_atomic(req->pr_entry_bh->b_page);
215 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, 215 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
216 req->pr_entry_bh, kaddr); 216 req->pr_entry_bh, kaddr);
217 start = le64_to_cpu(entry->de_start); 217 start = le64_to_cpu(entry->de_start);
218 blocknr = le64_to_cpu(entry->de_blocknr); 218 blocknr = le64_to_cpu(entry->de_blocknr);
219 kunmap_atomic(kaddr, KM_USER0); 219 kunmap_atomic(kaddr);
220 220
221 if (start == nilfs_mdt_cno(dat) && blocknr == 0) 221 if (start == nilfs_mdt_cno(dat) && blocknr == 0)
222 nilfs_palloc_abort_free_entry(dat, req); 222 nilfs_palloc_abort_free_entry(dat, req);
@@ -346,20 +346,20 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr)
346 } 346 }
347 } 347 }
348 348
349 kaddr = kmap_atomic(entry_bh->b_page, KM_USER0); 349 kaddr = kmap_atomic(entry_bh->b_page);
350 entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); 350 entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr);
351 if (unlikely(entry->de_blocknr == cpu_to_le64(0))) { 351 if (unlikely(entry->de_blocknr == cpu_to_le64(0))) {
352 printk(KERN_CRIT "%s: vbn = %llu, [%llu, %llu)\n", __func__, 352 printk(KERN_CRIT "%s: vbn = %llu, [%llu, %llu)\n", __func__,
353 (unsigned long long)vblocknr, 353 (unsigned long long)vblocknr,
354 (unsigned long long)le64_to_cpu(entry->de_start), 354 (unsigned long long)le64_to_cpu(entry->de_start),
355 (unsigned long long)le64_to_cpu(entry->de_end)); 355 (unsigned long long)le64_to_cpu(entry->de_end));
356 kunmap_atomic(kaddr, KM_USER0); 356 kunmap_atomic(kaddr);
357 brelse(entry_bh); 357 brelse(entry_bh);
358 return -EINVAL; 358 return -EINVAL;
359 } 359 }
360 WARN_ON(blocknr == 0); 360 WARN_ON(blocknr == 0);
361 entry->de_blocknr = cpu_to_le64(blocknr); 361 entry->de_blocknr = cpu_to_le64(blocknr);
362 kunmap_atomic(kaddr, KM_USER0); 362 kunmap_atomic(kaddr);
363 363
364 mark_buffer_dirty(entry_bh); 364 mark_buffer_dirty(entry_bh);
365 nilfs_mdt_mark_dirty(dat); 365 nilfs_mdt_mark_dirty(dat);
@@ -409,7 +409,7 @@ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp)
409 } 409 }
410 } 410 }
411 411
412 kaddr = kmap_atomic(entry_bh->b_page, KM_USER0); 412 kaddr = kmap_atomic(entry_bh->b_page);
413 entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); 413 entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr);
414 blocknr = le64_to_cpu(entry->de_blocknr); 414 blocknr = le64_to_cpu(entry->de_blocknr);
415 if (blocknr == 0) { 415 if (blocknr == 0) {
@@ -419,7 +419,7 @@ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp)
419 *blocknrp = blocknr; 419 *blocknrp = blocknr;
420 420
421 out: 421 out:
422 kunmap_atomic(kaddr, KM_USER0); 422 kunmap_atomic(kaddr);
423 brelse(entry_bh); 423 brelse(entry_bh);
424 return ret; 424 return ret;
425} 425}
@@ -440,7 +440,7 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned visz,
440 0, &entry_bh); 440 0, &entry_bh);
441 if (ret < 0) 441 if (ret < 0)
442 return ret; 442 return ret;
443 kaddr = kmap_atomic(entry_bh->b_page, KM_USER0); 443 kaddr = kmap_atomic(entry_bh->b_page);
444 /* last virtual block number in this block */ 444 /* last virtual block number in this block */
445 first = vinfo->vi_vblocknr; 445 first = vinfo->vi_vblocknr;
446 do_div(first, entries_per_block); 446 do_div(first, entries_per_block);
@@ -456,7 +456,7 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned visz,
456 vinfo->vi_end = le64_to_cpu(entry->de_end); 456 vinfo->vi_end = le64_to_cpu(entry->de_end);
457 vinfo->vi_blocknr = le64_to_cpu(entry->de_blocknr); 457 vinfo->vi_blocknr = le64_to_cpu(entry->de_blocknr);
458 } 458 }
459 kunmap_atomic(kaddr, KM_USER0); 459 kunmap_atomic(kaddr);
460 brelse(entry_bh); 460 brelse(entry_bh);
461 } 461 }
462 462
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index ca35b3a46d17..df1a7fb238d1 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -602,7 +602,7 @@ int nilfs_make_empty(struct inode *inode, struct inode *parent)
602 unlock_page(page); 602 unlock_page(page);
603 goto fail; 603 goto fail;
604 } 604 }
605 kaddr = kmap_atomic(page, KM_USER0); 605 kaddr = kmap_atomic(page);
606 memset(kaddr, 0, chunk_size); 606 memset(kaddr, 0, chunk_size);
607 de = (struct nilfs_dir_entry *)kaddr; 607 de = (struct nilfs_dir_entry *)kaddr;
608 de->name_len = 1; 608 de->name_len = 1;
@@ -617,7 +617,7 @@ int nilfs_make_empty(struct inode *inode, struct inode *parent)
617 de->inode = cpu_to_le64(parent->i_ino); 617 de->inode = cpu_to_le64(parent->i_ino);
618 memcpy(de->name, "..\0", 4); 618 memcpy(de->name, "..\0", 4);
619 nilfs_set_de_type(de, inode); 619 nilfs_set_de_type(de, inode);
620 kunmap_atomic(kaddr, KM_USER0); 620 kunmap_atomic(kaddr);
621 nilfs_commit_chunk(page, mapping, 0, chunk_size); 621 nilfs_commit_chunk(page, mapping, 0, chunk_size);
622fail: 622fail:
623 page_cache_release(page); 623 page_cache_release(page);
diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c
index 684d76300a80..5a48df79d674 100644
--- a/fs/nilfs2/ifile.c
+++ b/fs/nilfs2/ifile.c
@@ -122,11 +122,11 @@ int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino)
122 return ret; 122 return ret;
123 } 123 }
124 124
125 kaddr = kmap_atomic(req.pr_entry_bh->b_page, KM_USER0); 125 kaddr = kmap_atomic(req.pr_entry_bh->b_page);
126 raw_inode = nilfs_palloc_block_get_entry(ifile, req.pr_entry_nr, 126 raw_inode = nilfs_palloc_block_get_entry(ifile, req.pr_entry_nr,
127 req.pr_entry_bh, kaddr); 127 req.pr_entry_bh, kaddr);
128 raw_inode->i_flags = 0; 128 raw_inode->i_flags = 0;
129 kunmap_atomic(kaddr, KM_USER0); 129 kunmap_atomic(kaddr);
130 130
131 mark_buffer_dirty(req.pr_entry_bh); 131 mark_buffer_dirty(req.pr_entry_bh);
132 brelse(req.pr_entry_bh); 132 brelse(req.pr_entry_bh);
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 800e8d78a83b..f9897d09c693 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -58,12 +58,12 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block,
58 58
59 set_buffer_mapped(bh); 59 set_buffer_mapped(bh);
60 60
61 kaddr = kmap_atomic(bh->b_page, KM_USER0); 61 kaddr = kmap_atomic(bh->b_page);
62 memset(kaddr + bh_offset(bh), 0, 1 << inode->i_blkbits); 62 memset(kaddr + bh_offset(bh), 0, 1 << inode->i_blkbits);
63 if (init_block) 63 if (init_block)
64 init_block(inode, bh, kaddr); 64 init_block(inode, bh, kaddr);
65 flush_dcache_page(bh->b_page); 65 flush_dcache_page(bh->b_page);
66 kunmap_atomic(kaddr, KM_USER0); 66 kunmap_atomic(kaddr);
67 67
68 set_buffer_uptodate(bh); 68 set_buffer_uptodate(bh);
69 mark_buffer_dirty(bh); 69 mark_buffer_dirty(bh);
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 1cd3f624dffc..fce2bbee66d4 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -193,9 +193,6 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir,
193 struct nilfs_transaction_info ti; 193 struct nilfs_transaction_info ti;
194 int err; 194 int err;
195 195
196 if (inode->i_nlink >= NILFS_LINK_MAX)
197 return -EMLINK;
198
199 err = nilfs_transaction_begin(dir->i_sb, &ti, 1); 196 err = nilfs_transaction_begin(dir->i_sb, &ti, 1);
200 if (err) 197 if (err)
201 return err; 198 return err;
@@ -219,9 +216,6 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
219 struct nilfs_transaction_info ti; 216 struct nilfs_transaction_info ti;
220 int err; 217 int err;
221 218
222 if (dir->i_nlink >= NILFS_LINK_MAX)
223 return -EMLINK;
224
225 err = nilfs_transaction_begin(dir->i_sb, &ti, 1); 219 err = nilfs_transaction_begin(dir->i_sb, &ti, 1);
226 if (err) 220 if (err)
227 return err; 221 return err;
@@ -400,11 +394,6 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
400 drop_nlink(new_inode); 394 drop_nlink(new_inode);
401 nilfs_mark_inode_dirty(new_inode); 395 nilfs_mark_inode_dirty(new_inode);
402 } else { 396 } else {
403 if (dir_de) {
404 err = -EMLINK;
405 if (new_dir->i_nlink >= NILFS_LINK_MAX)
406 goto out_dir;
407 }
408 err = nilfs_add_link(new_dentry, old_inode); 397 err = nilfs_add_link(new_dentry, old_inode);
409 if (err) 398 if (err)
410 goto out_dir; 399 goto out_dir;
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 65221a04c6f0..3e7b2a0dc0c8 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -119,11 +119,11 @@ void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh)
119 struct page *spage = sbh->b_page, *dpage = dbh->b_page; 119 struct page *spage = sbh->b_page, *dpage = dbh->b_page;
120 struct buffer_head *bh; 120 struct buffer_head *bh;
121 121
122 kaddr0 = kmap_atomic(spage, KM_USER0); 122 kaddr0 = kmap_atomic(spage);
123 kaddr1 = kmap_atomic(dpage, KM_USER1); 123 kaddr1 = kmap_atomic(dpage);
124 memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size); 124 memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size);
125 kunmap_atomic(kaddr1, KM_USER1); 125 kunmap_atomic(kaddr1);
126 kunmap_atomic(kaddr0, KM_USER0); 126 kunmap_atomic(kaddr0);
127 127
128 dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS; 128 dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS;
129 dbh->b_blocknr = sbh->b_blocknr; 129 dbh->b_blocknr = sbh->b_blocknr;
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index a604ac0331b2..f1626f5011c5 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -493,9 +493,9 @@ static int nilfs_recovery_copy_block(struct the_nilfs *nilfs,
493 if (unlikely(!bh_org)) 493 if (unlikely(!bh_org))
494 return -EIO; 494 return -EIO;
495 495
496 kaddr = kmap_atomic(page, KM_USER0); 496 kaddr = kmap_atomic(page);
497 memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size); 497 memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size);
498 kunmap_atomic(kaddr, KM_USER0); 498 kunmap_atomic(kaddr);
499 brelse(bh_org); 499 brelse(bh_org);
500 return 0; 500 return 0;
501} 501}
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 850a7c0228fb..dc9a913784ab 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -227,9 +227,9 @@ static void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf,
227 crc = crc32_le(crc, bh->b_data, bh->b_size); 227 crc = crc32_le(crc, bh->b_data, bh->b_size);
228 } 228 }
229 list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { 229 list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) {
230 kaddr = kmap_atomic(bh->b_page, KM_USER0); 230 kaddr = kmap_atomic(bh->b_page);
231 crc = crc32_le(crc, kaddr + bh_offset(bh), bh->b_size); 231 crc = crc32_le(crc, kaddr + bh_offset(bh), bh->b_size);
232 kunmap_atomic(kaddr, KM_USER0); 232 kunmap_atomic(kaddr);
233 } 233 }
234 raw_sum->ss_datasum = cpu_to_le32(crc); 234 raw_sum->ss_datasum = cpu_to_le32(crc);
235} 235}
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index 0a0aba617d8a..c5b7653a4391 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -111,11 +111,11 @@ static void nilfs_sufile_mod_counter(struct buffer_head *header_bh,
111 struct nilfs_sufile_header *header; 111 struct nilfs_sufile_header *header;
112 void *kaddr; 112 void *kaddr;
113 113
114 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 114 kaddr = kmap_atomic(header_bh->b_page);
115 header = kaddr + bh_offset(header_bh); 115 header = kaddr + bh_offset(header_bh);
116 le64_add_cpu(&header->sh_ncleansegs, ncleanadd); 116 le64_add_cpu(&header->sh_ncleansegs, ncleanadd);
117 le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd); 117 le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd);
118 kunmap_atomic(kaddr, KM_USER0); 118 kunmap_atomic(kaddr);
119 119
120 mark_buffer_dirty(header_bh); 120 mark_buffer_dirty(header_bh);
121} 121}
@@ -319,11 +319,11 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
319 ret = nilfs_sufile_get_header_block(sufile, &header_bh); 319 ret = nilfs_sufile_get_header_block(sufile, &header_bh);
320 if (ret < 0) 320 if (ret < 0)
321 goto out_sem; 321 goto out_sem;
322 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 322 kaddr = kmap_atomic(header_bh->b_page);
323 header = kaddr + bh_offset(header_bh); 323 header = kaddr + bh_offset(header_bh);
324 ncleansegs = le64_to_cpu(header->sh_ncleansegs); 324 ncleansegs = le64_to_cpu(header->sh_ncleansegs);
325 last_alloc = le64_to_cpu(header->sh_last_alloc); 325 last_alloc = le64_to_cpu(header->sh_last_alloc);
326 kunmap_atomic(kaddr, KM_USER0); 326 kunmap_atomic(kaddr);
327 327
328 nsegments = nilfs_sufile_get_nsegments(sufile); 328 nsegments = nilfs_sufile_get_nsegments(sufile);
329 maxsegnum = sui->allocmax; 329 maxsegnum = sui->allocmax;
@@ -356,7 +356,7 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
356 &su_bh); 356 &su_bh);
357 if (ret < 0) 357 if (ret < 0)
358 goto out_header; 358 goto out_header;
359 kaddr = kmap_atomic(su_bh->b_page, KM_USER0); 359 kaddr = kmap_atomic(su_bh->b_page);
360 su = nilfs_sufile_block_get_segment_usage( 360 su = nilfs_sufile_block_get_segment_usage(
361 sufile, segnum, su_bh, kaddr); 361 sufile, segnum, su_bh, kaddr);
362 362
@@ -367,14 +367,14 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
367 continue; 367 continue;
368 /* found a clean segment */ 368 /* found a clean segment */
369 nilfs_segment_usage_set_dirty(su); 369 nilfs_segment_usage_set_dirty(su);
370 kunmap_atomic(kaddr, KM_USER0); 370 kunmap_atomic(kaddr);
371 371
372 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 372 kaddr = kmap_atomic(header_bh->b_page);
373 header = kaddr + bh_offset(header_bh); 373 header = kaddr + bh_offset(header_bh);
374 le64_add_cpu(&header->sh_ncleansegs, -1); 374 le64_add_cpu(&header->sh_ncleansegs, -1);
375 le64_add_cpu(&header->sh_ndirtysegs, 1); 375 le64_add_cpu(&header->sh_ndirtysegs, 1);
376 header->sh_last_alloc = cpu_to_le64(segnum); 376 header->sh_last_alloc = cpu_to_le64(segnum);
377 kunmap_atomic(kaddr, KM_USER0); 377 kunmap_atomic(kaddr);
378 378
379 sui->ncleansegs--; 379 sui->ncleansegs--;
380 mark_buffer_dirty(header_bh); 380 mark_buffer_dirty(header_bh);
@@ -385,7 +385,7 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
385 goto out_header; 385 goto out_header;
386 } 386 }
387 387
388 kunmap_atomic(kaddr, KM_USER0); 388 kunmap_atomic(kaddr);
389 brelse(su_bh); 389 brelse(su_bh);
390 } 390 }
391 391
@@ -407,16 +407,16 @@ void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum,
407 struct nilfs_segment_usage *su; 407 struct nilfs_segment_usage *su;
408 void *kaddr; 408 void *kaddr;
409 409
410 kaddr = kmap_atomic(su_bh->b_page, KM_USER0); 410 kaddr = kmap_atomic(su_bh->b_page);
411 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); 411 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
412 if (unlikely(!nilfs_segment_usage_clean(su))) { 412 if (unlikely(!nilfs_segment_usage_clean(su))) {
413 printk(KERN_WARNING "%s: segment %llu must be clean\n", 413 printk(KERN_WARNING "%s: segment %llu must be clean\n",
414 __func__, (unsigned long long)segnum); 414 __func__, (unsigned long long)segnum);
415 kunmap_atomic(kaddr, KM_USER0); 415 kunmap_atomic(kaddr);
416 return; 416 return;
417 } 417 }
418 nilfs_segment_usage_set_dirty(su); 418 nilfs_segment_usage_set_dirty(su);
419 kunmap_atomic(kaddr, KM_USER0); 419 kunmap_atomic(kaddr);
420 420
421 nilfs_sufile_mod_counter(header_bh, -1, 1); 421 nilfs_sufile_mod_counter(header_bh, -1, 1);
422 NILFS_SUI(sufile)->ncleansegs--; 422 NILFS_SUI(sufile)->ncleansegs--;
@@ -433,11 +433,11 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum,
433 void *kaddr; 433 void *kaddr;
434 int clean, dirty; 434 int clean, dirty;
435 435
436 kaddr = kmap_atomic(su_bh->b_page, KM_USER0); 436 kaddr = kmap_atomic(su_bh->b_page);
437 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); 437 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
438 if (su->su_flags == cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY) && 438 if (su->su_flags == cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY) &&
439 su->su_nblocks == cpu_to_le32(0)) { 439 su->su_nblocks == cpu_to_le32(0)) {
440 kunmap_atomic(kaddr, KM_USER0); 440 kunmap_atomic(kaddr);
441 return; 441 return;
442 } 442 }
443 clean = nilfs_segment_usage_clean(su); 443 clean = nilfs_segment_usage_clean(su);
@@ -447,7 +447,7 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum,
447 su->su_lastmod = cpu_to_le64(0); 447 su->su_lastmod = cpu_to_le64(0);
448 su->su_nblocks = cpu_to_le32(0); 448 su->su_nblocks = cpu_to_le32(0);
449 su->su_flags = cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY); 449 su->su_flags = cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY);
450 kunmap_atomic(kaddr, KM_USER0); 450 kunmap_atomic(kaddr);
451 451
452 nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1); 452 nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1);
453 NILFS_SUI(sufile)->ncleansegs -= clean; 453 NILFS_SUI(sufile)->ncleansegs -= clean;
@@ -464,12 +464,12 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum,
464 void *kaddr; 464 void *kaddr;
465 int sudirty; 465 int sudirty;
466 466
467 kaddr = kmap_atomic(su_bh->b_page, KM_USER0); 467 kaddr = kmap_atomic(su_bh->b_page);
468 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); 468 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
469 if (nilfs_segment_usage_clean(su)) { 469 if (nilfs_segment_usage_clean(su)) {
470 printk(KERN_WARNING "%s: segment %llu is already clean\n", 470 printk(KERN_WARNING "%s: segment %llu is already clean\n",
471 __func__, (unsigned long long)segnum); 471 __func__, (unsigned long long)segnum);
472 kunmap_atomic(kaddr, KM_USER0); 472 kunmap_atomic(kaddr);
473 return; 473 return;
474 } 474 }
475 WARN_ON(nilfs_segment_usage_error(su)); 475 WARN_ON(nilfs_segment_usage_error(su));
@@ -477,7 +477,7 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum,
477 477
478 sudirty = nilfs_segment_usage_dirty(su); 478 sudirty = nilfs_segment_usage_dirty(su);
479 nilfs_segment_usage_set_clean(su); 479 nilfs_segment_usage_set_clean(su);
480 kunmap_atomic(kaddr, KM_USER0); 480 kunmap_atomic(kaddr);
481 mark_buffer_dirty(su_bh); 481 mark_buffer_dirty(su_bh);
482 482
483 nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0); 483 nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0);
@@ -525,13 +525,13 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
525 if (ret < 0) 525 if (ret < 0)
526 goto out_sem; 526 goto out_sem;
527 527
528 kaddr = kmap_atomic(bh->b_page, KM_USER0); 528 kaddr = kmap_atomic(bh->b_page);
529 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); 529 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
530 WARN_ON(nilfs_segment_usage_error(su)); 530 WARN_ON(nilfs_segment_usage_error(su));
531 if (modtime) 531 if (modtime)
532 su->su_lastmod = cpu_to_le64(modtime); 532 su->su_lastmod = cpu_to_le64(modtime);
533 su->su_nblocks = cpu_to_le32(nblocks); 533 su->su_nblocks = cpu_to_le32(nblocks);
534 kunmap_atomic(kaddr, KM_USER0); 534 kunmap_atomic(kaddr);
535 535
536 mark_buffer_dirty(bh); 536 mark_buffer_dirty(bh);
537 nilfs_mdt_mark_dirty(sufile); 537 nilfs_mdt_mark_dirty(sufile);
@@ -572,7 +572,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat)
572 if (ret < 0) 572 if (ret < 0)
573 goto out_sem; 573 goto out_sem;
574 574
575 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 575 kaddr = kmap_atomic(header_bh->b_page);
576 header = kaddr + bh_offset(header_bh); 576 header = kaddr + bh_offset(header_bh);
577 sustat->ss_nsegs = nilfs_sufile_get_nsegments(sufile); 577 sustat->ss_nsegs = nilfs_sufile_get_nsegments(sufile);
578 sustat->ss_ncleansegs = le64_to_cpu(header->sh_ncleansegs); 578 sustat->ss_ncleansegs = le64_to_cpu(header->sh_ncleansegs);
@@ -582,7 +582,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat)
582 spin_lock(&nilfs->ns_last_segment_lock); 582 spin_lock(&nilfs->ns_last_segment_lock);
583 sustat->ss_prot_seq = nilfs->ns_prot_seq; 583 sustat->ss_prot_seq = nilfs->ns_prot_seq;
584 spin_unlock(&nilfs->ns_last_segment_lock); 584 spin_unlock(&nilfs->ns_last_segment_lock);
585 kunmap_atomic(kaddr, KM_USER0); 585 kunmap_atomic(kaddr);
586 brelse(header_bh); 586 brelse(header_bh);
587 587
588 out_sem: 588 out_sem:
@@ -598,15 +598,15 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum,
598 void *kaddr; 598 void *kaddr;
599 int suclean; 599 int suclean;
600 600
601 kaddr = kmap_atomic(su_bh->b_page, KM_USER0); 601 kaddr = kmap_atomic(su_bh->b_page);
602 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); 602 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
603 if (nilfs_segment_usage_error(su)) { 603 if (nilfs_segment_usage_error(su)) {
604 kunmap_atomic(kaddr, KM_USER0); 604 kunmap_atomic(kaddr);
605 return; 605 return;
606 } 606 }
607 suclean = nilfs_segment_usage_clean(su); 607 suclean = nilfs_segment_usage_clean(su);
608 nilfs_segment_usage_set_error(su); 608 nilfs_segment_usage_set_error(su);
609 kunmap_atomic(kaddr, KM_USER0); 609 kunmap_atomic(kaddr);
610 610
611 if (suclean) { 611 if (suclean) {
612 nilfs_sufile_mod_counter(header_bh, -1, 0); 612 nilfs_sufile_mod_counter(header_bh, -1, 0);
@@ -675,7 +675,7 @@ static int nilfs_sufile_truncate_range(struct inode *sufile,
675 /* hole */ 675 /* hole */
676 continue; 676 continue;
677 } 677 }
678 kaddr = kmap_atomic(su_bh->b_page, KM_USER0); 678 kaddr = kmap_atomic(su_bh->b_page);
679 su = nilfs_sufile_block_get_segment_usage( 679 su = nilfs_sufile_block_get_segment_usage(
680 sufile, segnum, su_bh, kaddr); 680 sufile, segnum, su_bh, kaddr);
681 su2 = su; 681 su2 = su;
@@ -684,7 +684,7 @@ static int nilfs_sufile_truncate_range(struct inode *sufile,
684 ~(1UL << NILFS_SEGMENT_USAGE_ERROR)) || 684 ~(1UL << NILFS_SEGMENT_USAGE_ERROR)) ||
685 nilfs_segment_is_active(nilfs, segnum + j)) { 685 nilfs_segment_is_active(nilfs, segnum + j)) {
686 ret = -EBUSY; 686 ret = -EBUSY;
687 kunmap_atomic(kaddr, KM_USER0); 687 kunmap_atomic(kaddr);
688 brelse(su_bh); 688 brelse(su_bh);
689 goto out_header; 689 goto out_header;
690 } 690 }
@@ -696,7 +696,7 @@ static int nilfs_sufile_truncate_range(struct inode *sufile,
696 nc++; 696 nc++;
697 } 697 }
698 } 698 }
699 kunmap_atomic(kaddr, KM_USER0); 699 kunmap_atomic(kaddr);
700 if (nc > 0) { 700 if (nc > 0) {
701 mark_buffer_dirty(su_bh); 701 mark_buffer_dirty(su_bh);
702 ncleaned += nc; 702 ncleaned += nc;
@@ -772,10 +772,10 @@ int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs)
772 sui->ncleansegs -= nsegs - newnsegs; 772 sui->ncleansegs -= nsegs - newnsegs;
773 } 773 }
774 774
775 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 775 kaddr = kmap_atomic(header_bh->b_page);
776 header = kaddr + bh_offset(header_bh); 776 header = kaddr + bh_offset(header_bh);
777 header->sh_ncleansegs = cpu_to_le64(sui->ncleansegs); 777 header->sh_ncleansegs = cpu_to_le64(sui->ncleansegs);
778 kunmap_atomic(kaddr, KM_USER0); 778 kunmap_atomic(kaddr);
779 779
780 mark_buffer_dirty(header_bh); 780 mark_buffer_dirty(header_bh);
781 nilfs_mdt_mark_dirty(sufile); 781 nilfs_mdt_mark_dirty(sufile);
@@ -840,7 +840,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf,
840 continue; 840 continue;
841 } 841 }
842 842
843 kaddr = kmap_atomic(su_bh->b_page, KM_USER0); 843 kaddr = kmap_atomic(su_bh->b_page);
844 su = nilfs_sufile_block_get_segment_usage( 844 su = nilfs_sufile_block_get_segment_usage(
845 sufile, segnum, su_bh, kaddr); 845 sufile, segnum, su_bh, kaddr);
846 for (j = 0; j < n; 846 for (j = 0; j < n;
@@ -853,7 +853,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf,
853 si->sui_flags |= 853 si->sui_flags |=
854 (1UL << NILFS_SEGMENT_USAGE_ACTIVE); 854 (1UL << NILFS_SEGMENT_USAGE_ACTIVE);
855 } 855 }
856 kunmap_atomic(kaddr, KM_USER0); 856 kunmap_atomic(kaddr);
857 brelse(su_bh); 857 brelse(su_bh);
858 } 858 }
859 ret = nsegs; 859 ret = nsegs;
@@ -902,10 +902,10 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize,
902 goto failed; 902 goto failed;
903 903
904 sui = NILFS_SUI(sufile); 904 sui = NILFS_SUI(sufile);
905 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 905 kaddr = kmap_atomic(header_bh->b_page);
906 header = kaddr + bh_offset(header_bh); 906 header = kaddr + bh_offset(header_bh);
907 sui->ncleansegs = le64_to_cpu(header->sh_ncleansegs); 907 sui->ncleansegs = le64_to_cpu(header->sh_ncleansegs);
908 kunmap_atomic(kaddr, KM_USER0); 908 kunmap_atomic(kaddr);
909 brelse(header_bh); 909 brelse(header_bh);
910 910
911 sui->allocmax = nilfs_sufile_get_nsegments(sufile) - 1; 911 sui->allocmax = nilfs_sufile_get_nsegments(sufile) - 1;
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 08e3d4f9df18..1099a76cee59 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -917,9 +917,8 @@ static int nilfs_get_root_dentry(struct super_block *sb,
917 if (root->cno == NILFS_CPTREE_CURRENT_CNO) { 917 if (root->cno == NILFS_CPTREE_CURRENT_CNO) {
918 dentry = d_find_alias(inode); 918 dentry = d_find_alias(inode);
919 if (!dentry) { 919 if (!dentry) {
920 dentry = d_alloc_root(inode); 920 dentry = d_make_root(inode);
921 if (!dentry) { 921 if (!dentry) {
922 iput(inode);
923 ret = -ENOMEM; 922 ret = -ENOMEM;
924 goto failed_dentry; 923 goto failed_dentry;
925 } 924 }
@@ -1059,6 +1058,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent)
1059 sb->s_export_op = &nilfs_export_ops; 1058 sb->s_export_op = &nilfs_export_ops;
1060 sb->s_root = NULL; 1059 sb->s_root = NULL;
1061 sb->s_time_gran = 1; 1060 sb->s_time_gran = 1;
1061 sb->s_max_links = NILFS_LINK_MAX;
1062 1062
1063 bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; 1063 bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
1064 sb->s_bdi = bdi ? : &default_backing_dev_info; 1064 sb->s_bdi = bdi ? : &default_backing_dev_info;
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index d32714094375..501b7f8b739f 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -409,6 +409,12 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
409 nilfs->ns_first_data_block = le64_to_cpu(sbp->s_first_data_block); 409 nilfs->ns_first_data_block = le64_to_cpu(sbp->s_first_data_block);
410 nilfs->ns_r_segments_percentage = 410 nilfs->ns_r_segments_percentage =
411 le32_to_cpu(sbp->s_r_segments_percentage); 411 le32_to_cpu(sbp->s_r_segments_percentage);
412 if (nilfs->ns_r_segments_percentage < 1 ||
413 nilfs->ns_r_segments_percentage > 99) {
414 printk(KERN_ERR "NILFS: invalid reserved segments percentage.\n");
415 return -EINVAL;
416 }
417
412 nilfs_set_nsegments(nilfs, le64_to_cpu(sbp->s_nsegments)); 418 nilfs_set_nsegments(nilfs, le64_to_cpu(sbp->s_nsegments));
413 nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed); 419 nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed);
414 return 0; 420 return 0;
@@ -515,6 +521,7 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
515 brelse(sbh[1]); 521 brelse(sbh[1]);
516 sbh[1] = NULL; 522 sbh[1] = NULL;
517 sbp[1] = NULL; 523 sbp[1] = NULL;
524 valid[1] = 0;
518 swp = 0; 525 swp = 0;
519 } 526 }
520 if (!valid[swp]) { 527 if (!valid[swp]) {
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index ee188158a224..c887b1378f7e 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -447,7 +447,7 @@ struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
447 return event; 447 return event;
448} 448}
449 449
450__init int fsnotify_notification_init(void) 450static __init int fsnotify_notification_init(void)
451{ 451{
452 fsnotify_event_cachep = KMEM_CACHE(fsnotify_event, SLAB_PANIC); 452 fsnotify_event_cachep = KMEM_CACHE(fsnotify_event, SLAB_PANIC);
453 fsnotify_event_holder_cachep = KMEM_CACHE(fsnotify_event_holder, SLAB_PANIC); 453 fsnotify_event_holder_cachep = KMEM_CACHE(fsnotify_event_holder, SLAB_PANIC);
@@ -461,4 +461,3 @@ __init int fsnotify_notification_init(void)
461 return 0; 461 return 0;
462} 462}
463subsys_initcall(fsnotify_notification_init); 463subsys_initcall(fsnotify_notification_init);
464
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 0b1e885b8cf8..fa9c05f97af4 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -94,11 +94,11 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
94 if (file_ofs < init_size) 94 if (file_ofs < init_size)
95 ofs = init_size - file_ofs; 95 ofs = init_size - file_ofs;
96 local_irq_save(flags); 96 local_irq_save(flags);
97 kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ); 97 kaddr = kmap_atomic(page);
98 memset(kaddr + bh_offset(bh) + ofs, 0, 98 memset(kaddr + bh_offset(bh) + ofs, 0,
99 bh->b_size - ofs); 99 bh->b_size - ofs);
100 flush_dcache_page(page); 100 flush_dcache_page(page);
101 kunmap_atomic(kaddr, KM_BIO_SRC_IRQ); 101 kunmap_atomic(kaddr);
102 local_irq_restore(flags); 102 local_irq_restore(flags);
103 } 103 }
104 } else { 104 } else {
@@ -147,11 +147,11 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
147 /* Should have been verified before we got here... */ 147 /* Should have been verified before we got here... */
148 BUG_ON(!recs); 148 BUG_ON(!recs);
149 local_irq_save(flags); 149 local_irq_save(flags);
150 kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ); 150 kaddr = kmap_atomic(page);
151 for (i = 0; i < recs; i++) 151 for (i = 0; i < recs; i++)
152 post_read_mst_fixup((NTFS_RECORD*)(kaddr + 152 post_read_mst_fixup((NTFS_RECORD*)(kaddr +
153 i * rec_size), rec_size); 153 i * rec_size), rec_size);
154 kunmap_atomic(kaddr, KM_BIO_SRC_IRQ); 154 kunmap_atomic(kaddr);
155 local_irq_restore(flags); 155 local_irq_restore(flags);
156 flush_dcache_page(page); 156 flush_dcache_page(page);
157 if (likely(page_uptodate && !PageError(page))) 157 if (likely(page_uptodate && !PageError(page)))
@@ -504,7 +504,7 @@ retry_readpage:
504 /* Race with shrinking truncate. */ 504 /* Race with shrinking truncate. */
505 attr_len = i_size; 505 attr_len = i_size;
506 } 506 }
507 addr = kmap_atomic(page, KM_USER0); 507 addr = kmap_atomic(page);
508 /* Copy the data to the page. */ 508 /* Copy the data to the page. */
509 memcpy(addr, (u8*)ctx->attr + 509 memcpy(addr, (u8*)ctx->attr +
510 le16_to_cpu(ctx->attr->data.resident.value_offset), 510 le16_to_cpu(ctx->attr->data.resident.value_offset),
@@ -512,7 +512,7 @@ retry_readpage:
512 /* Zero the remainder of the page. */ 512 /* Zero the remainder of the page. */
513 memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len); 513 memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
514 flush_dcache_page(page); 514 flush_dcache_page(page);
515 kunmap_atomic(addr, KM_USER0); 515 kunmap_atomic(addr);
516put_unm_err_out: 516put_unm_err_out:
517 ntfs_attr_put_search_ctx(ctx); 517 ntfs_attr_put_search_ctx(ctx);
518unm_err_out: 518unm_err_out:
@@ -746,14 +746,14 @@ lock_retry_remap:
746 unsigned long *bpos, *bend; 746 unsigned long *bpos, *bend;
747 747
748 /* Check if the buffer is zero. */ 748 /* Check if the buffer is zero. */
749 kaddr = kmap_atomic(page, KM_USER0); 749 kaddr = kmap_atomic(page);
750 bpos = (unsigned long *)(kaddr + bh_offset(bh)); 750 bpos = (unsigned long *)(kaddr + bh_offset(bh));
751 bend = (unsigned long *)((u8*)bpos + blocksize); 751 bend = (unsigned long *)((u8*)bpos + blocksize);
752 do { 752 do {
753 if (unlikely(*bpos)) 753 if (unlikely(*bpos))
754 break; 754 break;
755 } while (likely(++bpos < bend)); 755 } while (likely(++bpos < bend));
756 kunmap_atomic(kaddr, KM_USER0); 756 kunmap_atomic(kaddr);
757 if (bpos == bend) { 757 if (bpos == bend) {
758 /* 758 /*
759 * Buffer is zero and sparse, no need to write 759 * Buffer is zero and sparse, no need to write
@@ -1495,14 +1495,14 @@ retry_writepage:
1495 /* Shrinking cannot fail. */ 1495 /* Shrinking cannot fail. */
1496 BUG_ON(err); 1496 BUG_ON(err);
1497 } 1497 }
1498 addr = kmap_atomic(page, KM_USER0); 1498 addr = kmap_atomic(page);
1499 /* Copy the data from the page to the mft record. */ 1499 /* Copy the data from the page to the mft record. */
1500 memcpy((u8*)ctx->attr + 1500 memcpy((u8*)ctx->attr +
1501 le16_to_cpu(ctx->attr->data.resident.value_offset), 1501 le16_to_cpu(ctx->attr->data.resident.value_offset),
1502 addr, attr_len); 1502 addr, attr_len);
1503 /* Zero out of bounds area in the page cache page. */ 1503 /* Zero out of bounds area in the page cache page. */
1504 memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len); 1504 memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
1505 kunmap_atomic(addr, KM_USER0); 1505 kunmap_atomic(addr);
1506 flush_dcache_page(page); 1506 flush_dcache_page(page);
1507 flush_dcache_mft_record_page(ctx->ntfs_ino); 1507 flush_dcache_mft_record_page(ctx->ntfs_ino);
1508 /* We are done with the page. */ 1508 /* We are done with the page. */
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index e0281992ddc3..a27e3fecefaf 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1656,12 +1656,12 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size)
1656 attr_size = le32_to_cpu(a->data.resident.value_length); 1656 attr_size = le32_to_cpu(a->data.resident.value_length);
1657 BUG_ON(attr_size != data_size); 1657 BUG_ON(attr_size != data_size);
1658 if (page && !PageUptodate(page)) { 1658 if (page && !PageUptodate(page)) {
1659 kaddr = kmap_atomic(page, KM_USER0); 1659 kaddr = kmap_atomic(page);
1660 memcpy(kaddr, (u8*)a + 1660 memcpy(kaddr, (u8*)a +
1661 le16_to_cpu(a->data.resident.value_offset), 1661 le16_to_cpu(a->data.resident.value_offset),
1662 attr_size); 1662 attr_size);
1663 memset(kaddr + attr_size, 0, PAGE_CACHE_SIZE - attr_size); 1663 memset(kaddr + attr_size, 0, PAGE_CACHE_SIZE - attr_size);
1664 kunmap_atomic(kaddr, KM_USER0); 1664 kunmap_atomic(kaddr);
1665 flush_dcache_page(page); 1665 flush_dcache_page(page);
1666 SetPageUptodate(page); 1666 SetPageUptodate(page);
1667 } 1667 }
@@ -1806,9 +1806,9 @@ undo_err_out:
1806 sizeof(a->data.resident.reserved)); 1806 sizeof(a->data.resident.reserved));
1807 /* Copy the data from the page back to the attribute value. */ 1807 /* Copy the data from the page back to the attribute value. */
1808 if (page) { 1808 if (page) {
1809 kaddr = kmap_atomic(page, KM_USER0); 1809 kaddr = kmap_atomic(page);
1810 memcpy((u8*)a + mp_ofs, kaddr, attr_size); 1810 memcpy((u8*)a + mp_ofs, kaddr, attr_size);
1811 kunmap_atomic(kaddr, KM_USER0); 1811 kunmap_atomic(kaddr);
1812 } 1812 }
1813 /* Setup the allocated size in the ntfs inode in case it changed. */ 1813 /* Setup the allocated size in the ntfs inode in case it changed. */
1814 write_lock_irqsave(&ni->size_lock, flags); 1814 write_lock_irqsave(&ni->size_lock, flags);
@@ -2540,10 +2540,10 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
2540 size = PAGE_CACHE_SIZE; 2540 size = PAGE_CACHE_SIZE;
2541 if (idx == end) 2541 if (idx == end)
2542 size = end_ofs; 2542 size = end_ofs;
2543 kaddr = kmap_atomic(page, KM_USER0); 2543 kaddr = kmap_atomic(page);
2544 memset(kaddr + start_ofs, val, size - start_ofs); 2544 memset(kaddr + start_ofs, val, size - start_ofs);
2545 flush_dcache_page(page); 2545 flush_dcache_page(page);
2546 kunmap_atomic(kaddr, KM_USER0); 2546 kunmap_atomic(kaddr);
2547 set_page_dirty(page); 2547 set_page_dirty(page);
2548 page_cache_release(page); 2548 page_cache_release(page);
2549 balance_dirty_pages_ratelimited(mapping); 2549 balance_dirty_pages_ratelimited(mapping);
@@ -2561,10 +2561,10 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
2561 "page (index 0x%lx).", idx); 2561 "page (index 0x%lx).", idx);
2562 return -ENOMEM; 2562 return -ENOMEM;
2563 } 2563 }
2564 kaddr = kmap_atomic(page, KM_USER0); 2564 kaddr = kmap_atomic(page);
2565 memset(kaddr, val, PAGE_CACHE_SIZE); 2565 memset(kaddr, val, PAGE_CACHE_SIZE);
2566 flush_dcache_page(page); 2566 flush_dcache_page(page);
2567 kunmap_atomic(kaddr, KM_USER0); 2567 kunmap_atomic(kaddr);
2568 /* 2568 /*
2569 * If the page has buffers, mark them uptodate since buffer 2569 * If the page has buffers, mark them uptodate since buffer
2570 * state and not page state is definitive in 2.6 kernels. 2570 * state and not page state is definitive in 2.6 kernels.
@@ -2598,10 +2598,10 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
2598 "(error, index 0x%lx).", idx); 2598 "(error, index 0x%lx).", idx);
2599 return PTR_ERR(page); 2599 return PTR_ERR(page);
2600 } 2600 }
2601 kaddr = kmap_atomic(page, KM_USER0); 2601 kaddr = kmap_atomic(page);
2602 memset(kaddr, val, end_ofs); 2602 memset(kaddr, val, end_ofs);
2603 flush_dcache_page(page); 2603 flush_dcache_page(page);
2604 kunmap_atomic(kaddr, KM_USER0); 2604 kunmap_atomic(kaddr);
2605 set_page_dirty(page); 2605 set_page_dirty(page);
2606 page_cache_release(page); 2606 page_cache_release(page);
2607 balance_dirty_pages_ratelimited(mapping); 2607 balance_dirty_pages_ratelimited(mapping);
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index c587e2d27183..8639169221c7 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -704,7 +704,7 @@ map_buffer_cached:
704 u8 *kaddr; 704 u8 *kaddr;
705 unsigned pofs; 705 unsigned pofs;
706 706
707 kaddr = kmap_atomic(page, KM_USER0); 707 kaddr = kmap_atomic(page);
708 if (bh_pos < pos) { 708 if (bh_pos < pos) {
709 pofs = bh_pos & ~PAGE_CACHE_MASK; 709 pofs = bh_pos & ~PAGE_CACHE_MASK;
710 memset(kaddr + pofs, 0, pos - bh_pos); 710 memset(kaddr + pofs, 0, pos - bh_pos);
@@ -713,7 +713,7 @@ map_buffer_cached:
713 pofs = end & ~PAGE_CACHE_MASK; 713 pofs = end & ~PAGE_CACHE_MASK;
714 memset(kaddr + pofs, 0, bh_end - end); 714 memset(kaddr + pofs, 0, bh_end - end);
715 } 715 }
716 kunmap_atomic(kaddr, KM_USER0); 716 kunmap_atomic(kaddr);
717 flush_dcache_page(page); 717 flush_dcache_page(page);
718 } 718 }
719 continue; 719 continue;
@@ -1287,9 +1287,9 @@ static inline size_t ntfs_copy_from_user(struct page **pages,
1287 len = PAGE_CACHE_SIZE - ofs; 1287 len = PAGE_CACHE_SIZE - ofs;
1288 if (len > bytes) 1288 if (len > bytes)
1289 len = bytes; 1289 len = bytes;
1290 addr = kmap_atomic(*pages, KM_USER0); 1290 addr = kmap_atomic(*pages);
1291 left = __copy_from_user_inatomic(addr + ofs, buf, len); 1291 left = __copy_from_user_inatomic(addr + ofs, buf, len);
1292 kunmap_atomic(addr, KM_USER0); 1292 kunmap_atomic(addr);
1293 if (unlikely(left)) { 1293 if (unlikely(left)) {
1294 /* Do it the slow way. */ 1294 /* Do it the slow way. */
1295 addr = kmap(*pages); 1295 addr = kmap(*pages);
@@ -1401,10 +1401,10 @@ static inline size_t ntfs_copy_from_user_iovec(struct page **pages,
1401 len = PAGE_CACHE_SIZE - ofs; 1401 len = PAGE_CACHE_SIZE - ofs;
1402 if (len > bytes) 1402 if (len > bytes)
1403 len = bytes; 1403 len = bytes;
1404 addr = kmap_atomic(*pages, KM_USER0); 1404 addr = kmap_atomic(*pages);
1405 copied = __ntfs_copy_from_user_iovec_inatomic(addr + ofs, 1405 copied = __ntfs_copy_from_user_iovec_inatomic(addr + ofs,
1406 *iov, *iov_ofs, len); 1406 *iov, *iov_ofs, len);
1407 kunmap_atomic(addr, KM_USER0); 1407 kunmap_atomic(addr);
1408 if (unlikely(copied != len)) { 1408 if (unlikely(copied != len)) {
1409 /* Do it the slow way. */ 1409 /* Do it the slow way. */
1410 addr = kmap(*pages); 1410 addr = kmap(*pages);
@@ -1691,7 +1691,7 @@ static int ntfs_commit_pages_after_write(struct page **pages,
1691 BUG_ON(end > le32_to_cpu(a->length) - 1691 BUG_ON(end > le32_to_cpu(a->length) -
1692 le16_to_cpu(a->data.resident.value_offset)); 1692 le16_to_cpu(a->data.resident.value_offset));
1693 kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset); 1693 kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset);
1694 kaddr = kmap_atomic(page, KM_USER0); 1694 kaddr = kmap_atomic(page);
1695 /* Copy the received data from the page to the mft record. */ 1695 /* Copy the received data from the page to the mft record. */
1696 memcpy(kattr + pos, kaddr + pos, bytes); 1696 memcpy(kattr + pos, kaddr + pos, bytes);
1697 /* Update the attribute length if necessary. */ 1697 /* Update the attribute length if necessary. */
@@ -1713,7 +1713,7 @@ static int ntfs_commit_pages_after_write(struct page **pages,
1713 flush_dcache_page(page); 1713 flush_dcache_page(page);
1714 SetPageUptodate(page); 1714 SetPageUptodate(page);
1715 } 1715 }
1716 kunmap_atomic(kaddr, KM_USER0); 1716 kunmap_atomic(kaddr);
1717 /* Update initialized_size/i_size if necessary. */ 1717 /* Update initialized_size/i_size if necessary. */
1718 read_lock_irqsave(&ni->size_lock, flags); 1718 read_lock_irqsave(&ni->size_lock, flags);
1719 initialized_size = ni->initialized_size; 1719 initialized_size = ni->initialized_size;
diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h
index faece7190866..809c0e6d8e09 100644
--- a/fs/ntfs/layout.h
+++ b/fs/ntfs/layout.h
@@ -2008,14 +2008,14 @@ typedef struct {
2008 * 2008 *
2009 * When a directory is small enough to fit inside the index root then this 2009 * When a directory is small enough to fit inside the index root then this
2010 * is the only attribute describing the directory. When the directory is too 2010 * is the only attribute describing the directory. When the directory is too
2011 * large to fit in the index root, on the other hand, two aditional attributes 2011 * large to fit in the index root, on the other hand, two additional attributes
2012 * are present: an index allocation attribute, containing sub-nodes of the B+ 2012 * are present: an index allocation attribute, containing sub-nodes of the B+
2013 * directory tree (see below), and a bitmap attribute, describing which virtual 2013 * directory tree (see below), and a bitmap attribute, describing which virtual
2014 * cluster numbers (vcns) in the index allocation attribute are in use by an 2014 * cluster numbers (vcns) in the index allocation attribute are in use by an
2015 * index block. 2015 * index block.
2016 * 2016 *
2017 * NOTE: The root directory (FILE_root) contains an entry for itself. Other 2017 * NOTE: The root directory (FILE_root) contains an entry for itself. Other
2018 * dircetories do not contain entries for themselves, though. 2018 * directories do not contain entries for themselves, though.
2019 */ 2019 */
2020typedef struct { 2020typedef struct {
2021 ATTR_TYPE type; /* Type of the indexed attribute. Is 2021 ATTR_TYPE type; /* Type of the indexed attribute. Is
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index f907611cca73..b341492542ca 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -2473,7 +2473,7 @@ static s64 get_nr_free_clusters(ntfs_volume *vol)
2473 nr_free -= PAGE_CACHE_SIZE * 8; 2473 nr_free -= PAGE_CACHE_SIZE * 8;
2474 continue; 2474 continue;
2475 } 2475 }
2476 kaddr = kmap_atomic(page, KM_USER0); 2476 kaddr = kmap_atomic(page);
2477 /* 2477 /*
2478 * Subtract the number of set bits. If this 2478 * Subtract the number of set bits. If this
2479 * is the last page and it is partial we don't really care as 2479 * is the last page and it is partial we don't really care as
@@ -2483,7 +2483,7 @@ static s64 get_nr_free_clusters(ntfs_volume *vol)
2483 */ 2483 */
2484 nr_free -= bitmap_weight(kaddr, 2484 nr_free -= bitmap_weight(kaddr,
2485 PAGE_CACHE_SIZE * BITS_PER_BYTE); 2485 PAGE_CACHE_SIZE * BITS_PER_BYTE);
2486 kunmap_atomic(kaddr, KM_USER0); 2486 kunmap_atomic(kaddr);
2487 page_cache_release(page); 2487 page_cache_release(page);
2488 } 2488 }
2489 ntfs_debug("Finished reading $Bitmap, last index = 0x%lx.", index - 1); 2489 ntfs_debug("Finished reading $Bitmap, last index = 0x%lx.", index - 1);
@@ -2544,7 +2544,7 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol,
2544 nr_free -= PAGE_CACHE_SIZE * 8; 2544 nr_free -= PAGE_CACHE_SIZE * 8;
2545 continue; 2545 continue;
2546 } 2546 }
2547 kaddr = kmap_atomic(page, KM_USER0); 2547 kaddr = kmap_atomic(page);
2548 /* 2548 /*
2549 * Subtract the number of set bits. If this 2549 * Subtract the number of set bits. If this
2550 * is the last page and it is partial we don't really care as 2550 * is the last page and it is partial we don't really care as
@@ -2554,7 +2554,7 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol,
2554 */ 2554 */
2555 nr_free -= bitmap_weight(kaddr, 2555 nr_free -= bitmap_weight(kaddr,
2556 PAGE_CACHE_SIZE * BITS_PER_BYTE); 2556 PAGE_CACHE_SIZE * BITS_PER_BYTE);
2557 kunmap_atomic(kaddr, KM_USER0); 2557 kunmap_atomic(kaddr);
2558 page_cache_release(page); 2558 page_cache_release(page);
2559 } 2559 }
2560 ntfs_debug("Finished reading $MFT/$BITMAP, last index = 0x%lx.", 2560 ntfs_debug("Finished reading $MFT/$BITMAP, last index = 0x%lx.",
@@ -2908,9 +2908,10 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
2908 ntfs_error(sb, "Failed to load system files."); 2908 ntfs_error(sb, "Failed to load system files.");
2909 goto unl_upcase_iput_tmp_ino_err_out_now; 2909 goto unl_upcase_iput_tmp_ino_err_out_now;
2910 } 2910 }
2911 if ((sb->s_root = d_alloc_root(vol->root_ino))) { 2911
2912 /* We grab a reference, simulating an ntfs_iget(). */ 2912 /* We grab a reference, simulating an ntfs_iget(). */
2913 ihold(vol->root_ino); 2913 ihold(vol->root_ino);
2914 if ((sb->s_root = d_make_root(vol->root_ino))) {
2914 ntfs_debug("Exiting, status successful."); 2915 ntfs_debug("Exiting, status successful.");
2915 /* Release the default upcase if it has no users. */ 2916 /* Release the default upcase if it has no users. */
2916 mutex_lock(&ntfs_lock); 2917 mutex_lock(&ntfs_lock);
@@ -3158,6 +3159,8 @@ static int __init init_ntfs_fs(void)
3158 } 3159 }
3159 printk(KERN_CRIT "NTFS: Failed to register NTFS filesystem driver!\n"); 3160 printk(KERN_CRIT "NTFS: Failed to register NTFS filesystem driver!\n");
3160 3161
3162 /* Unregister the ntfs sysctls. */
3163 ntfs_sysctl(0);
3161sysctl_err_out: 3164sysctl_err_out:
3162 kmem_cache_destroy(ntfs_big_inode_cache); 3165 kmem_cache_destroy(ntfs_big_inode_cache);
3163big_inode_err_out: 3166big_inode_err_out:
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 78b68af3b0e3..657743254eb9 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -102,7 +102,7 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
102 * copy, the data is still good. */ 102 * copy, the data is still good. */
103 if (buffer_jbd(buffer_cache_bh) 103 if (buffer_jbd(buffer_cache_bh)
104 && ocfs2_inode_is_new(inode)) { 104 && ocfs2_inode_is_new(inode)) {
105 kaddr = kmap_atomic(bh_result->b_page, KM_USER0); 105 kaddr = kmap_atomic(bh_result->b_page);
106 if (!kaddr) { 106 if (!kaddr) {
107 mlog(ML_ERROR, "couldn't kmap!\n"); 107 mlog(ML_ERROR, "couldn't kmap!\n");
108 goto bail; 108 goto bail;
@@ -110,7 +110,7 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
110 memcpy(kaddr + (bh_result->b_size * iblock), 110 memcpy(kaddr + (bh_result->b_size * iblock),
111 buffer_cache_bh->b_data, 111 buffer_cache_bh->b_data,
112 bh_result->b_size); 112 bh_result->b_size);
113 kunmap_atomic(kaddr, KM_USER0); 113 kunmap_atomic(kaddr);
114 set_buffer_uptodate(bh_result); 114 set_buffer_uptodate(bh_result);
115 } 115 }
116 brelse(buffer_cache_bh); 116 brelse(buffer_cache_bh);
@@ -236,13 +236,13 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page,
236 return -EROFS; 236 return -EROFS;
237 } 237 }
238 238
239 kaddr = kmap_atomic(page, KM_USER0); 239 kaddr = kmap_atomic(page);
240 if (size) 240 if (size)
241 memcpy(kaddr, di->id2.i_data.id_data, size); 241 memcpy(kaddr, di->id2.i_data.id_data, size);
242 /* Clear the remaining part of the page */ 242 /* Clear the remaining part of the page */
243 memset(kaddr + size, 0, PAGE_CACHE_SIZE - size); 243 memset(kaddr + size, 0, PAGE_CACHE_SIZE - size);
244 flush_dcache_page(page); 244 flush_dcache_page(page);
245 kunmap_atomic(kaddr, KM_USER0); 245 kunmap_atomic(kaddr);
246 246
247 SetPageUptodate(page); 247 SetPageUptodate(page);
248 248
@@ -689,7 +689,7 @@ static void ocfs2_clear_page_regions(struct page *page,
689 689
690 ocfs2_figure_cluster_boundaries(osb, cpos, &cluster_start, &cluster_end); 690 ocfs2_figure_cluster_boundaries(osb, cpos, &cluster_start, &cluster_end);
691 691
692 kaddr = kmap_atomic(page, KM_USER0); 692 kaddr = kmap_atomic(page);
693 693
694 if (from || to) { 694 if (from || to) {
695 if (from > cluster_start) 695 if (from > cluster_start)
@@ -700,7 +700,7 @@ static void ocfs2_clear_page_regions(struct page *page,
700 memset(kaddr + cluster_start, 0, cluster_end - cluster_start); 700 memset(kaddr + cluster_start, 0, cluster_end - cluster_start);
701 } 701 }
702 702
703 kunmap_atomic(kaddr, KM_USER0); 703 kunmap_atomic(kaddr);
704} 704}
705 705
706/* 706/*
@@ -1981,9 +1981,9 @@ static void ocfs2_write_end_inline(struct inode *inode, loff_t pos,
1981 } 1981 }
1982 } 1982 }
1983 1983
1984 kaddr = kmap_atomic(wc->w_target_page, KM_USER0); 1984 kaddr = kmap_atomic(wc->w_target_page);
1985 memcpy(di->id2.i_data.id_data + pos, kaddr + pos, *copied); 1985 memcpy(di->id2.i_data.id_data + pos, kaddr + pos, *copied);
1986 kunmap_atomic(kaddr, KM_USER0); 1986 kunmap_atomic(kaddr);
1987 1987
1988 trace_ocfs2_write_end_inline( 1988 trace_ocfs2_write_end_inline(
1989 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1989 (unsigned long long)OCFS2_I(inode)->ip_blkno,
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index abfac0d7ae9c..3b5825ef3193 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -582,24 +582,14 @@ static int dlmfs_fill_super(struct super_block * sb,
582 void * data, 582 void * data,
583 int silent) 583 int silent)
584{ 584{
585 struct inode * inode;
586 struct dentry * root;
587
588 sb->s_maxbytes = MAX_LFS_FILESIZE; 585 sb->s_maxbytes = MAX_LFS_FILESIZE;
589 sb->s_blocksize = PAGE_CACHE_SIZE; 586 sb->s_blocksize = PAGE_CACHE_SIZE;
590 sb->s_blocksize_bits = PAGE_CACHE_SHIFT; 587 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
591 sb->s_magic = DLMFS_MAGIC; 588 sb->s_magic = DLMFS_MAGIC;
592 sb->s_op = &dlmfs_ops; 589 sb->s_op = &dlmfs_ops;
593 inode = dlmfs_get_root_inode(sb); 590 sb->s_root = d_make_root(dlmfs_get_root_inode(sb));
594 if (!inode) 591 if (!sb->s_root)
595 return -ENOMEM;
596
597 root = d_alloc_root(inode);
598 if (!root) {
599 iput(inode);
600 return -ENOMEM; 592 return -ENOMEM;
601 }
602 sb->s_root = root;
603 return 0; 593 return 0;
604} 594}
605 595
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 604e12c4e979..68f4541c2db9 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1154,19 +1154,19 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1154 } 1154 }
1155 1155
1156 status = ocfs2_mount_volume(sb); 1156 status = ocfs2_mount_volume(sb);
1157 if (osb->root_inode)
1158 inode = igrab(osb->root_inode);
1159
1160 if (status < 0) 1157 if (status < 0)
1161 goto read_super_error; 1158 goto read_super_error;
1162 1159
1160 if (osb->root_inode)
1161 inode = igrab(osb->root_inode);
1162
1163 if (!inode) { 1163 if (!inode) {
1164 status = -EIO; 1164 status = -EIO;
1165 mlog_errno(status); 1165 mlog_errno(status);
1166 goto read_super_error; 1166 goto read_super_error;
1167 } 1167 }
1168 1168
1169 root = d_alloc_root(inode); 1169 root = d_make_root(inode);
1170 if (!root) { 1170 if (!root) {
1171 status = -ENOMEM; 1171 status = -ENOMEM;
1172 mlog_errno(status); 1172 mlog_errno(status);
@@ -1220,9 +1220,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1220read_super_error: 1220read_super_error:
1221 brelse(bh); 1221 brelse(bh);
1222 1222
1223 if (inode)
1224 iput(inode);
1225
1226 if (osb) { 1223 if (osb) {
1227 atomic_set(&osb->vol_state, VOLUME_DISABLED); 1224 atomic_set(&osb->vol_state, VOLUME_DISABLED);
1228 wake_up(&osb->osb_mount_event); 1225 wake_up(&osb->osb_mount_event);
@@ -1627,21 +1624,17 @@ static int __init ocfs2_init(void)
1627 init_waitqueue_head(&ocfs2__ioend_wq[i]); 1624 init_waitqueue_head(&ocfs2__ioend_wq[i]);
1628 1625
1629 status = init_ocfs2_uptodate_cache(); 1626 status = init_ocfs2_uptodate_cache();
1630 if (status < 0) { 1627 if (status < 0)
1631 mlog_errno(status); 1628 goto out1;
1632 goto leave;
1633 }
1634 1629
1635 status = ocfs2_initialize_mem_caches(); 1630 status = ocfs2_initialize_mem_caches();
1636 if (status < 0) { 1631 if (status < 0)
1637 mlog_errno(status); 1632 goto out2;
1638 goto leave;
1639 }
1640 1633
1641 ocfs2_wq = create_singlethread_workqueue("ocfs2_wq"); 1634 ocfs2_wq = create_singlethread_workqueue("ocfs2_wq");
1642 if (!ocfs2_wq) { 1635 if (!ocfs2_wq) {
1643 status = -ENOMEM; 1636 status = -ENOMEM;
1644 goto leave; 1637 goto out3;
1645 } 1638 }
1646 1639
1647 ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL); 1640 ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL);
@@ -1653,17 +1646,23 @@ static int __init ocfs2_init(void)
1653 ocfs2_set_locking_protocol(); 1646 ocfs2_set_locking_protocol();
1654 1647
1655 status = register_quota_format(&ocfs2_quota_format); 1648 status = register_quota_format(&ocfs2_quota_format);
1656leave: 1649 if (status < 0)
1657 if (status < 0) { 1650 goto out4;
1658 ocfs2_free_mem_caches(); 1651 status = register_filesystem(&ocfs2_fs_type);
1659 exit_ocfs2_uptodate_cache(); 1652 if (!status)
1660 mlog_errno(status); 1653 return 0;
1661 }
1662 1654
1663 if (status >= 0) { 1655 unregister_quota_format(&ocfs2_quota_format);
1664 return register_filesystem(&ocfs2_fs_type); 1656out4:
1665 } else 1657 destroy_workqueue(ocfs2_wq);
1666 return -1; 1658 debugfs_remove(ocfs2_debugfs_root);
1659out3:
1660 ocfs2_free_mem_caches();
1661out2:
1662 exit_ocfs2_uptodate_cache();
1663out1:
1664 mlog_errno(status);
1665 return status;
1667} 1666}
1668 1667
1669static void __exit ocfs2_exit(void) 1668static void __exit ocfs2_exit(void)
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index 6065bb0ba207..dbc842222589 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -539,11 +539,9 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent)
539 goto out_brelse_bh2; 539 goto out_brelse_bh2;
540 } 540 }
541 541
542 sb->s_root = d_alloc_root(root); 542 sb->s_root = d_make_root(root);
543 if (!sb->s_root) { 543 if (!sb->s_root)
544 iput(root);
545 goto out_brelse_bh2; 544 goto out_brelse_bh2;
546 }
547 printk(KERN_DEBUG "omfs: Mounted volume %s\n", omfs_rb->r_name); 545 printk(KERN_DEBUG "omfs: Mounted volume %s\n", omfs_rb->r_name);
548 546
549 ret = 0; 547 ret = 0;
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index a88c03bc749d..bc49c975d501 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -408,13 +408,12 @@ static int openprom_fill_super(struct super_block *s, void *data, int silent)
408 oi->type = op_inode_node; 408 oi->type = op_inode_node;
409 oi->u.node = of_find_node_by_path("/"); 409 oi->u.node = of_find_node_by_path("/");
410 410
411 s->s_root = d_alloc_root(root_inode); 411 s->s_root = d_make_root(root_inode);
412 if (!s->s_root) 412 if (!s->s_root)
413 goto out_no_root_dentry; 413 goto out_no_root_dentry;
414 return 0; 414 return 0;
415 415
416out_no_root_dentry: 416out_no_root_dentry:
417 iput(root_inode);
418 ret = -ENOMEM; 417 ret = -ENOMEM;
419out_no_root: 418out_no_root:
420 printk("openprom_fill_super: get root inode failed\n"); 419 printk("openprom_fill_super: get root inode failed\n");
diff --git a/fs/pipe.c b/fs/pipe.c
index a932ced92a16..25feaa3faac0 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -13,6 +13,7 @@
13#include <linux/fs.h> 13#include <linux/fs.h>
14#include <linux/log2.h> 14#include <linux/log2.h>
15#include <linux/mount.h> 15#include <linux/mount.h>
16#include <linux/magic.h>
16#include <linux/pipe_fs_i.h> 17#include <linux/pipe_fs_i.h>
17#include <linux/uio.h> 18#include <linux/uio.h>
18#include <linux/highmem.h> 19#include <linux/highmem.h>
@@ -230,7 +231,7 @@ void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
230{ 231{
231 if (atomic) { 232 if (atomic) {
232 buf->flags |= PIPE_BUF_FLAG_ATOMIC; 233 buf->flags |= PIPE_BUF_FLAG_ATOMIC;
233 return kmap_atomic(buf->page, KM_USER0); 234 return kmap_atomic(buf->page);
234 } 235 }
235 236
236 return kmap(buf->page); 237 return kmap(buf->page);
@@ -251,7 +252,7 @@ void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
251{ 252{
252 if (buf->flags & PIPE_BUF_FLAG_ATOMIC) { 253 if (buf->flags & PIPE_BUF_FLAG_ATOMIC) {
253 buf->flags &= ~PIPE_BUF_FLAG_ATOMIC; 254 buf->flags &= ~PIPE_BUF_FLAG_ATOMIC;
254 kunmap_atomic(map_data, KM_USER0); 255 kunmap_atomic(map_data);
255 } else 256 } else
256 kunmap(buf->page); 257 kunmap(buf->page);
257} 258}
@@ -565,14 +566,14 @@ redo1:
565 iov_fault_in_pages_read(iov, chars); 566 iov_fault_in_pages_read(iov, chars);
566redo2: 567redo2:
567 if (atomic) 568 if (atomic)
568 src = kmap_atomic(page, KM_USER0); 569 src = kmap_atomic(page);
569 else 570 else
570 src = kmap(page); 571 src = kmap(page);
571 572
572 error = pipe_iov_copy_from_user(src, iov, chars, 573 error = pipe_iov_copy_from_user(src, iov, chars,
573 atomic); 574 atomic);
574 if (atomic) 575 if (atomic)
575 kunmap_atomic(src, KM_USER0); 576 kunmap_atomic(src);
576 else 577 else
577 kunmap(page); 578 kunmap(page);
578 579
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index cea4623f1ed6..5e325a42e33d 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -18,7 +18,7 @@
18#include <linux/fs.h> 18#include <linux/fs.h>
19#include <linux/sched.h> 19#include <linux/sched.h>
20#include <linux/posix_acl.h> 20#include <linux/posix_acl.h>
21#include <linux/module.h> 21#include <linux/export.h>
22 22
23#include <linux/errno.h> 23#include <linux/errno.h>
24 24
diff --git a/fs/proc/array.c b/fs/proc/array.c
index c602b8d20f06..fbb53c249086 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -462,59 +462,56 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
462 /* convert nsec -> ticks */ 462 /* convert nsec -> ticks */
463 start_time = nsec_to_clock_t(start_time); 463 start_time = nsec_to_clock_t(start_time);
464 464
465 seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \ 465 seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state);
466%lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \ 466 seq_put_decimal_ll(m, ' ', ppid);
467%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld %lu %lu %lu\n", 467 seq_put_decimal_ll(m, ' ', pgid);
468 pid_nr_ns(pid, ns), 468 seq_put_decimal_ll(m, ' ', sid);
469 tcomm, 469 seq_put_decimal_ll(m, ' ', tty_nr);
470 state, 470 seq_put_decimal_ll(m, ' ', tty_pgrp);
471 ppid, 471 seq_put_decimal_ull(m, ' ', task->flags);
472 pgid, 472 seq_put_decimal_ull(m, ' ', min_flt);
473 sid, 473 seq_put_decimal_ull(m, ' ', cmin_flt);
474 tty_nr, 474 seq_put_decimal_ull(m, ' ', maj_flt);
475 tty_pgrp, 475 seq_put_decimal_ull(m, ' ', cmaj_flt);
476 task->flags, 476 seq_put_decimal_ull(m, ' ', cputime_to_clock_t(utime));
477 min_flt, 477 seq_put_decimal_ull(m, ' ', cputime_to_clock_t(stime));
478 cmin_flt, 478 seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cutime));
479 maj_flt, 479 seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cstime));
480 cmaj_flt, 480 seq_put_decimal_ll(m, ' ', priority);
481 cputime_to_clock_t(utime), 481 seq_put_decimal_ll(m, ' ', nice);
482 cputime_to_clock_t(stime), 482 seq_put_decimal_ll(m, ' ', num_threads);
483 cputime_to_clock_t(cutime), 483 seq_put_decimal_ull(m, ' ', 0);
484 cputime_to_clock_t(cstime), 484 seq_put_decimal_ull(m, ' ', start_time);
485 priority, 485 seq_put_decimal_ull(m, ' ', vsize);
486 nice, 486 seq_put_decimal_ll(m, ' ', mm ? get_mm_rss(mm) : 0);
487 num_threads, 487 seq_put_decimal_ull(m, ' ', rsslim);
488 start_time, 488 seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->start_code : 1) : 0);
489 vsize, 489 seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->end_code : 1) : 0);
490 mm ? get_mm_rss(mm) : 0, 490 seq_put_decimal_ull(m, ' ', (permitted && mm) ? mm->start_stack : 0);
491 rsslim, 491 seq_put_decimal_ull(m, ' ', esp);
492 mm ? (permitted ? mm->start_code : 1) : 0, 492 seq_put_decimal_ull(m, ' ', eip);
493 mm ? (permitted ? mm->end_code : 1) : 0, 493 /* The signal information here is obsolete.
494 (permitted && mm) ? mm->start_stack : 0, 494 * It must be decimal for Linux 2.0 compatibility.
495 esp, 495 * Use /proc/#/status for real-time signals.
496 eip, 496 */
497 /* The signal information here is obsolete. 497 seq_put_decimal_ull(m, ' ', task->pending.signal.sig[0] & 0x7fffffffUL);
498 * It must be decimal for Linux 2.0 compatibility. 498 seq_put_decimal_ull(m, ' ', task->blocked.sig[0] & 0x7fffffffUL);
499 * Use /proc/#/status for real-time signals. 499 seq_put_decimal_ull(m, ' ', sigign.sig[0] & 0x7fffffffUL);
500 */ 500 seq_put_decimal_ull(m, ' ', sigcatch.sig[0] & 0x7fffffffUL);
501 task->pending.signal.sig[0] & 0x7fffffffUL, 501 seq_put_decimal_ull(m, ' ', wchan);
502 task->blocked.sig[0] & 0x7fffffffUL, 502 seq_put_decimal_ull(m, ' ', 0);
503 sigign .sig[0] & 0x7fffffffUL, 503 seq_put_decimal_ull(m, ' ', 0);
504 sigcatch .sig[0] & 0x7fffffffUL, 504 seq_put_decimal_ll(m, ' ', task->exit_signal);
505 wchan, 505 seq_put_decimal_ll(m, ' ', task_cpu(task));
506 0UL, 506 seq_put_decimal_ull(m, ' ', task->rt_priority);
507 0UL, 507 seq_put_decimal_ull(m, ' ', task->policy);
508 task->exit_signal, 508 seq_put_decimal_ull(m, ' ', delayacct_blkio_ticks(task));
509 task_cpu(task), 509 seq_put_decimal_ull(m, ' ', cputime_to_clock_t(gtime));
510 task->rt_priority, 510 seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cgtime));
511 task->policy, 511 seq_put_decimal_ull(m, ' ', (mm && permitted) ? mm->start_data : 0);
512 (unsigned long long)delayacct_blkio_ticks(task), 512 seq_put_decimal_ull(m, ' ', (mm && permitted) ? mm->end_data : 0);
513 cputime_to_clock_t(gtime), 513 seq_put_decimal_ull(m, ' ', (mm && permitted) ? mm->start_brk : 0);
514 cputime_to_clock_t(cgtime), 514 seq_putc(m, '\n');
515 (mm && permitted) ? mm->start_data : 0,
516 (mm && permitted) ? mm->end_data : 0,
517 (mm && permitted) ? mm->start_brk : 0);
518 if (mm) 515 if (mm)
519 mmput(mm); 516 mmput(mm);
520 return 0; 517 return 0;
@@ -542,8 +539,20 @@ int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
542 size = task_statm(mm, &shared, &text, &data, &resident); 539 size = task_statm(mm, &shared, &text, &data, &resident);
543 mmput(mm); 540 mmput(mm);
544 } 541 }
545 seq_printf(m, "%lu %lu %lu %lu 0 %lu 0\n", 542 /*
546 size, resident, shared, text, data); 543 * For quick read, open code by putting numbers directly
544 * expected format is
545 * seq_printf(m, "%lu %lu %lu %lu 0 %lu 0\n",
546 * size, resident, shared, text, data);
547 */
548 seq_put_decimal_ull(m, 0, size);
549 seq_put_decimal_ull(m, ' ', resident);
550 seq_put_decimal_ull(m, ' ', shared);
551 seq_put_decimal_ull(m, ' ', text);
552 seq_put_decimal_ull(m, ' ', 0);
553 seq_put_decimal_ull(m, ' ', text);
554 seq_put_decimal_ull(m, ' ', 0);
555 seq_putc(m, '\n');
547 556
548 return 0; 557 return 0;
549} 558}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index d4548dd49b02..3b42c1418f31 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1310,8 +1310,7 @@ sched_autogroup_write(struct file *file, const char __user *buf,
1310 if (!p) 1310 if (!p)
1311 return -ESRCH; 1311 return -ESRCH;
1312 1312
1313 err = nice; 1313 err = proc_sched_autogroup_set_nice(p, nice);
1314 err = proc_sched_autogroup_set_nice(p, &err);
1315 if (err) 1314 if (err)
1316 count = err; 1315 count = err;
1317 1316
@@ -2990,9 +2989,9 @@ static const struct pid_entry tgid_base_stuff[] = {
2990 INF("cmdline", S_IRUGO, proc_pid_cmdline), 2989 INF("cmdline", S_IRUGO, proc_pid_cmdline),
2991 ONE("stat", S_IRUGO, proc_tgid_stat), 2990 ONE("stat", S_IRUGO, proc_tgid_stat),
2992 ONE("statm", S_IRUGO, proc_pid_statm), 2991 ONE("statm", S_IRUGO, proc_pid_statm),
2993 REG("maps", S_IRUGO, proc_maps_operations), 2992 REG("maps", S_IRUGO, proc_pid_maps_operations),
2994#ifdef CONFIG_NUMA 2993#ifdef CONFIG_NUMA
2995 REG("numa_maps", S_IRUGO, proc_numa_maps_operations), 2994 REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations),
2996#endif 2995#endif
2997 REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), 2996 REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations),
2998 LNK("cwd", proc_cwd_link), 2997 LNK("cwd", proc_cwd_link),
@@ -3003,7 +3002,7 @@ static const struct pid_entry tgid_base_stuff[] = {
3003 REG("mountstats", S_IRUSR, proc_mountstats_operations), 3002 REG("mountstats", S_IRUSR, proc_mountstats_operations),
3004#ifdef CONFIG_PROC_PAGE_MONITOR 3003#ifdef CONFIG_PROC_PAGE_MONITOR
3005 REG("clear_refs", S_IWUSR, proc_clear_refs_operations), 3004 REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
3006 REG("smaps", S_IRUGO, proc_smaps_operations), 3005 REG("smaps", S_IRUGO, proc_pid_smaps_operations),
3007 REG("pagemap", S_IRUGO, proc_pagemap_operations), 3006 REG("pagemap", S_IRUGO, proc_pagemap_operations),
3008#endif 3007#endif
3009#ifdef CONFIG_SECURITY 3008#ifdef CONFIG_SECURITY
@@ -3349,9 +3348,9 @@ static const struct pid_entry tid_base_stuff[] = {
3349 INF("cmdline", S_IRUGO, proc_pid_cmdline), 3348 INF("cmdline", S_IRUGO, proc_pid_cmdline),
3350 ONE("stat", S_IRUGO, proc_tid_stat), 3349 ONE("stat", S_IRUGO, proc_tid_stat),
3351 ONE("statm", S_IRUGO, proc_pid_statm), 3350 ONE("statm", S_IRUGO, proc_pid_statm),
3352 REG("maps", S_IRUGO, proc_maps_operations), 3351 REG("maps", S_IRUGO, proc_tid_maps_operations),
3353#ifdef CONFIG_NUMA 3352#ifdef CONFIG_NUMA
3354 REG("numa_maps", S_IRUGO, proc_numa_maps_operations), 3353 REG("numa_maps", S_IRUGO, proc_tid_numa_maps_operations),
3355#endif 3354#endif
3356 REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), 3355 REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations),
3357 LNK("cwd", proc_cwd_link), 3356 LNK("cwd", proc_cwd_link),
@@ -3361,7 +3360,7 @@ static const struct pid_entry tid_base_stuff[] = {
3361 REG("mountinfo", S_IRUGO, proc_mountinfo_operations), 3360 REG("mountinfo", S_IRUGO, proc_mountinfo_operations),
3362#ifdef CONFIG_PROC_PAGE_MONITOR 3361#ifdef CONFIG_PROC_PAGE_MONITOR
3363 REG("clear_refs", S_IWUSR, proc_clear_refs_operations), 3362 REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
3364 REG("smaps", S_IRUGO, proc_smaps_operations), 3363 REG("smaps", S_IRUGO, proc_tid_smaps_operations),
3365 REG("pagemap", S_IRUGO, proc_pagemap_operations), 3364 REG("pagemap", S_IRUGO, proc_pagemap_operations),
3366#endif 3365#endif
3367#ifdef CONFIG_SECURITY 3366#ifdef CONFIG_SECURITY
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 84fd3235a590..8461a7b82fdb 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -486,8 +486,6 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
486 486
487int proc_fill_super(struct super_block *s) 487int proc_fill_super(struct super_block *s)
488{ 488{
489 struct inode * root_inode;
490
491 s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC; 489 s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC;
492 s->s_blocksize = 1024; 490 s->s_blocksize = 1024;
493 s->s_blocksize_bits = 10; 491 s->s_blocksize_bits = 10;
@@ -496,19 +494,11 @@ int proc_fill_super(struct super_block *s)
496 s->s_time_gran = 1; 494 s->s_time_gran = 1;
497 495
498 pde_get(&proc_root); 496 pde_get(&proc_root);
499 root_inode = proc_get_inode(s, &proc_root); 497 s->s_root = d_make_root(proc_get_inode(s, &proc_root));
500 if (!root_inode) 498 if (s->s_root)
501 goto out_no_root; 499 return 0;
502 root_inode->i_uid = 0;
503 root_inode->i_gid = 0;
504 s->s_root = d_alloc_root(root_inode);
505 if (!s->s_root)
506 goto out_no_root;
507 return 0;
508 500
509out_no_root:
510 printk("proc_read_super: get root inode failed\n"); 501 printk("proc_read_super: get root inode failed\n");
511 iput(root_inode);
512 pde_put(&proc_root); 502 pde_put(&proc_root);
513 return -ENOMEM; 503 return -ENOMEM;
514} 504}
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 292577531ad1..5f79bb8b4c60 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -10,12 +10,15 @@
10 */ 10 */
11 11
12#include <linux/proc_fs.h> 12#include <linux/proc_fs.h>
13struct ctl_table_header;
13 14
14extern struct proc_dir_entry proc_root; 15extern struct proc_dir_entry proc_root;
15#ifdef CONFIG_PROC_SYSCTL 16#ifdef CONFIG_PROC_SYSCTL
16extern int proc_sys_init(void); 17extern int proc_sys_init(void);
18extern void sysctl_head_put(struct ctl_table_header *head);
17#else 19#else
18static inline void proc_sys_init(void) { } 20static inline void proc_sys_init(void) { }
21static inline void sysctl_head_put(struct ctl_table_header *head) { }
19#endif 22#endif
20#ifdef CONFIG_NET 23#ifdef CONFIG_NET
21extern int proc_net_init(void); 24extern int proc_net_init(void);
@@ -53,9 +56,12 @@ extern int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
53 struct pid *pid, struct task_struct *task); 56 struct pid *pid, struct task_struct *task);
54extern loff_t mem_lseek(struct file *file, loff_t offset, int orig); 57extern loff_t mem_lseek(struct file *file, loff_t offset, int orig);
55 58
56extern const struct file_operations proc_maps_operations; 59extern const struct file_operations proc_pid_maps_operations;
57extern const struct file_operations proc_numa_maps_operations; 60extern const struct file_operations proc_tid_maps_operations;
58extern const struct file_operations proc_smaps_operations; 61extern const struct file_operations proc_pid_numa_maps_operations;
62extern const struct file_operations proc_tid_numa_maps_operations;
63extern const struct file_operations proc_pid_smaps_operations;
64extern const struct file_operations proc_tid_smaps_operations;
59extern const struct file_operations proc_clear_refs_operations; 65extern const struct file_operations proc_clear_refs_operations;
60extern const struct file_operations proc_pagemap_operations; 66extern const struct file_operations proc_pagemap_operations;
61extern const struct file_operations proc_net_operations; 67extern const struct file_operations proc_net_operations;
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index d245cb23dd72..86c67eee439f 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -157,7 +157,8 @@ static int kcore_update_ram(void)
157 157
158#ifdef CONFIG_SPARSEMEM_VMEMMAP 158#ifdef CONFIG_SPARSEMEM_VMEMMAP
159/* calculate vmemmap's address from given system ram pfn and register it */ 159/* calculate vmemmap's address from given system ram pfn and register it */
160int get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head) 160static int
161get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
161{ 162{
162 unsigned long pfn = __pa(ent->addr) >> PAGE_SHIFT; 163 unsigned long pfn = __pa(ent->addr) >> PAGE_SHIFT;
163 unsigned long nr_pages = ent->size >> PAGE_SHIFT; 164 unsigned long nr_pages = ent->size >> PAGE_SHIFT;
@@ -189,7 +190,8 @@ int get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
189 190
190} 191}
191#else 192#else
192int get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head) 193static int
194get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
193{ 195{
194 return 1; 196 return 1;
195} 197}
@@ -513,7 +515,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
513 515
514 n = copy_to_user(buffer, (char *)start, tsz); 516 n = copy_to_user(buffer, (char *)start, tsz);
515 /* 517 /*
516 * We cannot distingush between fault on source 518 * We cannot distinguish between fault on source
517 * and fault on destination. When this happens 519 * and fault on destination. When this happens
518 * we clear too and hope it will trigger the 520 * we clear too and hope it will trigger the
519 * EFAULT again. 521 * EFAULT again.
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 27da860115c6..3551f1f839eb 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -53,7 +53,7 @@ static struct dentry *proc_ns_instantiate(struct inode *dir,
53 ei->ns_ops = ns_ops; 53 ei->ns_ops = ns_ops;
54 ei->ns = ns; 54 ei->ns = ns;
55 55
56 dentry->d_op = &pid_dentry_operations; 56 d_set_d_op(dentry, &pid_dentry_operations);
57 d_add(dentry, inode); 57 d_add(dentry, inode);
58 /* Close the race of the process dying before we return the dentry */ 58 /* Close the race of the process dying before we return the dentry */
59 if (pid_revalidate(dentry, NULL)) 59 if (pid_revalidate(dentry, NULL))
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 6d8e6a9e93ab..7fcd0d60a968 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -115,6 +115,8 @@ u64 stable_page_flags(struct page *page)
115 u |= 1 << KPF_COMPOUND_TAIL; 115 u |= 1 << KPF_COMPOUND_TAIL;
116 if (PageHuge(page)) 116 if (PageHuge(page))
117 u |= 1 << KPF_HUGE; 117 u |= 1 << KPF_HUGE;
118 else if (PageTransCompound(page))
119 u |= 1 << KPF_THP;
118 120
119 /* 121 /*
120 * Caveats on high order pages: page->_count will only be set 122 * Caveats on high order pages: page->_count will only be set
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index a6b62173d4c3..21d836f40292 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -6,7 +6,10 @@
6#include <linux/poll.h> 6#include <linux/poll.h>
7#include <linux/proc_fs.h> 7#include <linux/proc_fs.h>
8#include <linux/security.h> 8#include <linux/security.h>
9#include <linux/sched.h>
9#include <linux/namei.h> 10#include <linux/namei.h>
11#include <linux/mm.h>
12#include <linux/module.h>
10#include "internal.h" 13#include "internal.h"
11 14
12static const struct dentry_operations proc_sys_dentry_operations; 15static const struct dentry_operations proc_sys_dentry_operations;
@@ -24,6 +27,371 @@ void proc_sys_poll_notify(struct ctl_table_poll *poll)
24 wake_up_interruptible(&poll->wait); 27 wake_up_interruptible(&poll->wait);
25} 28}
26 29
30static struct ctl_table root_table[] = {
31 {
32 .procname = "",
33 .mode = S_IFDIR|S_IRUGO|S_IXUGO,
34 },
35 { }
36};
37static struct ctl_table_root sysctl_table_root = {
38 .default_set.dir.header = {
39 {{.count = 1,
40 .nreg = 1,
41 .ctl_table = root_table }},
42 .ctl_table_arg = root_table,
43 .root = &sysctl_table_root,
44 .set = &sysctl_table_root.default_set,
45 },
46};
47
48static DEFINE_SPINLOCK(sysctl_lock);
49
50static void drop_sysctl_table(struct ctl_table_header *header);
51static int sysctl_follow_link(struct ctl_table_header **phead,
52 struct ctl_table **pentry, struct nsproxy *namespaces);
53static int insert_links(struct ctl_table_header *head);
54static void put_links(struct ctl_table_header *header);
55
56static void sysctl_print_dir(struct ctl_dir *dir)
57{
58 if (dir->header.parent)
59 sysctl_print_dir(dir->header.parent);
60 printk(KERN_CONT "%s/", dir->header.ctl_table[0].procname);
61}
62
63static int namecmp(const char *name1, int len1, const char *name2, int len2)
64{
65 int minlen;
66 int cmp;
67
68 minlen = len1;
69 if (minlen > len2)
70 minlen = len2;
71
72 cmp = memcmp(name1, name2, minlen);
73 if (cmp == 0)
74 cmp = len1 - len2;
75 return cmp;
76}
77
78/* Called under sysctl_lock */
79static struct ctl_table *find_entry(struct ctl_table_header **phead,
80 struct ctl_dir *dir, const char *name, int namelen)
81{
82 struct ctl_table_header *head;
83 struct ctl_table *entry;
84 struct rb_node *node = dir->root.rb_node;
85
86 while (node)
87 {
88 struct ctl_node *ctl_node;
89 const char *procname;
90 int cmp;
91
92 ctl_node = rb_entry(node, struct ctl_node, node);
93 head = ctl_node->header;
94 entry = &head->ctl_table[ctl_node - head->node];
95 procname = entry->procname;
96
97 cmp = namecmp(name, namelen, procname, strlen(procname));
98 if (cmp < 0)
99 node = node->rb_left;
100 else if (cmp > 0)
101 node = node->rb_right;
102 else {
103 *phead = head;
104 return entry;
105 }
106 }
107 return NULL;
108}
109
110static int insert_entry(struct ctl_table_header *head, struct ctl_table *entry)
111{
112 struct rb_node *node = &head->node[entry - head->ctl_table].node;
113 struct rb_node **p = &head->parent->root.rb_node;
114 struct rb_node *parent = NULL;
115 const char *name = entry->procname;
116 int namelen = strlen(name);
117
118 while (*p) {
119 struct ctl_table_header *parent_head;
120 struct ctl_table *parent_entry;
121 struct ctl_node *parent_node;
122 const char *parent_name;
123 int cmp;
124
125 parent = *p;
126 parent_node = rb_entry(parent, struct ctl_node, node);
127 parent_head = parent_node->header;
128 parent_entry = &parent_head->ctl_table[parent_node - parent_head->node];
129 parent_name = parent_entry->procname;
130
131 cmp = namecmp(name, namelen, parent_name, strlen(parent_name));
132 if (cmp < 0)
133 p = &(*p)->rb_left;
134 else if (cmp > 0)
135 p = &(*p)->rb_right;
136 else {
137 printk(KERN_ERR "sysctl duplicate entry: ");
138 sysctl_print_dir(head->parent);
139 printk(KERN_CONT "/%s\n", entry->procname);
140 return -EEXIST;
141 }
142 }
143
144 rb_link_node(node, parent, p);
145 return 0;
146}
147
148static void erase_entry(struct ctl_table_header *head, struct ctl_table *entry)
149{
150 struct rb_node *node = &head->node[entry - head->ctl_table].node;
151
152 rb_erase(node, &head->parent->root);
153}
154
155static void init_header(struct ctl_table_header *head,
156 struct ctl_table_root *root, struct ctl_table_set *set,
157 struct ctl_node *node, struct ctl_table *table)
158{
159 head->ctl_table = table;
160 head->ctl_table_arg = table;
161 head->used = 0;
162 head->count = 1;
163 head->nreg = 1;
164 head->unregistering = NULL;
165 head->root = root;
166 head->set = set;
167 head->parent = NULL;
168 head->node = node;
169 if (node) {
170 struct ctl_table *entry;
171 for (entry = table; entry->procname; entry++, node++) {
172 rb_init_node(&node->node);
173 node->header = head;
174 }
175 }
176}
177
178static void erase_header(struct ctl_table_header *head)
179{
180 struct ctl_table *entry;
181 for (entry = head->ctl_table; entry->procname; entry++)
182 erase_entry(head, entry);
183}
184
185static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header)
186{
187 struct ctl_table *entry;
188 int err;
189
190 dir->header.nreg++;
191 header->parent = dir;
192 err = insert_links(header);
193 if (err)
194 goto fail_links;
195 for (entry = header->ctl_table; entry->procname; entry++) {
196 err = insert_entry(header, entry);
197 if (err)
198 goto fail;
199 }
200 return 0;
201fail:
202 erase_header(header);
203 put_links(header);
204fail_links:
205 header->parent = NULL;
206 drop_sysctl_table(&dir->header);
207 return err;
208}
209
210/* called under sysctl_lock */
211static int use_table(struct ctl_table_header *p)
212{
213 if (unlikely(p->unregistering))
214 return 0;
215 p->used++;
216 return 1;
217}
218
219/* called under sysctl_lock */
220static void unuse_table(struct ctl_table_header *p)
221{
222 if (!--p->used)
223 if (unlikely(p->unregistering))
224 complete(p->unregistering);
225}
226
227/* called under sysctl_lock, will reacquire if has to wait */
228static void start_unregistering(struct ctl_table_header *p)
229{
230 /*
231 * if p->used is 0, nobody will ever touch that entry again;
232 * we'll eliminate all paths to it before dropping sysctl_lock
233 */
234 if (unlikely(p->used)) {
235 struct completion wait;
236 init_completion(&wait);
237 p->unregistering = &wait;
238 spin_unlock(&sysctl_lock);
239 wait_for_completion(&wait);
240 spin_lock(&sysctl_lock);
241 } else {
242 /* anything non-NULL; we'll never dereference it */
243 p->unregistering = ERR_PTR(-EINVAL);
244 }
245 /*
246 * do not remove from the list until nobody holds it; walking the
247 * list in do_sysctl() relies on that.
248 */
249 erase_header(p);
250}
251
252static void sysctl_head_get(struct ctl_table_header *head)
253{
254 spin_lock(&sysctl_lock);
255 head->count++;
256 spin_unlock(&sysctl_lock);
257}
258
259void sysctl_head_put(struct ctl_table_header *head)
260{
261 spin_lock(&sysctl_lock);
262 if (!--head->count)
263 kfree_rcu(head, rcu);
264 spin_unlock(&sysctl_lock);
265}
266
267static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
268{
269 if (!head)
270 BUG();
271 spin_lock(&sysctl_lock);
272 if (!use_table(head))
273 head = ERR_PTR(-ENOENT);
274 spin_unlock(&sysctl_lock);
275 return head;
276}
277
278static void sysctl_head_finish(struct ctl_table_header *head)
279{
280 if (!head)
281 return;
282 spin_lock(&sysctl_lock);
283 unuse_table(head);
284 spin_unlock(&sysctl_lock);
285}
286
287static struct ctl_table_set *
288lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces)
289{
290 struct ctl_table_set *set = &root->default_set;
291 if (root->lookup)
292 set = root->lookup(root, namespaces);
293 return set;
294}
295
296static struct ctl_table *lookup_entry(struct ctl_table_header **phead,
297 struct ctl_dir *dir,
298 const char *name, int namelen)
299{
300 struct ctl_table_header *head;
301 struct ctl_table *entry;
302
303 spin_lock(&sysctl_lock);
304 entry = find_entry(&head, dir, name, namelen);
305 if (entry && use_table(head))
306 *phead = head;
307 else
308 entry = NULL;
309 spin_unlock(&sysctl_lock);
310 return entry;
311}
312
313static struct ctl_node *first_usable_entry(struct rb_node *node)
314{
315 struct ctl_node *ctl_node;
316
317 for (;node; node = rb_next(node)) {
318 ctl_node = rb_entry(node, struct ctl_node, node);
319 if (use_table(ctl_node->header))
320 return ctl_node;
321 }
322 return NULL;
323}
324
325static void first_entry(struct ctl_dir *dir,
326 struct ctl_table_header **phead, struct ctl_table **pentry)
327{
328 struct ctl_table_header *head = NULL;
329 struct ctl_table *entry = NULL;
330 struct ctl_node *ctl_node;
331
332 spin_lock(&sysctl_lock);
333 ctl_node = first_usable_entry(rb_first(&dir->root));
334 spin_unlock(&sysctl_lock);
335 if (ctl_node) {
336 head = ctl_node->header;
337 entry = &head->ctl_table[ctl_node - head->node];
338 }
339 *phead = head;
340 *pentry = entry;
341}
342
343static void next_entry(struct ctl_table_header **phead, struct ctl_table **pentry)
344{
345 struct ctl_table_header *head = *phead;
346 struct ctl_table *entry = *pentry;
347 struct ctl_node *ctl_node = &head->node[entry - head->ctl_table];
348
349 spin_lock(&sysctl_lock);
350 unuse_table(head);
351
352 ctl_node = first_usable_entry(rb_next(&ctl_node->node));
353 spin_unlock(&sysctl_lock);
354 head = NULL;
355 if (ctl_node) {
356 head = ctl_node->header;
357 entry = &head->ctl_table[ctl_node - head->node];
358 }
359 *phead = head;
360 *pentry = entry;
361}
362
363void register_sysctl_root(struct ctl_table_root *root)
364{
365}
366
367/*
368 * sysctl_perm does NOT grant the superuser all rights automatically, because
369 * some sysctl variables are readonly even to root.
370 */
371
372static int test_perm(int mode, int op)
373{
374 if (!current_euid())
375 mode >>= 6;
376 else if (in_egroup_p(0))
377 mode >>= 3;
378 if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0)
379 return 0;
380 return -EACCES;
381}
382
383static int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op)
384{
385 int mode;
386
387 if (root->permissions)
388 mode = root->permissions(root, current->nsproxy, table);
389 else
390 mode = table->mode;
391
392 return test_perm(mode, op);
393}
394
27static struct inode *proc_sys_make_inode(struct super_block *sb, 395static struct inode *proc_sys_make_inode(struct super_block *sb,
28 struct ctl_table_header *head, struct ctl_table *table) 396 struct ctl_table_header *head, struct ctl_table *table)
29{ 397{
@@ -43,13 +411,12 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
43 411
44 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 412 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
45 inode->i_mode = table->mode; 413 inode->i_mode = table->mode;
46 if (!table->child) { 414 if (!S_ISDIR(table->mode)) {
47 inode->i_mode |= S_IFREG; 415 inode->i_mode |= S_IFREG;
48 inode->i_op = &proc_sys_inode_operations; 416 inode->i_op = &proc_sys_inode_operations;
49 inode->i_fop = &proc_sys_file_operations; 417 inode->i_fop = &proc_sys_file_operations;
50 } else { 418 } else {
51 inode->i_mode |= S_IFDIR; 419 inode->i_mode |= S_IFDIR;
52 clear_nlink(inode);
53 inode->i_op = &proc_sys_dir_operations; 420 inode->i_op = &proc_sys_dir_operations;
54 inode->i_fop = &proc_sys_dir_file_operations; 421 inode->i_fop = &proc_sys_dir_file_operations;
55 } 422 }
@@ -57,70 +424,42 @@ out:
57 return inode; 424 return inode;
58} 425}
59 426
60static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name)
61{
62 int len;
63 for ( ; p->procname; p++) {
64
65 if (!p->procname)
66 continue;
67
68 len = strlen(p->procname);
69 if (len != name->len)
70 continue;
71
72 if (memcmp(p->procname, name->name, len) != 0)
73 continue;
74
75 /* I have a match */
76 return p;
77 }
78 return NULL;
79}
80
81static struct ctl_table_header *grab_header(struct inode *inode) 427static struct ctl_table_header *grab_header(struct inode *inode)
82{ 428{
83 if (PROC_I(inode)->sysctl) 429 struct ctl_table_header *head = PROC_I(inode)->sysctl;
84 return sysctl_head_grab(PROC_I(inode)->sysctl); 430 if (!head)
85 else 431 head = &sysctl_table_root.default_set.dir.header;
86 return sysctl_head_next(NULL); 432 return sysctl_head_grab(head);
87} 433}
88 434
89static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, 435static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
90 struct nameidata *nd) 436 struct nameidata *nd)
91{ 437{
92 struct ctl_table_header *head = grab_header(dir); 438 struct ctl_table_header *head = grab_header(dir);
93 struct ctl_table *table = PROC_I(dir)->sysctl_entry;
94 struct ctl_table_header *h = NULL; 439 struct ctl_table_header *h = NULL;
95 struct qstr *name = &dentry->d_name; 440 struct qstr *name = &dentry->d_name;
96 struct ctl_table *p; 441 struct ctl_table *p;
97 struct inode *inode; 442 struct inode *inode;
98 struct dentry *err = ERR_PTR(-ENOENT); 443 struct dentry *err = ERR_PTR(-ENOENT);
444 struct ctl_dir *ctl_dir;
445 int ret;
99 446
100 if (IS_ERR(head)) 447 if (IS_ERR(head))
101 return ERR_CAST(head); 448 return ERR_CAST(head);
102 449
103 if (table && !table->child) { 450 ctl_dir = container_of(head, struct ctl_dir, header);
104 WARN_ON(1);
105 goto out;
106 }
107
108 table = table ? table->child : head->ctl_table;
109
110 p = find_in_table(table, name);
111 if (!p) {
112 for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) {
113 if (h->attached_to != table)
114 continue;
115 p = find_in_table(h->attached_by, name);
116 if (p)
117 break;
118 }
119 }
120 451
452 p = lookup_entry(&h, ctl_dir, name->name, name->len);
121 if (!p) 453 if (!p)
122 goto out; 454 goto out;
123 455
456 if (S_ISLNK(p->mode)) {
457 ret = sysctl_follow_link(&h, &p, current->nsproxy);
458 err = ERR_PTR(ret);
459 if (ret)
460 goto out;
461 }
462
124 err = ERR_PTR(-ENOMEM); 463 err = ERR_PTR(-ENOMEM);
125 inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p); 464 inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p);
126 if (h) 465 if (h)
@@ -188,20 +527,32 @@ static ssize_t proc_sys_write(struct file *filp, const char __user *buf,
188 527
189static int proc_sys_open(struct inode *inode, struct file *filp) 528static int proc_sys_open(struct inode *inode, struct file *filp)
190{ 529{
530 struct ctl_table_header *head = grab_header(inode);
191 struct ctl_table *table = PROC_I(inode)->sysctl_entry; 531 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
192 532
533 /* sysctl was unregistered */
534 if (IS_ERR(head))
535 return PTR_ERR(head);
536
193 if (table->poll) 537 if (table->poll)
194 filp->private_data = proc_sys_poll_event(table->poll); 538 filp->private_data = proc_sys_poll_event(table->poll);
195 539
540 sysctl_head_finish(head);
541
196 return 0; 542 return 0;
197} 543}
198 544
199static unsigned int proc_sys_poll(struct file *filp, poll_table *wait) 545static unsigned int proc_sys_poll(struct file *filp, poll_table *wait)
200{ 546{
201 struct inode *inode = filp->f_path.dentry->d_inode; 547 struct inode *inode = filp->f_path.dentry->d_inode;
548 struct ctl_table_header *head = grab_header(inode);
202 struct ctl_table *table = PROC_I(inode)->sysctl_entry; 549 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
203 unsigned long event = (unsigned long)filp->private_data;
204 unsigned int ret = DEFAULT_POLLMASK; 550 unsigned int ret = DEFAULT_POLLMASK;
551 unsigned long event;
552
553 /* sysctl was unregistered */
554 if (IS_ERR(head))
555 return POLLERR | POLLHUP;
205 556
206 if (!table->proc_handler) 557 if (!table->proc_handler)
207 goto out; 558 goto out;
@@ -209,6 +560,7 @@ static unsigned int proc_sys_poll(struct file *filp, poll_table *wait)
209 if (!table->poll) 560 if (!table->poll)
210 goto out; 561 goto out;
211 562
563 event = (unsigned long)filp->private_data;
212 poll_wait(filp, &table->poll->wait, wait); 564 poll_wait(filp, &table->poll->wait, wait);
213 565
214 if (event != atomic_read(&table->poll->event)) { 566 if (event != atomic_read(&table->poll->event)) {
@@ -217,6 +569,8 @@ static unsigned int proc_sys_poll(struct file *filp, poll_table *wait)
217 } 569 }
218 570
219out: 571out:
572 sysctl_head_finish(head);
573
220 return ret; 574 return ret;
221} 575}
222 576
@@ -258,28 +612,45 @@ static int proc_sys_fill_cache(struct file *filp, void *dirent,
258 return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type); 612 return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type);
259} 613}
260 614
615static int proc_sys_link_fill_cache(struct file *filp, void *dirent,
616 filldir_t filldir,
617 struct ctl_table_header *head,
618 struct ctl_table *table)
619{
620 int err, ret = 0;
621 head = sysctl_head_grab(head);
622
623 if (S_ISLNK(table->mode)) {
624 /* It is not an error if we can not follow the link ignore it */
625 err = sysctl_follow_link(&head, &table, current->nsproxy);
626 if (err)
627 goto out;
628 }
629
630 ret = proc_sys_fill_cache(filp, dirent, filldir, head, table);
631out:
632 sysctl_head_finish(head);
633 return ret;
634}
635
261static int scan(struct ctl_table_header *head, ctl_table *table, 636static int scan(struct ctl_table_header *head, ctl_table *table,
262 unsigned long *pos, struct file *file, 637 unsigned long *pos, struct file *file,
263 void *dirent, filldir_t filldir) 638 void *dirent, filldir_t filldir)
264{ 639{
640 int res;
265 641
266 for (; table->procname; table++, (*pos)++) { 642 if ((*pos)++ < file->f_pos)
267 int res; 643 return 0;
268
269 /* Can't do anything without a proc name */
270 if (!table->procname)
271 continue;
272
273 if (*pos < file->f_pos)
274 continue;
275 644
645 if (unlikely(S_ISLNK(table->mode)))
646 res = proc_sys_link_fill_cache(file, dirent, filldir, head, table);
647 else
276 res = proc_sys_fill_cache(file, dirent, filldir, head, table); 648 res = proc_sys_fill_cache(file, dirent, filldir, head, table);
277 if (res)
278 return res;
279 649
280 file->f_pos = *pos + 1; 650 if (res == 0)
281 } 651 file->f_pos = *pos;
282 return 0; 652
653 return res;
283} 654}
284 655
285static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) 656static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir)
@@ -287,20 +658,16 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir)
287 struct dentry *dentry = filp->f_path.dentry; 658 struct dentry *dentry = filp->f_path.dentry;
288 struct inode *inode = dentry->d_inode; 659 struct inode *inode = dentry->d_inode;
289 struct ctl_table_header *head = grab_header(inode); 660 struct ctl_table_header *head = grab_header(inode);
290 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
291 struct ctl_table_header *h = NULL; 661 struct ctl_table_header *h = NULL;
662 struct ctl_table *entry;
663 struct ctl_dir *ctl_dir;
292 unsigned long pos; 664 unsigned long pos;
293 int ret = -EINVAL; 665 int ret = -EINVAL;
294 666
295 if (IS_ERR(head)) 667 if (IS_ERR(head))
296 return PTR_ERR(head); 668 return PTR_ERR(head);
297 669
298 if (table && !table->child) { 670 ctl_dir = container_of(head, struct ctl_dir, header);
299 WARN_ON(1);
300 goto out;
301 }
302
303 table = table ? table->child : head->ctl_table;
304 671
305 ret = 0; 672 ret = 0;
306 /* Avoid a switch here: arm builds fail with missing __cmpdi2 */ 673 /* Avoid a switch here: arm builds fail with missing __cmpdi2 */
@@ -318,14 +685,8 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir)
318 } 685 }
319 pos = 2; 686 pos = 2;
320 687
321 ret = scan(head, table, &pos, filp, dirent, filldir); 688 for (first_entry(ctl_dir, &h, &entry); h; next_entry(&h, &entry)) {
322 if (ret) 689 ret = scan(h, entry, &pos, filp, dirent, filldir);
323 goto out;
324
325 for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) {
326 if (h->attached_to != table)
327 continue;
328 ret = scan(h, h->attached_by, &pos, filp, dirent, filldir);
329 if (ret) { 690 if (ret) {
330 sysctl_head_finish(h); 691 sysctl_head_finish(h);
331 break; 692 break;
@@ -445,6 +806,21 @@ static int proc_sys_delete(const struct dentry *dentry)
445 return !!PROC_I(dentry->d_inode)->sysctl->unregistering; 806 return !!PROC_I(dentry->d_inode)->sysctl->unregistering;
446} 807}
447 808
809static int sysctl_is_seen(struct ctl_table_header *p)
810{
811 struct ctl_table_set *set = p->set;
812 int res;
813 spin_lock(&sysctl_lock);
814 if (p->unregistering)
815 res = 0;
816 else if (!set->is_seen)
817 res = 1;
818 else
819 res = set->is_seen(set);
820 spin_unlock(&sysctl_lock);
821 return res;
822}
823
448static int proc_sys_compare(const struct dentry *parent, 824static int proc_sys_compare(const struct dentry *parent,
449 const struct inode *pinode, 825 const struct inode *pinode,
450 const struct dentry *dentry, const struct inode *inode, 826 const struct dentry *dentry, const struct inode *inode,
@@ -470,6 +846,753 @@ static const struct dentry_operations proc_sys_dentry_operations = {
470 .d_compare = proc_sys_compare, 846 .d_compare = proc_sys_compare,
471}; 847};
472 848
849static struct ctl_dir *find_subdir(struct ctl_dir *dir,
850 const char *name, int namelen)
851{
852 struct ctl_table_header *head;
853 struct ctl_table *entry;
854
855 entry = find_entry(&head, dir, name, namelen);
856 if (!entry)
857 return ERR_PTR(-ENOENT);
858 if (!S_ISDIR(entry->mode))
859 return ERR_PTR(-ENOTDIR);
860 return container_of(head, struct ctl_dir, header);
861}
862
863static struct ctl_dir *new_dir(struct ctl_table_set *set,
864 const char *name, int namelen)
865{
866 struct ctl_table *table;
867 struct ctl_dir *new;
868 struct ctl_node *node;
869 char *new_name;
870
871 new = kzalloc(sizeof(*new) + sizeof(struct ctl_node) +
872 sizeof(struct ctl_table)*2 + namelen + 1,
873 GFP_KERNEL);
874 if (!new)
875 return NULL;
876
877 node = (struct ctl_node *)(new + 1);
878 table = (struct ctl_table *)(node + 1);
879 new_name = (char *)(table + 2);
880 memcpy(new_name, name, namelen);
881 new_name[namelen] = '\0';
882 table[0].procname = new_name;
883 table[0].mode = S_IFDIR|S_IRUGO|S_IXUGO;
884 init_header(&new->header, set->dir.header.root, set, node, table);
885
886 return new;
887}
888
889/**
890 * get_subdir - find or create a subdir with the specified name.
891 * @dir: Directory to create the subdirectory in
892 * @name: The name of the subdirectory to find or create
893 * @namelen: The length of name
894 *
895 * Takes a directory with an elevated reference count so we know that
896 * if we drop the lock the directory will not go away. Upon success
897 * the reference is moved from @dir to the returned subdirectory.
898 * Upon error an error code is returned and the reference on @dir is
899 * simply dropped.
900 */
901static struct ctl_dir *get_subdir(struct ctl_dir *dir,
902 const char *name, int namelen)
903{
904 struct ctl_table_set *set = dir->header.set;
905 struct ctl_dir *subdir, *new = NULL;
906 int err;
907
908 spin_lock(&sysctl_lock);
909 subdir = find_subdir(dir, name, namelen);
910 if (!IS_ERR(subdir))
911 goto found;
912 if (PTR_ERR(subdir) != -ENOENT)
913 goto failed;
914
915 spin_unlock(&sysctl_lock);
916 new = new_dir(set, name, namelen);
917 spin_lock(&sysctl_lock);
918 subdir = ERR_PTR(-ENOMEM);
919 if (!new)
920 goto failed;
921
922 /* Was the subdir added while we dropped the lock? */
923 subdir = find_subdir(dir, name, namelen);
924 if (!IS_ERR(subdir))
925 goto found;
926 if (PTR_ERR(subdir) != -ENOENT)
927 goto failed;
928
929 /* Nope. Use the our freshly made directory entry. */
930 err = insert_header(dir, &new->header);
931 subdir = ERR_PTR(err);
932 if (err)
933 goto failed;
934 subdir = new;
935found:
936 subdir->header.nreg++;
937failed:
938 if (unlikely(IS_ERR(subdir))) {
939 printk(KERN_ERR "sysctl could not get directory: ");
940 sysctl_print_dir(dir);
941 printk(KERN_CONT "/%*.*s %ld\n",
942 namelen, namelen, name, PTR_ERR(subdir));
943 }
944 drop_sysctl_table(&dir->header);
945 if (new)
946 drop_sysctl_table(&new->header);
947 spin_unlock(&sysctl_lock);
948 return subdir;
949}
950
951static struct ctl_dir *xlate_dir(struct ctl_table_set *set, struct ctl_dir *dir)
952{
953 struct ctl_dir *parent;
954 const char *procname;
955 if (!dir->header.parent)
956 return &set->dir;
957 parent = xlate_dir(set, dir->header.parent);
958 if (IS_ERR(parent))
959 return parent;
960 procname = dir->header.ctl_table[0].procname;
961 return find_subdir(parent, procname, strlen(procname));
962}
963
964static int sysctl_follow_link(struct ctl_table_header **phead,
965 struct ctl_table **pentry, struct nsproxy *namespaces)
966{
967 struct ctl_table_header *head;
968 struct ctl_table_root *root;
969 struct ctl_table_set *set;
970 struct ctl_table *entry;
971 struct ctl_dir *dir;
972 int ret;
973
974 ret = 0;
975 spin_lock(&sysctl_lock);
976 root = (*pentry)->data;
977 set = lookup_header_set(root, namespaces);
978 dir = xlate_dir(set, (*phead)->parent);
979 if (IS_ERR(dir))
980 ret = PTR_ERR(dir);
981 else {
982 const char *procname = (*pentry)->procname;
983 head = NULL;
984 entry = find_entry(&head, dir, procname, strlen(procname));
985 ret = -ENOENT;
986 if (entry && use_table(head)) {
987 unuse_table(*phead);
988 *phead = head;
989 *pentry = entry;
990 ret = 0;
991 }
992 }
993
994 spin_unlock(&sysctl_lock);
995 return ret;
996}
997
998static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...)
999{
1000 struct va_format vaf;
1001 va_list args;
1002
1003 va_start(args, fmt);
1004 vaf.fmt = fmt;
1005 vaf.va = &args;
1006
1007 printk(KERN_ERR "sysctl table check failed: %s/%s %pV\n",
1008 path, table->procname, &vaf);
1009
1010 va_end(args);
1011 return -EINVAL;
1012}
1013
1014static int sysctl_check_table(const char *path, struct ctl_table *table)
1015{
1016 int err = 0;
1017 for (; table->procname; table++) {
1018 if (table->child)
1019 err = sysctl_err(path, table, "Not a file");
1020
1021 if ((table->proc_handler == proc_dostring) ||
1022 (table->proc_handler == proc_dointvec) ||
1023 (table->proc_handler == proc_dointvec_minmax) ||
1024 (table->proc_handler == proc_dointvec_jiffies) ||
1025 (table->proc_handler == proc_dointvec_userhz_jiffies) ||
1026 (table->proc_handler == proc_dointvec_ms_jiffies) ||
1027 (table->proc_handler == proc_doulongvec_minmax) ||
1028 (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) {
1029 if (!table->data)
1030 err = sysctl_err(path, table, "No data");
1031 if (!table->maxlen)
1032 err = sysctl_err(path, table, "No maxlen");
1033 }
1034 if (!table->proc_handler)
1035 err = sysctl_err(path, table, "No proc_handler");
1036
1037 if ((table->mode & (S_IRUGO|S_IWUGO)) != table->mode)
1038 err = sysctl_err(path, table, "bogus .mode 0%o",
1039 table->mode);
1040 }
1041 return err;
1042}
1043
1044static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table *table,
1045 struct ctl_table_root *link_root)
1046{
1047 struct ctl_table *link_table, *entry, *link;
1048 struct ctl_table_header *links;
1049 struct ctl_node *node;
1050 char *link_name;
1051 int nr_entries, name_bytes;
1052
1053 name_bytes = 0;
1054 nr_entries = 0;
1055 for (entry = table; entry->procname; entry++) {
1056 nr_entries++;
1057 name_bytes += strlen(entry->procname) + 1;
1058 }
1059
1060 links = kzalloc(sizeof(struct ctl_table_header) +
1061 sizeof(struct ctl_node)*nr_entries +
1062 sizeof(struct ctl_table)*(nr_entries + 1) +
1063 name_bytes,
1064 GFP_KERNEL);
1065
1066 if (!links)
1067 return NULL;
1068
1069 node = (struct ctl_node *)(links + 1);
1070 link_table = (struct ctl_table *)(node + nr_entries);
1071 link_name = (char *)&link_table[nr_entries + 1];
1072
1073 for (link = link_table, entry = table; entry->procname; link++, entry++) {
1074 int len = strlen(entry->procname) + 1;
1075 memcpy(link_name, entry->procname, len);
1076 link->procname = link_name;
1077 link->mode = S_IFLNK|S_IRWXUGO;
1078 link->data = link_root;
1079 link_name += len;
1080 }
1081 init_header(links, dir->header.root, dir->header.set, node, link_table);
1082 links->nreg = nr_entries;
1083
1084 return links;
1085}
1086
1087static bool get_links(struct ctl_dir *dir,
1088 struct ctl_table *table, struct ctl_table_root *link_root)
1089{
1090 struct ctl_table_header *head;
1091 struct ctl_table *entry, *link;
1092
1093 /* Are there links available for every entry in table? */
1094 for (entry = table; entry->procname; entry++) {
1095 const char *procname = entry->procname;
1096 link = find_entry(&head, dir, procname, strlen(procname));
1097 if (!link)
1098 return false;
1099 if (S_ISDIR(link->mode) && S_ISDIR(entry->mode))
1100 continue;
1101 if (S_ISLNK(link->mode) && (link->data == link_root))
1102 continue;
1103 return false;
1104 }
1105
1106 /* The checks passed. Increase the registration count on the links */
1107 for (entry = table; entry->procname; entry++) {
1108 const char *procname = entry->procname;
1109 link = find_entry(&head, dir, procname, strlen(procname));
1110 head->nreg++;
1111 }
1112 return true;
1113}
1114
1115static int insert_links(struct ctl_table_header *head)
1116{
1117 struct ctl_table_set *root_set = &sysctl_table_root.default_set;
1118 struct ctl_dir *core_parent = NULL;
1119 struct ctl_table_header *links;
1120 int err;
1121
1122 if (head->set == root_set)
1123 return 0;
1124
1125 core_parent = xlate_dir(root_set, head->parent);
1126 if (IS_ERR(core_parent))
1127 return 0;
1128
1129 if (get_links(core_parent, head->ctl_table, head->root))
1130 return 0;
1131
1132 core_parent->header.nreg++;
1133 spin_unlock(&sysctl_lock);
1134
1135 links = new_links(core_parent, head->ctl_table, head->root);
1136
1137 spin_lock(&sysctl_lock);
1138 err = -ENOMEM;
1139 if (!links)
1140 goto out;
1141
1142 err = 0;
1143 if (get_links(core_parent, head->ctl_table, head->root)) {
1144 kfree(links);
1145 goto out;
1146 }
1147
1148 err = insert_header(core_parent, links);
1149 if (err)
1150 kfree(links);
1151out:
1152 drop_sysctl_table(&core_parent->header);
1153 return err;
1154}
1155
/**
 * __register_sysctl_table - register a leaf sysctl table
 * @set: Sysctl tree to register on
 * @path: The path to the directory the sysctl table is in.
 * @table: the top-level table structure
 *
 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
 * array. A completely 0 filled entry terminates the table.
 *
 * The members of the &struct ctl_table structure are used as follows:
 *
 * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
 *            enter a sysctl file
 *
 * data - a pointer to data for use by proc_handler
 *
 * maxlen - the maximum size in bytes of the data
 *
 * mode - the file permissions for the /proc/sys file
 *
 * child - must be %NULL.
 *
 * proc_handler - the text handler routine (described below)
 *
 * extra1, extra2 - extra pointers usable by the proc handler routines
 *
 * Leaf nodes in the sysctl tree will be represented by a single file
 * under /proc; non-leaf nodes will be represented by directories.
 *
 * There must be a proc_handler routine for any terminal nodes.
 * Several default handlers are available to cover common cases -
 *
 * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
 * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
 * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
 *
 * It is the handler's job to read the input buffer from user memory
 * and process it. The handler should return 0 on success.
 *
 * This routine returns %NULL on a failure to register, and a pointer
 * to the table header on success.
 */
struct ctl_table_header *__register_sysctl_table(
	struct ctl_table_set *set,
	const char *path, struct ctl_table *table)
{
	struct ctl_table_root *root = set->dir.header.root;
	struct ctl_table_header *header;
	const char *name, *nextname;
	struct ctl_dir *dir;
	struct ctl_table *entry;
	struct ctl_node *node;
	int nr_entries = 0;

	for (entry = table; entry->procname; entry++)
		nr_entries++;

	/* The header and one ctl_node per table entry share one allocation;
	 * the node array lives directly after the header. */
	header = kzalloc(sizeof(struct ctl_table_header) +
			 sizeof(struct ctl_node)*nr_entries, GFP_KERNEL);
	if (!header)
		return NULL;

	node = (struct ctl_node *)(header + 1);
	init_header(header, root, set, node, table);
	if (sysctl_check_table(path, table))
		goto fail;

	spin_lock(&sysctl_lock);
	dir = &set->dir;
	/* Reference moved down the directory tree get_subdir */
	dir->header.nreg++;
	spin_unlock(&sysctl_lock);

	/* Find the directory for the ctl_table */
	for (name = path; name; name = nextname) {
		int namelen;
		nextname = strchr(name, '/');
		if (nextname) {
			namelen = nextname - name;
			nextname++;
		} else {
			namelen = strlen(name);
		}
		/* Skip empty components produced by "//" or a trailing '/'. */
		if (namelen == 0)
			continue;

		/* get_subdir() consumes the reference taken above and
		 * returns the (possibly newly created) subdirectory. */
		dir = get_subdir(dir, name, namelen);
		if (IS_ERR(dir))
			goto fail;
	}

	spin_lock(&sysctl_lock);
	if (insert_header(dir, header))
		goto fail_put_dir_locked;

	drop_sysctl_table(&dir->header);
	spin_unlock(&sysctl_lock);

	return header;

fail_put_dir_locked:
	drop_sysctl_table(&dir->header);
	spin_unlock(&sysctl_lock);
fail:
	kfree(header);
	dump_stack();
	return NULL;
}
1264
1265/**
1266 * register_sysctl - register a sysctl table
1267 * @path: The path to the directory the sysctl table is in.
1268 * @table: the table structure
1269 *
1270 * Register a sysctl table. @table should be a filled in ctl_table
1271 * array. A completely 0 filled entry terminates the table.
1272 *
1273 * See __register_sysctl_table for more details.
1274 */
1275struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *table)
1276{
1277 return __register_sysctl_table(&sysctl_table_root.default_set,
1278 path, table);
1279}
1280EXPORT_SYMBOL(register_sysctl);
1281
1282static char *append_path(const char *path, char *pos, const char *name)
1283{
1284 int namelen;
1285 namelen = strlen(name);
1286 if (((pos - path) + namelen + 2) >= PATH_MAX)
1287 return NULL;
1288 memcpy(pos, name, namelen);
1289 pos[namelen] = '/';
1290 pos[namelen + 1] = '\0';
1291 pos += namelen + 1;
1292 return pos;
1293}
1294
1295static int count_subheaders(struct ctl_table *table)
1296{
1297 int has_files = 0;
1298 int nr_subheaders = 0;
1299 struct ctl_table *entry;
1300
1301 /* special case: no directory and empty directory */
1302 if (!table || !table->procname)
1303 return 1;
1304
1305 for (entry = table; entry->procname; entry++) {
1306 if (entry->child)
1307 nr_subheaders += count_subheaders(entry->child);
1308 else
1309 has_files = 1;
1310 }
1311 return nr_subheaders + has_files;
1312}
1313
/*
 * register_leaf_sysctl_tables - recursively register the leaves of a table
 * @path: buffer holding the slash-terminated directory path built so far
 * @pos: current write position inside @path (at its terminating NUL)
 * @subheader: cursor into the caller's array of subheader pointers,
 *	advanced once for every header registered here
 * @set: sysctl tree to register on
 * @table: table to walk; entries with ->child recurse as subdirectories,
 *	the remaining file entries are registered at @path
 *
 * Returns 0 on success or a negative errno.  On failure, headers already
 * recorded through @subheader are left registered; the caller unwinds them.
 */
static int register_leaf_sysctl_tables(const char *path, char *pos,
	struct ctl_table_header ***subheader, struct ctl_table_set *set,
	struct ctl_table *table)
{
	struct ctl_table *ctl_table_arg = NULL;
	struct ctl_table *entry, *files;
	int nr_files = 0;
	int nr_dirs = 0;
	int err = -ENOMEM;

	/* Count file vs. directory entries in this table. */
	for (entry = table; entry->procname; entry++) {
		if (entry->child)
			nr_dirs++;
		else
			nr_files++;
	}

	files = table;
	/* If there are mixed files and directories we need a new table */
	if (nr_dirs && nr_files) {
		struct ctl_table *new;
		files = kzalloc(sizeof(struct ctl_table) * (nr_files + 1),
				GFP_KERNEL);
		if (!files)
			goto out;

		/* Copy only the file entries into the freshly allocated,
		 * zero-terminated table. */
		ctl_table_arg = files;
		for (new = files, entry = table; entry->procname; entry++) {
			if (entry->child)
				continue;
			*new = *entry;
			new++;
		}
	}

	/* Register everything except a directory full of subdirectories */
	if (nr_files || !nr_dirs) {
		struct ctl_table_header *header;
		header = __register_sysctl_table(set, path, files);
		if (!header) {
			kfree(ctl_table_arg);
			goto out;
		}

		/* Remember if we need to free the file table */
		header->ctl_table_arg = ctl_table_arg;
		**subheader = header;
		(*subheader)++;
	}

	/* Recurse into the subdirectories. */
	for (entry = table; entry->procname; entry++) {
		char *child_pos;

		if (!entry->child)
			continue;

		err = -ENAMETOOLONG;
		child_pos = append_path(path, pos, entry->procname);
		if (!child_pos)
			goto out;

		err = register_leaf_sysctl_tables(path, child_pos, subheader,
						  set, entry->child);
		/* Truncate the path back to @pos for the next sibling. */
		pos[0] = '\0';
		if (err)
			goto out;
	}
	err = 0;
out:
	/* On failure our caller will unregister all registered subheaders */
	return err;
}
1387
/**
 * __register_sysctl_paths - register a sysctl table hierarchy
 * @set: Sysctl tree to register on
 * @path: The path to the directory the sysctl table is in.
 * @table: the top-level table structure
 *
 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
 * array. A completely 0 filled entry terminates the table.
 *
 * See __register_sysctl_table for more details.
 */
struct ctl_table_header *__register_sysctl_paths(
	struct ctl_table_set *set,
	const struct ctl_path *path, struct ctl_table *table)
{
	struct ctl_table *ctl_table_arg = table;
	int nr_subheaders = count_subheaders(table);
	struct ctl_table_header *header = NULL, **subheaders, **subheader;
	const struct ctl_path *component;
	char *new_path, *pos;

	pos = new_path = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!new_path)
		return NULL;

	/* Build the directory path string from the ctl_path components. */
	pos[0] = '\0';
	for (component = path; component->procname; component++) {
		pos = append_path(new_path, pos, component->procname);
		if (!pos)
			goto out;
	}
	/* Fold chains of single-entry directories into the path itself,
	 * descending to the first table with real content. */
	while (table->procname && table->child && !table[1].procname) {
		pos = append_path(new_path, pos, table->procname);
		if (!pos)
			goto out;
		table = table->child;
	}
	if (nr_subheaders == 1) {
		/* Exactly one leaf table: register it directly. */
		header = __register_sysctl_table(set, new_path, table);
		if (header)
			header->ctl_table_arg = ctl_table_arg;
	} else {
		/*
		 * Several leaf tables: allocate a dummy header followed by
		 * an array of pointers to each registered subheader, then
		 * register every leaf recursively.
		 */
		header = kzalloc(sizeof(*header) +
				 sizeof(*subheaders)*nr_subheaders, GFP_KERNEL);
		if (!header)
			goto out;

		subheaders = (struct ctl_table_header **) (header + 1);
		subheader = subheaders;
		header->ctl_table_arg = ctl_table_arg;

		if (register_leaf_sysctl_tables(new_path, pos, &subheader,
						set, table))
			goto err_register_leaves;
	}

out:
	kfree(new_path);
	return header;

err_register_leaves:
	/* Unwind: unregister the subheaders registered so far, newest
	 * first, freeing any file tables allocated on their behalf. */
	while (subheader > subheaders) {
		struct ctl_table_header *subh = *(--subheader);
		struct ctl_table *table = subh->ctl_table_arg;
		unregister_sysctl_table(subh);
		kfree(table);
	}
	kfree(header);
	header = NULL;
	goto out;
}
1459
1460/**
1461 * register_sysctl_table_path - register a sysctl table hierarchy
1462 * @path: The path to the directory the sysctl table is in.
1463 * @table: the top-level table structure
1464 *
1465 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1466 * array. A completely 0 filled entry terminates the table.
1467 *
1468 * See __register_sysctl_paths for more details.
1469 */
1470struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
1471 struct ctl_table *table)
1472{
1473 return __register_sysctl_paths(&sysctl_table_root.default_set,
1474 path, table);
1475}
1476EXPORT_SYMBOL(register_sysctl_paths);
1477
1478/**
1479 * register_sysctl_table - register a sysctl table hierarchy
1480 * @table: the top-level table structure
1481 *
1482 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1483 * array. A completely 0 filled entry terminates the table.
1484 *
1485 * See register_sysctl_paths for more details.
1486 */
1487struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
1488{
1489 static const struct ctl_path null_path[] = { {} };
1490
1491 return register_sysctl_paths(null_path, table);
1492}
1493EXPORT_SYMBOL(register_sysctl_table);
1494
/*
 * put_links - drop the default-set link entries that shadow @header
 *
 * A header registered outside the default set has matching link entries
 * in the default (core) set (see the link creation earlier in this file).
 * For each entry in @header's table, find the corresponding link in the
 * translated core directory and drop a reference on its header; complain
 * loudly if a link has gone missing.
 */
static void put_links(struct ctl_table_header *header)
{
	struct ctl_table_set *root_set = &sysctl_table_root.default_set;
	struct ctl_table_root *root = header->root;
	struct ctl_dir *parent = header->parent;
	struct ctl_dir *core_parent;
	struct ctl_table *entry;

	/* Headers living in the default set have no links to drop. */
	if (header->set == root_set)
		return;

	core_parent = xlate_dir(root_set, parent);
	if (IS_ERR(core_parent))
		return;

	for (entry = header->ctl_table; entry->procname; entry++) {
		struct ctl_table_header *link_head;
		struct ctl_table *link;
		const char *name = entry->procname;

		link = find_entry(&link_head, core_parent, name, strlen(name));
		/* A valid link is either a matching directory or a symlink
		 * whose data points back at our root. */
		if (link &&
		    ((S_ISDIR(link->mode) && S_ISDIR(entry->mode)) ||
		     (S_ISLNK(link->mode) && (link->data == root)))) {
			drop_sysctl_table(link_head);
		}
		else {
			printk(KERN_ERR "sysctl link missing during unregister: ");
			sysctl_print_dir(parent);
			printk(KERN_CONT "/%s\n", name);
		}
	}
}
1528
/*
 * Drop one registration reference (nreg) on @header and tear it down when
 * the last one goes away.  Callers in this file hold sysctl_lock around
 * this; it recurses up the parent chain to release the reference each
 * header holds on its parent directory.
 */
static void drop_sysctl_table(struct ctl_table_header *header)
{
	struct ctl_dir *parent = header->parent;

	/* Still registered elsewhere; nothing to tear down yet. */
	if (--header->nreg)
		return;

	put_links(header);
	start_unregistering(header);
	/* Free only when the last use-count reference is also gone;
	 * freeing is RCU-deferred against concurrent lookups. */
	if (!--header->count)
		kfree_rcu(header, rcu);

	if (parent)
		drop_sysctl_table(&parent->header);
}
1544
/**
 * unregister_sysctl_table - unregister a sysctl table hierarchy
 * @header: the header returned from register_sysctl_table
 *
 * Unregisters the sysctl table and all children. proc entries may not
 * actually be removed until they are no longer used by anyone.
 */
void unregister_sysctl_table(struct ctl_table_header * header)
{
	int nr_subheaders;
	might_sleep();

	if (header == NULL)
		return;

	/*
	 * A registration that was split into several leaf tables (see
	 * __register_sysctl_paths) stored its subheader pointers right
	 * after the dummy header; unregister each one and free any file
	 * tables that were allocated on its behalf.
	 */
	nr_subheaders = count_subheaders(header->ctl_table_arg);
	if (unlikely(nr_subheaders > 1)) {
		struct ctl_table_header **subheaders;
		int i;

		subheaders = (struct ctl_table_header **)(header + 1);
		for (i = nr_subheaders -1; i >= 0; i--) {
			struct ctl_table_header *subh = subheaders[i];
			struct ctl_table *table = subh->ctl_table_arg;
			unregister_sysctl_table(subh);
			kfree(table);
		}
		kfree(header);
		return;
	}

	/* Single-header case: drop the registration reference directly. */
	spin_lock(&sysctl_lock);
	drop_sysctl_table(header);
	spin_unlock(&sysctl_lock);
}
EXPORT_SYMBOL(unregister_sysctl_table);
1581
/*
 * Initialize an empty sysctl set: zero it, record the visibility callback
 * @is_seen, and initialize its root directory header against @root using
 * the shared root_table.
 */
void setup_sysctl_set(struct ctl_table_set *set,
	struct ctl_table_root *root,
	int (*is_seen)(struct ctl_table_set *))
{
	memset(set, 0, sizeof(*set));
	set->is_seen = is_seen;
	init_header(&set->dir.header, root, set, NULL, root_table);
}
1590
/*
 * Retire a sysctl set; warn if it still has registered entries in its
 * root directory's rbtree (they should all be unregistered by now).
 */
void retire_sysctl_set(struct ctl_table_set *set)
{
	WARN_ON(!RB_EMPTY_ROOT(&set->dir.root));
}
1595
473int __init proc_sys_init(void) 1596int __init proc_sys_init(void)
474{ 1597{
475 struct proc_dir_entry *proc_sys_root; 1598 struct proc_dir_entry *proc_sys_root;
@@ -478,5 +1601,6 @@ int __init proc_sys_init(void)
478 proc_sys_root->proc_iops = &proc_sys_dir_operations; 1601 proc_sys_root->proc_iops = &proc_sys_dir_operations;
479 proc_sys_root->proc_fops = &proc_sys_dir_file_operations; 1602 proc_sys_root->proc_fops = &proc_sys_dir_file_operations;
480 proc_sys_root->nlink = 0; 1603 proc_sys_root->nlink = 0;
481 return 0; 1604
1605 return sysctl_init();
482} 1606}
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 121f77cfef76..6a0c62d6e442 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -89,18 +89,19 @@ static int show_stat(struct seq_file *p, void *v)
89 } 89 }
90 sum += arch_irq_stat(); 90 sum += arch_irq_stat();
91 91
92 seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu " 92 seq_puts(p, "cpu ");
93 "%llu\n", 93 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(user));
94 (unsigned long long)cputime64_to_clock_t(user), 94 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(nice));
95 (unsigned long long)cputime64_to_clock_t(nice), 95 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(system));
96 (unsigned long long)cputime64_to_clock_t(system), 96 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(idle));
97 (unsigned long long)cputime64_to_clock_t(idle), 97 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(iowait));
98 (unsigned long long)cputime64_to_clock_t(iowait), 98 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(irq));
99 (unsigned long long)cputime64_to_clock_t(irq), 99 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(softirq));
100 (unsigned long long)cputime64_to_clock_t(softirq), 100 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(steal));
101 (unsigned long long)cputime64_to_clock_t(steal), 101 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest));
102 (unsigned long long)cputime64_to_clock_t(guest), 102 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest_nice));
103 (unsigned long long)cputime64_to_clock_t(guest_nice)); 103 seq_putc(p, '\n');
104
104 for_each_online_cpu(i) { 105 for_each_online_cpu(i) {
105 /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ 106 /* Copy values here to work around gcc-2.95.3, gcc-2.96 */
106 user = kcpustat_cpu(i).cpustat[CPUTIME_USER]; 107 user = kcpustat_cpu(i).cpustat[CPUTIME_USER];
@@ -113,26 +114,24 @@ static int show_stat(struct seq_file *p, void *v)
113 steal = kcpustat_cpu(i).cpustat[CPUTIME_STEAL]; 114 steal = kcpustat_cpu(i).cpustat[CPUTIME_STEAL];
114 guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST]; 115 guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
115 guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE]; 116 guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
116 seq_printf(p, 117 seq_printf(p, "cpu%d", i);
117 "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu " 118 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(user));
118 "%llu\n", 119 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(nice));
119 i, 120 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(system));
120 (unsigned long long)cputime64_to_clock_t(user), 121 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(idle));
121 (unsigned long long)cputime64_to_clock_t(nice), 122 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(iowait));
122 (unsigned long long)cputime64_to_clock_t(system), 123 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(irq));
123 (unsigned long long)cputime64_to_clock_t(idle), 124 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(softirq));
124 (unsigned long long)cputime64_to_clock_t(iowait), 125 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(steal));
125 (unsigned long long)cputime64_to_clock_t(irq), 126 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest));
126 (unsigned long long)cputime64_to_clock_t(softirq), 127 seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest_nice));
127 (unsigned long long)cputime64_to_clock_t(steal), 128 seq_putc(p, '\n');
128 (unsigned long long)cputime64_to_clock_t(guest),
129 (unsigned long long)cputime64_to_clock_t(guest_nice));
130 } 129 }
131 seq_printf(p, "intr %llu", (unsigned long long)sum); 130 seq_printf(p, "intr %llu", (unsigned long long)sum);
132 131
133 /* sum again ? it could be updated? */ 132 /* sum again ? it could be updated? */
134 for_each_irq_nr(j) 133 for_each_irq_nr(j)
135 seq_printf(p, " %u", kstat_irqs(j)); 134 seq_put_decimal_ull(p, ' ', kstat_irqs(j));
136 135
137 seq_printf(p, 136 seq_printf(p,
138 "\nctxt %llu\n" 137 "\nctxt %llu\n"
@@ -149,7 +148,7 @@ static int show_stat(struct seq_file *p, void *v)
149 seq_printf(p, "softirq %llu", (unsigned long long)sum_softirq); 148 seq_printf(p, "softirq %llu", (unsigned long long)sum_softirq);
150 149
151 for (i = 0; i < NR_SOFTIRQS; i++) 150 for (i = 0; i < NR_SOFTIRQS; i++)
152 seq_printf(p, " %u", per_softirq_sums[i]); 151 seq_put_decimal_ull(p, ' ', per_softirq_sums[i]);
153 seq_putc(p, '\n'); 152 seq_putc(p, '\n');
154 153
155 return 0; 154 return 0;
@@ -157,11 +156,14 @@ static int show_stat(struct seq_file *p, void *v)
157 156
158static int stat_open(struct inode *inode, struct file *file) 157static int stat_open(struct inode *inode, struct file *file)
159{ 158{
160 unsigned size = 4096 * (1 + num_possible_cpus() / 32); 159 unsigned size = 1024 + 128 * num_possible_cpus();
161 char *buf; 160 char *buf;
162 struct seq_file *m; 161 struct seq_file *m;
163 int res; 162 int res;
164 163
164 /* minimum size to display an interrupt count : 2 bytes */
165 size += 2 * nr_irqs;
166
165 /* don't ask for more than the kmalloc() max size */ 167 /* don't ask for more than the kmalloc() max size */
166 if (size > KMALLOC_MAX_SIZE) 168 if (size > KMALLOC_MAX_SIZE)
167 size = KMALLOC_MAX_SIZE; 169 size = KMALLOC_MAX_SIZE;
@@ -173,7 +175,7 @@ static int stat_open(struct inode *inode, struct file *file)
173 if (!res) { 175 if (!res) {
174 m = file->private_data; 176 m = file->private_data;
175 m->buf = buf; 177 m->buf = buf;
176 m->size = size; 178 m->size = ksize(buf);
177 } else 179 } else
178 kfree(buf); 180 kfree(buf);
179 return res; 181 return res;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 7dcd2a250495..9694cc283511 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -209,16 +209,20 @@ static int do_maps_open(struct inode *inode, struct file *file,
209 return ret; 209 return ret;
210} 210}
211 211
212static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) 212static void
213show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
213{ 214{
214 struct mm_struct *mm = vma->vm_mm; 215 struct mm_struct *mm = vma->vm_mm;
215 struct file *file = vma->vm_file; 216 struct file *file = vma->vm_file;
217 struct proc_maps_private *priv = m->private;
218 struct task_struct *task = priv->task;
216 vm_flags_t flags = vma->vm_flags; 219 vm_flags_t flags = vma->vm_flags;
217 unsigned long ino = 0; 220 unsigned long ino = 0;
218 unsigned long long pgoff = 0; 221 unsigned long long pgoff = 0;
219 unsigned long start, end; 222 unsigned long start, end;
220 dev_t dev = 0; 223 dev_t dev = 0;
221 int len; 224 int len;
225 const char *name = NULL;
222 226
223 if (file) { 227 if (file) {
224 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 228 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
@@ -252,36 +256,57 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
252 if (file) { 256 if (file) {
253 pad_len_spaces(m, len); 257 pad_len_spaces(m, len);
254 seq_path(m, &file->f_path, "\n"); 258 seq_path(m, &file->f_path, "\n");
255 } else { 259 goto done;
256 const char *name = arch_vma_name(vma); 260 }
257 if (!name) { 261
258 if (mm) { 262 name = arch_vma_name(vma);
259 if (vma->vm_start <= mm->brk && 263 if (!name) {
260 vma->vm_end >= mm->start_brk) { 264 pid_t tid;
261 name = "[heap]"; 265
262 } else if (vma->vm_start <= mm->start_stack && 266 if (!mm) {
263 vma->vm_end >= mm->start_stack) { 267 name = "[vdso]";
264 name = "[stack]"; 268 goto done;
265 } 269 }
270
271 if (vma->vm_start <= mm->brk &&
272 vma->vm_end >= mm->start_brk) {
273 name = "[heap]";
274 goto done;
275 }
276
277 tid = vm_is_stack(task, vma, is_pid);
278
279 if (tid != 0) {
280 /*
281 * Thread stack in /proc/PID/task/TID/maps or
282 * the main process stack.
283 */
284 if (!is_pid || (vma->vm_start <= mm->start_stack &&
285 vma->vm_end >= mm->start_stack)) {
286 name = "[stack]";
266 } else { 287 } else {
267 name = "[vdso]"; 288 /* Thread stack in /proc/PID/maps */
289 pad_len_spaces(m, len);
290 seq_printf(m, "[stack:%d]", tid);
268 } 291 }
269 } 292 }
270 if (name) { 293 }
271 pad_len_spaces(m, len); 294
272 seq_puts(m, name); 295done:
273 } 296 if (name) {
297 pad_len_spaces(m, len);
298 seq_puts(m, name);
274 } 299 }
275 seq_putc(m, '\n'); 300 seq_putc(m, '\n');
276} 301}
277 302
278static int show_map(struct seq_file *m, void *v) 303static int show_map(struct seq_file *m, void *v, int is_pid)
279{ 304{
280 struct vm_area_struct *vma = v; 305 struct vm_area_struct *vma = v;
281 struct proc_maps_private *priv = m->private; 306 struct proc_maps_private *priv = m->private;
282 struct task_struct *task = priv->task; 307 struct task_struct *task = priv->task;
283 308
284 show_map_vma(m, vma); 309 show_map_vma(m, vma, is_pid);
285 310
286 if (m->count < m->size) /* vma is copied successfully */ 311 if (m->count < m->size) /* vma is copied successfully */
287 m->version = (vma != get_gate_vma(task->mm)) 312 m->version = (vma != get_gate_vma(task->mm))
@@ -289,20 +314,49 @@ static int show_map(struct seq_file *m, void *v)
289 return 0; 314 return 0;
290} 315}
291 316
317static int show_pid_map(struct seq_file *m, void *v)
318{
319 return show_map(m, v, 1);
320}
321
322static int show_tid_map(struct seq_file *m, void *v)
323{
324 return show_map(m, v, 0);
325}
326
292static const struct seq_operations proc_pid_maps_op = { 327static const struct seq_operations proc_pid_maps_op = {
293 .start = m_start, 328 .start = m_start,
294 .next = m_next, 329 .next = m_next,
295 .stop = m_stop, 330 .stop = m_stop,
296 .show = show_map 331 .show = show_pid_map
297}; 332};
298 333
299static int maps_open(struct inode *inode, struct file *file) 334static const struct seq_operations proc_tid_maps_op = {
335 .start = m_start,
336 .next = m_next,
337 .stop = m_stop,
338 .show = show_tid_map
339};
340
341static int pid_maps_open(struct inode *inode, struct file *file)
300{ 342{
301 return do_maps_open(inode, file, &proc_pid_maps_op); 343 return do_maps_open(inode, file, &proc_pid_maps_op);
302} 344}
303 345
304const struct file_operations proc_maps_operations = { 346static int tid_maps_open(struct inode *inode, struct file *file)
305 .open = maps_open, 347{
348 return do_maps_open(inode, file, &proc_tid_maps_op);
349}
350
351const struct file_operations proc_pid_maps_operations = {
352 .open = pid_maps_open,
353 .read = seq_read,
354 .llseek = seq_lseek,
355 .release = seq_release_private,
356};
357
358const struct file_operations proc_tid_maps_operations = {
359 .open = tid_maps_open,
306 .read = seq_read, 360 .read = seq_read,
307 .llseek = seq_lseek, 361 .llseek = seq_lseek,
308 .release = seq_release_private, 362 .release = seq_release_private,
@@ -394,21 +448,15 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
394 pte_t *pte; 448 pte_t *pte;
395 spinlock_t *ptl; 449 spinlock_t *ptl;
396 450
397 spin_lock(&walk->mm->page_table_lock); 451 if (pmd_trans_huge_lock(pmd, vma) == 1) {
398 if (pmd_trans_huge(*pmd)) { 452 smaps_pte_entry(*(pte_t *)pmd, addr, HPAGE_PMD_SIZE, walk);
399 if (pmd_trans_splitting(*pmd)) {
400 spin_unlock(&walk->mm->page_table_lock);
401 wait_split_huge_page(vma->anon_vma, pmd);
402 } else {
403 smaps_pte_entry(*(pte_t *)pmd, addr,
404 HPAGE_PMD_SIZE, walk);
405 spin_unlock(&walk->mm->page_table_lock);
406 mss->anonymous_thp += HPAGE_PMD_SIZE;
407 return 0;
408 }
409 } else {
410 spin_unlock(&walk->mm->page_table_lock); 453 spin_unlock(&walk->mm->page_table_lock);
454 mss->anonymous_thp += HPAGE_PMD_SIZE;
455 return 0;
411 } 456 }
457
458 if (pmd_trans_unstable(pmd))
459 return 0;
412 /* 460 /*
413 * The mmap_sem held all the way back in m_start() is what 461 * The mmap_sem held all the way back in m_start() is what
414 * keeps khugepaged out of here and from collapsing things 462 * keeps khugepaged out of here and from collapsing things
@@ -422,7 +470,7 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
422 return 0; 470 return 0;
423} 471}
424 472
425static int show_smap(struct seq_file *m, void *v) 473static int show_smap(struct seq_file *m, void *v, int is_pid)
426{ 474{
427 struct proc_maps_private *priv = m->private; 475 struct proc_maps_private *priv = m->private;
428 struct task_struct *task = priv->task; 476 struct task_struct *task = priv->task;
@@ -440,7 +488,7 @@ static int show_smap(struct seq_file *m, void *v)
440 if (vma->vm_mm && !is_vm_hugetlb_page(vma)) 488 if (vma->vm_mm && !is_vm_hugetlb_page(vma))
441 walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk); 489 walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk);
442 490
443 show_map_vma(m, vma); 491 show_map_vma(m, vma, is_pid);
444 492
445 seq_printf(m, 493 seq_printf(m,
446 "Size: %8lu kB\n" 494 "Size: %8lu kB\n"
@@ -479,20 +527,49 @@ static int show_smap(struct seq_file *m, void *v)
479 return 0; 527 return 0;
480} 528}
481 529
530static int show_pid_smap(struct seq_file *m, void *v)
531{
532 return show_smap(m, v, 1);
533}
534
535static int show_tid_smap(struct seq_file *m, void *v)
536{
537 return show_smap(m, v, 0);
538}
539
482static const struct seq_operations proc_pid_smaps_op = { 540static const struct seq_operations proc_pid_smaps_op = {
483 .start = m_start, 541 .start = m_start,
484 .next = m_next, 542 .next = m_next,
485 .stop = m_stop, 543 .stop = m_stop,
486 .show = show_smap 544 .show = show_pid_smap
545};
546
547static const struct seq_operations proc_tid_smaps_op = {
548 .start = m_start,
549 .next = m_next,
550 .stop = m_stop,
551 .show = show_tid_smap
487}; 552};
488 553
489static int smaps_open(struct inode *inode, struct file *file) 554static int pid_smaps_open(struct inode *inode, struct file *file)
490{ 555{
491 return do_maps_open(inode, file, &proc_pid_smaps_op); 556 return do_maps_open(inode, file, &proc_pid_smaps_op);
492} 557}
493 558
494const struct file_operations proc_smaps_operations = { 559static int tid_smaps_open(struct inode *inode, struct file *file)
495 .open = smaps_open, 560{
561 return do_maps_open(inode, file, &proc_tid_smaps_op);
562}
563
564const struct file_operations proc_pid_smaps_operations = {
565 .open = pid_smaps_open,
566 .read = seq_read,
567 .llseek = seq_lseek,
568 .release = seq_release_private,
569};
570
571const struct file_operations proc_tid_smaps_operations = {
572 .open = tid_smaps_open,
496 .read = seq_read, 573 .read = seq_read,
497 .llseek = seq_lseek, 574 .llseek = seq_lseek,
498 .release = seq_release_private, 575 .release = seq_release_private,
@@ -507,6 +584,8 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
507 struct page *page; 584 struct page *page;
508 585
509 split_huge_page_pmd(walk->mm, pmd); 586 split_huge_page_pmd(walk->mm, pmd);
587 if (pmd_trans_unstable(pmd))
588 return 0;
510 589
511 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); 590 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
512 for (; addr != end; pte++, addr += PAGE_SIZE) { 591 for (; addr != end; pte++, addr += PAGE_SIZE) {
@@ -598,11 +677,18 @@ const struct file_operations proc_clear_refs_operations = {
598 .llseek = noop_llseek, 677 .llseek = noop_llseek,
599}; 678};
600 679
680typedef struct {
681 u64 pme;
682} pagemap_entry_t;
683
601struct pagemapread { 684struct pagemapread {
602 int pos, len; 685 int pos, len;
603 u64 *buffer; 686 pagemap_entry_t *buffer;
604}; 687};
605 688
689#define PAGEMAP_WALK_SIZE (PMD_SIZE)
690#define PAGEMAP_WALK_MASK (PMD_MASK)
691
606#define PM_ENTRY_BYTES sizeof(u64) 692#define PM_ENTRY_BYTES sizeof(u64)
607#define PM_STATUS_BITS 3 693#define PM_STATUS_BITS 3
608#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS) 694#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS)
@@ -620,10 +706,15 @@ struct pagemapread {
620#define PM_NOT_PRESENT PM_PSHIFT(PAGE_SHIFT) 706#define PM_NOT_PRESENT PM_PSHIFT(PAGE_SHIFT)
621#define PM_END_OF_BUFFER 1 707#define PM_END_OF_BUFFER 1
622 708
623static int add_to_pagemap(unsigned long addr, u64 pfn, 709static inline pagemap_entry_t make_pme(u64 val)
710{
711 return (pagemap_entry_t) { .pme = val };
712}
713
714static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme,
624 struct pagemapread *pm) 715 struct pagemapread *pm)
625{ 716{
626 pm->buffer[pm->pos++] = pfn; 717 pm->buffer[pm->pos++] = *pme;
627 if (pm->pos >= pm->len) 718 if (pm->pos >= pm->len)
628 return PM_END_OF_BUFFER; 719 return PM_END_OF_BUFFER;
629 return 0; 720 return 0;
@@ -635,8 +726,10 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
635 struct pagemapread *pm = walk->private; 726 struct pagemapread *pm = walk->private;
636 unsigned long addr; 727 unsigned long addr;
637 int err = 0; 728 int err = 0;
729 pagemap_entry_t pme = make_pme(PM_NOT_PRESENT);
730
638 for (addr = start; addr < end; addr += PAGE_SIZE) { 731 for (addr = start; addr < end; addr += PAGE_SIZE) {
639 err = add_to_pagemap(addr, PM_NOT_PRESENT, pm); 732 err = add_to_pagemap(addr, &pme, pm);
640 if (err) 733 if (err)
641 break; 734 break;
642 } 735 }
@@ -649,17 +742,35 @@ static u64 swap_pte_to_pagemap_entry(pte_t pte)
649 return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT); 742 return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT);
650} 743}
651 744
652static u64 pte_to_pagemap_entry(pte_t pte) 745static void pte_to_pagemap_entry(pagemap_entry_t *pme, pte_t pte)
653{ 746{
654 u64 pme = 0;
655 if (is_swap_pte(pte)) 747 if (is_swap_pte(pte))
656 pme = PM_PFRAME(swap_pte_to_pagemap_entry(pte)) 748 *pme = make_pme(PM_PFRAME(swap_pte_to_pagemap_entry(pte))
657 | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP; 749 | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP);
658 else if (pte_present(pte)) 750 else if (pte_present(pte))
659 pme = PM_PFRAME(pte_pfn(pte)) 751 *pme = make_pme(PM_PFRAME(pte_pfn(pte))
660 | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT; 752 | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT);
661 return pme; 753}
754
755#ifdef CONFIG_TRANSPARENT_HUGEPAGE
756static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme,
757 pmd_t pmd, int offset)
758{
759 /*
760 * Currently pmd for thp is always present because thp can not be
761 * swapped-out, migrated, or HWPOISONed (split in such cases instead.)
762 * This if-check is just to prepare for future implementation.
763 */
764 if (pmd_present(pmd))
765 *pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset)
766 | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT);
662} 767}
768#else
769static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme,
770 pmd_t pmd, int offset)
771{
772}
773#endif
663 774
664static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, 775static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
665 struct mm_walk *walk) 776 struct mm_walk *walk)
@@ -668,13 +779,30 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
668 struct pagemapread *pm = walk->private; 779 struct pagemapread *pm = walk->private;
669 pte_t *pte; 780 pte_t *pte;
670 int err = 0; 781 int err = 0;
782 pagemap_entry_t pme = make_pme(PM_NOT_PRESENT);
671 783
672 split_huge_page_pmd(walk->mm, pmd); 784 if (pmd_trans_unstable(pmd))
785 return 0;
673 786
674 /* find the first VMA at or above 'addr' */ 787 /* find the first VMA at or above 'addr' */
675 vma = find_vma(walk->mm, addr); 788 vma = find_vma(walk->mm, addr);
789 spin_lock(&walk->mm->page_table_lock);
790 if (pmd_trans_huge_lock(pmd, vma) == 1) {
791 for (; addr != end; addr += PAGE_SIZE) {
792 unsigned long offset;
793
794 offset = (addr & ~PAGEMAP_WALK_MASK) >>
795 PAGE_SHIFT;
796 thp_pmd_to_pagemap_entry(&pme, *pmd, offset);
797 err = add_to_pagemap(addr, &pme, pm);
798 if (err)
799 break;
800 }
801 spin_unlock(&walk->mm->page_table_lock);
802 return err;
803 }
804
676 for (; addr != end; addr += PAGE_SIZE) { 805 for (; addr != end; addr += PAGE_SIZE) {
677 u64 pfn = PM_NOT_PRESENT;
678 806
679 /* check to see if we've left 'vma' behind 807 /* check to see if we've left 'vma' behind
680 * and need a new, higher one */ 808 * and need a new, higher one */
@@ -686,11 +814,11 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
686 if (vma && (vma->vm_start <= addr) && 814 if (vma && (vma->vm_start <= addr) &&
687 !is_vm_hugetlb_page(vma)) { 815 !is_vm_hugetlb_page(vma)) {
688 pte = pte_offset_map(pmd, addr); 816 pte = pte_offset_map(pmd, addr);
689 pfn = pte_to_pagemap_entry(*pte); 817 pte_to_pagemap_entry(&pme, *pte);
690 /* unmap before userspace copy */ 818 /* unmap before userspace copy */
691 pte_unmap(pte); 819 pte_unmap(pte);
692 } 820 }
693 err = add_to_pagemap(addr, pfn, pm); 821 err = add_to_pagemap(addr, &pme, pm);
694 if (err) 822 if (err)
695 return err; 823 return err;
696 } 824 }
@@ -701,13 +829,12 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
701} 829}
702 830
703#ifdef CONFIG_HUGETLB_PAGE 831#ifdef CONFIG_HUGETLB_PAGE
704static u64 huge_pte_to_pagemap_entry(pte_t pte, int offset) 832static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme,
833 pte_t pte, int offset)
705{ 834{
706 u64 pme = 0;
707 if (pte_present(pte)) 835 if (pte_present(pte))
708 pme = PM_PFRAME(pte_pfn(pte) + offset) 836 *pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset)
709 | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT; 837 | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT);
710 return pme;
711} 838}
712 839
713/* This function walks within one hugetlb entry in the single call */ 840/* This function walks within one hugetlb entry in the single call */
@@ -717,12 +844,12 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
717{ 844{
718 struct pagemapread *pm = walk->private; 845 struct pagemapread *pm = walk->private;
719 int err = 0; 846 int err = 0;
720 u64 pfn; 847 pagemap_entry_t pme = make_pme(PM_NOT_PRESENT);
721 848
722 for (; addr != end; addr += PAGE_SIZE) { 849 for (; addr != end; addr += PAGE_SIZE) {
723 int offset = (addr & ~hmask) >> PAGE_SHIFT; 850 int offset = (addr & ~hmask) >> PAGE_SHIFT;
724 pfn = huge_pte_to_pagemap_entry(*pte, offset); 851 huge_pte_to_pagemap_entry(&pme, *pte, offset);
725 err = add_to_pagemap(addr, pfn, pm); 852 err = add_to_pagemap(addr, &pme, pm);
726 if (err) 853 if (err)
727 return err; 854 return err;
728 } 855 }
@@ -757,8 +884,6 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
757 * determine which areas of memory are actually mapped and llseek to 884 * determine which areas of memory are actually mapped and llseek to
758 * skip over unmapped regions. 885 * skip over unmapped regions.
759 */ 886 */
760#define PAGEMAP_WALK_SIZE (PMD_SIZE)
761#define PAGEMAP_WALK_MASK (PMD_MASK)
762static ssize_t pagemap_read(struct file *file, char __user *buf, 887static ssize_t pagemap_read(struct file *file, char __user *buf,
763 size_t count, loff_t *ppos) 888 size_t count, loff_t *ppos)
764{ 889{
@@ -941,26 +1066,21 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
941 pte_t *pte; 1066 pte_t *pte;
942 1067
943 md = walk->private; 1068 md = walk->private;
944 spin_lock(&walk->mm->page_table_lock); 1069
945 if (pmd_trans_huge(*pmd)) { 1070 if (pmd_trans_huge_lock(pmd, md->vma) == 1) {
946 if (pmd_trans_splitting(*pmd)) { 1071 pte_t huge_pte = *(pte_t *)pmd;
947 spin_unlock(&walk->mm->page_table_lock); 1072 struct page *page;
948 wait_split_huge_page(md->vma->anon_vma, pmd); 1073
949 } else { 1074 page = can_gather_numa_stats(huge_pte, md->vma, addr);
950 pte_t huge_pte = *(pte_t *)pmd; 1075 if (page)
951 struct page *page; 1076 gather_stats(page, md, pte_dirty(huge_pte),
952 1077 HPAGE_PMD_SIZE/PAGE_SIZE);
953 page = can_gather_numa_stats(huge_pte, md->vma, addr);
954 if (page)
955 gather_stats(page, md, pte_dirty(huge_pte),
956 HPAGE_PMD_SIZE/PAGE_SIZE);
957 spin_unlock(&walk->mm->page_table_lock);
958 return 0;
959 }
960 } else {
961 spin_unlock(&walk->mm->page_table_lock); 1078 spin_unlock(&walk->mm->page_table_lock);
1079 return 0;
962 } 1080 }
963 1081
1082 if (pmd_trans_unstable(pmd))
1083 return 0;
964 orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); 1084 orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
965 do { 1085 do {
966 struct page *page = can_gather_numa_stats(*pte, md->vma, addr); 1086 struct page *page = can_gather_numa_stats(*pte, md->vma, addr);
@@ -1002,7 +1122,7 @@ static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
1002/* 1122/*
1003 * Display pages allocated per node and memory policy via /proc. 1123 * Display pages allocated per node and memory policy via /proc.
1004 */ 1124 */
1005static int show_numa_map(struct seq_file *m, void *v) 1125static int show_numa_map(struct seq_file *m, void *v, int is_pid)
1006{ 1126{
1007 struct numa_maps_private *numa_priv = m->private; 1127 struct numa_maps_private *numa_priv = m->private;
1008 struct proc_maps_private *proc_priv = &numa_priv->proc_maps; 1128 struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
@@ -1039,9 +1159,19 @@ static int show_numa_map(struct seq_file *m, void *v)
1039 seq_path(m, &file->f_path, "\n\t= "); 1159 seq_path(m, &file->f_path, "\n\t= ");
1040 } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { 1160 } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
1041 seq_printf(m, " heap"); 1161 seq_printf(m, " heap");
1042 } else if (vma->vm_start <= mm->start_stack && 1162 } else {
1043 vma->vm_end >= mm->start_stack) { 1163 pid_t tid = vm_is_stack(proc_priv->task, vma, is_pid);
1044 seq_printf(m, " stack"); 1164 if (tid != 0) {
1165 /*
1166 * Thread stack in /proc/PID/task/TID/maps or
1167 * the main process stack.
1168 */
1169 if (!is_pid || (vma->vm_start <= mm->start_stack &&
1170 vma->vm_end >= mm->start_stack))
1171 seq_printf(m, " stack");
1172 else
1173 seq_printf(m, " stack:%d", tid);
1174 }
1045 } 1175 }
1046 1176
1047 if (is_vm_hugetlb_page(vma)) 1177 if (is_vm_hugetlb_page(vma))
@@ -1084,21 +1214,39 @@ out:
1084 return 0; 1214 return 0;
1085} 1215}
1086 1216
1217static int show_pid_numa_map(struct seq_file *m, void *v)
1218{
1219 return show_numa_map(m, v, 1);
1220}
1221
1222static int show_tid_numa_map(struct seq_file *m, void *v)
1223{
1224 return show_numa_map(m, v, 0);
1225}
1226
1087static const struct seq_operations proc_pid_numa_maps_op = { 1227static const struct seq_operations proc_pid_numa_maps_op = {
1088 .start = m_start, 1228 .start = m_start,
1089 .next = m_next, 1229 .next = m_next,
1090 .stop = m_stop, 1230 .stop = m_stop,
1091 .show = show_numa_map, 1231 .show = show_pid_numa_map,
1232};
1233
1234static const struct seq_operations proc_tid_numa_maps_op = {
1235 .start = m_start,
1236 .next = m_next,
1237 .stop = m_stop,
1238 .show = show_tid_numa_map,
1092}; 1239};
1093 1240
1094static int numa_maps_open(struct inode *inode, struct file *file) 1241static int numa_maps_open(struct inode *inode, struct file *file,
1242 const struct seq_operations *ops)
1095{ 1243{
1096 struct numa_maps_private *priv; 1244 struct numa_maps_private *priv;
1097 int ret = -ENOMEM; 1245 int ret = -ENOMEM;
1098 priv = kzalloc(sizeof(*priv), GFP_KERNEL); 1246 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
1099 if (priv) { 1247 if (priv) {
1100 priv->proc_maps.pid = proc_pid(inode); 1248 priv->proc_maps.pid = proc_pid(inode);
1101 ret = seq_open(file, &proc_pid_numa_maps_op); 1249 ret = seq_open(file, ops);
1102 if (!ret) { 1250 if (!ret) {
1103 struct seq_file *m = file->private_data; 1251 struct seq_file *m = file->private_data;
1104 m->private = priv; 1252 m->private = priv;
@@ -1109,8 +1257,25 @@ static int numa_maps_open(struct inode *inode, struct file *file)
1109 return ret; 1257 return ret;
1110} 1258}
1111 1259
1112const struct file_operations proc_numa_maps_operations = { 1260static int pid_numa_maps_open(struct inode *inode, struct file *file)
1113 .open = numa_maps_open, 1261{
1262 return numa_maps_open(inode, file, &proc_pid_numa_maps_op);
1263}
1264
1265static int tid_numa_maps_open(struct inode *inode, struct file *file)
1266{
1267 return numa_maps_open(inode, file, &proc_tid_numa_maps_op);
1268}
1269
1270const struct file_operations proc_pid_numa_maps_operations = {
1271 .open = pid_numa_maps_open,
1272 .read = seq_read,
1273 .llseek = seq_lseek,
1274 .release = seq_release_private,
1275};
1276
1277const struct file_operations proc_tid_numa_maps_operations = {
1278 .open = tid_numa_maps_open,
1114 .read = seq_read, 1279 .read = seq_read,
1115 .llseek = seq_lseek, 1280 .llseek = seq_lseek,
1116 .release = seq_release_private, 1281 .release = seq_release_private,
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 980de547c070..74fe164d1b23 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -134,9 +134,11 @@ static void pad_len_spaces(struct seq_file *m, int len)
134/* 134/*
135 * display a single VMA to a sequenced file 135 * display a single VMA to a sequenced file
136 */ 136 */
137static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) 137static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
138 int is_pid)
138{ 139{
139 struct mm_struct *mm = vma->vm_mm; 140 struct mm_struct *mm = vma->vm_mm;
141 struct proc_maps_private *priv = m->private;
140 unsigned long ino = 0; 142 unsigned long ino = 0;
141 struct file *file; 143 struct file *file;
142 dev_t dev = 0; 144 dev_t dev = 0;
@@ -168,10 +170,19 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
168 pad_len_spaces(m, len); 170 pad_len_spaces(m, len);
169 seq_path(m, &file->f_path, ""); 171 seq_path(m, &file->f_path, "");
170 } else if (mm) { 172 } else if (mm) {
171 if (vma->vm_start <= mm->start_stack && 173 pid_t tid = vm_is_stack(priv->task, vma, is_pid);
172 vma->vm_end >= mm->start_stack) { 174
175 if (tid != 0) {
173 pad_len_spaces(m, len); 176 pad_len_spaces(m, len);
174 seq_puts(m, "[stack]"); 177 /*
178 * Thread stack in /proc/PID/task/TID/maps or
179 * the main process stack.
180 */
181 if (!is_pid || (vma->vm_start <= mm->start_stack &&
182 vma->vm_end >= mm->start_stack))
183 seq_printf(m, "[stack]");
184 else
185 seq_printf(m, "[stack:%d]", tid);
175 } 186 }
176 } 187 }
177 188
@@ -182,11 +193,22 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
182/* 193/*
183 * display mapping lines for a particular process's /proc/pid/maps 194 * display mapping lines for a particular process's /proc/pid/maps
184 */ 195 */
185static int show_map(struct seq_file *m, void *_p) 196static int show_map(struct seq_file *m, void *_p, int is_pid)
186{ 197{
187 struct rb_node *p = _p; 198 struct rb_node *p = _p;
188 199
189 return nommu_vma_show(m, rb_entry(p, struct vm_area_struct, vm_rb)); 200 return nommu_vma_show(m, rb_entry(p, struct vm_area_struct, vm_rb),
201 is_pid);
202}
203
204static int show_pid_map(struct seq_file *m, void *_p)
205{
206 return show_map(m, _p, 1);
207}
208
209static int show_tid_map(struct seq_file *m, void *_p)
210{
211 return show_map(m, _p, 0);
190} 212}
191 213
192static void *m_start(struct seq_file *m, loff_t *pos) 214static void *m_start(struct seq_file *m, loff_t *pos)
@@ -240,10 +262,18 @@ static const struct seq_operations proc_pid_maps_ops = {
240 .start = m_start, 262 .start = m_start,
241 .next = m_next, 263 .next = m_next,
242 .stop = m_stop, 264 .stop = m_stop,
243 .show = show_map 265 .show = show_pid_map
266};
267
268static const struct seq_operations proc_tid_maps_ops = {
269 .start = m_start,
270 .next = m_next,
271 .stop = m_stop,
272 .show = show_tid_map
244}; 273};
245 274
246static int maps_open(struct inode *inode, struct file *file) 275static int maps_open(struct inode *inode, struct file *file,
276 const struct seq_operations *ops)
247{ 277{
248 struct proc_maps_private *priv; 278 struct proc_maps_private *priv;
249 int ret = -ENOMEM; 279 int ret = -ENOMEM;
@@ -251,7 +281,7 @@ static int maps_open(struct inode *inode, struct file *file)
251 priv = kzalloc(sizeof(*priv), GFP_KERNEL); 281 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
252 if (priv) { 282 if (priv) {
253 priv->pid = proc_pid(inode); 283 priv->pid = proc_pid(inode);
254 ret = seq_open(file, &proc_pid_maps_ops); 284 ret = seq_open(file, ops);
255 if (!ret) { 285 if (!ret) {
256 struct seq_file *m = file->private_data; 286 struct seq_file *m = file->private_data;
257 m->private = priv; 287 m->private = priv;
@@ -262,8 +292,25 @@ static int maps_open(struct inode *inode, struct file *file)
262 return ret; 292 return ret;
263} 293}
264 294
265const struct file_operations proc_maps_operations = { 295static int pid_maps_open(struct inode *inode, struct file *file)
266 .open = maps_open, 296{
297 return maps_open(inode, file, &proc_pid_maps_ops);
298}
299
300static int tid_maps_open(struct inode *inode, struct file *file)
301{
302 return maps_open(inode, file, &proc_tid_maps_ops);
303}
304
305const struct file_operations proc_pid_maps_operations = {
306 .open = pid_maps_open,
307 .read = seq_read,
308 .llseek = seq_lseek,
309 .release = seq_release_private,
310};
311
312const struct file_operations proc_tid_maps_operations = {
313 .open = tid_maps_open,
267 .read = seq_read, 314 .read = seq_read,
268 .llseek = seq_lseek, 315 .llseek = seq_lseek,
269 .release = seq_release_private, 316 .release = seq_release_private,
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index b0f450a2bb7c..0d5071d29985 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -700,3 +700,26 @@ static int __init vmcore_init(void)
700 return 0; 700 return 0;
701} 701}
702module_init(vmcore_init) 702module_init(vmcore_init)
703
704/* Cleanup function for vmcore module. */
705void vmcore_cleanup(void)
706{
707 struct list_head *pos, *next;
708
709 if (proc_vmcore) {
710 remove_proc_entry(proc_vmcore->name, proc_vmcore->parent);
711 proc_vmcore = NULL;
712 }
713
714 /* clear the vmcore list. */
715 list_for_each_safe(pos, next, &vmcore_list) {
716 struct vmcore *m;
717
718 m = list_entry(pos, struct vmcore, list);
719 list_del(&m->list);
720 kfree(m);
721 }
722 kfree(elfcorebuf);
723 elfcorebuf = NULL;
724}
725EXPORT_SYMBOL_GPL(vmcore_cleanup);
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index b3b426edb2fd..f37c32b94525 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -278,9 +278,7 @@ fail:
278 278
279int pstore_fill_super(struct super_block *sb, void *data, int silent) 279int pstore_fill_super(struct super_block *sb, void *data, int silent)
280{ 280{
281 struct inode *inode = NULL; 281 struct inode *inode;
282 struct dentry *root;
283 int err;
284 282
285 save_mount_options(sb, data); 283 save_mount_options(sb, data);
286 284
@@ -296,26 +294,17 @@ int pstore_fill_super(struct super_block *sb, void *data, int silent)
296 parse_options(data); 294 parse_options(data);
297 295
298 inode = pstore_get_inode(sb, NULL, S_IFDIR | 0755, 0); 296 inode = pstore_get_inode(sb, NULL, S_IFDIR | 0755, 0);
299 if (!inode) { 297 if (inode) {
300 err = -ENOMEM; 298 /* override ramfs "dir" options so we catch unlink(2) */
301 goto fail; 299 inode->i_op = &pstore_dir_inode_operations;
302 }
303 /* override ramfs "dir" options so we catch unlink(2) */
304 inode->i_op = &pstore_dir_inode_operations;
305
306 root = d_alloc_root(inode);
307 sb->s_root = root;
308 if (!root) {
309 err = -ENOMEM;
310 goto fail;
311 } 300 }
301 sb->s_root = d_make_root(inode);
302 if (!sb->s_root)
303 return -ENOMEM;
312 304
313 pstore_get_records(0); 305 pstore_get_records(0);
314 306
315 return 0; 307 return 0;
316fail:
317 iput(inode);
318 return err;
319} 308}
320 309
321static struct dentry *pstore_mount(struct file_system_type *fs_type, 310static struct dentry *pstore_mount(struct file_system_type *fs_type,
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 9ec22d3b4293..82c585f715e3 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -68,9 +68,25 @@ void pstore_set_kmsg_bytes(int bytes)
68/* Tag each group of saved records with a sequence number */ 68/* Tag each group of saved records with a sequence number */
69static int oopscount; 69static int oopscount;
70 70
71static char *reason_str[] = { 71static const char *get_reason_str(enum kmsg_dump_reason reason)
72 "Oops", "Panic", "Kexec", "Restart", "Halt", "Poweroff", "Emergency" 72{
73}; 73 switch (reason) {
74 case KMSG_DUMP_PANIC:
75 return "Panic";
76 case KMSG_DUMP_OOPS:
77 return "Oops";
78 case KMSG_DUMP_EMERG:
79 return "Emergency";
80 case KMSG_DUMP_RESTART:
81 return "Restart";
82 case KMSG_DUMP_HALT:
83 return "Halt";
84 case KMSG_DUMP_POWEROFF:
85 return "Poweroff";
86 default:
87 return "Unknown";
88 }
89}
74 90
75/* 91/*
76 * callback from kmsg_dump. (s2,l2) has the most recently 92 * callback from kmsg_dump. (s2,l2) has the most recently
@@ -85,17 +101,15 @@ static void pstore_dump(struct kmsg_dumper *dumper,
85 unsigned long s1_start, s2_start; 101 unsigned long s1_start, s2_start;
86 unsigned long l1_cpy, l2_cpy; 102 unsigned long l1_cpy, l2_cpy;
87 unsigned long size, total = 0; 103 unsigned long size, total = 0;
88 char *dst, *why; 104 char *dst;
105 const char *why;
89 u64 id; 106 u64 id;
90 int hsize, ret; 107 int hsize, ret;
91 unsigned int part = 1; 108 unsigned int part = 1;
92 unsigned long flags = 0; 109 unsigned long flags = 0;
93 int is_locked = 0; 110 int is_locked = 0;
94 111
95 if (reason < ARRAY_SIZE(reason_str)) 112 why = get_reason_str(reason);
96 why = reason_str[reason];
97 else
98 why = "Unknown";
99 113
100 if (in_nmi()) { 114 if (in_nmi()) {
101 is_locked = spin_trylock(&psinfo->buf_lock); 115 is_locked = spin_trylock(&psinfo->buf_lock);
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 6b009548d2e0..552e994e3aa1 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -52,38 +52,6 @@ static int qnx4_remount(struct super_block *sb, int *flags, char *data)
52 return 0; 52 return 0;
53} 53}
54 54
55static struct buffer_head *qnx4_getblk(struct inode *inode, int nr,
56 int create)
57{
58 struct buffer_head *result = NULL;
59
60 if ( nr >= 0 )
61 nr = qnx4_block_map( inode, nr );
62 if (nr) {
63 result = sb_getblk(inode->i_sb, nr);
64 return result;
65 }
66 return NULL;
67}
68
69struct buffer_head *qnx4_bread(struct inode *inode, int block, int create)
70{
71 struct buffer_head *bh;
72
73 bh = qnx4_getblk(inode, block, create);
74 if (!bh || buffer_uptodate(bh)) {
75 return bh;
76 }
77 ll_rw_block(READ, 1, &bh);
78 wait_on_buffer(bh);
79 if (buffer_uptodate(bh)) {
80 return bh;
81 }
82 brelse(bh);
83
84 return NULL;
85}
86
87static int qnx4_get_block( struct inode *inode, sector_t iblock, struct buffer_head *bh, int create ) 55static int qnx4_get_block( struct inode *inode, sector_t iblock, struct buffer_head *bh, int create )
88{ 56{
89 unsigned long phys; 57 unsigned long phys;
@@ -98,23 +66,31 @@ static int qnx4_get_block( struct inode *inode, sector_t iblock, struct buffer_h
98 return 0; 66 return 0;
99} 67}
100 68
69static inline u32 try_extent(qnx4_xtnt_t *extent, u32 *offset)
70{
71 u32 size = le32_to_cpu(extent->xtnt_size);
72 if (*offset < size)
73 return le32_to_cpu(extent->xtnt_blk) + *offset - 1;
74 *offset -= size;
75 return 0;
76}
77
101unsigned long qnx4_block_map( struct inode *inode, long iblock ) 78unsigned long qnx4_block_map( struct inode *inode, long iblock )
102{ 79{
103 int ix; 80 int ix;
104 long offset, i_xblk; 81 long i_xblk;
105 unsigned long block = 0;
106 struct buffer_head *bh = NULL; 82 struct buffer_head *bh = NULL;
107 struct qnx4_xblk *xblk = NULL; 83 struct qnx4_xblk *xblk = NULL;
108 struct qnx4_inode_entry *qnx4_inode = qnx4_raw_inode(inode); 84 struct qnx4_inode_entry *qnx4_inode = qnx4_raw_inode(inode);
109 u16 nxtnt = le16_to_cpu(qnx4_inode->di_num_xtnts); 85 u16 nxtnt = le16_to_cpu(qnx4_inode->di_num_xtnts);
86 u32 offset = iblock;
87 u32 block = try_extent(&qnx4_inode->di_first_xtnt, &offset);
110 88
111 if ( iblock < le32_to_cpu(qnx4_inode->di_first_xtnt.xtnt_size) ) { 89 if (block) {
112 // iblock is in the first extent. This is easy. 90 // iblock is in the first extent. This is easy.
113 block = le32_to_cpu(qnx4_inode->di_first_xtnt.xtnt_blk) + iblock - 1;
114 } else { 91 } else {
115 // iblock is beyond first extent. We have to follow the extent chain. 92 // iblock is beyond first extent. We have to follow the extent chain.
116 i_xblk = le32_to_cpu(qnx4_inode->di_xblk); 93 i_xblk = le32_to_cpu(qnx4_inode->di_xblk);
117 offset = iblock - le32_to_cpu(qnx4_inode->di_first_xtnt.xtnt_size);
118 ix = 0; 94 ix = 0;
119 while ( --nxtnt > 0 ) { 95 while ( --nxtnt > 0 ) {
120 if ( ix == 0 ) { 96 if ( ix == 0 ) {
@@ -130,12 +106,11 @@ unsigned long qnx4_block_map( struct inode *inode, long iblock )
130 return -EIO; 106 return -EIO;
131 } 107 }
132 } 108 }
133 if ( offset < le32_to_cpu(xblk->xblk_xtnts[ix].xtnt_size) ) { 109 block = try_extent(&xblk->xblk_xtnts[ix], &offset);
110 if (block) {
134 // got it! 111 // got it!
135 block = le32_to_cpu(xblk->xblk_xtnts[ix].xtnt_blk) + offset - 1;
136 break; 112 break;
137 } 113 }
138 offset -= le32_to_cpu(xblk->xblk_xtnts[ix].xtnt_size);
139 if ( ++ix >= xblk->xblk_num_xtnts ) { 114 if ( ++ix >= xblk->xblk_num_xtnts ) {
140 i_xblk = le32_to_cpu(xblk->xblk_next_xblk); 115 i_xblk = le32_to_cpu(xblk->xblk_next_xblk);
141 ix = 0; 116 ix = 0;
@@ -260,15 +235,13 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent)
260 } 235 }
261 236
262 ret = -ENOMEM; 237 ret = -ENOMEM;
263 s->s_root = d_alloc_root(root); 238 s->s_root = d_make_root(root);
264 if (s->s_root == NULL) 239 if (s->s_root == NULL)
265 goto outi; 240 goto outb;
266 241
267 brelse(bh); 242 brelse(bh);
268 return 0; 243 return 0;
269 244
270 outi:
271 iput(root);
272 outb: 245 outb:
273 kfree(qs->BitMap); 246 kfree(qs->BitMap);
274 out: 247 out:
@@ -288,44 +261,17 @@ static void qnx4_put_super(struct super_block *sb)
288 return; 261 return;
289} 262}
290 263
291static int qnx4_writepage(struct page *page, struct writeback_control *wbc)
292{
293 return block_write_full_page(page,qnx4_get_block, wbc);
294}
295
296static int qnx4_readpage(struct file *file, struct page *page) 264static int qnx4_readpage(struct file *file, struct page *page)
297{ 265{
298 return block_read_full_page(page,qnx4_get_block); 266 return block_read_full_page(page,qnx4_get_block);
299} 267}
300 268
301static int qnx4_write_begin(struct file *file, struct address_space *mapping,
302 loff_t pos, unsigned len, unsigned flags,
303 struct page **pagep, void **fsdata)
304{
305 struct qnx4_inode_info *qnx4_inode = qnx4_i(mapping->host);
306 int ret;
307
308 *pagep = NULL;
309 ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
310 qnx4_get_block,
311 &qnx4_inode->mmu_private);
312 if (unlikely(ret)) {
313 loff_t isize = mapping->host->i_size;
314 if (pos + len > isize)
315 vmtruncate(mapping->host, isize);
316 }
317
318 return ret;
319}
320static sector_t qnx4_bmap(struct address_space *mapping, sector_t block) 269static sector_t qnx4_bmap(struct address_space *mapping, sector_t block)
321{ 270{
322 return generic_block_bmap(mapping,block,qnx4_get_block); 271 return generic_block_bmap(mapping,block,qnx4_get_block);
323} 272}
324static const struct address_space_operations qnx4_aops = { 273static const struct address_space_operations qnx4_aops = {
325 .readpage = qnx4_readpage, 274 .readpage = qnx4_readpage,
326 .writepage = qnx4_writepage,
327 .write_begin = qnx4_write_begin,
328 .write_end = generic_write_end,
329 .bmap = qnx4_bmap 275 .bmap = qnx4_bmap
330}; 276};
331 277
diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c
index 275327b5615e..a512c0b30e8e 100644
--- a/fs/qnx4/namei.c
+++ b/fs/qnx4/namei.c
@@ -39,10 +39,6 @@ static int qnx4_match(int len, const char *name,
39 } else { 39 } else {
40 namelen = QNX4_SHORT_NAME_MAX; 40 namelen = QNX4_SHORT_NAME_MAX;
41 } 41 }
42 /* "" means "." ---> so paths like "/usr/lib//libc.a" work */
43 if (!len && (de->di_fname[0] == '.') && (de->di_fname[1] == '\0')) {
44 return 1;
45 }
46 thislen = strlen( de->di_fname ); 42 thislen = strlen( de->di_fname );
47 if ( thislen > namelen ) 43 if ( thislen > namelen )
48 thislen = namelen; 44 thislen = namelen;
@@ -72,7 +68,9 @@ static struct buffer_head *qnx4_find_entry(int len, struct inode *dir,
72 block = offset = blkofs = 0; 68 block = offset = blkofs = 0;
73 while (blkofs * QNX4_BLOCK_SIZE + offset < dir->i_size) { 69 while (blkofs * QNX4_BLOCK_SIZE + offset < dir->i_size) {
74 if (!bh) { 70 if (!bh) {
75 bh = qnx4_bread(dir, blkofs, 0); 71 block = qnx4_block_map(dir, blkofs);
72 if (block)
73 bh = sb_bread(dir->i_sb, block);
76 if (!bh) { 74 if (!bh) {
77 blkofs++; 75 blkofs++;
78 continue; 76 continue;
@@ -80,7 +78,6 @@ static struct buffer_head *qnx4_find_entry(int len, struct inode *dir,
80 } 78 }
81 *res_dir = (struct qnx4_inode_entry *) (bh->b_data + offset); 79 *res_dir = (struct qnx4_inode_entry *) (bh->b_data + offset);
82 if (qnx4_match(len, name, bh, &offset)) { 80 if (qnx4_match(len, name, bh, &offset)) {
83 block = qnx4_block_map( dir, blkofs );
84 *ino = block * QNX4_INODES_PER_BLOCK + 81 *ino = block * QNX4_INODES_PER_BLOCK +
85 (offset / QNX4_DIR_ENTRY_SIZE) - 1; 82 (offset / QNX4_DIR_ENTRY_SIZE) - 1;
86 return bh; 83 return bh;
diff --git a/fs/qnx4/qnx4.h b/fs/qnx4/qnx4.h
index 33a60858203b..244d4620189b 100644
--- a/fs/qnx4/qnx4.h
+++ b/fs/qnx4/qnx4.h
@@ -27,8 +27,6 @@ extern struct dentry *qnx4_lookup(struct inode *dir, struct dentry *dentry, stru
27extern unsigned long qnx4_count_free_blocks(struct super_block *sb); 27extern unsigned long qnx4_count_free_blocks(struct super_block *sb);
28extern unsigned long qnx4_block_map(struct inode *inode, long iblock); 28extern unsigned long qnx4_block_map(struct inode *inode, long iblock);
29 29
30extern struct buffer_head *qnx4_bread(struct inode *, int, int);
31
32extern const struct inode_operations qnx4_dir_inode_operations; 30extern const struct inode_operations qnx4_dir_inode_operations;
33extern const struct file_operations qnx4_dir_operations; 31extern const struct file_operations qnx4_dir_operations;
34extern int qnx4_is_free(struct super_block *sb, long block); 32extern int qnx4_is_free(struct super_block *sb, long block);
diff --git a/fs/qnx6/Kconfig b/fs/qnx6/Kconfig
new file mode 100644
index 000000000000..edbba5c17cc8
--- /dev/null
+++ b/fs/qnx6/Kconfig
@@ -0,0 +1,26 @@
1config QNX6FS_FS
2 tristate "QNX6 file system support (read only)"
3 depends on BLOCK && CRC32
4 help
5 This is the file system used by the real-time operating systems
6 QNX 6 (also called QNX RTP).
7 Further information is available at <http://www.qnx.com/>.
8 Say Y if you intend to mount QNX hard disks or floppies formatted
9 with a mkqnx6fs.
10 However, keep in mind that this currently is a readonly driver!
11
12 To compile this file system support as a module, choose M here: the
13 module will be called qnx6.
14
15 If you don't know whether you need it, then you don't need it:
16 answer N.
17
18config QNX6FS_DEBUG
19 bool "QNX6 debugging information"
20 depends on QNX6FS_FS
21 help
22 Turns on extended debugging output.
23
24 If you are not a developer working on the QNX6FS, you probably don't
25 want this:
26 answer N.
diff --git a/fs/qnx6/Makefile b/fs/qnx6/Makefile
new file mode 100644
index 000000000000..9dd06199afc9
--- /dev/null
+++ b/fs/qnx6/Makefile
@@ -0,0 +1,7 @@
1#
2# Makefile for the linux qnx6-filesystem routines.
3#
4
5obj-$(CONFIG_QNX6FS_FS) += qnx6.o
6
7qnx6-objs := inode.o dir.o namei.o super_mmi.o
diff --git a/fs/qnx6/README b/fs/qnx6/README
new file mode 100644
index 000000000000..116d622026cc
--- /dev/null
+++ b/fs/qnx6/README
@@ -0,0 +1,8 @@
1
2 This is a snapshot of the QNX6 filesystem for Linux.
3 Please send diffs and remarks to <chaosman@ontika.net> .
4
5Credits :
6
7Al Viro <viro@ZenIV.linux.org.uk> (endless patience with me & support ;))
8Kai Bankett <chaosman@ontika.net> (Maintainer)
diff --git a/fs/qnx6/dir.c b/fs/qnx6/dir.c
new file mode 100644
index 000000000000..dc597353db3b
--- /dev/null
+++ b/fs/qnx6/dir.c
@@ -0,0 +1,291 @@
1/*
2 * QNX6 file system, Linux implementation.
3 *
4 * Version : 1.0.0
5 *
6 * History :
7 *
8 * 01-02-2012 by Kai Bankett (chaosman@ontika.net) : first release.
9 * 16-02-2012 pagemap extension by Al Viro
10 *
11 */
12
13#include "qnx6.h"
14
15static unsigned qnx6_lfile_checksum(char *name, unsigned size)
16{
17 unsigned crc = 0;
18 char *end = name + size;
19 while (name < end) {
20 crc = ((crc >> 1) + *(name++)) ^
21 ((crc & 0x00000001) ? 0x80000000 : 0);
22 }
23 return crc;
24}
25
26static struct page *qnx6_get_page(struct inode *dir, unsigned long n)
27{
28 struct address_space *mapping = dir->i_mapping;
29 struct page *page = read_mapping_page(mapping, n, NULL);
30 if (!IS_ERR(page))
31 kmap(page);
32 return page;
33}
34
35static inline unsigned long dir_pages(struct inode *inode)
36{
37 return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT;
38}
39
40static unsigned last_entry(struct inode *inode, unsigned long page_nr)
41{
42 unsigned long last_byte = inode->i_size;
43 last_byte -= page_nr << PAGE_CACHE_SHIFT;
44 if (last_byte > PAGE_CACHE_SIZE)
45 last_byte = PAGE_CACHE_SIZE;
46 return last_byte / QNX6_DIR_ENTRY_SIZE;
47}
48
49static struct qnx6_long_filename *qnx6_longname(struct super_block *sb,
50 struct qnx6_long_dir_entry *de,
51 struct page **p)
52{
53 struct qnx6_sb_info *sbi = QNX6_SB(sb);
54 u32 s = fs32_to_cpu(sbi, de->de_long_inode); /* in block units */
55 u32 n = s >> (PAGE_CACHE_SHIFT - sb->s_blocksize_bits); /* in pages */
56 /* within page */
57 u32 offs = (s << sb->s_blocksize_bits) & ~PAGE_CACHE_MASK;
58 struct address_space *mapping = sbi->longfile->i_mapping;
59 struct page *page = read_mapping_page(mapping, n, NULL);
60 if (IS_ERR(page))
61 return ERR_CAST(page);
62 kmap(*p = page);
63 return (struct qnx6_long_filename *)(page_address(page) + offs);
64}
65
66static int qnx6_dir_longfilename(struct inode *inode,
67 struct qnx6_long_dir_entry *de,
68 void *dirent, loff_t pos,
69 unsigned de_inode, filldir_t filldir)
70{
71 struct qnx6_long_filename *lf;
72 struct super_block *s = inode->i_sb;
73 struct qnx6_sb_info *sbi = QNX6_SB(s);
74 struct page *page;
75 int lf_size;
76
77 if (de->de_size != 0xff) {
78 /* error - long filename entries always have size 0xff
79 in direntry */
80 printk(KERN_ERR "qnx6: invalid direntry size (%i).\n",
81 de->de_size);
82 return 0;
83 }
84 lf = qnx6_longname(s, de, &page);
85 if (IS_ERR(lf)) {
86 printk(KERN_ERR "qnx6:Error reading longname\n");
87 return 0;
88 }
89
90 lf_size = fs16_to_cpu(sbi, lf->lf_size);
91
92 if (lf_size > QNX6_LONG_NAME_MAX) {
93 QNX6DEBUG((KERN_INFO "file %s\n", lf->lf_fname));
94 printk(KERN_ERR "qnx6:Filename too long (%i)\n", lf_size);
95 qnx6_put_page(page);
96 return 0;
97 }
98
99 /* calc & validate longfilename checksum
100 mmi 3g filesystem does not have that checksum */
101 if (!test_opt(s, MMI_FS) && fs32_to_cpu(sbi, de->de_checksum) !=
102 qnx6_lfile_checksum(lf->lf_fname, lf_size))
103 printk(KERN_INFO "qnx6: long filename checksum error.\n");
104
105 QNX6DEBUG((KERN_INFO "qnx6_readdir:%.*s inode:%u\n",
106 lf_size, lf->lf_fname, de_inode));
107 if (filldir(dirent, lf->lf_fname, lf_size, pos, de_inode,
108 DT_UNKNOWN) < 0) {
109 qnx6_put_page(page);
110 return 0;
111 }
112
113 qnx6_put_page(page);
114 /* success */
115 return 1;
116}
117
118static int qnx6_readdir(struct file *filp, void *dirent, filldir_t filldir)
119{
120 struct inode *inode = filp->f_path.dentry->d_inode;
121 struct super_block *s = inode->i_sb;
122 struct qnx6_sb_info *sbi = QNX6_SB(s);
123 loff_t pos = filp->f_pos & (QNX6_DIR_ENTRY_SIZE - 1);
124 unsigned long npages = dir_pages(inode);
125 unsigned long n = pos >> PAGE_CACHE_SHIFT;
126 unsigned start = (pos & ~PAGE_CACHE_MASK) / QNX6_DIR_ENTRY_SIZE;
127 bool done = false;
128
129 if (filp->f_pos >= inode->i_size)
130 return 0;
131
132 for ( ; !done && n < npages; n++, start = 0) {
133 struct page *page = qnx6_get_page(inode, n);
134 int limit = last_entry(inode, n);
135 struct qnx6_dir_entry *de;
136 int i = start;
137
138 if (IS_ERR(page)) {
139 printk(KERN_ERR "qnx6_readdir: read failed\n");
140 filp->f_pos = (n + 1) << PAGE_CACHE_SHIFT;
141 return PTR_ERR(page);
142 }
143 de = ((struct qnx6_dir_entry *)page_address(page)) + start;
144 for (; i < limit; i++, de++, pos += QNX6_DIR_ENTRY_SIZE) {
145 int size = de->de_size;
146 u32 no_inode = fs32_to_cpu(sbi, de->de_inode);
147
148 if (!no_inode || !size)
149 continue;
150
151 if (size > QNX6_SHORT_NAME_MAX) {
152 /* long filename detected
153 get the filename from long filename
154 structure / block */
155 if (!qnx6_dir_longfilename(inode,
156 (struct qnx6_long_dir_entry *)de,
157 dirent, pos, no_inode,
158 filldir)) {
159 done = true;
160 break;
161 }
162 } else {
163 QNX6DEBUG((KERN_INFO "qnx6_readdir:%.*s"
164 " inode:%u\n", size, de->de_fname,
165 no_inode));
166 if (filldir(dirent, de->de_fname, size,
167 pos, no_inode, DT_UNKNOWN)
168 < 0) {
169 done = true;
170 break;
171 }
172 }
173 }
174 qnx6_put_page(page);
175 }
176 filp->f_pos = pos;
177 return 0;
178}
179
180/*
181 * check if the long filename is correct.
182 */
183static unsigned qnx6_long_match(int len, const char *name,
184 struct qnx6_long_dir_entry *de, struct inode *dir)
185{
186 struct super_block *s = dir->i_sb;
187 struct qnx6_sb_info *sbi = QNX6_SB(s);
188 struct page *page;
189 int thislen;
190 struct qnx6_long_filename *lf = qnx6_longname(s, de, &page);
191
192 if (IS_ERR(lf))
193 return 0;
194
195 thislen = fs16_to_cpu(sbi, lf->lf_size);
196 if (len != thislen) {
197 qnx6_put_page(page);
198 return 0;
199 }
200 if (memcmp(name, lf->lf_fname, len) == 0) {
201 qnx6_put_page(page);
202 return fs32_to_cpu(sbi, de->de_inode);
203 }
204 qnx6_put_page(page);
205 return 0;
206}
207
208/*
209 * check if the filename is correct.
210 */
211static unsigned qnx6_match(struct super_block *s, int len, const char *name,
212 struct qnx6_dir_entry *de)
213{
214 struct qnx6_sb_info *sbi = QNX6_SB(s);
215 if (memcmp(name, de->de_fname, len) == 0)
216 return fs32_to_cpu(sbi, de->de_inode);
217 return 0;
218}
219
220
/*
 * Look up @name (@len bytes) in directory @dir.
 *
 * The scan starts at the page cached in ei->i_dir_start_lookup and
 * wraps around, so consecutive lookups in the same directory usually
 * hit the right page first.
 *
 * On success the inode number is returned and the page holding the
 * entry is stored in *res_page (caller must release it with
 * qnx6_put_page()); on failure 0 is returned and *res_page is NULL.
 */
unsigned qnx6_find_entry(int len, struct inode *dir, const char *name,
			 struct page **res_page)
{
	struct super_block *s = dir->i_sb;
	struct qnx6_inode_info *ei = QNX6_I(dir);
	struct page *page = NULL;
	unsigned long start, n;
	unsigned long npages = dir_pages(dir);
	unsigned ino;
	struct qnx6_dir_entry *de;
	struct qnx6_long_dir_entry *lde;

	*res_page = NULL;

	if (npages == 0)
		return 0;
	start = ei->i_dir_start_lookup;
	if (start >= npages)
		start = 0;
	n = start;

	do {
		/* read errors on one page are skipped, not fatal */
		page = qnx6_get_page(dir, n);
		if (!IS_ERR(page)) {
			int limit = last_entry(dir, n);
			int i;

			de = (struct qnx6_dir_entry *)page_address(page);
			for (i = 0; i < limit; i++, de++) {
				if (len <= QNX6_SHORT_NAME_MAX) {
					/* short filename */
					if (len != de->de_size)
						continue;
					ino = qnx6_match(s, len, name, de);
					if (ino)
						goto found;
				} else if (de->de_size == 0xff) {
					/* deal with long filename */
					lde = (struct qnx6_long_dir_entry *)de;
					ino = qnx6_long_match(len,
								name, lde, dir);
					if (ino)
						goto found;
				} else
					printk(KERN_ERR "qnx6: undefined "
						"filename size in inode.\n");
			}
			qnx6_put_page(page);
		}

		if (++n >= npages)
			n = 0;
	} while (n != start);
	return 0;

found:
	/* cache the page index for the next lookup in this directory */
	*res_page = page;
	ei->i_dir_start_lookup = n;
	return ino;
}
281
/* directory file operations: page-cache based readdir, read-only fs */
const struct file_operations qnx6_dir_operations = {
	.llseek		= generic_file_llseek,
	.read		= generic_read_dir,
	.readdir	= qnx6_readdir,
	.fsync		= generic_file_fsync,
};

/* directory inode operations: lookup only (no create/unlink - ro fs) */
const struct inode_operations qnx6_dir_inode_operations = {
	.lookup		= qnx6_lookup,
};
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
new file mode 100644
index 000000000000..e44012dc5645
--- /dev/null
+++ b/fs/qnx6/inode.c
@@ -0,0 +1,698 @@
1/*
2 * QNX6 file system, Linux implementation.
3 *
4 * Version : 1.0.0
5 *
6 * History :
7 *
8 * 01-02-2012 by Kai Bankett (chaosman@ontika.net) : first release.
9 * 16-02-2012 pagemap extension by Al Viro
10 *
11 */
12
13#include <linux/module.h>
14#include <linux/init.h>
15#include <linux/slab.h>
16#include <linux/highuid.h>
17#include <linux/pagemap.h>
18#include <linux/buffer_head.h>
19#include <linux/writeback.h>
20#include <linux/statfs.h>
21#include <linux/parser.h>
22#include <linux/seq_file.h>
23#include <linux/mount.h>
24#include <linux/crc32.h>
25#include <linux/mpage.h>
26#include "qnx6.h"
27
/* forward declarations for the super_operations table below */
static const struct super_operations qnx6_sops;

static void qnx6_put_super(struct super_block *sb);
static struct inode *qnx6_alloc_inode(struct super_block *sb);
static void qnx6_destroy_inode(struct inode *inode);
static int qnx6_remount(struct super_block *sb, int *flags, char *data);
static int qnx6_statfs(struct dentry *dentry, struct kstatfs *buf);
static int qnx6_show_options(struct seq_file *seq, struct dentry *root);

/* read-only filesystem: no write_inode/sync_fs operations needed */
static const struct super_operations qnx6_sops = {
	.alloc_inode	= qnx6_alloc_inode,
	.destroy_inode	= qnx6_destroy_inode,
	.put_super	= qnx6_put_super,
	.statfs		= qnx6_statfs,
	.remount_fs	= qnx6_remount,
	.show_options	= qnx6_show_options,
};
45
46static int qnx6_show_options(struct seq_file *seq, struct dentry *root)
47{
48 struct super_block *sb = root->d_sb;
49 struct qnx6_sb_info *sbi = QNX6_SB(sb);
50
51 if (sbi->s_mount_opt & QNX6_MOUNT_MMI_FS)
52 seq_puts(seq, ",mmi_fs");
53 return 0;
54}
55
/* ->remount_fs: qnx6 is read-only, force any remount back to RDONLY */
static int qnx6_remount(struct super_block *sb, int *flags, char *data)
{
	*flags |= MS_RDONLY;
	return 0;
}
61
62static unsigned qnx6_get_devblock(struct super_block *sb, __fs32 block)
63{
64 struct qnx6_sb_info *sbi = QNX6_SB(sb);
65 return fs32_to_cpu(sbi, block) + sbi->s_blks_off;
66}
67
68static unsigned qnx6_block_map(struct inode *inode, unsigned iblock);
69
70static int qnx6_get_block(struct inode *inode, sector_t iblock,
71 struct buffer_head *bh, int create)
72{
73 unsigned phys;
74
75 QNX6DEBUG((KERN_INFO "qnx6: qnx6_get_block inode=[%ld] iblock=[%ld]\n",
76 inode->i_ino, (unsigned long)iblock));
77
78 phys = qnx6_block_map(inode, iblock);
79 if (phys) {
80 /* logical block is before EOF */
81 map_bh(bh, inode->i_sb, phys);
82 }
83 return 0;
84}
85
86static int qnx6_check_blockptr(__fs32 ptr)
87{
88 if (ptr == ~(__fs32)0) {
89 printk(KERN_ERR "qnx6: hit unused blockpointer.\n");
90 return 0;
91 }
92 return 1;
93}
94
/* ->readpage: read one page through qnx6_get_block via mpage */
static int qnx6_readpage(struct file *file, struct page *page)
{
	return mpage_readpage(page, qnx6_get_block);
}

/* ->readpages: readahead variant of the above */
static int qnx6_readpages(struct file *file, struct address_space *mapping,
		   struct list_head *pages, unsigned nr_pages)
{
	return mpage_readpages(mapping, pages, nr_pages, qnx6_get_block);
}
105
106/*
107 * returns the block number for the no-th element in the tree
108 * inodebits requred as there are multiple inodes in one inode block
109 */
110static unsigned qnx6_block_map(struct inode *inode, unsigned no)
111{
112 struct super_block *s = inode->i_sb;
113 struct qnx6_sb_info *sbi = QNX6_SB(s);
114 struct qnx6_inode_info *ei = QNX6_I(inode);
115 unsigned block = 0;
116 struct buffer_head *bh;
117 __fs32 ptr;
118 int levelptr;
119 int ptrbits = sbi->s_ptrbits;
120 int bitdelta;
121 u32 mask = (1 << ptrbits) - 1;
122 int depth = ei->di_filelevels;
123 int i;
124
125 bitdelta = ptrbits * depth;
126 levelptr = no >> bitdelta;
127
128 if (levelptr > QNX6_NO_DIRECT_POINTERS - 1) {
129 printk(KERN_ERR "qnx6:Requested file block number (%u) too big.",
130 no);
131 return 0;
132 }
133
134 block = qnx6_get_devblock(s, ei->di_block_ptr[levelptr]);
135
136 for (i = 0; i < depth; i++) {
137 bh = sb_bread(s, block);
138 if (!bh) {
139 printk(KERN_ERR "qnx6:Error reading block (%u)\n",
140 block);
141 return 0;
142 }
143 bitdelta -= ptrbits;
144 levelptr = (no >> bitdelta) & mask;
145 ptr = ((__fs32 *)bh->b_data)[levelptr];
146
147 if (!qnx6_check_blockptr(ptr))
148 return 0;
149
150 block = qnx6_get_devblock(s, ptr);
151 brelse(bh);
152 }
153 return block;
154}
155
156static int qnx6_statfs(struct dentry *dentry, struct kstatfs *buf)
157{
158 struct super_block *sb = dentry->d_sb;
159 struct qnx6_sb_info *sbi = QNX6_SB(sb);
160 u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
161
162 buf->f_type = sb->s_magic;
163 buf->f_bsize = sb->s_blocksize;
164 buf->f_blocks = fs32_to_cpu(sbi, sbi->sb->sb_num_blocks);
165 buf->f_bfree = fs32_to_cpu(sbi, sbi->sb->sb_free_blocks);
166 buf->f_files = fs32_to_cpu(sbi, sbi->sb->sb_num_inodes);
167 buf->f_ffree = fs32_to_cpu(sbi, sbi->sb->sb_free_inodes);
168 buf->f_bavail = buf->f_bfree;
169 buf->f_namelen = QNX6_LONG_NAME_MAX;
170 buf->f_fsid.val[0] = (u32)id;
171 buf->f_fsid.val[1] = (u32)(id >> 32);
172
173 return 0;
174}
175
176/*
177 * Check the root directory of the filesystem to make sure
178 * it really _is_ a qnx6 filesystem, and to check the size
179 * of the directory entry.
180 */
181static const char *qnx6_checkroot(struct super_block *s)
182{
183 static char match_root[2][3] = {".\0\0", "..\0"};
184 int i, error = 0;
185 struct qnx6_dir_entry *dir_entry;
186 struct inode *root = s->s_root->d_inode;
187 struct address_space *mapping = root->i_mapping;
188 struct page *page = read_mapping_page(mapping, 0, NULL);
189 if (IS_ERR(page))
190 return "error reading root directory";
191 kmap(page);
192 dir_entry = page_address(page);
193 for (i = 0; i < 2; i++) {
194 /* maximum 3 bytes - due to match_root limitation */
195 if (strncmp(dir_entry[i].de_fname, match_root[i], 3))
196 error = 1;
197 }
198 qnx6_put_page(page);
199 if (error)
200 return "error reading root directory.";
201 return NULL;
202}
203
#ifdef CONFIG_QNX6FS_DEBUG
/* Dump the interesting superblock fields to the kernel log (debug
 * builds only; also used by the MMI variant in super_mmi.c). */
void qnx6_superblock_debug(struct qnx6_super_block *sb, struct super_block *s)
{
	struct qnx6_sb_info *sbi = QNX6_SB(s);

	QNX6DEBUG((KERN_INFO "magic: %08x\n",
				fs32_to_cpu(sbi, sb->sb_magic)));
	QNX6DEBUG((KERN_INFO "checksum: %08x\n",
				fs32_to_cpu(sbi, sb->sb_checksum)));
	QNX6DEBUG((KERN_INFO "serial: %llx\n",
				fs64_to_cpu(sbi, sb->sb_serial)));
	QNX6DEBUG((KERN_INFO "flags: %08x\n",
				fs32_to_cpu(sbi, sb->sb_flags)));
	QNX6DEBUG((KERN_INFO "blocksize: %08x\n",
				fs32_to_cpu(sbi, sb->sb_blocksize)));
	QNX6DEBUG((KERN_INFO "num_inodes: %08x\n",
				fs32_to_cpu(sbi, sb->sb_num_inodes)));
	QNX6DEBUG((KERN_INFO "free_inodes: %08x\n",
				fs32_to_cpu(sbi, sb->sb_free_inodes)));
	QNX6DEBUG((KERN_INFO "num_blocks: %08x\n",
				fs32_to_cpu(sbi, sb->sb_num_blocks)));
	QNX6DEBUG((KERN_INFO "free_blocks: %08x\n",
				fs32_to_cpu(sbi, sb->sb_free_blocks)));
	QNX6DEBUG((KERN_INFO "inode_levels: %02x\n",
				sb->Inode.levels));
}
#endif
231
/* mount option tokens for match_token() */
enum {
	Opt_mmifs,	/* "mmi_fs": Audi MMI superblock layout */
	Opt_err
};

static const match_table_t tokens = {
	{Opt_mmifs, "mmi_fs"},
	{Opt_err, NULL}
};
241
242static int qnx6_parse_options(char *options, struct super_block *sb)
243{
244 char *p;
245 struct qnx6_sb_info *sbi = QNX6_SB(sb);
246 substring_t args[MAX_OPT_ARGS];
247
248 if (!options)
249 return 1;
250
251 while ((p = strsep(&options, ",")) != NULL) {
252 int token;
253 if (!*p)
254 continue;
255
256 token = match_token(p, tokens, args);
257 switch (token) {
258 case Opt_mmifs:
259 set_opt(sbi->s_mount_opt, MMI_FS);
260 break;
261 default:
262 return 0;
263 }
264 }
265 return 1;
266}
267
/*
 * Read the block at @offset and verify the superblock magic.
 *
 * The magic is first compared in the current (little-endian) byte
 * order; if that fails, sbi->s_bytesex is flipped to big-endian and
 * the comparison retried - on a BE match s_bytesex stays BE for the
 * rest of the mount.  Returns the buffer head on success, NULL (with
 * a message unless @silent) otherwise.
 */
static struct buffer_head *qnx6_check_first_superblock(struct super_block *s,
				int offset, int silent)
{
	struct qnx6_sb_info *sbi = QNX6_SB(s);
	struct buffer_head *bh;
	struct qnx6_super_block *sb;

	/* Check the superblock signatures
	   start with the first superblock */
	bh = sb_bread(s, offset);
	if (!bh) {
		printk(KERN_ERR "qnx6: unable to read the first superblock\n");
		return NULL;
	}
	sb = (struct qnx6_super_block *)bh->b_data;
	if (fs32_to_cpu(sbi, sb->sb_magic) != QNX6_SUPER_MAGIC) {
		sbi->s_bytesex = BYTESEX_BE;
		if (fs32_to_cpu(sbi, sb->sb_magic) == QNX6_SUPER_MAGIC) {
			/* we got a big endian fs */
			QNX6DEBUG((KERN_INFO "qnx6: fs got different"
					" endianess.\n"));
			return bh;
		} else
			sbi->s_bytesex = BYTESEX_LE;
		/* neither byte order matched - the else above only
		   restores LE, the error path below always runs */
		if (!silent) {
			if (offset == 0) {
				printk(KERN_ERR "qnx6: wrong signature (magic)"
					" in superblock #1.\n");
			} else {
				printk(KERN_INFO "qnx6: wrong signature (magic)"
					" at position (0x%lx) - will try"
					" alternative position (0x0000).\n",
						offset * s->s_blocksize);
			}
		}
		brelse(bh);
		return NULL;
	}
	return bh;
}
308
309static struct inode *qnx6_private_inode(struct super_block *s,
310 struct qnx6_root_node *p);
311
/*
 * Mount-time setup: locate and validate the two redundant superblocks,
 * pick the one with the higher serial number, set up the private
 * inode-table and longname-table inodes, and read the root inode.
 * Returns 0 on success or a negative errno.
 */
static int qnx6_fill_super(struct super_block *s, void *data, int silent)
{
	struct buffer_head *bh1 = NULL, *bh2 = NULL;
	struct qnx6_super_block *sb1 = NULL, *sb2 = NULL;
	struct qnx6_sb_info *sbi;
	struct inode *root;
	const char *errmsg;
	struct qnx6_sb_info *qs;
	int ret = -EINVAL;
	u64 offset;
	int bootblock_offset = QNX6_BOOTBLOCK_SIZE;

	qs = kzalloc(sizeof(struct qnx6_sb_info), GFP_KERNEL);
	if (!qs)
		return -ENOMEM;
	s->s_fs_info = qs;

	/* Superblock always is 512 Byte long */
	if (!sb_set_blocksize(s, QNX6_SUPERBLOCK_SIZE)) {
		printk(KERN_ERR "qnx6: unable to set blocksize\n");
		goto outnobh;
	}

	/* parse the mount-options */
	if (!qnx6_parse_options((char *) data, s)) {
		printk(KERN_ERR "qnx6: invalid mount options.\n");
		goto outnobh;
	}
	if (test_opt(s, MMI_FS)) {
		/* MMI variant does its own superblock discovery; on
		   success sb1 is the active superblock and we skip the
		   regular probing below */
		sb1 = qnx6_mmi_fill_super(s, silent);
		if (sb1)
			goto mmi_success;
		else
			goto outnobh;
	}
	sbi = QNX6_SB(s);
	sbi->s_bytesex = BYTESEX_LE;
	/* Check the superblock signatures
	   start with the first superblock */
	bh1 = qnx6_check_first_superblock(s,
		bootblock_offset / QNX6_SUPERBLOCK_SIZE, silent);
	if (!bh1) {
		/* try again without bootblock offset */
		bh1 = qnx6_check_first_superblock(s, 0, silent);
		if (!bh1) {
			printk(KERN_ERR "qnx6: unable to read the first superblock\n");
			goto outnobh;
		}
		/* seems that no bootblock at partition start */
		bootblock_offset = 0;
	}
	sb1 = (struct qnx6_super_block *)bh1->b_data;

#ifdef CONFIG_QNX6FS_DEBUG
	qnx6_superblock_debug(sb1, s);
#endif

	/* checksum check - start at byte 8 and end at byte 512 */
	if (fs32_to_cpu(sbi, sb1->sb_checksum) !=
			crc32_be(0, (char *)(bh1->b_data + 8), 504)) {
		printk(KERN_ERR "qnx6: superblock #1 checksum error\n");
		goto out;
	}

	/* set new blocksize */
	if (!sb_set_blocksize(s, fs32_to_cpu(sbi, sb1->sb_blocksize))) {
		printk(KERN_ERR "qnx6: unable to set blocksize\n");
		goto out;
	}
	/* blocksize invalidates bh - pull it back in */
	brelse(bh1);
	bh1 = sb_bread(s, bootblock_offset >> s->s_blocksize_bits);
	if (!bh1)
		goto outnobh;
	sb1 = (struct qnx6_super_block *)bh1->b_data;

	/* calculate second superblock blocknumber */
	offset = fs32_to_cpu(sbi, sb1->sb_num_blocks) +
		(bootblock_offset >> s->s_blocksize_bits) +
		(QNX6_SUPERBLOCK_AREA >> s->s_blocksize_bits);

	/* set bootblock offset */
	sbi->s_blks_off = (bootblock_offset >> s->s_blocksize_bits) +
			  (QNX6_SUPERBLOCK_AREA >> s->s_blocksize_bits);

	/* next the second superblock */
	bh2 = sb_bread(s, offset);
	if (!bh2) {
		printk(KERN_ERR "qnx6: unable to read the second superblock\n");
		goto out;
	}
	sb2 = (struct qnx6_super_block *)bh2->b_data;
	if (fs32_to_cpu(sbi, sb2->sb_magic) != QNX6_SUPER_MAGIC) {
		if (!silent)
			printk(KERN_ERR "qnx6: wrong signature (magic)"
					" in superblock #2.\n");
		goto out;
	}

	/* checksum check - start at byte 8 and end at byte 512 */
	if (fs32_to_cpu(sbi, sb2->sb_checksum) !=
				crc32_be(0, (char *)(bh2->b_data + 8), 504)) {
		printk(KERN_ERR "qnx6: superblock #2 checksum error\n");
		goto out;
	}

	/* the superblock with the higher serial number is the live one */
	if (fs64_to_cpu(sbi, sb1->sb_serial) >=
					fs64_to_cpu(sbi, sb2->sb_serial)) {
		/* superblock #1 active */
		sbi->sb_buf = bh1;
		sbi->sb = (struct qnx6_super_block *)bh1->b_data;
		brelse(bh2);
		printk(KERN_INFO "qnx6: superblock #1 active\n");
	} else {
		/* superblock #2 active */
		sbi->sb_buf = bh2;
		sbi->sb = (struct qnx6_super_block *)bh2->b_data;
		brelse(bh1);
		printk(KERN_INFO "qnx6: superblock #2 active\n");
	}
mmi_success:
	/* sanity check - limit maximum indirect pointer levels */
	if (sb1->Inode.levels > QNX6_PTR_MAX_LEVELS) {
		printk(KERN_ERR "qnx6: too many inode levels (max %i, sb %i)\n",
			QNX6_PTR_MAX_LEVELS, sb1->Inode.levels);
		goto out;
	}
	if (sb1->Longfile.levels > QNX6_PTR_MAX_LEVELS) {
		printk(KERN_ERR "qnx6: too many longfilename levels"
				" (max %i, sb %i)\n",
			QNX6_PTR_MAX_LEVELS, sb1->Longfile.levels);
		goto out;
	}
	s->s_op = &qnx6_sops;
	s->s_magic = QNX6_SUPER_MAGIC;
	s->s_flags |= MS_RDONLY;        /* Yup, read-only yet */

	/* ease the later tree level calculations */
	sbi = QNX6_SB(s);
	sbi->s_ptrbits = ilog2(s->s_blocksize / 4);
	sbi->inodes = qnx6_private_inode(s, &sb1->Inode);
	if (!sbi->inodes)
		goto out;
	sbi->longfile = qnx6_private_inode(s, &sb1->Longfile);
	if (!sbi->longfile)
		goto out1;

	/* prefetch root inode */
	root = qnx6_iget(s, QNX6_ROOT_INO);
	if (IS_ERR(root)) {
		printk(KERN_ERR "qnx6: get inode failed\n");
		ret = PTR_ERR(root);
		goto out2;
	}

	ret = -ENOMEM;
	s->s_root = d_make_root(root);
	if (!s->s_root)
		goto out2;

	ret = -EINVAL;
	errmsg = qnx6_checkroot(s);
	if (errmsg != NULL) {
		if (!silent)
			printk(KERN_ERR "qnx6: %s\n", errmsg);
		goto out3;
	}
	return 0;

out3:
	dput(s->s_root);
	s->s_root = NULL;
out2:
	iput(sbi->longfile);
out1:
	iput(sbi->inodes);
out:
	if (bh1)
		brelse(bh1);
	if (bh2)
		brelse(bh2);
outnobh:
	kfree(qs);
	s->s_fs_info = NULL;
	return ret;
}
498
499static void qnx6_put_super(struct super_block *sb)
500{
501 struct qnx6_sb_info *qs = QNX6_SB(sb);
502 brelse(qs->sb_buf);
503 iput(qs->longfile);
504 iput(qs->inodes);
505 kfree(qs);
506 sb->s_fs_info = NULL;
507 return;
508}
509
/* ->bmap (FIBMAP ioctl): file block -> device block via qnx6_get_block */
static sector_t qnx6_bmap(struct address_space *mapping, sector_t block)
{
	return generic_block_bmap(mapping, block, qnx6_get_block);
}
/* read-only address space: no writepage/write_begin supplied */
static const struct address_space_operations qnx6_aops = {
	.readpage	= qnx6_readpage,
	.readpages	= qnx6_readpages,
	.bmap		= qnx6_bmap
};
519
520static struct inode *qnx6_private_inode(struct super_block *s,
521 struct qnx6_root_node *p)
522{
523 struct inode *inode = new_inode(s);
524 if (inode) {
525 struct qnx6_inode_info *ei = QNX6_I(inode);
526 struct qnx6_sb_info *sbi = QNX6_SB(s);
527 inode->i_size = fs64_to_cpu(sbi, p->size);
528 memcpy(ei->di_block_ptr, p->ptr, sizeof(p->ptr));
529 ei->di_filelevels = p->levels;
530 inode->i_mode = S_IFREG | S_IRUSR; /* probably wrong */
531 inode->i_mapping->a_ops = &qnx6_aops;
532 }
533 return inode;
534}
535
/*
 * Read inode @ino from the on-disk inode table (reached through the
 * private "inodes" mapping) and set up the corresponding VFS inode.
 * Returns the inode or an ERR_PTR().
 */
struct inode *qnx6_iget(struct super_block *sb, unsigned ino)
{
	struct qnx6_sb_info *sbi = QNX6_SB(sb);
	struct qnx6_inode_entry *raw_inode;
	struct inode *inode;
	struct qnx6_inode_info *ei;
	struct address_space *mapping;
	struct page *page;
	u32 n, offs;

	inode = iget_locked(sb, ino);
	if (!inode)
		return ERR_PTR(-ENOMEM);
	/* already cached and fully initialised */
	if (!(inode->i_state & I_NEW))
		return inode;

	ei = QNX6_I(inode);

	inode->i_mode = 0;

	/* inode numbers start at 1; 0 marks a free slot on disk */
	if (ino == 0) {
		printk(KERN_ERR "qnx6: bad inode number on dev %s: %u is "
				"out of range\n",
		       sb->s_id, ino);
		iget_failed(inode);
		return ERR_PTR(-EIO);
	}
	/* page index and in-page slot of this inode in the inode table */
	n = (ino - 1) >> (PAGE_CACHE_SHIFT - QNX6_INODE_SIZE_BITS);
	offs = (ino - 1) & (~PAGE_CACHE_MASK >> QNX6_INODE_SIZE_BITS);
	mapping = sbi->inodes->i_mapping;
	page = read_mapping_page(mapping, n, NULL);
	if (IS_ERR(page)) {
		printk(KERN_ERR "qnx6: major problem: unable to read inode from "
		       "dev %s\n", sb->s_id);
		iget_failed(inode);
		return ERR_CAST(page);
	}
	kmap(page);
	raw_inode = ((struct qnx6_inode_entry *)page_address(page)) + offs;

	inode->i_mode    = fs16_to_cpu(sbi, raw_inode->di_mode);
	inode->i_uid     = (uid_t)fs32_to_cpu(sbi, raw_inode->di_uid);
	inode->i_gid     = (gid_t)fs32_to_cpu(sbi, raw_inode->di_gid);
	inode->i_size    = fs64_to_cpu(sbi, raw_inode->di_size);
	/* on-disk timestamps have second granularity only */
	inode->i_mtime.tv_sec   = fs32_to_cpu(sbi, raw_inode->di_mtime);
	inode->i_mtime.tv_nsec = 0;
	inode->i_atime.tv_sec   = fs32_to_cpu(sbi, raw_inode->di_atime);
	inode->i_atime.tv_nsec = 0;
	inode->i_ctime.tv_sec   = fs32_to_cpu(sbi, raw_inode->di_ctime);
	inode->i_ctime.tv_nsec = 0;

	/* calc blocks based on 512 byte blocksize */
	inode->i_blocks = (inode->i_size + 511) >> 9;

	memcpy(&ei->di_block_ptr, &raw_inode->di_block_ptr,
	       sizeof(raw_inode->di_block_ptr));
	ei->di_filelevels = raw_inode->di_filelevels;

	/* wire up the per-type operations (read-only everywhere) */
	if (S_ISREG(inode->i_mode)) {
		inode->i_fop = &generic_ro_fops;
		inode->i_mapping->a_ops = &qnx6_aops;
	} else if (S_ISDIR(inode->i_mode)) {
		inode->i_op = &qnx6_dir_inode_operations;
		inode->i_fop = &qnx6_dir_operations;
		inode->i_mapping->a_ops = &qnx6_aops;
	} else if (S_ISLNK(inode->i_mode)) {
		inode->i_op = &page_symlink_inode_operations;
		inode->i_mapping->a_ops = &qnx6_aops;
	} else
		init_special_inode(inode, inode->i_mode, 0);
	qnx6_put_page(page);
	unlock_new_inode(inode);
	return inode;
}
610
611static struct kmem_cache *qnx6_inode_cachep;
612
613static struct inode *qnx6_alloc_inode(struct super_block *sb)
614{
615 struct qnx6_inode_info *ei;
616 ei = kmem_cache_alloc(qnx6_inode_cachep, GFP_KERNEL);
617 if (!ei)
618 return NULL;
619 return &ei->vfs_inode;
620}
621
/* RCU callback: actually free the inode after the grace period */
static void qnx6_i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);
	INIT_LIST_HEAD(&inode->i_dentry);
	kmem_cache_free(qnx6_inode_cachep, QNX6_I(inode));
}

/* ->destroy_inode: defer the free until RCU readers are done */
static void qnx6_destroy_inode(struct inode *inode)
{
	call_rcu(&inode->i_rcu, qnx6_i_callback);
}
633
634static void init_once(void *foo)
635{
636 struct qnx6_inode_info *ei = (struct qnx6_inode_info *) foo;
637
638 inode_init_once(&ei->vfs_inode);
639}
640
641static int init_inodecache(void)
642{
643 qnx6_inode_cachep = kmem_cache_create("qnx6_inode_cache",
644 sizeof(struct qnx6_inode_info),
645 0, (SLAB_RECLAIM_ACCOUNT|
646 SLAB_MEM_SPREAD),
647 init_once);
648 if (!qnx6_inode_cachep)
649 return -ENOMEM;
650 return 0;
651}
652
/* Tear down the inode slab cache (module unload / failed init). */
static void destroy_inodecache(void)
{
	kmem_cache_destroy(qnx6_inode_cachep);
}
657
/* ->mount: qnx6 lives on a block device */
static struct dentry *qnx6_mount(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data)
{
	return mount_bdev(fs_type, flags, dev_name, data, qnx6_fill_super);
}

static struct file_system_type qnx6_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "qnx6",
	.mount		= qnx6_mount,
	.kill_sb	= kill_block_super,
	.fs_flags	= FS_REQUIRES_DEV,
};
671
672static int __init init_qnx6_fs(void)
673{
674 int err;
675
676 err = init_inodecache();
677 if (err)
678 return err;
679
680 err = register_filesystem(&qnx6_fs_type);
681 if (err) {
682 destroy_inodecache();
683 return err;
684 }
685
686 printk(KERN_INFO "QNX6 filesystem 1.0.0 registered.\n");
687 return 0;
688}
689
/* Module unload: unregister first so no new mount can race the cache
 * destruction. */
static void __exit exit_qnx6_fs(void)
{
	unregister_filesystem(&qnx6_fs_type);
	destroy_inodecache();
}
695
696module_init(init_qnx6_fs)
697module_exit(exit_qnx6_fs)
698MODULE_LICENSE("GPL");
diff --git a/fs/qnx6/namei.c b/fs/qnx6/namei.c
new file mode 100644
index 000000000000..8a97289e04ad
--- /dev/null
+++ b/fs/qnx6/namei.c
@@ -0,0 +1,42 @@
1/*
2 * QNX6 file system, Linux implementation.
3 *
4 * Version : 1.0.0
5 *
6 * History :
7 *
8 * 01-02-2012 by Kai Bankett (chaosman@ontika.net) : first release.
9 * 16-02-2012 pagemap extension by Al Viro
10 *
11 */
12
13#include "qnx6.h"
14
15struct dentry *qnx6_lookup(struct inode *dir, struct dentry *dentry,
16 struct nameidata *nd)
17{
18 unsigned ino;
19 struct page *page;
20 struct inode *foundinode = NULL;
21 const char *name = dentry->d_name.name;
22 int len = dentry->d_name.len;
23
24 if (len > QNX6_LONG_NAME_MAX)
25 return ERR_PTR(-ENAMETOOLONG);
26
27 ino = qnx6_find_entry(len, dir, name, &page);
28 if (ino) {
29 foundinode = qnx6_iget(dir->i_sb, ino);
30 qnx6_put_page(page);
31 if (IS_ERR(foundinode)) {
32 QNX6DEBUG((KERN_ERR "qnx6: lookup->iget -> "
33 " error %ld\n", PTR_ERR(foundinode)));
34 return ERR_CAST(foundinode);
35 }
36 } else {
37 QNX6DEBUG((KERN_INFO "qnx6_lookup: not found %s\n", name));
38 return NULL;
39 }
40 d_add(dentry, foundinode);
41 return NULL;
42}
diff --git a/fs/qnx6/qnx6.h b/fs/qnx6/qnx6.h
new file mode 100644
index 000000000000..6c5e02a0b6a8
--- /dev/null
+++ b/fs/qnx6/qnx6.h
@@ -0,0 +1,135 @@
1/*
2 * QNX6 file system, Linux implementation.
3 *
4 * Version : 1.0.0
5 *
6 * History :
7 *
8 * 01-02-2012 by Kai Bankett (chaosman@ontika.net) : first release.
9 * 16-02-2012 page map extension by Al Viro
10 *
11 */
12
13#include <linux/fs.h>
14#include <linux/pagemap.h>
15
16typedef __u16 __bitwise __fs16;
17typedef __u32 __bitwise __fs32;
18typedef __u64 __bitwise __fs64;
19
20#include <linux/qnx6_fs.h>
21
22#ifdef CONFIG_QNX6FS_DEBUG
23#define QNX6DEBUG(X) printk X
24#else
25#define QNX6DEBUG(X) (void) 0
26#endif
27
/* per-mount private data, hung off super_block->s_fs_info */
struct qnx6_sb_info {
	struct buffer_head	*sb_buf;	/* superblock buffer */
	struct qnx6_super_block	*sb;		/* our superblock */
	int			s_blks_off;	/* blkoffset fs-startpoint */
	int			s_ptrbits;	/* indirect pointer bitfield */
	unsigned long		s_mount_opt;	/* all mount options */
	int			s_bytesex;	/* holds endianess info */
	struct inode		*inodes;	/* private inode-table inode */
	struct inode		*longfile;	/* private longname-table inode */
};

/* in-core inode: on-disk block tree root plus a lookup cache */
struct qnx6_inode_info {
	__fs32			di_block_ptr[QNX6_NO_DIRECT_POINTERS];
	__u8			di_filelevels;	/* depth of the block tree */
	__u32			i_dir_start_lookup; /* page of last dir hit */
	struct inode		vfs_inode;
};
45
46extern struct inode *qnx6_iget(struct super_block *sb, unsigned ino);
47extern struct dentry *qnx6_lookup(struct inode *dir, struct dentry *dentry,
48 struct nameidata *nd);
49
50#ifdef CONFIG_QNX6FS_DEBUG
51extern void qnx6_superblock_debug(struct qnx6_super_block *,
52 struct super_block *);
53#endif
54
55extern const struct inode_operations qnx6_dir_inode_operations;
56extern const struct file_operations qnx6_dir_operations;
57
/* fetch the per-mount private data */
static inline struct qnx6_sb_info *QNX6_SB(struct super_block *sb)
{
	return sb->s_fs_info;
}

/* map a VFS inode back to its embedding qnx6_inode_info */
static inline struct qnx6_inode_info *QNX6_I(struct inode *inode)
{
	return container_of(inode, struct qnx6_inode_info, vfs_inode);
}

/* mount option helpers (QNX6_MOUNT_* flag bits in s_mount_opt) */
#define clear_opt(o, opt)	(o &= ~(QNX6_MOUNT_##opt))
#define set_opt(o, opt)		(o |= (QNX6_MOUNT_##opt))
#define test_opt(sb, opt)	(QNX6_SB(sb)->s_mount_opt & \
				 QNX6_MOUNT_##opt)
/* on-disk byte order, detected at mount time
   (see qnx6_check_first_superblock) */
enum {
	BYTESEX_LE,
	BYTESEX_BE,
};
76
/*
 * On-disk fields may be little or big endian; all conversions go
 * through these helpers, which honour the byte order detected at
 * mount time (sbi->s_bytesex).
 */
static inline __u64 fs64_to_cpu(struct qnx6_sb_info *sbi, __fs64 n)
{
	if (sbi->s_bytesex == BYTESEX_LE)
		return le64_to_cpu((__force __le64)n);
	else
		return be64_to_cpu((__force __be64)n);
}

static inline __fs64 cpu_to_fs64(struct qnx6_sb_info *sbi, __u64 n)
{
	if (sbi->s_bytesex == BYTESEX_LE)
		return (__force __fs64)cpu_to_le64(n);
	else
		return (__force __fs64)cpu_to_be64(n);
}

static inline __u32 fs32_to_cpu(struct qnx6_sb_info *sbi, __fs32 n)
{
	if (sbi->s_bytesex == BYTESEX_LE)
		return le32_to_cpu((__force __le32)n);
	else
		return be32_to_cpu((__force __be32)n);
}

static inline __fs32 cpu_to_fs32(struct qnx6_sb_info *sbi, __u32 n)
{
	if (sbi->s_bytesex == BYTESEX_LE)
		return (__force __fs32)cpu_to_le32(n);
	else
		return (__force __fs32)cpu_to_be32(n);
}

static inline __u16 fs16_to_cpu(struct qnx6_sb_info *sbi, __fs16 n)
{
	if (sbi->s_bytesex == BYTESEX_LE)
		return le16_to_cpu((__force __le16)n);
	else
		return be16_to_cpu((__force __be16)n);
}

static inline __fs16 cpu_to_fs16(struct qnx6_sb_info *sbi, __u16 n)
{
	if (sbi->s_bytesex == BYTESEX_LE)
		return (__force __fs16)cpu_to_le16(n);
	else
		return (__force __fs16)cpu_to_be16(n);
}
124
125extern struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s,
126 int silent);
127
/* Release a page obtained (and kmapped) by the dir/longname helpers. */
static inline void qnx6_put_page(struct page *page)
{
	kunmap(page);
	page_cache_release(page);
}
133
134extern unsigned qnx6_find_entry(int len, struct inode *dir, const char *name,
135 struct page **res_page);
diff --git a/fs/qnx6/super_mmi.c b/fs/qnx6/super_mmi.c
new file mode 100644
index 000000000000..29c32cba62d6
--- /dev/null
+++ b/fs/qnx6/super_mmi.c
@@ -0,0 +1,150 @@
1/*
2 * QNX6 file system, Linux implementation.
3 *
4 * Version : 1.0.0
5 *
6 * History :
7 *
8 * 01-02-2012 by Kai Bankett (chaosman@ontika.net) : first release.
9 *
10 */
11
12#include <linux/buffer_head.h>
13#include <linux/slab.h>
14#include <linux/crc32.h>
15#include "qnx6.h"
16
17static void qnx6_mmi_copy_sb(struct qnx6_super_block *qsb,
18 struct qnx6_mmi_super_block *sb)
19{
20 qsb->sb_magic = sb->sb_magic;
21 qsb->sb_checksum = sb->sb_checksum;
22 qsb->sb_serial = sb->sb_serial;
23 qsb->sb_blocksize = sb->sb_blocksize;
24 qsb->sb_num_inodes = sb->sb_num_inodes;
25 qsb->sb_free_inodes = sb->sb_free_inodes;
26 qsb->sb_num_blocks = sb->sb_num_blocks;
27 qsb->sb_free_blocks = sb->sb_free_blocks;
28
29 /* the rest of the superblock is the same */
30 memcpy(&qsb->Inode, &sb->Inode, sizeof(sb->Inode));
31 memcpy(&qsb->Bitmap, &sb->Bitmap, sizeof(sb->Bitmap));
32 memcpy(&qsb->Longfile, &sb->Longfile, sizeof(sb->Longfile));
33}
34
35struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
36{
37 struct buffer_head *bh1, *bh2 = NULL;
38 struct qnx6_mmi_super_block *sb1, *sb2;
39 struct qnx6_super_block *qsb = NULL;
40 struct qnx6_sb_info *sbi;
41 __u64 offset;
42
43 /* Check the superblock signatures
44 start with the first superblock */
45 bh1 = sb_bread(s, 0);
46 if (!bh1) {
47 printk(KERN_ERR "qnx6: Unable to read first mmi superblock\n");
48 return NULL;
49 }
50 sb1 = (struct qnx6_mmi_super_block *)bh1->b_data;
51 sbi = QNX6_SB(s);
52 if (fs32_to_cpu(sbi, sb1->sb_magic) != QNX6_SUPER_MAGIC) {
53 if (!silent) {
54 printk(KERN_ERR "qnx6: wrong signature (magic) in"
55 " superblock #1.\n");
56 goto out;
57 }
58 }
59
60 /* checksum check - start at byte 8 and end at byte 512 */
61 if (fs32_to_cpu(sbi, sb1->sb_checksum) !=
62 crc32_be(0, (char *)(bh1->b_data + 8), 504)) {
63 printk(KERN_ERR "qnx6: superblock #1 checksum error\n");
64 goto out;
65 }
66
67 /* calculate second superblock blocknumber */
68 offset = fs32_to_cpu(sbi, sb1->sb_num_blocks) + QNX6_SUPERBLOCK_AREA /
69 fs32_to_cpu(sbi, sb1->sb_blocksize);
70
71 /* set new blocksize */
72 if (!sb_set_blocksize(s, fs32_to_cpu(sbi, sb1->sb_blocksize))) {
73 printk(KERN_ERR "qnx6: unable to set blocksize\n");
74 goto out;
75 }
76 /* blocksize invalidates bh - pull it back in */
77 brelse(bh1);
78 bh1 = sb_bread(s, 0);
79 if (!bh1)
80 goto out;
81 sb1 = (struct qnx6_mmi_super_block *)bh1->b_data;
82
83 /* read second superblock */
84 bh2 = sb_bread(s, offset);
85 if (!bh2) {
86 printk(KERN_ERR "qnx6: unable to read the second superblock\n");
87 goto out;
88 }
89 sb2 = (struct qnx6_mmi_super_block *)bh2->b_data;
90 if (fs32_to_cpu(sbi, sb2->sb_magic) != QNX6_SUPER_MAGIC) {
91 if (!silent)
92 printk(KERN_ERR "qnx6: wrong signature (magic) in"
93 " superblock #2.\n");
94 goto out;
95 }
96
97 /* checksum check - start at byte 8 and end at byte 512 */
98 if (fs32_to_cpu(sbi, sb2->sb_checksum)
99 != crc32_be(0, (char *)(bh2->b_data + 8), 504)) {
100 printk(KERN_ERR "qnx6: superblock #1 checksum error\n");
101 goto out;
102 }
103
104 qsb = kmalloc(sizeof(*qsb), GFP_KERNEL);
105 if (!qsb) {
106 printk(KERN_ERR "qnx6: unable to allocate memory.\n");
107 goto out;
108 }
109
110 if (fs64_to_cpu(sbi, sb1->sb_serial) >
111 fs64_to_cpu(sbi, sb2->sb_serial)) {
112 /* superblock #1 active */
113 qnx6_mmi_copy_sb(qsb, sb1);
114#ifdef CONFIG_QNX6FS_DEBUG
115 qnx6_superblock_debug(qsb, s);
116#endif
117 memcpy(bh1->b_data, qsb, sizeof(struct qnx6_super_block));
118
119 sbi->sb_buf = bh1;
120 sbi->sb = (struct qnx6_super_block *)bh1->b_data;
121 brelse(bh2);
122 printk(KERN_INFO "qnx6: superblock #1 active\n");
123 } else {
124 /* superblock #2 active */
125 qnx6_mmi_copy_sb(qsb, sb2);
126#ifdef CONFIG_QNX6FS_DEBUG
127 qnx6_superblock_debug(qsb, s);
128#endif
129 memcpy(bh2->b_data, qsb, sizeof(struct qnx6_super_block));
130
131 sbi->sb_buf = bh2;
132 sbi->sb = (struct qnx6_super_block *)bh2->b_data;
133 brelse(bh1);
134 printk(KERN_INFO "qnx6: superblock #2 active\n");
135 }
136 kfree(qsb);
137
138 /* offset for mmi_fs is just SUPERBLOCK_AREA bytes */
139 sbi->s_blks_off = QNX6_SUPERBLOCK_AREA / s->s_blocksize;
140
141 /* success */
142 return sbi->sb;
143
144out:
145 if (bh1 != NULL)
146 brelse(bh1);
147 if (bh2 != NULL)
148 brelse(bh2);
149 return NULL;
150}
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 439ab110f4df..d69a1d1d7e15 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -71,6 +71,7 @@
71#include <linux/module.h> 71#include <linux/module.h>
72#include <linux/proc_fs.h> 72#include <linux/proc_fs.h>
73#include <linux/security.h> 73#include <linux/security.h>
74#include <linux/sched.h>
74#include <linux/kmod.h> 75#include <linux/kmod.h>
75#include <linux/namei.h> 76#include <linux/namei.h>
76#include <linux/capability.h> 77#include <linux/capability.h>
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index fc2c4388d126..9a391204ca27 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -282,10 +282,9 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
282 case Q_XGETQUOTA: 282 case Q_XGETQUOTA:
283 return quota_getxquota(sb, type, id, addr); 283 return quota_getxquota(sb, type, id, addr);
284 case Q_XQUOTASYNC: 284 case Q_XQUOTASYNC:
285 /* caller already holds s_umount */
286 if (sb->s_flags & MS_RDONLY) 285 if (sb->s_flags & MS_RDONLY)
287 return -EROFS; 286 return -EROFS;
288 writeback_inodes_sb(sb, WB_REASON_SYNC); 287 /* XFS quotas are fully coherent now, making this call a noop */
289 return 0; 288 return 0;
290 default: 289 default:
291 return -EINVAL; 290 return -EINVAL;
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index aec766abe3af..a1fdabe21dec 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -209,22 +209,19 @@ static int ramfs_parse_options(char *data, struct ramfs_mount_opts *opts)
209int ramfs_fill_super(struct super_block *sb, void *data, int silent) 209int ramfs_fill_super(struct super_block *sb, void *data, int silent)
210{ 210{
211 struct ramfs_fs_info *fsi; 211 struct ramfs_fs_info *fsi;
212 struct inode *inode = NULL; 212 struct inode *inode;
213 struct dentry *root;
214 int err; 213 int err;
215 214
216 save_mount_options(sb, data); 215 save_mount_options(sb, data);
217 216
218 fsi = kzalloc(sizeof(struct ramfs_fs_info), GFP_KERNEL); 217 fsi = kzalloc(sizeof(struct ramfs_fs_info), GFP_KERNEL);
219 sb->s_fs_info = fsi; 218 sb->s_fs_info = fsi;
220 if (!fsi) { 219 if (!fsi)
221 err = -ENOMEM; 220 return -ENOMEM;
222 goto fail;
223 }
224 221
225 err = ramfs_parse_options(data, &fsi->mount_opts); 222 err = ramfs_parse_options(data, &fsi->mount_opts);
226 if (err) 223 if (err)
227 goto fail; 224 return err;
228 225
229 sb->s_maxbytes = MAX_LFS_FILESIZE; 226 sb->s_maxbytes = MAX_LFS_FILESIZE;
230 sb->s_blocksize = PAGE_CACHE_SIZE; 227 sb->s_blocksize = PAGE_CACHE_SIZE;
@@ -234,24 +231,11 @@ int ramfs_fill_super(struct super_block *sb, void *data, int silent)
234 sb->s_time_gran = 1; 231 sb->s_time_gran = 1;
235 232
236 inode = ramfs_get_inode(sb, NULL, S_IFDIR | fsi->mount_opts.mode, 0); 233 inode = ramfs_get_inode(sb, NULL, S_IFDIR | fsi->mount_opts.mode, 0);
237 if (!inode) { 234 sb->s_root = d_make_root(inode);
238 err = -ENOMEM; 235 if (!sb->s_root)
239 goto fail; 236 return -ENOMEM;
240 }
241
242 root = d_alloc_root(inode);
243 sb->s_root = root;
244 if (!root) {
245 err = -ENOMEM;
246 goto fail;
247 }
248 237
249 return 0; 238 return 0;
250fail:
251 kfree(fsi);
252 sb->s_fs_info = NULL;
253 iput(inode);
254 return err;
255} 239}
256 240
257struct dentry *ramfs_mount(struct file_system_type *fs_type, 241struct dentry *ramfs_mount(struct file_system_type *fs_type,
diff --git a/fs/read_write.c b/fs/read_write.c
index 5ad4248b0cd8..ffc99d22e0a3 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -11,7 +11,7 @@
11#include <linux/uio.h> 11#include <linux/uio.h>
12#include <linux/fsnotify.h> 12#include <linux/fsnotify.h>
13#include <linux/security.h> 13#include <linux/security.h>
14#include <linux/module.h> 14#include <linux/export.h>
15#include <linux/syscalls.h> 15#include <linux/syscalls.h>
16#include <linux/pagemap.h> 16#include <linux/pagemap.h>
17#include <linux/splice.h> 17#include <linux/splice.h>
diff --git a/fs/readdir.c b/fs/readdir.c
index 356f71528ad6..cc0a8227cddf 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -6,7 +6,7 @@
6 6
7#include <linux/stddef.h> 7#include <linux/stddef.h>
8#include <linux/kernel.h> 8#include <linux/kernel.h>
9#include <linux/module.h> 9#include <linux/export.h>
10#include <linux/time.h> 10#include <linux/time.h>
11#include <linux/mm.h> 11#include <linux/mm.h>
12#include <linux/errno.h> 12#include <linux/errno.h>
diff --git a/fs/reiserfs/acl.h b/fs/reiserfs/acl.h
new file mode 100644
index 000000000000..f096b80e73d8
--- /dev/null
+++ b/fs/reiserfs/acl.h
@@ -0,0 +1,76 @@
1#include <linux/init.h>
2#include <linux/posix_acl.h>
3
/* On-disk format version of the reiserfs POSIX ACL xattr payload. */
#define REISERFS_ACL_VERSION	0x0001

/* On-disk ACL entry carrying a qualifier id (little endian).
 * NOTE(review): presumably e_id is a uid/gid for ACL_USER/ACL_GROUP
 * tags, per POSIX ACL convention — confirm against the xattr code. */
typedef struct {
	__le16 e_tag;
	__le16 e_perm;
	__le32 e_id;
} reiserfs_acl_entry;

/* Short on-disk ACL entry with no qualifier id (little endian). */
typedef struct {
	__le16 e_tag;
	__le16 e_perm;
} reiserfs_acl_entry_short;

/* Header preceding the entries; holds REISERFS_ACL_VERSION. */
typedef struct {
	__le32 a_version;
} reiserfs_acl_header;
20
21static inline size_t reiserfs_acl_size(int count)
22{
23 if (count <= 4) {
24 return sizeof(reiserfs_acl_header) +
25 count * sizeof(reiserfs_acl_entry_short);
26 } else {
27 return sizeof(reiserfs_acl_header) +
28 4 * sizeof(reiserfs_acl_entry_short) +
29 (count - 4) * sizeof(reiserfs_acl_entry);
30 }
31}
32
33static inline int reiserfs_acl_count(size_t size)
34{
35 ssize_t s;
36 size -= sizeof(reiserfs_acl_header);
37 s = size - 4 * sizeof(reiserfs_acl_entry_short);
38 if (s < 0) {
39 if (size % sizeof(reiserfs_acl_entry_short))
40 return -1;
41 return size / sizeof(reiserfs_acl_entry_short);
42 } else {
43 if (s % sizeof(reiserfs_acl_entry))
44 return -1;
45 return s / sizeof(reiserfs_acl_entry) + 4;
46 }
47}
48
#ifdef CONFIG_REISERFS_FS_POSIX_ACL
/* Real implementations live in xattr_acl.c when POSIX ACLs are enabled. */
struct posix_acl *reiserfs_get_acl(struct inode *inode, int type);
int reiserfs_acl_chmod(struct inode *inode);
int reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,
				 struct inode *dir, struct dentry *dentry,
				 struct inode *inode);
int reiserfs_cache_default_acl(struct inode *dir);
extern const struct xattr_handler reiserfs_posix_acl_default_handler;
extern const struct xattr_handler reiserfs_posix_acl_access_handler;

#else

/* No-op stubs so callers need no #ifdefs when ACL support is compiled out. */
#define reiserfs_cache_default_acl(inode) 0
#define reiserfs_get_acl NULL

static inline int reiserfs_acl_chmod(struct inode *inode)
{
	return 0;
}

/* Fix: drop the stray const on @dir so the stub's signature matches the
 * real prototype above; the two configs must present one interface. */
static inline int
reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,
			     struct inode *dir, struct dentry *dentry,
			     struct inode *inode)
{
	return 0;
}
#endif
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index 70de42f09f1d..4c0c7d163d15 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -4,14 +4,12 @@
4/* Reiserfs block (de)allocator, bitmap-based. */ 4/* Reiserfs block (de)allocator, bitmap-based. */
5 5
6#include <linux/time.h> 6#include <linux/time.h>
7#include <linux/reiserfs_fs.h> 7#include "reiserfs.h"
8#include <linux/errno.h> 8#include <linux/errno.h>
9#include <linux/buffer_head.h> 9#include <linux/buffer_head.h>
10#include <linux/kernel.h> 10#include <linux/kernel.h>
11#include <linux/pagemap.h> 11#include <linux/pagemap.h>
12#include <linux/vmalloc.h> 12#include <linux/vmalloc.h>
13#include <linux/reiserfs_fs_sb.h>
14#include <linux/reiserfs_fs_i.h>
15#include <linux/quotaops.h> 13#include <linux/quotaops.h>
16#include <linux/seq_file.h> 14#include <linux/seq_file.h>
17 15
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 133e9355dc6f..66c53b642a88 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -5,7 +5,7 @@
5#include <linux/string.h> 5#include <linux/string.h>
6#include <linux/errno.h> 6#include <linux/errno.h>
7#include <linux/fs.h> 7#include <linux/fs.h>
8#include <linux/reiserfs_fs.h> 8#include "reiserfs.h"
9#include <linux/stat.h> 9#include <linux/stat.h>
10#include <linux/buffer_head.h> 10#include <linux/buffer_head.h>
11#include <linux/slab.h> 11#include <linux/slab.h>
diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c
index 60c080440661..2b7882b508db 100644
--- a/fs/reiserfs/do_balan.c
+++ b/fs/reiserfs/do_balan.c
@@ -17,7 +17,7 @@
17 17
18#include <asm/uaccess.h> 18#include <asm/uaccess.h>
19#include <linux/time.h> 19#include <linux/time.h>
20#include <linux/reiserfs_fs.h> 20#include "reiserfs.h"
21#include <linux/buffer_head.h> 21#include <linux/buffer_head.h>
22#include <linux/kernel.h> 22#include <linux/kernel.h>
23 23
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index ace635053a36..8375c922c0d5 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -3,9 +3,9 @@
3 */ 3 */
4 4
5#include <linux/time.h> 5#include <linux/time.h>
6#include <linux/reiserfs_fs.h> 6#include "reiserfs.h"
7#include <linux/reiserfs_acl.h> 7#include "acl.h"
8#include <linux/reiserfs_xattr.h> 8#include "xattr.h"
9#include <asm/uaccess.h> 9#include <asm/uaccess.h>
10#include <linux/pagemap.h> 10#include <linux/pagemap.h>
11#include <linux/swap.h> 11#include <linux/swap.h>
diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c
index 1e4250bc3a6f..430e0658704c 100644
--- a/fs/reiserfs/fix_node.c
+++ b/fs/reiserfs/fix_node.c
@@ -37,7 +37,7 @@
37#include <linux/time.h> 37#include <linux/time.h>
38#include <linux/slab.h> 38#include <linux/slab.h>
39#include <linux/string.h> 39#include <linux/string.h>
40#include <linux/reiserfs_fs.h> 40#include "reiserfs.h"
41#include <linux/buffer_head.h> 41#include <linux/buffer_head.h>
42 42
43/* To make any changes in the tree we find a node, that contains item 43/* To make any changes in the tree we find a node, that contains item
diff --git a/fs/reiserfs/hashes.c b/fs/reiserfs/hashes.c
index 6471c670743e..91b0cc1242a2 100644
--- a/fs/reiserfs/hashes.c
+++ b/fs/reiserfs/hashes.c
@@ -19,7 +19,7 @@
19// 19//
20 20
21#include <linux/kernel.h> 21#include <linux/kernel.h>
22#include <linux/reiserfs_fs.h> 22#include "reiserfs.h"
23#include <asm/types.h> 23#include <asm/types.h>
24 24
25#define DELTA 0x9E3779B9 25#define DELTA 0x9E3779B9
diff --git a/fs/reiserfs/ibalance.c b/fs/reiserfs/ibalance.c
index 2074fd95046b..e1978fd895f5 100644
--- a/fs/reiserfs/ibalance.c
+++ b/fs/reiserfs/ibalance.c
@@ -5,7 +5,7 @@
5#include <asm/uaccess.h> 5#include <asm/uaccess.h>
6#include <linux/string.h> 6#include <linux/string.h>
7#include <linux/time.h> 7#include <linux/time.h>
8#include <linux/reiserfs_fs.h> 8#include "reiserfs.h"
9#include <linux/buffer_head.h> 9#include <linux/buffer_head.h>
10 10
11/* this is one and only function that is used outside (do_balance.c) */ 11/* this is one and only function that is used outside (do_balance.c) */
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 9e8cd5acd79c..494c315c7417 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -4,9 +4,9 @@
4 4
5#include <linux/time.h> 5#include <linux/time.h>
6#include <linux/fs.h> 6#include <linux/fs.h>
7#include <linux/reiserfs_fs.h> 7#include "reiserfs.h"
8#include <linux/reiserfs_acl.h> 8#include "acl.h"
9#include <linux/reiserfs_xattr.h> 9#include "xattr.h"
10#include <linux/exportfs.h> 10#include <linux/exportfs.h>
11#include <linux/pagemap.h> 11#include <linux/pagemap.h>
12#include <linux/highmem.h> 12#include <linux/highmem.h>
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 950e3d1b5c9e..0c2185042d5f 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -5,7 +5,7 @@
5#include <linux/capability.h> 5#include <linux/capability.h>
6#include <linux/fs.h> 6#include <linux/fs.h>
7#include <linux/mount.h> 7#include <linux/mount.h>
8#include <linux/reiserfs_fs.h> 8#include "reiserfs.h"
9#include <linux/time.h> 9#include <linux/time.h>
10#include <asm/uaccess.h> 10#include <asm/uaccess.h>
11#include <linux/pagemap.h> 11#include <linux/pagemap.h>
diff --git a/fs/reiserfs/item_ops.c b/fs/reiserfs/item_ops.c
index 72cb1cc51b87..ee382ef3d300 100644
--- a/fs/reiserfs/item_ops.c
+++ b/fs/reiserfs/item_ops.c
@@ -3,7 +3,7 @@
3 */ 3 */
4 4
5#include <linux/time.h> 5#include <linux/time.h>
6#include <linux/reiserfs_fs.h> 6#include "reiserfs.h"
7 7
8// this contains item handlers for old item types: sd, direct, 8// this contains item handlers for old item types: sd, direct,
9// indirect, directory 9// indirect, directory
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index c3cf54fd4de3..cf9f4de00a95 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -37,7 +37,7 @@
37#include <linux/time.h> 37#include <linux/time.h>
38#include <linux/semaphore.h> 38#include <linux/semaphore.h>
39#include <linux/vmalloc.h> 39#include <linux/vmalloc.h>
40#include <linux/reiserfs_fs.h> 40#include "reiserfs.h"
41#include <linux/kernel.h> 41#include <linux/kernel.h>
42#include <linux/errno.h> 42#include <linux/errno.h>
43#include <linux/fcntl.h> 43#include <linux/fcntl.h>
diff --git a/fs/reiserfs/lbalance.c b/fs/reiserfs/lbalance.c
index 03d85cbf90bf..79e5a8b4c226 100644
--- a/fs/reiserfs/lbalance.c
+++ b/fs/reiserfs/lbalance.c
@@ -5,7 +5,7 @@
5#include <asm/uaccess.h> 5#include <asm/uaccess.h>
6#include <linux/string.h> 6#include <linux/string.h>
7#include <linux/time.h> 7#include <linux/time.h>
8#include <linux/reiserfs_fs.h> 8#include "reiserfs.h"
9#include <linux/buffer_head.h> 9#include <linux/buffer_head.h>
10 10
11/* these are used in do_balance.c */ 11/* these are used in do_balance.c */
@@ -975,7 +975,7 @@ static int leaf_cut_entries(struct buffer_head *bh,
975 remove */ 975 remove */
976 RFALSE(!is_direntry_le_ih(ih), "10180: item is not directory item"); 976 RFALSE(!is_direntry_le_ih(ih), "10180: item is not directory item");
977 RFALSE(I_ENTRY_COUNT(ih) < from + del_count, 977 RFALSE(I_ENTRY_COUNT(ih) < from + del_count,
978 "10185: item contains not enough entries: entry_cout = %d, from = %d, to delete = %d", 978 "10185: item contains not enough entries: entry_count = %d, from = %d, to delete = %d",
979 I_ENTRY_COUNT(ih), from, del_count); 979 I_ENTRY_COUNT(ih), from, del_count);
980 980
981 if (del_count == 0) 981 if (del_count == 0)
diff --git a/fs/reiserfs/lock.c b/fs/reiserfs/lock.c
index 7df1ce48203a..d735bc8470e3 100644
--- a/fs/reiserfs/lock.c
+++ b/fs/reiserfs/lock.c
@@ -1,4 +1,4 @@
1#include <linux/reiserfs_fs.h> 1#include "reiserfs.h"
2#include <linux/mutex.h> 2#include <linux/mutex.h>
3 3
4/* 4/*
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 146378865239..84e8a69cee9d 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -14,9 +14,9 @@
14#include <linux/time.h> 14#include <linux/time.h>
15#include <linux/bitops.h> 15#include <linux/bitops.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/reiserfs_fs.h> 17#include "reiserfs.h"
18#include <linux/reiserfs_acl.h> 18#include "acl.h"
19#include <linux/reiserfs_xattr.h> 19#include "xattr.h"
20#include <linux/quotaops.h> 20#include <linux/quotaops.h>
21 21
22#define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) set_nlink(i, 1); } 22#define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) set_nlink(i, 1); }
diff --git a/fs/reiserfs/objectid.c b/fs/reiserfs/objectid.c
index 3a6de810bd61..f732d6a5251d 100644
--- a/fs/reiserfs/objectid.c
+++ b/fs/reiserfs/objectid.c
@@ -5,8 +5,7 @@
5#include <linux/string.h> 5#include <linux/string.h>
6#include <linux/random.h> 6#include <linux/random.h>
7#include <linux/time.h> 7#include <linux/time.h>
8#include <linux/reiserfs_fs.h> 8#include "reiserfs.h"
9#include <linux/reiserfs_fs_sb.h>
10 9
11// find where objectid map starts 10// find where objectid map starts
12#define objectid_map(s,rs) (old_format_only (s) ? \ 11#define objectid_map(s,rs) (old_format_only (s) ? \
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index 45de98b59466..c0b1112ab7e3 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c
@@ -4,7 +4,7 @@
4 4
5#include <linux/time.h> 5#include <linux/time.h>
6#include <linux/fs.h> 6#include <linux/fs.h>
7#include <linux/reiserfs_fs.h> 7#include "reiserfs.h"
8#include <linux/string.h> 8#include <linux/string.h>
9#include <linux/buffer_head.h> 9#include <linux/buffer_head.h>
10 10
@@ -329,7 +329,7 @@ void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...)
329 Numbering scheme for panic used by Vladimir and Anatoly( Hans completely ignores this scheme, and considers it 329 Numbering scheme for panic used by Vladimir and Anatoly( Hans completely ignores this scheme, and considers it
330 pointless complexity): 330 pointless complexity):
331 331
332 panics in reiserfs_fs.h have numbers from 1000 to 1999 332 panics in reiserfs.h have numbers from 1000 to 1999
333 super.c 2000 to 2999 333 super.c 2000 to 2999
334 preserve.c (unused) 3000 to 3999 334 preserve.c (unused) 3000 to 3999
335 bitmap.c 4000 to 4999 335 bitmap.c 4000 to 4999
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 7a9981196c1c..2c1ade692cc8 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -12,8 +12,7 @@
12#include <linux/time.h> 12#include <linux/time.h>
13#include <linux/seq_file.h> 13#include <linux/seq_file.h>
14#include <asm/uaccess.h> 14#include <asm/uaccess.h>
15#include <linux/reiserfs_fs.h> 15#include "reiserfs.h"
16#include <linux/reiserfs_fs_sb.h>
17#include <linux/init.h> 16#include <linux/init.h>
18#include <linux/proc_fs.h> 17#include <linux/proc_fs.h>
19 18
diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h
new file mode 100644
index 000000000000..a59d27126338
--- /dev/null
+++ b/fs/reiserfs/reiserfs.h
@@ -0,0 +1,2923 @@
1/*
2 * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for licensing and copyright details
3 */
4
5#include <linux/reiserfs_fs.h>
6
7#include <linux/slab.h>
8#include <linux/interrupt.h>
9#include <linux/sched.h>
10#include <linux/bug.h>
11#include <linux/workqueue.h>
12#include <asm/unaligned.h>
13#include <linux/bitops.h>
14#include <linux/proc_fs.h>
15#include <linux/buffer_head.h>
16
17/* the 32 bit compat definitions with int argument */
18#define REISERFS_IOC32_UNPACK _IOW(0xCD, 1, int)
19#define REISERFS_IOC32_GETFLAGS FS_IOC32_GETFLAGS
20#define REISERFS_IOC32_SETFLAGS FS_IOC32_SETFLAGS
21#define REISERFS_IOC32_GETVERSION FS_IOC32_GETVERSION
22#define REISERFS_IOC32_SETVERSION FS_IOC32_SETVERSION
23
24struct reiserfs_journal_list;
25
/** bitmasks for i_flags field in reiserfs-specific part of inode;
    these flags are transient and never written to disk */
typedef enum {
	/** this says what format of key do all items (but stat data) of
	   an object have.  If this is set, that format is 3.6 otherwise
	   - 3.5 */
	i_item_key_version_mask = 0x0001,
	/** If this is unset, object has 3.5 stat data, otherwise, it has
	   3.6 stat data with 64bit size, 32bit nlink etc. */
	i_stat_data_version_mask = 0x0002,
	/** file might need tail packing on close */
	i_pack_on_close_mask = 0x0004,
	/** don't pack tail of file */
	i_nopack_mask = 0x0008,
	/** If either of these is set, a "safe link" was created for this
	   file during truncate or unlink.  Safe link is used to avoid
	   leakage of disk space on crash with some files open, but
	   unlinked. */
	i_link_saved_unlink_mask = 0x0010,
	i_link_saved_truncate_mask = 0x0020,
	/** inode has an extended-attribute directory */
	i_has_xattr_dir = 0x0040,
	/** NOTE(review): presumably marks this inode's data for
	   journalling (data=journal semantics) — confirm at use sites */
	i_data_log = 0x0080,
} reiserfs_inode_flags;
47
/* reiserfs-private, in-memory part of an inode; the generic VFS inode
 * is embedded as the last member (vfs_inode). */
struct reiserfs_inode_info {
	__u32 i_key[4];		/* key is still 4 32 bit integers */
	/** transient inode flags that are never stored on disk. Bitmasks
	   for this field are defined above. */
	__u32 i_flags;

	__u32 i_first_direct_byte;	// offset of first byte stored in direct item.

	/* copy of persistent inode flags read from sd_attrs. */
	__u32 i_attrs;

	int i_prealloc_block;	/* first unused block of a sequence of unused blocks */
	int i_prealloc_count;	/* length of that sequence */
	struct list_head i_prealloc_list;	/* per-transaction list of inodes which
						 * have preallocated blocks */

	unsigned new_packing_locality:1;	/* new packing locality is created; new blocks
						 * for the contents of this directory should be
						 * displaced */

	/* we use these for fsync or O_SYNC to decide which transaction
	 ** needs to be committed in order for this inode to be properly
	 ** flushed */
	unsigned int i_trans_id;
	struct reiserfs_journal_list *i_jl;
	/* NOTE(review): openers/tailpack appear to coordinate tail packing
	 * against concurrent opens — confirm at file.c use sites */
	atomic_t openers;
	struct mutex tailpack;
#ifdef CONFIG_REISERFS_FS_XATTR
	struct rw_semaphore i_xattr_sem;
#endif
	struct inode vfs_inode;
};
80
/* bitmask values for the reiserfs super block's flags field */
typedef enum {
	reiserfs_attrs_cleared = 0x00000001,
} reiserfs_super_block_flags;
84
/* struct reiserfs_super_block accessors/mutators
 * since this is a disk structure, it will always be in
 * little endian format. */
#define sb_block_count(sbp) (le32_to_cpu((sbp)->s_v1.s_block_count))
#define set_sb_block_count(sbp,v) ((sbp)->s_v1.s_block_count = cpu_to_le32(v))
#define sb_free_blocks(sbp) (le32_to_cpu((sbp)->s_v1.s_free_blocks))
#define set_sb_free_blocks(sbp,v) ((sbp)->s_v1.s_free_blocks = cpu_to_le32(v))
#define sb_root_block(sbp) (le32_to_cpu((sbp)->s_v1.s_root_block))
#define set_sb_root_block(sbp,v) ((sbp)->s_v1.s_root_block = cpu_to_le32(v))

#define sb_jp_journal_1st_block(sbp) \
	(le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_1st_block))
#define set_sb_jp_journal_1st_block(sbp,v) \
	((sbp)->s_v1.s_journal.jp_journal_1st_block = cpu_to_le32(v))
#define sb_jp_journal_dev(sbp) \
	(le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_dev))
#define set_sb_jp_journal_dev(sbp,v) \
	((sbp)->s_v1.s_journal.jp_journal_dev = cpu_to_le32(v))
#define sb_jp_journal_size(sbp) \
	(le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_size))
#define set_sb_jp_journal_size(sbp,v) \
	((sbp)->s_v1.s_journal.jp_journal_size = cpu_to_le32(v))
#define sb_jp_journal_trans_max(sbp) \
	(le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_trans_max))
#define set_sb_jp_journal_trans_max(sbp,v) \
	((sbp)->s_v1.s_journal.jp_journal_trans_max = cpu_to_le32(v))
#define sb_jp_journal_magic(sbp) \
	(le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_magic))
#define set_sb_jp_journal_magic(sbp,v) \
	((sbp)->s_v1.s_journal.jp_journal_magic = cpu_to_le32(v))
#define sb_jp_journal_max_batch(sbp) \
	(le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_max_batch))
#define set_sb_jp_journal_max_batch(sbp,v) \
	((sbp)->s_v1.s_journal.jp_journal_max_batch = cpu_to_le32(v))
/* correctly spelled getter, matching set_sb_jp_journal_max_commit_age */
#define sb_jp_journal_max_commit_age(sbp) \
	(le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_max_commit_age))
/* historical misspelling ("jourmal") kept so existing callers still
 * build; prefer sb_jp_journal_max_commit_age in new code */
#define sb_jp_jourmal_max_commit_age(sbp) \
	sb_jp_journal_max_commit_age(sbp)
#define set_sb_jp_journal_max_commit_age(sbp,v) \
	((sbp)->s_v1.s_journal.jp_journal_max_commit_age = cpu_to_le32(v))

#define sb_blocksize(sbp) (le16_to_cpu((sbp)->s_v1.s_blocksize))
#define set_sb_blocksize(sbp,v) ((sbp)->s_v1.s_blocksize = cpu_to_le16(v))
#define sb_oid_maxsize(sbp) (le16_to_cpu((sbp)->s_v1.s_oid_maxsize))
#define set_sb_oid_maxsize(sbp,v) ((sbp)->s_v1.s_oid_maxsize = cpu_to_le16(v))
#define sb_oid_cursize(sbp) (le16_to_cpu((sbp)->s_v1.s_oid_cursize))
#define set_sb_oid_cursize(sbp,v) ((sbp)->s_v1.s_oid_cursize = cpu_to_le16(v))
#define sb_umount_state(sbp) (le16_to_cpu((sbp)->s_v1.s_umount_state))
#define set_sb_umount_state(sbp,v) ((sbp)->s_v1.s_umount_state = cpu_to_le16(v))
#define sb_fs_state(sbp) (le16_to_cpu((sbp)->s_v1.s_fs_state))
#define set_sb_fs_state(sbp,v) ((sbp)->s_v1.s_fs_state = cpu_to_le16(v))
#define sb_hash_function_code(sbp) \
	(le32_to_cpu((sbp)->s_v1.s_hash_function_code))
#define set_sb_hash_function_code(sbp,v) \
	((sbp)->s_v1.s_hash_function_code = cpu_to_le32(v))
#define sb_tree_height(sbp) (le16_to_cpu((sbp)->s_v1.s_tree_height))
#define set_sb_tree_height(sbp,v) ((sbp)->s_v1.s_tree_height = cpu_to_le16(v))
#define sb_bmap_nr(sbp) (le16_to_cpu((sbp)->s_v1.s_bmap_nr))
#define set_sb_bmap_nr(sbp,v) ((sbp)->s_v1.s_bmap_nr = cpu_to_le16(v))
#define sb_version(sbp) (le16_to_cpu((sbp)->s_v1.s_version))
#define set_sb_version(sbp,v) ((sbp)->s_v1.s_version = cpu_to_le16(v))

#define sb_mnt_count(sbp) (le16_to_cpu((sbp)->s_mnt_count))
#define set_sb_mnt_count(sbp, v) ((sbp)->s_mnt_count = cpu_to_le16(v))

#define sb_reserved_for_journal(sbp) \
	(le16_to_cpu((sbp)->s_v1.s_reserved_for_journal))
#define set_sb_reserved_for_journal(sbp,v) \
	((sbp)->s_v1.s_reserved_for_journal = cpu_to_le16(v))
/* LOGGING -- */

/* These all interrelate for performance.
**
** If the journal block count is smaller than n transactions, you lose speed.
** I don't know what n is yet, I'm guessing 8-16.
**
** typical transaction size depends on the application, how often fsync is
** called, and how many metadata blocks you dirty in a 30 second period.
** The more small files (<16k) you use, the larger your transactions will
** be.
**
** If your journal fills faster than dirty buffers get flushed to disk, it must flush them before allowing the journal
** to wrap, which slows things down.  If you need high speed meta data updates, the journal should be big enough
** to prevent wrapping before dirty meta blocks get to disk.
**
** If the batch max is smaller than the transaction max, you'll waste space at the end of the journal
** because journal_end sets the next transaction to start at 0 if the next transaction has any chance of wrapping.
**
** The larger the batch max age, the better the speed, and the more meta data changes you'll lose after a crash.
**
*/

/* don't mess with these for a while */
	/* we have a node size define somewhere in reiserfs_fs.h. -Hans */
#define JOURNAL_BLOCK_SIZE 4096	/* BUG gotta get rid of this */
#define JOURNAL_MAX_CNODE 1500	/* max cnodes to allocate. */
#define JOURNAL_HASH_SIZE 8192
#define JOURNAL_NUM_BITMAPS 5	/* number of copies of the bitmaps to have floating.  Must be >= 2 */
182
/* One of these for every block in every transaction
** Each one is in two hash tables. First, a hash of the current transaction, and after journal_end, a
** hash of all the in memory transactions.
** next and prev are used by the current transaction (journal_hash).
** hnext and hprev are used by journal_list_hash. If a block is in more than one transaction, the journal_list_hash
** links it in multiple times.  This allows flush_journal_list to remove just the cnode belonging
** to a given transaction.
*/
struct reiserfs_journal_cnode {
	struct buffer_head *bh;	/* real buffer head */
	struct super_block *sb;	/* dev of real buffer head */
	__u32 blocknr;		/* block number of real buffer head, == 0 when buffer on disk */
	unsigned long state;
	struct reiserfs_journal_list *jlist;	/* journal list this cnode lives in */
	struct reiserfs_journal_cnode *next;	/* next in transaction list */
	struct reiserfs_journal_cnode *prev;	/* prev in transaction list */
	struct reiserfs_journal_cnode *hprev;	/* prev in hash list */
	struct reiserfs_journal_cnode *hnext;	/* next in hash list */
};
202
/* one node on the journal's j_bitmap_nodes list; data points at one
 * block's worth of bitmap storage */
struct reiserfs_bitmap_node {
	int id;
	char *data;
	struct list_head list;
};

/* per-transaction record of deleted blocks (see j_list_bitmap in
 * struct reiserfs_journal below) */
struct reiserfs_list_bitmap {
	struct reiserfs_journal_list *journal_list;
	struct reiserfs_bitmap_node **bitmaps;
};
213
/*
** one of these for each transaction.  The most important part here is the j_realblock.
** this list of cnodes is used to hash all the blocks in all the commits, to mark all the
** real buffer heads dirty once all the commits hit the disk,
** and to make sure every real block in a transaction is on disk before allowing the log area
** to be overwritten */
struct reiserfs_journal_list {
	unsigned long j_start;
	unsigned long j_state;
	unsigned long j_len;
	atomic_t j_nonzerolen;
	atomic_t j_commit_left;
	atomic_t j_older_commits_done;	/* all commits older than this on disk */
	struct mutex j_commit_mutex;
	unsigned int j_trans_id;
	time_t j_timestamp;
	struct reiserfs_list_bitmap *j_list_bitmap;
	struct buffer_head *j_commit_bh;	/* commit buffer head */
	struct reiserfs_journal_cnode *j_realblock;
	struct reiserfs_journal_cnode *j_freedlist;	/* list of buffers that were freed during this trans. free each of these on flush */
	/* time ordered list of all active transactions */
	struct list_head j_list;

	/* time ordered list of all transactions we haven't tried to flush yet */
	struct list_head j_working_list;

	/* list of tail conversion targets in need of flush before commit */
	struct list_head j_tail_bh_list;
	/* list of data=ordered buffers in need of flush before commit */
	struct list_head j_bh_list;
	int j_refcount;
};
246
/* per-superblock journal state: on-disk journal geometry, the current
 * and historical transactions, cnode pools, and the flushing machinery */
struct reiserfs_journal {
	struct buffer_head **j_ap_blocks;	/* journal blocks on disk */
	struct reiserfs_journal_cnode *j_last;	/* newest journal block */
	struct reiserfs_journal_cnode *j_first;	/* oldest journal block.  start here for traverse */

	struct block_device *j_dev_bd;
	fmode_t j_dev_mode;
	int j_1st_reserved_block;	/* first block on s_dev of reserved area journal */

	unsigned long j_state;
	unsigned int j_trans_id;
	unsigned long j_mount_id;
	unsigned long j_start;	/* start of current waiting commit (index into j_ap_blocks) */
	unsigned long j_len;	/* length of current waiting commit */
	unsigned long j_len_alloc;	/* number of buffers requested by journal_begin() */
	atomic_t j_wcount;	/* count of writers for current commit */
	unsigned long j_bcount;	/* batch count. allows turning X transactions into 1 */
	unsigned long j_first_unflushed_offset;	/* first unflushed transactions offset */
	unsigned j_last_flush_trans_id;	/* last fully flushed journal timestamp */
	struct buffer_head *j_header_bh;

	time_t j_trans_start_time;	/* time this transaction started */
	struct mutex j_mutex;
	struct mutex j_flush_mutex;
	wait_queue_head_t j_join_wait;	/* wait for current transaction to finish before starting new one */
	atomic_t j_jlock;	/* lock for j_join_wait */
	int j_list_bitmap_index;	/* number of next list bitmap to use */
	int j_must_wait;	/* no more journal begins allowed. MUST sleep on j_join_wait */
	int j_next_full_flush;	/* next journal_end will flush all journal list */
	int j_next_async_flush;	/* next journal_end will flush all async commits */

	int j_cnode_used;	/* number of cnodes on the used list */
	int j_cnode_free;	/* number of cnodes on the free list */

	unsigned int j_trans_max;	/* max number of blocks in a transaction.  */
	unsigned int j_max_batch;	/* max number of blocks to batch into a trans */
	unsigned int j_max_commit_age;	/* in seconds, how old can an async commit be */
	unsigned int j_max_trans_age;	/* in seconds, how old can a transaction be */
	unsigned int j_default_max_commit_age;	/* the default for the max commit age */

	struct reiserfs_journal_cnode *j_cnode_free_list;
	struct reiserfs_journal_cnode *j_cnode_free_orig;	/* orig pointer returned from vmalloc */

	struct reiserfs_journal_list *j_current_jl;
	int j_free_bitmap_nodes;
	int j_used_bitmap_nodes;

	int j_num_lists;	/* total number of active transactions */
	int j_num_work_lists;	/* number that need attention from kreiserfsd */

	/* debugging to make sure things are flushed in order */
	unsigned int j_last_flush_id;

	/* debugging to make sure things are committed in order */
	unsigned int j_last_commit_id;

	struct list_head j_bitmap_nodes;
	struct list_head j_dirty_buffers;
	spinlock_t j_dirty_buffers_lock;	/* protects j_dirty_buffers */

	/* list of all active transactions */
	struct list_head j_journal_list;
	/* lists that haven't been touched by writeback attempts */
	struct list_head j_working_list;

	struct reiserfs_list_bitmap j_list_bitmap[JOURNAL_NUM_BITMAPS];	/* array of bitmaps to record the deleted blocks */
	struct reiserfs_journal_cnode *j_hash_table[JOURNAL_HASH_SIZE];	/* hash table for real buffer heads in current trans */
	struct reiserfs_journal_cnode *j_list_hash_table[JOURNAL_HASH_SIZE];	/* hash table for all the real buffer heads in all
										   the transactions */
	struct list_head j_prealloc_list;	/* list of inodes which have preallocated blocks */
	int j_persistent_trans;
	unsigned long j_max_trans_size;
	unsigned long j_max_batch_size;

	int j_errno;

	/* when flushing ordered buffers, throttle new ordered writers */
	struct delayed_work j_work;
	struct super_block *j_work_sb;
	atomic_t j_async_throttle;
};
328
/* bit numbers used in the reiserfs_journal j_state bitmask */
enum journal_state_bits {
	J_WRITERS_BLOCKED = 1,	/* set when new writers not allowed */
	J_WRITERS_QUEUED,	/* set when log is full due to too many writers */
	J_ABORTED,		/* set when log is aborted */
};
334
#define JOURNAL_DESC_MAGIC "ReIsErLB"	/* ick. magic string to find desc blocks in the journal */

/* signature of the directory-name hash functions (tea, yura/rupasov, r5) */
typedef __u32(*hashf_t) (const signed char *, int);
338
/* in-core info kept per bitmap block: just the cached free-block count */
struct reiserfs_bitmap_info {
	__u32 free_count;
};

struct proc_dir_entry;
344
/*
 * Per-mount statistics exported through /proc.  Counters are only
 * collected when CONFIG_REISERFS_PROC_INFO is enabled; otherwise the
 * type collapses to an empty struct so reiserfs_sb_info keeps a single
 * definition in both configurations.
 */
#if defined( CONFIG_PROC_FS ) && defined( CONFIG_REISERFS_PROC_INFO )
typedef unsigned long int stat_cnt_t;
typedef struct reiserfs_proc_info_data {
	spinlock_t lock;	/* protects all counters below */
	int exiting;		/* set while the proc entry is being torn down */
	int max_hash_collisions;

	stat_cnt_t breads;
	stat_cnt_t bread_miss;
	stat_cnt_t search_by_key;
	stat_cnt_t search_by_key_fs_changed;
	stat_cnt_t search_by_key_restarted;

	stat_cnt_t insert_item_restarted;
	stat_cnt_t paste_into_item_restarted;
	stat_cnt_t cut_from_item_restarted;
	stat_cnt_t delete_solid_item_restarted;
	stat_cnt_t delete_item_restarted;

	stat_cnt_t leaked_oid;
	stat_cnt_t leaves_removable;

	/* balances per level. Use explicit 5 as MAX_HEIGHT is not visible yet. */
	stat_cnt_t balance_at[5];	/* XXX */
	/* sbk == search_by_key */
	stat_cnt_t sbk_read_at[5];	/* XXX */
	stat_cnt_t sbk_fs_changed[5];
	stat_cnt_t sbk_restarted[5];
	stat_cnt_t items_at[5];	/* XXX */
	stat_cnt_t free_at[5];	/* XXX */
	stat_cnt_t can_node_be_removed[5];	/* XXX */
	long int lnum[5];	/* XXX */
	long int rnum[5];	/* XXX */
	long int lbytes[5];	/* XXX */
	long int rbytes[5];	/* XXX */
	stat_cnt_t get_neighbors[5];
	stat_cnt_t get_neighbors_restart[5];
	stat_cnt_t need_l_neighbor[5];
	stat_cnt_t need_r_neighbor[5];

	stat_cnt_t free_block;
	/* block-allocator bitmap scan statistics */
	struct __scan_bitmap_stats {
		stat_cnt_t call;
		stat_cnt_t wait;
		stat_cnt_t bmap;
		stat_cnt_t retry;
		stat_cnt_t in_journal_hint;
		stat_cnt_t in_journal_nohint;
		stat_cnt_t stolen;
	} scan_bitmap;
	/* journal/transaction statistics */
	struct __journal_stats {
		stat_cnt_t in_journal;
		stat_cnt_t in_journal_bitmap;
		stat_cnt_t in_journal_reusable;
		stat_cnt_t lock_journal;
		stat_cnt_t lock_journal_wait;
		stat_cnt_t journal_being;
		stat_cnt_t journal_relock_writers;
		stat_cnt_t journal_relock_wcount;
		stat_cnt_t mark_dirty;
		stat_cnt_t mark_dirty_already;
		stat_cnt_t mark_dirty_notjournal;
		stat_cnt_t restore_prepared;
		stat_cnt_t prepare;
		stat_cnt_t prepare_retry;
	} journal;
} reiserfs_proc_info_data_t;
#else
/* stats compiled out: keep the member in reiserfs_sb_info at zero size */
typedef struct reiserfs_proc_info_data {
} reiserfs_proc_info_data_t;
#endif
416
/* reiserfs union of in-core super block data */
struct reiserfs_sb_info {
	struct buffer_head *s_sbh;	/* Buffer containing the super block */
	/* both the comment and the choice of
	   name are unclear for s_rs -Hans */
	struct reiserfs_super_block *s_rs;	/* Pointer to the super block in the buffer */
	struct reiserfs_bitmap_info *s_ap_bitmap;
	struct reiserfs_journal *s_journal;	/* pointer to journal information */
	unsigned short s_mount_state;	/* reiserfs state (valid, invalid) */

	/* Serialize writers access, replace the old bkl */
	struct mutex lock;
	/* Owner of the lock (can be recursive) */
	struct task_struct *lock_owner;
	/* Depth of the lock, start from -1 like the bkl */
	int lock_depth;

	/* Comment? -Hans */
	void (*end_io_handler) (struct buffer_head *, int);
	hashf_t s_hash_function;	/* pointer to function which is used
					   to sort names in directory. Set on
					   mount */
	unsigned long s_mount_opt;	/* reiserfs's mount options are set
					   here (currently - NOTAIL, NOLOG,
					   REPLAYONLY) */

	struct {		/* This is a structure that describes block allocator options */
		unsigned long bits;	/* Bitfield for enable/disable kind of options */
		unsigned long large_file_size;	/* size started from which we consider file to be a large one (in blocks) */
		int border;	/* percentage of disk, border takes */
		int preallocmin;	/* Minimal file size (in blocks) starting from which we do preallocations */
		int preallocsize;	/* Number of blocks we try to prealloc when file
					   reaches preallocmin size (in blocks) or
					   prealloc_list is empty. */
	} s_alloc_options;

	/* Comment? -Hans */
	wait_queue_head_t s_wait;
	/* To be obsoleted soon by per buffer seals.. -Hans */
	atomic_t s_generation_counter;	// increased by one every time the
	// tree gets re-balanced
	unsigned long s_properties;	/* File system properties. Currently holds
					   on-disk FS format */

	/* session statistics */
	int s_disk_reads;
	int s_disk_writes;
	int s_fix_nodes;
	int s_do_balance;
	int s_unneeded_left_neighbor;
	int s_good_search_by_key_reada;
	int s_bmaps;
	int s_bmaps_without_search;
	int s_direct2indirect;
	int s_indirect2direct;
	/* set up when it's ok for reiserfs_read_inode2() to read from
	   disk inode with nlink==0. Currently this is only used during
	   finish_unfinished() processing at mount time */
	int s_is_unlinked_ok;
	reiserfs_proc_info_data_t s_proc_info_data;
	struct proc_dir_entry *procdir;
	int reserved_blocks;	/* amount of blocks reserved for further allocations */
	spinlock_t bitmap_lock;	/* this lock is now only used to protect the reserved_blocks variable */
	struct dentry *priv_root;	/* root of /.reiserfs_priv */
	struct dentry *xattr_root;	/* root of /.reiserfs_priv/xattrs */
	int j_errno;
#ifdef CONFIG_QUOTA
	char *s_qf_names[MAXQUOTAS];
	int s_jquota_fmt;
#endif
	char *s_jdev;		/* Stored jdev for mount option showing */
#ifdef CONFIG_REISERFS_CHECK

	struct tree_balance *cur_tb;	/*
					 * Detects whether more than one
					 * copy of tb exists per superblock
					 * as a means of checking whether
					 * do_balance is executing concurrently
					 * against another tree reader/writer
					 * on a same mount point.
					 */
#endif
};
500
/* Definitions of reiserfs on-disk properties: */
/* bit numbers stored in reiserfs_sb_info->s_properties */
#define REISERFS_3_5 0
#define REISERFS_3_6 1
#define REISERFS_OLD_FORMAT 2
505
/* bit numbers within REISERFS_SB(s)->s_mount_opt */
enum reiserfs_mount_options {
/* Mount options */
	REISERFS_LARGETAIL,	/* large tails will be created in a session */
	REISERFS_SMALLTAIL,	/* small (for files less than block size) tails will be created in a session */
	REPLAYONLY,		/* replay journal and return 0. Use by fsck */
	REISERFS_CONVERT,	/* -o conv: causes conversion of old
				   format super block to the new
				   format. If not specified - old
				   partition will be dealt with in a
				   manner of 3.5.x */

/* -o hash={tea, rupasov, r5, detect} is meant for properly mounting
** reiserfs disks from 3.5.19 or earlier. 99% of the time, this option
** is not required. If the normal autodetection code can't determine which
** hash to use (because both hashes had the same value for a file)
** use this option to force a specific hash. It won't allow you to override
** the existing hash on the FS, so if you have a tea hash disk, and mount
** with -o hash=rupasov, the mount will fail.
*/
	FORCE_TEA_HASH,		/* try to force tea hash on mount */
	FORCE_RUPASOV_HASH,	/* try to force rupasov hash on mount */
	FORCE_R5_HASH,		/* try to force r5 hash on mount */
	FORCE_HASH_DETECT,	/* try to detect hash function on mount */

	/* journal data modes: -o data={journal,ordered,writeback} */
	REISERFS_DATA_LOG,
	REISERFS_DATA_ORDERED,
	REISERFS_DATA_WRITEBACK,

/* used for testing experimental features, makes benchmarking new
   features with and without more convenient, should never be used by
   users in any code shipped to users (ideally) */

	REISERFS_NO_BORDER,
	REISERFS_NO_UNHASHED_RELOCATION,
	REISERFS_HASHED_RELOCATION,
	REISERFS_ATTRS,
	REISERFS_XATTRS_USER,
	REISERFS_POSIXACL,
	REISERFS_EXPOSE_PRIVROOT,
	REISERFS_BARRIER_NONE,
	REISERFS_BARRIER_FLUSH,

	/* Actions on error */
	REISERFS_ERROR_PANIC,
	REISERFS_ERROR_RO,
	REISERFS_ERROR_CONTINUE,

	REISERFS_USRQUOTA,	/* User quota option specified */
	REISERFS_GRPQUOTA,	/* Group quota option specified */

	REISERFS_TEST1,
	REISERFS_TEST2,
	REISERFS_TEST3,
	REISERFS_TEST4,
	REISERFS_UNSUPPORTED_OPT,
};
562
/*
 * Predicates over the s_mount_opt bitmask (and, for old_format_only,
 * over s_properties).  Each yields nonzero when the option is set.
 */
#define reiserfs_r5_hash(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_R5_HASH))
#define reiserfs_rupasov_hash(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_RUPASOV_HASH))
#define reiserfs_tea_hash(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_TEA_HASH))
#define reiserfs_hash_detect(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_HASH_DETECT))
#define reiserfs_no_border(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_NO_BORDER))
#define reiserfs_no_unhashed_relocation(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_NO_UNHASHED_RELOCATION))
#define reiserfs_hashed_relocation(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_HASHED_RELOCATION))
#define reiserfs_test4(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_TEST4))

#define have_large_tails(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_LARGETAIL))
#define have_small_tails(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_SMALLTAIL))
#define replay_only(s) (REISERFS_SB(s)->s_mount_opt & (1 << REPLAYONLY))
#define reiserfs_attrs(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_ATTRS))
/* note: reads s_properties, not s_mount_opt, unlike its neighbours */
#define old_format_only(s) (REISERFS_SB(s)->s_properties & (1 << REISERFS_3_5))
#define convert_reiserfs(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_CONVERT))
#define reiserfs_data_log(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_LOG))
#define reiserfs_data_ordered(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_ORDERED))
#define reiserfs_data_writeback(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_WRITEBACK))
#define reiserfs_xattrs_user(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_XATTRS_USER))
#define reiserfs_posixacl(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_POSIXACL))
#define reiserfs_expose_privroot(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_EXPOSE_PRIVROOT))
#define reiserfs_xattrs_optional(s) (reiserfs_xattrs_user(s) || reiserfs_posixacl(s))
#define reiserfs_barrier_none(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_NONE))
#define reiserfs_barrier_flush(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_FLUSH))

/* REISERFS_ERROR_CONTINUE is the default and has no predicate */
#define reiserfs_error_panic(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_ERROR_PANIC))
#define reiserfs_error_ro(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_ERROR_RO))

void reiserfs_file_buffer(struct buffer_head *bh, int list);
extern struct file_system_type reiserfs_fs_type;
int reiserfs_resize(struct super_block *, unsigned long);
594
#define CARRY_ON 0
#define SCHEDULE_OCCURRED 1

/* shorthand accessors into the in-core superblock/journal structures */
#define SB_BUFFER_WITH_SB(s) (REISERFS_SB(s)->s_sbh)
#define SB_JOURNAL(s) (REISERFS_SB(s)->s_journal)
#define SB_JOURNAL_1st_RESERVED_BLOCK(s) (SB_JOURNAL(s)->j_1st_reserved_block)
#define SB_JOURNAL_LEN_FREE(s) (SB_JOURNAL(s)->j_journal_len_free)
#define SB_AP_BITMAP(s) (REISERFS_SB(s)->s_ap_bitmap)

/*
 * NOTE(review): this macro expands to syntactically invalid C (trailing
 * "->"), so any use would fail to compile -- it appears to be dead code.
 * Confirm it has no users and consider removing it.
 */
#define SB_DISK_JOURNAL_HEAD(s) (SB_JOURNAL(s)->j_header_bh->)
605
606/* A safe version of the "bdevname", which returns the "s_id" field of
607 * a superblock or else "Null superblock" if the super block is NULL.
608 */
609static inline char *reiserfs_bdevname(struct super_block *s)
610{
611 return (s == NULL) ? "Null superblock" : s->s_id;
612}
613
/* nonzero once journal abort has set J_ABORTED in j_state */
#define reiserfs_is_journal_aborted(journal) (unlikely (__reiserfs_is_journal_aborted (journal)))
static inline int __reiserfs_is_journal_aborted(struct reiserfs_journal
						*journal)
{
	return test_bit(J_ABORTED, &journal->j_state);
}
620
621/*
622 * Locking primitives. The write lock is a per superblock
623 * special mutex that has properties close to the Big Kernel Lock
624 * which was used in the previous locking scheme.
625 */
void reiserfs_write_lock(struct super_block *s);
void reiserfs_write_unlock(struct super_block *s);
/* recursive variants: _once returns the previous depth, which the
   caller hands back to _unlock_once */
int reiserfs_write_lock_once(struct super_block *s);
void reiserfs_write_unlock_once(struct super_block *s, int lock_depth);

#ifdef CONFIG_REISERFS_CHECK
void reiserfs_lock_check_recursive(struct super_block *s);
#else
/* no-op when debug checking is compiled out */
static inline void reiserfs_lock_check_recursive(struct super_block *s) { }
#endif
636
637/*
638 * Several mutexes depend on the write lock.
639 * However sometimes we want to relax the write lock while we hold
640 * these mutexes, according to the release/reacquire on schedule()
641 * properties of the Bkl that were used.
642 * Reiserfs performances and locking were based on this scheme.
643 * Now that the write lock is a mutex and not the bkl anymore, doing so
644 * may result in a deadlock:
645 *
646 * A acquire write_lock
647 * A acquire j_commit_mutex
648 * A release write_lock and wait for something
649 * B acquire write_lock
650 * B can't acquire j_commit_mutex and sleep
651 * A can't acquire write lock anymore
652 * deadlock
653 *
654 * What we do here is avoiding such deadlock by playing the same game
655 * than the Bkl: if we can't acquire a mutex that depends on the write lock,
656 * we release the write lock, wait a bit and then retry.
657 *
658 * The mutexes concerned by this hack are:
659 * - The commit mutex of a journal list
660 * - The flush mutex
661 * - The journal lock
662 * - The inode mutex
663 */
/*
 * Acquire a mutex that nests under the write lock without deadlocking:
 * drop the write lock first, take the mutex, then retake the write lock
 * (see the deadlock scenario described above).  The order of these four
 * calls is load-bearing; do not reorder.
 */
static inline void reiserfs_mutex_lock_safe(struct mutex *m,
					    struct super_block *s)
{
	reiserfs_lock_check_recursive(s);
	reiserfs_write_unlock(s);
	mutex_lock(m);
	reiserfs_write_lock(s);
}
672
/*
 * Same as reiserfs_mutex_lock_safe() but uses mutex_lock_nested() so
 * lockdep accepts taking two mutexes of the same class (subclass
 * disambiguates them).
 */
static inline void
reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass,
				struct super_block *s)
{
	reiserfs_lock_check_recursive(s);
	reiserfs_write_unlock(s);
	mutex_lock_nested(m, subclass);
	reiserfs_write_lock(s);
}
682
/*
 * rw_semaphore read-side counterpart of reiserfs_mutex_lock_safe():
 * release the write lock around down_read() to avoid the deadlock
 * described above.
 */
static inline void
reiserfs_down_read_safe(struct rw_semaphore *sem, struct super_block *s)
{
	reiserfs_lock_check_recursive(s);
	reiserfs_write_unlock(s);
	down_read(sem);
	reiserfs_write_lock(s);
}
691
692/*
693 * When we schedule, we usually want to also release the write lock,
694 * according to the previous bkl based locking scheme of reiserfs.
695 */
/*
 * Yield the CPU if a reschedule is pending, releasing the write lock
 * across schedule() to mimic the old BKL release-on-schedule behavior.
 */
static inline void reiserfs_cond_resched(struct super_block *s)
{
	if (need_resched()) {
		reiserfs_write_unlock(s);
		schedule();
		reiserfs_write_lock(s);
	}
}
704
705struct fid;
706
707/* in reading the #defines, it may help to understand that they employ
708 the following abbreviations:
709
710 B = Buffer
711 I = Item header
712 H = Height within the tree (should be changed to LEV)
713 N = Number of the item in the node
714 STAT = stat data
715 DEH = Directory Entry Header
716 EC = Entry Count
717 E = Entry number
718 UL = Unsigned Long
719 BLKH = BLocK Header
720 UNFM = UNForMatted node
721 DC = Disk Child
722 P = Path
723
724 These #defines are named by concatenating these abbreviations,
725 where first comes the arguments, and last comes the return value,
726 of the macro.
727
728*/
729
#define USE_INODE_GENERATION_COUNTER

#define REISERFS_PREALLOCATE
#define DISPLACE_NEW_PACKING_LOCALITIES
#define PREALLOCATION_SIZE 9

/* n must be power of 2 */
#define _ROUND_UP(x,n) (((x)+(n)-1u) & ~((n)-1u))

// to be ok for alpha and others we have to align structures to 8 byte
// boundary.
// FIXME: do not change 4 by anything else: there is code which relies on that
// NOTE(review): the FIXME above mentions "4" but the macro aligns to 8 --
// the comment looks stale; confirm which constant the dependent code expects.
#define ROUND_UP(x) _ROUND_UP(x,8LL)

/* debug levels. Right now, CONFIG_REISERFS_CHECK means print all debug
** messages.
*/
#define REISERFS_DEBUG_CODE 5	/* extra messages to help find/debug errors */
748
void __reiserfs_warning(struct super_block *s, const char *id,
			const char *func, const char *fmt, ...);
/* printk-style warning tagged with the calling function's name */
#define reiserfs_warning(s, id, fmt, args...) \
	 __reiserfs_warning(s, id, __func__, fmt, ##args)
/* assertions handling */

/** always check a condition and panic if it's false. */
/* NOTE(review): the scond parameter is never expanded (the body
 * stringizes cond directly); it appears to be vestigial. */
#define __RASSERT(cond, scond, format, args...)			\
do {									\
	if (!(cond))							\
		reiserfs_panic(NULL, "assertion failure", "(" #cond ") at " \
			       __FILE__ ":%i:%s: " format "\n",		\
			       in_interrupt() ? -1 : task_pid_nr(current), \
			       __LINE__, __func__ , ##args);		\
} while (0)

#define RASSERT(cond, format, args...) __RASSERT(cond, #cond, format, ##args)

#if defined( CONFIG_REISERFS_CHECK )
#define RFALSE(cond, format, args...) __RASSERT(!(cond), "!(" #cond ")", format, ##args)
#else
#define RFALSE( cond, format, args... ) do {;} while( 0 )
#endif

#define CONSTF __attribute_const__
774/*
775 * Disk Data Structures
776 */
777
778/***************************************************************************/
779/* SUPER BLOCK */
780/***************************************************************************/
781
782/*
783 * Structure of super block on disk, a version of which in RAM is often accessed as REISERFS_SB(s)->s_rs
784 * the version in RAM is part of a larger structure containing fields never written to disk.
785 */
/* on-disk s_hash_function_code values */
#define UNSET_HASH 0		// read_super will guess about, what hash names
		     // in directories were sorted with
#define TEA_HASH  1
#define YURA_HASH 2
#define R5_HASH   3
#define DEFAULT_HASH R5_HASH
792
/* on-disk journal parameters, embedded in the superblock (all little-endian) */
struct journal_params {
	__le32 jp_journal_1st_block;	/* where does journal start from on its
					 * device */
	__le32 jp_journal_dev;	/* journal device st_rdev */
	__le32 jp_journal_size;	/* size of the journal */
	__le32 jp_journal_trans_max;	/* max number of blocks in a transaction. */
	__le32 jp_journal_magic;	/* random value made on fs creation (this
					 * was sb_journal_block_count) */
	__le32 jp_journal_max_batch;	/* max number of blocks to batch into a
					 * trans */
	__le32 jp_journal_max_commit_age;	/* in seconds, how old can an async
						 * commit be */
	__le32 jp_journal_max_trans_age;	/* in seconds, how old can a transaction
						 * be */
};
808
/* this is the super from 3.5.X, where X >= 10 */
struct reiserfs_super_block_v1 {
	__le32 s_block_count;	/* blocks count */
	__le32 s_free_blocks;	/* free blocks count */
	__le32 s_root_block;	/* root block number */
	struct journal_params s_journal;
	__le16 s_blocksize;	/* block size */
	__le16 s_oid_maxsize;	/* max size of object id array, see
				 * get_objectid() commentary */
	__le16 s_oid_cursize;	/* current size of object id array */
	__le16 s_umount_state;	/* this is set to 1 when filesystem was
				 * umounted, to 2 - when not */
	char s_magic[10];	/* reiserfs magic string indicates that
				 * file system is reiserfs:
				 * "ReIsErFs" or "ReIsEr2Fs" or "ReIsEr3Fs" */
	__le16 s_fs_state;	/* it is set to used by fsck to mark which
				 * phase of rebuilding is done */
	__le32 s_hash_function_code;	/* indicate, what hash function is being use
					 * to sort names in a directory*/
	__le16 s_tree_height;	/* height of disk tree */
	__le16 s_bmap_nr;	/* amount of bitmap blocks needed to address
				 * each block of file system */
	__le16 s_version;	/* this field is only reliable on filesystem
				 * with non-standard journal */
	__le16 s_reserved_for_journal;	/* size in blocks of journal area on main
					 * device, we need to keep after
					 * making fs with non-standard journal */
} __attribute__ ((__packed__));

#define SB_SIZE_V1 (sizeof(struct reiserfs_super_block_v1))
839
/* this is the on disk super block (3.6 format: v1 header plus extensions) */
struct reiserfs_super_block {
	struct reiserfs_super_block_v1 s_v1;
	__le32 s_inode_generation;
	__le32 s_flags;		/* Right now used only by inode-attributes, if enabled */
	unsigned char s_uuid[16];	/* filesystem unique identifier */
	unsigned char s_label[16];	/* filesystem volume label */
	__le16 s_mnt_count;	/* Count of mounts since last fsck */
	__le16 s_max_mnt_count;	/* Maximum mounts before check */
	__le32 s_lastcheck;	/* Timestamp of last fsck */
	__le32 s_check_interval;	/* Interval between checks */
	char s_unused[76];	/* zero filled by mkreiserfs and
				 * reiserfs_convert_objectid_map_v1()
				 * so any additions must be updated
				 * there as well. */
} __attribute__ ((__packed__));

#define SB_SIZE (sizeof(struct reiserfs_super_block))

#define REISERFS_VERSION_1 0
#define REISERFS_VERSION_2 2
861
// on-disk super block fields converted to cpu form
#define SB_DISK_SUPER_BLOCK(s) (REISERFS_SB(s)->s_rs)
#define SB_V1_DISK_SUPER_BLOCK(s) (&(SB_DISK_SUPER_BLOCK(s)->s_v1))
/* NOTE(review): s_blocksize is a __le16 field but is converted with
 * le32_to_cpu here -- looks endian-unsafe on big-endian; confirm. */
#define SB_BLOCKSIZE(s) \
        le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_blocksize))
#define SB_BLOCK_COUNT(s) \
        le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_block_count))
#define SB_FREE_BLOCKS(s) \
        le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_free_blocks))
#define SB_REISERFS_MAGIC(s) \
        (SB_V1_DISK_SUPER_BLOCK(s)->s_magic)
#define SB_ROOT_BLOCK(s) \
        le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_root_block))
#define SB_TREE_HEIGHT(s) \
        le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_tree_height))
#define SB_REISERFS_STATE(s) \
        le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_umount_state))
#define SB_VERSION(s) le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_version))
#define SB_BMAP_NR(s) le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_bmap_nr))

/* store cpu values back into the little-endian on-disk super block */
#define PUT_SB_BLOCK_COUNT(s, val) \
   do { SB_V1_DISK_SUPER_BLOCK(s)->s_block_count = cpu_to_le32(val); } while (0)
#define PUT_SB_FREE_BLOCKS(s, val) \
   do { SB_V1_DISK_SUPER_BLOCK(s)->s_free_blocks = cpu_to_le32(val); } while (0)
#define PUT_SB_ROOT_BLOCK(s, val) \
   do { SB_V1_DISK_SUPER_BLOCK(s)->s_root_block = cpu_to_le32(val); } while (0)
#define PUT_SB_TREE_HEIGHT(s, val) \
   do { SB_V1_DISK_SUPER_BLOCK(s)->s_tree_height = cpu_to_le16(val); } while (0)
#define PUT_SB_REISERFS_STATE(s, val) \
   do { SB_V1_DISK_SUPER_BLOCK(s)->s_umount_state = cpu_to_le16(val); } while (0)
#define PUT_SB_VERSION(s, val) \
   do { SB_V1_DISK_SUPER_BLOCK(s)->s_version = cpu_to_le16(val); } while (0)
#define PUT_SB_BMAP_NR(s, val) \
   do { SB_V1_DISK_SUPER_BLOCK(s)->s_bmap_nr = cpu_to_le16 (val); } while (0)

#define SB_ONDISK_JP(s) (&SB_V1_DISK_SUPER_BLOCK(s)->s_journal)
#define SB_ONDISK_JOURNAL_SIZE(s) \
         le32_to_cpu ((SB_ONDISK_JP(s)->jp_journal_size))
#define SB_ONDISK_JOURNAL_1st_BLOCK(s) \
         le32_to_cpu ((SB_ONDISK_JP(s)->jp_journal_1st_block))
#define SB_ONDISK_JOURNAL_DEVICE(s) \
         le32_to_cpu ((SB_ONDISK_JP(s)->jp_journal_dev))
#define SB_ONDISK_RESERVED_FOR_JOURNAL(s) \
         le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_reserved_for_journal))
906
/*
 * True when "block" lies inside the journal log area (standard journal)
 * or in the space reserved for a relocated journal.  The argument and
 * the whole expansion are parenthesized so the macro composes safely in
 * any surrounding expression (CERT PRE01-C / PRE02-C); the original
 * unparenthesized form broke under e.g. `!is_block_in_log_...(s, b)`.
 */
#define is_block_in_log_or_reserved_area(s, block)			\
	((block) >= SB_JOURNAL_1st_RESERVED_BLOCK(s)			\
	 && (block) < SB_JOURNAL_1st_RESERVED_BLOCK(s) +		\
	 ((!is_reiserfs_jr(SB_DISK_SUPER_BLOCK(s)) ?			\
	   SB_ONDISK_JOURNAL_SIZE(s) + 1 : SB_ONDISK_RESERVED_FOR_JOURNAL(s))))
912
/* on-disk format probes (match against the magic string); defined in super.c */
int is_reiserfs_3_5(struct reiserfs_super_block *rs);
int is_reiserfs_3_6(struct reiserfs_super_block *rs);
int is_reiserfs_jr(struct reiserfs_super_block *rs);
916
917/* ReiserFS leaves the first 64k unused, so that partition labels have
918 enough space. If someone wants to write a fancy bootloader that
919 needs more than 64k, let us know, and this will be increased in size.
   This number must be larger than the largest block size on any
921 platform, or code will break. -Hans */
#define REISERFS_DISK_OFFSET_IN_BYTES (64 * 1024)
#define REISERFS_FIRST_BLOCK unused_define
#define REISERFS_JOURNAL_OFFSET_IN_BYTES REISERFS_DISK_OFFSET_IN_BYTES

/* the spot for the super in versions 3.5 - 3.5.10 (inclusive) */
#define REISERFS_OLD_DISK_OFFSET_IN_BYTES (8 * 1024)

/* reiserfs internal error codes (used by search_by_key and fix_nodes) */
/* CARRY_ON is deliberately re-defined here with the same value as above;
 * identical redefinition is legal C and kept for readability of this list */
#define CARRY_ON 0
#define REPEAT_SEARCH -1
#define IO_ERROR -2
#define NO_DISK_SPACE -3
#define NO_BALANCING_NEEDED (-4)
#define NO_MORE_UNUSED_CONTIGUOUS_BLOCKS (-5)
#define QUOTA_EXCEEDED -6
937
typedef __u32 b_blocknr_t;	/* in-core block number */
typedef __le32 unp_t;		/* on-disk unformatted-node pointer */

/* pointer to an unformatted node plus the free space left in it */
struct unfm_nodeinfo {
	unp_t unfm_nodenum;
	unsigned short unfm_freespace;
};
945
/* there are two formats of keys: 3.5 and 3.6
 */
#define KEY_FORMAT_3_5 0
#define KEY_FORMAT_3_6 1

/* there are two stat datas */
#define STAT_DATA_V1 0
#define STAT_DATA_V2 1
954
/* map a VFS inode back to its containing reiserfs_inode_info */
static inline struct reiserfs_inode_info *REISERFS_I(const struct inode *inode)
{
	return container_of(inode, struct reiserfs_inode_info, vfs_inode);
}
959
/* fetch the reiserfs-private superblock info hung off the VFS superblock */
static inline struct reiserfs_sb_info *REISERFS_SB(const struct super_block *sb)
{
	return sb->s_fs_info;
}
964
/* Don't trust REISERFS_SB(sb)->s_bmap_nr, it's a u16
 * which overflows on large file systems.  Recompute the number of
 * bitmap blocks from the block count instead (each bitmap block
 * addresses blocksize*8 filesystem blocks). */
static inline __u32 reiserfs_bmap_count(struct super_block *sb)
{
	return (SB_BLOCK_COUNT(sb) - 1) / (sb->s_blocksize * 8) + 1;
}
971
/*
 * A bitmap-block count must fit the on-disk 16-bit s_bmap_nr field;
 * returns 1 when the given count would overflow it, 0 otherwise.
 */
static inline int bmap_would_wrap(unsigned bmap_nr)
{
	return bmap_nr > 0xffff;
}
976
/** this says about version of key of all items (but stat data) the
    object consists of */
#define get_inode_item_key_version( inode )                                    \
    ((REISERFS_I(inode)->i_flags & i_item_key_version_mask) ? KEY_FORMAT_3_6 : KEY_FORMAT_3_5)

#define set_inode_item_key_version( inode, version )                           \
         ({ if((version)==KEY_FORMAT_3_6)                                      \
                REISERFS_I(inode)->i_flags |= i_item_key_version_mask;      \
            else                                                               \
                REISERFS_I(inode)->i_flags &= ~i_item_key_version_mask; })

/* which stat-data layout (v1 or v2) the inode uses on disk */
#define get_inode_sd_version(inode)                                            \
         ((REISERFS_I(inode)->i_flags & i_stat_data_version_mask) ? STAT_DATA_V2 : STAT_DATA_V1)

#define set_inode_sd_version(inode, version)                                   \
         ({ if((version)==STAT_DATA_V2)                                        \
                REISERFS_I(inode)->i_flags |= i_stat_data_version_mask;     \
            else                                                               \
                REISERFS_I(inode)->i_flags &= ~i_stat_data_version_mask; })
996
/* This is an aggressive tail suppression policy, I am hoping it
   improves our benchmarks. The principle behind it is that percentage
   space saving is what matters, not absolute space saving. This is
   non-intuitive, but it helps to understand it if you consider that the
   cost to access 4 blocks is not much more than the cost to access 1
   block, if you have to do a seek and rotate. A tail risks a
   non-linear disk access that is significant as a percentage of total
   time cost for a 4 block file and saves an amount of space that is
   less significant as a percentage of space, or so goes the hypothesis.
   -Hans */
/* nonzero => store the tail in an unformatted node instead of a direct item */
#define STORE_TAIL_IN_UNFM_S1(n_file_size,n_tail_size,n_block_size) \
(\
  (!(n_tail_size)) || \
  (((n_tail_size) > MAX_DIRECT_ITEM_LEN(n_block_size)) || \
   ( (n_file_size) >= (n_block_size) * 4 ) || \
   ( ( (n_file_size) >= (n_block_size) * 3 ) && \
     ( (n_tail_size) >=   (MAX_DIRECT_ITEM_LEN(n_block_size))/4) ) || \
   ( ( (n_file_size) >= (n_block_size) * 2 ) && \
     ( (n_tail_size) >=   (MAX_DIRECT_ITEM_LEN(n_block_size))/2) ) || \
   ( ( (n_file_size) >= (n_block_size) ) && \
     ( (n_tail_size) >=   (MAX_DIRECT_ITEM_LEN(n_block_size) * 3)/4) ) ) \
)
1019
/* Another strategy for tails, this one means only create a tail if all the
   file would fit into one DIRECT item.
   Primary intention for this one is to increase performance by decreasing
   seeking.
*/
#define STORE_TAIL_IN_UNFM_S2(n_file_size,n_tail_size,n_block_size) \
(\
  (!(n_tail_size)) || \
  (((n_file_size) > MAX_DIRECT_ITEM_LEN(n_block_size)) ) \
)
1030
/*
 * values for s_umount_state field
 */
#define REISERFS_VALID_FS 1
#define REISERFS_ERROR_FS 2

//
// there are 5 item types currently
//
#define TYPE_STAT_DATA 0
#define TYPE_INDIRECT 1
#define TYPE_DIRECT 2
#define TYPE_DIRENTRY 3
#define TYPE_MAXTYPE 3
#define TYPE_ANY 15		// FIXME: comment is required
1046
1047/***************************************************************************/
1048/* KEY & ITEM HEAD */
1049/***************************************************************************/
1050
//
// directories use this key as well as old files
//
struct offset_v1 {
	__le32 k_offset;
	__le32 k_uniqueness;
} __attribute__ ((__packed__));

/* v2 packs a 60-bit offset and a 4-bit item type into one __le64 */
struct offset_v2 {
	__le64 v;
} __attribute__ ((__packed__));
1062
1063static inline __u16 offset_v2_k_type(const struct offset_v2 *v2)
1064{
1065 __u8 type = le64_to_cpu(v2->v) >> 60;
1066 return (type <= TYPE_MAXTYPE) ? type : TYPE_ANY;
1067}
1068
1069static inline void set_offset_v2_k_type(struct offset_v2 *v2, int type)
1070{
1071 v2->v =
1072 (v2->v & cpu_to_le64(~0ULL >> 4)) | cpu_to_le64((__u64) type << 60);
1073}
1074
1075static inline loff_t offset_v2_k_offset(const struct offset_v2 *v2)
1076{
1077 return le64_to_cpu(v2->v) & (~0ULL >> 4);
1078}
1079
1080static inline void set_offset_v2_k_offset(struct offset_v2 *v2, loff_t offset)
1081{
1082 offset &= (~0ULL >> 4);
1083 v2->v = (v2->v & cpu_to_le64(15ULL << 60)) | cpu_to_le64(offset);
1084}
1085
/* Key of an item determines its location in the S+tree, and
   is composed of 4 components */
struct reiserfs_key {
	__le32 k_dir_id;	/* packing locality: by default parent
				   directory object id */
	__le32 k_objectid;	/* object identifier */
	union {
		struct offset_v1 k_offset_v1;
		struct offset_v2 k_offset_v2;
	} __attribute__ ((__packed__)) u;
} __attribute__ ((__packed__));
1097
/* cpu-endian unpacked form of reiserfs_key */
struct in_core_key {
	__u32 k_dir_id;		/* packing locality: by default parent
				   directory object id */
	__u32 k_objectid;	/* object identifier */
	__u64 k_offset;
	__u8 k_type;
};

/* in-core key plus the format/length info the compare functions need */
struct cpu_key {
	struct in_core_key on_disk_key;
	int version;		/* KEY_FORMAT_3_5 or KEY_FORMAT_3_6 */
	int key_length;		/* 3 in all cases but direct2indirect and
				   indirect2direct conversion */
};
1112
/* Our function for comparing keys can compare keys of different
   lengths. It takes as a parameter the length of the keys it is to
   compare. These defines are used in determining what is to be passed
   to it as that parameter. */
#define REISERFS_FULL_KEY_LEN     4
#define REISERFS_SHORT_KEY_LEN    2

/* The result of the key compare */
#define FIRST_GREATER 1
#define SECOND_GREATER -1
#define KEYS_IDENTICAL 0
#define KEY_FOUND 1
#define KEY_NOT_FOUND 0

#define KEY_SIZE (sizeof(struct reiserfs_key))
#define SHORT_KEY_SIZE (sizeof (__u32) + sizeof (__u32))

/* return values for search_by_key and clones */
#define ITEM_FOUND 1
#define ITEM_NOT_FOUND 0
#define ENTRY_FOUND 1
#define ENTRY_NOT_FOUND 0
#define DIRECTORY_NOT_FOUND -1
#define REGULAR_FILE_FOUND -2
#define DIRECTORY_FOUND -3
#define BYTE_FOUND 1
#define BYTE_NOT_FOUND 0
#define FILE_NOT_FOUND -1

#define POSITION_FOUND 1
#define POSITION_NOT_FOUND 0

// return values for reiserfs_find_entry and search_by_entry_key
#define NAME_FOUND 1
#define NAME_NOT_FOUND 0
#define GOTO_PREVIOUS_ITEM 2
#define NAME_FOUND_INVISIBLE 3
1149#define NAME_FOUND_INVISIBLE 3
1150
/* Everything in the filesystem is stored as a set of items. The
   item head contains the key of the item, its free space (for
   indirect items) and specifies the location of the item itself
   within the block. */

struct item_head {
	/* Everything in the tree is found by searching for it based on
	 * its key.*/
	struct reiserfs_key ih_key;
	union {
		/* The free space in the last unformatted node of an
		   indirect item if this is an indirect item.  This
		   equals 0xFFFF iff this is a direct item or stat data
		   item. Note that the key, not this field, is used to
		   determine the item type, and thus which field this
		   union contains. */
		__le16 ih_free_space_reserved;
		/* Iff this is a directory item, this field equals the
		   number of directory entries in the directory item. */
		__le16 ih_entry_count;
	} __attribute__ ((__packed__)) u;
	__le16 ih_item_len;	/* total size of the item body */
	__le16 ih_item_location;	/* an offset to the item body
					 * within the block */
	__le16 ih_version;	/* 0 for all old items, 2 for new
				   ones. Highest bit is set by fsck
				   temporary, cleaned after all
				   done */
} __attribute__ ((__packed__));
/* size of item header */
#define IH_SIZE (sizeof(struct item_head))
1182
/* item_head field accessors: decode on-disk little-endian to cpu order */
#define ih_free_space(ih) le16_to_cpu((ih)->u.ih_free_space_reserved)
#define ih_version(ih) le16_to_cpu((ih)->ih_version)
#define ih_entry_count(ih) le16_to_cpu((ih)->u.ih_entry_count)
#define ih_location(ih) le16_to_cpu((ih)->ih_item_location)
#define ih_item_len(ih) le16_to_cpu((ih)->ih_item_len)

/* item_head field setters: encode cpu order to on-disk little-endian */
#define put_ih_free_space(ih, val) do { (ih)->u.ih_free_space_reserved = cpu_to_le16(val); } while(0)
#define put_ih_version(ih, val) do { (ih)->ih_version = cpu_to_le16(val); } while (0)
#define put_ih_entry_count(ih, val) do { (ih)->u.ih_entry_count = cpu_to_le16(val); } while (0)
#define put_ih_location(ih, val) do { (ih)->ih_item_location = cpu_to_le16(val); } while (0)
#define put_ih_item_len(ih, val) do { (ih)->ih_item_len = cpu_to_le16(val); } while (0)

/* fsck marks items unreachable via the high bit of ih_version */
#define unreachable_item(ih) (ih_version(ih) & (1 << 15))

/* ih_free_space is only meaningful for 3.5-format items */
#define get_ih_free_space(ih) (ih_version (ih) == KEY_FORMAT_3_6 ? 0 : ih_free_space (ih))
#define set_ih_free_space(ih,val) put_ih_free_space((ih), ((ih_version(ih) == KEY_FORMAT_3_6) ? 0 : (val)))

/* these operate on indirect items, where you've got an array of ints
** at a possibly unaligned location. These are a noop on ia32
**
** p is the array of __u32, i is the index into the array, v is the value
** to store there.
*/
#define get_block_num(p, i) get_unaligned_le32((p) + (i))
#define put_block_num(p, i, v) put_unaligned_le32((v), (p) + (i))

//
// in old version uniqueness field shows key type
//
#define V1_SD_UNIQUENESS 0
#define V1_INDIRECT_UNIQUENESS 0xfffffffe
#define V1_DIRECT_UNIQUENESS 0xffffffff
#define V1_DIRENTRY_UNIQUENESS 500
#define V1_ANY_UNIQUENESS 555	// FIXME: comment is required
1217
1218//
1219// here are conversion routines
1220//
1221static inline int uniqueness2type(__u32 uniqueness) CONSTF;
1222static inline int uniqueness2type(__u32 uniqueness)
1223{
1224 switch ((int)uniqueness) {
1225 case V1_SD_UNIQUENESS:
1226 return TYPE_STAT_DATA;
1227 case V1_INDIRECT_UNIQUENESS:
1228 return TYPE_INDIRECT;
1229 case V1_DIRECT_UNIQUENESS:
1230 return TYPE_DIRECT;
1231 case V1_DIRENTRY_UNIQUENESS:
1232 return TYPE_DIRENTRY;
1233 case V1_ANY_UNIQUENESS:
1234 default:
1235 return TYPE_ANY;
1236 }
1237}
1238
1239static inline __u32 type2uniqueness(int type) CONSTF;
1240static inline __u32 type2uniqueness(int type)
1241{
1242 switch (type) {
1243 case TYPE_STAT_DATA:
1244 return V1_SD_UNIQUENESS;
1245 case TYPE_INDIRECT:
1246 return V1_INDIRECT_UNIQUENESS;
1247 case TYPE_DIRECT:
1248 return V1_DIRECT_UNIQUENESS;
1249 case TYPE_DIRENTRY:
1250 return V1_DIRENTRY_UNIQUENESS;
1251 case TYPE_ANY:
1252 default:
1253 return V1_ANY_UNIQUENESS;
1254 }
1255}
1256
1257//
1258// key is pointer to on disk key which is stored in le, result is cpu,
1259// there is no way to get version of object from key, so, provide
1260// version to these defines
1261//
1262static inline loff_t le_key_k_offset(int version,
1263 const struct reiserfs_key *key)
1264{
1265 return (version == KEY_FORMAT_3_5) ?
1266 le32_to_cpu(key->u.k_offset_v1.k_offset) :
1267 offset_v2_k_offset(&(key->u.k_offset_v2));
1268}
1269
/* Decode the key offset of an item head; the version comes from the ih. */
static inline loff_t le_ih_k_offset(const struct item_head *ih)
{
	return le_key_k_offset(ih_version(ih), &(ih->ih_key));
}
1274
1275static inline loff_t le_key_k_type(int version, const struct reiserfs_key *key)
1276{
1277 return (version == KEY_FORMAT_3_5) ?
1278 uniqueness2type(le32_to_cpu(key->u.k_offset_v1.k_uniqueness)) :
1279 offset_v2_k_type(&(key->u.k_offset_v2));
1280}
1281
/* Decode the key type of an item head; the version comes from the ih. */
static inline loff_t le_ih_k_type(const struct item_head *ih)
{
	return le_key_k_type(ih_version(ih), &(ih->ih_key));
}
1286
1287static inline void set_le_key_k_offset(int version, struct reiserfs_key *key,
1288 loff_t offset)
1289{
1290 (version == KEY_FORMAT_3_5) ? (void)(key->u.k_offset_v1.k_offset = cpu_to_le32(offset)) : /* jdm check */
1291 (void)(set_offset_v2_k_offset(&(key->u.k_offset_v2), offset));
1292}
1293
/* Store @offset into an item head's key; the version comes from the ih. */
static inline void set_le_ih_k_offset(struct item_head *ih, loff_t offset)
{
	set_le_key_k_offset(ih_version(ih), &(ih->ih_key), offset);
}
1298
1299static inline void set_le_key_k_type(int version, struct reiserfs_key *key,
1300 int type)
1301{
1302 (version == KEY_FORMAT_3_5) ?
1303 (void)(key->u.k_offset_v1.k_uniqueness =
1304 cpu_to_le32(type2uniqueness(type)))
1305 : (void)(set_offset_v2_k_type(&(key->u.k_offset_v2), type));
1306}
1307
/* Store @type into an item head's key; the version comes from the ih. */
static inline void set_le_ih_k_type(struct item_head *ih, int type)
{
	set_le_key_k_type(ih_version(ih), &(ih->ih_key), type);
}
1312
/* true iff the on-disk key (of the given format version) is a directory-entry key */
static inline int is_direntry_le_key(int version, struct reiserfs_key *key)
{
	return le_key_k_type(version, key) == TYPE_DIRENTRY;
}
1317
/* true iff the on-disk key (of the given format version) is a direct-item key */
static inline int is_direct_le_key(int version, struct reiserfs_key *key)
{
	return le_key_k_type(version, key) == TYPE_DIRECT;
}
1322
/* true iff the on-disk key (of the given format version) is an indirect-item key */
static inline int is_indirect_le_key(int version, struct reiserfs_key *key)
{
	return le_key_k_type(version, key) == TYPE_INDIRECT;
}
1327
/* true iff the on-disk key (of the given format version) is a stat-data key */
static inline int is_statdata_le_key(int version, struct reiserfs_key *key)
{
	return le_key_k_type(version, key) == TYPE_STAT_DATA;
}
1332
1333//
1334// item header has version.
1335//
/* as is_direntry_le_key, but the version is taken from the item head */
static inline int is_direntry_le_ih(struct item_head *ih)
{
	return is_direntry_le_key(ih_version(ih), &ih->ih_key);
}
1340
/* as is_direct_le_key, but the version is taken from the item head */
static inline int is_direct_le_ih(struct item_head *ih)
{
	return is_direct_le_key(ih_version(ih), &ih->ih_key);
}
1345
/* as is_indirect_le_key, but the version is taken from the item head */
static inline int is_indirect_le_ih(struct item_head *ih)
{
	return is_indirect_le_key(ih_version(ih), &ih->ih_key);
}
1350
/* as is_statdata_le_key, but the version is taken from the item head */
static inline int is_statdata_le_ih(struct item_head *ih)
{
	return is_statdata_le_key(ih_version(ih), &ih->ih_key);
}
1355
1356//
1357// key is pointer to cpu key, result is cpu
1358//
/* offset component of an in-core (cpu-endian) key */
static inline loff_t cpu_key_k_offset(const struct cpu_key *key)
{
	return key->on_disk_key.k_offset;
}
1363
/* type component of an in-core (cpu-endian) key */
static inline loff_t cpu_key_k_type(const struct cpu_key *key)
{
	return key->on_disk_key.k_type;
}
1368
/* set the offset component of an in-core key */
static inline void set_cpu_key_k_offset(struct cpu_key *key, loff_t offset)
{
	key->on_disk_key.k_offset = offset;
}
1373
/* set the type component of an in-core key */
static inline void set_cpu_key_k_type(struct cpu_key *key, int type)
{
	key->on_disk_key.k_type = type;
}
1378
/* decrement the offset component of an in-core key by one */
static inline void cpu_key_k_offset_dec(struct cpu_key *key)
{
	key->on_disk_key.k_offset--;
}
1383
/* type predicates for in-core keys */
#define is_direntry_cpu_key(key) (cpu_key_k_type (key) == TYPE_DIRENTRY)
#define is_direct_cpu_key(key) (cpu_key_k_type (key) == TYPE_DIRECT)
#define is_indirect_cpu_key(key) (cpu_key_k_type (key) == TYPE_INDIRECT)
#define is_statdata_cpu_key(key) (cpu_key_k_type (key) == TYPE_STAT_DATA)

/* are these used ? */
#define is_direntry_cpu_ih(ih) (is_direntry_cpu_key (&((ih)->ih_key)))
#define is_direct_cpu_ih(ih) (is_direct_cpu_key (&((ih)->ih_key)))
#define is_indirect_cpu_ih(ih) (is_indirect_cpu_key (&((ih)->ih_key)))
#define is_statdata_cpu_ih(ih) (is_statdata_cpu_key (&((ih)->ih_key)))

/* true when the key's short key matches the ih and its offset lies inside the item */
#define I_K_KEY_IN_ITEM(ih, key, n_blocksize) \
	(!COMP_SHORT_KEYS(ih, key) && \
	  I_OFF_BYTE_IN_ITEM(ih, k_offset(key), n_blocksize))

/* maximal length of item */
#define MAX_ITEM_LEN(block_size) (block_size - BLKH_SIZE - IH_SIZE)
#define MIN_ITEM_LEN 1

/* object identifier for root dir */
#define REISERFS_ROOT_OBJECTID 2
#define REISERFS_ROOT_PARENT_OBJECTID 1

extern struct reiserfs_key root_key;
1408
1409/*
1410 * Picture represents a leaf of the S+tree
1411 * ______________________________________________________
1412 * | | Array of | | |
1413 * |Block | Object-Item | F r e e | Objects- |
1414 * | head | Headers | S p a c e | Items |
1415 * |______|_______________|___________________|___________|
1416 */
1417
/* Header of a disk block. More precisely, header of a formatted leaf
   or internal node, and not the header of an unformatted node. */
struct block_head {
	__le16 blk_level;	/* Level of a block in the tree. */
	__le16 blk_nr_item;	/* Number of keys/items in a block. */
	__le16 blk_free_space;	/* Block free space in bytes. */
	__le16 blk_reserved;
	/* dump this in v4/planA */
	struct reiserfs_key blk_right_delim_key;	/* kept only for compatibility */
};

/* block_head size and field accessors (on-disk little-endian <-> cpu) */
#define BLKH_SIZE (sizeof(struct block_head))
#define blkh_level(p_blkh) (le16_to_cpu((p_blkh)->blk_level))
#define blkh_nr_item(p_blkh) (le16_to_cpu((p_blkh)->blk_nr_item))
#define blkh_free_space(p_blkh) (le16_to_cpu((p_blkh)->blk_free_space))
#define blkh_reserved(p_blkh) (le16_to_cpu((p_blkh)->blk_reserved))
#define set_blkh_level(p_blkh,val) ((p_blkh)->blk_level = cpu_to_le16(val))
#define set_blkh_nr_item(p_blkh,val) ((p_blkh)->blk_nr_item = cpu_to_le16(val))
#define set_blkh_free_space(p_blkh,val) ((p_blkh)->blk_free_space = cpu_to_le16(val))
#define set_blkh_reserved(p_blkh,val) ((p_blkh)->blk_reserved = cpu_to_le16(val))
#define blkh_right_delim_key(p_blkh) ((p_blkh)->blk_right_delim_key)
#define set_blkh_right_delim_key(p_blkh,val) ((p_blkh)->blk_right_delim_key = val)
1440
1441/*
1442 * values for blk_level field of the struct block_head
1443 */
1444
1445#define FREE_LEVEL 0 /* when node gets removed from the tree its
1446 blk_level is set to FREE_LEVEL. It is then
1447 used to see whether the node is still in the
1448 tree */
1449
1450#define DISK_LEAF_NODE_LEVEL 1 /* Leaf node level. */
1451
1452/* Given the buffer head of a formatted node, resolve to the block head of that node. */
1453#define B_BLK_HEAD(bh) ((struct block_head *)((bh)->b_data))
1454/* Number of items that are in buffer. */
1455#define B_NR_ITEMS(bh) (blkh_nr_item(B_BLK_HEAD(bh)))
1456#define B_LEVEL(bh) (blkh_level(B_BLK_HEAD(bh)))
1457#define B_FREE_SPACE(bh) (blkh_free_space(B_BLK_HEAD(bh)))
1458
1459#define PUT_B_NR_ITEMS(bh, val) do { set_blkh_nr_item(B_BLK_HEAD(bh), val); } while (0)
1460#define PUT_B_LEVEL(bh, val) do { set_blkh_level(B_BLK_HEAD(bh), val); } while (0)
1461#define PUT_B_FREE_SPACE(bh, val) do { set_blkh_free_space(B_BLK_HEAD(bh), val); } while (0)
1462
1463/* Get right delimiting key. -- little endian */
1464#define B_PRIGHT_DELIM_KEY(bh) (&(blk_right_delim_key(B_BLK_HEAD(bh))))
1465
1466/* Does the buffer contain a disk leaf. */
1467#define B_IS_ITEMS_LEVEL(bh) (B_LEVEL(bh) == DISK_LEAF_NODE_LEVEL)
1468
1469/* Does the buffer contain a disk internal node */
1470#define B_IS_KEYS_LEVEL(bh) (B_LEVEL(bh) > DISK_LEAF_NODE_LEVEL \
1471 && B_LEVEL(bh) <= MAX_HEIGHT)
1472
1473/***************************************************************************/
1474/* STAT DATA */
1475/***************************************************************************/
1476
1477//
1478// old stat data is 32 bytes long. We are going to distinguish new one by
1479// different size
1480//
/* v1 (3.5-format) stat data; packed, mirrors the on-disk layout. */
struct stat_data_v1 {
	__le16 sd_mode;		/* file type, permissions */
	__le16 sd_nlink;	/* number of hard links */
	__le16 sd_uid;		/* owner */
	__le16 sd_gid;		/* group */
	__le32 sd_size;		/* file size */
	__le32 sd_atime;	/* time of last access */
	__le32 sd_mtime;	/* time file was last modified  */
	__le32 sd_ctime;	/* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */
	union {
		__le32 sd_rdev;		/* device number, for special files */
		__le32 sd_blocks;	/* number of blocks file uses */
	} __attribute__ ((__packed__)) u;
	__le32 sd_first_direct_byte;	/* first byte of file which is stored
					   in a direct item: except that if it
					   equals 1 it is a symlink and if it
					   equals ~(__u32)0 there is no
					   direct item.  The existence of this
					   field really grates on me. Let's
					   replace it with a macro based on
					   sd_size and our tail suppression
					   policy. Someday. -Hans */
} __attribute__ ((__packed__));
1504
/* v1 stat data size, format test, and field accessors (le <-> cpu) */
#define SD_V1_SIZE (sizeof(struct stat_data_v1))
#define stat_data_v1(ih) (ih_version (ih) == KEY_FORMAT_3_5)
#define sd_v1_mode(sdp) (le16_to_cpu((sdp)->sd_mode))
#define set_sd_v1_mode(sdp,v) ((sdp)->sd_mode = cpu_to_le16(v))
#define sd_v1_nlink(sdp) (le16_to_cpu((sdp)->sd_nlink))
#define set_sd_v1_nlink(sdp,v) ((sdp)->sd_nlink = cpu_to_le16(v))
#define sd_v1_uid(sdp) (le16_to_cpu((sdp)->sd_uid))
#define set_sd_v1_uid(sdp,v) ((sdp)->sd_uid = cpu_to_le16(v))
#define sd_v1_gid(sdp) (le16_to_cpu((sdp)->sd_gid))
#define set_sd_v1_gid(sdp,v) ((sdp)->sd_gid = cpu_to_le16(v))
#define sd_v1_size(sdp) (le32_to_cpu((sdp)->sd_size))
#define set_sd_v1_size(sdp,v) ((sdp)->sd_size = cpu_to_le32(v))
#define sd_v1_atime(sdp) (le32_to_cpu((sdp)->sd_atime))
#define set_sd_v1_atime(sdp,v) ((sdp)->sd_atime = cpu_to_le32(v))
#define sd_v1_mtime(sdp) (le32_to_cpu((sdp)->sd_mtime))
#define set_sd_v1_mtime(sdp,v) ((sdp)->sd_mtime = cpu_to_le32(v))
#define sd_v1_ctime(sdp) (le32_to_cpu((sdp)->sd_ctime))
#define set_sd_v1_ctime(sdp,v) ((sdp)->sd_ctime = cpu_to_le32(v))
#define sd_v1_rdev(sdp) (le32_to_cpu((sdp)->u.sd_rdev))
#define set_sd_v1_rdev(sdp,v) ((sdp)->u.sd_rdev = cpu_to_le32(v))
#define sd_v1_blocks(sdp) (le32_to_cpu((sdp)->u.sd_blocks))
#define set_sd_v1_blocks(sdp,v) ((sdp)->u.sd_blocks = cpu_to_le32(v))
#define sd_v1_first_direct_byte(sdp) \
				(le32_to_cpu((sdp)->sd_first_direct_byte))
#define set_sd_v1_first_direct_byte(sdp,v) \
				((sdp)->sd_first_direct_byte = cpu_to_le32(v))

/* inode flags stored in sd_attrs (nee sd_reserved) */

/* we want common flags to have the same values as in ext2,
   so chattr(1) will work without problems */
#define REISERFS_IMMUTABLE_FL FS_IMMUTABLE_FL
#define REISERFS_APPEND_FL FS_APPEND_FL
#define REISERFS_SYNC_FL FS_SYNC_FL
#define REISERFS_NOATIME_FL FS_NOATIME_FL
#define REISERFS_NODUMP_FL FS_NODUMP_FL
#define REISERFS_SECRM_FL FS_SECRM_FL
#define REISERFS_UNRM_FL FS_UNRM_FL
#define REISERFS_COMPR_FL FS_COMPR_FL
#define REISERFS_NOTAIL_FL FS_NOTAIL_FL

/* persistent flags that file inherits from the parent directory */
#define REISERFS_INHERIT_MASK ( REISERFS_IMMUTABLE_FL |	\
				REISERFS_SYNC_FL |	\
				REISERFS_NOATIME_FL |	\
				REISERFS_NODUMP_FL |	\
				REISERFS_SECRM_FL |	\
				REISERFS_COMPR_FL |	\
				REISERFS_NOTAIL_FL )
1554
1555/* Stat Data on disk (reiserfs version of UFS disk inode minus the
1556 address blocks) */
/* v2 (3.6-format) stat data; packed, mirrors the on-disk layout. */
struct stat_data {
	__le16 sd_mode;		/* file type, permissions */
	__le16 sd_attrs;	/* persistent inode flags */
	__le32 sd_nlink;	/* number of hard links */
	__le64 sd_size;		/* file size */
	__le32 sd_uid;		/* owner */
	__le32 sd_gid;		/* group */
	__le32 sd_atime;	/* time of last access */
	__le32 sd_mtime;	/* time file was last modified  */
	__le32 sd_ctime;	/* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */
	__le32 sd_blocks;
	union {
		__le32 sd_rdev;		/* device number, for special files */
		__le32 sd_generation;	/* inode generation, for regular files */
		//__le32 sd_first_direct_byte;
		/* first byte of file which is stored in a
		   direct item: except that if it equals 1
		   it is a symlink and if it equals
		   ~(__u32)0 there is no direct item.  The
		   existence of this field really grates
		   on me. Let's replace it with a macro
		   based on sd_size and our tail
		   suppression policy? */
	} __attribute__ ((__packed__)) u;
} __attribute__ ((__packed__));
1582//
1583// this is 44 bytes long
1584//
1585#define SD_SIZE (sizeof(struct stat_data))
1586#define SD_V2_SIZE SD_SIZE
1587#define stat_data_v2(ih) (ih_version (ih) == KEY_FORMAT_3_6)
1588#define sd_v2_mode(sdp) (le16_to_cpu((sdp)->sd_mode))
1589#define set_sd_v2_mode(sdp,v) ((sdp)->sd_mode = cpu_to_le16(v))
1590/* sd_reserved */
1591/* set_sd_reserved */
1592#define sd_v2_nlink(sdp) (le32_to_cpu((sdp)->sd_nlink))
1593#define set_sd_v2_nlink(sdp,v) ((sdp)->sd_nlink = cpu_to_le32(v))
1594#define sd_v2_size(sdp) (le64_to_cpu((sdp)->sd_size))
1595#define set_sd_v2_size(sdp,v) ((sdp)->sd_size = cpu_to_le64(v))
1596#define sd_v2_uid(sdp) (le32_to_cpu((sdp)->sd_uid))
1597#define set_sd_v2_uid(sdp,v) ((sdp)->sd_uid = cpu_to_le32(v))
1598#define sd_v2_gid(sdp) (le32_to_cpu((sdp)->sd_gid))
1599#define set_sd_v2_gid(sdp,v) ((sdp)->sd_gid = cpu_to_le32(v))
1600#define sd_v2_atime(sdp) (le32_to_cpu((sdp)->sd_atime))
1601#define set_sd_v2_atime(sdp,v) ((sdp)->sd_atime = cpu_to_le32(v))
1602#define sd_v2_mtime(sdp) (le32_to_cpu((sdp)->sd_mtime))
1603#define set_sd_v2_mtime(sdp,v) ((sdp)->sd_mtime = cpu_to_le32(v))
1604#define sd_v2_ctime(sdp) (le32_to_cpu((sdp)->sd_ctime))
1605#define set_sd_v2_ctime(sdp,v) ((sdp)->sd_ctime = cpu_to_le32(v))
1606#define sd_v2_blocks(sdp) (le32_to_cpu((sdp)->sd_blocks))
1607#define set_sd_v2_blocks(sdp,v) ((sdp)->sd_blocks = cpu_to_le32(v))
1608#define sd_v2_rdev(sdp) (le32_to_cpu((sdp)->u.sd_rdev))
1609#define set_sd_v2_rdev(sdp,v) ((sdp)->u.sd_rdev = cpu_to_le32(v))
1610#define sd_v2_generation(sdp) (le32_to_cpu((sdp)->u.sd_generation))
1611#define set_sd_v2_generation(sdp,v) ((sdp)->u.sd_generation = cpu_to_le32(v))
1612#define sd_v2_attrs(sdp) (le16_to_cpu((sdp)->sd_attrs))
1613#define set_sd_v2_attrs(sdp,v) ((sdp)->sd_attrs = cpu_to_le16(v))
1614
1615/***************************************************************************/
1616/* DIRECTORY STRUCTURE */
1617/***************************************************************************/
1618/*
1619 Picture represents the structure of directory items
1620 ________________________________________________
1621 | Array of | | | | | |
1622 | directory |N-1| N-2 | .... | 1st |0th|
1623 | entry headers | | | | | |
1624 |_______________|___|_____|________|_______|___|
1625 <---- directory entries ------>
1626
1627 First directory item has k_offset component 1. We store "." and ".."
1628 in one item, always, we never split "." and ".." into differing
1629 items. This makes, among other things, the code for removing
1630 directories simpler. */
/* key offset/uniqueness values for stat data and the fixed "." and ".." entries */
#define SD_OFFSET 0
#define SD_UNIQUENESS 0
#define DOT_OFFSET 1
#define DOT_DOT_OFFSET 2
#define DIRENTRY_UNIQUENESS 500

/* smallest valid key offset of a real item */
#define FIRST_ITEM_OFFSET 1
1639
1640/*
1641 Q: How to get key of object pointed to by entry from entry?
1642
1643 A: Each directory entry has its header. This header has deh_dir_id and deh_objectid fields, those are key
1644 of object, entry points to */
1645
1646/* NOT IMPLEMENTED:
1647 Directory will someday contain stat data of object */
1648
/* On-disk directory entry header; packed, mirrors the on-disk layout. */
struct reiserfs_de_head {
	__le32 deh_offset;	/* third component of the directory entry key */
	__le32 deh_dir_id;	/* objectid of the parent directory of the object, that is referenced
				   by directory entry */
	__le32 deh_objectid;	/* objectid of the object, that is referenced by directory entry */
	__le16 deh_location;	/* offset of name in the whole item */
	__le16 deh_state;	/* whether 1) entry contains stat data (for future), and 2) whether
				   entry is hidden (unlinked) */
} __attribute__ ((__packed__));
/* directory entry header size and field accessors (le <-> cpu) */
#define DEH_SIZE sizeof(struct reiserfs_de_head)
#define deh_offset(p_deh) (le32_to_cpu((p_deh)->deh_offset))
#define deh_dir_id(p_deh) (le32_to_cpu((p_deh)->deh_dir_id))
#define deh_objectid(p_deh) (le32_to_cpu((p_deh)->deh_objectid))
#define deh_location(p_deh) (le16_to_cpu((p_deh)->deh_location))
#define deh_state(p_deh) (le16_to_cpu((p_deh)->deh_state))

#define put_deh_offset(p_deh,v) ((p_deh)->deh_offset = cpu_to_le32((v)))
#define put_deh_dir_id(p_deh,v) ((p_deh)->deh_dir_id = cpu_to_le32((v)))
#define put_deh_objectid(p_deh,v) ((p_deh)->deh_objectid = cpu_to_le32((v)))
#define put_deh_location(p_deh,v) ((p_deh)->deh_location = cpu_to_le16((v)))
#define put_deh_state(p_deh,v) ((p_deh)->deh_state = cpu_to_le16((v)))

/* empty directory contains two entries "." and ".." and their headers */
#define EMPTY_DIR_SIZE \
(DEH_SIZE * 2 + ROUND_UP (strlen (".")) + ROUND_UP (strlen ("..")))

/* old format directories have this size when empty */
#define EMPTY_DIR_SIZE_V1 (DEH_SIZE * 2 + 3)

/* bit numbers within deh_state */
#define DEH_Statdata 0		/* not used now */
#define DEH_Visible 2

/* 64 bit systems (and the S/390) need to be aligned explicitly -jdm */
#if BITS_PER_LONG == 64 || defined(__s390__) || defined(__hppa__)
#   define ADDR_UNALIGNED_BITS  (3)
#endif
1685
/* These are only used to manipulate deh_state.
 * Because of this, we'll use the little-endian bitops (*_bit_le),
 * since deh_state is stored little endian on disk */
#ifdef ADDR_UNALIGNED_BITS

/* round the address down to an aligned boundary, and fold the stripped
   low bits back into the bit number (8 bits per byte) */
#   define aligned_address(addr)           ((void *)((long)(addr) & ~((1UL << ADDR_UNALIGNED_BITS) - 1)))
#   define unaligned_offset(addr)          (((int)((long)(addr) & ((1 << ADDR_UNALIGNED_BITS) - 1))) << 3)

#   define set_bit_unaligned(nr, addr)	\
	__test_and_set_bit_le((nr) + unaligned_offset(addr), aligned_address(addr))
#   define clear_bit_unaligned(nr, addr)	\
	__test_and_clear_bit_le((nr) + unaligned_offset(addr), aligned_address(addr))
#   define test_bit_unaligned(nr, addr)	\
	test_bit_le((nr) + unaligned_offset(addr), aligned_address(addr))

#else

#   define set_bit_unaligned(nr, addr)	__test_and_set_bit_le(nr, addr)
#   define clear_bit_unaligned(nr, addr)	__test_and_clear_bit_le(nr, addr)
#   define test_bit_unaligned(nr, addr)	test_bit_le(nr, addr)

#endif
1708
/* set/clear/test the deh_state bits defined above */
#define mark_de_with_sd(deh)        set_bit_unaligned (DEH_Statdata, &((deh)->deh_state))
#define mark_de_without_sd(deh)     clear_bit_unaligned (DEH_Statdata, &((deh)->deh_state))
#define mark_de_visible(deh)	    set_bit_unaligned (DEH_Visible, &((deh)->deh_state))
#define mark_de_hidden(deh)	    clear_bit_unaligned (DEH_Visible, &((deh)->deh_state))

#define de_with_sd(deh)		    test_bit_unaligned (DEH_Statdata, &((deh)->deh_state))
#define de_visible(deh)	    	    test_bit_unaligned (DEH_Visible, &((deh)->deh_state))
#define de_hidden(deh)	    	    !test_bit_unaligned (DEH_Visible, &((deh)->deh_state))

extern void make_empty_dir_item_v1(char *body, __le32 dirid, __le32 objid,
				   __le32 par_dirid, __le32 par_objid);
extern void make_empty_dir_item(char *body, __le32 dirid, __le32 objid,
				__le32 par_dirid, __le32 par_objid);

/* array of the entry headers */
 /* get item body */
#define B_I_PITEM(bh,ih) ( (bh)->b_data + ih_location(ih) )
#define B_I_DEH(bh,ih) ((struct reiserfs_de_head *)(B_I_PITEM(bh,ih)))
1727
1728/* length of the directory entry in directory item. This define
1729 calculates length of i-th directory entry using directory entry
1730 locations from dir entry head. When it calculates length of 0-th
1731 directory entry, it uses length of whole item in place of entry
1732 location of the non-existent following entry in the calculation.
1733 See picture above.*/
1734/*
1735#define I_DEH_N_ENTRY_LENGTH(ih,deh,i) \
1736((i) ? (deh_location((deh)-1) - deh_location((deh))) : (ih_item_len((ih)) - deh_location((deh))))
1737*/
1738static inline int entry_length(const struct buffer_head *bh,
1739 const struct item_head *ih, int pos_in_item)
1740{
1741 struct reiserfs_de_head *deh;
1742
1743 deh = B_I_DEH(bh, ih) + pos_in_item;
1744 if (pos_in_item)
1745 return deh_location(deh - 1) - deh_location(deh);
1746
1747 return ih_item_len(ih) - deh_location(deh);
1748}
1749
/* number of entries in the directory item, depends on ENTRY_COUNT being at the start of directory dynamic data. */
#define I_ENTRY_COUNT(ih) (ih_entry_count((ih)))

/* name by bh, ih and entry_num */
#define B_I_E_NAME(bh,ih,entry_num) ((char *)(bh->b_data + ih_location(ih) + deh_location(B_I_DEH(bh,ih)+(entry_num))))

// two entries per block (at least)
/* NOTE(review): block_size is accepted but unused; the limit is a fixed 255 */
#define REISERFS_MAX_NAME(block_size) 255
1758
1759/* this structure is used for operations on directory entries. It is
1760 not a disk structure. */
1761/* When reiserfs_find_entry or search_by_entry_key find directory
1762 entry, they return filled reiserfs_dir_entry structure */
/* In-core description of one directory entry as found by
   reiserfs_find_entry / search_by_entry_key; not a disk structure. */
struct reiserfs_dir_entry {
	struct buffer_head *de_bh;	/* buffer holding the entry's item */
	int de_item_num;		/* item position within that buffer */
	struct item_head *de_ih;	/* header of the directory item */
	int de_entry_num;		/* entry position within the item */
	struct reiserfs_de_head *de_deh;	/* this entry's header */
	int de_entrylen;		/* entry length (header excluded) */
	int de_namelen;			/* name length, trailing NULs stripped */
	char *de_name;			/* pointer to the name inside the item */
	unsigned long *de_gen_number_bit_string;

	__u32 de_dir_id;		/* short key of the referenced object */
	__u32 de_objectid;

	struct cpu_key de_entry_key;	/* full key of the entry itself */
};
1779
/* these defines are useful when a particular member of a reiserfs_dir_entry is needed */

/* pointer to file name, stored in entry */
#define B_I_DEH_ENTRY_FILE_NAME(bh,ih,deh) (B_I_PITEM (bh, ih) + deh_location(deh))

/* length of name */
/* NOTE(review): this expands to I_DEH_N_ENTRY_LENGTH, whose definition is
   commented out above — presumably this macro is unused; verify before use */
#define I_DEH_N_ENTRY_FILE_NAME_LENGTH(ih,deh,entry_num) \
(I_DEH_N_ENTRY_LENGTH (ih, deh, entry_num) - (de_with_sd (deh) ? SD_SIZE : 0))

/* hash value occupies bits from 7 up to 30 */
#define GET_HASH_VALUE(offset) ((offset) & 0x7fffff80LL)
/* generation number occupies 7 bits starting from 0 up to 6 */
#define GET_GENERATION_NUMBER(offset) ((offset) & 0x7fLL)
#define MAX_GENERATION_NUMBER  127

/* combine a hash value with a generation number into an entry offset */
#define SET_GENERATION_NUMBER(offset,gen_number) (GET_HASH_VALUE(offset)|(gen_number))
1796
1797/*
1798 * Picture represents an internal node of the reiserfs tree
1799 * ______________________________________________________
1800 * | | Array of | Array of | Free |
1801 * |block | keys | pointers | space |
1802 * | head | N | N+1 | |
1803 * |______|_______________|___________________|___________|
1804 */
1805
1806/***************************************************************************/
1807/* DISK CHILD */
1808/***************************************************************************/
1809/* Disk child pointer: The pointer from an internal node of the tree
1810 to a node that is on disk. */
/* Disk child pointer stored in internal nodes; mirrors the on-disk layout. */
struct disk_child {
	__le32 dc_block_number;	/* Disk child's block number. */
	__le16 dc_size;		/* Disk child's used space.   */
	__le16 dc_reserved;
};

/* disk_child size and field accessors (on-disk little-endian <-> cpu) */
#define DC_SIZE (sizeof(struct disk_child))
#define dc_block_number(dc_p)	(le32_to_cpu((dc_p)->dc_block_number))
#define dc_size(dc_p)		(le16_to_cpu((dc_p)->dc_size))
#define put_dc_block_number(dc_p, val)   do { (dc_p)->dc_block_number = cpu_to_le32(val); } while(0)
#define put_dc_size(dc_p, val)   do { (dc_p)->dc_size = cpu_to_le16(val); } while(0)

/* Get disk child by buffer header and position in the tree node. */
#define B_N_CHILD(bh, n_pos)  ((struct disk_child *)\
((bh)->b_data + BLKH_SIZE + B_NR_ITEMS(bh) * KEY_SIZE + DC_SIZE * (n_pos)))

/* Get disk child number by buffer header and position in the tree node. */
#define B_N_CHILD_NUM(bh, n_pos) (dc_block_number(B_N_CHILD(bh, n_pos)))
#define PUT_B_N_CHILD_NUM(bh, n_pos, val) \
				(put_dc_block_number(B_N_CHILD(bh, n_pos), val))

 /* maximal value of field child_size in structure disk_child */
 /* child size is the combined size of all items and their headers */
#define MAX_CHILD_SIZE(bh) ((int)( (bh)->b_size - BLKH_SIZE ))

/* amount of used space in buffer (not including block head) */
#define B_CHILD_SIZE(cur) (MAX_CHILD_SIZE(cur)-(B_FREE_SPACE(cur)))

/* max and min number of keys in internal node */
#define MAX_NR_KEY(bh) ( (MAX_CHILD_SIZE(bh)-DC_SIZE)/(KEY_SIZE+DC_SIZE) )
#define MIN_NR_KEY(bh)    (MAX_NR_KEY(bh)/2)
1842
1843/***************************************************************************/
1844/* PATH STRUCTURES AND DEFINES */
1845/***************************************************************************/
1846
1847/* Search_by_key fills up the path from the root to the leaf as it descends the tree looking for the
1848 key. It uses reiserfs_bread to try to find buffers in the cache given their block number. If it
1849 does not find them in the cache it reads them from disk. For each node search_by_key finds using
1850 reiserfs_bread it then uses bin_search to look through that node. bin_search will find the
1851 position of the block_number of the next node if it is looking through an internal node. If it
1852 is looking through a leaf node bin_search will find the position of the item which has key either
1853 equal to given key, or which is the maximal key less than the given key. */
1854
/* One step of a tree path: a buffer plus the position taken within it. */
struct path_element {
	struct buffer_head *pe_buffer;	/* Pointer to the buffer at the path in the tree. */
	int pe_position;	/* Position in the tree node which is placed in the */
	/* buffer above.                                  */
};

#define MAX_HEIGHT 5		/* maximal height of a tree. don't change this without changing JOURNAL_PER_BALANCE_CNT */
#define EXTENDED_MAX_HEIGHT         7	/* Must be equals MAX_HEIGHT + FIRST_PATH_ELEMENT_OFFSET */
#define FIRST_PATH_ELEMENT_OFFSET   2	/* Must be equal to at least 2. */

#define ILLEGAL_PATH_ELEMENT_OFFSET 1	/* Must be equal to FIRST_PATH_ELEMENT_OFFSET - 1 */
#define MAX_FEB_SIZE 6		/* this MUST be MAX_HEIGHT + 1. See about FEB below */
1867
1868/* We need to keep track of who the ancestors of nodes are. When we
1869 perform a search we record which nodes were visited while
1870 descending the tree looking for the node we searched for. This list
1871 of nodes is called the path. This information is used while
1872 performing balancing. Note that this path information may become
1873 invalid, and this means we must check it when using it to see if it
1874 is still valid. You'll need to read search_by_key and the comments
1875 in it, especially about decrement_counters_in_path(), to understand
1876 this structure.
1877
1878Paths make the code so much harder to work with and debug.... An
1879enormous number of bugs are due to them, and trying to write or modify
1880code that uses them just makes my head hurt. They are based on an
1881excessive effort to avoid disturbing the precious VFS code.:-( The
1882gods only know how we are going to SMP the code that uses them.
1883znodes are the way! */
1884
1885#define PATH_READA 0x1 /* do read ahead */
1886#define PATH_READA_BACK 0x2 /* read backwards */
1887
/* Path from the root to a leaf, recorded by search_by_key. */
struct treepath {
	int path_length;	/* Length of the array above.   */
	int reada;		/* PATH_READA / PATH_READA_BACK flags */
	struct path_element path_elements[EXTENDED_MAX_HEIGHT];	/* Array of the path elements.  */
	int pos_in_item;
};

#define pos_in_item(path) ((path)->pos_in_item)

/* declare a path, initially empty (length set to the illegal offset) */
#define INITIALIZE_PATH(var) \
struct treepath var = {.path_length = ILLEGAL_PATH_ELEMENT_OFFSET, .reada = 0,}
1899
/* Get path element by path and path position. */
#define PATH_OFFSET_PELEMENT(path, n_offset)  ((path)->path_elements + (n_offset))

/* Get buffer header at the path by path and path position. */
#define PATH_OFFSET_PBUFFER(path, n_offset)   (PATH_OFFSET_PELEMENT(path, n_offset)->pe_buffer)

/* Get position in the element at the path by path and path position. */
#define PATH_OFFSET_POSITION(path, n_offset) (PATH_OFFSET_PELEMENT(path, n_offset)->pe_position)

/* buffer at the end of the path, i.e. the leaf the search landed on */
#define PATH_PLAST_BUFFER(path) (PATH_OFFSET_PBUFFER((path), (path)->path_length))
				/* you know, to the person who didn't
				   write this the macro name does not
				   at first suggest what it does.
				   Maybe POSITION_FROM_PATH_END? Or
				   maybe we should just focus on
				   dumping paths... -Hans */
#define PATH_LAST_POSITION(path) (PATH_OFFSET_POSITION((path), (path)->path_length))

#define PATH_PITEM_HEAD(path)    B_N_PITEM_HEAD(PATH_PLAST_BUFFER(path), PATH_LAST_POSITION(path))

/* in do_balance leaf has h == 0 in contrast with path structure,
   where root has level == 0. That is why we need these defines */
#define PATH_H_PBUFFER(path, h) PATH_OFFSET_PBUFFER (path, path->path_length - (h))	/* tb->S[h] */
#define PATH_H_PPARENT(path, h) PATH_H_PBUFFER (path, (h) + 1)	/* tb->F[h] or tb->S[0]->b_parent */
#define PATH_H_POSITION(path, h) PATH_OFFSET_POSITION (path, path->path_length - (h))
#define PATH_H_B_ITEM_ORDER(path, h) PATH_H_POSITION(path, h + 1)	/* tb->S[h]->b_item_order */

#define PATH_H_PATH_OFFSET(path, n_h) ((path)->path_length - (n_h))

/* short aliases for the last-element accessors above */
#define get_last_bh(path) PATH_PLAST_BUFFER(path)
#define get_ih(path) PATH_PITEM_HEAD(path)
#define get_item_pos(path) PATH_LAST_POSITION(path)
#define get_item(path) ((void *)B_N_PITEM(PATH_PLAST_BUFFER(path), PATH_LAST_POSITION (path)))
#define item_moved(ih,path) comp_items(ih, path)
#define path_changed(ih,path) comp_items (ih, path)
1935
1936/***************************************************************************/
1937/* MISC */
1938/***************************************************************************/
1939
/* Size of pointer to the unformatted node. */
#define UNFM_P_SIZE (sizeof(unp_t))
#define UNFM_P_SHIFT 2		/* log2 of UNFM_P_SIZE (4-byte block pointers) */

// in in-core inode key is stored on le form
#define INODE_PKEY(inode) ((struct reiserfs_key *)(REISERFS_I(inode)->i_key))

#define MAX_UL_INT 0xffffffff
/* NOTE(review): 0x7ffffff has only seven 'f's (2^27 - 1), not INT_MAX
   (0x7fffffff).  This looks like a long-standing typo; deliberately left
   unchanged because existing users may depend on the current value --
   audit all uses before correcting. */
#define MAX_INT 0x7ffffff
#define MAX_US_INT 0xffff

// reiserfs version 2 has max offset 60 bits. Version 1 - 32 bit offset
#define U32_MAX (~(__u32)0)
1953
1954static inline loff_t max_reiserfs_offset(struct inode *inode)
1955{
1956 if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5)
1957 return (loff_t) U32_MAX;
1958
1959 return (loff_t) ((~(__u64) 0) >> 4);
1960}
1961
/*#define MAX_KEY_UNIQUENESS MAX_UL_INT*/
#define MAX_KEY_OBJECTID MAX_UL_INT

#define MAX_B_NUM MAX_UL_INT	/* largest block number (32-bit) */
#define MAX_FC_NUM MAX_US_INT

/* the purpose is to detect overflow of an unsigned short */
#define REISERFS_LINK_MAX (MAX_US_INT - 1000)

/* The following defines are used in reiserfs_insert_item and reiserfs_append_item */
#define REISERFS_KERNEL_MEM 0	/* reiserfs kernel memory mode */
#define REISERFS_USER_MEM 1	/* reiserfs user memory mode */

/* Per-superblock generation counter; a saved copy going stale means the
   tree changed under us (see tree_balance.fs_gen). */
#define fs_generation(s) (REISERFS_SB(s)->s_generation_counter)
#define get_generation(s) atomic_read (&fs_generation(s))
#define FILESYSTEM_CHANGED_TB(tb) (get_generation((tb)->tb_sb) != (tb)->fs_gen)
#define __fs_changed(gen,s) (gen != get_generation (s))
/* fs_changed() additionally offers the scheduler a reschedule point
   before performing the staleness check. */
#define fs_changed(gen,s) \
({ \
	reiserfs_cond_resched(s); \
	__fs_changed(gen, s); \
})
1984
1985/***************************************************************************/
1986/* FIXATE NODES */
1987/***************************************************************************/
1988
1989#define VI_TYPE_LEFT_MERGEABLE 1
1990#define VI_TYPE_RIGHT_MERGEABLE 2
1991
1992/* To make any changes in the tree we always first find node, that
1993 contains item to be changed/deleted or place to insert a new
1994 item. We call this node S. To do balancing we need to decide what
1995 we will shift to left/right neighbor, or to a new node, where new
1996 item will be etc. To make this analysis simpler we build virtual
1997 node. Virtual node is an array of items, that will replace items of
1998 node S. (For instance if we are going to delete an item, virtual
1999 node does not contain it). Virtual node keeps information about
2000 item sizes and types, mergeability of first and last items, sizes
2001 of all entries in directory item. We use this array of items when
2002 calculating what we can shift to neighbors and how many nodes we
2003 have to have if we do not any shiftings, if we shift to left/right
2004 neighbor or to both. */
/* One slot of a virtual node: describes a single (possibly not yet
   existing) item for the balancing analysis.  See the comment above. */
struct virtual_item {
	int vi_index;		// index in the array of item operations
	unsigned short vi_type;	// left/right mergeability (VI_TYPE_* bits)
	unsigned short vi_item_len;	/* length of item that it will have after balancing */
	struct item_head *vi_ih;	/* header of the item this slot describes */
	const char *vi_item;	// body of item (old or new)
	const void *vi_new_data;	// 0 always but paste mode
	void *vi_uarea;		// item specific area
};
2014
/* The virtual node: an array of virtual_items replacing the items of
   node S for balancing analysis (see comment above virtual_item). */
struct virtual_node {
	char *vn_free_ptr;	/* this is a pointer to the free space in the buffer */
	unsigned short vn_nr_item;	/* number of items in virtual node */
	short vn_size;		/* size of node , that node would have if it has unlimited size and no balancing is performed */
	short vn_mode;		/* mode of balancing (paste, insert, delete, cut) */
	short vn_affected_item_num;	/* NOTE(review): presumably the item in S[0]
					   the operation applies to -- verify */
	short vn_pos_in_item;	/* NOTE(review): presumably position inside that
				   item for paste/cut -- verify */
	struct item_head *vn_ins_ih;	/* item header of inserted item, 0 for other modes */
	const void *vn_data;	/* NOTE(review): presumably body accompanying an
				   insert/paste -- verify against fix_nodes() */
	struct virtual_item *vn_vi;	/* array of items (including a new one, excluding item to be deleted) */
};
2026
/* used by directory items when creating virtual nodes */
struct direntry_uarea {
	int flags;
	__u16 entry_count;	/* number of entries in entry_sizes[] */
	__u16 entry_sizes[1];	/* variable-length trailer; pre-C99 "[1]" idiom
				   for a flexible array member -- kept as-is
				   since sizeof() users may rely on it */
} __attribute__ ((__packed__));
2033
2034/***************************************************************************/
2035/* TREE BALANCE */
2036/***************************************************************************/
2037
2038/* This temporary structure is used in tree balance algorithms, and
2039 constructed as we go to the extent that its various parts are
2040 needed. It contains arrays of nodes that can potentially be
2041 involved in the balancing of node S, and parameters that define how
2042 each of the nodes must be balanced. Note that in these algorithms
2043 for balancing the worst case is to need to balance the current node
2044 S and the left and right neighbors and all of their parents plus
2045 create a new node. We implement S1 balancing for the leaf nodes
2046 and S0 balancing for the internal nodes (S1 and S0 are defined in
2047 our papers.)*/
2048
2049#define MAX_FREE_BLOCK 7 /* size of the array of buffers to free at end of do_balance */
2050
2051/* maximum number of FEB blocknrs on a single level */
2052#define MAX_AMOUNT_NEEDED 2
2053
/* someday somebody will prefix every field in this struct with tb_ */
struct tree_balance {
	int tb_mode;		/* one of the M_* balancing modes below */
	int need_balance_dirty;
	struct super_block *tb_sb;	/* filesystem being balanced */
	struct reiserfs_transaction_handle *transaction_handle;
	struct treepath *tb_path;	/* search path down to node S */
	struct buffer_head *L[MAX_HEIGHT];	/* array of left neighbors of nodes in the path */
	struct buffer_head *R[MAX_HEIGHT];	/* array of right neighbors of nodes in the path */
	struct buffer_head *FL[MAX_HEIGHT];	/* array of fathers of the left neighbors */
	struct buffer_head *FR[MAX_HEIGHT];	/* array of fathers of the right neighbors */
	struct buffer_head *CFL[MAX_HEIGHT];	/* array of common parents of center node and its left neighbor */
	struct buffer_head *CFR[MAX_HEIGHT];	/* array of common parents of center node and its right neighbor */

	struct buffer_head *FEB[MAX_FEB_SIZE];	/* array of empty buffers. Number of buffers in array equals
						   cur_blknum. */
	struct buffer_head *used[MAX_FEB_SIZE];
	struct buffer_head *thrown[MAX_FEB_SIZE];
	int lnum[MAX_HEIGHT];	/* array of number of items which must be
				   shifted to the left in order to balance the
				   current node; for leaves includes item that
				   will be partially shifted; for internal
				   nodes, it is the number of child pointers
				   rather than items. It includes the new item
				   being created. The code sometimes subtracts
				   one to get the number of wholly shifted
				   items for other purposes. */
	int rnum[MAX_HEIGHT];	/* substitute right for left in comment above */
	int lkey[MAX_HEIGHT];	/* array indexed by height h mapping the key delimiting L[h] and
				   S[h] to its item number within the node CFL[h] */
	int rkey[MAX_HEIGHT];	/* substitute r for l in comment above */
	int insert_size[MAX_HEIGHT];	/* the number of bytes by we are trying to add or remove from
					   S[h]. A negative value means removing. */
	int blknum[MAX_HEIGHT];	/* number of nodes that will replace node S[h] after
				   balancing on the level h of the tree. If 0 then S is
				   being deleted, if 1 then S is remaining and no new nodes
				   are being created, if 2 or 3 then 1 or 2 new nodes is
				   being created */

	/* fields that are used only for balancing leaves of the tree */
	int cur_blknum;		/* number of empty blocks having been already allocated */
	int s0num;		/* number of items that fall into left most node when S[0] splits */
	int s1num;		/* number of items that fall into first new node when S[0] splits */
	int s2num;		/* number of items that fall into second new node when S[0] splits */
	int lbytes;		/* number of bytes which can flow to the left neighbor from the left */
	/* most liquid item that cannot be shifted from S[0] entirely */
	/* if -1 then nothing will be partially shifted */
	int rbytes;		/* number of bytes which will flow to the right neighbor from the right */
	/* most liquid item that cannot be shifted from S[0] entirely */
	/* if -1 then nothing will be partially shifted */
	int s1bytes;		/* number of bytes which flow to the first new node when S[0] splits */
	/* note: if S[0] splits into 3 nodes, then items do not need to be cut */
	int s2bytes;		/* as s1bytes, but for the second new node */
	struct buffer_head *buf_to_free[MAX_FREE_BLOCK];	/* buffers which are to be freed after do_balance finishes by unfix_nodes */
	char *vn_buf;		/* kmalloced memory. Used to create
				   virtual node and keep map of
				   dirtied bitmap blocks */
	int vn_buf_size;	/* size of the vn_buf */
	struct virtual_node *tb_vn;	/* VN starts after bitmap of bitmap blocks */

	int fs_gen;		/* saved value of `reiserfs_generation' counter
				   see FILESYSTEM_CHANGED() macro in reiserfs_fs.h */
#ifdef DISPLACE_NEW_PACKING_LOCALITIES
	struct in_core_key key;	/* key pointer, to pass to block allocator or
				   another low-level subsystem */
#endif
};
2121
2122/* These are modes of balancing */
2123
2124/* When inserting an item. */
2125#define M_INSERT 'i'
2126/* When inserting into (directories only) or appending onto an already
2127 existent item. */
2128#define M_PASTE 'p'
2129/* When deleting an item. */
2130#define M_DELETE 'd'
2131/* When truncating an item or removing an entry from a (directory) item. */
2132#define M_CUT 'c'
2133
2134/* used when balancing on leaf level skipped (in reiserfsck) */
2135#define M_INTERNAL 'n'
2136
2137/* When further balancing is not needed, then do_balance does not need
2138 to be called. */
2139#define M_SKIP_BALANCING 's'
2140#define M_CONVERT 'v'
2141
2142/* modes of leaf_move_items */
2143#define LEAF_FROM_S_TO_L 0
2144#define LEAF_FROM_S_TO_R 1
2145#define LEAF_FROM_R_TO_L 2
2146#define LEAF_FROM_L_TO_R 3
2147#define LEAF_FROM_S_TO_SNEW 4
2148
2149#define FIRST_TO_LAST 0
2150#define LAST_TO_FIRST 1
2151
/* used in do_balance for passing parent of node information that has
   been gotten from tb struct */
struct buffer_info {
	struct tree_balance *tb;	/* balance this buffer belongs to (may be NULL) */
	struct buffer_head *bi_bh;	/* the node itself */
	struct buffer_head *bi_parent;	/* its parent node */
	int bi_position;	/* child position within bi_parent */
};
2160
2161static inline struct super_block *sb_from_tb(struct tree_balance *tb)
2162{
2163 return tb ? tb->tb_sb : NULL;
2164}
2165
2166static inline struct super_block *sb_from_bi(struct buffer_info *bi)
2167{
2168 return bi ? sb_from_tb(bi->tb) : NULL;
2169}
2170
2171/* there are 4 types of items: stat data, directory item, indirect, direct.
2172+-------------------+------------+--------------+------------+
2173| | k_offset | k_uniqueness | mergeable? |
2174+-------------------+------------+--------------+------------+
2175| stat data | 0 | 0 | no |
2176+-------------------+------------+--------------+------------+
2177| 1st directory item| DOT_OFFSET |DIRENTRY_UNIQUENESS| no |
2178| non 1st directory | hash value | | yes |
2179| item | | | |
2180+-------------------+------------+--------------+------------+
2181| indirect item | offset + 1 |TYPE_INDIRECT | if this is not the first indirect item of the object
2182+-------------------+------------+--------------+------------+
2183| direct item | offset + 1 |TYPE_DIRECT | if not this is not the first direct item of the object
2184+-------------------+------------+--------------+------------+
2185*/
2186
/* Per-item-type operation table.  One instance exists per item type
   (stat data, directory, indirect, direct); dispatch happens through
   item_ops[] via the op_* macros below. */
struct item_operations {
	int (*bytes_number) (struct item_head * ih, int block_size);
	void (*decrement_key) (struct cpu_key *);
	int (*is_left_mergeable) (struct reiserfs_key * ih,
				  unsigned long bsize);
	void (*print_item) (struct item_head *, char *item);
	void (*check_item) (struct item_head *, char *item);

	/* build the virtual_item describing this item for balancing */
	int (*create_vi) (struct virtual_node * vn, struct virtual_item * vi,
			  int is_affected, int insert_size);
	/* shiftability checks against neighbor free space -- see
	   op_check_left/op_check_right below */
	int (*check_left) (struct virtual_item * vi, int free,
			   int start_skip, int end_skip);
	int (*check_right) (struct virtual_item * vi, int free);
	int (*part_size) (struct virtual_item * vi, int from, int to);
	int (*unit_num) (struct virtual_item * vi);
	void (*print_vi) (struct virtual_item * vi);
};
2204
2205extern struct item_operations *item_ops[TYPE_ANY + 1];
2206
2207#define op_bytes_number(ih,bsize) item_ops[le_ih_k_type (ih)]->bytes_number (ih, bsize)
2208#define op_is_left_mergeable(key,bsize) item_ops[le_key_k_type (le_key_version (key), key)]->is_left_mergeable (key, bsize)
2209#define op_print_item(ih,item) item_ops[le_ih_k_type (ih)]->print_item (ih, item)
2210#define op_check_item(ih,item) item_ops[le_ih_k_type (ih)]->check_item (ih, item)
2211#define op_create_vi(vn,vi,is_affected,insert_size) item_ops[le_ih_k_type ((vi)->vi_ih)]->create_vi (vn,vi,is_affected,insert_size)
2212#define op_check_left(vi,free,start_skip,end_skip) item_ops[(vi)->vi_index]->check_left (vi, free, start_skip, end_skip)
2213#define op_check_right(vi,free) item_ops[(vi)->vi_index]->check_right (vi, free)
2214#define op_part_size(vi,from,to) item_ops[(vi)->vi_index]->part_size (vi, from, to)
2215#define op_unit_num(vi) item_ops[(vi)->vi_index]->unit_num (vi)
2216#define op_print_vi(vi) item_ops[(vi)->vi_index]->print_vi (vi)
2217
2218#define COMP_SHORT_KEYS comp_short_keys
2219
2220/* number of blocks pointed to by the indirect item */
2221#define I_UNFM_NUM(ih) (ih_item_len(ih) / UNFM_P_SIZE)
2222
2223/* the used space within the unformatted node corresponding to pos within the item pointed to by ih */
2224#define I_POS_UNFM_SIZE(ih,pos,size) (((pos) == I_UNFM_NUM(ih) - 1 ) ? (size) - ih_free_space(ih) : (size))
2225
2226/* number of bytes contained by the direct item or the unformatted nodes the indirect item points to */
2227
2228/* get the item header */
2229#define B_N_PITEM_HEAD(bh,item_num) ( (struct item_head * )((bh)->b_data + BLKH_SIZE) + (item_num) )
2230
2231/* get key */
2232#define B_N_PDELIM_KEY(bh,item_num) ( (struct reiserfs_key * )((bh)->b_data + BLKH_SIZE) + (item_num) )
2233
2234/* get the key */
2235#define B_N_PKEY(bh,item_num) ( &(B_N_PITEM_HEAD(bh,item_num)->ih_key) )
2236
2237/* get item body */
2238#define B_N_PITEM(bh,item_num) ( (bh)->b_data + ih_location(B_N_PITEM_HEAD((bh),(item_num))))
2239
2240/* get the stat data by the buffer header and the item order */
2241#define B_N_STAT_DATA(bh,nr) \
2242( (struct stat_data *)((bh)->b_data + ih_location(B_N_PITEM_HEAD((bh),(nr))) ) )
2243
2244 /* following defines use reiserfs buffer header and item header */
2245
2246/* get stat-data */
2247#define B_I_STAT_DATA(bh, ih) ( (struct stat_data * )((bh)->b_data + ih_location(ih)) )
2248
2249// this is 3976 for size==4096
2250#define MAX_DIRECT_ITEM_LEN(size) ((size) - BLKH_SIZE - 2*IH_SIZE - SD_SIZE - UNFM_P_SIZE)
2251
2252/* indirect items consist of entries which contain blocknrs, pos
2253 indicates which entry, and B_I_POS_UNFM_POINTER resolves to the
2254 blocknr contained by the entry pos points to */
2255#define B_I_POS_UNFM_POINTER(bh,ih,pos) le32_to_cpu(*(((unp_t *)B_I_PITEM(bh,ih)) + (pos)))
2256#define PUT_B_I_POS_UNFM_POINTER(bh,ih,pos, val) do {*(((unp_t *)B_I_PITEM(bh,ih)) + (pos)) = cpu_to_le32(val); } while (0)
2257
/* Identifies an inode by its (dirid, objectid) key pair.
   NOTE(review): presumably handed to reiserfs_find_actor /
   reiserfs_init_locked_inode via their void * argument -- verify. */
struct reiserfs_iget_args {
	__u32 objectid;
	__u32 dirid;
};
2262
2263/***************************************************************************/
2264/* FUNCTION DECLARATIONS */
2265/***************************************************************************/
2266
2267#define get_journal_desc_magic(bh) (bh->b_data + bh->b_size - 12)
2268
2269#define journal_trans_half(blocksize) \
2270 ((blocksize - sizeof (struct reiserfs_journal_desc) + sizeof (__u32) - 12) / sizeof (__u32))
2271
2272/* journal.c see journal.c for all the comments here */
2273
/* first block written in a commit. */
struct reiserfs_journal_desc {
	__le32 j_trans_id;	/* id of commit */
	__le32 j_len;		/* length of commit. len +1 is the commit block */
	__le32 j_mount_id;	/* mount id of this trans */
	__le32 j_realblock[1];	/* real locations for each block; pre-C99
				   variable-length trailer idiom -- on-disk
				   layout, do not change */
};
2281
2282#define get_desc_trans_id(d) le32_to_cpu((d)->j_trans_id)
2283#define get_desc_trans_len(d) le32_to_cpu((d)->j_len)
2284#define get_desc_mount_id(d) le32_to_cpu((d)->j_mount_id)
2285
2286#define set_desc_trans_id(d,val) do { (d)->j_trans_id = cpu_to_le32 (val); } while (0)
2287#define set_desc_trans_len(d,val) do { (d)->j_len = cpu_to_le32 (val); } while (0)
2288#define set_desc_mount_id(d,val) do { (d)->j_mount_id = cpu_to_le32 (val); } while (0)
2289
/* last block written in a commit */
struct reiserfs_journal_commit {
	__le32 j_trans_id;	/* must match j_trans_id from the desc block */
	__le32 j_len;		/* ditto */
	__le32 j_realblock[1];	/* real locations for each block; on-disk
				   variable-length trailer */
};
2296
#define get_commit_trans_id(c) le32_to_cpu((c)->j_trans_id)
#define get_commit_trans_len(c) le32_to_cpu((c)->j_len)
/* NOTE(review): struct reiserfs_journal_commit has no j_mount_id field,
   so this macro cannot compile if ever expanded.  Apparently dead code
   (there is no matching set_commit_mount_id either); candidate for
   removal after confirming there are no users. */
#define get_commit_mount_id(c) le32_to_cpu((c)->j_mount_id)

#define set_commit_trans_id(c,val) do { (c)->j_trans_id = cpu_to_le32 (val); } while (0)
#define set_commit_trans_len(c,val) do { (c)->j_len = cpu_to_le32 (val); } while (0)
2303
/* this header block gets written whenever a transaction is considered fully flushed, and is more recent than the
** last fully flushed transaction. fully flushed means all the log blocks and all the real blocks are on disk,
** and this transaction does not need to be replayed.
*/
struct reiserfs_journal_header {
	__le32 j_last_flush_trans_id;	/* id of last fully flushed transaction */
	__le32 j_first_unflushed_offset;	/* offset in the log of where to start replay after a crash */
	__le32 j_mount_id;	/* NOTE(review): presumably the mount id in
				   effect when the header was written -- verify */
	/* 12 */ struct journal_params jh_journal;
};
2314
2315/* biggest tunable defines are right here */
2316#define JOURNAL_BLOCK_COUNT 8192 /* number of blocks in the journal */
2317#define JOURNAL_TRANS_MAX_DEFAULT 1024 /* biggest possible single transaction, don't change for now (8/3/99) */
2318#define JOURNAL_TRANS_MIN_DEFAULT 256
2319#define JOURNAL_MAX_BATCH_DEFAULT 900 /* max blocks to batch into one transaction, don't make this any bigger than 900 */
2320#define JOURNAL_MIN_RATIO 2
2321#define JOURNAL_MAX_COMMIT_AGE 30
2322#define JOURNAL_MAX_TRANS_AGE 30
2323#define JOURNAL_PER_BALANCE_CNT (3 * (MAX_HEIGHT-2) + 9)
2324#define JOURNAL_BLOCKS_PER_OBJECT(sb) (JOURNAL_PER_BALANCE_CNT * 3 + \
2325 2 * (REISERFS_QUOTA_INIT_BLOCKS(sb) + \
2326 REISERFS_QUOTA_TRANS_BLOCKS(sb)))
2327
2328#ifdef CONFIG_QUOTA
2329#define REISERFS_QUOTA_OPTS ((1 << REISERFS_USRQUOTA) | (1 << REISERFS_GRPQUOTA))
2330/* We need to update data and inode (atime) */
2331#define REISERFS_QUOTA_TRANS_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & REISERFS_QUOTA_OPTS ? 2 : 0)
2332/* 1 balancing, 1 bitmap, 1 data per write + stat data update */
2333#define REISERFS_QUOTA_INIT_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & REISERFS_QUOTA_OPTS ? \
2334(DQUOT_INIT_ALLOC*(JOURNAL_PER_BALANCE_CNT+2)+DQUOT_INIT_REWRITE+1) : 0)
2335/* same as with INIT */
2336#define REISERFS_QUOTA_DEL_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & REISERFS_QUOTA_OPTS ? \
2337(DQUOT_DEL_ALLOC*(JOURNAL_PER_BALANCE_CNT+2)+DQUOT_DEL_REWRITE+1) : 0)
2338#else
2339#define REISERFS_QUOTA_TRANS_BLOCKS(s) 0
2340#define REISERFS_QUOTA_INIT_BLOCKS(s) 0
2341#define REISERFS_QUOTA_DEL_BLOCKS(s) 0
2342#endif
2343
2344/* both of these can be as low as 1, or as high as you want. The min is the
2345** number of 4k bitmap nodes preallocated on mount. New nodes are allocated
2346** as needed, and released when transactions are committed. On release, if
2347** the current number of nodes is > max, the node is freed, otherwise,
2348** it is put on a free list for faster use later.
2349*/
2350#define REISERFS_MIN_BITMAP_NODES 10
2351#define REISERFS_MAX_BITMAP_NODES 100
2352
2353#define JBH_HASH_SHIFT 13 /* these are based on journal hash size of 8192 */
2354#define JBH_HASH_MASK 8191
2355
2356#define _jhashfn(sb,block) \
2357 (((unsigned long)sb>>L1_CACHE_SHIFT) ^ \
2358 (((block)<<(JBH_HASH_SHIFT - 6)) ^ ((block) >> 13) ^ ((block) << (JBH_HASH_SHIFT - 12))))
2359#define journal_hash(t,sb,block) ((t)[_jhashfn((sb),(block)) & JBH_HASH_MASK])
2360
2361// We need these to make journal.c code more readable
2362#define journal_find_get_block(s, block) __find_get_block(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize)
2363#define journal_getblk(s, block) __getblk(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize)
2364#define journal_bread(s, block) __bread(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize)
2365
/* Journal-private buffer_head state bits, allocated starting at
   BH_PrivateStart so they do not clash with generic BH_* bits. */
enum reiserfs_bh_state_bits {
	BH_JDirty = BH_PrivateStart,	/* buffer is in current transaction */
	BH_JDirty_wait,
	BH_JNew,		/* disk block was taken off free list before
				 * being in a finished transaction, or
				 * written to disk. Can be reused immed. */
	BH_JPrepared,
	BH_JRestore_dirty,
	BH_JTest,		// debugging only will go away
};
2376
2377BUFFER_FNS(JDirty, journaled);
2378TAS_BUFFER_FNS(JDirty, journaled);
2379BUFFER_FNS(JDirty_wait, journal_dirty);
2380TAS_BUFFER_FNS(JDirty_wait, journal_dirty);
2381BUFFER_FNS(JNew, journal_new);
2382TAS_BUFFER_FNS(JNew, journal_new);
2383BUFFER_FNS(JPrepared, journal_prepared);
2384TAS_BUFFER_FNS(JPrepared, journal_prepared);
2385BUFFER_FNS(JRestore_dirty, journal_restore_dirty);
2386TAS_BUFFER_FNS(JRestore_dirty, journal_restore_dirty);
2387BUFFER_FNS(JTest, journal_test);
2388TAS_BUFFER_FNS(JTest, journal_test);
2389
2390/*
2391** transaction handle which is passed around for all journal calls
2392*/
2393struct reiserfs_transaction_handle {
2394 struct super_block *t_super; /* super for this FS when journal_begin was
2395 called. saves calls to reiserfs_get_super
2396 also used by nested transactions to make
2397 sure they are nesting on the right FS
2398 _must_ be first in the handle
2399 */
2400 int t_refcount;
2401 int t_blocks_logged; /* number of blocks this writer has logged */
2402 int t_blocks_allocated; /* number of blocks this writer allocated */
2403 unsigned int t_trans_id; /* sanity check, equals the current trans id */
2404 void *t_handle_save; /* save existing current->journal_info */
2405 unsigned displace_new_blocks:1; /* if new block allocation occurres, that block
2406 should be displaced from others */
2407 struct list_head t_list;
2408};
2409
/* used to keep track of ordered and tail writes, attached to the buffer
 * head through b_journal_head.
 */
struct reiserfs_jh {
	struct reiserfs_journal_list *jl;	/* journal list this write belongs to */
	struct buffer_head *bh;	/* the buffer being tracked */
	struct list_head list;	/* linkage on the journal list's queue */
};
2418
2419void reiserfs_free_jh(struct buffer_head *bh);
2420int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh);
2421int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh);
2422int journal_mark_dirty(struct reiserfs_transaction_handle *,
2423 struct super_block *, struct buffer_head *bh);
2424
2425static inline int reiserfs_file_data_log(struct inode *inode)
2426{
2427 if (reiserfs_data_log(inode->i_sb) ||
2428 (REISERFS_I(inode)->i_flags & i_data_log))
2429 return 1;
2430 return 0;
2431}
2432
2433static inline int reiserfs_transaction_running(struct super_block *s)
2434{
2435 struct reiserfs_transaction_handle *th = current->journal_info;
2436 if (th && th->t_super == s)
2437 return 1;
2438 if (th && th->t_super == NULL)
2439 BUG();
2440 return 0;
2441}
2442
2443static inline int reiserfs_transaction_free_space(struct reiserfs_transaction_handle *th)
2444{
2445 return th->t_blocks_allocated - th->t_blocks_logged;
2446}
2447
2448struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct
2449 super_block
2450 *,
2451 int count);
2452int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *);
2453int reiserfs_commit_page(struct inode *inode, struct page *page,
2454 unsigned from, unsigned to);
2455int reiserfs_flush_old_commits(struct super_block *);
2456int reiserfs_commit_for_inode(struct inode *);
2457int reiserfs_inode_needs_commit(struct inode *);
2458void reiserfs_update_inode_transaction(struct inode *);
2459void reiserfs_wait_on_write_block(struct super_block *s);
2460void reiserfs_block_writes(struct reiserfs_transaction_handle *th);
2461void reiserfs_allow_writes(struct super_block *s);
2462void reiserfs_check_lock_depth(struct super_block *s, char *caller);
2463int reiserfs_prepare_for_journal(struct super_block *, struct buffer_head *bh,
2464 int wait);
2465void reiserfs_restore_prepared_buffer(struct super_block *,
2466 struct buffer_head *bh);
2467int journal_init(struct super_block *, const char *j_dev_name, int old_format,
2468 unsigned int);
2469int journal_release(struct reiserfs_transaction_handle *, struct super_block *);
2470int journal_release_error(struct reiserfs_transaction_handle *,
2471 struct super_block *);
2472int journal_end(struct reiserfs_transaction_handle *, struct super_block *,
2473 unsigned long);
2474int journal_end_sync(struct reiserfs_transaction_handle *, struct super_block *,
2475 unsigned long);
2476int journal_mark_freed(struct reiserfs_transaction_handle *,
2477 struct super_block *, b_blocknr_t blocknr);
2478int journal_transaction_should_end(struct reiserfs_transaction_handle *, int);
2479int reiserfs_in_journal(struct super_block *sb, unsigned int bmap_nr,
2480 int bit_nr, int searchall, b_blocknr_t *next);
2481int journal_begin(struct reiserfs_transaction_handle *,
2482 struct super_block *sb, unsigned long);
2483int journal_join_abort(struct reiserfs_transaction_handle *,
2484 struct super_block *sb, unsigned long);
2485void reiserfs_abort_journal(struct super_block *sb, int errno);
2486void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...);
2487int reiserfs_allocate_list_bitmaps(struct super_block *s,
2488 struct reiserfs_list_bitmap *, unsigned int);
2489
2490void add_save_link(struct reiserfs_transaction_handle *th,
2491 struct inode *inode, int truncate);
2492int remove_save_link(struct inode *inode, int truncate);
2493
2494/* objectid.c */
2495__u32 reiserfs_get_unused_objectid(struct reiserfs_transaction_handle *th);
2496void reiserfs_release_objectid(struct reiserfs_transaction_handle *th,
2497 __u32 objectid_to_release);
2498int reiserfs_convert_objectid_map_v1(struct super_block *);
2499
2500/* stree.c */
2501int B_IS_IN_TREE(const struct buffer_head *);
2502extern void copy_item_head(struct item_head *to,
2503 const struct item_head *from);
2504
2505// first key is in cpu form, second - le
2506extern int comp_short_keys(const struct reiserfs_key *le_key,
2507 const struct cpu_key *cpu_key);
2508extern void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from);
2509
2510// both are in le form
2511extern int comp_le_keys(const struct reiserfs_key *,
2512 const struct reiserfs_key *);
2513extern int comp_short_le_keys(const struct reiserfs_key *,
2514 const struct reiserfs_key *);
2515
2516//
2517// get key version from on disk key - kludge
2518//
2519static inline int le_key_version(const struct reiserfs_key *key)
2520{
2521 int type;
2522
2523 type = offset_v2_k_type(&(key->u.k_offset_v2));
2524 if (type != TYPE_DIRECT && type != TYPE_INDIRECT
2525 && type != TYPE_DIRENTRY)
2526 return KEY_FORMAT_3_5;
2527
2528 return KEY_FORMAT_3_6;
2529
2530}
2531
/* Copy a raw (little-endian, on-disk format) key.  Copies exactly
   KEY_SIZE bytes -- NOTE(review): presumably KEY_SIZE equals
   sizeof(struct reiserfs_key); confirm before touching. */
static inline void copy_key(struct reiserfs_key *to,
			    const struct reiserfs_key *from)
{
	memcpy(to, from, KEY_SIZE);
}
2537
2538int comp_items(const struct item_head *stored_ih, const struct treepath *path);
2539const struct reiserfs_key *get_rkey(const struct treepath *chk_path,
2540 const struct super_block *sb);
2541int search_by_key(struct super_block *, const struct cpu_key *,
2542 struct treepath *, int);
2543#define search_item(s,key,path) search_by_key (s, key, path, DISK_LEAF_NODE_LEVEL)
2544int search_for_position_by_key(struct super_block *sb,
2545 const struct cpu_key *cpu_key,
2546 struct treepath *search_path);
2547extern void decrement_bcount(struct buffer_head *bh);
2548void decrement_counters_in_path(struct treepath *search_path);
2549void pathrelse(struct treepath *search_path);
2550int reiserfs_check_path(struct treepath *p);
2551void pathrelse_and_restore(struct super_block *s, struct treepath *search_path);
2552
2553int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
2554 struct treepath *path,
2555 const struct cpu_key *key,
2556 struct item_head *ih,
2557 struct inode *inode, const char *body);
2558
2559int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th,
2560 struct treepath *path,
2561 const struct cpu_key *key,
2562 struct inode *inode,
2563 const char *body, int paste_size);
2564
2565int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
2566 struct treepath *path,
2567 struct cpu_key *key,
2568 struct inode *inode,
2569 struct page *page, loff_t new_file_size);
2570
2571int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
2572 struct treepath *path,
2573 const struct cpu_key *key,
2574 struct inode *inode, struct buffer_head *un_bh);
2575
2576void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
2577 struct inode *inode, struct reiserfs_key *key);
2578int reiserfs_delete_object(struct reiserfs_transaction_handle *th,
2579 struct inode *inode);
2580int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
2581 struct inode *inode, struct page *,
2582 int update_timestamps);
2583
2584#define i_block_size(inode) ((inode)->i_sb->s_blocksize)
2585#define file_size(inode) ((inode)->i_size)
2586#define tail_size(inode) (file_size (inode) & (i_block_size (inode) - 1))
2587
2588#define tail_has_to_be_packed(inode) (have_large_tails ((inode)->i_sb)?\
2589!STORE_TAIL_IN_UNFM_S1(file_size (inode), tail_size(inode), inode->i_sb->s_blocksize):have_small_tails ((inode)->i_sb)?!STORE_TAIL_IN_UNFM_S2(file_size (inode), tail_size(inode), inode->i_sb->s_blocksize):0 )
2590
2591void padd_item(char *item, int total_length, int length);
2592
2593/* inode.c */
2594/* args for the create parameter of reiserfs_get_block */
2595#define GET_BLOCK_NO_CREATE 0 /* don't create new blocks or convert tails */
2596#define GET_BLOCK_CREATE 1 /* add anything you need to find block */
2597#define GET_BLOCK_NO_HOLE 2 /* return -ENOENT for file holes */
2598#define GET_BLOCK_READ_DIRECT 4 /* read the tail if indirect item not found */
2599#define GET_BLOCK_NO_IMUX 8 /* i_mutex is not held, don't preallocate */
2600#define GET_BLOCK_NO_DANGLE 16 /* don't leave any transactions running */
2601
2602void reiserfs_read_locked_inode(struct inode *inode,
2603 struct reiserfs_iget_args *args);
2604int reiserfs_find_actor(struct inode *inode, void *p);
2605int reiserfs_init_locked_inode(struct inode *inode, void *p);
2606void reiserfs_evict_inode(struct inode *inode);
2607int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc);
2608int reiserfs_get_block(struct inode *inode, sector_t block,
2609 struct buffer_head *bh_result, int create);
2610struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
2611 int fh_len, int fh_type);
2612struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid,
2613 int fh_len, int fh_type);
2614int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp,
2615 int connectable);
2616
2617int reiserfs_truncate_file(struct inode *, int update_timestamps);
2618void make_cpu_key(struct cpu_key *cpu_key, struct inode *inode, loff_t offset,
2619 int type, int key_length);
2620void make_le_item_head(struct item_head *ih, const struct cpu_key *key,
2621 int version,
2622 loff_t offset, int type, int length, int entry_count);
2623struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key);
2624
2625struct reiserfs_security_handle;
2626int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
2627 struct inode *dir, umode_t mode,
2628 const char *symname, loff_t i_size,
2629 struct dentry *dentry, struct inode *inode,
2630 struct reiserfs_security_handle *security);
2631
2632void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th,
2633 struct inode *inode, loff_t size);
2634
/*
 * Convenience wrapper: log an update of the inode's on-disk stat data so
 * its size field matches the current in-core inode->i_size.  Simply calls
 * reiserfs_update_sd_size() (declared above) with inode->i_size.
 */
2635static inline void reiserfs_update_sd(struct reiserfs_transaction_handle *th,
2636 struct inode *inode)
2637{
2638 reiserfs_update_sd_size(th, inode, inode->i_size);
2639}
2640
2641void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode);
2642void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs);
2643int reiserfs_setattr(struct dentry *dentry, struct iattr *attr);
2644
2645int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len);
2646
2647/* namei.c */
2648void set_de_name_and_namelen(struct reiserfs_dir_entry *de);
2649int search_by_entry_key(struct super_block *sb, const struct cpu_key *key,
2650 struct treepath *path, struct reiserfs_dir_entry *de);
2651struct dentry *reiserfs_get_parent(struct dentry *);
2652
2653#ifdef CONFIG_REISERFS_PROC_INFO
2654int reiserfs_proc_info_init(struct super_block *sb);
2655int reiserfs_proc_info_done(struct super_block *sb);
2656int reiserfs_proc_info_global_init(void);
2657int reiserfs_proc_info_global_done(void);
2658
2659#define PROC_EXP( e ) e
2660
2661#define __PINFO( sb ) REISERFS_SB(sb) -> s_proc_info_data
2662#define PROC_INFO_MAX( sb, field, value ) \
2663 __PINFO( sb ).field = \
2664 max( REISERFS_SB( sb ) -> s_proc_info_data.field, value )
2665#define PROC_INFO_INC( sb, field ) ( ++ ( __PINFO( sb ).field ) )
2666#define PROC_INFO_ADD( sb, field, val ) ( __PINFO( sb ).field += ( val ) )
2667#define PROC_INFO_BH_STAT( sb, bh, level ) \
2668 PROC_INFO_INC( sb, sbk_read_at[ ( level ) ] ); \
2669 PROC_INFO_ADD( sb, free_at[ ( level ) ], B_FREE_SPACE( bh ) ); \
2670 PROC_INFO_ADD( sb, items_at[ ( level ) ], B_NR_ITEMS( bh ) )
2671#else
/* CONFIG_REISERFS_PROC_INFO disabled: per-superblock /proc setup is a no-op. */
2672static inline int reiserfs_proc_info_init(struct super_block *sb)
2673{
2674 return 0;
2675}
2676
/* CONFIG_REISERFS_PROC_INFO disabled: per-superblock /proc teardown is a no-op. */
2677static inline int reiserfs_proc_info_done(struct super_block *sb)
2678{
2679 return 0;
2680}
2681
/* CONFIG_REISERFS_PROC_INFO disabled: global /proc setup is a no-op. */
2682static inline int reiserfs_proc_info_global_init(void)
2683{
2684 return 0;
2685}
2686
/* CONFIG_REISERFS_PROC_INFO disabled: global /proc teardown is a no-op. */
2687static inline int reiserfs_proc_info_global_done(void)
2688{
2689 return 0;
2690}
2691
2692#define PROC_EXP( e )
2693#define VOID_V ( ( void ) 0 )
2694#define PROC_INFO_MAX( sb, field, value ) VOID_V
2695#define PROC_INFO_INC( sb, field ) VOID_V
2696#define PROC_INFO_ADD( sb, field, val ) VOID_V
2697#define PROC_INFO_BH_STAT(sb, bh, n_node_level) VOID_V
2698#endif
2699
2700/* dir.c */
2701extern const struct inode_operations reiserfs_dir_inode_operations;
2702extern const struct inode_operations reiserfs_symlink_inode_operations;
2703extern const struct inode_operations reiserfs_special_inode_operations;
2704extern const struct file_operations reiserfs_dir_operations;
2705int reiserfs_readdir_dentry(struct dentry *, void *, filldir_t, loff_t *);
2706
2707/* tail_conversion.c */
2708int direct2indirect(struct reiserfs_transaction_handle *, struct inode *,
2709 struct treepath *, struct buffer_head *, loff_t);
2710int indirect2direct(struct reiserfs_transaction_handle *, struct inode *,
2711 struct page *, struct treepath *, const struct cpu_key *,
2712 loff_t, char *);
2713void reiserfs_unmap_buffer(struct buffer_head *);
2714
2715/* file.c */
2716extern const struct inode_operations reiserfs_file_inode_operations;
2717extern const struct file_operations reiserfs_file_operations;
2718extern const struct address_space_operations reiserfs_address_space_operations;
2719
2720/* fix_nodes.c */
2721
2722int fix_nodes(int n_op_mode, struct tree_balance *tb,
2723 struct item_head *ins_ih, const void *);
2724void unfix_nodes(struct tree_balance *);
2725
2726/* prints.c */
2727void __reiserfs_panic(struct super_block *s, const char *id,
2728 const char *function, const char *fmt, ...)
2729 __attribute__ ((noreturn));
2730#define reiserfs_panic(s, id, fmt, args...) \
2731 __reiserfs_panic(s, id, __func__, fmt, ##args)
2732void __reiserfs_error(struct super_block *s, const char *id,
2733 const char *function, const char *fmt, ...);
2734#define reiserfs_error(s, id, fmt, args...) \
2735 __reiserfs_error(s, id, __func__, fmt, ##args)
2736void reiserfs_info(struct super_block *s, const char *fmt, ...);
2737void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...);
2738void print_indirect_item(struct buffer_head *bh, int item_num);
2739void store_print_tb(struct tree_balance *tb);
2740void print_cur_tb(char *mes);
2741void print_de(struct reiserfs_dir_entry *de);
2742void print_bi(struct buffer_info *bi, char *mes);
2743#define PRINT_LEAF_ITEMS 1 /* print all items */
2744#define PRINT_DIRECTORY_ITEMS 2 /* print directory items */
2745#define PRINT_DIRECT_ITEMS 4 /* print contents of direct items */
2746void print_block(struct buffer_head *bh, ...);
2747void print_bmap(struct super_block *s, int silent);
2748void print_bmap_block(int i, char *data, int size, int silent);
2749/*void print_super_block (struct super_block * s, char * mes);*/
2750void print_objectid_map(struct super_block *s);
2751void print_block_head(struct buffer_head *bh, char *mes);
2752void check_leaf(struct buffer_head *bh);
2753void check_internal(struct buffer_head *bh);
2754void print_statistics(struct super_block *s);
2755char *reiserfs_hashname(int code);
2756
2757/* lbalance.c */
2758int leaf_move_items(int shift_mode, struct tree_balance *tb, int mov_num,
2759 int mov_bytes, struct buffer_head *Snew);
2760int leaf_shift_left(struct tree_balance *tb, int shift_num, int shift_bytes);
2761int leaf_shift_right(struct tree_balance *tb, int shift_num, int shift_bytes);
2762void leaf_delete_items(struct buffer_info *cur_bi, int last_first, int first,
2763 int del_num, int del_bytes);
2764void leaf_insert_into_buf(struct buffer_info *bi, int before,
2765 struct item_head *inserted_item_ih,
2766 const char *inserted_item_body, int zeros_number);
2767void leaf_paste_in_buffer(struct buffer_info *bi, int pasted_item_num,
2768 int pos_in_item, int paste_size, const char *body,
2769 int zeros_number);
2770void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num,
2771 int pos_in_item, int cut_size);
2772void leaf_paste_entries(struct buffer_info *bi, int item_num, int before,
2773 int new_entry_count, struct reiserfs_de_head *new_dehs,
2774 const char *records, int paste_size);
2775/* ibalance.c */
2776int balance_internal(struct tree_balance *, int, int, struct item_head *,
2777 struct buffer_head **);
2778
2779/* do_balance.c */
2780void do_balance_mark_leaf_dirty(struct tree_balance *tb,
2781 struct buffer_head *bh, int flag);
2782#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
2783#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
2784
2785void do_balance(struct tree_balance *tb, struct item_head *ih,
2786 const char *body, int flag);
2787void reiserfs_invalidate_buffer(struct tree_balance *tb,
2788 struct buffer_head *bh);
2789
2790int get_left_neighbor_position(struct tree_balance *tb, int h);
2791int get_right_neighbor_position(struct tree_balance *tb, int h);
2792void replace_key(struct tree_balance *tb, struct buffer_head *, int,
2793 struct buffer_head *, int);
2794void make_empty_node(struct buffer_info *);
2795struct buffer_head *get_FEB(struct tree_balance *);
2796
2797/* bitmap.c */
2798
2799/* structure contains hints for block allocator, and it is a container for
2800 * arguments, such as node, search path, transaction_handle, etc. */
2801struct __reiserfs_blocknr_hint {
2802 struct inode *inode; /* inode passed to allocator, if we allocate unf. nodes */
2803 sector_t block; /* file offset, in blocks */
2804 struct in_core_key key;
2805 struct treepath *path; /* search path, used by allocator to deternine search_start by
2806 * various ways */
2807 struct reiserfs_transaction_handle *th; /* transaction handle is needed to log super blocks and
2808 * bitmap blocks changes */
2809 b_blocknr_t beg, end;
2810 b_blocknr_t search_start; /* a field used to transfer search start value (block number)
2811 * between different block allocator procedures
2812 * (determine_search_start() and others) */
2813 int prealloc_size; /* is set in determine_prealloc_size() function, used by underlayed
2814 * function that do actual allocation */
2815
2816 unsigned formatted_node:1; /* the allocator uses different polices for getting disk space for
2817 * formatted/unformatted blocks with/without preallocation */
/* set when the caller wants preallocation for unformatted blocks
 * (see reiserfs_new_unf_blocknrs2() below, which sets it to 1) */
2818 unsigned preallocate:1;
2819};
2820
2821typedef struct __reiserfs_blocknr_hint reiserfs_blocknr_hint_t;
2822
2823int reiserfs_parse_alloc_options(struct super_block *, char *);
2824void reiserfs_init_alloc_options(struct super_block *s);
2825
2826/*
2827 * given a directory, this will tell you what packing locality
2828 * to use for a new object underneat it. The locality is returned
2829 * in disk byte order (le).
2830 */
2831__le32 reiserfs_choose_packing(struct inode *dir);
2832
2833int reiserfs_init_bitmap_cache(struct super_block *sb);
2834void reiserfs_free_bitmap_cache(struct super_block *sb);
2835void reiserfs_cache_bitmap_metadata(struct super_block *sb, struct buffer_head *bh, struct reiserfs_bitmap_info *info);
2836struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb, unsigned int bitmap);
2837int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value);
2838void reiserfs_free_block(struct reiserfs_transaction_handle *th, struct inode *,
2839 b_blocknr_t, int for_unformatted);
2840int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *, b_blocknr_t *, int,
2841 int);
/*
 * Allocate block numbers for new formatted (tree) nodes during a balance.
 * Builds an allocation hint from the tree_balance: its transaction handle,
 * search path and key.  .inode is NULL and .formatted_node is 1, so the
 * allocator applies its formatted-node policy; .block is 0 (no file offset).
 * Up to amount_needed block numbers are stored in new_blocknrs; returns the
 * result of reiserfs_allocate_blocknrs() (last argument 0).
 */
2842static inline int reiserfs_new_form_blocknrs(struct tree_balance *tb,
2843 b_blocknr_t * new_blocknrs,
2844 int amount_needed)
2845{
2846 reiserfs_blocknr_hint_t hint = {
2847 .th = tb->transaction_handle,
2848 .path = tb->tb_path,
2849 .inode = NULL,
2850 .key = tb->key,
2851 .block = 0,
2852 .formatted_node = 1
2853 };
2854 return reiserfs_allocate_blocknrs(&hint, new_blocknrs, amount_needed,
2855 0);
2856}
2857
/*
 * Allocate one block number for an unformatted (data) node, without
 * preallocation.  The hint carries the owning inode, the search path and the
 * file offset (in blocks); .formatted_node and .preallocate are both 0.
 * Exactly one block number is requested and stored in new_blocknrs; returns
 * the result of reiserfs_allocate_blocknrs().
 */
2858static inline int reiserfs_new_unf_blocknrs(struct reiserfs_transaction_handle
2859 *th, struct inode *inode,
2860 b_blocknr_t * new_blocknrs,
2861 struct treepath *path,
2862 sector_t block)
2863{
2864 reiserfs_blocknr_hint_t hint = {
2865 .th = th,
2866 .path = path,
2867 .inode = inode,
2868 .block = block,
2869 .formatted_node = 0,
2870 .preallocate = 0
2871 };
2872 return reiserfs_allocate_blocknrs(&hint, new_blocknrs, 1, 0);
2873}
2874
2875#ifdef REISERFS_PREALLOCATE
/*
 * Same as reiserfs_new_unf_blocknrs() but with .preallocate set to 1, asking
 * the allocator to reserve extra blocks for the inode (only compiled when
 * REISERFS_PREALLOCATE is defined).  One block number is requested and stored
 * in new_blocknrs; returns the result of reiserfs_allocate_blocknrs().
 */
2876static inline int reiserfs_new_unf_blocknrs2(struct reiserfs_transaction_handle
2877 *th, struct inode *inode,
2878 b_blocknr_t * new_blocknrs,
2879 struct treepath *path,
2880 sector_t block)
2881{
2882 reiserfs_blocknr_hint_t hint = {
2883 .th = th,
2884 .path = path,
2885 .inode = inode,
2886 .block = block,
2887 .formatted_node = 0,
2888 .preallocate = 1
2889 };
2890 return reiserfs_allocate_blocknrs(&hint, new_blocknrs, 1, 0);
2891}
2892
2893void reiserfs_discard_prealloc(struct reiserfs_transaction_handle *th,
2894 struct inode *inode);
2895void reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle *th);
2896#endif
2897
2898/* hashes.c */
2899__u32 keyed_hash(const signed char *msg, int len);
2900__u32 yura_hash(const signed char *msg, int len);
2901__u32 r5_hash(const signed char *msg, int len);
2902
2903#define reiserfs_set_le_bit __set_bit_le
2904#define reiserfs_test_and_set_le_bit __test_and_set_bit_le
2905#define reiserfs_clear_le_bit __clear_bit_le
2906#define reiserfs_test_and_clear_le_bit __test_and_clear_bit_le
2907#define reiserfs_test_le_bit test_bit_le
2908#define reiserfs_find_next_zero_le_bit find_next_zero_bit_le
2909
2910/* sometimes reiserfs_truncate may require to allocate few new blocks
2911 to perform indirect2direct conversion. People probably used to
2912 think, that truncate should work without problems on a filesystem
2913 without free disk space. They may complain that they can not
2914 truncate due to lack of free disk space. This spare space allows us
2915 to not worry about it. 500 is probably too much, but it should be
2916 absolutely safe */
2917#define SPARE_SPACE 500
2918
2919/* prototypes from ioctl.c */
2920long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
2921long reiserfs_compat_ioctl(struct file *filp,
2922 unsigned int cmd, unsigned long arg);
2923int reiserfs_unpack(struct inode *inode, struct file *filp);
diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c
index 7483279b482d..9a17f63c3fd7 100644
--- a/fs/reiserfs/resize.c
+++ b/fs/reiserfs/resize.c
@@ -13,8 +13,7 @@
13#include <linux/vmalloc.h> 13#include <linux/vmalloc.h>
14#include <linux/string.h> 14#include <linux/string.h>
15#include <linux/errno.h> 15#include <linux/errno.h>
16#include <linux/reiserfs_fs.h> 16#include "reiserfs.h"
17#include <linux/reiserfs_fs_sb.h>
18#include <linux/buffer_head.h> 17#include <linux/buffer_head.h>
19 18
20int reiserfs_resize(struct super_block *s, unsigned long block_count_new) 19int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 313d39d639eb..f8afa4b162b8 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -51,7 +51,7 @@
51#include <linux/time.h> 51#include <linux/time.h>
52#include <linux/string.h> 52#include <linux/string.h>
53#include <linux/pagemap.h> 53#include <linux/pagemap.h>
54#include <linux/reiserfs_fs.h> 54#include "reiserfs.h"
55#include <linux/buffer_head.h> 55#include <linux/buffer_head.h>
56#include <linux/quotaops.h> 56#include <linux/quotaops.h>
57 57
@@ -1284,12 +1284,12 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
1284 ** -clm 1284 ** -clm
1285 */ 1285 */
1286 1286
1287 data = kmap_atomic(un_bh->b_page, KM_USER0); 1287 data = kmap_atomic(un_bh->b_page);
1288 off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_CACHE_SIZE - 1)); 1288 off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_CACHE_SIZE - 1));
1289 memcpy(data + off, 1289 memcpy(data + off,
1290 B_I_PITEM(PATH_PLAST_BUFFER(path), &s_ih), 1290 B_I_PITEM(PATH_PLAST_BUFFER(path), &s_ih),
1291 ret_value); 1291 ret_value);
1292 kunmap_atomic(data, KM_USER0); 1292 kunmap_atomic(data);
1293 } 1293 }
1294 /* Perform balancing after all resources have been collected at once. */ 1294 /* Perform balancing after all resources have been collected at once. */
1295 do_balance(&s_del_balance, NULL, NULL, M_DELETE); 1295 do_balance(&s_del_balance, NULL, NULL, M_DELETE);
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index e12d8b97cd4d..8b7616ef06d8 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -16,9 +16,9 @@
16#include <linux/vmalloc.h> 16#include <linux/vmalloc.h>
17#include <linux/time.h> 17#include <linux/time.h>
18#include <asm/uaccess.h> 18#include <asm/uaccess.h>
19#include <linux/reiserfs_fs.h> 19#include "reiserfs.h"
20#include <linux/reiserfs_acl.h> 20#include "acl.h"
21#include <linux/reiserfs_xattr.h> 21#include "xattr.h"
22#include <linux/init.h> 22#include <linux/init.h>
23#include <linux/blkdev.h> 23#include <linux/blkdev.h>
24#include <linux/buffer_head.h> 24#include <linux/buffer_head.h>
@@ -1874,11 +1874,9 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1874 unlock_new_inode(root_inode); 1874 unlock_new_inode(root_inode);
1875 } 1875 }
1876 1876
1877 s->s_root = d_alloc_root(root_inode); 1877 s->s_root = d_make_root(root_inode);
1878 if (!s->s_root) { 1878 if (!s->s_root)
1879 iput(root_inode);
1880 goto error; 1879 goto error;
1881 }
1882 // define and initialize hash function 1880 // define and initialize hash function
1883 sbi->s_hash_function = hash_function(s); 1881 sbi->s_hash_function = hash_function(s);
1884 if (sbi->s_hash_function == NULL) { 1882 if (sbi->s_hash_function == NULL) {
diff --git a/fs/reiserfs/tail_conversion.c b/fs/reiserfs/tail_conversion.c
index d7f6e51bef2a..5e2624d12f70 100644
--- a/fs/reiserfs/tail_conversion.c
+++ b/fs/reiserfs/tail_conversion.c
@@ -5,7 +5,7 @@
5#include <linux/time.h> 5#include <linux/time.h>
6#include <linux/pagemap.h> 6#include <linux/pagemap.h>
7#include <linux/buffer_head.h> 7#include <linux/buffer_head.h>
8#include <linux/reiserfs_fs.h> 8#include "reiserfs.h"
9 9
10/* access to tail : when one is going to read tail it must make sure, that is not running. 10/* access to tail : when one is going to read tail it must make sure, that is not running.
11 direct2indirect and indirect2direct can not run concurrently */ 11 direct2indirect and indirect2direct can not run concurrently */
@@ -128,9 +128,9 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode,
128 if (up_to_date_bh) { 128 if (up_to_date_bh) {
129 unsigned pgoff = 129 unsigned pgoff =
130 (tail_offset + total_tail - 1) & (PAGE_CACHE_SIZE - 1); 130 (tail_offset + total_tail - 1) & (PAGE_CACHE_SIZE - 1);
131 char *kaddr = kmap_atomic(up_to_date_bh->b_page, KM_USER0); 131 char *kaddr = kmap_atomic(up_to_date_bh->b_page);
132 memset(kaddr + pgoff, 0, blk_size - total_tail); 132 memset(kaddr + pgoff, 0, blk_size - total_tail);
133 kunmap_atomic(kaddr, KM_USER0); 133 kunmap_atomic(kaddr);
134 } 134 }
135 135
136 REISERFS_I(inode)->i_first_direct_byte = U32_MAX; 136 REISERFS_I(inode)->i_first_direct_byte = U32_MAX;
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index c24deda8a8bc..46fc1c20a6b1 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -33,7 +33,7 @@
33 * The xattrs themselves are protected by the xattr_sem. 33 * The xattrs themselves are protected by the xattr_sem.
34 */ 34 */
35 35
36#include <linux/reiserfs_fs.h> 36#include "reiserfs.h"
37#include <linux/capability.h> 37#include <linux/capability.h>
38#include <linux/dcache.h> 38#include <linux/dcache.h>
39#include <linux/namei.h> 39#include <linux/namei.h>
@@ -43,8 +43,8 @@
43#include <linux/file.h> 43#include <linux/file.h>
44#include <linux/pagemap.h> 44#include <linux/pagemap.h>
45#include <linux/xattr.h> 45#include <linux/xattr.h>
46#include <linux/reiserfs_xattr.h> 46#include "xattr.h"
47#include <linux/reiserfs_acl.h> 47#include "acl.h"
48#include <asm/uaccess.h> 48#include <asm/uaccess.h>
49#include <net/checksum.h> 49#include <net/checksum.h>
50#include <linux/stat.h> 50#include <linux/stat.h>
diff --git a/fs/reiserfs/xattr.h b/fs/reiserfs/xattr.h
new file mode 100644
index 000000000000..f59626c5d33b
--- /dev/null
+++ b/fs/reiserfs/xattr.h
@@ -0,0 +1,122 @@
1#include <linux/reiserfs_xattr.h>
2#include <linux/init.h>
3#include <linux/list.h>
4#include <linux/rwsem.h>
5
6struct inode;
7struct dentry;
8struct iattr;
9struct super_block;
10struct nameidata;
11
12int reiserfs_xattr_register_handlers(void) __init;
13void reiserfs_xattr_unregister_handlers(void);
14int reiserfs_xattr_init(struct super_block *sb, int mount_flags);
15int reiserfs_lookup_privroot(struct super_block *sb);
16int reiserfs_delete_xattrs(struct inode *inode);
17int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs);
18int reiserfs_permission(struct inode *inode, int mask);
19
20#ifdef CONFIG_REISERFS_FS_XATTR
21#define has_xattr_dir(inode) (REISERFS_I(inode)->i_flags & i_has_xattr_dir)
22ssize_t reiserfs_getxattr(struct dentry *dentry, const char *name,
23 void *buffer, size_t size);
24int reiserfs_setxattr(struct dentry *dentry, const char *name,
25 const void *value, size_t size, int flags);
26ssize_t reiserfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
27int reiserfs_removexattr(struct dentry *dentry, const char *name);
28
29int reiserfs_xattr_get(struct inode *, const char *, void *, size_t);
30int reiserfs_xattr_set(struct inode *, const char *, const void *, size_t, int);
31int reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *,
32 struct inode *, const char *, const void *,
33 size_t, int);
34
35extern const struct xattr_handler reiserfs_xattr_user_handler;
36extern const struct xattr_handler reiserfs_xattr_trusted_handler;
37extern const struct xattr_handler reiserfs_xattr_security_handler;
38#ifdef CONFIG_REISERFS_FS_SECURITY
39int reiserfs_security_init(struct inode *dir, struct inode *inode,
40 const struct qstr *qstr,
41 struct reiserfs_security_handle *sec);
42int reiserfs_security_write(struct reiserfs_transaction_handle *th,
43 struct inode *inode,
44 struct reiserfs_security_handle *sec);
45void reiserfs_security_free(struct reiserfs_security_handle *sec);
46#endif
47
48static inline int reiserfs_xattrs_initialized(struct super_block *sb)
49{
50 return REISERFS_SB(sb)->priv_root != NULL;
51}
52
53#define xattr_size(size) ((size) + sizeof(struct reiserfs_xattr_header))
54static inline loff_t reiserfs_xattr_nblocks(struct inode *inode, loff_t size)
55{
56 loff_t ret = 0;
57 if (reiserfs_file_data_log(inode)) {
58 ret = _ROUND_UP(xattr_size(size), inode->i_sb->s_blocksize);
59 ret >>= inode->i_sb->s_blocksize_bits;
60 }
61 return ret;
62}
63
64/* We may have to create up to 3 objects: xattr root, xattr dir, xattr file.
65 * Let's try to be smart about it.
66 * xattr root: We cache it. If it's not cached, we may need to create it.
67 * xattr dir: If anything has been loaded for this inode, we can set a flag
68 * saying so.
69 * xattr file: Since we don't cache xattrs, we can't tell. We always include
70 * blocks for it.
71 *
72 * However, since root and dir can be created between calls - YOU MUST SAVE
73 * THIS VALUE.
74 */
75static inline size_t reiserfs_xattr_jcreate_nblocks(struct inode *inode)
76{
77 size_t nblocks = JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb);
78
79 if ((REISERFS_I(inode)->i_flags & i_has_xattr_dir) == 0) {
80 nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb);
81 if (!REISERFS_SB(inode->i_sb)->xattr_root->d_inode)
82 nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb);
83 }
84
85 return nblocks;
86}
87
88static inline void reiserfs_init_xattr_rwsem(struct inode *inode)
89{
90 init_rwsem(&REISERFS_I(inode)->i_xattr_sem);
91}
92
93#else
94
95#define reiserfs_getxattr NULL
96#define reiserfs_setxattr NULL
97#define reiserfs_listxattr NULL
98#define reiserfs_removexattr NULL
99
100static inline void reiserfs_init_xattr_rwsem(struct inode *inode)
101{
102}
103#endif /* CONFIG_REISERFS_FS_XATTR */
104
105#ifndef CONFIG_REISERFS_FS_SECURITY
106static inline int reiserfs_security_init(struct inode *dir,
107 struct inode *inode,
108 const struct qstr *qstr,
109 struct reiserfs_security_handle *sec)
110{
111 return 0;
112}
113static inline int
114reiserfs_security_write(struct reiserfs_transaction_handle *th,
115 struct inode *inode,
116 struct reiserfs_security_handle *sec)
117{
118 return 0;
119}
120static inline void reiserfs_security_free(struct reiserfs_security_handle *sec)
121{}
122#endif
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 6da0396e5052..44474f9b990d 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -1,14 +1,14 @@
1#include <linux/capability.h> 1#include <linux/capability.h>
2#include <linux/fs.h> 2#include <linux/fs.h>
3#include <linux/posix_acl.h> 3#include <linux/posix_acl.h>
4#include <linux/reiserfs_fs.h> 4#include "reiserfs.h"
5#include <linux/errno.h> 5#include <linux/errno.h>
6#include <linux/pagemap.h> 6#include <linux/pagemap.h>
7#include <linux/xattr.h> 7#include <linux/xattr.h>
8#include <linux/slab.h> 8#include <linux/slab.h>
9#include <linux/posix_acl_xattr.h> 9#include <linux/posix_acl_xattr.h>
10#include <linux/reiserfs_xattr.h> 10#include "xattr.h"
11#include <linux/reiserfs_acl.h> 11#include "acl.h"
12#include <asm/uaccess.h> 12#include <asm/uaccess.h>
13 13
14static int reiserfs_set_acl(struct reiserfs_transaction_handle *th, 14static int reiserfs_set_acl(struct reiserfs_transaction_handle *th,
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index 534668fa41be..800a3cef6f62 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -1,10 +1,10 @@
1#include <linux/reiserfs_fs.h> 1#include "reiserfs.h"
2#include <linux/errno.h> 2#include <linux/errno.h>
3#include <linux/fs.h> 3#include <linux/fs.h>
4#include <linux/pagemap.h> 4#include <linux/pagemap.h>
5#include <linux/xattr.h> 5#include <linux/xattr.h>
6#include <linux/slab.h> 6#include <linux/slab.h>
7#include <linux/reiserfs_xattr.h> 7#include "xattr.h"
8#include <linux/security.h> 8#include <linux/security.h>
9#include <asm/uaccess.h> 9#include <asm/uaccess.h>
10 10
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index 9883736ce3ec..a0035719f66b 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -1,10 +1,10 @@
1#include <linux/reiserfs_fs.h> 1#include "reiserfs.h"
2#include <linux/capability.h> 2#include <linux/capability.h>
3#include <linux/errno.h> 3#include <linux/errno.h>
4#include <linux/fs.h> 4#include <linux/fs.h>
5#include <linux/pagemap.h> 5#include <linux/pagemap.h>
6#include <linux/xattr.h> 6#include <linux/xattr.h>
7#include <linux/reiserfs_xattr.h> 7#include "xattr.h"
8#include <asm/uaccess.h> 8#include <asm/uaccess.h>
9 9
10static int 10static int
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index 45ae1a00013a..8667491ae7c3 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -1,9 +1,9 @@
1#include <linux/reiserfs_fs.h> 1#include "reiserfs.h"
2#include <linux/errno.h> 2#include <linux/errno.h>
3#include <linux/fs.h> 3#include <linux/fs.h>
4#include <linux/pagemap.h> 4#include <linux/pagemap.h>
5#include <linux/xattr.h> 5#include <linux/xattr.h>
6#include <linux/reiserfs_xattr.h> 6#include "xattr.h"
7#include <asm/uaccess.h> 7#include <asm/uaccess.h>
8 8
9static int 9static int
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index bb36ab74eb45..e64f6b5f7ae5 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -538,14 +538,12 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent)
538 if (IS_ERR(root)) 538 if (IS_ERR(root))
539 goto error; 539 goto error;
540 540
541 sb->s_root = d_alloc_root(root); 541 sb->s_root = d_make_root(root);
542 if (!sb->s_root) 542 if (!sb->s_root)
543 goto error_i; 543 goto error;
544 544
545 return 0; 545 return 0;
546 546
547error_i:
548 iput(root);
549error: 547error:
550 return -EINVAL; 548 return -EINVAL;
551error_rsb_inval: 549error_rsb_inval:
diff --git a/fs/select.c b/fs/select.c
index e782258d0de3..6fb8943d580b 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -17,7 +17,7 @@
17#include <linux/kernel.h> 17#include <linux/kernel.h>
18#include <linux/sched.h> 18#include <linux/sched.h>
19#include <linux/syscalls.h> 19#include <linux/syscalls.h>
20#include <linux/module.h> 20#include <linux/export.h>
21#include <linux/slab.h> 21#include <linux/slab.h>
22#include <linux/poll.h> 22#include <linux/poll.h>
23#include <linux/personality.h> /* for STICKY_TIMEOUTS */ 23#include <linux/personality.h> /* for STICKY_TIMEOUTS */
@@ -223,7 +223,7 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
223 get_file(filp); 223 get_file(filp);
224 entry->filp = filp; 224 entry->filp = filp;
225 entry->wait_address = wait_address; 225 entry->wait_address = wait_address;
226 entry->key = p->key; 226 entry->key = p->_key;
227 init_waitqueue_func_entry(&entry->wait, pollwake); 227 init_waitqueue_func_entry(&entry->wait, pollwake);
228 entry->wait.private = pwq; 228 entry->wait.private = pwq;
229 add_wait_queue(wait_address, &entry->wait); 229 add_wait_queue(wait_address, &entry->wait);
@@ -386,13 +386,11 @@ get_max:
386static inline void wait_key_set(poll_table *wait, unsigned long in, 386static inline void wait_key_set(poll_table *wait, unsigned long in,
387 unsigned long out, unsigned long bit) 387 unsigned long out, unsigned long bit)
388{ 388{
389 if (wait) { 389 wait->_key = POLLEX_SET;
390 wait->key = POLLEX_SET; 390 if (in & bit)
391 if (in & bit) 391 wait->_key |= POLLIN_SET;
392 wait->key |= POLLIN_SET; 392 if (out & bit)
393 if (out & bit) 393 wait->_key |= POLLOUT_SET;
394 wait->key |= POLLOUT_SET;
395 }
396} 394}
397 395
398int do_select(int n, fd_set_bits *fds, struct timespec *end_time) 396int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
@@ -414,7 +412,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
414 poll_initwait(&table); 412 poll_initwait(&table);
415 wait = &table.pt; 413 wait = &table.pt;
416 if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { 414 if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
417 wait = NULL; 415 wait->_qproc = NULL;
418 timed_out = 1; 416 timed_out = 1;
419 } 417 }
420 418
@@ -459,17 +457,17 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
459 if ((mask & POLLIN_SET) && (in & bit)) { 457 if ((mask & POLLIN_SET) && (in & bit)) {
460 res_in |= bit; 458 res_in |= bit;
461 retval++; 459 retval++;
462 wait = NULL; 460 wait->_qproc = NULL;
463 } 461 }
464 if ((mask & POLLOUT_SET) && (out & bit)) { 462 if ((mask & POLLOUT_SET) && (out & bit)) {
465 res_out |= bit; 463 res_out |= bit;
466 retval++; 464 retval++;
467 wait = NULL; 465 wait->_qproc = NULL;
468 } 466 }
469 if ((mask & POLLEX_SET) && (ex & bit)) { 467 if ((mask & POLLEX_SET) && (ex & bit)) {
470 res_ex |= bit; 468 res_ex |= bit;
471 retval++; 469 retval++;
472 wait = NULL; 470 wait->_qproc = NULL;
473 } 471 }
474 } 472 }
475 } 473 }
@@ -481,7 +479,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
481 *rexp = res_ex; 479 *rexp = res_ex;
482 cond_resched(); 480 cond_resched();
483 } 481 }
484 wait = NULL; 482 wait->_qproc = NULL;
485 if (retval || timed_out || signal_pending(current)) 483 if (retval || timed_out || signal_pending(current))
486 break; 484 break;
487 if (table.error) { 485 if (table.error) {
@@ -720,7 +718,7 @@ struct poll_list {
720 * interested in events matching the pollfd->events mask, and the result 718 * interested in events matching the pollfd->events mask, and the result
721 * matching that mask is both recorded in pollfd->revents and returned. The 719 * matching that mask is both recorded in pollfd->revents and returned. The
722 * pwait poll_table will be used by the fd-provided poll handler for waiting, 720 * pwait poll_table will be used by the fd-provided poll handler for waiting,
723 * if non-NULL. 721 * if pwait->_qproc is non-NULL.
724 */ 722 */
725static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait) 723static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
726{ 724{
@@ -738,9 +736,7 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
738 if (file != NULL) { 736 if (file != NULL) {
739 mask = DEFAULT_POLLMASK; 737 mask = DEFAULT_POLLMASK;
740 if (file->f_op && file->f_op->poll) { 738 if (file->f_op && file->f_op->poll) {
741 if (pwait) 739 pwait->_key = pollfd->events|POLLERR|POLLHUP;
742 pwait->key = pollfd->events |
743 POLLERR | POLLHUP;
744 mask = file->f_op->poll(file, pwait); 740 mask = file->f_op->poll(file, pwait);
745 } 741 }
746 /* Mask out unneeded events. */ 742 /* Mask out unneeded events. */
@@ -763,7 +759,7 @@ static int do_poll(unsigned int nfds, struct poll_list *list,
763 759
764 /* Optimise the no-wait case */ 760 /* Optimise the no-wait case */
765 if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { 761 if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
766 pt = NULL; 762 pt->_qproc = NULL;
767 timed_out = 1; 763 timed_out = 1;
768 } 764 }
769 765
@@ -781,22 +777,22 @@ static int do_poll(unsigned int nfds, struct poll_list *list,
781 for (; pfd != pfd_end; pfd++) { 777 for (; pfd != pfd_end; pfd++) {
782 /* 778 /*
783 * Fish for events. If we found one, record it 779 * Fish for events. If we found one, record it
784 * and kill the poll_table, so we don't 780 * and kill poll_table->_qproc, so we don't
785 * needlessly register any other waiters after 781 * needlessly register any other waiters after
786 * this. They'll get immediately deregistered 782 * this. They'll get immediately deregistered
787 * when we break out and return. 783 * when we break out and return.
788 */ 784 */
789 if (do_pollfd(pfd, pt)) { 785 if (do_pollfd(pfd, pt)) {
790 count++; 786 count++;
791 pt = NULL; 787 pt->_qproc = NULL;
792 } 788 }
793 } 789 }
794 } 790 }
795 /* 791 /*
796 * All waiters have already been registered, so don't provide 792 * All waiters have already been registered, so don't provide
797 * a poll_table to them on the next loop iteration. 793 * a poll_table->_qproc to them on the next loop iteration.
798 */ 794 */
799 pt = NULL; 795 pt->_qproc = NULL;
800 if (!count) { 796 if (!count) {
801 count = wait->error; 797 count = wait->error;
802 if (signal_pending(current)) 798 if (signal_pending(current))
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 4023d6be939b..0cbd0494b79e 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -6,13 +6,29 @@
6 */ 6 */
7 7
8#include <linux/fs.h> 8#include <linux/fs.h>
9#include <linux/module.h> 9#include <linux/export.h>
10#include <linux/seq_file.h> 10#include <linux/seq_file.h>
11#include <linux/slab.h> 11#include <linux/slab.h>
12 12
13#include <asm/uaccess.h> 13#include <asm/uaccess.h>
14#include <asm/page.h> 14#include <asm/page.h>
15 15
16
17/*
18 * seq_files have a buffer which can may overflow. When this happens a larger
19 * buffer is reallocated and all the data will be printed again.
20 * The overflow state is true when m->count == m->size.
21 */
22static bool seq_overflow(struct seq_file *m)
23{
24 return m->count == m->size;
25}
26
27static void seq_set_overflow(struct seq_file *m)
28{
29 m->count = m->size;
30}
31
16/** 32/**
17 * seq_open - initialize sequential file 33 * seq_open - initialize sequential file
18 * @file: file we initialize 34 * @file: file we initialize
@@ -92,7 +108,7 @@ static int traverse(struct seq_file *m, loff_t offset)
92 error = 0; 108 error = 0;
93 m->count = 0; 109 m->count = 0;
94 } 110 }
95 if (m->count == m->size) 111 if (seq_overflow(m))
96 goto Eoverflow; 112 goto Eoverflow;
97 if (pos + m->count > offset) { 113 if (pos + m->count > offset) {
98 m->from = offset - pos; 114 m->from = offset - pos;
@@ -140,9 +156,21 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
140 156
141 mutex_lock(&m->lock); 157 mutex_lock(&m->lock);
142 158
159 /*
160 * seq_file->op->..m_start/m_stop/m_next may do special actions
161 * or optimisations based on the file->f_version, so we want to
162 * pass the file->f_version to those methods.
163 *
164 * seq_file->version is just copy of f_version, and seq_file
165 * methods can treat it simply as file version.
166 * It is copied in first and copied out after all operations.
167 * It is convenient to have it as part of structure to avoid the
168 * need of passing another argument to all the seq_file methods.
169 */
170 m->version = file->f_version;
171
143 /* Don't assume *ppos is where we left it */ 172 /* Don't assume *ppos is where we left it */
144 if (unlikely(*ppos != m->read_pos)) { 173 if (unlikely(*ppos != m->read_pos)) {
145 m->read_pos = *ppos;
146 while ((err = traverse(m, *ppos)) == -EAGAIN) 174 while ((err = traverse(m, *ppos)) == -EAGAIN)
147 ; 175 ;
148 if (err) { 176 if (err) {
@@ -152,21 +180,11 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
152 m->index = 0; 180 m->index = 0;
153 m->count = 0; 181 m->count = 0;
154 goto Done; 182 goto Done;
183 } else {
184 m->read_pos = *ppos;
155 } 185 }
156 } 186 }
157 187
158 /*
159 * seq_file->op->..m_start/m_stop/m_next may do special actions
160 * or optimisations based on the file->f_version, so we want to
161 * pass the file->f_version to those methods.
162 *
163 * seq_file->version is just copy of f_version, and seq_file
164 * methods can treat it simply as file version.
165 * It is copied in first and copied out after all operations.
166 * It is convenient to have it as part of structure to avoid the
167 * need of passing another argument to all the seq_file methods.
168 */
169 m->version = file->f_version;
170 /* grab buffer if we didn't have one */ 188 /* grab buffer if we didn't have one */
171 if (!m->buf) { 189 if (!m->buf) {
172 m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL); 190 m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL);
@@ -232,7 +250,7 @@ Fill:
232 break; 250 break;
233 } 251 }
234 err = m->op->show(m, p); 252 err = m->op->show(m, p);
235 if (m->count == m->size || err) { 253 if (seq_overflow(m) || err) {
236 m->count = offs; 254 m->count = offs;
237 if (likely(err <= 0)) 255 if (likely(err <= 0))
238 break; 256 break;
@@ -359,7 +377,7 @@ int seq_escape(struct seq_file *m, const char *s, const char *esc)
359 *p++ = '0' + (c & 07); 377 *p++ = '0' + (c & 07);
360 continue; 378 continue;
361 } 379 }
362 m->count = m->size; 380 seq_set_overflow(m);
363 return -1; 381 return -1;
364 } 382 }
365 m->count = p - m->buf; 383 m->count = p - m->buf;
@@ -381,7 +399,7 @@ int seq_printf(struct seq_file *m, const char *f, ...)
381 return 0; 399 return 0;
382 } 400 }
383 } 401 }
384 m->count = m->size; 402 seq_set_overflow(m);
385 return -1; 403 return -1;
386} 404}
387EXPORT_SYMBOL(seq_printf); 405EXPORT_SYMBOL(seq_printf);
@@ -510,7 +528,7 @@ int seq_bitmap(struct seq_file *m, const unsigned long *bits,
510 return 0; 528 return 0;
511 } 529 }
512 } 530 }
513 m->count = m->size; 531 seq_set_overflow(m);
514 return -1; 532 return -1;
515} 533}
516EXPORT_SYMBOL(seq_bitmap); 534EXPORT_SYMBOL(seq_bitmap);
@@ -526,7 +544,7 @@ int seq_bitmap_list(struct seq_file *m, const unsigned long *bits,
526 return 0; 544 return 0;
527 } 545 }
528 } 546 }
529 m->count = m->size; 547 seq_set_overflow(m);
530 return -1; 548 return -1;
531} 549}
532EXPORT_SYMBOL(seq_bitmap_list); 550EXPORT_SYMBOL(seq_bitmap_list);
@@ -637,11 +655,63 @@ int seq_puts(struct seq_file *m, const char *s)
637 m->count += len; 655 m->count += len;
638 return 0; 656 return 0;
639 } 657 }
640 m->count = m->size; 658 seq_set_overflow(m);
641 return -1; 659 return -1;
642} 660}
643EXPORT_SYMBOL(seq_puts); 661EXPORT_SYMBOL(seq_puts);
644 662
663/*
664 * A helper routine for putting decimal numbers without rich format of printf().
665 * only 'unsigned long long' is supported.
666 * This routine will put one byte delimiter + number into seq_file.
667 * This routine is very quick when you show lots of numbers.
668 * In usual cases, it will be better to use seq_printf(). It's easier to read.
669 */
670int seq_put_decimal_ull(struct seq_file *m, char delimiter,
671 unsigned long long num)
672{
673 int len;
674
675 if (m->count + 2 >= m->size) /* we'll write 2 bytes at least */
676 goto overflow;
677
678 if (delimiter)
679 m->buf[m->count++] = delimiter;
680
681 if (num < 10) {
682 m->buf[m->count++] = num + '0';
683 return 0;
684 }
685
686 len = num_to_str(m->buf + m->count, m->size - m->count, num);
687 if (!len)
688 goto overflow;
689 m->count += len;
690 return 0;
691overflow:
692 seq_set_overflow(m);
693 return -1;
694}
695EXPORT_SYMBOL(seq_put_decimal_ull);
696
697int seq_put_decimal_ll(struct seq_file *m, char delimiter,
698 long long num)
699{
700 if (num < 0) {
701 if (m->count + 3 >= m->size) {
702 seq_set_overflow(m);
703 return -1;
704 }
705 if (delimiter)
706 m->buf[m->count++] = delimiter;
707 num = -num;
708 delimiter = '-';
709 }
710 return seq_put_decimal_ull(m, delimiter, num);
711
712}
713EXPORT_SYMBOL(seq_put_decimal_ll);
714
645/** 715/**
646 * seq_write - write arbitrary data to buffer 716 * seq_write - write arbitrary data to buffer
647 * @seq: seq_file identifying the buffer to which data should be written 717 * @seq: seq_file identifying the buffer to which data should be written
@@ -657,7 +727,7 @@ int seq_write(struct seq_file *seq, const void *data, size_t len)
657 seq->count += len; 727 seq->count += len;
658 return 0; 728 return 0;
659 } 729 }
660 seq->count = seq->size; 730 seq_set_overflow(seq);
661 return -1; 731 return -1;
662} 732}
663EXPORT_SYMBOL(seq_write); 733EXPORT_SYMBOL(seq_write);
diff --git a/fs/splice.c b/fs/splice.c
index 1ec0493266b3..5f883de7ef3a 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -25,7 +25,7 @@
25#include <linux/mm_inline.h> 25#include <linux/mm_inline.h>
26#include <linux/swap.h> 26#include <linux/swap.h>
27#include <linux/writeback.h> 27#include <linux/writeback.h>
28#include <linux/module.h> 28#include <linux/export.h>
29#include <linux/syscalls.h> 29#include <linux/syscalls.h>
30#include <linux/uio.h> 30#include <linux/uio.h>
31#include <linux/security.h> 31#include <linux/security.h>
@@ -737,15 +737,12 @@ int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
737 goto out; 737 goto out;
738 738
739 if (buf->page != page) { 739 if (buf->page != page) {
740 /*
741 * Careful, ->map() uses KM_USER0!
742 */
743 char *src = buf->ops->map(pipe, buf, 1); 740 char *src = buf->ops->map(pipe, buf, 1);
744 char *dst = kmap_atomic(page, KM_USER1); 741 char *dst = kmap_atomic(page);
745 742
746 memcpy(dst + offset, src + buf->offset, this_len); 743 memcpy(dst + offset, src + buf->offset, this_len);
747 flush_dcache_page(page); 744 flush_dcache_page(page);
748 kunmap_atomic(dst, KM_USER1); 745 kunmap_atomic(dst);
749 buf->ops->unmap(pipe, buf, src); 746 buf->ops->unmap(pipe, buf, src);
750 } 747 }
751 ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len, 748 ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len,
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index 38bb1c640559..8ca62c28fe12 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -464,10 +464,10 @@ static int squashfs_readpage(struct file *file, struct page *page)
464 if (PageUptodate(push_page)) 464 if (PageUptodate(push_page))
465 goto skip_page; 465 goto skip_page;
466 466
467 pageaddr = kmap_atomic(push_page, KM_USER0); 467 pageaddr = kmap_atomic(push_page);
468 squashfs_copy_data(pageaddr, buffer, offset, avail); 468 squashfs_copy_data(pageaddr, buffer, offset, avail);
469 memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail); 469 memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail);
470 kunmap_atomic(pageaddr, KM_USER0); 470 kunmap_atomic(pageaddr);
471 flush_dcache_page(push_page); 471 flush_dcache_page(push_page);
472 SetPageUptodate(push_page); 472 SetPageUptodate(push_page);
473skip_page: 473skip_page:
@@ -484,9 +484,9 @@ skip_page:
484error_out: 484error_out:
485 SetPageError(page); 485 SetPageError(page);
486out: 486out:
487 pageaddr = kmap_atomic(page, KM_USER0); 487 pageaddr = kmap_atomic(page);
488 memset(pageaddr, 0, PAGE_CACHE_SIZE); 488 memset(pageaddr, 0, PAGE_CACHE_SIZE);
489 kunmap_atomic(pageaddr, KM_USER0); 489 kunmap_atomic(pageaddr);
490 flush_dcache_page(page); 490 flush_dcache_page(page);
491 if (!PageError(page)) 491 if (!PageError(page))
492 SetPageUptodate(page); 492 SetPageUptodate(page);
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index ecaa2f7bdb8f..970b1167e7cb 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -316,11 +316,10 @@ check_directory_table:
316 } 316 }
317 insert_inode_hash(root); 317 insert_inode_hash(root);
318 318
319 sb->s_root = d_alloc_root(root); 319 sb->s_root = d_make_root(root);
320 if (sb->s_root == NULL) { 320 if (sb->s_root == NULL) {
321 ERROR("Root inode create failed\n"); 321 ERROR("Root inode create failed\n");
322 err = -ENOMEM; 322 err = -ENOMEM;
323 iput(root);
324 goto failed_mount; 323 goto failed_mount;
325 } 324 }
326 325
diff --git a/fs/squashfs/symlink.c b/fs/squashfs/symlink.c
index 1191817264cc..12806dffb345 100644
--- a/fs/squashfs/symlink.c
+++ b/fs/squashfs/symlink.c
@@ -90,14 +90,14 @@ static int squashfs_symlink_readpage(struct file *file, struct page *page)
90 goto error_out; 90 goto error_out;
91 } 91 }
92 92
93 pageaddr = kmap_atomic(page, KM_USER0); 93 pageaddr = kmap_atomic(page);
94 copied = squashfs_copy_data(pageaddr + bytes, entry, offset, 94 copied = squashfs_copy_data(pageaddr + bytes, entry, offset,
95 length - bytes); 95 length - bytes);
96 if (copied == length - bytes) 96 if (copied == length - bytes)
97 memset(pageaddr + length, 0, PAGE_CACHE_SIZE - length); 97 memset(pageaddr + length, 0, PAGE_CACHE_SIZE - length);
98 else 98 else
99 block = entry->next_index; 99 block = entry->next_index;
100 kunmap_atomic(pageaddr, KM_USER0); 100 kunmap_atomic(pageaddr);
101 squashfs_cache_put(entry); 101 squashfs_cache_put(entry);
102 } 102 }
103 103
diff --git a/fs/stack.c b/fs/stack.c
index 9c11519245a6..5b5388250e29 100644
--- a/fs/stack.c
+++ b/fs/stack.c
@@ -1,4 +1,4 @@
1#include <linux/module.h> 1#include <linux/export.h>
2#include <linux/fs.h> 2#include <linux/fs.h>
3#include <linux/fs_stack.h> 3#include <linux/fs_stack.h>
4 4
diff --git a/fs/stat.c b/fs/stat.c
index 8806b8997d2e..c733dc5753ae 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -4,7 +4,7 @@
4 * Copyright (C) 1991, 1992 Linus Torvalds 4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */ 5 */
6 6
7#include <linux/module.h> 7#include <linux/export.h>
8#include <linux/mm.h> 8#include <linux/mm.h>
9#include <linux/errno.h> 9#include <linux/errno.h>
10#include <linux/file.h> 10#include <linux/file.h>
@@ -307,7 +307,7 @@ SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname,
307 if (inode->i_op->readlink) { 307 if (inode->i_op->readlink) {
308 error = security_inode_readlink(path.dentry); 308 error = security_inode_readlink(path.dentry);
309 if (!error) { 309 if (!error) {
310 touch_atime(path.mnt, path.dentry); 310 touch_atime(&path);
311 error = inode->i_op->readlink(path.dentry, 311 error = inode->i_op->readlink(path.dentry,
312 buf, bufsiz); 312 buf, bufsiz);
313 } 313 }
diff --git a/fs/statfs.c b/fs/statfs.c
index 2aa6a22e0be2..43e6b6fe4e85 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -1,5 +1,5 @@
1#include <linux/syscalls.h> 1#include <linux/syscalls.h>
2#include <linux/module.h> 2#include <linux/export.h>
3#include <linux/fs.h> 3#include <linux/fs.h>
4#include <linux/file.h> 4#include <linux/file.h>
5#include <linux/mount.h> 5#include <linux/mount.h>
diff --git a/fs/super.c b/fs/super.c
index 6277ec6cb60a..cf001775617f 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -20,7 +20,7 @@
20 * Heavily rewritten for 'one fs - one tree' dcache architecture. AV, Mar 2000 20 * Heavily rewritten for 'one fs - one tree' dcache architecture. AV, Mar 2000
21 */ 21 */
22 22
23#include <linux/module.h> 23#include <linux/export.h>
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/acct.h> 25#include <linux/acct.h>
26#include <linux/blkdev.h> 26#include <linux/blkdev.h>
@@ -32,6 +32,7 @@
32#include <linux/backing-dev.h> 32#include <linux/backing-dev.h>
33#include <linux/rculist_bl.h> 33#include <linux/rculist_bl.h>
34#include <linux/cleancache.h> 34#include <linux/cleancache.h>
35#include <linux/fsnotify.h>
35#include "internal.h" 36#include "internal.h"
36 37
37 38
@@ -250,7 +251,7 @@ void deactivate_locked_super(struct super_block *s)
250{ 251{
251 struct file_system_type *fs = s->s_type; 252 struct file_system_type *fs = s->s_type;
252 if (atomic_dec_and_test(&s->s_active)) { 253 if (atomic_dec_and_test(&s->s_active)) {
253 cleancache_flush_fs(s); 254 cleancache_invalidate_fs(s);
254 fs->kill_sb(s); 255 fs->kill_sb(s);
255 256
256 /* caches are now gone, we can safely kill the shrinker now */ 257 /* caches are now gone, we can safely kill the shrinker now */
diff --git a/fs/sync.c b/fs/sync.c
index f3501ef39235..0e8db939d96f 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -6,7 +6,7 @@
6#include <linux/file.h> 6#include <linux/file.h>
7#include <linux/fs.h> 7#include <linux/fs.h>
8#include <linux/slab.h> 8#include <linux/slab.h>
9#include <linux/module.h> 9#include <linux/export.h>
10#include <linux/namei.h> 10#include <linux/namei.h>
11#include <linux/sched.h> 11#include <linux/sched.h>
12#include <linux/writeback.h> 12#include <linux/writeback.h>
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 7fdf6a7b7436..2a7a3f5d1ca6 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -22,76 +22,103 @@
22#include <linux/mutex.h> 22#include <linux/mutex.h>
23#include <linux/slab.h> 23#include <linux/slab.h>
24#include <linux/security.h> 24#include <linux/security.h>
25#include <linux/hash.h>
25#include "sysfs.h" 26#include "sysfs.h"
26 27
27DEFINE_MUTEX(sysfs_mutex); 28DEFINE_MUTEX(sysfs_mutex);
28DEFINE_SPINLOCK(sysfs_assoc_lock); 29DEFINE_SPINLOCK(sysfs_assoc_lock);
29 30
31#define to_sysfs_dirent(X) rb_entry((X), struct sysfs_dirent, s_rb);
32
30static DEFINE_SPINLOCK(sysfs_ino_lock); 33static DEFINE_SPINLOCK(sysfs_ino_lock);
31static DEFINE_IDA(sysfs_ino_ida); 34static DEFINE_IDA(sysfs_ino_ida);
32 35
33/** 36/**
34 * sysfs_link_sibling - link sysfs_dirent into sibling list 37 * sysfs_name_hash
38 * @ns: Namespace tag to hash
39 * @name: Null terminated string to hash
40 *
41 * Returns 31 bit hash of ns + name (so it fits in an off_t )
42 */
43static unsigned int sysfs_name_hash(const void *ns, const char *name)
44{
45 unsigned long hash = init_name_hash();
46 unsigned int len = strlen(name);
47 while (len--)
48 hash = partial_name_hash(*name++, hash);
49 hash = ( end_name_hash(hash) ^ hash_ptr( (void *)ns, 31 ) );
50 hash &= 0x7fffffffU;
51 /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */
52 if (hash < 1)
53 hash += 2;
54 if (hash >= INT_MAX)
55 hash = INT_MAX - 1;
56 return hash;
57}
58
59static int sysfs_name_compare(unsigned int hash, const void *ns,
60 const char *name, const struct sysfs_dirent *sd)
61{
62 if (hash != sd->s_hash)
63 return hash - sd->s_hash;
64 if (ns != sd->s_ns)
65 return ns - sd->s_ns;
66 return strcmp(name, sd->s_name);
67}
68
69static int sysfs_sd_compare(const struct sysfs_dirent *left,
70 const struct sysfs_dirent *right)
71{
72 return sysfs_name_compare(left->s_hash, left->s_ns, left->s_name,
73 right);
74}
75
76/**
77 * sysfs_link_subling - link sysfs_dirent into sibling rbtree
35 * @sd: sysfs_dirent of interest 78 * @sd: sysfs_dirent of interest
36 * 79 *
37 * Link @sd into its sibling list which starts from 80 * Link @sd into its sibling rbtree which starts from
38 * sd->s_parent->s_dir.children. 81 * sd->s_parent->s_dir.children.
39 * 82 *
40 * Locking: 83 * Locking:
41 * mutex_lock(sysfs_mutex) 84 * mutex_lock(sysfs_mutex)
85 *
86 * RETURNS:
87 * 0 on susccess -EEXIST on failure.
42 */ 88 */
43static void sysfs_link_sibling(struct sysfs_dirent *sd) 89static int sysfs_link_sibling(struct sysfs_dirent *sd)
44{ 90{
45 struct sysfs_dirent *parent_sd = sd->s_parent; 91 struct rb_node **node = &sd->s_parent->s_dir.children.rb_node;
46 92 struct rb_node *parent = NULL;
47 struct rb_node **p;
48 struct rb_node *parent;
49 93
50 if (sysfs_type(sd) == SYSFS_DIR) 94 if (sysfs_type(sd) == SYSFS_DIR)
51 parent_sd->s_dir.subdirs++; 95 sd->s_parent->s_dir.subdirs++;
52 96
53 p = &parent_sd->s_dir.inode_tree.rb_node; 97 while (*node) {
54 parent = NULL; 98 struct sysfs_dirent *pos;
55 while (*p) { 99 int result;
56 parent = *p; 100
57#define node rb_entry(parent, struct sysfs_dirent, inode_node) 101 pos = to_sysfs_dirent(*node);
58 if (sd->s_ino < node->s_ino) { 102 parent = *node;
59 p = &node->inode_node.rb_left; 103 result = sysfs_sd_compare(sd, pos);
60 } else if (sd->s_ino > node->s_ino) { 104 if (result < 0)
61 p = &node->inode_node.rb_right; 105 node = &pos->s_rb.rb_left;
62 } else { 106 else if (result > 0)
63 printk(KERN_CRIT "sysfs: inserting duplicate inode '%lx'\n", 107 node = &pos->s_rb.rb_right;
64 (unsigned long) sd->s_ino); 108 else
65 BUG(); 109 return -EEXIST;
66 }
67#undef node
68 }
69 rb_link_node(&sd->inode_node, parent, p);
70 rb_insert_color(&sd->inode_node, &parent_sd->s_dir.inode_tree);
71
72 p = &parent_sd->s_dir.name_tree.rb_node;
73 parent = NULL;
74 while (*p) {
75 int c;
76 parent = *p;
77#define node rb_entry(parent, struct sysfs_dirent, name_node)
78 c = strcmp(sd->s_name, node->s_name);
79 if (c < 0) {
80 p = &node->name_node.rb_left;
81 } else {
82 p = &node->name_node.rb_right;
83 }
84#undef node
85 } 110 }
86 rb_link_node(&sd->name_node, parent, p); 111 /* add new node and rebalance the tree */
87 rb_insert_color(&sd->name_node, &parent_sd->s_dir.name_tree); 112 rb_link_node(&sd->s_rb, parent, node);
113 rb_insert_color(&sd->s_rb, &sd->s_parent->s_dir.children);
114 return 0;
88} 115}
89 116
90/** 117/**
91 * sysfs_unlink_sibling - unlink sysfs_dirent from sibling list 118 * sysfs_unlink_sibling - unlink sysfs_dirent from sibling rbtree
92 * @sd: sysfs_dirent of interest 119 * @sd: sysfs_dirent of interest
93 * 120 *
94 * Unlink @sd from its sibling list which starts from 121 * Unlink @sd from its sibling rbtree which starts from
95 * sd->s_parent->s_dir.children. 122 * sd->s_parent->s_dir.children.
96 * 123 *
97 * Locking: 124 * Locking:
@@ -102,8 +129,7 @@ static void sysfs_unlink_sibling(struct sysfs_dirent *sd)
102 if (sysfs_type(sd) == SYSFS_DIR) 129 if (sysfs_type(sd) == SYSFS_DIR)
103 sd->s_parent->s_dir.subdirs--; 130 sd->s_parent->s_dir.subdirs--;
104 131
105 rb_erase(&sd->inode_node, &sd->s_parent->s_dir.inode_tree); 132 rb_erase(&sd->s_rb, &sd->s_parent->s_dir.children);
106 rb_erase(&sd->name_node, &sd->s_parent->s_dir.name_tree);
107} 133}
108 134
109/** 135/**
@@ -198,7 +224,7 @@ static void sysfs_deactivate(struct sysfs_dirent *sd)
198 rwsem_release(&sd->dep_map, 1, _RET_IP_); 224 rwsem_release(&sd->dep_map, 1, _RET_IP_);
199} 225}
200 226
201static int sysfs_alloc_ino(ino_t *pino) 227static int sysfs_alloc_ino(unsigned int *pino)
202{ 228{
203 int ino, rc; 229 int ino, rc;
204 230
@@ -217,7 +243,7 @@ static int sysfs_alloc_ino(ino_t *pino)
217 return rc; 243 return rc;
218} 244}
219 245
220static void sysfs_free_ino(ino_t ino) 246static void sysfs_free_ino(unsigned int ino)
221{ 247{
222 spin_lock(&sysfs_ino_lock); 248 spin_lock(&sysfs_ino_lock);
223 ida_remove(&sysfs_ino_ida, ino); 249 ida_remove(&sysfs_ino_ida, ino);
@@ -402,6 +428,7 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
402int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) 428int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
403{ 429{
404 struct sysfs_inode_attrs *ps_iattr; 430 struct sysfs_inode_attrs *ps_iattr;
431 int ret;
405 432
406 if (!!sysfs_ns_type(acxt->parent_sd) != !!sd->s_ns) { 433 if (!!sysfs_ns_type(acxt->parent_sd) != !!sd->s_ns) {
407 WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n", 434 WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n",
@@ -410,12 +437,12 @@ int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
410 return -EINVAL; 437 return -EINVAL;
411 } 438 }
412 439
413 if (sysfs_find_dirent(acxt->parent_sd, sd->s_ns, sd->s_name)) 440 sd->s_hash = sysfs_name_hash(sd->s_ns, sd->s_name);
414 return -EEXIST;
415
416 sd->s_parent = sysfs_get(acxt->parent_sd); 441 sd->s_parent = sysfs_get(acxt->parent_sd);
417 442
418 sysfs_link_sibling(sd); 443 ret = sysfs_link_sibling(sd);
444 if (ret)
445 return ret;
419 446
420 /* Update timestamps on the parent */ 447 /* Update timestamps on the parent */
421 ps_iattr = acxt->parent_sd->s_iattr; 448 ps_iattr = acxt->parent_sd->s_iattr;
@@ -565,8 +592,8 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
565 const void *ns, 592 const void *ns,
566 const unsigned char *name) 593 const unsigned char *name)
567{ 594{
568 struct rb_node *p = parent_sd->s_dir.name_tree.rb_node; 595 struct rb_node *node = parent_sd->s_dir.children.rb_node;
569 struct sysfs_dirent *found = NULL; 596 unsigned int hash;
570 597
571 if (!!sysfs_ns_type(parent_sd) != !!ns) { 598 if (!!sysfs_ns_type(parent_sd) != !!ns) {
572 WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n", 599 WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n",
@@ -575,33 +602,21 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
575 return NULL; 602 return NULL;
576 } 603 }
577 604
578 while (p) { 605 hash = sysfs_name_hash(ns, name);
579 int c; 606 while (node) {
580#define node rb_entry(p, struct sysfs_dirent, name_node) 607 struct sysfs_dirent *sd;
581 c = strcmp(name, node->s_name); 608 int result;
582 if (c < 0) { 609
583 p = node->name_node.rb_left; 610 sd = to_sysfs_dirent(node);
584 } else if (c > 0) { 611 result = sysfs_name_compare(hash, ns, name, sd);
585 p = node->name_node.rb_right; 612 if (result < 0)
586 } else { 613 node = node->rb_left;
587 found = node; 614 else if (result > 0)
588 p = node->name_node.rb_left; 615 node = node->rb_right;
589 } 616 else
590#undef node 617 return sd;
591 }
592
593 if (found) {
594 while (found->s_ns != ns) {
595 p = rb_next(&found->name_node);
596 if (!p)
597 return NULL;
598 found = rb_entry(p, struct sysfs_dirent, name_node);
599 if (strcmp(name, found->s_name))
600 return NULL;
601 }
602 } 618 }
603 619 return NULL;
604 return found;
605} 620}
606 621
607/** 622/**
@@ -804,9 +819,9 @@ static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd)
804 819
805 pr_debug("sysfs %s: removing dir\n", dir_sd->s_name); 820 pr_debug("sysfs %s: removing dir\n", dir_sd->s_name);
806 sysfs_addrm_start(&acxt, dir_sd); 821 sysfs_addrm_start(&acxt, dir_sd);
807 pos = rb_first(&dir_sd->s_dir.inode_tree); 822 pos = rb_first(&dir_sd->s_dir.children);
808 while (pos) { 823 while (pos) {
809 struct sysfs_dirent *sd = rb_entry(pos, struct sysfs_dirent, inode_node); 824 struct sysfs_dirent *sd = to_sysfs_dirent(pos);
810 pos = rb_next(pos); 825 pos = rb_next(pos);
811 if (sysfs_type(sd) != SYSFS_DIR) 826 if (sysfs_type(sd) != SYSFS_DIR)
812 sysfs_remove_one(&acxt, sd); 827 sysfs_remove_one(&acxt, sd);
@@ -863,6 +878,7 @@ int sysfs_rename(struct sysfs_dirent *sd,
863 878
864 dup_name = sd->s_name; 879 dup_name = sd->s_name;
865 sd->s_name = new_name; 880 sd->s_name = new_name;
881 sd->s_hash = sysfs_name_hash(sd->s_ns, sd->s_name);
866 } 882 }
867 883
868 /* Move to the appropriate place in the appropriate directories rbtree. */ 884 /* Move to the appropriate place in the appropriate directories rbtree. */
@@ -919,38 +935,36 @@ static int sysfs_dir_release(struct inode *inode, struct file *filp)
919} 935}
920 936
921static struct sysfs_dirent *sysfs_dir_pos(const void *ns, 937static struct sysfs_dirent *sysfs_dir_pos(const void *ns,
922 struct sysfs_dirent *parent_sd, ino_t ino, struct sysfs_dirent *pos) 938 struct sysfs_dirent *parent_sd, loff_t hash, struct sysfs_dirent *pos)
923{ 939{
924 if (pos) { 940 if (pos) {
925 int valid = !(pos->s_flags & SYSFS_FLAG_REMOVED) && 941 int valid = !(pos->s_flags & SYSFS_FLAG_REMOVED) &&
926 pos->s_parent == parent_sd && 942 pos->s_parent == parent_sd &&
927 ino == pos->s_ino; 943 hash == pos->s_hash;
928 sysfs_put(pos); 944 sysfs_put(pos);
929 if (!valid) 945 if (!valid)
930 pos = NULL; 946 pos = NULL;
931 } 947 }
932 if (!pos && (ino > 1) && (ino < INT_MAX)) { 948 if (!pos && (hash > 1) && (hash < INT_MAX)) {
933 struct rb_node *p = parent_sd->s_dir.inode_tree.rb_node; 949 struct rb_node *node = parent_sd->s_dir.children.rb_node;
934 while (p) { 950 while (node) {
935#define node rb_entry(p, struct sysfs_dirent, inode_node) 951 pos = to_sysfs_dirent(node);
936 if (ino < node->s_ino) { 952
937 pos = node; 953 if (hash < pos->s_hash)
938 p = node->inode_node.rb_left; 954 node = node->rb_left;
939 } else if (ino > node->s_ino) { 955 else if (hash > pos->s_hash)
940 p = node->inode_node.rb_right; 956 node = node->rb_right;
941 } else { 957 else
942 pos = node;
943 break; 958 break;
944 }
945#undef node
946 } 959 }
947 } 960 }
961 /* Skip over entries in the wrong namespace */
948 while (pos && pos->s_ns != ns) { 962 while (pos && pos->s_ns != ns) {
949 struct rb_node *p = rb_next(&pos->inode_node); 963 struct rb_node *node = rb_next(&pos->s_rb);
950 if (!p) 964 if (!node)
951 pos = NULL; 965 pos = NULL;
952 else 966 else
953 pos = rb_entry(p, struct sysfs_dirent, inode_node); 967 pos = to_sysfs_dirent(node);
954 } 968 }
955 return pos; 969 return pos;
956} 970}
@@ -960,11 +974,11 @@ static struct sysfs_dirent *sysfs_dir_next_pos(const void *ns,
960{ 974{
961 pos = sysfs_dir_pos(ns, parent_sd, ino, pos); 975 pos = sysfs_dir_pos(ns, parent_sd, ino, pos);
962 if (pos) do { 976 if (pos) do {
963 struct rb_node *p = rb_next(&pos->inode_node); 977 struct rb_node *node = rb_next(&pos->s_rb);
964 if (!p) 978 if (!node)
965 pos = NULL; 979 pos = NULL;
966 else 980 else
967 pos = rb_entry(p, struct sysfs_dirent, inode_node); 981 pos = to_sysfs_dirent(node);
968 } while (pos && pos->s_ns != ns); 982 } while (pos && pos->s_ns != ns);
969 return pos; 983 return pos;
970} 984}
@@ -1006,7 +1020,7 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
1006 len = strlen(name); 1020 len = strlen(name);
1007 ino = pos->s_ino; 1021 ino = pos->s_ino;
1008 type = dt_type(pos); 1022 type = dt_type(pos);
1009 filp->f_pos = ino; 1023 filp->f_pos = pos->s_hash;
1010 filp->private_data = sysfs_get(pos); 1024 filp->private_data = sysfs_get(pos);
1011 1025
1012 mutex_unlock(&sysfs_mutex); 1026 mutex_unlock(&sysfs_mutex);
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 85eb81683a29..feb2d69396cf 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -136,12 +136,13 @@ static int sysfs_sd_setsecdata(struct sysfs_dirent *sd, void **secdata, u32 *sec
136 void *old_secdata; 136 void *old_secdata;
137 size_t old_secdata_len; 137 size_t old_secdata_len;
138 138
139 iattrs = sd->s_iattr; 139 if (!sd->s_iattr) {
140 if (!iattrs) 140 sd->s_iattr = sysfs_init_inode_attrs(sd);
141 iattrs = sysfs_init_inode_attrs(sd); 141 if (!sd->s_iattr)
142 if (!iattrs) 142 return -ENOMEM;
143 return -ENOMEM; 143 }
144 144
145 iattrs = sd->s_iattr;
145 old_secdata = iattrs->ia_secdata; 146 old_secdata = iattrs->ia_secdata;
146 old_secdata_len = iattrs->ia_secdata_len; 147 old_secdata_len = iattrs->ia_secdata_len;
147 148
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index e34f0d99ea4e..52c3bdb66a84 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -36,7 +36,7 @@ struct sysfs_dirent sysfs_root = {
36 .s_name = "", 36 .s_name = "",
37 .s_count = ATOMIC_INIT(1), 37 .s_count = ATOMIC_INIT(1),
38 .s_flags = SYSFS_DIR | (KOBJ_NS_TYPE_NONE << SYSFS_NS_TYPE_SHIFT), 38 .s_flags = SYSFS_DIR | (KOBJ_NS_TYPE_NONE << SYSFS_NS_TYPE_SHIFT),
39 .s_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, 39 .s_mode = S_IFDIR | S_IRUGO | S_IXUGO,
40 .s_ino = 1, 40 .s_ino = 1,
41}; 41};
42 42
@@ -61,10 +61,9 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
61 } 61 }
62 62
63 /* instantiate and link root dentry */ 63 /* instantiate and link root dentry */
64 root = d_alloc_root(inode); 64 root = d_make_root(inode);
65 if (!root) { 65 if (!root) {
66 pr_debug("%s: could not get root dentry!\n",__func__); 66 pr_debug("%s: could not get root dentry!\n",__func__);
67 iput(inode);
68 return -ENOMEM; 67 return -ENOMEM;
69 } 68 }
70 root->d_fsdata = &sysfs_root; 69 root->d_fsdata = &sysfs_root;
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 7484a36ee678..661a9639570b 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -20,9 +20,8 @@ struct sysfs_elem_dir {
20 struct kobject *kobj; 20 struct kobject *kobj;
21 21
22 unsigned long subdirs; 22 unsigned long subdirs;
23 23 /* children rbtree starts here and goes through sd->s_rb */
24 struct rb_root inode_tree; 24 struct rb_root children;
25 struct rb_root name_tree;
26}; 25};
27 26
28struct sysfs_elem_symlink { 27struct sysfs_elem_symlink {
@@ -62,8 +61,7 @@ struct sysfs_dirent {
62 struct sysfs_dirent *s_parent; 61 struct sysfs_dirent *s_parent;
63 const char *s_name; 62 const char *s_name;
64 63
65 struct rb_node inode_node; 64 struct rb_node s_rb;
66 struct rb_node name_node;
67 65
68 union { 66 union {
69 struct completion *completion; 67 struct completion *completion;
@@ -71,6 +69,7 @@ struct sysfs_dirent {
71 } u; 69 } u;
72 70
73 const void *s_ns; /* namespace tag */ 71 const void *s_ns; /* namespace tag */
72 unsigned int s_hash; /* ns + name hash */
74 union { 73 union {
75 struct sysfs_elem_dir s_dir; 74 struct sysfs_elem_dir s_dir;
76 struct sysfs_elem_symlink s_symlink; 75 struct sysfs_elem_symlink s_symlink;
@@ -78,9 +77,9 @@ struct sysfs_dirent {
78 struct sysfs_elem_bin_attr s_bin_attr; 77 struct sysfs_elem_bin_attr s_bin_attr;
79 }; 78 };
80 79
81 unsigned int s_flags; 80 unsigned short s_flags;
82 umode_t s_mode; 81 umode_t s_mode;
83 ino_t s_ino; 82 unsigned int s_ino;
84 struct sysfs_inode_attrs *s_iattr; 83 struct sysfs_inode_attrs *s_iattr;
85}; 84};
86 85
@@ -95,11 +94,11 @@ struct sysfs_dirent {
95#define SYSFS_ACTIVE_REF (SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR) 94#define SYSFS_ACTIVE_REF (SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR)
96 95
97/* identify any namespace tag on sysfs_dirents */ 96/* identify any namespace tag on sysfs_dirents */
98#define SYSFS_NS_TYPE_MASK 0xff00 97#define SYSFS_NS_TYPE_MASK 0xf00
99#define SYSFS_NS_TYPE_SHIFT 8 98#define SYSFS_NS_TYPE_SHIFT 8
100 99
101#define SYSFS_FLAG_MASK ~(SYSFS_NS_TYPE_MASK|SYSFS_TYPE_MASK) 100#define SYSFS_FLAG_MASK ~(SYSFS_NS_TYPE_MASK|SYSFS_TYPE_MASK)
102#define SYSFS_FLAG_REMOVED 0x020000 101#define SYSFS_FLAG_REMOVED 0x02000
103 102
104static inline unsigned int sysfs_type(struct sysfs_dirent *sd) 103static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
105{ 104{
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index b217797e621b..d7466e293614 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -121,9 +121,6 @@ static int sysv_link(struct dentry * old_dentry, struct inode * dir,
121{ 121{
122 struct inode *inode = old_dentry->d_inode; 122 struct inode *inode = old_dentry->d_inode;
123 123
124 if (inode->i_nlink >= SYSV_SB(inode->i_sb)->s_link_max)
125 return -EMLINK;
126
127 inode->i_ctime = CURRENT_TIME_SEC; 124 inode->i_ctime = CURRENT_TIME_SEC;
128 inode_inc_link_count(inode); 125 inode_inc_link_count(inode);
129 ihold(inode); 126 ihold(inode);
@@ -134,10 +131,8 @@ static int sysv_link(struct dentry * old_dentry, struct inode * dir,
134static int sysv_mkdir(struct inode * dir, struct dentry *dentry, umode_t mode) 131static int sysv_mkdir(struct inode * dir, struct dentry *dentry, umode_t mode)
135{ 132{
136 struct inode * inode; 133 struct inode * inode;
137 int err = -EMLINK; 134 int err;
138 135
139 if (dir->i_nlink >= SYSV_SB(dir->i_sb)->s_link_max)
140 goto out;
141 inode_inc_link_count(dir); 136 inode_inc_link_count(dir);
142 137
143 inode = sysv_new_inode(dir, S_IFDIR|mode); 138 inode = sysv_new_inode(dir, S_IFDIR|mode);
@@ -251,11 +246,6 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry,
251 drop_nlink(new_inode); 246 drop_nlink(new_inode);
252 inode_dec_link_count(new_inode); 247 inode_dec_link_count(new_inode);
253 } else { 248 } else {
254 if (dir_de) {
255 err = -EMLINK;
256 if (new_dir->i_nlink >= SYSV_SB(new_dir->i_sb)->s_link_max)
257 goto out_dir;
258 }
259 err = sysv_add_link(new_dentry, old_inode); 249 err = sysv_add_link(new_dentry, old_inode);
260 if (err) 250 if (err)
261 goto out_dir; 251 goto out_dir;
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index f60c196913ea..7491c33b6468 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -44,7 +44,7 @@ enum {
44 JAN_1_1980 = (10*365 + 2) * 24 * 60 * 60 44 JAN_1_1980 = (10*365 + 2) * 24 * 60 * 60
45}; 45};
46 46
47static void detected_xenix(struct sysv_sb_info *sbi) 47static void detected_xenix(struct sysv_sb_info *sbi, unsigned *max_links)
48{ 48{
49 struct buffer_head *bh1 = sbi->s_bh1; 49 struct buffer_head *bh1 = sbi->s_bh1;
50 struct buffer_head *bh2 = sbi->s_bh2; 50 struct buffer_head *bh2 = sbi->s_bh2;
@@ -59,7 +59,7 @@ static void detected_xenix(struct sysv_sb_info *sbi)
59 sbd2 = (struct xenix_super_block *) (bh2->b_data - 512); 59 sbd2 = (struct xenix_super_block *) (bh2->b_data - 512);
60 } 60 }
61 61
62 sbi->s_link_max = XENIX_LINK_MAX; 62 *max_links = XENIX_LINK_MAX;
63 sbi->s_fic_size = XENIX_NICINOD; 63 sbi->s_fic_size = XENIX_NICINOD;
64 sbi->s_flc_size = XENIX_NICFREE; 64 sbi->s_flc_size = XENIX_NICFREE;
65 sbi->s_sbd1 = (char *)sbd1; 65 sbi->s_sbd1 = (char *)sbd1;
@@ -75,7 +75,7 @@ static void detected_xenix(struct sysv_sb_info *sbi)
75 sbi->s_nzones = fs32_to_cpu(sbi, sbd1->s_fsize); 75 sbi->s_nzones = fs32_to_cpu(sbi, sbd1->s_fsize);
76} 76}
77 77
78static void detected_sysv4(struct sysv_sb_info *sbi) 78static void detected_sysv4(struct sysv_sb_info *sbi, unsigned *max_links)
79{ 79{
80 struct sysv4_super_block * sbd; 80 struct sysv4_super_block * sbd;
81 struct buffer_head *bh1 = sbi->s_bh1; 81 struct buffer_head *bh1 = sbi->s_bh1;
@@ -86,7 +86,7 @@ static void detected_sysv4(struct sysv_sb_info *sbi)
86 else 86 else
87 sbd = (struct sysv4_super_block *) bh2->b_data; 87 sbd = (struct sysv4_super_block *) bh2->b_data;
88 88
89 sbi->s_link_max = SYSV_LINK_MAX; 89 *max_links = SYSV_LINK_MAX;
90 sbi->s_fic_size = SYSV_NICINOD; 90 sbi->s_fic_size = SYSV_NICINOD;
91 sbi->s_flc_size = SYSV_NICFREE; 91 sbi->s_flc_size = SYSV_NICFREE;
92 sbi->s_sbd1 = (char *)sbd; 92 sbi->s_sbd1 = (char *)sbd;
@@ -103,7 +103,7 @@ static void detected_sysv4(struct sysv_sb_info *sbi)
103 sbi->s_nzones = fs32_to_cpu(sbi, sbd->s_fsize); 103 sbi->s_nzones = fs32_to_cpu(sbi, sbd->s_fsize);
104} 104}
105 105
106static void detected_sysv2(struct sysv_sb_info *sbi) 106static void detected_sysv2(struct sysv_sb_info *sbi, unsigned *max_links)
107{ 107{
108 struct sysv2_super_block *sbd; 108 struct sysv2_super_block *sbd;
109 struct buffer_head *bh1 = sbi->s_bh1; 109 struct buffer_head *bh1 = sbi->s_bh1;
@@ -114,7 +114,7 @@ static void detected_sysv2(struct sysv_sb_info *sbi)
114 else 114 else
115 sbd = (struct sysv2_super_block *) bh2->b_data; 115 sbd = (struct sysv2_super_block *) bh2->b_data;
116 116
117 sbi->s_link_max = SYSV_LINK_MAX; 117 *max_links = SYSV_LINK_MAX;
118 sbi->s_fic_size = SYSV_NICINOD; 118 sbi->s_fic_size = SYSV_NICINOD;
119 sbi->s_flc_size = SYSV_NICFREE; 119 sbi->s_flc_size = SYSV_NICFREE;
120 sbi->s_sbd1 = (char *)sbd; 120 sbi->s_sbd1 = (char *)sbd;
@@ -131,14 +131,14 @@ static void detected_sysv2(struct sysv_sb_info *sbi)
131 sbi->s_nzones = fs32_to_cpu(sbi, sbd->s_fsize); 131 sbi->s_nzones = fs32_to_cpu(sbi, sbd->s_fsize);
132} 132}
133 133
134static void detected_coherent(struct sysv_sb_info *sbi) 134static void detected_coherent(struct sysv_sb_info *sbi, unsigned *max_links)
135{ 135{
136 struct coh_super_block * sbd; 136 struct coh_super_block * sbd;
137 struct buffer_head *bh1 = sbi->s_bh1; 137 struct buffer_head *bh1 = sbi->s_bh1;
138 138
139 sbd = (struct coh_super_block *) bh1->b_data; 139 sbd = (struct coh_super_block *) bh1->b_data;
140 140
141 sbi->s_link_max = COH_LINK_MAX; 141 *max_links = COH_LINK_MAX;
142 sbi->s_fic_size = COH_NICINOD; 142 sbi->s_fic_size = COH_NICINOD;
143 sbi->s_flc_size = COH_NICFREE; 143 sbi->s_flc_size = COH_NICFREE;
144 sbi->s_sbd1 = (char *)sbd; 144 sbi->s_sbd1 = (char *)sbd;
@@ -154,12 +154,12 @@ static void detected_coherent(struct sysv_sb_info *sbi)
154 sbi->s_nzones = fs32_to_cpu(sbi, sbd->s_fsize); 154 sbi->s_nzones = fs32_to_cpu(sbi, sbd->s_fsize);
155} 155}
156 156
157static void detected_v7(struct sysv_sb_info *sbi) 157static void detected_v7(struct sysv_sb_info *sbi, unsigned *max_links)
158{ 158{
159 struct buffer_head *bh2 = sbi->s_bh2; 159 struct buffer_head *bh2 = sbi->s_bh2;
160 struct v7_super_block *sbd = (struct v7_super_block *)bh2->b_data; 160 struct v7_super_block *sbd = (struct v7_super_block *)bh2->b_data;
161 161
162 sbi->s_link_max = V7_LINK_MAX; 162 *max_links = V7_LINK_MAX;
163 sbi->s_fic_size = V7_NICINOD; 163 sbi->s_fic_size = V7_NICINOD;
164 sbi->s_flc_size = V7_NICFREE; 164 sbi->s_flc_size = V7_NICFREE;
165 sbi->s_sbd1 = (char *)sbd; 165 sbi->s_sbd1 = (char *)sbd;
@@ -290,7 +290,7 @@ static char *flavour_names[] = {
290 [FSTYPE_AFS] = "AFS", 290 [FSTYPE_AFS] = "AFS",
291}; 291};
292 292
293static void (*flavour_setup[])(struct sysv_sb_info *) = { 293static void (*flavour_setup[])(struct sysv_sb_info *, unsigned *) = {
294 [FSTYPE_XENIX] = detected_xenix, 294 [FSTYPE_XENIX] = detected_xenix,
295 [FSTYPE_SYSV4] = detected_sysv4, 295 [FSTYPE_SYSV4] = detected_sysv4,
296 [FSTYPE_SYSV2] = detected_sysv2, 296 [FSTYPE_SYSV2] = detected_sysv2,
@@ -310,7 +310,7 @@ static int complete_read_super(struct super_block *sb, int silent, int size)
310 310
311 sbi->s_firstinodezone = 2; 311 sbi->s_firstinodezone = 2;
312 312
313 flavour_setup[sbi->s_type](sbi); 313 flavour_setup[sbi->s_type](sbi, &sb->s_max_links);
314 314
315 sbi->s_truncate = 1; 315 sbi->s_truncate = 1;
316 sbi->s_ndatazones = sbi->s_nzones - sbi->s_firstdatazone; 316 sbi->s_ndatazones = sbi->s_nzones - sbi->s_firstdatazone;
@@ -341,9 +341,8 @@ static int complete_read_super(struct super_block *sb, int silent, int size)
341 printk("SysV FS: get root inode failed\n"); 341 printk("SysV FS: get root inode failed\n");
342 return 0; 342 return 0;
343 } 343 }
344 sb->s_root = d_alloc_root(root_inode); 344 sb->s_root = d_make_root(root_inode);
345 if (!sb->s_root) { 345 if (!sb->s_root) {
346 iput(root_inode);
347 printk("SysV FS: get root dentry failed\n"); 346 printk("SysV FS: get root dentry failed\n");
348 return 0; 347 return 0;
349 } 348 }
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index 0e4b821c5691..11b07672f6c5 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -24,7 +24,6 @@ struct sysv_sb_info {
24 char s_bytesex; /* bytesex (le/be/pdp) */ 24 char s_bytesex; /* bytesex (le/be/pdp) */
25 char s_truncate; /* if 1: names > SYSV_NAMELEN chars are truncated */ 25 char s_truncate; /* if 1: names > SYSV_NAMELEN chars are truncated */
26 /* if 0: they are disallowed (ENAMETOOLONG) */ 26 /* if 0: they are disallowed (ENAMETOOLONG) */
27 nlink_t s_link_max; /* max number of hard links to a file */
28 unsigned int s_inodes_per_block; /* number of inodes per block */ 27 unsigned int s_inodes_per_block; /* number of inodes per block */
29 unsigned int s_inodes_per_block_1; /* inodes_per_block - 1 */ 28 unsigned int s_inodes_per_block_1; /* inodes_per_block - 1 */
30 unsigned int s_inodes_per_block_bits; /* log2(inodes_per_block) */ 29 unsigned int s_inodes_per_block_bits; /* log2(inodes_per_block) */
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index f922cbacdb96..1934084e2088 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -36,7 +36,7 @@
36 36
37#ifdef CONFIG_UBIFS_FS_DEBUG 37#ifdef CONFIG_UBIFS_FS_DEBUG
38 38
39DEFINE_SPINLOCK(dbg_lock); 39static DEFINE_SPINLOCK(dbg_lock);
40 40
41static const char *get_key_fmt(int fmt) 41static const char *get_key_fmt(int fmt)
42{ 42{
@@ -221,15 +221,15 @@ const char *dbg_jhead(int jhead)
221 221
222static void dump_ch(const struct ubifs_ch *ch) 222static void dump_ch(const struct ubifs_ch *ch)
223{ 223{
224 printk(KERN_DEBUG "\tmagic %#x\n", le32_to_cpu(ch->magic)); 224 printk(KERN_ERR "\tmagic %#x\n", le32_to_cpu(ch->magic));
225 printk(KERN_DEBUG "\tcrc %#x\n", le32_to_cpu(ch->crc)); 225 printk(KERN_ERR "\tcrc %#x\n", le32_to_cpu(ch->crc));
226 printk(KERN_DEBUG "\tnode_type %d (%s)\n", ch->node_type, 226 printk(KERN_ERR "\tnode_type %d (%s)\n", ch->node_type,
227 dbg_ntype(ch->node_type)); 227 dbg_ntype(ch->node_type));
228 printk(KERN_DEBUG "\tgroup_type %d (%s)\n", ch->group_type, 228 printk(KERN_ERR "\tgroup_type %d (%s)\n", ch->group_type,
229 dbg_gtype(ch->group_type)); 229 dbg_gtype(ch->group_type));
230 printk(KERN_DEBUG "\tsqnum %llu\n", 230 printk(KERN_ERR "\tsqnum %llu\n",
231 (unsigned long long)le64_to_cpu(ch->sqnum)); 231 (unsigned long long)le64_to_cpu(ch->sqnum));
232 printk(KERN_DEBUG "\tlen %u\n", le32_to_cpu(ch->len)); 232 printk(KERN_ERR "\tlen %u\n", le32_to_cpu(ch->len));
233} 233}
234 234
235void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode) 235void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode)
@@ -240,43 +240,43 @@ void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode)
240 struct ubifs_dent_node *dent, *pdent = NULL; 240 struct ubifs_dent_node *dent, *pdent = NULL;
241 int count = 2; 241 int count = 2;
242 242
243 printk(KERN_DEBUG "Dump in-memory inode:"); 243 printk(KERN_ERR "Dump in-memory inode:");
244 printk(KERN_DEBUG "\tinode %lu\n", inode->i_ino); 244 printk(KERN_ERR "\tinode %lu\n", inode->i_ino);
245 printk(KERN_DEBUG "\tsize %llu\n", 245 printk(KERN_ERR "\tsize %llu\n",
246 (unsigned long long)i_size_read(inode)); 246 (unsigned long long)i_size_read(inode));
247 printk(KERN_DEBUG "\tnlink %u\n", inode->i_nlink); 247 printk(KERN_ERR "\tnlink %u\n", inode->i_nlink);
248 printk(KERN_DEBUG "\tuid %u\n", (unsigned int)inode->i_uid); 248 printk(KERN_ERR "\tuid %u\n", (unsigned int)inode->i_uid);
249 printk(KERN_DEBUG "\tgid %u\n", (unsigned int)inode->i_gid); 249 printk(KERN_ERR "\tgid %u\n", (unsigned int)inode->i_gid);
250 printk(KERN_DEBUG "\tatime %u.%u\n", 250 printk(KERN_ERR "\tatime %u.%u\n",
251 (unsigned int)inode->i_atime.tv_sec, 251 (unsigned int)inode->i_atime.tv_sec,
252 (unsigned int)inode->i_atime.tv_nsec); 252 (unsigned int)inode->i_atime.tv_nsec);
253 printk(KERN_DEBUG "\tmtime %u.%u\n", 253 printk(KERN_ERR "\tmtime %u.%u\n",
254 (unsigned int)inode->i_mtime.tv_sec, 254 (unsigned int)inode->i_mtime.tv_sec,
255 (unsigned int)inode->i_mtime.tv_nsec); 255 (unsigned int)inode->i_mtime.tv_nsec);
256 printk(KERN_DEBUG "\tctime %u.%u\n", 256 printk(KERN_ERR "\tctime %u.%u\n",
257 (unsigned int)inode->i_ctime.tv_sec, 257 (unsigned int)inode->i_ctime.tv_sec,
258 (unsigned int)inode->i_ctime.tv_nsec); 258 (unsigned int)inode->i_ctime.tv_nsec);
259 printk(KERN_DEBUG "\tcreat_sqnum %llu\n", ui->creat_sqnum); 259 printk(KERN_ERR "\tcreat_sqnum %llu\n", ui->creat_sqnum);
260 printk(KERN_DEBUG "\txattr_size %u\n", ui->xattr_size); 260 printk(KERN_ERR "\txattr_size %u\n", ui->xattr_size);
261 printk(KERN_DEBUG "\txattr_cnt %u\n", ui->xattr_cnt); 261 printk(KERN_ERR "\txattr_cnt %u\n", ui->xattr_cnt);
262 printk(KERN_DEBUG "\txattr_names %u\n", ui->xattr_names); 262 printk(KERN_ERR "\txattr_names %u\n", ui->xattr_names);
263 printk(KERN_DEBUG "\tdirty %u\n", ui->dirty); 263 printk(KERN_ERR "\tdirty %u\n", ui->dirty);
264 printk(KERN_DEBUG "\txattr %u\n", ui->xattr); 264 printk(KERN_ERR "\txattr %u\n", ui->xattr);
265 printk(KERN_DEBUG "\tbulk_read %u\n", ui->xattr); 265 printk(KERN_ERR "\tbulk_read %u\n", ui->xattr);
266 printk(KERN_DEBUG "\tsynced_i_size %llu\n", 266 printk(KERN_ERR "\tsynced_i_size %llu\n",
267 (unsigned long long)ui->synced_i_size); 267 (unsigned long long)ui->synced_i_size);
268 printk(KERN_DEBUG "\tui_size %llu\n", 268 printk(KERN_ERR "\tui_size %llu\n",
269 (unsigned long long)ui->ui_size); 269 (unsigned long long)ui->ui_size);
270 printk(KERN_DEBUG "\tflags %d\n", ui->flags); 270 printk(KERN_ERR "\tflags %d\n", ui->flags);
271 printk(KERN_DEBUG "\tcompr_type %d\n", ui->compr_type); 271 printk(KERN_ERR "\tcompr_type %d\n", ui->compr_type);
272 printk(KERN_DEBUG "\tlast_page_read %lu\n", ui->last_page_read); 272 printk(KERN_ERR "\tlast_page_read %lu\n", ui->last_page_read);
273 printk(KERN_DEBUG "\tread_in_a_row %lu\n", ui->read_in_a_row); 273 printk(KERN_ERR "\tread_in_a_row %lu\n", ui->read_in_a_row);
274 printk(KERN_DEBUG "\tdata_len %d\n", ui->data_len); 274 printk(KERN_ERR "\tdata_len %d\n", ui->data_len);
275 275
276 if (!S_ISDIR(inode->i_mode)) 276 if (!S_ISDIR(inode->i_mode))
277 return; 277 return;
278 278
279 printk(KERN_DEBUG "List of directory entries:\n"); 279 printk(KERN_ERR "List of directory entries:\n");
280 ubifs_assert(!mutex_is_locked(&c->tnc_mutex)); 280 ubifs_assert(!mutex_is_locked(&c->tnc_mutex));
281 281
282 lowest_dent_key(c, &key, inode->i_ino); 282 lowest_dent_key(c, &key, inode->i_ino);
@@ -284,11 +284,11 @@ void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode)
284 dent = ubifs_tnc_next_ent(c, &key, &nm); 284 dent = ubifs_tnc_next_ent(c, &key, &nm);
285 if (IS_ERR(dent)) { 285 if (IS_ERR(dent)) {
286 if (PTR_ERR(dent) != -ENOENT) 286 if (PTR_ERR(dent) != -ENOENT)
287 printk(KERN_DEBUG "error %ld\n", PTR_ERR(dent)); 287 printk(KERN_ERR "error %ld\n", PTR_ERR(dent));
288 break; 288 break;
289 } 289 }
290 290
291 printk(KERN_DEBUG "\t%d: %s (%s)\n", 291 printk(KERN_ERR "\t%d: %s (%s)\n",
292 count++, dent->name, get_dent_type(dent->type)); 292 count++, dent->name, get_dent_type(dent->type));
293 293
294 nm.name = dent->name; 294 nm.name = dent->name;
@@ -312,8 +312,8 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
312 312
313 /* If the magic is incorrect, just hexdump the first bytes */ 313 /* If the magic is incorrect, just hexdump the first bytes */
314 if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) { 314 if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) {
315 printk(KERN_DEBUG "Not a node, first %zu bytes:", UBIFS_CH_SZ); 315 printk(KERN_ERR "Not a node, first %zu bytes:", UBIFS_CH_SZ);
316 print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, 316 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 32, 1,
317 (void *)node, UBIFS_CH_SZ, 1); 317 (void *)node, UBIFS_CH_SZ, 1);
318 return; 318 return;
319 } 319 }
@@ -326,7 +326,7 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
326 { 326 {
327 const struct ubifs_pad_node *pad = node; 327 const struct ubifs_pad_node *pad = node;
328 328
329 printk(KERN_DEBUG "\tpad_len %u\n", 329 printk(KERN_ERR "\tpad_len %u\n",
330 le32_to_cpu(pad->pad_len)); 330 le32_to_cpu(pad->pad_len));
331 break; 331 break;
332 } 332 }
@@ -335,50 +335,50 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
335 const struct ubifs_sb_node *sup = node; 335 const struct ubifs_sb_node *sup = node;
336 unsigned int sup_flags = le32_to_cpu(sup->flags); 336 unsigned int sup_flags = le32_to_cpu(sup->flags);
337 337
338 printk(KERN_DEBUG "\tkey_hash %d (%s)\n", 338 printk(KERN_ERR "\tkey_hash %d (%s)\n",
339 (int)sup->key_hash, get_key_hash(sup->key_hash)); 339 (int)sup->key_hash, get_key_hash(sup->key_hash));
340 printk(KERN_DEBUG "\tkey_fmt %d (%s)\n", 340 printk(KERN_ERR "\tkey_fmt %d (%s)\n",
341 (int)sup->key_fmt, get_key_fmt(sup->key_fmt)); 341 (int)sup->key_fmt, get_key_fmt(sup->key_fmt));
342 printk(KERN_DEBUG "\tflags %#x\n", sup_flags); 342 printk(KERN_ERR "\tflags %#x\n", sup_flags);
343 printk(KERN_DEBUG "\t big_lpt %u\n", 343 printk(KERN_ERR "\t big_lpt %u\n",
344 !!(sup_flags & UBIFS_FLG_BIGLPT)); 344 !!(sup_flags & UBIFS_FLG_BIGLPT));
345 printk(KERN_DEBUG "\t space_fixup %u\n", 345 printk(KERN_ERR "\t space_fixup %u\n",
346 !!(sup_flags & UBIFS_FLG_SPACE_FIXUP)); 346 !!(sup_flags & UBIFS_FLG_SPACE_FIXUP));
347 printk(KERN_DEBUG "\tmin_io_size %u\n", 347 printk(KERN_ERR "\tmin_io_size %u\n",
348 le32_to_cpu(sup->min_io_size)); 348 le32_to_cpu(sup->min_io_size));
349 printk(KERN_DEBUG "\tleb_size %u\n", 349 printk(KERN_ERR "\tleb_size %u\n",
350 le32_to_cpu(sup->leb_size)); 350 le32_to_cpu(sup->leb_size));
351 printk(KERN_DEBUG "\tleb_cnt %u\n", 351 printk(KERN_ERR "\tleb_cnt %u\n",
352 le32_to_cpu(sup->leb_cnt)); 352 le32_to_cpu(sup->leb_cnt));
353 printk(KERN_DEBUG "\tmax_leb_cnt %u\n", 353 printk(KERN_ERR "\tmax_leb_cnt %u\n",
354 le32_to_cpu(sup->max_leb_cnt)); 354 le32_to_cpu(sup->max_leb_cnt));
355 printk(KERN_DEBUG "\tmax_bud_bytes %llu\n", 355 printk(KERN_ERR "\tmax_bud_bytes %llu\n",
356 (unsigned long long)le64_to_cpu(sup->max_bud_bytes)); 356 (unsigned long long)le64_to_cpu(sup->max_bud_bytes));
357 printk(KERN_DEBUG "\tlog_lebs %u\n", 357 printk(KERN_ERR "\tlog_lebs %u\n",
358 le32_to_cpu(sup->log_lebs)); 358 le32_to_cpu(sup->log_lebs));
359 printk(KERN_DEBUG "\tlpt_lebs %u\n", 359 printk(KERN_ERR "\tlpt_lebs %u\n",
360 le32_to_cpu(sup->lpt_lebs)); 360 le32_to_cpu(sup->lpt_lebs));
361 printk(KERN_DEBUG "\torph_lebs %u\n", 361 printk(KERN_ERR "\torph_lebs %u\n",
362 le32_to_cpu(sup->orph_lebs)); 362 le32_to_cpu(sup->orph_lebs));
363 printk(KERN_DEBUG "\tjhead_cnt %u\n", 363 printk(KERN_ERR "\tjhead_cnt %u\n",
364 le32_to_cpu(sup->jhead_cnt)); 364 le32_to_cpu(sup->jhead_cnt));
365 printk(KERN_DEBUG "\tfanout %u\n", 365 printk(KERN_ERR "\tfanout %u\n",
366 le32_to_cpu(sup->fanout)); 366 le32_to_cpu(sup->fanout));
367 printk(KERN_DEBUG "\tlsave_cnt %u\n", 367 printk(KERN_ERR "\tlsave_cnt %u\n",
368 le32_to_cpu(sup->lsave_cnt)); 368 le32_to_cpu(sup->lsave_cnt));
369 printk(KERN_DEBUG "\tdefault_compr %u\n", 369 printk(KERN_ERR "\tdefault_compr %u\n",
370 (int)le16_to_cpu(sup->default_compr)); 370 (int)le16_to_cpu(sup->default_compr));
371 printk(KERN_DEBUG "\trp_size %llu\n", 371 printk(KERN_ERR "\trp_size %llu\n",
372 (unsigned long long)le64_to_cpu(sup->rp_size)); 372 (unsigned long long)le64_to_cpu(sup->rp_size));
373 printk(KERN_DEBUG "\trp_uid %u\n", 373 printk(KERN_ERR "\trp_uid %u\n",
374 le32_to_cpu(sup->rp_uid)); 374 le32_to_cpu(sup->rp_uid));
375 printk(KERN_DEBUG "\trp_gid %u\n", 375 printk(KERN_ERR "\trp_gid %u\n",
376 le32_to_cpu(sup->rp_gid)); 376 le32_to_cpu(sup->rp_gid));
377 printk(KERN_DEBUG "\tfmt_version %u\n", 377 printk(KERN_ERR "\tfmt_version %u\n",
378 le32_to_cpu(sup->fmt_version)); 378 le32_to_cpu(sup->fmt_version));
379 printk(KERN_DEBUG "\ttime_gran %u\n", 379 printk(KERN_ERR "\ttime_gran %u\n",
380 le32_to_cpu(sup->time_gran)); 380 le32_to_cpu(sup->time_gran));
381 printk(KERN_DEBUG "\tUUID %pUB\n", 381 printk(KERN_ERR "\tUUID %pUB\n",
382 sup->uuid); 382 sup->uuid);
383 break; 383 break;
384 } 384 }
@@ -386,61 +386,61 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
386 { 386 {
387 const struct ubifs_mst_node *mst = node; 387 const struct ubifs_mst_node *mst = node;
388 388
389 printk(KERN_DEBUG "\thighest_inum %llu\n", 389 printk(KERN_ERR "\thighest_inum %llu\n",
390 (unsigned long long)le64_to_cpu(mst->highest_inum)); 390 (unsigned long long)le64_to_cpu(mst->highest_inum));
391 printk(KERN_DEBUG "\tcommit number %llu\n", 391 printk(KERN_ERR "\tcommit number %llu\n",
392 (unsigned long long)le64_to_cpu(mst->cmt_no)); 392 (unsigned long long)le64_to_cpu(mst->cmt_no));
393 printk(KERN_DEBUG "\tflags %#x\n", 393 printk(KERN_ERR "\tflags %#x\n",
394 le32_to_cpu(mst->flags)); 394 le32_to_cpu(mst->flags));
395 printk(KERN_DEBUG "\tlog_lnum %u\n", 395 printk(KERN_ERR "\tlog_lnum %u\n",
396 le32_to_cpu(mst->log_lnum)); 396 le32_to_cpu(mst->log_lnum));
397 printk(KERN_DEBUG "\troot_lnum %u\n", 397 printk(KERN_ERR "\troot_lnum %u\n",
398 le32_to_cpu(mst->root_lnum)); 398 le32_to_cpu(mst->root_lnum));
399 printk(KERN_DEBUG "\troot_offs %u\n", 399 printk(KERN_ERR "\troot_offs %u\n",
400 le32_to_cpu(mst->root_offs)); 400 le32_to_cpu(mst->root_offs));
401 printk(KERN_DEBUG "\troot_len %u\n", 401 printk(KERN_ERR "\troot_len %u\n",
402 le32_to_cpu(mst->root_len)); 402 le32_to_cpu(mst->root_len));
403 printk(KERN_DEBUG "\tgc_lnum %u\n", 403 printk(KERN_ERR "\tgc_lnum %u\n",
404 le32_to_cpu(mst->gc_lnum)); 404 le32_to_cpu(mst->gc_lnum));
405 printk(KERN_DEBUG "\tihead_lnum %u\n", 405 printk(KERN_ERR "\tihead_lnum %u\n",
406 le32_to_cpu(mst->ihead_lnum)); 406 le32_to_cpu(mst->ihead_lnum));
407 printk(KERN_DEBUG "\tihead_offs %u\n", 407 printk(KERN_ERR "\tihead_offs %u\n",
408 le32_to_cpu(mst->ihead_offs)); 408 le32_to_cpu(mst->ihead_offs));
409 printk(KERN_DEBUG "\tindex_size %llu\n", 409 printk(KERN_ERR "\tindex_size %llu\n",
410 (unsigned long long)le64_to_cpu(mst->index_size)); 410 (unsigned long long)le64_to_cpu(mst->index_size));
411 printk(KERN_DEBUG "\tlpt_lnum %u\n", 411 printk(KERN_ERR "\tlpt_lnum %u\n",
412 le32_to_cpu(mst->lpt_lnum)); 412 le32_to_cpu(mst->lpt_lnum));
413 printk(KERN_DEBUG "\tlpt_offs %u\n", 413 printk(KERN_ERR "\tlpt_offs %u\n",
414 le32_to_cpu(mst->lpt_offs)); 414 le32_to_cpu(mst->lpt_offs));
415 printk(KERN_DEBUG "\tnhead_lnum %u\n", 415 printk(KERN_ERR "\tnhead_lnum %u\n",
416 le32_to_cpu(mst->nhead_lnum)); 416 le32_to_cpu(mst->nhead_lnum));
417 printk(KERN_DEBUG "\tnhead_offs %u\n", 417 printk(KERN_ERR "\tnhead_offs %u\n",
418 le32_to_cpu(mst->nhead_offs)); 418 le32_to_cpu(mst->nhead_offs));
419 printk(KERN_DEBUG "\tltab_lnum %u\n", 419 printk(KERN_ERR "\tltab_lnum %u\n",
420 le32_to_cpu(mst->ltab_lnum)); 420 le32_to_cpu(mst->ltab_lnum));
421 printk(KERN_DEBUG "\tltab_offs %u\n", 421 printk(KERN_ERR "\tltab_offs %u\n",
422 le32_to_cpu(mst->ltab_offs)); 422 le32_to_cpu(mst->ltab_offs));
423 printk(KERN_DEBUG "\tlsave_lnum %u\n", 423 printk(KERN_ERR "\tlsave_lnum %u\n",
424 le32_to_cpu(mst->lsave_lnum)); 424 le32_to_cpu(mst->lsave_lnum));
425 printk(KERN_DEBUG "\tlsave_offs %u\n", 425 printk(KERN_ERR "\tlsave_offs %u\n",
426 le32_to_cpu(mst->lsave_offs)); 426 le32_to_cpu(mst->lsave_offs));
427 printk(KERN_DEBUG "\tlscan_lnum %u\n", 427 printk(KERN_ERR "\tlscan_lnum %u\n",
428 le32_to_cpu(mst->lscan_lnum)); 428 le32_to_cpu(mst->lscan_lnum));
429 printk(KERN_DEBUG "\tleb_cnt %u\n", 429 printk(KERN_ERR "\tleb_cnt %u\n",
430 le32_to_cpu(mst->leb_cnt)); 430 le32_to_cpu(mst->leb_cnt));
431 printk(KERN_DEBUG "\tempty_lebs %u\n", 431 printk(KERN_ERR "\tempty_lebs %u\n",
432 le32_to_cpu(mst->empty_lebs)); 432 le32_to_cpu(mst->empty_lebs));
433 printk(KERN_DEBUG "\tidx_lebs %u\n", 433 printk(KERN_ERR "\tidx_lebs %u\n",
434 le32_to_cpu(mst->idx_lebs)); 434 le32_to_cpu(mst->idx_lebs));
435 printk(KERN_DEBUG "\ttotal_free %llu\n", 435 printk(KERN_ERR "\ttotal_free %llu\n",
436 (unsigned long long)le64_to_cpu(mst->total_free)); 436 (unsigned long long)le64_to_cpu(mst->total_free));
437 printk(KERN_DEBUG "\ttotal_dirty %llu\n", 437 printk(KERN_ERR "\ttotal_dirty %llu\n",
438 (unsigned long long)le64_to_cpu(mst->total_dirty)); 438 (unsigned long long)le64_to_cpu(mst->total_dirty));
439 printk(KERN_DEBUG "\ttotal_used %llu\n", 439 printk(KERN_ERR "\ttotal_used %llu\n",
440 (unsigned long long)le64_to_cpu(mst->total_used)); 440 (unsigned long long)le64_to_cpu(mst->total_used));
441 printk(KERN_DEBUG "\ttotal_dead %llu\n", 441 printk(KERN_ERR "\ttotal_dead %llu\n",
442 (unsigned long long)le64_to_cpu(mst->total_dead)); 442 (unsigned long long)le64_to_cpu(mst->total_dead));
443 printk(KERN_DEBUG "\ttotal_dark %llu\n", 443 printk(KERN_ERR "\ttotal_dark %llu\n",
444 (unsigned long long)le64_to_cpu(mst->total_dark)); 444 (unsigned long long)le64_to_cpu(mst->total_dark));
445 break; 445 break;
446 } 446 }
@@ -448,11 +448,11 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
448 { 448 {
449 const struct ubifs_ref_node *ref = node; 449 const struct ubifs_ref_node *ref = node;
450 450
451 printk(KERN_DEBUG "\tlnum %u\n", 451 printk(KERN_ERR "\tlnum %u\n",
452 le32_to_cpu(ref->lnum)); 452 le32_to_cpu(ref->lnum));
453 printk(KERN_DEBUG "\toffs %u\n", 453 printk(KERN_ERR "\toffs %u\n",
454 le32_to_cpu(ref->offs)); 454 le32_to_cpu(ref->offs));
455 printk(KERN_DEBUG "\tjhead %u\n", 455 printk(KERN_ERR "\tjhead %u\n",
456 le32_to_cpu(ref->jhead)); 456 le32_to_cpu(ref->jhead));
457 break; 457 break;
458 } 458 }
@@ -461,40 +461,40 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
461 const struct ubifs_ino_node *ino = node; 461 const struct ubifs_ino_node *ino = node;
462 462
463 key_read(c, &ino->key, &key); 463 key_read(c, &ino->key, &key);
464 printk(KERN_DEBUG "\tkey %s\n", 464 printk(KERN_ERR "\tkey %s\n",
465 dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); 465 dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN));
466 printk(KERN_DEBUG "\tcreat_sqnum %llu\n", 466 printk(KERN_ERR "\tcreat_sqnum %llu\n",
467 (unsigned long long)le64_to_cpu(ino->creat_sqnum)); 467 (unsigned long long)le64_to_cpu(ino->creat_sqnum));
468 printk(KERN_DEBUG "\tsize %llu\n", 468 printk(KERN_ERR "\tsize %llu\n",
469 (unsigned long long)le64_to_cpu(ino->size)); 469 (unsigned long long)le64_to_cpu(ino->size));
470 printk(KERN_DEBUG "\tnlink %u\n", 470 printk(KERN_ERR "\tnlink %u\n",
471 le32_to_cpu(ino->nlink)); 471 le32_to_cpu(ino->nlink));
472 printk(KERN_DEBUG "\tatime %lld.%u\n", 472 printk(KERN_ERR "\tatime %lld.%u\n",
473 (long long)le64_to_cpu(ino->atime_sec), 473 (long long)le64_to_cpu(ino->atime_sec),
474 le32_to_cpu(ino->atime_nsec)); 474 le32_to_cpu(ino->atime_nsec));
475 printk(KERN_DEBUG "\tmtime %lld.%u\n", 475 printk(KERN_ERR "\tmtime %lld.%u\n",
476 (long long)le64_to_cpu(ino->mtime_sec), 476 (long long)le64_to_cpu(ino->mtime_sec),
477 le32_to_cpu(ino->mtime_nsec)); 477 le32_to_cpu(ino->mtime_nsec));
478 printk(KERN_DEBUG "\tctime %lld.%u\n", 478 printk(KERN_ERR "\tctime %lld.%u\n",
479 (long long)le64_to_cpu(ino->ctime_sec), 479 (long long)le64_to_cpu(ino->ctime_sec),
480 le32_to_cpu(ino->ctime_nsec)); 480 le32_to_cpu(ino->ctime_nsec));
481 printk(KERN_DEBUG "\tuid %u\n", 481 printk(KERN_ERR "\tuid %u\n",
482 le32_to_cpu(ino->uid)); 482 le32_to_cpu(ino->uid));
483 printk(KERN_DEBUG "\tgid %u\n", 483 printk(KERN_ERR "\tgid %u\n",
484 le32_to_cpu(ino->gid)); 484 le32_to_cpu(ino->gid));
485 printk(KERN_DEBUG "\tmode %u\n", 485 printk(KERN_ERR "\tmode %u\n",
486 le32_to_cpu(ino->mode)); 486 le32_to_cpu(ino->mode));
487 printk(KERN_DEBUG "\tflags %#x\n", 487 printk(KERN_ERR "\tflags %#x\n",
488 le32_to_cpu(ino->flags)); 488 le32_to_cpu(ino->flags));
489 printk(KERN_DEBUG "\txattr_cnt %u\n", 489 printk(KERN_ERR "\txattr_cnt %u\n",
490 le32_to_cpu(ino->xattr_cnt)); 490 le32_to_cpu(ino->xattr_cnt));
491 printk(KERN_DEBUG "\txattr_size %u\n", 491 printk(KERN_ERR "\txattr_size %u\n",
492 le32_to_cpu(ino->xattr_size)); 492 le32_to_cpu(ino->xattr_size));
493 printk(KERN_DEBUG "\txattr_names %u\n", 493 printk(KERN_ERR "\txattr_names %u\n",
494 le32_to_cpu(ino->xattr_names)); 494 le32_to_cpu(ino->xattr_names));
495 printk(KERN_DEBUG "\tcompr_type %#x\n", 495 printk(KERN_ERR "\tcompr_type %#x\n",
496 (int)le16_to_cpu(ino->compr_type)); 496 (int)le16_to_cpu(ino->compr_type));
497 printk(KERN_DEBUG "\tdata len %u\n", 497 printk(KERN_ERR "\tdata len %u\n",
498 le32_to_cpu(ino->data_len)); 498 le32_to_cpu(ino->data_len));
499 break; 499 break;
500 } 500 }
@@ -505,16 +505,16 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
505 int nlen = le16_to_cpu(dent->nlen); 505 int nlen = le16_to_cpu(dent->nlen);
506 506
507 key_read(c, &dent->key, &key); 507 key_read(c, &dent->key, &key);
508 printk(KERN_DEBUG "\tkey %s\n", 508 printk(KERN_ERR "\tkey %s\n",
509 dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); 509 dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN));
510 printk(KERN_DEBUG "\tinum %llu\n", 510 printk(KERN_ERR "\tinum %llu\n",
511 (unsigned long long)le64_to_cpu(dent->inum)); 511 (unsigned long long)le64_to_cpu(dent->inum));
512 printk(KERN_DEBUG "\ttype %d\n", (int)dent->type); 512 printk(KERN_ERR "\ttype %d\n", (int)dent->type);
513 printk(KERN_DEBUG "\tnlen %d\n", nlen); 513 printk(KERN_ERR "\tnlen %d\n", nlen);
514 printk(KERN_DEBUG "\tname "); 514 printk(KERN_ERR "\tname ");
515 515
516 if (nlen > UBIFS_MAX_NLEN) 516 if (nlen > UBIFS_MAX_NLEN)
517 printk(KERN_DEBUG "(bad name length, not printing, " 517 printk(KERN_ERR "(bad name length, not printing, "
518 "bad or corrupted node)"); 518 "bad or corrupted node)");
519 else { 519 else {
520 for (i = 0; i < nlen && dent->name[i]; i++) 520 for (i = 0; i < nlen && dent->name[i]; i++)
@@ -530,16 +530,16 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
530 int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ; 530 int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ;
531 531
532 key_read(c, &dn->key, &key); 532 key_read(c, &dn->key, &key);
533 printk(KERN_DEBUG "\tkey %s\n", 533 printk(KERN_ERR "\tkey %s\n",
534 dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); 534 dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN));
535 printk(KERN_DEBUG "\tsize %u\n", 535 printk(KERN_ERR "\tsize %u\n",
536 le32_to_cpu(dn->size)); 536 le32_to_cpu(dn->size));
537 printk(KERN_DEBUG "\tcompr_typ %d\n", 537 printk(KERN_ERR "\tcompr_typ %d\n",
538 (int)le16_to_cpu(dn->compr_type)); 538 (int)le16_to_cpu(dn->compr_type));
539 printk(KERN_DEBUG "\tdata size %d\n", 539 printk(KERN_ERR "\tdata size %d\n",
540 dlen); 540 dlen);
541 printk(KERN_DEBUG "\tdata:\n"); 541 printk(KERN_ERR "\tdata:\n");
542 print_hex_dump(KERN_DEBUG, "\t", DUMP_PREFIX_OFFSET, 32, 1, 542 print_hex_dump(KERN_ERR, "\t", DUMP_PREFIX_OFFSET, 32, 1,
543 (void *)&dn->data, dlen, 0); 543 (void *)&dn->data, dlen, 0);
544 break; 544 break;
545 } 545 }
@@ -547,11 +547,11 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
547 { 547 {
548 const struct ubifs_trun_node *trun = node; 548 const struct ubifs_trun_node *trun = node;
549 549
550 printk(KERN_DEBUG "\tinum %u\n", 550 printk(KERN_ERR "\tinum %u\n",
551 le32_to_cpu(trun->inum)); 551 le32_to_cpu(trun->inum));
552 printk(KERN_DEBUG "\told_size %llu\n", 552 printk(KERN_ERR "\told_size %llu\n",
553 (unsigned long long)le64_to_cpu(trun->old_size)); 553 (unsigned long long)le64_to_cpu(trun->old_size));
554 printk(KERN_DEBUG "\tnew_size %llu\n", 554 printk(KERN_ERR "\tnew_size %llu\n",
555 (unsigned long long)le64_to_cpu(trun->new_size)); 555 (unsigned long long)le64_to_cpu(trun->new_size));
556 break; 556 break;
557 } 557 }
@@ -560,17 +560,17 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
560 const struct ubifs_idx_node *idx = node; 560 const struct ubifs_idx_node *idx = node;
561 561
562 n = le16_to_cpu(idx->child_cnt); 562 n = le16_to_cpu(idx->child_cnt);
563 printk(KERN_DEBUG "\tchild_cnt %d\n", n); 563 printk(KERN_ERR "\tchild_cnt %d\n", n);
564 printk(KERN_DEBUG "\tlevel %d\n", 564 printk(KERN_ERR "\tlevel %d\n",
565 (int)le16_to_cpu(idx->level)); 565 (int)le16_to_cpu(idx->level));
566 printk(KERN_DEBUG "\tBranches:\n"); 566 printk(KERN_ERR "\tBranches:\n");
567 567
568 for (i = 0; i < n && i < c->fanout - 1; i++) { 568 for (i = 0; i < n && i < c->fanout - 1; i++) {
569 const struct ubifs_branch *br; 569 const struct ubifs_branch *br;
570 570
571 br = ubifs_idx_branch(c, idx, i); 571 br = ubifs_idx_branch(c, idx, i);
572 key_read(c, &br->key, &key); 572 key_read(c, &br->key, &key);
573 printk(KERN_DEBUG "\t%d: LEB %d:%d len %d key %s\n", 573 printk(KERN_ERR "\t%d: LEB %d:%d len %d key %s\n",
574 i, le32_to_cpu(br->lnum), le32_to_cpu(br->offs), 574 i, le32_to_cpu(br->lnum), le32_to_cpu(br->offs),
575 le32_to_cpu(br->len), 575 le32_to_cpu(br->len),
576 dbg_snprintf_key(c, &key, key_buf, 576 dbg_snprintf_key(c, &key, key_buf,
@@ -584,20 +584,20 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
584 { 584 {
585 const struct ubifs_orph_node *orph = node; 585 const struct ubifs_orph_node *orph = node;
586 586
587 printk(KERN_DEBUG "\tcommit number %llu\n", 587 printk(KERN_ERR "\tcommit number %llu\n",
588 (unsigned long long) 588 (unsigned long long)
589 le64_to_cpu(orph->cmt_no) & LLONG_MAX); 589 le64_to_cpu(orph->cmt_no) & LLONG_MAX);
590 printk(KERN_DEBUG "\tlast node flag %llu\n", 590 printk(KERN_ERR "\tlast node flag %llu\n",
591 (unsigned long long)(le64_to_cpu(orph->cmt_no)) >> 63); 591 (unsigned long long)(le64_to_cpu(orph->cmt_no)) >> 63);
592 n = (le32_to_cpu(ch->len) - UBIFS_ORPH_NODE_SZ) >> 3; 592 n = (le32_to_cpu(ch->len) - UBIFS_ORPH_NODE_SZ) >> 3;
593 printk(KERN_DEBUG "\t%d orphan inode numbers:\n", n); 593 printk(KERN_ERR "\t%d orphan inode numbers:\n", n);
594 for (i = 0; i < n; i++) 594 for (i = 0; i < n; i++)
595 printk(KERN_DEBUG "\t ino %llu\n", 595 printk(KERN_ERR "\t ino %llu\n",
596 (unsigned long long)le64_to_cpu(orph->inos[i])); 596 (unsigned long long)le64_to_cpu(orph->inos[i]));
597 break; 597 break;
598 } 598 }
599 default: 599 default:
600 printk(KERN_DEBUG "node type %d was not recognized\n", 600 printk(KERN_ERR "node type %d was not recognized\n",
601 (int)ch->node_type); 601 (int)ch->node_type);
602 } 602 }
603 spin_unlock(&dbg_lock); 603 spin_unlock(&dbg_lock);
@@ -606,16 +606,16 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
606void dbg_dump_budget_req(const struct ubifs_budget_req *req) 606void dbg_dump_budget_req(const struct ubifs_budget_req *req)
607{ 607{
608 spin_lock(&dbg_lock); 608 spin_lock(&dbg_lock);
609 printk(KERN_DEBUG "Budgeting request: new_ino %d, dirtied_ino %d\n", 609 printk(KERN_ERR "Budgeting request: new_ino %d, dirtied_ino %d\n",
610 req->new_ino, req->dirtied_ino); 610 req->new_ino, req->dirtied_ino);
611 printk(KERN_DEBUG "\tnew_ino_d %d, dirtied_ino_d %d\n", 611 printk(KERN_ERR "\tnew_ino_d %d, dirtied_ino_d %d\n",
612 req->new_ino_d, req->dirtied_ino_d); 612 req->new_ino_d, req->dirtied_ino_d);
613 printk(KERN_DEBUG "\tnew_page %d, dirtied_page %d\n", 613 printk(KERN_ERR "\tnew_page %d, dirtied_page %d\n",
614 req->new_page, req->dirtied_page); 614 req->new_page, req->dirtied_page);
615 printk(KERN_DEBUG "\tnew_dent %d, mod_dent %d\n", 615 printk(KERN_ERR "\tnew_dent %d, mod_dent %d\n",
616 req->new_dent, req->mod_dent); 616 req->new_dent, req->mod_dent);
617 printk(KERN_DEBUG "\tidx_growth %d\n", req->idx_growth); 617 printk(KERN_ERR "\tidx_growth %d\n", req->idx_growth);
618 printk(KERN_DEBUG "\tdata_growth %d dd_growth %d\n", 618 printk(KERN_ERR "\tdata_growth %d dd_growth %d\n",
619 req->data_growth, req->dd_growth); 619 req->data_growth, req->dd_growth);
620 spin_unlock(&dbg_lock); 620 spin_unlock(&dbg_lock);
621} 621}
@@ -623,12 +623,12 @@ void dbg_dump_budget_req(const struct ubifs_budget_req *req)
623void dbg_dump_lstats(const struct ubifs_lp_stats *lst) 623void dbg_dump_lstats(const struct ubifs_lp_stats *lst)
624{ 624{
625 spin_lock(&dbg_lock); 625 spin_lock(&dbg_lock);
626 printk(KERN_DEBUG "(pid %d) Lprops statistics: empty_lebs %d, " 626 printk(KERN_ERR "(pid %d) Lprops statistics: empty_lebs %d, "
627 "idx_lebs %d\n", current->pid, lst->empty_lebs, lst->idx_lebs); 627 "idx_lebs %d\n", current->pid, lst->empty_lebs, lst->idx_lebs);
628 printk(KERN_DEBUG "\ttaken_empty_lebs %d, total_free %lld, " 628 printk(KERN_ERR "\ttaken_empty_lebs %d, total_free %lld, "
629 "total_dirty %lld\n", lst->taken_empty_lebs, lst->total_free, 629 "total_dirty %lld\n", lst->taken_empty_lebs, lst->total_free,
630 lst->total_dirty); 630 lst->total_dirty);
631 printk(KERN_DEBUG "\ttotal_used %lld, total_dark %lld, " 631 printk(KERN_ERR "\ttotal_used %lld, total_dark %lld, "
632 "total_dead %lld\n", lst->total_used, lst->total_dark, 632 "total_dead %lld\n", lst->total_used, lst->total_dark,
633 lst->total_dead); 633 lst->total_dead);
634 spin_unlock(&dbg_lock); 634 spin_unlock(&dbg_lock);
@@ -644,21 +644,21 @@ void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi)
644 644
645 spin_lock(&c->space_lock); 645 spin_lock(&c->space_lock);
646 spin_lock(&dbg_lock); 646 spin_lock(&dbg_lock);
647 printk(KERN_DEBUG "(pid %d) Budgeting info: data budget sum %lld, " 647 printk(KERN_ERR "(pid %d) Budgeting info: data budget sum %lld, "
648 "total budget sum %lld\n", current->pid, 648 "total budget sum %lld\n", current->pid,
649 bi->data_growth + bi->dd_growth, 649 bi->data_growth + bi->dd_growth,
650 bi->data_growth + bi->dd_growth + bi->idx_growth); 650 bi->data_growth + bi->dd_growth + bi->idx_growth);
651 printk(KERN_DEBUG "\tbudg_data_growth %lld, budg_dd_growth %lld, " 651 printk(KERN_ERR "\tbudg_data_growth %lld, budg_dd_growth %lld, "
652 "budg_idx_growth %lld\n", bi->data_growth, bi->dd_growth, 652 "budg_idx_growth %lld\n", bi->data_growth, bi->dd_growth,
653 bi->idx_growth); 653 bi->idx_growth);
654 printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %llu, " 654 printk(KERN_ERR "\tmin_idx_lebs %d, old_idx_sz %llu, "
655 "uncommitted_idx %lld\n", bi->min_idx_lebs, bi->old_idx_sz, 655 "uncommitted_idx %lld\n", bi->min_idx_lebs, bi->old_idx_sz,
656 bi->uncommitted_idx); 656 bi->uncommitted_idx);
657 printk(KERN_DEBUG "\tpage_budget %d, inode_budget %d, dent_budget %d\n", 657 printk(KERN_ERR "\tpage_budget %d, inode_budget %d, dent_budget %d\n",
658 bi->page_budget, bi->inode_budget, bi->dent_budget); 658 bi->page_budget, bi->inode_budget, bi->dent_budget);
659 printk(KERN_DEBUG "\tnospace %u, nospace_rp %u\n", 659 printk(KERN_ERR "\tnospace %u, nospace_rp %u\n",
660 bi->nospace, bi->nospace_rp); 660 bi->nospace, bi->nospace_rp);
661 printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n", 661 printk(KERN_ERR "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n",
662 c->dark_wm, c->dead_wm, c->max_idx_node_sz); 662 c->dark_wm, c->dead_wm, c->max_idx_node_sz);
663 663
664 if (bi != &c->bi) 664 if (bi != &c->bi)
@@ -669,38 +669,38 @@ void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi)
669 */ 669 */
670 goto out_unlock; 670 goto out_unlock;
671 671
672 printk(KERN_DEBUG "\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n", 672 printk(KERN_ERR "\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n",
673 c->freeable_cnt, c->calc_idx_sz, c->idx_gc_cnt); 673 c->freeable_cnt, c->calc_idx_sz, c->idx_gc_cnt);
674 printk(KERN_DEBUG "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, " 674 printk(KERN_ERR "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, "
675 "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt), 675 "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt),
676 atomic_long_read(&c->dirty_zn_cnt), 676 atomic_long_read(&c->dirty_zn_cnt),
677 atomic_long_read(&c->clean_zn_cnt)); 677 atomic_long_read(&c->clean_zn_cnt));
678 printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n", 678 printk(KERN_ERR "\tgc_lnum %d, ihead_lnum %d\n",
679 c->gc_lnum, c->ihead_lnum); 679 c->gc_lnum, c->ihead_lnum);
680 680
681 /* If we are in R/O mode, journal heads do not exist */ 681 /* If we are in R/O mode, journal heads do not exist */
682 if (c->jheads) 682 if (c->jheads)
683 for (i = 0; i < c->jhead_cnt; i++) 683 for (i = 0; i < c->jhead_cnt; i++)
684 printk(KERN_DEBUG "\tjhead %s\t LEB %d\n", 684 printk(KERN_ERR "\tjhead %s\t LEB %d\n",
685 dbg_jhead(c->jheads[i].wbuf.jhead), 685 dbg_jhead(c->jheads[i].wbuf.jhead),
686 c->jheads[i].wbuf.lnum); 686 c->jheads[i].wbuf.lnum);
687 for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) { 687 for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) {
688 bud = rb_entry(rb, struct ubifs_bud, rb); 688 bud = rb_entry(rb, struct ubifs_bud, rb);
689 printk(KERN_DEBUG "\tbud LEB %d\n", bud->lnum); 689 printk(KERN_ERR "\tbud LEB %d\n", bud->lnum);
690 } 690 }
691 list_for_each_entry(bud, &c->old_buds, list) 691 list_for_each_entry(bud, &c->old_buds, list)
692 printk(KERN_DEBUG "\told bud LEB %d\n", bud->lnum); 692 printk(KERN_ERR "\told bud LEB %d\n", bud->lnum);
693 list_for_each_entry(idx_gc, &c->idx_gc, list) 693 list_for_each_entry(idx_gc, &c->idx_gc, list)
694 printk(KERN_DEBUG "\tGC'ed idx LEB %d unmap %d\n", 694 printk(KERN_ERR "\tGC'ed idx LEB %d unmap %d\n",
695 idx_gc->lnum, idx_gc->unmap); 695 idx_gc->lnum, idx_gc->unmap);
696 printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state); 696 printk(KERN_ERR "\tcommit state %d\n", c->cmt_state);
697 697
698 /* Print budgeting predictions */ 698 /* Print budgeting predictions */
699 available = ubifs_calc_available(c, c->bi.min_idx_lebs); 699 available = ubifs_calc_available(c, c->bi.min_idx_lebs);
700 outstanding = c->bi.data_growth + c->bi.dd_growth; 700 outstanding = c->bi.data_growth + c->bi.dd_growth;
701 free = ubifs_get_free_space_nolock(c); 701 free = ubifs_get_free_space_nolock(c);
702 printk(KERN_DEBUG "Budgeting predictions:\n"); 702 printk(KERN_ERR "Budgeting predictions:\n");
703 printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n", 703 printk(KERN_ERR "\tavailable: %lld, outstanding %lld, free %lld\n",
704 available, outstanding, free); 704 available, outstanding, free);
705out_unlock: 705out_unlock:
706 spin_unlock(&dbg_lock); 706 spin_unlock(&dbg_lock);
@@ -720,11 +720,11 @@ void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp)
720 dark = ubifs_calc_dark(c, spc); 720 dark = ubifs_calc_dark(c, spc);
721 721
722 if (lp->flags & LPROPS_INDEX) 722 if (lp->flags & LPROPS_INDEX)
723 printk(KERN_DEBUG "LEB %-7d free %-8d dirty %-8d used %-8d " 723 printk(KERN_ERR "LEB %-7d free %-8d dirty %-8d used %-8d "
724 "free + dirty %-8d flags %#x (", lp->lnum, lp->free, 724 "free + dirty %-8d flags %#x (", lp->lnum, lp->free,
725 lp->dirty, c->leb_size - spc, spc, lp->flags); 725 lp->dirty, c->leb_size - spc, spc, lp->flags);
726 else 726 else
727 printk(KERN_DEBUG "LEB %-7d free %-8d dirty %-8d used %-8d " 727 printk(KERN_ERR "LEB %-7d free %-8d dirty %-8d used %-8d "
728 "free + dirty %-8d dark %-4d dead %-4d nodes fit %-3d " 728 "free + dirty %-8d dark %-4d dead %-4d nodes fit %-3d "
729 "flags %#-4x (", lp->lnum, lp->free, lp->dirty, 729 "flags %#-4x (", lp->lnum, lp->free, lp->dirty,
730 c->leb_size - spc, spc, dark, dead, 730 c->leb_size - spc, spc, dark, dead,
@@ -807,7 +807,7 @@ void dbg_dump_lprops(struct ubifs_info *c)
807 struct ubifs_lprops lp; 807 struct ubifs_lprops lp;
808 struct ubifs_lp_stats lst; 808 struct ubifs_lp_stats lst;
809 809
810 printk(KERN_DEBUG "(pid %d) start dumping LEB properties\n", 810 printk(KERN_ERR "(pid %d) start dumping LEB properties\n",
811 current->pid); 811 current->pid);
812 ubifs_get_lp_stats(c, &lst); 812 ubifs_get_lp_stats(c, &lst);
813 dbg_dump_lstats(&lst); 813 dbg_dump_lstats(&lst);
@@ -819,7 +819,7 @@ void dbg_dump_lprops(struct ubifs_info *c)
819 819
820 dbg_dump_lprop(c, &lp); 820 dbg_dump_lprop(c, &lp);
821 } 821 }
822 printk(KERN_DEBUG "(pid %d) finish dumping LEB properties\n", 822 printk(KERN_ERR "(pid %d) finish dumping LEB properties\n",
823 current->pid); 823 current->pid);
824} 824}
825 825
@@ -828,35 +828,35 @@ void dbg_dump_lpt_info(struct ubifs_info *c)
828 int i; 828 int i;
829 829
830 spin_lock(&dbg_lock); 830 spin_lock(&dbg_lock);
831 printk(KERN_DEBUG "(pid %d) dumping LPT information\n", current->pid); 831 printk(KERN_ERR "(pid %d) dumping LPT information\n", current->pid);
832 printk(KERN_DEBUG "\tlpt_sz: %lld\n", c->lpt_sz); 832 printk(KERN_ERR "\tlpt_sz: %lld\n", c->lpt_sz);
833 printk(KERN_DEBUG "\tpnode_sz: %d\n", c->pnode_sz); 833 printk(KERN_ERR "\tpnode_sz: %d\n", c->pnode_sz);
834 printk(KERN_DEBUG "\tnnode_sz: %d\n", c->nnode_sz); 834 printk(KERN_ERR "\tnnode_sz: %d\n", c->nnode_sz);
835 printk(KERN_DEBUG "\tltab_sz: %d\n", c->ltab_sz); 835 printk(KERN_ERR "\tltab_sz: %d\n", c->ltab_sz);
836 printk(KERN_DEBUG "\tlsave_sz: %d\n", c->lsave_sz); 836 printk(KERN_ERR "\tlsave_sz: %d\n", c->lsave_sz);
837 printk(KERN_DEBUG "\tbig_lpt: %d\n", c->big_lpt); 837 printk(KERN_ERR "\tbig_lpt: %d\n", c->big_lpt);
838 printk(KERN_DEBUG "\tlpt_hght: %d\n", c->lpt_hght); 838 printk(KERN_ERR "\tlpt_hght: %d\n", c->lpt_hght);
839 printk(KERN_DEBUG "\tpnode_cnt: %d\n", c->pnode_cnt); 839 printk(KERN_ERR "\tpnode_cnt: %d\n", c->pnode_cnt);
840 printk(KERN_DEBUG "\tnnode_cnt: %d\n", c->nnode_cnt); 840 printk(KERN_ERR "\tnnode_cnt: %d\n", c->nnode_cnt);
841 printk(KERN_DEBUG "\tdirty_pn_cnt: %d\n", c->dirty_pn_cnt); 841 printk(KERN_ERR "\tdirty_pn_cnt: %d\n", c->dirty_pn_cnt);
842 printk(KERN_DEBUG "\tdirty_nn_cnt: %d\n", c->dirty_nn_cnt); 842 printk(KERN_ERR "\tdirty_nn_cnt: %d\n", c->dirty_nn_cnt);
843 printk(KERN_DEBUG "\tlsave_cnt: %d\n", c->lsave_cnt); 843 printk(KERN_ERR "\tlsave_cnt: %d\n", c->lsave_cnt);
844 printk(KERN_DEBUG "\tspace_bits: %d\n", c->space_bits); 844 printk(KERN_ERR "\tspace_bits: %d\n", c->space_bits);
845 printk(KERN_DEBUG "\tlpt_lnum_bits: %d\n", c->lpt_lnum_bits); 845 printk(KERN_ERR "\tlpt_lnum_bits: %d\n", c->lpt_lnum_bits);
846 printk(KERN_DEBUG "\tlpt_offs_bits: %d\n", c->lpt_offs_bits); 846 printk(KERN_ERR "\tlpt_offs_bits: %d\n", c->lpt_offs_bits);
847 printk(KERN_DEBUG "\tlpt_spc_bits: %d\n", c->lpt_spc_bits); 847 printk(KERN_ERR "\tlpt_spc_bits: %d\n", c->lpt_spc_bits);
848 printk(KERN_DEBUG "\tpcnt_bits: %d\n", c->pcnt_bits); 848 printk(KERN_ERR "\tpcnt_bits: %d\n", c->pcnt_bits);
849 printk(KERN_DEBUG "\tlnum_bits: %d\n", c->lnum_bits); 849 printk(KERN_ERR "\tlnum_bits: %d\n", c->lnum_bits);
850 printk(KERN_DEBUG "\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs); 850 printk(KERN_ERR "\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs);
851 printk(KERN_DEBUG "\tLPT head is at %d:%d\n", 851 printk(KERN_ERR "\tLPT head is at %d:%d\n",
852 c->nhead_lnum, c->nhead_offs); 852 c->nhead_lnum, c->nhead_offs);
853 printk(KERN_DEBUG "\tLPT ltab is at %d:%d\n", 853 printk(KERN_ERR "\tLPT ltab is at %d:%d\n",
854 c->ltab_lnum, c->ltab_offs); 854 c->ltab_lnum, c->ltab_offs);
855 if (c->big_lpt) 855 if (c->big_lpt)
856 printk(KERN_DEBUG "\tLPT lsave is at %d:%d\n", 856 printk(KERN_ERR "\tLPT lsave is at %d:%d\n",
857 c->lsave_lnum, c->lsave_offs); 857 c->lsave_lnum, c->lsave_offs);
858 for (i = 0; i < c->lpt_lebs; i++) 858 for (i = 0; i < c->lpt_lebs; i++)
859 printk(KERN_DEBUG "\tLPT LEB %d free %d dirty %d tgc %d " 859 printk(KERN_ERR "\tLPT LEB %d free %d dirty %d tgc %d "
860 "cmt %d\n", i + c->lpt_first, c->ltab[i].free, 860 "cmt %d\n", i + c->lpt_first, c->ltab[i].free,
861 c->ltab[i].dirty, c->ltab[i].tgc, c->ltab[i].cmt); 861 c->ltab[i].dirty, c->ltab[i].tgc, c->ltab[i].cmt);
862 spin_unlock(&dbg_lock); 862 spin_unlock(&dbg_lock);
@@ -867,12 +867,12 @@ void dbg_dump_sleb(const struct ubifs_info *c,
867{ 867{
868 struct ubifs_scan_node *snod; 868 struct ubifs_scan_node *snod;
869 869
870 printk(KERN_DEBUG "(pid %d) start dumping scanned data from LEB %d:%d\n", 870 printk(KERN_ERR "(pid %d) start dumping scanned data from LEB %d:%d\n",
871 current->pid, sleb->lnum, offs); 871 current->pid, sleb->lnum, offs);
872 872
873 list_for_each_entry(snod, &sleb->nodes, list) { 873 list_for_each_entry(snod, &sleb->nodes, list) {
874 cond_resched(); 874 cond_resched();
875 printk(KERN_DEBUG "Dumping node at LEB %d:%d len %d\n", sleb->lnum, 875 printk(KERN_ERR "Dumping node at LEB %d:%d len %d\n", sleb->lnum,
876 snod->offs, snod->len); 876 snod->offs, snod->len);
877 dbg_dump_node(c, snod->node); 877 dbg_dump_node(c, snod->node);
878 } 878 }
@@ -887,7 +887,7 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum)
887 if (dbg_is_tst_rcvry(c)) 887 if (dbg_is_tst_rcvry(c))
888 return; 888 return;
889 889
890 printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", 890 printk(KERN_ERR "(pid %d) start dumping LEB %d\n",
891 current->pid, lnum); 891 current->pid, lnum);
892 892
893 buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); 893 buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
@@ -902,17 +902,17 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum)
902 goto out; 902 goto out;
903 } 903 }
904 904
905 printk(KERN_DEBUG "LEB %d has %d nodes ending at %d\n", lnum, 905 printk(KERN_ERR "LEB %d has %d nodes ending at %d\n", lnum,
906 sleb->nodes_cnt, sleb->endpt); 906 sleb->nodes_cnt, sleb->endpt);
907 907
908 list_for_each_entry(snod, &sleb->nodes, list) { 908 list_for_each_entry(snod, &sleb->nodes, list) {
909 cond_resched(); 909 cond_resched();
910 printk(KERN_DEBUG "Dumping node at LEB %d:%d len %d\n", lnum, 910 printk(KERN_ERR "Dumping node at LEB %d:%d len %d\n", lnum,
911 snod->offs, snod->len); 911 snod->offs, snod->len);
912 dbg_dump_node(c, snod->node); 912 dbg_dump_node(c, snod->node);
913 } 913 }
914 914
915 printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n", 915 printk(KERN_ERR "(pid %d) finish dumping LEB %d\n",
916 current->pid, lnum); 916 current->pid, lnum);
917 ubifs_scan_destroy(sleb); 917 ubifs_scan_destroy(sleb);
918 918
@@ -934,7 +934,7 @@ void dbg_dump_znode(const struct ubifs_info *c,
934 else 934 else
935 zbr = &c->zroot; 935 zbr = &c->zroot;
936 936
937 printk(KERN_DEBUG "znode %p, LEB %d:%d len %d parent %p iip %d level %d" 937 printk(KERN_ERR "znode %p, LEB %d:%d len %d parent %p iip %d level %d"
938 " child_cnt %d flags %lx\n", znode, zbr->lnum, zbr->offs, 938 " child_cnt %d flags %lx\n", znode, zbr->lnum, zbr->offs,
939 zbr->len, znode->parent, znode->iip, znode->level, 939 zbr->len, znode->parent, znode->iip, znode->level,
940 znode->child_cnt, znode->flags); 940 znode->child_cnt, znode->flags);
@@ -944,18 +944,18 @@ void dbg_dump_znode(const struct ubifs_info *c,
944 return; 944 return;
945 } 945 }
946 946
947 printk(KERN_DEBUG "zbranches:\n"); 947 printk(KERN_ERR "zbranches:\n");
948 for (n = 0; n < znode->child_cnt; n++) { 948 for (n = 0; n < znode->child_cnt; n++) {
949 zbr = &znode->zbranch[n]; 949 zbr = &znode->zbranch[n];
950 if (znode->level > 0) 950 if (znode->level > 0)
951 printk(KERN_DEBUG "\t%d: znode %p LEB %d:%d len %d key " 951 printk(KERN_ERR "\t%d: znode %p LEB %d:%d len %d key "
952 "%s\n", n, zbr->znode, zbr->lnum, 952 "%s\n", n, zbr->znode, zbr->lnum,
953 zbr->offs, zbr->len, 953 zbr->offs, zbr->len,
954 dbg_snprintf_key(c, &zbr->key, 954 dbg_snprintf_key(c, &zbr->key,
955 key_buf, 955 key_buf,
956 DBG_KEY_BUF_LEN)); 956 DBG_KEY_BUF_LEN));
957 else 957 else
958 printk(KERN_DEBUG "\t%d: LNC %p LEB %d:%d len %d key " 958 printk(KERN_ERR "\t%d: LNC %p LEB %d:%d len %d key "
959 "%s\n", n, zbr->znode, zbr->lnum, 959 "%s\n", n, zbr->znode, zbr->lnum,
960 zbr->offs, zbr->len, 960 zbr->offs, zbr->len,
961 dbg_snprintf_key(c, &zbr->key, 961 dbg_snprintf_key(c, &zbr->key,
@@ -969,16 +969,16 @@ void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat)
969{ 969{
970 int i; 970 int i;
971 971
972 printk(KERN_DEBUG "(pid %d) start dumping heap cat %d (%d elements)\n", 972 printk(KERN_ERR "(pid %d) start dumping heap cat %d (%d elements)\n",
973 current->pid, cat, heap->cnt); 973 current->pid, cat, heap->cnt);
974 for (i = 0; i < heap->cnt; i++) { 974 for (i = 0; i < heap->cnt; i++) {
975 struct ubifs_lprops *lprops = heap->arr[i]; 975 struct ubifs_lprops *lprops = heap->arr[i];
976 976
977 printk(KERN_DEBUG "\t%d. LEB %d hpos %d free %d dirty %d " 977 printk(KERN_ERR "\t%d. LEB %d hpos %d free %d dirty %d "
978 "flags %d\n", i, lprops->lnum, lprops->hpos, 978 "flags %d\n", i, lprops->lnum, lprops->hpos,
979 lprops->free, lprops->dirty, lprops->flags); 979 lprops->free, lprops->dirty, lprops->flags);
980 } 980 }
981 printk(KERN_DEBUG "(pid %d) finish dumping heap\n", current->pid); 981 printk(KERN_ERR "(pid %d) finish dumping heap\n", current->pid);
982} 982}
983 983
984void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, 984void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
@@ -986,15 +986,15 @@ void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
986{ 986{
987 int i; 987 int i;
988 988
989 printk(KERN_DEBUG "(pid %d) dumping pnode:\n", current->pid); 989 printk(KERN_ERR "(pid %d) dumping pnode:\n", current->pid);
990 printk(KERN_DEBUG "\taddress %zx parent %zx cnext %zx\n", 990 printk(KERN_ERR "\taddress %zx parent %zx cnext %zx\n",
991 (size_t)pnode, (size_t)parent, (size_t)pnode->cnext); 991 (size_t)pnode, (size_t)parent, (size_t)pnode->cnext);
992 printk(KERN_DEBUG "\tflags %lu iip %d level %d num %d\n", 992 printk(KERN_ERR "\tflags %lu iip %d level %d num %d\n",
993 pnode->flags, iip, pnode->level, pnode->num); 993 pnode->flags, iip, pnode->level, pnode->num);
994 for (i = 0; i < UBIFS_LPT_FANOUT; i++) { 994 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
995 struct ubifs_lprops *lp = &pnode->lprops[i]; 995 struct ubifs_lprops *lp = &pnode->lprops[i];
996 996
997 printk(KERN_DEBUG "\t%d: free %d dirty %d flags %d lnum %d\n", 997 printk(KERN_ERR "\t%d: free %d dirty %d flags %d lnum %d\n",
998 i, lp->free, lp->dirty, lp->flags, lp->lnum); 998 i, lp->free, lp->dirty, lp->flags, lp->lnum);
999 } 999 }
1000} 1000}
@@ -1004,20 +1004,20 @@ void dbg_dump_tnc(struct ubifs_info *c)
1004 struct ubifs_znode *znode; 1004 struct ubifs_znode *znode;
1005 int level; 1005 int level;
1006 1006
1007 printk(KERN_DEBUG "\n"); 1007 printk(KERN_ERR "\n");
1008 printk(KERN_DEBUG "(pid %d) start dumping TNC tree\n", current->pid); 1008 printk(KERN_ERR "(pid %d) start dumping TNC tree\n", current->pid);
1009 znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL); 1009 znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL);
1010 level = znode->level; 1010 level = znode->level;
1011 printk(KERN_DEBUG "== Level %d ==\n", level); 1011 printk(KERN_ERR "== Level %d ==\n", level);
1012 while (znode) { 1012 while (znode) {
1013 if (level != znode->level) { 1013 if (level != znode->level) {
1014 level = znode->level; 1014 level = znode->level;
1015 printk(KERN_DEBUG "== Level %d ==\n", level); 1015 printk(KERN_ERR "== Level %d ==\n", level);
1016 } 1016 }
1017 dbg_dump_znode(c, znode); 1017 dbg_dump_znode(c, znode);
1018 znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode); 1018 znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode);
1019 } 1019 }
1020 printk(KERN_DEBUG "(pid %d) finish dumping TNC tree\n", current->pid); 1020 printk(KERN_ERR "(pid %d) finish dumping TNC tree\n", current->pid);
1021} 1021}
1022 1022
1023static int dump_znode(struct ubifs_info *c, struct ubifs_znode *znode, 1023static int dump_znode(struct ubifs_info *c, struct ubifs_znode *znode,
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index ad1a6fee6010..9f717655df18 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -164,9 +164,7 @@ struct ubifs_global_debug_info {
164#define dbg_dump_stack() dump_stack() 164#define dbg_dump_stack() dump_stack()
165 165
166#define dbg_err(fmt, ...) do { \ 166#define dbg_err(fmt, ...) do { \
167 spin_lock(&dbg_lock); \
168 ubifs_err(fmt, ##__VA_ARGS__); \ 167 ubifs_err(fmt, ##__VA_ARGS__); \
169 spin_unlock(&dbg_lock); \
170} while (0) 168} while (0)
171 169
172#define ubifs_dbg_msg(type, fmt, ...) \ 170#define ubifs_dbg_msg(type, fmt, ...) \
@@ -217,7 +215,6 @@ struct ubifs_global_debug_info {
217/* Additional recovery messages */ 215/* Additional recovery messages */
218#define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__) 216#define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__)
219 217
220extern spinlock_t dbg_lock;
221extern struct ubifs_global_debug_info ubifs_dbg; 218extern struct ubifs_global_debug_info ubifs_dbg;
222 219
223static inline int dbg_is_chk_gen(const struct ubifs_info *c) 220static inline int dbg_is_chk_gen(const struct ubifs_info *c)
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index d6fe1c79f18b..ec9f1870ab7f 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -566,6 +566,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
566 int sz_change = CALC_DENT_SIZE(dentry->d_name.len); 566 int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
567 int err, budgeted = 1; 567 int err, budgeted = 1;
568 struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 }; 568 struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 };
569 unsigned int saved_nlink = inode->i_nlink;
569 570
570 /* 571 /*
571 * Budget request settings: deletion direntry, deletion inode (+1 for 572 * Budget request settings: deletion direntry, deletion inode (+1 for
@@ -613,7 +614,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
613out_cancel: 614out_cancel:
614 dir->i_size += sz_change; 615 dir->i_size += sz_change;
615 dir_ui->ui_size = dir->i_size; 616 dir_ui->ui_size = dir->i_size;
616 inc_nlink(inode); 617 set_nlink(inode, saved_nlink);
617 unlock_2_inodes(dir, inode); 618 unlock_2_inodes(dir, inode);
618 if (budgeted) 619 if (budgeted)
619 ubifs_release_budget(c, &req); 620 ubifs_release_budget(c, &req);
@@ -704,8 +705,7 @@ out_cancel:
704 dir->i_size += sz_change; 705 dir->i_size += sz_change;
705 dir_ui->ui_size = dir->i_size; 706 dir_ui->ui_size = dir->i_size;
706 inc_nlink(dir); 707 inc_nlink(dir);
707 inc_nlink(inode); 708 set_nlink(inode, 2);
708 inc_nlink(inode);
709 unlock_2_inodes(dir, inode); 709 unlock_2_inodes(dir, inode);
710 if (budgeted) 710 if (budgeted)
711 ubifs_release_budget(c, &req); 711 ubifs_release_budget(c, &req);
@@ -977,6 +977,7 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
977 struct ubifs_budget_req ino_req = { .dirtied_ino = 1, 977 struct ubifs_budget_req ino_req = { .dirtied_ino = 1,
978 .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) }; 978 .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) };
979 struct timespec time; 979 struct timespec time;
980 unsigned int saved_nlink;
980 981
981 /* 982 /*
982 * Budget request settings: deletion direntry, new direntry, removing 983 * Budget request settings: deletion direntry, new direntry, removing
@@ -1059,13 +1060,14 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
1059 if (unlink) { 1060 if (unlink) {
1060 /* 1061 /*
1061 * Directories cannot have hard-links, so if this is a 1062 * Directories cannot have hard-links, so if this is a
1062 * directory, decrement its @i_nlink twice because an empty 1063 * directory, just clear @i_nlink.
1063 * directory has @i_nlink 2.
1064 */ 1064 */
1065 saved_nlink = new_inode->i_nlink;
1065 if (is_dir) 1066 if (is_dir)
1067 clear_nlink(new_inode);
1068 else
1066 drop_nlink(new_inode); 1069 drop_nlink(new_inode);
1067 new_inode->i_ctime = time; 1070 new_inode->i_ctime = time;
1068 drop_nlink(new_inode);
1069 } else { 1071 } else {
1070 new_dir->i_size += new_sz; 1072 new_dir->i_size += new_sz;
1071 ubifs_inode(new_dir)->ui_size = new_dir->i_size; 1073 ubifs_inode(new_dir)->ui_size = new_dir->i_size;
@@ -1102,9 +1104,7 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
1102 1104
1103out_cancel: 1105out_cancel:
1104 if (unlink) { 1106 if (unlink) {
1105 if (is_dir) 1107 set_nlink(new_inode, saved_nlink);
1106 inc_nlink(new_inode);
1107 inc_nlink(new_inode);
1108 } else { 1108 } else {
1109 new_dir->i_size -= new_sz; 1109 new_dir->i_size -= new_sz;
1110 ubifs_inode(new_dir)->ui_size = new_dir->i_size; 1110 ubifs_inode(new_dir)->ui_size = new_dir->i_size;
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index f9c234bf33d3..5c8f6dc1d28b 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1042,10 +1042,10 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
1042 * the page size, the remaining memory is zeroed when mapped, and 1042 * the page size, the remaining memory is zeroed when mapped, and
1043 * writes to that region are not written out to the file." 1043 * writes to that region are not written out to the file."
1044 */ 1044 */
1045 kaddr = kmap_atomic(page, KM_USER0); 1045 kaddr = kmap_atomic(page);
1046 memset(kaddr + len, 0, PAGE_CACHE_SIZE - len); 1046 memset(kaddr + len, 0, PAGE_CACHE_SIZE - len);
1047 flush_dcache_page(page); 1047 flush_dcache_page(page);
1048 kunmap_atomic(kaddr, KM_USER0); 1048 kunmap_atomic(kaddr);
1049 1049
1050 if (i_size > synced_i_size) { 1050 if (i_size > synced_i_size) {
1051 err = inode->i_sb->s_op->write_inode(inode, NULL); 1051 err = inode->i_sb->s_op->write_inode(inode, NULL);
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index ee4f43f4bb99..2a935b317232 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -679,7 +679,8 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
679 ret == SCANNED_GARBAGE || 679 ret == SCANNED_GARBAGE ||
680 ret == SCANNED_A_BAD_PAD_NODE || 680 ret == SCANNED_A_BAD_PAD_NODE ||
681 ret == SCANNED_A_CORRUPT_NODE) { 681 ret == SCANNED_A_CORRUPT_NODE) {
682 dbg_rcvry("found corruption - %d", ret); 682 dbg_rcvry("found corruption (%d) at %d:%d",
683 ret, lnum, offs);
683 break; 684 break;
684 } else { 685 } else {
685 dbg_err("unexpected return value %d", ret); 686 dbg_err("unexpected return value %d", ret);
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index 6094c5a5d7a8..771f7fb6ce92 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -410,13 +410,23 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup)
410 } 410 }
411 411
412 if (c->main_lebs < UBIFS_MIN_MAIN_LEBS) { 412 if (c->main_lebs < UBIFS_MIN_MAIN_LEBS) {
413 err = 7; 413 ubifs_err("too few main LEBs count %d, must be at least %d",
414 c->main_lebs, UBIFS_MIN_MAIN_LEBS);
414 goto failed; 415 goto failed;
415 } 416 }
416 417
417 if (c->max_bud_bytes < (long long)c->leb_size * UBIFS_MIN_BUD_LEBS || 418 max_bytes = (long long)c->leb_size * UBIFS_MIN_BUD_LEBS;
418 c->max_bud_bytes > (long long)c->leb_size * c->main_lebs) { 419 if (c->max_bud_bytes < max_bytes) {
419 err = 8; 420 ubifs_err("too small journal (%lld bytes), must be at least "
421 "%lld bytes", c->max_bud_bytes, max_bytes);
422 goto failed;
423 }
424
425 max_bytes = (long long)c->leb_size * c->main_lebs;
426 if (c->max_bud_bytes > max_bytes) {
427 ubifs_err("too large journal size (%lld bytes), only %lld bytes"
428 "available in the main area",
429 c->max_bud_bytes, max_bytes);
420 goto failed; 430 goto failed;
421 } 431 }
422 432
@@ -450,7 +460,6 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup)
450 goto failed; 460 goto failed;
451 } 461 }
452 462
453 max_bytes = c->main_lebs * (long long)c->leb_size;
454 if (c->rp_size < 0 || max_bytes < c->rp_size) { 463 if (c->rp_size < 0 || max_bytes < c->rp_size) {
455 err = 14; 464 err = 14;
456 goto failed; 465 goto failed;
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 63765d58445b..76e4e0566ad6 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -2076,15 +2076,13 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
2076 goto out_umount; 2076 goto out_umount;
2077 } 2077 }
2078 2078
2079 sb->s_root = d_alloc_root(root); 2079 sb->s_root = d_make_root(root);
2080 if (!sb->s_root) 2080 if (!sb->s_root)
2081 goto out_iput; 2081 goto out_umount;
2082 2082
2083 mutex_unlock(&c->umount_mutex); 2083 mutex_unlock(&c->umount_mutex);
2084 return 0; 2084 return 0;
2085 2085
2086out_iput:
2087 iput(root);
2088out_umount: 2086out_umount:
2089 ubifs_umount(c); 2087 ubifs_umount(c);
2090out_unlock: 2088out_unlock:
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 12e94774aa88..93d59aceaaef 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -84,9 +84,6 @@
84#define INUM_WARN_WATERMARK 0xFFF00000 84#define INUM_WARN_WATERMARK 0xFFF00000
85#define INUM_WATERMARK 0xFFFFFF00 85#define INUM_WATERMARK 0xFFFFFF00
86 86
87/* Largest key size supported in this implementation */
88#define CUR_MAX_KEY_LEN UBIFS_SK_LEN
89
90/* Maximum number of entries in each LPT (LEB category) heap */ 87/* Maximum number of entries in each LPT (LEB category) heap */
91#define LPT_HEAP_SZ 256 88#define LPT_HEAP_SZ 256
92 89
@@ -277,10 +274,10 @@ struct ubifs_old_idx {
277 274
278/* The below union makes it easier to deal with keys */ 275/* The below union makes it easier to deal with keys */
279union ubifs_key { 276union ubifs_key {
280 uint8_t u8[CUR_MAX_KEY_LEN]; 277 uint8_t u8[UBIFS_SK_LEN];
281 uint32_t u32[CUR_MAX_KEY_LEN/4]; 278 uint32_t u32[UBIFS_SK_LEN/4];
282 uint64_t u64[CUR_MAX_KEY_LEN/8]; 279 uint64_t u64[UBIFS_SK_LEN/8];
283 __le32 j32[CUR_MAX_KEY_LEN/4]; 280 __le32 j32[UBIFS_SK_LEN/4];
284}; 281};
285 282
286/** 283/**
diff --git a/fs/udf/file.c b/fs/udf/file.c
index d567b8448dfc..7f3f7ba3df6e 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -87,10 +87,10 @@ static int udf_adinicb_write_end(struct file *file,
87 char *kaddr; 87 char *kaddr;
88 struct udf_inode_info *iinfo = UDF_I(inode); 88 struct udf_inode_info *iinfo = UDF_I(inode);
89 89
90 kaddr = kmap_atomic(page, KM_USER0); 90 kaddr = kmap_atomic(page);
91 memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr + offset, 91 memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr + offset,
92 kaddr + offset, copied); 92 kaddr + offset, copied);
93 kunmap_atomic(kaddr, KM_USER0); 93 kunmap_atomic(kaddr);
94 94
95 return simple_write_end(file, mapping, pos, len, copied, page, fsdata); 95 return simple_write_end(file, mapping, pos, len, copied, page, fsdata);
96} 96}
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 08bf46edf9c4..38de8f234b94 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -32,8 +32,6 @@
32#include <linux/crc-itu-t.h> 32#include <linux/crc-itu-t.h>
33#include <linux/exportfs.h> 33#include <linux/exportfs.h>
34 34
35enum { UDF_MAX_LINKS = 0xffff };
36
37static inline int udf_match(int len1, const unsigned char *name1, int len2, 35static inline int udf_match(int len1, const unsigned char *name1, int len2,
38 const unsigned char *name2) 36 const unsigned char *name2)
39{ 37{
@@ -649,10 +647,6 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
649 struct udf_inode_info *dinfo = UDF_I(dir); 647 struct udf_inode_info *dinfo = UDF_I(dir);
650 struct udf_inode_info *iinfo; 648 struct udf_inode_info *iinfo;
651 649
652 err = -EMLINK;
653 if (dir->i_nlink >= UDF_MAX_LINKS)
654 goto out;
655
656 err = -EIO; 650 err = -EIO;
657 inode = udf_new_inode(dir, S_IFDIR | mode, &err); 651 inode = udf_new_inode(dir, S_IFDIR | mode, &err);
658 if (!inode) 652 if (!inode)
@@ -1032,9 +1026,6 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
1032 struct fileIdentDesc cfi, *fi; 1026 struct fileIdentDesc cfi, *fi;
1033 int err; 1027 int err;
1034 1028
1035 if (inode->i_nlink >= UDF_MAX_LINKS)
1036 return -EMLINK;
1037
1038 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); 1029 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
1039 if (!fi) { 1030 if (!fi) {
1040 return err; 1031 return err;
@@ -1126,10 +1117,6 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
1126 if (udf_get_lb_pblock(old_inode->i_sb, &tloc, 0) != 1117 if (udf_get_lb_pblock(old_inode->i_sb, &tloc, 0) !=
1127 old_dir->i_ino) 1118 old_dir->i_ino)
1128 goto end_rename; 1119 goto end_rename;
1129
1130 retval = -EMLINK;
1131 if (!new_inode && new_dir->i_nlink >= UDF_MAX_LINKS)
1132 goto end_rename;
1133 } 1120 }
1134 if (!nfi) { 1121 if (!nfi) {
1135 nfi = udf_add_entry(new_dir, new_dentry, &nfibh, &ncfi, 1122 nfi = udf_add_entry(new_dir, new_dentry, &nfibh, &ncfi,
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 19111f92d60f..ac8a348dcb69 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -75,6 +75,8 @@
75 75
76#define UDF_DEFAULT_BLOCKSIZE 2048 76#define UDF_DEFAULT_BLOCKSIZE 2048
77 77
78enum { UDF_MAX_LINKS = 0xffff };
79
78/* These are the "meat" - everything else is stuffing */ 80/* These are the "meat" - everything else is stuffing */
79static int udf_fill_super(struct super_block *, void *, int); 81static int udf_fill_super(struct super_block *, void *, int);
80static void udf_put_super(struct super_block *); 82static void udf_put_super(struct super_block *);
@@ -2032,13 +2034,13 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
2032 } 2034 }
2033 2035
2034 /* Allocate a dentry for the root inode */ 2036 /* Allocate a dentry for the root inode */
2035 sb->s_root = d_alloc_root(inode); 2037 sb->s_root = d_make_root(inode);
2036 if (!sb->s_root) { 2038 if (!sb->s_root) {
2037 udf_err(sb, "Couldn't allocate root dentry\n"); 2039 udf_err(sb, "Couldn't allocate root dentry\n");
2038 iput(inode);
2039 goto error_out; 2040 goto error_out;
2040 } 2041 }
2041 sb->s_maxbytes = MAX_LFS_FILESIZE; 2042 sb->s_maxbytes = MAX_LFS_FILESIZE;
2043 sb->s_max_links = UDF_MAX_LINKS;
2042 return 0; 2044 return 0;
2043 2045
2044error_out: 2046error_out:
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 38cac199edff..a2281cadefa1 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -166,10 +166,6 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir,
166 int error; 166 int error;
167 167
168 lock_ufs(dir->i_sb); 168 lock_ufs(dir->i_sb);
169 if (inode->i_nlink >= UFS_LINK_MAX) {
170 unlock_ufs(dir->i_sb);
171 return -EMLINK;
172 }
173 169
174 inode->i_ctime = CURRENT_TIME_SEC; 170 inode->i_ctime = CURRENT_TIME_SEC;
175 inode_inc_link_count(inode); 171 inode_inc_link_count(inode);
@@ -183,10 +179,7 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir,
183static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) 179static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
184{ 180{
185 struct inode * inode; 181 struct inode * inode;
186 int err = -EMLINK; 182 int err;
187
188 if (dir->i_nlink >= UFS_LINK_MAX)
189 goto out;
190 183
191 lock_ufs(dir->i_sb); 184 lock_ufs(dir->i_sb);
192 inode_inc_link_count(dir); 185 inode_inc_link_count(dir);
@@ -305,11 +298,6 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
305 drop_nlink(new_inode); 298 drop_nlink(new_inode);
306 inode_dec_link_count(new_inode); 299 inode_dec_link_count(new_inode);
307 } else { 300 } else {
308 if (dir_de) {
309 err = -EMLINK;
310 if (new_dir->i_nlink >= UFS_LINK_MAX)
311 goto out_dir;
312 }
313 err = ufs_add_link(new_dentry, old_inode); 301 err = ufs_add_link(new_dentry, old_inode);
314 if (err) 302 if (err)
315 goto out_dir; 303 goto out_dir;
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 5246ee3e5607..f636f6b460d0 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1157,16 +1157,17 @@ magic_found:
1157 "fast symlink size (%u)\n", uspi->s_maxsymlinklen); 1157 "fast symlink size (%u)\n", uspi->s_maxsymlinklen);
1158 uspi->s_maxsymlinklen = maxsymlen; 1158 uspi->s_maxsymlinklen = maxsymlen;
1159 } 1159 }
1160 sb->s_max_links = UFS_LINK_MAX;
1160 1161
1161 inode = ufs_iget(sb, UFS_ROOTINO); 1162 inode = ufs_iget(sb, UFS_ROOTINO);
1162 if (IS_ERR(inode)) { 1163 if (IS_ERR(inode)) {
1163 ret = PTR_ERR(inode); 1164 ret = PTR_ERR(inode);
1164 goto failed; 1165 goto failed;
1165 } 1166 }
1166 sb->s_root = d_alloc_root(inode); 1167 sb->s_root = d_make_root(inode);
1167 if (!sb->s_root) { 1168 if (!sb->s_root) {
1168 ret = -ENOMEM; 1169 ret = -ENOMEM;
1169 goto dalloc_failed; 1170 goto failed;
1170 } 1171 }
1171 1172
1172 ufs_setup_cstotal(sb); 1173 ufs_setup_cstotal(sb);
@@ -1180,8 +1181,6 @@ magic_found:
1180 UFSD("EXIT\n"); 1181 UFSD("EXIT\n");
1181 return 0; 1182 return 0;
1182 1183
1183dalloc_failed:
1184 iput(inode);
1185failed: 1184failed:
1186 if (ubh) 1185 if (ubh)
1187 ubh_brelse_uspi (uspi); 1186 ubh_brelse_uspi (uspi);
diff --git a/fs/xattr.c b/fs/xattr.c
index 82f43376c7cd..d6dfd247bb2f 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -16,7 +16,7 @@
16#include <linux/security.h> 16#include <linux/security.h>
17#include <linux/evm.h> 17#include <linux/evm.h>
18#include <linux/syscalls.h> 18#include <linux/syscalls.h>
19#include <linux/module.h> 19#include <linux/export.h>
20#include <linux/fsnotify.h> 20#include <linux/fsnotify.h>
21#include <linux/audit.h> 21#include <linux/audit.h>
22#include <asm/uaccess.h> 22#include <asm/uaccess.h>
diff --git a/fs/xattr_acl.c b/fs/xattr_acl.c
index 8d5a506c82eb..69d06b07b169 100644
--- a/fs/xattr_acl.c
+++ b/fs/xattr_acl.c
@@ -5,7 +5,7 @@
5 * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org> 5 * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
6 */ 6 */
7 7
8#include <linux/module.h> 8#include <linux/export.h>
9#include <linux/fs.h> 9#include <linux/fs.h>
10#include <linux/posix_acl_xattr.h> 10#include <linux/posix_acl_xattr.h>
11#include <linux/gfp.h> 11#include <linux/gfp.h>
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 427a4e82a588..0a9977983f92 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -96,9 +96,6 @@ xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \
96 xfs_qm_bhv.o \ 96 xfs_qm_bhv.o \
97 xfs_qm.o \ 97 xfs_qm.o \
98 xfs_quotaops.o 98 xfs_quotaops.o
99ifeq ($(CONFIG_XFS_QUOTA),y)
100xfs-$(CONFIG_PROC_FS) += xfs_qm_stats.o
101endif
102xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o 99xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o
103xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o 100xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
104xfs-$(CONFIG_PROC_FS) += xfs_stats.o 101xfs-$(CONFIG_PROC_FS) += xfs_stats.o
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 74b9baf36ac3..0dbb9e70fe21 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -26,6 +26,7 @@
26#include "xfs_bmap_btree.h" 26#include "xfs_bmap_btree.h"
27#include "xfs_dinode.h" 27#include "xfs_dinode.h"
28#include "xfs_inode.h" 28#include "xfs_inode.h"
29#include "xfs_inode_item.h"
29#include "xfs_alloc.h" 30#include "xfs_alloc.h"
30#include "xfs_error.h" 31#include "xfs_error.h"
31#include "xfs_rw.h" 32#include "xfs_rw.h"
@@ -99,23 +100,6 @@ xfs_destroy_ioend(
99} 100}
100 101
101/* 102/*
102 * If the end of the current ioend is beyond the current EOF,
103 * return the new EOF value, otherwise zero.
104 */
105STATIC xfs_fsize_t
106xfs_ioend_new_eof(
107 xfs_ioend_t *ioend)
108{
109 xfs_inode_t *ip = XFS_I(ioend->io_inode);
110 xfs_fsize_t isize;
111 xfs_fsize_t bsize;
112
113 bsize = ioend->io_offset + ioend->io_size;
114 isize = MIN(i_size_read(VFS_I(ip)), bsize);
115 return isize > ip->i_d.di_size ? isize : 0;
116}
117
118/*
119 * Fast and loose check if this write could update the on-disk inode size. 103 * Fast and loose check if this write could update the on-disk inode size.
120 */ 104 */
121static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend) 105static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
@@ -124,32 +108,65 @@ static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
124 XFS_I(ioend->io_inode)->i_d.di_size; 108 XFS_I(ioend->io_inode)->i_d.di_size;
125} 109}
126 110
111STATIC int
112xfs_setfilesize_trans_alloc(
113 struct xfs_ioend *ioend)
114{
115 struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount;
116 struct xfs_trans *tp;
117 int error;
118
119 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
120
121 error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
122 if (error) {
123 xfs_trans_cancel(tp, 0);
124 return error;
125 }
126
127 ioend->io_append_trans = tp;
128
129 /*
130 * We hand off the transaction to the completion thread now, so
131 * clear the flag here.
132 */
133 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
134 return 0;
135}
136
127/* 137/*
128 * Update on-disk file size now that data has been written to disk. 138 * Update on-disk file size now that data has been written to disk.
129 *
130 * This function does not block as blocking on the inode lock in IO completion
131 * can lead to IO completion order dependency deadlocks.. If it can't get the
132 * inode ilock it will return EAGAIN. Callers must handle this.
133 */ 139 */
134STATIC int 140STATIC int
135xfs_setfilesize( 141xfs_setfilesize(
136 xfs_ioend_t *ioend) 142 struct xfs_ioend *ioend)
137{ 143{
138 xfs_inode_t *ip = XFS_I(ioend->io_inode); 144 struct xfs_inode *ip = XFS_I(ioend->io_inode);
145 struct xfs_trans *tp = ioend->io_append_trans;
139 xfs_fsize_t isize; 146 xfs_fsize_t isize;
140 147
141 if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) 148 /*
142 return EAGAIN; 149 * The transaction was allocated in the I/O submission thread,
150 * thus we need to mark ourselves as beeing in a transaction
151 * manually.
152 */
153 current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
143 154
144 isize = xfs_ioend_new_eof(ioend); 155 xfs_ilock(ip, XFS_ILOCK_EXCL);
145 if (isize) { 156 isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size);
146 trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size); 157 if (!isize) {
147 ip->i_d.di_size = isize; 158 xfs_iunlock(ip, XFS_ILOCK_EXCL);
148 xfs_mark_inode_dirty(ip); 159 xfs_trans_cancel(tp, 0);
160 return 0;
149 } 161 }
150 162
151 xfs_iunlock(ip, XFS_ILOCK_EXCL); 163 trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
152 return 0; 164
165 ip->i_d.di_size = isize;
166 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
167 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
168
169 return xfs_trans_commit(tp, 0);
153} 170}
154 171
155/* 172/*
@@ -163,10 +180,12 @@ xfs_finish_ioend(
163 struct xfs_ioend *ioend) 180 struct xfs_ioend *ioend)
164{ 181{
165 if (atomic_dec_and_test(&ioend->io_remaining)) { 182 if (atomic_dec_and_test(&ioend->io_remaining)) {
183 struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount;
184
166 if (ioend->io_type == IO_UNWRITTEN) 185 if (ioend->io_type == IO_UNWRITTEN)
167 queue_work(xfsconvertd_workqueue, &ioend->io_work); 186 queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
168 else if (xfs_ioend_is_append(ioend)) 187 else if (ioend->io_append_trans)
169 queue_work(xfsdatad_workqueue, &ioend->io_work); 188 queue_work(mp->m_data_workqueue, &ioend->io_work);
170 else 189 else
171 xfs_destroy_ioend(ioend); 190 xfs_destroy_ioend(ioend);
172 } 191 }
@@ -195,35 +214,36 @@ xfs_end_io(
195 * range to normal written extens after the data I/O has finished. 214 * range to normal written extens after the data I/O has finished.
196 */ 215 */
197 if (ioend->io_type == IO_UNWRITTEN) { 216 if (ioend->io_type == IO_UNWRITTEN) {
217 /*
218 * For buffered I/O we never preallocate a transaction when
219 * doing the unwritten extent conversion, but for direct I/O
220 * we do not know if we are converting an unwritten extent
221 * or not at the point where we preallocate the transaction.
222 */
223 if (ioend->io_append_trans) {
224 ASSERT(ioend->io_isdirect);
225
226 current_set_flags_nested(
227 &ioend->io_append_trans->t_pflags, PF_FSTRANS);
228 xfs_trans_cancel(ioend->io_append_trans, 0);
229 }
230
198 error = xfs_iomap_write_unwritten(ip, ioend->io_offset, 231 error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
199 ioend->io_size); 232 ioend->io_size);
200 if (error) { 233 if (error) {
201 ioend->io_error = -error; 234 ioend->io_error = -error;
202 goto done; 235 goto done;
203 } 236 }
237 } else if (ioend->io_append_trans) {
238 error = xfs_setfilesize(ioend);
239 if (error)
240 ioend->io_error = -error;
241 } else {
242 ASSERT(!xfs_ioend_is_append(ioend));
204 } 243 }
205 244
206 /*
207 * We might have to update the on-disk file size after extending
208 * writes.
209 */
210 error = xfs_setfilesize(ioend);
211 ASSERT(!error || error == EAGAIN);
212
213done: 245done:
214 /* 246 xfs_destroy_ioend(ioend);
215 * If we didn't complete processing of the ioend, requeue it to the
216 * tail of the workqueue for another attempt later. Otherwise destroy
217 * it.
218 */
219 if (error == EAGAIN) {
220 atomic_inc(&ioend->io_remaining);
221 xfs_finish_ioend(ioend);
222 /* ensure we don't spin on blocked ioends */
223 delay(1);
224 } else {
225 xfs_destroy_ioend(ioend);
226 }
227} 247}
228 248
229/* 249/*
@@ -259,6 +279,7 @@ xfs_alloc_ioend(
259 */ 279 */
260 atomic_set(&ioend->io_remaining, 1); 280 atomic_set(&ioend->io_remaining, 1);
261 ioend->io_isasync = 0; 281 ioend->io_isasync = 0;
282 ioend->io_isdirect = 0;
262 ioend->io_error = 0; 283 ioend->io_error = 0;
263 ioend->io_list = NULL; 284 ioend->io_list = NULL;
264 ioend->io_type = type; 285 ioend->io_type = type;
@@ -269,6 +290,7 @@ xfs_alloc_ioend(
269 ioend->io_size = 0; 290 ioend->io_size = 0;
270 ioend->io_iocb = NULL; 291 ioend->io_iocb = NULL;
271 ioend->io_result = 0; 292 ioend->io_result = 0;
293 ioend->io_append_trans = NULL;
272 294
273 INIT_WORK(&ioend->io_work, xfs_end_io); 295 INIT_WORK(&ioend->io_work, xfs_end_io);
274 return ioend; 296 return ioend;
@@ -379,14 +401,6 @@ xfs_submit_ioend_bio(
379 atomic_inc(&ioend->io_remaining); 401 atomic_inc(&ioend->io_remaining);
380 bio->bi_private = ioend; 402 bio->bi_private = ioend;
381 bio->bi_end_io = xfs_end_bio; 403 bio->bi_end_io = xfs_end_bio;
382
383 /*
384 * If the I/O is beyond EOF we mark the inode dirty immediately
385 * but don't update the inode size until I/O completion.
386 */
387 if (xfs_ioend_new_eof(ioend))
388 xfs_mark_inode_dirty(XFS_I(ioend->io_inode));
389
390 submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio); 404 submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
391} 405}
392 406
@@ -1033,8 +1047,20 @@ xfs_vm_writepage(
1033 wbc, end_index); 1047 wbc, end_index);
1034 } 1048 }
1035 1049
1036 if (iohead) 1050 if (iohead) {
1051 /*
1052 * Reserve log space if we might write beyond the on-disk
1053 * inode size.
1054 */
1055 if (ioend->io_type != IO_UNWRITTEN &&
1056 xfs_ioend_is_append(ioend)) {
1057 err = xfs_setfilesize_trans_alloc(ioend);
1058 if (err)
1059 goto error;
1060 }
1061
1037 xfs_submit_ioend(wbc, iohead); 1062 xfs_submit_ioend(wbc, iohead);
1063 }
1038 1064
1039 return 0; 1065 return 0;
1040 1066
@@ -1314,17 +1340,32 @@ xfs_vm_direct_IO(
1314{ 1340{
1315 struct inode *inode = iocb->ki_filp->f_mapping->host; 1341 struct inode *inode = iocb->ki_filp->f_mapping->host;
1316 struct block_device *bdev = xfs_find_bdev_for_inode(inode); 1342 struct block_device *bdev = xfs_find_bdev_for_inode(inode);
1343 struct xfs_ioend *ioend = NULL;
1317 ssize_t ret; 1344 ssize_t ret;
1318 1345
1319 if (rw & WRITE) { 1346 if (rw & WRITE) {
1320 iocb->private = xfs_alloc_ioend(inode, IO_DIRECT); 1347 size_t size = iov_length(iov, nr_segs);
1348
1349 /*
1350 * We need to preallocate a transaction for a size update
1351 * here. In the case that this write both updates the size
1352 * and converts at least on unwritten extent we will cancel
1353 * the still clean transaction after the I/O has finished.
1354 */
1355 iocb->private = ioend = xfs_alloc_ioend(inode, IO_DIRECT);
1356 if (offset + size > XFS_I(inode)->i_d.di_size) {
1357 ret = xfs_setfilesize_trans_alloc(ioend);
1358 if (ret)
1359 goto out_destroy_ioend;
1360 ioend->io_isdirect = 1;
1361 }
1321 1362
1322 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, 1363 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
1323 offset, nr_segs, 1364 offset, nr_segs,
1324 xfs_get_blocks_direct, 1365 xfs_get_blocks_direct,
1325 xfs_end_io_direct_write, NULL, 0); 1366 xfs_end_io_direct_write, NULL, 0);
1326 if (ret != -EIOCBQUEUED && iocb->private) 1367 if (ret != -EIOCBQUEUED && iocb->private)
1327 xfs_destroy_ioend(iocb->private); 1368 goto out_trans_cancel;
1328 } else { 1369 } else {
1329 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, 1370 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
1330 offset, nr_segs, 1371 offset, nr_segs,
@@ -1333,6 +1374,16 @@ xfs_vm_direct_IO(
1333 } 1374 }
1334 1375
1335 return ret; 1376 return ret;
1377
1378out_trans_cancel:
1379 if (ioend->io_append_trans) {
1380 current_set_flags_nested(&ioend->io_append_trans->t_pflags,
1381 PF_FSTRANS);
1382 xfs_trans_cancel(ioend->io_append_trans, 0);
1383 }
1384out_destroy_ioend:
1385 xfs_destroy_ioend(ioend);
1386 return ret;
1336} 1387}
1337 1388
1338STATIC void 1389STATIC void
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index 116dd5c37034..84eafbcb0d9d 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -18,8 +18,6 @@
18#ifndef __XFS_AOPS_H__ 18#ifndef __XFS_AOPS_H__
19#define __XFS_AOPS_H__ 19#define __XFS_AOPS_H__
20 20
21extern struct workqueue_struct *xfsdatad_workqueue;
22extern struct workqueue_struct *xfsconvertd_workqueue;
23extern mempool_t *xfs_ioend_pool; 21extern mempool_t *xfs_ioend_pool;
24 22
25/* 23/*
@@ -48,12 +46,14 @@ typedef struct xfs_ioend {
48 int io_error; /* I/O error code */ 46 int io_error; /* I/O error code */
49 atomic_t io_remaining; /* hold count */ 47 atomic_t io_remaining; /* hold count */
50 unsigned int io_isasync : 1; /* needs aio_complete */ 48 unsigned int io_isasync : 1; /* needs aio_complete */
49 unsigned int io_isdirect : 1;/* direct I/O */
51 struct inode *io_inode; /* file being written to */ 50 struct inode *io_inode; /* file being written to */
52 struct buffer_head *io_buffer_head;/* buffer linked list head */ 51 struct buffer_head *io_buffer_head;/* buffer linked list head */
53 struct buffer_head *io_buffer_tail;/* buffer linked list tail */ 52 struct buffer_head *io_buffer_tail;/* buffer linked list tail */
54 size_t io_size; /* size of the extent */ 53 size_t io_size; /* size of the extent */
55 xfs_off_t io_offset; /* offset in the file */ 54 xfs_off_t io_offset; /* offset in the file */
56 struct work_struct io_work; /* xfsdatad work queue */ 55 struct work_struct io_work; /* xfsdatad work queue */
56 struct xfs_trans *io_append_trans;/* xact. for size update */
57 struct kiocb *io_iocb; 57 struct kiocb *io_iocb;
58 int io_result; 58 int io_result;
59} xfs_ioend_t; 59} xfs_ioend_t;
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 188ef2fbd628..3548c6f75593 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5536,8 +5536,12 @@ xfs_getbmap(
5536 if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx)) 5536 if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
5537 return XFS_ERROR(ENOMEM); 5537 return XFS_ERROR(ENOMEM);
5538 out = kmem_zalloc(bmv->bmv_count * sizeof(struct getbmapx), KM_MAYFAIL); 5538 out = kmem_zalloc(bmv->bmv_count * sizeof(struct getbmapx), KM_MAYFAIL);
5539 if (!out) 5539 if (!out) {
5540 return XFS_ERROR(ENOMEM); 5540 out = kmem_zalloc_large(bmv->bmv_count *
5541 sizeof(struct getbmapx));
5542 if (!out)
5543 return XFS_ERROR(ENOMEM);
5544 }
5541 5545
5542 xfs_ilock(ip, XFS_IOLOCK_SHARED); 5546 xfs_ilock(ip, XFS_IOLOCK_SHARED);
5543 if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) { 5547 if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) {
@@ -5661,7 +5665,10 @@ xfs_getbmap(
5661 break; 5665 break;
5662 } 5666 }
5663 5667
5664 kmem_free(out); 5668 if (is_vmalloc_addr(out))
5669 kmem_free_large(out);
5670 else
5671 kmem_free(out);
5665 return error; 5672 return error;
5666} 5673}
5667 5674
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 4dff85c7d7eb..6819b5163e33 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -45,8 +45,6 @@ static kmem_zone_t *xfs_buf_zone;
45STATIC int xfsbufd(void *); 45STATIC int xfsbufd(void *);
46 46
47static struct workqueue_struct *xfslogd_workqueue; 47static struct workqueue_struct *xfslogd_workqueue;
48struct workqueue_struct *xfsdatad_workqueue;
49struct workqueue_struct *xfsconvertd_workqueue;
50 48
51#ifdef XFS_BUF_LOCK_TRACKING 49#ifdef XFS_BUF_LOCK_TRACKING
52# define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid) 50# define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid)
@@ -1793,21 +1791,8 @@ xfs_buf_init(void)
1793 if (!xfslogd_workqueue) 1791 if (!xfslogd_workqueue)
1794 goto out_free_buf_zone; 1792 goto out_free_buf_zone;
1795 1793
1796 xfsdatad_workqueue = alloc_workqueue("xfsdatad", WQ_MEM_RECLAIM, 1);
1797 if (!xfsdatad_workqueue)
1798 goto out_destroy_xfslogd_workqueue;
1799
1800 xfsconvertd_workqueue = alloc_workqueue("xfsconvertd",
1801 WQ_MEM_RECLAIM, 1);
1802 if (!xfsconvertd_workqueue)
1803 goto out_destroy_xfsdatad_workqueue;
1804
1805 return 0; 1794 return 0;
1806 1795
1807 out_destroy_xfsdatad_workqueue:
1808 destroy_workqueue(xfsdatad_workqueue);
1809 out_destroy_xfslogd_workqueue:
1810 destroy_workqueue(xfslogd_workqueue);
1811 out_free_buf_zone: 1796 out_free_buf_zone:
1812 kmem_zone_destroy(xfs_buf_zone); 1797 kmem_zone_destroy(xfs_buf_zone);
1813 out: 1798 out:
@@ -1817,8 +1802,6 @@ xfs_buf_init(void)
1817void 1802void
1818xfs_buf_terminate(void) 1803xfs_buf_terminate(void)
1819{ 1804{
1820 destroy_workqueue(xfsconvertd_workqueue);
1821 destroy_workqueue(xfsdatad_workqueue);
1822 destroy_workqueue(xfslogd_workqueue); 1805 destroy_workqueue(xfslogd_workqueue);
1823 kmem_zone_destroy(xfs_buf_zone); 1806 kmem_zone_destroy(xfs_buf_zone);
1824} 1807}
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index dd974a55c77d..1137bbc5eccb 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -215,7 +215,7 @@ xfs_swap_extents(
215 xfs_trans_t *tp; 215 xfs_trans_t *tp;
216 xfs_bstat_t *sbp = &sxp->sx_stat; 216 xfs_bstat_t *sbp = &sxp->sx_stat;
217 xfs_ifork_t *tempifp, *ifp, *tifp; 217 xfs_ifork_t *tempifp, *ifp, *tifp;
218 int ilf_fields, tilf_fields; 218 int src_log_flags, target_log_flags;
219 int error = 0; 219 int error = 0;
220 int aforkblks = 0; 220 int aforkblks = 0;
221 int taforkblks = 0; 221 int taforkblks = 0;
@@ -385,9 +385,8 @@ xfs_swap_extents(
385 tip->i_delayed_blks = ip->i_delayed_blks; 385 tip->i_delayed_blks = ip->i_delayed_blks;
386 ip->i_delayed_blks = 0; 386 ip->i_delayed_blks = 0;
387 387
388 ilf_fields = XFS_ILOG_CORE; 388 src_log_flags = XFS_ILOG_CORE;
389 389 switch (ip->i_d.di_format) {
390 switch(ip->i_d.di_format) {
391 case XFS_DINODE_FMT_EXTENTS: 390 case XFS_DINODE_FMT_EXTENTS:
392 /* If the extents fit in the inode, fix the 391 /* If the extents fit in the inode, fix the
393 * pointer. Otherwise it's already NULL or 392 * pointer. Otherwise it's already NULL or
@@ -397,16 +396,15 @@ xfs_swap_extents(
397 ifp->if_u1.if_extents = 396 ifp->if_u1.if_extents =
398 ifp->if_u2.if_inline_ext; 397 ifp->if_u2.if_inline_ext;
399 } 398 }
400 ilf_fields |= XFS_ILOG_DEXT; 399 src_log_flags |= XFS_ILOG_DEXT;
401 break; 400 break;
402 case XFS_DINODE_FMT_BTREE: 401 case XFS_DINODE_FMT_BTREE:
403 ilf_fields |= XFS_ILOG_DBROOT; 402 src_log_flags |= XFS_ILOG_DBROOT;
404 break; 403 break;
405 } 404 }
406 405
407 tilf_fields = XFS_ILOG_CORE; 406 target_log_flags = XFS_ILOG_CORE;
408 407 switch (tip->i_d.di_format) {
409 switch(tip->i_d.di_format) {
410 case XFS_DINODE_FMT_EXTENTS: 408 case XFS_DINODE_FMT_EXTENTS:
411 /* If the extents fit in the inode, fix the 409 /* If the extents fit in the inode, fix the
412 * pointer. Otherwise it's already NULL or 410 * pointer. Otherwise it's already NULL or
@@ -416,10 +414,10 @@ xfs_swap_extents(
416 tifp->if_u1.if_extents = 414 tifp->if_u1.if_extents =
417 tifp->if_u2.if_inline_ext; 415 tifp->if_u2.if_inline_ext;
418 } 416 }
419 tilf_fields |= XFS_ILOG_DEXT; 417 target_log_flags |= XFS_ILOG_DEXT;
420 break; 418 break;
421 case XFS_DINODE_FMT_BTREE: 419 case XFS_DINODE_FMT_BTREE:
422 tilf_fields |= XFS_ILOG_DBROOT; 420 target_log_flags |= XFS_ILOG_DBROOT;
423 break; 421 break;
424 } 422 }
425 423
@@ -427,8 +425,8 @@ xfs_swap_extents(
427 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 425 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
428 xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 426 xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
429 427
430 xfs_trans_log_inode(tp, ip, ilf_fields); 428 xfs_trans_log_inode(tp, ip, src_log_flags);
431 xfs_trans_log_inode(tp, tip, tilf_fields); 429 xfs_trans_log_inode(tp, tip, target_log_flags);
432 430
433 /* 431 /*
434 * If this is a synchronous mount, make sure that the 432 * If this is a synchronous mount, make sure that the
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 9245e029b8ea..d3b63aefd01d 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -29,6 +29,7 @@
29#include "xfs_dinode.h" 29#include "xfs_dinode.h"
30#include "xfs_inode.h" 30#include "xfs_inode.h"
31#include "xfs_inode_item.h" 31#include "xfs_inode_item.h"
32#include "xfs_dir2.h"
32#include "xfs_dir2_format.h" 33#include "xfs_dir2_format.h"
33#include "xfs_dir2_priv.h" 34#include "xfs_dir2_priv.h"
34#include "xfs_error.h" 35#include "xfs_error.h"
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 53db20ee3e77..4be16a0cbe5a 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -43,11 +43,10 @@
43 * Lock order: 43 * Lock order:
44 * 44 *
45 * ip->i_lock 45 * ip->i_lock
46 * qh->qh_lock 46 * qi->qi_tree_lock
47 * qi->qi_dqlist_lock 47 * dquot->q_qlock (xfs_dqlock() and friends)
48 * dquot->q_qlock (xfs_dqlock() and friends) 48 * dquot->q_flush (xfs_dqflock() and friends)
49 * dquot->q_flush (xfs_dqflock() and friends) 49 * qi->qi_lru_lock
50 * xfs_Gqm->qm_dqfrlist_lock
51 * 50 *
52 * If two dquots need to be locked the order is user before group/project, 51 * If two dquots need to be locked the order is user before group/project,
53 * otherwise by the lowest id first, see xfs_dqlock2. 52 * otherwise by the lowest id first, see xfs_dqlock2.
@@ -60,6 +59,9 @@ int xfs_dqreq_num;
60int xfs_dqerror_mod = 33; 59int xfs_dqerror_mod = 33;
61#endif 60#endif
62 61
62struct kmem_zone *xfs_qm_dqtrxzone;
63static struct kmem_zone *xfs_qm_dqzone;
64
63static struct lock_class_key xfs_dquot_other_class; 65static struct lock_class_key xfs_dquot_other_class;
64 66
65/* 67/*
@@ -69,12 +71,12 @@ void
69xfs_qm_dqdestroy( 71xfs_qm_dqdestroy(
70 xfs_dquot_t *dqp) 72 xfs_dquot_t *dqp)
71{ 73{
72 ASSERT(list_empty(&dqp->q_freelist)); 74 ASSERT(list_empty(&dqp->q_lru));
73 75
74 mutex_destroy(&dqp->q_qlock); 76 mutex_destroy(&dqp->q_qlock);
75 kmem_zone_free(xfs_Gqm->qm_dqzone, dqp); 77 kmem_zone_free(xfs_qm_dqzone, dqp);
76 78
77 atomic_dec(&xfs_Gqm->qm_totaldquots); 79 XFS_STATS_DEC(xs_qm_dquot);
78} 80}
79 81
80/* 82/*
@@ -282,7 +284,7 @@ xfs_qm_dqalloc(
282 * Return if this type of quotas is turned off while we didn't 284 * Return if this type of quotas is turned off while we didn't
283 * have an inode lock 285 * have an inode lock
284 */ 286 */
285 if (XFS_IS_THIS_QUOTA_OFF(dqp)) { 287 if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
286 xfs_iunlock(quotip, XFS_ILOCK_EXCL); 288 xfs_iunlock(quotip, XFS_ILOCK_EXCL);
287 return (ESRCH); 289 return (ESRCH);
288 } 290 }
@@ -384,7 +386,7 @@ xfs_qm_dqtobp(
384 dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk; 386 dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
385 387
386 xfs_ilock(quotip, XFS_ILOCK_SHARED); 388 xfs_ilock(quotip, XFS_ILOCK_SHARED);
387 if (XFS_IS_THIS_QUOTA_OFF(dqp)) { 389 if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
388 /* 390 /*
389 * Return if this type of quotas is turned off while we 391 * Return if this type of quotas is turned off while we
390 * didn't have the quota inode lock. 392 * didn't have the quota inode lock.
@@ -492,12 +494,12 @@ xfs_qm_dqread(
492 int cancelflags = 0; 494 int cancelflags = 0;
493 495
494 496
495 dqp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP); 497 dqp = kmem_zone_zalloc(xfs_qm_dqzone, KM_SLEEP);
496 498
497 dqp->dq_flags = type; 499 dqp->dq_flags = type;
498 dqp->q_core.d_id = cpu_to_be32(id); 500 dqp->q_core.d_id = cpu_to_be32(id);
499 dqp->q_mount = mp; 501 dqp->q_mount = mp;
500 INIT_LIST_HEAD(&dqp->q_freelist); 502 INIT_LIST_HEAD(&dqp->q_lru);
501 mutex_init(&dqp->q_qlock); 503 mutex_init(&dqp->q_qlock);
502 init_waitqueue_head(&dqp->q_pinwait); 504 init_waitqueue_head(&dqp->q_pinwait);
503 505
@@ -516,7 +518,7 @@ xfs_qm_dqread(
516 if (!(type & XFS_DQ_USER)) 518 if (!(type & XFS_DQ_USER))
517 lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class); 519 lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class);
518 520
519 atomic_inc(&xfs_Gqm->qm_totaldquots); 521 XFS_STATS_INC(xs_qm_dquot);
520 522
521 trace_xfs_dqread(dqp); 523 trace_xfs_dqread(dqp);
522 524
@@ -602,60 +604,6 @@ error0:
602} 604}
603 605
604/* 606/*
605 * Lookup a dquot in the incore dquot hashtable. We keep two separate
606 * hashtables for user and group dquots; and, these are global tables
607 * inside the XQM, not per-filesystem tables.
608 * The hash chain must be locked by caller, and it is left locked
609 * on return. Returning dquot is locked.
610 */
611STATIC int
612xfs_qm_dqlookup(
613 xfs_mount_t *mp,
614 xfs_dqid_t id,
615 xfs_dqhash_t *qh,
616 xfs_dquot_t **O_dqpp)
617{
618 xfs_dquot_t *dqp;
619
620 ASSERT(mutex_is_locked(&qh->qh_lock));
621
622 /*
623 * Traverse the hashchain looking for a match
624 */
625 list_for_each_entry(dqp, &qh->qh_list, q_hashlist) {
626 /*
627 * We already have the hashlock. We don't need the
628 * dqlock to look at the id field of the dquot, since the
629 * id can't be modified without the hashlock anyway.
630 */
631 if (be32_to_cpu(dqp->q_core.d_id) != id || dqp->q_mount != mp)
632 continue;
633
634 trace_xfs_dqlookup_found(dqp);
635
636 xfs_dqlock(dqp);
637 if (dqp->dq_flags & XFS_DQ_FREEING) {
638 *O_dqpp = NULL;
639 xfs_dqunlock(dqp);
640 return -1;
641 }
642
643 dqp->q_nrefs++;
644
645 /*
646 * move the dquot to the front of the hashchain
647 */
648 list_move(&dqp->q_hashlist, &qh->qh_list);
649 trace_xfs_dqlookup_done(dqp);
650 *O_dqpp = dqp;
651 return 0;
652 }
653
654 *O_dqpp = NULL;
655 return 1;
656}
657
658/*
659 * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a 607 * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a
660 * a locked dquot, doing an allocation (if requested) as needed. 608 * a locked dquot, doing an allocation (if requested) as needed.
661 * When both an inode and an id are given, the inode's id takes precedence. 609 * When both an inode and an id are given, the inode's id takes precedence.
@@ -672,10 +620,10 @@ xfs_qm_dqget(
672 uint flags, /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */ 620 uint flags, /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */
673 xfs_dquot_t **O_dqpp) /* OUT : locked incore dquot */ 621 xfs_dquot_t **O_dqpp) /* OUT : locked incore dquot */
674{ 622{
675 xfs_dquot_t *dqp; 623 struct xfs_quotainfo *qi = mp->m_quotainfo;
676 xfs_dqhash_t *h; 624 struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type);
677 uint version; 625 struct xfs_dquot *dqp;
678 int error; 626 int error;
679 627
680 ASSERT(XFS_IS_QUOTA_RUNNING(mp)); 628 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
681 if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) || 629 if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) ||
@@ -683,7 +631,6 @@ xfs_qm_dqget(
683 (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) { 631 (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) {
684 return (ESRCH); 632 return (ESRCH);
685 } 633 }
686 h = XFS_DQ_HASH(mp, id, type);
687 634
688#ifdef DEBUG 635#ifdef DEBUG
689 if (xfs_do_dqerror) { 636 if (xfs_do_dqerror) {
@@ -699,42 +646,33 @@ xfs_qm_dqget(
699 type == XFS_DQ_GROUP); 646 type == XFS_DQ_GROUP);
700 if (ip) { 647 if (ip) {
701 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 648 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
702 if (type == XFS_DQ_USER) 649 ASSERT(xfs_inode_dquot(ip, type) == NULL);
703 ASSERT(ip->i_udquot == NULL);
704 else
705 ASSERT(ip->i_gdquot == NULL);
706 } 650 }
707#endif 651#endif
708 652
709restart: 653restart:
710 mutex_lock(&h->qh_lock); 654 mutex_lock(&qi->qi_tree_lock);
655 dqp = radix_tree_lookup(tree, id);
656 if (dqp) {
657 xfs_dqlock(dqp);
658 if (dqp->dq_flags & XFS_DQ_FREEING) {
659 xfs_dqunlock(dqp);
660 mutex_unlock(&qi->qi_tree_lock);
661 trace_xfs_dqget_freeing(dqp);
662 delay(1);
663 goto restart;
664 }
711 665
712 /* 666 dqp->q_nrefs++;
713 * Look in the cache (hashtable). 667 mutex_unlock(&qi->qi_tree_lock);
714 * The chain is kept locked during lookup. 668
715 */ 669 trace_xfs_dqget_hit(dqp);
716 switch (xfs_qm_dqlookup(mp, id, h, O_dqpp)) { 670 XFS_STATS_INC(xs_qm_dqcachehits);
717 case -1: 671 *O_dqpp = dqp;
718 XQM_STATS_INC(xqmstats.xs_qm_dquot_dups); 672 return 0;
719 mutex_unlock(&h->qh_lock);
720 delay(1);
721 goto restart;
722 case 0:
723 XQM_STATS_INC(xqmstats.xs_qm_dqcachehits);
724 /*
725 * The dquot was found, moved to the front of the chain,
726 * taken off the freelist if it was on it, and locked
727 * at this point. Just unlock the hashchain and return.
728 */
729 ASSERT(*O_dqpp);
730 ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
731 mutex_unlock(&h->qh_lock);
732 trace_xfs_dqget_hit(*O_dqpp);
733 return 0; /* success */
734 default:
735 XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
736 break;
737 } 673 }
674 mutex_unlock(&qi->qi_tree_lock);
675 XFS_STATS_INC(xs_qm_dqcachemisses);
738 676
739 /* 677 /*
740 * Dquot cache miss. We don't want to keep the inode lock across 678 * Dquot cache miss. We don't want to keep the inode lock across
@@ -745,12 +683,6 @@ restart:
745 */ 683 */
746 if (ip) 684 if (ip)
747 xfs_iunlock(ip, XFS_ILOCK_EXCL); 685 xfs_iunlock(ip, XFS_ILOCK_EXCL);
748 /*
749 * Save the hashchain version stamp, and unlock the chain, so that
750 * we don't keep the lock across a disk read
751 */
752 version = h->qh_version;
753 mutex_unlock(&h->qh_lock);
754 686
755 error = xfs_qm_dqread(mp, id, type, flags, &dqp); 687 error = xfs_qm_dqread(mp, id, type, flags, &dqp);
756 688
@@ -760,97 +692,53 @@ restart:
760 if (error) 692 if (error)
761 return error; 693 return error;
762 694
763 /*
764 * Dquot lock comes after hashlock in the lock ordering
765 */
766 if (ip) { 695 if (ip) {
767 /* 696 /*
768 * A dquot could be attached to this inode by now, since 697 * A dquot could be attached to this inode by now, since
769 * we had dropped the ilock. 698 * we had dropped the ilock.
770 */ 699 */
771 if (type == XFS_DQ_USER) { 700 if (xfs_this_quota_on(mp, type)) {
772 if (!XFS_IS_UQUOTA_ON(mp)) { 701 struct xfs_dquot *dqp1;
773 /* inode stays locked on return */ 702
774 xfs_qm_dqdestroy(dqp); 703 dqp1 = xfs_inode_dquot(ip, type);
775 return XFS_ERROR(ESRCH); 704 if (dqp1) {
776 }
777 if (ip->i_udquot) {
778 xfs_qm_dqdestroy(dqp); 705 xfs_qm_dqdestroy(dqp);
779 dqp = ip->i_udquot; 706 dqp = dqp1;
780 xfs_dqlock(dqp); 707 xfs_dqlock(dqp);
781 goto dqret; 708 goto dqret;
782 } 709 }
783 } else { 710 } else {
784 if (!XFS_IS_OQUOTA_ON(mp)) { 711 /* inode stays locked on return */
785 /* inode stays locked on return */ 712 xfs_qm_dqdestroy(dqp);
786 xfs_qm_dqdestroy(dqp); 713 return XFS_ERROR(ESRCH);
787 return XFS_ERROR(ESRCH);
788 }
789 if (ip->i_gdquot) {
790 xfs_qm_dqdestroy(dqp);
791 dqp = ip->i_gdquot;
792 xfs_dqlock(dqp);
793 goto dqret;
794 }
795 } 714 }
796 } 715 }
797 716
798 /* 717 mutex_lock(&qi->qi_tree_lock);
799 * Hashlock comes after ilock in lock order 718 error = -radix_tree_insert(tree, id, dqp);
800 */ 719 if (unlikely(error)) {
801 mutex_lock(&h->qh_lock); 720 WARN_ON(error != EEXIST);
802 if (version != h->qh_version) { 721
803 xfs_dquot_t *tmpdqp;
804 /* 722 /*
805 * Now, see if somebody else put the dquot in the 723 * Duplicate found. Just throw away the new dquot and start
806 * hashtable before us. This can happen because we didn't 724 * over.
807 * keep the hashchain lock. We don't have to worry about
808 * lock order between the two dquots here since dqp isn't
809 * on any findable lists yet.
810 */ 725 */
811 switch (xfs_qm_dqlookup(mp, id, h, &tmpdqp)) { 726 mutex_unlock(&qi->qi_tree_lock);
812 case 0: 727 trace_xfs_dqget_dup(dqp);
813 case -1: 728 xfs_qm_dqdestroy(dqp);
814 /* 729 XFS_STATS_INC(xs_qm_dquot_dups);
815 * Duplicate found, either in cache or on its way out. 730 goto restart;
816 * Just throw away the new dquot and start over.
817 */
818 if (tmpdqp)
819 xfs_qm_dqput(tmpdqp);
820 mutex_unlock(&h->qh_lock);
821 xfs_qm_dqdestroy(dqp);
822 XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
823 goto restart;
824 default:
825 break;
826 }
827 } 731 }
828 732
829 /* 733 /*
830 * Put the dquot at the beginning of the hash-chain and mp's list
831 * LOCK ORDER: hashlock, freelistlock, mplistlock, udqlock, gdqlock ..
832 */
833 ASSERT(mutex_is_locked(&h->qh_lock));
834 dqp->q_hash = h;
835 list_add(&dqp->q_hashlist, &h->qh_list);
836 h->qh_version++;
837
838 /*
839 * Attach this dquot to this filesystem's list of all dquots,
840 * kept inside the mount structure in m_quotainfo field
841 */
842 mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
843
844 /*
845 * We return a locked dquot to the caller, with a reference taken 734 * We return a locked dquot to the caller, with a reference taken
846 */ 735 */
847 xfs_dqlock(dqp); 736 xfs_dqlock(dqp);
848 dqp->q_nrefs = 1; 737 dqp->q_nrefs = 1;
849 738
850 list_add(&dqp->q_mplist, &mp->m_quotainfo->qi_dqlist); 739 qi->qi_dquots++;
851 mp->m_quotainfo->qi_dquots++; 740 mutex_unlock(&qi->qi_tree_lock);
852 mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); 741
853 mutex_unlock(&h->qh_lock);
854 dqret: 742 dqret:
855 ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL)); 743 ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
856 trace_xfs_dqget_miss(dqp); 744 trace_xfs_dqget_miss(dqp);
@@ -859,37 +747,22 @@ restart:
859} 747}
860 748
861 749
862/* 750STATIC void
863 * Release a reference to the dquot (decrement ref-count) 751xfs_qm_dqput_final(
864 * and unlock it. If there is a group quota attached to this
865 * dquot, carefully release that too without tripping over
866 * deadlocks'n'stuff.
867 */
868void
869xfs_qm_dqput(
870 struct xfs_dquot *dqp) 752 struct xfs_dquot *dqp)
871{ 753{
754 struct xfs_quotainfo *qi = dqp->q_mount->m_quotainfo;
872 struct xfs_dquot *gdqp; 755 struct xfs_dquot *gdqp;
873 756
874 ASSERT(dqp->q_nrefs > 0);
875 ASSERT(XFS_DQ_IS_LOCKED(dqp));
876
877 trace_xfs_dqput(dqp);
878
879recurse:
880 if (--dqp->q_nrefs > 0) {
881 xfs_dqunlock(dqp);
882 return;
883 }
884
885 trace_xfs_dqput_free(dqp); 757 trace_xfs_dqput_free(dqp);
886 758
887 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); 759 mutex_lock(&qi->qi_lru_lock);
888 if (list_empty(&dqp->q_freelist)) { 760 if (list_empty(&dqp->q_lru)) {
889 list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist); 761 list_add_tail(&dqp->q_lru, &qi->qi_lru_list);
890 xfs_Gqm->qm_dqfrlist_cnt++; 762 qi->qi_lru_count++;
763 XFS_STATS_INC(xs_qm_dquot_unused);
891 } 764 }
892 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); 765 mutex_unlock(&qi->qi_lru_lock);
893 766
894 /* 767 /*
895 * If we just added a udquot to the freelist, then we want to release 768 * If we just added a udquot to the freelist, then we want to release
@@ -906,10 +779,29 @@ recurse:
906 /* 779 /*
907 * If we had a group quota hint, release it now. 780 * If we had a group quota hint, release it now.
908 */ 781 */
909 if (gdqp) { 782 if (gdqp)
910 dqp = gdqp; 783 xfs_qm_dqput(gdqp);
911 goto recurse; 784}
912 } 785
786/*
787 * Release a reference to the dquot (decrement ref-count) and unlock it.
788 *
789 * If there is a group quota attached to this dquot, carefully release that
790 * too without tripping over deadlocks'n'stuff.
791 */
792void
793xfs_qm_dqput(
794 struct xfs_dquot *dqp)
795{
796 ASSERT(dqp->q_nrefs > 0);
797 ASSERT(XFS_DQ_IS_LOCKED(dqp));
798
799 trace_xfs_dqput(dqp);
800
801 if (--dqp->q_nrefs > 0)
802 xfs_dqunlock(dqp);
803 else
804 xfs_qm_dqput_final(dqp);
913} 805}
914 806
915/* 807/*
@@ -1091,17 +983,6 @@ xfs_qm_dqflush(
1091 983
1092} 984}
1093 985
1094void
1095xfs_dqunlock(
1096 xfs_dquot_t *dqp)
1097{
1098 xfs_dqunlock_nonotify(dqp);
1099 if (dqp->q_logitem.qli_dquot == dqp) {
1100 xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_ailp,
1101 &dqp->q_logitem.qli_item);
1102 }
1103}
1104
1105/* 986/*
1106 * Lock two xfs_dquot structures. 987 * Lock two xfs_dquot structures.
1107 * 988 *
@@ -1131,85 +1012,6 @@ xfs_dqlock2(
1131} 1012}
1132 1013
1133/* 1014/*
1134 * Take a dquot out of the mount's dqlist as well as the hashlist. This is
1135 * called via unmount as well as quotaoff, and the purge will always succeed.
1136 */
1137void
1138xfs_qm_dqpurge(
1139 struct xfs_dquot *dqp)
1140{
1141 struct xfs_mount *mp = dqp->q_mount;
1142 struct xfs_dqhash *qh = dqp->q_hash;
1143
1144 xfs_dqlock(dqp);
1145
1146 /*
1147 * If we're turning off quotas, we have to make sure that, for
1148 * example, we don't delete quota disk blocks while dquots are
1149 * in the process of getting written to those disk blocks.
1150 * This dquot might well be on AIL, and we can't leave it there
1151 * if we're turning off quotas. Basically, we need this flush
1152 * lock, and are willing to block on it.
1153 */
1154 if (!xfs_dqflock_nowait(dqp)) {
1155 /*
1156 * Block on the flush lock after nudging dquot buffer,
1157 * if it is incore.
1158 */
1159 xfs_dqflock_pushbuf_wait(dqp);
1160 }
1161
1162 /*
1163 * If we are turning this type of quotas off, we don't care
1164 * about the dirty metadata sitting in this dquot. OTOH, if
1165 * we're unmounting, we do care, so we flush it and wait.
1166 */
1167 if (XFS_DQ_IS_DIRTY(dqp)) {
1168 int error;
1169
1170 /*
1171 * We don't care about getting disk errors here. We need
1172 * to purge this dquot anyway, so we go ahead regardless.
1173 */
1174 error = xfs_qm_dqflush(dqp, SYNC_WAIT);
1175 if (error)
1176 xfs_warn(mp, "%s: dquot %p flush failed",
1177 __func__, dqp);
1178 xfs_dqflock(dqp);
1179 }
1180
1181 ASSERT(atomic_read(&dqp->q_pincount) == 0);
1182 ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
1183 !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
1184
1185 xfs_dqfunlock(dqp);
1186 xfs_dqunlock(dqp);
1187
1188 mutex_lock(&qh->qh_lock);
1189 list_del_init(&dqp->q_hashlist);
1190 qh->qh_version++;
1191 mutex_unlock(&qh->qh_lock);
1192
1193 mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
1194 list_del_init(&dqp->q_mplist);
1195 mp->m_quotainfo->qi_dqreclaims++;
1196 mp->m_quotainfo->qi_dquots--;
1197 mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
1198
1199 /*
1200 * We move dquots to the freelist as soon as their reference count
1201 * hits zero, so it really should be on the freelist here.
1202 */
1203 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
1204 ASSERT(!list_empty(&dqp->q_freelist));
1205 list_del_init(&dqp->q_freelist);
1206 xfs_Gqm->qm_dqfrlist_cnt--;
1207 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
1208
1209 xfs_qm_dqdestroy(dqp);
1210}
1211
1212/*
1213 * Give the buffer a little push if it is incore and 1015 * Give the buffer a little push if it is incore and
1214 * wait on the flush lock. 1016 * wait on the flush lock.
1215 */ 1017 */
@@ -1241,3 +1043,31 @@ xfs_dqflock_pushbuf_wait(
1241out_lock: 1043out_lock:
1242 xfs_dqflock(dqp); 1044 xfs_dqflock(dqp);
1243} 1045}
1046
1047int __init
1048xfs_qm_init(void)
1049{
1050 xfs_qm_dqzone =
1051 kmem_zone_init(sizeof(struct xfs_dquot), "xfs_dquot");
1052 if (!xfs_qm_dqzone)
1053 goto out;
1054
1055 xfs_qm_dqtrxzone =
1056 kmem_zone_init(sizeof(struct xfs_dquot_acct), "xfs_dqtrx");
1057 if (!xfs_qm_dqtrxzone)
1058 goto out_free_dqzone;
1059
1060 return 0;
1061
1062out_free_dqzone:
1063 kmem_zone_destroy(xfs_qm_dqzone);
1064out:
1065 return -ENOMEM;
1066}
1067
1068void __exit
1069xfs_qm_exit(void)
1070{
1071 kmem_zone_destroy(xfs_qm_dqtrxzone);
1072 kmem_zone_destroy(xfs_qm_dqzone);
1073}
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index a1d91d8f1802..ef9190bd8b30 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -29,16 +29,6 @@
29 * when quotas are off. 29 * when quotas are off.
30 */ 30 */
31 31
32/*
33 * The hash chain headers (hash buckets)
34 */
35typedef struct xfs_dqhash {
36 struct list_head qh_list;
37 struct mutex qh_lock;
38 uint qh_version; /* ever increasing version */
39 uint qh_nelems; /* number of dquots on the list */
40} xfs_dqhash_t;
41
42struct xfs_mount; 32struct xfs_mount;
43struct xfs_trans; 33struct xfs_trans;
44 34
@@ -47,10 +37,7 @@ struct xfs_trans;
47 */ 37 */
48typedef struct xfs_dquot { 38typedef struct xfs_dquot {
49 uint dq_flags; /* various flags (XFS_DQ_*) */ 39 uint dq_flags; /* various flags (XFS_DQ_*) */
50 struct list_head q_freelist; /* global free list of dquots */ 40 struct list_head q_lru; /* global free list of dquots */
51 struct list_head q_mplist; /* mount's list of dquots */
52 struct list_head q_hashlist; /* gloabl hash list of dquots */
53 xfs_dqhash_t *q_hash; /* the hashchain header */
54 struct xfs_mount*q_mount; /* filesystem this relates to */ 41 struct xfs_mount*q_mount; /* filesystem this relates to */
55 struct xfs_trans*q_transp; /* trans this belongs to currently */ 42 struct xfs_trans*q_transp; /* trans this belongs to currently */
56 uint q_nrefs; /* # active refs from inodes */ 43 uint q_nrefs; /* # active refs from inodes */
@@ -110,11 +97,37 @@ static inline void xfs_dqlock(struct xfs_dquot *dqp)
110 mutex_lock(&dqp->q_qlock); 97 mutex_lock(&dqp->q_qlock);
111} 98}
112 99
113static inline void xfs_dqunlock_nonotify(struct xfs_dquot *dqp) 100static inline void xfs_dqunlock(struct xfs_dquot *dqp)
114{ 101{
115 mutex_unlock(&dqp->q_qlock); 102 mutex_unlock(&dqp->q_qlock);
116} 103}
117 104
105static inline int xfs_this_quota_on(struct xfs_mount *mp, int type)
106{
107 switch (type & XFS_DQ_ALLTYPES) {
108 case XFS_DQ_USER:
109 return XFS_IS_UQUOTA_ON(mp);
110 case XFS_DQ_GROUP:
111 case XFS_DQ_PROJ:
112 return XFS_IS_OQUOTA_ON(mp);
113 default:
114 return 0;
115 }
116}
117
118static inline xfs_dquot_t *xfs_inode_dquot(struct xfs_inode *ip, int type)
119{
120 switch (type & XFS_DQ_ALLTYPES) {
121 case XFS_DQ_USER:
122 return ip->i_udquot;
123 case XFS_DQ_GROUP:
124 case XFS_DQ_PROJ:
125 return ip->i_gdquot;
126 default:
127 return NULL;
128 }
129}
130
118#define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock))) 131#define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock)))
119#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) 132#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY)
120#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) 133#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER)
@@ -125,15 +138,10 @@ static inline void xfs_dqunlock_nonotify(struct xfs_dquot *dqp)
125 XFS_DQ_TO_QINF(dqp)->qi_uquotaip : \ 138 XFS_DQ_TO_QINF(dqp)->qi_uquotaip : \
126 XFS_DQ_TO_QINF(dqp)->qi_gquotaip) 139 XFS_DQ_TO_QINF(dqp)->qi_gquotaip)
127 140
128#define XFS_IS_THIS_QUOTA_OFF(d) (! (XFS_QM_ISUDQ(d) ? \
129 (XFS_IS_UQUOTA_ON((d)->q_mount)) : \
130 (XFS_IS_OQUOTA_ON((d)->q_mount))))
131
132extern int xfs_qm_dqread(struct xfs_mount *, xfs_dqid_t, uint, 141extern int xfs_qm_dqread(struct xfs_mount *, xfs_dqid_t, uint,
133 uint, struct xfs_dquot **); 142 uint, struct xfs_dquot **);
134extern void xfs_qm_dqdestroy(xfs_dquot_t *); 143extern void xfs_qm_dqdestroy(xfs_dquot_t *);
135extern int xfs_qm_dqflush(xfs_dquot_t *, uint); 144extern int xfs_qm_dqflush(xfs_dquot_t *, uint);
136extern void xfs_qm_dqpurge(xfs_dquot_t *);
137extern void xfs_qm_dqunpin_wait(xfs_dquot_t *); 145extern void xfs_qm_dqunpin_wait(xfs_dquot_t *);
138extern void xfs_qm_adjust_dqtimers(xfs_mount_t *, 146extern void xfs_qm_adjust_dqtimers(xfs_mount_t *,
139 xfs_disk_dquot_t *); 147 xfs_disk_dquot_t *);
@@ -144,7 +152,6 @@ extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,
144extern void xfs_qm_dqput(xfs_dquot_t *); 152extern void xfs_qm_dqput(xfs_dquot_t *);
145 153
146extern void xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *); 154extern void xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *);
147extern void xfs_dqunlock(struct xfs_dquot *);
148extern void xfs_dqflock_pushbuf_wait(struct xfs_dquot *dqp); 155extern void xfs_dqflock_pushbuf_wait(struct xfs_dquot *dqp);
149 156
150static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp) 157static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 7e5bc872f2b4..54a67dd9ac0a 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -163,7 +163,6 @@ xfs_file_fsync(
163 struct inode *inode = file->f_mapping->host; 163 struct inode *inode = file->f_mapping->host;
164 struct xfs_inode *ip = XFS_I(inode); 164 struct xfs_inode *ip = XFS_I(inode);
165 struct xfs_mount *mp = ip->i_mount; 165 struct xfs_mount *mp = ip->i_mount;
166 struct xfs_trans *tp;
167 int error = 0; 166 int error = 0;
168 int log_flushed = 0; 167 int log_flushed = 0;
169 xfs_lsn_t lsn = 0; 168 xfs_lsn_t lsn = 0;
@@ -194,75 +193,18 @@ xfs_file_fsync(
194 } 193 }
195 194
196 /* 195 /*
197 * We always need to make sure that the required inode state is safe on 196 * All metadata updates are logged, which means that we just have
198 * disk. The inode might be clean but we still might need to force the 197 * to flush the log up to the latest LSN that touched the inode.
199 * log because of committed transactions that haven't hit the disk yet.
200 * Likewise, there could be unflushed non-transactional changes to the
201 * inode core that have to go to disk and this requires us to issue
202 * a synchronous transaction to capture these changes correctly.
203 *
204 * This code relies on the assumption that if the i_update_core field
205 * of the inode is clear and the inode is unpinned then it is clean
206 * and no action is required.
207 */ 198 */
208 xfs_ilock(ip, XFS_ILOCK_SHARED); 199 xfs_ilock(ip, XFS_ILOCK_SHARED);
209 200 if (xfs_ipincount(ip)) {
210 /* 201 if (!datasync ||
211 * First check if the VFS inode is marked dirty. All the dirtying 202 (ip->i_itemp->ili_fields & ~XFS_ILOG_TIMESTAMP))
212 * of non-transactional updates do not go through mark_inode_dirty*,
213 * which allows us to distinguish between pure timestamp updates
214 * and i_size updates which need to be caught for fdatasync.
215 * After that also check for the dirty state in the XFS inode, which
216 * might gets cleared when the inode gets written out via the AIL
217 * or xfs_iflush_cluster.
218 */
219 if (((inode->i_state & I_DIRTY_DATASYNC) ||
220 ((inode->i_state & I_DIRTY_SYNC) && !datasync)) &&
221 ip->i_update_core) {
222 /*
223 * Kick off a transaction to log the inode core to get the
224 * updates. The sync transaction will also force the log.
225 */
226 xfs_iunlock(ip, XFS_ILOCK_SHARED);
227 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
228 error = xfs_trans_reserve(tp, 0,
229 XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
230 if (error) {
231 xfs_trans_cancel(tp, 0);
232 return -error;
233 }
234 xfs_ilock(ip, XFS_ILOCK_EXCL);
235
236 /*
237 * Note - it's possible that we might have pushed ourselves out
238 * of the way during trans_reserve which would flush the inode.
239 * But there's no guarantee that the inode buffer has actually
240 * gone out yet (it's delwri). Plus the buffer could be pinned
241 * anyway if it's part of an inode in another recent
242 * transaction. So we play it safe and fire off the
243 * transaction anyway.
244 */
245 xfs_trans_ijoin(tp, ip, 0);
246 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
247 error = xfs_trans_commit(tp, 0);
248
249 lsn = ip->i_itemp->ili_last_lsn;
250 xfs_iunlock(ip, XFS_ILOCK_EXCL);
251 } else {
252 /*
253 * Timestamps/size haven't changed since last inode flush or
254 * inode transaction commit. That means either nothing got
255 * written or a transaction committed which caught the updates.
256 * If the latter happened and the transaction hasn't hit the
257 * disk yet, the inode will be still be pinned. If it is,
258 * force the log.
259 */
260 if (xfs_ipincount(ip))
261 lsn = ip->i_itemp->ili_last_lsn; 203 lsn = ip->i_itemp->ili_last_lsn;
262 xfs_iunlock(ip, XFS_ILOCK_SHARED);
263 } 204 }
205 xfs_iunlock(ip, XFS_ILOCK_SHARED);
264 206
265 if (!error && lsn) 207 if (lsn)
266 error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed); 208 error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
267 209
268 /* 210 /*
@@ -659,9 +601,6 @@ restart:
659 return error; 601 return error;
660 } 602 }
661 603
662 if (likely(!(file->f_mode & FMODE_NOCMTIME)))
663 file_update_time(file);
664
665 /* 604 /*
666 * If the offset is beyond the size of the file, we need to zero any 605 * If the offset is beyond the size of the file, we need to zero any
667 * blocks that fall between the existing EOF and the start of this 606 * blocks that fall between the existing EOF and the start of this
@@ -685,6 +624,15 @@ restart:
685 return error; 624 return error;
686 625
687 /* 626 /*
627 * Updating the timestamps will grab the ilock again from
628 * xfs_fs_dirty_inode, so we have to call it after dropping the
629 * lock above. Eventually we should look into a way to avoid
630 * the pointless lock roundtrip.
631 */
632 if (likely(!(file->f_mode & FMODE_NOCMTIME)))
633 file_update_time(file);
634
635 /*
688 * If we're writing the file then make sure to clear the setuid and 636 * If we're writing the file then make sure to clear the setuid and
689 * setgid bits if the process is not being run by root. This keeps 637 * setgid bits if the process is not being run by root. This keeps
690 * people from modifying setuid and setgid binaries. 638 * people from modifying setuid and setgid binaries.
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 8c3e46394d48..a98cb4524e6c 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -91,7 +91,6 @@ xfs_inode_alloc(
91 ip->i_afp = NULL; 91 ip->i_afp = NULL;
92 memset(&ip->i_df, 0, sizeof(xfs_ifork_t)); 92 memset(&ip->i_df, 0, sizeof(xfs_ifork_t));
93 ip->i_flags = 0; 93 ip->i_flags = 0;
94 ip->i_update_core = 0;
95 ip->i_delayed_blks = 0; 94 ip->i_delayed_blks = 0;
96 memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); 95 memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
97 96
@@ -350,9 +349,20 @@ xfs_iget_cache_miss(
350 BUG(); 349 BUG();
351 } 350 }
352 351
353 spin_lock(&pag->pag_ici_lock); 352 /*
353 * These values must be set before inserting the inode into the radix
354 * tree as the moment it is inserted a concurrent lookup (allowed by the
355 * RCU locking mechanism) can find it and that lookup must see that this
356 * is an inode currently under construction (i.e. that XFS_INEW is set).
357 * The ip->i_flags_lock that protects the XFS_INEW flag forms the
358 * memory barrier that ensures this detection works correctly at lookup
359 * time.
360 */
361 ip->i_udquot = ip->i_gdquot = NULL;
362 xfs_iflags_set(ip, XFS_INEW);
354 363
355 /* insert the new inode */ 364 /* insert the new inode */
365 spin_lock(&pag->pag_ici_lock);
356 error = radix_tree_insert(&pag->pag_ici_root, agino, ip); 366 error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
357 if (unlikely(error)) { 367 if (unlikely(error)) {
358 WARN_ON(error != -EEXIST); 368 WARN_ON(error != -EEXIST);
@@ -360,11 +370,6 @@ xfs_iget_cache_miss(
360 error = EAGAIN; 370 error = EAGAIN;
361 goto out_preload_end; 371 goto out_preload_end;
362 } 372 }
363
364 /* These values _must_ be set before releasing the radix tree lock! */
365 ip->i_udquot = ip->i_gdquot = NULL;
366 xfs_iflags_set(ip, XFS_INEW);
367
368 spin_unlock(&pag->pag_ici_lock); 373 spin_unlock(&pag->pag_ici_lock);
369 radix_tree_preload_end(); 374 radix_tree_preload_end();
370 375
@@ -418,6 +423,15 @@ xfs_iget(
418 xfs_perag_t *pag; 423 xfs_perag_t *pag;
419 xfs_agino_t agino; 424 xfs_agino_t agino;
420 425
426 /*
427 * xfs_reclaim_inode() uses the ILOCK to ensure an inode
428 * doesn't get freed while it's being referenced during a
429 * radix tree traversal here. It assumes this function
430 * aqcuires only the ILOCK (and therefore it has no need to
431 * involve the IOLOCK in this synchronization).
432 */
433 ASSERT((lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) == 0);
434
421 /* reject inode numbers outside existing AGs */ 435 /* reject inode numbers outside existing AGs */
422 if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) 436 if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
423 return EINVAL; 437 return EINVAL;
@@ -642,8 +656,7 @@ xfs_iunlock(
642 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); 656 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
643 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != 657 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
644 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 658 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
645 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_IUNLOCK_NONOTIFY | 659 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
646 XFS_LOCK_DEP_MASK)) == 0);
647 ASSERT(lock_flags != 0); 660 ASSERT(lock_flags != 0);
648 661
649 if (lock_flags & XFS_IOLOCK_EXCL) 662 if (lock_flags & XFS_IOLOCK_EXCL)
@@ -656,16 +669,6 @@ xfs_iunlock(
656 else if (lock_flags & XFS_ILOCK_SHARED) 669 else if (lock_flags & XFS_ILOCK_SHARED)
657 mrunlock_shared(&ip->i_lock); 670 mrunlock_shared(&ip->i_lock);
658 671
659 if ((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) &&
660 !(lock_flags & XFS_IUNLOCK_NONOTIFY) && ip->i_itemp) {
661 /*
662 * Let the AIL know that this item has been unlocked in case
663 * it is in the AIL and anyone is waiting on it. Don't do
664 * this if the caller has asked us not to.
665 */
666 xfs_trans_unlocked_item(ip->i_itemp->ili_item.li_ailp,
667 (xfs_log_item_t*)(ip->i_itemp));
668 }
669 trace_xfs_iunlock(ip, lock_flags, _RET_IP_); 672 trace_xfs_iunlock(ip, lock_flags, _RET_IP_);
670} 673}
671 674
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index b21022499c2e..bc46c0a133d3 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1656,14 +1656,13 @@ retry:
1656 iip = ip->i_itemp; 1656 iip = ip->i_itemp;
1657 if (!iip || xfs_inode_clean(ip)) { 1657 if (!iip || xfs_inode_clean(ip)) {
1658 ASSERT(ip != free_ip); 1658 ASSERT(ip != free_ip);
1659 ip->i_update_core = 0;
1660 xfs_ifunlock(ip); 1659 xfs_ifunlock(ip);
1661 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1660 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1662 continue; 1661 continue;
1663 } 1662 }
1664 1663
1665 iip->ili_last_fields = iip->ili_format.ilf_fields; 1664 iip->ili_last_fields = iip->ili_fields;
1666 iip->ili_format.ilf_fields = 0; 1665 iip->ili_fields = 0;
1667 iip->ili_logged = 1; 1666 iip->ili_logged = 1;
1668 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, 1667 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
1669 &iip->ili_item.li_lsn); 1668 &iip->ili_item.li_lsn);
@@ -2177,7 +2176,7 @@ xfs_iflush_fork(
2177 mp = ip->i_mount; 2176 mp = ip->i_mount;
2178 switch (XFS_IFORK_FORMAT(ip, whichfork)) { 2177 switch (XFS_IFORK_FORMAT(ip, whichfork)) {
2179 case XFS_DINODE_FMT_LOCAL: 2178 case XFS_DINODE_FMT_LOCAL:
2180 if ((iip->ili_format.ilf_fields & dataflag[whichfork]) && 2179 if ((iip->ili_fields & dataflag[whichfork]) &&
2181 (ifp->if_bytes > 0)) { 2180 (ifp->if_bytes > 0)) {
2182 ASSERT(ifp->if_u1.if_data != NULL); 2181 ASSERT(ifp->if_u1.if_data != NULL);
2183 ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); 2182 ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
@@ -2187,8 +2186,8 @@ xfs_iflush_fork(
2187 2186
2188 case XFS_DINODE_FMT_EXTENTS: 2187 case XFS_DINODE_FMT_EXTENTS:
2189 ASSERT((ifp->if_flags & XFS_IFEXTENTS) || 2188 ASSERT((ifp->if_flags & XFS_IFEXTENTS) ||
2190 !(iip->ili_format.ilf_fields & extflag[whichfork])); 2189 !(iip->ili_fields & extflag[whichfork]));
2191 if ((iip->ili_format.ilf_fields & extflag[whichfork]) && 2190 if ((iip->ili_fields & extflag[whichfork]) &&
2192 (ifp->if_bytes > 0)) { 2191 (ifp->if_bytes > 0)) {
2193 ASSERT(xfs_iext_get_ext(ifp, 0)); 2192 ASSERT(xfs_iext_get_ext(ifp, 0));
2194 ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0); 2193 ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
@@ -2198,7 +2197,7 @@ xfs_iflush_fork(
2198 break; 2197 break;
2199 2198
2200 case XFS_DINODE_FMT_BTREE: 2199 case XFS_DINODE_FMT_BTREE:
2201 if ((iip->ili_format.ilf_fields & brootflag[whichfork]) && 2200 if ((iip->ili_fields & brootflag[whichfork]) &&
2202 (ifp->if_broot_bytes > 0)) { 2201 (ifp->if_broot_bytes > 0)) {
2203 ASSERT(ifp->if_broot != NULL); 2202 ASSERT(ifp->if_broot != NULL);
2204 ASSERT(ifp->if_broot_bytes <= 2203 ASSERT(ifp->if_broot_bytes <=
@@ -2211,14 +2210,14 @@ xfs_iflush_fork(
2211 break; 2210 break;
2212 2211
2213 case XFS_DINODE_FMT_DEV: 2212 case XFS_DINODE_FMT_DEV:
2214 if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) { 2213 if (iip->ili_fields & XFS_ILOG_DEV) {
2215 ASSERT(whichfork == XFS_DATA_FORK); 2214 ASSERT(whichfork == XFS_DATA_FORK);
2216 xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev); 2215 xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev);
2217 } 2216 }
2218 break; 2217 break;
2219 2218
2220 case XFS_DINODE_FMT_UUID: 2219 case XFS_DINODE_FMT_UUID:
2221 if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) { 2220 if (iip->ili_fields & XFS_ILOG_UUID) {
2222 ASSERT(whichfork == XFS_DATA_FORK); 2221 ASSERT(whichfork == XFS_DATA_FORK);
2223 memcpy(XFS_DFORK_DPTR(dip), 2222 memcpy(XFS_DFORK_DPTR(dip),
2224 &ip->i_df.if_u2.if_uuid, 2223 &ip->i_df.if_u2.if_uuid,
@@ -2451,9 +2450,8 @@ xfs_iflush(
2451 * to disk, because the log record didn't make it to disk! 2450 * to disk, because the log record didn't make it to disk!
2452 */ 2451 */
2453 if (XFS_FORCED_SHUTDOWN(mp)) { 2452 if (XFS_FORCED_SHUTDOWN(mp)) {
2454 ip->i_update_core = 0;
2455 if (iip) 2453 if (iip)
2456 iip->ili_format.ilf_fields = 0; 2454 iip->ili_fields = 0;
2457 xfs_ifunlock(ip); 2455 xfs_ifunlock(ip);
2458 return XFS_ERROR(EIO); 2456 return XFS_ERROR(EIO);
2459 } 2457 }
@@ -2533,26 +2531,6 @@ xfs_iflush_int(
2533 /* set *dip = inode's place in the buffer */ 2531 /* set *dip = inode's place in the buffer */
2534 dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); 2532 dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
2535 2533
2536 /*
2537 * Clear i_update_core before copying out the data.
2538 * This is for coordination with our timestamp updates
2539 * that don't hold the inode lock. They will always
2540 * update the timestamps BEFORE setting i_update_core,
2541 * so if we clear i_update_core after they set it we
2542 * are guaranteed to see their updates to the timestamps.
2543 * I believe that this depends on strongly ordered memory
2544 * semantics, but we have that. We use the SYNCHRONIZE
2545 * macro to make sure that the compiler does not reorder
2546 * the i_update_core access below the data copy below.
2547 */
2548 ip->i_update_core = 0;
2549 SYNCHRONIZE();
2550
2551 /*
2552 * Make sure to get the latest timestamps from the Linux inode.
2553 */
2554 xfs_synchronize_times(ip);
2555
2556 if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC), 2534 if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
2557 mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { 2535 mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
2558 xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 2536 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
@@ -2663,36 +2641,33 @@ xfs_iflush_int(
2663 xfs_inobp_check(mp, bp); 2641 xfs_inobp_check(mp, bp);
2664 2642
2665 /* 2643 /*
2666 * We've recorded everything logged in the inode, so we'd 2644 * We've recorded everything logged in the inode, so we'd like to clear
2667 * like to clear the ilf_fields bits so we don't log and 2645 * the ili_fields bits so we don't log and flush things unnecessarily.
2668 * flush things unnecessarily. However, we can't stop 2646 * However, we can't stop logging all this information until the data
2669 * logging all this information until the data we've copied 2647 * we've copied into the disk buffer is written to disk. If we did we
2670 * into the disk buffer is written to disk. If we did we might 2648 * might overwrite the copy of the inode in the log with all the data
2671 * overwrite the copy of the inode in the log with all the 2649 * after re-logging only part of it, and in the face of a crash we
2672 * data after re-logging only part of it, and in the face of 2650 * wouldn't have all the data we need to recover.
2673 * a crash we wouldn't have all the data we need to recover.
2674 * 2651 *
2675 * What we do is move the bits to the ili_last_fields field. 2652 * What we do is move the bits to the ili_last_fields field. When
2676 * When logging the inode, these bits are moved back to the 2653 * logging the inode, these bits are moved back to the ili_fields field.
2677 * ilf_fields field. In the xfs_iflush_done() routine we 2654 * In the xfs_iflush_done() routine we clear ili_last_fields, since we
2678 * clear ili_last_fields, since we know that the information 2655 * know that the information those bits represent is permanently on
2679 * those bits represent is permanently on disk. As long as 2656 * disk. As long as the flush completes before the inode is logged
2680 * the flush completes before the inode is logged again, then 2657 * again, then both ili_fields and ili_last_fields will be cleared.
2681 * both ilf_fields and ili_last_fields will be cleared.
2682 * 2658 *
2683 * We can play with the ilf_fields bits here, because the inode 2659 * We can play with the ili_fields bits here, because the inode lock
2684 * lock must be held exclusively in order to set bits there 2660 * must be held exclusively in order to set bits there and the flush
2685 * and the flush lock protects the ili_last_fields bits. 2661 * lock protects the ili_last_fields bits. Set ili_logged so the flush
2686 * Set ili_logged so the flush done 2662 * done routine can tell whether or not to look in the AIL. Also, store
2687 * routine can tell whether or not to look in the AIL. 2663 * the current LSN of the inode so that we can tell whether the item has
2688 * Also, store the current LSN of the inode so that we can tell 2664 * moved in the AIL from xfs_iflush_done(). In order to read the lsn we
2689 * whether the item has moved in the AIL from xfs_iflush_done(). 2665 * need the AIL lock, because it is a 64 bit value that cannot be read
2690 * In order to read the lsn we need the AIL lock, because 2666 * atomically.
2691 * it is a 64 bit value that cannot be read atomically.
2692 */ 2667 */
2693 if (iip != NULL && iip->ili_format.ilf_fields != 0) { 2668 if (iip != NULL && iip->ili_fields != 0) {
2694 iip->ili_last_fields = iip->ili_format.ilf_fields; 2669 iip->ili_last_fields = iip->ili_fields;
2695 iip->ili_format.ilf_fields = 0; 2670 iip->ili_fields = 0;
2696 iip->ili_logged = 1; 2671 iip->ili_logged = 1;
2697 2672
2698 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, 2673 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
@@ -2711,8 +2686,7 @@ xfs_iflush_int(
2711 } else { 2686 } else {
2712 /* 2687 /*
2713 * We're flushing an inode which is not in the AIL and has 2688 * We're flushing an inode which is not in the AIL and has
2714 * not been logged but has i_update_core set. For this 2689 * not been logged. For this case we can immediately drop
2715 * case we can use a B_DELWRI flush and immediately drop
2716 * the inode flush lock because we can avoid the whole 2690 * the inode flush lock because we can avoid the whole
2717 * AIL state thing. It's OK to drop the flush lock now, 2691 * AIL state thing. It's OK to drop the flush lock now,
2718 * because we've already locked the buffer and to do anything 2692 * because we've already locked the buffer and to do anything
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 2f27b7454085..f123dbe6d42a 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -241,7 +241,6 @@ typedef struct xfs_inode {
241 spinlock_t i_flags_lock; /* inode i_flags lock */ 241 spinlock_t i_flags_lock; /* inode i_flags lock */
242 /* Miscellaneous state. */ 242 /* Miscellaneous state. */
243 unsigned long i_flags; /* see defined flags below */ 243 unsigned long i_flags; /* see defined flags below */
244 unsigned char i_update_core; /* timestamps/size is dirty */
245 unsigned int i_delayed_blks; /* count of delay alloc blks */ 244 unsigned int i_delayed_blks; /* count of delay alloc blks */
246 245
247 xfs_icdinode_t i_d; /* most of ondisk inode */ 246 xfs_icdinode_t i_d; /* most of ondisk inode */
@@ -275,6 +274,20 @@ static inline xfs_fsize_t XFS_ISIZE(struct xfs_inode *ip)
275} 274}
276 275
277/* 276/*
277 * If this I/O goes past the on-disk inode size update it unless it would
278 * be past the current in-core inode size.
279 */
280static inline xfs_fsize_t
281xfs_new_eof(struct xfs_inode *ip, xfs_fsize_t new_size)
282{
283 xfs_fsize_t i_size = i_size_read(VFS_I(ip));
284
285 if (new_size > i_size)
286 new_size = i_size;
287 return new_size > ip->i_d.di_size ? new_size : 0;
288}
289
290/*
278 * i_flags helper functions 291 * i_flags helper functions
279 */ 292 */
280static inline void 293static inline void
@@ -422,7 +435,6 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
422#define XFS_IOLOCK_SHARED (1<<1) 435#define XFS_IOLOCK_SHARED (1<<1)
423#define XFS_ILOCK_EXCL (1<<2) 436#define XFS_ILOCK_EXCL (1<<2)
424#define XFS_ILOCK_SHARED (1<<3) 437#define XFS_ILOCK_SHARED (1<<3)
425#define XFS_IUNLOCK_NONOTIFY (1<<4)
426 438
427#define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \ 439#define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \
428 | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED) 440 | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)
@@ -431,8 +443,7 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
431 { XFS_IOLOCK_EXCL, "IOLOCK_EXCL" }, \ 443 { XFS_IOLOCK_EXCL, "IOLOCK_EXCL" }, \
432 { XFS_IOLOCK_SHARED, "IOLOCK_SHARED" }, \ 444 { XFS_IOLOCK_SHARED, "IOLOCK_SHARED" }, \
433 { XFS_ILOCK_EXCL, "ILOCK_EXCL" }, \ 445 { XFS_ILOCK_EXCL, "ILOCK_EXCL" }, \
434 { XFS_ILOCK_SHARED, "ILOCK_SHARED" }, \ 446 { XFS_ILOCK_SHARED, "ILOCK_SHARED" }
435 { XFS_IUNLOCK_NONOTIFY, "IUNLOCK_NONOTIFY" }
436 447
437 448
438/* 449/*
@@ -522,10 +533,6 @@ void xfs_promote_inode(struct xfs_inode *);
522void xfs_lock_inodes(xfs_inode_t **, int, uint); 533void xfs_lock_inodes(xfs_inode_t **, int, uint);
523void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); 534void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
524 535
525void xfs_synchronize_times(xfs_inode_t *);
526void xfs_mark_inode_dirty(xfs_inode_t *);
527void xfs_mark_inode_dirty_sync(xfs_inode_t *);
528
529#define IHOLD(ip) \ 536#define IHOLD(ip) \
530do { \ 537do { \
531 ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ 538 ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 91d71dcd4852..05d924efceaf 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -57,77 +57,28 @@ xfs_inode_item_size(
57 struct xfs_inode *ip = iip->ili_inode; 57 struct xfs_inode *ip = iip->ili_inode;
58 uint nvecs = 2; 58 uint nvecs = 2;
59 59
60 /*
61 * Only log the data/extents/b-tree root if there is something
62 * left to log.
63 */
64 iip->ili_format.ilf_fields |= XFS_ILOG_CORE;
65
66 switch (ip->i_d.di_format) { 60 switch (ip->i_d.di_format) {
67 case XFS_DINODE_FMT_EXTENTS: 61 case XFS_DINODE_FMT_EXTENTS:
68 iip->ili_format.ilf_fields &= 62 if ((iip->ili_fields & XFS_ILOG_DEXT) &&
69 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | 63 ip->i_d.di_nextents > 0 &&
70 XFS_ILOG_DEV | XFS_ILOG_UUID); 64 ip->i_df.if_bytes > 0)
71 if ((iip->ili_format.ilf_fields & XFS_ILOG_DEXT) &&
72 (ip->i_d.di_nextents > 0) &&
73 (ip->i_df.if_bytes > 0)) {
74 ASSERT(ip->i_df.if_u1.if_extents != NULL);
75 nvecs++; 65 nvecs++;
76 } else {
77 iip->ili_format.ilf_fields &= ~XFS_ILOG_DEXT;
78 }
79 break; 66 break;
80 67
81 case XFS_DINODE_FMT_BTREE: 68 case XFS_DINODE_FMT_BTREE:
82 iip->ili_format.ilf_fields &= 69 if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
83 ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | 70 ip->i_df.if_broot_bytes > 0)
84 XFS_ILOG_DEV | XFS_ILOG_UUID);
85 if ((iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) &&
86 (ip->i_df.if_broot_bytes > 0)) {
87 ASSERT(ip->i_df.if_broot != NULL);
88 nvecs++; 71 nvecs++;
89 } else {
90 ASSERT(!(iip->ili_format.ilf_fields &
91 XFS_ILOG_DBROOT));
92#ifdef XFS_TRANS_DEBUG
93 if (iip->ili_root_size > 0) {
94 ASSERT(iip->ili_root_size ==
95 ip->i_df.if_broot_bytes);
96 ASSERT(memcmp(iip->ili_orig_root,
97 ip->i_df.if_broot,
98 iip->ili_root_size) == 0);
99 } else {
100 ASSERT(ip->i_df.if_broot_bytes == 0);
101 }
102#endif
103 iip->ili_format.ilf_fields &= ~XFS_ILOG_DBROOT;
104 }
105 break; 72 break;
106 73
107 case XFS_DINODE_FMT_LOCAL: 74 case XFS_DINODE_FMT_LOCAL:
108 iip->ili_format.ilf_fields &= 75 if ((iip->ili_fields & XFS_ILOG_DDATA) &&
109 ~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT | 76 ip->i_df.if_bytes > 0)
110 XFS_ILOG_DEV | XFS_ILOG_UUID);
111 if ((iip->ili_format.ilf_fields & XFS_ILOG_DDATA) &&
112 (ip->i_df.if_bytes > 0)) {
113 ASSERT(ip->i_df.if_u1.if_data != NULL);
114 ASSERT(ip->i_d.di_size > 0);
115 nvecs++; 77 nvecs++;
116 } else {
117 iip->ili_format.ilf_fields &= ~XFS_ILOG_DDATA;
118 }
119 break; 78 break;
120 79
121 case XFS_DINODE_FMT_DEV: 80 case XFS_DINODE_FMT_DEV:
122 iip->ili_format.ilf_fields &=
123 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
124 XFS_ILOG_DEXT | XFS_ILOG_UUID);
125 break;
126
127 case XFS_DINODE_FMT_UUID: 81 case XFS_DINODE_FMT_UUID:
128 iip->ili_format.ilf_fields &=
129 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
130 XFS_ILOG_DEXT | XFS_ILOG_DEV);
131 break; 82 break;
132 83
133 default: 84 default:
@@ -135,56 +86,31 @@ xfs_inode_item_size(
135 break; 86 break;
136 } 87 }
137 88
138 /* 89 if (!XFS_IFORK_Q(ip))
139 * If there are no attributes associated with this file,
140 * then there cannot be anything more to log.
141 * Clear all attribute-related log flags.
142 */
143 if (!XFS_IFORK_Q(ip)) {
144 iip->ili_format.ilf_fields &=
145 ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT);
146 return nvecs; 90 return nvecs;
147 } 91
148 92
149 /* 93 /*
150 * Log any necessary attribute data. 94 * Log any necessary attribute data.
151 */ 95 */
152 switch (ip->i_d.di_aformat) { 96 switch (ip->i_d.di_aformat) {
153 case XFS_DINODE_FMT_EXTENTS: 97 case XFS_DINODE_FMT_EXTENTS:
154 iip->ili_format.ilf_fields &= 98 if ((iip->ili_fields & XFS_ILOG_AEXT) &&
155 ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT); 99 ip->i_d.di_anextents > 0 &&
156 if ((iip->ili_format.ilf_fields & XFS_ILOG_AEXT) && 100 ip->i_afp->if_bytes > 0)
157 (ip->i_d.di_anextents > 0) &&
158 (ip->i_afp->if_bytes > 0)) {
159 ASSERT(ip->i_afp->if_u1.if_extents != NULL);
160 nvecs++; 101 nvecs++;
161 } else {
162 iip->ili_format.ilf_fields &= ~XFS_ILOG_AEXT;
163 }
164 break; 102 break;
165 103
166 case XFS_DINODE_FMT_BTREE: 104 case XFS_DINODE_FMT_BTREE:
167 iip->ili_format.ilf_fields &= 105 if ((iip->ili_fields & XFS_ILOG_ABROOT) &&
168 ~(XFS_ILOG_ADATA | XFS_ILOG_AEXT); 106 ip->i_afp->if_broot_bytes > 0)
169 if ((iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) &&
170 (ip->i_afp->if_broot_bytes > 0)) {
171 ASSERT(ip->i_afp->if_broot != NULL);
172 nvecs++; 107 nvecs++;
173 } else {
174 iip->ili_format.ilf_fields &= ~XFS_ILOG_ABROOT;
175 }
176 break; 108 break;
177 109
178 case XFS_DINODE_FMT_LOCAL: 110 case XFS_DINODE_FMT_LOCAL:
179 iip->ili_format.ilf_fields &= 111 if ((iip->ili_fields & XFS_ILOG_ADATA) &&
180 ~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT); 112 ip->i_afp->if_bytes > 0)
181 if ((iip->ili_format.ilf_fields & XFS_ILOG_ADATA) &&
182 (ip->i_afp->if_bytes > 0)) {
183 ASSERT(ip->i_afp->if_u1.if_data != NULL);
184 nvecs++; 113 nvecs++;
185 } else {
186 iip->ili_format.ilf_fields &= ~XFS_ILOG_ADATA;
187 }
188 break; 114 break;
189 115
190 default: 116 default:
@@ -254,48 +180,11 @@ xfs_inode_item_format(
254 vecp++; 180 vecp++;
255 nvecs = 1; 181 nvecs = 1;
256 182
257 /*
258 * Clear i_update_core if the timestamps (or any other
259 * non-transactional modification) need flushing/logging
260 * and we're about to log them with the rest of the core.
261 *
262 * This is the same logic as xfs_iflush() but this code can't
263 * run at the same time as xfs_iflush because we're in commit
264 * processing here and so we have the inode lock held in
265 * exclusive mode. Although it doesn't really matter
266 * for the timestamps if both routines were to grab the
267 * timestamps or not. That would be ok.
268 *
269 * We clear i_update_core before copying out the data.
270 * This is for coordination with our timestamp updates
271 * that don't hold the inode lock. They will always
272 * update the timestamps BEFORE setting i_update_core,
273 * so if we clear i_update_core after they set it we
274 * are guaranteed to see their updates to the timestamps
275 * either here. Likewise, if they set it after we clear it
276 * here, we'll see it either on the next commit of this
277 * inode or the next time the inode gets flushed via
278 * xfs_iflush(). This depends on strongly ordered memory
279 * semantics, but we have that. We use the SYNCHRONIZE
280 * macro to make sure that the compiler does not reorder
281 * the i_update_core access below the data copy below.
282 */
283 if (ip->i_update_core) {
284 ip->i_update_core = 0;
285 SYNCHRONIZE();
286 }
287
288 /*
289 * Make sure to get the latest timestamps from the Linux inode.
290 */
291 xfs_synchronize_times(ip);
292
293 vecp->i_addr = &ip->i_d; 183 vecp->i_addr = &ip->i_d;
294 vecp->i_len = sizeof(struct xfs_icdinode); 184 vecp->i_len = sizeof(struct xfs_icdinode);
295 vecp->i_type = XLOG_REG_TYPE_ICORE; 185 vecp->i_type = XLOG_REG_TYPE_ICORE;
296 vecp++; 186 vecp++;
297 nvecs++; 187 nvecs++;
298 iip->ili_format.ilf_fields |= XFS_ILOG_CORE;
299 188
300 /* 189 /*
301 * If this is really an old format inode, then we need to 190 * If this is really an old format inode, then we need to
@@ -328,16 +217,17 @@ xfs_inode_item_format(
328 217
329 switch (ip->i_d.di_format) { 218 switch (ip->i_d.di_format) {
330 case XFS_DINODE_FMT_EXTENTS: 219 case XFS_DINODE_FMT_EXTENTS:
331 ASSERT(!(iip->ili_format.ilf_fields & 220 iip->ili_fields &=
332 (XFS_ILOG_DDATA | XFS_ILOG_DBROOT | 221 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
333 XFS_ILOG_DEV | XFS_ILOG_UUID))); 222 XFS_ILOG_DEV | XFS_ILOG_UUID);
334 if (iip->ili_format.ilf_fields & XFS_ILOG_DEXT) { 223
335 ASSERT(ip->i_df.if_bytes > 0); 224 if ((iip->ili_fields & XFS_ILOG_DEXT) &&
225 ip->i_d.di_nextents > 0 &&
226 ip->i_df.if_bytes > 0) {
336 ASSERT(ip->i_df.if_u1.if_extents != NULL); 227 ASSERT(ip->i_df.if_u1.if_extents != NULL);
337 ASSERT(ip->i_d.di_nextents > 0); 228 ASSERT(ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) > 0);
338 ASSERT(iip->ili_extents_buf == NULL); 229 ASSERT(iip->ili_extents_buf == NULL);
339 ASSERT((ip->i_df.if_bytes / 230
340 (uint)sizeof(xfs_bmbt_rec_t)) > 0);
341#ifdef XFS_NATIVE_HOST 231#ifdef XFS_NATIVE_HOST
342 if (ip->i_d.di_nextents == ip->i_df.if_bytes / 232 if (ip->i_d.di_nextents == ip->i_df.if_bytes /
343 (uint)sizeof(xfs_bmbt_rec_t)) { 233 (uint)sizeof(xfs_bmbt_rec_t)) {
@@ -359,15 +249,18 @@ xfs_inode_item_format(
359 iip->ili_format.ilf_dsize = vecp->i_len; 249 iip->ili_format.ilf_dsize = vecp->i_len;
360 vecp++; 250 vecp++;
361 nvecs++; 251 nvecs++;
252 } else {
253 iip->ili_fields &= ~XFS_ILOG_DEXT;
362 } 254 }
363 break; 255 break;
364 256
365 case XFS_DINODE_FMT_BTREE: 257 case XFS_DINODE_FMT_BTREE:
366 ASSERT(!(iip->ili_format.ilf_fields & 258 iip->ili_fields &=
367 (XFS_ILOG_DDATA | XFS_ILOG_DEXT | 259 ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT |
368 XFS_ILOG_DEV | XFS_ILOG_UUID))); 260 XFS_ILOG_DEV | XFS_ILOG_UUID);
369 if (iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) { 261
370 ASSERT(ip->i_df.if_broot_bytes > 0); 262 if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
263 ip->i_df.if_broot_bytes > 0) {
371 ASSERT(ip->i_df.if_broot != NULL); 264 ASSERT(ip->i_df.if_broot != NULL);
372 vecp->i_addr = ip->i_df.if_broot; 265 vecp->i_addr = ip->i_df.if_broot;
373 vecp->i_len = ip->i_df.if_broot_bytes; 266 vecp->i_len = ip->i_df.if_broot_bytes;
@@ -375,15 +268,30 @@ xfs_inode_item_format(
375 vecp++; 268 vecp++;
376 nvecs++; 269 nvecs++;
377 iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes; 270 iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes;
271 } else {
272 ASSERT(!(iip->ili_fields &
273 XFS_ILOG_DBROOT));
274#ifdef XFS_TRANS_DEBUG
275 if (iip->ili_root_size > 0) {
276 ASSERT(iip->ili_root_size ==
277 ip->i_df.if_broot_bytes);
278 ASSERT(memcmp(iip->ili_orig_root,
279 ip->i_df.if_broot,
280 iip->ili_root_size) == 0);
281 } else {
282 ASSERT(ip->i_df.if_broot_bytes == 0);
283 }
284#endif
285 iip->ili_fields &= ~XFS_ILOG_DBROOT;
378 } 286 }
379 break; 287 break;
380 288
381 case XFS_DINODE_FMT_LOCAL: 289 case XFS_DINODE_FMT_LOCAL:
382 ASSERT(!(iip->ili_format.ilf_fields & 290 iip->ili_fields &=
383 (XFS_ILOG_DBROOT | XFS_ILOG_DEXT | 291 ~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT |
384 XFS_ILOG_DEV | XFS_ILOG_UUID))); 292 XFS_ILOG_DEV | XFS_ILOG_UUID);
385 if (iip->ili_format.ilf_fields & XFS_ILOG_DDATA) { 293 if ((iip->ili_fields & XFS_ILOG_DDATA) &&
386 ASSERT(ip->i_df.if_bytes > 0); 294 ip->i_df.if_bytes > 0) {
387 ASSERT(ip->i_df.if_u1.if_data != NULL); 295 ASSERT(ip->i_df.if_u1.if_data != NULL);
388 ASSERT(ip->i_d.di_size > 0); 296 ASSERT(ip->i_d.di_size > 0);
389 297
@@ -401,24 +309,26 @@ xfs_inode_item_format(
401 vecp++; 309 vecp++;
402 nvecs++; 310 nvecs++;
403 iip->ili_format.ilf_dsize = (unsigned)data_bytes; 311 iip->ili_format.ilf_dsize = (unsigned)data_bytes;
312 } else {
313 iip->ili_fields &= ~XFS_ILOG_DDATA;
404 } 314 }
405 break; 315 break;
406 316
407 case XFS_DINODE_FMT_DEV: 317 case XFS_DINODE_FMT_DEV:
408 ASSERT(!(iip->ili_format.ilf_fields & 318 iip->ili_fields &=
409 (XFS_ILOG_DBROOT | XFS_ILOG_DEXT | 319 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
410 XFS_ILOG_DDATA | XFS_ILOG_UUID))); 320 XFS_ILOG_DEXT | XFS_ILOG_UUID);
411 if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) { 321 if (iip->ili_fields & XFS_ILOG_DEV) {
412 iip->ili_format.ilf_u.ilfu_rdev = 322 iip->ili_format.ilf_u.ilfu_rdev =
413 ip->i_df.if_u2.if_rdev; 323 ip->i_df.if_u2.if_rdev;
414 } 324 }
415 break; 325 break;
416 326
417 case XFS_DINODE_FMT_UUID: 327 case XFS_DINODE_FMT_UUID:
418 ASSERT(!(iip->ili_format.ilf_fields & 328 iip->ili_fields &=
419 (XFS_ILOG_DBROOT | XFS_ILOG_DEXT | 329 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
420 XFS_ILOG_DDATA | XFS_ILOG_DEV))); 330 XFS_ILOG_DEXT | XFS_ILOG_DEV);
421 if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) { 331 if (iip->ili_fields & XFS_ILOG_UUID) {
422 iip->ili_format.ilf_u.ilfu_uuid = 332 iip->ili_format.ilf_u.ilfu_uuid =
423 ip->i_df.if_u2.if_uuid; 333 ip->i_df.if_u2.if_uuid;
424 } 334 }
@@ -430,31 +340,25 @@ xfs_inode_item_format(
430 } 340 }
431 341
432 /* 342 /*
433 * If there are no attributes associated with the file, 343 * If there are no attributes associated with the file, then we're done.
434 * then we're done.
435 * Assert that no attribute-related log flags are set.
436 */ 344 */
437 if (!XFS_IFORK_Q(ip)) { 345 if (!XFS_IFORK_Q(ip)) {
438 iip->ili_format.ilf_size = nvecs; 346 iip->ili_fields &=
439 ASSERT(!(iip->ili_format.ilf_fields & 347 ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT);
440 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT))); 348 goto out;
441 return;
442 } 349 }
443 350
444 switch (ip->i_d.di_aformat) { 351 switch (ip->i_d.di_aformat) {
445 case XFS_DINODE_FMT_EXTENTS: 352 case XFS_DINODE_FMT_EXTENTS:
446 ASSERT(!(iip->ili_format.ilf_fields & 353 iip->ili_fields &=
447 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT))); 354 ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT);
448 if (iip->ili_format.ilf_fields & XFS_ILOG_AEXT) { 355
449#ifdef DEBUG 356 if ((iip->ili_fields & XFS_ILOG_AEXT) &&
450 int nrecs = ip->i_afp->if_bytes / 357 ip->i_d.di_anextents > 0 &&
451 (uint)sizeof(xfs_bmbt_rec_t); 358 ip->i_afp->if_bytes > 0) {
452 ASSERT(nrecs > 0); 359 ASSERT(ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) ==
453 ASSERT(nrecs == ip->i_d.di_anextents); 360 ip->i_d.di_anextents);
454 ASSERT(ip->i_afp->if_bytes > 0);
455 ASSERT(ip->i_afp->if_u1.if_extents != NULL); 361 ASSERT(ip->i_afp->if_u1.if_extents != NULL);
456 ASSERT(ip->i_d.di_anextents > 0);
457#endif
458#ifdef XFS_NATIVE_HOST 362#ifdef XFS_NATIVE_HOST
459 /* 363 /*
460 * There are not delayed allocation extents 364 * There are not delayed allocation extents
@@ -471,29 +375,36 @@ xfs_inode_item_format(
471 iip->ili_format.ilf_asize = vecp->i_len; 375 iip->ili_format.ilf_asize = vecp->i_len;
472 vecp++; 376 vecp++;
473 nvecs++; 377 nvecs++;
378 } else {
379 iip->ili_fields &= ~XFS_ILOG_AEXT;
474 } 380 }
475 break; 381 break;
476 382
477 case XFS_DINODE_FMT_BTREE: 383 case XFS_DINODE_FMT_BTREE:
478 ASSERT(!(iip->ili_format.ilf_fields & 384 iip->ili_fields &=
479 (XFS_ILOG_ADATA | XFS_ILOG_AEXT))); 385 ~(XFS_ILOG_ADATA | XFS_ILOG_AEXT);
480 if (iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) { 386
481 ASSERT(ip->i_afp->if_broot_bytes > 0); 387 if ((iip->ili_fields & XFS_ILOG_ABROOT) &&
388 ip->i_afp->if_broot_bytes > 0) {
482 ASSERT(ip->i_afp->if_broot != NULL); 389 ASSERT(ip->i_afp->if_broot != NULL);
390
483 vecp->i_addr = ip->i_afp->if_broot; 391 vecp->i_addr = ip->i_afp->if_broot;
484 vecp->i_len = ip->i_afp->if_broot_bytes; 392 vecp->i_len = ip->i_afp->if_broot_bytes;
485 vecp->i_type = XLOG_REG_TYPE_IATTR_BROOT; 393 vecp->i_type = XLOG_REG_TYPE_IATTR_BROOT;
486 vecp++; 394 vecp++;
487 nvecs++; 395 nvecs++;
488 iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes; 396 iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes;
397 } else {
398 iip->ili_fields &= ~XFS_ILOG_ABROOT;
489 } 399 }
490 break; 400 break;
491 401
492 case XFS_DINODE_FMT_LOCAL: 402 case XFS_DINODE_FMT_LOCAL:
493 ASSERT(!(iip->ili_format.ilf_fields & 403 iip->ili_fields &=
494 (XFS_ILOG_ABROOT | XFS_ILOG_AEXT))); 404 ~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT);
495 if (iip->ili_format.ilf_fields & XFS_ILOG_ADATA) { 405
496 ASSERT(ip->i_afp->if_bytes > 0); 406 if ((iip->ili_fields & XFS_ILOG_ADATA) &&
407 ip->i_afp->if_bytes > 0) {
497 ASSERT(ip->i_afp->if_u1.if_data != NULL); 408 ASSERT(ip->i_afp->if_u1.if_data != NULL);
498 409
499 vecp->i_addr = ip->i_afp->if_u1.if_data; 410 vecp->i_addr = ip->i_afp->if_u1.if_data;
@@ -510,6 +421,8 @@ xfs_inode_item_format(
510 vecp++; 421 vecp++;
511 nvecs++; 422 nvecs++;
512 iip->ili_format.ilf_asize = (unsigned)data_bytes; 423 iip->ili_format.ilf_asize = (unsigned)data_bytes;
424 } else {
425 iip->ili_fields &= ~XFS_ILOG_ADATA;
513 } 426 }
514 break; 427 break;
515 428
@@ -518,6 +431,15 @@ xfs_inode_item_format(
518 break; 431 break;
519 } 432 }
520 433
434out:
435 /*
436 * Now update the log format that goes out to disk from the in-core
437 * values. We always write the inode core to make the arithmetic
438 * games in recovery easier, which isn't a big deal as just about any
439 * transaction would dirty it anyway.
440 */
441 iip->ili_format.ilf_fields = XFS_ILOG_CORE |
442 (iip->ili_fields & ~XFS_ILOG_TIMESTAMP);
521 iip->ili_format.ilf_size = nvecs; 443 iip->ili_format.ilf_size = nvecs;
522} 444}
523 445
@@ -596,17 +518,13 @@ xfs_inode_item_trylock(
596 /* Stale items should force out the iclog */ 518 /* Stale items should force out the iclog */
597 if (ip->i_flags & XFS_ISTALE) { 519 if (ip->i_flags & XFS_ISTALE) {
598 xfs_ifunlock(ip); 520 xfs_ifunlock(ip);
599 /* 521 xfs_iunlock(ip, XFS_ILOCK_SHARED);
600 * we hold the AIL lock - notify the unlock routine of this
601 * so it doesn't try to get the lock again.
602 */
603 xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY);
604 return XFS_ITEM_PINNED; 522 return XFS_ITEM_PINNED;
605 } 523 }
606 524
607#ifdef DEBUG 525#ifdef DEBUG
608 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 526 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
609 ASSERT(iip->ili_format.ilf_fields != 0); 527 ASSERT(iip->ili_fields != 0);
610 ASSERT(iip->ili_logged == 0); 528 ASSERT(iip->ili_logged == 0);
611 ASSERT(lip->li_flags & XFS_LI_IN_AIL); 529 ASSERT(lip->li_flags & XFS_LI_IN_AIL);
612 } 530 }
@@ -638,7 +556,7 @@ xfs_inode_item_unlock(
638 if (iip->ili_extents_buf != NULL) { 556 if (iip->ili_extents_buf != NULL) {
639 ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS); 557 ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS);
640 ASSERT(ip->i_d.di_nextents > 0); 558 ASSERT(ip->i_d.di_nextents > 0);
641 ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_DEXT); 559 ASSERT(iip->ili_fields & XFS_ILOG_DEXT);
642 ASSERT(ip->i_df.if_bytes > 0); 560 ASSERT(ip->i_df.if_bytes > 0);
643 kmem_free(iip->ili_extents_buf); 561 kmem_free(iip->ili_extents_buf);
644 iip->ili_extents_buf = NULL; 562 iip->ili_extents_buf = NULL;
@@ -646,7 +564,7 @@ xfs_inode_item_unlock(
646 if (iip->ili_aextents_buf != NULL) { 564 if (iip->ili_aextents_buf != NULL) {
647 ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS); 565 ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS);
648 ASSERT(ip->i_d.di_anextents > 0); 566 ASSERT(ip->i_d.di_anextents > 0);
649 ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_AEXT); 567 ASSERT(iip->ili_fields & XFS_ILOG_AEXT);
650 ASSERT(ip->i_afp->if_bytes > 0); 568 ASSERT(ip->i_afp->if_bytes > 0);
651 kmem_free(iip->ili_aextents_buf); 569 kmem_free(iip->ili_aextents_buf);
652 iip->ili_aextents_buf = NULL; 570 iip->ili_aextents_buf = NULL;
@@ -761,8 +679,7 @@ xfs_inode_item_push(
761 * lock without sleeping, then there must not have been 679 * lock without sleeping, then there must not have been
762 * anyone in the process of flushing the inode. 680 * anyone in the process of flushing the inode.
763 */ 681 */
764 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || 682 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || iip->ili_fields != 0);
765 iip->ili_format.ilf_fields != 0);
766 683
767 /* 684 /*
768 * Push the inode to it's backing buffer. This will not remove the 685 * Push the inode to it's backing buffer. This will not remove the
@@ -985,7 +902,7 @@ xfs_iflush_abort(
985 * Clear the inode logging fields so no more flushes are 902 * Clear the inode logging fields so no more flushes are
986 * attempted. 903 * attempted.
987 */ 904 */
988 iip->ili_format.ilf_fields = 0; 905 iip->ili_fields = 0;
989 } 906 }
990 /* 907 /*
991 * Release the inode's flush lock since we're done with it. 908 * Release the inode's flush lock since we're done with it.
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index d3dee61e6d91..41d61c3b7a36 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -86,6 +86,15 @@ typedef struct xfs_inode_log_format_64 {
86#define XFS_ILOG_AEXT 0x080 /* log i_af.if_extents */ 86#define XFS_ILOG_AEXT 0x080 /* log i_af.if_extents */
87#define XFS_ILOG_ABROOT 0x100 /* log i_af.i_broot */ 87#define XFS_ILOG_ABROOT 0x100 /* log i_af.i_broot */
88 88
89
90/*
91 * The timestamps are dirty, but not necessarily anything else in the inode
92 * core. Unlike the other fields above this one must never make it to disk
93 * in the ilf_fields of the inode_log_format, but is purely store in-memory in
94 * ili_fields in the inode_log_item.
95 */
96#define XFS_ILOG_TIMESTAMP 0x4000
97
89#define XFS_ILOG_NONCORE (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \ 98#define XFS_ILOG_NONCORE (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
90 XFS_ILOG_DBROOT | XFS_ILOG_DEV | \ 99 XFS_ILOG_DBROOT | XFS_ILOG_DEV | \
91 XFS_ILOG_UUID | XFS_ILOG_ADATA | \ 100 XFS_ILOG_UUID | XFS_ILOG_ADATA | \
@@ -101,7 +110,7 @@ typedef struct xfs_inode_log_format_64 {
101 XFS_ILOG_DEXT | XFS_ILOG_DBROOT | \ 110 XFS_ILOG_DEXT | XFS_ILOG_DBROOT | \
102 XFS_ILOG_DEV | XFS_ILOG_UUID | \ 111 XFS_ILOG_DEV | XFS_ILOG_UUID | \
103 XFS_ILOG_ADATA | XFS_ILOG_AEXT | \ 112 XFS_ILOG_ADATA | XFS_ILOG_AEXT | \
104 XFS_ILOG_ABROOT) 113 XFS_ILOG_ABROOT | XFS_ILOG_TIMESTAMP)
105 114
106static inline int xfs_ilog_fbroot(int w) 115static inline int xfs_ilog_fbroot(int w)
107{ 116{
@@ -134,6 +143,7 @@ typedef struct xfs_inode_log_item {
134 unsigned short ili_lock_flags; /* lock flags */ 143 unsigned short ili_lock_flags; /* lock flags */
135 unsigned short ili_logged; /* flushed logged data */ 144 unsigned short ili_logged; /* flushed logged data */
136 unsigned int ili_last_fields; /* fields when flushed */ 145 unsigned int ili_last_fields; /* fields when flushed */
146 unsigned int ili_fields; /* fields to be logged */
137 struct xfs_bmbt_rec *ili_extents_buf; /* array of logged 147 struct xfs_bmbt_rec *ili_extents_buf; /* array of logged
138 data exts */ 148 data exts */
139 struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged 149 struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged
@@ -148,9 +158,7 @@ typedef struct xfs_inode_log_item {
148 158
149static inline int xfs_inode_clean(xfs_inode_t *ip) 159static inline int xfs_inode_clean(xfs_inode_t *ip)
150{ 160{
151 return (!ip->i_itemp || 161 return !ip->i_itemp || !(ip->i_itemp->ili_fields & XFS_ILOG_ALL);
152 !(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) &&
153 !ip->i_update_core;
154} 162}
155 163
156extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *); 164extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *);
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 76f3ca5cfc36..f588320dc4b9 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -450,9 +450,12 @@ xfs_attrmulti_attr_get(
450 450
451 if (*len > XATTR_SIZE_MAX) 451 if (*len > XATTR_SIZE_MAX)
452 return EINVAL; 452 return EINVAL;
453 kbuf = kmalloc(*len, GFP_KERNEL); 453 kbuf = kmem_zalloc(*len, KM_SLEEP | KM_MAYFAIL);
454 if (!kbuf) 454 if (!kbuf) {
455 return ENOMEM; 455 kbuf = kmem_zalloc_large(*len);
456 if (!kbuf)
457 return ENOMEM;
458 }
456 459
457 error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags); 460 error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags);
458 if (error) 461 if (error)
@@ -462,7 +465,10 @@ xfs_attrmulti_attr_get(
462 error = EFAULT; 465 error = EFAULT;
463 466
464 out_kfree: 467 out_kfree:
465 kfree(kbuf); 468 if (is_vmalloc_addr(kbuf))
469 kmem_free_large(kbuf);
470 else
471 kmem_free(kbuf);
466 return error; 472 return error;
467} 473}
468 474
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index f9ccb7b7c043..a849a5473aff 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -293,7 +293,7 @@ xfs_compat_ioc_bulkstat(
293 int res; 293 int res;
294 294
295 error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer, 295 error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer,
296 sizeof(compat_xfs_bstat_t), 0, &res); 296 sizeof(compat_xfs_bstat_t), NULL, &res);
297 } else if (cmd == XFS_IOC_FSBULKSTAT_32) { 297 } else if (cmd == XFS_IOC_FSBULKSTAT_32) {
298 error = xfs_bulkstat(mp, &inlast, &count, 298 error = xfs_bulkstat(mp, &inlast, &count,
299 xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t), 299 xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t),
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 246c7d57c6f9..71a464503c43 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -31,6 +31,7 @@
31#include "xfs_ialloc_btree.h" 31#include "xfs_ialloc_btree.h"
32#include "xfs_dinode.h" 32#include "xfs_dinode.h"
33#include "xfs_inode.h" 33#include "xfs_inode.h"
34#include "xfs_inode_item.h"
34#include "xfs_btree.h" 35#include "xfs_btree.h"
35#include "xfs_bmap.h" 36#include "xfs_bmap.h"
36#include "xfs_rtalloc.h" 37#include "xfs_rtalloc.h"
@@ -645,6 +646,7 @@ xfs_iomap_write_unwritten(
645 xfs_trans_t *tp; 646 xfs_trans_t *tp;
646 xfs_bmbt_irec_t imap; 647 xfs_bmbt_irec_t imap;
647 xfs_bmap_free_t free_list; 648 xfs_bmap_free_t free_list;
649 xfs_fsize_t i_size;
648 uint resblks; 650 uint resblks;
649 int committed; 651 int committed;
650 int error; 652 int error;
@@ -705,7 +707,22 @@ xfs_iomap_write_unwritten(
705 if (error) 707 if (error)
706 goto error_on_bmapi_transaction; 708 goto error_on_bmapi_transaction;
707 709
708 error = xfs_bmap_finish(&(tp), &(free_list), &committed); 710 /*
711 * Log the updated inode size as we go. We have to be careful
712 * to only log it up to the actual write offset if it is
713 * halfway into a block.
714 */
715 i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb);
716 if (i_size > offset + count)
717 i_size = offset + count;
718
719 i_size = xfs_new_eof(ip, i_size);
720 if (i_size) {
721 ip->i_d.di_size = i_size;
722 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
723 }
724
725 error = xfs_bmap_finish(&tp, &free_list, &committed);
709 if (error) 726 if (error)
710 goto error_on_bmapi_transaction; 727 goto error_on_bmapi_transaction;
711 728
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index ab302539e5b9..3011b879f850 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -50,65 +50,15 @@
50#include <linux/fiemap.h> 50#include <linux/fiemap.h>
51#include <linux/slab.h> 51#include <linux/slab.h>
52 52
53/* 53static int
54 * Bring the timestamps in the XFS inode uptodate. 54xfs_initxattrs(
55 * 55 struct inode *inode,
56 * Used before writing the inode to disk. 56 const struct xattr *xattr_array,
57 */ 57 void *fs_info)
58void
59xfs_synchronize_times(
60 xfs_inode_t *ip)
61{
62 struct inode *inode = VFS_I(ip);
63
64 ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
65 ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
66 ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec;
67 ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec;
68 ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec;
69 ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec;
70}
71
72/*
73 * If the linux inode is valid, mark it dirty, else mark the dirty state
74 * in the XFS inode to make sure we pick it up when reclaiming the inode.
75 */
76void
77xfs_mark_inode_dirty_sync(
78 xfs_inode_t *ip)
79{
80 struct inode *inode = VFS_I(ip);
81
82 if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
83 mark_inode_dirty_sync(inode);
84 else {
85 barrier();
86 ip->i_update_core = 1;
87 }
88}
89
90void
91xfs_mark_inode_dirty(
92 xfs_inode_t *ip)
93{
94 struct inode *inode = VFS_I(ip);
95
96 if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
97 mark_inode_dirty(inode);
98 else {
99 barrier();
100 ip->i_update_core = 1;
101 }
102
103}
104
105
106int xfs_initxattrs(struct inode *inode, const struct xattr *xattr_array,
107 void *fs_info)
108{ 58{
109 const struct xattr *xattr; 59 const struct xattr *xattr;
110 struct xfs_inode *ip = XFS_I(inode); 60 struct xfs_inode *ip = XFS_I(inode);
111 int error = 0; 61 int error = 0;
112 62
113 for (xattr = xattr_array; xattr->name != NULL; xattr++) { 63 for (xattr = xattr_array; xattr->name != NULL; xattr++) {
114 error = xfs_attr_set(ip, xattr->name, xattr->value, 64 error = xfs_attr_set(ip, xattr->name, xattr->value,
@@ -678,19 +628,16 @@ xfs_setattr_nonsize(
678 inode->i_atime = iattr->ia_atime; 628 inode->i_atime = iattr->ia_atime;
679 ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; 629 ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
680 ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; 630 ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
681 ip->i_update_core = 1;
682 } 631 }
683 if (mask & ATTR_CTIME) { 632 if (mask & ATTR_CTIME) {
684 inode->i_ctime = iattr->ia_ctime; 633 inode->i_ctime = iattr->ia_ctime;
685 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; 634 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
686 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; 635 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
687 ip->i_update_core = 1;
688 } 636 }
689 if (mask & ATTR_MTIME) { 637 if (mask & ATTR_MTIME) {
690 inode->i_mtime = iattr->ia_mtime; 638 inode->i_mtime = iattr->ia_mtime;
691 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; 639 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
692 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; 640 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
693 ip->i_update_core = 1;
694 } 641 }
695 642
696 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 643 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
@@ -918,13 +865,11 @@ xfs_setattr_size(
918 inode->i_ctime = iattr->ia_ctime; 865 inode->i_ctime = iattr->ia_ctime;
919 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; 866 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
920 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; 867 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
921 ip->i_update_core = 1;
922 } 868 }
923 if (mask & ATTR_MTIME) { 869 if (mask & ATTR_MTIME) {
924 inode->i_mtime = iattr->ia_mtime; 870 inode->i_mtime = iattr->ia_mtime;
925 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; 871 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
926 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; 872 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
927 ip->i_update_core = 1;
928 } 873 }
929 874
930 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 875 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 751e94fe1f77..9720c54bbed0 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -62,7 +62,6 @@ xfs_bulkstat_one_int(
62{ 62{
63 struct xfs_icdinode *dic; /* dinode core info pointer */ 63 struct xfs_icdinode *dic; /* dinode core info pointer */
64 struct xfs_inode *ip; /* incore inode pointer */ 64 struct xfs_inode *ip; /* incore inode pointer */
65 struct inode *inode;
66 struct xfs_bstat *buf; /* return buffer */ 65 struct xfs_bstat *buf; /* return buffer */
67 int error = 0; /* error value */ 66 int error = 0; /* error value */
68 67
@@ -86,7 +85,6 @@ xfs_bulkstat_one_int(
86 ASSERT(ip->i_imap.im_blkno != 0); 85 ASSERT(ip->i_imap.im_blkno != 0);
87 86
88 dic = &ip->i_d; 87 dic = &ip->i_d;
89 inode = VFS_I(ip);
90 88
91 /* xfs_iget returns the following without needing 89 /* xfs_iget returns the following without needing
92 * further change. 90 * further change.
@@ -99,19 +97,12 @@ xfs_bulkstat_one_int(
99 buf->bs_uid = dic->di_uid; 97 buf->bs_uid = dic->di_uid;
100 buf->bs_gid = dic->di_gid; 98 buf->bs_gid = dic->di_gid;
101 buf->bs_size = dic->di_size; 99 buf->bs_size = dic->di_size;
102 100 buf->bs_atime.tv_sec = dic->di_atime.t_sec;
103 /* 101 buf->bs_atime.tv_nsec = dic->di_atime.t_nsec;
104 * We need to read the timestamps from the Linux inode because 102 buf->bs_mtime.tv_sec = dic->di_mtime.t_sec;
105 * the VFS keeps writing directly into the inode structure instead 103 buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec;
106 * of telling us about the updates. 104 buf->bs_ctime.tv_sec = dic->di_ctime.t_sec;
107 */ 105 buf->bs_ctime.tv_nsec = dic->di_ctime.t_nsec;
108 buf->bs_atime.tv_sec = inode->i_atime.tv_sec;
109 buf->bs_atime.tv_nsec = inode->i_atime.tv_nsec;
110 buf->bs_mtime.tv_sec = inode->i_mtime.tv_sec;
111 buf->bs_mtime.tv_nsec = inode->i_mtime.tv_nsec;
112 buf->bs_ctime.tv_sec = inode->i_ctime.tv_sec;
113 buf->bs_ctime.tv_nsec = inode->i_ctime.tv_nsec;
114
115 buf->bs_xflags = xfs_ip2xflags(ip); 106 buf->bs_xflags = xfs_ip2xflags(ip);
116 buf->bs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog; 107 buf->bs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog;
117 buf->bs_extents = dic->di_nextents; 108 buf->bs_extents = dic->di_nextents;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index e2cc3568c299..98a9cb5ffd17 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -67,15 +67,10 @@ STATIC void xlog_state_switch_iclogs(xlog_t *log,
67 int eventual_size); 67 int eventual_size);
68STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog); 68STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog);
69 69
70/* local functions to manipulate grant head */
71STATIC int xlog_grant_log_space(xlog_t *log,
72 xlog_ticket_t *xtic);
73STATIC void xlog_grant_push_ail(struct log *log, 70STATIC void xlog_grant_push_ail(struct log *log,
74 int need_bytes); 71 int need_bytes);
75STATIC void xlog_regrant_reserve_log_space(xlog_t *log, 72STATIC void xlog_regrant_reserve_log_space(xlog_t *log,
76 xlog_ticket_t *ticket); 73 xlog_ticket_t *ticket);
77STATIC int xlog_regrant_write_log_space(xlog_t *log,
78 xlog_ticket_t *ticket);
79STATIC void xlog_ungrant_log_space(xlog_t *log, 74STATIC void xlog_ungrant_log_space(xlog_t *log,
80 xlog_ticket_t *ticket); 75 xlog_ticket_t *ticket);
81 76
@@ -150,78 +145,93 @@ xlog_grant_add_space(
150 } while (head_val != old); 145 } while (head_val != old);
151} 146}
152 147
153STATIC bool 148STATIC void
154xlog_reserveq_wake( 149xlog_grant_head_init(
155 struct log *log, 150 struct xlog_grant_head *head)
156 int *free_bytes) 151{
152 xlog_assign_grant_head(&head->grant, 1, 0);
153 INIT_LIST_HEAD(&head->waiters);
154 spin_lock_init(&head->lock);
155}
156
157STATIC void
158xlog_grant_head_wake_all(
159 struct xlog_grant_head *head)
157{ 160{
158 struct xlog_ticket *tic; 161 struct xlog_ticket *tic;
159 int need_bytes;
160 162
161 list_for_each_entry(tic, &log->l_reserveq, t_queue) { 163 spin_lock(&head->lock);
164 list_for_each_entry(tic, &head->waiters, t_queue)
165 wake_up_process(tic->t_task);
166 spin_unlock(&head->lock);
167}
168
169static inline int
170xlog_ticket_reservation(
171 struct log *log,
172 struct xlog_grant_head *head,
173 struct xlog_ticket *tic)
174{
175 if (head == &log->l_write_head) {
176 ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV);
177 return tic->t_unit_res;
178 } else {
162 if (tic->t_flags & XLOG_TIC_PERM_RESERV) 179 if (tic->t_flags & XLOG_TIC_PERM_RESERV)
163 need_bytes = tic->t_unit_res * tic->t_cnt; 180 return tic->t_unit_res * tic->t_cnt;
164 else 181 else
165 need_bytes = tic->t_unit_res; 182 return tic->t_unit_res;
166
167 if (*free_bytes < need_bytes)
168 return false;
169 *free_bytes -= need_bytes;
170
171 trace_xfs_log_grant_wake_up(log, tic);
172 wake_up(&tic->t_wait);
173 } 183 }
174
175 return true;
176} 184}
177 185
178STATIC bool 186STATIC bool
179xlog_writeq_wake( 187xlog_grant_head_wake(
180 struct log *log, 188 struct log *log,
189 struct xlog_grant_head *head,
181 int *free_bytes) 190 int *free_bytes)
182{ 191{
183 struct xlog_ticket *tic; 192 struct xlog_ticket *tic;
184 int need_bytes; 193 int need_bytes;
185 194
186 list_for_each_entry(tic, &log->l_writeq, t_queue) { 195 list_for_each_entry(tic, &head->waiters, t_queue) {
187 ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV); 196 need_bytes = xlog_ticket_reservation(log, head, tic);
188
189 need_bytes = tic->t_unit_res;
190
191 if (*free_bytes < need_bytes) 197 if (*free_bytes < need_bytes)
192 return false; 198 return false;
193 *free_bytes -= need_bytes;
194 199
195 trace_xfs_log_regrant_write_wake_up(log, tic); 200 *free_bytes -= need_bytes;
196 wake_up(&tic->t_wait); 201 trace_xfs_log_grant_wake_up(log, tic);
202 wake_up_process(tic->t_task);
197 } 203 }
198 204
199 return true; 205 return true;
200} 206}
201 207
202STATIC int 208STATIC int
203xlog_reserveq_wait( 209xlog_grant_head_wait(
204 struct log *log, 210 struct log *log,
211 struct xlog_grant_head *head,
205 struct xlog_ticket *tic, 212 struct xlog_ticket *tic,
206 int need_bytes) 213 int need_bytes)
207{ 214{
208 list_add_tail(&tic->t_queue, &log->l_reserveq); 215 list_add_tail(&tic->t_queue, &head->waiters);
209 216
210 do { 217 do {
211 if (XLOG_FORCED_SHUTDOWN(log)) 218 if (XLOG_FORCED_SHUTDOWN(log))
212 goto shutdown; 219 goto shutdown;
213 xlog_grant_push_ail(log, need_bytes); 220 xlog_grant_push_ail(log, need_bytes);
214 221
222 __set_current_state(TASK_UNINTERRUPTIBLE);
223 spin_unlock(&head->lock);
224
215 XFS_STATS_INC(xs_sleep_logspace); 225 XFS_STATS_INC(xs_sleep_logspace);
216 trace_xfs_log_grant_sleep(log, tic);
217 226
218 xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock); 227 trace_xfs_log_grant_sleep(log, tic);
228 schedule();
219 trace_xfs_log_grant_wake(log, tic); 229 trace_xfs_log_grant_wake(log, tic);
220 230
221 spin_lock(&log->l_grant_reserve_lock); 231 spin_lock(&head->lock);
222 if (XLOG_FORCED_SHUTDOWN(log)) 232 if (XLOG_FORCED_SHUTDOWN(log))
223 goto shutdown; 233 goto shutdown;
224 } while (xlog_space_left(log, &log->l_grant_reserve_head) < need_bytes); 234 } while (xlog_space_left(log, &head->grant) < need_bytes);
225 235
226 list_del_init(&tic->t_queue); 236 list_del_init(&tic->t_queue);
227 return 0; 237 return 0;
@@ -230,35 +240,58 @@ shutdown:
230 return XFS_ERROR(EIO); 240 return XFS_ERROR(EIO);
231} 241}
232 242
243/*
244 * Atomically get the log space required for a log ticket.
245 *
246 * Once a ticket gets put onto head->waiters, it will only return after the
247 * needed reservation is satisfied.
248 *
249 * This function is structured so that it has a lock free fast path. This is
250 * necessary because every new transaction reservation will come through this
251 * path. Hence any lock will be globally hot if we take it unconditionally on
252 * every pass.
253 *
254 * As tickets are only ever moved on and off head->waiters under head->lock, we
255 * only need to take that lock if we are going to add the ticket to the queue
256 * and sleep. We can avoid taking the lock if the ticket was never added to
257 * head->waiters because the t_queue list head will be empty and we hold the
258 * only reference to it so it can safely be checked unlocked.
259 */
233STATIC int 260STATIC int
234xlog_writeq_wait( 261xlog_grant_head_check(
235 struct log *log, 262 struct log *log,
263 struct xlog_grant_head *head,
236 struct xlog_ticket *tic, 264 struct xlog_ticket *tic,
237 int need_bytes) 265 int *need_bytes)
238{ 266{
239 list_add_tail(&tic->t_queue, &log->l_writeq); 267 int free_bytes;
240 268 int error = 0;
241 do {
242 if (XLOG_FORCED_SHUTDOWN(log))
243 goto shutdown;
244 xlog_grant_push_ail(log, need_bytes);
245
246 XFS_STATS_INC(xs_sleep_logspace);
247 trace_xfs_log_regrant_write_sleep(log, tic);
248 269
249 xlog_wait(&tic->t_wait, &log->l_grant_write_lock); 270 ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
250 trace_xfs_log_regrant_write_wake(log, tic);
251 271
252 spin_lock(&log->l_grant_write_lock); 272 /*
253 if (XLOG_FORCED_SHUTDOWN(log)) 273 * If there are other waiters on the queue then give them a chance at
254 goto shutdown; 274 * logspace before us. Wake up the first waiters, if we do not wake
255 } while (xlog_space_left(log, &log->l_grant_write_head) < need_bytes); 275 * up all the waiters then go to sleep waiting for more free space,
276 * otherwise try to get some space for this transaction.
277 */
278 *need_bytes = xlog_ticket_reservation(log, head, tic);
279 free_bytes = xlog_space_left(log, &head->grant);
280 if (!list_empty_careful(&head->waiters)) {
281 spin_lock(&head->lock);
282 if (!xlog_grant_head_wake(log, head, &free_bytes) ||
283 free_bytes < *need_bytes) {
284 error = xlog_grant_head_wait(log, head, tic,
285 *need_bytes);
286 }
287 spin_unlock(&head->lock);
288 } else if (free_bytes < *need_bytes) {
289 spin_lock(&head->lock);
290 error = xlog_grant_head_wait(log, head, tic, *need_bytes);
291 spin_unlock(&head->lock);
292 }
256 293
257 list_del_init(&tic->t_queue); 294 return error;
258 return 0;
259shutdown:
260 list_del_init(&tic->t_queue);
261 return XFS_ERROR(EIO);
262} 295}
263 296
264static void 297static void
@@ -286,6 +319,128 @@ xlog_tic_add_region(xlog_ticket_t *tic, uint len, uint type)
286} 319}
287 320
288/* 321/*
322 * Replenish the byte reservation required by moving the grant write head.
323 */
324int
325xfs_log_regrant(
326 struct xfs_mount *mp,
327 struct xlog_ticket *tic)
328{
329 struct log *log = mp->m_log;
330 int need_bytes;
331 int error = 0;
332
333 if (XLOG_FORCED_SHUTDOWN(log))
334 return XFS_ERROR(EIO);
335
336 XFS_STATS_INC(xs_try_logspace);
337
338 /*
339 * This is a new transaction on the ticket, so we need to change the
340 * transaction ID so that the next transaction has a different TID in
341 * the log. Just add one to the existing tid so that we can see chains
342 * of rolling transactions in the log easily.
343 */
344 tic->t_tid++;
345
346 xlog_grant_push_ail(log, tic->t_unit_res);
347
348 tic->t_curr_res = tic->t_unit_res;
349 xlog_tic_reset_res(tic);
350
351 if (tic->t_cnt > 0)
352 return 0;
353
354 trace_xfs_log_regrant(log, tic);
355
356 error = xlog_grant_head_check(log, &log->l_write_head, tic,
357 &need_bytes);
358 if (error)
359 goto out_error;
360
361 xlog_grant_add_space(log, &log->l_write_head.grant, need_bytes);
362 trace_xfs_log_regrant_exit(log, tic);
363 xlog_verify_grant_tail(log);
364 return 0;
365
366out_error:
367 /*
368 * If we are failing, make sure the ticket doesn't have any current
369 * reservations. We don't want to add this back when the ticket/
370 * transaction gets cancelled.
371 */
372 tic->t_curr_res = 0;
373 tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
374 return error;
375}
376
377/*
378 * Reserve log space and return a ticket corresponding the reservation.
379 *
380 * Each reservation is going to reserve extra space for a log record header.
381 * When writes happen to the on-disk log, we don't subtract the length of the
382 * log record header from any reservation. By wasting space in each
383 * reservation, we prevent over allocation problems.
384 */
385int
386xfs_log_reserve(
387 struct xfs_mount *mp,
388 int unit_bytes,
389 int cnt,
390 struct xlog_ticket **ticp,
391 __uint8_t client,
392 bool permanent,
393 uint t_type)
394{
395 struct log *log = mp->m_log;
396 struct xlog_ticket *tic;
397 int need_bytes;
398 int error = 0;
399
400 ASSERT(client == XFS_TRANSACTION || client == XFS_LOG);
401
402 if (XLOG_FORCED_SHUTDOWN(log))
403 return XFS_ERROR(EIO);
404
405 XFS_STATS_INC(xs_try_logspace);
406
407 ASSERT(*ticp == NULL);
408 tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent,
409 KM_SLEEP | KM_MAYFAIL);
410 if (!tic)
411 return XFS_ERROR(ENOMEM);
412
413 tic->t_trans_type = t_type;
414 *ticp = tic;
415
416 xlog_grant_push_ail(log, tic->t_unit_res * tic->t_cnt);
417
418 trace_xfs_log_reserve(log, tic);
419
420 error = xlog_grant_head_check(log, &log->l_reserve_head, tic,
421 &need_bytes);
422 if (error)
423 goto out_error;
424
425 xlog_grant_add_space(log, &log->l_reserve_head.grant, need_bytes);
426 xlog_grant_add_space(log, &log->l_write_head.grant, need_bytes);
427 trace_xfs_log_reserve_exit(log, tic);
428 xlog_verify_grant_tail(log);
429 return 0;
430
431out_error:
432 /*
433 * If we are failing, make sure the ticket doesn't have any current
434 * reservations. We don't want to add this back when the ticket/
435 * transaction gets cancelled.
436 */
437 tic->t_curr_res = 0;
438 tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
439 return error;
440}
441
442
443/*
289 * NOTES: 444 * NOTES:
290 * 445 *
291 * 1. currblock field gets updated at startup and after in-core logs 446 * 1. currblock field gets updated at startup and after in-core logs
@@ -395,88 +550,6 @@ xfs_log_release_iclog(
395} 550}
396 551
397/* 552/*
398 * 1. Reserve an amount of on-disk log space and return a ticket corresponding
399 * to the reservation.
400 * 2. Potentially, push buffers at tail of log to disk.
401 *
402 * Each reservation is going to reserve extra space for a log record header.
403 * When writes happen to the on-disk log, we don't subtract the length of the
404 * log record header from any reservation. By wasting space in each
405 * reservation, we prevent over allocation problems.
406 */
407int
408xfs_log_reserve(
409 struct xfs_mount *mp,
410 int unit_bytes,
411 int cnt,
412 struct xlog_ticket **ticket,
413 __uint8_t client,
414 uint flags,
415 uint t_type)
416{
417 struct log *log = mp->m_log;
418 struct xlog_ticket *internal_ticket;
419 int retval = 0;
420
421 ASSERT(client == XFS_TRANSACTION || client == XFS_LOG);
422
423 if (XLOG_FORCED_SHUTDOWN(log))
424 return XFS_ERROR(EIO);
425
426 XFS_STATS_INC(xs_try_logspace);
427
428
429 if (*ticket != NULL) {
430 ASSERT(flags & XFS_LOG_PERM_RESERV);
431 internal_ticket = *ticket;
432
433 /*
434 * this is a new transaction on the ticket, so we need to
435 * change the transaction ID so that the next transaction has a
436 * different TID in the log. Just add one to the existing tid
437 * so that we can see chains of rolling transactions in the log
438 * easily.
439 */
440 internal_ticket->t_tid++;
441
442 trace_xfs_log_reserve(log, internal_ticket);
443
444 xlog_grant_push_ail(log, internal_ticket->t_unit_res);
445 retval = xlog_regrant_write_log_space(log, internal_ticket);
446 } else {
447 /* may sleep if need to allocate more tickets */
448 internal_ticket = xlog_ticket_alloc(log, unit_bytes, cnt,
449 client, flags,
450 KM_SLEEP|KM_MAYFAIL);
451 if (!internal_ticket)
452 return XFS_ERROR(ENOMEM);
453 internal_ticket->t_trans_type = t_type;
454 *ticket = internal_ticket;
455
456 trace_xfs_log_reserve(log, internal_ticket);
457
458 xlog_grant_push_ail(log,
459 (internal_ticket->t_unit_res *
460 internal_ticket->t_cnt));
461 retval = xlog_grant_log_space(log, internal_ticket);
462 }
463
464 if (unlikely(retval)) {
465 /*
466 * If we are failing, make sure the ticket doesn't have any
467 * current reservations. We don't want to add this back
468 * when the ticket/ transaction gets cancelled.
469 */
470 internal_ticket->t_curr_res = 0;
471 /* ungrant will give back unit_res * t_cnt. */
472 internal_ticket->t_cnt = 0;
473 }
474
475 return retval;
476}
477
478
479/*
480 * Mount a log filesystem 553 * Mount a log filesystem
481 * 554 *
482 * mp - ubiquitous xfs mount point structure 555 * mp - ubiquitous xfs mount point structure
@@ -760,64 +833,35 @@ xfs_log_item_init(
760 INIT_LIST_HEAD(&item->li_cil); 833 INIT_LIST_HEAD(&item->li_cil);
761} 834}
762 835
836/*
837 * Wake up processes waiting for log space after we have moved the log tail.
838 */
763void 839void
764xfs_log_move_tail(xfs_mount_t *mp, 840xfs_log_space_wake(
765 xfs_lsn_t tail_lsn) 841 struct xfs_mount *mp)
766{ 842{
767 xlog_ticket_t *tic; 843 struct log *log = mp->m_log;
768 xlog_t *log = mp->m_log; 844 int free_bytes;
769 int need_bytes, free_bytes;
770 845
771 if (XLOG_FORCED_SHUTDOWN(log)) 846 if (XLOG_FORCED_SHUTDOWN(log))
772 return; 847 return;
773 848
774 if (tail_lsn == 0) 849 if (!list_empty_careful(&log->l_write_head.waiters)) {
775 tail_lsn = atomic64_read(&log->l_last_sync_lsn); 850 ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
776
777 /* tail_lsn == 1 implies that we weren't passed a valid value. */
778 if (tail_lsn != 1)
779 atomic64_set(&log->l_tail_lsn, tail_lsn);
780
781 if (!list_empty_careful(&log->l_writeq)) {
782#ifdef DEBUG
783 if (log->l_flags & XLOG_ACTIVE_RECOVERY)
784 panic("Recovery problem");
785#endif
786 spin_lock(&log->l_grant_write_lock);
787 free_bytes = xlog_space_left(log, &log->l_grant_write_head);
788 list_for_each_entry(tic, &log->l_writeq, t_queue) {
789 ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV);
790 851
791 if (free_bytes < tic->t_unit_res && tail_lsn != 1) 852 spin_lock(&log->l_write_head.lock);
792 break; 853 free_bytes = xlog_space_left(log, &log->l_write_head.grant);
793 tail_lsn = 0; 854 xlog_grant_head_wake(log, &log->l_write_head, &free_bytes);
794 free_bytes -= tic->t_unit_res; 855 spin_unlock(&log->l_write_head.lock);
795 trace_xfs_log_regrant_write_wake_up(log, tic);
796 wake_up(&tic->t_wait);
797 }
798 spin_unlock(&log->l_grant_write_lock);
799 } 856 }
800 857
801 if (!list_empty_careful(&log->l_reserveq)) { 858 if (!list_empty_careful(&log->l_reserve_head.waiters)) {
802#ifdef DEBUG 859 ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
803 if (log->l_flags & XLOG_ACTIVE_RECOVERY) 860
804 panic("Recovery problem"); 861 spin_lock(&log->l_reserve_head.lock);
805#endif 862 free_bytes = xlog_space_left(log, &log->l_reserve_head.grant);
806 spin_lock(&log->l_grant_reserve_lock); 863 xlog_grant_head_wake(log, &log->l_reserve_head, &free_bytes);
807 free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); 864 spin_unlock(&log->l_reserve_head.lock);
808 list_for_each_entry(tic, &log->l_reserveq, t_queue) {
809 if (tic->t_flags & XLOG_TIC_PERM_RESERV)
810 need_bytes = tic->t_unit_res*tic->t_cnt;
811 else
812 need_bytes = tic->t_unit_res;
813 if (free_bytes < need_bytes && tail_lsn != 1)
814 break;
815 tail_lsn = 0;
816 free_bytes -= need_bytes;
817 trace_xfs_log_grant_wake_up(log, tic);
818 wake_up(&tic->t_wait);
819 }
820 spin_unlock(&log->l_grant_reserve_lock);
821 } 865 }
822} 866}
823 867
@@ -867,21 +911,7 @@ xfs_log_need_covered(xfs_mount_t *mp)
867 return needed; 911 return needed;
868} 912}
869 913
870/****************************************************************************** 914/*
871 *
872 * local routines
873 *
874 ******************************************************************************
875 */
876
877/* xfs_trans_tail_ail returns 0 when there is nothing in the list.
878 * The log manager must keep track of the last LR which was committed
879 * to disk. The lsn of this LR will become the new tail_lsn whenever
880 * xfs_trans_tail_ail returns 0. If we don't do this, we run into
881 * the situation where stuff could be written into the log but nothing
882 * was ever in the AIL when asked. Eventually, we panic since the
883 * tail hits the head.
884 *
885 * We may be holding the log iclog lock upon entering this routine. 915 * We may be holding the log iclog lock upon entering this routine.
886 */ 916 */
887xfs_lsn_t 917xfs_lsn_t
@@ -891,10 +921,17 @@ xlog_assign_tail_lsn(
891 xfs_lsn_t tail_lsn; 921 xfs_lsn_t tail_lsn;
892 struct log *log = mp->m_log; 922 struct log *log = mp->m_log;
893 923
924 /*
925 * To make sure we always have a valid LSN for the log tail we keep
926 * track of the last LSN which was committed in log->l_last_sync_lsn,
927 * and use that when the AIL was empty and xfs_ail_min_lsn returns 0.
928 *
929 * If the AIL has been emptied we also need to wake any process
930 * waiting for this condition.
931 */
894 tail_lsn = xfs_ail_min_lsn(mp->m_ail); 932 tail_lsn = xfs_ail_min_lsn(mp->m_ail);
895 if (!tail_lsn) 933 if (!tail_lsn)
896 tail_lsn = atomic64_read(&log->l_last_sync_lsn); 934 tail_lsn = atomic64_read(&log->l_last_sync_lsn);
897
898 atomic64_set(&log->l_tail_lsn, tail_lsn); 935 atomic64_set(&log->l_tail_lsn, tail_lsn);
899 return tail_lsn; 936 return tail_lsn;
900} 937}
@@ -1100,12 +1137,9 @@ xlog_alloc_log(xfs_mount_t *mp,
1100 xlog_assign_atomic_lsn(&log->l_tail_lsn, 1, 0); 1137 xlog_assign_atomic_lsn(&log->l_tail_lsn, 1, 0);
1101 xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1, 0); 1138 xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1, 0);
1102 log->l_curr_cycle = 1; /* 0 is bad since this is initial value */ 1139 log->l_curr_cycle = 1; /* 0 is bad since this is initial value */
1103 xlog_assign_grant_head(&log->l_grant_reserve_head, 1, 0); 1140
1104 xlog_assign_grant_head(&log->l_grant_write_head, 1, 0); 1141 xlog_grant_head_init(&log->l_reserve_head);
1105 INIT_LIST_HEAD(&log->l_reserveq); 1142 xlog_grant_head_init(&log->l_write_head);
1106 INIT_LIST_HEAD(&log->l_writeq);
1107 spin_lock_init(&log->l_grant_reserve_lock);
1108 spin_lock_init(&log->l_grant_write_lock);
1109 1143
1110 error = EFSCORRUPTED; 1144 error = EFSCORRUPTED;
1111 if (xfs_sb_version_hassector(&mp->m_sb)) { 1145 if (xfs_sb_version_hassector(&mp->m_sb)) {
@@ -1280,7 +1314,7 @@ xlog_grant_push_ail(
1280 1314
1281 ASSERT(BTOBB(need_bytes) < log->l_logBBsize); 1315 ASSERT(BTOBB(need_bytes) < log->l_logBBsize);
1282 1316
1283 free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); 1317 free_bytes = xlog_space_left(log, &log->l_reserve_head.grant);
1284 free_blocks = BTOBBT(free_bytes); 1318 free_blocks = BTOBBT(free_bytes);
1285 1319
1286 /* 1320 /*
@@ -1412,8 +1446,8 @@ xlog_sync(xlog_t *log,
1412 roundoff < BBTOB(1))); 1446 roundoff < BBTOB(1)));
1413 1447
1414 /* move grant heads by roundoff in sync */ 1448 /* move grant heads by roundoff in sync */
1415 xlog_grant_add_space(log, &log->l_grant_reserve_head, roundoff); 1449 xlog_grant_add_space(log, &log->l_reserve_head.grant, roundoff);
1416 xlog_grant_add_space(log, &log->l_grant_write_head, roundoff); 1450 xlog_grant_add_space(log, &log->l_write_head.grant, roundoff);
1417 1451
1418 /* put cycle number in every block */ 1452 /* put cycle number in every block */
1419 xlog_pack_data(log, iclog, roundoff); 1453 xlog_pack_data(log, iclog, roundoff);
@@ -2566,119 +2600,6 @@ restart:
2566 return 0; 2600 return 0;
2567} /* xlog_state_get_iclog_space */ 2601} /* xlog_state_get_iclog_space */
2568 2602
2569/*
2570 * Atomically get the log space required for a log ticket.
2571 *
2572 * Once a ticket gets put onto the reserveq, it will only return after the
2573 * needed reservation is satisfied.
2574 *
2575 * This function is structured so that it has a lock free fast path. This is
2576 * necessary because every new transaction reservation will come through this
2577 * path. Hence any lock will be globally hot if we take it unconditionally on
2578 * every pass.
2579 *
2580 * As tickets are only ever moved on and off the reserveq under the
2581 * l_grant_reserve_lock, we only need to take that lock if we are going to add
2582 * the ticket to the queue and sleep. We can avoid taking the lock if the ticket
2583 * was never added to the reserveq because the t_queue list head will be empty
2584 * and we hold the only reference to it so it can safely be checked unlocked.
2585 */
2586STATIC int
2587xlog_grant_log_space(
2588 struct log *log,
2589 struct xlog_ticket *tic)
2590{
2591 int free_bytes, need_bytes;
2592 int error = 0;
2593
2594 ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
2595
2596 trace_xfs_log_grant_enter(log, tic);
2597
2598 /*
2599 * If there are other waiters on the queue then give them a chance at
2600 * logspace before us. Wake up the first waiters, if we do not wake
2601 * up all the waiters then go to sleep waiting for more free space,
2602 * otherwise try to get some space for this transaction.
2603 */
2604 need_bytes = tic->t_unit_res;
2605 if (tic->t_flags & XFS_LOG_PERM_RESERV)
2606 need_bytes *= tic->t_ocnt;
2607 free_bytes = xlog_space_left(log, &log->l_grant_reserve_head);
2608 if (!list_empty_careful(&log->l_reserveq)) {
2609 spin_lock(&log->l_grant_reserve_lock);
2610 if (!xlog_reserveq_wake(log, &free_bytes) ||
2611 free_bytes < need_bytes)
2612 error = xlog_reserveq_wait(log, tic, need_bytes);
2613 spin_unlock(&log->l_grant_reserve_lock);
2614 } else if (free_bytes < need_bytes) {
2615 spin_lock(&log->l_grant_reserve_lock);
2616 error = xlog_reserveq_wait(log, tic, need_bytes);
2617 spin_unlock(&log->l_grant_reserve_lock);
2618 }
2619 if (error)
2620 return error;
2621
2622 xlog_grant_add_space(log, &log->l_grant_reserve_head, need_bytes);
2623 xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
2624 trace_xfs_log_grant_exit(log, tic);
2625 xlog_verify_grant_tail(log);
2626 return 0;
2627}
2628
2629/*
2630 * Replenish the byte reservation required by moving the grant write head.
2631 *
2632 * Similar to xlog_grant_log_space, the function is structured to have a lock
2633 * free fast path.
2634 */
2635STATIC int
2636xlog_regrant_write_log_space(
2637 struct log *log,
2638 struct xlog_ticket *tic)
2639{
2640 int free_bytes, need_bytes;
2641 int error = 0;
2642
2643 tic->t_curr_res = tic->t_unit_res;
2644 xlog_tic_reset_res(tic);
2645
2646 if (tic->t_cnt > 0)
2647 return 0;
2648
2649 ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
2650
2651 trace_xfs_log_regrant_write_enter(log, tic);
2652
2653 /*
2654 * If there are other waiters on the queue then give them a chance at
2655 * logspace before us. Wake up the first waiters, if we do not wake
2656 * up all the waiters then go to sleep waiting for more free space,
2657 * otherwise try to get some space for this transaction.
2658 */
2659 need_bytes = tic->t_unit_res;
2660 free_bytes = xlog_space_left(log, &log->l_grant_write_head);
2661 if (!list_empty_careful(&log->l_writeq)) {
2662 spin_lock(&log->l_grant_write_lock);
2663 if (!xlog_writeq_wake(log, &free_bytes) ||
2664 free_bytes < need_bytes)
2665 error = xlog_writeq_wait(log, tic, need_bytes);
2666 spin_unlock(&log->l_grant_write_lock);
2667 } else if (free_bytes < need_bytes) {
2668 spin_lock(&log->l_grant_write_lock);
2669 error = xlog_writeq_wait(log, tic, need_bytes);
2670 spin_unlock(&log->l_grant_write_lock);
2671 }
2672
2673 if (error)
2674 return error;
2675
2676 xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
2677 trace_xfs_log_regrant_write_exit(log, tic);
2678 xlog_verify_grant_tail(log);
2679 return 0;
2680}
2681
2682/* The first cnt-1 times through here we don't need to 2603/* The first cnt-1 times through here we don't need to
2683 * move the grant write head because the permanent 2604 * move the grant write head because the permanent
2684 * reservation has reserved cnt times the unit amount. 2605 * reservation has reserved cnt times the unit amount.
@@ -2695,9 +2616,9 @@ xlog_regrant_reserve_log_space(xlog_t *log,
2695 if (ticket->t_cnt > 0) 2616 if (ticket->t_cnt > 0)
2696 ticket->t_cnt--; 2617 ticket->t_cnt--;
2697 2618
2698 xlog_grant_sub_space(log, &log->l_grant_reserve_head, 2619 xlog_grant_sub_space(log, &log->l_reserve_head.grant,
2699 ticket->t_curr_res); 2620 ticket->t_curr_res);
2700 xlog_grant_sub_space(log, &log->l_grant_write_head, 2621 xlog_grant_sub_space(log, &log->l_write_head.grant,
2701 ticket->t_curr_res); 2622 ticket->t_curr_res);
2702 ticket->t_curr_res = ticket->t_unit_res; 2623 ticket->t_curr_res = ticket->t_unit_res;
2703 xlog_tic_reset_res(ticket); 2624 xlog_tic_reset_res(ticket);
@@ -2708,7 +2629,7 @@ xlog_regrant_reserve_log_space(xlog_t *log,
2708 if (ticket->t_cnt > 0) 2629 if (ticket->t_cnt > 0)
2709 return; 2630 return;
2710 2631
2711 xlog_grant_add_space(log, &log->l_grant_reserve_head, 2632 xlog_grant_add_space(log, &log->l_reserve_head.grant,
2712 ticket->t_unit_res); 2633 ticket->t_unit_res);
2713 2634
2714 trace_xfs_log_regrant_reserve_exit(log, ticket); 2635 trace_xfs_log_regrant_reserve_exit(log, ticket);
@@ -2754,14 +2675,13 @@ xlog_ungrant_log_space(xlog_t *log,
2754 bytes += ticket->t_unit_res*ticket->t_cnt; 2675 bytes += ticket->t_unit_res*ticket->t_cnt;
2755 } 2676 }
2756 2677
2757 xlog_grant_sub_space(log, &log->l_grant_reserve_head, bytes); 2678 xlog_grant_sub_space(log, &log->l_reserve_head.grant, bytes);
2758 xlog_grant_sub_space(log, &log->l_grant_write_head, bytes); 2679 xlog_grant_sub_space(log, &log->l_write_head.grant, bytes);
2759 2680
2760 trace_xfs_log_ungrant_exit(log, ticket); 2681 trace_xfs_log_ungrant_exit(log, ticket);
2761 2682
2762 xfs_log_move_tail(log->l_mp, 1); 2683 xfs_log_space_wake(log->l_mp);
2763} /* xlog_ungrant_log_space */ 2684}
2764
2765 2685
2766/* 2686/*
2767 * Flush iclog to disk if this is the last reference to the given iclog and 2687 * Flush iclog to disk if this is the last reference to the given iclog and
@@ -3219,7 +3139,7 @@ xlog_ticket_alloc(
3219 int unit_bytes, 3139 int unit_bytes,
3220 int cnt, 3140 int cnt,
3221 char client, 3141 char client,
3222 uint xflags, 3142 bool permanent,
3223 int alloc_flags) 3143 int alloc_flags)
3224{ 3144{
3225 struct xlog_ticket *tic; 3145 struct xlog_ticket *tic;
@@ -3313,6 +3233,7 @@ xlog_ticket_alloc(
3313 } 3233 }
3314 3234
3315 atomic_set(&tic->t_ref, 1); 3235 atomic_set(&tic->t_ref, 1);
3236 tic->t_task = current;
3316 INIT_LIST_HEAD(&tic->t_queue); 3237 INIT_LIST_HEAD(&tic->t_queue);
3317 tic->t_unit_res = unit_bytes; 3238 tic->t_unit_res = unit_bytes;
3318 tic->t_curr_res = unit_bytes; 3239 tic->t_curr_res = unit_bytes;
@@ -3322,9 +3243,8 @@ xlog_ticket_alloc(
3322 tic->t_clientid = client; 3243 tic->t_clientid = client;
3323 tic->t_flags = XLOG_TIC_INITED; 3244 tic->t_flags = XLOG_TIC_INITED;
3324 tic->t_trans_type = 0; 3245 tic->t_trans_type = 0;
3325 if (xflags & XFS_LOG_PERM_RESERV) 3246 if (permanent)
3326 tic->t_flags |= XLOG_TIC_PERM_RESERV; 3247 tic->t_flags |= XLOG_TIC_PERM_RESERV;
3327 init_waitqueue_head(&tic->t_wait);
3328 3248
3329 xlog_tic_reset_res(tic); 3249 xlog_tic_reset_res(tic);
3330 3250
@@ -3380,7 +3300,7 @@ xlog_verify_grant_tail(
3380 int tail_cycle, tail_blocks; 3300 int tail_cycle, tail_blocks;
3381 int cycle, space; 3301 int cycle, space;
3382 3302
3383 xlog_crack_grant_head(&log->l_grant_write_head, &cycle, &space); 3303 xlog_crack_grant_head(&log->l_write_head.grant, &cycle, &space);
3384 xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks); 3304 xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks);
3385 if (tail_cycle != cycle) { 3305 if (tail_cycle != cycle) {
3386 if (cycle - 1 != tail_cycle && 3306 if (cycle - 1 != tail_cycle &&
@@ -3582,7 +3502,6 @@ xfs_log_force_umount(
3582 struct xfs_mount *mp, 3502 struct xfs_mount *mp,
3583 int logerror) 3503 int logerror)
3584{ 3504{
3585 xlog_ticket_t *tic;
3586 xlog_t *log; 3505 xlog_t *log;
3587 int retval; 3506 int retval;
3588 3507
@@ -3650,15 +3569,8 @@ xfs_log_force_umount(
3650 * we don't enqueue anything once the SHUTDOWN flag is set, and this 3569 * we don't enqueue anything once the SHUTDOWN flag is set, and this
3651 * action is protected by the grant locks. 3570 * action is protected by the grant locks.
3652 */ 3571 */
3653 spin_lock(&log->l_grant_reserve_lock); 3572 xlog_grant_head_wake_all(&log->l_reserve_head);
3654 list_for_each_entry(tic, &log->l_reserveq, t_queue) 3573 xlog_grant_head_wake_all(&log->l_write_head);
3655 wake_up(&tic->t_wait);
3656 spin_unlock(&log->l_grant_reserve_lock);
3657
3658 spin_lock(&log->l_grant_write_lock);
3659 list_for_each_entry(tic, &log->l_writeq, t_queue)
3660 wake_up(&tic->t_wait);
3661 spin_unlock(&log->l_grant_write_lock);
3662 3574
3663 if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { 3575 if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) {
3664 ASSERT(!logerror); 3576 ASSERT(!logerror);
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 2aee3b22d29c..2c622bedb302 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -53,15 +53,6 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
53#define XFS_LOG_REL_PERM_RESERV 0x1 53#define XFS_LOG_REL_PERM_RESERV 0x1
54 54
55/* 55/*
56 * Flags to xfs_log_reserve()
57 *
58 * XFS_LOG_PERM_RESERV: Permanent reservation. When writes are
59 * performed against this type of reservation, the reservation
60 * is not decreased. Long running transactions should use this.
61 */
62#define XFS_LOG_PERM_RESERV 0x2
63
64/*
65 * Flags to xfs_log_force() 56 * Flags to xfs_log_force()
66 * 57 *
67 * XFS_LOG_SYNC: Synchronous force in-core log to disk 58 * XFS_LOG_SYNC: Synchronous force in-core log to disk
@@ -160,8 +151,8 @@ int xfs_log_mount(struct xfs_mount *mp,
160 xfs_daddr_t start_block, 151 xfs_daddr_t start_block,
161 int num_bblocks); 152 int num_bblocks);
162int xfs_log_mount_finish(struct xfs_mount *mp); 153int xfs_log_mount_finish(struct xfs_mount *mp);
163void xfs_log_move_tail(struct xfs_mount *mp, 154xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
164 xfs_lsn_t tail_lsn); 155void xfs_log_space_wake(struct xfs_mount *mp);
165int xfs_log_notify(struct xfs_mount *mp, 156int xfs_log_notify(struct xfs_mount *mp,
166 struct xlog_in_core *iclog, 157 struct xlog_in_core *iclog,
167 xfs_log_callback_t *callback_entry); 158 xfs_log_callback_t *callback_entry);
@@ -172,8 +163,9 @@ int xfs_log_reserve(struct xfs_mount *mp,
172 int count, 163 int count,
173 struct xlog_ticket **ticket, 164 struct xlog_ticket **ticket,
174 __uint8_t clientid, 165 __uint8_t clientid,
175 uint flags, 166 bool permanent,
176 uint t_type); 167 uint t_type);
168int xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic);
177int xfs_log_unmount_write(struct xfs_mount *mp); 169int xfs_log_unmount_write(struct xfs_mount *mp);
178void xfs_log_unmount(struct xfs_mount *mp); 170void xfs_log_unmount(struct xfs_mount *mp);
179int xfs_log_force_umount(struct xfs_mount *mp, int logerror); 171int xfs_log_force_umount(struct xfs_mount *mp, int logerror);
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 2d3b6a498d63..2152900b79d4 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -239,8 +239,8 @@ typedef struct xlog_res {
239} xlog_res_t; 239} xlog_res_t;
240 240
241typedef struct xlog_ticket { 241typedef struct xlog_ticket {
242 wait_queue_head_t t_wait; /* ticket wait queue */
243 struct list_head t_queue; /* reserve/write queue */ 242 struct list_head t_queue; /* reserve/write queue */
243 struct task_struct *t_task; /* task that owns this ticket */
244 xlog_tid_t t_tid; /* transaction identifier : 4 */ 244 xlog_tid_t t_tid; /* transaction identifier : 4 */
245 atomic_t t_ref; /* ticket reference count : 4 */ 245 atomic_t t_ref; /* ticket reference count : 4 */
246 int t_curr_res; /* current reservation in bytes : 4 */ 246 int t_curr_res; /* current reservation in bytes : 4 */
@@ -470,6 +470,16 @@ struct xfs_cil {
470#define XLOG_CIL_HARD_SPACE_LIMIT(log) (3 * (log->l_logsize >> 4)) 470#define XLOG_CIL_HARD_SPACE_LIMIT(log) (3 * (log->l_logsize >> 4))
471 471
472/* 472/*
473 * ticket grant locks, queues and accounting have their own cachlines
474 * as these are quite hot and can be operated on concurrently.
475 */
476struct xlog_grant_head {
477 spinlock_t lock ____cacheline_aligned_in_smp;
478 struct list_head waiters;
479 atomic64_t grant;
480};
481
482/*
473 * The reservation head lsn is not made up of a cycle number and block number. 483 * The reservation head lsn is not made up of a cycle number and block number.
474 * Instead, it uses a cycle number and byte number. Logs don't expect to 484 * Instead, it uses a cycle number and byte number. Logs don't expect to
475 * overflow 31 bits worth of byte offset, so using a byte number will mean 485 * overflow 31 bits worth of byte offset, so using a byte number will mean
@@ -520,17 +530,8 @@ typedef struct log {
520 /* lsn of 1st LR with unflushed * buffers */ 530 /* lsn of 1st LR with unflushed * buffers */
521 atomic64_t l_tail_lsn ____cacheline_aligned_in_smp; 531 atomic64_t l_tail_lsn ____cacheline_aligned_in_smp;
522 532
523 /* 533 struct xlog_grant_head l_reserve_head;
524 * ticket grant locks, queues and accounting have their own cachlines 534 struct xlog_grant_head l_write_head;
525 * as these are quite hot and can be operated on concurrently.
526 */
527 spinlock_t l_grant_reserve_lock ____cacheline_aligned_in_smp;
528 struct list_head l_reserveq;
529 atomic64_t l_grant_reserve_head;
530
531 spinlock_t l_grant_write_lock ____cacheline_aligned_in_smp;
532 struct list_head l_writeq;
533 atomic64_t l_grant_write_head;
534 535
535 /* The following field are used for debugging; need to hold icloglock */ 536 /* The following field are used for debugging; need to hold icloglock */
536#ifdef DEBUG 537#ifdef DEBUG
@@ -545,14 +546,13 @@ typedef struct log {
545#define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) 546#define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR)
546 547
547/* common routines */ 548/* common routines */
548extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
549extern int xlog_recover(xlog_t *log); 549extern int xlog_recover(xlog_t *log);
550extern int xlog_recover_finish(xlog_t *log); 550extern int xlog_recover_finish(xlog_t *log);
551extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); 551extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int);
552 552
553extern kmem_zone_t *xfs_log_ticket_zone; 553extern kmem_zone_t *xfs_log_ticket_zone;
554struct xlog_ticket *xlog_ticket_alloc(struct log *log, int unit_bytes, 554struct xlog_ticket *xlog_ticket_alloc(struct log *log, int unit_bytes,
555 int count, char client, uint xflags, 555 int count, char client, bool permanent,
556 int alloc_flags); 556 int alloc_flags);
557 557
558 558
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 0ed9ee77937c..7c75c7374d5a 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -965,9 +965,9 @@ xlog_find_tail(
965 log->l_curr_cycle++; 965 log->l_curr_cycle++;
966 atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn)); 966 atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
967 atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn)); 967 atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
968 xlog_assign_grant_head(&log->l_grant_reserve_head, log->l_curr_cycle, 968 xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
969 BBTOB(log->l_curr_block)); 969 BBTOB(log->l_curr_block));
970 xlog_assign_grant_head(&log->l_grant_write_head, log->l_curr_cycle, 970 xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
971 BBTOB(log->l_curr_block)); 971 BBTOB(log->l_curr_block));
972 972
973 /* 973 /*
@@ -3695,7 +3695,7 @@ xlog_do_recover(
3695 3695
3696 /* Convert superblock from on-disk format */ 3696 /* Convert superblock from on-disk format */
3697 sbp = &log->l_mp->m_sb; 3697 sbp = &log->l_mp->m_sb;
3698 xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp)); 3698 xfs_sb_from_disk(log->l_mp, XFS_BUF_TO_SBP(bp));
3699 ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC); 3699 ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC);
3700 ASSERT(xfs_sb_good_version(sbp)); 3700 ASSERT(xfs_sb_good_version(sbp));
3701 xfs_buf_relse(bp); 3701 xfs_buf_relse(bp);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index d06afbc3540d..1ffead4b2296 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -158,7 +158,7 @@ xfs_uuid_mount(
158 158
159 out_duplicate: 159 out_duplicate:
160 mutex_unlock(&xfs_uuid_table_mutex); 160 mutex_unlock(&xfs_uuid_table_mutex);
161 xfs_warn(mp, "Filesystem has duplicate UUID - can't mount"); 161 xfs_warn(mp, "Filesystem has duplicate UUID %pU - can't mount", uuid);
162 return XFS_ERROR(EINVAL); 162 return XFS_ERROR(EINVAL);
163} 163}
164 164
@@ -553,9 +553,11 @@ out_unwind:
553 553
554void 554void
555xfs_sb_from_disk( 555xfs_sb_from_disk(
556 xfs_sb_t *to, 556 struct xfs_mount *mp,
557 xfs_dsb_t *from) 557 xfs_dsb_t *from)
558{ 558{
559 struct xfs_sb *to = &mp->m_sb;
560
559 to->sb_magicnum = be32_to_cpu(from->sb_magicnum); 561 to->sb_magicnum = be32_to_cpu(from->sb_magicnum);
560 to->sb_blocksize = be32_to_cpu(from->sb_blocksize); 562 to->sb_blocksize = be32_to_cpu(from->sb_blocksize);
561 to->sb_dblocks = be64_to_cpu(from->sb_dblocks); 563 to->sb_dblocks = be64_to_cpu(from->sb_dblocks);
@@ -693,7 +695,7 @@ reread:
693 * Initialize the mount structure from the superblock. 695 * Initialize the mount structure from the superblock.
694 * But first do some basic consistency checking. 696 * But first do some basic consistency checking.
695 */ 697 */
696 xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp)); 698 xfs_sb_from_disk(mp, XFS_BUF_TO_SBP(bp));
697 error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags); 699 error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags);
698 if (error) { 700 if (error) {
699 if (loud) 701 if (loud)
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 19f69e232509..9eba73887829 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -211,6 +211,9 @@ typedef struct xfs_mount {
211 struct shrinker m_inode_shrink; /* inode reclaim shrinker */ 211 struct shrinker m_inode_shrink; /* inode reclaim shrinker */
212 int64_t m_low_space[XFS_LOWSP_MAX]; 212 int64_t m_low_space[XFS_LOWSP_MAX];
213 /* low free space thresholds */ 213 /* low free space thresholds */
214
215 struct workqueue_struct *m_data_workqueue;
216 struct workqueue_struct *m_unwritten_workqueue;
214} xfs_mount_t; 217} xfs_mount_t;
215 218
216/* 219/*
@@ -395,7 +398,7 @@ extern void xfs_set_low_space_thresholds(struct xfs_mount *);
395extern void xfs_mod_sb(struct xfs_trans *, __int64_t); 398extern void xfs_mod_sb(struct xfs_trans *, __int64_t);
396extern int xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t, 399extern int xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t,
397 xfs_agnumber_t *); 400 xfs_agnumber_t *);
398extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *); 401extern void xfs_sb_from_disk(struct xfs_mount *, struct xfs_dsb *);
399extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t); 402extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t);
400 403
401#endif /* __XFS_MOUNT_H__ */ 404#endif /* __XFS_MOUNT_H__ */
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index c436def733bf..55c6afedc879 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -48,194 +48,189 @@
48 * quota functionality, including maintaining the freelist and hash 48 * quota functionality, including maintaining the freelist and hash
49 * tables of dquots. 49 * tables of dquots.
50 */ 50 */
51struct mutex xfs_Gqm_lock;
52struct xfs_qm *xfs_Gqm;
53
54kmem_zone_t *qm_dqzone;
55kmem_zone_t *qm_dqtrxzone;
56
57STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int);
58STATIC void xfs_qm_list_destroy(xfs_dqlist_t *);
59
60STATIC int xfs_qm_init_quotainos(xfs_mount_t *); 51STATIC int xfs_qm_init_quotainos(xfs_mount_t *);
61STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); 52STATIC int xfs_qm_init_quotainfo(xfs_mount_t *);
62STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *); 53STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *);
63 54
64static struct shrinker xfs_qm_shaker = {
65 .shrink = xfs_qm_shake,
66 .seeks = DEFAULT_SEEKS,
67};
68
69/* 55/*
70 * Initialize the XQM structure. 56 * We use the batch lookup interface to iterate over the dquots as it
71 * Note that there is not one quota manager per file system. 57 * currently is the only interface into the radix tree code that allows
58 * fuzzy lookups instead of exact matches. Holding the lock over multiple
59 * operations is fine as all callers are used either during mount/umount
60 * or quotaoff.
72 */ 61 */
73STATIC struct xfs_qm * 62#define XFS_DQ_LOOKUP_BATCH 32
74xfs_Gqm_init(void) 63
64STATIC int
65xfs_qm_dquot_walk(
66 struct xfs_mount *mp,
67 int type,
68 int (*execute)(struct xfs_dquot *dqp))
75{ 69{
76 xfs_dqhash_t *udqhash, *gdqhash; 70 struct xfs_quotainfo *qi = mp->m_quotainfo;
77 xfs_qm_t *xqm; 71 struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type);
78 size_t hsize; 72 uint32_t next_index;
79 uint i; 73 int last_error = 0;
74 int skipped;
75 int nr_found;
76
77restart:
78 skipped = 0;
79 next_index = 0;
80 nr_found = 0;
81
82 while (1) {
83 struct xfs_dquot *batch[XFS_DQ_LOOKUP_BATCH];
84 int error = 0;
85 int i;
86
87 mutex_lock(&qi->qi_tree_lock);
88 nr_found = radix_tree_gang_lookup(tree, (void **)batch,
89 next_index, XFS_DQ_LOOKUP_BATCH);
90 if (!nr_found) {
91 mutex_unlock(&qi->qi_tree_lock);
92 break;
93 }
80 94
81 /* 95 for (i = 0; i < nr_found; i++) {
82 * Initialize the dquot hash tables. 96 struct xfs_dquot *dqp = batch[i];
83 */
84 udqhash = kmem_zalloc_greedy(&hsize,
85 XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t),
86 XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t));
87 if (!udqhash)
88 goto out;
89 97
90 gdqhash = kmem_zalloc_large(hsize); 98 next_index = be32_to_cpu(dqp->q_core.d_id) + 1;
91 if (!gdqhash)
92 goto out_free_udqhash;
93 99
94 hsize /= sizeof(xfs_dqhash_t); 100 error = execute(batch[i]);
101 if (error == EAGAIN) {
102 skipped++;
103 continue;
104 }
105 if (error && last_error != EFSCORRUPTED)
106 last_error = error;
107 }
95 108
96 xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP); 109 mutex_unlock(&qi->qi_tree_lock);
97 xqm->qm_dqhashmask = hsize - 1;
98 xqm->qm_usr_dqhtable = udqhash;
99 xqm->qm_grp_dqhtable = gdqhash;
100 ASSERT(xqm->qm_usr_dqhtable != NULL);
101 ASSERT(xqm->qm_grp_dqhtable != NULL);
102 110
103 for (i = 0; i < hsize; i++) { 111 /* bail out if the filesystem is corrupted. */
104 xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i); 112 if (last_error == EFSCORRUPTED) {
105 xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i); 113 skipped = 0;
114 break;
115 }
106 } 116 }
107 117
108 /* 118 if (skipped) {
109 * Freelist of all dquots of all file systems 119 delay(1);
110 */ 120 goto restart;
111 INIT_LIST_HEAD(&xqm->qm_dqfrlist); 121 }
112 xqm->qm_dqfrlist_cnt = 0;
113 mutex_init(&xqm->qm_dqfrlist_lock);
114
115 /*
116 * dquot zone. we register our own low-memory callback.
117 */
118 if (!qm_dqzone) {
119 xqm->qm_dqzone = kmem_zone_init(sizeof(xfs_dquot_t),
120 "xfs_dquots");
121 qm_dqzone = xqm->qm_dqzone;
122 } else
123 xqm->qm_dqzone = qm_dqzone;
124
125 register_shrinker(&xfs_qm_shaker);
126
127 /*
128 * The t_dqinfo portion of transactions.
129 */
130 if (!qm_dqtrxzone) {
131 xqm->qm_dqtrxzone = kmem_zone_init(sizeof(xfs_dquot_acct_t),
132 "xfs_dqtrx");
133 qm_dqtrxzone = xqm->qm_dqtrxzone;
134 } else
135 xqm->qm_dqtrxzone = qm_dqtrxzone;
136
137 atomic_set(&xqm->qm_totaldquots, 0);
138 xqm->qm_nrefs = 0;
139 return xqm;
140 122
141 out_free_udqhash: 123 return last_error;
142 kmem_free_large(udqhash);
143 out:
144 return NULL;
145} 124}
146 125
126
147/* 127/*
148 * Destroy the global quota manager when its reference count goes to zero. 128 * Purge a dquot from all tracking data structures and free it.
149 */ 129 */
150STATIC void 130STATIC int
151xfs_qm_destroy( 131xfs_qm_dqpurge(
152 struct xfs_qm *xqm) 132 struct xfs_dquot *dqp)
153{ 133{
154 int hsize, i; 134 struct xfs_mount *mp = dqp->q_mount;
135 struct xfs_quotainfo *qi = mp->m_quotainfo;
136 struct xfs_dquot *gdqp = NULL;
155 137
156 ASSERT(xqm != NULL); 138 xfs_dqlock(dqp);
157 ASSERT(xqm->qm_nrefs == 0); 139 if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) {
140 xfs_dqunlock(dqp);
141 return EAGAIN;
142 }
158 143
159 unregister_shrinker(&xfs_qm_shaker); 144 /*
145 * If this quota has a group hint attached, prepare for releasing it
146 * now.
147 */
148 gdqp = dqp->q_gdquot;
149 if (gdqp) {
150 xfs_dqlock(gdqp);
151 dqp->q_gdquot = NULL;
152 }
160 153
161 mutex_lock(&xqm->qm_dqfrlist_lock); 154 dqp->dq_flags |= XFS_DQ_FREEING;
162 ASSERT(list_empty(&xqm->qm_dqfrlist));
163 mutex_unlock(&xqm->qm_dqfrlist_lock);
164 155
165 hsize = xqm->qm_dqhashmask + 1; 156 /*
166 for (i = 0; i < hsize; i++) { 157 * If we're turning off quotas, we have to make sure that, for
167 xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i])); 158 * example, we don't delete quota disk blocks while dquots are
168 xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i])); 159 * in the process of getting written to those disk blocks.
160 * This dquot might well be on AIL, and we can't leave it there
161 * if we're turning off quotas. Basically, we need this flush
162 * lock, and are willing to block on it.
163 */
164 if (!xfs_dqflock_nowait(dqp)) {
165 /*
166 * Block on the flush lock after nudging dquot buffer,
167 * if it is incore.
168 */
169 xfs_dqflock_pushbuf_wait(dqp);
169 } 170 }
170 kmem_free_large(xqm->qm_usr_dqhtable);
171 kmem_free_large(xqm->qm_grp_dqhtable);
172 xqm->qm_usr_dqhtable = NULL;
173 xqm->qm_grp_dqhtable = NULL;
174 xqm->qm_dqhashmask = 0;
175 171
176 kmem_free(xqm);
177}
178
179/*
180 * Called at mount time to let XQM know that another file system is
181 * starting quotas. This isn't crucial information as the individual mount
182 * structures are pretty independent, but it helps the XQM keep a
183 * global view of what's going on.
184 */
185/* ARGSUSED */
186STATIC int
187xfs_qm_hold_quotafs_ref(
188 struct xfs_mount *mp)
189{
190 /* 172 /*
191 * Need to lock the xfs_Gqm structure for things like this. For example, 173 * If we are turning this type of quotas off, we don't care
192 * the structure could disappear between the entry to this routine and 174 * about the dirty metadata sitting in this dquot. OTOH, if
193 * a HOLD operation if not locked. 175 * we're unmounting, we do care, so we flush it and wait.
194 */ 176 */
195 mutex_lock(&xfs_Gqm_lock); 177 if (XFS_DQ_IS_DIRTY(dqp)) {
178 int error;
196 179
197 if (!xfs_Gqm) { 180 /*
198 xfs_Gqm = xfs_Gqm_init(); 181 * We don't care about getting disk errors here. We need
199 if (!xfs_Gqm) { 182 * to purge this dquot anyway, so we go ahead regardless.
200 mutex_unlock(&xfs_Gqm_lock); 183 */
201 return ENOMEM; 184 error = xfs_qm_dqflush(dqp, SYNC_WAIT);
202 } 185 if (error)
186 xfs_warn(mp, "%s: dquot %p flush failed",
187 __func__, dqp);
188 xfs_dqflock(dqp);
203 } 189 }
204 190
191 ASSERT(atomic_read(&dqp->q_pincount) == 0);
192 ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
193 !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
194
195 xfs_dqfunlock(dqp);
196 xfs_dqunlock(dqp);
197
198 radix_tree_delete(XFS_DQUOT_TREE(qi, dqp->q_core.d_flags),
199 be32_to_cpu(dqp->q_core.d_id));
200 qi->qi_dquots--;
201
205 /* 202 /*
206 * We can keep a list of all filesystems with quotas mounted for 203 * We move dquots to the freelist as soon as their reference count
207 * debugging and statistical purposes, but ... 204 * hits zero, so it really should be on the freelist here.
208 * Just take a reference and get out.
209 */ 205 */
210 xfs_Gqm->qm_nrefs++; 206 mutex_lock(&qi->qi_lru_lock);
211 mutex_unlock(&xfs_Gqm_lock); 207 ASSERT(!list_empty(&dqp->q_lru));
208 list_del_init(&dqp->q_lru);
209 qi->qi_lru_count--;
210 XFS_STATS_DEC(xs_qm_dquot_unused);
211 mutex_unlock(&qi->qi_lru_lock);
212 212
213 xfs_qm_dqdestroy(dqp);
214
215 if (gdqp)
216 xfs_qm_dqput(gdqp);
213 return 0; 217 return 0;
214} 218}
215 219
216
217/* 220/*
218 * Release the reference that a filesystem took at mount time, 221 * Purge the dquot cache.
219 * so that we know when we need to destroy the entire quota manager.
220 */ 222 */
221/* ARGSUSED */ 223void
222STATIC void 224xfs_qm_dqpurge_all(
223xfs_qm_rele_quotafs_ref( 225 struct xfs_mount *mp,
224 struct xfs_mount *mp) 226 uint flags)
225{ 227{
226 ASSERT(xfs_Gqm); 228 if (flags & XFS_QMOPT_UQUOTA)
227 ASSERT(xfs_Gqm->qm_nrefs > 0); 229 xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge);
228 230 if (flags & XFS_QMOPT_GQUOTA)
229 /* 231 xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge);
230 * Destroy the entire XQM. If somebody mounts with quotaon, this'll 232 if (flags & XFS_QMOPT_PQUOTA)
231 * be restarted. 233 xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_dqpurge);
232 */
233 mutex_lock(&xfs_Gqm_lock);
234 if (--xfs_Gqm->qm_nrefs == 0) {
235 xfs_qm_destroy(xfs_Gqm);
236 xfs_Gqm = NULL;
237 }
238 mutex_unlock(&xfs_Gqm_lock);
239} 234}
240 235
241/* 236/*
@@ -376,175 +371,6 @@ xfs_qm_unmount_quotas(
376 } 371 }
377} 372}
378 373
379/*
380 * Flush all dquots of the given file system to disk. The dquots are
381 * _not_ purged from memory here, just their data written to disk.
382 */
383STATIC int
384xfs_qm_dqflush_all(
385 struct xfs_mount *mp)
386{
387 struct xfs_quotainfo *q = mp->m_quotainfo;
388 int recl;
389 struct xfs_dquot *dqp;
390 int error;
391
392 if (!q)
393 return 0;
394again:
395 mutex_lock(&q->qi_dqlist_lock);
396 list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
397 xfs_dqlock(dqp);
398 if ((dqp->dq_flags & XFS_DQ_FREEING) ||
399 !XFS_DQ_IS_DIRTY(dqp)) {
400 xfs_dqunlock(dqp);
401 continue;
402 }
403
404 /* XXX a sentinel would be better */
405 recl = q->qi_dqreclaims;
406 if (!xfs_dqflock_nowait(dqp)) {
407 /*
408 * If we can't grab the flush lock then check
409 * to see if the dquot has been flushed delayed
410 * write. If so, grab its buffer and send it
411 * out immediately. We'll be able to acquire
412 * the flush lock when the I/O completes.
413 */
414 xfs_dqflock_pushbuf_wait(dqp);
415 }
416 /*
417 * Let go of the mplist lock. We don't want to hold it
418 * across a disk write.
419 */
420 mutex_unlock(&q->qi_dqlist_lock);
421 error = xfs_qm_dqflush(dqp, 0);
422 xfs_dqunlock(dqp);
423 if (error)
424 return error;
425
426 mutex_lock(&q->qi_dqlist_lock);
427 if (recl != q->qi_dqreclaims) {
428 mutex_unlock(&q->qi_dqlist_lock);
429 /* XXX restart limit */
430 goto again;
431 }
432 }
433
434 mutex_unlock(&q->qi_dqlist_lock);
435 /* return ! busy */
436 return 0;
437}
438
439/*
440 * Release the group dquot pointers the user dquots may be
441 * carrying around as a hint. mplist is locked on entry and exit.
442 */
443STATIC void
444xfs_qm_detach_gdquots(
445 struct xfs_mount *mp)
446{
447 struct xfs_quotainfo *q = mp->m_quotainfo;
448 struct xfs_dquot *dqp, *gdqp;
449
450 again:
451 ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
452 list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
453 xfs_dqlock(dqp);
454 if (dqp->dq_flags & XFS_DQ_FREEING) {
455 xfs_dqunlock(dqp);
456 mutex_unlock(&q->qi_dqlist_lock);
457 delay(1);
458 mutex_lock(&q->qi_dqlist_lock);
459 goto again;
460 }
461
462 gdqp = dqp->q_gdquot;
463 if (gdqp)
464 dqp->q_gdquot = NULL;
465 xfs_dqunlock(dqp);
466
467 if (gdqp)
468 xfs_qm_dqrele(gdqp);
469 }
470}
471
472/*
473 * Go through all the incore dquots of this file system and take them
474 * off the mplist and hashlist, if the dquot type matches the dqtype
475 * parameter. This is used when turning off quota accounting for
476 * users and/or groups, as well as when the filesystem is unmounting.
477 */
478STATIC int
479xfs_qm_dqpurge_int(
480 struct xfs_mount *mp,
481 uint flags)
482{
483 struct xfs_quotainfo *q = mp->m_quotainfo;
484 struct xfs_dquot *dqp, *n;
485 uint dqtype;
486 int nmisses = 0;
487 LIST_HEAD (dispose_list);
488
489 if (!q)
490 return 0;
491
492 dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0;
493 dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0;
494 dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0;
495
496 mutex_lock(&q->qi_dqlist_lock);
497
498 /*
499 * In the first pass through all incore dquots of this filesystem,
500 * we release the group dquot pointers the user dquots may be
501 * carrying around as a hint. We need to do this irrespective of
502 * what's being turned off.
503 */
504 xfs_qm_detach_gdquots(mp);
505
506 /*
507 * Try to get rid of all of the unwanted dquots.
508 */
509 list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) {
510 xfs_dqlock(dqp);
511 if ((dqp->dq_flags & dqtype) != 0 &&
512 !(dqp->dq_flags & XFS_DQ_FREEING)) {
513 if (dqp->q_nrefs == 0) {
514 dqp->dq_flags |= XFS_DQ_FREEING;
515 list_move_tail(&dqp->q_mplist, &dispose_list);
516 } else
517 nmisses++;
518 }
519 xfs_dqunlock(dqp);
520 }
521 mutex_unlock(&q->qi_dqlist_lock);
522
523 list_for_each_entry_safe(dqp, n, &dispose_list, q_mplist)
524 xfs_qm_dqpurge(dqp);
525
526 return nmisses;
527}
528
529int
530xfs_qm_dqpurge_all(
531 xfs_mount_t *mp,
532 uint flags)
533{
534 int ndquots;
535
536 /*
537 * Purge the dquot cache.
538 * None of the dquots should really be busy at this point.
539 */
540 if (mp->m_quotainfo) {
541 while ((ndquots = xfs_qm_dqpurge_int(mp, flags))) {
542 delay(ndquots * 10);
543 }
544 }
545 return 0;
546}
547
548STATIC int 374STATIC int
549xfs_qm_dqattach_one( 375xfs_qm_dqattach_one(
550 xfs_inode_t *ip, 376 xfs_inode_t *ip,
@@ -783,14 +609,6 @@ xfs_qm_dqdetach(
783} 609}
784 610
785/* 611/*
786 * The hash chains and the mplist use the same xfs_dqhash structure as
787 * their list head, but we can take the mplist qh_lock and one of the
788 * hash qh_locks at the same time without any problem as they aren't
789 * related.
790 */
791static struct lock_class_key xfs_quota_mplist_class;
792
793/*
794 * This initializes all the quota information that's kept in the 612 * This initializes all the quota information that's kept in the
795 * mount structure 613 * mount structure
796 */ 614 */
@@ -804,13 +622,6 @@ xfs_qm_init_quotainfo(
804 622
805 ASSERT(XFS_IS_QUOTA_RUNNING(mp)); 623 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
806 624
807 /*
808 * Tell XQM that we exist as soon as possible.
809 */
810 if ((error = xfs_qm_hold_quotafs_ref(mp))) {
811 return error;
812 }
813
814 qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP); 625 qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
815 626
816 /* 627 /*
@@ -823,11 +634,13 @@ xfs_qm_init_quotainfo(
823 return error; 634 return error;
824 } 635 }
825 636
826 INIT_LIST_HEAD(&qinf->qi_dqlist); 637 INIT_RADIX_TREE(&qinf->qi_uquota_tree, GFP_NOFS);
827 mutex_init(&qinf->qi_dqlist_lock); 638 INIT_RADIX_TREE(&qinf->qi_gquota_tree, GFP_NOFS);
828 lockdep_set_class(&qinf->qi_dqlist_lock, &xfs_quota_mplist_class); 639 mutex_init(&qinf->qi_tree_lock);
829 640
830 qinf->qi_dqreclaims = 0; 641 INIT_LIST_HEAD(&qinf->qi_lru_list);
642 qinf->qi_lru_count = 0;
643 mutex_init(&qinf->qi_lru_lock);
831 644
832 /* mutex used to serialize quotaoffs */ 645 /* mutex used to serialize quotaoffs */
833 mutex_init(&qinf->qi_quotaofflock); 646 mutex_init(&qinf->qi_quotaofflock);
@@ -894,6 +707,9 @@ xfs_qm_init_quotainfo(
894 qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT; 707 qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
895 } 708 }
896 709
710 qinf->qi_shrinker.shrink = xfs_qm_shake;
711 qinf->qi_shrinker.seeks = DEFAULT_SEEKS;
712 register_shrinker(&qinf->qi_shrinker);
897 return 0; 713 return 0;
898} 714}
899 715
@@ -911,17 +727,8 @@ xfs_qm_destroy_quotainfo(
911 727
912 qi = mp->m_quotainfo; 728 qi = mp->m_quotainfo;
913 ASSERT(qi != NULL); 729 ASSERT(qi != NULL);
914 ASSERT(xfs_Gqm != NULL);
915
916 /*
917 * Release the reference that XQM kept, so that we know
918 * when the XQM structure should be freed. We cannot assume
919 * that xfs_Gqm is non-null after this point.
920 */
921 xfs_qm_rele_quotafs_ref(mp);
922 730
923 ASSERT(list_empty(&qi->qi_dqlist)); 731 unregister_shrinker(&qi->qi_shrinker);
924 mutex_destroy(&qi->qi_dqlist_lock);
925 732
926 if (qi->qi_uquotaip) { 733 if (qi->qi_uquotaip) {
927 IRELE(qi->qi_uquotaip); 734 IRELE(qi->qi_uquotaip);
@@ -936,30 +743,6 @@ xfs_qm_destroy_quotainfo(
936 mp->m_quotainfo = NULL; 743 mp->m_quotainfo = NULL;
937} 744}
938 745
939
940
941/* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */
942
943/* ARGSUSED */
944STATIC void
945xfs_qm_list_init(
946 xfs_dqlist_t *list,
947 char *str,
948 int n)
949{
950 mutex_init(&list->qh_lock);
951 INIT_LIST_HEAD(&list->qh_list);
952 list->qh_version = 0;
953 list->qh_nelems = 0;
954}
955
956STATIC void
957xfs_qm_list_destroy(
958 xfs_dqlist_t *list)
959{
960 mutex_destroy(&(list->qh_lock));
961}
962
963/* 746/*
964 * Create an inode and return with a reference already taken, but unlocked 747 * Create an inode and return with a reference already taken, but unlocked
965 * This is how we create quota inodes 748 * This is how we create quota inodes
@@ -1397,6 +1180,28 @@ error0:
1397 return error; 1180 return error;
1398} 1181}
1399 1182
1183STATIC int
1184xfs_qm_flush_one(
1185 struct xfs_dquot *dqp)
1186{
1187 int error = 0;
1188
1189 xfs_dqlock(dqp);
1190 if (dqp->dq_flags & XFS_DQ_FREEING)
1191 goto out_unlock;
1192 if (!XFS_DQ_IS_DIRTY(dqp))
1193 goto out_unlock;
1194
1195 if (!xfs_dqflock_nowait(dqp))
1196 xfs_dqflock_pushbuf_wait(dqp);
1197
1198 error = xfs_qm_dqflush(dqp, 0);
1199
1200out_unlock:
1201 xfs_dqunlock(dqp);
1202 return error;
1203}
1204
1400/* 1205/*
1401 * Walk thru all the filesystem inodes and construct a consistent view 1206 * Walk thru all the filesystem inodes and construct a consistent view
1402 * of the disk quota world. If the quotacheck fails, disable quotas. 1207 * of the disk quota world. If the quotacheck fails, disable quotas.
@@ -1405,7 +1210,7 @@ int
1405xfs_qm_quotacheck( 1210xfs_qm_quotacheck(
1406 xfs_mount_t *mp) 1211 xfs_mount_t *mp)
1407{ 1212{
1408 int done, count, error; 1213 int done, count, error, error2;
1409 xfs_ino_t lastino; 1214 xfs_ino_t lastino;
1410 size_t structsz; 1215 size_t structsz;
1411 xfs_inode_t *uip, *gip; 1216 xfs_inode_t *uip, *gip;
@@ -1419,12 +1224,6 @@ xfs_qm_quotacheck(
1419 ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip); 1224 ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip);
1420 ASSERT(XFS_IS_QUOTA_RUNNING(mp)); 1225 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1421 1226
1422 /*
1423 * There should be no cached dquots. The (simplistic) quotacheck
1424 * algorithm doesn't like that.
1425 */
1426 ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist));
1427
1428 xfs_notice(mp, "Quotacheck needed: Please wait."); 1227 xfs_notice(mp, "Quotacheck needed: Please wait.");
1429 1228
1430 /* 1229 /*
@@ -1463,12 +1262,21 @@ xfs_qm_quotacheck(
1463 } while (!done); 1262 } while (!done);
1464 1263
1465 /* 1264 /*
1466 * We've made all the changes that we need to make incore. 1265 * We've made all the changes that we need to make incore. Flush them
1467 * Flush them down to disk buffers if everything was updated 1266 * down to disk buffers if everything was updated successfully.
1468 * successfully.
1469 */ 1267 */
1470 if (!error) 1268 if (XFS_IS_UQUOTA_ON(mp))
1471 error = xfs_qm_dqflush_all(mp); 1269 error = xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_flush_one);
1270 if (XFS_IS_GQUOTA_ON(mp)) {
1271 error2 = xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_flush_one);
1272 if (!error)
1273 error = error2;
1274 }
1275 if (XFS_IS_PQUOTA_ON(mp)) {
1276 error2 = xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_flush_one);
1277 if (!error)
1278 error = error2;
1279 }
1472 1280
1473 /* 1281 /*
1474 * We can get this error if we couldn't do a dquot allocation inside 1282 * We can get this error if we couldn't do a dquot allocation inside
@@ -1496,7 +1304,7 @@ xfs_qm_quotacheck(
1496 * quotachecked status, since we won't be doing accounting for 1304 * quotachecked status, since we won't be doing accounting for
1497 * that type anymore. 1305 * that type anymore.
1498 */ 1306 */
1499 mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD); 1307 mp->m_qflags &= ~XFS_ALL_QUOTA_CHKD;
1500 mp->m_qflags |= flags; 1308 mp->m_qflags |= flags;
1501 1309
1502 error_return: 1310 error_return:
@@ -1508,7 +1316,6 @@ xfs_qm_quotacheck(
1508 * We must turn off quotas. 1316 * We must turn off quotas.
1509 */ 1317 */
1510 ASSERT(mp->m_quotainfo != NULL); 1318 ASSERT(mp->m_quotainfo != NULL);
1511 ASSERT(xfs_Gqm != NULL);
1512 xfs_qm_destroy_quotainfo(mp); 1319 xfs_qm_destroy_quotainfo(mp);
1513 if (xfs_mount_reset_sbqflags(mp)) { 1320 if (xfs_mount_reset_sbqflags(mp)) {
1514 xfs_warn(mp, 1321 xfs_warn(mp,
@@ -1604,16 +1411,12 @@ xfs_qm_dqfree_one(
1604 struct xfs_mount *mp = dqp->q_mount; 1411 struct xfs_mount *mp = dqp->q_mount;
1605 struct xfs_quotainfo *qi = mp->m_quotainfo; 1412 struct xfs_quotainfo *qi = mp->m_quotainfo;
1606 1413
1607 mutex_lock(&dqp->q_hash->qh_lock); 1414 mutex_lock(&qi->qi_tree_lock);
1608 list_del_init(&dqp->q_hashlist); 1415 radix_tree_delete(XFS_DQUOT_TREE(qi, dqp->q_core.d_flags),
1609 dqp->q_hash->qh_version++; 1416 be32_to_cpu(dqp->q_core.d_id));
1610 mutex_unlock(&dqp->q_hash->qh_lock);
1611 1417
1612 mutex_lock(&qi->qi_dqlist_lock);
1613 list_del_init(&dqp->q_mplist);
1614 qi->qi_dquots--; 1418 qi->qi_dquots--;
1615 qi->qi_dqreclaims++; 1419 mutex_unlock(&qi->qi_tree_lock);
1616 mutex_unlock(&qi->qi_dqlist_lock);
1617 1420
1618 xfs_qm_dqdestroy(dqp); 1421 xfs_qm_dqdestroy(dqp);
1619} 1422}
@@ -1624,6 +1427,7 @@ xfs_qm_dqreclaim_one(
1624 struct list_head *dispose_list) 1427 struct list_head *dispose_list)
1625{ 1428{
1626 struct xfs_mount *mp = dqp->q_mount; 1429 struct xfs_mount *mp = dqp->q_mount;
1430 struct xfs_quotainfo *qi = mp->m_quotainfo;
1627 int error; 1431 int error;
1628 1432
1629 if (!xfs_dqlock_nowait(dqp)) 1433 if (!xfs_dqlock_nowait(dqp))
@@ -1637,16 +1441,14 @@ xfs_qm_dqreclaim_one(
1637 xfs_dqunlock(dqp); 1441 xfs_dqunlock(dqp);
1638 1442
1639 trace_xfs_dqreclaim_want(dqp); 1443 trace_xfs_dqreclaim_want(dqp);
1640 XQM_STATS_INC(xqmstats.xs_qm_dqwants); 1444 XFS_STATS_INC(xs_qm_dqwants);
1641 1445
1642 list_del_init(&dqp->q_freelist); 1446 list_del_init(&dqp->q_lru);
1643 xfs_Gqm->qm_dqfrlist_cnt--; 1447 qi->qi_lru_count--;
1448 XFS_STATS_DEC(xs_qm_dquot_unused);
1644 return; 1449 return;
1645 } 1450 }
1646 1451
1647 ASSERT(dqp->q_hash);
1648 ASSERT(!list_empty(&dqp->q_mplist));
1649
1650 /* 1452 /*
1651 * Try to grab the flush lock. If this dquot is in the process of 1453 * Try to grab the flush lock. If this dquot is in the process of
1652 * getting flushed to disk, we don't want to reclaim it. 1454 * getting flushed to disk, we don't want to reclaim it.
@@ -1688,11 +1490,12 @@ xfs_qm_dqreclaim_one(
1688 xfs_dqunlock(dqp); 1490 xfs_dqunlock(dqp);
1689 1491
1690 ASSERT(dqp->q_nrefs == 0); 1492 ASSERT(dqp->q_nrefs == 0);
1691 list_move_tail(&dqp->q_freelist, dispose_list); 1493 list_move_tail(&dqp->q_lru, dispose_list);
1692 xfs_Gqm->qm_dqfrlist_cnt--; 1494 qi->qi_lru_count--;
1495 XFS_STATS_DEC(xs_qm_dquot_unused);
1693 1496
1694 trace_xfs_dqreclaim_done(dqp); 1497 trace_xfs_dqreclaim_done(dqp);
1695 XQM_STATS_INC(xqmstats.xs_qm_dqreclaims); 1498 XFS_STATS_INC(xs_qm_dqreclaims);
1696 return; 1499 return;
1697 1500
1698out_busy: 1501out_busy:
@@ -1701,10 +1504,10 @@ out_busy:
1701 /* 1504 /*
1702 * Move the dquot to the tail of the list so that we don't spin on it. 1505 * Move the dquot to the tail of the list so that we don't spin on it.
1703 */ 1506 */
1704 list_move_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist); 1507 list_move_tail(&dqp->q_lru, &qi->qi_lru_list);
1705 1508
1706 trace_xfs_dqreclaim_busy(dqp); 1509 trace_xfs_dqreclaim_busy(dqp);
1707 XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses); 1510 XFS_STATS_INC(xs_qm_dqreclaim_misses);
1708} 1511}
1709 1512
1710STATIC int 1513STATIC int
@@ -1712,6 +1515,8 @@ xfs_qm_shake(
1712 struct shrinker *shrink, 1515 struct shrinker *shrink,
1713 struct shrink_control *sc) 1516 struct shrink_control *sc)
1714{ 1517{
1518 struct xfs_quotainfo *qi =
1519 container_of(shrink, struct xfs_quotainfo, qi_shrinker);
1715 int nr_to_scan = sc->nr_to_scan; 1520 int nr_to_scan = sc->nr_to_scan;
1716 LIST_HEAD (dispose_list); 1521 LIST_HEAD (dispose_list);
1717 struct xfs_dquot *dqp; 1522 struct xfs_dquot *dqp;
@@ -1721,24 +1526,23 @@ xfs_qm_shake(
1721 if (!nr_to_scan) 1526 if (!nr_to_scan)
1722 goto out; 1527 goto out;
1723 1528
1724 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); 1529 mutex_lock(&qi->qi_lru_lock);
1725 while (!list_empty(&xfs_Gqm->qm_dqfrlist)) { 1530 while (!list_empty(&qi->qi_lru_list)) {
1726 if (nr_to_scan-- <= 0) 1531 if (nr_to_scan-- <= 0)
1727 break; 1532 break;
1728 dqp = list_first_entry(&xfs_Gqm->qm_dqfrlist, struct xfs_dquot, 1533 dqp = list_first_entry(&qi->qi_lru_list, struct xfs_dquot,
1729 q_freelist); 1534 q_lru);
1730 xfs_qm_dqreclaim_one(dqp, &dispose_list); 1535 xfs_qm_dqreclaim_one(dqp, &dispose_list);
1731 } 1536 }
1732 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); 1537 mutex_unlock(&qi->qi_lru_lock);
1733 1538
1734 while (!list_empty(&dispose_list)) { 1539 while (!list_empty(&dispose_list)) {
1735 dqp = list_first_entry(&dispose_list, struct xfs_dquot, 1540 dqp = list_first_entry(&dispose_list, struct xfs_dquot, q_lru);
1736 q_freelist); 1541 list_del_init(&dqp->q_lru);
1737 list_del_init(&dqp->q_freelist);
1738 xfs_qm_dqfree_one(dqp); 1542 xfs_qm_dqfree_one(dqp);
1739 } 1543 }
1740out: 1544out:
1741 return (xfs_Gqm->qm_dqfrlist_cnt / 100) * sysctl_vfs_cache_pressure; 1545 return (qi->qi_lru_count / 100) * sysctl_vfs_cache_pressure;
1742} 1546}
1743 1547
1744/* 1548/*
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index 9a9b997e1a0a..44b858b79d71 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -21,21 +21,10 @@
21#include "xfs_dquot_item.h" 21#include "xfs_dquot_item.h"
22#include "xfs_dquot.h" 22#include "xfs_dquot.h"
23#include "xfs_quota_priv.h" 23#include "xfs_quota_priv.h"
24#include "xfs_qm_stats.h"
25 24
26struct xfs_qm;
27struct xfs_inode; 25struct xfs_inode;
28 26
29extern struct mutex xfs_Gqm_lock; 27extern struct kmem_zone *xfs_qm_dqtrxzone;
30extern struct xfs_qm *xfs_Gqm;
31extern kmem_zone_t *qm_dqzone;
32extern kmem_zone_t *qm_dqtrxzone;
33
34/*
35 * Dquot hashtable constants/threshold values.
36 */
37#define XFS_QM_HASHSIZE_LOW (PAGE_SIZE / sizeof(xfs_dqhash_t))
38#define XFS_QM_HASHSIZE_HIGH ((PAGE_SIZE * 4) / sizeof(xfs_dqhash_t))
39 28
40/* 29/*
41 * This defines the unit of allocation of dquots. 30 * This defines the unit of allocation of dquots.
@@ -48,36 +37,20 @@ extern kmem_zone_t *qm_dqtrxzone;
48 */ 37 */
49#define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1 38#define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1
50 39
51typedef xfs_dqhash_t xfs_dqlist_t;
52
53/*
54 * Quota Manager (global) structure. Lives only in core.
55 */
56typedef struct xfs_qm {
57 xfs_dqlist_t *qm_usr_dqhtable;/* udquot hash table */
58 xfs_dqlist_t *qm_grp_dqhtable;/* gdquot hash table */
59 uint qm_dqhashmask; /* # buckets in dq hashtab - 1 */
60 struct list_head qm_dqfrlist; /* freelist of dquots */
61 struct mutex qm_dqfrlist_lock;
62 int qm_dqfrlist_cnt;
63 atomic_t qm_totaldquots; /* total incore dquots */
64 uint qm_nrefs; /* file systems with quota on */
65 kmem_zone_t *qm_dqzone; /* dquot mem-alloc zone */
66 kmem_zone_t *qm_dqtrxzone; /* t_dqinfo of transactions */
67} xfs_qm_t;
68
69/* 40/*
70 * Various quota information for individual filesystems. 41 * Various quota information for individual filesystems.
71 * The mount structure keeps a pointer to this. 42 * The mount structure keeps a pointer to this.
72 */ 43 */
73typedef struct xfs_quotainfo { 44typedef struct xfs_quotainfo {
45 struct radix_tree_root qi_uquota_tree;
46 struct radix_tree_root qi_gquota_tree;
47 struct mutex qi_tree_lock;
74 xfs_inode_t *qi_uquotaip; /* user quota inode */ 48 xfs_inode_t *qi_uquotaip; /* user quota inode */
75 xfs_inode_t *qi_gquotaip; /* group quota inode */ 49 xfs_inode_t *qi_gquotaip; /* group quota inode */
76 struct list_head qi_dqlist; /* all dquots in filesys */ 50 struct list_head qi_lru_list;
77 struct mutex qi_dqlist_lock; 51 struct mutex qi_lru_lock;
52 int qi_lru_count;
78 int qi_dquots; 53 int qi_dquots;
79 int qi_dqreclaims; /* a change here indicates
80 a removal in the dqlist */
81 time_t qi_btimelimit; /* limit for blks timer */ 54 time_t qi_btimelimit; /* limit for blks timer */
82 time_t qi_itimelimit; /* limit for inodes timer */ 55 time_t qi_itimelimit; /* limit for inodes timer */
83 time_t qi_rtbtimelimit;/* limit for rt blks timer */ 56 time_t qi_rtbtimelimit;/* limit for rt blks timer */
@@ -93,8 +66,14 @@ typedef struct xfs_quotainfo {
93 xfs_qcnt_t qi_isoftlimit; /* default inode count soft limit */ 66 xfs_qcnt_t qi_isoftlimit; /* default inode count soft limit */
94 xfs_qcnt_t qi_rtbhardlimit;/* default realtime blk hard limit */ 67 xfs_qcnt_t qi_rtbhardlimit;/* default realtime blk hard limit */
95 xfs_qcnt_t qi_rtbsoftlimit;/* default realtime blk soft limit */ 68 xfs_qcnt_t qi_rtbsoftlimit;/* default realtime blk soft limit */
69 struct shrinker qi_shrinker;
96} xfs_quotainfo_t; 70} xfs_quotainfo_t;
97 71
72#define XFS_DQUOT_TREE(qi, type) \
73 ((type & XFS_DQ_USER) ? \
74 &((qi)->qi_uquota_tree) : \
75 &((qi)->qi_gquota_tree))
76
98 77
99extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long); 78extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long);
100extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *, 79extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *,
@@ -130,7 +109,7 @@ extern int xfs_qm_quotacheck(xfs_mount_t *);
130extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t); 109extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t);
131 110
132/* dquot stuff */ 111/* dquot stuff */
133extern int xfs_qm_dqpurge_all(xfs_mount_t *, uint); 112extern void xfs_qm_dqpurge_all(xfs_mount_t *, uint);
134extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint); 113extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint);
135 114
136/* quota ops */ 115/* quota ops */
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c
index a0a829addca9..e6986b5d80d8 100644
--- a/fs/xfs/xfs_qm_bhv.c
+++ b/fs/xfs/xfs_qm_bhv.c
@@ -40,28 +40,28 @@
40STATIC void 40STATIC void
41xfs_fill_statvfs_from_dquot( 41xfs_fill_statvfs_from_dquot(
42 struct kstatfs *statp, 42 struct kstatfs *statp,
43 xfs_disk_dquot_t *dp) 43 struct xfs_dquot *dqp)
44{ 44{
45 __uint64_t limit; 45 __uint64_t limit;
46 46
47 limit = dp->d_blk_softlimit ? 47 limit = dqp->q_core.d_blk_softlimit ?
48 be64_to_cpu(dp->d_blk_softlimit) : 48 be64_to_cpu(dqp->q_core.d_blk_softlimit) :
49 be64_to_cpu(dp->d_blk_hardlimit); 49 be64_to_cpu(dqp->q_core.d_blk_hardlimit);
50 if (limit && statp->f_blocks > limit) { 50 if (limit && statp->f_blocks > limit) {
51 statp->f_blocks = limit; 51 statp->f_blocks = limit;
52 statp->f_bfree = statp->f_bavail = 52 statp->f_bfree = statp->f_bavail =
53 (statp->f_blocks > be64_to_cpu(dp->d_bcount)) ? 53 (statp->f_blocks > dqp->q_res_bcount) ?
54 (statp->f_blocks - be64_to_cpu(dp->d_bcount)) : 0; 54 (statp->f_blocks - dqp->q_res_bcount) : 0;
55 } 55 }
56 56
57 limit = dp->d_ino_softlimit ? 57 limit = dqp->q_core.d_ino_softlimit ?
58 be64_to_cpu(dp->d_ino_softlimit) : 58 be64_to_cpu(dqp->q_core.d_ino_softlimit) :
59 be64_to_cpu(dp->d_ino_hardlimit); 59 be64_to_cpu(dqp->q_core.d_ino_hardlimit);
60 if (limit && statp->f_files > limit) { 60 if (limit && statp->f_files > limit) {
61 statp->f_files = limit; 61 statp->f_files = limit;
62 statp->f_ffree = 62 statp->f_ffree =
63 (statp->f_files > be64_to_cpu(dp->d_icount)) ? 63 (statp->f_files > dqp->q_res_icount) ?
64 (statp->f_ffree - be64_to_cpu(dp->d_icount)) : 0; 64 (statp->f_ffree - dqp->q_res_icount) : 0;
65 } 65 }
66} 66}
67 67
@@ -82,7 +82,7 @@ xfs_qm_statvfs(
82 xfs_dquot_t *dqp; 82 xfs_dquot_t *dqp;
83 83
84 if (!xfs_qm_dqget(mp, NULL, xfs_get_projid(ip), XFS_DQ_PROJ, 0, &dqp)) { 84 if (!xfs_qm_dqget(mp, NULL, xfs_get_projid(ip), XFS_DQ_PROJ, 0, &dqp)) {
85 xfs_fill_statvfs_from_dquot(statp, &dqp->q_core); 85 xfs_fill_statvfs_from_dquot(statp, dqp);
86 xfs_qm_dqput(dqp); 86 xfs_qm_dqput(dqp);
87 } 87 }
88} 88}
@@ -156,21 +156,3 @@ xfs_qm_newmount(
156 156
157 return 0; 157 return 0;
158} 158}
159
160void __init
161xfs_qm_init(void)
162{
163 printk(KERN_INFO "SGI XFS Quota Management subsystem\n");
164 mutex_init(&xfs_Gqm_lock);
165 xfs_qm_init_procfs();
166}
167
168void __exit
169xfs_qm_exit(void)
170{
171 xfs_qm_cleanup_procfs();
172 if (qm_dqzone)
173 kmem_zone_destroy(qm_dqzone);
174 if (qm_dqtrxzone)
175 kmem_zone_destroy(qm_dqtrxzone);
176}
diff --git a/fs/xfs/xfs_qm_stats.c b/fs/xfs/xfs_qm_stats.c
deleted file mode 100644
index 5729ba570877..000000000000
--- a/fs/xfs/xfs_qm_stats.c
+++ /dev/null
@@ -1,105 +0,0 @@
1/*
2 * Copyright (c) 2000-2003 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_bit.h"
21#include "xfs_log.h"
22#include "xfs_inum.h"
23#include "xfs_trans.h"
24#include "xfs_sb.h"
25#include "xfs_ag.h"
26#include "xfs_alloc.h"
27#include "xfs_quota.h"
28#include "xfs_mount.h"
29#include "xfs_bmap_btree.h"
30#include "xfs_inode.h"
31#include "xfs_itable.h"
32#include "xfs_bmap.h"
33#include "xfs_rtalloc.h"
34#include "xfs_error.h"
35#include "xfs_attr.h"
36#include "xfs_buf_item.h"
37#include "xfs_qm.h"
38
39struct xqmstats xqmstats;
40
41static int xqm_proc_show(struct seq_file *m, void *v)
42{
43 /* maximum; incore; ratio free to inuse; freelist */
44 seq_printf(m, "%d\t%d\t%d\t%u\n",
45 0,
46 xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0,
47 0,
48 xfs_Gqm? xfs_Gqm->qm_dqfrlist_cnt : 0);
49 return 0;
50}
51
52static int xqm_proc_open(struct inode *inode, struct file *file)
53{
54 return single_open(file, xqm_proc_show, NULL);
55}
56
57static const struct file_operations xqm_proc_fops = {
58 .owner = THIS_MODULE,
59 .open = xqm_proc_open,
60 .read = seq_read,
61 .llseek = seq_lseek,
62 .release = single_release,
63};
64
65static int xqmstat_proc_show(struct seq_file *m, void *v)
66{
67 /* quota performance statistics */
68 seq_printf(m, "qm %u %u %u %u %u %u %u %u\n",
69 xqmstats.xs_qm_dqreclaims,
70 xqmstats.xs_qm_dqreclaim_misses,
71 xqmstats.xs_qm_dquot_dups,
72 xqmstats.xs_qm_dqcachemisses,
73 xqmstats.xs_qm_dqcachehits,
74 xqmstats.xs_qm_dqwants,
75 xqmstats.xs_qm_dqshake_reclaims,
76 xqmstats.xs_qm_dqinact_reclaims);
77 return 0;
78}
79
80static int xqmstat_proc_open(struct inode *inode, struct file *file)
81{
82 return single_open(file, xqmstat_proc_show, NULL);
83}
84
85static const struct file_operations xqmstat_proc_fops = {
86 .owner = THIS_MODULE,
87 .open = xqmstat_proc_open,
88 .read = seq_read,
89 .llseek = seq_lseek,
90 .release = single_release,
91};
92
93void
94xfs_qm_init_procfs(void)
95{
96 proc_create("fs/xfs/xqmstat", 0, NULL, &xqmstat_proc_fops);
97 proc_create("fs/xfs/xqm", 0, NULL, &xqm_proc_fops);
98}
99
100void
101xfs_qm_cleanup_procfs(void)
102{
103 remove_proc_entry("fs/xfs/xqm", NULL);
104 remove_proc_entry("fs/xfs/xqmstat", NULL);
105}
diff --git a/fs/xfs/xfs_qm_stats.h b/fs/xfs/xfs_qm_stats.h
deleted file mode 100644
index 5b964fc0dc09..000000000000
--- a/fs/xfs/xfs_qm_stats.h
+++ /dev/null
@@ -1,53 +0,0 @@
1/*
2 * Copyright (c) 2002 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_QM_STATS_H__
19#define __XFS_QM_STATS_H__
20
21#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF)
22
23/*
24 * XQM global statistics
25 */
26struct xqmstats {
27 __uint32_t xs_qm_dqreclaims;
28 __uint32_t xs_qm_dqreclaim_misses;
29 __uint32_t xs_qm_dquot_dups;
30 __uint32_t xs_qm_dqcachemisses;
31 __uint32_t xs_qm_dqcachehits;
32 __uint32_t xs_qm_dqwants;
33 __uint32_t xs_qm_dqshake_reclaims;
34 __uint32_t xs_qm_dqinact_reclaims;
35};
36
37extern struct xqmstats xqmstats;
38
39# define XQM_STATS_INC(count) ( (count)++ )
40
41extern void xfs_qm_init_procfs(void);
42extern void xfs_qm_cleanup_procfs(void);
43
44#else
45
46# define XQM_STATS_INC(count) do { } while (0)
47
48static inline void xfs_qm_init_procfs(void) { };
49static inline void xfs_qm_cleanup_procfs(void) { };
50
51#endif
52
53#endif /* __XFS_QM_STATS_H__ */
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 711a86e39ff0..c4f396e437a8 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -47,9 +47,6 @@ STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
47 uint); 47 uint);
48STATIC uint xfs_qm_export_flags(uint); 48STATIC uint xfs_qm_export_flags(uint);
49STATIC uint xfs_qm_export_qtype_flags(uint); 49STATIC uint xfs_qm_export_qtype_flags(uint);
50STATIC void xfs_qm_export_dquot(xfs_mount_t *, xfs_disk_dquot_t *,
51 fs_disk_quota_t *);
52
53 50
54/* 51/*
55 * Turn off quota accounting and/or enforcement for all udquots and/or 52 * Turn off quota accounting and/or enforcement for all udquots and/or
@@ -69,7 +66,6 @@ xfs_qm_scall_quotaoff(
69 int error; 66 int error;
70 uint inactivate_flags; 67 uint inactivate_flags;
71 xfs_qoff_logitem_t *qoffstart; 68 xfs_qoff_logitem_t *qoffstart;
72 int nculprits;
73 69
74 /* 70 /*
75 * No file system can have quotas enabled on disk but not in core. 71 * No file system can have quotas enabled on disk but not in core.
@@ -175,18 +171,13 @@ xfs_qm_scall_quotaoff(
175 * This isn't protected by a particular lock directly, because we 171 * This isn't protected by a particular lock directly, because we
176 * don't want to take a mrlock every time we depend on quotas being on. 172 * don't want to take a mrlock every time we depend on quotas being on.
177 */ 173 */
178 mp->m_qflags &= ~(flags); 174 mp->m_qflags &= ~flags;
179 175
180 /* 176 /*
181 * Go through all the dquots of this file system and purge them, 177 * Go through all the dquots of this file system and purge them,
182 * according to what was turned off. We may not be able to get rid 178 * according to what was turned off.
183 * of all dquots, because dquots can have temporary references that
184 * are not attached to inodes. eg. xfs_setattr, xfs_create.
185 * So, if we couldn't purge all the dquots from the filesystem,
186 * we can't get rid of the incore data structures.
187 */ 179 */
188 while ((nculprits = xfs_qm_dqpurge_all(mp, dqtype))) 180 xfs_qm_dqpurge_all(mp, dqtype);
189 delay(10 * nculprits);
190 181
191 /* 182 /*
192 * Transactions that had started before ACTIVE state bit was cleared 183 * Transactions that had started before ACTIVE state bit was cleared
@@ -635,42 +626,6 @@ xfs_qm_scall_setqlim(
635 return error; 626 return error;
636} 627}
637 628
638int
639xfs_qm_scall_getquota(
640 xfs_mount_t *mp,
641 xfs_dqid_t id,
642 uint type,
643 fs_disk_quota_t *out)
644{
645 xfs_dquot_t *dqp;
646 int error;
647
648 /*
649 * Try to get the dquot. We don't want it allocated on disk, so
650 * we aren't passing the XFS_QMOPT_DOALLOC flag. If it doesn't
651 * exist, we'll get ENOENT back.
652 */
653 if ((error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp))) {
654 return (error);
655 }
656
657 /*
658 * If everything's NULL, this dquot doesn't quite exist as far as
659 * our utility programs are concerned.
660 */
661 if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
662 xfs_qm_dqput(dqp);
663 return XFS_ERROR(ENOENT);
664 }
665 /*
666 * Convert the disk dquot to the exportable format
667 */
668 xfs_qm_export_dquot(mp, &dqp->q_core, out);
669 xfs_qm_dqput(dqp);
670 return (error ? XFS_ERROR(EFAULT) : 0);
671}
672
673
674STATIC int 629STATIC int
675xfs_qm_log_quotaoff_end( 630xfs_qm_log_quotaoff_end(
676 xfs_mount_t *mp, 631 xfs_mount_t *mp,
@@ -759,50 +714,66 @@ error0:
759} 714}
760 715
761 716
762/* 717int
763 * Translate an internal style on-disk-dquot to the exportable format. 718xfs_qm_scall_getquota(
764 * The main differences are that the counters/limits are all in Basic 719 struct xfs_mount *mp,
765 * Blocks (BBs) instead of the internal FSBs, and all on-disk data has 720 xfs_dqid_t id,
766 * to be converted to the native endianness. 721 uint type,
767 */
768STATIC void
769xfs_qm_export_dquot(
770 xfs_mount_t *mp,
771 xfs_disk_dquot_t *src,
772 struct fs_disk_quota *dst) 722 struct fs_disk_quota *dst)
773{ 723{
724 struct xfs_dquot *dqp;
725 int error;
726
727 /*
728 * Try to get the dquot. We don't want it allocated on disk, so
729 * we aren't passing the XFS_QMOPT_DOALLOC flag. If it doesn't
730 * exist, we'll get ENOENT back.
731 */
732 error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp);
733 if (error)
734 return error;
735
736 /*
737 * If everything's NULL, this dquot doesn't quite exist as far as
738 * our utility programs are concerned.
739 */
740 if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
741 error = XFS_ERROR(ENOENT);
742 goto out_put;
743 }
744
774 memset(dst, 0, sizeof(*dst)); 745 memset(dst, 0, sizeof(*dst));
775 dst->d_version = FS_DQUOT_VERSION; /* different from src->d_version */ 746 dst->d_version = FS_DQUOT_VERSION;
776 dst->d_flags = xfs_qm_export_qtype_flags(src->d_flags); 747 dst->d_flags = xfs_qm_export_qtype_flags(dqp->q_core.d_flags);
777 dst->d_id = be32_to_cpu(src->d_id); 748 dst->d_id = be32_to_cpu(dqp->q_core.d_id);
778 dst->d_blk_hardlimit = 749 dst->d_blk_hardlimit =
779 XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_hardlimit)); 750 XFS_FSB_TO_BB(mp, be64_to_cpu(dqp->q_core.d_blk_hardlimit));
780 dst->d_blk_softlimit = 751 dst->d_blk_softlimit =
781 XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_softlimit)); 752 XFS_FSB_TO_BB(mp, be64_to_cpu(dqp->q_core.d_blk_softlimit));
782 dst->d_ino_hardlimit = be64_to_cpu(src->d_ino_hardlimit); 753 dst->d_ino_hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit);
783 dst->d_ino_softlimit = be64_to_cpu(src->d_ino_softlimit); 754 dst->d_ino_softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit);
784 dst->d_bcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_bcount)); 755 dst->d_bcount = XFS_FSB_TO_BB(mp, dqp->q_res_bcount);
785 dst->d_icount = be64_to_cpu(src->d_icount); 756 dst->d_icount = dqp->q_res_icount;
786 dst->d_btimer = be32_to_cpu(src->d_btimer); 757 dst->d_btimer = be32_to_cpu(dqp->q_core.d_btimer);
787 dst->d_itimer = be32_to_cpu(src->d_itimer); 758 dst->d_itimer = be32_to_cpu(dqp->q_core.d_itimer);
788 dst->d_iwarns = be16_to_cpu(src->d_iwarns); 759 dst->d_iwarns = be16_to_cpu(dqp->q_core.d_iwarns);
789 dst->d_bwarns = be16_to_cpu(src->d_bwarns); 760 dst->d_bwarns = be16_to_cpu(dqp->q_core.d_bwarns);
790 dst->d_rtb_hardlimit = 761 dst->d_rtb_hardlimit =
791 XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_hardlimit)); 762 XFS_FSB_TO_BB(mp, be64_to_cpu(dqp->q_core.d_rtb_hardlimit));
792 dst->d_rtb_softlimit = 763 dst->d_rtb_softlimit =
793 XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_softlimit)); 764 XFS_FSB_TO_BB(mp, be64_to_cpu(dqp->q_core.d_rtb_softlimit));
794 dst->d_rtbcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtbcount)); 765 dst->d_rtbcount = XFS_FSB_TO_BB(mp, dqp->q_res_rtbcount);
795 dst->d_rtbtimer = be32_to_cpu(src->d_rtbtimer); 766 dst->d_rtbtimer = be32_to_cpu(dqp->q_core.d_rtbtimer);
796 dst->d_rtbwarns = be16_to_cpu(src->d_rtbwarns); 767 dst->d_rtbwarns = be16_to_cpu(dqp->q_core.d_rtbwarns);
797 768
798 /* 769 /*
799 * Internally, we don't reset all the timers when quota enforcement 770 * Internally, we don't reset all the timers when quota enforcement
800 * gets turned off. No need to confuse the user level code, 771 * gets turned off. No need to confuse the user level code,
801 * so return zeroes in that case. 772 * so return zeroes in that case.
802 */ 773 */
803 if ((!XFS_IS_UQUOTA_ENFORCED(mp) && src->d_flags == XFS_DQ_USER) || 774 if ((!XFS_IS_UQUOTA_ENFORCED(mp) && dqp->q_core.d_flags == XFS_DQ_USER) ||
804 (!XFS_IS_OQUOTA_ENFORCED(mp) && 775 (!XFS_IS_OQUOTA_ENFORCED(mp) &&
805 (src->d_flags & (XFS_DQ_PROJ | XFS_DQ_GROUP)))) { 776 (dqp->q_core.d_flags & (XFS_DQ_PROJ | XFS_DQ_GROUP)))) {
806 dst->d_btimer = 0; 777 dst->d_btimer = 0;
807 dst->d_itimer = 0; 778 dst->d_itimer = 0;
808 dst->d_rtbtimer = 0; 779 dst->d_rtbtimer = 0;
@@ -823,6 +794,9 @@ xfs_qm_export_dquot(
823 } 794 }
824 } 795 }
825#endif 796#endif
797out_put:
798 xfs_qm_dqput(dqp);
799 return error;
826} 800}
827 801
828STATIC uint 802STATIC uint
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index 8a0807e0f979..b50ec5b95d5a 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -174,6 +174,8 @@ typedef struct xfs_qoff_logformat {
174#define XFS_UQUOTA_ACTIVE 0x0100 /* uquotas are being turned off */ 174#define XFS_UQUOTA_ACTIVE 0x0100 /* uquotas are being turned off */
175#define XFS_PQUOTA_ACTIVE 0x0200 /* pquotas are being turned off */ 175#define XFS_PQUOTA_ACTIVE 0x0200 /* pquotas are being turned off */
176#define XFS_GQUOTA_ACTIVE 0x0400 /* gquotas are being turned off */ 176#define XFS_GQUOTA_ACTIVE 0x0400 /* gquotas are being turned off */
177#define XFS_ALL_QUOTA_ACTIVE \
178 (XFS_UQUOTA_ACTIVE | XFS_PQUOTA_ACTIVE | XFS_GQUOTA_ACTIVE)
177 179
178/* 180/*
179 * Checking XFS_IS_*QUOTA_ON() while holding any inode lock guarantees 181 * Checking XFS_IS_*QUOTA_ON() while holding any inode lock guarantees
diff --git a/fs/xfs/xfs_quota_priv.h b/fs/xfs/xfs_quota_priv.h
index 94a3d927d716..6d86219d93da 100644
--- a/fs/xfs/xfs_quota_priv.h
+++ b/fs/xfs/xfs_quota_priv.h
@@ -24,17 +24,6 @@
24 */ 24 */
25#define XFS_DQITER_MAP_SIZE 10 25#define XFS_DQITER_MAP_SIZE 10
26 26
27/*
28 * Hash into a bucket in the dquot hash table, based on <mp, id>.
29 */
30#define XFS_DQ_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \
31 (__psunsigned_t)(id)) & \
32 (xfs_Gqm->qm_dqhashmask - 1))
33#define XFS_DQ_HASH(mp, id, type) (type == XFS_DQ_USER ? \
34 (xfs_Gqm->qm_usr_dqhtable + \
35 XFS_DQ_HASHVAL(mp, id)) : \
36 (xfs_Gqm->qm_grp_dqhtable + \
37 XFS_DQ_HASHVAL(mp, id)))
38#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \ 27#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \
39 !dqp->q_core.d_blk_hardlimit && \ 28 !dqp->q_core.d_blk_hardlimit && \
40 !dqp->q_core.d_blk_softlimit && \ 29 !dqp->q_core.d_blk_softlimit && \
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index 866de277079a..e44ef7ee8ce8 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -118,17 +118,6 @@ xfs_rename(
118 new_parent = (src_dp != target_dp); 118 new_parent = (src_dp != target_dp);
119 src_is_directory = S_ISDIR(src_ip->i_d.di_mode); 119 src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
120 120
121 if (src_is_directory) {
122 /*
123 * Check for link count overflow on target_dp
124 */
125 if (target_ip == NULL && new_parent &&
126 target_dp->i_d.di_nlink >= XFS_MAXLINK) {
127 error = XFS_ERROR(EMLINK);
128 goto std_return;
129 }
130 }
131
132 xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, 121 xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip,
133 inodes, &num_inodes); 122 inodes, &num_inodes);
134 123
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index cb6ae715814a..f429d9d5d325 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -529,7 +529,6 @@ static inline int xfs_sb_version_hasprojid32bit(xfs_sb_t *sbp)
529#define XFS_BB_TO_FSB(mp,bb) \ 529#define XFS_BB_TO_FSB(mp,bb) \
530 (((bb) + (XFS_FSB_TO_BB(mp,1) - 1)) >> (mp)->m_blkbb_log) 530 (((bb) + (XFS_FSB_TO_BB(mp,1) - 1)) >> (mp)->m_blkbb_log)
531#define XFS_BB_TO_FSBT(mp,bb) ((bb) >> (mp)->m_blkbb_log) 531#define XFS_BB_TO_FSBT(mp,bb) ((bb) >> (mp)->m_blkbb_log)
532#define XFS_BB_FSB_OFFSET(mp,bb) ((bb) & ((mp)->m_bsize - 1))
533 532
534/* 533/*
535 * File system block to byte conversions. 534 * File system block to byte conversions.
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
index 76fdc5861932..ce372b7d5644 100644
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -20,9 +20,18 @@
20 20
21DEFINE_PER_CPU(struct xfsstats, xfsstats); 21DEFINE_PER_CPU(struct xfsstats, xfsstats);
22 22
23static int counter_val(int idx)
24{
25 int val = 0, cpu;
26
27 for_each_possible_cpu(cpu)
28 val += *(((__u32 *)&per_cpu(xfsstats, cpu) + idx));
29 return val;
30}
31
23static int xfs_stat_proc_show(struct seq_file *m, void *v) 32static int xfs_stat_proc_show(struct seq_file *m, void *v)
24{ 33{
25 int c, i, j, val; 34 int i, j;
26 __uint64_t xs_xstrat_bytes = 0; 35 __uint64_t xs_xstrat_bytes = 0;
27 __uint64_t xs_write_bytes = 0; 36 __uint64_t xs_write_bytes = 0;
28 __uint64_t xs_read_bytes = 0; 37 __uint64_t xs_read_bytes = 0;
@@ -50,20 +59,16 @@ static int xfs_stat_proc_show(struct seq_file *m, void *v)
50 { "abtc2", XFSSTAT_END_ABTC_V2 }, 59 { "abtc2", XFSSTAT_END_ABTC_V2 },
51 { "bmbt2", XFSSTAT_END_BMBT_V2 }, 60 { "bmbt2", XFSSTAT_END_BMBT_V2 },
52 { "ibt2", XFSSTAT_END_IBT_V2 }, 61 { "ibt2", XFSSTAT_END_IBT_V2 },
62 /* we print both series of quota information together */
63 { "qm", XFSSTAT_END_QM },
53 }; 64 };
54 65
55 /* Loop over all stats groups */ 66 /* Loop over all stats groups */
56 for (i=j = 0; i < ARRAY_SIZE(xstats); i++) { 67 for (i = j = 0; i < ARRAY_SIZE(xstats); i++) {
57 seq_printf(m, "%s", xstats[i].desc); 68 seq_printf(m, "%s", xstats[i].desc);
58 /* inner loop does each group */ 69 /* inner loop does each group */
59 while (j < xstats[i].endpoint) { 70 for (; j < xstats[i].endpoint; j++)
60 val = 0; 71 seq_printf(m, " %u", counter_val(j));
61 /* sum over all cpus */
62 for_each_possible_cpu(c)
63 val += *(((__u32*)&per_cpu(xfsstats, c) + j));
64 seq_printf(m, " %u", val);
65 j++;
66 }
67 seq_putc(m, '\n'); 72 seq_putc(m, '\n');
68 } 73 }
69 /* extra precision counters */ 74 /* extra precision counters */
@@ -97,6 +102,58 @@ static const struct file_operations xfs_stat_proc_fops = {
97 .release = single_release, 102 .release = single_release,
98}; 103};
99 104
105/* legacy quota interfaces */
106#ifdef CONFIG_XFS_QUOTA
107static int xqm_proc_show(struct seq_file *m, void *v)
108{
109 /* maximum; incore; ratio free to inuse; freelist */
110 seq_printf(m, "%d\t%d\t%d\t%u\n",
111 0,
112 counter_val(XFSSTAT_END_XQMSTAT),
113 0,
114 counter_val(XFSSTAT_END_XQMSTAT + 1));
115 return 0;
116}
117
118static int xqm_proc_open(struct inode *inode, struct file *file)
119{
120 return single_open(file, xqm_proc_show, NULL);
121}
122
123static const struct file_operations xqm_proc_fops = {
124 .owner = THIS_MODULE,
125 .open = xqm_proc_open,
126 .read = seq_read,
127 .llseek = seq_lseek,
128 .release = single_release,
129};
130
131/* legacy quota stats interface no 2 */
132static int xqmstat_proc_show(struct seq_file *m, void *v)
133{
134 int j;
135
136 seq_printf(m, "qm");
137 for (j = XFSSTAT_END_IBT_V2; j < XFSSTAT_END_XQMSTAT; j++)
138 seq_printf(m, " %u", counter_val(j));
139 seq_putc(m, '\n');
140 return 0;
141}
142
143static int xqmstat_proc_open(struct inode *inode, struct file *file)
144{
145 return single_open(file, xqmstat_proc_show, NULL);
146}
147
148static const struct file_operations xqmstat_proc_fops = {
149 .owner = THIS_MODULE,
150 .open = xqmstat_proc_open,
151 .read = seq_read,
152 .llseek = seq_lseek,
153 .release = single_release,
154};
155#endif /* CONFIG_XFS_QUOTA */
156
100int 157int
101xfs_init_procfs(void) 158xfs_init_procfs(void)
102{ 159{
@@ -105,10 +162,24 @@ xfs_init_procfs(void)
105 162
106 if (!proc_create("fs/xfs/stat", 0, NULL, 163 if (!proc_create("fs/xfs/stat", 0, NULL,
107 &xfs_stat_proc_fops)) 164 &xfs_stat_proc_fops))
108 goto out_remove_entry; 165 goto out_remove_xfs_dir;
166#ifdef CONFIG_XFS_QUOTA
167 if (!proc_create("fs/xfs/xqmstat", 0, NULL,
168 &xqmstat_proc_fops))
169 goto out_remove_stat_file;
170 if (!proc_create("fs/xfs/xqm", 0, NULL,
171 &xqm_proc_fops))
172 goto out_remove_xqmstat_file;
173#endif
109 return 0; 174 return 0;
110 175
111 out_remove_entry: 176#ifdef CONFIG_XFS_QUOTA
177 out_remove_xqmstat_file:
178 remove_proc_entry("fs/xfs/xqmstat", NULL);
179 out_remove_stat_file:
180 remove_proc_entry("fs/xfs/stat", NULL);
181#endif
182 out_remove_xfs_dir:
112 remove_proc_entry("fs/xfs", NULL); 183 remove_proc_entry("fs/xfs", NULL);
113 out: 184 out:
114 return -ENOMEM; 185 return -ENOMEM;
@@ -117,6 +188,10 @@ xfs_init_procfs(void)
117void 188void
118xfs_cleanup_procfs(void) 189xfs_cleanup_procfs(void)
119{ 190{
191#ifdef CONFIG_XFS_QUOTA
192 remove_proc_entry("fs/xfs/xqm", NULL);
193 remove_proc_entry("fs/xfs/xqmstat", NULL);
194#endif
120 remove_proc_entry("fs/xfs/stat", NULL); 195 remove_proc_entry("fs/xfs/stat", NULL);
121 remove_proc_entry("fs/xfs", NULL); 196 remove_proc_entry("fs/xfs", NULL);
122} 197}
diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h
index 736854b1ca1a..c03ad38ceaeb 100644
--- a/fs/xfs/xfs_stats.h
+++ b/fs/xfs/xfs_stats.h
@@ -183,6 +183,16 @@ struct xfsstats {
183 __uint32_t xs_ibt_2_alloc; 183 __uint32_t xs_ibt_2_alloc;
184 __uint32_t xs_ibt_2_free; 184 __uint32_t xs_ibt_2_free;
185 __uint32_t xs_ibt_2_moves; 185 __uint32_t xs_ibt_2_moves;
186#define XFSSTAT_END_XQMSTAT (XFSSTAT_END_IBT_V2+6)
187 __uint32_t xs_qm_dqreclaims;
188 __uint32_t xs_qm_dqreclaim_misses;
189 __uint32_t xs_qm_dquot_dups;
190 __uint32_t xs_qm_dqcachemisses;
191 __uint32_t xs_qm_dqcachehits;
192 __uint32_t xs_qm_dqwants;
193#define XFSSTAT_END_QM (XFSSTAT_END_XQMSTAT+2)
194 __uint32_t xs_qm_dquot;
195 __uint32_t xs_qm_dquot_unused;
186/* Extra precision counters */ 196/* Extra precision counters */
187 __uint64_t xs_xstrat_bytes; 197 __uint64_t xs_xstrat_bytes;
188 __uint64_t xs_write_bytes; 198 __uint64_t xs_write_bytes;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index ee5b695c99a7..912442cf0f82 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -324,10 +324,9 @@ xfs_parseargs(
324 } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) { 324 } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
325 mp->m_flags |= XFS_MOUNT_FILESTREAMS; 325 mp->m_flags |= XFS_MOUNT_FILESTREAMS;
326 } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) { 326 } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
327 mp->m_qflags &= ~(XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE | 327 mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
328 XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | 328 mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
329 XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE | 329 mp->m_qflags &= ~XFS_ALL_QUOTA_ACTIVE;
330 XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD);
331 } else if (!strcmp(this_char, MNTOPT_QUOTA) || 330 } else if (!strcmp(this_char, MNTOPT_QUOTA) ||
332 !strcmp(this_char, MNTOPT_UQUOTA) || 331 !strcmp(this_char, MNTOPT_UQUOTA) ||
333 !strcmp(this_char, MNTOPT_USRQUOTA)) { 332 !strcmp(this_char, MNTOPT_USRQUOTA)) {
@@ -760,6 +759,36 @@ xfs_setup_devices(
760 return 0; 759 return 0;
761} 760}
762 761
762STATIC int
763xfs_init_mount_workqueues(
764 struct xfs_mount *mp)
765{
766 mp->m_data_workqueue = alloc_workqueue("xfs-data/%s",
767 WQ_MEM_RECLAIM, 0, mp->m_fsname);
768 if (!mp->m_data_workqueue)
769 goto out;
770
771 mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
772 WQ_MEM_RECLAIM, 0, mp->m_fsname);
773 if (!mp->m_unwritten_workqueue)
774 goto out_destroy_data_iodone_queue;
775
776 return 0;
777
778out_destroy_data_iodone_queue:
779 destroy_workqueue(mp->m_data_workqueue);
780out:
781 return -ENOMEM;
782}
783
784STATIC void
785xfs_destroy_mount_workqueues(
786 struct xfs_mount *mp)
787{
788 destroy_workqueue(mp->m_data_workqueue);
789 destroy_workqueue(mp->m_unwritten_workqueue);
790}
791
763/* Catch misguided souls that try to use this interface on XFS */ 792/* Catch misguided souls that try to use this interface on XFS */
764STATIC struct inode * 793STATIC struct inode *
765xfs_fs_alloc_inode( 794xfs_fs_alloc_inode(
@@ -834,91 +863,58 @@ xfs_fs_inode_init_once(
834} 863}
835 864
836/* 865/*
837 * Dirty the XFS inode when mark_inode_dirty_sync() is called so that 866 * This is called by the VFS when dirtying inode metadata. This can happen
838 * we catch unlogged VFS level updates to the inode. 867 * for a few reasons, but we only care about timestamp updates, given that
868 * we handled the rest ourselves. In theory no other calls should happen,
869 * but for example generic_write_end() keeps dirtying the inode after
870 * updating i_size. Thus we check that the flags are exactly I_DIRTY_SYNC,
871 * and skip this call otherwise.
839 * 872 *
840 * We need the barrier() to maintain correct ordering between unlogged 873 * We'll hopefull get a different method just for updating timestamps soon,
841 * updates and the transaction commit code that clears the i_update_core 874 * at which point this hack can go away, and maybe we'll also get real
842 * field. This requires all updates to be completed before marking the 875 * error handling here.
843 * inode dirty.
844 */ 876 */
845STATIC void 877STATIC void
846xfs_fs_dirty_inode( 878xfs_fs_dirty_inode(
847 struct inode *inode,
848 int flags)
849{
850 barrier();
851 XFS_I(inode)->i_update_core = 1;
852}
853
854STATIC int
855xfs_fs_write_inode(
856 struct inode *inode, 879 struct inode *inode,
857 struct writeback_control *wbc) 880 int flags)
858{ 881{
859 struct xfs_inode *ip = XFS_I(inode); 882 struct xfs_inode *ip = XFS_I(inode);
860 struct xfs_mount *mp = ip->i_mount; 883 struct xfs_mount *mp = ip->i_mount;
861 int error = EAGAIN; 884 struct xfs_trans *tp;
862 885 int error;
863 trace_xfs_write_inode(ip);
864
865 if (XFS_FORCED_SHUTDOWN(mp))
866 return -XFS_ERROR(EIO);
867
868 if (wbc->sync_mode == WB_SYNC_ALL || wbc->for_kupdate) {
869 /*
870 * Make sure the inode has made it it into the log. Instead
871 * of forcing it all the way to stable storage using a
872 * synchronous transaction we let the log force inside the
873 * ->sync_fs call do that for thus, which reduces the number
874 * of synchronous log forces dramatically.
875 */
876 error = xfs_log_dirty_inode(ip, NULL, 0);
877 if (error)
878 goto out;
879 return 0;
880 } else {
881 if (!ip->i_update_core)
882 return 0;
883 886
884 /* 887 if (flags != I_DIRTY_SYNC)
885 * We make this non-blocking if the inode is contended, return 888 return;
886 * EAGAIN to indicate to the caller that they did not succeed.
887 * This prevents the flush path from blocking on inodes inside
888 * another operation right now, they get caught later by
889 * xfs_sync.
890 */
891 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
892 goto out;
893 889
894 if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) 890 trace_xfs_dirty_inode(ip);
895 goto out_unlock;
896 891
897 /* 892 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
898 * Now we have the flush lock and the inode is not pinned, we 893 error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
899 * can check if the inode is really clean as we know that 894 if (error) {
900 * there are no pending transaction completions, it is not 895 xfs_trans_cancel(tp, 0);
901 * waiting on the delayed write queue and there is no IO in 896 goto trouble;
902 * progress.
903 */
904 if (xfs_inode_clean(ip)) {
905 xfs_ifunlock(ip);
906 error = 0;
907 goto out_unlock;
908 }
909 error = xfs_iflush(ip, SYNC_TRYLOCK);
910 } 897 }
911 898 xfs_ilock(ip, XFS_ILOCK_EXCL);
912 out_unlock:
913 xfs_iunlock(ip, XFS_ILOCK_SHARED);
914 out:
915 /* 899 /*
916 * if we failed to write out the inode then mark 900 * Grab all the latest timestamps from the Linux inode.
917 * it dirty again so we'll try again later.
918 */ 901 */
902 ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
903 ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
904 ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec;
905 ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec;
906 ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec;
907 ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec;
908
909 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
910 xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
911 error = xfs_trans_commit(tp, 0);
919 if (error) 912 if (error)
920 xfs_mark_inode_dirty_sync(ip); 913 goto trouble;
921 return -error; 914 return;
915
916trouble:
917 xfs_warn(mp, "failed to update timestamps for inode 0x%llx", ip->i_ino);
922} 918}
923 919
924STATIC void 920STATIC void
@@ -983,6 +979,7 @@ xfs_fs_put_super(
983 xfs_unmountfs(mp); 979 xfs_unmountfs(mp);
984 xfs_freesb(mp); 980 xfs_freesb(mp);
985 xfs_icsb_destroy_counters(mp); 981 xfs_icsb_destroy_counters(mp);
982 xfs_destroy_mount_workqueues(mp);
986 xfs_close_devices(mp); 983 xfs_close_devices(mp);
987 xfs_free_fsname(mp); 984 xfs_free_fsname(mp);
988 kfree(mp); 985 kfree(mp);
@@ -1309,10 +1306,14 @@ xfs_fs_fill_super(
1309 if (error) 1306 if (error)
1310 goto out_free_fsname; 1307 goto out_free_fsname;
1311 1308
1312 error = xfs_icsb_init_counters(mp); 1309 error = xfs_init_mount_workqueues(mp);
1313 if (error) 1310 if (error)
1314 goto out_close_devices; 1311 goto out_close_devices;
1315 1312
1313 error = xfs_icsb_init_counters(mp);
1314 if (error)
1315 goto out_destroy_workqueues;
1316
1316 error = xfs_readsb(mp, flags); 1317 error = xfs_readsb(mp, flags);
1317 if (error) 1318 if (error)
1318 goto out_destroy_counters; 1319 goto out_destroy_counters;
@@ -1341,6 +1342,7 @@ xfs_fs_fill_super(
1341 sb->s_blocksize = mp->m_sb.sb_blocksize; 1342 sb->s_blocksize = mp->m_sb.sb_blocksize;
1342 sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; 1343 sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
1343 sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits); 1344 sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
1345 sb->s_max_links = XFS_MAXLINK;
1344 sb->s_time_gran = 1; 1346 sb->s_time_gran = 1;
1345 set_posix_acl_flag(sb); 1347 set_posix_acl_flag(sb);
1346 1348
@@ -1361,10 +1363,10 @@ xfs_fs_fill_super(
1361 error = EINVAL; 1363 error = EINVAL;
1362 goto out_syncd_stop; 1364 goto out_syncd_stop;
1363 } 1365 }
1364 sb->s_root = d_alloc_root(root); 1366 sb->s_root = d_make_root(root);
1365 if (!sb->s_root) { 1367 if (!sb->s_root) {
1366 error = ENOMEM; 1368 error = ENOMEM;
1367 goto out_iput; 1369 goto out_syncd_stop;
1368 } 1370 }
1369 1371
1370 return 0; 1372 return 0;
@@ -1375,6 +1377,8 @@ xfs_fs_fill_super(
1375 xfs_freesb(mp); 1377 xfs_freesb(mp);
1376 out_destroy_counters: 1378 out_destroy_counters:
1377 xfs_icsb_destroy_counters(mp); 1379 xfs_icsb_destroy_counters(mp);
1380out_destroy_workqueues:
1381 xfs_destroy_mount_workqueues(mp);
1378 out_close_devices: 1382 out_close_devices:
1379 xfs_close_devices(mp); 1383 xfs_close_devices(mp);
1380 out_free_fsname: 1384 out_free_fsname:
@@ -1383,8 +1387,6 @@ xfs_fs_fill_super(
1383 out: 1387 out:
1384 return -error; 1388 return -error;
1385 1389
1386 out_iput:
1387 iput(root);
1388 out_syncd_stop: 1390 out_syncd_stop:
1389 xfs_syncd_stop(mp); 1391 xfs_syncd_stop(mp);
1390 out_unmount: 1392 out_unmount:
@@ -1430,7 +1432,6 @@ static const struct super_operations xfs_super_operations = {
1430 .alloc_inode = xfs_fs_alloc_inode, 1432 .alloc_inode = xfs_fs_alloc_inode,
1431 .destroy_inode = xfs_fs_destroy_inode, 1433 .destroy_inode = xfs_fs_destroy_inode,
1432 .dirty_inode = xfs_fs_dirty_inode, 1434 .dirty_inode = xfs_fs_dirty_inode,
1433 .write_inode = xfs_fs_write_inode,
1434 .evict_inode = xfs_fs_evict_inode, 1435 .evict_inode = xfs_fs_evict_inode,
1435 .put_super = xfs_fs_put_super, 1436 .put_super = xfs_fs_put_super,
1436 .sync_fs = xfs_fs_sync_fs, 1437 .sync_fs = xfs_fs_sync_fs,
@@ -1652,13 +1653,17 @@ init_xfs_fs(void)
1652 if (error) 1653 if (error)
1653 goto out_cleanup_procfs; 1654 goto out_cleanup_procfs;
1654 1655
1655 vfs_initquota(); 1656 error = xfs_qm_init();
1657 if (error)
1658 goto out_sysctl_unregister;
1656 1659
1657 error = register_filesystem(&xfs_fs_type); 1660 error = register_filesystem(&xfs_fs_type);
1658 if (error) 1661 if (error)
1659 goto out_sysctl_unregister; 1662 goto out_qm_exit;
1660 return 0; 1663 return 0;
1661 1664
1665 out_qm_exit:
1666 xfs_qm_exit();
1662 out_sysctl_unregister: 1667 out_sysctl_unregister:
1663 xfs_sysctl_unregister(); 1668 xfs_sysctl_unregister();
1664 out_cleanup_procfs: 1669 out_cleanup_procfs:
@@ -1680,7 +1685,7 @@ init_xfs_fs(void)
1680STATIC void __exit 1685STATIC void __exit
1681exit_xfs_fs(void) 1686exit_xfs_fs(void)
1682{ 1687{
1683 vfs_exitquota(); 1688 xfs_qm_exit();
1684 unregister_filesystem(&xfs_fs_type); 1689 unregister_filesystem(&xfs_fs_type);
1685 xfs_sysctl_unregister(); 1690 xfs_sysctl_unregister();
1686 xfs_cleanup_procfs(); 1691 xfs_cleanup_procfs();
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h
index 50a3266c999e..09b0c26b2245 100644
--- a/fs/xfs/xfs_super.h
+++ b/fs/xfs/xfs_super.h
@@ -21,13 +21,11 @@
21#include <linux/exportfs.h> 21#include <linux/exportfs.h>
22 22
23#ifdef CONFIG_XFS_QUOTA 23#ifdef CONFIG_XFS_QUOTA
24extern void xfs_qm_init(void); 24extern int xfs_qm_init(void);
25extern void xfs_qm_exit(void); 25extern void xfs_qm_exit(void);
26# define vfs_initquota() xfs_qm_init()
27# define vfs_exitquota() xfs_qm_exit()
28#else 26#else
29# define vfs_initquota() do { } while (0) 27# define xfs_qm_init() (0)
30# define vfs_exitquota() do { } while (0) 28# define xfs_qm_exit() do { } while (0)
31#endif 29#endif
32 30
33#ifdef CONFIG_XFS_POSIX_ACL 31#ifdef CONFIG_XFS_POSIX_ACL
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index 40b75eecd2b4..205ebcb34d9e 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -336,32 +336,6 @@ xfs_sync_fsdata(
336 return error; 336 return error;
337} 337}
338 338
339int
340xfs_log_dirty_inode(
341 struct xfs_inode *ip,
342 struct xfs_perag *pag,
343 int flags)
344{
345 struct xfs_mount *mp = ip->i_mount;
346 struct xfs_trans *tp;
347 int error;
348
349 if (!ip->i_update_core)
350 return 0;
351
352 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
353 error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
354 if (error) {
355 xfs_trans_cancel(tp, 0);
356 return error;
357 }
358
359 xfs_ilock(ip, XFS_ILOCK_EXCL);
360 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
361 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
362 return xfs_trans_commit(tp, 0);
363}
364
365/* 339/*
366 * When remounting a filesystem read-only or freezing the filesystem, we have 340 * When remounting a filesystem read-only or freezing the filesystem, we have
367 * two phases to execute. This first phase is syncing the data before we 341 * two phases to execute. This first phase is syncing the data before we
@@ -385,16 +359,6 @@ xfs_quiesce_data(
385{ 359{
386 int error, error2 = 0; 360 int error, error2 = 0;
387 361
388 /*
389 * Log all pending size and timestamp updates. The vfs writeback
390 * code is supposed to do this, but due to its overagressive
391 * livelock detection it will skip inodes where appending writes
392 * were written out in the first non-blocking sync phase if their
393 * completion took long enough that it happened after taking the
394 * timestamp for the cut-off in the blocking phase.
395 */
396 xfs_inode_ag_iterator(mp, xfs_log_dirty_inode, 0);
397
398 /* force out the log */ 362 /* force out the log */
399 xfs_log_force(mp, XFS_LOG_SYNC); 363 xfs_log_force(mp, XFS_LOG_SYNC);
400 364
@@ -913,17 +877,15 @@ reclaim:
913 * can reference the inodes in the cache without taking references. 877 * can reference the inodes in the cache without taking references.
914 * 878 *
915 * We make that OK here by ensuring that we wait until the inode is 879 * We make that OK here by ensuring that we wait until the inode is
916 * unlocked after the lookup before we go ahead and free it. We get 880 * unlocked after the lookup before we go ahead and free it.
917 * both the ilock and the iolock because the code may need to drop the
918 * ilock one but will still hold the iolock.
919 */ 881 */
920 xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 882 xfs_ilock(ip, XFS_ILOCK_EXCL);
921 xfs_qm_dqdetach(ip); 883 xfs_qm_dqdetach(ip);
922 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 884 xfs_iunlock(ip, XFS_ILOCK_EXCL);
923 885
924 xfs_inode_free(ip); 886 xfs_inode_free(ip);
925 return error;
926 887
888 return error;
927} 889}
928 890
929/* 891/*
diff --git a/fs/xfs/xfs_sync.h b/fs/xfs/xfs_sync.h
index fa965479d788..941202e7ac6e 100644
--- a/fs/xfs/xfs_sync.h
+++ b/fs/xfs/xfs_sync.h
@@ -34,8 +34,6 @@ void xfs_quiesce_attr(struct xfs_mount *mp);
34 34
35void xfs_flush_inodes(struct xfs_inode *ip); 35void xfs_flush_inodes(struct xfs_inode *ip);
36 36
37int xfs_log_dirty_inode(struct xfs_inode *ip, struct xfs_perag *pag, int flags);
38
39int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); 37int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
40int xfs_reclaim_inodes_count(struct xfs_mount *mp); 38int xfs_reclaim_inodes_count(struct xfs_mount *mp);
41void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan); 39void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index bb134a819930..75eb54af4d58 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -580,7 +580,7 @@ DEFINE_INODE_EVENT(xfs_ioctl_setattr);
580DEFINE_INODE_EVENT(xfs_dir_fsync); 580DEFINE_INODE_EVENT(xfs_dir_fsync);
581DEFINE_INODE_EVENT(xfs_file_fsync); 581DEFINE_INODE_EVENT(xfs_file_fsync);
582DEFINE_INODE_EVENT(xfs_destroy_inode); 582DEFINE_INODE_EVENT(xfs_destroy_inode);
583DEFINE_INODE_EVENT(xfs_write_inode); 583DEFINE_INODE_EVENT(xfs_dirty_inode);
584DEFINE_INODE_EVENT(xfs_evict_inode); 584DEFINE_INODE_EVENT(xfs_evict_inode);
585 585
586DEFINE_INODE_EVENT(xfs_dquot_dqalloc); 586DEFINE_INODE_EVENT(xfs_dquot_dqalloc);
@@ -741,10 +741,10 @@ DEFINE_DQUOT_EVENT(xfs_dqalloc);
741DEFINE_DQUOT_EVENT(xfs_dqtobp_read); 741DEFINE_DQUOT_EVENT(xfs_dqtobp_read);
742DEFINE_DQUOT_EVENT(xfs_dqread); 742DEFINE_DQUOT_EVENT(xfs_dqread);
743DEFINE_DQUOT_EVENT(xfs_dqread_fail); 743DEFINE_DQUOT_EVENT(xfs_dqread_fail);
744DEFINE_DQUOT_EVENT(xfs_dqlookup_found);
745DEFINE_DQUOT_EVENT(xfs_dqlookup_done);
746DEFINE_DQUOT_EVENT(xfs_dqget_hit); 744DEFINE_DQUOT_EVENT(xfs_dqget_hit);
747DEFINE_DQUOT_EVENT(xfs_dqget_miss); 745DEFINE_DQUOT_EVENT(xfs_dqget_miss);
746DEFINE_DQUOT_EVENT(xfs_dqget_freeing);
747DEFINE_DQUOT_EVENT(xfs_dqget_dup);
748DEFINE_DQUOT_EVENT(xfs_dqput); 748DEFINE_DQUOT_EVENT(xfs_dqput);
749DEFINE_DQUOT_EVENT(xfs_dqput_wait); 749DEFINE_DQUOT_EVENT(xfs_dqput_wait);
750DEFINE_DQUOT_EVENT(xfs_dqput_free); 750DEFINE_DQUOT_EVENT(xfs_dqput_free);
@@ -782,12 +782,12 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
782 __entry->curr_res = tic->t_curr_res; 782 __entry->curr_res = tic->t_curr_res;
783 __entry->unit_res = tic->t_unit_res; 783 __entry->unit_res = tic->t_unit_res;
784 __entry->flags = tic->t_flags; 784 __entry->flags = tic->t_flags;
785 __entry->reserveq = list_empty(&log->l_reserveq); 785 __entry->reserveq = list_empty(&log->l_reserve_head.waiters);
786 __entry->writeq = list_empty(&log->l_writeq); 786 __entry->writeq = list_empty(&log->l_write_head.waiters);
787 xlog_crack_grant_head(&log->l_grant_reserve_head, 787 xlog_crack_grant_head(&log->l_reserve_head.grant,
788 &__entry->grant_reserve_cycle, 788 &__entry->grant_reserve_cycle,
789 &__entry->grant_reserve_bytes); 789 &__entry->grant_reserve_bytes);
790 xlog_crack_grant_head(&log->l_grant_write_head, 790 xlog_crack_grant_head(&log->l_write_head.grant,
791 &__entry->grant_write_cycle, 791 &__entry->grant_write_cycle,
792 &__entry->grant_write_bytes); 792 &__entry->grant_write_bytes);
793 __entry->curr_cycle = log->l_curr_cycle; 793 __entry->curr_cycle = log->l_curr_cycle;
@@ -826,20 +826,14 @@ DEFINE_EVENT(xfs_loggrant_class, name, \
826 TP_ARGS(log, tic)) 826 TP_ARGS(log, tic))
827DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm); 827DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm);
828DEFINE_LOGGRANT_EVENT(xfs_log_done_perm); 828DEFINE_LOGGRANT_EVENT(xfs_log_done_perm);
829DEFINE_LOGGRANT_EVENT(xfs_log_reserve);
830DEFINE_LOGGRANT_EVENT(xfs_log_umount_write); 829DEFINE_LOGGRANT_EVENT(xfs_log_umount_write);
831DEFINE_LOGGRANT_EVENT(xfs_log_grant_enter);
832DEFINE_LOGGRANT_EVENT(xfs_log_grant_exit);
833DEFINE_LOGGRANT_EVENT(xfs_log_grant_error);
834DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep); 830DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep);
835DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake); 831DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake);
836DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up); 832DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up);
837DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter); 833DEFINE_LOGGRANT_EVENT(xfs_log_reserve);
838DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit); 834DEFINE_LOGGRANT_EVENT(xfs_log_reserve_exit);
839DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error); 835DEFINE_LOGGRANT_EVENT(xfs_log_regrant);
840DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep); 836DEFINE_LOGGRANT_EVENT(xfs_log_regrant_exit);
841DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake);
842DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up);
843DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter); 837DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter);
844DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit); 838DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit);
845DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub); 839DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 7adcdf15ae0c..103b00c90004 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -681,7 +681,6 @@ xfs_trans_reserve(
681 uint flags, 681 uint flags,
682 uint logcount) 682 uint logcount)
683{ 683{
684 int log_flags;
685 int error = 0; 684 int error = 0;
686 int rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; 685 int rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
687 686
@@ -707,24 +706,32 @@ xfs_trans_reserve(
707 * Reserve the log space needed for this transaction. 706 * Reserve the log space needed for this transaction.
708 */ 707 */
709 if (logspace > 0) { 708 if (logspace > 0) {
710 ASSERT((tp->t_log_res == 0) || (tp->t_log_res == logspace)); 709 bool permanent = false;
711 ASSERT((tp->t_log_count == 0) || 710
712 (tp->t_log_count == logcount)); 711 ASSERT(tp->t_log_res == 0 || tp->t_log_res == logspace);
712 ASSERT(tp->t_log_count == 0 || tp->t_log_count == logcount);
713
713 if (flags & XFS_TRANS_PERM_LOG_RES) { 714 if (flags & XFS_TRANS_PERM_LOG_RES) {
714 log_flags = XFS_LOG_PERM_RESERV;
715 tp->t_flags |= XFS_TRANS_PERM_LOG_RES; 715 tp->t_flags |= XFS_TRANS_PERM_LOG_RES;
716 permanent = true;
716 } else { 717 } else {
717 ASSERT(tp->t_ticket == NULL); 718 ASSERT(tp->t_ticket == NULL);
718 ASSERT(!(tp->t_flags & XFS_TRANS_PERM_LOG_RES)); 719 ASSERT(!(tp->t_flags & XFS_TRANS_PERM_LOG_RES));
719 log_flags = 0;
720 } 720 }
721 721
722 error = xfs_log_reserve(tp->t_mountp, logspace, logcount, 722 if (tp->t_ticket != NULL) {
723 &tp->t_ticket, 723 ASSERT(flags & XFS_TRANS_PERM_LOG_RES);
724 XFS_TRANSACTION, log_flags, tp->t_type); 724 error = xfs_log_regrant(tp->t_mountp, tp->t_ticket);
725 if (error) { 725 } else {
726 goto undo_blocks; 726 error = xfs_log_reserve(tp->t_mountp, logspace,
727 logcount, &tp->t_ticket,
728 XFS_TRANSACTION, permanent,
729 tp->t_type);
727 } 730 }
731
732 if (error)
733 goto undo_blocks;
734
728 tp->t_log_res = logspace; 735 tp->t_log_res = logspace;
729 tp->t_log_count = logcount; 736 tp->t_log_count = logcount;
730 } 737 }
@@ -752,6 +759,8 @@ xfs_trans_reserve(
752 */ 759 */
753undo_log: 760undo_log:
754 if (logspace > 0) { 761 if (logspace > 0) {
762 int log_flags;
763
755 if (flags & XFS_TRANS_PERM_LOG_RES) { 764 if (flags & XFS_TRANS_PERM_LOG_RES) {
756 log_flags = XFS_LOG_REL_PERM_RESERV; 765 log_flags = XFS_LOG_REL_PERM_RESERV;
757 } else { 766 } else {
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index ed9252bcdac9..1dead07f092c 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -611,50 +611,6 @@ xfs_ail_push_all(
611} 611}
612 612
613/* 613/*
614 * This is to be called when an item is unlocked that may have
615 * been in the AIL. It will wake up the first member of the AIL
616 * wait list if this item's unlocking might allow it to progress.
617 * If the item is in the AIL, then we need to get the AIL lock
618 * while doing our checking so we don't race with someone going
619 * to sleep waiting for this event in xfs_trans_push_ail().
620 */
621void
622xfs_trans_unlocked_item(
623 struct xfs_ail *ailp,
624 xfs_log_item_t *lip)
625{
626 xfs_log_item_t *min_lip;
627
628 /*
629 * If we're forcibly shutting down, we may have
630 * unlocked log items arbitrarily. The last thing
631 * we want to do is to move the tail of the log
632 * over some potentially valid data.
633 */
634 if (!(lip->li_flags & XFS_LI_IN_AIL) ||
635 XFS_FORCED_SHUTDOWN(ailp->xa_mount)) {
636 return;
637 }
638
639 /*
640 * This is the one case where we can call into xfs_ail_min()
641 * without holding the AIL lock because we only care about the
642 * case where we are at the tail of the AIL. If the object isn't
643 * at the tail, it doesn't matter what result we get back. This
644 * is slightly racy because since we were just unlocked, we could
645 * go to sleep between the call to xfs_ail_min and the call to
646 * xfs_log_move_tail, have someone else lock us, commit to us disk,
647 * move us out of the tail of the AIL, and then we wake up. However,
648 * the call to xfs_log_move_tail() doesn't do anything if there's
649 * not enough free space to wake people up so we're safe calling it.
650 */
651 min_lip = xfs_ail_min(ailp);
652
653 if (min_lip == lip)
654 xfs_log_move_tail(ailp->xa_mount, 1);
655} /* xfs_trans_unlocked_item */
656
657/*
658 * xfs_trans_ail_update - bulk AIL insertion operation. 614 * xfs_trans_ail_update - bulk AIL insertion operation.
659 * 615 *
660 * @xfs_trans_ail_update takes an array of log items that all need to be 616 * @xfs_trans_ail_update takes an array of log items that all need to be
@@ -685,7 +641,6 @@ xfs_trans_ail_update_bulk(
685 xfs_lsn_t lsn) __releases(ailp->xa_lock) 641 xfs_lsn_t lsn) __releases(ailp->xa_lock)
686{ 642{
687 xfs_log_item_t *mlip; 643 xfs_log_item_t *mlip;
688 xfs_lsn_t tail_lsn;
689 int mlip_changed = 0; 644 int mlip_changed = 0;
690 int i; 645 int i;
691 LIST_HEAD(tmp); 646 LIST_HEAD(tmp);
@@ -712,22 +667,12 @@ xfs_trans_ail_update_bulk(
712 667
713 if (!list_empty(&tmp)) 668 if (!list_empty(&tmp))
714 xfs_ail_splice(ailp, cur, &tmp, lsn); 669 xfs_ail_splice(ailp, cur, &tmp, lsn);
670 spin_unlock(&ailp->xa_lock);
715 671
716 if (!mlip_changed) { 672 if (mlip_changed && !XFS_FORCED_SHUTDOWN(ailp->xa_mount)) {
717 spin_unlock(&ailp->xa_lock); 673 xlog_assign_tail_lsn(ailp->xa_mount);
718 return; 674 xfs_log_space_wake(ailp->xa_mount);
719 } 675 }
720
721 /*
722 * It is not safe to access mlip after the AIL lock is dropped, so we
723 * must get a copy of li_lsn before we do so. This is especially
724 * important on 32-bit platforms where accessing and updating 64-bit
725 * values like li_lsn is not atomic.
726 */
727 mlip = xfs_ail_min(ailp);
728 tail_lsn = mlip->li_lsn;
729 spin_unlock(&ailp->xa_lock);
730 xfs_log_move_tail(ailp->xa_mount, tail_lsn);
731} 676}
732 677
733/* 678/*
@@ -758,7 +703,6 @@ xfs_trans_ail_delete_bulk(
758 int nr_items) __releases(ailp->xa_lock) 703 int nr_items) __releases(ailp->xa_lock)
759{ 704{
760 xfs_log_item_t *mlip; 705 xfs_log_item_t *mlip;
761 xfs_lsn_t tail_lsn;
762 int mlip_changed = 0; 706 int mlip_changed = 0;
763 int i; 707 int i;
764 708
@@ -785,23 +729,12 @@ xfs_trans_ail_delete_bulk(
785 if (mlip == lip) 729 if (mlip == lip)
786 mlip_changed = 1; 730 mlip_changed = 1;
787 } 731 }
732 spin_unlock(&ailp->xa_lock);
788 733
789 if (!mlip_changed) { 734 if (mlip_changed && !XFS_FORCED_SHUTDOWN(ailp->xa_mount)) {
790 spin_unlock(&ailp->xa_lock); 735 xlog_assign_tail_lsn(ailp->xa_mount);
791 return; 736 xfs_log_space_wake(ailp->xa_mount);
792 } 737 }
793
794 /*
795 * It is not safe to access mlip after the AIL lock is dropped, so we
796 * must get a copy of li_lsn before we do so. This is especially
797 * important on 32-bit platforms where accessing and updating 64-bit
798 * values like li_lsn is not atomic. It is possible we've emptied the
799 * AIL here, so if that is the case, pass an LSN of 0 to the tail move.
800 */
801 mlip = xfs_ail_min(ailp);
802 tail_lsn = mlip ? mlip->li_lsn : 0;
803 spin_unlock(&ailp->xa_lock);
804 xfs_log_move_tail(ailp->xa_mount, tail_lsn);
805} 738}
806 739
807/* 740/*
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 475a4ded4f41..1302d1d95a58 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -463,19 +463,7 @@ xfs_trans_brelse(xfs_trans_t *tp,
463 * Default to a normal brelse() call if the tp is NULL. 463 * Default to a normal brelse() call if the tp is NULL.
464 */ 464 */
465 if (tp == NULL) { 465 if (tp == NULL) {
466 struct xfs_log_item *lip = bp->b_fspriv;
467
468 ASSERT(bp->b_transp == NULL); 466 ASSERT(bp->b_transp == NULL);
469
470 /*
471 * If there's a buf log item attached to the buffer,
472 * then let the AIL know that the buffer is being
473 * unlocked.
474 */
475 if (lip != NULL && lip->li_type == XFS_LI_BUF) {
476 bip = bp->b_fspriv;
477 xfs_trans_unlocked_item(bip->bli_item.li_ailp, lip);
478 }
479 xfs_buf_relse(bp); 467 xfs_buf_relse(bp);
480 return; 468 return;
481 } 469 }
@@ -550,21 +538,10 @@ xfs_trans_brelse(xfs_trans_t *tp,
550 ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL)); 538 ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL));
551 ASSERT(!(bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF)); 539 ASSERT(!(bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF));
552 xfs_buf_item_relse(bp); 540 xfs_buf_item_relse(bp);
553 bip = NULL;
554 }
555 bp->b_transp = NULL;
556
557 /*
558 * If we've still got a buf log item on the buffer, then
559 * tell the AIL that the buffer is being unlocked.
560 */
561 if (bip != NULL) {
562 xfs_trans_unlocked_item(bip->bli_item.li_ailp,
563 (xfs_log_item_t*)bip);
564 } 541 }
565 542
543 bp->b_transp = NULL;
566 xfs_buf_relse(bp); 544 xfs_buf_relse(bp);
567 return;
568} 545}
569 546
570/* 547/*
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index c4ba366d24e6..279099717ed2 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -605,7 +605,7 @@ xfs_trans_dqresv(
605 time_t timer; 605 time_t timer;
606 xfs_qwarncnt_t warns; 606 xfs_qwarncnt_t warns;
607 xfs_qwarncnt_t warnlimit; 607 xfs_qwarncnt_t warnlimit;
608 xfs_qcnt_t count; 608 xfs_qcnt_t total_count;
609 xfs_qcnt_t *resbcountp; 609 xfs_qcnt_t *resbcountp;
610 xfs_quotainfo_t *q = mp->m_quotainfo; 610 xfs_quotainfo_t *q = mp->m_quotainfo;
611 611
@@ -648,13 +648,12 @@ xfs_trans_dqresv(
648 * hardlimit or exceed the timelimit if we allocate 648 * hardlimit or exceed the timelimit if we allocate
649 * nblks. 649 * nblks.
650 */ 650 */
651 if (hardlimit > 0ULL && 651 total_count = *resbcountp + nblks;
652 hardlimit < nblks + *resbcountp) { 652 if (hardlimit && total_count > hardlimit) {
653 xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN); 653 xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN);
654 goto error_return; 654 goto error_return;
655 } 655 }
656 if (softlimit > 0ULL && 656 if (softlimit && total_count > softlimit) {
657 softlimit < nblks + *resbcountp) {
658 if ((timer != 0 && get_seconds() > timer) || 657 if ((timer != 0 && get_seconds() > timer) ||
659 (warns != 0 && warns >= warnlimit)) { 658 (warns != 0 && warns >= warnlimit)) {
660 xfs_quota_warn(mp, dqp, 659 xfs_quota_warn(mp, dqp,
@@ -666,7 +665,7 @@ xfs_trans_dqresv(
666 } 665 }
667 } 666 }
668 if (ninos > 0) { 667 if (ninos > 0) {
669 count = be64_to_cpu(dqp->q_core.d_icount); 668 total_count = be64_to_cpu(dqp->q_core.d_icount) + ninos;
670 timer = be32_to_cpu(dqp->q_core.d_itimer); 669 timer = be32_to_cpu(dqp->q_core.d_itimer);
671 warns = be16_to_cpu(dqp->q_core.d_iwarns); 670 warns = be16_to_cpu(dqp->q_core.d_iwarns);
672 warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit; 671 warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit;
@@ -677,13 +676,11 @@ xfs_trans_dqresv(
677 if (!softlimit) 676 if (!softlimit)
678 softlimit = q->qi_isoftlimit; 677 softlimit = q->qi_isoftlimit;
679 678
680 if (hardlimit > 0ULL && 679 if (hardlimit && total_count > hardlimit) {
681 hardlimit < ninos + count) {
682 xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN); 680 xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN);
683 goto error_return; 681 goto error_return;
684 } 682 }
685 if (softlimit > 0ULL && 683 if (softlimit && total_count > softlimit) {
686 softlimit < ninos + count) {
687 if ((timer != 0 && get_seconds() > timer) || 684 if ((timer != 0 && get_seconds() > timer) ||
688 (warns != 0 && warns >= warnlimit)) { 685 (warns != 0 && warns >= warnlimit)) {
689 xfs_quota_warn(mp, dqp, 686 xfs_quota_warn(mp, dqp,
@@ -878,7 +875,7 @@ STATIC void
878xfs_trans_alloc_dqinfo( 875xfs_trans_alloc_dqinfo(
879 xfs_trans_t *tp) 876 xfs_trans_t *tp)
880{ 877{
881 tp->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP); 878 tp->t_dqinfo = kmem_zone_zalloc(xfs_qm_dqtrxzone, KM_SLEEP);
882} 879}
883 880
884void 881void
@@ -887,6 +884,6 @@ xfs_trans_free_dqinfo(
887{ 884{
888 if (!tp->t_dqinfo) 885 if (!tp->t_dqinfo)
889 return; 886 return;
890 kmem_zone_free(xfs_Gqm->qm_dqtrxzone, tp->t_dqinfo); 887 kmem_zone_free(xfs_qm_dqtrxzone, tp->t_dqinfo);
891 tp->t_dqinfo = NULL; 888 tp->t_dqinfo = NULL;
892} 889}
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 32f0288ae10f..7a7442c03f2b 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -95,10 +95,14 @@ xfs_trans_ichgtime(
95 if ((flags & XFS_ICHGTIME_MOD) && 95 if ((flags & XFS_ICHGTIME_MOD) &&
96 !timespec_equal(&inode->i_mtime, &tv)) { 96 !timespec_equal(&inode->i_mtime, &tv)) {
97 inode->i_mtime = tv; 97 inode->i_mtime = tv;
98 ip->i_d.di_mtime.t_sec = tv.tv_sec;
99 ip->i_d.di_mtime.t_nsec = tv.tv_nsec;
98 } 100 }
99 if ((flags & XFS_ICHGTIME_CHG) && 101 if ((flags & XFS_ICHGTIME_CHG) &&
100 !timespec_equal(&inode->i_ctime, &tv)) { 102 !timespec_equal(&inode->i_ctime, &tv)) {
101 inode->i_ctime = tv; 103 inode->i_ctime = tv;
104 ip->i_d.di_ctime.t_sec = tv.tv_sec;
105 ip->i_d.di_ctime.t_nsec = tv.tv_nsec;
102 } 106 }
103} 107}
104 108
@@ -126,12 +130,12 @@ xfs_trans_log_inode(
126 /* 130 /*
127 * Always OR in the bits from the ili_last_fields field. 131 * Always OR in the bits from the ili_last_fields field.
128 * This is to coordinate with the xfs_iflush() and xfs_iflush_done() 132 * This is to coordinate with the xfs_iflush() and xfs_iflush_done()
129 * routines in the eventual clearing of the ilf_fields bits. 133 * routines in the eventual clearing of the ili_fields bits.
130 * See the big comment in xfs_iflush() for an explanation of 134 * See the big comment in xfs_iflush() for an explanation of
131 * this coordination mechanism. 135 * this coordination mechanism.
132 */ 136 */
133 flags |= ip->i_itemp->ili_last_fields; 137 flags |= ip->i_itemp->ili_last_fields;
134 ip->i_itemp->ili_format.ilf_fields |= flags; 138 ip->i_itemp->ili_fields |= flags;
135} 139}
136 140
137#ifdef XFS_TRANS_DEBUG 141#ifdef XFS_TRANS_DEBUG
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 44820b9fcb43..8ab2ced415f1 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -104,9 +104,6 @@ void xfs_ail_push(struct xfs_ail *, xfs_lsn_t);
104void xfs_ail_push_all(struct xfs_ail *); 104void xfs_ail_push_all(struct xfs_ail *);
105xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp); 105xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp);
106 106
107void xfs_trans_unlocked_item(struct xfs_ail *,
108 xfs_log_item_t *);
109
110struct xfs_log_item * xfs_trans_ail_cursor_first(struct xfs_ail *ailp, 107struct xfs_log_item * xfs_trans_ail_cursor_first(struct xfs_ail *ailp,
111 struct xfs_ail_cursor *cur, 108 struct xfs_ail_cursor *cur,
112 xfs_lsn_t lsn); 109 xfs_lsn_t lsn);
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 89dbb4a50872..79c05ac85bfe 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -296,8 +296,6 @@ xfs_bumplink(
296 xfs_trans_t *tp, 296 xfs_trans_t *tp,
297 xfs_inode_t *ip) 297 xfs_inode_t *ip)
298{ 298{
299 if (ip->i_d.di_nlink >= XFS_MAXLINK)
300 return XFS_ERROR(EMLINK);
301 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); 299 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
302 300
303 ASSERT(ip->i_d.di_nlink > 0); 301 ASSERT(ip->i_d.di_nlink > 0);
diff --git a/fs/xfs/xfs_vnode.h b/fs/xfs/xfs_vnode.h
index 7c220b4227bc..db14d0c08682 100644
--- a/fs/xfs/xfs_vnode.h
+++ b/fs/xfs/xfs_vnode.h
@@ -22,7 +22,6 @@
22 22
23struct file; 23struct file;
24struct xfs_inode; 24struct xfs_inode;
25struct xfs_iomap;
26struct attrlist_cursor_kern; 25struct attrlist_cursor_kern;
27 26
28/* 27/*
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index ebdb88840a47..64981d7e7375 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -917,14 +917,6 @@ xfs_create(
917 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 917 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
918 unlock_dp_on_error = B_TRUE; 918 unlock_dp_on_error = B_TRUE;
919 919
920 /*
921 * Check for directory link count overflow.
922 */
923 if (is_dir && dp->i_d.di_nlink >= XFS_MAXLINK) {
924 error = XFS_ERROR(EMLINK);
925 goto out_trans_cancel;
926 }
927
928 xfs_bmap_init(&free_list, &first_block); 920 xfs_bmap_init(&free_list, &first_block);
929 921
930 /* 922 /*
@@ -1429,14 +1421,6 @@ xfs_link(
1429 xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL); 1421 xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
1430 1422
1431 /* 1423 /*
1432 * If the source has too many links, we can't make any more to it.
1433 */
1434 if (sip->i_d.di_nlink >= XFS_MAXLINK) {
1435 error = XFS_ERROR(EMLINK);
1436 goto error_return;
1437 }
1438
1439 /*
1440 * If we are using project inheritance, we only allow hard link 1424 * If we are using project inheritance, we only allow hard link
1441 * creation in our tree when the project IDs are the same; else 1425 * creation in our tree when the project IDs are the same; else
1442 * the tree quota mechanism could be circumvented. 1426 * the tree quota mechanism could be circumvented.
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 0c877cbde142..447e146b2ba6 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -10,7 +10,6 @@ struct kiocb;
10struct pipe_inode_info; 10struct pipe_inode_info;
11struct uio; 11struct uio;
12struct xfs_inode; 12struct xfs_inode;
13struct xfs_iomap;
14 13
15 14
16int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap, int flags); 15int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap, int flags);
@@ -49,8 +48,6 @@ int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name,
49int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, int flags); 48int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, int flags);
50int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize, 49int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize,
51 int flags, struct attrlist_cursor_kern *cursor); 50 int flags, struct attrlist_cursor_kern *cursor);
52int xfs_bmap(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
53 int flags, struct xfs_iomap *iomapp, int *niomaps);
54void xfs_tosspages(struct xfs_inode *inode, xfs_off_t first, 51void xfs_tosspages(struct xfs_inode *inode, xfs_off_t first,
55 xfs_off_t last, int fiopt); 52 xfs_off_t last, int fiopt);
56int xfs_flushinval_pages(struct xfs_inode *ip, xfs_off_t first, 53int xfs_flushinval_pages(struct xfs_inode *ip, xfs_off_t first,