aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/cache.c64
-rw-r--r--fs/9p/fid.c8
-rw-r--r--fs/9p/v9fs.c59
-rw-r--r--fs/9p/v9fs_vfs.h4
-rw-r--r--fs/9p/vfs_addr.c13
-rw-r--r--fs/9p/vfs_dentry.c12
-rw-r--r--fs/9p/vfs_dir.c13
-rw-r--r--fs/9p/vfs_file.c34
-rw-r--r--fs/9p/vfs_inode.c203
-rw-r--r--fs/9p/vfs_inode_dotl.c137
-rw-r--r--fs/9p/vfs_super.c14
-rw-r--r--fs/9p/xattr.c16
-rw-r--r--fs/Kconfig10
-rw-r--r--fs/Kconfig.binfmt3
-rw-r--r--fs/Makefile3
-rw-r--r--fs/adfs/super.c4
-rw-r--r--fs/affs/affs.h6
-rw-r--r--fs/affs/amigaffs.c6
-rw-r--r--fs/affs/namei.c8
-rw-r--r--fs/affs/super.c1
-rw-r--r--fs/afs/dir.c12
-rw-r--r--fs/afs/mntpt.c4
-rw-r--r--fs/afs/super.c1
-rw-r--r--fs/aio.c11
-rw-r--r--fs/attr.c4
-rw-r--r--fs/autofs4/autofs_i.h3
-rw-r--r--fs/autofs4/dev-ioctl.c10
-rw-r--r--fs/autofs4/inode.c9
-rw-r--r--fs/autofs4/root.c4
-rw-r--r--fs/autofs4/waitq.c40
-rw-r--r--fs/bad_inode.c6
-rw-r--r--fs/befs/linuxvfs.c1
-rw-r--r--fs/bfs/dir.c2
-rw-r--r--fs/bfs/inode.c1
-rw-r--r--fs/binfmt_elf.c2
-rw-r--r--fs/binfmt_misc.c6
-rw-r--r--fs/block_dev.c51
-rw-r--r--fs/btrfs/async-thread.c2
-rw-r--r--fs/btrfs/disk-io.c13
-rw-r--r--fs/btrfs/file.c5
-rw-r--r--fs/btrfs/free-space-cache.c2
-rw-r--r--fs/btrfs/inode.c13
-rw-r--r--fs/btrfs/ioctl.c22
-rw-r--r--fs/btrfs/super.c5
-rw-r--r--fs/buffer.c50
-rw-r--r--fs/cachefiles/interface.c1
-rw-r--r--fs/ceph/caps.c4
-rw-r--r--fs/ceph/dir.c88
-rw-r--r--fs/ceph/export.c6
-rw-r--r--fs/ceph/inode.c4
-rw-r--r--fs/ceph/mds_client.c4
-rw-r--r--fs/ceph/super.c37
-rw-r--r--fs/ceph/super.h3
-rw-r--r--fs/ceph/xattr.c22
-rw-r--r--fs/char_dev.c6
-rw-r--r--fs/cifs/cifs_fs_sb.h4
-rw-r--r--fs/cifs/cifsfs.c10
-rw-r--r--fs/cifs/cifsfs.h6
-rw-r--r--fs/cifs/cifsglob.h4
-rw-r--r--fs/cifs/connect.c2
-rw-r--r--fs/cifs/dir.c4
-rw-r--r--fs/cifs/inode.c4
-rw-r--r--fs/coda/cnode.c38
-rw-r--r--fs/coda/coda_fs_i.h4
-rw-r--r--fs/coda/dir.c37
-rw-r--r--fs/coda/inode.c11
-rw-r--r--fs/compat.c13
-rw-r--r--fs/compat_ioctl.c38
-rw-r--r--fs/configfs/configfs_internal.h4
-rw-r--r--fs/configfs/dir.c6
-rw-r--r--fs/configfs/inode.c6
-rw-r--r--fs/cramfs/inode.c3
-rw-r--r--fs/dcache.c143
-rw-r--r--fs/debugfs/file.c117
-rw-r--r--fs/debugfs/inode.c16
-rw-r--r--fs/devpts/inode.c8
-rw-r--r--fs/direct-io.c57
-rw-r--r--fs/dlm/config.c130
-rw-r--r--fs/dlm/config.h17
-rw-r--r--fs/dlm/debug_fs.c28
-rw-r--r--fs/dlm/dir.c1
-rw-r--r--fs/dlm/dlm_internal.h60
-rw-r--r--fs/dlm/lock.c87
-rw-r--r--fs/dlm/lockspace.c71
-rw-r--r--fs/dlm/lowcomms.c2
-rw-r--r--fs/dlm/member.c486
-rw-r--r--fs/dlm/member.h10
-rw-r--r--fs/dlm/rcom.c99
-rw-r--r--fs/dlm/rcom.h2
-rw-r--r--fs/dlm/recover.c87
-rw-r--r--fs/dlm/recoverd.c53
-rw-r--r--fs/dlm/user.c5
-rw-r--r--fs/ecryptfs/inode.c29
-rw-r--r--fs/ecryptfs/super.c5
-rw-r--r--fs/efs/super.c1
-rw-r--r--fs/eventpoll.c234
-rw-r--r--fs/exec.c6
-rw-r--r--fs/exofs/Kconfig11
-rw-r--r--fs/exofs/Kconfig.ore12
-rw-r--r--fs/exofs/dir.c2
-rw-r--r--fs/exofs/exofs.h2
-rw-r--r--fs/exofs/inode.c2
-rw-r--r--fs/exofs/namei.c6
-rw-r--r--fs/exofs/ore.c8
-rw-r--r--fs/exofs/ore_raid.c78
-rw-r--r--fs/exofs/super.c3
-rw-r--r--fs/ext2/dir.c2
-rw-r--r--fs/ext2/ext2.h2
-rw-r--r--fs/ext2/ialloc.c9
-rw-r--r--fs/ext2/inode.c5
-rw-r--r--fs/ext2/ioctl.c12
-rw-r--r--fs/ext2/namei.c6
-rw-r--r--fs/ext2/super.c8
-rw-r--r--fs/ext2/xattr.c1
-rw-r--r--fs/ext2/xattr_security.c1
-rw-r--r--fs/ext2/xattr_trusted.c1
-rw-r--r--fs/ext2/xattr_user.c1
-rw-r--r--fs/ext3/ialloc.c10
-rw-r--r--fs/ext3/inode.c45
-rw-r--r--fs/ext3/ioctl.c26
-rw-r--r--fs/ext3/namei.c17
-rw-r--r--fs/ext3/super.c22
-rw-r--r--fs/ext3/xattr_security.c1
-rw-r--r--fs/ext3/xattr_trusted.c1
-rw-r--r--fs/ext3/xattr_user.c1
-rw-r--r--fs/ext4/balloc.c4
-rw-r--r--fs/ext4/block_validity.c1
-rw-r--r--fs/ext4/ext4.h31
-rw-r--r--fs/ext4/extents.c11
-rw-r--r--fs/ext4/ialloc.c26
-rw-r--r--fs/ext4/indirect.c1
-rw-r--r--fs/ext4/inode.c148
-rw-r--r--fs/ext4/ioctl.c116
-rw-r--r--fs/ext4/mballoc.c2
-rw-r--r--fs/ext4/migrate.c1
-rw-r--r--fs/ext4/namei.c8
-rw-r--r--fs/ext4/page-io.c1
-rw-r--r--fs/ext4/resize.c1175
-rw-r--r--fs/ext4/super.c34
-rw-r--r--fs/ext4/xattr_security.c6
-rw-r--r--fs/ext4/xattr_trusted.c1
-rw-r--r--fs/ext4/xattr_user.c1
-rw-r--r--fs/fat/fat.h6
-rw-r--r--fs/fat/file.c8
-rw-r--r--fs/fat/inode.c33
-rw-r--r--fs/fat/namei_msdos.c4
-rw-r--r--fs/fat/namei_vfat.c7
-rw-r--r--fs/fhandle.c8
-rw-r--r--fs/file_table.c23
-rw-r--r--fs/filesystems.c1
-rw-r--r--fs/freevxfs/vxfs_inode.c5
-rw-r--r--fs/fs-writeback.c21
-rw-r--r--fs/fuse/dev.c57
-rw-r--r--fs/fuse/dir.c70
-rw-r--r--fs/fuse/file.c58
-rw-r--r--fs/fuse/fuse_i.h12
-rw-r--r--fs/fuse/inode.c11
-rw-r--r--fs/gfs2/acl.c14
-rw-r--r--fs/gfs2/aops.c18
-rw-r--r--fs/gfs2/bmap.c26
-rw-r--r--fs/gfs2/dir.c64
-rw-r--r--fs/gfs2/dir.h2
-rw-r--r--fs/gfs2/export.c3
-rw-r--r--fs/gfs2/file.c38
-rw-r--r--fs/gfs2/glock.c2
-rw-r--r--fs/gfs2/glock.h7
-rw-r--r--fs/gfs2/incore.h80
-rw-r--r--fs/gfs2/inode.c92
-rw-r--r--fs/gfs2/lock_dlm.c993
-rw-r--r--fs/gfs2/log.c6
-rw-r--r--fs/gfs2/main.c13
-rw-r--r--fs/gfs2/meta_io.c4
-rw-r--r--fs/gfs2/ops_fstype.c33
-rw-r--r--fs/gfs2/quota.c91
-rw-r--r--fs/gfs2/recovery.c11
-rw-r--r--fs/gfs2/rgrp.c293
-rw-r--r--fs/gfs2/rgrp.h16
-rw-r--r--fs/gfs2/super.c23
-rw-r--r--fs/gfs2/sys.c33
-rw-r--r--fs/gfs2/sys.h2
-rw-r--r--fs/gfs2/trans.h6
-rw-r--r--fs/gfs2/xattr.c48
-rw-r--r--fs/hfs/dir.c4
-rw-r--r--fs/hfs/hfs_fs.h2
-rw-r--r--fs/hfs/inode.c2
-rw-r--r--fs/hfs/super.c5
-rw-r--r--fs/hfsplus/dir.c6
-rw-r--r--fs/hfsplus/hfsplus_fs.h4
-rw-r--r--fs/hfsplus/inode.c2
-rw-r--r--fs/hfsplus/ioctl.c4
-rw-r--r--fs/hfsplus/options.c4
-rw-r--r--fs/hfsplus/super.c12
-rw-r--r--fs/hostfs/hostfs.h2
-rw-r--r--fs/hostfs/hostfs_kern.c11
-rw-r--r--fs/hpfs/namei.c6
-rw-r--r--fs/hpfs/super.c1
-rw-r--r--fs/hppfs/hppfs.c3
-rw-r--r--fs/hugetlbfs/inode.c69
-rw-r--r--fs/inode.c94
-rw-r--r--fs/internal.h30
-rw-r--r--fs/ioctl.c2
-rw-r--r--fs/ioprio.c24
-rw-r--r--fs/isofs/inode.c12
-rw-r--r--fs/isofs/isofs.h6
-rw-r--r--fs/jbd/checkpoint.c2
-rw-r--r--fs/jbd/commit.c6
-rw-r--r--fs/jbd/journal.c3
-rw-r--r--fs/jbd/revoke.c34
-rw-r--r--fs/jbd/transaction.c38
-rw-r--r--fs/jbd2/checkpoint.c2
-rw-r--r--fs/jbd2/commit.c6
-rw-r--r--fs/jbd2/journal.c2
-rw-r--r--fs/jbd2/revoke.c34
-rw-r--r--fs/jbd2/transaction.c5
-rw-r--r--fs/jffs2/dir.c14
-rw-r--r--fs/jffs2/erase.c17
-rw-r--r--fs/jffs2/fs.c1
-rw-r--r--fs/jffs2/readinode.c22
-rw-r--r--fs/jffs2/scan.c12
-rw-r--r--fs/jffs2/super.c9
-rw-r--r--fs/jffs2/wbuf.c38
-rw-r--r--fs/jffs2/writev.c32
-rw-r--r--fs/jfs/ioctl.c4
-rw-r--r--fs/jfs/jfs_logmgr.c2
-rw-r--r--fs/jfs/jfs_txnmgr.c4
-rw-r--r--fs/jfs/namei.c6
-rw-r--r--fs/jfs/super.c5
-rw-r--r--fs/libfs.c2
-rw-r--r--fs/lockd/mon.c2
-rw-r--r--fs/lockd/svcsubs.c2
-rw-r--r--fs/logfs/dev_mtd.c78
-rw-r--r--fs/logfs/dir.c6
-rw-r--r--fs/logfs/inode.c3
-rw-r--r--fs/logfs/logfs.h2
-rw-r--r--fs/minix/bitmap.c2
-rw-r--r--fs/minix/inode.c1
-rw-r--r--fs/minix/minix.h2
-rw-r--r--fs/minix/namei.c6
-rw-r--r--fs/mount.h76
-rw-r--r--fs/mpage.c4
-rw-r--r--fs/namei.c55
-rw-r--r--fs/namespace.c831
-rw-r--r--fs/ncpfs/dir.c18
-rw-r--r--fs/ncpfs/inode.c7
-rw-r--r--fs/ncpfs/ioctl.c2
-rw-r--r--fs/ncpfs/ncplib_kernel.h2
-rw-r--r--fs/ncpfs/symlink.c2
-rw-r--r--fs/nfs/callback_proc.c2
-rw-r--r--fs/nfs/client.c12
-rw-r--r--fs/nfs/dir.c33
-rw-r--r--fs/nfs/file.c4
-rw-r--r--fs/nfs/idmap.c83
-rw-r--r--fs/nfs/inode.c48
-rw-r--r--fs/nfs/internal.h4
-rw-r--r--fs/nfs/nfs3proc.c3
-rw-r--r--fs/nfs/nfs4_fs.h3
-rw-r--r--fs/nfs/nfs4filelayout.c9
-rw-r--r--fs/nfs/nfs4proc.c182
-rw-r--r--fs/nfs/nfs4state.c104
-rw-r--r--fs/nfs/nfs4xdr.c137
-rw-r--r--fs/nfs/objlayout/objio_osd.c3
-rw-r--r--fs/nfs/objlayout/objlayout.c4
-rw-r--r--fs/nfs/pnfs.c42
-rw-r--r--fs/nfs/pnfs.h1
-rw-r--r--fs/nfs/proc.c3
-rw-r--r--fs/nfs/super.c92
-rw-r--r--fs/nfs/write.c31
-rw-r--r--fs/nfsd/Kconfig10
-rw-r--r--fs/nfsd/Makefile1
-rw-r--r--fs/nfsd/export.c12
-rw-r--r--fs/nfsd/fault_inject.c91
-rw-r--r--fs/nfsd/fault_inject.h28
-rw-r--r--fs/nfsd/nfs4callback.c2
-rw-r--r--fs/nfsd/nfs4idmap.c11
-rw-r--r--fs/nfsd/nfs4proc.c11
-rw-r--r--fs/nfsd/nfs4recover.c34
-rw-r--r--fs/nfsd/nfs4state.c330
-rw-r--r--fs/nfsd/nfs4xdr.c3
-rw-r--r--fs/nfsd/nfsctl.c12
-rw-r--r--fs/nfsd/nfsd.h20
-rw-r--r--fs/nfsd/nfsfh.c4
-rw-r--r--fs/nfsd/nfsfh.h2
-rw-r--r--fs/nfsd/state.h3
-rw-r--r--fs/nfsd/vfs.c55
-rw-r--r--fs/nfsd/vfs.h12
-rw-r--r--fs/nilfs2/dir.c2
-rw-r--r--fs/nilfs2/inode.c2
-rw-r--r--fs/nilfs2/ioctl.c22
-rw-r--r--fs/nilfs2/namei.c6
-rw-r--r--fs/nilfs2/nilfs.h2
-rw-r--r--fs/nilfs2/segment.c2
-rw-r--r--fs/nilfs2/super.c8
-rw-r--r--fs/nls/nls_base.c73
-rw-r--r--fs/notify/fanotify/fanotify_user.c6
-rw-r--r--fs/notify/fsnotify.c9
-rw-r--r--fs/notify/mark.c8
-rw-r--r--fs/notify/vfsmount_mark.c19
-rw-r--r--fs/ntfs/inode.c9
-rw-r--r--fs/ntfs/inode.h2
-rw-r--r--fs/ntfs/super.c8
-rw-r--r--fs/ntfs/volume.h4
-rw-r--r--fs/ocfs2/cluster/netdebug.c2
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c24
-rw-r--r--fs/ocfs2/file.c2
-rw-r--r--fs/ocfs2/ioctl.c4
-rw-r--r--fs/ocfs2/move_extents.c4
-rw-r--r--fs/ocfs2/namei.c8
-rw-r--r--fs/ocfs2/stack_user.c4
-rw-r--r--fs/ocfs2/super.c10
-rw-r--r--fs/ocfs2/xattr.c2
-rw-r--r--fs/ocfs2/xattr.h2
-rw-r--r--fs/omfs/dir.c6
-rw-r--r--fs/omfs/inode.c2
-rw-r--r--fs/omfs/omfs.h2
-rw-r--r--fs/open.c22
-rw-r--r--fs/openpromfs/inode.c1
-rw-r--r--fs/partitions/Kconfig251
-rw-r--r--fs/partitions/Makefile20
-rw-r--r--fs/partitions/acorn.c556
-rw-r--r--fs/partitions/acorn.h14
-rw-r--r--fs/partitions/amiga.c139
-rw-r--r--fs/partitions/amiga.h6
-rw-r--r--fs/partitions/atari.c149
-rw-r--r--fs/partitions/atari.h34
-rw-r--r--fs/partitions/check.c687
-rw-r--r--fs/partitions/check.h49
-rw-r--r--fs/partitions/efi.c675
-rw-r--r--fs/partitions/efi.h134
-rw-r--r--fs/partitions/ibm.c275
-rw-r--r--fs/partitions/ibm.h1
-rw-r--r--fs/partitions/karma.c57
-rw-r--r--fs/partitions/karma.h8
-rw-r--r--fs/partitions/ldm.c1570
-rw-r--r--fs/partitions/ldm.h215
-rw-r--r--fs/partitions/mac.c134
-rw-r--r--fs/partitions/mac.h44
-rw-r--r--fs/partitions/msdos.c552
-rw-r--r--fs/partitions/msdos.h8
-rw-r--r--fs/partitions/osf.c86
-rw-r--r--fs/partitions/osf.h7
-rw-r--r--fs/partitions/sgi.c82
-rw-r--r--fs/partitions/sgi.h8
-rw-r--r--fs/partitions/sun.c122
-rw-r--r--fs/partitions/sun.h8
-rw-r--r--fs/partitions/sysv68.c95
-rw-r--r--fs/partitions/sysv68.h1
-rw-r--r--fs/partitions/ultrix.c48
-rw-r--r--fs/partitions/ultrix.h5
-rw-r--r--fs/pipe.c9
-rw-r--r--fs/pnode.c120
-rw-r--r--fs/pnode.h36
-rw-r--r--fs/proc/array.c17
-rw-r--r--fs/proc/base.c545
-rw-r--r--fs/proc/generic.c8
-rw-r--r--fs/proc/inode.c19
-rw-r--r--fs/proc/internal.h1
-rw-r--r--fs/proc/namespaces.c1
-rw-r--r--fs/proc/proc_net.c2
-rw-r--r--fs/proc/root.c70
-rw-r--r--fs/proc/stat.c63
-rw-r--r--fs/proc/uptime.c11
-rw-r--r--fs/proc_namespace.c333
-rw-r--r--fs/pstore/inode.c3
-rw-r--r--fs/pstore/platform.c36
-rw-r--r--fs/qnx4/inode.c8
-rw-r--r--fs/quota/dquot.c3
-rw-r--r--fs/quota/quota.c1
-rw-r--r--fs/ramfs/inode.c8
-rw-r--r--fs/reiserfs/bitmap.c94
-rw-r--r--fs/reiserfs/inode.c2
-rw-r--r--fs/reiserfs/ioctl.c8
-rw-r--r--fs/reiserfs/journal.c64
-rw-r--r--fs/reiserfs/namei.c8
-rw-r--r--fs/reiserfs/super.c200
-rw-r--r--fs/reiserfs/xattr.c2
-rw-r--r--fs/romfs/mmap-nommu.c28
-rw-r--r--fs/romfs/super.c1
-rw-r--r--fs/seq_file.c10
-rw-r--r--fs/splice.c1
-rw-r--r--fs/squashfs/cache.c30
-rw-r--r--fs/squashfs/inode.c4
-rw-r--r--fs/squashfs/squashfs_fs_sb.h1
-rw-r--r--fs/squashfs/super.c3
-rw-r--r--fs/statfs.c21
-rw-r--r--fs/super.c70
-rw-r--r--fs/sync.c1
-rw-r--r--fs/sysfs/file.c4
-rw-r--r--fs/sysfs/group.c2
-rw-r--r--fs/sysfs/inode.c2
-rw-r--r--fs/sysfs/sysfs.h4
-rw-r--r--fs/sysv/ialloc.c2
-rw-r--r--fs/sysv/inode.c1
-rw-r--r--fs/sysv/itree.c2
-rw-r--r--fs/sysv/namei.c6
-rw-r--r--fs/sysv/sysv.h2
-rw-r--r--fs/ubifs/debug.c90
-rw-r--r--fs/ubifs/debug.h75
-rw-r--r--fs/ubifs/dir.c14
-rw-r--r--fs/ubifs/ioctl.c4
-rw-r--r--fs/ubifs/journal.c7
-rw-r--r--fs/ubifs/lpt.c6
-rw-r--r--fs/ubifs/replay.c8
-rw-r--r--fs/ubifs/super.c5
-rw-r--r--fs/ubifs/tnc.c58
-rw-r--r--fs/ubifs/tnc_misc.c10
-rw-r--r--fs/ubifs/ubifs.h2
-rw-r--r--fs/ubifs/xattr.c6
-rw-r--r--fs/udf/file.c6
-rw-r--r--fs/udf/ialloc.c2
-rw-r--r--fs/udf/inode.c63
-rw-r--r--fs/udf/namei.c6
-rw-r--r--fs/udf/super.c25
-rw-r--r--fs/udf/symlink.c14
-rw-r--r--fs/udf/udf_sb.h8
-rw-r--r--fs/udf/udfdecl.h2
-rw-r--r--fs/ufs/ialloc.c2
-rw-r--r--fs/ufs/inode.c4
-rw-r--r--fs/ufs/namei.c6
-rw-r--r--fs/ufs/super.c5
-rw-r--r--fs/ufs/ufs.h2
-rw-r--r--fs/xattr.c4
-rw-r--r--fs/xfs/xfs_acl.c2
-rw-r--r--fs/xfs/xfs_buf.c10
-rw-r--r--fs/xfs/xfs_buf.h3
-rw-r--r--fs/xfs/xfs_discard.c4
-rw-r--r--fs/xfs/xfs_dquot.c500
-rw-r--r--fs/xfs/xfs_dquot.h39
-rw-r--r--fs/xfs/xfs_dquot_item.c5
-rw-r--r--fs/xfs/xfs_file.c6
-rw-r--r--fs/xfs/xfs_ialloc.c4
-rw-r--r--fs/xfs/xfs_ialloc.h2
-rw-r--r--fs/xfs/xfs_iget.c1
-rw-r--r--fs/xfs/xfs_inode.c4
-rw-r--r--fs/xfs/xfs_inode.h2
-rw-r--r--fs/xfs/xfs_inode_item.c2
-rw-r--r--fs/xfs/xfs_ioctl.c8
-rw-r--r--fs/xfs/xfs_ioctl32.c8
-rw-r--r--fs/xfs/xfs_iops.c8
-rw-r--r--fs/xfs/xfs_log.c79
-rw-r--r--fs/xfs/xfs_log.h8
-rw-r--r--fs/xfs/xfs_log_cil.c98
-rw-r--r--fs/xfs/xfs_mount.h1
-rw-r--r--fs/xfs/xfs_qm.c464
-rw-r--r--fs/xfs/xfs_qm.h6
-rw-r--r--fs/xfs/xfs_quota.h12
-rw-r--r--fs/xfs/xfs_super.c40
-rw-r--r--fs/xfs/xfs_sync.c6
-rw-r--r--fs/xfs/xfs_trace.h2
-rw-r--r--fs/xfs/xfs_trans.c475
-rw-r--r--fs/xfs/xfs_trans.h3
-rw-r--r--fs/xfs/xfs_utils.c2
-rw-r--r--fs/xfs/xfs_utils.h2
-rw-r--r--fs/xfs/xfs_vnodeops.c4
-rw-r--r--fs/xfs/xfs_vnodeops.h4
454 files changed, 8975 insertions, 11728 deletions
diff --git a/fs/9p/cache.c b/fs/9p/cache.c
index 945aa5f02f9b..a9ea73d6dcf3 100644
--- a/fs/9p/cache.c
+++ b/fs/9p/cache.c
@@ -62,8 +62,8 @@ static uint16_t v9fs_cache_session_get_key(const void *cookie_netfs_data,
62 uint16_t klen = 0; 62 uint16_t klen = 0;
63 63
64 v9ses = (struct v9fs_session_info *)cookie_netfs_data; 64 v9ses = (struct v9fs_session_info *)cookie_netfs_data;
65 P9_DPRINTK(P9_DEBUG_FSC, "session %p buf %p size %u", v9ses, 65 p9_debug(P9_DEBUG_FSC, "session %p buf %p size %u\n",
66 buffer, bufmax); 66 v9ses, buffer, bufmax);
67 67
68 if (v9ses->cachetag) 68 if (v9ses->cachetag)
69 klen = strlen(v9ses->cachetag); 69 klen = strlen(v9ses->cachetag);
@@ -72,7 +72,7 @@ static uint16_t v9fs_cache_session_get_key(const void *cookie_netfs_data,
72 return 0; 72 return 0;
73 73
74 memcpy(buffer, v9ses->cachetag, klen); 74 memcpy(buffer, v9ses->cachetag, klen);
75 P9_DPRINTK(P9_DEBUG_FSC, "cache session tag %s", v9ses->cachetag); 75 p9_debug(P9_DEBUG_FSC, "cache session tag %s\n", v9ses->cachetag);
76 return klen; 76 return klen;
77} 77}
78 78
@@ -91,14 +91,14 @@ void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses)
91 v9ses->fscache = fscache_acquire_cookie(v9fs_cache_netfs.primary_index, 91 v9ses->fscache = fscache_acquire_cookie(v9fs_cache_netfs.primary_index,
92 &v9fs_cache_session_index_def, 92 &v9fs_cache_session_index_def,
93 v9ses); 93 v9ses);
94 P9_DPRINTK(P9_DEBUG_FSC, "session %p get cookie %p", v9ses, 94 p9_debug(P9_DEBUG_FSC, "session %p get cookie %p\n",
95 v9ses->fscache); 95 v9ses, v9ses->fscache);
96} 96}
97 97
98void v9fs_cache_session_put_cookie(struct v9fs_session_info *v9ses) 98void v9fs_cache_session_put_cookie(struct v9fs_session_info *v9ses)
99{ 99{
100 P9_DPRINTK(P9_DEBUG_FSC, "session %p put cookie %p", v9ses, 100 p9_debug(P9_DEBUG_FSC, "session %p put cookie %p\n",
101 v9ses->fscache); 101 v9ses, v9ses->fscache);
102 fscache_relinquish_cookie(v9ses->fscache, 0); 102 fscache_relinquish_cookie(v9ses->fscache, 0);
103 v9ses->fscache = NULL; 103 v9ses->fscache = NULL;
104} 104}
@@ -109,8 +109,8 @@ static uint16_t v9fs_cache_inode_get_key(const void *cookie_netfs_data,
109{ 109{
110 const struct v9fs_inode *v9inode = cookie_netfs_data; 110 const struct v9fs_inode *v9inode = cookie_netfs_data;
111 memcpy(buffer, &v9inode->qid.path, sizeof(v9inode->qid.path)); 111 memcpy(buffer, &v9inode->qid.path, sizeof(v9inode->qid.path));
112 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get key %llu", &v9inode->vfs_inode, 112 p9_debug(P9_DEBUG_FSC, "inode %p get key %llu\n",
113 v9inode->qid.path); 113 &v9inode->vfs_inode, v9inode->qid.path);
114 return sizeof(v9inode->qid.path); 114 return sizeof(v9inode->qid.path);
115} 115}
116 116
@@ -120,8 +120,8 @@ static void v9fs_cache_inode_get_attr(const void *cookie_netfs_data,
120 const struct v9fs_inode *v9inode = cookie_netfs_data; 120 const struct v9fs_inode *v9inode = cookie_netfs_data;
121 *size = i_size_read(&v9inode->vfs_inode); 121 *size = i_size_read(&v9inode->vfs_inode);
122 122
123 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get attr %llu", &v9inode->vfs_inode, 123 p9_debug(P9_DEBUG_FSC, "inode %p get attr %llu\n",
124 *size); 124 &v9inode->vfs_inode, *size);
125} 125}
126 126
127static uint16_t v9fs_cache_inode_get_aux(const void *cookie_netfs_data, 127static uint16_t v9fs_cache_inode_get_aux(const void *cookie_netfs_data,
@@ -129,8 +129,8 @@ static uint16_t v9fs_cache_inode_get_aux(const void *cookie_netfs_data,
129{ 129{
130 const struct v9fs_inode *v9inode = cookie_netfs_data; 130 const struct v9fs_inode *v9inode = cookie_netfs_data;
131 memcpy(buffer, &v9inode->qid.version, sizeof(v9inode->qid.version)); 131 memcpy(buffer, &v9inode->qid.version, sizeof(v9inode->qid.version));
132 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get aux %u", &v9inode->vfs_inode, 132 p9_debug(P9_DEBUG_FSC, "inode %p get aux %u\n",
133 v9inode->qid.version); 133 &v9inode->vfs_inode, v9inode->qid.version);
134 return sizeof(v9inode->qid.version); 134 return sizeof(v9inode->qid.version);
135} 135}
136 136
@@ -206,8 +206,8 @@ void v9fs_cache_inode_get_cookie(struct inode *inode)
206 &v9fs_cache_inode_index_def, 206 &v9fs_cache_inode_index_def,
207 v9inode); 207 v9inode);
208 208
209 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get cookie %p", inode, 209 p9_debug(P9_DEBUG_FSC, "inode %p get cookie %p\n",
210 v9inode->fscache); 210 inode, v9inode->fscache);
211} 211}
212 212
213void v9fs_cache_inode_put_cookie(struct inode *inode) 213void v9fs_cache_inode_put_cookie(struct inode *inode)
@@ -216,8 +216,8 @@ void v9fs_cache_inode_put_cookie(struct inode *inode)
216 216
217 if (!v9inode->fscache) 217 if (!v9inode->fscache)
218 return; 218 return;
219 P9_DPRINTK(P9_DEBUG_FSC, "inode %p put cookie %p", inode, 219 p9_debug(P9_DEBUG_FSC, "inode %p put cookie %p\n",
220 v9inode->fscache); 220 inode, v9inode->fscache);
221 221
222 fscache_relinquish_cookie(v9inode->fscache, 0); 222 fscache_relinquish_cookie(v9inode->fscache, 0);
223 v9inode->fscache = NULL; 223 v9inode->fscache = NULL;
@@ -229,8 +229,8 @@ void v9fs_cache_inode_flush_cookie(struct inode *inode)
229 229
230 if (!v9inode->fscache) 230 if (!v9inode->fscache)
231 return; 231 return;
232 P9_DPRINTK(P9_DEBUG_FSC, "inode %p flush cookie %p", inode, 232 p9_debug(P9_DEBUG_FSC, "inode %p flush cookie %p\n",
233 v9inode->fscache); 233 inode, v9inode->fscache);
234 234
235 fscache_relinquish_cookie(v9inode->fscache, 1); 235 fscache_relinquish_cookie(v9inode->fscache, 1);
236 v9inode->fscache = NULL; 236 v9inode->fscache = NULL;
@@ -272,8 +272,8 @@ void v9fs_cache_inode_reset_cookie(struct inode *inode)
272 v9inode->fscache = fscache_acquire_cookie(v9ses->fscache, 272 v9inode->fscache = fscache_acquire_cookie(v9ses->fscache,
273 &v9fs_cache_inode_index_def, 273 &v9fs_cache_inode_index_def,
274 v9inode); 274 v9inode);
275 P9_DPRINTK(P9_DEBUG_FSC, "inode %p revalidating cookie old %p new %p", 275 p9_debug(P9_DEBUG_FSC, "inode %p revalidating cookie old %p new %p\n",
276 inode, old, v9inode->fscache); 276 inode, old, v9inode->fscache);
277 277
278 spin_unlock(&v9inode->fscache_lock); 278 spin_unlock(&v9inode->fscache_lock);
279} 279}
@@ -323,7 +323,7 @@ int __v9fs_readpage_from_fscache(struct inode *inode, struct page *page)
323 int ret; 323 int ret;
324 const struct v9fs_inode *v9inode = V9FS_I(inode); 324 const struct v9fs_inode *v9inode = V9FS_I(inode);
325 325
326 P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page); 326 p9_debug(P9_DEBUG_FSC, "inode %p page %p\n", inode, page);
327 if (!v9inode->fscache) 327 if (!v9inode->fscache)
328 return -ENOBUFS; 328 return -ENOBUFS;
329 329
@@ -335,13 +335,13 @@ int __v9fs_readpage_from_fscache(struct inode *inode, struct page *page)
335 switch (ret) { 335 switch (ret) {
336 case -ENOBUFS: 336 case -ENOBUFS:
337 case -ENODATA: 337 case -ENODATA:
338 P9_DPRINTK(P9_DEBUG_FSC, "page/inode not in cache %d", ret); 338 p9_debug(P9_DEBUG_FSC, "page/inode not in cache %d\n", ret);
339 return 1; 339 return 1;
340 case 0: 340 case 0:
341 P9_DPRINTK(P9_DEBUG_FSC, "BIO submitted"); 341 p9_debug(P9_DEBUG_FSC, "BIO submitted\n");
342 return ret; 342 return ret;
343 default: 343 default:
344 P9_DPRINTK(P9_DEBUG_FSC, "ret %d", ret); 344 p9_debug(P9_DEBUG_FSC, "ret %d\n", ret);
345 return ret; 345 return ret;
346 } 346 }
347} 347}
@@ -361,7 +361,7 @@ int __v9fs_readpages_from_fscache(struct inode *inode,
361 int ret; 361 int ret;
362 const struct v9fs_inode *v9inode = V9FS_I(inode); 362 const struct v9fs_inode *v9inode = V9FS_I(inode);
363 363
364 P9_DPRINTK(P9_DEBUG_FSC, "inode %p pages %u", inode, *nr_pages); 364 p9_debug(P9_DEBUG_FSC, "inode %p pages %u\n", inode, *nr_pages);
365 if (!v9inode->fscache) 365 if (!v9inode->fscache)
366 return -ENOBUFS; 366 return -ENOBUFS;
367 367
@@ -373,15 +373,15 @@ int __v9fs_readpages_from_fscache(struct inode *inode,
373 switch (ret) { 373 switch (ret) {
374 case -ENOBUFS: 374 case -ENOBUFS:
375 case -ENODATA: 375 case -ENODATA:
376 P9_DPRINTK(P9_DEBUG_FSC, "pages/inodes not in cache %d", ret); 376 p9_debug(P9_DEBUG_FSC, "pages/inodes not in cache %d\n", ret);
377 return 1; 377 return 1;
378 case 0: 378 case 0:
379 BUG_ON(!list_empty(pages)); 379 BUG_ON(!list_empty(pages));
380 BUG_ON(*nr_pages != 0); 380 BUG_ON(*nr_pages != 0);
381 P9_DPRINTK(P9_DEBUG_FSC, "BIO submitted"); 381 p9_debug(P9_DEBUG_FSC, "BIO submitted\n");
382 return ret; 382 return ret;
383 default: 383 default:
384 P9_DPRINTK(P9_DEBUG_FSC, "ret %d", ret); 384 p9_debug(P9_DEBUG_FSC, "ret %d\n", ret);
385 return ret; 385 return ret;
386 } 386 }
387} 387}
@@ -396,9 +396,9 @@ void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page)
396 int ret; 396 int ret;
397 const struct v9fs_inode *v9inode = V9FS_I(inode); 397 const struct v9fs_inode *v9inode = V9FS_I(inode);
398 398
399 P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page); 399 p9_debug(P9_DEBUG_FSC, "inode %p page %p\n", inode, page);
400 ret = fscache_write_page(v9inode->fscache, page, GFP_KERNEL); 400 ret = fscache_write_page(v9inode->fscache, page, GFP_KERNEL);
401 P9_DPRINTK(P9_DEBUG_FSC, "ret = %d", ret); 401 p9_debug(P9_DEBUG_FSC, "ret = %d\n", ret);
402 if (ret != 0) 402 if (ret != 0)
403 v9fs_uncache_page(inode, page); 403 v9fs_uncache_page(inode, page);
404} 404}
@@ -409,7 +409,7 @@ void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page)
409void __v9fs_fscache_wait_on_page_write(struct inode *inode, struct page *page) 409void __v9fs_fscache_wait_on_page_write(struct inode *inode, struct page *page)
410{ 410{
411 const struct v9fs_inode *v9inode = V9FS_I(inode); 411 const struct v9fs_inode *v9inode = V9FS_I(inode);
412 P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page); 412 p9_debug(P9_DEBUG_FSC, "inode %p page %p\n", inode, page);
413 if (PageFsCache(page)) 413 if (PageFsCache(page))
414 fscache_wait_on_page_write(v9inode->fscache, page); 414 fscache_wait_on_page_write(v9inode->fscache, page);
415} 415}
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 85b67ffa2a43..da8eefbe830d 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -45,8 +45,8 @@ int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid)
45{ 45{
46 struct v9fs_dentry *dent; 46 struct v9fs_dentry *dent;
47 47
48 P9_DPRINTK(P9_DEBUG_VFS, "fid %d dentry %s\n", 48 p9_debug(P9_DEBUG_VFS, "fid %d dentry %s\n",
49 fid->fid, dentry->d_name.name); 49 fid->fid, dentry->d_name.name);
50 50
51 dent = dentry->d_fsdata; 51 dent = dentry->d_fsdata;
52 if (!dent) { 52 if (!dent) {
@@ -79,8 +79,8 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, u32 uid, int any)
79 struct v9fs_dentry *dent; 79 struct v9fs_dentry *dent;
80 struct p9_fid *fid, *ret; 80 struct p9_fid *fid, *ret;
81 81
82 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p) uid %d any %d\n", 82 p9_debug(P9_DEBUG_VFS, " dentry: %s (%p) uid %d any %d\n",
83 dentry->d_name.name, dentry, uid, any); 83 dentry->d_name.name, dentry, uid, any);
84 dent = (struct v9fs_dentry *) dentry->d_fsdata; 84 dent = (struct v9fs_dentry *) dentry->d_fsdata;
85 ret = NULL; 85 ret = NULL;
86 if (dent) { 86 if (dent) {
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 2b78014a124a..1964f98e74be 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -23,6 +23,8 @@
23 * 23 *
24 */ 24 */
25 25
26#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
27
26#include <linux/module.h> 28#include <linux/module.h>
27#include <linux/errno.h> 29#include <linux/errno.h>
28#include <linux/fs.h> 30#include <linux/fs.h>
@@ -85,15 +87,15 @@ static int get_cache_mode(char *s)
85 87
86 if (!strcmp(s, "loose")) { 88 if (!strcmp(s, "loose")) {
87 version = CACHE_LOOSE; 89 version = CACHE_LOOSE;
88 P9_DPRINTK(P9_DEBUG_9P, "Cache mode: loose\n"); 90 p9_debug(P9_DEBUG_9P, "Cache mode: loose\n");
89 } else if (!strcmp(s, "fscache")) { 91 } else if (!strcmp(s, "fscache")) {
90 version = CACHE_FSCACHE; 92 version = CACHE_FSCACHE;
91 P9_DPRINTK(P9_DEBUG_9P, "Cache mode: fscache\n"); 93 p9_debug(P9_DEBUG_9P, "Cache mode: fscache\n");
92 } else if (!strcmp(s, "none")) { 94 } else if (!strcmp(s, "none")) {
93 version = CACHE_NONE; 95 version = CACHE_NONE;
94 P9_DPRINTK(P9_DEBUG_9P, "Cache mode: none\n"); 96 p9_debug(P9_DEBUG_9P, "Cache mode: none\n");
95 } else 97 } else
96 printk(KERN_INFO "9p: Unknown Cache mode %s.\n", s); 98 pr_info("Unknown Cache mode %s\n", s);
97 return version; 99 return version;
98} 100}
99 101
@@ -140,8 +142,8 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
140 case Opt_debug: 142 case Opt_debug:
141 r = match_int(&args[0], &option); 143 r = match_int(&args[0], &option);
142 if (r < 0) { 144 if (r < 0) {
143 P9_DPRINTK(P9_DEBUG_ERROR, 145 p9_debug(P9_DEBUG_ERROR,
144 "integer field, but no integer?\n"); 146 "integer field, but no integer?\n");
145 ret = r; 147 ret = r;
146 continue; 148 continue;
147 } 149 }
@@ -154,8 +156,8 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
154 case Opt_dfltuid: 156 case Opt_dfltuid:
155 r = match_int(&args[0], &option); 157 r = match_int(&args[0], &option);
156 if (r < 0) { 158 if (r < 0) {
157 P9_DPRINTK(P9_DEBUG_ERROR, 159 p9_debug(P9_DEBUG_ERROR,
158 "integer field, but no integer?\n"); 160 "integer field, but no integer?\n");
159 ret = r; 161 ret = r;
160 continue; 162 continue;
161 } 163 }
@@ -164,8 +166,8 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
164 case Opt_dfltgid: 166 case Opt_dfltgid:
165 r = match_int(&args[0], &option); 167 r = match_int(&args[0], &option);
166 if (r < 0) { 168 if (r < 0) {
167 P9_DPRINTK(P9_DEBUG_ERROR, 169 p9_debug(P9_DEBUG_ERROR,
168 "integer field, but no integer?\n"); 170 "integer field, but no integer?\n");
169 ret = r; 171 ret = r;
170 continue; 172 continue;
171 } 173 }
@@ -174,8 +176,8 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
174 case Opt_afid: 176 case Opt_afid:
175 r = match_int(&args[0], &option); 177 r = match_int(&args[0], &option);
176 if (r < 0) { 178 if (r < 0) {
177 P9_DPRINTK(P9_DEBUG_ERROR, 179 p9_debug(P9_DEBUG_ERROR,
178 "integer field, but no integer?\n"); 180 "integer field, but no integer?\n");
179 ret = r; 181 ret = r;
180 continue; 182 continue;
181 } 183 }
@@ -205,8 +207,8 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
205 s = match_strdup(&args[0]); 207 s = match_strdup(&args[0]);
206 if (!s) { 208 if (!s) {
207 ret = -ENOMEM; 209 ret = -ENOMEM;
208 P9_DPRINTK(P9_DEBUG_ERROR, 210 p9_debug(P9_DEBUG_ERROR,
209 "problem allocating copy of cache arg\n"); 211 "problem allocating copy of cache arg\n");
210 goto free_and_return; 212 goto free_and_return;
211 } 213 }
212 ret = get_cache_mode(s); 214 ret = get_cache_mode(s);
@@ -223,8 +225,8 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
223 s = match_strdup(&args[0]); 225 s = match_strdup(&args[0]);
224 if (!s) { 226 if (!s) {
225 ret = -ENOMEM; 227 ret = -ENOMEM;
226 P9_DPRINTK(P9_DEBUG_ERROR, 228 p9_debug(P9_DEBUG_ERROR,
227 "problem allocating copy of access arg\n"); 229 "problem allocating copy of access arg\n");
228 goto free_and_return; 230 goto free_and_return;
229 } 231 }
230 232
@@ -240,8 +242,8 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
240 v9ses->uid = simple_strtoul(s, &e, 10); 242 v9ses->uid = simple_strtoul(s, &e, 10);
241 if (*e != '\0') { 243 if (*e != '\0') {
242 ret = -EINVAL; 244 ret = -EINVAL;
243 printk(KERN_INFO "9p: Unknown access " 245 pr_info("Unknown access argument %s\n",
244 "argument %s.\n", s); 246 s);
245 kfree(s); 247 kfree(s);
246 goto free_and_return; 248 goto free_and_return;
247 } 249 }
@@ -254,9 +256,8 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
254#ifdef CONFIG_9P_FS_POSIX_ACL 256#ifdef CONFIG_9P_FS_POSIX_ACL
255 v9ses->flags |= V9FS_POSIX_ACL; 257 v9ses->flags |= V9FS_POSIX_ACL;
256#else 258#else
257 P9_DPRINTK(P9_DEBUG_ERROR, 259 p9_debug(P9_DEBUG_ERROR,
258 "Not defined CONFIG_9P_FS_POSIX_ACL. " 260 "Not defined CONFIG_9P_FS_POSIX_ACL. Ignoring posixacl option\n");
259 "Ignoring posixacl option\n");
260#endif 261#endif
261 break; 262 break;
262 263
@@ -318,7 +319,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
318 if (IS_ERR(v9ses->clnt)) { 319 if (IS_ERR(v9ses->clnt)) {
319 retval = PTR_ERR(v9ses->clnt); 320 retval = PTR_ERR(v9ses->clnt);
320 v9ses->clnt = NULL; 321 v9ses->clnt = NULL;
321 P9_DPRINTK(P9_DEBUG_ERROR, "problem initializing 9p client\n"); 322 p9_debug(P9_DEBUG_ERROR, "problem initializing 9p client\n");
322 goto error; 323 goto error;
323 } 324 }
324 325
@@ -371,7 +372,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
371 if (IS_ERR(fid)) { 372 if (IS_ERR(fid)) {
372 retval = PTR_ERR(fid); 373 retval = PTR_ERR(fid);
373 fid = NULL; 374 fid = NULL;
374 P9_DPRINTK(P9_DEBUG_ERROR, "cannot attach\n"); 375 p9_debug(P9_DEBUG_ERROR, "cannot attach\n");
375 goto error; 376 goto error;
376 } 377 }
377 378
@@ -429,7 +430,7 @@ void v9fs_session_close(struct v9fs_session_info *v9ses)
429 */ 430 */
430 431
431void v9fs_session_cancel(struct v9fs_session_info *v9ses) { 432void v9fs_session_cancel(struct v9fs_session_info *v9ses) {
432 P9_DPRINTK(P9_DEBUG_ERROR, "cancel session %p\n", v9ses); 433 p9_debug(P9_DEBUG_ERROR, "cancel session %p\n", v9ses);
433 p9_client_disconnect(v9ses->clnt); 434 p9_client_disconnect(v9ses->clnt);
434} 435}
435 436
@@ -442,7 +443,7 @@ void v9fs_session_cancel(struct v9fs_session_info *v9ses) {
442 443
443void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses) 444void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses)
444{ 445{
445 P9_DPRINTK(P9_DEBUG_ERROR, "begin cancel session %p\n", v9ses); 446 p9_debug(P9_DEBUG_ERROR, "begin cancel session %p\n", v9ses);
446 p9_client_begin_disconnect(v9ses->clnt); 447 p9_client_begin_disconnect(v9ses->clnt);
447} 448}
448 449
@@ -591,23 +592,23 @@ static void v9fs_cache_unregister(void)
591static int __init init_v9fs(void) 592static int __init init_v9fs(void)
592{ 593{
593 int err; 594 int err;
594 printk(KERN_INFO "Installing v9fs 9p2000 file system support\n"); 595 pr_info("Installing v9fs 9p2000 file system support\n");
595 /* TODO: Setup list of registered trasnport modules */ 596 /* TODO: Setup list of registered trasnport modules */
596 err = register_filesystem(&v9fs_fs_type); 597 err = register_filesystem(&v9fs_fs_type);
597 if (err < 0) { 598 if (err < 0) {
598 printk(KERN_ERR "Failed to register filesystem\n"); 599 pr_err("Failed to register filesystem\n");
599 return err; 600 return err;
600 } 601 }
601 602
602 err = v9fs_cache_register(); 603 err = v9fs_cache_register();
603 if (err < 0) { 604 if (err < 0) {
604 printk(KERN_ERR "Failed to register v9fs for caching\n"); 605 pr_err("Failed to register v9fs for caching\n");
605 goto out_fs_unreg; 606 goto out_fs_unreg;
606 } 607 }
607 608
608 err = v9fs_sysfs_init(); 609 err = v9fs_sysfs_init();
609 if (err < 0) { 610 if (err < 0) {
610 printk(KERN_ERR "Failed to register with sysfs\n"); 611 pr_err("Failed to register with sysfs\n");
611 goto out_sysfs_cleanup; 612 goto out_sysfs_cleanup;
612 } 613 }
613 614
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 410ffd6ceb5f..dc95a252523d 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -54,9 +54,9 @@ extern struct kmem_cache *v9fs_inode_cache;
54 54
55struct inode *v9fs_alloc_inode(struct super_block *sb); 55struct inode *v9fs_alloc_inode(struct super_block *sb);
56void v9fs_destroy_inode(struct inode *inode); 56void v9fs_destroy_inode(struct inode *inode);
57struct inode *v9fs_get_inode(struct super_block *sb, int mode, dev_t); 57struct inode *v9fs_get_inode(struct super_block *sb, umode_t mode, dev_t);
58int v9fs_init_inode(struct v9fs_session_info *v9ses, 58int v9fs_init_inode(struct v9fs_session_info *v9ses,
59 struct inode *inode, int mode, dev_t); 59 struct inode *inode, umode_t mode, dev_t);
60void v9fs_evict_inode(struct inode *inode); 60void v9fs_evict_inode(struct inode *inode);
61ino_t v9fs_qid2ino(struct p9_qid *qid); 61ino_t v9fs_qid2ino(struct p9_qid *qid);
62void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *); 62void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 2524e4cbb8ea..0ad61c6a65a5 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -56,7 +56,7 @@ static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page)
56 struct inode *inode; 56 struct inode *inode;
57 57
58 inode = page->mapping->host; 58 inode = page->mapping->host;
59 P9_DPRINTK(P9_DEBUG_VFS, "\n"); 59 p9_debug(P9_DEBUG_VFS, "\n");
60 60
61 BUG_ON(!PageLocked(page)); 61 BUG_ON(!PageLocked(page));
62 62
@@ -116,14 +116,14 @@ static int v9fs_vfs_readpages(struct file *filp, struct address_space *mapping,
116 struct inode *inode; 116 struct inode *inode;
117 117
118 inode = mapping->host; 118 inode = mapping->host;
119 P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, filp); 119 p9_debug(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, filp);
120 120
121 ret = v9fs_readpages_from_fscache(inode, mapping, pages, &nr_pages); 121 ret = v9fs_readpages_from_fscache(inode, mapping, pages, &nr_pages);
122 if (ret == 0) 122 if (ret == 0)
123 return ret; 123 return ret;
124 124
125 ret = read_cache_pages(mapping, pages, (void *)v9fs_vfs_readpage, filp); 125 ret = read_cache_pages(mapping, pages, (void *)v9fs_vfs_readpage, filp);
126 P9_DPRINTK(P9_DEBUG_VFS, " = %d\n", ret); 126 p9_debug(P9_DEBUG_VFS, " = %d\n", ret);
127 return ret; 127 return ret;
128} 128}
129 129
@@ -263,10 +263,9 @@ v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
263 * Now that we do caching with cache mode enabled, We need 263 * Now that we do caching with cache mode enabled, We need
264 * to support direct IO 264 * to support direct IO
265 */ 265 */
266 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) " 266 p9_debug(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) off/no(%lld/%lu) EINVAL\n",
267 "off/no(%lld/%lu) EINVAL\n", 267 iocb->ki_filp->f_path.dentry->d_name.name,
268 iocb->ki_filp->f_path.dentry->d_name.name, 268 (long long)pos, nr_segs);
269 (long long) pos, nr_segs);
270 269
271 return -EINVAL; 270 return -EINVAL;
272} 271}
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index e022890c6f40..d529437ff442 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -53,8 +53,8 @@
53 53
54static int v9fs_dentry_delete(const struct dentry *dentry) 54static int v9fs_dentry_delete(const struct dentry *dentry)
55{ 55{
56 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name, 56 p9_debug(P9_DEBUG_VFS, " dentry: %s (%p)\n",
57 dentry); 57 dentry->d_name.name, dentry);
58 58
59 return 1; 59 return 1;
60} 60}
@@ -66,8 +66,8 @@ static int v9fs_dentry_delete(const struct dentry *dentry)
66 */ 66 */
67static int v9fs_cached_dentry_delete(const struct dentry *dentry) 67static int v9fs_cached_dentry_delete(const struct dentry *dentry)
68{ 68{
69 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", 69 p9_debug(P9_DEBUG_VFS, " dentry: %s (%p)\n",
70 dentry->d_name.name, dentry); 70 dentry->d_name.name, dentry);
71 71
72 /* Don't cache negative dentries */ 72 /* Don't cache negative dentries */
73 if (!dentry->d_inode) 73 if (!dentry->d_inode)
@@ -86,8 +86,8 @@ static void v9fs_dentry_release(struct dentry *dentry)
86 struct v9fs_dentry *dent; 86 struct v9fs_dentry *dent;
87 struct p9_fid *temp, *current_fid; 87 struct p9_fid *temp, *current_fid;
88 88
89 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name, 89 p9_debug(P9_DEBUG_VFS, " dentry: %s (%p)\n",
90 dentry); 90 dentry->d_name.name, dentry);
91 dent = dentry->d_fsdata; 91 dent = dentry->d_fsdata;
92 if (dent) { 92 if (dent) {
93 list_for_each_entry_safe(current_fid, temp, &dent->fidlist, 93 list_for_each_entry_safe(current_fid, temp, &dent->fidlist,
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 598fff1a54e5..ff911e779651 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -140,7 +140,7 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
140 int reclen = 0; 140 int reclen = 0;
141 struct p9_rdir *rdir; 141 struct p9_rdir *rdir;
142 142
143 P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name); 143 p9_debug(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name);
144 fid = filp->private_data; 144 fid = filp->private_data;
145 145
146 buflen = fid->clnt->msize - P9_IOHDRSZ; 146 buflen = fid->clnt->msize - P9_IOHDRSZ;
@@ -168,7 +168,7 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
168 err = p9stat_read(fid->clnt, rdir->buf + rdir->head, 168 err = p9stat_read(fid->clnt, rdir->buf + rdir->head,
169 rdir->tail - rdir->head, &st); 169 rdir->tail - rdir->head, &st);
170 if (err) { 170 if (err) {
171 P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err); 171 p9_debug(P9_DEBUG_VFS, "returned %d\n", err);
172 err = -EIO; 172 err = -EIO;
173 p9stat_free(&st); 173 p9stat_free(&st);
174 goto unlock_and_exit; 174 goto unlock_and_exit;
@@ -213,7 +213,7 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
213 struct p9_dirent curdirent; 213 struct p9_dirent curdirent;
214 u64 oldoffset = 0; 214 u64 oldoffset = 0;
215 215
216 P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name); 216 p9_debug(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name);
217 fid = filp->private_data; 217 fid = filp->private_data;
218 218
219 buflen = fid->clnt->msize - P9_READDIRHDRSZ; 219 buflen = fid->clnt->msize - P9_READDIRHDRSZ;
@@ -244,7 +244,7 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
244 rdir->tail - rdir->head, 244 rdir->tail - rdir->head,
245 &curdirent); 245 &curdirent);
246 if (err < 0) { 246 if (err < 0) {
247 P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err); 247 p9_debug(P9_DEBUG_VFS, "returned %d\n", err);
248 err = -EIO; 248 err = -EIO;
249 goto unlock_and_exit; 249 goto unlock_and_exit;
250 } 250 }
@@ -290,9 +290,8 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
290 struct p9_fid *fid; 290 struct p9_fid *fid;
291 291
292 fid = filp->private_data; 292 fid = filp->private_data;
293 P9_DPRINTK(P9_DEBUG_VFS, 293 p9_debug(P9_DEBUG_VFS, "inode: %p filp: %p fid: %d\n",
294 "v9fs_dir_release: inode: %p filp: %p fid: %d\n", 294 inode, filp, fid ? fid->fid : -1);
295 inode, filp, fid ? fid->fid : -1);
296 if (fid) 295 if (fid)
297 p9_client_clunk(fid); 296 p9_client_clunk(fid);
298 return 0; 297 return 0;
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 62857a810a79..fc06fd27065e 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -61,7 +61,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
61 struct p9_fid *fid; 61 struct p9_fid *fid;
62 int omode; 62 int omode;
63 63
64 P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, file); 64 p9_debug(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, file);
65 v9inode = V9FS_I(inode); 65 v9inode = V9FS_I(inode);
66 v9ses = v9fs_inode2v9ses(inode); 66 v9ses = v9fs_inode2v9ses(inode);
67 if (v9fs_proto_dotl(v9ses)) 67 if (v9fs_proto_dotl(v9ses))
@@ -135,7 +135,7 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
135 int res = 0; 135 int res = 0;
136 struct inode *inode = filp->f_path.dentry->d_inode; 136 struct inode *inode = filp->f_path.dentry->d_inode;
137 137
138 P9_DPRINTK(P9_DEBUG_VFS, "filp: %p lock: %p\n", filp, fl); 138 p9_debug(P9_DEBUG_VFS, "filp: %p lock: %p\n", filp, fl);
139 139
140 /* No mandatory locks */ 140 /* No mandatory locks */
141 if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) 141 if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
@@ -204,7 +204,8 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
204 break; 204 break;
205 if (status == P9_LOCK_BLOCKED && !IS_SETLKW(cmd)) 205 if (status == P9_LOCK_BLOCKED && !IS_SETLKW(cmd))
206 break; 206 break;
207 schedule_timeout_interruptible(P9_LOCK_TIMEOUT); 207 if (schedule_timeout_interruptible(P9_LOCK_TIMEOUT) != 0)
208 break;
208 } 209 }
209 210
210 /* map 9p status to VFS status */ 211 /* map 9p status to VFS status */
@@ -304,8 +305,8 @@ static int v9fs_file_lock_dotl(struct file *filp, int cmd, struct file_lock *fl)
304 struct inode *inode = filp->f_path.dentry->d_inode; 305 struct inode *inode = filp->f_path.dentry->d_inode;
305 int ret = -ENOLCK; 306 int ret = -ENOLCK;
306 307
307 P9_DPRINTK(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n", filp, 308 p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n",
308 cmd, fl, filp->f_path.dentry->d_name.name); 309 filp, cmd, fl, filp->f_path.dentry->d_name.name);
309 310
310 /* No mandatory locks */ 311 /* No mandatory locks */
311 if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) 312 if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
@@ -340,8 +341,8 @@ static int v9fs_file_flock_dotl(struct file *filp, int cmd,
340 struct inode *inode = filp->f_path.dentry->d_inode; 341 struct inode *inode = filp->f_path.dentry->d_inode;
341 int ret = -ENOLCK; 342 int ret = -ENOLCK;
342 343
343 P9_DPRINTK(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n", filp, 344 p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n",
344 cmd, fl, filp->f_path.dentry->d_name.name); 345 filp, cmd, fl, filp->f_path.dentry->d_name.name);
345 346
346 /* No mandatory locks */ 347 /* No mandatory locks */
347 if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) 348 if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
@@ -384,8 +385,8 @@ v9fs_fid_readn(struct p9_fid *fid, char *data, char __user *udata, u32 count,
384{ 385{
385 int n, total, size; 386 int n, total, size;
386 387
387 P9_DPRINTK(P9_DEBUG_VFS, "fid %d offset %llu count %d\n", fid->fid, 388 p9_debug(P9_DEBUG_VFS, "fid %d offset %llu count %d\n",
388 (long long unsigned) offset, count); 389 fid->fid, (long long unsigned)offset, count);
389 n = 0; 390 n = 0;
390 total = 0; 391 total = 0;
391 size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ; 392 size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ;
@@ -443,7 +444,7 @@ v9fs_file_read(struct file *filp, char __user *udata, size_t count,
443 struct p9_fid *fid; 444 struct p9_fid *fid;
444 size_t size; 445 size_t size;
445 446
446 P9_DPRINTK(P9_DEBUG_VFS, "count %zu offset %lld\n", count, *offset); 447 p9_debug(P9_DEBUG_VFS, "count %zu offset %lld\n", count, *offset);
447 fid = filp->private_data; 448 fid = filp->private_data;
448 449
449 size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ; 450 size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ;
@@ -470,8 +471,8 @@ v9fs_file_write_internal(struct inode *inode, struct p9_fid *fid,
470 loff_t origin = *offset; 471 loff_t origin = *offset;
471 unsigned long pg_start, pg_end; 472 unsigned long pg_start, pg_end;
472 473
473 P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data, 474 p9_debug(P9_DEBUG_VFS, "data %p count %d offset %x\n",
474 (int)count, (int)*offset); 475 data, (int)count, (int)*offset);
475 476
476 clnt = fid->clnt; 477 clnt = fid->clnt;
477 do { 478 do {
@@ -552,7 +553,7 @@ static int v9fs_file_fsync(struct file *filp, loff_t start, loff_t end,
552 return retval; 553 return retval;
553 554
554 mutex_lock(&inode->i_mutex); 555 mutex_lock(&inode->i_mutex);
555 P9_DPRINTK(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync); 556 p9_debug(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync);
556 557
557 fid = filp->private_data; 558 fid = filp->private_data;
558 v9fs_blank_wstat(&wstat); 559 v9fs_blank_wstat(&wstat);
@@ -575,8 +576,7 @@ int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end,
575 return retval; 576 return retval;
576 577
577 mutex_lock(&inode->i_mutex); 578 mutex_lock(&inode->i_mutex);
578 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_file_fsync_dotl: filp %p datasync %x\n", 579 p9_debug(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync);
579 filp, datasync);
580 580
581 fid = filp->private_data; 581 fid = filp->private_data;
582 582
@@ -607,8 +607,8 @@ v9fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
607 struct inode *inode = filp->f_path.dentry->d_inode; 607 struct inode *inode = filp->f_path.dentry->d_inode;
608 608
609 609
610 P9_DPRINTK(P9_DEBUG_VFS, "page %p fid %lx\n", 610 p9_debug(P9_DEBUG_VFS, "page %p fid %lx\n",
611 page, (unsigned long)filp->private_data); 611 page, (unsigned long)filp->private_data);
612 612
613 v9inode = V9FS_I(inode); 613 v9inode = V9FS_I(inode);
614 /* make sure the cache has finished storing the page */ 614 /* make sure the cache has finished storing the page */
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 879ed8851737..014c8dd62962 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -23,6 +23,8 @@
23 * 23 *
24 */ 24 */
25 25
26#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
27
26#include <linux/module.h> 28#include <linux/module.h>
27#include <linux/errno.h> 29#include <linux/errno.h>
28#include <linux/fs.h> 30#include <linux/fs.h>
@@ -59,15 +61,13 @@ static const struct inode_operations v9fs_symlink_inode_operations;
59 * 61 *
60 */ 62 */
61 63
62static int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode) 64static u32 unixmode2p9mode(struct v9fs_session_info *v9ses, umode_t mode)
63{ 65{
64 int res; 66 int res;
65 res = mode & 0777; 67 res = mode & 0777;
66 if (S_ISDIR(mode)) 68 if (S_ISDIR(mode))
67 res |= P9_DMDIR; 69 res |= P9_DMDIR;
68 if (v9fs_proto_dotu(v9ses)) { 70 if (v9fs_proto_dotu(v9ses)) {
69 if (S_ISLNK(mode))
70 res |= P9_DMSYMLINK;
71 if (v9ses->nodev == 0) { 71 if (v9ses->nodev == 0) {
72 if (S_ISSOCK(mode)) 72 if (S_ISSOCK(mode))
73 res |= P9_DMSOCKET; 73 res |= P9_DMSOCKET;
@@ -85,10 +85,33 @@ static int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode)
85 res |= P9_DMSETGID; 85 res |= P9_DMSETGID;
86 if ((mode & S_ISVTX) == S_ISVTX) 86 if ((mode & S_ISVTX) == S_ISVTX)
87 res |= P9_DMSETVTX; 87 res |= P9_DMSETVTX;
88 if ((mode & P9_DMLINK))
89 res |= P9_DMLINK;
90 } 88 }
89 return res;
90}
91 91
92/**
93 * p9mode2perm- convert plan9 mode bits to unix permission bits
94 * @v9ses: v9fs session information
95 * @stat: p9_wstat from which mode need to be derived
96 *
97 */
98static int p9mode2perm(struct v9fs_session_info *v9ses,
99 struct p9_wstat *stat)
100{
101 int res;
102 int mode = stat->mode;
103
104 res = mode & S_IALLUGO;
105 if (v9fs_proto_dotu(v9ses)) {
106 if ((mode & P9_DMSETUID) == P9_DMSETUID)
107 res |= S_ISUID;
108
109 if ((mode & P9_DMSETGID) == P9_DMSETGID)
110 res |= S_ISGID;
111
112 if ((mode & P9_DMSETVTX) == P9_DMSETVTX)
113 res |= S_ISVTX;
114 }
92 return res; 115 return res;
93} 116}
94 117
@@ -99,14 +122,14 @@ static int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode)
99 * @rdev: major number, minor number in case of device files. 122 * @rdev: major number, minor number in case of device files.
100 * 123 *
101 */ 124 */
102static int p9mode2unixmode(struct v9fs_session_info *v9ses, 125static umode_t p9mode2unixmode(struct v9fs_session_info *v9ses,
103 struct p9_wstat *stat, dev_t *rdev) 126 struct p9_wstat *stat, dev_t *rdev)
104{ 127{
105 int res; 128 int res;
106 int mode = stat->mode; 129 u32 mode = stat->mode;
107 130
108 res = mode & S_IALLUGO;
109 *rdev = 0; 131 *rdev = 0;
132 res = p9mode2perm(v9ses, stat);
110 133
111 if ((mode & P9_DMDIR) == P9_DMDIR) 134 if ((mode & P9_DMDIR) == P9_DMDIR)
112 res |= S_IFDIR; 135 res |= S_IFDIR;
@@ -133,24 +156,13 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses,
133 res |= S_IFBLK; 156 res |= S_IFBLK;
134 break; 157 break;
135 default: 158 default:
136 P9_DPRINTK(P9_DEBUG_ERROR, 159 p9_debug(P9_DEBUG_ERROR, "Unknown special type %c %s\n",
137 "Unknown special type %c %s\n", type, 160 type, stat->extension);
138 stat->extension);
139 }; 161 };
140 *rdev = MKDEV(major, minor); 162 *rdev = MKDEV(major, minor);
141 } else 163 } else
142 res |= S_IFREG; 164 res |= S_IFREG;
143 165
144 if (v9fs_proto_dotu(v9ses)) {
145 if ((mode & P9_DMSETUID) == P9_DMSETUID)
146 res |= S_ISUID;
147
148 if ((mode & P9_DMSETGID) == P9_DMSETGID)
149 res |= S_ISGID;
150
151 if ((mode & P9_DMSETVTX) == P9_DMSETVTX)
152 res |= S_ISVTX;
153 }
154 return res; 166 return res;
155} 167}
156 168
@@ -251,7 +263,6 @@ struct inode *v9fs_alloc_inode(struct super_block *sb)
251static void v9fs_i_callback(struct rcu_head *head) 263static void v9fs_i_callback(struct rcu_head *head)
252{ 264{
253 struct inode *inode = container_of(head, struct inode, i_rcu); 265 struct inode *inode = container_of(head, struct inode, i_rcu);
254 INIT_LIST_HEAD(&inode->i_dentry);
255 kmem_cache_free(v9fs_inode_cache, V9FS_I(inode)); 266 kmem_cache_free(v9fs_inode_cache, V9FS_I(inode));
256} 267}
257 268
@@ -261,7 +272,7 @@ void v9fs_destroy_inode(struct inode *inode)
261} 272}
262 273
263int v9fs_init_inode(struct v9fs_session_info *v9ses, 274int v9fs_init_inode(struct v9fs_session_info *v9ses,
264 struct inode *inode, int mode, dev_t rdev) 275 struct inode *inode, umode_t mode, dev_t rdev)
265{ 276{
266 int err = 0; 277 int err = 0;
267 278
@@ -281,8 +292,8 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses,
281 } else if (v9fs_proto_dotu(v9ses)) { 292 } else if (v9fs_proto_dotu(v9ses)) {
282 inode->i_op = &v9fs_file_inode_operations; 293 inode->i_op = &v9fs_file_inode_operations;
283 } else { 294 } else {
284 P9_DPRINTK(P9_DEBUG_ERROR, 295 p9_debug(P9_DEBUG_ERROR,
285 "special files without extended mode\n"); 296 "special files without extended mode\n");
286 err = -EINVAL; 297 err = -EINVAL;
287 goto error; 298 goto error;
288 } 299 }
@@ -307,8 +318,8 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses,
307 break; 318 break;
308 case S_IFLNK: 319 case S_IFLNK:
309 if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)) { 320 if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)) {
310 P9_DPRINTK(P9_DEBUG_ERROR, "extended modes used with " 321 p9_debug(P9_DEBUG_ERROR,
311 "legacy protocol.\n"); 322 "extended modes used with legacy protocol\n");
312 err = -EINVAL; 323 err = -EINVAL;
313 goto error; 324 goto error;
314 } 325 }
@@ -335,8 +346,8 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses,
335 346
336 break; 347 break;
337 default: 348 default:
338 P9_DPRINTK(P9_DEBUG_ERROR, "BAD mode 0x%x S_IFMT 0x%x\n", 349 p9_debug(P9_DEBUG_ERROR, "BAD mode 0x%hx S_IFMT 0x%x\n",
339 mode, mode & S_IFMT); 350 mode, mode & S_IFMT);
340 err = -EINVAL; 351 err = -EINVAL;
341 goto error; 352 goto error;
342 } 353 }
@@ -352,17 +363,18 @@ error:
352 * 363 *
353 */ 364 */
354 365
355struct inode *v9fs_get_inode(struct super_block *sb, int mode, dev_t rdev) 366struct inode *v9fs_get_inode(struct super_block *sb, umode_t mode, dev_t rdev)
356{ 367{
357 int err; 368 int err;
358 struct inode *inode; 369 struct inode *inode;
359 struct v9fs_session_info *v9ses = sb->s_fs_info; 370 struct v9fs_session_info *v9ses = sb->s_fs_info;
360 371
361 P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode); 372 p9_debug(P9_DEBUG_VFS, "super block: %p mode: %ho\n", sb, mode);
362 373
363 inode = new_inode(sb); 374 inode = new_inode(sb);
364 if (!inode) { 375 if (!inode) {
365 P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n"); 376 pr_warn("%s (%d): Problem allocating inode\n",
377 __func__, task_pid_nr(current));
366 return ERR_PTR(-ENOMEM); 378 return ERR_PTR(-ENOMEM);
367 } 379 }
368 err = v9fs_init_inode(v9ses, inode, mode, rdev); 380 err = v9fs_init_inode(v9ses, inode, mode, rdev);
@@ -492,7 +504,8 @@ static struct inode *v9fs_qid_iget(struct super_block *sb,
492 int new) 504 int new)
493{ 505{
494 dev_t rdev; 506 dev_t rdev;
495 int retval, umode; 507 int retval;
508 umode_t umode;
496 unsigned long i_ino; 509 unsigned long i_ino;
497 struct inode *inode; 510 struct inode *inode;
498 struct v9fs_session_info *v9ses = sb->s_fs_info; 511 struct v9fs_session_info *v9ses = sb->s_fs_info;
@@ -578,15 +591,15 @@ static int v9fs_remove(struct inode *dir, struct dentry *dentry, int flags)
578 struct p9_fid *v9fid, *dfid; 591 struct p9_fid *v9fid, *dfid;
579 struct v9fs_session_info *v9ses; 592 struct v9fs_session_info *v9ses;
580 593
581 P9_DPRINTK(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %x\n", 594 p9_debug(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %x\n",
582 dir, dentry, flags); 595 dir, dentry, flags);
583 596
584 v9ses = v9fs_inode2v9ses(dir); 597 v9ses = v9fs_inode2v9ses(dir);
585 inode = dentry->d_inode; 598 inode = dentry->d_inode;
586 dfid = v9fs_fid_lookup(dentry->d_parent); 599 dfid = v9fs_fid_lookup(dentry->d_parent);
587 if (IS_ERR(dfid)) { 600 if (IS_ERR(dfid)) {
588 retval = PTR_ERR(dfid); 601 retval = PTR_ERR(dfid);
589 P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", retval); 602 p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", retval);
590 return retval; 603 return retval;
591 } 604 }
592 if (v9fs_proto_dotl(v9ses)) 605 if (v9fs_proto_dotl(v9ses))
@@ -635,7 +648,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
635 struct p9_fid *dfid, *ofid, *fid; 648 struct p9_fid *dfid, *ofid, *fid;
636 struct inode *inode; 649 struct inode *inode;
637 650
638 P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name); 651 p9_debug(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
639 652
640 err = 0; 653 err = 0;
641 ofid = NULL; 654 ofid = NULL;
@@ -644,7 +657,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
644 dfid = v9fs_fid_lookup(dentry->d_parent); 657 dfid = v9fs_fid_lookup(dentry->d_parent);
645 if (IS_ERR(dfid)) { 658 if (IS_ERR(dfid)) {
646 err = PTR_ERR(dfid); 659 err = PTR_ERR(dfid);
647 P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err); 660 p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
648 return ERR_PTR(err); 661 return ERR_PTR(err);
649 } 662 }
650 663
@@ -652,36 +665,41 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
652 ofid = p9_client_walk(dfid, 0, NULL, 1); 665 ofid = p9_client_walk(dfid, 0, NULL, 1);
653 if (IS_ERR(ofid)) { 666 if (IS_ERR(ofid)) {
654 err = PTR_ERR(ofid); 667 err = PTR_ERR(ofid);
655 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); 668 p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
656 return ERR_PTR(err); 669 return ERR_PTR(err);
657 } 670 }
658 671
659 err = p9_client_fcreate(ofid, name, perm, mode, extension); 672 err = p9_client_fcreate(ofid, name, perm, mode, extension);
660 if (err < 0) { 673 if (err < 0) {
661 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_fcreate failed %d\n", err); 674 p9_debug(P9_DEBUG_VFS, "p9_client_fcreate failed %d\n", err);
662 goto error; 675 goto error;
663 } 676 }
664 677
665 /* now walk from the parent so we can get unopened fid */ 678 if (!(perm & P9_DMLINK)) {
666 fid = p9_client_walk(dfid, 1, &name, 1); 679 /* now walk from the parent so we can get unopened fid */
667 if (IS_ERR(fid)) { 680 fid = p9_client_walk(dfid, 1, &name, 1);
668 err = PTR_ERR(fid); 681 if (IS_ERR(fid)) {
669 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); 682 err = PTR_ERR(fid);
670 fid = NULL; 683 p9_debug(P9_DEBUG_VFS,
671 goto error; 684 "p9_client_walk failed %d\n", err);
672 } 685 fid = NULL;
673 686 goto error;
674 /* instantiate inode and assign the unopened fid to the dentry */ 687 }
675 inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); 688 /*
676 if (IS_ERR(inode)) { 689 * instantiate inode and assign the unopened fid to the dentry
677 err = PTR_ERR(inode); 690 */
678 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err); 691 inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
679 goto error; 692 if (IS_ERR(inode)) {
693 err = PTR_ERR(inode);
694 p9_debug(P9_DEBUG_VFS,
695 "inode creation failed %d\n", err);
696 goto error;
697 }
698 err = v9fs_fid_add(dentry, fid);
699 if (err < 0)
700 goto error;
701 d_instantiate(dentry, inode);
680 } 702 }
681 err = v9fs_fid_add(dentry, fid);
682 if (err < 0)
683 goto error;
684 d_instantiate(dentry, inode);
685 return ofid; 703 return ofid;
686error: 704error:
687 if (ofid) 705 if (ofid)
@@ -703,7 +721,7 @@ error:
703 */ 721 */
704 722
705static int 723static int
706v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode, 724v9fs_vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
707 struct nameidata *nd) 725 struct nameidata *nd)
708{ 726{
709 int err; 727 int err;
@@ -786,14 +804,14 @@ error:
786 * 804 *
787 */ 805 */
788 806
789static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 807static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
790{ 808{
791 int err; 809 int err;
792 u32 perm; 810 u32 perm;
793 struct p9_fid *fid; 811 struct p9_fid *fid;
794 struct v9fs_session_info *v9ses; 812 struct v9fs_session_info *v9ses;
795 813
796 P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name); 814 p9_debug(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
797 err = 0; 815 err = 0;
798 v9ses = v9fs_inode2v9ses(dir); 816 v9ses = v9fs_inode2v9ses(dir);
799 perm = unixmode2p9mode(v9ses, mode | S_IFDIR); 817 perm = unixmode2p9mode(v9ses, mode | S_IFDIR);
@@ -831,8 +849,8 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
831 char *name; 849 char *name;
832 int result = 0; 850 int result = 0;
833 851
834 P9_DPRINTK(P9_DEBUG_VFS, "dir: %p dentry: (%s) %p nameidata: %p\n", 852 p9_debug(P9_DEBUG_VFS, "dir: %p dentry: (%s) %p nameidata: %p\n",
835 dir, dentry->d_name.name, dentry, nameidata); 853 dir, dentry->d_name.name, dentry, nameidata);
836 854
837 if (dentry->d_name.len > NAME_MAX) 855 if (dentry->d_name.len > NAME_MAX)
838 return ERR_PTR(-ENAMETOOLONG); 856 return ERR_PTR(-ENAMETOOLONG);
@@ -938,7 +956,7 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
938 struct p9_fid *newdirfid; 956 struct p9_fid *newdirfid;
939 struct p9_wstat wstat; 957 struct p9_wstat wstat;
940 958
941 P9_DPRINTK(P9_DEBUG_VFS, "\n"); 959 p9_debug(P9_DEBUG_VFS, "\n");
942 retval = 0; 960 retval = 0;
943 old_inode = old_dentry->d_inode; 961 old_inode = old_dentry->d_inode;
944 new_inode = new_dentry->d_inode; 962 new_inode = new_dentry->d_inode;
@@ -974,8 +992,7 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
974 * 9P .u can only handle file rename in the same directory 992 * 9P .u can only handle file rename in the same directory
975 */ 993 */
976 994
977 P9_DPRINTK(P9_DEBUG_ERROR, 995 p9_debug(P9_DEBUG_ERROR, "old dir and new dir are different\n");
978 "old dir and new dir are different\n");
979 retval = -EXDEV; 996 retval = -EXDEV;
980 goto clunk_newdir; 997 goto clunk_newdir;
981 } 998 }
@@ -1031,7 +1048,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
1031 struct p9_fid *fid; 1048 struct p9_fid *fid;
1032 struct p9_wstat *st; 1049 struct p9_wstat *st;
1033 1050
1034 P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry); 1051 p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
1035 err = -EPERM; 1052 err = -EPERM;
1036 v9ses = v9fs_dentry2v9ses(dentry); 1053 v9ses = v9fs_dentry2v9ses(dentry);
1037 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { 1054 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
@@ -1068,7 +1085,7 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
1068 struct p9_fid *fid; 1085 struct p9_fid *fid;
1069 struct p9_wstat wstat; 1086 struct p9_wstat wstat;
1070 1087
1071 P9_DPRINTK(P9_DEBUG_VFS, "\n"); 1088 p9_debug(P9_DEBUG_VFS, "\n");
1072 retval = inode_change_ok(dentry->d_inode, iattr); 1089 retval = inode_change_ok(dentry->d_inode, iattr);
1073 if (retval) 1090 if (retval)
1074 return retval; 1091 return retval;
@@ -1131,7 +1148,7 @@ void
1131v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode, 1148v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
1132 struct super_block *sb) 1149 struct super_block *sb)
1133{ 1150{
1134 mode_t mode; 1151 umode_t mode;
1135 char ext[32]; 1152 char ext[32];
1136 char tag_name[14]; 1153 char tag_name[14];
1137 unsigned int i_nlink; 1154 unsigned int i_nlink;
@@ -1167,7 +1184,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
1167 set_nlink(inode, i_nlink); 1184 set_nlink(inode, i_nlink);
1168 } 1185 }
1169 } 1186 }
1170 mode = stat->mode & S_IALLUGO; 1187 mode = p9mode2perm(v9ses, stat);
1171 mode |= inode->i_mode & ~S_IALLUGO; 1188 mode |= inode->i_mode & ~S_IALLUGO;
1172 inode->i_mode = mode; 1189 inode->i_mode = mode;
1173 i_size_write(inode, stat->length); 1190 i_size_write(inode, stat->length);
@@ -1213,7 +1230,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
1213 struct p9_fid *fid; 1230 struct p9_fid *fid;
1214 struct p9_wstat *st; 1231 struct p9_wstat *st;
1215 1232
1216 P9_DPRINTK(P9_DEBUG_VFS, " %s\n", dentry->d_name.name); 1233 p9_debug(P9_DEBUG_VFS, " %s\n", dentry->d_name.name);
1217 retval = -EPERM; 1234 retval = -EPERM;
1218 v9ses = v9fs_dentry2v9ses(dentry); 1235 v9ses = v9fs_dentry2v9ses(dentry);
1219 fid = v9fs_fid_lookup(dentry); 1236 fid = v9fs_fid_lookup(dentry);
@@ -1235,8 +1252,8 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
1235 /* copy extension buffer into buffer */ 1252 /* copy extension buffer into buffer */
1236 strncpy(buffer, st->extension, buflen); 1253 strncpy(buffer, st->extension, buflen);
1237 1254
1238 P9_DPRINTK(P9_DEBUG_VFS, 1255 p9_debug(P9_DEBUG_VFS, "%s -> %s (%s)\n",
1239 "%s -> %s (%s)\n", dentry->d_name.name, st->extension, buffer); 1256 dentry->d_name.name, st->extension, buffer);
1240 1257
1241 retval = strnlen(buffer, buflen); 1258 retval = strnlen(buffer, buflen);
1242done: 1259done:
@@ -1257,7 +1274,7 @@ static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd)
1257 int len = 0; 1274 int len = 0;
1258 char *link = __getname(); 1275 char *link = __getname();
1259 1276
1260 P9_DPRINTK(P9_DEBUG_VFS, "%s n", dentry->d_name.name); 1277 p9_debug(P9_DEBUG_VFS, "%s\n", dentry->d_name.name);
1261 1278
1262 if (!link) 1279 if (!link)
1263 link = ERR_PTR(-ENOMEM); 1280 link = ERR_PTR(-ENOMEM);
@@ -1288,8 +1305,8 @@ v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
1288{ 1305{
1289 char *s = nd_get_link(nd); 1306 char *s = nd_get_link(nd);
1290 1307
1291 P9_DPRINTK(P9_DEBUG_VFS, " %s %s\n", dentry->d_name.name, 1308 p9_debug(P9_DEBUG_VFS, " %s %s\n",
1292 IS_ERR(s) ? "<error>" : s); 1309 dentry->d_name.name, IS_ERR(s) ? "<error>" : s);
1293 if (!IS_ERR(s)) 1310 if (!IS_ERR(s))
1294 __putname(s); 1311 __putname(s);
1295} 1312}
@@ -1304,19 +1321,17 @@ v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
1304 */ 1321 */
1305 1322
1306static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry, 1323static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
1307 int mode, const char *extension) 1324 u32 perm, const char *extension)
1308{ 1325{
1309 u32 perm;
1310 struct p9_fid *fid; 1326 struct p9_fid *fid;
1311 struct v9fs_session_info *v9ses; 1327 struct v9fs_session_info *v9ses;
1312 1328
1313 v9ses = v9fs_inode2v9ses(dir); 1329 v9ses = v9fs_inode2v9ses(dir);
1314 if (!v9fs_proto_dotu(v9ses)) { 1330 if (!v9fs_proto_dotu(v9ses)) {
1315 P9_DPRINTK(P9_DEBUG_ERROR, "not extended\n"); 1331 p9_debug(P9_DEBUG_ERROR, "not extended\n");
1316 return -EPERM; 1332 return -EPERM;
1317 } 1333 }
1318 1334
1319 perm = unixmode2p9mode(v9ses, mode);
1320 fid = v9fs_create(v9ses, dir, dentry, (char *) extension, perm, 1335 fid = v9fs_create(v9ses, dir, dentry, (char *) extension, perm,
1321 P9_OREAD); 1336 P9_OREAD);
1322 if (IS_ERR(fid)) 1337 if (IS_ERR(fid))
@@ -1340,10 +1355,10 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
1340static int 1355static int
1341v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) 1356v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
1342{ 1357{
1343 P9_DPRINTK(P9_DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, 1358 p9_debug(P9_DEBUG_VFS, " %lu,%s,%s\n",
1344 dentry->d_name.name, symname); 1359 dir->i_ino, dentry->d_name.name, symname);
1345 1360
1346 return v9fs_vfs_mkspecial(dir, dentry, S_IFLNK, symname); 1361 return v9fs_vfs_mkspecial(dir, dentry, P9_DMSYMLINK, symname);
1347} 1362}
1348 1363
1349/** 1364/**
@@ -1362,9 +1377,8 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
1362 char *name; 1377 char *name;
1363 struct p9_fid *oldfid; 1378 struct p9_fid *oldfid;
1364 1379
1365 P9_DPRINTK(P9_DEBUG_VFS, 1380 p9_debug(P9_DEBUG_VFS, " %lu,%s,%s\n",
1366 " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name, 1381 dir->i_ino, dentry->d_name.name, old_dentry->d_name.name);
1367 old_dentry->d_name.name);
1368 1382
1369 oldfid = v9fs_fid_clone(old_dentry); 1383 oldfid = v9fs_fid_clone(old_dentry);
1370 if (IS_ERR(oldfid)) 1384 if (IS_ERR(oldfid))
@@ -1398,14 +1412,16 @@ clunk_fid:
1398 */ 1412 */
1399 1413
1400static int 1414static int
1401v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) 1415v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
1402{ 1416{
1417 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
1403 int retval; 1418 int retval;
1404 char *name; 1419 char *name;
1420 u32 perm;
1405 1421
1406 P9_DPRINTK(P9_DEBUG_VFS, 1422 p9_debug(P9_DEBUG_VFS, " %lu,%s mode: %hx MAJOR: %u MINOR: %u\n",
1407 " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino, 1423 dir->i_ino, dentry->d_name.name, mode,
1408 dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev)); 1424 MAJOR(rdev), MINOR(rdev));
1409 1425
1410 if (!new_valid_dev(rdev)) 1426 if (!new_valid_dev(rdev))
1411 return -EINVAL; 1427 return -EINVAL;
@@ -1427,7 +1443,8 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
1427 return -EINVAL; 1443 return -EINVAL;
1428 } 1444 }
1429 1445
1430 retval = v9fs_vfs_mkspecial(dir, dentry, mode, name); 1446 perm = unixmode2p9mode(v9ses, mode);
1447 retval = v9fs_vfs_mkspecial(dir, dentry, perm, name);
1431 __putname(name); 1448 __putname(name);
1432 1449
1433 return retval; 1450 return retval;
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 0b5745e21946..a1e6c990cd41 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -48,7 +48,7 @@
48#include "acl.h" 48#include "acl.h"
49 49
50static int 50static int
51v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode, 51v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
52 dev_t rdev); 52 dev_t rdev);
53 53
54/** 54/**
@@ -253,7 +253,7 @@ int v9fs_open_to_dotl_flags(int flags)
253 */ 253 */
254 254
255static int 255static int
256v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode, 256v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
257 struct nameidata *nd) 257 struct nameidata *nd)
258{ 258{
259 int err = 0; 259 int err = 0;
@@ -283,13 +283,13 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
283 } 283 }
284 284
285 name = (char *) dentry->d_name.name; 285 name = (char *) dentry->d_name.name;
286 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_create_dotl: name:%s flags:0x%x " 286 p9_debug(P9_DEBUG_VFS, "name:%s flags:0x%x mode:0x%hx\n",
287 "mode:0x%x\n", name, flags, omode); 287 name, flags, omode);
288 288
289 dfid = v9fs_fid_lookup(dentry->d_parent); 289 dfid = v9fs_fid_lookup(dentry->d_parent);
290 if (IS_ERR(dfid)) { 290 if (IS_ERR(dfid)) {
291 err = PTR_ERR(dfid); 291 err = PTR_ERR(dfid);
292 P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err); 292 p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
293 return err; 293 return err;
294 } 294 }
295 295
@@ -297,7 +297,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
297 ofid = p9_client_walk(dfid, 0, NULL, 1); 297 ofid = p9_client_walk(dfid, 0, NULL, 1);
298 if (IS_ERR(ofid)) { 298 if (IS_ERR(ofid)) {
299 err = PTR_ERR(ofid); 299 err = PTR_ERR(ofid);
300 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); 300 p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
301 return err; 301 return err;
302 } 302 }
303 303
@@ -307,16 +307,15 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
307 /* Update mode based on ACL value */ 307 /* Update mode based on ACL value */
308 err = v9fs_acl_mode(dir, &mode, &dacl, &pacl); 308 err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
309 if (err) { 309 if (err) {
310 P9_DPRINTK(P9_DEBUG_VFS, 310 p9_debug(P9_DEBUG_VFS, "Failed to get acl values in creat %d\n",
311 "Failed to get acl values in creat %d\n", err); 311 err);
312 goto error; 312 goto error;
313 } 313 }
314 err = p9_client_create_dotl(ofid, name, v9fs_open_to_dotl_flags(flags), 314 err = p9_client_create_dotl(ofid, name, v9fs_open_to_dotl_flags(flags),
315 mode, gid, &qid); 315 mode, gid, &qid);
316 if (err < 0) { 316 if (err < 0) {
317 P9_DPRINTK(P9_DEBUG_VFS, 317 p9_debug(P9_DEBUG_VFS, "p9_client_open_dotl failed in creat %d\n",
318 "p9_client_open_dotl failed in creat %d\n", 318 err);
319 err);
320 goto error; 319 goto error;
321 } 320 }
322 v9fs_invalidate_inode_attr(dir); 321 v9fs_invalidate_inode_attr(dir);
@@ -325,14 +324,14 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
325 fid = p9_client_walk(dfid, 1, &name, 1); 324 fid = p9_client_walk(dfid, 1, &name, 1);
326 if (IS_ERR(fid)) { 325 if (IS_ERR(fid)) {
327 err = PTR_ERR(fid); 326 err = PTR_ERR(fid);
328 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); 327 p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
329 fid = NULL; 328 fid = NULL;
330 goto error; 329 goto error;
331 } 330 }
332 inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); 331 inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
333 if (IS_ERR(inode)) { 332 if (IS_ERR(inode)) {
334 err = PTR_ERR(inode); 333 err = PTR_ERR(inode);
335 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err); 334 p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", err);
336 goto error; 335 goto error;
337 } 336 }
338 err = v9fs_fid_add(dentry, fid); 337 err = v9fs_fid_add(dentry, fid);
@@ -395,7 +394,7 @@ err_clunk_old_fid:
395 */ 394 */
396 395
397static int v9fs_vfs_mkdir_dotl(struct inode *dir, 396static int v9fs_vfs_mkdir_dotl(struct inode *dir,
398 struct dentry *dentry, int omode) 397 struct dentry *dentry, umode_t omode)
399{ 398{
400 int err; 399 int err;
401 struct v9fs_session_info *v9ses; 400 struct v9fs_session_info *v9ses;
@@ -408,7 +407,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
408 struct dentry *dir_dentry; 407 struct dentry *dir_dentry;
409 struct posix_acl *dacl = NULL, *pacl = NULL; 408 struct posix_acl *dacl = NULL, *pacl = NULL;
410 409
411 P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name); 410 p9_debug(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
412 err = 0; 411 err = 0;
413 v9ses = v9fs_inode2v9ses(dir); 412 v9ses = v9fs_inode2v9ses(dir);
414 413
@@ -420,7 +419,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
420 dfid = v9fs_fid_lookup(dir_dentry); 419 dfid = v9fs_fid_lookup(dir_dentry);
421 if (IS_ERR(dfid)) { 420 if (IS_ERR(dfid)) {
422 err = PTR_ERR(dfid); 421 err = PTR_ERR(dfid);
423 P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err); 422 p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
424 dfid = NULL; 423 dfid = NULL;
425 goto error; 424 goto error;
426 } 425 }
@@ -430,8 +429,8 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
430 /* Update mode based on ACL value */ 429 /* Update mode based on ACL value */
431 err = v9fs_acl_mode(dir, &mode, &dacl, &pacl); 430 err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
432 if (err) { 431 if (err) {
433 P9_DPRINTK(P9_DEBUG_VFS, 432 p9_debug(P9_DEBUG_VFS, "Failed to get acl values in mkdir %d\n",
434 "Failed to get acl values in mkdir %d\n", err); 433 err);
435 goto error; 434 goto error;
436 } 435 }
437 name = (char *) dentry->d_name.name; 436 name = (char *) dentry->d_name.name;
@@ -444,8 +443,8 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
444 fid = p9_client_walk(dfid, 1, &name, 1); 443 fid = p9_client_walk(dfid, 1, &name, 1);
445 if (IS_ERR(fid)) { 444 if (IS_ERR(fid)) {
446 err = PTR_ERR(fid); 445 err = PTR_ERR(fid);
447 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", 446 p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
448 err); 447 err);
449 fid = NULL; 448 fid = NULL;
450 goto error; 449 goto error;
451 } 450 }
@@ -453,8 +452,8 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
453 inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); 452 inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
454 if (IS_ERR(inode)) { 453 if (IS_ERR(inode)) {
455 err = PTR_ERR(inode); 454 err = PTR_ERR(inode);
456 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", 455 p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n",
457 err); 456 err);
458 goto error; 457 goto error;
459 } 458 }
460 err = v9fs_fid_add(dentry, fid); 459 err = v9fs_fid_add(dentry, fid);
@@ -495,7 +494,7 @@ v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry,
495 struct p9_fid *fid; 494 struct p9_fid *fid;
496 struct p9_stat_dotl *st; 495 struct p9_stat_dotl *st;
497 496
498 P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry); 497 p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
499 err = -EPERM; 498 err = -EPERM;
500 v9ses = v9fs_dentry2v9ses(dentry); 499 v9ses = v9fs_dentry2v9ses(dentry);
501 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { 500 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
@@ -523,6 +522,46 @@ v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry,
523 return 0; 522 return 0;
524} 523}
525 524
525/*
526 * Attribute flags.
527 */
528#define P9_ATTR_MODE (1 << 0)
529#define P9_ATTR_UID (1 << 1)
530#define P9_ATTR_GID (1 << 2)
531#define P9_ATTR_SIZE (1 << 3)
532#define P9_ATTR_ATIME (1 << 4)
533#define P9_ATTR_MTIME (1 << 5)
534#define P9_ATTR_CTIME (1 << 6)
535#define P9_ATTR_ATIME_SET (1 << 7)
536#define P9_ATTR_MTIME_SET (1 << 8)
537
538struct dotl_iattr_map {
539 int iattr_valid;
540 int p9_iattr_valid;
541};
542
543static int v9fs_mapped_iattr_valid(int iattr_valid)
544{
545 int i;
546 int p9_iattr_valid = 0;
547 struct dotl_iattr_map dotl_iattr_map[] = {
548 { ATTR_MODE, P9_ATTR_MODE },
549 { ATTR_UID, P9_ATTR_UID },
550 { ATTR_GID, P9_ATTR_GID },
551 { ATTR_SIZE, P9_ATTR_SIZE },
552 { ATTR_ATIME, P9_ATTR_ATIME },
553 { ATTR_MTIME, P9_ATTR_MTIME },
554 { ATTR_CTIME, P9_ATTR_CTIME },
555 { ATTR_ATIME_SET, P9_ATTR_ATIME_SET },
556 { ATTR_MTIME_SET, P9_ATTR_MTIME_SET },
557 };
558 for (i = 0; i < ARRAY_SIZE(dotl_iattr_map); i++) {
559 if (iattr_valid & dotl_iattr_map[i].iattr_valid)
560 p9_iattr_valid |= dotl_iattr_map[i].p9_iattr_valid;
561 }
562 return p9_iattr_valid;
563}
564
526/** 565/**
527 * v9fs_vfs_setattr_dotl - set file metadata 566 * v9fs_vfs_setattr_dotl - set file metadata
528 * @dentry: file whose metadata to set 567 * @dentry: file whose metadata to set
@@ -537,13 +576,13 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
537 struct p9_fid *fid; 576 struct p9_fid *fid;
538 struct p9_iattr_dotl p9attr; 577 struct p9_iattr_dotl p9attr;
539 578
540 P9_DPRINTK(P9_DEBUG_VFS, "\n"); 579 p9_debug(P9_DEBUG_VFS, "\n");
541 580
542 retval = inode_change_ok(dentry->d_inode, iattr); 581 retval = inode_change_ok(dentry->d_inode, iattr);
543 if (retval) 582 if (retval)
544 return retval; 583 return retval;
545 584
546 p9attr.valid = iattr->ia_valid; 585 p9attr.valid = v9fs_mapped_iattr_valid(iattr->ia_valid);
547 p9attr.mode = iattr->ia_mode; 586 p9attr.mode = iattr->ia_mode;
548 p9attr.uid = iattr->ia_uid; 587 p9attr.uid = iattr->ia_uid;
549 p9attr.gid = iattr->ia_gid; 588 p9attr.gid = iattr->ia_gid;
@@ -594,7 +633,7 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
594void 633void
595v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode) 634v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
596{ 635{
597 mode_t mode; 636 umode_t mode;
598 struct v9fs_inode *v9inode = V9FS_I(inode); 637 struct v9fs_inode *v9inode = V9FS_I(inode);
599 638
600 if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) { 639 if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) {
@@ -670,14 +709,13 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
670 struct v9fs_session_info *v9ses; 709 struct v9fs_session_info *v9ses;
671 710
672 name = (char *) dentry->d_name.name; 711 name = (char *) dentry->d_name.name;
673 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_symlink_dotl : %lu,%s,%s\n", 712 p9_debug(P9_DEBUG_VFS, "%lu,%s,%s\n", dir->i_ino, name, symname);
674 dir->i_ino, name, symname);
675 v9ses = v9fs_inode2v9ses(dir); 713 v9ses = v9fs_inode2v9ses(dir);
676 714
677 dfid = v9fs_fid_lookup(dentry->d_parent); 715 dfid = v9fs_fid_lookup(dentry->d_parent);
678 if (IS_ERR(dfid)) { 716 if (IS_ERR(dfid)) {
679 err = PTR_ERR(dfid); 717 err = PTR_ERR(dfid);
680 P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err); 718 p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
681 return err; 719 return err;
682 } 720 }
683 721
@@ -687,7 +725,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
687 err = p9_client_symlink(dfid, name, (char *)symname, gid, &qid); 725 err = p9_client_symlink(dfid, name, (char *)symname, gid, &qid);
688 726
689 if (err < 0) { 727 if (err < 0) {
690 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_symlink failed %d\n", err); 728 p9_debug(P9_DEBUG_VFS, "p9_client_symlink failed %d\n", err);
691 goto error; 729 goto error;
692 } 730 }
693 731
@@ -697,8 +735,8 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
697 fid = p9_client_walk(dfid, 1, &name, 1); 735 fid = p9_client_walk(dfid, 1, &name, 1);
698 if (IS_ERR(fid)) { 736 if (IS_ERR(fid)) {
699 err = PTR_ERR(fid); 737 err = PTR_ERR(fid);
700 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", 738 p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
701 err); 739 err);
702 fid = NULL; 740 fid = NULL;
703 goto error; 741 goto error;
704 } 742 }
@@ -707,8 +745,8 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
707 inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); 745 inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
708 if (IS_ERR(inode)) { 746 if (IS_ERR(inode)) {
709 err = PTR_ERR(inode); 747 err = PTR_ERR(inode);
710 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", 748 p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n",
711 err); 749 err);
712 goto error; 750 goto error;
713 } 751 }
714 err = v9fs_fid_add(dentry, fid); 752 err = v9fs_fid_add(dentry, fid);
@@ -751,9 +789,8 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
751 struct p9_fid *dfid, *oldfid; 789 struct p9_fid *dfid, *oldfid;
752 struct v9fs_session_info *v9ses; 790 struct v9fs_session_info *v9ses;
753 791
754 P9_DPRINTK(P9_DEBUG_VFS, "dir ino: %lu, old_name: %s, new_name: %s\n", 792 p9_debug(P9_DEBUG_VFS, "dir ino: %lu, old_name: %s, new_name: %s\n",
755 dir->i_ino, old_dentry->d_name.name, 793 dir->i_ino, old_dentry->d_name.name, dentry->d_name.name);
756 dentry->d_name.name);
757 794
758 v9ses = v9fs_inode2v9ses(dir); 795 v9ses = v9fs_inode2v9ses(dir);
759 dir_dentry = v9fs_dentry_from_dir_inode(dir); 796 dir_dentry = v9fs_dentry_from_dir_inode(dir);
@@ -770,7 +807,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
770 err = p9_client_link(dfid, oldfid, (char *)dentry->d_name.name); 807 err = p9_client_link(dfid, oldfid, (char *)dentry->d_name.name);
771 808
772 if (err < 0) { 809 if (err < 0) {
773 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_link failed %d\n", err); 810 p9_debug(P9_DEBUG_VFS, "p9_client_link failed %d\n", err);
774 return err; 811 return err;
775 } 812 }
776 813
@@ -799,7 +836,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
799 * 836 *
800 */ 837 */
801static int 838static int
802v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode, 839v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
803 dev_t rdev) 840 dev_t rdev)
804{ 841{
805 int err; 842 int err;
@@ -813,9 +850,9 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
813 struct dentry *dir_dentry; 850 struct dentry *dir_dentry;
814 struct posix_acl *dacl = NULL, *pacl = NULL; 851 struct posix_acl *dacl = NULL, *pacl = NULL;
815 852
816 P9_DPRINTK(P9_DEBUG_VFS, 853 p9_debug(P9_DEBUG_VFS, " %lu,%s mode: %hx MAJOR: %u MINOR: %u\n",
817 " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino, 854 dir->i_ino, dentry->d_name.name, omode,
818 dentry->d_name.name, omode, MAJOR(rdev), MINOR(rdev)); 855 MAJOR(rdev), MINOR(rdev));
819 856
820 if (!new_valid_dev(rdev)) 857 if (!new_valid_dev(rdev))
821 return -EINVAL; 858 return -EINVAL;
@@ -825,7 +862,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
825 dfid = v9fs_fid_lookup(dir_dentry); 862 dfid = v9fs_fid_lookup(dir_dentry);
826 if (IS_ERR(dfid)) { 863 if (IS_ERR(dfid)) {
827 err = PTR_ERR(dfid); 864 err = PTR_ERR(dfid);
828 P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err); 865 p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
829 dfid = NULL; 866 dfid = NULL;
830 goto error; 867 goto error;
831 } 868 }
@@ -835,8 +872,8 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
835 /* Update mode based on ACL value */ 872 /* Update mode based on ACL value */
836 err = v9fs_acl_mode(dir, &mode, &dacl, &pacl); 873 err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
837 if (err) { 874 if (err) {
838 P9_DPRINTK(P9_DEBUG_VFS, 875 p9_debug(P9_DEBUG_VFS, "Failed to get acl values in mknod %d\n",
839 "Failed to get acl values in mknod %d\n", err); 876 err);
840 goto error; 877 goto error;
841 } 878 }
842 name = (char *) dentry->d_name.name; 879 name = (char *) dentry->d_name.name;
@@ -851,8 +888,8 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
851 fid = p9_client_walk(dfid, 1, &name, 1); 888 fid = p9_client_walk(dfid, 1, &name, 1);
852 if (IS_ERR(fid)) { 889 if (IS_ERR(fid)) {
853 err = PTR_ERR(fid); 890 err = PTR_ERR(fid);
854 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", 891 p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
855 err); 892 err);
856 fid = NULL; 893 fid = NULL;
857 goto error; 894 goto error;
858 } 895 }
@@ -860,8 +897,8 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
860 inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); 897 inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
861 if (IS_ERR(inode)) { 898 if (IS_ERR(inode)) {
862 err = PTR_ERR(inode); 899 err = PTR_ERR(inode);
863 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", 900 p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n",
864 err); 901 err);
865 goto error; 902 goto error;
866 } 903 }
867 err = v9fs_fid_add(dentry, fid); 904 err = v9fs_fid_add(dentry, fid);
@@ -905,7 +942,7 @@ v9fs_vfs_follow_link_dotl(struct dentry *dentry, struct nameidata *nd)
905 char *link = __getname(); 942 char *link = __getname();
906 char *target; 943 char *target;
907 944
908 P9_DPRINTK(P9_DEBUG_VFS, "%s\n", dentry->d_name.name); 945 p9_debug(P9_DEBUG_VFS, "%s\n", dentry->d_name.name);
909 946
910 if (!link) { 947 if (!link) {
911 link = ERR_PTR(-ENOMEM); 948 link = ERR_PTR(-ENOMEM);
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index c70251d47ed1..7b0cd87b07c2 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -117,11 +117,11 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
117 struct inode *inode = NULL; 117 struct inode *inode = NULL;
118 struct dentry *root = NULL; 118 struct dentry *root = NULL;
119 struct v9fs_session_info *v9ses = NULL; 119 struct v9fs_session_info *v9ses = NULL;
120 int mode = S_IRWXUGO | S_ISVTX; 120 umode_t mode = S_IRWXUGO | S_ISVTX;
121 struct p9_fid *fid; 121 struct p9_fid *fid;
122 int retval = 0; 122 int retval = 0;
123 123
124 P9_DPRINTK(P9_DEBUG_VFS, " \n"); 124 p9_debug(P9_DEBUG_VFS, "\n");
125 125
126 v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL); 126 v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL);
127 if (!v9ses) 127 if (!v9ses)
@@ -191,7 +191,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
191 goto release_sb; 191 goto release_sb;
192 v9fs_fid_add(root, fid); 192 v9fs_fid_add(root, fid);
193 193
194 P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n"); 194 p9_debug(P9_DEBUG_VFS, " simple set mount, return 0\n");
195 return dget(sb->s_root); 195 return dget(sb->s_root);
196 196
197clunk_fid: 197clunk_fid:
@@ -223,7 +223,7 @@ static void v9fs_kill_super(struct super_block *s)
223{ 223{
224 struct v9fs_session_info *v9ses = s->s_fs_info; 224 struct v9fs_session_info *v9ses = s->s_fs_info;
225 225
226 P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s); 226 p9_debug(P9_DEBUG_VFS, " %p\n", s);
227 227
228 kill_anon_super(s); 228 kill_anon_super(s);
229 229
@@ -231,7 +231,7 @@ static void v9fs_kill_super(struct super_block *s)
231 v9fs_session_close(v9ses); 231 v9fs_session_close(v9ses);
232 kfree(v9ses); 232 kfree(v9ses);
233 s->s_fs_info = NULL; 233 s->s_fs_info = NULL;
234 P9_DPRINTK(P9_DEBUG_VFS, "exiting kill_super\n"); 234 p9_debug(P9_DEBUG_VFS, "exiting kill_super\n");
235} 235}
236 236
237static void 237static void
@@ -303,7 +303,7 @@ static int v9fs_write_inode(struct inode *inode,
303 * send an fsync request to server irrespective of 303 * send an fsync request to server irrespective of
304 * wbc->sync_mode. 304 * wbc->sync_mode.
305 */ 305 */
306 P9_DPRINTK(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode); 306 p9_debug(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode);
307 v9inode = V9FS_I(inode); 307 v9inode = V9FS_I(inode);
308 if (!v9inode->writeback_fid) 308 if (!v9inode->writeback_fid)
309 return 0; 309 return 0;
@@ -326,7 +326,7 @@ static int v9fs_write_inode_dotl(struct inode *inode,
326 * send an fsync request to server irrespective of 326 * send an fsync request to server irrespective of
327 * wbc->sync_mode. 327 * wbc->sync_mode.
328 */ 328 */
329 P9_DPRINTK(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode); 329 p9_debug(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode);
330 v9inode = V9FS_I(inode); 330 v9inode = V9FS_I(inode);
331 if (!v9inode->writeback_fid) 331 if (!v9inode->writeback_fid)
332 return 0; 332 return 0;
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
index d288773871b3..29653b70a9c3 100644
--- a/fs/9p/xattr.c
+++ b/fs/9p/xattr.c
@@ -32,8 +32,8 @@ ssize_t v9fs_fid_xattr_get(struct p9_fid *fid, const char *name,
32 attr_fid = p9_client_xattrwalk(fid, name, &attr_size); 32 attr_fid = p9_client_xattrwalk(fid, name, &attr_size);
33 if (IS_ERR(attr_fid)) { 33 if (IS_ERR(attr_fid)) {
34 retval = PTR_ERR(attr_fid); 34 retval = PTR_ERR(attr_fid);
35 P9_DPRINTK(P9_DEBUG_VFS, 35 p9_debug(P9_DEBUG_VFS, "p9_client_attrwalk failed %zd\n",
36 "p9_client_attrwalk failed %zd\n", retval); 36 retval);
37 attr_fid = NULL; 37 attr_fid = NULL;
38 goto error; 38 goto error;
39 } 39 }
@@ -87,8 +87,8 @@ ssize_t v9fs_xattr_get(struct dentry *dentry, const char *name,
87{ 87{
88 struct p9_fid *fid; 88 struct p9_fid *fid;
89 89
90 P9_DPRINTK(P9_DEBUG_VFS, "%s: name = %s value_len = %zu\n", 90 p9_debug(P9_DEBUG_VFS, "name = %s value_len = %zu\n",
91 __func__, name, buffer_size); 91 name, buffer_size);
92 fid = v9fs_fid_lookup(dentry); 92 fid = v9fs_fid_lookup(dentry);
93 if (IS_ERR(fid)) 93 if (IS_ERR(fid))
94 return PTR_ERR(fid); 94 return PTR_ERR(fid);
@@ -115,8 +115,8 @@ int v9fs_xattr_set(struct dentry *dentry, const char *name,
115 int retval, msize, write_count; 115 int retval, msize, write_count;
116 struct p9_fid *fid = NULL; 116 struct p9_fid *fid = NULL;
117 117
118 P9_DPRINTK(P9_DEBUG_VFS, "%s: name = %s value_len = %zu flags = %d\n", 118 p9_debug(P9_DEBUG_VFS, "name = %s value_len = %zu flags = %d\n",
119 __func__, name, value_len, flags); 119 name, value_len, flags);
120 120
121 fid = v9fs_fid_clone(dentry); 121 fid = v9fs_fid_clone(dentry);
122 if (IS_ERR(fid)) { 122 if (IS_ERR(fid)) {
@@ -129,8 +129,8 @@ int v9fs_xattr_set(struct dentry *dentry, const char *name,
129 */ 129 */
130 retval = p9_client_xattrcreate(fid, name, value_len, flags); 130 retval = p9_client_xattrcreate(fid, name, value_len, flags);
131 if (retval < 0) { 131 if (retval < 0) {
132 P9_DPRINTK(P9_DEBUG_VFS, 132 p9_debug(P9_DEBUG_VFS, "p9_client_xattrcreate failed %d\n",
133 "p9_client_xattrcreate failed %d\n", retval); 133 retval);
134 goto error; 134 goto error;
135 } 135 }
136 msize = fid->clnt->msize; 136 msize = fid->clnt->msize;
diff --git a/fs/Kconfig b/fs/Kconfig
index 5f4c45d4aa10..d621f02a3f9e 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -218,6 +218,8 @@ source "fs/exofs/Kconfig"
218 218
219endif # MISC_FILESYSTEMS 219endif # MISC_FILESYSTEMS
220 220
221source "fs/exofs/Kconfig.ore"
222
221menuconfig NETWORK_FILESYSTEMS 223menuconfig NETWORK_FILESYSTEMS
222 bool "Network File Systems" 224 bool "Network File Systems"
223 default y 225 default y
@@ -266,14 +268,6 @@ source "fs/9p/Kconfig"
266 268
267endif # NETWORK_FILESYSTEMS 269endif # NETWORK_FILESYSTEMS
268 270
269if BLOCK
270menu "Partition Types"
271
272source "fs/partitions/Kconfig"
273
274endmenu
275endif
276
277source "fs/nls/Kconfig" 271source "fs/nls/Kconfig"
278source "fs/dlm/Kconfig" 272source "fs/dlm/Kconfig"
279 273
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 79e2ca7973b7..e95d1b64082c 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -27,6 +27,9 @@ config COMPAT_BINFMT_ELF
27 bool 27 bool
28 depends on COMPAT && BINFMT_ELF 28 depends on COMPAT && BINFMT_ELF
29 29
30config ARCH_BINFMT_ELF_RANDOMIZE_PIE
31 bool
32
30config BINFMT_ELF_FDPIC 33config BINFMT_ELF_FDPIC
31 bool "Kernel support for FDPIC ELF binaries" 34 bool "Kernel support for FDPIC ELF binaries"
32 default y 35 default y
diff --git a/fs/Makefile b/fs/Makefile
index d2c3353d5477..93804d4d66e1 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -19,6 +19,8 @@ else
19obj-y += no-block.o 19obj-y += no-block.o
20endif 20endif
21 21
22obj-$(CONFIG_PROC_FS) += proc_namespace.o
23
22obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o 24obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o
23obj-y += notify/ 25obj-y += notify/
24obj-$(CONFIG_EPOLL) += eventpoll.o 26obj-$(CONFIG_EPOLL) += eventpoll.o
@@ -52,7 +54,6 @@ obj-$(CONFIG_FHANDLE) += fhandle.o
52obj-y += quota/ 54obj-y += quota/
53 55
54obj-$(CONFIG_PROC_FS) += proc/ 56obj-$(CONFIG_PROC_FS) += proc/
55obj-y += partitions/
56obj-$(CONFIG_SYSFS) += sysfs/ 57obj-$(CONFIG_SYSFS) += sysfs/
57obj-$(CONFIG_CONFIGFS_FS) += configfs/ 58obj-$(CONFIG_CONFIGFS_FS) += configfs/
58obj-y += devpts/ 59obj-y += devpts/
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index c8bf36a1996a..8e3b36ace305 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -126,9 +126,9 @@ static void adfs_put_super(struct super_block *sb)
126 sb->s_fs_info = NULL; 126 sb->s_fs_info = NULL;
127} 127}
128 128
129static int adfs_show_options(struct seq_file *seq, struct vfsmount *mnt) 129static int adfs_show_options(struct seq_file *seq, struct dentry *root)
130{ 130{
131 struct adfs_sb_info *asb = ADFS_SB(mnt->mnt_sb); 131 struct adfs_sb_info *asb = ADFS_SB(root->d_sb);
132 132
133 if (asb->s_uid != 0) 133 if (asb->s_uid != 0)
134 seq_printf(seq, ",uid=%u", asb->s_uid); 134 seq_printf(seq, ",uid=%u", asb->s_uid);
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index c2b9c79eb64e..45a0ce45d7b4 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -136,7 +136,7 @@ extern int affs_remove_header(struct dentry *dentry);
136extern u32 affs_checksum_block(struct super_block *sb, struct buffer_head *bh); 136extern u32 affs_checksum_block(struct super_block *sb, struct buffer_head *bh);
137extern void affs_fix_checksum(struct super_block *sb, struct buffer_head *bh); 137extern void affs_fix_checksum(struct super_block *sb, struct buffer_head *bh);
138extern void secs_to_datestamp(time_t secs, struct affs_date *ds); 138extern void secs_to_datestamp(time_t secs, struct affs_date *ds);
139extern mode_t prot_to_mode(u32 prot); 139extern umode_t prot_to_mode(u32 prot);
140extern void mode_to_prot(struct inode *inode); 140extern void mode_to_prot(struct inode *inode);
141extern void affs_error(struct super_block *sb, const char *function, const char *fmt, ...); 141extern void affs_error(struct super_block *sb, const char *function, const char *fmt, ...);
142extern void affs_warning(struct super_block *sb, const char *function, const char *fmt, ...); 142extern void affs_warning(struct super_block *sb, const char *function, const char *fmt, ...);
@@ -156,8 +156,8 @@ extern void affs_free_bitmap(struct super_block *sb);
156extern int affs_hash_name(struct super_block *sb, const u8 *name, unsigned int len); 156extern int affs_hash_name(struct super_block *sb, const u8 *name, unsigned int len);
157extern struct dentry *affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *); 157extern struct dentry *affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *);
158extern int affs_unlink(struct inode *dir, struct dentry *dentry); 158extern int affs_unlink(struct inode *dir, struct dentry *dentry);
159extern int affs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *); 159extern int affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *);
160extern int affs_mkdir(struct inode *dir, struct dentry *dentry, int mode); 160extern int affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
161extern int affs_rmdir(struct inode *dir, struct dentry *dentry); 161extern int affs_rmdir(struct inode *dir, struct dentry *dentry);
162extern int affs_link(struct dentry *olddentry, struct inode *dir, 162extern int affs_link(struct dentry *olddentry, struct inode *dir,
163 struct dentry *dentry); 163 struct dentry *dentry);
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index de37ec842340..52a6407682e6 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -390,10 +390,10 @@ secs_to_datestamp(time_t secs, struct affs_date *ds)
390 ds->ticks = cpu_to_be32(secs * 50); 390 ds->ticks = cpu_to_be32(secs * 50);
391} 391}
392 392
393mode_t 393umode_t
394prot_to_mode(u32 prot) 394prot_to_mode(u32 prot)
395{ 395{
396 int mode = 0; 396 umode_t mode = 0;
397 397
398 if (!(prot & FIBF_NOWRITE)) 398 if (!(prot & FIBF_NOWRITE))
399 mode |= S_IWUSR; 399 mode |= S_IWUSR;
@@ -421,7 +421,7 @@ void
421mode_to_prot(struct inode *inode) 421mode_to_prot(struct inode *inode)
422{ 422{
423 u32 prot = AFFS_I(inode)->i_protect; 423 u32 prot = AFFS_I(inode)->i_protect;
424 mode_t mode = inode->i_mode; 424 umode_t mode = inode->i_mode;
425 425
426 if (!(mode & S_IXUSR)) 426 if (!(mode & S_IXUSR))
427 prot |= FIBF_NOEXECUTE; 427 prot |= FIBF_NOEXECUTE;
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 780a11dc6318..47806940aac0 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -255,13 +255,13 @@ affs_unlink(struct inode *dir, struct dentry *dentry)
255} 255}
256 256
257int 257int
258affs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) 258affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd)
259{ 259{
260 struct super_block *sb = dir->i_sb; 260 struct super_block *sb = dir->i_sb;
261 struct inode *inode; 261 struct inode *inode;
262 int error; 262 int error;
263 263
264 pr_debug("AFFS: create(%lu,\"%.*s\",0%o)\n",dir->i_ino,(int)dentry->d_name.len, 264 pr_debug("AFFS: create(%lu,\"%.*s\",0%ho)\n",dir->i_ino,(int)dentry->d_name.len,
265 dentry->d_name.name,mode); 265 dentry->d_name.name,mode);
266 266
267 inode = affs_new_inode(dir); 267 inode = affs_new_inode(dir);
@@ -285,12 +285,12 @@ affs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata
285} 285}
286 286
287int 287int
288affs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 288affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
289{ 289{
290 struct inode *inode; 290 struct inode *inode;
291 int error; 291 int error;
292 292
293 pr_debug("AFFS: mkdir(%lu,\"%.*s\",0%o)\n",dir->i_ino, 293 pr_debug("AFFS: mkdir(%lu,\"%.*s\",0%ho)\n",dir->i_ino,
294 (int)dentry->d_name.len,dentry->d_name.name,mode); 294 (int)dentry->d_name.len,dentry->d_name.name,mode);
295 295
296 inode = affs_new_inode(dir); 296 inode = affs_new_inode(dir);
diff --git a/fs/affs/super.c b/fs/affs/super.c
index b31507d0f9b9..8ba73fed7964 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -98,7 +98,6 @@ static struct inode *affs_alloc_inode(struct super_block *sb)
98static void affs_i_callback(struct rcu_head *head) 98static void affs_i_callback(struct rcu_head *head)
99{ 99{
100 struct inode *inode = container_of(head, struct inode, i_rcu); 100 struct inode *inode = container_of(head, struct inode, i_rcu);
101 INIT_LIST_HEAD(&inode->i_dentry);
102 kmem_cache_free(affs_inode_cachep, AFFS_I(inode)); 101 kmem_cache_free(affs_inode_cachep, AFFS_I(inode));
103} 102}
104 103
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 1b0b19550015..e22dc4b4a503 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -28,9 +28,9 @@ static int afs_d_delete(const struct dentry *dentry);
28static void afs_d_release(struct dentry *dentry); 28static void afs_d_release(struct dentry *dentry);
29static int afs_lookup_filldir(void *_cookie, const char *name, int nlen, 29static int afs_lookup_filldir(void *_cookie, const char *name, int nlen,
30 loff_t fpos, u64 ino, unsigned dtype); 30 loff_t fpos, u64 ino, unsigned dtype);
31static int afs_create(struct inode *dir, struct dentry *dentry, int mode, 31static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
32 struct nameidata *nd); 32 struct nameidata *nd);
33static int afs_mkdir(struct inode *dir, struct dentry *dentry, int mode); 33static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
34static int afs_rmdir(struct inode *dir, struct dentry *dentry); 34static int afs_rmdir(struct inode *dir, struct dentry *dentry);
35static int afs_unlink(struct inode *dir, struct dentry *dentry); 35static int afs_unlink(struct inode *dir, struct dentry *dentry);
36static int afs_link(struct dentry *from, struct inode *dir, 36static int afs_link(struct dentry *from, struct inode *dir,
@@ -764,7 +764,7 @@ static void afs_d_release(struct dentry *dentry)
764/* 764/*
765 * create a directory on an AFS filesystem 765 * create a directory on an AFS filesystem
766 */ 766 */
767static int afs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 767static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
768{ 768{
769 struct afs_file_status status; 769 struct afs_file_status status;
770 struct afs_callback cb; 770 struct afs_callback cb;
@@ -777,7 +777,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
777 777
778 dvnode = AFS_FS_I(dir); 778 dvnode = AFS_FS_I(dir);
779 779
780 _enter("{%x:%u},{%s},%o", 780 _enter("{%x:%u},{%s},%ho",
781 dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode); 781 dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode);
782 782
783 ret = -ENAMETOOLONG; 783 ret = -ENAMETOOLONG;
@@ -948,7 +948,7 @@ error:
948/* 948/*
949 * create a regular file on an AFS filesystem 949 * create a regular file on an AFS filesystem
950 */ 950 */
951static int afs_create(struct inode *dir, struct dentry *dentry, int mode, 951static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
952 struct nameidata *nd) 952 struct nameidata *nd)
953{ 953{
954 struct afs_file_status status; 954 struct afs_file_status status;
@@ -962,7 +962,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, int mode,
962 962
963 dvnode = AFS_FS_I(dir); 963 dvnode = AFS_FS_I(dir);
964 964
965 _enter("{%x:%u},{%s},%o,", 965 _enter("{%x:%u},{%s},%ho,",
966 dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode); 966 dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode);
967 967
968 ret = -ENAMETOOLONG; 968 ret = -ENAMETOOLONG;
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index aa59184151d0..8f4ce2658b7d 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -242,7 +242,7 @@ struct vfsmount *afs_d_automount(struct path *path)
242{ 242{
243 struct vfsmount *newmnt; 243 struct vfsmount *newmnt;
244 244
245 _enter("{%s,%s}", path->mnt->mnt_devname, path->dentry->d_name.name); 245 _enter("{%s}", path->dentry->d_name.name);
246 246
247 newmnt = afs_mntpt_do_automount(path->dentry); 247 newmnt = afs_mntpt_do_automount(path->dentry);
248 if (IS_ERR(newmnt)) 248 if (IS_ERR(newmnt))
@@ -252,7 +252,7 @@ struct vfsmount *afs_d_automount(struct path *path)
252 mnt_set_expiry(newmnt, &afs_vfsmounts); 252 mnt_set_expiry(newmnt, &afs_vfsmounts);
253 queue_delayed_work(afs_wq, &afs_mntpt_expiry_timer, 253 queue_delayed_work(afs_wq, &afs_mntpt_expiry_timer,
254 afs_mntpt_expiry_timeout * HZ); 254 afs_mntpt_expiry_timeout * HZ);
255 _leave(" = %p {%s}", newmnt, newmnt->mnt_devname); 255 _leave(" = %p", newmnt);
256 return newmnt; 256 return newmnt;
257} 257}
258 258
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 356dcf0929e8..983ec59fc80d 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -495,7 +495,6 @@ static void afs_i_callback(struct rcu_head *head)
495{ 495{
496 struct inode *inode = container_of(head, struct inode, i_rcu); 496 struct inode *inode = container_of(head, struct inode, i_rcu);
497 struct afs_vnode *vnode = AFS_FS_I(inode); 497 struct afs_vnode *vnode = AFS_FS_I(inode);
498 INIT_LIST_HEAD(&inode->i_dentry);
499 kmem_cache_free(afs_inode_cachep, vnode); 498 kmem_cache_free(afs_inode_cachep, vnode);
500} 499}
501 500
diff --git a/fs/aio.c b/fs/aio.c
index 78c514cfd212..969beb0e2231 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -476,14 +476,21 @@ static void kiocb_batch_init(struct kiocb_batch *batch, long total)
476 batch->count = total; 476 batch->count = total;
477} 477}
478 478
479static void kiocb_batch_free(struct kiocb_batch *batch) 479static void kiocb_batch_free(struct kioctx *ctx, struct kiocb_batch *batch)
480{ 480{
481 struct kiocb *req, *n; 481 struct kiocb *req, *n;
482 482
483 if (list_empty(&batch->head))
484 return;
485
486 spin_lock_irq(&ctx->ctx_lock);
483 list_for_each_entry_safe(req, n, &batch->head, ki_batch) { 487 list_for_each_entry_safe(req, n, &batch->head, ki_batch) {
484 list_del(&req->ki_batch); 488 list_del(&req->ki_batch);
489 list_del(&req->ki_list);
485 kmem_cache_free(kiocb_cachep, req); 490 kmem_cache_free(kiocb_cachep, req);
491 ctx->reqs_active--;
486 } 492 }
493 spin_unlock_irq(&ctx->ctx_lock);
487} 494}
488 495
489/* 496/*
@@ -1742,7 +1749,7 @@ long do_io_submit(aio_context_t ctx_id, long nr,
1742 } 1749 }
1743 blk_finish_plug(&plug); 1750 blk_finish_plug(&plug);
1744 1751
1745 kiocb_batch_free(&batch); 1752 kiocb_batch_free(ctx, &batch);
1746 put_ioctx(ctx); 1753 put_ioctx(ctx);
1747 return i ? i : ret; 1754 return i ? i : ret;
1748} 1755}
diff --git a/fs/attr.c b/fs/attr.c
index 7ee7ba488313..95053ad8abcc 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -166,7 +166,7 @@ EXPORT_SYMBOL(setattr_copy);
166int notify_change(struct dentry * dentry, struct iattr * attr) 166int notify_change(struct dentry * dentry, struct iattr * attr)
167{ 167{
168 struct inode *inode = dentry->d_inode; 168 struct inode *inode = dentry->d_inode;
169 mode_t mode = inode->i_mode; 169 umode_t mode = inode->i_mode;
170 int error; 170 int error;
171 struct timespec now; 171 struct timespec now;
172 unsigned int ia_valid = attr->ia_valid; 172 unsigned int ia_valid = attr->ia_valid;
@@ -177,7 +177,7 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
177 } 177 }
178 178
179 if ((ia_valid & ATTR_MODE)) { 179 if ((ia_valid & ATTR_MODE)) {
180 mode_t amode = attr->ia_mode; 180 umode_t amode = attr->ia_mode;
181 /* Flag setting protected by i_mutex */ 181 /* Flag setting protected by i_mutex */
182 if (is_sxid(amode)) 182 if (is_sxid(amode))
183 inode->i_flags &= ~S_NOSEC; 183 inode->i_flags &= ~S_NOSEC;
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 326dc08d3e3f..d8d8e7ba6a1e 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -116,6 +116,7 @@ struct autofs_sb_info {
116 int needs_reghost; 116 int needs_reghost;
117 struct super_block *sb; 117 struct super_block *sb;
118 struct mutex wq_mutex; 118 struct mutex wq_mutex;
119 struct mutex pipe_mutex;
119 spinlock_t fs_lock; 120 spinlock_t fs_lock;
120 struct autofs_wait_queue *queues; /* Wait queue pointer */ 121 struct autofs_wait_queue *queues; /* Wait queue pointer */
121 spinlock_t lookup_lock; 122 spinlock_t lookup_lock;
@@ -155,7 +156,7 @@ static inline int autofs4_ispending(struct dentry *dentry)
155 return 0; 156 return 0;
156} 157}
157 158
158struct inode *autofs4_get_inode(struct super_block *, mode_t); 159struct inode *autofs4_get_inode(struct super_block *, umode_t);
159void autofs4_free_ino(struct autofs_info *); 160void autofs4_free_ino(struct autofs_info *);
160 161
161/* Expiration */ 162/* Expiration */
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 509fe1eb66ae..76741d8d7786 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -194,7 +194,7 @@ static int find_autofs_mount(const char *pathname,
194 return err; 194 return err;
195 err = -ENOENT; 195 err = -ENOENT;
196 while (path.dentry == path.mnt->mnt_root) { 196 while (path.dentry == path.mnt->mnt_root) {
197 if (path.mnt->mnt_sb->s_magic == AUTOFS_SUPER_MAGIC) { 197 if (path.dentry->d_sb->s_magic == AUTOFS_SUPER_MAGIC) {
198 if (test(&path, data)) { 198 if (test(&path, data)) {
199 path_get(&path); 199 path_get(&path);
200 if (!err) /* already found some */ 200 if (!err) /* already found some */
@@ -212,7 +212,7 @@ static int find_autofs_mount(const char *pathname,
212 212
213static int test_by_dev(struct path *path, void *p) 213static int test_by_dev(struct path *path, void *p)
214{ 214{
215 return path->mnt->mnt_sb->s_dev == *(dev_t *)p; 215 return path->dentry->d_sb->s_dev == *(dev_t *)p;
216} 216}
217 217
218static int test_by_type(struct path *path, void *p) 218static int test_by_type(struct path *path, void *p)
@@ -538,11 +538,11 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
538 err = find_autofs_mount(name, &path, test_by_type, &type); 538 err = find_autofs_mount(name, &path, test_by_type, &type);
539 if (err) 539 if (err)
540 goto out; 540 goto out;
541 devid = new_encode_dev(path.mnt->mnt_sb->s_dev); 541 devid = new_encode_dev(path.dentry->d_sb->s_dev);
542 err = 0; 542 err = 0;
543 if (path.mnt->mnt_root == path.dentry) { 543 if (path.mnt->mnt_root == path.dentry) {
544 err = 1; 544 err = 1;
545 magic = path.mnt->mnt_sb->s_magic; 545 magic = path.dentry->d_sb->s_magic;
546 } 546 }
547 } else { 547 } else {
548 dev_t dev = sbi->sb->s_dev; 548 dev_t dev = sbi->sb->s_dev;
@@ -556,7 +556,7 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
556 err = have_submounts(path.dentry); 556 err = have_submounts(path.dentry);
557 557
558 if (follow_down_one(&path)) 558 if (follow_down_one(&path))
559 magic = path.mnt->mnt_sb->s_magic; 559 magic = path.dentry->d_sb->s_magic;
560 } 560 }
561 561
562 param->ismountpoint.out.devid = devid; 562 param->ismountpoint.out.devid = devid;
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 8179f1ab8175..e16980b00b8d 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -70,10 +70,10 @@ out_kill_sb:
70 kill_litter_super(sb); 70 kill_litter_super(sb);
71} 71}
72 72
73static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt) 73static int autofs4_show_options(struct seq_file *m, struct dentry *root)
74{ 74{
75 struct autofs_sb_info *sbi = autofs4_sbi(mnt->mnt_sb); 75 struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb);
76 struct inode *root_inode = mnt->mnt_sb->s_root->d_inode; 76 struct inode *root_inode = root->d_sb->s_root->d_inode;
77 77
78 if (!sbi) 78 if (!sbi)
79 return 0; 79 return 0;
@@ -225,6 +225,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
225 sbi->min_proto = 0; 225 sbi->min_proto = 0;
226 sbi->max_proto = 0; 226 sbi->max_proto = 0;
227 mutex_init(&sbi->wq_mutex); 227 mutex_init(&sbi->wq_mutex);
228 mutex_init(&sbi->pipe_mutex);
228 spin_lock_init(&sbi->fs_lock); 229 spin_lock_init(&sbi->fs_lock);
229 sbi->queues = NULL; 230 sbi->queues = NULL;
230 spin_lock_init(&sbi->lookup_lock); 231 spin_lock_init(&sbi->lookup_lock);
@@ -326,7 +327,7 @@ fail_unlock:
326 return -EINVAL; 327 return -EINVAL;
327} 328}
328 329
329struct inode *autofs4_get_inode(struct super_block *sb, mode_t mode) 330struct inode *autofs4_get_inode(struct super_block *sb, umode_t mode)
330{ 331{
331 struct inode *inode = new_inode(sb); 332 struct inode *inode = new_inode(sb);
332 333
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index f55ae23b137e..75e5f1c8e028 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -26,7 +26,7 @@
26static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *); 26static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *);
27static int autofs4_dir_unlink(struct inode *,struct dentry *); 27static int autofs4_dir_unlink(struct inode *,struct dentry *);
28static int autofs4_dir_rmdir(struct inode *,struct dentry *); 28static int autofs4_dir_rmdir(struct inode *,struct dentry *);
29static int autofs4_dir_mkdir(struct inode *,struct dentry *,int); 29static int autofs4_dir_mkdir(struct inode *,struct dentry *,umode_t);
30static long autofs4_root_ioctl(struct file *,unsigned int,unsigned long); 30static long autofs4_root_ioctl(struct file *,unsigned int,unsigned long);
31#ifdef CONFIG_COMPAT 31#ifdef CONFIG_COMPAT
32static long autofs4_root_compat_ioctl(struct file *,unsigned int,unsigned long); 32static long autofs4_root_compat_ioctl(struct file *,unsigned int,unsigned long);
@@ -699,7 +699,7 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
699 return 0; 699 return 0;
700} 700}
701 701
702static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode) 702static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
703{ 703{
704 struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); 704 struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb);
705 struct autofs_info *ino = autofs4_dentry_ino(dentry); 705 struct autofs_info *ino = autofs4_dentry_ino(dentry);
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index e1fbdeef85db..da8876d38a7b 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -56,26 +56,27 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi)
56 mutex_unlock(&sbi->wq_mutex); 56 mutex_unlock(&sbi->wq_mutex);
57} 57}
58 58
59static int autofs4_write(struct file *file, const void *addr, int bytes) 59static int autofs4_write(struct autofs_sb_info *sbi,
60 struct file *file, const void *addr, int bytes)
60{ 61{
61 unsigned long sigpipe, flags; 62 unsigned long sigpipe, flags;
62 mm_segment_t fs; 63 mm_segment_t fs;
63 const char *data = (const char *)addr; 64 const char *data = (const char *)addr;
64 ssize_t wr = 0; 65 ssize_t wr = 0;
65 66
66 /** WARNING: this is not safe for writing more than PIPE_BUF bytes! **/
67
68 sigpipe = sigismember(&current->pending.signal, SIGPIPE); 67 sigpipe = sigismember(&current->pending.signal, SIGPIPE);
69 68
70 /* Save pointer to user space and point back to kernel space */ 69 /* Save pointer to user space and point back to kernel space */
71 fs = get_fs(); 70 fs = get_fs();
72 set_fs(KERNEL_DS); 71 set_fs(KERNEL_DS);
73 72
73 mutex_lock(&sbi->pipe_mutex);
74 while (bytes && 74 while (bytes &&
75 (wr = file->f_op->write(file,data,bytes,&file->f_pos)) > 0) { 75 (wr = file->f_op->write(file,data,bytes,&file->f_pos)) > 0) {
76 data += wr; 76 data += wr;
77 bytes -= wr; 77 bytes -= wr;
78 } 78 }
79 mutex_unlock(&sbi->pipe_mutex);
79 80
80 set_fs(fs); 81 set_fs(fs);
81 82
@@ -110,6 +111,13 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
110 111
111 pkt.hdr.proto_version = sbi->version; 112 pkt.hdr.proto_version = sbi->version;
112 pkt.hdr.type = type; 113 pkt.hdr.type = type;
114 mutex_lock(&sbi->wq_mutex);
115
116 /* Check if we have become catatonic */
117 if (sbi->catatonic) {
118 mutex_unlock(&sbi->wq_mutex);
119 return;
120 }
113 switch (type) { 121 switch (type) {
114 /* Kernel protocol v4 missing and expire packets */ 122 /* Kernel protocol v4 missing and expire packets */
115 case autofs_ptype_missing: 123 case autofs_ptype_missing:
@@ -163,22 +171,18 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
163 } 171 }
164 default: 172 default:
165 printk("autofs4_notify_daemon: bad type %d!\n", type); 173 printk("autofs4_notify_daemon: bad type %d!\n", type);
174 mutex_unlock(&sbi->wq_mutex);
166 return; 175 return;
167 } 176 }
168 177
169 /* Check if we have become catatonic */ 178 pipe = sbi->pipe;
170 mutex_lock(&sbi->wq_mutex); 179 get_file(pipe);
171 if (!sbi->catatonic) { 180
172 pipe = sbi->pipe;
173 get_file(pipe);
174 }
175 mutex_unlock(&sbi->wq_mutex); 181 mutex_unlock(&sbi->wq_mutex);
176 182
177 if (pipe) { 183 if (autofs4_write(sbi, pipe, &pkt, pktsz))
178 if (autofs4_write(pipe, &pkt, pktsz)) 184 autofs4_catatonic_mode(sbi);
179 autofs4_catatonic_mode(sbi); 185 fput(pipe);
180 fput(pipe);
181 }
182} 186}
183 187
184static int autofs4_getpath(struct autofs_sb_info *sbi, 188static int autofs4_getpath(struct autofs_sb_info *sbi,
@@ -257,6 +261,9 @@ static int validate_request(struct autofs_wait_queue **wait,
257 struct autofs_wait_queue *wq; 261 struct autofs_wait_queue *wq;
258 struct autofs_info *ino; 262 struct autofs_info *ino;
259 263
264 if (sbi->catatonic)
265 return -ENOENT;
266
260 /* Wait in progress, continue; */ 267 /* Wait in progress, continue; */
261 wq = autofs4_find_wait(sbi, qstr); 268 wq = autofs4_find_wait(sbi, qstr);
262 if (wq) { 269 if (wq) {
@@ -289,6 +296,9 @@ static int validate_request(struct autofs_wait_queue **wait,
289 if (mutex_lock_interruptible(&sbi->wq_mutex)) 296 if (mutex_lock_interruptible(&sbi->wq_mutex))
290 return -EINTR; 297 return -EINTR;
291 298
299 if (sbi->catatonic)
300 return -ENOENT;
301
292 wq = autofs4_find_wait(sbi, qstr); 302 wq = autofs4_find_wait(sbi, qstr);
293 if (wq) { 303 if (wq) {
294 *wait = wq; 304 *wait = wq;
@@ -389,7 +399,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
389 399
390 ret = validate_request(&wq, sbi, &qstr, dentry, notify); 400 ret = validate_request(&wq, sbi, &qstr, dentry, notify);
391 if (ret <= 0) { 401 if (ret <= 0) {
392 if (ret == 0) 402 if (ret != -EINTR)
393 mutex_unlock(&sbi->wq_mutex); 403 mutex_unlock(&sbi->wq_mutex);
394 kfree(qstr.name); 404 kfree(qstr.name);
395 return ret; 405 return ret;
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 9205cf25f1c6..22e9a78872ff 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -173,7 +173,7 @@ static const struct file_operations bad_file_ops =
173}; 173};
174 174
175static int bad_inode_create (struct inode *dir, struct dentry *dentry, 175static int bad_inode_create (struct inode *dir, struct dentry *dentry,
176 int mode, struct nameidata *nd) 176 umode_t mode, struct nameidata *nd)
177{ 177{
178 return -EIO; 178 return -EIO;
179} 179}
@@ -202,7 +202,7 @@ static int bad_inode_symlink (struct inode *dir, struct dentry *dentry,
202} 202}
203 203
204static int bad_inode_mkdir(struct inode *dir, struct dentry *dentry, 204static int bad_inode_mkdir(struct inode *dir, struct dentry *dentry,
205 int mode) 205 umode_t mode)
206{ 206{
207 return -EIO; 207 return -EIO;
208} 208}
@@ -213,7 +213,7 @@ static int bad_inode_rmdir (struct inode *dir, struct dentry *dentry)
213} 213}
214 214
215static int bad_inode_mknod (struct inode *dir, struct dentry *dentry, 215static int bad_inode_mknod (struct inode *dir, struct dentry *dentry,
216 int mode, dev_t rdev) 216 umode_t mode, dev_t rdev)
217{ 217{
218 return -EIO; 218 return -EIO;
219} 219}
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 8342ca67abcd..6e6d536767fe 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -286,7 +286,6 @@ befs_alloc_inode(struct super_block *sb)
286static void befs_i_callback(struct rcu_head *head) 286static void befs_i_callback(struct rcu_head *head)
287{ 287{
288 struct inode *inode = container_of(head, struct inode, i_rcu); 288 struct inode *inode = container_of(head, struct inode, i_rcu);
289 INIT_LIST_HEAD(&inode->i_dentry);
290 kmem_cache_free(befs_inode_cachep, BEFS_I(inode)); 289 kmem_cache_free(befs_inode_cachep, BEFS_I(inode));
291} 290}
292 291
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 9cc074019479..d12c7966db27 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -84,7 +84,7 @@ const struct file_operations bfs_dir_operations = {
84 84
85extern void dump_imap(const char *, struct super_block *); 85extern void dump_imap(const char *, struct super_block *);
86 86
87static int bfs_create(struct inode *dir, struct dentry *dentry, int mode, 87static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
88 struct nameidata *nd) 88 struct nameidata *nd)
89{ 89{
90 int err; 90 int err;
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 697af5bf70b3..b0391bc402b1 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -251,7 +251,6 @@ static struct inode *bfs_alloc_inode(struct super_block *sb)
251static void bfs_i_callback(struct rcu_head *head) 251static void bfs_i_callback(struct rcu_head *head)
252{ 252{
253 struct inode *inode = container_of(head, struct inode, i_rcu); 253 struct inode *inode = container_of(head, struct inode, i_rcu);
254 INIT_LIST_HEAD(&inode->i_dentry);
255 kmem_cache_free(bfs_inode_cachep, BFS_I(inode)); 254 kmem_cache_free(bfs_inode_cachep, BFS_I(inode));
256} 255}
257 256
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 21ac5ee4b43f..bcb884e2d613 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -794,7 +794,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
794 * default mmap base, as well as whatever program they 794 * default mmap base, as well as whatever program they
795 * might try to exec. This is because the brk will 795 * might try to exec. This is because the brk will
796 * follow the loader, and is not movable. */ 796 * follow the loader, and is not movable. */
797#if defined(CONFIG_X86) || defined(CONFIG_ARM) 797#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
798 /* Memory randomization might have been switched off 798 /* Memory randomization might have been switched off
799 * in runtime via sysctl. 799 * in runtime via sysctl.
800 * If that is the case, retain the original non-zero 800 * If that is the case, retain the original non-zero
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 1e9edbdeda7e..a9198dfd5f85 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -560,7 +560,7 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
560 break; 560 break;
561 case 2: set_bit(Enabled, &e->flags); 561 case 2: set_bit(Enabled, &e->flags);
562 break; 562 break;
563 case 3: root = dget(file->f_path.mnt->mnt_sb->s_root); 563 case 3: root = dget(file->f_path.dentry->d_sb->s_root);
564 mutex_lock(&root->d_inode->i_mutex); 564 mutex_lock(&root->d_inode->i_mutex);
565 565
566 kill_node(e); 566 kill_node(e);
@@ -587,7 +587,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
587 Node *e; 587 Node *e;
588 struct inode *inode; 588 struct inode *inode;
589 struct dentry *root, *dentry; 589 struct dentry *root, *dentry;
590 struct super_block *sb = file->f_path.mnt->mnt_sb; 590 struct super_block *sb = file->f_path.dentry->d_sb;
591 int err = 0; 591 int err = 0;
592 592
593 e = create_entry(buffer, count); 593 e = create_entry(buffer, count);
@@ -666,7 +666,7 @@ static ssize_t bm_status_write(struct file * file, const char __user * buffer,
666 switch (res) { 666 switch (res) {
667 case 1: enabled = 0; break; 667 case 1: enabled = 0; break;
668 case 2: enabled = 1; break; 668 case 2: enabled = 1; break;
669 case 3: root = dget(file->f_path.mnt->mnt_sb->s_root); 669 case 3: root = dget(file->f_path.dentry->d_sb->s_root);
670 mutex_lock(&root->d_inode->i_mutex); 670 mutex_lock(&root->d_inode->i_mutex);
671 671
672 while (!list_empty(&entries)) 672 while (!list_empty(&entries))
diff --git a/fs/block_dev.c b/fs/block_dev.c
index b07f1da1de4e..0e575d1304b4 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -17,6 +17,7 @@
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/blkpg.h> 18#include <linux/blkpg.h>
19#include <linux/buffer_head.h> 19#include <linux/buffer_head.h>
20#include <linux/swap.h>
20#include <linux/pagevec.h> 21#include <linux/pagevec.h>
21#include <linux/writeback.h> 22#include <linux/writeback.h>
22#include <linux/mpage.h> 23#include <linux/mpage.h>
@@ -24,7 +25,7 @@
24#include <linux/uio.h> 25#include <linux/uio.h>
25#include <linux/namei.h> 26#include <linux/namei.h>
26#include <linux/log2.h> 27#include <linux/log2.h>
27#include <linux/kmemleak.h> 28#include <linux/cleancache.h>
28#include <asm/uaccess.h> 29#include <asm/uaccess.h>
29#include "internal.h" 30#include "internal.h"
30 31
@@ -82,13 +83,35 @@ static sector_t max_block(struct block_device *bdev)
82} 83}
83 84
84/* Kill _all_ buffers and pagecache , dirty or not.. */ 85/* Kill _all_ buffers and pagecache , dirty or not.. */
85static void kill_bdev(struct block_device *bdev) 86void kill_bdev(struct block_device *bdev)
86{ 87{
87 if (bdev->bd_inode->i_mapping->nrpages == 0) 88 struct address_space *mapping = bdev->bd_inode->i_mapping;
89
90 if (mapping->nrpages == 0)
88 return; 91 return;
92
89 invalidate_bh_lrus(); 93 invalidate_bh_lrus();
90 truncate_inode_pages(bdev->bd_inode->i_mapping, 0); 94 truncate_inode_pages(mapping, 0);
91} 95}
96EXPORT_SYMBOL(kill_bdev);
97
98/* Invalidate clean unused buffers and pagecache. */
99void invalidate_bdev(struct block_device *bdev)
100{
101 struct address_space *mapping = bdev->bd_inode->i_mapping;
102
103 if (mapping->nrpages == 0)
104 return;
105
106 invalidate_bh_lrus();
107 lru_add_drain_all(); /* make sure all lru add caches are flushed */
108 invalidate_mapping_pages(mapping, 0, -1);
109 /* 99% of the time, we don't need to flush the cleancache on the bdev.
110 * But, for the strange corners, lets be cautious
111 */
112 cleancache_flush_inode(mapping);
113}
114EXPORT_SYMBOL(invalidate_bdev);
92 115
93int set_blocksize(struct block_device *bdev, int size) 116int set_blocksize(struct block_device *bdev, int size)
94{ 117{
@@ -425,7 +448,6 @@ static void bdev_i_callback(struct rcu_head *head)
425 struct inode *inode = container_of(head, struct inode, i_rcu); 448 struct inode *inode = container_of(head, struct inode, i_rcu);
426 struct bdev_inode *bdi = BDEV_I(inode); 449 struct bdev_inode *bdi = BDEV_I(inode);
427 450
428 INIT_LIST_HEAD(&inode->i_dentry);
429 kmem_cache_free(bdev_cachep, bdi); 451 kmem_cache_free(bdev_cachep, bdi);
430} 452}
431 453
@@ -493,12 +515,12 @@ static struct file_system_type bd_type = {
493 .kill_sb = kill_anon_super, 515 .kill_sb = kill_anon_super,
494}; 516};
495 517
496struct super_block *blockdev_superblock __read_mostly; 518static struct super_block *blockdev_superblock __read_mostly;
497 519
498void __init bdev_cache_init(void) 520void __init bdev_cache_init(void)
499{ 521{
500 int err; 522 int err;
501 struct vfsmount *bd_mnt; 523 static struct vfsmount *bd_mnt;
502 524
503 bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode), 525 bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
504 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| 526 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
@@ -510,12 +532,7 @@ void __init bdev_cache_init(void)
510 bd_mnt = kern_mount(&bd_type); 532 bd_mnt = kern_mount(&bd_type);
511 if (IS_ERR(bd_mnt)) 533 if (IS_ERR(bd_mnt))
512 panic("Cannot create bdev pseudo-fs"); 534 panic("Cannot create bdev pseudo-fs");
513 /* 535 blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */
514 * This vfsmount structure is only used to obtain the
515 * blockdev_superblock, so tell kmemleak not to report it.
516 */
517 kmemleak_not_leak(bd_mnt);
518 blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */
519} 536}
520 537
521/* 538/*
@@ -639,6 +656,11 @@ static struct block_device *bd_acquire(struct inode *inode)
639 return bdev; 656 return bdev;
640} 657}
641 658
659static inline int sb_is_blkdev_sb(struct super_block *sb)
660{
661 return sb == blockdev_superblock;
662}
663
642/* Call when you free inode */ 664/* Call when you free inode */
643 665
644void bd_forget(struct inode *inode) 666void bd_forget(struct inode *inode)
@@ -1117,6 +1139,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1117 mutex_lock_nested(&bdev->bd_mutex, for_part); 1139 mutex_lock_nested(&bdev->bd_mutex, for_part);
1118 if (!bdev->bd_openers) { 1140 if (!bdev->bd_openers) {
1119 bdev->bd_disk = disk; 1141 bdev->bd_disk = disk;
1142 bdev->bd_queue = disk->queue;
1120 bdev->bd_contains = bdev; 1143 bdev->bd_contains = bdev;
1121 if (!partno) { 1144 if (!partno) {
1122 struct backing_dev_info *bdi; 1145 struct backing_dev_info *bdi;
@@ -1137,6 +1160,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1137 disk_put_part(bdev->bd_part); 1160 disk_put_part(bdev->bd_part);
1138 bdev->bd_part = NULL; 1161 bdev->bd_part = NULL;
1139 bdev->bd_disk = NULL; 1162 bdev->bd_disk = NULL;
1163 bdev->bd_queue = NULL;
1140 mutex_unlock(&bdev->bd_mutex); 1164 mutex_unlock(&bdev->bd_mutex);
1141 disk_unblock_events(disk); 1165 disk_unblock_events(disk);
1142 put_disk(disk); 1166 put_disk(disk);
@@ -1210,6 +1234,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1210 disk_put_part(bdev->bd_part); 1234 disk_put_part(bdev->bd_part);
1211 bdev->bd_disk = NULL; 1235 bdev->bd_disk = NULL;
1212 bdev->bd_part = NULL; 1236 bdev->bd_part = NULL;
1237 bdev->bd_queue = NULL;
1213 bdev_inode_switch_bdi(bdev->bd_inode, &default_backing_dev_info); 1238 bdev_inode_switch_bdi(bdev->bd_inode, &default_backing_dev_info);
1214 if (bdev != bdev->bd_contains) 1239 if (bdev != bdev->bd_contains)
1215 __blkdev_put(bdev->bd_contains, mode, 1); 1240 __blkdev_put(bdev->bd_contains, mode, 1);
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 0b394580d860..0cc20b35c1c4 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -334,7 +334,7 @@ again:
334 if (freezing(current)) { 334 if (freezing(current)) {
335 worker->working = 0; 335 worker->working = 0;
336 spin_unlock_irq(&worker->lock); 336 spin_unlock_irq(&worker->lock);
337 refrigerator(); 337 try_to_freeze();
338 } else { 338 } else {
339 spin_unlock_irq(&worker->lock); 339 spin_unlock_irq(&worker->lock);
340 if (!kthread_should_stop()) { 340 if (!kthread_should_stop()) {
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f44b3928dc2d..d8525662ca7a 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -872,7 +872,8 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
872 872
873#ifdef CONFIG_MIGRATION 873#ifdef CONFIG_MIGRATION
874static int btree_migratepage(struct address_space *mapping, 874static int btree_migratepage(struct address_space *mapping,
875 struct page *newpage, struct page *page) 875 struct page *newpage, struct page *page,
876 enum migrate_mode mode)
876{ 877{
877 /* 878 /*
878 * we can't safely write a btree page from here, 879 * we can't safely write a btree page from here,
@@ -887,7 +888,7 @@ static int btree_migratepage(struct address_space *mapping,
887 if (page_has_private(page) && 888 if (page_has_private(page) &&
888 !try_to_release_page(page, GFP_KERNEL)) 889 !try_to_release_page(page, GFP_KERNEL))
889 return -EAGAIN; 890 return -EAGAIN;
890 return migrate_page(mapping, newpage, page); 891 return migrate_page(mapping, newpage, page, mode);
891} 892}
892#endif 893#endif
893 894
@@ -1579,9 +1580,7 @@ static int cleaner_kthread(void *arg)
1579 btrfs_run_defrag_inodes(root->fs_info); 1580 btrfs_run_defrag_inodes(root->fs_info);
1580 } 1581 }
1581 1582
1582 if (freezing(current)) { 1583 if (!try_to_freeze()) {
1583 refrigerator();
1584 } else {
1585 set_current_state(TASK_INTERRUPTIBLE); 1584 set_current_state(TASK_INTERRUPTIBLE);
1586 if (!kthread_should_stop()) 1585 if (!kthread_should_stop())
1587 schedule(); 1586 schedule();
@@ -1635,9 +1634,7 @@ sleep:
1635 wake_up_process(root->fs_info->cleaner_kthread); 1634 wake_up_process(root->fs_info->cleaner_kthread);
1636 mutex_unlock(&root->fs_info->transaction_kthread_mutex); 1635 mutex_unlock(&root->fs_info->transaction_kthread_mutex);
1637 1636
1638 if (freezing(current)) { 1637 if (!try_to_freeze()) {
1639 refrigerator();
1640 } else {
1641 set_current_state(TASK_INTERRUPTIBLE); 1638 set_current_state(TASK_INTERRUPTIBLE);
1642 if (!kthread_should_stop() && 1639 if (!kthread_should_stop() &&
1643 !btrfs_transaction_blocked(root->fs_info)) 1640 !btrfs_transaction_blocked(root->fs_info))
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 97fbe939c050..034d98503229 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1081,7 +1081,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
1081again: 1081again:
1082 for (i = 0; i < num_pages; i++) { 1082 for (i = 0; i < num_pages; i++) {
1083 pages[i] = find_or_create_page(inode->i_mapping, index + i, 1083 pages[i] = find_or_create_page(inode->i_mapping, index + i,
1084 mask); 1084 mask | __GFP_WRITE);
1085 if (!pages[i]) { 1085 if (!pages[i]) {
1086 faili = i - 1; 1086 faili = i - 1;
1087 err = -ENOMEM; 1087 err = -ENOMEM;
@@ -1136,7 +1136,8 @@ again:
1136 GFP_NOFS); 1136 GFP_NOFS);
1137 } 1137 }
1138 for (i = 0; i < num_pages; i++) { 1138 for (i = 0; i < num_pages; i++) {
1139 clear_page_dirty_for_io(pages[i]); 1139 if (clear_page_dirty_for_io(pages[i]))
1140 account_page_redirty(pages[i]);
1140 set_page_extent_mapped(pages[i]); 1141 set_page_extent_mapped(pages[i]);
1141 WARN_ON(!PageLocked(pages[i])); 1142 WARN_ON(!PageLocked(pages[i]));
1142 } 1143 }
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index ec23d43d0c35..9a897bf79538 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -423,7 +423,7 @@ static void io_ctl_set_crc(struct io_ctl *io_ctl, int index)
423 } 423 }
424 424
425 if (index == 0) 425 if (index == 0)
426 offset = sizeof(u32) * io_ctl->num_pages;; 426 offset = sizeof(u32) * io_ctl->num_pages;
427 427
428 crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc, 428 crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
429 PAGE_CACHE_SIZE - offset); 429 PAGE_CACHE_SIZE - offset);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index fd1a06df5bc6..81b235a61f8c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1944,7 +1944,7 @@ enum btrfs_orphan_cleanup_state {
1944}; 1944};
1945 1945
1946/* 1946/*
1947 * This is called in transaction commmit time. If there are no orphan 1947 * This is called in transaction commit time. If there are no orphan
1948 * files in the subvolume, it removes orphan item and frees block_rsv 1948 * files in the subvolume, it removes orphan item and frees block_rsv
1949 * structure. 1949 * structure.
1950 */ 1950 */
@@ -4412,8 +4412,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4412 struct btrfs_root *root, 4412 struct btrfs_root *root,
4413 struct inode *dir, 4413 struct inode *dir,
4414 const char *name, int name_len, 4414 const char *name, int name_len,
4415 u64 ref_objectid, u64 objectid, int mode, 4415 u64 ref_objectid, u64 objectid,
4416 u64 *index) 4416 umode_t mode, u64 *index)
4417{ 4417{
4418 struct inode *inode; 4418 struct inode *inode;
4419 struct btrfs_inode_item *inode_item; 4419 struct btrfs_inode_item *inode_item;
@@ -4596,7 +4596,7 @@ static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
4596} 4596}
4597 4597
4598static int btrfs_mknod(struct inode *dir, struct dentry *dentry, 4598static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4599 int mode, dev_t rdev) 4599 umode_t mode, dev_t rdev)
4600{ 4600{
4601 struct btrfs_trans_handle *trans; 4601 struct btrfs_trans_handle *trans;
4602 struct btrfs_root *root = BTRFS_I(dir)->root; 4602 struct btrfs_root *root = BTRFS_I(dir)->root;
@@ -4665,7 +4665,7 @@ out_unlock:
4665} 4665}
4666 4666
4667static int btrfs_create(struct inode *dir, struct dentry *dentry, 4667static int btrfs_create(struct inode *dir, struct dentry *dentry,
4668 int mode, struct nameidata *nd) 4668 umode_t mode, struct nameidata *nd)
4669{ 4669{
4670 struct btrfs_trans_handle *trans; 4670 struct btrfs_trans_handle *trans;
4671 struct btrfs_root *root = BTRFS_I(dir)->root; 4671 struct btrfs_root *root = BTRFS_I(dir)->root;
@@ -4792,7 +4792,7 @@ fail:
4792 return err; 4792 return err;
4793} 4793}
4794 4794
4795static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 4795static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
4796{ 4796{
4797 struct inode *inode = NULL; 4797 struct inode *inode = NULL;
4798 struct btrfs_trans_handle *trans; 4798 struct btrfs_trans_handle *trans;
@@ -6761,7 +6761,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
6761static void btrfs_i_callback(struct rcu_head *head) 6761static void btrfs_i_callback(struct rcu_head *head)
6762{ 6762{
6763 struct inode *inode = container_of(head, struct inode, i_rcu); 6763 struct inode *inode = container_of(head, struct inode, i_rcu);
6764 INIT_LIST_HEAD(&inode->i_dentry);
6765 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); 6764 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
6766} 6765}
6767 6766
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index c04f02c7d5bb..5441ff1480fd 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -201,7 +201,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
201 } 201 }
202 } 202 }
203 203
204 ret = mnt_want_write(file->f_path.mnt); 204 ret = mnt_want_write_file(file);
205 if (ret) 205 if (ret)
206 goto out_unlock; 206 goto out_unlock;
207 207
@@ -259,7 +259,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
259 259
260 btrfs_end_transaction(trans, root); 260 btrfs_end_transaction(trans, root);
261 261
262 mnt_drop_write(file->f_path.mnt); 262 mnt_drop_write_file(file);
263 263
264 ret = 0; 264 ret = 0;
265 out_unlock: 265 out_unlock:
@@ -1855,7 +1855,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
1855 goto out; 1855 goto out;
1856 } 1856 }
1857 1857
1858 err = mnt_want_write(file->f_path.mnt); 1858 err = mnt_want_write_file(file);
1859 if (err) 1859 if (err)
1860 goto out; 1860 goto out;
1861 1861
@@ -1971,7 +1971,7 @@ out_dput:
1971 dput(dentry); 1971 dput(dentry);
1972out_unlock_dir: 1972out_unlock_dir:
1973 mutex_unlock(&dir->i_mutex); 1973 mutex_unlock(&dir->i_mutex);
1974 mnt_drop_write(file->f_path.mnt); 1974 mnt_drop_write_file(file);
1975out: 1975out:
1976 kfree(vol_args); 1976 kfree(vol_args);
1977 return err; 1977 return err;
@@ -1987,7 +1987,7 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
1987 if (btrfs_root_readonly(root)) 1987 if (btrfs_root_readonly(root))
1988 return -EROFS; 1988 return -EROFS;
1989 1989
1990 ret = mnt_want_write(file->f_path.mnt); 1990 ret = mnt_want_write_file(file);
1991 if (ret) 1991 if (ret)
1992 return ret; 1992 return ret;
1993 1993
@@ -2040,7 +2040,7 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
2040 ret = -EINVAL; 2040 ret = -EINVAL;
2041 } 2041 }
2042out: 2042out:
2043 mnt_drop_write(file->f_path.mnt); 2043 mnt_drop_write_file(file);
2044 return ret; 2044 return ret;
2045} 2045}
2046 2046
@@ -2195,7 +2195,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2195 if (btrfs_root_readonly(root)) 2195 if (btrfs_root_readonly(root))
2196 return -EROFS; 2196 return -EROFS;
2197 2197
2198 ret = mnt_want_write(file->f_path.mnt); 2198 ret = mnt_want_write_file(file);
2199 if (ret) 2199 if (ret)
2200 return ret; 2200 return ret;
2201 2201
@@ -2510,7 +2510,7 @@ out_unlock:
2510out_fput: 2510out_fput:
2511 fput(src_file); 2511 fput(src_file);
2512out_drop_write: 2512out_drop_write:
2513 mnt_drop_write(file->f_path.mnt); 2513 mnt_drop_write_file(file);
2514 return ret; 2514 return ret;
2515} 2515}
2516 2516
@@ -2549,7 +2549,7 @@ static long btrfs_ioctl_trans_start(struct file *file)
2549 if (btrfs_root_readonly(root)) 2549 if (btrfs_root_readonly(root))
2550 goto out; 2550 goto out;
2551 2551
2552 ret = mnt_want_write(file->f_path.mnt); 2552 ret = mnt_want_write_file(file);
2553 if (ret) 2553 if (ret)
2554 goto out; 2554 goto out;
2555 2555
@@ -2565,7 +2565,7 @@ static long btrfs_ioctl_trans_start(struct file *file)
2565 2565
2566out_drop: 2566out_drop:
2567 atomic_dec(&root->fs_info->open_ioctl_trans); 2567 atomic_dec(&root->fs_info->open_ioctl_trans);
2568 mnt_drop_write(file->f_path.mnt); 2568 mnt_drop_write_file(file);
2569out: 2569out:
2570 return ret; 2570 return ret;
2571} 2571}
@@ -2800,7 +2800,7 @@ long btrfs_ioctl_trans_end(struct file *file)
2800 2800
2801 atomic_dec(&root->fs_info->open_ioctl_trans); 2801 atomic_dec(&root->fs_info->open_ioctl_trans);
2802 2802
2803 mnt_drop_write(file->f_path.mnt); 2803 mnt_drop_write_file(file);
2804 return 0; 2804 return 0;
2805} 2805}
2806 2806
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 200f63bc6675..ae488aa1966a 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -40,7 +40,6 @@
40#include <linux/magic.h> 40#include <linux/magic.h>
41#include <linux/slab.h> 41#include <linux/slab.h>
42#include <linux/cleancache.h> 42#include <linux/cleancache.h>
43#include <linux/mnt_namespace.h>
44#include <linux/ratelimit.h> 43#include <linux/ratelimit.h>
45#include "compat.h" 44#include "compat.h"
46#include "delayed-inode.h" 45#include "delayed-inode.h"
@@ -662,9 +661,9 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
662 return ret; 661 return ret;
663} 662}
664 663
665static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) 664static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
666{ 665{
667 struct btrfs_root *root = btrfs_sb(vfs->mnt_sb); 666 struct btrfs_root *root = btrfs_sb(dentry->d_sb);
668 struct btrfs_fs_info *info = root->fs_info; 667 struct btrfs_fs_info *info = root->fs_info;
669 char *compress_type; 668 char *compress_type;
670 669
diff --git a/fs/buffer.c b/fs/buffer.c
index 19d8eb7fdc81..1a30db77af32 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -41,7 +41,6 @@
41#include <linux/bitops.h> 41#include <linux/bitops.h>
42#include <linux/mpage.h> 42#include <linux/mpage.h>
43#include <linux/bit_spinlock.h> 43#include <linux/bit_spinlock.h>
44#include <linux/cleancache.h>
45 44
46static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); 45static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
47 46
@@ -231,55 +230,6 @@ out:
231 return ret; 230 return ret;
232} 231}
233 232
234/* If invalidate_buffers() will trash dirty buffers, it means some kind
235 of fs corruption is going on. Trashing dirty data always imply losing
236 information that was supposed to be just stored on the physical layer
237 by the user.
238
239 Thus invalidate_buffers in general usage is not allwowed to trash
240 dirty buffers. For example ioctl(FLSBLKBUF) expects dirty data to
241 be preserved. These buffers are simply skipped.
242
243 We also skip buffers which are still in use. For example this can
244 happen if a userspace program is reading the block device.
245
246 NOTE: In the case where the user removed a removable-media-disk even if
247 there's still dirty data not synced on disk (due a bug in the device driver
248 or due an error of the user), by not destroying the dirty buffers we could
249 generate corruption also on the next media inserted, thus a parameter is
250 necessary to handle this case in the most safe way possible (trying
251 to not corrupt also the new disk inserted with the data belonging to
252 the old now corrupted disk). Also for the ramdisk the natural thing
253 to do in order to release the ramdisk memory is to destroy dirty buffers.
254
255 These are two special cases. Normal usage imply the device driver
256 to issue a sync on the device (without waiting I/O completion) and
257 then an invalidate_buffers call that doesn't trash dirty buffers.
258
259 For handling cache coherency with the blkdev pagecache the 'update' case
260 is been introduced. It is needed to re-read from disk any pinned
261 buffer. NOTE: re-reading from disk is destructive so we can do it only
262 when we assume nobody is changing the buffercache under our I/O and when
263 we think the disk contains more recent information than the buffercache.
264 The update == 1 pass marks the buffers we need to update, the update == 2
265 pass does the actual I/O. */
266void invalidate_bdev(struct block_device *bdev)
267{
268 struct address_space *mapping = bdev->bd_inode->i_mapping;
269
270 if (mapping->nrpages == 0)
271 return;
272
273 invalidate_bh_lrus();
274 lru_add_drain_all(); /* make sure all lru add caches are flushed */
275 invalidate_mapping_pages(mapping, 0, -1);
276 /* 99% of the time, we don't need to flush the cleancache on the bdev.
277 * But, for the strange corners, lets be cautious
278 */
279 cleancache_flush_inode(mapping);
280}
281EXPORT_SYMBOL(invalidate_bdev);
282
283/* 233/*
284 * Kick the writeback threads then try to free up some ZONE_NORMAL memory. 234 * Kick the writeback threads then try to free up some ZONE_NORMAL memory.
285 */ 235 */
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index 1064805e653b..67bef6d01484 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -11,7 +11,6 @@
11 11
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/mount.h> 13#include <linux/mount.h>
14#include <linux/buffer_head.h>
15#include "internal.h" 14#include "internal.h"
16 15
17#define list_to_page(head) (list_entry((head)->prev, struct page, lru)) 16#define list_to_page(head) (list_entry((head)->prev, struct page, lru))
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 8b53193e4f7c..b60fc8bfb3e9 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -928,7 +928,7 @@ static int send_cap_msg(struct ceph_mds_session *session,
928 u64 size, u64 max_size, 928 u64 size, u64 max_size,
929 struct timespec *mtime, struct timespec *atime, 929 struct timespec *mtime, struct timespec *atime,
930 u64 time_warp_seq, 930 u64 time_warp_seq,
931 uid_t uid, gid_t gid, mode_t mode, 931 uid_t uid, gid_t gid, umode_t mode,
932 u64 xattr_version, 932 u64 xattr_version,
933 struct ceph_buffer *xattrs_buf, 933 struct ceph_buffer *xattrs_buf,
934 u64 follows) 934 u64 follows)
@@ -1078,7 +1078,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1078 u64 size, max_size; 1078 u64 size, max_size;
1079 struct timespec mtime, atime; 1079 struct timespec mtime, atime;
1080 int wake = 0; 1080 int wake = 0;
1081 mode_t mode; 1081 umode_t mode;
1082 uid_t uid; 1082 uid_t uid;
1083 gid_t gid; 1083 gid_t gid;
1084 struct ceph_mds_session *session; 1084 struct ceph_mds_session *session;
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 98954003a8d3..618246bc2196 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -666,7 +666,7 @@ int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry)
666} 666}
667 667
668static int ceph_mknod(struct inode *dir, struct dentry *dentry, 668static int ceph_mknod(struct inode *dir, struct dentry *dentry,
669 int mode, dev_t rdev) 669 umode_t mode, dev_t rdev)
670{ 670{
671 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); 671 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
672 struct ceph_mds_client *mdsc = fsc->mdsc; 672 struct ceph_mds_client *mdsc = fsc->mdsc;
@@ -676,7 +676,7 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry,
676 if (ceph_snap(dir) != CEPH_NOSNAP) 676 if (ceph_snap(dir) != CEPH_NOSNAP)
677 return -EROFS; 677 return -EROFS;
678 678
679 dout("mknod in dir %p dentry %p mode 0%o rdev %d\n", 679 dout("mknod in dir %p dentry %p mode 0%ho rdev %d\n",
680 dir, dentry, mode, rdev); 680 dir, dentry, mode, rdev);
681 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_MKNOD, USE_AUTH_MDS); 681 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_MKNOD, USE_AUTH_MDS);
682 if (IS_ERR(req)) { 682 if (IS_ERR(req)) {
@@ -699,7 +699,7 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry,
699 return err; 699 return err;
700} 700}
701 701
702static int ceph_create(struct inode *dir, struct dentry *dentry, int mode, 702static int ceph_create(struct inode *dir, struct dentry *dentry, umode_t mode,
703 struct nameidata *nd) 703 struct nameidata *nd)
704{ 704{
705 dout("create in dir %p dentry %p name '%.*s'\n", 705 dout("create in dir %p dentry %p name '%.*s'\n",
@@ -753,7 +753,7 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
753 return err; 753 return err;
754} 754}
755 755
756static int ceph_mkdir(struct inode *dir, struct dentry *dentry, int mode) 756static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
757{ 757{
758 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); 758 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
759 struct ceph_mds_client *mdsc = fsc->mdsc; 759 struct ceph_mds_client *mdsc = fsc->mdsc;
@@ -767,7 +767,7 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, int mode)
767 dout("mksnap dir %p snap '%.*s' dn %p\n", dir, 767 dout("mksnap dir %p snap '%.*s' dn %p\n", dir,
768 dentry->d_name.len, dentry->d_name.name, dentry); 768 dentry->d_name.len, dentry->d_name.name, dentry);
769 } else if (ceph_snap(dir) == CEPH_NOSNAP) { 769 } else if (ceph_snap(dir) == CEPH_NOSNAP) {
770 dout("mkdir dir %p dn %p mode 0%o\n", dir, dentry, mode); 770 dout("mkdir dir %p dn %p mode 0%ho\n", dir, dentry, mode);
771 op = CEPH_MDS_OP_MKDIR; 771 op = CEPH_MDS_OP_MKDIR;
772 } else { 772 } else {
773 goto out; 773 goto out;
@@ -870,7 +870,7 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
870 } else if (ceph_snap(dir) == CEPH_NOSNAP) { 870 } else if (ceph_snap(dir) == CEPH_NOSNAP) {
871 dout("unlink/rmdir dir %p dn %p inode %p\n", 871 dout("unlink/rmdir dir %p dn %p inode %p\n",
872 dir, dentry, inode); 872 dir, dentry, inode);
873 op = ((dentry->d_inode->i_mode & S_IFMT) == S_IFDIR) ? 873 op = S_ISDIR(dentry->d_inode->i_mode) ?
874 CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK; 874 CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK;
875 } else 875 } else
876 goto out; 876 goto out;
@@ -973,7 +973,7 @@ static int dentry_lease_is_valid(struct dentry *dentry)
973 973
974 spin_lock(&dentry->d_lock); 974 spin_lock(&dentry->d_lock);
975 di = ceph_dentry(dentry); 975 di = ceph_dentry(dentry);
976 if (di && di->lease_session) { 976 if (di->lease_session) {
977 s = di->lease_session; 977 s = di->lease_session;
978 spin_lock(&s->s_cap_lock); 978 spin_lock(&s->s_cap_lock);
979 gen = s->s_cap_gen; 979 gen = s->s_cap_gen;
@@ -1072,13 +1072,11 @@ static void ceph_d_release(struct dentry *dentry)
1072 struct ceph_dentry_info *di = ceph_dentry(dentry); 1072 struct ceph_dentry_info *di = ceph_dentry(dentry);
1073 1073
1074 dout("d_release %p\n", dentry); 1074 dout("d_release %p\n", dentry);
1075 if (di) { 1075 ceph_dentry_lru_del(dentry);
1076 ceph_dentry_lru_del(dentry); 1076 if (di->lease_session)
1077 if (di->lease_session) 1077 ceph_put_mds_session(di->lease_session);
1078 ceph_put_mds_session(di->lease_session); 1078 kmem_cache_free(ceph_dentry_cachep, di);
1079 kmem_cache_free(ceph_dentry_cachep, di); 1079 dentry->d_fsdata = NULL;
1080 dentry->d_fsdata = NULL;
1081 }
1082} 1080}
1083 1081
1084static int ceph_snapdir_d_revalidate(struct dentry *dentry, 1082static int ceph_snapdir_d_revalidate(struct dentry *dentry,
@@ -1096,17 +1094,36 @@ static int ceph_snapdir_d_revalidate(struct dentry *dentry,
1096 */ 1094 */
1097void ceph_dir_set_complete(struct inode *inode) 1095void ceph_dir_set_complete(struct inode *inode)
1098{ 1096{
1099 /* not yet implemented */ 1097 struct dentry *dentry = d_find_any_alias(inode);
1098
1099 if (dentry && ceph_dentry(dentry) &&
1100 ceph_test_mount_opt(ceph_sb_to_client(dentry->d_sb), DCACHE)) {
1101 dout(" marking %p (%p) complete\n", inode, dentry);
1102 set_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
1103 }
1104 dput(dentry);
1100} 1105}
1101 1106
1102void ceph_dir_clear_complete(struct inode *inode) 1107void ceph_dir_clear_complete(struct inode *inode)
1103{ 1108{
1104 /* not yet implemented */ 1109 struct dentry *dentry = d_find_any_alias(inode);
1110
1111 if (dentry && ceph_dentry(dentry)) {
1112 dout(" marking %p (%p) complete\n", inode, dentry);
1113 set_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
1114 }
1115 dput(dentry);
1105} 1116}
1106 1117
1107bool ceph_dir_test_complete(struct inode *inode) 1118bool ceph_dir_test_complete(struct inode *inode)
1108{ 1119{
1109 /* not yet implemented */ 1120 struct dentry *dentry = d_find_any_alias(inode);
1121
1122 if (dentry && ceph_dentry(dentry)) {
1123 dout(" marking %p (%p) NOT complete\n", inode, dentry);
1124 clear_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
1125 }
1126 dput(dentry);
1110 return false; 1127 return false;
1111} 1128}
1112 1129
@@ -1220,6 +1237,7 @@ static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end,
1220 do { 1237 do {
1221 ceph_mdsc_get_request(req); 1238 ceph_mdsc_get_request(req);
1222 spin_unlock(&ci->i_unsafe_lock); 1239 spin_unlock(&ci->i_unsafe_lock);
1240
1223 dout("dir_fsync %p wait on tid %llu (until %llu)\n", 1241 dout("dir_fsync %p wait on tid %llu (until %llu)\n",
1224 inode, req->r_tid, last_tid); 1242 inode, req->r_tid, last_tid);
1225 if (req->r_timeout) { 1243 if (req->r_timeout) {
@@ -1232,9 +1250,9 @@ static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end,
1232 } else { 1250 } else {
1233 wait_for_completion(&req->r_safe_completion); 1251 wait_for_completion(&req->r_safe_completion);
1234 } 1252 }
1235 spin_lock(&ci->i_unsafe_lock);
1236 ceph_mdsc_put_request(req); 1253 ceph_mdsc_put_request(req);
1237 1254
1255 spin_lock(&ci->i_unsafe_lock);
1238 if (ret || list_empty(head)) 1256 if (ret || list_empty(head))
1239 break; 1257 break;
1240 req = list_entry(head->next, 1258 req = list_entry(head->next,
@@ -1259,13 +1277,11 @@ void ceph_dentry_lru_add(struct dentry *dn)
1259 1277
1260 dout("dentry_lru_add %p %p '%.*s'\n", di, dn, 1278 dout("dentry_lru_add %p %p '%.*s'\n", di, dn,
1261 dn->d_name.len, dn->d_name.name); 1279 dn->d_name.len, dn->d_name.name);
1262 if (di) { 1280 mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
1263 mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; 1281 spin_lock(&mdsc->dentry_lru_lock);
1264 spin_lock(&mdsc->dentry_lru_lock); 1282 list_add_tail(&di->lru, &mdsc->dentry_lru);
1265 list_add_tail(&di->lru, &mdsc->dentry_lru); 1283 mdsc->num_dentry++;
1266 mdsc->num_dentry++; 1284 spin_unlock(&mdsc->dentry_lru_lock);
1267 spin_unlock(&mdsc->dentry_lru_lock);
1268 }
1269} 1285}
1270 1286
1271void ceph_dentry_lru_touch(struct dentry *dn) 1287void ceph_dentry_lru_touch(struct dentry *dn)
@@ -1275,12 +1291,10 @@ void ceph_dentry_lru_touch(struct dentry *dn)
1275 1291
1276 dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn, 1292 dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn,
1277 dn->d_name.len, dn->d_name.name, di->offset); 1293 dn->d_name.len, dn->d_name.name, di->offset);
1278 if (di) { 1294 mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
1279 mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; 1295 spin_lock(&mdsc->dentry_lru_lock);
1280 spin_lock(&mdsc->dentry_lru_lock); 1296 list_move_tail(&di->lru, &mdsc->dentry_lru);
1281 list_move_tail(&di->lru, &mdsc->dentry_lru); 1297 spin_unlock(&mdsc->dentry_lru_lock);
1282 spin_unlock(&mdsc->dentry_lru_lock);
1283 }
1284} 1298}
1285 1299
1286void ceph_dentry_lru_del(struct dentry *dn) 1300void ceph_dentry_lru_del(struct dentry *dn)
@@ -1290,13 +1304,11 @@ void ceph_dentry_lru_del(struct dentry *dn)
1290 1304
1291 dout("dentry_lru_del %p %p '%.*s'\n", di, dn, 1305 dout("dentry_lru_del %p %p '%.*s'\n", di, dn,
1292 dn->d_name.len, dn->d_name.name); 1306 dn->d_name.len, dn->d_name.name);
1293 if (di) { 1307 mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
1294 mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; 1308 spin_lock(&mdsc->dentry_lru_lock);
1295 spin_lock(&mdsc->dentry_lru_lock); 1309 list_del_init(&di->lru);
1296 list_del_init(&di->lru); 1310 mdsc->num_dentry--;
1297 mdsc->num_dentry--; 1311 spin_unlock(&mdsc->dentry_lru_lock);
1298 spin_unlock(&mdsc->dentry_lru_lock);
1299 }
1300} 1312}
1301 1313
1302/* 1314/*
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 9fbcdecaaccd..fbb2a643ef10 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -56,9 +56,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len,
56 return -EINVAL; 56 return -EINVAL;
57 57
58 spin_lock(&dentry->d_lock); 58 spin_lock(&dentry->d_lock);
59 parent = dget(dentry->d_parent); 59 parent = dentry->d_parent;
60 spin_unlock(&dentry->d_lock);
61
62 if (*max_len >= connected_handle_length) { 60 if (*max_len >= connected_handle_length) {
63 dout("encode_fh %p connectable\n", dentry); 61 dout("encode_fh %p connectable\n", dentry);
64 cfh->ino = ceph_ino(dentry->d_inode); 62 cfh->ino = ceph_ino(dentry->d_inode);
@@ -81,7 +79,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len,
81 *max_len = handle_length; 79 *max_len = handle_length;
82 type = 255; 80 type = 255;
83 } 81 }
84 dput(parent); 82 spin_unlock(&dentry->d_lock);
85 return type; 83 return type;
86} 84}
87 85
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 87fb132fb330..2c489378b4cd 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -384,7 +384,6 @@ static void ceph_i_callback(struct rcu_head *head)
384 struct inode *inode = container_of(head, struct inode, i_rcu); 384 struct inode *inode = container_of(head, struct inode, i_rcu);
385 struct ceph_inode_info *ci = ceph_inode(inode); 385 struct ceph_inode_info *ci = ceph_inode(inode);
386 386
387 INIT_LIST_HEAD(&inode->i_dentry);
388 kmem_cache_free(ceph_inode_cachep, ci); 387 kmem_cache_free(ceph_inode_cachep, ci);
389} 388}
390 389
@@ -851,11 +850,12 @@ static void ceph_set_dentry_offset(struct dentry *dn)
851{ 850{
852 struct dentry *dir = dn->d_parent; 851 struct dentry *dir = dn->d_parent;
853 struct inode *inode = dir->d_inode; 852 struct inode *inode = dir->d_inode;
854 struct ceph_inode_info *ci = ceph_inode(inode); 853 struct ceph_inode_info *ci;
855 struct ceph_dentry_info *di; 854 struct ceph_dentry_info *di;
856 855
857 BUG_ON(!inode); 856 BUG_ON(!inode);
858 857
858 ci = ceph_inode(inode);
859 di = ceph_dentry(dn); 859 di = ceph_dentry(dn);
860 860
861 spin_lock(&ci->i_ceph_lock); 861 spin_lock(&ci->i_ceph_lock);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 6203d805eb45..23ab6a3f1825 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2772,7 +2772,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
2772 di = ceph_dentry(dentry); 2772 di = ceph_dentry(dentry);
2773 switch (h->action) { 2773 switch (h->action) {
2774 case CEPH_MDS_LEASE_REVOKE: 2774 case CEPH_MDS_LEASE_REVOKE:
2775 if (di && di->lease_session == session) { 2775 if (di->lease_session == session) {
2776 if (ceph_seq_cmp(di->lease_seq, seq) > 0) 2776 if (ceph_seq_cmp(di->lease_seq, seq) > 0)
2777 h->seq = cpu_to_le32(di->lease_seq); 2777 h->seq = cpu_to_le32(di->lease_seq);
2778 __ceph_mdsc_drop_dentry_lease(dentry); 2778 __ceph_mdsc_drop_dentry_lease(dentry);
@@ -2781,7 +2781,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
2781 break; 2781 break;
2782 2782
2783 case CEPH_MDS_LEASE_RENEW: 2783 case CEPH_MDS_LEASE_RENEW:
2784 if (di && di->lease_session == session && 2784 if (di->lease_session == session &&
2785 di->lease_gen == session->s_cap_gen && 2785 di->lease_gen == session->s_cap_gen &&
2786 di->lease_renew_from && 2786 di->lease_renew_from &&
2787 di->lease_renew_after == 0) { 2787 di->lease_renew_after == 0) {
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index b48f15f101a0..00de2c9568cd 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -131,6 +131,8 @@ enum {
131 Opt_rbytes, 131 Opt_rbytes,
132 Opt_norbytes, 132 Opt_norbytes,
133 Opt_noasyncreaddir, 133 Opt_noasyncreaddir,
134 Opt_dcache,
135 Opt_nodcache,
134 Opt_ino32, 136 Opt_ino32,
135}; 137};
136 138
@@ -152,6 +154,8 @@ static match_table_t fsopt_tokens = {
152 {Opt_rbytes, "rbytes"}, 154 {Opt_rbytes, "rbytes"},
153 {Opt_norbytes, "norbytes"}, 155 {Opt_norbytes, "norbytes"},
154 {Opt_noasyncreaddir, "noasyncreaddir"}, 156 {Opt_noasyncreaddir, "noasyncreaddir"},
157 {Opt_dcache, "dcache"},
158 {Opt_nodcache, "nodcache"},
155 {Opt_ino32, "ino32"}, 159 {Opt_ino32, "ino32"},
156 {-1, NULL} 160 {-1, NULL}
157}; 161};
@@ -231,6 +235,12 @@ static int parse_fsopt_token(char *c, void *private)
231 case Opt_noasyncreaddir: 235 case Opt_noasyncreaddir:
232 fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR; 236 fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR;
233 break; 237 break;
238 case Opt_dcache:
239 fsopt->flags |= CEPH_MOUNT_OPT_DCACHE;
240 break;
241 case Opt_nodcache:
242 fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE;
243 break;
234 case Opt_ino32: 244 case Opt_ino32:
235 fsopt->flags |= CEPH_MOUNT_OPT_INO32; 245 fsopt->flags |= CEPH_MOUNT_OPT_INO32;
236 break; 246 break;
@@ -341,11 +351,11 @@ out:
341/** 351/**
342 * ceph_show_options - Show mount options in /proc/mounts 352 * ceph_show_options - Show mount options in /proc/mounts
343 * @m: seq_file to write to 353 * @m: seq_file to write to
344 * @mnt: mount descriptor 354 * @root: root of that (sub)tree
345 */ 355 */
346static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt) 356static int ceph_show_options(struct seq_file *m, struct dentry *root)
347{ 357{
348 struct ceph_fs_client *fsc = ceph_sb_to_client(mnt->mnt_sb); 358 struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb);
349 struct ceph_mount_options *fsopt = fsc->mount_options; 359 struct ceph_mount_options *fsopt = fsc->mount_options;
350 struct ceph_options *opt = fsc->client->options; 360 struct ceph_options *opt = fsc->client->options;
351 361
@@ -377,6 +387,10 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt)
377 seq_puts(m, ",norbytes"); 387 seq_puts(m, ",norbytes");
378 if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR) 388 if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR)
379 seq_puts(m, ",noasyncreaddir"); 389 seq_puts(m, ",noasyncreaddir");
390 if (fsopt->flags & CEPH_MOUNT_OPT_DCACHE)
391 seq_puts(m, ",dcache");
392 else
393 seq_puts(m, ",nodcache");
380 394
381 if (fsopt->wsize) 395 if (fsopt->wsize)
382 seq_printf(m, ",wsize=%d", fsopt->wsize); 396 seq_printf(m, ",wsize=%d", fsopt->wsize);
@@ -636,19 +650,26 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
636 req->r_num_caps = 2; 650 req->r_num_caps = 2;
637 err = ceph_mdsc_do_request(mdsc, NULL, req); 651 err = ceph_mdsc_do_request(mdsc, NULL, req);
638 if (err == 0) { 652 if (err == 0) {
653 struct inode *inode = req->r_target_inode;
654 req->r_target_inode = NULL;
639 dout("open_root_inode success\n"); 655 dout("open_root_inode success\n");
640 if (ceph_ino(req->r_target_inode) == CEPH_INO_ROOT && 656 if (ceph_ino(inode) == CEPH_INO_ROOT &&
641 fsc->sb->s_root == NULL) { 657 fsc->sb->s_root == NULL) {
642 root = d_alloc_root(req->r_target_inode); 658 root = d_alloc_root(inode);
643 ceph_init_dentry(root); 659 if (!root) {
660 iput(inode);
661 root = ERR_PTR(-ENOMEM);
662 goto out;
663 }
644 } else { 664 } else {
645 root = d_obtain_alias(req->r_target_inode); 665 root = d_obtain_alias(inode);
646 } 666 }
647 req->r_target_inode = NULL; 667 ceph_init_dentry(root);
648 dout("open_root_inode success, root dentry is %p\n", root); 668 dout("open_root_inode success, root dentry is %p\n", root);
649 } else { 669 } else {
650 root = ERR_PTR(err); 670 root = ERR_PTR(err);
651 } 671 }
672out:
652 ceph_mdsc_put_request(req); 673 ceph_mdsc_put_request(req);
653 return root; 674 return root;
654} 675}
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index edcbf3774a56..1421f3d875a2 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -28,6 +28,7 @@
28#define CEPH_MOUNT_OPT_RBYTES (1<<5) /* dir st_bytes = rbytes */ 28#define CEPH_MOUNT_OPT_RBYTES (1<<5) /* dir st_bytes = rbytes */
29#define CEPH_MOUNT_OPT_NOASYNCREADDIR (1<<7) /* no dcache readdir */ 29#define CEPH_MOUNT_OPT_NOASYNCREADDIR (1<<7) /* no dcache readdir */
30#define CEPH_MOUNT_OPT_INO32 (1<<8) /* 32 bit inos */ 30#define CEPH_MOUNT_OPT_INO32 (1<<8) /* 32 bit inos */
31#define CEPH_MOUNT_OPT_DCACHE (1<<9) /* use dcache for readdir etc */
31 32
32#define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES) 33#define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES)
33 34
@@ -136,7 +137,7 @@ struct ceph_cap_snap {
136 int issued, dirty; 137 int issued, dirty;
137 struct ceph_snap_context *context; 138 struct ceph_snap_context *context;
138 139
139 mode_t mode; 140 umode_t mode;
140 uid_t uid; 141 uid_t uid;
141 gid_t gid; 142 gid_t gid;
142 143
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index a5e36e4488a7..857214ae8c08 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -818,6 +818,7 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
818 struct ceph_vxattr_cb *vxattrs = ceph_inode_vxattrs(inode); 818 struct ceph_vxattr_cb *vxattrs = ceph_inode_vxattrs(inode);
819 int issued; 819 int issued;
820 int err; 820 int err;
821 int required_blob_size;
821 int dirty; 822 int dirty;
822 823
823 if (ceph_snap(inode) != CEPH_NOSNAP) 824 if (ceph_snap(inode) != CEPH_NOSNAP)
@@ -833,14 +834,34 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
833 return -EOPNOTSUPP; 834 return -EOPNOTSUPP;
834 } 835 }
835 836
837 err = -ENOMEM;
836 spin_lock(&ci->i_ceph_lock); 838 spin_lock(&ci->i_ceph_lock);
837 __build_xattrs(inode); 839 __build_xattrs(inode);
840retry:
838 issued = __ceph_caps_issued(ci, NULL); 841 issued = __ceph_caps_issued(ci, NULL);
839 dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued)); 842 dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued));
840 843
841 if (!(issued & CEPH_CAP_XATTR_EXCL)) 844 if (!(issued & CEPH_CAP_XATTR_EXCL))
842 goto do_sync; 845 goto do_sync;
843 846
847 required_blob_size = __get_required_blob_size(ci, 0, 0);
848
849 if (!ci->i_xattrs.prealloc_blob ||
850 required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
851 struct ceph_buffer *blob;
852
853 spin_unlock(&ci->i_ceph_lock);
854 dout(" preaallocating new blob size=%d\n", required_blob_size);
855 blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
856 if (!blob)
857 goto out;
858 spin_lock(&ci->i_ceph_lock);
859 if (ci->i_xattrs.prealloc_blob)
860 ceph_buffer_put(ci->i_xattrs.prealloc_blob);
861 ci->i_xattrs.prealloc_blob = blob;
862 goto retry;
863 }
864
844 err = __remove_xattr_by_name(ceph_inode(inode), name); 865 err = __remove_xattr_by_name(ceph_inode(inode), name);
845 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); 866 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
846 ci->i_xattrs.dirty = true; 867 ci->i_xattrs.dirty = true;
@@ -853,6 +874,7 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
853do_sync: 874do_sync:
854 spin_unlock(&ci->i_ceph_lock); 875 spin_unlock(&ci->i_ceph_lock);
855 err = ceph_send_removexattr(dentry, name); 876 err = ceph_send_removexattr(dentry, name);
877out:
856 return err; 878 return err;
857} 879}
858 880
diff --git a/fs/char_dev.c b/fs/char_dev.c
index dca9e5e0f73b..3f152b92a94a 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -272,7 +272,7 @@ int __register_chrdev(unsigned int major, unsigned int baseminor,
272 cd = __register_chrdev_region(major, baseminor, count, name); 272 cd = __register_chrdev_region(major, baseminor, count, name);
273 if (IS_ERR(cd)) 273 if (IS_ERR(cd))
274 return PTR_ERR(cd); 274 return PTR_ERR(cd);
275 275
276 cdev = cdev_alloc(); 276 cdev = cdev_alloc();
277 if (!cdev) 277 if (!cdev)
278 goto out2; 278 goto out2;
@@ -280,7 +280,7 @@ int __register_chrdev(unsigned int major, unsigned int baseminor,
280 cdev->owner = fops->owner; 280 cdev->owner = fops->owner;
281 cdev->ops = fops; 281 cdev->ops = fops;
282 kobject_set_name(&cdev->kobj, "%s", name); 282 kobject_set_name(&cdev->kobj, "%s", name);
283 283
284 err = cdev_add(cdev, MKDEV(cd->major, baseminor), count); 284 err = cdev_add(cdev, MKDEV(cd->major, baseminor), count);
285 if (err) 285 if (err)
286 goto out; 286 goto out;
@@ -405,7 +405,7 @@ static int chrdev_open(struct inode *inode, struct file *filp)
405 goto out_cdev_put; 405 goto out_cdev_put;
406 406
407 if (filp->f_op->open) { 407 if (filp->f_op->open) {
408 ret = filp->f_op->open(inode,filp); 408 ret = filp->f_op->open(inode, filp);
409 if (ret) 409 if (ret)
410 goto out_cdev_put; 410 goto out_cdev_put;
411 } 411 }
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 500d65859279..c865bfdfe819 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -59,8 +59,8 @@ struct cifs_sb_info {
59 gid_t mnt_gid; 59 gid_t mnt_gid;
60 uid_t mnt_backupuid; 60 uid_t mnt_backupuid;
61 gid_t mnt_backupgid; 61 gid_t mnt_backupgid;
62 mode_t mnt_file_mode; 62 umode_t mnt_file_mode;
63 mode_t mnt_dir_mode; 63 umode_t mnt_dir_mode;
64 unsigned int mnt_cifs_flags; 64 unsigned int mnt_cifs_flags;
65 char *mountdata; /* options received at mount time or via DFS refs */ 65 char *mountdata; /* options received at mount time or via DFS refs */
66 struct backing_dev_info bdi; 66 struct backing_dev_info bdi;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 8f1fe324162b..b1fd382d1952 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -343,9 +343,9 @@ cifs_show_security(struct seq_file *s, struct TCP_Server_Info *server)
343 * ones are. 343 * ones are.
344 */ 344 */
345static int 345static int
346cifs_show_options(struct seq_file *s, struct vfsmount *m) 346cifs_show_options(struct seq_file *s, struct dentry *root)
347{ 347{
348 struct cifs_sb_info *cifs_sb = CIFS_SB(m->mnt_sb); 348 struct cifs_sb_info *cifs_sb = CIFS_SB(root->d_sb);
349 struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); 349 struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
350 struct sockaddr *srcaddr; 350 struct sockaddr *srcaddr;
351 srcaddr = (struct sockaddr *)&tcon->ses->server->srcaddr; 351 srcaddr = (struct sockaddr *)&tcon->ses->server->srcaddr;
@@ -393,7 +393,7 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
393 cifs_show_address(s, tcon->ses->server); 393 cifs_show_address(s, tcon->ses->server);
394 394
395 if (!tcon->unix_ext) 395 if (!tcon->unix_ext)
396 seq_printf(s, ",file_mode=0%o,dir_mode=0%o", 396 seq_printf(s, ",file_mode=0%ho,dir_mode=0%ho",
397 cifs_sb->mnt_file_mode, 397 cifs_sb->mnt_file_mode,
398 cifs_sb->mnt_dir_mode); 398 cifs_sb->mnt_dir_mode);
399 if (tcon->seal) 399 if (tcon->seal)
@@ -430,7 +430,7 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
430 seq_printf(s, ",cifsacl"); 430 seq_printf(s, ",cifsacl");
431 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) 431 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)
432 seq_printf(s, ",dynperm"); 432 seq_printf(s, ",dynperm");
433 if (m->mnt_sb->s_flags & MS_POSIXACL) 433 if (root->d_sb->s_flags & MS_POSIXACL)
434 seq_printf(s, ",acl"); 434 seq_printf(s, ",acl");
435 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) 435 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS)
436 seq_printf(s, ",mfsymlinks"); 436 seq_printf(s, ",mfsymlinks");
@@ -488,7 +488,7 @@ static void cifs_umount_begin(struct super_block *sb)
488} 488}
489 489
490#ifdef CONFIG_CIFS_STATS2 490#ifdef CONFIG_CIFS_STATS2
491static int cifs_show_stats(struct seq_file *s, struct vfsmount *mnt) 491static int cifs_show_stats(struct seq_file *s, struct dentry *root)
492{ 492{
493 /* BB FIXME */ 493 /* BB FIXME */
494 return 0; 494 return 0;
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 30ff56005d8f..fe5ecf1b422a 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -44,14 +44,14 @@ extern const struct address_space_operations cifs_addr_ops_smallbuf;
44/* Functions related to inodes */ 44/* Functions related to inodes */
45extern const struct inode_operations cifs_dir_inode_ops; 45extern const struct inode_operations cifs_dir_inode_ops;
46extern struct inode *cifs_root_iget(struct super_block *); 46extern struct inode *cifs_root_iget(struct super_block *);
47extern int cifs_create(struct inode *, struct dentry *, int, 47extern int cifs_create(struct inode *, struct dentry *, umode_t,
48 struct nameidata *); 48 struct nameidata *);
49extern struct dentry *cifs_lookup(struct inode *, struct dentry *, 49extern struct dentry *cifs_lookup(struct inode *, struct dentry *,
50 struct nameidata *); 50 struct nameidata *);
51extern int cifs_unlink(struct inode *dir, struct dentry *dentry); 51extern int cifs_unlink(struct inode *dir, struct dentry *dentry);
52extern int cifs_hardlink(struct dentry *, struct inode *, struct dentry *); 52extern int cifs_hardlink(struct dentry *, struct inode *, struct dentry *);
53extern int cifs_mknod(struct inode *, struct dentry *, int, dev_t); 53extern int cifs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
54extern int cifs_mkdir(struct inode *, struct dentry *, int); 54extern int cifs_mkdir(struct inode *, struct dentry *, umode_t);
55extern int cifs_rmdir(struct inode *, struct dentry *); 55extern int cifs_rmdir(struct inode *, struct dentry *);
56extern int cifs_rename(struct inode *, struct dentry *, struct inode *, 56extern int cifs_rename(struct inode *, struct dentry *, struct inode *,
57 struct dentry *); 57 struct dentry *);
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 8238aa13e01c..ba53c1c6c6cc 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -169,8 +169,8 @@ struct smb_vol {
169 gid_t linux_gid; 169 gid_t linux_gid;
170 uid_t backupuid; 170 uid_t backupuid;
171 gid_t backupgid; 171 gid_t backupgid;
172 mode_t file_mode; 172 umode_t file_mode;
173 mode_t dir_mode; 173 umode_t dir_mode;
174 unsigned secFlg; 174 unsigned secFlg;
175 bool retry:1; 175 bool retry:1;
176 bool intr:1; 176 bool intr:1;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index f3670cf72587..4666780f315d 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2819,7 +2819,7 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
2819 cifs_sb->mnt_backupgid = pvolume_info->backupgid; 2819 cifs_sb->mnt_backupgid = pvolume_info->backupgid;
2820 cifs_sb->mnt_file_mode = pvolume_info->file_mode; 2820 cifs_sb->mnt_file_mode = pvolume_info->file_mode;
2821 cifs_sb->mnt_dir_mode = pvolume_info->dir_mode; 2821 cifs_sb->mnt_dir_mode = pvolume_info->dir_mode;
2822 cFYI(1, "file mode: 0x%x dir mode: 0x%x", 2822 cFYI(1, "file mode: 0x%hx dir mode: 0x%hx",
2823 cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode); 2823 cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode);
2824 2824
2825 cifs_sb->actimeo = pvolume_info->actimeo; 2825 cifs_sb->actimeo = pvolume_info->actimeo;
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index d7eeb9d3ed6f..df8fecb5b993 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -136,7 +136,7 @@ cifs_bp_rename_retry:
136/* Inode operations in similar order to how they appear in Linux file fs.h */ 136/* Inode operations in similar order to how they appear in Linux file fs.h */
137 137
138int 138int
139cifs_create(struct inode *inode, struct dentry *direntry, int mode, 139cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
140 struct nameidata *nd) 140 struct nameidata *nd)
141{ 141{
142 int rc = -ENOENT; 142 int rc = -ENOENT;
@@ -355,7 +355,7 @@ cifs_create_out:
355 return rc; 355 return rc;
356} 356}
357 357
358int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, 358int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode,
359 dev_t device_number) 359 dev_t device_number)
360{ 360{
361 int rc = -EPERM; 361 int rc = -EPERM;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index e851d5b8931e..a5f54b7d9822 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1264,7 +1264,7 @@ unlink_out:
1264 return rc; 1264 return rc;
1265} 1265}
1266 1266
1267int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) 1267int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode)
1268{ 1268{
1269 int rc = 0, tmprc; 1269 int rc = 0, tmprc;
1270 int xid; 1270 int xid;
@@ -1275,7 +1275,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
1275 struct inode *newinode = NULL; 1275 struct inode *newinode = NULL;
1276 struct cifs_fattr fattr; 1276 struct cifs_fattr fattr;
1277 1277
1278 cFYI(1, "In cifs_mkdir, mode = 0x%x inode = 0x%p", mode, inode); 1278 cFYI(1, "In cifs_mkdir, mode = 0x%hx inode = 0x%p", mode, inode);
1279 1279
1280 cifs_sb = CIFS_SB(inode->i_sb); 1280 cifs_sb = CIFS_SB(inode->i_sb);
1281 tlink = cifs_sb_tlink(cifs_sb); 1281 tlink = cifs_sb_tlink(cifs_sb);
diff --git a/fs/coda/cnode.c b/fs/coda/cnode.c
index 6475877b0763..911cf30d057d 100644
--- a/fs/coda/cnode.c
+++ b/fs/coda/cnode.c
@@ -88,24 +88,21 @@ struct inode * coda_iget(struct super_block * sb, struct CodaFid * fid,
88 - link the two up if this is needed 88 - link the two up if this is needed
89 - fill in the attributes 89 - fill in the attributes
90*/ 90*/
91int coda_cnode_make(struct inode **inode, struct CodaFid *fid, struct super_block *sb) 91struct inode *coda_cnode_make(struct CodaFid *fid, struct super_block *sb)
92{ 92{
93 struct coda_vattr attr; 93 struct coda_vattr attr;
94 struct inode *inode;
94 int error; 95 int error;
95 96
96 /* We get inode numbers from Venus -- see venus source */ 97 /* We get inode numbers from Venus -- see venus source */
97 error = venus_getattr(sb, fid, &attr); 98 error = venus_getattr(sb, fid, &attr);
98 if ( error ) { 99 if (error)
99 *inode = NULL; 100 return ERR_PTR(error);
100 return error;
101 }
102 101
103 *inode = coda_iget(sb, fid, &attr); 102 inode = coda_iget(sb, fid, &attr);
104 if ( IS_ERR(*inode) ) { 103 if (IS_ERR(inode))
105 printk("coda_cnode_make: coda_iget failed\n"); 104 printk("coda_cnode_make: coda_iget failed\n");
106 return PTR_ERR(*inode); 105 return inode;
107 }
108 return 0;
109} 106}
110 107
111 108
@@ -156,19 +153,16 @@ struct inode *coda_fid_to_inode(struct CodaFid *fid, struct super_block *sb)
156} 153}
157 154
158/* the CONTROL inode is made without asking attributes from Venus */ 155/* the CONTROL inode is made without asking attributes from Venus */
159int coda_cnode_makectl(struct inode **inode, struct super_block *sb) 156struct inode *coda_cnode_makectl(struct super_block *sb)
160{ 157{
161 int error = -ENOMEM; 158 struct inode *inode = new_inode(sb);
162 159 if (inode) {
163 *inode = new_inode(sb); 160 inode->i_ino = CTL_INO;
164 if (*inode) { 161 inode->i_op = &coda_ioctl_inode_operations;
165 (*inode)->i_ino = CTL_INO; 162 inode->i_fop = &coda_ioctl_operations;
166 (*inode)->i_op = &coda_ioctl_inode_operations; 163 inode->i_mode = 0444;
167 (*inode)->i_fop = &coda_ioctl_operations; 164 return inode;
168 (*inode)->i_mode = 0444;
169 error = 0;
170 } 165 }
171 166 return ERR_PTR(-ENOMEM);
172 return error;
173} 167}
174 168
diff --git a/fs/coda/coda_fs_i.h b/fs/coda/coda_fs_i.h
index e35071b1de0e..b24fdfd8a3f0 100644
--- a/fs/coda/coda_fs_i.h
+++ b/fs/coda/coda_fs_i.h
@@ -49,9 +49,9 @@ struct coda_file_info {
49#define C_DYING 0x4 /* from venus (which died) */ 49#define C_DYING 0x4 /* from venus (which died) */
50#define C_PURGE 0x8 50#define C_PURGE 0x8
51 51
52int coda_cnode_make(struct inode **, struct CodaFid *, struct super_block *); 52struct inode *coda_cnode_make(struct CodaFid *, struct super_block *);
53struct inode *coda_iget(struct super_block *sb, struct CodaFid *fid, struct coda_vattr *attr); 53struct inode *coda_iget(struct super_block *sb, struct CodaFid *fid, struct coda_vattr *attr);
54int coda_cnode_makectl(struct inode **inode, struct super_block *sb); 54struct inode *coda_cnode_makectl(struct super_block *sb);
55struct inode *coda_fid_to_inode(struct CodaFid *fid, struct super_block *sb); 55struct inode *coda_fid_to_inode(struct CodaFid *fid, struct super_block *sb);
56void coda_replace_fid(struct inode *, struct CodaFid *, struct CodaFid *); 56void coda_replace_fid(struct inode *, struct CodaFid *, struct CodaFid *);
57 57
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 28e7e135cfab..177515829062 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -30,14 +30,14 @@
30#include "coda_int.h" 30#include "coda_int.h"
31 31
32/* dir inode-ops */ 32/* dir inode-ops */
33static int coda_create(struct inode *dir, struct dentry *new, int mode, struct nameidata *nd); 33static int coda_create(struct inode *dir, struct dentry *new, umode_t mode, struct nameidata *nd);
34static struct dentry *coda_lookup(struct inode *dir, struct dentry *target, struct nameidata *nd); 34static struct dentry *coda_lookup(struct inode *dir, struct dentry *target, struct nameidata *nd);
35static int coda_link(struct dentry *old_dentry, struct inode *dir_inode, 35static int coda_link(struct dentry *old_dentry, struct inode *dir_inode,
36 struct dentry *entry); 36 struct dentry *entry);
37static int coda_unlink(struct inode *dir_inode, struct dentry *entry); 37static int coda_unlink(struct inode *dir_inode, struct dentry *entry);
38static int coda_symlink(struct inode *dir_inode, struct dentry *entry, 38static int coda_symlink(struct inode *dir_inode, struct dentry *entry,
39 const char *symname); 39 const char *symname);
40static int coda_mkdir(struct inode *dir_inode, struct dentry *entry, int mode); 40static int coda_mkdir(struct inode *dir_inode, struct dentry *entry, umode_t mode);
41static int coda_rmdir(struct inode *dir_inode, struct dentry *entry); 41static int coda_rmdir(struct inode *dir_inode, struct dentry *entry);
42static int coda_rename(struct inode *old_inode, struct dentry *old_dentry, 42static int coda_rename(struct inode *old_inode, struct dentry *old_dentry,
43 struct inode *new_inode, struct dentry *new_dentry); 43 struct inode *new_inode, struct dentry *new_dentry);
@@ -96,12 +96,11 @@ const struct file_operations coda_dir_operations = {
96/* access routines: lookup, readlink, permission */ 96/* access routines: lookup, readlink, permission */
97static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struct nameidata *nd) 97static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struct nameidata *nd)
98{ 98{
99 struct inode *inode = NULL; 99 struct super_block *sb = dir->i_sb;
100 struct CodaFid resfid = { { 0, } };
101 int type = 0;
102 int error = 0;
103 const char *name = entry->d_name.name; 100 const char *name = entry->d_name.name;
104 size_t length = entry->d_name.len; 101 size_t length = entry->d_name.len;
102 struct inode *inode;
103 int type = 0;
105 104
106 if (length > CODA_MAXNAMLEN) { 105 if (length > CODA_MAXNAMLEN) {
107 printk(KERN_ERR "name too long: lookup, %s (%*s)\n", 106 printk(KERN_ERR "name too long: lookup, %s (%*s)\n",
@@ -111,23 +110,21 @@ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struc
111 110
112 /* control object, create inode on the fly */ 111 /* control object, create inode on the fly */
113 if (coda_isroot(dir) && coda_iscontrol(name, length)) { 112 if (coda_isroot(dir) && coda_iscontrol(name, length)) {
114 error = coda_cnode_makectl(&inode, dir->i_sb); 113 inode = coda_cnode_makectl(sb);
115 type = CODA_NOCACHE; 114 type = CODA_NOCACHE;
116 goto exit; 115 } else {
116 struct CodaFid fid = { { 0, } };
117 int error = venus_lookup(sb, coda_i2f(dir), name, length,
118 &type, &fid);
119 inode = !error ? coda_cnode_make(&fid, sb) : ERR_PTR(error);
117 } 120 }
118 121
119 error = venus_lookup(dir->i_sb, coda_i2f(dir), name, length, 122 if (!IS_ERR(inode) && (type & CODA_NOCACHE))
120 &type, &resfid);
121 if (!error)
122 error = coda_cnode_make(&inode, &resfid, dir->i_sb);
123
124 if (error && error != -ENOENT)
125 return ERR_PTR(error);
126
127exit:
128 if (inode && (type & CODA_NOCACHE))
129 coda_flag_inode(inode, C_VATTR | C_PURGE); 123 coda_flag_inode(inode, C_VATTR | C_PURGE);
130 124
125 if (inode == ERR_PTR(-ENOENT))
126 inode = NULL;
127
131 return d_splice_alias(inode, entry); 128 return d_splice_alias(inode, entry);
132} 129}
133 130
@@ -191,7 +188,7 @@ static inline void coda_dir_drop_nlink(struct inode *dir)
191} 188}
192 189
193/* creation routines: create, mknod, mkdir, link, symlink */ 190/* creation routines: create, mknod, mkdir, link, symlink */
194static int coda_create(struct inode *dir, struct dentry *de, int mode, struct nameidata *nd) 191static int coda_create(struct inode *dir, struct dentry *de, umode_t mode, struct nameidata *nd)
195{ 192{
196 int error; 193 int error;
197 const char *name=de->d_name.name; 194 const char *name=de->d_name.name;
@@ -223,7 +220,7 @@ err_out:
223 return error; 220 return error;
224} 221}
225 222
226static int coda_mkdir(struct inode *dir, struct dentry *de, int mode) 223static int coda_mkdir(struct inode *dir, struct dentry *de, umode_t mode)
227{ 224{
228 struct inode *inode; 225 struct inode *inode;
229 struct coda_vattr attrs; 226 struct coda_vattr attrs;
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 871b27715465..5e2e1b3f068d 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -58,7 +58,6 @@ static struct inode *coda_alloc_inode(struct super_block *sb)
58static void coda_i_callback(struct rcu_head *head) 58static void coda_i_callback(struct rcu_head *head)
59{ 59{
60 struct inode *inode = container_of(head, struct inode, i_rcu); 60 struct inode *inode = container_of(head, struct inode, i_rcu);
61 INIT_LIST_HEAD(&inode->i_dentry);
62 kmem_cache_free(coda_inode_cachep, ITOC(inode)); 61 kmem_cache_free(coda_inode_cachep, ITOC(inode));
63} 62}
64 63
@@ -205,10 +204,12 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
205 printk("coda_read_super: rootfid is %s\n", coda_f2s(&fid)); 204 printk("coda_read_super: rootfid is %s\n", coda_f2s(&fid));
206 205
207 /* make root inode */ 206 /* make root inode */
208 error = coda_cnode_make(&root, &fid, sb); 207 root = coda_cnode_make(&fid, sb);
209 if ( error || !root ) { 208 if (IS_ERR(root)) {
210 printk("Failure of coda_cnode_make for root: error %d\n", error); 209 error = PTR_ERR(root);
211 goto error; 210 printk("Failure of coda_cnode_make for root: error %d\n", error);
211 root = NULL;
212 goto error;
212 } 213 }
213 214
214 printk("coda_read_super: rootinode is %ld dev %s\n", 215 printk("coda_read_super: rootinode is %ld dev %s\n",
diff --git a/fs/compat.c b/fs/compat.c
index c98787536bb8..fa9d721ecfee 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -342,16 +342,9 @@ asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct c
342 */ 342 */
343asmlinkage long compat_sys_ustat(unsigned dev, struct compat_ustat __user *u) 343asmlinkage long compat_sys_ustat(unsigned dev, struct compat_ustat __user *u)
344{ 344{
345 struct super_block *sb;
346 struct compat_ustat tmp; 345 struct compat_ustat tmp;
347 struct kstatfs sbuf; 346 struct kstatfs sbuf;
348 int err; 347 int err = vfs_ustat(new_decode_dev(dev), &sbuf);
349
350 sb = user_get_super(new_decode_dev(dev));
351 if (!sb)
352 return -EINVAL;
353 err = statfs_by_dentry(sb->s_root, &sbuf);
354 drop_super(sb);
355 if (err) 348 if (err)
356 return err; 349 return err;
357 350
@@ -1288,7 +1281,7 @@ compat_sys_vmsplice(int fd, const struct compat_iovec __user *iov32,
1288 * O_LARGEFILE flag. 1281 * O_LARGEFILE flag.
1289 */ 1282 */
1290asmlinkage long 1283asmlinkage long
1291compat_sys_open(const char __user *filename, int flags, int mode) 1284compat_sys_open(const char __user *filename, int flags, umode_t mode)
1292{ 1285{
1293 return do_sys_open(AT_FDCWD, filename, flags, mode); 1286 return do_sys_open(AT_FDCWD, filename, flags, mode);
1294} 1287}
@@ -1298,7 +1291,7 @@ compat_sys_open(const char __user *filename, int flags, int mode)
1298 * O_LARGEFILE flag. 1291 * O_LARGEFILE flag.
1299 */ 1292 */
1300asmlinkage long 1293asmlinkage long
1301compat_sys_openat(unsigned int dfd, const char __user *filename, int flags, int mode) 1294compat_sys_openat(unsigned int dfd, const char __user *filename, int flags, umode_t mode)
1302{ 1295{
1303 return do_sys_open(dfd, filename, flags, mode); 1296 return do_sys_open(dfd, filename, flags, mode);
1304} 1297}
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 32200c2ca7f6..a26bea10e81b 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1507,35 +1507,6 @@ static long do_ioctl_trans(int fd, unsigned int cmd,
1507 return -ENOIOCTLCMD; 1507 return -ENOIOCTLCMD;
1508} 1508}
1509 1509
1510static void compat_ioctl_error(struct file *filp, unsigned int fd,
1511 unsigned int cmd, unsigned long arg)
1512{
1513 char buf[10];
1514 char *fn = "?";
1515 char *path;
1516
1517 /* find the name of the device. */
1518 path = (char *)__get_free_page(GFP_KERNEL);
1519 if (path) {
1520 fn = d_path(&filp->f_path, path, PAGE_SIZE);
1521 if (IS_ERR(fn))
1522 fn = "?";
1523 }
1524
1525 sprintf(buf,"'%c'", (cmd>>_IOC_TYPESHIFT) & _IOC_TYPEMASK);
1526 if (!isprint(buf[1]))
1527 sprintf(buf, "%02x", buf[1]);
1528 compat_printk("ioctl32(%s:%d): Unknown cmd fd(%d) "
1529 "cmd(%08x){t:%s;sz:%u} arg(%08x) on %s\n",
1530 current->comm, current->pid,
1531 (int)fd, (unsigned int)cmd, buf,
1532 (cmd >> _IOC_SIZESHIFT) & _IOC_SIZEMASK,
1533 (unsigned int)arg, fn);
1534
1535 if (path)
1536 free_page((unsigned long)path);
1537}
1538
1539static int compat_ioctl_check_table(unsigned int xcmd) 1510static int compat_ioctl_check_table(unsigned int xcmd)
1540{ 1511{
1541 int i; 1512 int i;
@@ -1622,13 +1593,8 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
1622 goto found_handler; 1593 goto found_handler;
1623 1594
1624 error = do_ioctl_trans(fd, cmd, arg, filp); 1595 error = do_ioctl_trans(fd, cmd, arg, filp);
1625 if (error == -ENOIOCTLCMD) { 1596 if (error == -ENOIOCTLCMD)
1626 static int count; 1597 error = -ENOTTY;
1627
1628 if (++count <= 50)
1629 compat_ioctl_error(filp, fd, cmd, arg);
1630 error = -EINVAL;
1631 }
1632 1598
1633 goto out_fput; 1599 goto out_fput;
1634 1600
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index 82bda8fdfc1c..ede857d20a04 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -63,8 +63,8 @@ extern struct kmem_cache *configfs_dir_cachep;
63 63
64extern int configfs_is_root(struct config_item *item); 64extern int configfs_is_root(struct config_item *item);
65 65
66extern struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent *); 66extern struct inode * configfs_new_inode(umode_t mode, struct configfs_dirent *);
67extern int configfs_create(struct dentry *, int mode, int (*init)(struct inode *)); 67extern int configfs_create(struct dentry *, umode_t mode, int (*init)(struct inode *));
68extern int configfs_inode_init(void); 68extern int configfs_inode_init(void);
69extern void configfs_inode_exit(void); 69extern void configfs_inode_exit(void);
70 70
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 9a37a9b6de3a..5ddd7ebd9dcd 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -311,8 +311,8 @@ static int configfs_create_dir(struct config_item * item, struct dentry *dentry)
311 311
312 if (item->ci_parent) 312 if (item->ci_parent)
313 parent = item->ci_parent->ci_dentry; 313 parent = item->ci_parent->ci_dentry;
314 else if (configfs_mount && configfs_mount->mnt_sb) 314 else if (configfs_mount)
315 parent = configfs_mount->mnt_sb->s_root; 315 parent = configfs_mount->mnt_root;
316 else 316 else
317 return -EFAULT; 317 return -EFAULT;
318 318
@@ -1170,7 +1170,7 @@ void configfs_undepend_item(struct configfs_subsystem *subsys,
1170} 1170}
1171EXPORT_SYMBOL(configfs_undepend_item); 1171EXPORT_SYMBOL(configfs_undepend_item);
1172 1172
1173static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 1173static int configfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
1174{ 1174{
1175 int ret = 0; 1175 int ret = 0;
1176 int module_got = 0; 1176 int module_got = 0;
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 9d8715c45f25..3ee36d418863 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -116,7 +116,7 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr)
116 return error; 116 return error;
117} 117}
118 118
119static inline void set_default_inode_attr(struct inode * inode, mode_t mode) 119static inline void set_default_inode_attr(struct inode * inode, umode_t mode)
120{ 120{
121 inode->i_mode = mode; 121 inode->i_mode = mode;
122 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 122 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
@@ -132,7 +132,7 @@ static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
132 inode->i_ctime = iattr->ia_ctime; 132 inode->i_ctime = iattr->ia_ctime;
133} 133}
134 134
135struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent * sd) 135struct inode *configfs_new_inode(umode_t mode, struct configfs_dirent * sd)
136{ 136{
137 struct inode * inode = new_inode(configfs_sb); 137 struct inode * inode = new_inode(configfs_sb);
138 if (inode) { 138 if (inode) {
@@ -185,7 +185,7 @@ static void configfs_set_inode_lock_class(struct configfs_dirent *sd,
185 185
186#endif /* CONFIG_LOCKDEP */ 186#endif /* CONFIG_LOCKDEP */
187 187
188int configfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *)) 188int configfs_create(struct dentry * dentry, umode_t mode, int (*init)(struct inode *))
189{ 189{
190 int error = 0; 190 int error = 0;
191 struct inode * inode = NULL; 191 struct inode * inode = NULL;
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 739fb59bcdc2..a2ee8f9f5a38 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -20,7 +20,6 @@
20#include <linux/cramfs_fs.h> 20#include <linux/cramfs_fs.h>
21#include <linux/slab.h> 21#include <linux/slab.h>
22#include <linux/cramfs_fs_sb.h> 22#include <linux/cramfs_fs_sb.h>
23#include <linux/buffer_head.h>
24#include <linux/vfs.h> 23#include <linux/vfs.h>
25#include <linux/mutex.h> 24#include <linux/mutex.h>
26 25
@@ -378,7 +377,7 @@ static int cramfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
378 unsigned long nextoffset; 377 unsigned long nextoffset;
379 char *name; 378 char *name;
380 ino_t ino; 379 ino_t ino;
381 mode_t mode; 380 umode_t mode;
382 int namelen, error; 381 int namelen, error;
383 382
384 mutex_lock(&read_mutex); 383 mutex_lock(&read_mutex);
diff --git a/fs/dcache.c b/fs/dcache.c
index 89509b5a090e..16a53cc2cc02 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -38,6 +38,7 @@
38#include <linux/prefetch.h> 38#include <linux/prefetch.h>
39#include <linux/ratelimit.h> 39#include <linux/ratelimit.h>
40#include "internal.h" 40#include "internal.h"
41#include "mount.h"
41 42
42/* 43/*
43 * Usage: 44 * Usage:
@@ -242,6 +243,7 @@ static void dentry_lru_add(struct dentry *dentry)
242static void __dentry_lru_del(struct dentry *dentry) 243static void __dentry_lru_del(struct dentry *dentry)
243{ 244{
244 list_del_init(&dentry->d_lru); 245 list_del_init(&dentry->d_lru);
246 dentry->d_flags &= ~DCACHE_SHRINK_LIST;
245 dentry->d_sb->s_nr_dentry_unused--; 247 dentry->d_sb->s_nr_dentry_unused--;
246 dentry_stat.nr_unused--; 248 dentry_stat.nr_unused--;
247} 249}
@@ -275,15 +277,15 @@ static void dentry_lru_prune(struct dentry *dentry)
275 } 277 }
276} 278}
277 279
278static void dentry_lru_move_tail(struct dentry *dentry) 280static void dentry_lru_move_list(struct dentry *dentry, struct list_head *list)
279{ 281{
280 spin_lock(&dcache_lru_lock); 282 spin_lock(&dcache_lru_lock);
281 if (list_empty(&dentry->d_lru)) { 283 if (list_empty(&dentry->d_lru)) {
282 list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); 284 list_add_tail(&dentry->d_lru, list);
283 dentry->d_sb->s_nr_dentry_unused++; 285 dentry->d_sb->s_nr_dentry_unused++;
284 dentry_stat.nr_unused++; 286 dentry_stat.nr_unused++;
285 } else { 287 } else {
286 list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); 288 list_move_tail(&dentry->d_lru, list);
287 } 289 }
288 spin_unlock(&dcache_lru_lock); 290 spin_unlock(&dcache_lru_lock);
289} 291}
@@ -769,14 +771,18 @@ static void shrink_dentry_list(struct list_head *list)
769} 771}
770 772
771/** 773/**
772 * __shrink_dcache_sb - shrink the dentry LRU on a given superblock 774 * prune_dcache_sb - shrink the dcache
773 * @sb: superblock to shrink dentry LRU. 775 * @sb: superblock
774 * @count: number of entries to prune 776 * @count: number of entries to try to free
775 * @flags: flags to control the dentry processing 777 *
778 * Attempt to shrink the superblock dcache LRU by @count entries. This is
779 * done when we need more memory an called from the superblock shrinker
780 * function.
776 * 781 *
777 * If flags contains DCACHE_REFERENCED reference dentries will not be pruned. 782 * This function may fail to free any resources if all the dentries are in
783 * use.
778 */ 784 */
779static void __shrink_dcache_sb(struct super_block *sb, int count, int flags) 785void prune_dcache_sb(struct super_block *sb, int count)
780{ 786{
781 struct dentry *dentry; 787 struct dentry *dentry;
782 LIST_HEAD(referenced); 788 LIST_HEAD(referenced);
@@ -795,18 +801,13 @@ relock:
795 goto relock; 801 goto relock;
796 } 802 }
797 803
798 /* 804 if (dentry->d_flags & DCACHE_REFERENCED) {
799 * If we are honouring the DCACHE_REFERENCED flag and the
800 * dentry has this flag set, don't free it. Clear the flag
801 * and put it back on the LRU.
802 */
803 if (flags & DCACHE_REFERENCED &&
804 dentry->d_flags & DCACHE_REFERENCED) {
805 dentry->d_flags &= ~DCACHE_REFERENCED; 805 dentry->d_flags &= ~DCACHE_REFERENCED;
806 list_move(&dentry->d_lru, &referenced); 806 list_move(&dentry->d_lru, &referenced);
807 spin_unlock(&dentry->d_lock); 807 spin_unlock(&dentry->d_lock);
808 } else { 808 } else {
809 list_move_tail(&dentry->d_lru, &tmp); 809 list_move_tail(&dentry->d_lru, &tmp);
810 dentry->d_flags |= DCACHE_SHRINK_LIST;
810 spin_unlock(&dentry->d_lock); 811 spin_unlock(&dentry->d_lock);
811 if (!--count) 812 if (!--count)
812 break; 813 break;
@@ -821,23 +822,6 @@ relock:
821} 822}
822 823
823/** 824/**
824 * prune_dcache_sb - shrink the dcache
825 * @sb: superblock
826 * @nr_to_scan: number of entries to try to free
827 *
828 * Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. This is
829 * done when we need more memory an called from the superblock shrinker
830 * function.
831 *
832 * This function may fail to free any resources if all the dentries are in
833 * use.
834 */
835void prune_dcache_sb(struct super_block *sb, int nr_to_scan)
836{
837 __shrink_dcache_sb(sb, nr_to_scan, DCACHE_REFERENCED);
838}
839
840/**
841 * shrink_dcache_sb - shrink dcache for a superblock 825 * shrink_dcache_sb - shrink dcache for a superblock
842 * @sb: superblock 826 * @sb: superblock
843 * 827 *
@@ -1091,7 +1075,7 @@ EXPORT_SYMBOL(have_submounts);
1091 * drop the lock and return early due to latency 1075 * drop the lock and return early due to latency
1092 * constraints. 1076 * constraints.
1093 */ 1077 */
1094static int select_parent(struct dentry * parent) 1078static int select_parent(struct dentry *parent, struct list_head *dispose)
1095{ 1079{
1096 struct dentry *this_parent; 1080 struct dentry *this_parent;
1097 struct list_head *next; 1081 struct list_head *next;
@@ -1113,17 +1097,21 @@ resume:
1113 1097
1114 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); 1098 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1115 1099
1116 /* 1100 /*
1117 * move only zero ref count dentries to the end 1101 * move only zero ref count dentries to the dispose list.
1118 * of the unused list for prune_dcache 1102 *
1103 * Those which are presently on the shrink list, being processed
1104 * by shrink_dentry_list(), shouldn't be moved. Otherwise the
1105 * loop in shrink_dcache_parent() might not make any progress
1106 * and loop forever.
1119 */ 1107 */
1120 if (!dentry->d_count) { 1108 if (dentry->d_count) {
1121 dentry_lru_move_tail(dentry);
1122 found++;
1123 } else {
1124 dentry_lru_del(dentry); 1109 dentry_lru_del(dentry);
1110 } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
1111 dentry_lru_move_list(dentry, dispose);
1112 dentry->d_flags |= DCACHE_SHRINK_LIST;
1113 found++;
1125 } 1114 }
1126
1127 /* 1115 /*
1128 * We can return to the caller if we have found some (this 1116 * We can return to the caller if we have found some (this
1129 * ensures forward progress). We'll be coming back to find 1117 * ensures forward progress). We'll be coming back to find
@@ -1180,14 +1168,13 @@ rename_retry:
1180 * 1168 *
1181 * Prune the dcache to remove unused children of the parent dentry. 1169 * Prune the dcache to remove unused children of the parent dentry.
1182 */ 1170 */
1183
1184void shrink_dcache_parent(struct dentry * parent) 1171void shrink_dcache_parent(struct dentry * parent)
1185{ 1172{
1186 struct super_block *sb = parent->d_sb; 1173 LIST_HEAD(dispose);
1187 int found; 1174 int found;
1188 1175
1189 while ((found = select_parent(parent)) != 0) 1176 while ((found = select_parent(parent, &dispose)) != 0)
1190 __shrink_dcache_sb(sb, found, 0); 1177 shrink_dentry_list(&dispose);
1191} 1178}
1192EXPORT_SYMBOL(shrink_dcache_parent); 1179EXPORT_SYMBOL(shrink_dcache_parent);
1193 1180
@@ -1460,6 +1447,23 @@ struct dentry * d_alloc_root(struct inode * root_inode)
1460} 1447}
1461EXPORT_SYMBOL(d_alloc_root); 1448EXPORT_SYMBOL(d_alloc_root);
1462 1449
1450struct dentry *d_make_root(struct inode *root_inode)
1451{
1452 struct dentry *res = NULL;
1453
1454 if (root_inode) {
1455 static const struct qstr name = { .name = "/", .len = 1 };
1456
1457 res = __d_alloc(root_inode->i_sb, &name);
1458 if (res)
1459 d_instantiate(res, root_inode);
1460 else
1461 iput(root_inode);
1462 }
1463 return res;
1464}
1465EXPORT_SYMBOL(d_make_root);
1466
1463static struct dentry * __d_find_any_alias(struct inode *inode) 1467static struct dentry * __d_find_any_alias(struct inode *inode)
1464{ 1468{
1465 struct dentry *alias; 1469 struct dentry *alias;
@@ -1471,7 +1475,14 @@ static struct dentry * __d_find_any_alias(struct inode *inode)
1471 return alias; 1475 return alias;
1472} 1476}
1473 1477
1474static struct dentry * d_find_any_alias(struct inode *inode) 1478/**
1479 * d_find_any_alias - find any alias for a given inode
1480 * @inode: inode to find an alias for
1481 *
1482 * If any aliases exist for the given inode, take and return a
1483 * reference for one of them. If no aliases exist, return %NULL.
1484 */
1485struct dentry *d_find_any_alias(struct inode *inode)
1475{ 1486{
1476 struct dentry *de; 1487 struct dentry *de;
1477 1488
@@ -1480,7 +1491,7 @@ static struct dentry * d_find_any_alias(struct inode *inode)
1480 spin_unlock(&inode->i_lock); 1491 spin_unlock(&inode->i_lock);
1481 return de; 1492 return de;
1482} 1493}
1483 1494EXPORT_SYMBOL(d_find_any_alias);
1484 1495
1485/** 1496/**
1486 * d_obtain_alias - find or allocate a dentry for a given inode 1497 * d_obtain_alias - find or allocate a dentry for a given inode
@@ -2451,6 +2462,7 @@ static int prepend_path(const struct path *path,
2451{ 2462{
2452 struct dentry *dentry = path->dentry; 2463 struct dentry *dentry = path->dentry;
2453 struct vfsmount *vfsmnt = path->mnt; 2464 struct vfsmount *vfsmnt = path->mnt;
2465 struct mount *mnt = real_mount(vfsmnt);
2454 bool slash = false; 2466 bool slash = false;
2455 int error = 0; 2467 int error = 0;
2456 2468
@@ -2460,11 +2472,11 @@ static int prepend_path(const struct path *path,
2460 2472
2461 if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { 2473 if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
2462 /* Global root? */ 2474 /* Global root? */
2463 if (vfsmnt->mnt_parent == vfsmnt) { 2475 if (!mnt_has_parent(mnt))
2464 goto global_root; 2476 goto global_root;
2465 } 2477 dentry = mnt->mnt_mountpoint;
2466 dentry = vfsmnt->mnt_mountpoint; 2478 mnt = mnt->mnt_parent;
2467 vfsmnt = vfsmnt->mnt_parent; 2479 vfsmnt = &mnt->mnt;
2468 continue; 2480 continue;
2469 } 2481 }
2470 parent = dentry->d_parent; 2482 parent = dentry->d_parent;
@@ -2501,7 +2513,7 @@ global_root:
2501 if (!slash) 2513 if (!slash)
2502 error = prepend(buffer, buflen, "/", 1); 2514 error = prepend(buffer, buflen, "/", 1);
2503 if (!error) 2515 if (!error)
2504 error = vfsmnt->mnt_ns ? 1 : 2; 2516 error = real_mount(vfsmnt)->mnt_ns ? 1 : 2;
2505 goto out; 2517 goto out;
2506} 2518}
2507 2519
@@ -2853,31 +2865,6 @@ int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
2853 return result; 2865 return result;
2854} 2866}
2855 2867
2856int path_is_under(struct path *path1, struct path *path2)
2857{
2858 struct vfsmount *mnt = path1->mnt;
2859 struct dentry *dentry = path1->dentry;
2860 int res;
2861
2862 br_read_lock(vfsmount_lock);
2863 if (mnt != path2->mnt) {
2864 for (;;) {
2865 if (mnt->mnt_parent == mnt) {
2866 br_read_unlock(vfsmount_lock);
2867 return 0;
2868 }
2869 if (mnt->mnt_parent == path2->mnt)
2870 break;
2871 mnt = mnt->mnt_parent;
2872 }
2873 dentry = mnt->mnt_mountpoint;
2874 }
2875 res = is_subdir(dentry, path2->dentry);
2876 br_read_unlock(vfsmount_lock);
2877 return res;
2878}
2879EXPORT_SYMBOL(path_is_under);
2880
2881void d_genocide(struct dentry *root) 2868void d_genocide(struct dentry *root)
2882{ 2869{
2883 struct dentry *this_parent; 2870 struct dentry *this_parent;
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 90f76575c056..f65d4455c5e5 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -15,9 +15,11 @@
15 15
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/fs.h> 17#include <linux/fs.h>
18#include <linux/seq_file.h>
18#include <linux/pagemap.h> 19#include <linux/pagemap.h>
19#include <linux/namei.h> 20#include <linux/namei.h>
20#include <linux/debugfs.h> 21#include <linux/debugfs.h>
22#include <linux/io.h>
21 23
22static ssize_t default_read_file(struct file *file, char __user *buf, 24static ssize_t default_read_file(struct file *file, char __user *buf,
23 size_t count, loff_t *ppos) 25 size_t count, loff_t *ppos)
@@ -95,7 +97,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_u8_wo, NULL, debugfs_u8_set, "%llu\n");
95 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling 97 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
96 * code. 98 * code.
97 */ 99 */
98struct dentry *debugfs_create_u8(const char *name, mode_t mode, 100struct dentry *debugfs_create_u8(const char *name, umode_t mode,
99 struct dentry *parent, u8 *value) 101 struct dentry *parent, u8 *value)
100{ 102{
101 /* if there are no write bits set, make read only */ 103 /* if there are no write bits set, make read only */
@@ -147,7 +149,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_u16_wo, NULL, debugfs_u16_set, "%llu\n");
147 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling 149 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
148 * code. 150 * code.
149 */ 151 */
150struct dentry *debugfs_create_u16(const char *name, mode_t mode, 152struct dentry *debugfs_create_u16(const char *name, umode_t mode,
151 struct dentry *parent, u16 *value) 153 struct dentry *parent, u16 *value)
152{ 154{
153 /* if there are no write bits set, make read only */ 155 /* if there are no write bits set, make read only */
@@ -199,7 +201,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_u32_wo, NULL, debugfs_u32_set, "%llu\n");
199 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling 201 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
200 * code. 202 * code.
201 */ 203 */
202struct dentry *debugfs_create_u32(const char *name, mode_t mode, 204struct dentry *debugfs_create_u32(const char *name, umode_t mode,
203 struct dentry *parent, u32 *value) 205 struct dentry *parent, u32 *value)
204{ 206{
205 /* if there are no write bits set, make read only */ 207 /* if there are no write bits set, make read only */
@@ -252,7 +254,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
252 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling 254 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
253 * code. 255 * code.
254 */ 256 */
255struct dentry *debugfs_create_u64(const char *name, mode_t mode, 257struct dentry *debugfs_create_u64(const char *name, umode_t mode,
256 struct dentry *parent, u64 *value) 258 struct dentry *parent, u64 *value)
257{ 259{
258 /* if there are no write bits set, make read only */ 260 /* if there are no write bits set, make read only */
@@ -298,7 +300,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_x64, debugfs_u64_get, debugfs_u64_set, "0x%016llx\n
298 * @value: a pointer to the variable that the file should read to and write 300 * @value: a pointer to the variable that the file should read to and write
299 * from. 301 * from.
300 */ 302 */
301struct dentry *debugfs_create_x8(const char *name, mode_t mode, 303struct dentry *debugfs_create_x8(const char *name, umode_t mode,
302 struct dentry *parent, u8 *value) 304 struct dentry *parent, u8 *value)
303{ 305{
304 /* if there are no write bits set, make read only */ 306 /* if there are no write bits set, make read only */
@@ -322,7 +324,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_x8);
322 * @value: a pointer to the variable that the file should read to and write 324 * @value: a pointer to the variable that the file should read to and write
323 * from. 325 * from.
324 */ 326 */
325struct dentry *debugfs_create_x16(const char *name, mode_t mode, 327struct dentry *debugfs_create_x16(const char *name, umode_t mode,
326 struct dentry *parent, u16 *value) 328 struct dentry *parent, u16 *value)
327{ 329{
328 /* if there are no write bits set, make read only */ 330 /* if there are no write bits set, make read only */
@@ -346,7 +348,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_x16);
346 * @value: a pointer to the variable that the file should read to and write 348 * @value: a pointer to the variable that the file should read to and write
347 * from. 349 * from.
348 */ 350 */
349struct dentry *debugfs_create_x32(const char *name, mode_t mode, 351struct dentry *debugfs_create_x32(const char *name, umode_t mode,
350 struct dentry *parent, u32 *value) 352 struct dentry *parent, u32 *value)
351{ 353{
352 /* if there are no write bits set, make read only */ 354 /* if there are no write bits set, make read only */
@@ -370,7 +372,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_x32);
370 * @value: a pointer to the variable that the file should read to and write 372 * @value: a pointer to the variable that the file should read to and write
371 * from. 373 * from.
372 */ 374 */
373struct dentry *debugfs_create_x64(const char *name, mode_t mode, 375struct dentry *debugfs_create_x64(const char *name, umode_t mode,
374 struct dentry *parent, u64 *value) 376 struct dentry *parent, u64 *value)
375{ 377{
376 return debugfs_create_file(name, mode, parent, value, &fops_x64); 378 return debugfs_create_file(name, mode, parent, value, &fops_x64);
@@ -401,7 +403,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_size_t, debugfs_size_t_get, debugfs_size_t_set,
401 * @value: a pointer to the variable that the file should read to and write 403 * @value: a pointer to the variable that the file should read to and write
402 * from. 404 * from.
403 */ 405 */
404struct dentry *debugfs_create_size_t(const char *name, mode_t mode, 406struct dentry *debugfs_create_size_t(const char *name, umode_t mode,
405 struct dentry *parent, size_t *value) 407 struct dentry *parent, size_t *value)
406{ 408{
407 return debugfs_create_file(name, mode, parent, value, &fops_size_t); 409 return debugfs_create_file(name, mode, parent, value, &fops_size_t);
@@ -473,7 +475,7 @@ static const struct file_operations fops_bool = {
473 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling 475 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
474 * code. 476 * code.
475 */ 477 */
476struct dentry *debugfs_create_bool(const char *name, mode_t mode, 478struct dentry *debugfs_create_bool(const char *name, umode_t mode,
477 struct dentry *parent, u32 *value) 479 struct dentry *parent, u32 *value)
478{ 480{
479 return debugfs_create_file(name, mode, parent, value, &fops_bool); 481 return debugfs_create_file(name, mode, parent, value, &fops_bool);
@@ -518,10 +520,103 @@ static const struct file_operations fops_blob = {
518 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling 520 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
519 * code. 521 * code.
520 */ 522 */
521struct dentry *debugfs_create_blob(const char *name, mode_t mode, 523struct dentry *debugfs_create_blob(const char *name, umode_t mode,
522 struct dentry *parent, 524 struct dentry *parent,
523 struct debugfs_blob_wrapper *blob) 525 struct debugfs_blob_wrapper *blob)
524{ 526{
525 return debugfs_create_file(name, mode, parent, blob, &fops_blob); 527 return debugfs_create_file(name, mode, parent, blob, &fops_blob);
526} 528}
527EXPORT_SYMBOL_GPL(debugfs_create_blob); 529EXPORT_SYMBOL_GPL(debugfs_create_blob);
530
531#ifdef CONFIG_HAS_IOMEM
532
533/*
534 * The regset32 stuff is used to print 32-bit registers using the
535 * seq_file utilities. We offer printing a register set in an already-opened
536 * sequential file or create a debugfs file that only prints a regset32.
537 */
538
539/**
540 * debugfs_print_regs32 - use seq_print to describe a set of registers
541 * @s: the seq_file structure being used to generate output
542 * @regs: an array if struct debugfs_reg32 structures
543 * @mregs: the length of the above array
544 * @base: the base address to be used in reading the registers
545 * @prefix: a string to be prefixed to every output line
546 *
547 * This function outputs a text block describing the current values of
548 * some 32-bit hardware registers. It is meant to be used within debugfs
549 * files based on seq_file that need to show registers, intermixed with other
550 * information. The prefix argument may be used to specify a leading string,
551 * because some peripherals have several blocks of identical registers,
552 * for example configuration of dma channels
553 */
554int debugfs_print_regs32(struct seq_file *s, const struct debugfs_reg32 *regs,
555 int nregs, void __iomem *base, char *prefix)
556{
557 int i, ret = 0;
558
559 for (i = 0; i < nregs; i++, regs++) {
560 if (prefix)
561 ret += seq_printf(s, "%s", prefix);
562 ret += seq_printf(s, "%s = 0x%08x\n", regs->name,
563 readl(base + regs->offset));
564 }
565 return ret;
566}
567EXPORT_SYMBOL_GPL(debugfs_print_regs32);
568
569static int debugfs_show_regset32(struct seq_file *s, void *data)
570{
571 struct debugfs_regset32 *regset = s->private;
572
573 debugfs_print_regs32(s, regset->regs, regset->nregs, regset->base, "");
574 return 0;
575}
576
577static int debugfs_open_regset32(struct inode *inode, struct file *file)
578{
579 return single_open(file, debugfs_show_regset32, inode->i_private);
580}
581
582static const struct file_operations fops_regset32 = {
583 .open = debugfs_open_regset32,
584 .read = seq_read,
585 .llseek = seq_lseek,
586 .release = single_release,
587};
588
589/**
590 * debugfs_create_regset32 - create a debugfs file that returns register values
591 * @name: a pointer to a string containing the name of the file to create.
592 * @mode: the permission that the file should have
593 * @parent: a pointer to the parent dentry for this file. This should be a
594 * directory dentry if set. If this parameter is %NULL, then the
595 * file will be created in the root of the debugfs filesystem.
596 * @regset: a pointer to a struct debugfs_regset32, which contains a pointer
597 * to an array of register definitions, the array size and the base
598 * address where the register bank is to be found.
599 *
600 * This function creates a file in debugfs with the given name that reports
601 * the names and values of a set of 32-bit registers. If the @mode variable
602 * is so set it can be read from. Writing is not supported.
603 *
604 * This function will return a pointer to a dentry if it succeeds. This
605 * pointer must be passed to the debugfs_remove() function when the file is
606 * to be removed (no automatic cleanup happens if your module is unloaded,
607 * you are responsible here.) If an error occurs, %NULL will be returned.
608 *
609 * If debugfs is not enabled in the kernel, the value -%ENODEV will be
610 * returned. It is not wise to check for this value, but rather, check for
611 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
612 * code.
613 */
614struct dentry *debugfs_create_regset32(const char *name, mode_t mode,
615 struct dentry *parent,
616 struct debugfs_regset32 *regset)
617{
618 return debugfs_create_file(name, mode, parent, regset, &fops_regset32);
619}
620EXPORT_SYMBOL_GPL(debugfs_create_regset32);
621
622#endif /* CONFIG_HAS_IOMEM */
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index f3a257d7a985..956d5ddddf6e 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -30,7 +30,7 @@ static struct vfsmount *debugfs_mount;
30static int debugfs_mount_count; 30static int debugfs_mount_count;
31static bool debugfs_registered; 31static bool debugfs_registered;
32 32
33static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t dev, 33static struct inode *debugfs_get_inode(struct super_block *sb, umode_t mode, dev_t dev,
34 void *data, const struct file_operations *fops) 34 void *data, const struct file_operations *fops)
35 35
36{ 36{
@@ -69,7 +69,7 @@ static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t d
69 69
70/* SMP-safe */ 70/* SMP-safe */
71static int debugfs_mknod(struct inode *dir, struct dentry *dentry, 71static int debugfs_mknod(struct inode *dir, struct dentry *dentry,
72 int mode, dev_t dev, void *data, 72 umode_t mode, dev_t dev, void *data,
73 const struct file_operations *fops) 73 const struct file_operations *fops)
74{ 74{
75 struct inode *inode; 75 struct inode *inode;
@@ -87,7 +87,7 @@ static int debugfs_mknod(struct inode *dir, struct dentry *dentry,
87 return error; 87 return error;
88} 88}
89 89
90static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, int mode, 90static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode,
91 void *data, const struct file_operations *fops) 91 void *data, const struct file_operations *fops)
92{ 92{
93 int res; 93 int res;
@@ -101,14 +101,14 @@ static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, int mode,
101 return res; 101 return res;
102} 102}
103 103
104static int debugfs_link(struct inode *dir, struct dentry *dentry, int mode, 104static int debugfs_link(struct inode *dir, struct dentry *dentry, umode_t mode,
105 void *data, const struct file_operations *fops) 105 void *data, const struct file_operations *fops)
106{ 106{
107 mode = (mode & S_IALLUGO) | S_IFLNK; 107 mode = (mode & S_IALLUGO) | S_IFLNK;
108 return debugfs_mknod(dir, dentry, mode, 0, data, fops); 108 return debugfs_mknod(dir, dentry, mode, 0, data, fops);
109} 109}
110 110
111static int debugfs_create(struct inode *dir, struct dentry *dentry, int mode, 111static int debugfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
112 void *data, const struct file_operations *fops) 112 void *data, const struct file_operations *fops)
113{ 113{
114 int res; 114 int res;
@@ -146,7 +146,7 @@ static struct file_system_type debug_fs_type = {
146 .kill_sb = kill_litter_super, 146 .kill_sb = kill_litter_super,
147}; 147};
148 148
149static int debugfs_create_by_name(const char *name, mode_t mode, 149static int debugfs_create_by_name(const char *name, umode_t mode,
150 struct dentry *parent, 150 struct dentry *parent,
151 struct dentry **dentry, 151 struct dentry **dentry,
152 void *data, 152 void *data,
@@ -160,7 +160,7 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
160 * have around. 160 * have around.
161 */ 161 */
162 if (!parent) 162 if (!parent)
163 parent = debugfs_mount->mnt_sb->s_root; 163 parent = debugfs_mount->mnt_root;
164 164
165 *dentry = NULL; 165 *dentry = NULL;
166 mutex_lock(&parent->d_inode->i_mutex); 166 mutex_lock(&parent->d_inode->i_mutex);
@@ -214,7 +214,7 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
214 * If debugfs is not enabled in the kernel, the value -%ENODEV will be 214 * If debugfs is not enabled in the kernel, the value -%ENODEV will be
215 * returned. 215 * returned.
216 */ 216 */
217struct dentry *debugfs_create_file(const char *name, mode_t mode, 217struct dentry *debugfs_create_file(const char *name, umode_t mode,
218 struct dentry *parent, void *data, 218 struct dentry *parent, void *data,
219 const struct file_operations *fops) 219 const struct file_operations *fops)
220{ 220{
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index d5d5297efe97..c4e2a58a2e82 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -246,9 +246,9 @@ static int devpts_remount(struct super_block *sb, int *flags, char *data)
246 return err; 246 return err;
247} 247}
248 248
249static int devpts_show_options(struct seq_file *seq, struct vfsmount *vfs) 249static int devpts_show_options(struct seq_file *seq, struct dentry *root)
250{ 250{
251 struct pts_fs_info *fsi = DEVPTS_SB(vfs->mnt_sb); 251 struct pts_fs_info *fsi = DEVPTS_SB(root->d_sb);
252 struct pts_mount_opts *opts = &fsi->mount_opts; 252 struct pts_mount_opts *opts = &fsi->mount_opts;
253 253
254 if (opts->setuid) 254 if (opts->setuid)
@@ -301,7 +301,7 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
301 301
302 inode = new_inode(s); 302 inode = new_inode(s);
303 if (!inode) 303 if (!inode)
304 goto free_fsi; 304 goto fail;
305 inode->i_ino = 1; 305 inode->i_ino = 1;
306 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 306 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
307 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR; 307 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
@@ -316,8 +316,6 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
316 printk(KERN_ERR "devpts: get root dentry failed\n"); 316 printk(KERN_ERR "devpts: get root dentry failed\n");
317 iput(inode); 317 iput(inode);
318 318
319free_fsi:
320 kfree(s->s_fs_info);
321fail: 319fail:
322 return -ENOMEM; 320 return -ENOMEM;
323} 321}
diff --git a/fs/direct-io.c b/fs/direct-io.c
index d740ab67ff6e..4a588dbd11bf 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -36,6 +36,7 @@
36#include <linux/rwsem.h> 36#include <linux/rwsem.h>
37#include <linux/uio.h> 37#include <linux/uio.h>
38#include <linux/atomic.h> 38#include <linux/atomic.h>
39#include <linux/prefetch.h>
39 40
40/* 41/*
41 * How many user pages to map in one call to get_user_pages(). This determines 42 * How many user pages to map in one call to get_user_pages(). This determines
@@ -580,9 +581,8 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
580{ 581{
581 int ret; 582 int ret;
582 sector_t fs_startblk; /* Into file, in filesystem-sized blocks */ 583 sector_t fs_startblk; /* Into file, in filesystem-sized blocks */
584 sector_t fs_endblk; /* Into file, in filesystem-sized blocks */
583 unsigned long fs_count; /* Number of filesystem-sized blocks */ 585 unsigned long fs_count; /* Number of filesystem-sized blocks */
584 unsigned long dio_count;/* Number of dio_block-sized blocks */
585 unsigned long blkmask;
586 int create; 586 int create;
587 587
588 /* 588 /*
@@ -593,11 +593,9 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
593 if (ret == 0) { 593 if (ret == 0) {
594 BUG_ON(sdio->block_in_file >= sdio->final_block_in_request); 594 BUG_ON(sdio->block_in_file >= sdio->final_block_in_request);
595 fs_startblk = sdio->block_in_file >> sdio->blkfactor; 595 fs_startblk = sdio->block_in_file >> sdio->blkfactor;
596 dio_count = sdio->final_block_in_request - sdio->block_in_file; 596 fs_endblk = (sdio->final_block_in_request - 1) >>
597 fs_count = dio_count >> sdio->blkfactor; 597 sdio->blkfactor;
598 blkmask = (1 << sdio->blkfactor) - 1; 598 fs_count = fs_endblk - fs_startblk + 1;
599 if (dio_count & blkmask)
600 fs_count++;
601 599
602 map_bh->b_state = 0; 600 map_bh->b_state = 0;
603 map_bh->b_size = fs_count << dio->inode->i_blkbits; 601 map_bh->b_size = fs_count << dio->inode->i_blkbits;
@@ -1090,8 +1088,8 @@ static inline int drop_refcount(struct dio *dio)
1090 * individual fields and will generate much worse code. This is important 1088 * individual fields and will generate much worse code. This is important
1091 * for the whole file. 1089 * for the whole file.
1092 */ 1090 */
1093ssize_t 1091static inline ssize_t
1094__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, 1092do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1095 struct block_device *bdev, const struct iovec *iov, loff_t offset, 1093 struct block_device *bdev, const struct iovec *iov, loff_t offset,
1096 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, 1094 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
1097 dio_submit_t submit_io, int flags) 1095 dio_submit_t submit_io, int flags)
@@ -1100,7 +1098,6 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1100 size_t size; 1098 size_t size;
1101 unsigned long addr; 1099 unsigned long addr;
1102 unsigned blkbits = inode->i_blkbits; 1100 unsigned blkbits = inode->i_blkbits;
1103 unsigned bdev_blkbits = 0;
1104 unsigned blocksize_mask = (1 << blkbits) - 1; 1101 unsigned blocksize_mask = (1 << blkbits) - 1;
1105 ssize_t retval = -EINVAL; 1102 ssize_t retval = -EINVAL;
1106 loff_t end = offset; 1103 loff_t end = offset;
@@ -1113,12 +1110,14 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1113 if (rw & WRITE) 1110 if (rw & WRITE)
1114 rw = WRITE_ODIRECT; 1111 rw = WRITE_ODIRECT;
1115 1112
1116 if (bdev) 1113 /*
1117 bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev)); 1114 * Avoid references to bdev if not absolutely needed to give
1115 * the early prefetch in the caller enough time.
1116 */
1118 1117
1119 if (offset & blocksize_mask) { 1118 if (offset & blocksize_mask) {
1120 if (bdev) 1119 if (bdev)
1121 blkbits = bdev_blkbits; 1120 blkbits = blksize_bits(bdev_logical_block_size(bdev));
1122 blocksize_mask = (1 << blkbits) - 1; 1121 blocksize_mask = (1 << blkbits) - 1;
1123 if (offset & blocksize_mask) 1122 if (offset & blocksize_mask)
1124 goto out; 1123 goto out;
@@ -1129,11 +1128,13 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1129 addr = (unsigned long)iov[seg].iov_base; 1128 addr = (unsigned long)iov[seg].iov_base;
1130 size = iov[seg].iov_len; 1129 size = iov[seg].iov_len;
1131 end += size; 1130 end += size;
1132 if ((addr & blocksize_mask) || (size & blocksize_mask)) { 1131 if (unlikely((addr & blocksize_mask) ||
1132 (size & blocksize_mask))) {
1133 if (bdev) 1133 if (bdev)
1134 blkbits = bdev_blkbits; 1134 blkbits = blksize_bits(
1135 bdev_logical_block_size(bdev));
1135 blocksize_mask = (1 << blkbits) - 1; 1136 blocksize_mask = (1 << blkbits) - 1;
1136 if ((addr & blocksize_mask) || (size & blocksize_mask)) 1137 if ((addr & blocksize_mask) || (size & blocksize_mask))
1137 goto out; 1138 goto out;
1138 } 1139 }
1139 } 1140 }
@@ -1316,6 +1317,30 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1316out: 1317out:
1317 return retval; 1318 return retval;
1318} 1319}
1320
1321ssize_t
1322__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1323 struct block_device *bdev, const struct iovec *iov, loff_t offset,
1324 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
1325 dio_submit_t submit_io, int flags)
1326{
1327 /*
1328 * The block device state is needed in the end to finally
1329 * submit everything. Since it's likely to be cache cold
1330 * prefetch it here as first thing to hide some of the
1331 * latency.
1332 *
1333 * Attempt to prefetch the pieces we likely need later.
1334 */
1335 prefetch(&bdev->bd_disk->part_tbl);
1336 prefetch(bdev->bd_queue);
1337 prefetch((char *)bdev->bd_queue + SMP_CACHE_BYTES);
1338
1339 return do_blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
1340 nr_segs, get_block, end_io,
1341 submit_io, flags);
1342}
1343
1319EXPORT_SYMBOL(__blockdev_direct_IO); 1344EXPORT_SYMBOL(__blockdev_direct_IO);
1320 1345
1321static __init int dio_init(void) 1346static __init int dio_init(void)
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 6cf72fcc0d0c..e7e327d43fa5 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -2,7 +2,7 @@
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. 5** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
6** 6**
7** This copyrighted material is made available to anyone wishing to use, 7** This copyrighted material is made available to anyone wishing to use,
8** modify, copy, or redistribute it subject to the terms and conditions 8** modify, copy, or redistribute it subject to the terms and conditions
@@ -17,6 +17,7 @@
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/in.h> 18#include <linux/in.h>
19#include <linux/in6.h> 19#include <linux/in6.h>
20#include <linux/dlmconstants.h>
20#include <net/ipv6.h> 21#include <net/ipv6.h>
21#include <net/sock.h> 22#include <net/sock.h>
22 23
@@ -36,6 +37,7 @@
36static struct config_group *space_list; 37static struct config_group *space_list;
37static struct config_group *comm_list; 38static struct config_group *comm_list;
38static struct dlm_comm *local_comm; 39static struct dlm_comm *local_comm;
40static uint32_t dlm_comm_count;
39 41
40struct dlm_clusters; 42struct dlm_clusters;
41struct dlm_cluster; 43struct dlm_cluster;
@@ -103,6 +105,8 @@ struct dlm_cluster {
103 unsigned int cl_timewarn_cs; 105 unsigned int cl_timewarn_cs;
104 unsigned int cl_waitwarn_us; 106 unsigned int cl_waitwarn_us;
105 unsigned int cl_new_rsb_count; 107 unsigned int cl_new_rsb_count;
108 unsigned int cl_recover_callbacks;
109 char cl_cluster_name[DLM_LOCKSPACE_LEN];
106}; 110};
107 111
108enum { 112enum {
@@ -118,6 +122,8 @@ enum {
118 CLUSTER_ATTR_TIMEWARN_CS, 122 CLUSTER_ATTR_TIMEWARN_CS,
119 CLUSTER_ATTR_WAITWARN_US, 123 CLUSTER_ATTR_WAITWARN_US,
120 CLUSTER_ATTR_NEW_RSB_COUNT, 124 CLUSTER_ATTR_NEW_RSB_COUNT,
125 CLUSTER_ATTR_RECOVER_CALLBACKS,
126 CLUSTER_ATTR_CLUSTER_NAME,
121}; 127};
122 128
123struct cluster_attribute { 129struct cluster_attribute {
@@ -126,6 +132,27 @@ struct cluster_attribute {
126 ssize_t (*store)(struct dlm_cluster *, const char *, size_t); 132 ssize_t (*store)(struct dlm_cluster *, const char *, size_t);
127}; 133};
128 134
135static ssize_t cluster_cluster_name_read(struct dlm_cluster *cl, char *buf)
136{
137 return sprintf(buf, "%s\n", cl->cl_cluster_name);
138}
139
140static ssize_t cluster_cluster_name_write(struct dlm_cluster *cl,
141 const char *buf, size_t len)
142{
143 strncpy(dlm_config.ci_cluster_name, buf, DLM_LOCKSPACE_LEN);
144 strncpy(cl->cl_cluster_name, buf, DLM_LOCKSPACE_LEN);
145 return len;
146}
147
148static struct cluster_attribute cluster_attr_cluster_name = {
149 .attr = { .ca_owner = THIS_MODULE,
150 .ca_name = "cluster_name",
151 .ca_mode = S_IRUGO | S_IWUSR },
152 .show = cluster_cluster_name_read,
153 .store = cluster_cluster_name_write,
154};
155
129static ssize_t cluster_set(struct dlm_cluster *cl, unsigned int *cl_field, 156static ssize_t cluster_set(struct dlm_cluster *cl, unsigned int *cl_field,
130 int *info_field, int check_zero, 157 int *info_field, int check_zero,
131 const char *buf, size_t len) 158 const char *buf, size_t len)
@@ -171,6 +198,7 @@ CLUSTER_ATTR(protocol, 0);
171CLUSTER_ATTR(timewarn_cs, 1); 198CLUSTER_ATTR(timewarn_cs, 1);
172CLUSTER_ATTR(waitwarn_us, 0); 199CLUSTER_ATTR(waitwarn_us, 0);
173CLUSTER_ATTR(new_rsb_count, 0); 200CLUSTER_ATTR(new_rsb_count, 0);
201CLUSTER_ATTR(recover_callbacks, 0);
174 202
175static struct configfs_attribute *cluster_attrs[] = { 203static struct configfs_attribute *cluster_attrs[] = {
176 [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr, 204 [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
@@ -185,6 +213,8 @@ static struct configfs_attribute *cluster_attrs[] = {
185 [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr, 213 [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr,
186 [CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us.attr, 214 [CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us.attr,
187 [CLUSTER_ATTR_NEW_RSB_COUNT] = &cluster_attr_new_rsb_count.attr, 215 [CLUSTER_ATTR_NEW_RSB_COUNT] = &cluster_attr_new_rsb_count.attr,
216 [CLUSTER_ATTR_RECOVER_CALLBACKS] = &cluster_attr_recover_callbacks.attr,
217 [CLUSTER_ATTR_CLUSTER_NAME] = &cluster_attr_cluster_name.attr,
188 NULL, 218 NULL,
189}; 219};
190 220
@@ -293,6 +323,7 @@ struct dlm_comms {
293 323
294struct dlm_comm { 324struct dlm_comm {
295 struct config_item item; 325 struct config_item item;
326 int seq;
296 int nodeid; 327 int nodeid;
297 int local; 328 int local;
298 int addr_count; 329 int addr_count;
@@ -309,6 +340,7 @@ struct dlm_node {
309 int nodeid; 340 int nodeid;
310 int weight; 341 int weight;
311 int new; 342 int new;
343 int comm_seq; /* copy of cm->seq when nd->nodeid is set */
312}; 344};
313 345
314static struct configfs_group_operations clusters_ops = { 346static struct configfs_group_operations clusters_ops = {
@@ -455,6 +487,9 @@ static struct config_group *make_cluster(struct config_group *g,
455 cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs; 487 cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;
456 cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us; 488 cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us;
457 cl->cl_new_rsb_count = dlm_config.ci_new_rsb_count; 489 cl->cl_new_rsb_count = dlm_config.ci_new_rsb_count;
490 cl->cl_recover_callbacks = dlm_config.ci_recover_callbacks;
491 memcpy(cl->cl_cluster_name, dlm_config.ci_cluster_name,
492 DLM_LOCKSPACE_LEN);
458 493
459 space_list = &sps->ss_group; 494 space_list = &sps->ss_group;
460 comm_list = &cms->cs_group; 495 comm_list = &cms->cs_group;
@@ -558,6 +593,11 @@ static struct config_item *make_comm(struct config_group *g, const char *name)
558 return ERR_PTR(-ENOMEM); 593 return ERR_PTR(-ENOMEM);
559 594
560 config_item_init_type_name(&cm->item, name, &comm_type); 595 config_item_init_type_name(&cm->item, name, &comm_type);
596
597 cm->seq = dlm_comm_count++;
598 if (!cm->seq)
599 cm->seq = dlm_comm_count++;
600
561 cm->nodeid = -1; 601 cm->nodeid = -1;
562 cm->local = 0; 602 cm->local = 0;
563 cm->addr_count = 0; 603 cm->addr_count = 0;
@@ -801,7 +841,10 @@ static ssize_t node_nodeid_read(struct dlm_node *nd, char *buf)
801static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf, 841static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf,
802 size_t len) 842 size_t len)
803{ 843{
844 uint32_t seq = 0;
804 nd->nodeid = simple_strtol(buf, NULL, 0); 845 nd->nodeid = simple_strtol(buf, NULL, 0);
846 dlm_comm_seq(nd->nodeid, &seq);
847 nd->comm_seq = seq;
805 return len; 848 return len;
806} 849}
807 850
@@ -908,13 +951,13 @@ static void put_comm(struct dlm_comm *cm)
908} 951}
909 952
910/* caller must free mem */ 953/* caller must free mem */
911int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out, 954int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out,
912 int **new_out, int *new_count_out) 955 int *count_out)
913{ 956{
914 struct dlm_space *sp; 957 struct dlm_space *sp;
915 struct dlm_node *nd; 958 struct dlm_node *nd;
916 int i = 0, rv = 0, ids_count = 0, new_count = 0; 959 struct dlm_config_node *nodes, *node;
917 int *ids, *new; 960 int rv, count;
918 961
919 sp = get_space(lsname); 962 sp = get_space(lsname);
920 if (!sp) 963 if (!sp)
@@ -927,73 +970,42 @@ int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out,
927 goto out; 970 goto out;
928 } 971 }
929 972
930 ids_count = sp->members_count; 973 count = sp->members_count;
931 974
932 ids = kcalloc(ids_count, sizeof(int), GFP_NOFS); 975 nodes = kcalloc(count, sizeof(struct dlm_config_node), GFP_NOFS);
933 if (!ids) { 976 if (!nodes) {
934 rv = -ENOMEM; 977 rv = -ENOMEM;
935 goto out; 978 goto out;
936 } 979 }
937 980
981 node = nodes;
938 list_for_each_entry(nd, &sp->members, list) { 982 list_for_each_entry(nd, &sp->members, list) {
939 ids[i++] = nd->nodeid; 983 node->nodeid = nd->nodeid;
940 if (nd->new) 984 node->weight = nd->weight;
941 new_count++; 985 node->new = nd->new;
942 } 986 node->comm_seq = nd->comm_seq;
943 987 node++;
944 if (ids_count != i)
945 printk(KERN_ERR "dlm: bad nodeid count %d %d\n", ids_count, i);
946
947 if (!new_count)
948 goto out_ids;
949 988
950 new = kcalloc(new_count, sizeof(int), GFP_NOFS); 989 nd->new = 0;
951 if (!new) {
952 kfree(ids);
953 rv = -ENOMEM;
954 goto out;
955 } 990 }
956 991
957 i = 0; 992 *count_out = count;
958 list_for_each_entry(nd, &sp->members, list) { 993 *nodes_out = nodes;
959 if (nd->new) { 994 rv = 0;
960 new[i++] = nd->nodeid;
961 nd->new = 0;
962 }
963 }
964 *new_count_out = new_count;
965 *new_out = new;
966
967 out_ids:
968 *ids_count_out = ids_count;
969 *ids_out = ids;
970 out: 995 out:
971 mutex_unlock(&sp->members_lock); 996 mutex_unlock(&sp->members_lock);
972 put_space(sp); 997 put_space(sp);
973 return rv; 998 return rv;
974} 999}
975 1000
976int dlm_node_weight(char *lsname, int nodeid) 1001int dlm_comm_seq(int nodeid, uint32_t *seq)
977{ 1002{
978 struct dlm_space *sp; 1003 struct dlm_comm *cm = get_comm(nodeid, NULL);
979 struct dlm_node *nd; 1004 if (!cm)
980 int w = -EEXIST; 1005 return -EEXIST;
981 1006 *seq = cm->seq;
982 sp = get_space(lsname); 1007 put_comm(cm);
983 if (!sp) 1008 return 0;
984 goto out;
985
986 mutex_lock(&sp->members_lock);
987 list_for_each_entry(nd, &sp->members, list) {
988 if (nd->nodeid != nodeid)
989 continue;
990 w = nd->weight;
991 break;
992 }
993 mutex_unlock(&sp->members_lock);
994 put_space(sp);
995 out:
996 return w;
997} 1009}
998 1010
999int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr) 1011int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr)
@@ -1047,6 +1059,8 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
1047#define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */ 1059#define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */
1048#define DEFAULT_WAITWARN_US 0 1060#define DEFAULT_WAITWARN_US 0
1049#define DEFAULT_NEW_RSB_COUNT 128 1061#define DEFAULT_NEW_RSB_COUNT 128
1062#define DEFAULT_RECOVER_CALLBACKS 0
1063#define DEFAULT_CLUSTER_NAME ""
1050 1064
1051struct dlm_config_info dlm_config = { 1065struct dlm_config_info dlm_config = {
1052 .ci_tcp_port = DEFAULT_TCP_PORT, 1066 .ci_tcp_port = DEFAULT_TCP_PORT,
@@ -1060,6 +1074,8 @@ struct dlm_config_info dlm_config = {
1060 .ci_protocol = DEFAULT_PROTOCOL, 1074 .ci_protocol = DEFAULT_PROTOCOL,
1061 .ci_timewarn_cs = DEFAULT_TIMEWARN_CS, 1075 .ci_timewarn_cs = DEFAULT_TIMEWARN_CS,
1062 .ci_waitwarn_us = DEFAULT_WAITWARN_US, 1076 .ci_waitwarn_us = DEFAULT_WAITWARN_US,
1063 .ci_new_rsb_count = DEFAULT_NEW_RSB_COUNT 1077 .ci_new_rsb_count = DEFAULT_NEW_RSB_COUNT,
1078 .ci_recover_callbacks = DEFAULT_RECOVER_CALLBACKS,
1079 .ci_cluster_name = DEFAULT_CLUSTER_NAME
1064}; 1080};
1065 1081
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index 3099d0dd26c0..9f5e3663bb0c 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -2,7 +2,7 @@
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. 5** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
6** 6**
7** This copyrighted material is made available to anyone wishing to use, 7** This copyrighted material is made available to anyone wishing to use,
8** modify, copy, or redistribute it subject to the terms and conditions 8** modify, copy, or redistribute it subject to the terms and conditions
@@ -14,6 +14,13 @@
14#ifndef __CONFIG_DOT_H__ 14#ifndef __CONFIG_DOT_H__
15#define __CONFIG_DOT_H__ 15#define __CONFIG_DOT_H__
16 16
17struct dlm_config_node {
18 int nodeid;
19 int weight;
20 int new;
21 uint32_t comm_seq;
22};
23
17#define DLM_MAX_ADDR_COUNT 3 24#define DLM_MAX_ADDR_COUNT 3
18 25
19struct dlm_config_info { 26struct dlm_config_info {
@@ -29,15 +36,17 @@ struct dlm_config_info {
29 int ci_timewarn_cs; 36 int ci_timewarn_cs;
30 int ci_waitwarn_us; 37 int ci_waitwarn_us;
31 int ci_new_rsb_count; 38 int ci_new_rsb_count;
39 int ci_recover_callbacks;
40 char ci_cluster_name[DLM_LOCKSPACE_LEN];
32}; 41};
33 42
34extern struct dlm_config_info dlm_config; 43extern struct dlm_config_info dlm_config;
35 44
36int dlm_config_init(void); 45int dlm_config_init(void);
37void dlm_config_exit(void); 46void dlm_config_exit(void);
38int dlm_node_weight(char *lsname, int nodeid); 47int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out,
39int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out, 48 int *count_out);
40 int **new_out, int *new_count_out); 49int dlm_comm_seq(int nodeid, uint32_t *seq);
41int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr); 50int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr);
42int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid); 51int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid);
43int dlm_our_nodeid(void); 52int dlm_our_nodeid(void);
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 59779237e2b4..3dca2b39e83f 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -393,6 +393,7 @@ static const struct seq_operations format3_seq_ops;
393 393
394static void *table_seq_start(struct seq_file *seq, loff_t *pos) 394static void *table_seq_start(struct seq_file *seq, loff_t *pos)
395{ 395{
396 struct rb_node *node;
396 struct dlm_ls *ls = seq->private; 397 struct dlm_ls *ls = seq->private;
397 struct rsbtbl_iter *ri; 398 struct rsbtbl_iter *ri;
398 struct dlm_rsb *r; 399 struct dlm_rsb *r;
@@ -418,9 +419,10 @@ static void *table_seq_start(struct seq_file *seq, loff_t *pos)
418 ri->format = 3; 419 ri->format = 3;
419 420
420 spin_lock(&ls->ls_rsbtbl[bucket].lock); 421 spin_lock(&ls->ls_rsbtbl[bucket].lock);
421 if (!list_empty(&ls->ls_rsbtbl[bucket].list)) { 422 if (!RB_EMPTY_ROOT(&ls->ls_rsbtbl[bucket].keep)) {
422 list_for_each_entry(r, &ls->ls_rsbtbl[bucket].list, 423 for (node = rb_first(&ls->ls_rsbtbl[bucket].keep); node;
423 res_hashchain) { 424 node = rb_next(node)) {
425 r = rb_entry(node, struct dlm_rsb, res_hashnode);
424 if (!entry--) { 426 if (!entry--) {
425 dlm_hold_rsb(r); 427 dlm_hold_rsb(r);
426 ri->rsb = r; 428 ri->rsb = r;
@@ -449,9 +451,9 @@ static void *table_seq_start(struct seq_file *seq, loff_t *pos)
449 } 451 }
450 452
451 spin_lock(&ls->ls_rsbtbl[bucket].lock); 453 spin_lock(&ls->ls_rsbtbl[bucket].lock);
452 if (!list_empty(&ls->ls_rsbtbl[bucket].list)) { 454 if (!RB_EMPTY_ROOT(&ls->ls_rsbtbl[bucket].keep)) {
453 r = list_first_entry(&ls->ls_rsbtbl[bucket].list, 455 node = rb_first(&ls->ls_rsbtbl[bucket].keep);
454 struct dlm_rsb, res_hashchain); 456 r = rb_entry(node, struct dlm_rsb, res_hashnode);
455 dlm_hold_rsb(r); 457 dlm_hold_rsb(r);
456 ri->rsb = r; 458 ri->rsb = r;
457 ri->bucket = bucket; 459 ri->bucket = bucket;
@@ -467,7 +469,7 @@ static void *table_seq_next(struct seq_file *seq, void *iter_ptr, loff_t *pos)
467{ 469{
468 struct dlm_ls *ls = seq->private; 470 struct dlm_ls *ls = seq->private;
469 struct rsbtbl_iter *ri = iter_ptr; 471 struct rsbtbl_iter *ri = iter_ptr;
470 struct list_head *next; 472 struct rb_node *next;
471 struct dlm_rsb *r, *rp; 473 struct dlm_rsb *r, *rp;
472 loff_t n = *pos; 474 loff_t n = *pos;
473 unsigned bucket; 475 unsigned bucket;
@@ -480,10 +482,10 @@ static void *table_seq_next(struct seq_file *seq, void *iter_ptr, loff_t *pos)
480 482
481 spin_lock(&ls->ls_rsbtbl[bucket].lock); 483 spin_lock(&ls->ls_rsbtbl[bucket].lock);
482 rp = ri->rsb; 484 rp = ri->rsb;
483 next = rp->res_hashchain.next; 485 next = rb_next(&rp->res_hashnode);
484 486
485 if (next != &ls->ls_rsbtbl[bucket].list) { 487 if (next) {
486 r = list_entry(next, struct dlm_rsb, res_hashchain); 488 r = rb_entry(next, struct dlm_rsb, res_hashnode);
487 dlm_hold_rsb(r); 489 dlm_hold_rsb(r);
488 ri->rsb = r; 490 ri->rsb = r;
489 spin_unlock(&ls->ls_rsbtbl[bucket].lock); 491 spin_unlock(&ls->ls_rsbtbl[bucket].lock);
@@ -511,9 +513,9 @@ static void *table_seq_next(struct seq_file *seq, void *iter_ptr, loff_t *pos)
511 } 513 }
512 514
513 spin_lock(&ls->ls_rsbtbl[bucket].lock); 515 spin_lock(&ls->ls_rsbtbl[bucket].lock);
514 if (!list_empty(&ls->ls_rsbtbl[bucket].list)) { 516 if (!RB_EMPTY_ROOT(&ls->ls_rsbtbl[bucket].keep)) {
515 r = list_first_entry(&ls->ls_rsbtbl[bucket].list, 517 next = rb_first(&ls->ls_rsbtbl[bucket].keep);
516 struct dlm_rsb, res_hashchain); 518 r = rb_entry(next, struct dlm_rsb, res_hashnode);
517 dlm_hold_rsb(r); 519 dlm_hold_rsb(r);
518 ri->rsb = r; 520 ri->rsb = r;
519 ri->bucket = bucket; 521 ri->bucket = bucket;
diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c
index 7b84c1dbc82e..83641574b016 100644
--- a/fs/dlm/dir.c
+++ b/fs/dlm/dir.c
@@ -290,7 +290,6 @@ int dlm_recover_directory(struct dlm_ls *ls)
290 290
291 out_status: 291 out_status:
292 error = 0; 292 error = 0;
293 dlm_set_recover_status(ls, DLM_RS_DIR);
294 log_debug(ls, "dlm_recover_directory %d entries", count); 293 log_debug(ls, "dlm_recover_directory %d entries", count);
295 out_free: 294 out_free:
296 kfree(last_name); 295 kfree(last_name);
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index fe2860c02449..3a564d197e99 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -2,7 +2,7 @@
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5** Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved. 5** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
6** 6**
7** This copyrighted material is made available to anyone wishing to use, 7** This copyrighted material is made available to anyone wishing to use,
8** modify, copy, or redistribute it subject to the terms and conditions 8** modify, copy, or redistribute it subject to the terms and conditions
@@ -103,8 +103,8 @@ struct dlm_dirtable {
103}; 103};
104 104
105struct dlm_rsbtable { 105struct dlm_rsbtable {
106 struct list_head list; 106 struct rb_root keep;
107 struct list_head toss; 107 struct rb_root toss;
108 spinlock_t lock; 108 spinlock_t lock;
109}; 109};
110 110
@@ -117,6 +117,10 @@ struct dlm_member {
117 struct list_head list; 117 struct list_head list;
118 int nodeid; 118 int nodeid;
119 int weight; 119 int weight;
120 int slot;
121 int slot_prev;
122 int comm_seq;
123 uint32_t generation;
120}; 124};
121 125
122/* 126/*
@@ -125,10 +129,8 @@ struct dlm_member {
125 129
126struct dlm_recover { 130struct dlm_recover {
127 struct list_head list; 131 struct list_head list;
128 int *nodeids; /* nodeids of all members */ 132 struct dlm_config_node *nodes;
129 int node_count; 133 int nodes_count;
130 int *new; /* nodeids of new members */
131 int new_count;
132 uint64_t seq; 134 uint64_t seq;
133}; 135};
134 136
@@ -285,7 +287,10 @@ struct dlm_rsb {
285 unsigned long res_toss_time; 287 unsigned long res_toss_time;
286 uint32_t res_first_lkid; 288 uint32_t res_first_lkid;
287 struct list_head res_lookup; /* lkbs waiting on first */ 289 struct list_head res_lookup; /* lkbs waiting on first */
288 struct list_head res_hashchain; /* rsbtbl */ 290 union {
291 struct list_head res_hashchain;
292 struct rb_node res_hashnode; /* rsbtbl */
293 };
289 struct list_head res_grantqueue; 294 struct list_head res_grantqueue;
290 struct list_head res_convertqueue; 295 struct list_head res_convertqueue;
291 struct list_head res_waitqueue; 296 struct list_head res_waitqueue;
@@ -334,7 +339,9 @@ static inline int rsb_flag(struct dlm_rsb *r, enum rsb_flags flag)
334/* dlm_header is first element of all structs sent between nodes */ 339/* dlm_header is first element of all structs sent between nodes */
335 340
336#define DLM_HEADER_MAJOR 0x00030000 341#define DLM_HEADER_MAJOR 0x00030000
337#define DLM_HEADER_MINOR 0x00000000 342#define DLM_HEADER_MINOR 0x00000001
343
344#define DLM_HEADER_SLOTS 0x00000001
338 345
339#define DLM_MSG 1 346#define DLM_MSG 1
340#define DLM_RCOM 2 347#define DLM_RCOM 2
@@ -422,10 +429,34 @@ union dlm_packet {
422 struct dlm_rcom rcom; 429 struct dlm_rcom rcom;
423}; 430};
424 431
432#define DLM_RSF_NEED_SLOTS 0x00000001
433
434/* RCOM_STATUS data */
435struct rcom_status {
436 __le32 rs_flags;
437 __le32 rs_unused1;
438 __le64 rs_unused2;
439};
440
441/* RCOM_STATUS_REPLY data */
425struct rcom_config { 442struct rcom_config {
426 __le32 rf_lvblen; 443 __le32 rf_lvblen;
427 __le32 rf_lsflags; 444 __le32 rf_lsflags;
428 __le64 rf_unused; 445
446 /* DLM_HEADER_SLOTS adds: */
447 __le32 rf_flags;
448 __le16 rf_our_slot;
449 __le16 rf_num_slots;
450 __le32 rf_generation;
451 __le32 rf_unused1;
452 __le64 rf_unused2;
453};
454
455struct rcom_slot {
456 __le32 ro_nodeid;
457 __le16 ro_slot;
458 __le16 ro_unused1;
459 __le64 ro_unused2;
429}; 460};
430 461
431struct rcom_lock { 462struct rcom_lock {
@@ -452,6 +483,7 @@ struct dlm_ls {
452 struct list_head ls_list; /* list of lockspaces */ 483 struct list_head ls_list; /* list of lockspaces */
453 dlm_lockspace_t *ls_local_handle; 484 dlm_lockspace_t *ls_local_handle;
454 uint32_t ls_global_id; /* global unique lockspace ID */ 485 uint32_t ls_global_id; /* global unique lockspace ID */
486 uint32_t ls_generation;
455 uint32_t ls_exflags; 487 uint32_t ls_exflags;
456 int ls_lvblen; 488 int ls_lvblen;
457 int ls_count; /* refcount of processes in 489 int ls_count; /* refcount of processes in
@@ -490,6 +522,11 @@ struct dlm_ls {
490 int ls_total_weight; 522 int ls_total_weight;
491 int *ls_node_array; 523 int *ls_node_array;
492 524
525 int ls_slot;
526 int ls_num_slots;
527 int ls_slots_size;
528 struct dlm_slot *ls_slots;
529
493 struct dlm_rsb ls_stub_rsb; /* for returning errors */ 530 struct dlm_rsb ls_stub_rsb; /* for returning errors */
494 struct dlm_lkb ls_stub_lkb; /* for returning errors */ 531 struct dlm_lkb ls_stub_lkb; /* for returning errors */
495 struct dlm_message ls_stub_ms; /* for faking a reply */ 532 struct dlm_message ls_stub_ms; /* for faking a reply */
@@ -537,6 +574,9 @@ struct dlm_ls {
537 struct list_head ls_root_list; /* root resources */ 574 struct list_head ls_root_list; /* root resources */
538 struct rw_semaphore ls_root_sem; /* protect root_list */ 575 struct rw_semaphore ls_root_sem; /* protect root_list */
539 576
577 const struct dlm_lockspace_ops *ls_ops;
578 void *ls_ops_arg;
579
540 int ls_namelen; 580 int ls_namelen;
541 char ls_name[1]; 581 char ls_name[1];
542}; 582};
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 83b5e32514e1..d47183043c59 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -56,6 +56,7 @@
56 L: receive_xxxx_reply() <- R: send_xxxx_reply() 56 L: receive_xxxx_reply() <- R: send_xxxx_reply()
57*/ 57*/
58#include <linux/types.h> 58#include <linux/types.h>
59#include <linux/rbtree.h>
59#include <linux/slab.h> 60#include <linux/slab.h>
60#include "dlm_internal.h" 61#include "dlm_internal.h"
61#include <linux/dlm_device.h> 62#include <linux/dlm_device.h>
@@ -380,6 +381,8 @@ static int get_rsb_struct(struct dlm_ls *ls, char *name, int len,
380 381
381 r = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb, res_hashchain); 382 r = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb, res_hashchain);
382 list_del(&r->res_hashchain); 383 list_del(&r->res_hashchain);
384 /* Convert the empty list_head to a NULL rb_node for tree usage: */
385 memset(&r->res_hashnode, 0, sizeof(struct rb_node));
383 ls->ls_new_rsb_count--; 386 ls->ls_new_rsb_count--;
384 spin_unlock(&ls->ls_new_rsb_spin); 387 spin_unlock(&ls->ls_new_rsb_spin);
385 388
@@ -388,7 +391,6 @@ static int get_rsb_struct(struct dlm_ls *ls, char *name, int len,
388 memcpy(r->res_name, name, len); 391 memcpy(r->res_name, name, len);
389 mutex_init(&r->res_mutex); 392 mutex_init(&r->res_mutex);
390 393
391 INIT_LIST_HEAD(&r->res_hashchain);
392 INIT_LIST_HEAD(&r->res_lookup); 394 INIT_LIST_HEAD(&r->res_lookup);
393 INIT_LIST_HEAD(&r->res_grantqueue); 395 INIT_LIST_HEAD(&r->res_grantqueue);
394 INIT_LIST_HEAD(&r->res_convertqueue); 396 INIT_LIST_HEAD(&r->res_convertqueue);
@@ -400,14 +402,31 @@ static int get_rsb_struct(struct dlm_ls *ls, char *name, int len,
400 return 0; 402 return 0;
401} 403}
402 404
403static int search_rsb_list(struct list_head *head, char *name, int len, 405static int rsb_cmp(struct dlm_rsb *r, const char *name, int nlen)
406{
407 char maxname[DLM_RESNAME_MAXLEN];
408
409 memset(maxname, 0, DLM_RESNAME_MAXLEN);
410 memcpy(maxname, name, nlen);
411 return memcmp(r->res_name, maxname, DLM_RESNAME_MAXLEN);
412}
413
414static int search_rsb_tree(struct rb_root *tree, char *name, int len,
404 unsigned int flags, struct dlm_rsb **r_ret) 415 unsigned int flags, struct dlm_rsb **r_ret)
405{ 416{
417 struct rb_node *node = tree->rb_node;
406 struct dlm_rsb *r; 418 struct dlm_rsb *r;
407 int error = 0; 419 int error = 0;
408 420 int rc;
409 list_for_each_entry(r, head, res_hashchain) { 421
410 if (len == r->res_length && !memcmp(name, r->res_name, len)) 422 while (node) {
423 r = rb_entry(node, struct dlm_rsb, res_hashnode);
424 rc = rsb_cmp(r, name, len);
425 if (rc < 0)
426 node = node->rb_left;
427 else if (rc > 0)
428 node = node->rb_right;
429 else
411 goto found; 430 goto found;
412 } 431 }
413 *r_ret = NULL; 432 *r_ret = NULL;
@@ -420,22 +439,54 @@ static int search_rsb_list(struct list_head *head, char *name, int len,
420 return error; 439 return error;
421} 440}
422 441
442static int rsb_insert(struct dlm_rsb *rsb, struct rb_root *tree)
443{
444 struct rb_node **newn = &tree->rb_node;
445 struct rb_node *parent = NULL;
446 int rc;
447
448 while (*newn) {
449 struct dlm_rsb *cur = rb_entry(*newn, struct dlm_rsb,
450 res_hashnode);
451
452 parent = *newn;
453 rc = rsb_cmp(cur, rsb->res_name, rsb->res_length);
454 if (rc < 0)
455 newn = &parent->rb_left;
456 else if (rc > 0)
457 newn = &parent->rb_right;
458 else {
459 log_print("rsb_insert match");
460 dlm_dump_rsb(rsb);
461 dlm_dump_rsb(cur);
462 return -EEXIST;
463 }
464 }
465
466 rb_link_node(&rsb->res_hashnode, parent, newn);
467 rb_insert_color(&rsb->res_hashnode, tree);
468 return 0;
469}
470
423static int _search_rsb(struct dlm_ls *ls, char *name, int len, int b, 471static int _search_rsb(struct dlm_ls *ls, char *name, int len, int b,
424 unsigned int flags, struct dlm_rsb **r_ret) 472 unsigned int flags, struct dlm_rsb **r_ret)
425{ 473{
426 struct dlm_rsb *r; 474 struct dlm_rsb *r;
427 int error; 475 int error;
428 476
429 error = search_rsb_list(&ls->ls_rsbtbl[b].list, name, len, flags, &r); 477 error = search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, flags, &r);
430 if (!error) { 478 if (!error) {
431 kref_get(&r->res_ref); 479 kref_get(&r->res_ref);
432 goto out; 480 goto out;
433 } 481 }
434 error = search_rsb_list(&ls->ls_rsbtbl[b].toss, name, len, flags, &r); 482 error = search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, flags, &r);
435 if (error) 483 if (error)
436 goto out; 484 goto out;
437 485
438 list_move(&r->res_hashchain, &ls->ls_rsbtbl[b].list); 486 rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss);
487 error = rsb_insert(r, &ls->ls_rsbtbl[b].keep);
488 if (error)
489 return error;
439 490
440 if (dlm_no_directory(ls)) 491 if (dlm_no_directory(ls))
441 goto out; 492 goto out;
@@ -527,8 +578,7 @@ static int find_rsb(struct dlm_ls *ls, char *name, int namelen,
527 nodeid = 0; 578 nodeid = 0;
528 r->res_nodeid = nodeid; 579 r->res_nodeid = nodeid;
529 } 580 }
530 list_add(&r->res_hashchain, &ls->ls_rsbtbl[bucket].list); 581 error = rsb_insert(r, &ls->ls_rsbtbl[bucket].keep);
531 error = 0;
532 out_unlock: 582 out_unlock:
533 spin_unlock(&ls->ls_rsbtbl[bucket].lock); 583 spin_unlock(&ls->ls_rsbtbl[bucket].lock);
534 out: 584 out:
@@ -556,7 +606,8 @@ static void toss_rsb(struct kref *kref)
556 606
557 DLM_ASSERT(list_empty(&r->res_root_list), dlm_print_rsb(r);); 607 DLM_ASSERT(list_empty(&r->res_root_list), dlm_print_rsb(r););
558 kref_init(&r->res_ref); 608 kref_init(&r->res_ref);
559 list_move(&r->res_hashchain, &ls->ls_rsbtbl[r->res_bucket].toss); 609 rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[r->res_bucket].keep);
610 rsb_insert(r, &ls->ls_rsbtbl[r->res_bucket].toss);
560 r->res_toss_time = jiffies; 611 r->res_toss_time = jiffies;
561 if (r->res_lvbptr) { 612 if (r->res_lvbptr) {
562 dlm_free_lvb(r->res_lvbptr); 613 dlm_free_lvb(r->res_lvbptr);
@@ -1082,19 +1133,19 @@ static void dir_remove(struct dlm_rsb *r)
1082 r->res_name, r->res_length); 1133 r->res_name, r->res_length);
1083} 1134}
1084 1135
1085/* FIXME: shouldn't this be able to exit as soon as one non-due rsb is 1136/* FIXME: make this more efficient */
1086 found since they are in order of newest to oldest? */
1087 1137
1088static int shrink_bucket(struct dlm_ls *ls, int b) 1138static int shrink_bucket(struct dlm_ls *ls, int b)
1089{ 1139{
1140 struct rb_node *n;
1090 struct dlm_rsb *r; 1141 struct dlm_rsb *r;
1091 int count = 0, found; 1142 int count = 0, found;
1092 1143
1093 for (;;) { 1144 for (;;) {
1094 found = 0; 1145 found = 0;
1095 spin_lock(&ls->ls_rsbtbl[b].lock); 1146 spin_lock(&ls->ls_rsbtbl[b].lock);
1096 list_for_each_entry_reverse(r, &ls->ls_rsbtbl[b].toss, 1147 for (n = rb_first(&ls->ls_rsbtbl[b].toss); n; n = rb_next(n)) {
1097 res_hashchain) { 1148 r = rb_entry(n, struct dlm_rsb, res_hashnode);
1098 if (!time_after_eq(jiffies, r->res_toss_time + 1149 if (!time_after_eq(jiffies, r->res_toss_time +
1099 dlm_config.ci_toss_secs * HZ)) 1150 dlm_config.ci_toss_secs * HZ))
1100 continue; 1151 continue;
@@ -1108,7 +1159,7 @@ static int shrink_bucket(struct dlm_ls *ls, int b)
1108 } 1159 }
1109 1160
1110 if (kref_put(&r->res_ref, kill_rsb)) { 1161 if (kref_put(&r->res_ref, kill_rsb)) {
1111 list_del(&r->res_hashchain); 1162 rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss);
1112 spin_unlock(&ls->ls_rsbtbl[b].lock); 1163 spin_unlock(&ls->ls_rsbtbl[b].lock);
1113 1164
1114 if (is_master(r)) 1165 if (is_master(r))
@@ -4441,10 +4492,12 @@ int dlm_purge_locks(struct dlm_ls *ls)
4441 4492
4442static struct dlm_rsb *find_purged_rsb(struct dlm_ls *ls, int bucket) 4493static struct dlm_rsb *find_purged_rsb(struct dlm_ls *ls, int bucket)
4443{ 4494{
4495 struct rb_node *n;
4444 struct dlm_rsb *r, *r_ret = NULL; 4496 struct dlm_rsb *r, *r_ret = NULL;
4445 4497
4446 spin_lock(&ls->ls_rsbtbl[bucket].lock); 4498 spin_lock(&ls->ls_rsbtbl[bucket].lock);
4447 list_for_each_entry(r, &ls->ls_rsbtbl[bucket].list, res_hashchain) { 4499 for (n = rb_first(&ls->ls_rsbtbl[bucket].keep); n; n = rb_next(n)) {
4500 r = rb_entry(n, struct dlm_rsb, res_hashnode);
4448 if (!rsb_flag(r, RSB_LOCKS_PURGED)) 4501 if (!rsb_flag(r, RSB_LOCKS_PURGED))
4449 continue; 4502 continue;
4450 hold_rsb(r); 4503 hold_rsb(r);
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index a1d8f1af144b..a1ea25face82 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -2,7 +2,7 @@
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. 5** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
6** 6**
7** This copyrighted material is made available to anyone wishing to use, 7** This copyrighted material is made available to anyone wishing to use,
8** modify, copy, or redistribute it subject to the terms and conditions 8** modify, copy, or redistribute it subject to the terms and conditions
@@ -386,12 +386,15 @@ static void threads_stop(void)
386 dlm_lowcomms_stop(); 386 dlm_lowcomms_stop();
387} 387}
388 388
389static int new_lockspace(const char *name, int namelen, void **lockspace, 389static int new_lockspace(const char *name, const char *cluster,
390 uint32_t flags, int lvblen) 390 uint32_t flags, int lvblen,
391 const struct dlm_lockspace_ops *ops, void *ops_arg,
392 int *ops_result, dlm_lockspace_t **lockspace)
391{ 393{
392 struct dlm_ls *ls; 394 struct dlm_ls *ls;
393 int i, size, error; 395 int i, size, error;
394 int do_unreg = 0; 396 int do_unreg = 0;
397 int namelen = strlen(name);
395 398
396 if (namelen > DLM_LOCKSPACE_LEN) 399 if (namelen > DLM_LOCKSPACE_LEN)
397 return -EINVAL; 400 return -EINVAL;
@@ -403,8 +406,24 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
403 return -EINVAL; 406 return -EINVAL;
404 407
405 if (!dlm_user_daemon_available()) { 408 if (!dlm_user_daemon_available()) {
406 module_put(THIS_MODULE); 409 log_print("dlm user daemon not available");
407 return -EUNATCH; 410 error = -EUNATCH;
411 goto out;
412 }
413
414 if (ops && ops_result) {
415 if (!dlm_config.ci_recover_callbacks)
416 *ops_result = -EOPNOTSUPP;
417 else
418 *ops_result = 0;
419 }
420
421 if (dlm_config.ci_recover_callbacks && cluster &&
422 strncmp(cluster, dlm_config.ci_cluster_name, DLM_LOCKSPACE_LEN)) {
423 log_print("dlm cluster name %s mismatch %s",
424 dlm_config.ci_cluster_name, cluster);
425 error = -EBADR;
426 goto out;
408 } 427 }
409 428
410 error = 0; 429 error = 0;
@@ -442,6 +461,11 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
442 ls->ls_flags = 0; 461 ls->ls_flags = 0;
443 ls->ls_scan_time = jiffies; 462 ls->ls_scan_time = jiffies;
444 463
464 if (ops && dlm_config.ci_recover_callbacks) {
465 ls->ls_ops = ops;
466 ls->ls_ops_arg = ops_arg;
467 }
468
445 if (flags & DLM_LSFL_TIMEWARN) 469 if (flags & DLM_LSFL_TIMEWARN)
446 set_bit(LSFL_TIMEWARN, &ls->ls_flags); 470 set_bit(LSFL_TIMEWARN, &ls->ls_flags);
447 471
@@ -457,8 +481,8 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
457 if (!ls->ls_rsbtbl) 481 if (!ls->ls_rsbtbl)
458 goto out_lsfree; 482 goto out_lsfree;
459 for (i = 0; i < size; i++) { 483 for (i = 0; i < size; i++) {
460 INIT_LIST_HEAD(&ls->ls_rsbtbl[i].list); 484 ls->ls_rsbtbl[i].keep.rb_node = NULL;
461 INIT_LIST_HEAD(&ls->ls_rsbtbl[i].toss); 485 ls->ls_rsbtbl[i].toss.rb_node = NULL;
462 spin_lock_init(&ls->ls_rsbtbl[i].lock); 486 spin_lock_init(&ls->ls_rsbtbl[i].lock);
463 } 487 }
464 488
@@ -525,6 +549,11 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
525 if (!ls->ls_recover_buf) 549 if (!ls->ls_recover_buf)
526 goto out_dirfree; 550 goto out_dirfree;
527 551
552 ls->ls_slot = 0;
553 ls->ls_num_slots = 0;
554 ls->ls_slots_size = 0;
555 ls->ls_slots = NULL;
556
528 INIT_LIST_HEAD(&ls->ls_recover_list); 557 INIT_LIST_HEAD(&ls->ls_recover_list);
529 spin_lock_init(&ls->ls_recover_list_lock); 558 spin_lock_init(&ls->ls_recover_list_lock);
530 ls->ls_recover_list_count = 0; 559 ls->ls_recover_list_count = 0;
@@ -614,8 +643,10 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
614 return error; 643 return error;
615} 644}
616 645
617int dlm_new_lockspace(const char *name, int namelen, void **lockspace, 646int dlm_new_lockspace(const char *name, const char *cluster,
618 uint32_t flags, int lvblen) 647 uint32_t flags, int lvblen,
648 const struct dlm_lockspace_ops *ops, void *ops_arg,
649 int *ops_result, dlm_lockspace_t **lockspace)
619{ 650{
620 int error = 0; 651 int error = 0;
621 652
@@ -625,7 +656,8 @@ int dlm_new_lockspace(const char *name, int namelen, void **lockspace,
625 if (error) 656 if (error)
626 goto out; 657 goto out;
627 658
628 error = new_lockspace(name, namelen, lockspace, flags, lvblen); 659 error = new_lockspace(name, cluster, flags, lvblen, ops, ops_arg,
660 ops_result, lockspace);
629 if (!error) 661 if (!error)
630 ls_count++; 662 ls_count++;
631 if (error > 0) 663 if (error > 0)
@@ -685,7 +717,7 @@ static int lockspace_busy(struct dlm_ls *ls, int force)
685static int release_lockspace(struct dlm_ls *ls, int force) 717static int release_lockspace(struct dlm_ls *ls, int force)
686{ 718{
687 struct dlm_rsb *rsb; 719 struct dlm_rsb *rsb;
688 struct list_head *head; 720 struct rb_node *n;
689 int i, busy, rv; 721 int i, busy, rv;
690 722
691 busy = lockspace_busy(ls, force); 723 busy = lockspace_busy(ls, force);
@@ -746,20 +778,15 @@ static int release_lockspace(struct dlm_ls *ls, int force)
746 */ 778 */
747 779
748 for (i = 0; i < ls->ls_rsbtbl_size; i++) { 780 for (i = 0; i < ls->ls_rsbtbl_size; i++) {
749 head = &ls->ls_rsbtbl[i].list; 781 while ((n = rb_first(&ls->ls_rsbtbl[i].keep))) {
750 while (!list_empty(head)) { 782 rsb = rb_entry(n, struct dlm_rsb, res_hashnode);
751 rsb = list_entry(head->next, struct dlm_rsb, 783 rb_erase(n, &ls->ls_rsbtbl[i].keep);
752 res_hashchain);
753
754 list_del(&rsb->res_hashchain);
755 dlm_free_rsb(rsb); 784 dlm_free_rsb(rsb);
756 } 785 }
757 786
758 head = &ls->ls_rsbtbl[i].toss; 787 while ((n = rb_first(&ls->ls_rsbtbl[i].toss))) {
759 while (!list_empty(head)) { 788 rsb = rb_entry(n, struct dlm_rsb, res_hashnode);
760 rsb = list_entry(head->next, struct dlm_rsb, 789 rb_erase(n, &ls->ls_rsbtbl[i].toss);
761 res_hashchain);
762 list_del(&rsb->res_hashchain);
763 dlm_free_rsb(rsb); 790 dlm_free_rsb(rsb);
764 } 791 }
765 } 792 }
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 990626e7da80..0b3109ee4257 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -281,7 +281,7 @@ static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
281 } else { 281 } else {
282 struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &addr; 282 struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &addr;
283 struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr; 283 struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr;
284 ipv6_addr_copy(&ret6->sin6_addr, &in6->sin6_addr); 284 ret6->sin6_addr = in6->sin6_addr;
285 } 285 }
286 286
287 return 0; 287 return 0;
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index b12532e553f8..862640a36d5c 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -1,7 +1,7 @@
1/****************************************************************************** 1/******************************************************************************
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) 2005-2009 Red Hat, Inc. All rights reserved. 4** Copyright (C) 2005-2011 Red Hat, Inc. All rights reserved.
5** 5**
6** This copyrighted material is made available to anyone wishing to use, 6** This copyrighted material is made available to anyone wishing to use,
7** modify, copy, or redistribute it subject to the terms and conditions 7** modify, copy, or redistribute it subject to the terms and conditions
@@ -19,6 +19,280 @@
19#include "config.h" 19#include "config.h"
20#include "lowcomms.h" 20#include "lowcomms.h"
21 21
22int dlm_slots_version(struct dlm_header *h)
23{
24 if ((h->h_version & 0x0000FFFF) < DLM_HEADER_SLOTS)
25 return 0;
26 return 1;
27}
28
29void dlm_slot_save(struct dlm_ls *ls, struct dlm_rcom *rc,
30 struct dlm_member *memb)
31{
32 struct rcom_config *rf = (struct rcom_config *)rc->rc_buf;
33
34 if (!dlm_slots_version(&rc->rc_header))
35 return;
36
37 memb->slot = le16_to_cpu(rf->rf_our_slot);
38 memb->generation = le32_to_cpu(rf->rf_generation);
39}
40
41void dlm_slots_copy_out(struct dlm_ls *ls, struct dlm_rcom *rc)
42{
43 struct dlm_slot *slot;
44 struct rcom_slot *ro;
45 int i;
46
47 ro = (struct rcom_slot *)(rc->rc_buf + sizeof(struct rcom_config));
48
49 /* ls_slots array is sparse, but not rcom_slots */
50
51 for (i = 0; i < ls->ls_slots_size; i++) {
52 slot = &ls->ls_slots[i];
53 if (!slot->nodeid)
54 continue;
55 ro->ro_nodeid = cpu_to_le32(slot->nodeid);
56 ro->ro_slot = cpu_to_le16(slot->slot);
57 ro++;
58 }
59}
60
61#define SLOT_DEBUG_LINE 128
62
63static void log_debug_slots(struct dlm_ls *ls, uint32_t gen, int num_slots,
64 struct rcom_slot *ro0, struct dlm_slot *array,
65 int array_size)
66{
67 char line[SLOT_DEBUG_LINE];
68 int len = SLOT_DEBUG_LINE - 1;
69 int pos = 0;
70 int ret, i;
71
72 if (!dlm_config.ci_log_debug)
73 return;
74
75 memset(line, 0, sizeof(line));
76
77 if (array) {
78 for (i = 0; i < array_size; i++) {
79 if (!array[i].nodeid)
80 continue;
81
82 ret = snprintf(line + pos, len - pos, " %d:%d",
83 array[i].slot, array[i].nodeid);
84 if (ret >= len - pos)
85 break;
86 pos += ret;
87 }
88 } else if (ro0) {
89 for (i = 0; i < num_slots; i++) {
90 ret = snprintf(line + pos, len - pos, " %d:%d",
91 ro0[i].ro_slot, ro0[i].ro_nodeid);
92 if (ret >= len - pos)
93 break;
94 pos += ret;
95 }
96 }
97
98 log_debug(ls, "generation %u slots %d%s", gen, num_slots, line);
99}
100
101int dlm_slots_copy_in(struct dlm_ls *ls)
102{
103 struct dlm_member *memb;
104 struct dlm_rcom *rc = ls->ls_recover_buf;
105 struct rcom_config *rf = (struct rcom_config *)rc->rc_buf;
106 struct rcom_slot *ro0, *ro;
107 int our_nodeid = dlm_our_nodeid();
108 int i, num_slots;
109 uint32_t gen;
110
111 if (!dlm_slots_version(&rc->rc_header))
112 return -1;
113
114 gen = le32_to_cpu(rf->rf_generation);
115 if (gen <= ls->ls_generation) {
116 log_error(ls, "dlm_slots_copy_in gen %u old %u",
117 gen, ls->ls_generation);
118 }
119 ls->ls_generation = gen;
120
121 num_slots = le16_to_cpu(rf->rf_num_slots);
122 if (!num_slots)
123 return -1;
124
125 ro0 = (struct rcom_slot *)(rc->rc_buf + sizeof(struct rcom_config));
126
127 for (i = 0, ro = ro0; i < num_slots; i++, ro++) {
128 ro->ro_nodeid = le32_to_cpu(ro->ro_nodeid);
129 ro->ro_slot = le16_to_cpu(ro->ro_slot);
130 }
131
132 log_debug_slots(ls, gen, num_slots, ro0, NULL, 0);
133
134 list_for_each_entry(memb, &ls->ls_nodes, list) {
135 for (i = 0, ro = ro0; i < num_slots; i++, ro++) {
136 if (ro->ro_nodeid != memb->nodeid)
137 continue;
138 memb->slot = ro->ro_slot;
139 memb->slot_prev = memb->slot;
140 break;
141 }
142
143 if (memb->nodeid == our_nodeid) {
144 if (ls->ls_slot && ls->ls_slot != memb->slot) {
145 log_error(ls, "dlm_slots_copy_in our slot "
146 "changed %d %d", ls->ls_slot,
147 memb->slot);
148 return -1;
149 }
150
151 if (!ls->ls_slot)
152 ls->ls_slot = memb->slot;
153 }
154
155 if (!memb->slot) {
156 log_error(ls, "dlm_slots_copy_in nodeid %d no slot",
157 memb->nodeid);
158 return -1;
159 }
160 }
161
162 return 0;
163}
164
165/* for any nodes that do not support slots, we will not have set memb->slot
166 in wait_status_all(), so memb->slot will remain -1, and we will not
167 assign slots or set ls_num_slots here */
168
169int dlm_slots_assign(struct dlm_ls *ls, int *num_slots, int *slots_size,
170 struct dlm_slot **slots_out, uint32_t *gen_out)
171{
172 struct dlm_member *memb;
173 struct dlm_slot *array;
174 int our_nodeid = dlm_our_nodeid();
175 int array_size, max_slots, i;
176 int need = 0;
177 int max = 0;
178 int num = 0;
179 uint32_t gen = 0;
180
181 /* our own memb struct will have slot -1 gen 0 */
182
183 list_for_each_entry(memb, &ls->ls_nodes, list) {
184 if (memb->nodeid == our_nodeid) {
185 memb->slot = ls->ls_slot;
186 memb->generation = ls->ls_generation;
187 break;
188 }
189 }
190
191 list_for_each_entry(memb, &ls->ls_nodes, list) {
192 if (memb->generation > gen)
193 gen = memb->generation;
194
195 /* node doesn't support slots */
196
197 if (memb->slot == -1)
198 return -1;
199
200 /* node needs a slot assigned */
201
202 if (!memb->slot)
203 need++;
204
205 /* node has a slot assigned */
206
207 num++;
208
209 if (!max || max < memb->slot)
210 max = memb->slot;
211
212 /* sanity check, once slot is assigned it shouldn't change */
213
214 if (memb->slot_prev && memb->slot && memb->slot_prev != memb->slot) {
215 log_error(ls, "nodeid %d slot changed %d %d",
216 memb->nodeid, memb->slot_prev, memb->slot);
217 return -1;
218 }
219 memb->slot_prev = memb->slot;
220 }
221
222 array_size = max + need;
223
224 array = kzalloc(array_size * sizeof(struct dlm_slot), GFP_NOFS);
225 if (!array)
226 return -ENOMEM;
227
228 num = 0;
229
230 /* fill in slots (offsets) that are used */
231
232 list_for_each_entry(memb, &ls->ls_nodes, list) {
233 if (!memb->slot)
234 continue;
235
236 if (memb->slot > array_size) {
237 log_error(ls, "invalid slot number %d", memb->slot);
238 kfree(array);
239 return -1;
240 }
241
242 array[memb->slot - 1].nodeid = memb->nodeid;
243 array[memb->slot - 1].slot = memb->slot;
244 num++;
245 }
246
247 /* assign new slots from unused offsets */
248
249 list_for_each_entry(memb, &ls->ls_nodes, list) {
250 if (memb->slot)
251 continue;
252
253 for (i = 0; i < array_size; i++) {
254 if (array[i].nodeid)
255 continue;
256
257 memb->slot = i + 1;
258 memb->slot_prev = memb->slot;
259 array[i].nodeid = memb->nodeid;
260 array[i].slot = memb->slot;
261 num++;
262
263 if (!ls->ls_slot && memb->nodeid == our_nodeid)
264 ls->ls_slot = memb->slot;
265 break;
266 }
267
268 if (!memb->slot) {
269 log_error(ls, "no free slot found");
270 kfree(array);
271 return -1;
272 }
273 }
274
275 gen++;
276
277 log_debug_slots(ls, gen, num, NULL, array, array_size);
278
279 max_slots = (dlm_config.ci_buffer_size - sizeof(struct dlm_rcom) -
280 sizeof(struct rcom_config)) / sizeof(struct rcom_slot);
281
282 if (num > max_slots) {
283 log_error(ls, "num_slots %d exceeds max_slots %d",
284 num, max_slots);
285 kfree(array);
286 return -1;
287 }
288
289 *gen_out = gen;
290 *slots_out = array;
291 *slots_size = array_size;
292 *num_slots = num;
293 return 0;
294}
295
22static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new) 296static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new)
23{ 297{
24 struct dlm_member *memb = NULL; 298 struct dlm_member *memb = NULL;
@@ -43,59 +317,51 @@ static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new)
43 } 317 }
44} 318}
45 319
46static int dlm_add_member(struct dlm_ls *ls, int nodeid) 320static int dlm_add_member(struct dlm_ls *ls, struct dlm_config_node *node)
47{ 321{
48 struct dlm_member *memb; 322 struct dlm_member *memb;
49 int w, error; 323 int error;
50 324
51 memb = kzalloc(sizeof(struct dlm_member), GFP_NOFS); 325 memb = kzalloc(sizeof(struct dlm_member), GFP_NOFS);
52 if (!memb) 326 if (!memb)
53 return -ENOMEM; 327 return -ENOMEM;
54 328
55 w = dlm_node_weight(ls->ls_name, nodeid); 329 error = dlm_lowcomms_connect_node(node->nodeid);
56 if (w < 0) {
57 kfree(memb);
58 return w;
59 }
60
61 error = dlm_lowcomms_connect_node(nodeid);
62 if (error < 0) { 330 if (error < 0) {
63 kfree(memb); 331 kfree(memb);
64 return error; 332 return error;
65 } 333 }
66 334
67 memb->nodeid = nodeid; 335 memb->nodeid = node->nodeid;
68 memb->weight = w; 336 memb->weight = node->weight;
337 memb->comm_seq = node->comm_seq;
69 add_ordered_member(ls, memb); 338 add_ordered_member(ls, memb);
70 ls->ls_num_nodes++; 339 ls->ls_num_nodes++;
71 return 0; 340 return 0;
72} 341}
73 342
74static void dlm_remove_member(struct dlm_ls *ls, struct dlm_member *memb) 343static struct dlm_member *find_memb(struct list_head *head, int nodeid)
75{
76 list_move(&memb->list, &ls->ls_nodes_gone);
77 ls->ls_num_nodes--;
78}
79
80int dlm_is_member(struct dlm_ls *ls, int nodeid)
81{ 344{
82 struct dlm_member *memb; 345 struct dlm_member *memb;
83 346
84 list_for_each_entry(memb, &ls->ls_nodes, list) { 347 list_for_each_entry(memb, head, list) {
85 if (memb->nodeid == nodeid) 348 if (memb->nodeid == nodeid)
86 return 1; 349 return memb;
87 } 350 }
351 return NULL;
352}
353
354int dlm_is_member(struct dlm_ls *ls, int nodeid)
355{
356 if (find_memb(&ls->ls_nodes, nodeid))
357 return 1;
88 return 0; 358 return 0;
89} 359}
90 360
91int dlm_is_removed(struct dlm_ls *ls, int nodeid) 361int dlm_is_removed(struct dlm_ls *ls, int nodeid)
92{ 362{
93 struct dlm_member *memb; 363 if (find_memb(&ls->ls_nodes_gone, nodeid))
94 364 return 1;
95 list_for_each_entry(memb, &ls->ls_nodes_gone, list) {
96 if (memb->nodeid == nodeid)
97 return 1;
98 }
99 return 0; 365 return 0;
100} 366}
101 367
@@ -176,7 +442,7 @@ static int ping_members(struct dlm_ls *ls)
176 error = dlm_recovery_stopped(ls); 442 error = dlm_recovery_stopped(ls);
177 if (error) 443 if (error)
178 break; 444 break;
179 error = dlm_rcom_status(ls, memb->nodeid); 445 error = dlm_rcom_status(ls, memb->nodeid, 0);
180 if (error) 446 if (error)
181 break; 447 break;
182 } 448 }
@@ -186,10 +452,88 @@ static int ping_members(struct dlm_ls *ls)
186 return error; 452 return error;
187} 453}
188 454
455static void dlm_lsop_recover_prep(struct dlm_ls *ls)
456{
457 if (!ls->ls_ops || !ls->ls_ops->recover_prep)
458 return;
459 ls->ls_ops->recover_prep(ls->ls_ops_arg);
460}
461
462static void dlm_lsop_recover_slot(struct dlm_ls *ls, struct dlm_member *memb)
463{
464 struct dlm_slot slot;
465 uint32_t seq;
466 int error;
467
468 if (!ls->ls_ops || !ls->ls_ops->recover_slot)
469 return;
470
471 /* if there is no comms connection with this node
472 or the present comms connection is newer
473 than the one when this member was added, then
474 we consider the node to have failed (versus
475 being removed due to dlm_release_lockspace) */
476
477 error = dlm_comm_seq(memb->nodeid, &seq);
478
479 if (!error && seq == memb->comm_seq)
480 return;
481
482 slot.nodeid = memb->nodeid;
483 slot.slot = memb->slot;
484
485 ls->ls_ops->recover_slot(ls->ls_ops_arg, &slot);
486}
487
488void dlm_lsop_recover_done(struct dlm_ls *ls)
489{
490 struct dlm_member *memb;
491 struct dlm_slot *slots;
492 int i, num;
493
494 if (!ls->ls_ops || !ls->ls_ops->recover_done)
495 return;
496
497 num = ls->ls_num_nodes;
498
499 slots = kzalloc(num * sizeof(struct dlm_slot), GFP_KERNEL);
500 if (!slots)
501 return;
502
503 i = 0;
504 list_for_each_entry(memb, &ls->ls_nodes, list) {
505 if (i == num) {
506 log_error(ls, "dlm_lsop_recover_done bad num %d", num);
507 goto out;
508 }
509 slots[i].nodeid = memb->nodeid;
510 slots[i].slot = memb->slot;
511 i++;
512 }
513
514 ls->ls_ops->recover_done(ls->ls_ops_arg, slots, num,
515 ls->ls_slot, ls->ls_generation);
516 out:
517 kfree(slots);
518}
519
520static struct dlm_config_node *find_config_node(struct dlm_recover *rv,
521 int nodeid)
522{
523 int i;
524
525 for (i = 0; i < rv->nodes_count; i++) {
526 if (rv->nodes[i].nodeid == nodeid)
527 return &rv->nodes[i];
528 }
529 return NULL;
530}
531
189int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) 532int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
190{ 533{
191 struct dlm_member *memb, *safe; 534 struct dlm_member *memb, *safe;
192 int i, error, found, pos = 0, neg = 0, low = -1; 535 struct dlm_config_node *node;
536 int i, error, neg = 0, low = -1;
193 537
194 /* previously removed members that we've not finished removing need to 538 /* previously removed members that we've not finished removing need to
195 count as a negative change so the "neg" recovery steps will happen */ 539 count as a negative change so the "neg" recovery steps will happen */
@@ -202,46 +546,32 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
202 /* move departed members from ls_nodes to ls_nodes_gone */ 546 /* move departed members from ls_nodes to ls_nodes_gone */
203 547
204 list_for_each_entry_safe(memb, safe, &ls->ls_nodes, list) { 548 list_for_each_entry_safe(memb, safe, &ls->ls_nodes, list) {
205 found = 0; 549 node = find_config_node(rv, memb->nodeid);
206 for (i = 0; i < rv->node_count; i++) { 550 if (node && !node->new)
207 if (memb->nodeid == rv->nodeids[i]) { 551 continue;
208 found = 1;
209 break;
210 }
211 }
212 552
213 if (!found) { 553 if (!node) {
214 neg++;
215 dlm_remove_member(ls, memb);
216 log_debug(ls, "remove member %d", memb->nodeid); 554 log_debug(ls, "remove member %d", memb->nodeid);
555 } else {
556 /* removed and re-added */
557 log_debug(ls, "remove member %d comm_seq %u %u",
558 memb->nodeid, memb->comm_seq, node->comm_seq);
217 } 559 }
218 }
219
220 /* Add an entry to ls_nodes_gone for members that were removed and
221 then added again, so that previous state for these nodes will be
222 cleared during recovery. */
223
224 for (i = 0; i < rv->new_count; i++) {
225 if (!dlm_is_member(ls, rv->new[i]))
226 continue;
227 log_debug(ls, "new nodeid %d is a re-added member", rv->new[i]);
228 560
229 memb = kzalloc(sizeof(struct dlm_member), GFP_NOFS);
230 if (!memb)
231 return -ENOMEM;
232 memb->nodeid = rv->new[i];
233 list_add_tail(&memb->list, &ls->ls_nodes_gone);
234 neg++; 561 neg++;
562 list_move(&memb->list, &ls->ls_nodes_gone);
563 ls->ls_num_nodes--;
564 dlm_lsop_recover_slot(ls, memb);
235 } 565 }
236 566
237 /* add new members to ls_nodes */ 567 /* add new members to ls_nodes */
238 568
239 for (i = 0; i < rv->node_count; i++) { 569 for (i = 0; i < rv->nodes_count; i++) {
240 if (dlm_is_member(ls, rv->nodeids[i])) 570 node = &rv->nodes[i];
571 if (dlm_is_member(ls, node->nodeid))
241 continue; 572 continue;
242 dlm_add_member(ls, rv->nodeids[i]); 573 dlm_add_member(ls, node);
243 pos++; 574 log_debug(ls, "add member %d", node->nodeid);
244 log_debug(ls, "add member %d", rv->nodeids[i]);
245 } 575 }
246 576
247 list_for_each_entry(memb, &ls->ls_nodes, list) { 577 list_for_each_entry(memb, &ls->ls_nodes, list) {
@@ -251,7 +581,6 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
251 ls->ls_low_nodeid = low; 581 ls->ls_low_nodeid = low;
252 582
253 make_member_array(ls); 583 make_member_array(ls);
254 dlm_set_recover_status(ls, DLM_RS_NODES);
255 *neg_out = neg; 584 *neg_out = neg;
256 585
257 error = ping_members(ls); 586 error = ping_members(ls);
@@ -261,12 +590,8 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
261 ls->ls_members_result = error; 590 ls->ls_members_result = error;
262 complete(&ls->ls_members_done); 591 complete(&ls->ls_members_done);
263 } 592 }
264 if (error)
265 goto out;
266 593
267 error = dlm_recover_members_wait(ls); 594 log_debug(ls, "dlm_recover_members %d nodes", ls->ls_num_nodes);
268 out:
269 log_debug(ls, "total members %d error %d", ls->ls_num_nodes, error);
270 return error; 595 return error;
271} 596}
272 597
@@ -327,26 +652,35 @@ int dlm_ls_stop(struct dlm_ls *ls)
327 */ 652 */
328 653
329 dlm_recoverd_suspend(ls); 654 dlm_recoverd_suspend(ls);
655
656 spin_lock(&ls->ls_recover_lock);
657 kfree(ls->ls_slots);
658 ls->ls_slots = NULL;
659 ls->ls_num_slots = 0;
660 ls->ls_slots_size = 0;
330 ls->ls_recover_status = 0; 661 ls->ls_recover_status = 0;
662 spin_unlock(&ls->ls_recover_lock);
663
331 dlm_recoverd_resume(ls); 664 dlm_recoverd_resume(ls);
332 665
333 if (!ls->ls_recover_begin) 666 if (!ls->ls_recover_begin)
334 ls->ls_recover_begin = jiffies; 667 ls->ls_recover_begin = jiffies;
668
669 dlm_lsop_recover_prep(ls);
335 return 0; 670 return 0;
336} 671}
337 672
338int dlm_ls_start(struct dlm_ls *ls) 673int dlm_ls_start(struct dlm_ls *ls)
339{ 674{
340 struct dlm_recover *rv = NULL, *rv_old; 675 struct dlm_recover *rv = NULL, *rv_old;
341 int *ids = NULL, *new = NULL; 676 struct dlm_config_node *nodes;
342 int error, ids_count = 0, new_count = 0; 677 int error, count;
343 678
344 rv = kzalloc(sizeof(struct dlm_recover), GFP_NOFS); 679 rv = kzalloc(sizeof(struct dlm_recover), GFP_NOFS);
345 if (!rv) 680 if (!rv)
346 return -ENOMEM; 681 return -ENOMEM;
347 682
348 error = dlm_nodeid_list(ls->ls_name, &ids, &ids_count, 683 error = dlm_config_nodes(ls->ls_name, &nodes, &count);
349 &new, &new_count);
350 if (error < 0) 684 if (error < 0)
351 goto fail; 685 goto fail;
352 686
@@ -361,10 +695,8 @@ int dlm_ls_start(struct dlm_ls *ls)
361 goto fail; 695 goto fail;
362 } 696 }
363 697
364 rv->nodeids = ids; 698 rv->nodes = nodes;
365 rv->node_count = ids_count; 699 rv->nodes_count = count;
366 rv->new = new;
367 rv->new_count = new_count;
368 rv->seq = ++ls->ls_recover_seq; 700 rv->seq = ++ls->ls_recover_seq;
369 rv_old = ls->ls_recover_args; 701 rv_old = ls->ls_recover_args;
370 ls->ls_recover_args = rv; 702 ls->ls_recover_args = rv;
@@ -372,9 +704,8 @@ int dlm_ls_start(struct dlm_ls *ls)
372 704
373 if (rv_old) { 705 if (rv_old) {
374 log_error(ls, "unused recovery %llx %d", 706 log_error(ls, "unused recovery %llx %d",
375 (unsigned long long)rv_old->seq, rv_old->node_count); 707 (unsigned long long)rv_old->seq, rv_old->nodes_count);
376 kfree(rv_old->nodeids); 708 kfree(rv_old->nodes);
377 kfree(rv_old->new);
378 kfree(rv_old); 709 kfree(rv_old);
379 } 710 }
380 711
@@ -383,8 +714,7 @@ int dlm_ls_start(struct dlm_ls *ls)
383 714
384 fail: 715 fail:
385 kfree(rv); 716 kfree(rv);
386 kfree(ids); 717 kfree(nodes);
387 kfree(new);
388 return error; 718 return error;
389} 719}
390 720
diff --git a/fs/dlm/member.h b/fs/dlm/member.h
index 7a26fca1e0b5..3deb70661c69 100644
--- a/fs/dlm/member.h
+++ b/fs/dlm/member.h
@@ -1,7 +1,7 @@
1/****************************************************************************** 1/******************************************************************************
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. 4** Copyright (C) 2005-2011 Red Hat, Inc. All rights reserved.
5** 5**
6** This copyrighted material is made available to anyone wishing to use, 6** This copyrighted material is made available to anyone wishing to use,
7** modify, copy, or redistribute it subject to the terms and conditions 7** modify, copy, or redistribute it subject to the terms and conditions
@@ -20,6 +20,14 @@ void dlm_clear_members_gone(struct dlm_ls *ls);
20int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv,int *neg_out); 20int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv,int *neg_out);
21int dlm_is_removed(struct dlm_ls *ls, int nodeid); 21int dlm_is_removed(struct dlm_ls *ls, int nodeid);
22int dlm_is_member(struct dlm_ls *ls, int nodeid); 22int dlm_is_member(struct dlm_ls *ls, int nodeid);
23int dlm_slots_version(struct dlm_header *h);
24void dlm_slot_save(struct dlm_ls *ls, struct dlm_rcom *rc,
25 struct dlm_member *memb);
26void dlm_slots_copy_out(struct dlm_ls *ls, struct dlm_rcom *rc);
27int dlm_slots_copy_in(struct dlm_ls *ls);
28int dlm_slots_assign(struct dlm_ls *ls, int *num_slots, int *slots_size,
29 struct dlm_slot **slots_out, uint32_t *gen_out);
30void dlm_lsop_recover_done(struct dlm_ls *ls);
23 31
24#endif /* __MEMBER_DOT_H__ */ 32#endif /* __MEMBER_DOT_H__ */
25 33
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index f10a50f24e8f..ac5c616c9696 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -23,6 +23,7 @@
23#include "memory.h" 23#include "memory.h"
24#include "lock.h" 24#include "lock.h"
25#include "util.h" 25#include "util.h"
26#include "member.h"
26 27
27 28
28static int rcom_response(struct dlm_ls *ls) 29static int rcom_response(struct dlm_ls *ls)
@@ -72,20 +73,30 @@ static void send_rcom(struct dlm_ls *ls, struct dlm_mhandle *mh,
72 dlm_lowcomms_commit_buffer(mh); 73 dlm_lowcomms_commit_buffer(mh);
73} 74}
74 75
76static void set_rcom_status(struct dlm_ls *ls, struct rcom_status *rs,
77 uint32_t flags)
78{
79 rs->rs_flags = cpu_to_le32(flags);
80}
81
75/* When replying to a status request, a node also sends back its 82/* When replying to a status request, a node also sends back its
76 configuration values. The requesting node then checks that the remote 83 configuration values. The requesting node then checks that the remote
77 node is configured the same way as itself. */ 84 node is configured the same way as itself. */
78 85
79static void make_config(struct dlm_ls *ls, struct rcom_config *rf) 86static void set_rcom_config(struct dlm_ls *ls, struct rcom_config *rf,
87 uint32_t num_slots)
80{ 88{
81 rf->rf_lvblen = cpu_to_le32(ls->ls_lvblen); 89 rf->rf_lvblen = cpu_to_le32(ls->ls_lvblen);
82 rf->rf_lsflags = cpu_to_le32(ls->ls_exflags); 90 rf->rf_lsflags = cpu_to_le32(ls->ls_exflags);
91
92 rf->rf_our_slot = cpu_to_le16(ls->ls_slot);
93 rf->rf_num_slots = cpu_to_le16(num_slots);
94 rf->rf_generation = cpu_to_le32(ls->ls_generation);
83} 95}
84 96
85static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) 97static int check_rcom_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
86{ 98{
87 struct rcom_config *rf = (struct rcom_config *) rc->rc_buf; 99 struct rcom_config *rf = (struct rcom_config *) rc->rc_buf;
88 size_t conf_size = sizeof(struct dlm_rcom) + sizeof(struct rcom_config);
89 100
90 if ((rc->rc_header.h_version & 0xFFFF0000) != DLM_HEADER_MAJOR) { 101 if ((rc->rc_header.h_version & 0xFFFF0000) != DLM_HEADER_MAJOR) {
91 log_error(ls, "version mismatch: %x nodeid %d: %x", 102 log_error(ls, "version mismatch: %x nodeid %d: %x",
@@ -94,12 +105,6 @@ static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
94 return -EPROTO; 105 return -EPROTO;
95 } 106 }
96 107
97 if (rc->rc_header.h_length < conf_size) {
98 log_error(ls, "config too short: %d nodeid %d",
99 rc->rc_header.h_length, nodeid);
100 return -EPROTO;
101 }
102
103 if (le32_to_cpu(rf->rf_lvblen) != ls->ls_lvblen || 108 if (le32_to_cpu(rf->rf_lvblen) != ls->ls_lvblen ||
104 le32_to_cpu(rf->rf_lsflags) != ls->ls_exflags) { 109 le32_to_cpu(rf->rf_lsflags) != ls->ls_exflags) {
105 log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x", 110 log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x",
@@ -127,7 +132,18 @@ static void disallow_sync_reply(struct dlm_ls *ls)
127 spin_unlock(&ls->ls_rcom_spin); 132 spin_unlock(&ls->ls_rcom_spin);
128} 133}
129 134
130int dlm_rcom_status(struct dlm_ls *ls, int nodeid) 135/*
136 * low nodeid gathers one slot value at a time from each node.
137 * it sets need_slots=0, and saves rf_our_slot returned from each
138 * rcom_config.
139 *
140 * other nodes gather all slot values at once from the low nodeid.
141 * they set need_slots=1, and ignore the rf_our_slot returned from each
142 * rcom_config. they use the rf_num_slots returned from the low
143 * node's rcom_config.
144 */
145
146int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags)
131{ 147{
132 struct dlm_rcom *rc; 148 struct dlm_rcom *rc;
133 struct dlm_mhandle *mh; 149 struct dlm_mhandle *mh;
@@ -141,10 +157,13 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid)
141 goto out; 157 goto out;
142 } 158 }
143 159
144 error = create_rcom(ls, nodeid, DLM_RCOM_STATUS, 0, &rc, &mh); 160 error = create_rcom(ls, nodeid, DLM_RCOM_STATUS,
161 sizeof(struct rcom_status), &rc, &mh);
145 if (error) 162 if (error)
146 goto out; 163 goto out;
147 164
165 set_rcom_status(ls, (struct rcom_status *)rc->rc_buf, status_flags);
166
148 allow_sync_reply(ls, &rc->rc_id); 167 allow_sync_reply(ls, &rc->rc_id);
149 memset(ls->ls_recover_buf, 0, dlm_config.ci_buffer_size); 168 memset(ls->ls_recover_buf, 0, dlm_config.ci_buffer_size);
150 169
@@ -161,8 +180,11 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid)
161 /* we pretend the remote lockspace exists with 0 status */ 180 /* we pretend the remote lockspace exists with 0 status */
162 log_debug(ls, "remote node %d not ready", nodeid); 181 log_debug(ls, "remote node %d not ready", nodeid);
163 rc->rc_result = 0; 182 rc->rc_result = 0;
164 } else 183 error = 0;
165 error = check_config(ls, rc, nodeid); 184 } else {
185 error = check_rcom_config(ls, rc, nodeid);
186 }
187
166 /* the caller looks at rc_result for the remote recovery status */ 188 /* the caller looks at rc_result for the remote recovery status */
167 out: 189 out:
168 return error; 190 return error;
@@ -172,17 +194,60 @@ static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in)
172{ 194{
173 struct dlm_rcom *rc; 195 struct dlm_rcom *rc;
174 struct dlm_mhandle *mh; 196 struct dlm_mhandle *mh;
175 int error, nodeid = rc_in->rc_header.h_nodeid; 197 struct rcom_status *rs;
198 uint32_t status;
199 int nodeid = rc_in->rc_header.h_nodeid;
200 int len = sizeof(struct rcom_config);
201 int num_slots = 0;
202 int error;
203
204 if (!dlm_slots_version(&rc_in->rc_header)) {
205 status = dlm_recover_status(ls);
206 goto do_create;
207 }
208
209 rs = (struct rcom_status *)rc_in->rc_buf;
176 210
211 if (!(rs->rs_flags & DLM_RSF_NEED_SLOTS)) {
212 status = dlm_recover_status(ls);
213 goto do_create;
214 }
215
216 spin_lock(&ls->ls_recover_lock);
217 status = ls->ls_recover_status;
218 num_slots = ls->ls_num_slots;
219 spin_unlock(&ls->ls_recover_lock);
220 len += num_slots * sizeof(struct rcom_slot);
221
222 do_create:
177 error = create_rcom(ls, nodeid, DLM_RCOM_STATUS_REPLY, 223 error = create_rcom(ls, nodeid, DLM_RCOM_STATUS_REPLY,
178 sizeof(struct rcom_config), &rc, &mh); 224 len, &rc, &mh);
179 if (error) 225 if (error)
180 return; 226 return;
227
181 rc->rc_id = rc_in->rc_id; 228 rc->rc_id = rc_in->rc_id;
182 rc->rc_seq_reply = rc_in->rc_seq; 229 rc->rc_seq_reply = rc_in->rc_seq;
183 rc->rc_result = dlm_recover_status(ls); 230 rc->rc_result = status;
184 make_config(ls, (struct rcom_config *) rc->rc_buf); 231
232 set_rcom_config(ls, (struct rcom_config *)rc->rc_buf, num_slots);
233
234 if (!num_slots)
235 goto do_send;
236
237 spin_lock(&ls->ls_recover_lock);
238 if (ls->ls_num_slots != num_slots) {
239 spin_unlock(&ls->ls_recover_lock);
240 log_debug(ls, "receive_rcom_status num_slots %d to %d",
241 num_slots, ls->ls_num_slots);
242 rc->rc_result = 0;
243 set_rcom_config(ls, (struct rcom_config *)rc->rc_buf, 0);
244 goto do_send;
245 }
246
247 dlm_slots_copy_out(ls, rc);
248 spin_unlock(&ls->ls_recover_lock);
185 249
250 do_send:
186 send_rcom(ls, mh, rc); 251 send_rcom(ls, mh, rc);
187} 252}
188 253
diff --git a/fs/dlm/rcom.h b/fs/dlm/rcom.h
index b09abd29ba38..206723ab744d 100644
--- a/fs/dlm/rcom.h
+++ b/fs/dlm/rcom.h
@@ -14,7 +14,7 @@
14#ifndef __RCOM_DOT_H__ 14#ifndef __RCOM_DOT_H__
15#define __RCOM_DOT_H__ 15#define __RCOM_DOT_H__
16 16
17int dlm_rcom_status(struct dlm_ls *ls, int nodeid); 17int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags);
18int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name,int last_len); 18int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name,int last_len);
19int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid); 19int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid);
20int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb); 20int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c
index 14638235f7b2..34d5adf1fce7 100644
--- a/fs/dlm/recover.c
+++ b/fs/dlm/recover.c
@@ -85,14 +85,20 @@ uint32_t dlm_recover_status(struct dlm_ls *ls)
85 return status; 85 return status;
86} 86}
87 87
88static void _set_recover_status(struct dlm_ls *ls, uint32_t status)
89{
90 ls->ls_recover_status |= status;
91}
92
88void dlm_set_recover_status(struct dlm_ls *ls, uint32_t status) 93void dlm_set_recover_status(struct dlm_ls *ls, uint32_t status)
89{ 94{
90 spin_lock(&ls->ls_recover_lock); 95 spin_lock(&ls->ls_recover_lock);
91 ls->ls_recover_status |= status; 96 _set_recover_status(ls, status);
92 spin_unlock(&ls->ls_recover_lock); 97 spin_unlock(&ls->ls_recover_lock);
93} 98}
94 99
95static int wait_status_all(struct dlm_ls *ls, uint32_t wait_status) 100static int wait_status_all(struct dlm_ls *ls, uint32_t wait_status,
101 int save_slots)
96{ 102{
97 struct dlm_rcom *rc = ls->ls_recover_buf; 103 struct dlm_rcom *rc = ls->ls_recover_buf;
98 struct dlm_member *memb; 104 struct dlm_member *memb;
@@ -106,10 +112,13 @@ static int wait_status_all(struct dlm_ls *ls, uint32_t wait_status)
106 goto out; 112 goto out;
107 } 113 }
108 114
109 error = dlm_rcom_status(ls, memb->nodeid); 115 error = dlm_rcom_status(ls, memb->nodeid, 0);
110 if (error) 116 if (error)
111 goto out; 117 goto out;
112 118
119 if (save_slots)
120 dlm_slot_save(ls, rc, memb);
121
113 if (rc->rc_result & wait_status) 122 if (rc->rc_result & wait_status)
114 break; 123 break;
115 if (delay < 1000) 124 if (delay < 1000)
@@ -121,7 +130,8 @@ static int wait_status_all(struct dlm_ls *ls, uint32_t wait_status)
121 return error; 130 return error;
122} 131}
123 132
124static int wait_status_low(struct dlm_ls *ls, uint32_t wait_status) 133static int wait_status_low(struct dlm_ls *ls, uint32_t wait_status,
134 uint32_t status_flags)
125{ 135{
126 struct dlm_rcom *rc = ls->ls_recover_buf; 136 struct dlm_rcom *rc = ls->ls_recover_buf;
127 int error = 0, delay = 0, nodeid = ls->ls_low_nodeid; 137 int error = 0, delay = 0, nodeid = ls->ls_low_nodeid;
@@ -132,7 +142,7 @@ static int wait_status_low(struct dlm_ls *ls, uint32_t wait_status)
132 goto out; 142 goto out;
133 } 143 }
134 144
135 error = dlm_rcom_status(ls, nodeid); 145 error = dlm_rcom_status(ls, nodeid, status_flags);
136 if (error) 146 if (error)
137 break; 147 break;
138 148
@@ -152,18 +162,56 @@ static int wait_status(struct dlm_ls *ls, uint32_t status)
152 int error; 162 int error;
153 163
154 if (ls->ls_low_nodeid == dlm_our_nodeid()) { 164 if (ls->ls_low_nodeid == dlm_our_nodeid()) {
155 error = wait_status_all(ls, status); 165 error = wait_status_all(ls, status, 0);
156 if (!error) 166 if (!error)
157 dlm_set_recover_status(ls, status_all); 167 dlm_set_recover_status(ls, status_all);
158 } else 168 } else
159 error = wait_status_low(ls, status_all); 169 error = wait_status_low(ls, status_all, 0);
160 170
161 return error; 171 return error;
162} 172}
163 173
164int dlm_recover_members_wait(struct dlm_ls *ls) 174int dlm_recover_members_wait(struct dlm_ls *ls)
165{ 175{
166 return wait_status(ls, DLM_RS_NODES); 176 struct dlm_member *memb;
177 struct dlm_slot *slots;
178 int num_slots, slots_size;
179 int error, rv;
180 uint32_t gen;
181
182 list_for_each_entry(memb, &ls->ls_nodes, list) {
183 memb->slot = -1;
184 memb->generation = 0;
185 }
186
187 if (ls->ls_low_nodeid == dlm_our_nodeid()) {
188 error = wait_status_all(ls, DLM_RS_NODES, 1);
189 if (error)
190 goto out;
191
192 /* slots array is sparse, slots_size may be > num_slots */
193
194 rv = dlm_slots_assign(ls, &num_slots, &slots_size, &slots, &gen);
195 if (!rv) {
196 spin_lock(&ls->ls_recover_lock);
197 _set_recover_status(ls, DLM_RS_NODES_ALL);
198 ls->ls_num_slots = num_slots;
199 ls->ls_slots_size = slots_size;
200 ls->ls_slots = slots;
201 ls->ls_generation = gen;
202 spin_unlock(&ls->ls_recover_lock);
203 } else {
204 dlm_set_recover_status(ls, DLM_RS_NODES_ALL);
205 }
206 } else {
207 error = wait_status_low(ls, DLM_RS_NODES_ALL, DLM_RSF_NEED_SLOTS);
208 if (error)
209 goto out;
210
211 dlm_slots_copy_in(ls);
212 }
213 out:
214 return error;
167} 215}
168 216
169int dlm_recover_directory_wait(struct dlm_ls *ls) 217int dlm_recover_directory_wait(struct dlm_ls *ls)
@@ -542,8 +590,6 @@ int dlm_recover_locks(struct dlm_ls *ls)
542 out: 590 out:
543 if (error) 591 if (error)
544 recover_list_clear(ls); 592 recover_list_clear(ls);
545 else
546 dlm_set_recover_status(ls, DLM_RS_LOCKS);
547 return error; 593 return error;
548} 594}
549 595
@@ -715,6 +761,7 @@ void dlm_recover_rsbs(struct dlm_ls *ls)
715 761
716int dlm_create_root_list(struct dlm_ls *ls) 762int dlm_create_root_list(struct dlm_ls *ls)
717{ 763{
764 struct rb_node *n;
718 struct dlm_rsb *r; 765 struct dlm_rsb *r;
719 int i, error = 0; 766 int i, error = 0;
720 767
@@ -727,7 +774,8 @@ int dlm_create_root_list(struct dlm_ls *ls)
727 774
728 for (i = 0; i < ls->ls_rsbtbl_size; i++) { 775 for (i = 0; i < ls->ls_rsbtbl_size; i++) {
729 spin_lock(&ls->ls_rsbtbl[i].lock); 776 spin_lock(&ls->ls_rsbtbl[i].lock);
730 list_for_each_entry(r, &ls->ls_rsbtbl[i].list, res_hashchain) { 777 for (n = rb_first(&ls->ls_rsbtbl[i].keep); n; n = rb_next(n)) {
778 r = rb_entry(n, struct dlm_rsb, res_hashnode);
731 list_add(&r->res_root_list, &ls->ls_root_list); 779 list_add(&r->res_root_list, &ls->ls_root_list);
732 dlm_hold_rsb(r); 780 dlm_hold_rsb(r);
733 } 781 }
@@ -741,7 +789,8 @@ int dlm_create_root_list(struct dlm_ls *ls)
741 continue; 789 continue;
742 } 790 }
743 791
744 list_for_each_entry(r, &ls->ls_rsbtbl[i].toss, res_hashchain) { 792 for (n = rb_first(&ls->ls_rsbtbl[i].toss); n; n = rb_next(n)) {
793 r = rb_entry(n, struct dlm_rsb, res_hashnode);
745 list_add(&r->res_root_list, &ls->ls_root_list); 794 list_add(&r->res_root_list, &ls->ls_root_list);
746 dlm_hold_rsb(r); 795 dlm_hold_rsb(r);
747 } 796 }
@@ -771,16 +820,18 @@ void dlm_release_root_list(struct dlm_ls *ls)
771 820
772void dlm_clear_toss_list(struct dlm_ls *ls) 821void dlm_clear_toss_list(struct dlm_ls *ls)
773{ 822{
774 struct dlm_rsb *r, *safe; 823 struct rb_node *n, *next;
824 struct dlm_rsb *rsb;
775 int i; 825 int i;
776 826
777 for (i = 0; i < ls->ls_rsbtbl_size; i++) { 827 for (i = 0; i < ls->ls_rsbtbl_size; i++) {
778 spin_lock(&ls->ls_rsbtbl[i].lock); 828 spin_lock(&ls->ls_rsbtbl[i].lock);
779 list_for_each_entry_safe(r, safe, &ls->ls_rsbtbl[i].toss, 829 for (n = rb_first(&ls->ls_rsbtbl[i].toss); n; n = next) {
780 res_hashchain) { 830 next = rb_next(n);;
781 if (dlm_no_directory(ls) || !is_master(r)) { 831 rsb = rb_entry(n, struct dlm_rsb, res_hashnode);
782 list_del(&r->res_hashchain); 832 if (dlm_no_directory(ls) || !is_master(rsb)) {
783 dlm_free_rsb(r); 833 rb_erase(n, &ls->ls_rsbtbl[i].toss);
834 dlm_free_rsb(rsb);
784 } 835 }
785 } 836 }
786 spin_unlock(&ls->ls_rsbtbl[i].lock); 837 spin_unlock(&ls->ls_rsbtbl[i].lock);
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 774da3cf92c6..3780caf7ae0c 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -2,7 +2,7 @@
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. 5** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
6** 6**
7** This copyrighted material is made available to anyone wishing to use, 7** This copyrighted material is made available to anyone wishing to use,
8** modify, copy, or redistribute it subject to the terms and conditions 8** modify, copy, or redistribute it subject to the terms and conditions
@@ -54,7 +54,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
54 unsigned long start; 54 unsigned long start;
55 int error, neg = 0; 55 int error, neg = 0;
56 56
57 log_debug(ls, "recover %llx", (unsigned long long)rv->seq); 57 log_debug(ls, "dlm_recover %llx", (unsigned long long)rv->seq);
58 58
59 mutex_lock(&ls->ls_recoverd_active); 59 mutex_lock(&ls->ls_recoverd_active);
60 60
@@ -76,14 +76,22 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
76 76
77 /* 77 /*
78 * Add or remove nodes from the lockspace's ls_nodes list. 78 * Add or remove nodes from the lockspace's ls_nodes list.
79 * Also waits for all nodes to complete dlm_recover_members.
80 */ 79 */
81 80
82 error = dlm_recover_members(ls, rv, &neg); 81 error = dlm_recover_members(ls, rv, &neg);
83 if (error) { 82 if (error) {
84 log_debug(ls, "recover_members failed %d", error); 83 log_debug(ls, "dlm_recover_members error %d", error);
85 goto fail; 84 goto fail;
86 } 85 }
86
87 dlm_set_recover_status(ls, DLM_RS_NODES);
88
89 error = dlm_recover_members_wait(ls);
90 if (error) {
91 log_debug(ls, "dlm_recover_members_wait error %d", error);
92 goto fail;
93 }
94
87 start = jiffies; 95 start = jiffies;
88 96
89 /* 97 /*
@@ -93,17 +101,15 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
93 101
94 error = dlm_recover_directory(ls); 102 error = dlm_recover_directory(ls);
95 if (error) { 103 if (error) {
96 log_debug(ls, "recover_directory failed %d", error); 104 log_debug(ls, "dlm_recover_directory error %d", error);
97 goto fail; 105 goto fail;
98 } 106 }
99 107
100 /* 108 dlm_set_recover_status(ls, DLM_RS_DIR);
101 * Wait for all nodes to complete directory rebuild.
102 */
103 109
104 error = dlm_recover_directory_wait(ls); 110 error = dlm_recover_directory_wait(ls);
105 if (error) { 111 if (error) {
106 log_debug(ls, "recover_directory_wait failed %d", error); 112 log_debug(ls, "dlm_recover_directory_wait error %d", error);
107 goto fail; 113 goto fail;
108 } 114 }
109 115
@@ -133,7 +139,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
133 139
134 error = dlm_recover_masters(ls); 140 error = dlm_recover_masters(ls);
135 if (error) { 141 if (error) {
136 log_debug(ls, "recover_masters failed %d", error); 142 log_debug(ls, "dlm_recover_masters error %d", error);
137 goto fail; 143 goto fail;
138 } 144 }
139 145
@@ -143,13 +149,15 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
143 149
144 error = dlm_recover_locks(ls); 150 error = dlm_recover_locks(ls);
145 if (error) { 151 if (error) {
146 log_debug(ls, "recover_locks failed %d", error); 152 log_debug(ls, "dlm_recover_locks error %d", error);
147 goto fail; 153 goto fail;
148 } 154 }
149 155
156 dlm_set_recover_status(ls, DLM_RS_LOCKS);
157
150 error = dlm_recover_locks_wait(ls); 158 error = dlm_recover_locks_wait(ls);
151 if (error) { 159 if (error) {
152 log_debug(ls, "recover_locks_wait failed %d", error); 160 log_debug(ls, "dlm_recover_locks_wait error %d", error);
153 goto fail; 161 goto fail;
154 } 162 }
155 163
@@ -170,7 +178,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
170 178
171 error = dlm_recover_locks_wait(ls); 179 error = dlm_recover_locks_wait(ls);
172 if (error) { 180 if (error) {
173 log_debug(ls, "recover_locks_wait failed %d", error); 181 log_debug(ls, "dlm_recover_locks_wait error %d", error);
174 goto fail; 182 goto fail;
175 } 183 }
176 } 184 }
@@ -186,9 +194,10 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
186 dlm_purge_requestqueue(ls); 194 dlm_purge_requestqueue(ls);
187 195
188 dlm_set_recover_status(ls, DLM_RS_DONE); 196 dlm_set_recover_status(ls, DLM_RS_DONE);
197
189 error = dlm_recover_done_wait(ls); 198 error = dlm_recover_done_wait(ls);
190 if (error) { 199 if (error) {
191 log_debug(ls, "recover_done_wait failed %d", error); 200 log_debug(ls, "dlm_recover_done_wait error %d", error);
192 goto fail; 201 goto fail;
193 } 202 }
194 203
@@ -200,34 +209,35 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
200 209
201 error = enable_locking(ls, rv->seq); 210 error = enable_locking(ls, rv->seq);
202 if (error) { 211 if (error) {
203 log_debug(ls, "enable_locking failed %d", error); 212 log_debug(ls, "enable_locking error %d", error);
204 goto fail; 213 goto fail;
205 } 214 }
206 215
207 error = dlm_process_requestqueue(ls); 216 error = dlm_process_requestqueue(ls);
208 if (error) { 217 if (error) {
209 log_debug(ls, "process_requestqueue failed %d", error); 218 log_debug(ls, "dlm_process_requestqueue error %d", error);
210 goto fail; 219 goto fail;
211 } 220 }
212 221
213 error = dlm_recover_waiters_post(ls); 222 error = dlm_recover_waiters_post(ls);
214 if (error) { 223 if (error) {
215 log_debug(ls, "recover_waiters_post failed %d", error); 224 log_debug(ls, "dlm_recover_waiters_post error %d", error);
216 goto fail; 225 goto fail;
217 } 226 }
218 227
219 dlm_grant_after_purge(ls); 228 dlm_grant_after_purge(ls);
220 229
221 log_debug(ls, "recover %llx done: %u ms", 230 log_debug(ls, "dlm_recover %llx generation %u done: %u ms",
222 (unsigned long long)rv->seq, 231 (unsigned long long)rv->seq, ls->ls_generation,
223 jiffies_to_msecs(jiffies - start)); 232 jiffies_to_msecs(jiffies - start));
224 mutex_unlock(&ls->ls_recoverd_active); 233 mutex_unlock(&ls->ls_recoverd_active);
225 234
235 dlm_lsop_recover_done(ls);
226 return 0; 236 return 0;
227 237
228 fail: 238 fail:
229 dlm_release_root_list(ls); 239 dlm_release_root_list(ls);
230 log_debug(ls, "recover %llx error %d", 240 log_debug(ls, "dlm_recover %llx error %d",
231 (unsigned long long)rv->seq, error); 241 (unsigned long long)rv->seq, error);
232 mutex_unlock(&ls->ls_recoverd_active); 242 mutex_unlock(&ls->ls_recoverd_active);
233 return error; 243 return error;
@@ -250,8 +260,7 @@ static void do_ls_recovery(struct dlm_ls *ls)
250 260
251 if (rv) { 261 if (rv) {
252 ls_recover(ls, rv); 262 ls_recover(ls, rv);
253 kfree(rv->nodeids); 263 kfree(rv->nodes);
254 kfree(rv->new);
255 kfree(rv); 264 kfree(rv);
256 } 265 }
257} 266}
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index d8ea60756403..eb4ed9ba3098 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -392,8 +392,9 @@ static int device_create_lockspace(struct dlm_lspace_params *params)
392 if (!capable(CAP_SYS_ADMIN)) 392 if (!capable(CAP_SYS_ADMIN))
393 return -EPERM; 393 return -EPERM;
394 394
395 error = dlm_new_lockspace(params->name, strlen(params->name), 395 error = dlm_new_lockspace(params->name, NULL, params->flags,
396 &lockspace, params->flags, DLM_USER_LVB_LEN); 396 DLM_USER_LVB_LEN, NULL, NULL, NULL,
397 &lockspace);
397 if (error) 398 if (error)
398 return error; 399 return error;
399 400
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 32f90a3ae63e..19a8ca4ab1dd 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -144,24 +144,6 @@ static int ecryptfs_interpose(struct dentry *lower_dentry,
144} 144}
145 145
146/** 146/**
147 * ecryptfs_create_underlying_file
148 * @lower_dir_inode: inode of the parent in the lower fs of the new file
149 * @dentry: New file's dentry
150 * @mode: The mode of the new file
151 *
152 * Creates the file in the lower file system.
153 *
154 * Returns zero on success; non-zero on error condition
155 */
156static int
157ecryptfs_create_underlying_file(struct inode *lower_dir_inode,
158 struct dentry *dentry, int mode)
159{
160 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
161 return vfs_create(lower_dir_inode, lower_dentry, mode, NULL);
162}
163
164/**
165 * ecryptfs_do_create 147 * ecryptfs_do_create
166 * @directory_inode: inode of the new file's dentry's parent in ecryptfs 148 * @directory_inode: inode of the new file's dentry's parent in ecryptfs
167 * @ecryptfs_dentry: New file's dentry in ecryptfs 149 * @ecryptfs_dentry: New file's dentry in ecryptfs
@@ -176,7 +158,7 @@ ecryptfs_create_underlying_file(struct inode *lower_dir_inode,
176 */ 158 */
177static struct inode * 159static struct inode *
178ecryptfs_do_create(struct inode *directory_inode, 160ecryptfs_do_create(struct inode *directory_inode,
179 struct dentry *ecryptfs_dentry, int mode) 161 struct dentry *ecryptfs_dentry, umode_t mode)
180{ 162{
181 int rc; 163 int rc;
182 struct dentry *lower_dentry; 164 struct dentry *lower_dentry;
@@ -191,8 +173,7 @@ ecryptfs_do_create(struct inode *directory_inode,
191 inode = ERR_CAST(lower_dir_dentry); 173 inode = ERR_CAST(lower_dir_dentry);
192 goto out; 174 goto out;
193 } 175 }
194 rc = ecryptfs_create_underlying_file(lower_dir_dentry->d_inode, 176 rc = vfs_create(lower_dir_dentry->d_inode, lower_dentry, mode, NULL);
195 ecryptfs_dentry, mode);
196 if (rc) { 177 if (rc) {
197 printk(KERN_ERR "%s: Failure to create dentry in lower fs; " 178 printk(KERN_ERR "%s: Failure to create dentry in lower fs; "
198 "rc = [%d]\n", __func__, rc); 179 "rc = [%d]\n", __func__, rc);
@@ -267,7 +248,7 @@ out:
267 */ 248 */
268static int 249static int
269ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry, 250ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
270 int mode, struct nameidata *nd) 251 umode_t mode, struct nameidata *nd)
271{ 252{
272 struct inode *ecryptfs_inode; 253 struct inode *ecryptfs_inode;
273 int rc; 254 int rc;
@@ -559,7 +540,7 @@ out_lock:
559 return rc; 540 return rc;
560} 541}
561 542
562static int ecryptfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 543static int ecryptfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
563{ 544{
564 int rc; 545 int rc;
565 struct dentry *lower_dentry; 546 struct dentry *lower_dentry;
@@ -607,7 +588,7 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry)
607} 588}
608 589
609static int 590static int
610ecryptfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) 591ecryptfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
611{ 592{
612 int rc; 593 int rc;
613 struct dentry *lower_dentry; 594 struct dentry *lower_dentry;
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index dbd52d40df4c..9df7fd6e0c39 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -69,7 +69,6 @@ static void ecryptfs_i_callback(struct rcu_head *head)
69 struct ecryptfs_inode_info *inode_info; 69 struct ecryptfs_inode_info *inode_info;
70 inode_info = ecryptfs_inode_to_private(inode); 70 inode_info = ecryptfs_inode_to_private(inode);
71 71
72 INIT_LIST_HEAD(&inode->i_dentry);
73 kmem_cache_free(ecryptfs_inode_info_cache, inode_info); 72 kmem_cache_free(ecryptfs_inode_info_cache, inode_info);
74} 73}
75 74
@@ -132,9 +131,9 @@ static void ecryptfs_evict_inode(struct inode *inode)
132 * Prints the mount options for a given superblock. 131 * Prints the mount options for a given superblock.
133 * Returns zero; does not fail. 132 * Returns zero; does not fail.
134 */ 133 */
135static int ecryptfs_show_options(struct seq_file *m, struct vfsmount *mnt) 134static int ecryptfs_show_options(struct seq_file *m, struct dentry *root)
136{ 135{
137 struct super_block *sb = mnt->mnt_sb; 136 struct super_block *sb = root->d_sb;
138 struct ecryptfs_mount_crypt_stat *mount_crypt_stat = 137 struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
139 &ecryptfs_superblock_to_private(sb)->mount_crypt_stat; 138 &ecryptfs_superblock_to_private(sb)->mount_crypt_stat;
140 struct ecryptfs_global_auth_tok *walker; 139 struct ecryptfs_global_auth_tok *walker;
diff --git a/fs/efs/super.c b/fs/efs/super.c
index 0f31acb0131c..981106429a9f 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -68,7 +68,6 @@ static struct inode *efs_alloc_inode(struct super_block *sb)
68static void efs_i_callback(struct rcu_head *head) 68static void efs_i_callback(struct rcu_head *head)
69{ 69{
70 struct inode *inode = container_of(head, struct inode, i_rcu); 70 struct inode *inode = container_of(head, struct inode, i_rcu);
71 INIT_LIST_HEAD(&inode->i_dentry);
72 kmem_cache_free(efs_inode_cachep, INODE_INFO(inode)); 71 kmem_cache_free(efs_inode_cachep, INODE_INFO(inode));
73} 72}
74 73
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 828e750af23a..aabdfc38cf24 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -197,6 +197,12 @@ struct eventpoll {
197 197
198 /* The user that created the eventpoll descriptor */ 198 /* The user that created the eventpoll descriptor */
199 struct user_struct *user; 199 struct user_struct *user;
200
201 struct file *file;
202
203 /* used to optimize loop detection check */
204 int visited;
205 struct list_head visited_list_link;
200}; 206};
201 207
202/* Wait structure used by the poll hooks */ 208/* Wait structure used by the poll hooks */
@@ -255,6 +261,15 @@ static struct kmem_cache *epi_cache __read_mostly;
255/* Slab cache used to allocate "struct eppoll_entry" */ 261/* Slab cache used to allocate "struct eppoll_entry" */
256static struct kmem_cache *pwq_cache __read_mostly; 262static struct kmem_cache *pwq_cache __read_mostly;
257 263
264/* Visited nodes during ep_loop_check(), so we can unset them when we finish */
265static LIST_HEAD(visited_list);
266
267/*
268 * List of files with newly added links, where we may need to limit the number
269 * of emanating paths. Protected by the epmutex.
270 */
271static LIST_HEAD(tfile_check_list);
272
258#ifdef CONFIG_SYSCTL 273#ifdef CONFIG_SYSCTL
259 274
260#include <linux/sysctl.h> 275#include <linux/sysctl.h>
@@ -276,6 +291,12 @@ ctl_table epoll_table[] = {
276}; 291};
277#endif /* CONFIG_SYSCTL */ 292#endif /* CONFIG_SYSCTL */
278 293
294static const struct file_operations eventpoll_fops;
295
296static inline int is_file_epoll(struct file *f)
297{
298 return f->f_op == &eventpoll_fops;
299}
279 300
280/* Setup the structure that is used as key for the RB tree */ 301/* Setup the structure that is used as key for the RB tree */
281static inline void ep_set_ffd(struct epoll_filefd *ffd, 302static inline void ep_set_ffd(struct epoll_filefd *ffd,
@@ -711,12 +732,6 @@ static const struct file_operations eventpoll_fops = {
711 .llseek = noop_llseek, 732 .llseek = noop_llseek,
712}; 733};
713 734
714/* Fast test to see if the file is an eventpoll file */
715static inline int is_file_epoll(struct file *f)
716{
717 return f->f_op == &eventpoll_fops;
718}
719
720/* 735/*
721 * This is called from eventpoll_release() to unlink files from the eventpoll 736 * This is called from eventpoll_release() to unlink files from the eventpoll
722 * interface. We need to have this facility to cleanup correctly files that are 737 * interface. We need to have this facility to cleanup correctly files that are
@@ -926,6 +941,99 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
926 rb_insert_color(&epi->rbn, &ep->rbr); 941 rb_insert_color(&epi->rbn, &ep->rbr);
927} 942}
928 943
944
945
946#define PATH_ARR_SIZE 5
947/*
948 * These are the number paths of length 1 to 5, that we are allowing to emanate
949 * from a single file of interest. For example, we allow 1000 paths of length
950 * 1, to emanate from each file of interest. This essentially represents the
951 * potential wakeup paths, which need to be limited in order to avoid massive
952 * uncontrolled wakeup storms. The common use case should be a single ep which
953 * is connected to n file sources. In this case each file source has 1 path
954 * of length 1. Thus, the numbers below should be more than sufficient. These
955 * path limits are enforced during an EPOLL_CTL_ADD operation, since a modify
956 * and delete can't add additional paths. Protected by the epmutex.
957 */
958static const int path_limits[PATH_ARR_SIZE] = { 1000, 500, 100, 50, 10 };
959static int path_count[PATH_ARR_SIZE];
960
961static int path_count_inc(int nests)
962{
963 if (++path_count[nests] > path_limits[nests])
964 return -1;
965 return 0;
966}
967
968static void path_count_init(void)
969{
970 int i;
971
972 for (i = 0; i < PATH_ARR_SIZE; i++)
973 path_count[i] = 0;
974}
975
976static int reverse_path_check_proc(void *priv, void *cookie, int call_nests)
977{
978 int error = 0;
979 struct file *file = priv;
980 struct file *child_file;
981 struct epitem *epi;
982
983 list_for_each_entry(epi, &file->f_ep_links, fllink) {
984 child_file = epi->ep->file;
985 if (is_file_epoll(child_file)) {
986 if (list_empty(&child_file->f_ep_links)) {
987 if (path_count_inc(call_nests)) {
988 error = -1;
989 break;
990 }
991 } else {
992 error = ep_call_nested(&poll_loop_ncalls,
993 EP_MAX_NESTS,
994 reverse_path_check_proc,
995 child_file, child_file,
996 current);
997 }
998 if (error != 0)
999 break;
1000 } else {
1001 printk(KERN_ERR "reverse_path_check_proc: "
1002 "file is not an ep!\n");
1003 }
1004 }
1005 return error;
1006}
1007
1008/**
1009 * reverse_path_check - The tfile_check_list is list of file *, which have
1010 * links that are proposed to be newly added. We need to
1011 * make sure that those added links don't add too many
1012 * paths such that we will spend all our time waking up
1013 * eventpoll objects.
1014 *
1015 * Returns: Returns zero if the proposed links don't create too many paths,
1016 * -1 otherwise.
1017 */
1018static int reverse_path_check(void)
1019{
1020 int length = 0;
1021 int error = 0;
1022 struct file *current_file;
1023
1024 /* let's call this for all tfiles */
1025 list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) {
1026 length++;
1027 path_count_init();
1028 error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
1029 reverse_path_check_proc, current_file,
1030 current_file, current);
1031 if (error)
1032 break;
1033 }
1034 return error;
1035}
1036
929/* 1037/*
930 * Must be called with "mtx" held. 1038 * Must be called with "mtx" held.
931 */ 1039 */
@@ -987,6 +1095,11 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
987 */ 1095 */
988 ep_rbtree_insert(ep, epi); 1096 ep_rbtree_insert(ep, epi);
989 1097
1098 /* now check if we've created too many backpaths */
1099 error = -EINVAL;
1100 if (reverse_path_check())
1101 goto error_remove_epi;
1102
990 /* We have to drop the new item inside our item list to keep track of it */ 1103 /* We have to drop the new item inside our item list to keep track of it */
991 spin_lock_irqsave(&ep->lock, flags); 1104 spin_lock_irqsave(&ep->lock, flags);
992 1105
@@ -1011,6 +1124,14 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
1011 1124
1012 return 0; 1125 return 0;
1013 1126
1127error_remove_epi:
1128 spin_lock(&tfile->f_lock);
1129 if (ep_is_linked(&epi->fllink))
1130 list_del_init(&epi->fllink);
1131 spin_unlock(&tfile->f_lock);
1132
1133 rb_erase(&epi->rbn, &ep->rbr);
1134
1014error_unregister: 1135error_unregister:
1015 ep_unregister_pollwait(ep, epi); 1136 ep_unregister_pollwait(ep, epi);
1016 1137
@@ -1275,18 +1396,36 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
1275 int error = 0; 1396 int error = 0;
1276 struct file *file = priv; 1397 struct file *file = priv;
1277 struct eventpoll *ep = file->private_data; 1398 struct eventpoll *ep = file->private_data;
1399 struct eventpoll *ep_tovisit;
1278 struct rb_node *rbp; 1400 struct rb_node *rbp;
1279 struct epitem *epi; 1401 struct epitem *epi;
1280 1402
1281 mutex_lock_nested(&ep->mtx, call_nests + 1); 1403 mutex_lock_nested(&ep->mtx, call_nests + 1);
1404 ep->visited = 1;
1405 list_add(&ep->visited_list_link, &visited_list);
1282 for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) { 1406 for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
1283 epi = rb_entry(rbp, struct epitem, rbn); 1407 epi = rb_entry(rbp, struct epitem, rbn);
1284 if (unlikely(is_file_epoll(epi->ffd.file))) { 1408 if (unlikely(is_file_epoll(epi->ffd.file))) {
1409 ep_tovisit = epi->ffd.file->private_data;
1410 if (ep_tovisit->visited)
1411 continue;
1285 error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS, 1412 error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
1286 ep_loop_check_proc, epi->ffd.file, 1413 ep_loop_check_proc, epi->ffd.file,
1287 epi->ffd.file->private_data, current); 1414 ep_tovisit, current);
1288 if (error != 0) 1415 if (error != 0)
1289 break; 1416 break;
1417 } else {
1418 /*
1419 * If we've reached a file that is not associated with
1420 * an ep, then we need to check if the newly added
1421 * links are going to add too many wakeup paths. We do
1422 * this by adding it to the tfile_check_list, if it's
1423 * not already there, and calling reverse_path_check()
1424 * during ep_insert().
1425 */
1426 if (list_empty(&epi->ffd.file->f_tfile_llink))
1427 list_add(&epi->ffd.file->f_tfile_llink,
1428 &tfile_check_list);
1290 } 1429 }
1291 } 1430 }
1292 mutex_unlock(&ep->mtx); 1431 mutex_unlock(&ep->mtx);
@@ -1307,8 +1446,31 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
1307 */ 1446 */
1308static int ep_loop_check(struct eventpoll *ep, struct file *file) 1447static int ep_loop_check(struct eventpoll *ep, struct file *file)
1309{ 1448{
1310 return ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS, 1449 int ret;
1450 struct eventpoll *ep_cur, *ep_next;
1451
1452 ret = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
1311 ep_loop_check_proc, file, ep, current); 1453 ep_loop_check_proc, file, ep, current);
1454 /* clear visited list */
1455 list_for_each_entry_safe(ep_cur, ep_next, &visited_list,
1456 visited_list_link) {
1457 ep_cur->visited = 0;
1458 list_del(&ep_cur->visited_list_link);
1459 }
1460 return ret;
1461}
1462
1463static void clear_tfile_check_list(void)
1464{
1465 struct file *file;
1466
1467 /* first clear the tfile_check_list */
1468 while (!list_empty(&tfile_check_list)) {
1469 file = list_first_entry(&tfile_check_list, struct file,
1470 f_tfile_llink);
1471 list_del_init(&file->f_tfile_llink);
1472 }
1473 INIT_LIST_HEAD(&tfile_check_list);
1312} 1474}
1313 1475
1314/* 1476/*
@@ -1316,8 +1478,9 @@ static int ep_loop_check(struct eventpoll *ep, struct file *file)
1316 */ 1478 */
1317SYSCALL_DEFINE1(epoll_create1, int, flags) 1479SYSCALL_DEFINE1(epoll_create1, int, flags)
1318{ 1480{
1319 int error; 1481 int error, fd;
1320 struct eventpoll *ep = NULL; 1482 struct eventpoll *ep = NULL;
1483 struct file *file;
1321 1484
1322 /* Check the EPOLL_* constant for consistency. */ 1485 /* Check the EPOLL_* constant for consistency. */
1323 BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC); 1486 BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC);
@@ -1334,11 +1497,25 @@ SYSCALL_DEFINE1(epoll_create1, int, flags)
1334 * Creates all the items needed to setup an eventpoll file. That is, 1497 * Creates all the items needed to setup an eventpoll file. That is,
1335 * a file structure and a free file descriptor. 1498 * a file structure and a free file descriptor.
1336 */ 1499 */
1337 error = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep, 1500 fd = get_unused_fd_flags(O_RDWR | (flags & O_CLOEXEC));
1501 if (fd < 0) {
1502 error = fd;
1503 goto out_free_ep;
1504 }
1505 file = anon_inode_getfile("[eventpoll]", &eventpoll_fops, ep,
1338 O_RDWR | (flags & O_CLOEXEC)); 1506 O_RDWR | (flags & O_CLOEXEC));
1339 if (error < 0) 1507 if (IS_ERR(file)) {
1340 ep_free(ep); 1508 error = PTR_ERR(file);
1341 1509 goto out_free_fd;
1510 }
1511 fd_install(fd, file);
1512 ep->file = file;
1513 return fd;
1514
1515out_free_fd:
1516 put_unused_fd(fd);
1517out_free_ep:
1518 ep_free(ep);
1342 return error; 1519 return error;
1343} 1520}
1344 1521
@@ -1404,21 +1581,27 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1404 /* 1581 /*
1405 * When we insert an epoll file descriptor, inside another epoll file 1582 * When we insert an epoll file descriptor, inside another epoll file
1406 * descriptor, there is the change of creating closed loops, which are 1583 * descriptor, there is the change of creating closed loops, which are
1407 * better be handled here, than in more critical paths. 1584 * better be handled here, than in more critical paths. While we are
1585 * checking for loops we also determine the list of files reachable
1586 * and hang them on the tfile_check_list, so we can check that we
1587 * haven't created too many possible wakeup paths.
1408 * 1588 *
1409 * We hold epmutex across the loop check and the insert in this case, in 1589 * We need to hold the epmutex across both ep_insert and ep_remove
1410 * order to prevent two separate inserts from racing and each doing the 1590 * b/c we want to make sure we are looking at a coherent view of
1411 * insert "at the same time" such that ep_loop_check passes on both 1591 * epoll network.
1412 * before either one does the insert, thereby creating a cycle.
1413 */ 1592 */
1414 if (unlikely(is_file_epoll(tfile) && op == EPOLL_CTL_ADD)) { 1593 if (op == EPOLL_CTL_ADD || op == EPOLL_CTL_DEL) {
1415 mutex_lock(&epmutex); 1594 mutex_lock(&epmutex);
1416 did_lock_epmutex = 1; 1595 did_lock_epmutex = 1;
1417 error = -ELOOP;
1418 if (ep_loop_check(ep, tfile) != 0)
1419 goto error_tgt_fput;
1420 } 1596 }
1421 1597 if (op == EPOLL_CTL_ADD) {
1598 if (is_file_epoll(tfile)) {
1599 error = -ELOOP;
1600 if (ep_loop_check(ep, tfile) != 0)
1601 goto error_tgt_fput;
1602 } else
1603 list_add(&tfile->f_tfile_llink, &tfile_check_list);
1604 }
1422 1605
1423 mutex_lock_nested(&ep->mtx, 0); 1606 mutex_lock_nested(&ep->mtx, 0);
1424 1607
@@ -1437,6 +1620,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1437 error = ep_insert(ep, &epds, tfile, fd); 1620 error = ep_insert(ep, &epds, tfile, fd);
1438 } else 1621 } else
1439 error = -EEXIST; 1622 error = -EEXIST;
1623 clear_tfile_check_list();
1440 break; 1624 break;
1441 case EPOLL_CTL_DEL: 1625 case EPOLL_CTL_DEL:
1442 if (epi) 1626 if (epi)
@@ -1455,7 +1639,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1455 mutex_unlock(&ep->mtx); 1639 mutex_unlock(&ep->mtx);
1456 1640
1457error_tgt_fput: 1641error_tgt_fput:
1458 if (unlikely(did_lock_epmutex)) 1642 if (did_lock_epmutex)
1459 mutex_unlock(&epmutex); 1643 mutex_unlock(&epmutex);
1460 1644
1461 fput(tfile); 1645 fput(tfile);
diff --git a/fs/exec.c b/fs/exec.c
index 36254645b7cc..aeb135c7ff5c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -59,6 +59,8 @@
59#include <asm/uaccess.h> 59#include <asm/uaccess.h>
60#include <asm/mmu_context.h> 60#include <asm/mmu_context.h>
61#include <asm/tlb.h> 61#include <asm/tlb.h>
62
63#include <trace/events/task.h>
62#include "internal.h" 64#include "internal.h"
63 65
64int core_uses_pid; 66int core_uses_pid;
@@ -1054,6 +1056,8 @@ void set_task_comm(struct task_struct *tsk, char *buf)
1054{ 1056{
1055 task_lock(tsk); 1057 task_lock(tsk);
1056 1058
1059 trace_task_rename(tsk, buf);
1060
1057 /* 1061 /*
1058 * Threads may access current->comm without holding 1062 * Threads may access current->comm without holding
1059 * the task lock, so write the string carefully. 1063 * the task lock, so write the string carefully.
@@ -1225,7 +1229,7 @@ EXPORT_SYMBOL(install_exec_creds);
1225 * - the caller must hold ->cred_guard_mutex to protect against 1229 * - the caller must hold ->cred_guard_mutex to protect against
1226 * PTRACE_ATTACH 1230 * PTRACE_ATTACH
1227 */ 1231 */
1228int check_unsafe_exec(struct linux_binprm *bprm) 1232static int check_unsafe_exec(struct linux_binprm *bprm)
1229{ 1233{
1230 struct task_struct *p = current, *t; 1234 struct task_struct *p = current, *t;
1231 unsigned n_fs; 1235 unsigned n_fs;
diff --git a/fs/exofs/Kconfig b/fs/exofs/Kconfig
index da42f32c49be..86194b2f799d 100644
--- a/fs/exofs/Kconfig
+++ b/fs/exofs/Kconfig
@@ -1,14 +1,3 @@
1# Note ORE needs to "select ASYNC_XOR". So Not to force multiple selects
2# for every ORE user we do it like this. Any user should add itself here
3# at the "depends on EXOFS_FS || ..." with an ||. The dependencies are
4# selected here, and we default to "ON". So in effect it is like been
5# selected by any of the users.
6config ORE
7 tristate
8 depends on EXOFS_FS || PNFS_OBJLAYOUT
9 select ASYNC_XOR
10 default SCSI_OSD_ULD
11
12config EXOFS_FS 1config EXOFS_FS
13 tristate "exofs: OSD based file system support" 2 tristate "exofs: OSD based file system support"
14 depends on SCSI_OSD_ULD 3 depends on SCSI_OSD_ULD
diff --git a/fs/exofs/Kconfig.ore b/fs/exofs/Kconfig.ore
new file mode 100644
index 000000000000..1ca7fb7b6ba8
--- /dev/null
+++ b/fs/exofs/Kconfig.ore
@@ -0,0 +1,12 @@
1# ORE - Objects Raid Engine (libore.ko)
2#
3# Note ORE needs to "select ASYNC_XOR". So Not to force multiple selects
4# for every ORE user we do it like this. Any user should add itself here
5# at the "depends on EXOFS_FS || ..." with an ||. The dependencies are
6# selected here, and we default to "ON". So in effect it is like been
7# selected by any of the users.
8config ORE
9 tristate
10 depends on EXOFS_FS || PNFS_OBJLAYOUT
11 select ASYNC_XOR
12 default SCSI_OSD_ULD
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c
index d0941c6a1f72..80405836ba6e 100644
--- a/fs/exofs/dir.c
+++ b/fs/exofs/dir.c
@@ -234,7 +234,7 @@ static unsigned char exofs_type_by_mode[S_IFMT >> S_SHIFT] = {
234static inline 234static inline
235void exofs_set_de_type(struct exofs_dir_entry *de, struct inode *inode) 235void exofs_set_de_type(struct exofs_dir_entry *de, struct inode *inode)
236{ 236{
237 mode_t mode = inode->i_mode; 237 umode_t mode = inode->i_mode;
238 de->file_type = exofs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; 238 de->file_type = exofs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
239} 239}
240 240
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index 51f4b4c40f09..ca9d49665ef6 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -154,7 +154,7 @@ int exofs_write_begin(struct file *file, struct address_space *mapping,
154 loff_t pos, unsigned len, unsigned flags, 154 loff_t pos, unsigned len, unsigned flags,
155 struct page **pagep, void **fsdata); 155 struct page **pagep, void **fsdata);
156extern struct inode *exofs_iget(struct super_block *, unsigned long); 156extern struct inode *exofs_iget(struct super_block *, unsigned long);
157struct inode *exofs_new_inode(struct inode *, int); 157struct inode *exofs_new_inode(struct inode *, umode_t);
158extern int exofs_write_inode(struct inode *, struct writeback_control *wbc); 158extern int exofs_write_inode(struct inode *, struct writeback_control *wbc);
159extern void exofs_evict_inode(struct inode *); 159extern void exofs_evict_inode(struct inode *);
160 160
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index f6dbf7768ce6..ea5e1f97806a 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -1276,7 +1276,7 @@ static void create_done(struct ore_io_state *ios, void *p)
1276/* 1276/*
1277 * Set up a new inode and create an object for it on the OSD 1277 * Set up a new inode and create an object for it on the OSD
1278 */ 1278 */
1279struct inode *exofs_new_inode(struct inode *dir, int mode) 1279struct inode *exofs_new_inode(struct inode *dir, umode_t mode)
1280{ 1280{
1281 struct super_block *sb = dir->i_sb; 1281 struct super_block *sb = dir->i_sb;
1282 struct exofs_sb_info *sbi = sb->s_fs_info; 1282 struct exofs_sb_info *sbi = sb->s_fs_info;
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index b54c43775f17..9dbf0c301030 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -59,7 +59,7 @@ static struct dentry *exofs_lookup(struct inode *dir, struct dentry *dentry,
59 return d_splice_alias(inode, dentry); 59 return d_splice_alias(inode, dentry);
60} 60}
61 61
62static int exofs_create(struct inode *dir, struct dentry *dentry, int mode, 62static int exofs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
63 struct nameidata *nd) 63 struct nameidata *nd)
64{ 64{
65 struct inode *inode = exofs_new_inode(dir, mode); 65 struct inode *inode = exofs_new_inode(dir, mode);
@@ -74,7 +74,7 @@ static int exofs_create(struct inode *dir, struct dentry *dentry, int mode,
74 return err; 74 return err;
75} 75}
76 76
77static int exofs_mknod(struct inode *dir, struct dentry *dentry, int mode, 77static int exofs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
78 dev_t rdev) 78 dev_t rdev)
79{ 79{
80 struct inode *inode; 80 struct inode *inode;
@@ -153,7 +153,7 @@ static int exofs_link(struct dentry *old_dentry, struct inode *dir,
153 return exofs_add_nondir(dentry, inode); 153 return exofs_add_nondir(dentry, inode);
154} 154}
155 155
156static int exofs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 156static int exofs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
157{ 157{
158 struct inode *inode; 158 struct inode *inode;
159 int err = -EMLINK; 159 int err = -EMLINK;
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index d271ad837202..49cf230554a2 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -266,7 +266,7 @@ int ore_get_rw_state(struct ore_layout *layout, struct ore_components *oc,
266 266
267 /* first/last seg is split */ 267 /* first/last seg is split */
268 num_raid_units += layout->group_width; 268 num_raid_units += layout->group_width;
269 sgs_per_dev = div_u64(num_raid_units, data_devs); 269 sgs_per_dev = div_u64(num_raid_units, data_devs) + 2;
270 } else { 270 } else {
271 /* For Writes add parity pages array. */ 271 /* For Writes add parity pages array. */
272 max_par_pages = num_raid_units * pages_in_unit * 272 max_par_pages = num_raid_units * pages_in_unit *
@@ -445,10 +445,10 @@ int ore_check_io(struct ore_io_state *ios, ore_on_dev_error on_dev_error)
445 u64 residual = ios->reading ? 445 u64 residual = ios->reading ?
446 or->in.residual : or->out.residual; 446 or->in.residual : or->out.residual;
447 u64 offset = (ios->offset + ios->length) - residual; 447 u64 offset = (ios->offset + ios->length) - residual;
448 struct ore_dev *od = ios->oc->ods[ 448 unsigned dev = per_dev->dev - ios->oc->first_dev;
449 per_dev->dev - ios->oc->first_dev]; 449 struct ore_dev *od = ios->oc->ods[dev];
450 450
451 on_dev_error(ios, od, per_dev->dev, osi.osd_err_pri, 451 on_dev_error(ios, od, dev, osi.osd_err_pri,
452 offset, residual); 452 offset, residual);
453 } 453 }
454 if (osi.osd_err_pri >= acumulated_osd_err) { 454 if (osi.osd_err_pri >= acumulated_osd_err) {
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
index 29c47e5c4a86..d222c77cfa1b 100644
--- a/fs/exofs/ore_raid.c
+++ b/fs/exofs/ore_raid.c
@@ -328,8 +328,8 @@ static int _alloc_read_4_write(struct ore_io_state *ios)
328/* @si contains info of the to-be-inserted page. Update of @si should be 328/* @si contains info of the to-be-inserted page. Update of @si should be
329 * maintained by caller. Specificaly si->dev, si->obj_offset, ... 329 * maintained by caller. Specificaly si->dev, si->obj_offset, ...
330 */ 330 */
331static int _add_to_read_4_write(struct ore_io_state *ios, 331static int _add_to_r4w(struct ore_io_state *ios, struct ore_striping_info *si,
332 struct ore_striping_info *si, struct page *page) 332 struct page *page, unsigned pg_len)
333{ 333{
334 struct request_queue *q; 334 struct request_queue *q;
335 struct ore_per_dev_state *per_dev; 335 struct ore_per_dev_state *per_dev;
@@ -366,17 +366,60 @@ static int _add_to_read_4_write(struct ore_io_state *ios,
366 _ore_add_sg_seg(per_dev, gap, true); 366 _ore_add_sg_seg(per_dev, gap, true);
367 } 367 }
368 q = osd_request_queue(ore_comp_dev(read_ios->oc, per_dev->dev)); 368 q = osd_request_queue(ore_comp_dev(read_ios->oc, per_dev->dev));
369 added_len = bio_add_pc_page(q, per_dev->bio, page, PAGE_SIZE, 0); 369 added_len = bio_add_pc_page(q, per_dev->bio, page, pg_len,
370 if (unlikely(added_len != PAGE_SIZE)) { 370 si->obj_offset % PAGE_SIZE);
371 if (unlikely(added_len != pg_len)) {
371 ORE_DBGMSG("Failed to bio_add_pc_page bi_vcnt=%d\n", 372 ORE_DBGMSG("Failed to bio_add_pc_page bi_vcnt=%d\n",
372 per_dev->bio->bi_vcnt); 373 per_dev->bio->bi_vcnt);
373 return -ENOMEM; 374 return -ENOMEM;
374 } 375 }
375 376
376 per_dev->length += PAGE_SIZE; 377 per_dev->length += pg_len;
377 return 0; 378 return 0;
378} 379}
379 380
381/* read the beginning of an unaligned first page */
382static int _add_to_r4w_first_page(struct ore_io_state *ios, struct page *page)
383{
384 struct ore_striping_info si;
385 unsigned pg_len;
386
387 ore_calc_stripe_info(ios->layout, ios->offset, 0, &si);
388
389 pg_len = si.obj_offset % PAGE_SIZE;
390 si.obj_offset -= pg_len;
391
392 ORE_DBGMSG("offset=0x%llx len=0x%x index=0x%lx dev=%x\n",
393 _LLU(si.obj_offset), pg_len, page->index, si.dev);
394
395 return _add_to_r4w(ios, &si, page, pg_len);
396}
397
398/* read the end of an incomplete last page */
399static int _add_to_r4w_last_page(struct ore_io_state *ios, u64 *offset)
400{
401 struct ore_striping_info si;
402 struct page *page;
403 unsigned pg_len, p, c;
404
405 ore_calc_stripe_info(ios->layout, *offset, 0, &si);
406
407 p = si.unit_off / PAGE_SIZE;
408 c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1,
409 ios->layout->mirrors_p1, si.par_dev, si.dev);
410 page = ios->sp2d->_1p_stripes[p].pages[c];
411
412 pg_len = PAGE_SIZE - (si.unit_off % PAGE_SIZE);
413 *offset += pg_len;
414
415 ORE_DBGMSG("p=%d, c=%d next-offset=0x%llx len=0x%x dev=%x par_dev=%d\n",
416 p, c, _LLU(*offset), pg_len, si.dev, si.par_dev);
417
418 BUG_ON(!page);
419
420 return _add_to_r4w(ios, &si, page, pg_len);
421}
422
380static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret) 423static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret)
381{ 424{
382 struct bio_vec *bv; 425 struct bio_vec *bv;
@@ -444,9 +487,13 @@ static int _read_4_write(struct ore_io_state *ios)
444 struct page **pp = &_1ps->pages[c]; 487 struct page **pp = &_1ps->pages[c];
445 bool uptodate; 488 bool uptodate;
446 489
447 if (*pp) 490 if (*pp) {
491 if (ios->offset % PAGE_SIZE)
492 /* Read the remainder of the page */
493 _add_to_r4w_first_page(ios, *pp);
448 /* to-be-written pages start here */ 494 /* to-be-written pages start here */
449 goto read_last_stripe; 495 goto read_last_stripe;
496 }
450 497
451 *pp = ios->r4w->get_page(ios->private, offset, 498 *pp = ios->r4w->get_page(ios->private, offset,
452 &uptodate); 499 &uptodate);
@@ -454,7 +501,7 @@ static int _read_4_write(struct ore_io_state *ios)
454 return -ENOMEM; 501 return -ENOMEM;
455 502
456 if (!uptodate) 503 if (!uptodate)
457 _add_to_read_4_write(ios, &read_si, *pp); 504 _add_to_r4w(ios, &read_si, *pp, PAGE_SIZE);
458 505
459 /* Mark read-pages to be cache_released */ 506 /* Mark read-pages to be cache_released */
460 _1ps->page_is_read[c] = true; 507 _1ps->page_is_read[c] = true;
@@ -465,8 +512,11 @@ static int _read_4_write(struct ore_io_state *ios)
465 } 512 }
466 513
467read_last_stripe: 514read_last_stripe:
468 offset = ios->offset + (ios->length + PAGE_SIZE - 1) / 515 offset = ios->offset + ios->length;
469 PAGE_SIZE * PAGE_SIZE; 516 if (offset % PAGE_SIZE)
517 _add_to_r4w_last_page(ios, &offset);
518 /* offset will be aligned to next page */
519
470 last_stripe_end = div_u64(offset + bytes_in_stripe - 1, bytes_in_stripe) 520 last_stripe_end = div_u64(offset + bytes_in_stripe - 1, bytes_in_stripe)
471 * bytes_in_stripe; 521 * bytes_in_stripe;
472 if (offset == last_stripe_end) /* Optimize for the aligned case */ 522 if (offset == last_stripe_end) /* Optimize for the aligned case */
@@ -503,7 +553,7 @@ read_last_stripe:
503 /* Mark read-pages to be cache_released */ 553 /* Mark read-pages to be cache_released */
504 _1ps->page_is_read[c] = true; 554 _1ps->page_is_read[c] = true;
505 if (!uptodate) 555 if (!uptodate)
506 _add_to_read_4_write(ios, &read_si, page); 556 _add_to_r4w(ios, &read_si, page, PAGE_SIZE);
507 } 557 }
508 558
509 offset += PAGE_SIZE; 559 offset += PAGE_SIZE;
@@ -551,7 +601,11 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
551 unsigned cur_len) 601 unsigned cur_len)
552{ 602{
553 if (ios->reading) { 603 if (ios->reading) {
554 BUG_ON(per_dev->cur_sg >= ios->sgs_per_dev); 604 if (per_dev->cur_sg >= ios->sgs_per_dev) {
605 ORE_DBGMSG("cur_sg(%d) >= sgs_per_dev(%d)\n" ,
606 per_dev->cur_sg, ios->sgs_per_dev);
607 return -ENOMEM;
608 }
555 _ore_add_sg_seg(per_dev, cur_len, true); 609 _ore_add_sg_seg(per_dev, cur_len, true);
556 } else { 610 } else {
557 struct __stripe_pages_2d *sp2d = ios->sp2d; 611 struct __stripe_pages_2d *sp2d = ios->sp2d;
@@ -612,8 +666,6 @@ int _ore_post_alloc_raid_stuff(struct ore_io_state *ios)
612 return -ENOMEM; 666 return -ENOMEM;
613 } 667 }
614 668
615 BUG_ON(ios->offset % PAGE_SIZE);
616
617 /* Round io down to last full strip */ 669 /* Round io down to last full strip */
618 first_stripe = div_u64(ios->offset, stripe_size); 670 first_stripe = div_u64(ios->offset, stripe_size);
619 last_stripe = div_u64(ios->offset + ios->length, stripe_size); 671 last_stripe = div_u64(ios->offset + ios->length, stripe_size);
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index e6085ec192d6..d22cd168c6ee 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -166,7 +166,6 @@ static struct inode *exofs_alloc_inode(struct super_block *sb)
166static void exofs_i_callback(struct rcu_head *head) 166static void exofs_i_callback(struct rcu_head *head)
167{ 167{
168 struct inode *inode = container_of(head, struct inode, i_rcu); 168 struct inode *inode = container_of(head, struct inode, i_rcu);
169 INIT_LIST_HEAD(&inode->i_dentry);
170 kmem_cache_free(exofs_inode_cachep, exofs_i(inode)); 169 kmem_cache_free(exofs_inode_cachep, exofs_i(inode));
171} 170}
172 171
@@ -839,6 +838,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
839 ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY); 838 ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY);
840 if (ret) { 839 if (ret) {
841 EXOFS_DBGMSG("Failed to bdi_setup_and_register\n"); 840 EXOFS_DBGMSG("Failed to bdi_setup_and_register\n");
841 dput(sb->s_root);
842 sb->s_root = NULL;
842 goto free_sbi; 843 goto free_sbi;
843 } 844 }
844 845
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 47cda410b548..d37df352d324 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -279,7 +279,7 @@ static unsigned char ext2_type_by_mode[S_IFMT >> S_SHIFT] = {
279 279
280static inline void ext2_set_de_type(ext2_dirent *de, struct inode *inode) 280static inline void ext2_set_de_type(ext2_dirent *de, struct inode *inode)
281{ 281{
282 mode_t mode = inode->i_mode; 282 umode_t mode = inode->i_mode;
283 if (EXT2_HAS_INCOMPAT_FEATURE(inode->i_sb, EXT2_FEATURE_INCOMPAT_FILETYPE)) 283 if (EXT2_HAS_INCOMPAT_FEATURE(inode->i_sb, EXT2_FEATURE_INCOMPAT_FILETYPE))
284 de->file_type = ext2_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; 284 de->file_type = ext2_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
285 else 285 else
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 9a4e5e206d08..75ad433c6691 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -110,7 +110,7 @@ extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **);
110extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *, int); 110extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *, int);
111 111
112/* ialloc.c */ 112/* ialloc.c */
113extern struct inode * ext2_new_inode (struct inode *, int, const struct qstr *); 113extern struct inode * ext2_new_inode (struct inode *, umode_t, const struct qstr *);
114extern void ext2_free_inode (struct inode *); 114extern void ext2_free_inode (struct inode *);
115extern unsigned long ext2_count_free_inodes (struct super_block *); 115extern unsigned long ext2_count_free_inodes (struct super_block *);
116extern void ext2_check_inodes_bitmap (struct super_block *); 116extern void ext2_check_inodes_bitmap (struct super_block *);
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index c4e81dfb74ba..8b15cf8cef37 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -429,7 +429,7 @@ found:
429 return group; 429 return group;
430} 430}
431 431
432struct inode *ext2_new_inode(struct inode *dir, int mode, 432struct inode *ext2_new_inode(struct inode *dir, umode_t mode,
433 const struct qstr *qstr) 433 const struct qstr *qstr)
434{ 434{
435 struct super_block *sb; 435 struct super_block *sb;
@@ -573,8 +573,11 @@ got:
573 inode->i_generation = sbi->s_next_generation++; 573 inode->i_generation = sbi->s_next_generation++;
574 spin_unlock(&sbi->s_next_gen_lock); 574 spin_unlock(&sbi->s_next_gen_lock);
575 if (insert_inode_locked(inode) < 0) { 575 if (insert_inode_locked(inode) < 0) {
576 err = -EINVAL; 576 ext2_error(sb, "ext2_new_inode",
577 goto fail_drop; 577 "inode number already in use - inode=%lu",
578 (unsigned long) ino);
579 err = -EIO;
580 goto fail;
578 } 581 }
579 582
580 dquot_initialize(inode); 583 dquot_initialize(inode);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 91a6945af6d8..740cad8dcd8d 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -26,7 +26,6 @@
26#include <linux/highuid.h> 26#include <linux/highuid.h>
27#include <linux/pagemap.h> 27#include <linux/pagemap.h>
28#include <linux/quotaops.h> 28#include <linux/quotaops.h>
29#include <linux/module.h>
30#include <linux/writeback.h> 29#include <linux/writeback.h>
31#include <linux/buffer_head.h> 30#include <linux/buffer_head.h>
32#include <linux/mpage.h> 31#include <linux/mpage.h>
@@ -36,10 +35,6 @@
36#include "acl.h" 35#include "acl.h"
37#include "xip.h" 36#include "xip.h"
38 37
39MODULE_AUTHOR("Remy Card and others");
40MODULE_DESCRIPTION("Second Extended Filesystem");
41MODULE_LICENSE("GPL");
42
43static int __ext2_write_inode(struct inode *inode, int do_sync); 38static int __ext2_write_inode(struct inode *inode, int do_sync);
44 39
45/* 40/*
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
index f81e250ac5c4..1089f760c847 100644
--- a/fs/ext2/ioctl.c
+++ b/fs/ext2/ioctl.c
@@ -35,7 +35,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
35 case EXT2_IOC_SETFLAGS: { 35 case EXT2_IOC_SETFLAGS: {
36 unsigned int oldflags; 36 unsigned int oldflags;
37 37
38 ret = mnt_want_write(filp->f_path.mnt); 38 ret = mnt_want_write_file(filp);
39 if (ret) 39 if (ret)
40 return ret; 40 return ret;
41 41
@@ -83,7 +83,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
83 inode->i_ctime = CURRENT_TIME_SEC; 83 inode->i_ctime = CURRENT_TIME_SEC;
84 mark_inode_dirty(inode); 84 mark_inode_dirty(inode);
85setflags_out: 85setflags_out:
86 mnt_drop_write(filp->f_path.mnt); 86 mnt_drop_write_file(filp);
87 return ret; 87 return ret;
88 } 88 }
89 case EXT2_IOC_GETVERSION: 89 case EXT2_IOC_GETVERSION:
@@ -91,7 +91,7 @@ setflags_out:
91 case EXT2_IOC_SETVERSION: 91 case EXT2_IOC_SETVERSION:
92 if (!inode_owner_or_capable(inode)) 92 if (!inode_owner_or_capable(inode))
93 return -EPERM; 93 return -EPERM;
94 ret = mnt_want_write(filp->f_path.mnt); 94 ret = mnt_want_write_file(filp);
95 if (ret) 95 if (ret)
96 return ret; 96 return ret;
97 if (get_user(inode->i_generation, (int __user *) arg)) { 97 if (get_user(inode->i_generation, (int __user *) arg)) {
@@ -100,7 +100,7 @@ setflags_out:
100 inode->i_ctime = CURRENT_TIME_SEC; 100 inode->i_ctime = CURRENT_TIME_SEC;
101 mark_inode_dirty(inode); 101 mark_inode_dirty(inode);
102 } 102 }
103 mnt_drop_write(filp->f_path.mnt); 103 mnt_drop_write_file(filp);
104 return ret; 104 return ret;
105 case EXT2_IOC_GETRSVSZ: 105 case EXT2_IOC_GETRSVSZ:
106 if (test_opt(inode->i_sb, RESERVATION) 106 if (test_opt(inode->i_sb, RESERVATION)
@@ -121,7 +121,7 @@ setflags_out:
121 if (get_user(rsv_window_size, (int __user *)arg)) 121 if (get_user(rsv_window_size, (int __user *)arg))
122 return -EFAULT; 122 return -EFAULT;
123 123
124 ret = mnt_want_write(filp->f_path.mnt); 124 ret = mnt_want_write_file(filp);
125 if (ret) 125 if (ret)
126 return ret; 126 return ret;
127 127
@@ -145,7 +145,7 @@ setflags_out:
145 rsv->rsv_goal_size = rsv_window_size; 145 rsv->rsv_goal_size = rsv_window_size;
146 } 146 }
147 mutex_unlock(&ei->truncate_mutex); 147 mutex_unlock(&ei->truncate_mutex);
148 mnt_drop_write(filp->f_path.mnt); 148 mnt_drop_write_file(filp);
149 return 0; 149 return 0;
150 } 150 }
151 default: 151 default:
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 761fde807fc9..080419814bae 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -94,7 +94,7 @@ struct dentry *ext2_get_parent(struct dentry *child)
94 * If the create succeeds, we fill in the inode information 94 * If the create succeeds, we fill in the inode information
95 * with d_instantiate(). 95 * with d_instantiate().
96 */ 96 */
97static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, struct nameidata *nd) 97static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode, struct nameidata *nd)
98{ 98{
99 struct inode *inode; 99 struct inode *inode;
100 100
@@ -119,7 +119,7 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, st
119 return ext2_add_nondir(dentry, inode); 119 return ext2_add_nondir(dentry, inode);
120} 120}
121 121
122static int ext2_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t rdev) 122static int ext2_mknod (struct inode * dir, struct dentry *dentry, umode_t mode, dev_t rdev)
123{ 123{
124 struct inode * inode; 124 struct inode * inode;
125 int err; 125 int err;
@@ -214,7 +214,7 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir,
214 return err; 214 return err;
215} 215}
216 216
217static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode) 217static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
218{ 218{
219 struct inode * inode; 219 struct inode * inode;
220 int err = -EMLINK; 220 int err = -EMLINK;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index bd8ac164a3bf..0090595beb28 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -173,7 +173,6 @@ static struct inode *ext2_alloc_inode(struct super_block *sb)
173static void ext2_i_callback(struct rcu_head *head) 173static void ext2_i_callback(struct rcu_head *head)
174{ 174{
175 struct inode *inode = container_of(head, struct inode, i_rcu); 175 struct inode *inode = container_of(head, struct inode, i_rcu);
176 INIT_LIST_HEAD(&inode->i_dentry);
177 kmem_cache_free(ext2_inode_cachep, EXT2_I(inode)); 176 kmem_cache_free(ext2_inode_cachep, EXT2_I(inode));
178} 177}
179 178
@@ -211,9 +210,9 @@ static void destroy_inodecache(void)
211 kmem_cache_destroy(ext2_inode_cachep); 210 kmem_cache_destroy(ext2_inode_cachep);
212} 211}
213 212
214static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs) 213static int ext2_show_options(struct seq_file *seq, struct dentry *root)
215{ 214{
216 struct super_block *sb = vfs->mnt_sb; 215 struct super_block *sb = root->d_sb;
217 struct ext2_sb_info *sbi = EXT2_SB(sb); 216 struct ext2_sb_info *sbi = EXT2_SB(sb);
218 struct ext2_super_block *es = sbi->s_es; 217 struct ext2_super_block *es = sbi->s_es;
219 unsigned long def_mount_opts; 218 unsigned long def_mount_opts;
@@ -1521,5 +1520,8 @@ static void __exit exit_ext2_fs(void)
1521 exit_ext2_xattr(); 1520 exit_ext2_xattr();
1522} 1521}
1523 1522
1523MODULE_AUTHOR("Remy Card and others");
1524MODULE_DESCRIPTION("Second Extended Filesystem");
1525MODULE_LICENSE("GPL");
1524module_init(init_ext2_fs) 1526module_init(init_ext2_fs)
1525module_exit(exit_ext2_fs) 1527module_exit(exit_ext2_fs)
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index d27b71f1d183..6dcafc7efdfd 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -54,7 +54,6 @@
54 */ 54 */
55 55
56#include <linux/buffer_head.h> 56#include <linux/buffer_head.h>
57#include <linux/module.h>
58#include <linux/init.h> 57#include <linux/init.h>
59#include <linux/slab.h> 58#include <linux/slab.h>
60#include <linux/mbcache.h> 59#include <linux/mbcache.h>
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index c922adc8ef41..be7a8d02c9a7 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -3,7 +3,6 @@
3 * Handler for storing security labels as extended attributes. 3 * Handler for storing security labels as extended attributes.
4 */ 4 */
5 5
6#include <linux/module.h>
7#include <linux/slab.h> 6#include <linux/slab.h>
8#include <linux/string.h> 7#include <linux/string.h>
9#include <linux/fs.h> 8#include <linux/fs.h>
diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c
index 667e46a8d62d..2989467d3595 100644
--- a/fs/ext2/xattr_trusted.c
+++ b/fs/ext2/xattr_trusted.c
@@ -5,7 +5,6 @@
5 * Copyright (C) 2003 by Andreas Gruenbacher, <a.gruenbacher@computer.org> 5 * Copyright (C) 2003 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
6 */ 6 */
7 7
8#include <linux/module.h>
9#include <linux/string.h> 8#include <linux/string.h>
10#include <linux/capability.h> 9#include <linux/capability.h>
11#include <linux/fs.h> 10#include <linux/fs.h>
diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c
index 099d20f47163..f470e44c4b8d 100644
--- a/fs/ext2/xattr_user.c
+++ b/fs/ext2/xattr_user.c
@@ -6,7 +6,6 @@
6 */ 6 */
7 7
8#include <linux/init.h> 8#include <linux/init.h>
9#include <linux/module.h>
10#include <linux/string.h> 9#include <linux/string.h>
11#include "ext2.h" 10#include "ext2.h"
12#include "xattr.h" 11#include "xattr.h"
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 5c866e06e7ab..1cde28438014 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -371,7 +371,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent)
371 * group to find a free inode. 371 * group to find a free inode.
372 */ 372 */
373struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, 373struct inode *ext3_new_inode(handle_t *handle, struct inode * dir,
374 const struct qstr *qstr, int mode) 374 const struct qstr *qstr, umode_t mode)
375{ 375{
376 struct super_block *sb; 376 struct super_block *sb;
377 struct buffer_head *bitmap_bh = NULL; 377 struct buffer_head *bitmap_bh = NULL;
@@ -525,8 +525,12 @@ got:
525 if (IS_DIRSYNC(inode)) 525 if (IS_DIRSYNC(inode))
526 handle->h_sync = 1; 526 handle->h_sync = 1;
527 if (insert_inode_locked(inode) < 0) { 527 if (insert_inode_locked(inode) < 0) {
528 err = -EINVAL; 528 /*
529 goto fail_drop; 529 * Likely a bitmap corruption causing inode to be allocated
530 * twice.
531 */
532 err = -EIO;
533 goto fail;
530 } 534 }
531 spin_lock(&sbi->s_next_gen_lock); 535 spin_lock(&sbi->s_next_gen_lock);
532 inode->i_generation = sbi->s_next_generation++; 536 inode->i_generation = sbi->s_next_generation++;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 85fe655fe3e0..2d0afeca0b47 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -22,7 +22,6 @@
22 * Assorted race fixes, rewrite of ext3_get_block() by Al Viro, 2000 22 * Assorted race fixes, rewrite of ext3_get_block() by Al Viro, 2000
23 */ 23 */
24 24
25#include <linux/module.h>
26#include <linux/fs.h> 25#include <linux/fs.h>
27#include <linux/time.h> 26#include <linux/time.h>
28#include <linux/ext3_jbd.h> 27#include <linux/ext3_jbd.h>
@@ -223,8 +222,12 @@ void ext3_evict_inode (struct inode *inode)
223 * 222 *
224 * Note that directories do not have this problem because they don't 223 * Note that directories do not have this problem because they don't
225 * use page cache. 224 * use page cache.
225 *
226 * The s_journal check handles the case when ext3_get_journal() fails
227 * and puts the journal inode.
226 */ 228 */
227 if (inode->i_nlink && ext3_should_journal_data(inode) && 229 if (inode->i_nlink && ext3_should_journal_data(inode) &&
230 EXT3_SB(inode->i_sb)->s_journal &&
228 (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) { 231 (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
229 tid_t commit_tid = atomic_read(&ei->i_datasync_tid); 232 tid_t commit_tid = atomic_read(&ei->i_datasync_tid);
230 journal_t *journal = EXT3_SB(inode->i_sb)->s_journal; 233 journal_t *journal = EXT3_SB(inode->i_sb)->s_journal;
@@ -1132,9 +1135,11 @@ struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode,
1132 bh = ext3_getblk(handle, inode, block, create, err); 1135 bh = ext3_getblk(handle, inode, block, create, err);
1133 if (!bh) 1136 if (!bh)
1134 return bh; 1137 return bh;
1135 if (buffer_uptodate(bh)) 1138 if (bh_uptodate_or_lock(bh))
1136 return bh; 1139 return bh;
1137 ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh); 1140 get_bh(bh);
1141 bh->b_end_io = end_buffer_read_sync;
1142 submit_bh(READ | REQ_META | REQ_PRIO, bh);
1138 wait_on_buffer(bh); 1143 wait_on_buffer(bh);
1139 if (buffer_uptodate(bh)) 1144 if (buffer_uptodate(bh))
1140 return bh; 1145 return bh;
@@ -1617,7 +1622,13 @@ static int ext3_ordered_writepage(struct page *page,
1617 int err; 1622 int err;
1618 1623
1619 J_ASSERT(PageLocked(page)); 1624 J_ASSERT(PageLocked(page));
1620 WARN_ON_ONCE(IS_RDONLY(inode)); 1625 /*
1626 * We don't want to warn for emergency remount. The condition is
1627 * ordered to avoid dereferencing inode->i_sb in non-error case to
1628 * avoid slow-downs.
1629 */
1630 WARN_ON_ONCE(IS_RDONLY(inode) &&
1631 !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS));
1621 1632
1622 /* 1633 /*
1623 * We give up here if we're reentered, because it might be for a 1634 * We give up here if we're reentered, because it might be for a
@@ -1692,7 +1703,13 @@ static int ext3_writeback_writepage(struct page *page,
1692 int err; 1703 int err;
1693 1704
1694 J_ASSERT(PageLocked(page)); 1705 J_ASSERT(PageLocked(page));
1695 WARN_ON_ONCE(IS_RDONLY(inode)); 1706 /*
1707 * We don't want to warn for emergency remount. The condition is
1708 * ordered to avoid dereferencing inode->i_sb in non-error case to
1709 * avoid slow-downs.
1710 */
1711 WARN_ON_ONCE(IS_RDONLY(inode) &&
1712 !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS));
1696 1713
1697 if (ext3_journal_current_handle()) 1714 if (ext3_journal_current_handle())
1698 goto out_fail; 1715 goto out_fail;
@@ -1735,7 +1752,13 @@ static int ext3_journalled_writepage(struct page *page,
1735 int err; 1752 int err;
1736 1753
1737 J_ASSERT(PageLocked(page)); 1754 J_ASSERT(PageLocked(page));
1738 WARN_ON_ONCE(IS_RDONLY(inode)); 1755 /*
1756 * We don't want to warn for emergency remount. The condition is
1757 * ordered to avoid dereferencing inode->i_sb in non-error case to
1758 * avoid slow-downs.
1759 */
1760 WARN_ON_ONCE(IS_RDONLY(inode) &&
1761 !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS));
1739 1762
1740 if (ext3_journal_current_handle()) 1763 if (ext3_journal_current_handle())
1741 goto no_write; 1764 goto no_write;
@@ -2064,12 +2087,10 @@ static int ext3_block_truncate_page(struct inode *inode, loff_t from)
2064 if (PageUptodate(page)) 2087 if (PageUptodate(page))
2065 set_buffer_uptodate(bh); 2088 set_buffer_uptodate(bh);
2066 2089
2067 if (!buffer_uptodate(bh)) { 2090 if (!bh_uptodate_or_lock(bh)) {
2068 err = -EIO; 2091 err = bh_submit_read(bh);
2069 ll_rw_block(READ, 1, &bh);
2070 wait_on_buffer(bh);
2071 /* Uhhuh. Read error. Complain and punt. */ 2092 /* Uhhuh. Read error. Complain and punt. */
2072 if (!buffer_uptodate(bh)) 2093 if (err)
2073 goto unlock; 2094 goto unlock;
2074 } 2095 }
2075 2096
@@ -2490,7 +2511,7 @@ int ext3_can_truncate(struct inode *inode)
2490 * transaction, and VFS/VM ensures that ext3_truncate() cannot run 2511 * transaction, and VFS/VM ensures that ext3_truncate() cannot run
2491 * simultaneously on behalf of the same inode. 2512 * simultaneously on behalf of the same inode.
2492 * 2513 *
2493 * As we work through the truncate and commmit bits of it to the journal there 2514 * As we work through the truncate and commit bits of it to the journal there
2494 * is one core, guiding principle: the file's tree must always be consistent on 2515 * is one core, guiding principle: the file's tree must always be consistent on
2495 * disk. We must be able to restart the truncate after a crash. 2516 * disk. We must be able to restart the truncate after a crash.
2496 * 2517 *
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index ba1b54e23cae..4af574ce4a46 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -44,7 +44,7 @@ long ext3_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
44 if (get_user(flags, (int __user *) arg)) 44 if (get_user(flags, (int __user *) arg))
45 return -EFAULT; 45 return -EFAULT;
46 46
47 err = mnt_want_write(filp->f_path.mnt); 47 err = mnt_want_write_file(filp);
48 if (err) 48 if (err)
49 return err; 49 return err;
50 50
@@ -110,7 +110,7 @@ flags_err:
110 err = ext3_change_inode_journal_flag(inode, jflag); 110 err = ext3_change_inode_journal_flag(inode, jflag);
111flags_out: 111flags_out:
112 mutex_unlock(&inode->i_mutex); 112 mutex_unlock(&inode->i_mutex);
113 mnt_drop_write(filp->f_path.mnt); 113 mnt_drop_write_file(filp);
114 return err; 114 return err;
115 } 115 }
116 case EXT3_IOC_GETVERSION: 116 case EXT3_IOC_GETVERSION:
@@ -126,7 +126,7 @@ flags_out:
126 if (!inode_owner_or_capable(inode)) 126 if (!inode_owner_or_capable(inode))
127 return -EPERM; 127 return -EPERM;
128 128
129 err = mnt_want_write(filp->f_path.mnt); 129 err = mnt_want_write_file(filp);
130 if (err) 130 if (err)
131 return err; 131 return err;
132 if (get_user(generation, (int __user *) arg)) { 132 if (get_user(generation, (int __user *) arg)) {
@@ -134,10 +134,11 @@ flags_out:
134 goto setversion_out; 134 goto setversion_out;
135 } 135 }
136 136
137 mutex_lock(&inode->i_mutex);
137 handle = ext3_journal_start(inode, 1); 138 handle = ext3_journal_start(inode, 1);
138 if (IS_ERR(handle)) { 139 if (IS_ERR(handle)) {
139 err = PTR_ERR(handle); 140 err = PTR_ERR(handle);
140 goto setversion_out; 141 goto unlock_out;
141 } 142 }
142 err = ext3_reserve_inode_write(handle, inode, &iloc); 143 err = ext3_reserve_inode_write(handle, inode, &iloc);
143 if (err == 0) { 144 if (err == 0) {
@@ -146,8 +147,11 @@ flags_out:
146 err = ext3_mark_iloc_dirty(handle, inode, &iloc); 147 err = ext3_mark_iloc_dirty(handle, inode, &iloc);
147 } 148 }
148 ext3_journal_stop(handle); 149 ext3_journal_stop(handle);
150
151unlock_out:
152 mutex_unlock(&inode->i_mutex);
149setversion_out: 153setversion_out:
150 mnt_drop_write(filp->f_path.mnt); 154 mnt_drop_write_file(filp);
151 return err; 155 return err;
152 } 156 }
153 case EXT3_IOC_GETRSVSZ: 157 case EXT3_IOC_GETRSVSZ:
@@ -164,7 +168,7 @@ setversion_out:
164 if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) 168 if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode))
165 return -ENOTTY; 169 return -ENOTTY;
166 170
167 err = mnt_want_write(filp->f_path.mnt); 171 err = mnt_want_write_file(filp);
168 if (err) 172 if (err)
169 return err; 173 return err;
170 174
@@ -195,7 +199,7 @@ setversion_out:
195 } 199 }
196 mutex_unlock(&ei->truncate_mutex); 200 mutex_unlock(&ei->truncate_mutex);
197setrsvsz_out: 201setrsvsz_out:
198 mnt_drop_write(filp->f_path.mnt); 202 mnt_drop_write_file(filp);
199 return err; 203 return err;
200 } 204 }
201 case EXT3_IOC_GROUP_EXTEND: { 205 case EXT3_IOC_GROUP_EXTEND: {
@@ -206,7 +210,7 @@ setrsvsz_out:
206 if (!capable(CAP_SYS_RESOURCE)) 210 if (!capable(CAP_SYS_RESOURCE))
207 return -EPERM; 211 return -EPERM;
208 212
209 err = mnt_want_write(filp->f_path.mnt); 213 err = mnt_want_write_file(filp);
210 if (err) 214 if (err)
211 return err; 215 return err;
212 216
@@ -221,7 +225,7 @@ setrsvsz_out:
221 if (err == 0) 225 if (err == 0)
222 err = err2; 226 err = err2;
223group_extend_out: 227group_extend_out:
224 mnt_drop_write(filp->f_path.mnt); 228 mnt_drop_write_file(filp);
225 return err; 229 return err;
226 } 230 }
227 case EXT3_IOC_GROUP_ADD: { 231 case EXT3_IOC_GROUP_ADD: {
@@ -232,7 +236,7 @@ group_extend_out:
232 if (!capable(CAP_SYS_RESOURCE)) 236 if (!capable(CAP_SYS_RESOURCE))
233 return -EPERM; 237 return -EPERM;
234 238
235 err = mnt_want_write(filp->f_path.mnt); 239 err = mnt_want_write_file(filp);
236 if (err) 240 if (err)
237 return err; 241 return err;
238 242
@@ -249,7 +253,7 @@ group_extend_out:
249 if (err == 0) 253 if (err == 0)
250 err = err2; 254 err = err2;
251group_add_out: 255group_add_out:
252 mnt_drop_write(filp->f_path.mnt); 256 mnt_drop_write_file(filp);
253 return err; 257 return err;
254 } 258 }
255 case FITRIM: { 259 case FITRIM: {
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 642dc6d66dfd..e8e211795e9f 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -921,9 +921,12 @@ restart:
921 num++; 921 num++;
922 bh = ext3_getblk(NULL, dir, b++, 0, &err); 922 bh = ext3_getblk(NULL, dir, b++, 0, &err);
923 bh_use[ra_max] = bh; 923 bh_use[ra_max] = bh;
924 if (bh) 924 if (bh && !bh_uptodate_or_lock(bh)) {
925 ll_rw_block(READ | REQ_META | REQ_PRIO, 925 get_bh(bh);
926 1, &bh); 926 bh->b_end_io = end_buffer_read_sync;
927 submit_bh(READ | REQ_META | REQ_PRIO,
928 bh);
929 }
927 } 930 }
928 } 931 }
929 if ((bh = bh_use[ra_ptr++]) == NULL) 932 if ((bh = bh_use[ra_ptr++]) == NULL)
@@ -1698,7 +1701,7 @@ static int ext3_add_nondir(handle_t *handle,
1698 * If the create succeeds, we fill in the inode information 1701 * If the create succeeds, we fill in the inode information
1699 * with d_instantiate(). 1702 * with d_instantiate().
1700 */ 1703 */
1701static int ext3_create (struct inode * dir, struct dentry * dentry, int mode, 1704static int ext3_create (struct inode * dir, struct dentry * dentry, umode_t mode,
1702 struct nameidata *nd) 1705 struct nameidata *nd)
1703{ 1706{
1704 handle_t *handle; 1707 handle_t *handle;
@@ -1732,7 +1735,7 @@ retry:
1732} 1735}
1733 1736
1734static int ext3_mknod (struct inode * dir, struct dentry *dentry, 1737static int ext3_mknod (struct inode * dir, struct dentry *dentry,
1735 int mode, dev_t rdev) 1738 umode_t mode, dev_t rdev)
1736{ 1739{
1737 handle_t *handle; 1740 handle_t *handle;
1738 struct inode *inode; 1741 struct inode *inode;
@@ -1768,7 +1771,7 @@ retry:
1768 return err; 1771 return err;
1769} 1772}
1770 1773
1771static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode) 1774static int ext3_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
1772{ 1775{
1773 handle_t *handle; 1776 handle_t *handle;
1774 struct inode * inode; 1777 struct inode * inode;
@@ -2272,7 +2275,7 @@ retry:
2272 err = PTR_ERR(handle); 2275 err = PTR_ERR(handle);
2273 goto err_drop_inode; 2276 goto err_drop_inode;
2274 } 2277 }
2275 inc_nlink(inode); 2278 set_nlink(inode, 1);
2276 err = ext3_orphan_del(handle, inode); 2279 err = ext3_orphan_del(handle, inode);
2277 if (err) { 2280 if (err) {
2278 ext3_journal_stop(handle); 2281 ext3_journal_stop(handle);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 922d289aeeb3..726c7ef6cdf1 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -511,7 +511,6 @@ static int ext3_drop_inode(struct inode *inode)
511static void ext3_i_callback(struct rcu_head *head) 511static void ext3_i_callback(struct rcu_head *head)
512{ 512{
513 struct inode *inode = container_of(head, struct inode, i_rcu); 513 struct inode *inode = container_of(head, struct inode, i_rcu);
514 INIT_LIST_HEAD(&inode->i_dentry);
515 kmem_cache_free(ext3_inode_cachep, EXT3_I(inode)); 514 kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
516} 515}
517 516
@@ -611,9 +610,9 @@ static char *data_mode_string(unsigned long mode)
611 * - it's set to a non-default value OR 610 * - it's set to a non-default value OR
612 * - if the per-sb default is different from the global default 611 * - if the per-sb default is different from the global default
613 */ 612 */
614static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs) 613static int ext3_show_options(struct seq_file *seq, struct dentry *root)
615{ 614{
616 struct super_block *sb = vfs->mnt_sb; 615 struct super_block *sb = root->d_sb;
617 struct ext3_sb_info *sbi = EXT3_SB(sb); 616 struct ext3_sb_info *sbi = EXT3_SB(sb);
618 struct ext3_super_block *es = sbi->s_es; 617 struct ext3_super_block *es = sbi->s_es;
619 unsigned long def_mount_opts; 618 unsigned long def_mount_opts;
@@ -2060,9 +2059,10 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
2060 EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS; 2059 EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS;
2061 ext3_orphan_cleanup(sb, es); 2060 ext3_orphan_cleanup(sb, es);
2062 EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS; 2061 EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
2063 if (needs_recovery) 2062 if (needs_recovery) {
2063 ext3_mark_recovery_complete(sb, es);
2064 ext3_msg(sb, KERN_INFO, "recovery complete"); 2064 ext3_msg(sb, KERN_INFO, "recovery complete");
2065 ext3_mark_recovery_complete(sb, es); 2065 }
2066 ext3_msg(sb, KERN_INFO, "mounted filesystem with %s data mode", 2066 ext3_msg(sb, KERN_INFO, "mounted filesystem with %s data mode",
2067 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal": 2067 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal":
2068 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered": 2068 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
@@ -2230,11 +2230,11 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
2230 goto out_bdev; 2230 goto out_bdev;
2231 } 2231 }
2232 journal->j_private = sb; 2232 journal->j_private = sb;
2233 ll_rw_block(READ, 1, &journal->j_sb_buffer); 2233 if (!bh_uptodate_or_lock(journal->j_sb_buffer)) {
2234 wait_on_buffer(journal->j_sb_buffer); 2234 if (bh_submit_read(journal->j_sb_buffer)) {
2235 if (!buffer_uptodate(journal->j_sb_buffer)) { 2235 ext3_msg(sb, KERN_ERR, "I/O error on journal device");
2236 ext3_msg(sb, KERN_ERR, "I/O error on journal device"); 2236 goto out_journal;
2237 goto out_journal; 2237 }
2238 } 2238 }
2239 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { 2239 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
2240 ext3_msg(sb, KERN_ERR, 2240 ext3_msg(sb, KERN_ERR,
@@ -2910,7 +2910,7 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
2910 return -EINVAL; 2910 return -EINVAL;
2911 2911
2912 /* Quotafile not on the same filesystem? */ 2912 /* Quotafile not on the same filesystem? */
2913 if (path->mnt->mnt_sb != sb) 2913 if (path->dentry->d_sb != sb)
2914 return -EXDEV; 2914 return -EXDEV;
2915 /* Journaling quota? */ 2915 /* Journaling quota? */
2916 if (EXT3_SB(sb)->s_qf_names[type]) { 2916 if (EXT3_SB(sb)->s_qf_names[type]) {
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index 3c218b8a51d4..ea26f2acab94 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -3,7 +3,6 @@
3 * Handler for storing security labels as extended attributes. 3 * Handler for storing security labels as extended attributes.
4 */ 4 */
5 5
6#include <linux/module.h>
7#include <linux/slab.h> 6#include <linux/slab.h>
8#include <linux/string.h> 7#include <linux/string.h>
9#include <linux/fs.h> 8#include <linux/fs.h>
diff --git a/fs/ext3/xattr_trusted.c b/fs/ext3/xattr_trusted.c
index dc8edda9ffe0..2526a8829de8 100644
--- a/fs/ext3/xattr_trusted.c
+++ b/fs/ext3/xattr_trusted.c
@@ -5,7 +5,6 @@
5 * Copyright (C) 2003 by Andreas Gruenbacher, <a.gruenbacher@computer.org> 5 * Copyright (C) 2003 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
6 */ 6 */
7 7
8#include <linux/module.h>
9#include <linux/string.h> 8#include <linux/string.h>
10#include <linux/capability.h> 9#include <linux/capability.h>
11#include <linux/fs.h> 10#include <linux/fs.h>
diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c
index 7a321974d584..b32e473a1e33 100644
--- a/fs/ext3/xattr_user.c
+++ b/fs/ext3/xattr_user.c
@@ -5,7 +5,6 @@
5 * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org> 5 * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
6 */ 6 */
7 7
8#include <linux/module.h>
9#include <linux/string.h> 8#include <linux/string.h>
10#include <linux/fs.h> 9#include <linux/fs.h>
11#include <linux/ext3_jbd.h> 10#include <linux/ext3_jbd.h>
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 12ccacda44e0..f9e2cd8cf711 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -23,6 +23,8 @@
23 23
24#include <trace/events/ext4.h> 24#include <trace/events/ext4.h>
25 25
26static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
27 ext4_group_t block_group);
26/* 28/*
27 * balloc.c contains the blocks allocation and deallocation routines 29 * balloc.c contains the blocks allocation and deallocation routines
28 */ 30 */
@@ -668,7 +670,7 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group)
668 * This function returns the number of file system metadata clusters at 670 * This function returns the number of file system metadata clusters at
669 * the beginning of a block group, including the reserved gdt blocks. 671 * the beginning of a block group, including the reserved gdt blocks.
670 */ 672 */
671unsigned ext4_num_base_meta_clusters(struct super_block *sb, 673static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
672 ext4_group_t block_group) 674 ext4_group_t block_group)
673{ 675{
674 struct ext4_sb_info *sbi = EXT4_SB(sb); 676 struct ext4_sb_info *sbi = EXT4_SB(sb);
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 8efb2f0a3447..3f11656bd72e 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -13,7 +13,6 @@
13#include <linux/namei.h> 13#include <linux/namei.h>
14#include <linux/quotaops.h> 14#include <linux/quotaops.h>
15#include <linux/buffer_head.h> 15#include <linux/buffer_head.h>
16#include <linux/module.h>
17#include <linux/swap.h> 16#include <linux/swap.h>
18#include <linux/pagemap.h> 17#include <linux/pagemap.h>
19#include <linux/blkdev.h> 18#include <linux/blkdev.h>
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 5b0e26a1272d..513004fc3d84 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -511,6 +511,14 @@ struct ext4_new_group_data {
511 __u32 free_blocks_count; 511 __u32 free_blocks_count;
512}; 512};
513 513
514/* Indexes used to index group tables in ext4_new_group_data */
515enum {
516 BLOCK_BITMAP = 0, /* block bitmap */
517 INODE_BITMAP, /* inode bitmap */
518 INODE_TABLE, /* inode tables */
519 GROUP_TABLE_COUNT,
520};
521
514/* 522/*
515 * Flags used by ext4_map_blocks() 523 * Flags used by ext4_map_blocks()
516 */ 524 */
@@ -575,6 +583,7 @@ struct ext4_new_group_data {
575 /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */ 583 /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
576#define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12) 584#define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12)
577#define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent) 585#define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent)
586#define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64)
578 587
579#if defined(__KERNEL__) && defined(CONFIG_COMPAT) 588#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
580/* 589/*
@@ -957,12 +966,13 @@ struct ext4_inode_info {
957#define test_opt2(sb, opt) (EXT4_SB(sb)->s_mount_opt2 & \ 966#define test_opt2(sb, opt) (EXT4_SB(sb)->s_mount_opt2 & \
958 EXT4_MOUNT2_##opt) 967 EXT4_MOUNT2_##opt)
959 968
960#define ext4_set_bit __test_and_set_bit_le 969#define ext4_test_and_set_bit __test_and_set_bit_le
970#define ext4_set_bit __set_bit_le
961#define ext4_set_bit_atomic ext2_set_bit_atomic 971#define ext4_set_bit_atomic ext2_set_bit_atomic
962#define ext4_clear_bit __test_and_clear_bit_le 972#define ext4_test_and_clear_bit __test_and_clear_bit_le
973#define ext4_clear_bit __clear_bit_le
963#define ext4_clear_bit_atomic ext2_clear_bit_atomic 974#define ext4_clear_bit_atomic ext2_clear_bit_atomic
964#define ext4_test_bit test_bit_le 975#define ext4_test_bit test_bit_le
965#define ext4_find_first_zero_bit find_first_zero_bit_le
966#define ext4_find_next_zero_bit find_next_zero_bit_le 976#define ext4_find_next_zero_bit find_next_zero_bit_le
967#define ext4_find_next_bit find_next_bit_le 977#define ext4_find_next_bit find_next_bit_le
968 978
@@ -1397,6 +1407,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
1397#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 1407#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040
1398#define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100 1408#define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100
1399#define EXT4_FEATURE_RO_COMPAT_BIGALLOC 0x0200 1409#define EXT4_FEATURE_RO_COMPAT_BIGALLOC 0x0200
1410#define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400
1400 1411
1401#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 1412#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001
1402#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 1413#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002
@@ -1409,6 +1420,8 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
1409#define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200 1420#define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200
1410#define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */ 1421#define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */
1411#define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */ 1422#define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */
1423#define EXT4_FEATURE_INCOMPAT_INLINEDATA 0x2000 /* data in inode */
1424#define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000 /* >2GB or 3-lvl htree */
1412 1425
1413#define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR 1426#define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR
1414#define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ 1427#define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
@@ -1790,8 +1803,6 @@ extern void ext4_init_block_bitmap(struct super_block *sb,
1790extern unsigned ext4_free_clusters_after_init(struct super_block *sb, 1803extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
1791 ext4_group_t block_group, 1804 ext4_group_t block_group,
1792 struct ext4_group_desc *gdp); 1805 struct ext4_group_desc *gdp);
1793extern unsigned ext4_num_base_meta_clusters(struct super_block *sb,
1794 ext4_group_t block_group);
1795extern unsigned ext4_num_overhead_clusters(struct super_block *sb, 1806extern unsigned ext4_num_overhead_clusters(struct super_block *sb,
1796 ext4_group_t block_group, 1807 ext4_group_t block_group,
1797 struct ext4_group_desc *gdp); 1808 struct ext4_group_desc *gdp);
@@ -1819,7 +1830,7 @@ extern int ext4fs_dirhash(const char *name, int len, struct
1819 dx_hash_info *hinfo); 1830 dx_hash_info *hinfo);
1820 1831
1821/* ialloc.c */ 1832/* ialloc.c */
1822extern struct inode *ext4_new_inode(handle_t *, struct inode *, int, 1833extern struct inode *ext4_new_inode(handle_t *, struct inode *, umode_t,
1823 const struct qstr *qstr, __u32 goal, 1834 const struct qstr *qstr, __u32 goal,
1824 uid_t *owner); 1835 uid_t *owner);
1825extern void ext4_free_inode(handle_t *, struct inode *); 1836extern void ext4_free_inode(handle_t *, struct inode *);
@@ -1880,16 +1891,9 @@ extern int ext4_alloc_da_blocks(struct inode *inode);
1880extern void ext4_set_aops(struct inode *inode); 1891extern void ext4_set_aops(struct inode *inode);
1881extern int ext4_writepage_trans_blocks(struct inode *); 1892extern int ext4_writepage_trans_blocks(struct inode *);
1882extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); 1893extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
1883extern int ext4_block_truncate_page(handle_t *handle,
1884 struct address_space *mapping, loff_t from);
1885extern int ext4_block_zero_page_range(handle_t *handle,
1886 struct address_space *mapping, loff_t from, loff_t length);
1887extern int ext4_discard_partial_page_buffers(handle_t *handle, 1894extern int ext4_discard_partial_page_buffers(handle_t *handle,
1888 struct address_space *mapping, loff_t from, 1895 struct address_space *mapping, loff_t from,
1889 loff_t length, int flags); 1896 loff_t length, int flags);
1890extern int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
1891 struct inode *inode, struct page *page, loff_t from,
1892 loff_t length, int flags);
1893extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); 1897extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
1894extern qsize_t *ext4_get_reserved_space(struct inode *inode); 1898extern qsize_t *ext4_get_reserved_space(struct inode *inode);
1895extern void ext4_da_update_reserve_space(struct inode *inode, 1899extern void ext4_da_update_reserve_space(struct inode *inode,
@@ -1924,6 +1928,7 @@ extern int ext4_group_add(struct super_block *sb,
1924extern int ext4_group_extend(struct super_block *sb, 1928extern int ext4_group_extend(struct super_block *sb,
1925 struct ext4_super_block *es, 1929 struct ext4_super_block *es,
1926 ext4_fsblk_t n_blocks_count); 1930 ext4_fsblk_t n_blocks_count);
1931extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count);
1927 1932
1928/* super.c */ 1933/* super.c */
1929extern void *ext4_kvmalloc(size_t size, gfp_t flags); 1934extern void *ext4_kvmalloc(size_t size, gfp_t flags);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 607b1557d292..74f23c292e1b 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -29,7 +29,6 @@
29 * - smart tree reduction 29 * - smart tree reduction
30 */ 30 */
31 31
32#include <linux/module.h>
33#include <linux/fs.h> 32#include <linux/fs.h>
34#include <linux/time.h> 33#include <linux/time.h>
35#include <linux/jbd2.h> 34#include <linux/jbd2.h>
@@ -3281,6 +3280,9 @@ static int ext4_find_delalloc_range(struct inode *inode,
3281 ext4_lblk_t i, pg_lblk; 3280 ext4_lblk_t i, pg_lblk;
3282 pgoff_t index; 3281 pgoff_t index;
3283 3282
3283 if (!test_opt(inode->i_sb, DELALLOC))
3284 return 0;
3285
3284 /* reverse search wont work if fs block size is less than page size */ 3286 /* reverse search wont work if fs block size is less than page size */
3285 if (inode->i_blkbits < PAGE_CACHE_SHIFT) 3287 if (inode->i_blkbits < PAGE_CACHE_SHIFT)
3286 search_hint_reverse = 0; 3288 search_hint_reverse = 0;
@@ -3453,8 +3455,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3453 int err = 0; 3455 int err = 0;
3454 ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; 3456 ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
3455 3457
3456 ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical" 3458 ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical "
3457 "block %llu, max_blocks %u, flags %d, allocated %u", 3459 "block %llu, max_blocks %u, flags %x, allocated %u\n",
3458 inode->i_ino, (unsigned long long)map->m_lblk, map->m_len, 3460 inode->i_ino, (unsigned long long)map->m_lblk, map->m_len,
3459 flags, allocated); 3461 flags, allocated);
3460 ext4_ext_show_leaf(inode, path); 3462 ext4_ext_show_leaf(inode, path);
@@ -3625,7 +3627,7 @@ static int get_implied_cluster_alloc(struct super_block *sb,
3625 struct ext4_sb_info *sbi = EXT4_SB(sb); 3627 struct ext4_sb_info *sbi = EXT4_SB(sb);
3626 ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1); 3628 ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
3627 ext4_lblk_t ex_cluster_start, ex_cluster_end; 3629 ext4_lblk_t ex_cluster_start, ex_cluster_end;
3628 ext4_lblk_t rr_cluster_start, rr_cluster_end; 3630 ext4_lblk_t rr_cluster_start;
3629 ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); 3631 ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
3630 ext4_fsblk_t ee_start = ext4_ext_pblock(ex); 3632 ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
3631 unsigned short ee_len = ext4_ext_get_actual_len(ex); 3633 unsigned short ee_len = ext4_ext_get_actual_len(ex);
@@ -3636,7 +3638,6 @@ static int get_implied_cluster_alloc(struct super_block *sb,
3636 3638
3637 /* The requested region passed into ext4_map_blocks() */ 3639 /* The requested region passed into ext4_map_blocks() */
3638 rr_cluster_start = EXT4_B2C(sbi, map->m_lblk); 3640 rr_cluster_start = EXT4_B2C(sbi, map->m_lblk);
3639 rr_cluster_end = EXT4_B2C(sbi, map->m_lblk + map->m_len - 1);
3640 3641
3641 if ((rr_cluster_start == ex_cluster_end) || 3642 if ((rr_cluster_start == ex_cluster_end) ||
3642 (rr_cluster_start == ex_cluster_start)) { 3643 (rr_cluster_start == ex_cluster_start)) {
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 00beb4f9cc4f..25d8c9781ad9 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -252,7 +252,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
252 fatal = ext4_journal_get_write_access(handle, bh2); 252 fatal = ext4_journal_get_write_access(handle, bh2);
253 } 253 }
254 ext4_lock_group(sb, block_group); 254 ext4_lock_group(sb, block_group);
255 cleared = ext4_clear_bit(bit, bitmap_bh->b_data); 255 cleared = ext4_test_and_clear_bit(bit, bitmap_bh->b_data);
256 if (fatal || !cleared) { 256 if (fatal || !cleared) {
257 ext4_unlock_group(sb, block_group); 257 ext4_unlock_group(sb, block_group);
258 goto out; 258 goto out;
@@ -351,14 +351,14 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
351 */ 351 */
352 352
353static int find_group_orlov(struct super_block *sb, struct inode *parent, 353static int find_group_orlov(struct super_block *sb, struct inode *parent,
354 ext4_group_t *group, int mode, 354 ext4_group_t *group, umode_t mode,
355 const struct qstr *qstr) 355 const struct qstr *qstr)
356{ 356{
357 ext4_group_t parent_group = EXT4_I(parent)->i_block_group; 357 ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
358 struct ext4_sb_info *sbi = EXT4_SB(sb); 358 struct ext4_sb_info *sbi = EXT4_SB(sb);
359 ext4_group_t real_ngroups = ext4_get_groups_count(sb); 359 ext4_group_t real_ngroups = ext4_get_groups_count(sb);
360 int inodes_per_group = EXT4_INODES_PER_GROUP(sb); 360 int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
361 unsigned int freei, avefreei; 361 unsigned int freei, avefreei, grp_free;
362 ext4_fsblk_t freeb, avefreec; 362 ext4_fsblk_t freeb, avefreec;
363 unsigned int ndirs; 363 unsigned int ndirs;
364 int max_dirs, min_inodes; 364 int max_dirs, min_inodes;
@@ -477,8 +477,8 @@ fallback_retry:
477 for (i = 0; i < ngroups; i++) { 477 for (i = 0; i < ngroups; i++) {
478 grp = (parent_group + i) % ngroups; 478 grp = (parent_group + i) % ngroups;
479 desc = ext4_get_group_desc(sb, grp, NULL); 479 desc = ext4_get_group_desc(sb, grp, NULL);
480 if (desc && ext4_free_inodes_count(sb, desc) && 480 grp_free = ext4_free_inodes_count(sb, desc);
481 ext4_free_inodes_count(sb, desc) >= avefreei) { 481 if (desc && grp_free && grp_free >= avefreei) {
482 *group = grp; 482 *group = grp;
483 return 0; 483 return 0;
484 } 484 }
@@ -497,7 +497,7 @@ fallback_retry:
497} 497}
498 498
499static int find_group_other(struct super_block *sb, struct inode *parent, 499static int find_group_other(struct super_block *sb, struct inode *parent,
500 ext4_group_t *group, int mode) 500 ext4_group_t *group, umode_t mode)
501{ 501{
502 ext4_group_t parent_group = EXT4_I(parent)->i_block_group; 502 ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
503 ext4_group_t i, last, ngroups = ext4_get_groups_count(sb); 503 ext4_group_t i, last, ngroups = ext4_get_groups_count(sb);
@@ -602,7 +602,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
602 */ 602 */
603static int ext4_claim_inode(struct super_block *sb, 603static int ext4_claim_inode(struct super_block *sb,
604 struct buffer_head *inode_bitmap_bh, 604 struct buffer_head *inode_bitmap_bh,
605 unsigned long ino, ext4_group_t group, int mode) 605 unsigned long ino, ext4_group_t group, umode_t mode)
606{ 606{
607 int free = 0, retval = 0, count; 607 int free = 0, retval = 0, count;
608 struct ext4_sb_info *sbi = EXT4_SB(sb); 608 struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -618,7 +618,7 @@ static int ext4_claim_inode(struct super_block *sb,
618 */ 618 */
619 down_read(&grp->alloc_sem); 619 down_read(&grp->alloc_sem);
620 ext4_lock_group(sb, group); 620 ext4_lock_group(sb, group);
621 if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { 621 if (ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data)) {
622 /* not a free inode */ 622 /* not a free inode */
623 retval = 1; 623 retval = 1;
624 goto err_ret; 624 goto err_ret;
@@ -690,7 +690,7 @@ err_ret:
690 * For other inodes, search forward from the parent directory's block 690 * For other inodes, search forward from the parent directory's block
691 * group to find a free inode. 691 * group to find a free inode.
692 */ 692 */
693struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, 693struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode,
694 const struct qstr *qstr, __u32 goal, uid_t *owner) 694 const struct qstr *qstr, __u32 goal, uid_t *owner)
695{ 695{
696 struct super_block *sb; 696 struct super_block *sb;
@@ -885,8 +885,12 @@ got:
885 if (IS_DIRSYNC(inode)) 885 if (IS_DIRSYNC(inode))
886 ext4_handle_sync(handle); 886 ext4_handle_sync(handle);
887 if (insert_inode_locked(inode) < 0) { 887 if (insert_inode_locked(inode) < 0) {
888 err = -EINVAL; 888 /*
889 goto fail_drop; 889 * Likely a bitmap corruption causing inode to be allocated
890 * twice.
891 */
892 err = -EIO;
893 goto fail;
890 } 894 }
891 spin_lock(&sbi->s_next_gen_lock); 895 spin_lock(&sbi->s_next_gen_lock);
892 inode->i_generation = sbi->s_next_generation++; 896 inode->i_generation = sbi->s_next_generation++;
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 3cfc73fbca8e..830e1b2bf145 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -20,7 +20,6 @@
20 * (sct@redhat.com), 1993, 1998 20 * (sct@redhat.com), 1993, 1998
21 */ 21 */
22 22
23#include <linux/module.h>
24#include "ext4_jbd2.h" 23#include "ext4_jbd2.h"
25#include "truncate.h" 24#include "truncate.h"
26 25
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 92655fd89657..feaa82fe629d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -18,7 +18,6 @@
18 * Assorted race fixes, rewrite of ext4_get_block() by Al Viro, 2000 18 * Assorted race fixes, rewrite of ext4_get_block() by Al Viro, 2000
19 */ 19 */
20 20
21#include <linux/module.h>
22#include <linux/fs.h> 21#include <linux/fs.h>
23#include <linux/time.h> 22#include <linux/time.h>
24#include <linux/jbd2.h> 23#include <linux/jbd2.h>
@@ -72,6 +71,9 @@ static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
72static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); 71static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
73static int __ext4_journalled_writepage(struct page *page, unsigned int len); 72static int __ext4_journalled_writepage(struct page *page, unsigned int len);
74static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh); 73static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
74static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
75 struct inode *inode, struct page *page, loff_t from,
76 loff_t length, int flags);
75 77
76/* 78/*
77 * Test whether an inode is a fast symlink. 79 * Test whether an inode is a fast symlink.
@@ -1881,7 +1883,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
1881 * a[0] = 'a'; 1883 * a[0] = 'a';
1882 * truncate(f, 4096); 1884 * truncate(f, 4096);
1883 * we have in the page first buffer_head mapped via page_mkwrite call back 1885 * we have in the page first buffer_head mapped via page_mkwrite call back
1884 * but other bufer_heads would be unmapped but dirty(dirty done via the 1886 * but other buffer_heads would be unmapped but dirty (dirty done via the
1885 * do_wp_page). So writepage should write the first block. If we modify 1887 * do_wp_page). So writepage should write the first block. If we modify
1886 * the mmap area beyond 1024 we will again get a page_fault and the 1888 * the mmap area beyond 1024 we will again get a page_fault and the
1887 * page_mkwrite callback will do the block allocation and mark the 1889 * page_mkwrite callback will do the block allocation and mark the
@@ -2760,7 +2762,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
2760 if (!io_end || !size) 2762 if (!io_end || !size)
2761 goto out; 2763 goto out;
2762 2764
2763 ext_debug("ext4_end_io_dio(): io_end 0x%p" 2765 ext_debug("ext4_end_io_dio(): io_end 0x%p "
2764 "for inode %lu, iocb 0x%p, offset %llu, size %llu\n", 2766 "for inode %lu, iocb 0x%p, offset %llu, size %llu\n",
2765 iocb->private, io_end->inode->i_ino, iocb, offset, 2767 iocb->private, io_end->inode->i_ino, iocb, offset,
2766 size); 2768 size);
@@ -3161,7 +3163,7 @@ int ext4_discard_partial_page_buffers(handle_t *handle,
3161 * 3163 *
3162 * Returns zero on sucess or negative on failure. 3164 * Returns zero on sucess or negative on failure.
3163 */ 3165 */
3164int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, 3166static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
3165 struct inode *inode, struct page *page, loff_t from, 3167 struct inode *inode, struct page *page, loff_t from,
3166 loff_t length, int flags) 3168 loff_t length, int flags)
3167{ 3169{
@@ -3301,126 +3303,6 @@ next:
3301 return err; 3303 return err;
3302} 3304}
3303 3305
3304/*
3305 * ext4_block_truncate_page() zeroes out a mapping from file offset `from'
3306 * up to the end of the block which corresponds to `from'.
3307 * This required during truncate. We need to physically zero the tail end
3308 * of that block so it doesn't yield old data if the file is later grown.
3309 */
3310int ext4_block_truncate_page(handle_t *handle,
3311 struct address_space *mapping, loff_t from)
3312{
3313 unsigned offset = from & (PAGE_CACHE_SIZE-1);
3314 unsigned length;
3315 unsigned blocksize;
3316 struct inode *inode = mapping->host;
3317
3318 blocksize = inode->i_sb->s_blocksize;
3319 length = blocksize - (offset & (blocksize - 1));
3320
3321 return ext4_block_zero_page_range(handle, mapping, from, length);
3322}
3323
3324/*
3325 * ext4_block_zero_page_range() zeros out a mapping of length 'length'
3326 * starting from file offset 'from'. The range to be zero'd must
3327 * be contained with in one block. If the specified range exceeds
3328 * the end of the block it will be shortened to end of the block
3329 * that cooresponds to 'from'
3330 */
3331int ext4_block_zero_page_range(handle_t *handle,
3332 struct address_space *mapping, loff_t from, loff_t length)
3333{
3334 ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
3335 unsigned offset = from & (PAGE_CACHE_SIZE-1);
3336 unsigned blocksize, max, pos;
3337 ext4_lblk_t iblock;
3338 struct inode *inode = mapping->host;
3339 struct buffer_head *bh;
3340 struct page *page;
3341 int err = 0;
3342
3343 page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
3344 mapping_gfp_mask(mapping) & ~__GFP_FS);
3345 if (!page)
3346 return -ENOMEM;
3347
3348 blocksize = inode->i_sb->s_blocksize;
3349 max = blocksize - (offset & (blocksize - 1));
3350
3351 /*
3352 * correct length if it does not fall between
3353 * 'from' and the end of the block
3354 */
3355 if (length > max || length < 0)
3356 length = max;
3357
3358 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
3359
3360 if (!page_has_buffers(page))
3361 create_empty_buffers(page, blocksize, 0);
3362
3363 /* Find the buffer that contains "offset" */
3364 bh = page_buffers(page);
3365 pos = blocksize;
3366 while (offset >= pos) {
3367 bh = bh->b_this_page;
3368 iblock++;
3369 pos += blocksize;
3370 }
3371
3372 err = 0;
3373 if (buffer_freed(bh)) {
3374 BUFFER_TRACE(bh, "freed: skip");
3375 goto unlock;
3376 }
3377
3378 if (!buffer_mapped(bh)) {
3379 BUFFER_TRACE(bh, "unmapped");
3380 ext4_get_block(inode, iblock, bh, 0);
3381 /* unmapped? It's a hole - nothing to do */
3382 if (!buffer_mapped(bh)) {
3383 BUFFER_TRACE(bh, "still unmapped");
3384 goto unlock;
3385 }
3386 }
3387
3388 /* Ok, it's mapped. Make sure it's up-to-date */
3389 if (PageUptodate(page))
3390 set_buffer_uptodate(bh);
3391
3392 if (!buffer_uptodate(bh)) {
3393 err = -EIO;
3394 ll_rw_block(READ, 1, &bh);
3395 wait_on_buffer(bh);
3396 /* Uhhuh. Read error. Complain and punt. */
3397 if (!buffer_uptodate(bh))
3398 goto unlock;
3399 }
3400
3401 if (ext4_should_journal_data(inode)) {
3402 BUFFER_TRACE(bh, "get write access");
3403 err = ext4_journal_get_write_access(handle, bh);
3404 if (err)
3405 goto unlock;
3406 }
3407
3408 zero_user(page, offset, length);
3409
3410 BUFFER_TRACE(bh, "zeroed end of block");
3411
3412 err = 0;
3413 if (ext4_should_journal_data(inode)) {
3414 err = ext4_handle_dirty_metadata(handle, inode, bh);
3415 } else
3416 mark_buffer_dirty(bh);
3417
3418unlock:
3419 unlock_page(page);
3420 page_cache_release(page);
3421 return err;
3422}
3423
3424int ext4_can_truncate(struct inode *inode) 3306int ext4_can_truncate(struct inode *inode)
3425{ 3307{
3426 if (S_ISREG(inode->i_mode)) 3308 if (S_ISREG(inode->i_mode))
@@ -3469,7 +3351,7 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
3469 * transaction, and VFS/VM ensures that ext4_truncate() cannot run 3351 * transaction, and VFS/VM ensures that ext4_truncate() cannot run
3470 * simultaneously on behalf of the same inode. 3352 * simultaneously on behalf of the same inode.
3471 * 3353 *
3472 * As we work through the truncate and commmit bits of it to the journal there 3354 * As we work through the truncate and commit bits of it to the journal there
3473 * is one core, guiding principle: the file's tree must always be consistent on 3355 * is one core, guiding principle: the file's tree must always be consistent on
3474 * disk. We must be able to restart the truncate after a crash. 3356 * disk. We must be able to restart the truncate after a crash.
3475 * 3357 *
@@ -4647,9 +4529,19 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
4647 return 0; 4529 return 0;
4648 if (is_journal_aborted(journal)) 4530 if (is_journal_aborted(journal))
4649 return -EROFS; 4531 return -EROFS;
4532 /* We have to allocate physical blocks for delalloc blocks
4533 * before flushing journal. otherwise delalloc blocks can not
4534 * be allocated any more. even more truncate on delalloc blocks
4535 * could trigger BUG by flushing delalloc blocks in journal.
4536 * There is no delalloc block in non-journal data mode.
4537 */
4538 if (val && test_opt(inode->i_sb, DELALLOC)) {
4539 err = ext4_alloc_da_blocks(inode);
4540 if (err < 0)
4541 return err;
4542 }
4650 4543
4651 jbd2_journal_lock_updates(journal); 4544 jbd2_journal_lock_updates(journal);
4652 jbd2_journal_flush(journal);
4653 4545
4654 /* 4546 /*
4655 * OK, there are no updates running now, and all cached data is 4547 * OK, there are no updates running now, and all cached data is
@@ -4661,8 +4553,10 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
4661 4553
4662 if (val) 4554 if (val)
4663 ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); 4555 ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
4664 else 4556 else {
4557 jbd2_journal_flush(journal);
4665 ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); 4558 ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
4559 }
4666 ext4_set_aops(inode); 4560 ext4_set_aops(inode);
4667 4561
4668 jbd2_journal_unlock_updates(journal); 4562 jbd2_journal_unlock_updates(journal);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index a56796814d6a..6eee25591b81 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -18,6 +18,8 @@
18#include "ext4_jbd2.h" 18#include "ext4_jbd2.h"
19#include "ext4.h" 19#include "ext4.h"
20 20
21#define MAX_32_NUM ((((unsigned long long) 1) << 32) - 1)
22
21long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 23long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
22{ 24{
23 struct inode *inode = filp->f_dentry->d_inode; 25 struct inode *inode = filp->f_dentry->d_inode;
@@ -45,7 +47,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
45 if (get_user(flags, (int __user *) arg)) 47 if (get_user(flags, (int __user *) arg))
46 return -EFAULT; 48 return -EFAULT;
47 49
48 err = mnt_want_write(filp->f_path.mnt); 50 err = mnt_want_write_file(filp);
49 if (err) 51 if (err)
50 return err; 52 return err;
51 53
@@ -134,7 +136,7 @@ flags_err:
134 err = ext4_ext_migrate(inode); 136 err = ext4_ext_migrate(inode);
135flags_out: 137flags_out:
136 mutex_unlock(&inode->i_mutex); 138 mutex_unlock(&inode->i_mutex);
137 mnt_drop_write(filp->f_path.mnt); 139 mnt_drop_write_file(filp);
138 return err; 140 return err;
139 } 141 }
140 case EXT4_IOC_GETVERSION: 142 case EXT4_IOC_GETVERSION:
@@ -150,7 +152,7 @@ flags_out:
150 if (!inode_owner_or_capable(inode)) 152 if (!inode_owner_or_capable(inode))
151 return -EPERM; 153 return -EPERM;
152 154
153 err = mnt_want_write(filp->f_path.mnt); 155 err = mnt_want_write_file(filp);
154 if (err) 156 if (err)
155 return err; 157 return err;
156 if (get_user(generation, (int __user *) arg)) { 158 if (get_user(generation, (int __user *) arg)) {
@@ -158,10 +160,11 @@ flags_out:
158 goto setversion_out; 160 goto setversion_out;
159 } 161 }
160 162
163 mutex_lock(&inode->i_mutex);
161 handle = ext4_journal_start(inode, 1); 164 handle = ext4_journal_start(inode, 1);
162 if (IS_ERR(handle)) { 165 if (IS_ERR(handle)) {
163 err = PTR_ERR(handle); 166 err = PTR_ERR(handle);
164 goto setversion_out; 167 goto unlock_out;
165 } 168 }
166 err = ext4_reserve_inode_write(handle, inode, &iloc); 169 err = ext4_reserve_inode_write(handle, inode, &iloc);
167 if (err == 0) { 170 if (err == 0) {
@@ -170,8 +173,11 @@ flags_out:
170 err = ext4_mark_iloc_dirty(handle, inode, &iloc); 173 err = ext4_mark_iloc_dirty(handle, inode, &iloc);
171 } 174 }
172 ext4_journal_stop(handle); 175 ext4_journal_stop(handle);
176
177unlock_out:
178 mutex_unlock(&inode->i_mutex);
173setversion_out: 179setversion_out:
174 mnt_drop_write(filp->f_path.mnt); 180 mnt_drop_write_file(filp);
175 return err; 181 return err;
176 } 182 }
177 case EXT4_IOC_GROUP_EXTEND: { 183 case EXT4_IOC_GROUP_EXTEND: {
@@ -182,19 +188,22 @@ setversion_out:
182 if (err) 188 if (err)
183 return err; 189 return err;
184 190
185 if (get_user(n_blocks_count, (__u32 __user *)arg)) 191 if (get_user(n_blocks_count, (__u32 __user *)arg)) {
186 return -EFAULT; 192 err = -EFAULT;
193 goto group_extend_out;
194 }
187 195
188 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 196 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
189 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { 197 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
190 ext4_msg(sb, KERN_ERR, 198 ext4_msg(sb, KERN_ERR,
191 "Online resizing not supported with bigalloc"); 199 "Online resizing not supported with bigalloc");
192 return -EOPNOTSUPP; 200 err = -EOPNOTSUPP;
201 goto group_extend_out;
193 } 202 }
194 203
195 err = mnt_want_write(filp->f_path.mnt); 204 err = mnt_want_write_file(filp);
196 if (err) 205 if (err)
197 return err; 206 goto group_extend_out;
198 207
199 err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); 208 err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
200 if (EXT4_SB(sb)->s_journal) { 209 if (EXT4_SB(sb)->s_journal) {
@@ -204,9 +213,9 @@ setversion_out:
204 } 213 }
205 if (err == 0) 214 if (err == 0)
206 err = err2; 215 err = err2;
207 mnt_drop_write(filp->f_path.mnt); 216 mnt_drop_write_file(filp);
217group_extend_out:
208 ext4_resize_end(sb); 218 ext4_resize_end(sb);
209
210 return err; 219 return err;
211 } 220 }
212 221
@@ -240,15 +249,14 @@ setversion_out:
240 return -EOPNOTSUPP; 249 return -EOPNOTSUPP;
241 } 250 }
242 251
243 err = mnt_want_write(filp->f_path.mnt); 252 err = mnt_want_write_file(filp);
244 if (err) 253 if (err)
245 goto mext_out; 254 goto mext_out;
246 255
247 err = ext4_move_extents(filp, donor_filp, me.orig_start, 256 err = ext4_move_extents(filp, donor_filp, me.orig_start,
248 me.donor_start, me.len, &me.moved_len); 257 me.donor_start, me.len, &me.moved_len);
258 mnt_drop_write_file(filp);
249 mnt_drop_write(filp->f_path.mnt); 259 mnt_drop_write(filp->f_path.mnt);
250 if (me.moved_len > 0)
251 file_remove_suid(donor_filp);
252 260
253 if (copy_to_user((struct move_extent __user *)arg, 261 if (copy_to_user((struct move_extent __user *)arg,
254 &me, sizeof(me))) 262 &me, sizeof(me)))
@@ -267,19 +275,22 @@ mext_out:
267 return err; 275 return err;
268 276
269 if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg, 277 if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg,
270 sizeof(input))) 278 sizeof(input))) {
271 return -EFAULT; 279 err = -EFAULT;
280 goto group_add_out;
281 }
272 282
273 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 283 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
274 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { 284 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
275 ext4_msg(sb, KERN_ERR, 285 ext4_msg(sb, KERN_ERR,
276 "Online resizing not supported with bigalloc"); 286 "Online resizing not supported with bigalloc");
277 return -EOPNOTSUPP; 287 err = -EOPNOTSUPP;
288 goto group_add_out;
278 } 289 }
279 290
280 err = mnt_want_write(filp->f_path.mnt); 291 err = mnt_want_write_file(filp);
281 if (err) 292 if (err)
282 return err; 293 goto group_add_out;
283 294
284 err = ext4_group_add(sb, &input); 295 err = ext4_group_add(sb, &input);
285 if (EXT4_SB(sb)->s_journal) { 296 if (EXT4_SB(sb)->s_journal) {
@@ -289,9 +300,9 @@ mext_out:
289 } 300 }
290 if (err == 0) 301 if (err == 0)
291 err = err2; 302 err = err2;
292 mnt_drop_write(filp->f_path.mnt); 303 mnt_drop_write_file(filp);
304group_add_out:
293 ext4_resize_end(sb); 305 ext4_resize_end(sb);
294
295 return err; 306 return err;
296 } 307 }
297 308
@@ -301,7 +312,7 @@ mext_out:
301 if (!inode_owner_or_capable(inode)) 312 if (!inode_owner_or_capable(inode))
302 return -EACCES; 313 return -EACCES;
303 314
304 err = mnt_want_write(filp->f_path.mnt); 315 err = mnt_want_write_file(filp);
305 if (err) 316 if (err)
306 return err; 317 return err;
307 /* 318 /*
@@ -313,7 +324,7 @@ mext_out:
313 mutex_lock(&(inode->i_mutex)); 324 mutex_lock(&(inode->i_mutex));
314 err = ext4_ext_migrate(inode); 325 err = ext4_ext_migrate(inode);
315 mutex_unlock(&(inode->i_mutex)); 326 mutex_unlock(&(inode->i_mutex));
316 mnt_drop_write(filp->f_path.mnt); 327 mnt_drop_write_file(filp);
317 return err; 328 return err;
318 } 329 }
319 330
@@ -323,11 +334,65 @@ mext_out:
323 if (!inode_owner_or_capable(inode)) 334 if (!inode_owner_or_capable(inode))
324 return -EACCES; 335 return -EACCES;
325 336
326 err = mnt_want_write(filp->f_path.mnt); 337 err = mnt_want_write_file(filp);
327 if (err) 338 if (err)
328 return err; 339 return err;
329 err = ext4_alloc_da_blocks(inode); 340 err = ext4_alloc_da_blocks(inode);
341 mnt_drop_write_file(filp);
342 return err;
343 }
344
345 case EXT4_IOC_RESIZE_FS: {
346 ext4_fsblk_t n_blocks_count;
347 struct super_block *sb = inode->i_sb;
348 int err = 0, err2 = 0;
349
350 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
351 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
352 ext4_msg(sb, KERN_ERR,
353 "Online resizing not (yet) supported with bigalloc");
354 return -EOPNOTSUPP;
355 }
356
357 if (EXT4_HAS_INCOMPAT_FEATURE(sb,
358 EXT4_FEATURE_INCOMPAT_META_BG)) {
359 ext4_msg(sb, KERN_ERR,
360 "Online resizing not (yet) supported with meta_bg");
361 return -EOPNOTSUPP;
362 }
363
364 if (copy_from_user(&n_blocks_count, (__u64 __user *)arg,
365 sizeof(__u64))) {
366 return -EFAULT;
367 }
368
369 if (n_blocks_count > MAX_32_NUM &&
370 !EXT4_HAS_INCOMPAT_FEATURE(sb,
371 EXT4_FEATURE_INCOMPAT_64BIT)) {
372 ext4_msg(sb, KERN_ERR,
373 "File system only supports 32-bit block numbers");
374 return -EOPNOTSUPP;
375 }
376
377 err = ext4_resize_begin(sb);
378 if (err)
379 return err;
380
381 err = mnt_want_write(filp->f_path.mnt);
382 if (err)
383 goto resizefs_out;
384
385 err = ext4_resize_fs(sb, n_blocks_count);
386 if (EXT4_SB(sb)->s_journal) {
387 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
388 err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
389 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
390 }
391 if (err == 0)
392 err = err2;
330 mnt_drop_write(filp->f_path.mnt); 393 mnt_drop_write(filp->f_path.mnt);
394resizefs_out:
395 ext4_resize_end(sb);
331 return err; 396 return err;
332 } 397 }
333 398
@@ -429,6 +494,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
429 } 494 }
430 case EXT4_IOC_MOVE_EXT: 495 case EXT4_IOC_MOVE_EXT:
431 case FITRIM: 496 case FITRIM:
497 case EXT4_IOC_RESIZE_FS:
432 break; 498 break;
433 default: 499 default:
434 return -ENOIOCTLCMD; 500 return -ENOIOCTLCMD;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index e2d8be8f28bf..cb990b21c698 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3671,7 +3671,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3671 ext4_group_t group; 3671 ext4_group_t group;
3672 ext4_grpblk_t bit; 3672 ext4_grpblk_t bit;
3673 3673
3674 trace_ext4_mb_release_group_pa(pa); 3674 trace_ext4_mb_release_group_pa(sb, pa);
3675 BUG_ON(pa->pa_deleted == 0); 3675 BUG_ON(pa->pa_deleted == 0);
3676 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); 3676 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3677 BUG_ON(group != e4b->bd_group && pa->pa_len != 0); 3677 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 16ac228dbec6..e7d6bb0acfa6 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -12,7 +12,6 @@
12 * 12 *
13 */ 13 */
14 14
15#include <linux/module.h>
16#include <linux/slab.h> 15#include <linux/slab.h>
17#include "ext4_jbd2.h" 16#include "ext4_jbd2.h"
18 17
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index aa4c782c9dd7..2043f482375d 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1736,7 +1736,7 @@ static int ext4_add_nondir(handle_t *handle,
1736 * If the create succeeds, we fill in the inode information 1736 * If the create succeeds, we fill in the inode information
1737 * with d_instantiate(). 1737 * with d_instantiate().
1738 */ 1738 */
1739static int ext4_create(struct inode *dir, struct dentry *dentry, int mode, 1739static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
1740 struct nameidata *nd) 1740 struct nameidata *nd)
1741{ 1741{
1742 handle_t *handle; 1742 handle_t *handle;
@@ -1770,7 +1770,7 @@ retry:
1770} 1770}
1771 1771
1772static int ext4_mknod(struct inode *dir, struct dentry *dentry, 1772static int ext4_mknod(struct inode *dir, struct dentry *dentry,
1773 int mode, dev_t rdev) 1773 umode_t mode, dev_t rdev)
1774{ 1774{
1775 handle_t *handle; 1775 handle_t *handle;
1776 struct inode *inode; 1776 struct inode *inode;
@@ -1806,7 +1806,7 @@ retry:
1806 return err; 1806 return err;
1807} 1807}
1808 1808
1809static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode) 1809static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
1810{ 1810{
1811 handle_t *handle; 1811 handle_t *handle;
1812 struct inode *inode; 1812 struct inode *inode;
@@ -2315,7 +2315,7 @@ retry:
2315 err = PTR_ERR(handle); 2315 err = PTR_ERR(handle);
2316 goto err_drop_inode; 2316 goto err_drop_inode;
2317 } 2317 }
2318 inc_nlink(inode); 2318 set_nlink(inode, 1);
2319 err = ext4_orphan_del(handle, inode); 2319 err = ext4_orphan_del(handle, inode);
2320 if (err) { 2320 if (err) {
2321 ext4_journal_stop(handle); 2321 ext4_journal_stop(handle);
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 7e106c810c62..475851896518 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -6,7 +6,6 @@
6 * Written by Theodore Ts'o, 2010. 6 * Written by Theodore Ts'o, 2010.
7 */ 7 */
8 8
9#include <linux/module.h>
10#include <linux/fs.h> 9#include <linux/fs.h>
11#include <linux/time.h> 10#include <linux/time.h>
12#include <linux/jbd2.h> 11#include <linux/jbd2.h>
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 996780ab4f4e..f9d948f0eb86 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -134,6 +134,172 @@ static int verify_group_input(struct super_block *sb,
134 return err; 134 return err;
135} 135}
136 136
137/*
138 * ext4_new_flex_group_data is used by 64bit-resize interface to add a flex
139 * group each time.
140 */
141struct ext4_new_flex_group_data {
142 struct ext4_new_group_data *groups; /* new_group_data for groups
143 in the flex group */
144 __u16 *bg_flags; /* block group flags of groups
145 in @groups */
146 ext4_group_t count; /* number of groups in @groups
147 */
148};
149
150/*
151 * alloc_flex_gd() allocates a ext4_new_flex_group_data with size of
152 * @flexbg_size.
153 *
154 * Returns NULL on failure otherwise address of the allocated structure.
155 */
156static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size)
157{
158 struct ext4_new_flex_group_data *flex_gd;
159
160 flex_gd = kmalloc(sizeof(*flex_gd), GFP_NOFS);
161 if (flex_gd == NULL)
162 goto out3;
163
164 flex_gd->count = flexbg_size;
165
166 flex_gd->groups = kmalloc(sizeof(struct ext4_new_group_data) *
167 flexbg_size, GFP_NOFS);
168 if (flex_gd->groups == NULL)
169 goto out2;
170
171 flex_gd->bg_flags = kmalloc(flexbg_size * sizeof(__u16), GFP_NOFS);
172 if (flex_gd->bg_flags == NULL)
173 goto out1;
174
175 return flex_gd;
176
177out1:
178 kfree(flex_gd->groups);
179out2:
180 kfree(flex_gd);
181out3:
182 return NULL;
183}
184
185static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd)
186{
187 kfree(flex_gd->bg_flags);
188 kfree(flex_gd->groups);
189 kfree(flex_gd);
190}
191
192/*
193 * ext4_alloc_group_tables() allocates block bitmaps, inode bitmaps
194 * and inode tables for a flex group.
195 *
196 * This function is used by 64bit-resize. Note that this function allocates
197 * group tables from the 1st group of groups contained by @flexgd, which may
198 * be a partial of a flex group.
199 *
200 * @sb: super block of fs to which the groups belongs
201 */
202static void ext4_alloc_group_tables(struct super_block *sb,
203 struct ext4_new_flex_group_data *flex_gd,
204 int flexbg_size)
205{
206 struct ext4_new_group_data *group_data = flex_gd->groups;
207 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
208 ext4_fsblk_t start_blk;
209 ext4_fsblk_t last_blk;
210 ext4_group_t src_group;
211 ext4_group_t bb_index = 0;
212 ext4_group_t ib_index = 0;
213 ext4_group_t it_index = 0;
214 ext4_group_t group;
215 ext4_group_t last_group;
216 unsigned overhead;
217
218 BUG_ON(flex_gd->count == 0 || group_data == NULL);
219
220 src_group = group_data[0].group;
221 last_group = src_group + flex_gd->count - 1;
222
223 BUG_ON((flexbg_size > 1) && ((src_group & ~(flexbg_size - 1)) !=
224 (last_group & ~(flexbg_size - 1))));
225next_group:
226 group = group_data[0].group;
227 start_blk = ext4_group_first_block_no(sb, src_group);
228 last_blk = start_blk + group_data[src_group - group].blocks_count;
229
230 overhead = ext4_bg_has_super(sb, src_group) ?
231 (1 + ext4_bg_num_gdb(sb, src_group) +
232 le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
233
234 start_blk += overhead;
235
236 BUG_ON(src_group >= group_data[0].group + flex_gd->count);
237 /* We collect contiguous blocks as much as possible. */
238 src_group++;
239 for (; src_group <= last_group; src_group++)
240 if (!ext4_bg_has_super(sb, src_group))
241 last_blk += group_data[src_group - group].blocks_count;
242 else
243 break;
244
245 /* Allocate block bitmaps */
246 for (; bb_index < flex_gd->count; bb_index++) {
247 if (start_blk >= last_blk)
248 goto next_group;
249 group_data[bb_index].block_bitmap = start_blk++;
250 ext4_get_group_no_and_offset(sb, start_blk - 1, &group, NULL);
251 group -= group_data[0].group;
252 group_data[group].free_blocks_count--;
253 if (flexbg_size > 1)
254 flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT;
255 }
256
257 /* Allocate inode bitmaps */
258 for (; ib_index < flex_gd->count; ib_index++) {
259 if (start_blk >= last_blk)
260 goto next_group;
261 group_data[ib_index].inode_bitmap = start_blk++;
262 ext4_get_group_no_and_offset(sb, start_blk - 1, &group, NULL);
263 group -= group_data[0].group;
264 group_data[group].free_blocks_count--;
265 if (flexbg_size > 1)
266 flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT;
267 }
268
269 /* Allocate inode tables */
270 for (; it_index < flex_gd->count; it_index++) {
271 if (start_blk + EXT4_SB(sb)->s_itb_per_group > last_blk)
272 goto next_group;
273 group_data[it_index].inode_table = start_blk;
274 ext4_get_group_no_and_offset(sb, start_blk, &group, NULL);
275 group -= group_data[0].group;
276 group_data[group].free_blocks_count -=
277 EXT4_SB(sb)->s_itb_per_group;
278 if (flexbg_size > 1)
279 flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT;
280
281 start_blk += EXT4_SB(sb)->s_itb_per_group;
282 }
283
284 if (test_opt(sb, DEBUG)) {
285 int i;
286 group = group_data[0].group;
287
288 printk(KERN_DEBUG "EXT4-fs: adding a flex group with "
289 "%d groups, flexbg size is %d:\n", flex_gd->count,
290 flexbg_size);
291
292 for (i = 0; i < flex_gd->count; i++) {
293 printk(KERN_DEBUG "adding %s group %u: %u "
294 "blocks (%d free)\n",
295 ext4_bg_has_super(sb, group + i) ? "normal" :
296 "no-super", group + i,
297 group_data[i].blocks_count,
298 group_data[i].free_blocks_count);
299 }
300 }
301}
302
137static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, 303static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
138 ext4_fsblk_t blk) 304 ext4_fsblk_t blk)
139{ 305{
@@ -179,131 +345,250 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh)
179} 345}
180 346
181/* 347/*
182 * Set up the block and inode bitmaps, and the inode table for the new group. 348 * set_flexbg_block_bitmap() mark @count blocks starting from @block used.
349 *
350 * Helper function for ext4_setup_new_group_blocks() which set .
351 *
352 * @sb: super block
353 * @handle: journal handle
354 * @flex_gd: flex group data
355 */
356static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,
357 struct ext4_new_flex_group_data *flex_gd,
358 ext4_fsblk_t block, ext4_group_t count)
359{
360 ext4_group_t count2;
361
362 ext4_debug("mark blocks [%llu/%u] used\n", block, count);
363 for (count2 = count; count > 0; count -= count2, block += count2) {
364 ext4_fsblk_t start;
365 struct buffer_head *bh;
366 ext4_group_t group;
367 int err;
368
369 ext4_get_group_no_and_offset(sb, block, &group, NULL);
370 start = ext4_group_first_block_no(sb, group);
371 group -= flex_gd->groups[0].group;
372
373 count2 = sb->s_blocksize * 8 - (block - start);
374 if (count2 > count)
375 count2 = count;
376
377 if (flex_gd->bg_flags[group] & EXT4_BG_BLOCK_UNINIT) {
378 BUG_ON(flex_gd->count > 1);
379 continue;
380 }
381
382 err = extend_or_restart_transaction(handle, 1);
383 if (err)
384 return err;
385
386 bh = sb_getblk(sb, flex_gd->groups[group].block_bitmap);
387 if (!bh)
388 return -EIO;
389
390 err = ext4_journal_get_write_access(handle, bh);
391 if (err)
392 return err;
393 ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n", block,
394 block - start, count2);
395 ext4_set_bits(bh->b_data, block - start, count2);
396
397 err = ext4_handle_dirty_metadata(handle, NULL, bh);
398 if (unlikely(err))
399 return err;
400 brelse(bh);
401 }
402
403 return 0;
404}
405
406/*
407 * Set up the block and inode bitmaps, and the inode table for the new groups.
183 * This doesn't need to be part of the main transaction, since we are only 408 * This doesn't need to be part of the main transaction, since we are only
184 * changing blocks outside the actual filesystem. We still do journaling to 409 * changing blocks outside the actual filesystem. We still do journaling to
185 * ensure the recovery is correct in case of a failure just after resize. 410 * ensure the recovery is correct in case of a failure just after resize.
186 * If any part of this fails, we simply abort the resize. 411 * If any part of this fails, we simply abort the resize.
412 *
413 * setup_new_flex_group_blocks handles a flex group as follow:
414 * 1. copy super block and GDT, and initialize group tables if necessary.
415 * In this step, we only set bits in blocks bitmaps for blocks taken by
416 * super block and GDT.
417 * 2. allocate group tables in block bitmaps, that is, set bits in block
418 * bitmap for blocks taken by group tables.
187 */ 419 */
188static int setup_new_group_blocks(struct super_block *sb, 420static int setup_new_flex_group_blocks(struct super_block *sb,
189 struct ext4_new_group_data *input) 421 struct ext4_new_flex_group_data *flex_gd)
190{ 422{
423 int group_table_count[] = {1, 1, EXT4_SB(sb)->s_itb_per_group};
424 ext4_fsblk_t start;
425 ext4_fsblk_t block;
191 struct ext4_sb_info *sbi = EXT4_SB(sb); 426 struct ext4_sb_info *sbi = EXT4_SB(sb);
192 ext4_fsblk_t start = ext4_group_first_block_no(sb, input->group); 427 struct ext4_super_block *es = sbi->s_es;
193 int reserved_gdb = ext4_bg_has_super(sb, input->group) ? 428 struct ext4_new_group_data *group_data = flex_gd->groups;
194 le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0; 429 __u16 *bg_flags = flex_gd->bg_flags;
195 unsigned long gdblocks = ext4_bg_num_gdb(sb, input->group);
196 struct buffer_head *bh;
197 handle_t *handle; 430 handle_t *handle;
198 ext4_fsblk_t block; 431 ext4_group_t group, count;
199 ext4_grpblk_t bit; 432 struct buffer_head *bh = NULL;
200 int i; 433 int reserved_gdb, i, j, err = 0, err2;
201 int err = 0, err2; 434
435 BUG_ON(!flex_gd->count || !group_data ||
436 group_data[0].group != sbi->s_groups_count);
437
438 reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks);
202 439
203 /* This transaction may be extended/restarted along the way */ 440 /* This transaction may be extended/restarted along the way */
204 handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA); 441 handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA);
205
206 if (IS_ERR(handle)) 442 if (IS_ERR(handle))
207 return PTR_ERR(handle); 443 return PTR_ERR(handle);
208 444
209 BUG_ON(input->group != sbi->s_groups_count); 445 group = group_data[0].group;
446 for (i = 0; i < flex_gd->count; i++, group++) {
447 unsigned long gdblocks;
210 448
211 /* Copy all of the GDT blocks into the backup in this group */ 449 gdblocks = ext4_bg_num_gdb(sb, group);
212 for (i = 0, bit = 1, block = start + 1; 450 start = ext4_group_first_block_no(sb, group);
213 i < gdblocks; i++, block++, bit++) {
214 struct buffer_head *gdb;
215 451
216 ext4_debug("update backup group %#04llx (+%d)\n", block, bit); 452 /* Copy all of the GDT blocks into the backup in this group */
217 err = extend_or_restart_transaction(handle, 1); 453 for (j = 0, block = start + 1; j < gdblocks; j++, block++) {
218 if (err) 454 struct buffer_head *gdb;
219 goto exit_journal;
220 455
221 gdb = sb_getblk(sb, block); 456 ext4_debug("update backup group %#04llx\n", block);
222 if (!gdb) { 457 err = extend_or_restart_transaction(handle, 1);
223 err = -EIO; 458 if (err)
224 goto exit_journal; 459 goto out;
225 } 460
226 if ((err = ext4_journal_get_write_access(handle, gdb))) { 461 gdb = sb_getblk(sb, block);
462 if (!gdb) {
463 err = -EIO;
464 goto out;
465 }
466
467 err = ext4_journal_get_write_access(handle, gdb);
468 if (err) {
469 brelse(gdb);
470 goto out;
471 }
472 memcpy(gdb->b_data, sbi->s_group_desc[j]->b_data,
473 gdb->b_size);
474 set_buffer_uptodate(gdb);
475
476 err = ext4_handle_dirty_metadata(handle, NULL, gdb);
477 if (unlikely(err)) {
478 brelse(gdb);
479 goto out;
480 }
227 brelse(gdb); 481 brelse(gdb);
228 goto exit_journal;
229 } 482 }
230 memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size); 483
231 set_buffer_uptodate(gdb); 484 /* Zero out all of the reserved backup group descriptor
232 err = ext4_handle_dirty_metadata(handle, NULL, gdb); 485 * table blocks
233 if (unlikely(err)) { 486 */
234 brelse(gdb); 487 if (ext4_bg_has_super(sb, group)) {
235 goto exit_journal; 488 err = sb_issue_zeroout(sb, gdblocks + start + 1,
489 reserved_gdb, GFP_NOFS);
490 if (err)
491 goto out;
236 } 492 }
237 brelse(gdb);
238 }
239 493
240 /* Zero out all of the reserved backup group descriptor table blocks */ 494 /* Initialize group tables of the grop @group */
241 ext4_debug("clear inode table blocks %#04llx -> %#04lx\n", 495 if (!(bg_flags[i] & EXT4_BG_INODE_ZEROED))
242 block, sbi->s_itb_per_group); 496 goto handle_bb;
243 err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb,
244 GFP_NOFS);
245 if (err)
246 goto exit_journal;
247 497
248 err = extend_or_restart_transaction(handle, 2); 498 /* Zero out all of the inode table blocks */
249 if (err) 499 block = group_data[i].inode_table;
250 goto exit_journal; 500 ext4_debug("clear inode table blocks %#04llx -> %#04lx\n",
501 block, sbi->s_itb_per_group);
502 err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group,
503 GFP_NOFS);
504 if (err)
505 goto out;
251 506
252 bh = bclean(handle, sb, input->block_bitmap); 507handle_bb:
253 if (IS_ERR(bh)) { 508 if (bg_flags[i] & EXT4_BG_BLOCK_UNINIT)
254 err = PTR_ERR(bh); 509 goto handle_ib;
255 goto exit_journal;
256 }
257 510
258 if (ext4_bg_has_super(sb, input->group)) { 511 /* Initialize block bitmap of the @group */
259 ext4_debug("mark backup group tables %#04llx (+0)\n", start); 512 block = group_data[i].block_bitmap;
260 ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb + 1); 513 err = extend_or_restart_transaction(handle, 1);
261 } 514 if (err)
515 goto out;
262 516
263 ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap, 517 bh = bclean(handle, sb, block);
264 input->block_bitmap - start); 518 if (IS_ERR(bh)) {
265 ext4_set_bit(input->block_bitmap - start, bh->b_data); 519 err = PTR_ERR(bh);
266 ext4_debug("mark inode bitmap %#04llx (+%llu)\n", input->inode_bitmap, 520 goto out;
267 input->inode_bitmap - start); 521 }
268 ext4_set_bit(input->inode_bitmap - start, bh->b_data); 522 if (ext4_bg_has_super(sb, group)) {
269 523 ext4_debug("mark backup superblock %#04llx (+0)\n",
270 /* Zero out all of the inode table blocks */ 524 start);
271 block = input->inode_table; 525 ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb +
272 ext4_debug("clear inode table blocks %#04llx -> %#04lx\n", 526 1);
273 block, sbi->s_itb_per_group); 527 }
274 err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS); 528 ext4_mark_bitmap_end(group_data[i].blocks_count,
275 if (err) 529 sb->s_blocksize * 8, bh->b_data);
276 goto exit_bh; 530 err = ext4_handle_dirty_metadata(handle, NULL, bh);
277 ext4_set_bits(bh->b_data, input->inode_table - start, 531 if (err)
278 sbi->s_itb_per_group); 532 goto out;
533 brelse(bh);
279 534
535handle_ib:
536 if (bg_flags[i] & EXT4_BG_INODE_UNINIT)
537 continue;
280 538
281 ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, 539 /* Initialize inode bitmap of the @group */
282 bh->b_data); 540 block = group_data[i].inode_bitmap;
283 err = ext4_handle_dirty_metadata(handle, NULL, bh); 541 err = extend_or_restart_transaction(handle, 1);
284 if (unlikely(err)) { 542 if (err)
285 ext4_std_error(sb, err); 543 goto out;
286 goto exit_bh; 544 /* Mark unused entries in inode bitmap used */
545 bh = bclean(handle, sb, block);
546 if (IS_ERR(bh)) {
547 err = PTR_ERR(bh);
548 goto out;
549 }
550
551 ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb),
552 sb->s_blocksize * 8, bh->b_data);
553 err = ext4_handle_dirty_metadata(handle, NULL, bh);
554 if (err)
555 goto out;
556 brelse(bh);
287 } 557 }
288 brelse(bh); 558 bh = NULL;
289 /* Mark unused entries in inode bitmap used */ 559
290 ext4_debug("clear inode bitmap %#04llx (+%llu)\n", 560 /* Mark group tables in block bitmap */
291 input->inode_bitmap, input->inode_bitmap - start); 561 for (j = 0; j < GROUP_TABLE_COUNT; j++) {
292 if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) { 562 count = group_table_count[j];
293 err = PTR_ERR(bh); 563 start = (&group_data[0].block_bitmap)[j];
294 goto exit_journal; 564 block = start;
565 for (i = 1; i < flex_gd->count; i++) {
566 block += group_table_count[j];
567 if (block == (&group_data[i].block_bitmap)[j]) {
568 count += group_table_count[j];
569 continue;
570 }
571 err = set_flexbg_block_bitmap(sb, handle,
572 flex_gd, start, count);
573 if (err)
574 goto out;
575 count = group_table_count[j];
576 start = group_data[i].block_bitmap;
577 block = start;
578 }
579
580 if (count) {
581 err = set_flexbg_block_bitmap(sb, handle,
582 flex_gd, start, count);
583 if (err)
584 goto out;
585 }
295 } 586 }
296 587
297 ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, 588out:
298 bh->b_data);
299 err = ext4_handle_dirty_metadata(handle, NULL, bh);
300 if (unlikely(err))
301 ext4_std_error(sb, err);
302exit_bh:
303 brelse(bh); 589 brelse(bh);
304 590 err2 = ext4_journal_stop(handle);
305exit_journal: 591 if (err2 && !err)
306 if ((err2 = ext4_journal_stop(handle)) && !err)
307 err = err2; 592 err = err2;
308 593
309 return err; 594 return err;
@@ -351,10 +636,10 @@ static unsigned ext4_list_backups(struct super_block *sb, unsigned *three,
351 * groups in current filesystem that have BACKUPS, or -ve error code. 636 * groups in current filesystem that have BACKUPS, or -ve error code.
352 */ 637 */
353static int verify_reserved_gdb(struct super_block *sb, 638static int verify_reserved_gdb(struct super_block *sb,
639 ext4_group_t end,
354 struct buffer_head *primary) 640 struct buffer_head *primary)
355{ 641{
356 const ext4_fsblk_t blk = primary->b_blocknr; 642 const ext4_fsblk_t blk = primary->b_blocknr;
357 const ext4_group_t end = EXT4_SB(sb)->s_groups_count;
358 unsigned three = 1; 643 unsigned three = 1;
359 unsigned five = 5; 644 unsigned five = 5;
360 unsigned seven = 7; 645 unsigned seven = 7;
@@ -429,7 +714,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
429 if (!gdb_bh) 714 if (!gdb_bh)
430 return -EIO; 715 return -EIO;
431 716
432 gdbackups = verify_reserved_gdb(sb, gdb_bh); 717 gdbackups = verify_reserved_gdb(sb, group, gdb_bh);
433 if (gdbackups < 0) { 718 if (gdbackups < 0) {
434 err = gdbackups; 719 err = gdbackups;
435 goto exit_bh; 720 goto exit_bh;
@@ -592,7 +877,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
592 err = -EIO; 877 err = -EIO;
593 goto exit_bh; 878 goto exit_bh;
594 } 879 }
595 if ((gdbackups = verify_reserved_gdb(sb, primary[res])) < 0) { 880 gdbackups = verify_reserved_gdb(sb, group, primary[res]);
881 if (gdbackups < 0) {
596 brelse(primary[res]); 882 brelse(primary[res]);
597 err = gdbackups; 883 err = gdbackups;
598 goto exit_bh; 884 goto exit_bh;
@@ -735,6 +1021,348 @@ exit_err:
735 } 1021 }
736} 1022}
737 1023
1024/*
1025 * ext4_add_new_descs() adds @count group descriptor of groups
1026 * starting at @group
1027 *
1028 * @handle: journal handle
1029 * @sb: super block
1030 * @group: the group no. of the first group desc to be added
1031 * @resize_inode: the resize inode
1032 * @count: number of group descriptors to be added
1033 */
1034static int ext4_add_new_descs(handle_t *handle, struct super_block *sb,
1035 ext4_group_t group, struct inode *resize_inode,
1036 ext4_group_t count)
1037{
1038 struct ext4_sb_info *sbi = EXT4_SB(sb);
1039 struct ext4_super_block *es = sbi->s_es;
1040 struct buffer_head *gdb_bh;
1041 int i, gdb_off, gdb_num, err = 0;
1042
1043 for (i = 0; i < count; i++, group++) {
1044 int reserved_gdb = ext4_bg_has_super(sb, group) ?
1045 le16_to_cpu(es->s_reserved_gdt_blocks) : 0;
1046
1047 gdb_off = group % EXT4_DESC_PER_BLOCK(sb);
1048 gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
1049
1050 /*
1051 * We will only either add reserved group blocks to a backup group
1052 * or remove reserved blocks for the first group in a new group block.
1053 * Doing both would be mean more complex code, and sane people don't
1054 * use non-sparse filesystems anymore. This is already checked above.
1055 */
1056 if (gdb_off) {
1057 gdb_bh = sbi->s_group_desc[gdb_num];
1058 err = ext4_journal_get_write_access(handle, gdb_bh);
1059
1060 if (!err && reserved_gdb && ext4_bg_num_gdb(sb, group))
1061 err = reserve_backup_gdb(handle, resize_inode, group);
1062 } else
1063 err = add_new_gdb(handle, resize_inode, group);
1064 if (err)
1065 break;
1066 }
1067 return err;
1068}
1069
1070/*
1071 * ext4_setup_new_descs() will set up the group descriptor descriptors of a flex bg
1072 */
1073static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb,
1074 struct ext4_new_flex_group_data *flex_gd)
1075{
1076 struct ext4_new_group_data *group_data = flex_gd->groups;
1077 struct ext4_group_desc *gdp;
1078 struct ext4_sb_info *sbi = EXT4_SB(sb);
1079 struct buffer_head *gdb_bh;
1080 ext4_group_t group;
1081 __u16 *bg_flags = flex_gd->bg_flags;
1082 int i, gdb_off, gdb_num, err = 0;
1083
1084
1085 for (i = 0; i < flex_gd->count; i++, group_data++, bg_flags++) {
1086 group = group_data->group;
1087
1088 gdb_off = group % EXT4_DESC_PER_BLOCK(sb);
1089 gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
1090
1091 /*
1092 * get_write_access() has been called on gdb_bh by ext4_add_new_desc().
1093 */
1094 gdb_bh = sbi->s_group_desc[gdb_num];
1095 /* Update group descriptor block for new group */
1096 gdp = (struct ext4_group_desc *)((char *)gdb_bh->b_data +
1097 gdb_off * EXT4_DESC_SIZE(sb));
1098
1099 memset(gdp, 0, EXT4_DESC_SIZE(sb));
1100 ext4_block_bitmap_set(sb, gdp, group_data->block_bitmap);
1101 ext4_inode_bitmap_set(sb, gdp, group_data->inode_bitmap);
1102 ext4_inode_table_set(sb, gdp, group_data->inode_table);
1103 ext4_free_group_clusters_set(sb, gdp,
1104 EXT4_B2C(sbi, group_data->free_blocks_count));
1105 ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
1106 gdp->bg_flags = cpu_to_le16(*bg_flags);
1107 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
1108
1109 err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh);
1110 if (unlikely(err)) {
1111 ext4_std_error(sb, err);
1112 break;
1113 }
1114
1115 /*
1116 * We can allocate memory for mb_alloc based on the new group
1117 * descriptor
1118 */
1119 err = ext4_mb_add_groupinfo(sb, group, gdp);
1120 if (err)
1121 break;
1122 }
1123 return err;
1124}
1125
1126/*
1127 * ext4_update_super() updates the super block so that the newly added
1128 * groups can be seen by the filesystem.
1129 *
1130 * @sb: super block
1131 * @flex_gd: new added groups
1132 */
1133static void ext4_update_super(struct super_block *sb,
1134 struct ext4_new_flex_group_data *flex_gd)
1135{
1136 ext4_fsblk_t blocks_count = 0;
1137 ext4_fsblk_t free_blocks = 0;
1138 ext4_fsblk_t reserved_blocks = 0;
1139 struct ext4_new_group_data *group_data = flex_gd->groups;
1140 struct ext4_sb_info *sbi = EXT4_SB(sb);
1141 struct ext4_super_block *es = sbi->s_es;
1142 int i;
1143
1144 BUG_ON(flex_gd->count == 0 || group_data == NULL);
1145 /*
1146 * Make the new blocks and inodes valid next. We do this before
1147 * increasing the group count so that once the group is enabled,
1148 * all of its blocks and inodes are already valid.
1149 *
1150 * We always allocate group-by-group, then block-by-block or
1151 * inode-by-inode within a group, so enabling these
1152 * blocks/inodes before the group is live won't actually let us
1153 * allocate the new space yet.
1154 */
1155 for (i = 0; i < flex_gd->count; i++) {
1156 blocks_count += group_data[i].blocks_count;
1157 free_blocks += group_data[i].free_blocks_count;
1158 }
1159
1160 reserved_blocks = ext4_r_blocks_count(es) * 100;
1161 do_div(reserved_blocks, ext4_blocks_count(es));
1162 reserved_blocks *= blocks_count;
1163 do_div(reserved_blocks, 100);
1164
1165 ext4_blocks_count_set(es, ext4_blocks_count(es) + blocks_count);
1166 le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb) *
1167 flex_gd->count);
1168
1169 /*
1170 * We need to protect s_groups_count against other CPUs seeing
1171 * inconsistent state in the superblock.
1172 *
1173 * The precise rules we use are:
1174 *
1175 * * Writers must perform a smp_wmb() after updating all
1176 * dependent data and before modifying the groups count
1177 *
1178 * * Readers must perform an smp_rmb() after reading the groups
1179 * count and before reading any dependent data.
1180 *
1181 * NB. These rules can be relaxed when checking the group count
1182 * while freeing data, as we can only allocate from a block
1183 * group after serialising against the group count, and we can
1184 * only then free after serialising in turn against that
1185 * allocation.
1186 */
1187 smp_wmb();
1188
1189 /* Update the global fs size fields */
1190 sbi->s_groups_count += flex_gd->count;
1191
1192 /* Update the reserved block counts only once the new group is
1193 * active. */
1194 ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) +
1195 reserved_blocks);
1196
1197 /* Update the free space counts */
1198 percpu_counter_add(&sbi->s_freeclusters_counter,
1199 EXT4_B2C(sbi, free_blocks));
1200 percpu_counter_add(&sbi->s_freeinodes_counter,
1201 EXT4_INODES_PER_GROUP(sb) * flex_gd->count);
1202
1203 if (EXT4_HAS_INCOMPAT_FEATURE(sb,
1204 EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
1205 sbi->s_log_groups_per_flex) {
1206 ext4_group_t flex_group;
1207 flex_group = ext4_flex_group(sbi, group_data[0].group);
1208 atomic_add(EXT4_B2C(sbi, free_blocks),
1209 &sbi->s_flex_groups[flex_group].free_clusters);
1210 atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count,
1211 &sbi->s_flex_groups[flex_group].free_inodes);
1212 }
1213
1214 if (test_opt(sb, DEBUG))
1215 printk(KERN_DEBUG "EXT4-fs: added group %u:"
1216 "%llu blocks(%llu free %llu reserved)\n", flex_gd->count,
1217 blocks_count, free_blocks, reserved_blocks);
1218}
1219
1220/* Add a flex group to an fs. Ensure we handle all possible error conditions
1221 * _before_ we start modifying the filesystem, because we cannot abort the
1222 * transaction and not have it write the data to disk.
1223 */
1224static int ext4_flex_group_add(struct super_block *sb,
1225 struct inode *resize_inode,
1226 struct ext4_new_flex_group_data *flex_gd)
1227{
1228 struct ext4_sb_info *sbi = EXT4_SB(sb);
1229 struct ext4_super_block *es = sbi->s_es;
1230 ext4_fsblk_t o_blocks_count;
1231 ext4_grpblk_t last;
1232 ext4_group_t group;
1233 handle_t *handle;
1234 unsigned reserved_gdb;
1235 int err = 0, err2 = 0, credit;
1236
1237 BUG_ON(!flex_gd->count || !flex_gd->groups || !flex_gd->bg_flags);
1238
1239 reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks);
1240 o_blocks_count = ext4_blocks_count(es);
1241 ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last);
1242 BUG_ON(last);
1243
1244 err = setup_new_flex_group_blocks(sb, flex_gd);
1245 if (err)
1246 goto exit;
1247 /*
1248 * We will always be modifying at least the superblock and GDT
1249 * block. If we are adding a group past the last current GDT block,
1250 * we will also modify the inode and the dindirect block. If we
1251 * are adding a group with superblock/GDT backups we will also
1252 * modify each of the reserved GDT dindirect blocks.
1253 */
1254 credit = flex_gd->count * 4 + reserved_gdb;
1255 handle = ext4_journal_start_sb(sb, credit);
1256 if (IS_ERR(handle)) {
1257 err = PTR_ERR(handle);
1258 goto exit;
1259 }
1260
1261 err = ext4_journal_get_write_access(handle, sbi->s_sbh);
1262 if (err)
1263 goto exit_journal;
1264
1265 group = flex_gd->groups[0].group;
1266 BUG_ON(group != EXT4_SB(sb)->s_groups_count);
1267 err = ext4_add_new_descs(handle, sb, group,
1268 resize_inode, flex_gd->count);
1269 if (err)
1270 goto exit_journal;
1271
1272 err = ext4_setup_new_descs(handle, sb, flex_gd);
1273 if (err)
1274 goto exit_journal;
1275
1276 ext4_update_super(sb, flex_gd);
1277
1278 err = ext4_handle_dirty_super(handle, sb);
1279
1280exit_journal:
1281 err2 = ext4_journal_stop(handle);
1282 if (!err)
1283 err = err2;
1284
1285 if (!err) {
1286 int i;
1287 update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es,
1288 sizeof(struct ext4_super_block));
1289 for (i = 0; i < flex_gd->count; i++, group++) {
1290 struct buffer_head *gdb_bh;
1291 int gdb_num;
1292 gdb_num = group / EXT4_BLOCKS_PER_GROUP(sb);
1293 gdb_bh = sbi->s_group_desc[gdb_num];
1294 update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data,
1295 gdb_bh->b_size);
1296 }
1297 }
1298exit:
1299 return err;
1300}
1301
1302static int ext4_setup_next_flex_gd(struct super_block *sb,
1303 struct ext4_new_flex_group_data *flex_gd,
1304 ext4_fsblk_t n_blocks_count,
1305 unsigned long flexbg_size)
1306{
1307 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
1308 struct ext4_new_group_data *group_data = flex_gd->groups;
1309 ext4_fsblk_t o_blocks_count;
1310 ext4_group_t n_group;
1311 ext4_group_t group;
1312 ext4_group_t last_group;
1313 ext4_grpblk_t last;
1314 ext4_grpblk_t blocks_per_group;
1315 unsigned long i;
1316
1317 blocks_per_group = EXT4_BLOCKS_PER_GROUP(sb);
1318
1319 o_blocks_count = ext4_blocks_count(es);
1320
1321 if (o_blocks_count == n_blocks_count)
1322 return 0;
1323
1324 ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last);
1325 BUG_ON(last);
1326 ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &last);
1327
1328 last_group = group | (flexbg_size - 1);
1329 if (last_group > n_group)
1330 last_group = n_group;
1331
1332 flex_gd->count = last_group - group + 1;
1333
1334 for (i = 0; i < flex_gd->count; i++) {
1335 int overhead;
1336
1337 group_data[i].group = group + i;
1338 group_data[i].blocks_count = blocks_per_group;
1339 overhead = ext4_bg_has_super(sb, group + i) ?
1340 (1 + ext4_bg_num_gdb(sb, group + i) +
1341 le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
1342 group_data[i].free_blocks_count = blocks_per_group - overhead;
1343 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
1344 EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
1345 flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT |
1346 EXT4_BG_INODE_UNINIT;
1347 else
1348 flex_gd->bg_flags[i] = EXT4_BG_INODE_ZEROED;
1349 }
1350
1351 if (last_group == n_group &&
1352 EXT4_HAS_RO_COMPAT_FEATURE(sb,
1353 EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
1354 /* We need to initialize block bitmap of last group. */
1355 flex_gd->bg_flags[i - 1] &= ~EXT4_BG_BLOCK_UNINIT;
1356
1357 if ((last_group == n_group) && (last != blocks_per_group - 1)) {
1358 group_data[i - 1].blocks_count = last + 1;
1359 group_data[i - 1].free_blocks_count -= blocks_per_group-
1360 last - 1;
1361 }
1362
1363 return 1;
1364}
1365
738/* Add group descriptor data to an existing or new group descriptor block. 1366/* Add group descriptor data to an existing or new group descriptor block.
739 * Ensure we handle all possible error conditions _before_ we start modifying 1367 * Ensure we handle all possible error conditions _before_ we start modifying
740 * the filesystem, because we cannot abort the transaction and not have it 1368 * the filesystem, because we cannot abort the transaction and not have it
@@ -750,16 +1378,15 @@ exit_err:
750 */ 1378 */
751int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) 1379int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
752{ 1380{
1381 struct ext4_new_flex_group_data flex_gd;
753 struct ext4_sb_info *sbi = EXT4_SB(sb); 1382 struct ext4_sb_info *sbi = EXT4_SB(sb);
754 struct ext4_super_block *es = sbi->s_es; 1383 struct ext4_super_block *es = sbi->s_es;
755 int reserved_gdb = ext4_bg_has_super(sb, input->group) ? 1384 int reserved_gdb = ext4_bg_has_super(sb, input->group) ?
756 le16_to_cpu(es->s_reserved_gdt_blocks) : 0; 1385 le16_to_cpu(es->s_reserved_gdt_blocks) : 0;
757 struct buffer_head *primary = NULL;
758 struct ext4_group_desc *gdp;
759 struct inode *inode = NULL; 1386 struct inode *inode = NULL;
760 handle_t *handle;
761 int gdb_off, gdb_num; 1387 int gdb_off, gdb_num;
762 int err, err2; 1388 int err;
1389 __u16 bg_flags = 0;
763 1390
764 gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); 1391 gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb);
765 gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb); 1392 gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb);
@@ -798,175 +1425,69 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
798 } 1425 }
799 1426
800 1427
801 if ((err = verify_group_input(sb, input))) 1428 err = verify_group_input(sb, input);
802 goto exit_put; 1429 if (err)
1430 goto out;
803 1431
804 if ((err = setup_new_group_blocks(sb, input))) 1432 flex_gd.count = 1;
805 goto exit_put; 1433 flex_gd.groups = input;
1434 flex_gd.bg_flags = &bg_flags;
1435 err = ext4_flex_group_add(sb, inode, &flex_gd);
1436out:
1437 iput(inode);
1438 return err;
1439} /* ext4_group_add */
806 1440
807 /* 1441/*
808 * We will always be modifying at least the superblock and a GDT 1442 * extend a group without checking assuming that checking has been done.
809 * block. If we are adding a group past the last current GDT block, 1443 */
810 * we will also modify the inode and the dindirect block. If we 1444static int ext4_group_extend_no_check(struct super_block *sb,
811 * are adding a group with superblock/GDT backups we will also 1445 ext4_fsblk_t o_blocks_count, ext4_grpblk_t add)
812 * modify each of the reserved GDT dindirect blocks. 1446{
1447 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
1448 handle_t *handle;
1449 int err = 0, err2;
1450
1451 /* We will update the superblock, one block bitmap, and
1452 * one group descriptor via ext4_group_add_blocks().
813 */ 1453 */
814 handle = ext4_journal_start_sb(sb, 1454 handle = ext4_journal_start_sb(sb, 3);
815 ext4_bg_has_super(sb, input->group) ?
816 3 + reserved_gdb : 4);
817 if (IS_ERR(handle)) { 1455 if (IS_ERR(handle)) {
818 err = PTR_ERR(handle); 1456 err = PTR_ERR(handle);
819 goto exit_put; 1457 ext4_warning(sb, "error %d on journal start", err);
1458 return err;
820 } 1459 }
821 1460
822 if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh))) 1461 err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
823 goto exit_journal; 1462 if (err) {
824 1463 ext4_warning(sb, "error %d on journal write access", err);
825 /* 1464 goto errout;
826 * We will only either add reserved group blocks to a backup group
827 * or remove reserved blocks for the first group in a new group block.
828 * Doing both would be mean more complex code, and sane people don't
829 * use non-sparse filesystems anymore. This is already checked above.
830 */
831 if (gdb_off) {
832 primary = sbi->s_group_desc[gdb_num];
833 if ((err = ext4_journal_get_write_access(handle, primary)))
834 goto exit_journal;
835
836 if (reserved_gdb && ext4_bg_num_gdb(sb, input->group)) {
837 err = reserve_backup_gdb(handle, inode, input->group);
838 if (err)
839 goto exit_journal;
840 }
841 } else {
842 /*
843 * Note that we can access new group descriptor block safely
844 * only if add_new_gdb() succeeds.
845 */
846 err = add_new_gdb(handle, inode, input->group);
847 if (err)
848 goto exit_journal;
849 primary = sbi->s_group_desc[gdb_num];
850 } 1465 }
851 1466
852 /* 1467 ext4_blocks_count_set(es, o_blocks_count + add);
853 * OK, now we've set up the new group. Time to make it active. 1468 ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
854 * 1469 o_blocks_count + add);
855 * so we have to be safe wrt. concurrent accesses the group 1470 /* We add the blocks to the bitmap and set the group need init bit */
856 * data. So we need to be careful to set all of the relevant 1471 err = ext4_group_add_blocks(handle, sb, o_blocks_count, add);
857 * group descriptor data etc. *before* we enable the group.
858 *
859 * The key field here is sbi->s_groups_count: as long as
860 * that retains its old value, nobody is going to access the new
861 * group.
862 *
863 * So first we update all the descriptor metadata for the new
864 * group; then we update the total disk blocks count; then we
865 * update the groups count to enable the group; then finally we
866 * update the free space counts so that the system can start
867 * using the new disk blocks.
868 */
869
870 /* Update group descriptor block for new group */
871 gdp = (struct ext4_group_desc *)((char *)primary->b_data +
872 gdb_off * EXT4_DESC_SIZE(sb));
873
874 memset(gdp, 0, EXT4_DESC_SIZE(sb));
875 ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */
876 ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */
877 ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */
878 ext4_free_group_clusters_set(sb, gdp, input->free_blocks_count);
879 ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
880 gdp->bg_flags = cpu_to_le16(EXT4_BG_INODE_ZEROED);
881 gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp);
882
883 /*
884 * We can allocate memory for mb_alloc based on the new group
885 * descriptor
886 */
887 err = ext4_mb_add_groupinfo(sb, input->group, gdp);
888 if (err) 1472 if (err)
889 goto exit_journal; 1473 goto errout;
890
891 /*
892 * Make the new blocks and inodes valid next. We do this before
893 * increasing the group count so that once the group is enabled,
894 * all of its blocks and inodes are already valid.
895 *
896 * We always allocate group-by-group, then block-by-block or
897 * inode-by-inode within a group, so enabling these
898 * blocks/inodes before the group is live won't actually let us
899 * allocate the new space yet.
900 */
901 ext4_blocks_count_set(es, ext4_blocks_count(es) +
902 input->blocks_count);
903 le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb));
904
905 /*
906 * We need to protect s_groups_count against other CPUs seeing
907 * inconsistent state in the superblock.
908 *
909 * The precise rules we use are:
910 *
911 * * Writers must perform a smp_wmb() after updating all dependent
912 * data and before modifying the groups count
913 *
914 * * Readers must perform an smp_rmb() after reading the groups count
915 * and before reading any dependent data.
916 *
917 * NB. These rules can be relaxed when checking the group count
918 * while freeing data, as we can only allocate from a block
919 * group after serialising against the group count, and we can
920 * only then free after serialising in turn against that
921 * allocation.
922 */
923 smp_wmb();
924
925 /* Update the global fs size fields */
926 sbi->s_groups_count++;
927
928 err = ext4_handle_dirty_metadata(handle, NULL, primary);
929 if (unlikely(err)) {
930 ext4_std_error(sb, err);
931 goto exit_journal;
932 }
933
934 /* Update the reserved block counts only once the new group is
935 * active. */
936 ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) +
937 input->reserved_blocks);
938
939 /* Update the free space counts */
940 percpu_counter_add(&sbi->s_freeclusters_counter,
941 EXT4_B2C(sbi, input->free_blocks_count));
942 percpu_counter_add(&sbi->s_freeinodes_counter,
943 EXT4_INODES_PER_GROUP(sb));
944
945 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
946 sbi->s_log_groups_per_flex) {
947 ext4_group_t flex_group;
948 flex_group = ext4_flex_group(sbi, input->group);
949 atomic_add(EXT4_B2C(sbi, input->free_blocks_count),
950 &sbi->s_flex_groups[flex_group].free_clusters);
951 atomic_add(EXT4_INODES_PER_GROUP(sb),
952 &sbi->s_flex_groups[flex_group].free_inodes);
953 }
954
955 ext4_handle_dirty_super(handle, sb); 1474 ext4_handle_dirty_super(handle, sb);
956 1475 ext4_debug("freed blocks %llu through %llu\n", o_blocks_count,
957exit_journal: 1476 o_blocks_count + add);
958 if ((err2 = ext4_journal_stop(handle)) && !err) 1477errout:
1478 err2 = ext4_journal_stop(handle);
1479 if (err2 && !err)
959 err = err2; 1480 err = err2;
960 if (!err && primary) { 1481
961 update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, 1482 if (!err) {
1483 if (test_opt(sb, DEBUG))
1484 printk(KERN_DEBUG "EXT4-fs: extended group to %llu "
1485 "blocks\n", ext4_blocks_count(es));
1486 update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es,
962 sizeof(struct ext4_super_block)); 1487 sizeof(struct ext4_super_block));
963 update_backups(sb, primary->b_blocknr, primary->b_data,
964 primary->b_size);
965 } 1488 }
966exit_put:
967 iput(inode);
968 return err; 1489 return err;
969} /* ext4_group_add */ 1490}
970 1491
971/* 1492/*
972 * Extend the filesystem to the new number of blocks specified. This entry 1493 * Extend the filesystem to the new number of blocks specified. This entry
@@ -985,8 +1506,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
985 ext4_grpblk_t last; 1506 ext4_grpblk_t last;
986 ext4_grpblk_t add; 1507 ext4_grpblk_t add;
987 struct buffer_head *bh; 1508 struct buffer_head *bh;
988 handle_t *handle; 1509 int err;
989 int err, err2;
990 ext4_group_t group; 1510 ext4_group_t group;
991 1511
992 o_blocks_count = ext4_blocks_count(es); 1512 o_blocks_count = ext4_blocks_count(es);
@@ -1042,42 +1562,119 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1042 } 1562 }
1043 brelse(bh); 1563 brelse(bh);
1044 1564
1045 /* We will update the superblock, one block bitmap, and 1565 err = ext4_group_extend_no_check(sb, o_blocks_count, add);
1046 * one group descriptor via ext4_free_blocks(). 1566 return err;
1047 */ 1567} /* ext4_group_extend */
1048 handle = ext4_journal_start_sb(sb, 3); 1568
1049 if (IS_ERR(handle)) { 1569/*
1050 err = PTR_ERR(handle); 1570 * ext4_resize_fs() resizes a fs to new size specified by @n_blocks_count
1051 ext4_warning(sb, "error %d on journal start", err); 1571 *
1052 goto exit_put; 1572 * @sb: super block of the fs to be resized
1573 * @n_blocks_count: the number of blocks resides in the resized fs
1574 */
1575int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
1576{
1577 struct ext4_new_flex_group_data *flex_gd = NULL;
1578 struct ext4_sb_info *sbi = EXT4_SB(sb);
1579 struct ext4_super_block *es = sbi->s_es;
1580 struct buffer_head *bh;
1581 struct inode *resize_inode;
1582 ext4_fsblk_t o_blocks_count;
1583 ext4_group_t o_group;
1584 ext4_group_t n_group;
1585 ext4_grpblk_t offset;
1586 unsigned long n_desc_blocks;
1587 unsigned long o_desc_blocks;
1588 unsigned long desc_blocks;
1589 int err = 0, flexbg_size = 1;
1590
1591 o_blocks_count = ext4_blocks_count(es);
1592
1593 if (test_opt(sb, DEBUG))
1594 printk(KERN_DEBUG "EXT4-fs: resizing filesystem from %llu "
1595 "upto %llu blocks\n", o_blocks_count, n_blocks_count);
1596
1597 if (n_blocks_count < o_blocks_count) {
1598 /* On-line shrinking not supported */
1599 ext4_warning(sb, "can't shrink FS - resize aborted");
1600 return -EINVAL;
1053 } 1601 }
1054 1602
1055 if ((err = ext4_journal_get_write_access(handle, 1603 if (n_blocks_count == o_blocks_count)
1056 EXT4_SB(sb)->s_sbh))) { 1604 /* Nothing need to do */
1057 ext4_warning(sb, "error %d on journal write access", err); 1605 return 0;
1058 ext4_journal_stop(handle); 1606
1059 goto exit_put; 1607 ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset);
1608 ext4_get_group_no_and_offset(sb, o_blocks_count, &o_group, &offset);
1609
1610 n_desc_blocks = (n_group + EXT4_DESC_PER_BLOCK(sb)) /
1611 EXT4_DESC_PER_BLOCK(sb);
1612 o_desc_blocks = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
1613 EXT4_DESC_PER_BLOCK(sb);
1614 desc_blocks = n_desc_blocks - o_desc_blocks;
1615
1616 if (desc_blocks &&
1617 (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE) ||
1618 le16_to_cpu(es->s_reserved_gdt_blocks) < desc_blocks)) {
1619 ext4_warning(sb, "No reserved GDT blocks, can't resize");
1620 return -EPERM;
1060 } 1621 }
1061 ext4_blocks_count_set(es, o_blocks_count + add);
1062 ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
1063 o_blocks_count + add);
1064 /* We add the blocks to the bitmap and set the group need init bit */
1065 err = ext4_group_add_blocks(handle, sb, o_blocks_count, add);
1066 ext4_handle_dirty_super(handle, sb);
1067 ext4_debug("freed blocks %llu through %llu\n", o_blocks_count,
1068 o_blocks_count + add);
1069 err2 = ext4_journal_stop(handle);
1070 if (!err && err2)
1071 err = err2;
1072 1622
1073 if (err) 1623 resize_inode = ext4_iget(sb, EXT4_RESIZE_INO);
1074 goto exit_put; 1624 if (IS_ERR(resize_inode)) {
1625 ext4_warning(sb, "Error opening resize inode");
1626 return PTR_ERR(resize_inode);
1627 }
1075 1628
1629 /* See if the device is actually as big as what was requested */
1630 bh = sb_bread(sb, n_blocks_count - 1);
1631 if (!bh) {
1632 ext4_warning(sb, "can't read last block, resize aborted");
1633 return -ENOSPC;
1634 }
1635 brelse(bh);
1636
1637 if (offset != 0) {
1638 /* extend the last group */
1639 ext4_grpblk_t add;
1640 add = EXT4_BLOCKS_PER_GROUP(sb) - offset;
1641 err = ext4_group_extend_no_check(sb, o_blocks_count, add);
1642 if (err)
1643 goto out;
1644 }
1645
1646 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
1647 es->s_log_groups_per_flex)
1648 flexbg_size = 1 << es->s_log_groups_per_flex;
1649
1650 o_blocks_count = ext4_blocks_count(es);
1651 if (o_blocks_count == n_blocks_count)
1652 goto out;
1653
1654 flex_gd = alloc_flex_gd(flexbg_size);
1655 if (flex_gd == NULL) {
1656 err = -ENOMEM;
1657 goto out;
1658 }
1659
1660 /* Add flex groups. Note that a regular group is a
1661 * flex group with 1 group.
1662 */
1663 while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count,
1664 flexbg_size)) {
1665 ext4_alloc_group_tables(sb, flex_gd, flexbg_size);
1666 err = ext4_flex_group_add(sb, resize_inode, flex_gd);
1667 if (unlikely(err))
1668 break;
1669 }
1670
1671out:
1672 if (flex_gd)
1673 free_flex_gd(flex_gd);
1674
1675 iput(resize_inode);
1076 if (test_opt(sb, DEBUG)) 1676 if (test_opt(sb, DEBUG))
1077 printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n", 1677 printk(KERN_DEBUG "EXT4-fs: resized filesystem from %llu "
1078 ext4_blocks_count(es)); 1678 "upto %llu blocks\n", o_blocks_count, n_blocks_count);
1079 update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es,
1080 sizeof(struct ext4_super_block));
1081exit_put:
1082 return err; 1679 return err;
1083} /* ext4_group_extend */ 1680}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 3e1329e2f826..502c61fd7392 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -930,7 +930,6 @@ static int ext4_drop_inode(struct inode *inode)
930static void ext4_i_callback(struct rcu_head *head) 930static void ext4_i_callback(struct rcu_head *head)
931{ 931{
932 struct inode *inode = container_of(head, struct inode, i_rcu); 932 struct inode *inode = container_of(head, struct inode, i_rcu);
933 INIT_LIST_HEAD(&inode->i_dentry);
934 kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); 933 kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
935} 934}
936 935
@@ -1033,11 +1032,11 @@ static inline void ext4_show_quota_options(struct seq_file *seq,
1033 * - it's set to a non-default value OR 1032 * - it's set to a non-default value OR
1034 * - if the per-sb default is different from the global default 1033 * - if the per-sb default is different from the global default
1035 */ 1034 */
1036static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) 1035static int ext4_show_options(struct seq_file *seq, struct dentry *root)
1037{ 1036{
1038 int def_errors; 1037 int def_errors;
1039 unsigned long def_mount_opts; 1038 unsigned long def_mount_opts;
1040 struct super_block *sb = vfs->mnt_sb; 1039 struct super_block *sb = root->d_sb;
1041 struct ext4_sb_info *sbi = EXT4_SB(sb); 1040 struct ext4_sb_info *sbi = EXT4_SB(sb);
1042 struct ext4_super_block *es = sbi->s_es; 1041 struct ext4_super_block *es = sbi->s_es;
1043 1042
@@ -1096,7 +1095,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
1096 } 1095 }
1097 if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) { 1096 if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) {
1098 seq_printf(seq, ",max_batch_time=%u", 1097 seq_printf(seq, ",max_batch_time=%u",
1099 (unsigned) sbi->s_min_batch_time); 1098 (unsigned) sbi->s_max_batch_time);
1100 } 1099 }
1101 1100
1102 /* 1101 /*
@@ -2006,17 +2005,16 @@ static int ext4_fill_flex_info(struct super_block *sb)
2006 struct ext4_group_desc *gdp = NULL; 2005 struct ext4_group_desc *gdp = NULL;
2007 ext4_group_t flex_group_count; 2006 ext4_group_t flex_group_count;
2008 ext4_group_t flex_group; 2007 ext4_group_t flex_group;
2009 int groups_per_flex = 0; 2008 unsigned int groups_per_flex = 0;
2010 size_t size; 2009 size_t size;
2011 int i; 2010 int i;
2012 2011
2013 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; 2012 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
2014 groups_per_flex = 1 << sbi->s_log_groups_per_flex; 2013 if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {
2015
2016 if (groups_per_flex < 2) {
2017 sbi->s_log_groups_per_flex = 0; 2014 sbi->s_log_groups_per_flex = 0;
2018 return 1; 2015 return 1;
2019 } 2016 }
2017 groups_per_flex = 1 << sbi->s_log_groups_per_flex;
2020 2018
2021 /* We allocate both existing and potentially added groups */ 2019 /* We allocate both existing and potentially added groups */
2022 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + 2020 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
@@ -2883,8 +2881,7 @@ cont_thread:
2883 } 2881 }
2884 mutex_unlock(&eli->li_list_mtx); 2882 mutex_unlock(&eli->li_list_mtx);
2885 2883
2886 if (freezing(current)) 2884 try_to_freeze();
2887 refrigerator();
2888 2885
2889 cur = jiffies; 2886 cur = jiffies;
2890 if ((time_after_eq(cur, next_wakeup)) || 2887 if ((time_after_eq(cur, next_wakeup)) ||
@@ -3508,7 +3505,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3508 * of the filesystem. 3505 * of the filesystem.
3509 */ 3506 */
3510 if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { 3507 if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
3511 ext4_msg(sb, KERN_WARNING, "bad geometry: first data" 3508 ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
3512 "block %u is beyond end of filesystem (%llu)", 3509 "block %u is beyond end of filesystem (%llu)",
3513 le32_to_cpu(es->s_first_data_block), 3510 le32_to_cpu(es->s_first_data_block),
3514 ext4_blocks_count(es)); 3511 ext4_blocks_count(es));
@@ -3735,10 +3732,12 @@ no_journal:
3735 } 3732 }
3736 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 3733 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
3737 ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck"); 3734 ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
3735 iput(root);
3738 goto failed_mount4; 3736 goto failed_mount4;
3739 } 3737 }
3740 sb->s_root = d_alloc_root(root); 3738 sb->s_root = d_alloc_root(root);
3741 if (!sb->s_root) { 3739 if (!sb->s_root) {
3740 iput(root);
3742 ext4_msg(sb, KERN_ERR, "get root dentry failed"); 3741 ext4_msg(sb, KERN_ERR, "get root dentry failed");
3743 ret = -ENOMEM; 3742 ret = -ENOMEM;
3744 goto failed_mount4; 3743 goto failed_mount4;
@@ -3775,7 +3774,7 @@ no_journal:
3775 if (err) { 3774 if (err) {
3776 ext4_msg(sb, KERN_ERR, "failed to initialize system " 3775 ext4_msg(sb, KERN_ERR, "failed to initialize system "
3777 "zone (%d)", err); 3776 "zone (%d)", err);
3778 goto failed_mount4; 3777 goto failed_mount4a;
3779 } 3778 }
3780 3779
3781 ext4_ext_init(sb); 3780 ext4_ext_init(sb);
@@ -3832,13 +3831,14 @@ cantfind_ext4:
3832failed_mount7: 3831failed_mount7:
3833 ext4_unregister_li_request(sb); 3832 ext4_unregister_li_request(sb);
3834failed_mount6: 3833failed_mount6:
3835 ext4_ext_release(sb);
3836failed_mount5:
3837 ext4_mb_release(sb); 3834 ext4_mb_release(sb);
3835failed_mount5:
3836 ext4_ext_release(sb);
3838 ext4_release_system_zone(sb); 3837 ext4_release_system_zone(sb);
3839failed_mount4: 3838failed_mount4a:
3840 iput(root); 3839 dput(sb->s_root);
3841 sb->s_root = NULL; 3840 sb->s_root = NULL;
3841failed_mount4:
3842 ext4_msg(sb, KERN_ERR, "mount failed"); 3842 ext4_msg(sb, KERN_ERR, "mount failed");
3843 destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq); 3843 destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq);
3844failed_mount_wq: 3844failed_mount_wq:
@@ -4782,7 +4782,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
4782 return -EINVAL; 4782 return -EINVAL;
4783 4783
4784 /* Quotafile not on the same filesystem? */ 4784 /* Quotafile not on the same filesystem? */
4785 if (path->mnt->mnt_sb != sb) 4785 if (path->dentry->d_sb != sb)
4786 return -EXDEV; 4786 return -EXDEV;
4787 /* Journaling quota? */ 4787 /* Journaling quota? */
4788 if (EXT4_SB(sb)->s_qf_names[type]) { 4788 if (EXT4_SB(sb)->s_qf_names[type]) {
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index 34e4350dd4d9..d2a200624af5 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -3,7 +3,6 @@
3 * Handler for storing security labels as extended attributes. 3 * Handler for storing security labels as extended attributes.
4 */ 4 */
5 5
6#include <linux/module.h>
7#include <linux/string.h> 6#include <linux/string.h>
8#include <linux/fs.h> 7#include <linux/fs.h>
9#include <linux/security.h> 8#include <linux/security.h>
@@ -48,8 +47,9 @@ ext4_xattr_security_set(struct dentry *dentry, const char *name,
48 name, value, size, flags); 47 name, value, size, flags);
49} 48}
50 49
51int ext4_initxattrs(struct inode *inode, const struct xattr *xattr_array, 50static int
52 void *fs_info) 51ext4_initxattrs(struct inode *inode, const struct xattr *xattr_array,
52 void *fs_info)
53{ 53{
54 const struct xattr *xattr; 54 const struct xattr *xattr;
55 handle_t *handle = fs_info; 55 handle_t *handle = fs_info;
diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c
index 37e6ebca2cc3..95f1f4ab59a4 100644
--- a/fs/ext4/xattr_trusted.c
+++ b/fs/ext4/xattr_trusted.c
@@ -5,7 +5,6 @@
5 * Copyright (C) 2003 by Andreas Gruenbacher, <a.gruenbacher@computer.org> 5 * Copyright (C) 2003 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
6 */ 6 */
7 7
8#include <linux/module.h>
9#include <linux/string.h> 8#include <linux/string.h>
10#include <linux/capability.h> 9#include <linux/capability.h>
11#include <linux/fs.h> 10#include <linux/fs.h>
diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c
index 98c375352d0e..0edb7611ffbe 100644
--- a/fs/ext4/xattr_user.c
+++ b/fs/ext4/xattr_user.c
@@ -5,7 +5,6 @@
5 * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org> 5 * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
6 */ 6 */
7 7
8#include <linux/module.h>
9#include <linux/string.h> 8#include <linux/string.h>
10#include <linux/fs.h> 9#include <linux/fs.h>
11#include "ext4_jbd2.h" 10#include "ext4_jbd2.h"
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 1510a4d51990..66994f316e18 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -141,7 +141,7 @@ static inline struct msdos_inode_info *MSDOS_I(struct inode *inode)
141static inline int fat_mode_can_hold_ro(struct inode *inode) 141static inline int fat_mode_can_hold_ro(struct inode *inode)
142{ 142{
143 struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); 143 struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
144 mode_t mask; 144 umode_t mask;
145 145
146 if (S_ISDIR(inode->i_mode)) { 146 if (S_ISDIR(inode->i_mode)) {
147 if (!sbi->options.rodir) 147 if (!sbi->options.rodir)
@@ -156,8 +156,8 @@ static inline int fat_mode_can_hold_ro(struct inode *inode)
156} 156}
157 157
158/* Convert attribute bits and a mask to the UNIX mode. */ 158/* Convert attribute bits and a mask to the UNIX mode. */
159static inline mode_t fat_make_mode(struct msdos_sb_info *sbi, 159static inline umode_t fat_make_mode(struct msdos_sb_info *sbi,
160 u8 attrs, mode_t mode) 160 u8 attrs, umode_t mode)
161{ 161{
162 if (attrs & ATTR_RO && !((attrs & ATTR_DIR) && !sbi->options.rodir)) 162 if (attrs & ATTR_RO && !((attrs & ATTR_DIR) && !sbi->options.rodir))
163 mode &= ~S_IWUGO; 163 mode &= ~S_IWUGO;
diff --git a/fs/fat/file.c b/fs/fat/file.c
index c118acf16e43..a71fe3715ee8 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -44,7 +44,7 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
44 goto out; 44 goto out;
45 45
46 mutex_lock(&inode->i_mutex); 46 mutex_lock(&inode->i_mutex);
47 err = mnt_want_write(file->f_path.mnt); 47 err = mnt_want_write_file(file);
48 if (err) 48 if (err)
49 goto out_unlock_inode; 49 goto out_unlock_inode;
50 50
@@ -108,7 +108,7 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
108 fat_save_attrs(inode, attr); 108 fat_save_attrs(inode, attr);
109 mark_inode_dirty(inode); 109 mark_inode_dirty(inode);
110out_drop_write: 110out_drop_write:
111 mnt_drop_write(file->f_path.mnt); 111 mnt_drop_write_file(file);
112out_unlock_inode: 112out_unlock_inode:
113 mutex_unlock(&inode->i_mutex); 113 mutex_unlock(&inode->i_mutex);
114out: 114out:
@@ -314,7 +314,7 @@ EXPORT_SYMBOL_GPL(fat_getattr);
314static int fat_sanitize_mode(const struct msdos_sb_info *sbi, 314static int fat_sanitize_mode(const struct msdos_sb_info *sbi,
315 struct inode *inode, umode_t *mode_ptr) 315 struct inode *inode, umode_t *mode_ptr)
316{ 316{
317 mode_t mask, perm; 317 umode_t mask, perm;
318 318
319 /* 319 /*
320 * Note, the basic check is already done by a caller of 320 * Note, the basic check is already done by a caller of
@@ -351,7 +351,7 @@ static int fat_sanitize_mode(const struct msdos_sb_info *sbi,
351 351
352static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode) 352static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode)
353{ 353{
354 mode_t allow_utime = sbi->options.allow_utime; 354 umode_t allow_utime = sbi->options.allow_utime;
355 355
356 if (current_fsuid() != inode->i_uid) { 356 if (current_fsuid() != inode->i_uid) {
357 if (in_group_p(inode->i_gid)) 357 if (in_group_p(inode->i_gid))
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 808cac7edcfb..3ab841054d53 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -518,7 +518,6 @@ static struct inode *fat_alloc_inode(struct super_block *sb)
518static void fat_i_callback(struct rcu_head *head) 518static void fat_i_callback(struct rcu_head *head)
519{ 519{
520 struct inode *inode = container_of(head, struct inode, i_rcu); 520 struct inode *inode = container_of(head, struct inode, i_rcu);
521 INIT_LIST_HEAD(&inode->i_dentry);
522 kmem_cache_free(fat_inode_cachep, MSDOS_I(inode)); 521 kmem_cache_free(fat_inode_cachep, MSDOS_I(inode));
523} 522}
524 523
@@ -672,7 +671,7 @@ int fat_sync_inode(struct inode *inode)
672 671
673EXPORT_SYMBOL_GPL(fat_sync_inode); 672EXPORT_SYMBOL_GPL(fat_sync_inode);
674 673
675static int fat_show_options(struct seq_file *m, struct vfsmount *mnt); 674static int fat_show_options(struct seq_file *m, struct dentry *root);
676static const struct super_operations fat_sops = { 675static const struct super_operations fat_sops = {
677 .alloc_inode = fat_alloc_inode, 676 .alloc_inode = fat_alloc_inode,
678 .destroy_inode = fat_destroy_inode, 677 .destroy_inode = fat_destroy_inode,
@@ -811,9 +810,9 @@ static const struct export_operations fat_export_ops = {
811 .get_parent = fat_get_parent, 810 .get_parent = fat_get_parent,
812}; 811};
813 812
814static int fat_show_options(struct seq_file *m, struct vfsmount *mnt) 813static int fat_show_options(struct seq_file *m, struct dentry *root)
815{ 814{
816 struct msdos_sb_info *sbi = MSDOS_SB(mnt->mnt_sb); 815 struct msdos_sb_info *sbi = MSDOS_SB(root->d_sb);
817 struct fat_mount_options *opts = &sbi->options; 816 struct fat_mount_options *opts = &sbi->options;
818 int isvfat = opts->isvfat; 817 int isvfat = opts->isvfat;
819 818
@@ -898,7 +897,7 @@ enum {
898 Opt_charset, Opt_shortname_lower, Opt_shortname_win95, 897 Opt_charset, Opt_shortname_lower, Opt_shortname_win95,
899 Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes, 898 Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
900 Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, 899 Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
901 Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err_cont, 900 Opt_obsolete, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err_cont,
902 Opt_err_panic, Opt_err_ro, Opt_discard, Opt_err, 901 Opt_err_panic, Opt_err_ro, Opt_discard, Opt_err,
903}; 902};
904 903
@@ -928,17 +927,17 @@ static const match_table_t fat_tokens = {
928 {Opt_err_panic, "errors=panic"}, 927 {Opt_err_panic, "errors=panic"},
929 {Opt_err_ro, "errors=remount-ro"}, 928 {Opt_err_ro, "errors=remount-ro"},
930 {Opt_discard, "discard"}, 929 {Opt_discard, "discard"},
931 {Opt_obsolate, "conv=binary"}, 930 {Opt_obsolete, "conv=binary"},
932 {Opt_obsolate, "conv=text"}, 931 {Opt_obsolete, "conv=text"},
933 {Opt_obsolate, "conv=auto"}, 932 {Opt_obsolete, "conv=auto"},
934 {Opt_obsolate, "conv=b"}, 933 {Opt_obsolete, "conv=b"},
935 {Opt_obsolate, "conv=t"}, 934 {Opt_obsolete, "conv=t"},
936 {Opt_obsolate, "conv=a"}, 935 {Opt_obsolete, "conv=a"},
937 {Opt_obsolate, "fat=%u"}, 936 {Opt_obsolete, "fat=%u"},
938 {Opt_obsolate, "blocksize=%u"}, 937 {Opt_obsolete, "blocksize=%u"},
939 {Opt_obsolate, "cvf_format=%20s"}, 938 {Opt_obsolete, "cvf_format=%20s"},
940 {Opt_obsolate, "cvf_options=%100s"}, 939 {Opt_obsolete, "cvf_options=%100s"},
941 {Opt_obsolate, "posix"}, 940 {Opt_obsolete, "posix"},
942 {Opt_err, NULL}, 941 {Opt_err, NULL},
943}; 942};
944static const match_table_t msdos_tokens = { 943static const match_table_t msdos_tokens = {
@@ -1170,7 +1169,7 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat,
1170 break; 1169 break;
1171 1170
1172 /* obsolete mount options */ 1171 /* obsolete mount options */
1173 case Opt_obsolate: 1172 case Opt_obsolete:
1174 fat_msg(sb, KERN_INFO, "\"%s\" option is obsolete, " 1173 fat_msg(sb, KERN_INFO, "\"%s\" option is obsolete, "
1175 "not supported now", p); 1174 "not supported now", p);
1176 break; 1175 break;
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 216b419f30e2..c5938c9084b9 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -264,7 +264,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name,
264} 264}
265 265
266/***** Create a file */ 266/***** Create a file */
267static int msdos_create(struct inode *dir, struct dentry *dentry, int mode, 267static int msdos_create(struct inode *dir, struct dentry *dentry, umode_t mode,
268 struct nameidata *nd) 268 struct nameidata *nd)
269{ 269{
270 struct super_block *sb = dir->i_sb; 270 struct super_block *sb = dir->i_sb;
@@ -346,7 +346,7 @@ out:
346} 346}
347 347
348/***** Make a directory */ 348/***** Make a directory */
349static int msdos_mkdir(struct inode *dir, struct dentry *dentry, int mode) 349static int msdos_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
350{ 350{
351 struct super_block *sb = dir->i_sb; 351 struct super_block *sb = dir->i_sb;
352 struct fat_slot_info sinfo; 352 struct fat_slot_info sinfo;
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index a87a65663c25..a81eb2367d39 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -512,7 +512,8 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname,
512 int charlen; 512 int charlen;
513 513
514 if (utf8) { 514 if (utf8) {
515 *outlen = utf8s_to_utf16s(name, len, (wchar_t *)outname); 515 *outlen = utf8s_to_utf16s(name, len, UTF16_HOST_ENDIAN,
516 (wchar_t *) outname, FAT_LFN_LEN + 2);
516 if (*outlen < 0) 517 if (*outlen < 0)
517 return *outlen; 518 return *outlen;
518 else if (*outlen > FAT_LFN_LEN) 519 else if (*outlen > FAT_LFN_LEN)
@@ -781,7 +782,7 @@ error:
781 return ERR_PTR(err); 782 return ERR_PTR(err);
782} 783}
783 784
784static int vfat_create(struct inode *dir, struct dentry *dentry, int mode, 785static int vfat_create(struct inode *dir, struct dentry *dentry, umode_t mode,
785 struct nameidata *nd) 786 struct nameidata *nd)
786{ 787{
787 struct super_block *sb = dir->i_sb; 788 struct super_block *sb = dir->i_sb;
@@ -870,7 +871,7 @@ out:
870 return err; 871 return err;
871} 872}
872 873
873static int vfat_mkdir(struct inode *dir, struct dentry *dentry, int mode) 874static int vfat_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
874{ 875{
875 struct super_block *sb = dir->i_sb; 876 struct super_block *sb = dir->i_sb;
876 struct inode *inode; 877 struct inode *inode;
diff --git a/fs/fhandle.c b/fs/fhandle.c
index 6b088641f5bf..a48e4a139be1 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -10,6 +10,7 @@
10#include <linux/personality.h> 10#include <linux/personality.h>
11#include <asm/uaccess.h> 11#include <asm/uaccess.h>
12#include "internal.h" 12#include "internal.h"
13#include "mount.h"
13 14
14static long do_sys_name_to_handle(struct path *path, 15static long do_sys_name_to_handle(struct path *path,
15 struct file_handle __user *ufh, 16 struct file_handle __user *ufh,
@@ -24,8 +25,8 @@ static long do_sys_name_to_handle(struct path *path,
24 * We need t make sure wether the file system 25 * We need t make sure wether the file system
25 * support decoding of the file handle 26 * support decoding of the file handle
26 */ 27 */
27 if (!path->mnt->mnt_sb->s_export_op || 28 if (!path->dentry->d_sb->s_export_op ||
28 !path->mnt->mnt_sb->s_export_op->fh_to_dentry) 29 !path->dentry->d_sb->s_export_op->fh_to_dentry)
29 return -EOPNOTSUPP; 30 return -EOPNOTSUPP;
30 31
31 if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) 32 if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle)))
@@ -66,7 +67,8 @@ static long do_sys_name_to_handle(struct path *path,
66 } else 67 } else
67 retval = 0; 68 retval = 0;
68 /* copy the mount id */ 69 /* copy the mount id */
69 if (copy_to_user(mnt_id, &path->mnt->mnt_id, sizeof(*mnt_id)) || 70 if (copy_to_user(mnt_id, &real_mount(path->mnt)->mnt_id,
71 sizeof(*mnt_id)) ||
70 copy_to_user(ufh, handle, 72 copy_to_user(ufh, handle,
71 sizeof(struct file_handle) + handle_bytes)) 73 sizeof(struct file_handle) + handle_bytes))
72 retval = -EFAULT; 74 retval = -EFAULT;
diff --git a/fs/file_table.c b/fs/file_table.c
index c322794f7360..20002e39754d 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -474,29 +474,6 @@ void file_sb_list_del(struct file *file)
474 474
475#endif 475#endif
476 476
477int fs_may_remount_ro(struct super_block *sb)
478{
479 struct file *file;
480 /* Check that no files are currently opened for writing. */
481 lg_global_lock(files_lglock);
482 do_file_list_for_each_entry(sb, file) {
483 struct inode *inode = file->f_path.dentry->d_inode;
484
485 /* File with pending delete? */
486 if (inode->i_nlink == 0)
487 goto too_bad;
488
489 /* Writeable file? */
490 if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))
491 goto too_bad;
492 } while_file_list_for_each_entry;
493 lg_global_unlock(files_lglock);
494 return 1; /* Tis' cool bro. */
495too_bad:
496 lg_global_unlock(files_lglock);
497 return 0;
498}
499
500/** 477/**
501 * mark_files_ro - mark all files read-only 478 * mark_files_ro - mark all files read-only
502 * @sb: superblock in question 479 * @sb: superblock in question
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 0845f84f2a5f..96f24286667a 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -74,7 +74,6 @@ int register_filesystem(struct file_system_type * fs)
74 BUG_ON(strchr(fs->name, '.')); 74 BUG_ON(strchr(fs->name, '.'));
75 if (fs->next) 75 if (fs->next)
76 return -EBUSY; 76 return -EBUSY;
77 INIT_LIST_HEAD(&fs->fs_supers);
78 write_lock(&file_systems_lock); 77 write_lock(&file_systems_lock);
79 p = find_filesystem(fs->name, strlen(fs->name)); 78 p = find_filesystem(fs->name, strlen(fs->name));
80 if (*p) 79 if (*p)
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 7b2af5abe2fa..cf9ef918a2a9 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -187,10 +187,10 @@ vxfs_stiget(struct super_block *sbp, ino_t ino)
187 * vxfs_transmod returns a Linux mode_t for a given 187 * vxfs_transmod returns a Linux mode_t for a given
188 * VxFS inode structure. 188 * VxFS inode structure.
189 */ 189 */
190static __inline__ mode_t 190static __inline__ umode_t
191vxfs_transmod(struct vxfs_inode_info *vip) 191vxfs_transmod(struct vxfs_inode_info *vip)
192{ 192{
193 mode_t ret = vip->vii_mode & ~VXFS_TYPE_MASK; 193 umode_t ret = vip->vii_mode & ~VXFS_TYPE_MASK;
194 194
195 if (VXFS_ISFIFO(vip)) 195 if (VXFS_ISFIFO(vip))
196 ret |= S_IFIFO; 196 ret |= S_IFIFO;
@@ -340,7 +340,6 @@ vxfs_iget(struct super_block *sbp, ino_t ino)
340static void vxfs_i_callback(struct rcu_head *head) 340static void vxfs_i_callback(struct rcu_head *head)
341{ 341{
342 struct inode *inode = container_of(head, struct inode, i_rcu); 342 struct inode *inode = container_of(head, struct inode, i_rcu);
343 INIT_LIST_HEAD(&inode->i_dentry);
344 kmem_cache_free(vxfs_inode_cachep, inode->i_private); 343 kmem_cache_free(vxfs_inode_cachep, inode->i_private);
345} 344}
346 345
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 517f211a3bd4..f855916657ba 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -20,16 +20,21 @@
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/fs.h> 21#include <linux/fs.h>
22#include <linux/mm.h> 22#include <linux/mm.h>
23#include <linux/pagemap.h>
23#include <linux/kthread.h> 24#include <linux/kthread.h>
24#include <linux/freezer.h> 25#include <linux/freezer.h>
25#include <linux/writeback.h> 26#include <linux/writeback.h>
26#include <linux/blkdev.h> 27#include <linux/blkdev.h>
27#include <linux/backing-dev.h> 28#include <linux/backing-dev.h>
28#include <linux/buffer_head.h>
29#include <linux/tracepoint.h> 29#include <linux/tracepoint.h>
30#include "internal.h" 30#include "internal.h"
31 31
32/* 32/*
33 * 4MB minimal write chunk size
34 */
35#define MIN_WRITEBACK_PAGES (4096UL >> (PAGE_CACHE_SHIFT - 10))
36
37/*
33 * Passed into wb_writeback(), essentially a subset of writeback_control 38 * Passed into wb_writeback(), essentially a subset of writeback_control
34 */ 39 */
35struct wb_writeback_work { 40struct wb_writeback_work {
@@ -743,11 +748,17 @@ static long wb_writeback(struct bdi_writeback *wb,
743 if (work->for_background && !over_bground_thresh(wb->bdi)) 748 if (work->for_background && !over_bground_thresh(wb->bdi))
744 break; 749 break;
745 750
751 /*
752 * Kupdate and background works are special and we want to
753 * include all inodes that need writing. Livelock avoidance is
754 * handled by these works yielding to any other work so we are
755 * safe.
756 */
746 if (work->for_kupdate) { 757 if (work->for_kupdate) {
747 oldest_jif = jiffies - 758 oldest_jif = jiffies -
748 msecs_to_jiffies(dirty_expire_interval * 10); 759 msecs_to_jiffies(dirty_expire_interval * 10);
749 work->older_than_this = &oldest_jif; 760 } else if (work->for_background)
750 } 761 oldest_jif = jiffies;
751 762
752 trace_writeback_start(wb->bdi, work); 763 trace_writeback_start(wb->bdi, work);
753 if (list_empty(&wb->b_io)) 764 if (list_empty(&wb->b_io))
@@ -937,7 +948,7 @@ int bdi_writeback_thread(void *data)
937 948
938 trace_writeback_thread_start(bdi); 949 trace_writeback_thread_start(bdi);
939 950
940 while (!kthread_should_stop()) { 951 while (!kthread_freezable_should_stop(NULL)) {
941 /* 952 /*
942 * Remove own delayed wake-up timer, since we are already awake 953 * Remove own delayed wake-up timer, since we are already awake
943 * and we'll take care of the preriodic write-back. 954 * and we'll take care of the preriodic write-back.
@@ -967,8 +978,6 @@ int bdi_writeback_thread(void *data)
967 */ 978 */
968 schedule(); 979 schedule();
969 } 980 }
970
971 try_to_freeze();
972 } 981 }
973 982
974 /* Flush any work that raced with us exiting */ 983 /* Flush any work that raced with us exiting */
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 2aaf3eaaf13d..5f3368ab0fa9 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1378,7 +1378,59 @@ static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
1378 down_read(&fc->killsb); 1378 down_read(&fc->killsb);
1379 err = -ENOENT; 1379 err = -ENOENT;
1380 if (fc->sb) 1380 if (fc->sb)
1381 err = fuse_reverse_inval_entry(fc->sb, outarg.parent, &name); 1381 err = fuse_reverse_inval_entry(fc->sb, outarg.parent, 0, &name);
1382 up_read(&fc->killsb);
1383 kfree(buf);
1384 return err;
1385
1386err:
1387 kfree(buf);
1388 fuse_copy_finish(cs);
1389 return err;
1390}
1391
1392static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size,
1393 struct fuse_copy_state *cs)
1394{
1395 struct fuse_notify_delete_out outarg;
1396 int err = -ENOMEM;
1397 char *buf;
1398 struct qstr name;
1399
1400 buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
1401 if (!buf)
1402 goto err;
1403
1404 err = -EINVAL;
1405 if (size < sizeof(outarg))
1406 goto err;
1407
1408 err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1409 if (err)
1410 goto err;
1411
1412 err = -ENAMETOOLONG;
1413 if (outarg.namelen > FUSE_NAME_MAX)
1414 goto err;
1415
1416 err = -EINVAL;
1417 if (size != sizeof(outarg) + outarg.namelen + 1)
1418 goto err;
1419
1420 name.name = buf;
1421 name.len = outarg.namelen;
1422 err = fuse_copy_one(cs, buf, outarg.namelen + 1);
1423 if (err)
1424 goto err;
1425 fuse_copy_finish(cs);
1426 buf[outarg.namelen] = 0;
1427 name.hash = full_name_hash(name.name, name.len);
1428
1429 down_read(&fc->killsb);
1430 err = -ENOENT;
1431 if (fc->sb)
1432 err = fuse_reverse_inval_entry(fc->sb, outarg.parent,
1433 outarg.child, &name);
1382 up_read(&fc->killsb); 1434 up_read(&fc->killsb);
1383 kfree(buf); 1435 kfree(buf);
1384 return err; 1436 return err;
@@ -1597,6 +1649,9 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
1597 case FUSE_NOTIFY_RETRIEVE: 1649 case FUSE_NOTIFY_RETRIEVE:
1598 return fuse_notify_retrieve(fc, size, cs); 1650 return fuse_notify_retrieve(fc, size, cs);
1599 1651
1652 case FUSE_NOTIFY_DELETE:
1653 return fuse_notify_delete(fc, size, cs);
1654
1600 default: 1655 default:
1601 fuse_copy_finish(cs); 1656 fuse_copy_finish(cs);
1602 return -EINVAL; 1657 return -EINVAL;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 9f63e493a9b6..206632887bb4 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -369,8 +369,8 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
369 * If the filesystem doesn't support this, then fall back to separate 369 * If the filesystem doesn't support this, then fall back to separate
370 * 'mknod' + 'open' requests. 370 * 'mknod' + 'open' requests.
371 */ 371 */
372static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, 372static int fuse_create_open(struct inode *dir, struct dentry *entry,
373 struct nameidata *nd) 373 umode_t mode, struct nameidata *nd)
374{ 374{
375 int err; 375 int err;
376 struct inode *inode; 376 struct inode *inode;
@@ -480,7 +480,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
480 */ 480 */
481static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, 481static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
482 struct inode *dir, struct dentry *entry, 482 struct inode *dir, struct dentry *entry,
483 int mode) 483 umode_t mode)
484{ 484{
485 struct fuse_entry_out outarg; 485 struct fuse_entry_out outarg;
486 struct inode *inode; 486 struct inode *inode;
@@ -547,7 +547,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
547 return err; 547 return err;
548} 548}
549 549
550static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode, 550static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
551 dev_t rdev) 551 dev_t rdev)
552{ 552{
553 struct fuse_mknod_in inarg; 553 struct fuse_mknod_in inarg;
@@ -573,7 +573,7 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode,
573 return create_new_entry(fc, req, dir, entry, mode); 573 return create_new_entry(fc, req, dir, entry, mode);
574} 574}
575 575
576static int fuse_create(struct inode *dir, struct dentry *entry, int mode, 576static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
577 struct nameidata *nd) 577 struct nameidata *nd)
578{ 578{
579 if (nd) { 579 if (nd) {
@@ -585,7 +585,7 @@ static int fuse_create(struct inode *dir, struct dentry *entry, int mode,
585 return fuse_mknod(dir, entry, mode, 0); 585 return fuse_mknod(dir, entry, mode, 0);
586} 586}
587 587
588static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode) 588static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
589{ 589{
590 struct fuse_mkdir_in inarg; 590 struct fuse_mkdir_in inarg;
591 struct fuse_conn *fc = get_fuse_conn(dir); 591 struct fuse_conn *fc = get_fuse_conn(dir);
@@ -868,7 +868,7 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat,
868} 868}
869 869
870int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, 870int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
871 struct qstr *name) 871 u64 child_nodeid, struct qstr *name)
872{ 872{
873 int err = -ENOTDIR; 873 int err = -ENOTDIR;
874 struct inode *parent; 874 struct inode *parent;
@@ -895,8 +895,36 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
895 895
896 fuse_invalidate_attr(parent); 896 fuse_invalidate_attr(parent);
897 fuse_invalidate_entry(entry); 897 fuse_invalidate_entry(entry);
898
899 if (child_nodeid != 0 && entry->d_inode) {
900 mutex_lock(&entry->d_inode->i_mutex);
901 if (get_node_id(entry->d_inode) != child_nodeid) {
902 err = -ENOENT;
903 goto badentry;
904 }
905 if (d_mountpoint(entry)) {
906 err = -EBUSY;
907 goto badentry;
908 }
909 if (S_ISDIR(entry->d_inode->i_mode)) {
910 shrink_dcache_parent(entry);
911 if (!simple_empty(entry)) {
912 err = -ENOTEMPTY;
913 goto badentry;
914 }
915 entry->d_inode->i_flags |= S_DEAD;
916 }
917 dont_mount(entry);
918 clear_nlink(entry->d_inode);
919 err = 0;
920 badentry:
921 mutex_unlock(&entry->d_inode->i_mutex);
922 if (!err)
923 d_delete(entry);
924 } else {
925 err = 0;
926 }
898 dput(entry); 927 dput(entry);
899 err = 0;
900 928
901 unlock: 929 unlock:
902 mutex_unlock(&parent->i_mutex); 930 mutex_unlock(&parent->i_mutex);
@@ -1182,6 +1210,30 @@ static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
1182 return fuse_fsync_common(file, start, end, datasync, 1); 1210 return fuse_fsync_common(file, start, end, datasync, 1);
1183} 1211}
1184 1212
1213static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
1214 unsigned long arg)
1215{
1216 struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1217
1218 /* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
1219 if (fc->minor < 18)
1220 return -ENOTTY;
1221
1222 return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);
1223}
1224
1225static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
1226 unsigned long arg)
1227{
1228 struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1229
1230 if (fc->minor < 18)
1231 return -ENOTTY;
1232
1233 return fuse_ioctl_common(file, cmd, arg,
1234 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
1235}
1236
1185static bool update_mtime(unsigned ivalid) 1237static bool update_mtime(unsigned ivalid)
1186{ 1238{
1187 /* Always update if mtime is explicitly set */ 1239 /* Always update if mtime is explicitly set */
@@ -1596,6 +1648,8 @@ static const struct file_operations fuse_dir_operations = {
1596 .open = fuse_dir_open, 1648 .open = fuse_dir_open,
1597 .release = fuse_dir_release, 1649 .release = fuse_dir_release,
1598 .fsync = fuse_dir_fsync, 1650 .fsync = fuse_dir_fsync,
1651 .unlocked_ioctl = fuse_dir_ioctl,
1652 .compat_ioctl = fuse_dir_compat_ioctl,
1599}; 1653};
1600 1654
1601static const struct inode_operations fuse_common_inode_operations = { 1655static const struct inode_operations fuse_common_inode_operations = {
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 0c84100acd44..4a199fd93fbd 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1555,48 +1555,16 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
1555 loff_t retval; 1555 loff_t retval;
1556 struct inode *inode = file->f_path.dentry->d_inode; 1556 struct inode *inode = file->f_path.dentry->d_inode;
1557 1557
1558 mutex_lock(&inode->i_mutex); 1558 /* No i_mutex protection necessary for SEEK_CUR and SEEK_SET */
1559 if (origin != SEEK_CUR && origin != SEEK_SET) { 1559 if (origin == SEEK_CUR || origin == SEEK_SET)
1560 retval = fuse_update_attributes(inode, NULL, file, NULL); 1560 return generic_file_llseek(file, offset, origin);
1561 if (retval)
1562 goto exit;
1563 }
1564 1561
1565 switch (origin) { 1562 mutex_lock(&inode->i_mutex);
1566 case SEEK_END: 1563 retval = fuse_update_attributes(inode, NULL, file, NULL);
1567 offset += i_size_read(inode); 1564 if (!retval)
1568 break; 1565 retval = generic_file_llseek(file, offset, origin);
1569 case SEEK_CUR:
1570 if (offset == 0) {
1571 retval = file->f_pos;
1572 goto exit;
1573 }
1574 offset += file->f_pos;
1575 break;
1576 case SEEK_DATA:
1577 if (offset >= i_size_read(inode)) {
1578 retval = -ENXIO;
1579 goto exit;
1580 }
1581 break;
1582 case SEEK_HOLE:
1583 if (offset >= i_size_read(inode)) {
1584 retval = -ENXIO;
1585 goto exit;
1586 }
1587 offset = i_size_read(inode);
1588 break;
1589 }
1590 retval = -EINVAL;
1591 if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) {
1592 if (offset != file->f_pos) {
1593 file->f_pos = offset;
1594 file->f_version = 0;
1595 }
1596 retval = offset;
1597 }
1598exit:
1599 mutex_unlock(&inode->i_mutex); 1566 mutex_unlock(&inode->i_mutex);
1567
1600 return retval; 1568 return retval;
1601} 1569}
1602 1570
@@ -1808,7 +1776,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
1808 BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE); 1776 BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
1809 1777
1810 err = -ENOMEM; 1778 err = -ENOMEM;
1811 pages = kzalloc(sizeof(pages[0]) * FUSE_MAX_PAGES_PER_REQ, GFP_KERNEL); 1779 pages = kcalloc(FUSE_MAX_PAGES_PER_REQ, sizeof(pages[0]), GFP_KERNEL);
1812 iov_page = (struct iovec *) __get_free_page(GFP_KERNEL); 1780 iov_page = (struct iovec *) __get_free_page(GFP_KERNEL);
1813 if (!pages || !iov_page) 1781 if (!pages || !iov_page)
1814 goto out; 1782 goto out;
@@ -1958,8 +1926,8 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
1958} 1926}
1959EXPORT_SYMBOL_GPL(fuse_do_ioctl); 1927EXPORT_SYMBOL_GPL(fuse_do_ioctl);
1960 1928
1961static long fuse_file_ioctl_common(struct file *file, unsigned int cmd, 1929long fuse_ioctl_common(struct file *file, unsigned int cmd,
1962 unsigned long arg, unsigned int flags) 1930 unsigned long arg, unsigned int flags)
1963{ 1931{
1964 struct inode *inode = file->f_dentry->d_inode; 1932 struct inode *inode = file->f_dentry->d_inode;
1965 struct fuse_conn *fc = get_fuse_conn(inode); 1933 struct fuse_conn *fc = get_fuse_conn(inode);
@@ -1976,13 +1944,13 @@ static long fuse_file_ioctl_common(struct file *file, unsigned int cmd,
1976static long fuse_file_ioctl(struct file *file, unsigned int cmd, 1944static long fuse_file_ioctl(struct file *file, unsigned int cmd,
1977 unsigned long arg) 1945 unsigned long arg)
1978{ 1946{
1979 return fuse_file_ioctl_common(file, cmd, arg, 0); 1947 return fuse_ioctl_common(file, cmd, arg, 0);
1980} 1948}
1981 1949
1982static long fuse_file_compat_ioctl(struct file *file, unsigned int cmd, 1950static long fuse_file_compat_ioctl(struct file *file, unsigned int cmd,
1983 unsigned long arg) 1951 unsigned long arg)
1984{ 1952{
1985 return fuse_file_ioctl_common(file, cmd, arg, FUSE_IOCTL_COMPAT); 1953 return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_COMPAT);
1986} 1954}
1987 1955
1988/* 1956/*
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index cf6db0a93219..572cefc78012 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -80,7 +80,7 @@ struct fuse_inode {
80 80
81 /** The sticky bit in inode->i_mode may have been removed, so 81 /** The sticky bit in inode->i_mode may have been removed, so
82 preserve the original mode */ 82 preserve the original mode */
83 mode_t orig_i_mode; 83 umode_t orig_i_mode;
84 84
85 /** Version of last attribute change */ 85 /** Version of last attribute change */
86 u64 attr_version; 86 u64 attr_version;
@@ -755,9 +755,15 @@ int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
755/** 755/**
756 * File-system tells the kernel to invalidate parent attributes and 756 * File-system tells the kernel to invalidate parent attributes and
757 * the dentry matching parent/name. 757 * the dentry matching parent/name.
758 *
759 * If the child_nodeid is non-zero and:
760 * - matches the inode number for the dentry matching parent/name,
761 * - is not a mount point
762 * - is a file or oan empty directory
763 * then the dentry is unhashed (d_delete()).
758 */ 764 */
759int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, 765int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
760 struct qstr *name); 766 u64 child_nodeid, struct qstr *name);
761 767
762int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, 768int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
763 bool isdir); 769 bool isdir);
@@ -765,6 +771,8 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf,
765 size_t count, loff_t *ppos, int write); 771 size_t count, loff_t *ppos, int write);
766long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, 772long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
767 unsigned int flags); 773 unsigned int flags);
774long fuse_ioctl_common(struct file *file, unsigned int cmd,
775 unsigned long arg, unsigned int flags);
768unsigned fuse_file_poll(struct file *file, poll_table *wait); 776unsigned fuse_file_poll(struct file *file, poll_table *wait);
769int fuse_dev_release(struct inode *inode, struct file *file); 777int fuse_dev_release(struct inode *inode, struct file *file);
770 778
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index aa83109b9431..64cf8d07393e 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -107,7 +107,6 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
107static void fuse_i_callback(struct rcu_head *head) 107static void fuse_i_callback(struct rcu_head *head)
108{ 108{
109 struct inode *inode = container_of(head, struct inode, i_rcu); 109 struct inode *inode = container_of(head, struct inode, i_rcu);
110 INIT_LIST_HEAD(&inode->i_dentry);
111 kmem_cache_free(fuse_inode_cachep, inode); 110 kmem_cache_free(fuse_inode_cachep, inode);
112} 111}
113 112
@@ -498,9 +497,10 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
498 return 1; 497 return 1;
499} 498}
500 499
501static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt) 500static int fuse_show_options(struct seq_file *m, struct dentry *root)
502{ 501{
503 struct fuse_conn *fc = get_fuse_conn_super(mnt->mnt_sb); 502 struct super_block *sb = root->d_sb;
503 struct fuse_conn *fc = get_fuse_conn_super(sb);
504 504
505 seq_printf(m, ",user_id=%u", fc->user_id); 505 seq_printf(m, ",user_id=%u", fc->user_id);
506 seq_printf(m, ",group_id=%u", fc->group_id); 506 seq_printf(m, ",group_id=%u", fc->group_id);
@@ -510,9 +510,8 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
510 seq_puts(m, ",allow_other"); 510 seq_puts(m, ",allow_other");
511 if (fc->max_read != ~0) 511 if (fc->max_read != ~0)
512 seq_printf(m, ",max_read=%u", fc->max_read); 512 seq_printf(m, ",max_read=%u", fc->max_read);
513 if (mnt->mnt_sb->s_bdev && 513 if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE)
514 mnt->mnt_sb->s_blocksize != FUSE_DEFAULT_BLKSIZE) 514 seq_printf(m, ",blksize=%lu", sb->s_blocksize);
515 seq_printf(m, ",blksize=%lu", mnt->mnt_sb->s_blocksize);
516 return 0; 515 return 0;
517} 516}
518 517
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 65978d7885c8..230eb0f005b6 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -38,8 +38,9 @@ static const char *gfs2_acl_name(int type)
38 return NULL; 38 return NULL;
39} 39}
40 40
41static struct posix_acl *gfs2_acl_get(struct gfs2_inode *ip, int type) 41struct posix_acl *gfs2_get_acl(struct inode *inode, int type)
42{ 42{
43 struct gfs2_inode *ip = GFS2_I(inode);
43 struct posix_acl *acl; 44 struct posix_acl *acl;
44 const char *name; 45 const char *name;
45 char *data; 46 char *data;
@@ -67,11 +68,6 @@ static struct posix_acl *gfs2_acl_get(struct gfs2_inode *ip, int type)
67 return acl; 68 return acl;
68} 69}
69 70
70struct posix_acl *gfs2_get_acl(struct inode *inode, int type)
71{
72 return gfs2_acl_get(GFS2_I(inode), type);
73}
74
75static int gfs2_set_mode(struct inode *inode, umode_t mode) 71static int gfs2_set_mode(struct inode *inode, umode_t mode)
76{ 72{
77 int error = 0; 73 int error = 0;
@@ -125,7 +121,7 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode)
125 if (S_ISLNK(inode->i_mode)) 121 if (S_ISLNK(inode->i_mode))
126 return 0; 122 return 0;
127 123
128 acl = gfs2_acl_get(dip, ACL_TYPE_DEFAULT); 124 acl = gfs2_get_acl(&dip->i_inode, ACL_TYPE_DEFAULT);
129 if (IS_ERR(acl)) 125 if (IS_ERR(acl))
130 return PTR_ERR(acl); 126 return PTR_ERR(acl);
131 if (!acl) { 127 if (!acl) {
@@ -166,7 +162,7 @@ int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr)
166 unsigned int len; 162 unsigned int len;
167 int error; 163 int error;
168 164
169 acl = gfs2_acl_get(ip, ACL_TYPE_ACCESS); 165 acl = gfs2_get_acl(&ip->i_inode, ACL_TYPE_ACCESS);
170 if (IS_ERR(acl)) 166 if (IS_ERR(acl))
171 return PTR_ERR(acl); 167 return PTR_ERR(acl);
172 if (!acl) 168 if (!acl)
@@ -216,7 +212,7 @@ static int gfs2_xattr_system_get(struct dentry *dentry, const char *name,
216 if (type < 0) 212 if (type < 0)
217 return type; 213 return type;
218 214
219 acl = gfs2_acl_get(GFS2_I(inode), type); 215 acl = gfs2_get_acl(inode, type);
220 if (IS_ERR(acl)) 216 if (IS_ERR(acl))
221 return PTR_ERR(acl); 217 return PTR_ERR(acl);
222 if (acl == NULL) 218 if (acl == NULL)
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 4858e1fed8b1..501e5cba09b3 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -615,7 +615,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
615 unsigned int data_blocks = 0, ind_blocks = 0, rblocks; 615 unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
616 int alloc_required; 616 int alloc_required;
617 int error = 0; 617 int error = 0;
618 struct gfs2_alloc *al = NULL; 618 struct gfs2_qadata *qa = NULL;
619 pgoff_t index = pos >> PAGE_CACHE_SHIFT; 619 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
620 unsigned from = pos & (PAGE_CACHE_SIZE - 1); 620 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
621 struct page *page; 621 struct page *page;
@@ -639,8 +639,8 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
639 gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks); 639 gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks);
640 640
641 if (alloc_required) { 641 if (alloc_required) {
642 al = gfs2_alloc_get(ip); 642 qa = gfs2_qadata_get(ip);
643 if (!al) { 643 if (!qa) {
644 error = -ENOMEM; 644 error = -ENOMEM;
645 goto out_unlock; 645 goto out_unlock;
646 } 646 }
@@ -649,8 +649,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
649 if (error) 649 if (error)
650 goto out_alloc_put; 650 goto out_alloc_put;
651 651
652 al->al_requested = data_blocks + ind_blocks; 652 error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks);
653 error = gfs2_inplace_reserve(ip);
654 if (error) 653 if (error)
655 goto out_qunlock; 654 goto out_qunlock;
656 } 655 }
@@ -711,7 +710,7 @@ out_trans_fail:
711out_qunlock: 710out_qunlock:
712 gfs2_quota_unlock(ip); 711 gfs2_quota_unlock(ip);
713out_alloc_put: 712out_alloc_put:
714 gfs2_alloc_put(ip); 713 gfs2_qadata_put(ip);
715 } 714 }
716out_unlock: 715out_unlock:
717 if (&ip->i_inode == sdp->sd_rindex) { 716 if (&ip->i_inode == sdp->sd_rindex) {
@@ -848,7 +847,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
848 struct gfs2_sbd *sdp = GFS2_SB(inode); 847 struct gfs2_sbd *sdp = GFS2_SB(inode);
849 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); 848 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
850 struct buffer_head *dibh; 849 struct buffer_head *dibh;
851 struct gfs2_alloc *al = ip->i_alloc; 850 struct gfs2_qadata *qa = ip->i_qadata;
852 unsigned int from = pos & (PAGE_CACHE_SIZE - 1); 851 unsigned int from = pos & (PAGE_CACHE_SIZE - 1);
853 unsigned int to = from + len; 852 unsigned int to = from + len;
854 int ret; 853 int ret;
@@ -880,10 +879,11 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
880 brelse(dibh); 879 brelse(dibh);
881failed: 880failed:
882 gfs2_trans_end(sdp); 881 gfs2_trans_end(sdp);
883 if (al) { 882 if (ip->i_res)
884 gfs2_inplace_release(ip); 883 gfs2_inplace_release(ip);
884 if (qa) {
885 gfs2_quota_unlock(ip); 885 gfs2_quota_unlock(ip);
886 gfs2_alloc_put(ip); 886 gfs2_qadata_put(ip);
887 } 887 }
888 if (inode == sdp->sd_rindex) { 888 if (inode == sdp->sd_rindex) {
889 gfs2_glock_dq(&m_ip->i_gh); 889 gfs2_glock_dq(&m_ip->i_gh);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 41d494d79709..14a704015970 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -133,7 +133,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
133 and write it out to disk */ 133 and write it out to disk */
134 134
135 unsigned int n = 1; 135 unsigned int n = 1;
136 error = gfs2_alloc_block(ip, &block, &n); 136 error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
137 if (error) 137 if (error)
138 goto out_brelse; 138 goto out_brelse;
139 if (isdir) { 139 if (isdir) {
@@ -503,7 +503,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
503 do { 503 do {
504 int error; 504 int error;
505 n = blks - alloced; 505 n = blks - alloced;
506 error = gfs2_alloc_block(ip, &bn, &n); 506 error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
507 if (error) 507 if (error)
508 return error; 508 return error;
509 alloced += n; 509 alloced += n;
@@ -743,9 +743,6 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
743 else if (ip->i_depth) 743 else if (ip->i_depth)
744 revokes = sdp->sd_inptrs; 744 revokes = sdp->sd_inptrs;
745 745
746 if (error)
747 return error;
748
749 memset(&rlist, 0, sizeof(struct gfs2_rgrp_list)); 746 memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
750 bstart = 0; 747 bstart = 0;
751 blen = 0; 748 blen = 0;
@@ -1044,7 +1041,7 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 size)
1044 lblock = (size - 1) >> sdp->sd_sb.sb_bsize_shift; 1041 lblock = (size - 1) >> sdp->sd_sb.sb_bsize_shift;
1045 1042
1046 find_metapath(sdp, lblock, &mp, ip->i_height); 1043 find_metapath(sdp, lblock, &mp, ip->i_height);
1047 if (!gfs2_alloc_get(ip)) 1044 if (!gfs2_qadata_get(ip))
1048 return -ENOMEM; 1045 return -ENOMEM;
1049 1046
1050 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 1047 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
@@ -1064,7 +1061,7 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 size)
1064 gfs2_quota_unhold(ip); 1061 gfs2_quota_unhold(ip);
1065 1062
1066out: 1063out:
1067 gfs2_alloc_put(ip); 1064 gfs2_qadata_put(ip);
1068 return error; 1065 return error;
1069} 1066}
1070 1067
@@ -1166,21 +1163,20 @@ static int do_grow(struct inode *inode, u64 size)
1166 struct gfs2_inode *ip = GFS2_I(inode); 1163 struct gfs2_inode *ip = GFS2_I(inode);
1167 struct gfs2_sbd *sdp = GFS2_SB(inode); 1164 struct gfs2_sbd *sdp = GFS2_SB(inode);
1168 struct buffer_head *dibh; 1165 struct buffer_head *dibh;
1169 struct gfs2_alloc *al = NULL; 1166 struct gfs2_qadata *qa = NULL;
1170 int error; 1167 int error;
1171 1168
1172 if (gfs2_is_stuffed(ip) && 1169 if (gfs2_is_stuffed(ip) &&
1173 (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) { 1170 (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) {
1174 al = gfs2_alloc_get(ip); 1171 qa = gfs2_qadata_get(ip);
1175 if (al == NULL) 1172 if (qa == NULL)
1176 return -ENOMEM; 1173 return -ENOMEM;
1177 1174
1178 error = gfs2_quota_lock_check(ip); 1175 error = gfs2_quota_lock_check(ip);
1179 if (error) 1176 if (error)
1180 goto do_grow_alloc_put; 1177 goto do_grow_alloc_put;
1181 1178
1182 al->al_requested = 1; 1179 error = gfs2_inplace_reserve(ip, 1);
1183 error = gfs2_inplace_reserve(ip);
1184 if (error) 1180 if (error)
1185 goto do_grow_qunlock; 1181 goto do_grow_qunlock;
1186 } 1182 }
@@ -1189,7 +1185,7 @@ static int do_grow(struct inode *inode, u64 size)
1189 if (error) 1185 if (error)
1190 goto do_grow_release; 1186 goto do_grow_release;
1191 1187
1192 if (al) { 1188 if (qa) {
1193 error = gfs2_unstuff_dinode(ip, NULL); 1189 error = gfs2_unstuff_dinode(ip, NULL);
1194 if (error) 1190 if (error)
1195 goto do_end_trans; 1191 goto do_end_trans;
@@ -1208,12 +1204,12 @@ static int do_grow(struct inode *inode, u64 size)
1208do_end_trans: 1204do_end_trans:
1209 gfs2_trans_end(sdp); 1205 gfs2_trans_end(sdp);
1210do_grow_release: 1206do_grow_release:
1211 if (al) { 1207 if (qa) {
1212 gfs2_inplace_release(ip); 1208 gfs2_inplace_release(ip);
1213do_grow_qunlock: 1209do_grow_qunlock:
1214 gfs2_quota_unlock(ip); 1210 gfs2_quota_unlock(ip);
1215do_grow_alloc_put: 1211do_grow_alloc_put:
1216 gfs2_alloc_put(ip); 1212 gfs2_qadata_put(ip);
1217 } 1213 }
1218 return error; 1214 return error;
1219} 1215}
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 8ccad2467cb6..c35573abd371 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -76,6 +76,8 @@
76#define IS_LEAF 1 /* Hashed (leaf) directory */ 76#define IS_LEAF 1 /* Hashed (leaf) directory */
77#define IS_DINODE 2 /* Linear (stuffed dinode block) directory */ 77#define IS_DINODE 2 /* Linear (stuffed dinode block) directory */
78 78
79#define MAX_RA_BLOCKS 32 /* max read-ahead blocks */
80
79#define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1) 81#define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1)
80#define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1)) 82#define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1))
81 83
@@ -821,7 +823,7 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh,
821 struct gfs2_dirent *dent; 823 struct gfs2_dirent *dent;
822 struct qstr name = { .name = "", .len = 0, .hash = 0 }; 824 struct qstr name = { .name = "", .len = 0, .hash = 0 };
823 825
824 error = gfs2_alloc_block(ip, &bn, &n); 826 error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
825 if (error) 827 if (error)
826 return NULL; 828 return NULL;
827 bh = gfs2_meta_new(ip->i_gl, bn); 829 bh = gfs2_meta_new(ip->i_gl, bn);
@@ -1376,6 +1378,52 @@ out:
1376 return error; 1378 return error;
1377} 1379}
1378 1380
1381/**
1382 * gfs2_dir_readahead - Issue read-ahead requests for leaf blocks.
1383 *
1384 * Note: we can't calculate each index like dir_e_read can because we don't
1385 * have the leaf, and therefore we don't have the depth, and therefore we
1386 * don't have the length. So we have to just read enough ahead to make up
1387 * for the loss of information.
1388 */
1389static void gfs2_dir_readahead(struct inode *inode, unsigned hsize, u32 index,
1390 struct file_ra_state *f_ra)
1391{
1392 struct gfs2_inode *ip = GFS2_I(inode);
1393 struct gfs2_glock *gl = ip->i_gl;
1394 struct buffer_head *bh;
1395 u64 blocknr = 0, last;
1396 unsigned count;
1397
1398 /* First check if we've already read-ahead for the whole range. */
1399 if (index + MAX_RA_BLOCKS < f_ra->start)
1400 return;
1401
1402 f_ra->start = max((pgoff_t)index, f_ra->start);
1403 for (count = 0; count < MAX_RA_BLOCKS; count++) {
1404 if (f_ra->start >= hsize) /* if exceeded the hash table */
1405 break;
1406
1407 last = blocknr;
1408 blocknr = be64_to_cpu(ip->i_hash_cache[f_ra->start]);
1409 f_ra->start++;
1410 if (blocknr == last)
1411 continue;
1412
1413 bh = gfs2_getbuf(gl, blocknr, 1);
1414 if (trylock_buffer(bh)) {
1415 if (buffer_uptodate(bh)) {
1416 unlock_buffer(bh);
1417 brelse(bh);
1418 continue;
1419 }
1420 bh->b_end_io = end_buffer_read_sync;
1421 submit_bh(READA | REQ_META, bh);
1422 continue;
1423 }
1424 brelse(bh);
1425 }
1426}
1379 1427
1380/** 1428/**
1381 * dir_e_read - Reads the entries from a directory into a filldir buffer 1429 * dir_e_read - Reads the entries from a directory into a filldir buffer
@@ -1388,7 +1436,7 @@ out:
1388 */ 1436 */
1389 1437
1390static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, 1438static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
1391 filldir_t filldir) 1439 filldir_t filldir, struct file_ra_state *f_ra)
1392{ 1440{
1393 struct gfs2_inode *dip = GFS2_I(inode); 1441 struct gfs2_inode *dip = GFS2_I(inode);
1394 u32 hsize, len = 0; 1442 u32 hsize, len = 0;
@@ -1402,10 +1450,14 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
1402 hash = gfs2_dir_offset2hash(*offset); 1450 hash = gfs2_dir_offset2hash(*offset);
1403 index = hash >> (32 - dip->i_depth); 1451 index = hash >> (32 - dip->i_depth);
1404 1452
1453 if (dip->i_hash_cache == NULL)
1454 f_ra->start = 0;
1405 lp = gfs2_dir_get_hash_table(dip); 1455 lp = gfs2_dir_get_hash_table(dip);
1406 if (IS_ERR(lp)) 1456 if (IS_ERR(lp))
1407 return PTR_ERR(lp); 1457 return PTR_ERR(lp);
1408 1458
1459 gfs2_dir_readahead(inode, hsize, index, f_ra);
1460
1409 while (index < hsize) { 1461 while (index < hsize) {
1410 error = gfs2_dir_read_leaf(inode, offset, opaque, filldir, 1462 error = gfs2_dir_read_leaf(inode, offset, opaque, filldir,
1411 &copied, &depth, 1463 &copied, &depth,
@@ -1423,7 +1475,7 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
1423} 1475}
1424 1476
1425int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, 1477int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
1426 filldir_t filldir) 1478 filldir_t filldir, struct file_ra_state *f_ra)
1427{ 1479{
1428 struct gfs2_inode *dip = GFS2_I(inode); 1480 struct gfs2_inode *dip = GFS2_I(inode);
1429 struct gfs2_sbd *sdp = GFS2_SB(inode); 1481 struct gfs2_sbd *sdp = GFS2_SB(inode);
@@ -1437,7 +1489,7 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
1437 return 0; 1489 return 0;
1438 1490
1439 if (dip->i_diskflags & GFS2_DIF_EXHASH) 1491 if (dip->i_diskflags & GFS2_DIF_EXHASH)
1440 return dir_e_read(inode, offset, opaque, filldir); 1492 return dir_e_read(inode, offset, opaque, filldir, f_ra);
1441 1493
1442 if (!gfs2_is_stuffed(dip)) { 1494 if (!gfs2_is_stuffed(dip)) {
1443 gfs2_consist_inode(dip); 1495 gfs2_consist_inode(dip);
@@ -1798,7 +1850,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1798 if (!ht) 1850 if (!ht)
1799 return -ENOMEM; 1851 return -ENOMEM;
1800 1852
1801 if (!gfs2_alloc_get(dip)) { 1853 if (!gfs2_qadata_get(dip)) {
1802 error = -ENOMEM; 1854 error = -ENOMEM;
1803 goto out; 1855 goto out;
1804 } 1856 }
@@ -1887,7 +1939,7 @@ out_rlist:
1887 gfs2_rlist_free(&rlist); 1939 gfs2_rlist_free(&rlist);
1888 gfs2_quota_unhold(dip); 1940 gfs2_quota_unhold(dip);
1889out_put: 1941out_put:
1890 gfs2_alloc_put(dip); 1942 gfs2_qadata_put(dip);
1891out: 1943out:
1892 kfree(ht); 1944 kfree(ht);
1893 return error; 1945 return error;
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
index ff5772fbf024..98c960beab35 100644
--- a/fs/gfs2/dir.h
+++ b/fs/gfs2/dir.h
@@ -25,7 +25,7 @@ extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
25 const struct gfs2_inode *ip); 25 const struct gfs2_inode *ip);
26extern int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry); 26extern int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry);
27extern int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, 27extern int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
28 filldir_t filldir); 28 filldir_t filldir, struct file_ra_state *f_ra);
29extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, 29extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
30 const struct gfs2_inode *nip, unsigned int new_type); 30 const struct gfs2_inode *nip, unsigned int new_type);
31 31
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index fe9945f2ff72..70ba891654f8 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -99,6 +99,7 @@ static int gfs2_get_name(struct dentry *parent, char *name,
99 struct gfs2_holder gh; 99 struct gfs2_holder gh;
100 u64 offset = 0; 100 u64 offset = 0;
101 int error; 101 int error;
102 struct file_ra_state f_ra = { .start = 0 };
102 103
103 if (!dir) 104 if (!dir)
104 return -EINVAL; 105 return -EINVAL;
@@ -118,7 +119,7 @@ static int gfs2_get_name(struct dentry *parent, char *name,
118 if (error) 119 if (error)
119 return error; 120 return error;
120 121
121 error = gfs2_dir_read(dir, &offset, &gnfd, get_name_filldir); 122 error = gfs2_dir_read(dir, &offset, &gnfd, get_name_filldir, &f_ra);
122 123
123 gfs2_glock_dq_uninit(&gh); 124 gfs2_glock_dq_uninit(&gh);
124 125
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index ce36a56dfeac..c5fb3597f696 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -105,7 +105,7 @@ static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir)
105 return error; 105 return error;
106 } 106 }
107 107
108 error = gfs2_dir_read(dir, &offset, dirent, filldir); 108 error = gfs2_dir_read(dir, &offset, dirent, filldir, &file->f_ra);
109 109
110 gfs2_glock_dq_uninit(&d_gh); 110 gfs2_glock_dq_uninit(&d_gh);
111 111
@@ -223,7 +223,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
223 int error; 223 int error;
224 u32 new_flags, flags; 224 u32 new_flags, flags;
225 225
226 error = mnt_want_write(filp->f_path.mnt); 226 error = mnt_want_write_file(filp);
227 if (error) 227 if (error)
228 return error; 228 return error;
229 229
@@ -285,7 +285,7 @@ out_trans_end:
285out: 285out:
286 gfs2_glock_dq_uninit(&gh); 286 gfs2_glock_dq_uninit(&gh);
287out_drop_write: 287out_drop_write:
288 mnt_drop_write(filp->f_path.mnt); 288 mnt_drop_write_file(filp);
289 return error; 289 return error;
290} 290}
291 291
@@ -365,7 +365,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
365 u64 pos = page->index << PAGE_CACHE_SHIFT; 365 u64 pos = page->index << PAGE_CACHE_SHIFT;
366 unsigned int data_blocks, ind_blocks, rblocks; 366 unsigned int data_blocks, ind_blocks, rblocks;
367 struct gfs2_holder gh; 367 struct gfs2_holder gh;
368 struct gfs2_alloc *al; 368 struct gfs2_qadata *qa;
369 loff_t size; 369 loff_t size;
370 int ret; 370 int ret;
371 371
@@ -393,16 +393,15 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
393 } 393 }
394 394
395 ret = -ENOMEM; 395 ret = -ENOMEM;
396 al = gfs2_alloc_get(ip); 396 qa = gfs2_qadata_get(ip);
397 if (al == NULL) 397 if (qa == NULL)
398 goto out_unlock; 398 goto out_unlock;
399 399
400 ret = gfs2_quota_lock_check(ip); 400 ret = gfs2_quota_lock_check(ip);
401 if (ret) 401 if (ret)
402 goto out_alloc_put; 402 goto out_alloc_put;
403 gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); 403 gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
404 al->al_requested = data_blocks + ind_blocks; 404 ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks);
405 ret = gfs2_inplace_reserve(ip);
406 if (ret) 405 if (ret)
407 goto out_quota_unlock; 406 goto out_quota_unlock;
408 407
@@ -448,7 +447,7 @@ out_trans_fail:
448out_quota_unlock: 447out_quota_unlock:
449 gfs2_quota_unlock(ip); 448 gfs2_quota_unlock(ip);
450out_alloc_put: 449out_alloc_put:
451 gfs2_alloc_put(ip); 450 gfs2_qadata_put(ip);
452out_unlock: 451out_unlock:
453 gfs2_glock_dq(&gh); 452 gfs2_glock_dq(&gh);
454out: 453out:
@@ -609,7 +608,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
609 struct inode *inode = mapping->host; 608 struct inode *inode = mapping->host;
610 int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC); 609 int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC);
611 struct gfs2_inode *ip = GFS2_I(inode); 610 struct gfs2_inode *ip = GFS2_I(inode);
612 int ret, ret1 = 0; 611 int ret = 0, ret1 = 0;
613 612
614 if (mapping->nrpages) { 613 if (mapping->nrpages) {
615 ret1 = filemap_fdatawrite_range(mapping, start, end); 614 ret1 = filemap_fdatawrite_range(mapping, start, end);
@@ -750,8 +749,10 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
750 struct gfs2_inode *ip = GFS2_I(inode); 749 struct gfs2_inode *ip = GFS2_I(inode);
751 unsigned int data_blocks = 0, ind_blocks = 0, rblocks; 750 unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
752 loff_t bytes, max_bytes; 751 loff_t bytes, max_bytes;
753 struct gfs2_alloc *al; 752 struct gfs2_qadata *qa;
754 int error; 753 int error;
754 const loff_t pos = offset;
755 const loff_t count = len;
755 loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1); 756 loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1);
756 loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; 757 loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;
757 loff_t max_chunk_size = UINT_MAX & bsize_mask; 758 loff_t max_chunk_size = UINT_MAX & bsize_mask;
@@ -782,8 +783,8 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
782 while (len > 0) { 783 while (len > 0) {
783 if (len < bytes) 784 if (len < bytes)
784 bytes = len; 785 bytes = len;
785 al = gfs2_alloc_get(ip); 786 qa = gfs2_qadata_get(ip);
786 if (!al) { 787 if (!qa) {
787 error = -ENOMEM; 788 error = -ENOMEM;
788 goto out_unlock; 789 goto out_unlock;
789 } 790 }
@@ -795,8 +796,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
795retry: 796retry:
796 gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); 797 gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
797 798
798 al->al_requested = data_blocks + ind_blocks; 799 error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks);
799 error = gfs2_inplace_reserve(ip);
800 if (error) { 800 if (error) {
801 if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { 801 if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
802 bytes >>= 1; 802 bytes >>= 1;
@@ -810,7 +810,6 @@ retry:
810 max_bytes = bytes; 810 max_bytes = bytes;
811 calc_max_reserv(ip, (len > max_chunk_size)? max_chunk_size: len, 811 calc_max_reserv(ip, (len > max_chunk_size)? max_chunk_size: len,
812 &max_bytes, &data_blocks, &ind_blocks); 812 &max_bytes, &data_blocks, &ind_blocks);
813 al->al_requested = data_blocks + ind_blocks;
814 813
815 rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + 814 rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
816 RES_RG_HDR + gfs2_rg_blocks(ip); 815 RES_RG_HDR + gfs2_rg_blocks(ip);
@@ -832,8 +831,11 @@ retry:
832 offset += max_bytes; 831 offset += max_bytes;
833 gfs2_inplace_release(ip); 832 gfs2_inplace_release(ip);
834 gfs2_quota_unlock(ip); 833 gfs2_quota_unlock(ip);
835 gfs2_alloc_put(ip); 834 gfs2_qadata_put(ip);
836 } 835 }
836
837 if (error == 0)
838 error = generic_write_sync(file, pos, count);
837 goto out_unlock; 839 goto out_unlock;
838 840
839out_trans_fail: 841out_trans_fail:
@@ -841,7 +843,7 @@ out_trans_fail:
841out_qunlock: 843out_qunlock:
842 gfs2_quota_unlock(ip); 844 gfs2_quota_unlock(ip);
843out_alloc_put: 845out_alloc_put:
844 gfs2_alloc_put(ip); 846 gfs2_qadata_put(ip);
845out_unlock: 847out_unlock:
846 gfs2_glock_dq(&ip->i_gh); 848 gfs2_glock_dq(&ip->i_gh);
847out_uninit: 849out_uninit:
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 88e8a23d0026..376816fcd040 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1353,7 +1353,7 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
1353 spin_lock(&gl->gl_spin); 1353 spin_lock(&gl->gl_spin);
1354 gl->gl_reply = ret; 1354 gl->gl_reply = ret;
1355 1355
1356 if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))) { 1356 if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))) {
1357 if (gfs2_should_freeze(gl)) { 1357 if (gfs2_should_freeze(gl)) {
1358 set_bit(GLF_FROZEN, &gl->gl_flags); 1358 set_bit(GLF_FROZEN, &gl->gl_flags);
1359 spin_unlock(&gl->gl_spin); 1359 spin_unlock(&gl->gl_spin);
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 2553b858a72e..307ac31df781 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -121,8 +121,11 @@ enum {
121 121
122struct lm_lockops { 122struct lm_lockops {
123 const char *lm_proto_name; 123 const char *lm_proto_name;
124 int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname); 124 int (*lm_mount) (struct gfs2_sbd *sdp, const char *table);
125 void (*lm_unmount) (struct gfs2_sbd *sdp); 125 void (*lm_first_done) (struct gfs2_sbd *sdp);
126 void (*lm_recovery_result) (struct gfs2_sbd *sdp, unsigned int jid,
127 unsigned int result);
128 void (*lm_unmount) (struct gfs2_sbd *sdp);
126 void (*lm_withdraw) (struct gfs2_sbd *sdp); 129 void (*lm_withdraw) (struct gfs2_sbd *sdp);
127 void (*lm_put_lock) (struct gfs2_glock *gl); 130 void (*lm_put_lock) (struct gfs2_glock *gl);
128 int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state, 131 int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state,
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 7389dfdcc9ef..97742a7ea9cc 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -139,8 +139,45 @@ struct gfs2_bufdata {
139#define GDLM_STRNAME_BYTES 25 139#define GDLM_STRNAME_BYTES 25
140#define GDLM_LVB_SIZE 32 140#define GDLM_LVB_SIZE 32
141 141
142/*
143 * ls_recover_flags:
144 *
145 * DFL_BLOCK_LOCKS: dlm is in recovery and will grant locks that had been
146 * held by failed nodes whose journals need recovery. Those locks should
147 * only be used for journal recovery until the journal recovery is done.
148 * This is set by the dlm recover_prep callback and cleared by the
149 * gfs2_control thread when journal recovery is complete. To avoid
150 * races between recover_prep setting and gfs2_control clearing, recover_spin
151 * is held while changing this bit and reading/writing recover_block
152 * and recover_start.
153 *
154 * DFL_NO_DLM_OPS: dlm lockspace ops/callbacks are not being used.
155 *
156 * DFL_FIRST_MOUNT: this node is the first to mount this fs and is doing
157 * recovery of all journals before allowing other nodes to mount the fs.
158 * This is cleared when FIRST_MOUNT_DONE is set.
159 *
160 * DFL_FIRST_MOUNT_DONE: this node was the first mounter, and has finished
161 * recovery of all journals, and now allows other nodes to mount the fs.
162 *
163 * DFL_MOUNT_DONE: gdlm_mount has completed successfully and cleared
164 * BLOCK_LOCKS for the first time. The gfs2_control thread should now
165 * control clearing BLOCK_LOCKS for further recoveries.
166 *
167 * DFL_UNMOUNT: gdlm_unmount sets to keep sdp off gfs2_control_wq.
168 *
169 * DFL_DLM_RECOVERY: set while dlm is in recovery, between recover_prep()
170 * and recover_done(), i.e. set while recover_block == recover_start.
171 */
172
142enum { 173enum {
143 DFL_BLOCK_LOCKS = 0, 174 DFL_BLOCK_LOCKS = 0,
175 DFL_NO_DLM_OPS = 1,
176 DFL_FIRST_MOUNT = 2,
177 DFL_FIRST_MOUNT_DONE = 3,
178 DFL_MOUNT_DONE = 4,
179 DFL_UNMOUNT = 5,
180 DFL_DLM_RECOVERY = 6,
144}; 181};
145 182
146struct lm_lockname { 183struct lm_lockname {
@@ -244,17 +281,16 @@ struct gfs2_glock {
244 281
245#define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */ 282#define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */
246 283
247struct gfs2_alloc { 284struct gfs2_qadata { /* quota allocation data */
248 /* Quota stuff */ 285 /* Quota stuff */
249 struct gfs2_quota_data *al_qd[2*MAXQUOTAS]; 286 struct gfs2_quota_data *qa_qd[2*MAXQUOTAS];
250 struct gfs2_holder al_qd_ghs[2*MAXQUOTAS]; 287 struct gfs2_holder qa_qd_ghs[2*MAXQUOTAS];
251 unsigned int al_qd_num; 288 unsigned int qa_qd_num;
252 289};
253 u32 al_requested; /* Filled in by caller of gfs2_inplace_reserve() */
254 u32 al_alloced; /* Filled in by gfs2_alloc_*() */
255 290
256 /* Filled in by gfs2_inplace_reserve() */ 291struct gfs2_blkreserv {
257 struct gfs2_holder al_rgd_gh; 292 u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */
293 struct gfs2_holder rs_rgd_gh; /* Filled in by gfs2_inplace_reserve() */
258}; 294};
259 295
260enum { 296enum {
@@ -275,7 +311,8 @@ struct gfs2_inode {
275 struct gfs2_glock *i_gl; /* Move into i_gh? */ 311 struct gfs2_glock *i_gl; /* Move into i_gh? */
276 struct gfs2_holder i_iopen_gh; 312 struct gfs2_holder i_iopen_gh;
277 struct gfs2_holder i_gh; /* for prepare/commit_write only */ 313 struct gfs2_holder i_gh; /* for prepare/commit_write only */
278 struct gfs2_alloc *i_alloc; 314 struct gfs2_qadata *i_qadata; /* quota allocation data */
315 struct gfs2_blkreserv *i_res; /* resource group block reservation */
279 struct gfs2_rgrpd *i_rgd; 316 struct gfs2_rgrpd *i_rgd;
280 u64 i_goal; /* goal block for allocations */ 317 u64 i_goal; /* goal block for allocations */
281 struct rw_semaphore i_rw_mutex; 318 struct rw_semaphore i_rw_mutex;
@@ -392,6 +429,7 @@ struct gfs2_jdesc {
392#define JDF_RECOVERY 1 429#define JDF_RECOVERY 1
393 unsigned int jd_jid; 430 unsigned int jd_jid;
394 unsigned int jd_blocks; 431 unsigned int jd_blocks;
432 int jd_recover_error;
395}; 433};
396 434
397struct gfs2_statfs_change_host { 435struct gfs2_statfs_change_host {
@@ -461,6 +499,7 @@ enum {
461 SDF_NORECOVERY = 4, 499 SDF_NORECOVERY = 4,
462 SDF_DEMOTE = 5, 500 SDF_DEMOTE = 5,
463 SDF_NOJOURNALID = 6, 501 SDF_NOJOURNALID = 6,
502 SDF_RORECOVERY = 7, /* read only recovery */
464}; 503};
465 504
466#define GFS2_FSNAME_LEN 256 505#define GFS2_FSNAME_LEN 256
@@ -499,14 +538,26 @@ struct gfs2_sb_host {
499struct lm_lockstruct { 538struct lm_lockstruct {
500 int ls_jid; 539 int ls_jid;
501 unsigned int ls_first; 540 unsigned int ls_first;
502 unsigned int ls_first_done;
503 unsigned int ls_nodir; 541 unsigned int ls_nodir;
504 const struct lm_lockops *ls_ops; 542 const struct lm_lockops *ls_ops;
505 unsigned long ls_flags;
506 dlm_lockspace_t *ls_dlm; 543 dlm_lockspace_t *ls_dlm;
507 544
508 int ls_recover_jid_done; 545 int ls_recover_jid_done; /* These two are deprecated, */
509 int ls_recover_jid_status; 546 int ls_recover_jid_status; /* used previously by gfs_controld */
547
548 struct dlm_lksb ls_mounted_lksb; /* mounted_lock */
549 struct dlm_lksb ls_control_lksb; /* control_lock */
550 char ls_control_lvb[GDLM_LVB_SIZE]; /* control_lock lvb */
551 struct completion ls_sync_wait; /* {control,mounted}_{lock,unlock} */
552
553 spinlock_t ls_recover_spin; /* protects following fields */
554 unsigned long ls_recover_flags; /* DFL_ */
555 uint32_t ls_recover_mount; /* gen in first recover_done cb */
556 uint32_t ls_recover_start; /* gen in last recover_done cb */
557 uint32_t ls_recover_block; /* copy recover_start in last recover_prep */
558 uint32_t ls_recover_size; /* size of recover_submit, recover_result */
559 uint32_t *ls_recover_submit; /* gen in last recover_slot cb per jid */
560 uint32_t *ls_recover_result; /* result of last jid recovery */
510}; 561};
511 562
512struct gfs2_sbd { 563struct gfs2_sbd {
@@ -544,6 +595,7 @@ struct gfs2_sbd {
544 wait_queue_head_t sd_glock_wait; 595 wait_queue_head_t sd_glock_wait;
545 atomic_t sd_glock_disposal; 596 atomic_t sd_glock_disposal;
546 struct completion sd_locking_init; 597 struct completion sd_locking_init;
598 struct delayed_work sd_control_work;
547 599
548 /* Inode Stuff */ 600 /* Inode Stuff */
549 601
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index cfd4959b218c..a7d611b93f0f 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -333,7 +333,7 @@ out:
333 */ 333 */
334 334
335static int create_ok(struct gfs2_inode *dip, const struct qstr *name, 335static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
336 unsigned int mode) 336 umode_t mode)
337{ 337{
338 int error; 338 int error;
339 339
@@ -364,7 +364,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
364 return 0; 364 return 0;
365} 365}
366 366
367static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode, 367static void munge_mode_uid_gid(struct gfs2_inode *dip, umode_t *mode,
368 unsigned int *uid, unsigned int *gid) 368 unsigned int *uid, unsigned int *gid)
369{ 369{
370 if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir && 370 if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir &&
@@ -389,12 +389,13 @@ static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation)
389{ 389{
390 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 390 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
391 int error; 391 int error;
392 int dblocks = 1;
392 393
393 if (gfs2_alloc_get(dip) == NULL) 394 error = gfs2_rindex_update(sdp);
394 return -ENOMEM; 395 if (error)
396 fs_warn(sdp, "rindex update returns %d\n", error);
395 397
396 dip->i_alloc->al_requested = RES_DINODE; 398 error = gfs2_inplace_reserve(dip, RES_DINODE);
397 error = gfs2_inplace_reserve(dip);
398 if (error) 399 if (error)
399 goto out; 400 goto out;
400 401
@@ -402,14 +403,13 @@ static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation)
402 if (error) 403 if (error)
403 goto out_ipreserv; 404 goto out_ipreserv;
404 405
405 error = gfs2_alloc_di(dip, no_addr, generation); 406 error = gfs2_alloc_blocks(dip, no_addr, &dblocks, 1, generation);
406 407
407 gfs2_trans_end(sdp); 408 gfs2_trans_end(sdp);
408 409
409out_ipreserv: 410out_ipreserv:
410 gfs2_inplace_release(dip); 411 gfs2_inplace_release(dip);
411out: 412out:
412 gfs2_alloc_put(dip);
413 return error; 413 return error;
414} 414}
415 415
@@ -447,7 +447,7 @@ static void gfs2_init_dir(struct buffer_head *dibh,
447 */ 447 */
448 448
449static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, 449static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
450 const struct gfs2_inum_host *inum, unsigned int mode, 450 const struct gfs2_inum_host *inum, umode_t mode,
451 unsigned int uid, unsigned int gid, 451 unsigned int uid, unsigned int gid,
452 const u64 *generation, dev_t dev, const char *symname, 452 const u64 *generation, dev_t dev, const char *symname,
453 unsigned size, struct buffer_head **bhp) 453 unsigned size, struct buffer_head **bhp)
@@ -516,7 +516,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
516} 516}
517 517
518static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, 518static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
519 unsigned int mode, const struct gfs2_inum_host *inum, 519 umode_t mode, const struct gfs2_inum_host *inum,
520 const u64 *generation, dev_t dev, const char *symname, 520 const u64 *generation, dev_t dev, const char *symname,
521 unsigned int size, struct buffer_head **bhp) 521 unsigned int size, struct buffer_head **bhp)
522{ 522{
@@ -525,7 +525,7 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
525 int error; 525 int error;
526 526
527 munge_mode_uid_gid(dip, &mode, &uid, &gid); 527 munge_mode_uid_gid(dip, &mode, &uid, &gid);
528 if (!gfs2_alloc_get(dip)) 528 if (!gfs2_qadata_get(dip))
529 return -ENOMEM; 529 return -ENOMEM;
530 530
531 error = gfs2_quota_lock(dip, uid, gid); 531 error = gfs2_quota_lock(dip, uid, gid);
@@ -547,7 +547,7 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
547out_quota: 547out_quota:
548 gfs2_quota_unlock(dip); 548 gfs2_quota_unlock(dip);
549out: 549out:
550 gfs2_alloc_put(dip); 550 gfs2_qadata_put(dip);
551 return error; 551 return error;
552} 552}
553 553
@@ -555,13 +555,13 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
555 struct gfs2_inode *ip) 555 struct gfs2_inode *ip)
556{ 556{
557 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 557 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
558 struct gfs2_alloc *al; 558 struct gfs2_qadata *qa;
559 int alloc_required; 559 int alloc_required;
560 struct buffer_head *dibh; 560 struct buffer_head *dibh;
561 int error; 561 int error;
562 562
563 al = gfs2_alloc_get(dip); 563 qa = gfs2_qadata_get(dip);
564 if (!al) 564 if (!qa)
565 return -ENOMEM; 565 return -ENOMEM;
566 566
567 error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 567 error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
@@ -576,9 +576,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
576 if (error) 576 if (error)
577 goto fail_quota_locks; 577 goto fail_quota_locks;
578 578
579 al->al_requested = sdp->sd_max_dirres; 579 error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres);
580
581 error = gfs2_inplace_reserve(dip);
582 if (error) 580 if (error)
583 goto fail_quota_locks; 581 goto fail_quota_locks;
584 582
@@ -601,9 +599,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
601 error = gfs2_meta_inode_buffer(ip, &dibh); 599 error = gfs2_meta_inode_buffer(ip, &dibh);
602 if (error) 600 if (error)
603 goto fail_end_trans; 601 goto fail_end_trans;
604 inc_nlink(&ip->i_inode); 602 set_nlink(&ip->i_inode, S_ISDIR(ip->i_inode.i_mode) ? 2 : 1);
605 if (S_ISDIR(ip->i_inode.i_mode))
606 inc_nlink(&ip->i_inode);
607 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 603 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
608 gfs2_dinode_out(ip, dibh->b_data); 604 gfs2_dinode_out(ip, dibh->b_data);
609 brelse(dibh); 605 brelse(dibh);
@@ -619,11 +615,11 @@ fail_quota_locks:
619 gfs2_quota_unlock(dip); 615 gfs2_quota_unlock(dip);
620 616
621fail: 617fail:
622 gfs2_alloc_put(dip); 618 gfs2_qadata_put(dip);
623 return error; 619 return error;
624} 620}
625 621
626int gfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array, 622static int gfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array,
627 void *fs_info) 623 void *fs_info)
628{ 624{
629 const struct xattr *xattr; 625 const struct xattr *xattr;
@@ -659,7 +655,7 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip,
659 */ 655 */
660 656
661static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, 657static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
662 unsigned int mode, dev_t dev, const char *symname, 658 umode_t mode, dev_t dev, const char *symname,
663 unsigned int size, int excl) 659 unsigned int size, int excl)
664{ 660{
665 const struct qstr *name = &dentry->d_name; 661 const struct qstr *name = &dentry->d_name;
@@ -728,9 +724,12 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
728 brelse(bh); 724 brelse(bh);
729 725
730 gfs2_trans_end(sdp); 726 gfs2_trans_end(sdp);
731 gfs2_inplace_release(dip); 727 /* Check if we reserved space in the rgrp. Function link_dinode may
728 not, depending on whether alloc is required. */
729 if (dip->i_res)
730 gfs2_inplace_release(dip);
732 gfs2_quota_unlock(dip); 731 gfs2_quota_unlock(dip);
733 gfs2_alloc_put(dip); 732 gfs2_qadata_put(dip);
734 mark_inode_dirty(inode); 733 mark_inode_dirty(inode);
735 gfs2_glock_dq_uninit_m(2, ghs); 734 gfs2_glock_dq_uninit_m(2, ghs);
736 d_instantiate(dentry, inode); 735 d_instantiate(dentry, inode);
@@ -760,7 +759,7 @@ fail:
760 */ 759 */
761 760
762static int gfs2_create(struct inode *dir, struct dentry *dentry, 761static int gfs2_create(struct inode *dir, struct dentry *dentry,
763 int mode, struct nameidata *nd) 762 umode_t mode, struct nameidata *nd)
764{ 763{
765 int excl = 0; 764 int excl = 0;
766 if (nd && (nd->flags & LOOKUP_EXCL)) 765 if (nd && (nd->flags & LOOKUP_EXCL))
@@ -875,8 +874,9 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
875 error = 0; 874 error = 0;
876 875
877 if (alloc_required) { 876 if (alloc_required) {
878 struct gfs2_alloc *al = gfs2_alloc_get(dip); 877 struct gfs2_qadata *qa = gfs2_qadata_get(dip);
879 if (!al) { 878
879 if (!qa) {
880 error = -ENOMEM; 880 error = -ENOMEM;
881 goto out_gunlock; 881 goto out_gunlock;
882 } 882 }
@@ -885,9 +885,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
885 if (error) 885 if (error)
886 goto out_alloc; 886 goto out_alloc;
887 887
888 al->al_requested = sdp->sd_max_dirres; 888 error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres);
889
890 error = gfs2_inplace_reserve(dip);
891 if (error) 889 if (error)
892 goto out_gunlock_q; 890 goto out_gunlock_q;
893 891
@@ -930,7 +928,7 @@ out_gunlock_q:
930 gfs2_quota_unlock(dip); 928 gfs2_quota_unlock(dip);
931out_alloc: 929out_alloc:
932 if (alloc_required) 930 if (alloc_required)
933 gfs2_alloc_put(dip); 931 gfs2_qadata_put(dip);
934out_gunlock: 932out_gunlock:
935 gfs2_glock_dq(ghs + 1); 933 gfs2_glock_dq(ghs + 1);
936out_child: 934out_child:
@@ -1037,12 +1035,14 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
1037 struct buffer_head *bh; 1035 struct buffer_head *bh;
1038 struct gfs2_holder ghs[3]; 1036 struct gfs2_holder ghs[3];
1039 struct gfs2_rgrpd *rgd; 1037 struct gfs2_rgrpd *rgd;
1040 int error; 1038 int error = -EROFS;
1041 1039
1042 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); 1040 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
1043 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); 1041 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
1044 1042
1045 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); 1043 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
1044 if (!rgd)
1045 goto out_inodes;
1046 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); 1046 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
1047 1047
1048 1048
@@ -1088,12 +1088,13 @@ out_end_trans:
1088out_gunlock: 1088out_gunlock:
1089 gfs2_glock_dq(ghs + 2); 1089 gfs2_glock_dq(ghs + 2);
1090out_rgrp: 1090out_rgrp:
1091 gfs2_holder_uninit(ghs + 2);
1092 gfs2_glock_dq(ghs + 1); 1091 gfs2_glock_dq(ghs + 1);
1093out_child: 1092out_child:
1094 gfs2_holder_uninit(ghs + 1);
1095 gfs2_glock_dq(ghs); 1093 gfs2_glock_dq(ghs);
1096out_parent: 1094out_parent:
1095 gfs2_holder_uninit(ghs + 2);
1096out_inodes:
1097 gfs2_holder_uninit(ghs + 1);
1097 gfs2_holder_uninit(ghs); 1098 gfs2_holder_uninit(ghs);
1098 return error; 1099 return error;
1099} 1100}
@@ -1129,7 +1130,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
1129 * Returns: errno 1130 * Returns: errno
1130 */ 1131 */
1131 1132
1132static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) 1133static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
1133{ 1134{
1134 return gfs2_create_inode(dir, dentry, S_IFDIR | mode, 0, NULL, 0, 0); 1135 return gfs2_create_inode(dir, dentry, S_IFDIR | mode, 0, NULL, 0, 0);
1135} 1136}
@@ -1143,7 +1144,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1143 * 1144 *
1144 */ 1145 */
1145 1146
1146static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode, 1147static int gfs2_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
1147 dev_t dev) 1148 dev_t dev)
1148{ 1149{
1149 return gfs2_create_inode(dir, dentry, mode, dev, NULL, 0, 0); 1150 return gfs2_create_inode(dir, dentry, mode, dev, NULL, 0, 0);
@@ -1350,8 +1351,9 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
1350 error = 0; 1351 error = 0;
1351 1352
1352 if (alloc_required) { 1353 if (alloc_required) {
1353 struct gfs2_alloc *al = gfs2_alloc_get(ndip); 1354 struct gfs2_qadata *qa = gfs2_qadata_get(ndip);
1354 if (!al) { 1355
1356 if (!qa) {
1355 error = -ENOMEM; 1357 error = -ENOMEM;
1356 goto out_gunlock; 1358 goto out_gunlock;
1357 } 1359 }
@@ -1360,9 +1362,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
1360 if (error) 1362 if (error)
1361 goto out_alloc; 1363 goto out_alloc;
1362 1364
1363 al->al_requested = sdp->sd_max_dirres; 1365 error = gfs2_inplace_reserve(ndip, sdp->sd_max_dirres);
1364
1365 error = gfs2_inplace_reserve(ndip);
1366 if (error) 1366 if (error)
1367 goto out_gunlock_q; 1367 goto out_gunlock_q;
1368 1368
@@ -1423,7 +1423,7 @@ out_gunlock_q:
1423 gfs2_quota_unlock(ndip); 1423 gfs2_quota_unlock(ndip);
1424out_alloc: 1424out_alloc:
1425 if (alloc_required) 1425 if (alloc_required)
1426 gfs2_alloc_put(ndip); 1426 gfs2_qadata_put(ndip);
1427out_gunlock: 1427out_gunlock:
1428 while (x--) { 1428 while (x--) {
1429 gfs2_glock_dq(ghs + x); 1429 gfs2_glock_dq(ghs + x);
@@ -1584,7 +1584,7 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
1584 if (!(attr->ia_valid & ATTR_GID) || ogid == ngid) 1584 if (!(attr->ia_valid & ATTR_GID) || ogid == ngid)
1585 ogid = ngid = NO_QUOTA_CHANGE; 1585 ogid = ngid = NO_QUOTA_CHANGE;
1586 1586
1587 if (!gfs2_alloc_get(ip)) 1587 if (!gfs2_qadata_get(ip))
1588 return -ENOMEM; 1588 return -ENOMEM;
1589 1589
1590 error = gfs2_quota_lock(ip, nuid, ngid); 1590 error = gfs2_quota_lock(ip, nuid, ngid);
@@ -1616,7 +1616,7 @@ out_end_trans:
1616out_gunlock_q: 1616out_gunlock_q:
1617 gfs2_quota_unlock(ip); 1617 gfs2_quota_unlock(ip);
1618out_alloc: 1618out_alloc:
1619 gfs2_alloc_put(ip); 1619 gfs2_qadata_put(ip);
1620 return error; 1620 return error;
1621} 1621}
1622 1622
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 98c80d8c2a62..8944d1e32ab5 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved. 3 * Copyright 2004-2011 Red Hat, Inc.
4 * 4 *
5 * This copyrighted material is made available to anyone wishing to use, 5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions 6 * modify, copy, or redistribute it subject to the terms and conditions
@@ -11,12 +11,15 @@
11#include <linux/dlm.h> 11#include <linux/dlm.h>
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/types.h> 13#include <linux/types.h>
14#include <linux/delay.h>
14#include <linux/gfs2_ondisk.h> 15#include <linux/gfs2_ondisk.h>
15 16
16#include "incore.h" 17#include "incore.h"
17#include "glock.h" 18#include "glock.h"
18#include "util.h" 19#include "util.h"
20#include "sys.h"
19 21
22extern struct workqueue_struct *gfs2_control_wq;
20 23
21static void gdlm_ast(void *arg) 24static void gdlm_ast(void *arg)
22{ 25{
@@ -185,34 +188,1002 @@ static void gdlm_cancel(struct gfs2_glock *gl)
185 dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_CANCEL, NULL, gl); 188 dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_CANCEL, NULL, gl);
186} 189}
187 190
188static int gdlm_mount(struct gfs2_sbd *sdp, const char *fsname) 191/*
192 * dlm/gfs2 recovery coordination using dlm_recover callbacks
193 *
194 * 1. dlm_controld sees lockspace members change
195 * 2. dlm_controld blocks dlm-kernel locking activity
196 * 3. dlm_controld within dlm-kernel notifies gfs2 (recover_prep)
197 * 4. dlm_controld starts and finishes its own user level recovery
198 * 5. dlm_controld starts dlm-kernel dlm_recoverd to do kernel recovery
199 * 6. dlm_recoverd notifies gfs2 of failed nodes (recover_slot)
200 * 7. dlm_recoverd does its own lock recovery
201 * 8. dlm_recoverd unblocks dlm-kernel locking activity
202 * 9. dlm_recoverd notifies gfs2 when done (recover_done with new generation)
203 * 10. gfs2_control updates control_lock lvb with new generation and jid bits
204 * 11. gfs2_control enqueues journals for gfs2_recover to recover (maybe none)
205 * 12. gfs2_recover dequeues and recovers journals of failed nodes
206 * 13. gfs2_recover provides recovery results to gfs2_control (recovery_result)
207 * 14. gfs2_control updates control_lock lvb jid bits for recovered journals
208 * 15. gfs2_control unblocks normal locking when all journals are recovered
209 *
210 * - failures during recovery
211 *
212 * recover_prep() may set BLOCK_LOCKS (step 3) again before gfs2_control
213 * clears BLOCK_LOCKS (step 15), e.g. another node fails while still
214 * recovering for a prior failure. gfs2_control needs a way to detect
215 * this so it can leave BLOCK_LOCKS set in step 15. This is managed using
216 * the recover_block and recover_start values.
217 *
218 * recover_done() provides a new lockspace generation number each time it
219 * is called (step 9). This generation number is saved as recover_start.
220 * When recover_prep() is called, it sets BLOCK_LOCKS and sets
221 * recover_block = recover_start. So, while recover_block is equal to
222 * recover_start, BLOCK_LOCKS should remain set. (recover_spin must
223 * be held around the BLOCK_LOCKS/recover_block/recover_start logic.)
224 *
225 * - more specific gfs2 steps in sequence above
226 *
227 * 3. recover_prep sets BLOCK_LOCKS and sets recover_block = recover_start
228 * 6. recover_slot records any failed jids (maybe none)
229 * 9. recover_done sets recover_start = new generation number
230 * 10. gfs2_control sets control_lock lvb = new gen + bits for failed jids
231 * 12. gfs2_recover does journal recoveries for failed jids identified above
232 * 14. gfs2_control clears control_lock lvb bits for recovered jids
233 * 15. gfs2_control checks if recover_block == recover_start (step 3 occured
234 * again) then do nothing, otherwise if recover_start > recover_block
235 * then clear BLOCK_LOCKS.
236 *
237 * - parallel recovery steps across all nodes
238 *
239 * All nodes attempt to update the control_lock lvb with the new generation
240 * number and jid bits, but only the first to get the control_lock EX will
241 * do so; others will see that it's already done (lvb already contains new
242 * generation number.)
243 *
244 * . All nodes get the same recover_prep/recover_slot/recover_done callbacks
245 * . All nodes attempt to set control_lock lvb gen + bits for the new gen
246 * . One node gets control_lock first and writes the lvb, others see it's done
247 * . All nodes attempt to recover jids for which they see control_lock bits set
248 * . One node succeeds for a jid, and that one clears the jid bit in the lvb
249 * . All nodes will eventually see all lvb bits clear and unblock locks
250 *
251 * - is there a problem with clearing an lvb bit that should be set
252 * and missing a journal recovery?
253 *
254 * 1. jid fails
255 * 2. lvb bit set for step 1
256 * 3. jid recovered for step 1
257 * 4. jid taken again (new mount)
258 * 5. jid fails (for step 4)
259 * 6. lvb bit set for step 5 (will already be set)
260 * 7. lvb bit cleared for step 3
261 *
262 * This is not a problem because the failure in step 5 does not
263 * require recovery, because the mount in step 4 could not have
264 * progressed far enough to unblock locks and access the fs. The
265 * control_mount() function waits for all recoveries to be complete
266 * for the latest lockspace generation before ever unblocking locks
267 * and returning. The mount in step 4 waits until the recovery in
268 * step 1 is done.
269 *
270 * - special case of first mounter: first node to mount the fs
271 *
272 * The first node to mount a gfs2 fs needs to check all the journals
273 * and recover any that need recovery before other nodes are allowed
274 * to mount the fs. (Others may begin mounting, but they must wait
275 * for the first mounter to be done before taking locks on the fs
276 * or accessing the fs.) This has two parts:
277 *
278 * 1. The mounted_lock tells a node it's the first to mount the fs.
279 * Each node holds the mounted_lock in PR while it's mounted.
280 * Each node tries to acquire the mounted_lock in EX when it mounts.
281 * If a node is granted the mounted_lock EX it means there are no
282 * other mounted nodes (no PR locks exist), and it is the first mounter.
283 * The mounted_lock is demoted to PR when first recovery is done, so
284 * others will fail to get an EX lock, but will get a PR lock.
285 *
286 * 2. The control_lock blocks others in control_mount() while the first
287 * mounter is doing first mount recovery of all journals.
288 * A mounting node needs to acquire control_lock in EX mode before
289 * it can proceed. The first mounter holds control_lock in EX while doing
290 * the first mount recovery, blocking mounts from other nodes, then demotes
291 * control_lock to NL when it's done (others_may_mount/first_done),
292 * allowing other nodes to continue mounting.
293 *
294 * first mounter:
295 * control_lock EX/NOQUEUE success
296 * mounted_lock EX/NOQUEUE success (no other PR, so no other mounters)
297 * set first=1
298 * do first mounter recovery
299 * mounted_lock EX->PR
300 * control_lock EX->NL, write lvb generation
301 *
302 * other mounter:
303 * control_lock EX/NOQUEUE success (if fail -EAGAIN, retry)
304 * mounted_lock EX/NOQUEUE fail -EAGAIN (expected due to other mounters PR)
305 * mounted_lock PR/NOQUEUE success
306 * read lvb generation
307 * control_lock EX->NL
308 * set first=0
309 *
310 * - mount during recovery
311 *
312 * If a node mounts while others are doing recovery (not first mounter),
313 * the mounting node will get its initial recover_done() callback without
314 * having seen any previous failures/callbacks.
315 *
316 * It must wait for all recoveries preceding its mount to be finished
317 * before it unblocks locks. It does this by repeating the "other mounter"
318 * steps above until the lvb generation number is >= its mount generation
319 * number (from initial recover_done) and all lvb bits are clear.
320 *
321 * - control_lock lvb format
322 *
323 * 4 bytes generation number: the latest dlm lockspace generation number
324 * from recover_done callback. Indicates the jid bitmap has been updated
325 * to reflect all slot failures through that generation.
326 * 4 bytes unused.
327 * GDLM_LVB_SIZE-8 bytes of jid bit map. If bit N is set, it indicates
328 * that jid N needs recovery.
329 */
330
331#define JID_BITMAP_OFFSET 8 /* 4 byte generation number + 4 byte unused */
332
333static void control_lvb_read(struct lm_lockstruct *ls, uint32_t *lvb_gen,
334 char *lvb_bits)
335{
336 uint32_t gen;
337 memcpy(lvb_bits, ls->ls_control_lvb, GDLM_LVB_SIZE);
338 memcpy(&gen, lvb_bits, sizeof(uint32_t));
339 *lvb_gen = le32_to_cpu(gen);
340}
341
342static void control_lvb_write(struct lm_lockstruct *ls, uint32_t lvb_gen,
343 char *lvb_bits)
344{
345 uint32_t gen;
346 memcpy(ls->ls_control_lvb, lvb_bits, GDLM_LVB_SIZE);
347 gen = cpu_to_le32(lvb_gen);
348 memcpy(ls->ls_control_lvb, &gen, sizeof(uint32_t));
349}
350
351static int all_jid_bits_clear(char *lvb)
352{
353 int i;
354 for (i = JID_BITMAP_OFFSET; i < GDLM_LVB_SIZE; i++) {
355 if (lvb[i])
356 return 0;
357 }
358 return 1;
359}
360
361static void sync_wait_cb(void *arg)
362{
363 struct lm_lockstruct *ls = arg;
364 complete(&ls->ls_sync_wait);
365}
366
367static int sync_unlock(struct gfs2_sbd *sdp, struct dlm_lksb *lksb, char *name)
189{ 368{
190 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 369 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
191 int error; 370 int error;
192 371
193 if (fsname == NULL) { 372 error = dlm_unlock(ls->ls_dlm, lksb->sb_lkid, 0, lksb, ls);
194 fs_info(sdp, "no fsname found\n"); 373 if (error) {
195 return -EINVAL; 374 fs_err(sdp, "%s lkid %x error %d\n",
375 name, lksb->sb_lkid, error);
376 return error;
377 }
378
379 wait_for_completion(&ls->ls_sync_wait);
380
381 if (lksb->sb_status != -DLM_EUNLOCK) {
382 fs_err(sdp, "%s lkid %x status %d\n",
383 name, lksb->sb_lkid, lksb->sb_status);
384 return -1;
385 }
386 return 0;
387}
388
389static int sync_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags,
390 unsigned int num, struct dlm_lksb *lksb, char *name)
391{
392 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
393 char strname[GDLM_STRNAME_BYTES];
394 int error, status;
395
396 memset(strname, 0, GDLM_STRNAME_BYTES);
397 snprintf(strname, GDLM_STRNAME_BYTES, "%8x%16x", LM_TYPE_NONDISK, num);
398
399 error = dlm_lock(ls->ls_dlm, mode, lksb, flags,
400 strname, GDLM_STRNAME_BYTES - 1,
401 0, sync_wait_cb, ls, NULL);
402 if (error) {
403 fs_err(sdp, "%s lkid %x flags %x mode %d error %d\n",
404 name, lksb->sb_lkid, flags, mode, error);
405 return error;
406 }
407
408 wait_for_completion(&ls->ls_sync_wait);
409
410 status = lksb->sb_status;
411
412 if (status && status != -EAGAIN) {
413 fs_err(sdp, "%s lkid %x flags %x mode %d status %d\n",
414 name, lksb->sb_lkid, flags, mode, status);
415 }
416
417 return status;
418}
419
420static int mounted_unlock(struct gfs2_sbd *sdp)
421{
422 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
423 return sync_unlock(sdp, &ls->ls_mounted_lksb, "mounted_lock");
424}
425
426static int mounted_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags)
427{
428 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
429 return sync_lock(sdp, mode, flags, GFS2_MOUNTED_LOCK,
430 &ls->ls_mounted_lksb, "mounted_lock");
431}
432
433static int control_unlock(struct gfs2_sbd *sdp)
434{
435 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
436 return sync_unlock(sdp, &ls->ls_control_lksb, "control_lock");
437}
438
439static int control_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags)
440{
441 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
442 return sync_lock(sdp, mode, flags, GFS2_CONTROL_LOCK,
443 &ls->ls_control_lksb, "control_lock");
444}
445
446static void gfs2_control_func(struct work_struct *work)
447{
448 struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_control_work.work);
449 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
450 char lvb_bits[GDLM_LVB_SIZE];
451 uint32_t block_gen, start_gen, lvb_gen, flags;
452 int recover_set = 0;
453 int write_lvb = 0;
454 int recover_size;
455 int i, error;
456
457 spin_lock(&ls->ls_recover_spin);
458 /*
459 * No MOUNT_DONE means we're still mounting; control_mount()
460 * will set this flag, after which this thread will take over
461 * all further clearing of BLOCK_LOCKS.
462 *
463 * FIRST_MOUNT means this node is doing first mounter recovery,
464 * for which recovery control is handled by
465 * control_mount()/control_first_done(), not this thread.
466 */
467 if (!test_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags) ||
468 test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) {
469 spin_unlock(&ls->ls_recover_spin);
470 return;
471 }
472 block_gen = ls->ls_recover_block;
473 start_gen = ls->ls_recover_start;
474 spin_unlock(&ls->ls_recover_spin);
475
476 /*
477 * Equal block_gen and start_gen implies we are between
478 * recover_prep and recover_done callbacks, which means
479 * dlm recovery is in progress and dlm locking is blocked.
480 * There's no point trying to do any work until recover_done.
481 */
482
483 if (block_gen == start_gen)
484 return;
485
486 /*
487 * Propagate recover_submit[] and recover_result[] to lvb:
488 * dlm_recoverd adds to recover_submit[] jids needing recovery
489 * gfs2_recover adds to recover_result[] journal recovery results
490 *
491 * set lvb bit for jids in recover_submit[] if the lvb has not
492 * yet been updated for the generation of the failure
493 *
494 * clear lvb bit for jids in recover_result[] if the result of
495 * the journal recovery is SUCCESS
496 */
497
498 error = control_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_VALBLK);
499 if (error) {
500 fs_err(sdp, "control lock EX error %d\n", error);
501 return;
502 }
503
504 control_lvb_read(ls, &lvb_gen, lvb_bits);
505
506 spin_lock(&ls->ls_recover_spin);
507 if (block_gen != ls->ls_recover_block ||
508 start_gen != ls->ls_recover_start) {
509 fs_info(sdp, "recover generation %u block1 %u %u\n",
510 start_gen, block_gen, ls->ls_recover_block);
511 spin_unlock(&ls->ls_recover_spin);
512 control_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT);
513 return;
514 }
515
516 recover_size = ls->ls_recover_size;
517
518 if (lvb_gen <= start_gen) {
519 /*
520 * Clear lvb bits for jids we've successfully recovered.
521 * Because all nodes attempt to recover failed journals,
522 * a journal can be recovered multiple times successfully
523 * in succession. Only the first will really do recovery,
524 * the others find it clean, but still report a successful
525 * recovery. So, another node may have already recovered
526 * the jid and cleared the lvb bit for it.
527 */
528 for (i = 0; i < recover_size; i++) {
529 if (ls->ls_recover_result[i] != LM_RD_SUCCESS)
530 continue;
531
532 ls->ls_recover_result[i] = 0;
533
534 if (!test_bit_le(i, lvb_bits + JID_BITMAP_OFFSET))
535 continue;
536
537 __clear_bit_le(i, lvb_bits + JID_BITMAP_OFFSET);
538 write_lvb = 1;
539 }
540 }
541
542 if (lvb_gen == start_gen) {
543 /*
544 * Failed slots before start_gen are already set in lvb.
545 */
546 for (i = 0; i < recover_size; i++) {
547 if (!ls->ls_recover_submit[i])
548 continue;
549 if (ls->ls_recover_submit[i] < lvb_gen)
550 ls->ls_recover_submit[i] = 0;
551 }
552 } else if (lvb_gen < start_gen) {
553 /*
554 * Failed slots before start_gen are not yet set in lvb.
555 */
556 for (i = 0; i < recover_size; i++) {
557 if (!ls->ls_recover_submit[i])
558 continue;
559 if (ls->ls_recover_submit[i] < start_gen) {
560 ls->ls_recover_submit[i] = 0;
561 __set_bit_le(i, lvb_bits + JID_BITMAP_OFFSET);
562 }
563 }
564 /* even if there are no bits to set, we need to write the
565 latest generation to the lvb */
566 write_lvb = 1;
567 } else {
568 /*
569 * we should be getting a recover_done() for lvb_gen soon
570 */
571 }
572 spin_unlock(&ls->ls_recover_spin);
573
574 if (write_lvb) {
575 control_lvb_write(ls, start_gen, lvb_bits);
576 flags = DLM_LKF_CONVERT | DLM_LKF_VALBLK;
577 } else {
578 flags = DLM_LKF_CONVERT;
579 }
580
581 error = control_lock(sdp, DLM_LOCK_NL, flags);
582 if (error) {
583 fs_err(sdp, "control lock NL error %d\n", error);
584 return;
585 }
586
587 /*
588 * Everyone will see jid bits set in the lvb, run gfs2_recover_set(),
589 * and clear a jid bit in the lvb if the recovery is a success.
590 * Eventually all journals will be recovered, all jid bits will
591 * be cleared in the lvb, and everyone will clear BLOCK_LOCKS.
592 */
593
594 for (i = 0; i < recover_size; i++) {
595 if (test_bit_le(i, lvb_bits + JID_BITMAP_OFFSET)) {
596 fs_info(sdp, "recover generation %u jid %d\n",
597 start_gen, i);
598 gfs2_recover_set(sdp, i);
599 recover_set++;
600 }
601 }
602 if (recover_set)
603 return;
604
605 /*
606 * No more jid bits set in lvb, all recovery is done, unblock locks
607 * (unless a new recover_prep callback has occured blocking locks
608 * again while working above)
609 */
610
611 spin_lock(&ls->ls_recover_spin);
612 if (ls->ls_recover_block == block_gen &&
613 ls->ls_recover_start == start_gen) {
614 clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
615 spin_unlock(&ls->ls_recover_spin);
616 fs_info(sdp, "recover generation %u done\n", start_gen);
617 gfs2_glock_thaw(sdp);
618 } else {
619 fs_info(sdp, "recover generation %u block2 %u %u\n",
620 start_gen, block_gen, ls->ls_recover_block);
621 spin_unlock(&ls->ls_recover_spin);
622 }
623}
624
625static int control_mount(struct gfs2_sbd *sdp)
626{
627 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
628 char lvb_bits[GDLM_LVB_SIZE];
629 uint32_t start_gen, block_gen, mount_gen, lvb_gen;
630 int mounted_mode;
631 int retries = 0;
632 int error;
633
634 memset(&ls->ls_mounted_lksb, 0, sizeof(struct dlm_lksb));
635 memset(&ls->ls_control_lksb, 0, sizeof(struct dlm_lksb));
636 memset(&ls->ls_control_lvb, 0, GDLM_LVB_SIZE);
637 ls->ls_control_lksb.sb_lvbptr = ls->ls_control_lvb;
638 init_completion(&ls->ls_sync_wait);
639
640 set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
641
642 error = control_lock(sdp, DLM_LOCK_NL, DLM_LKF_VALBLK);
643 if (error) {
644 fs_err(sdp, "control_mount control_lock NL error %d\n", error);
645 return error;
646 }
647
648 error = mounted_lock(sdp, DLM_LOCK_NL, 0);
649 if (error) {
650 fs_err(sdp, "control_mount mounted_lock NL error %d\n", error);
651 control_unlock(sdp);
652 return error;
653 }
654 mounted_mode = DLM_LOCK_NL;
655
656restart:
657 if (retries++ && signal_pending(current)) {
658 error = -EINTR;
659 goto fail;
660 }
661
662 /*
663 * We always start with both locks in NL. control_lock is
664 * demoted to NL below so we don't need to do it here.
665 */
666
667 if (mounted_mode != DLM_LOCK_NL) {
668 error = mounted_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT);
669 if (error)
670 goto fail;
671 mounted_mode = DLM_LOCK_NL;
672 }
673
674 /*
675 * Other nodes need to do some work in dlm recovery and gfs2_control
676 * before the recover_done and control_lock will be ready for us below.
677 * A delay here is not required but often avoids having to retry.
678 */
679
680 msleep_interruptible(500);
681
682 /*
683 * Acquire control_lock in EX and mounted_lock in either EX or PR.
684 * control_lock lvb keeps track of any pending journal recoveries.
685 * mounted_lock indicates if any other nodes have the fs mounted.
686 */
687
688 error = control_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE|DLM_LKF_VALBLK);
689 if (error == -EAGAIN) {
690 goto restart;
691 } else if (error) {
692 fs_err(sdp, "control_mount control_lock EX error %d\n", error);
693 goto fail;
694 }
695
696 error = mounted_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE);
697 if (!error) {
698 mounted_mode = DLM_LOCK_EX;
699 goto locks_done;
700 } else if (error != -EAGAIN) {
701 fs_err(sdp, "control_mount mounted_lock EX error %d\n", error);
702 goto fail;
703 }
704
705 error = mounted_lock(sdp, DLM_LOCK_PR, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE);
706 if (!error) {
707 mounted_mode = DLM_LOCK_PR;
708 goto locks_done;
709 } else {
710 /* not even -EAGAIN should happen here */
711 fs_err(sdp, "control_mount mounted_lock PR error %d\n", error);
712 goto fail;
713 }
714
715locks_done:
716 /*
717 * If we got both locks above in EX, then we're the first mounter.
718 * If not, then we need to wait for the control_lock lvb to be
719 * updated by other mounted nodes to reflect our mount generation.
720 *
721 * In simple first mounter cases, first mounter will see zero lvb_gen,
722 * but in cases where all existing nodes leave/fail before mounting
723 * nodes finish control_mount, then all nodes will be mounting and
724 * lvb_gen will be non-zero.
725 */
726
727 control_lvb_read(ls, &lvb_gen, lvb_bits);
728
729 if (lvb_gen == 0xFFFFFFFF) {
730 /* special value to force mount attempts to fail */
731 fs_err(sdp, "control_mount control_lock disabled\n");
732 error = -EINVAL;
733 goto fail;
734 }
735
736 if (mounted_mode == DLM_LOCK_EX) {
737 /* first mounter, keep both EX while doing first recovery */
738 spin_lock(&ls->ls_recover_spin);
739 clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
740 set_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags);
741 set_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags);
742 spin_unlock(&ls->ls_recover_spin);
743 fs_info(sdp, "first mounter control generation %u\n", lvb_gen);
744 return 0;
745 }
746
747 error = control_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT);
748 if (error)
749 goto fail;
750
751 /*
752 * We are not first mounter, now we need to wait for the control_lock
753 * lvb generation to be >= the generation from our first recover_done
754 * and all lvb bits to be clear (no pending journal recoveries.)
755 */
756
757 if (!all_jid_bits_clear(lvb_bits)) {
758 /* journals need recovery, wait until all are clear */
759 fs_info(sdp, "control_mount wait for journal recovery\n");
760 goto restart;
761 }
762
763 spin_lock(&ls->ls_recover_spin);
764 block_gen = ls->ls_recover_block;
765 start_gen = ls->ls_recover_start;
766 mount_gen = ls->ls_recover_mount;
767
768 if (lvb_gen < mount_gen) {
769 /* wait for mounted nodes to update control_lock lvb to our
770 generation, which might include new recovery bits set */
771 fs_info(sdp, "control_mount wait1 block %u start %u mount %u "
772 "lvb %u flags %lx\n", block_gen, start_gen, mount_gen,
773 lvb_gen, ls->ls_recover_flags);
774 spin_unlock(&ls->ls_recover_spin);
775 goto restart;
776 }
777
778 if (lvb_gen != start_gen) {
779 /* wait for mounted nodes to update control_lock lvb to the
780 latest recovery generation */
781 fs_info(sdp, "control_mount wait2 block %u start %u mount %u "
782 "lvb %u flags %lx\n", block_gen, start_gen, mount_gen,
783 lvb_gen, ls->ls_recover_flags);
784 spin_unlock(&ls->ls_recover_spin);
785 goto restart;
786 }
787
788 if (block_gen == start_gen) {
789 /* dlm recovery in progress, wait for it to finish */
790 fs_info(sdp, "control_mount wait3 block %u start %u mount %u "
791 "lvb %u flags %lx\n", block_gen, start_gen, mount_gen,
792 lvb_gen, ls->ls_recover_flags);
793 spin_unlock(&ls->ls_recover_spin);
794 goto restart;
196 } 795 }
197 796
198 error = dlm_new_lockspace(fsname, strlen(fsname), &ls->ls_dlm, 797 clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
199 DLM_LSFL_FS | DLM_LSFL_NEWEXCL | 798 set_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags);
200 (ls->ls_nodir ? DLM_LSFL_NODIR : 0), 799 memset(ls->ls_recover_submit, 0, ls->ls_recover_size*sizeof(uint32_t));
201 GDLM_LVB_SIZE); 800 memset(ls->ls_recover_result, 0, ls->ls_recover_size*sizeof(uint32_t));
801 spin_unlock(&ls->ls_recover_spin);
802 return 0;
803
804fail:
805 mounted_unlock(sdp);
806 control_unlock(sdp);
807 return error;
808}
809
810static int dlm_recovery_wait(void *word)
811{
812 schedule();
813 return 0;
814}
815
816static int control_first_done(struct gfs2_sbd *sdp)
817{
818 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
819 char lvb_bits[GDLM_LVB_SIZE];
820 uint32_t start_gen, block_gen;
821 int error;
822
823restart:
824 spin_lock(&ls->ls_recover_spin);
825 start_gen = ls->ls_recover_start;
826 block_gen = ls->ls_recover_block;
827
828 if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags) ||
829 !test_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags) ||
830 !test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) {
831 /* sanity check, should not happen */
832 fs_err(sdp, "control_first_done start %u block %u flags %lx\n",
833 start_gen, block_gen, ls->ls_recover_flags);
834 spin_unlock(&ls->ls_recover_spin);
835 control_unlock(sdp);
836 return -1;
837 }
838
839 if (start_gen == block_gen) {
840 /*
841 * Wait for the end of a dlm recovery cycle to switch from
842 * first mounter recovery. We can ignore any recover_slot
843 * callbacks between the recover_prep and next recover_done
844 * because we are still the first mounter and any failed nodes
845 * have not fully mounted, so they don't need recovery.
846 */
847 spin_unlock(&ls->ls_recover_spin);
848 fs_info(sdp, "control_first_done wait gen %u\n", start_gen);
849
850 wait_on_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY,
851 dlm_recovery_wait, TASK_UNINTERRUPTIBLE);
852 goto restart;
853 }
854
855 clear_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags);
856 set_bit(DFL_FIRST_MOUNT_DONE, &ls->ls_recover_flags);
857 memset(ls->ls_recover_submit, 0, ls->ls_recover_size*sizeof(uint32_t));
858 memset(ls->ls_recover_result, 0, ls->ls_recover_size*sizeof(uint32_t));
859 spin_unlock(&ls->ls_recover_spin);
860
861 memset(lvb_bits, 0, sizeof(lvb_bits));
862 control_lvb_write(ls, start_gen, lvb_bits);
863
864 error = mounted_lock(sdp, DLM_LOCK_PR, DLM_LKF_CONVERT);
865 if (error)
866 fs_err(sdp, "control_first_done mounted PR error %d\n", error);
867
868 error = control_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT|DLM_LKF_VALBLK);
202 if (error) 869 if (error)
203 printk(KERN_ERR "dlm_new_lockspace error %d", error); 870 fs_err(sdp, "control_first_done control NL error %d\n", error);
204 871
205 return error; 872 return error;
206} 873}
207 874
875/*
876 * Expand static jid arrays if necessary (by increments of RECOVER_SIZE_INC)
877 * to accomodate the largest slot number. (NB dlm slot numbers start at 1,
878 * gfs2 jids start at 0, so jid = slot - 1)
879 */
880
881#define RECOVER_SIZE_INC 16
882
883static int set_recover_size(struct gfs2_sbd *sdp, struct dlm_slot *slots,
884 int num_slots)
885{
886 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
887 uint32_t *submit = NULL;
888 uint32_t *result = NULL;
889 uint32_t old_size, new_size;
890 int i, max_jid;
891
892 max_jid = 0;
893 for (i = 0; i < num_slots; i++) {
894 if (max_jid < slots[i].slot - 1)
895 max_jid = slots[i].slot - 1;
896 }
897
898 old_size = ls->ls_recover_size;
899
900 if (old_size >= max_jid + 1)
901 return 0;
902
903 new_size = old_size + RECOVER_SIZE_INC;
904
905 submit = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS);
906 result = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS);
907 if (!submit || !result) {
908 kfree(submit);
909 kfree(result);
910 return -ENOMEM;
911 }
912
913 spin_lock(&ls->ls_recover_spin);
914 memcpy(submit, ls->ls_recover_submit, old_size * sizeof(uint32_t));
915 memcpy(result, ls->ls_recover_result, old_size * sizeof(uint32_t));
916 kfree(ls->ls_recover_submit);
917 kfree(ls->ls_recover_result);
918 ls->ls_recover_submit = submit;
919 ls->ls_recover_result = result;
920 ls->ls_recover_size = new_size;
921 spin_unlock(&ls->ls_recover_spin);
922 return 0;
923}
924
925static void free_recover_size(struct lm_lockstruct *ls)
926{
927 kfree(ls->ls_recover_submit);
928 kfree(ls->ls_recover_result);
929 ls->ls_recover_submit = NULL;
930 ls->ls_recover_result = NULL;
931 ls->ls_recover_size = 0;
932}
933
934/* dlm calls before it does lock recovery */
935
936static void gdlm_recover_prep(void *arg)
937{
938 struct gfs2_sbd *sdp = arg;
939 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
940
941 spin_lock(&ls->ls_recover_spin);
942 ls->ls_recover_block = ls->ls_recover_start;
943 set_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags);
944
945 if (!test_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags) ||
946 test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) {
947 spin_unlock(&ls->ls_recover_spin);
948 return;
949 }
950 set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
951 spin_unlock(&ls->ls_recover_spin);
952}
953
954/* dlm calls after recover_prep has been completed on all lockspace members;
955 identifies slot/jid of failed member */
956
957static void gdlm_recover_slot(void *arg, struct dlm_slot *slot)
958{
959 struct gfs2_sbd *sdp = arg;
960 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
961 int jid = slot->slot - 1;
962
963 spin_lock(&ls->ls_recover_spin);
964 if (ls->ls_recover_size < jid + 1) {
965 fs_err(sdp, "recover_slot jid %d gen %u short size %d",
966 jid, ls->ls_recover_block, ls->ls_recover_size);
967 spin_unlock(&ls->ls_recover_spin);
968 return;
969 }
970
971 if (ls->ls_recover_submit[jid]) {
972 fs_info(sdp, "recover_slot jid %d gen %u prev %u",
973 jid, ls->ls_recover_block, ls->ls_recover_submit[jid]);
974 }
975 ls->ls_recover_submit[jid] = ls->ls_recover_block;
976 spin_unlock(&ls->ls_recover_spin);
977}
978
979/* dlm calls after recover_slot and after it completes lock recovery */
980
981static void gdlm_recover_done(void *arg, struct dlm_slot *slots, int num_slots,
982 int our_slot, uint32_t generation)
983{
984 struct gfs2_sbd *sdp = arg;
985 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
986
987 /* ensure the ls jid arrays are large enough */
988 set_recover_size(sdp, slots, num_slots);
989
990 spin_lock(&ls->ls_recover_spin);
991 ls->ls_recover_start = generation;
992
993 if (!ls->ls_recover_mount) {
994 ls->ls_recover_mount = generation;
995 ls->ls_jid = our_slot - 1;
996 }
997
998 if (!test_bit(DFL_UNMOUNT, &ls->ls_recover_flags))
999 queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work, 0);
1000
1001 clear_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags);
1002 smp_mb__after_clear_bit();
1003 wake_up_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY);
1004 spin_unlock(&ls->ls_recover_spin);
1005}
1006
1007/* gfs2_recover thread has a journal recovery result */
1008
1009static void gdlm_recovery_result(struct gfs2_sbd *sdp, unsigned int jid,
1010 unsigned int result)
1011{
1012 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
1013
1014 if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags))
1015 return;
1016
1017 /* don't care about the recovery of own journal during mount */
1018 if (jid == ls->ls_jid)
1019 return;
1020
1021 spin_lock(&ls->ls_recover_spin);
1022 if (test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) {
1023 spin_unlock(&ls->ls_recover_spin);
1024 return;
1025 }
1026 if (ls->ls_recover_size < jid + 1) {
1027 fs_err(sdp, "recovery_result jid %d short size %d",
1028 jid, ls->ls_recover_size);
1029 spin_unlock(&ls->ls_recover_spin);
1030 return;
1031 }
1032
1033 fs_info(sdp, "recover jid %d result %s\n", jid,
1034 result == LM_RD_GAVEUP ? "busy" : "success");
1035
1036 ls->ls_recover_result[jid] = result;
1037
1038 /* GAVEUP means another node is recovering the journal; delay our
1039 next attempt to recover it, to give the other node a chance to
1040 finish before trying again */
1041
1042 if (!test_bit(DFL_UNMOUNT, &ls->ls_recover_flags))
1043 queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work,
1044 result == LM_RD_GAVEUP ? HZ : 0);
1045 spin_unlock(&ls->ls_recover_spin);
1046}
1047
1048const struct dlm_lockspace_ops gdlm_lockspace_ops = {
1049 .recover_prep = gdlm_recover_prep,
1050 .recover_slot = gdlm_recover_slot,
1051 .recover_done = gdlm_recover_done,
1052};
1053
1054static int gdlm_mount(struct gfs2_sbd *sdp, const char *table)
1055{
1056 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
1057 char cluster[GFS2_LOCKNAME_LEN];
1058 const char *fsname;
1059 uint32_t flags;
1060 int error, ops_result;
1061
1062 /*
1063 * initialize everything
1064 */
1065
1066 INIT_DELAYED_WORK(&sdp->sd_control_work, gfs2_control_func);
1067 spin_lock_init(&ls->ls_recover_spin);
1068 ls->ls_recover_flags = 0;
1069 ls->ls_recover_mount = 0;
1070 ls->ls_recover_start = 0;
1071 ls->ls_recover_block = 0;
1072 ls->ls_recover_size = 0;
1073 ls->ls_recover_submit = NULL;
1074 ls->ls_recover_result = NULL;
1075
1076 error = set_recover_size(sdp, NULL, 0);
1077 if (error)
1078 goto fail;
1079
1080 /*
1081 * prepare dlm_new_lockspace args
1082 */
1083
1084 fsname = strchr(table, ':');
1085 if (!fsname) {
1086 fs_info(sdp, "no fsname found\n");
1087 error = -EINVAL;
1088 goto fail_free;
1089 }
1090 memset(cluster, 0, sizeof(cluster));
1091 memcpy(cluster, table, strlen(table) - strlen(fsname));
1092 fsname++;
1093
1094 flags = DLM_LSFL_FS | DLM_LSFL_NEWEXCL;
1095 if (ls->ls_nodir)
1096 flags |= DLM_LSFL_NODIR;
1097
1098 /*
1099 * create/join lockspace
1100 */
1101
1102 error = dlm_new_lockspace(fsname, cluster, flags, GDLM_LVB_SIZE,
1103 &gdlm_lockspace_ops, sdp, &ops_result,
1104 &ls->ls_dlm);
1105 if (error) {
1106 fs_err(sdp, "dlm_new_lockspace error %d\n", error);
1107 goto fail_free;
1108 }
1109
1110 if (ops_result < 0) {
1111 /*
1112 * dlm does not support ops callbacks,
1113 * old dlm_controld/gfs_controld are used, try without ops.
1114 */
1115 fs_info(sdp, "dlm lockspace ops not used\n");
1116 free_recover_size(ls);
1117 set_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags);
1118 return 0;
1119 }
1120
1121 if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags)) {
1122 fs_err(sdp, "dlm lockspace ops disallow jid preset\n");
1123 error = -EINVAL;
1124 goto fail_release;
1125 }
1126
1127 /*
1128 * control_mount() uses control_lock to determine first mounter,
1129 * and for later mounts, waits for any recoveries to be cleared.
1130 */
1131
1132 error = control_mount(sdp);
1133 if (error) {
1134 fs_err(sdp, "mount control error %d\n", error);
1135 goto fail_release;
1136 }
1137
1138 ls->ls_first = !!test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags);
1139 clear_bit(SDF_NOJOURNALID, &sdp->sd_flags);
1140 smp_mb__after_clear_bit();
1141 wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID);
1142 return 0;
1143
1144fail_release:
1145 dlm_release_lockspace(ls->ls_dlm, 2);
1146fail_free:
1147 free_recover_size(ls);
1148fail:
1149 return error;
1150}
1151
1152static void gdlm_first_done(struct gfs2_sbd *sdp)
1153{
1154 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
1155 int error;
1156
1157 if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags))
1158 return;
1159
1160 error = control_first_done(sdp);
1161 if (error)
1162 fs_err(sdp, "mount first_done error %d\n", error);
1163}
1164
208static void gdlm_unmount(struct gfs2_sbd *sdp) 1165static void gdlm_unmount(struct gfs2_sbd *sdp)
209{ 1166{
210 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 1167 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
211 1168
1169 if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags))
1170 goto release;
1171
1172 /* wait for gfs2_control_wq to be done with this mount */
1173
1174 spin_lock(&ls->ls_recover_spin);
1175 set_bit(DFL_UNMOUNT, &ls->ls_recover_flags);
1176 spin_unlock(&ls->ls_recover_spin);
1177 flush_delayed_work_sync(&sdp->sd_control_work);
1178
1179 /* mounted_lock and control_lock will be purged in dlm recovery */
1180release:
212 if (ls->ls_dlm) { 1181 if (ls->ls_dlm) {
213 dlm_release_lockspace(ls->ls_dlm, 2); 1182 dlm_release_lockspace(ls->ls_dlm, 2);
214 ls->ls_dlm = NULL; 1183 ls->ls_dlm = NULL;
215 } 1184 }
1185
1186 free_recover_size(ls);
216} 1187}
217 1188
218static const match_table_t dlm_tokens = { 1189static const match_table_t dlm_tokens = {
@@ -226,6 +1197,8 @@ static const match_table_t dlm_tokens = {
226const struct lm_lockops gfs2_dlm_ops = { 1197const struct lm_lockops gfs2_dlm_ops = {
227 .lm_proto_name = "lock_dlm", 1198 .lm_proto_name = "lock_dlm",
228 .lm_mount = gdlm_mount, 1199 .lm_mount = gdlm_mount,
1200 .lm_first_done = gdlm_first_done,
1201 .lm_recovery_result = gdlm_recovery_result,
229 .lm_unmount = gdlm_unmount, 1202 .lm_unmount = gdlm_unmount,
230 .lm_put_lock = gdlm_put_lock, 1203 .lm_put_lock = gdlm_put_lock,
231 .lm_lock = gdlm_lock, 1204 .lm_lock = gdlm_lock,
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 598646434362..756fae9eaf8f 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -626,7 +626,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
626 if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) 626 if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
627 submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh); 627 submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh);
628 else 628 else
629 submit_bh(WRITE_FLUSH_FUA | REQ_META | REQ_PRIO, bh); 629 submit_bh(WRITE_FLUSH_FUA | REQ_META, bh);
630 wait_on_buffer(bh); 630 wait_on_buffer(bh);
631 631
632 if (!buffer_uptodate(bh)) 632 if (!buffer_uptodate(bh))
@@ -951,8 +951,8 @@ int gfs2_logd(void *data)
951 wake_up(&sdp->sd_log_waitq); 951 wake_up(&sdp->sd_log_waitq);
952 952
953 t = gfs2_tune_get(sdp, gt_logd_secs) * HZ; 953 t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
954 if (freezing(current)) 954
955 refrigerator(); 955 try_to_freeze();
956 956
957 do { 957 do {
958 prepare_to_wait(&sdp->sd_logd_waitq, &wait, 958 prepare_to_wait(&sdp->sd_logd_waitq, &wait,
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 8a139ff1919f..a8d9bcd0e19c 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -28,6 +28,8 @@
28#include "recovery.h" 28#include "recovery.h"
29#include "dir.h" 29#include "dir.h"
30 30
31struct workqueue_struct *gfs2_control_wq;
32
31static struct shrinker qd_shrinker = { 33static struct shrinker qd_shrinker = {
32 .shrink = gfs2_shrink_qd_memory, 34 .shrink = gfs2_shrink_qd_memory,
33 .seeks = DEFAULT_SEEKS, 35 .seeks = DEFAULT_SEEKS,
@@ -40,7 +42,8 @@ static void gfs2_init_inode_once(void *foo)
40 inode_init_once(&ip->i_inode); 42 inode_init_once(&ip->i_inode);
41 init_rwsem(&ip->i_rw_mutex); 43 init_rwsem(&ip->i_rw_mutex);
42 INIT_LIST_HEAD(&ip->i_trunc_list); 44 INIT_LIST_HEAD(&ip->i_trunc_list);
43 ip->i_alloc = NULL; 45 ip->i_qadata = NULL;
46 ip->i_res = NULL;
44 ip->i_hash_cache = NULL; 47 ip->i_hash_cache = NULL;
45} 48}
46 49
@@ -145,12 +148,19 @@ static int __init init_gfs2_fs(void)
145 if (!gfs_recovery_wq) 148 if (!gfs_recovery_wq)
146 goto fail_wq; 149 goto fail_wq;
147 150
151 gfs2_control_wq = alloc_workqueue("gfs2_control",
152 WQ_NON_REENTRANT | WQ_UNBOUND | WQ_FREEZABLE, 0);
153 if (!gfs2_control_wq)
154 goto fail_control;
155
148 gfs2_register_debugfs(); 156 gfs2_register_debugfs();
149 157
150 printk("GFS2 installed\n"); 158 printk("GFS2 installed\n");
151 159
152 return 0; 160 return 0;
153 161
162fail_control:
163 destroy_workqueue(gfs_recovery_wq);
154fail_wq: 164fail_wq:
155 unregister_filesystem(&gfs2meta_fs_type); 165 unregister_filesystem(&gfs2meta_fs_type);
156fail_unregister: 166fail_unregister:
@@ -194,6 +204,7 @@ static void __exit exit_gfs2_fs(void)
194 unregister_filesystem(&gfs2_fs_type); 204 unregister_filesystem(&gfs2_fs_type);
195 unregister_filesystem(&gfs2meta_fs_type); 205 unregister_filesystem(&gfs2meta_fs_type);
196 destroy_workqueue(gfs_recovery_wq); 206 destroy_workqueue(gfs_recovery_wq);
207 destroy_workqueue(gfs2_control_wq);
197 208
198 rcu_barrier(); 209 rcu_barrier();
199 210
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index be29858900f6..181586e673f9 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -435,7 +435,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
435 if (buffer_uptodate(first_bh)) 435 if (buffer_uptodate(first_bh))
436 goto out; 436 goto out;
437 if (!buffer_locked(first_bh)) 437 if (!buffer_locked(first_bh))
438 ll_rw_block(READ_SYNC | REQ_META | REQ_PRIO, 1, &first_bh); 438 ll_rw_block(READ_SYNC | REQ_META, 1, &first_bh);
439 439
440 dblock++; 440 dblock++;
441 extlen--; 441 extlen--;
@@ -444,7 +444,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
444 bh = gfs2_getbuf(gl, dblock, CREATE); 444 bh = gfs2_getbuf(gl, dblock, CREATE);
445 445
446 if (!buffer_uptodate(bh) && !buffer_locked(bh)) 446 if (!buffer_uptodate(bh) && !buffer_locked(bh))
447 ll_rw_block(READA, 1, &bh); 447 ll_rw_block(READA | REQ_META, 1, &bh);
448 brelse(bh); 448 brelse(bh);
449 dblock++; 449 dblock++;
450 extlen--; 450 extlen--;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index cb23c2be731a..6aacf3f230a2 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -224,7 +224,7 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent)
224 224
225 bio->bi_end_io = end_bio_io_page; 225 bio->bi_end_io = end_bio_io_page;
226 bio->bi_private = page; 226 bio->bi_private = page;
227 submit_bio(READ_SYNC | REQ_META | REQ_PRIO, bio); 227 submit_bio(READ_SYNC | REQ_META, bio);
228 wait_on_page_locked(page); 228 wait_on_page_locked(page);
229 bio_put(bio); 229 bio_put(bio);
230 if (!PageUptodate(page)) { 230 if (!PageUptodate(page)) {
@@ -562,8 +562,12 @@ static void gfs2_others_may_mount(struct gfs2_sbd *sdp)
562{ 562{
563 char *message = "FIRSTMOUNT=Done"; 563 char *message = "FIRSTMOUNT=Done";
564 char *envp[] = { message, NULL }; 564 char *envp[] = { message, NULL };
565 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 565
566 ls->ls_first_done = 1; 566 fs_info(sdp, "first mount done, others may mount\n");
567
568 if (sdp->sd_lockstruct.ls_ops->lm_first_done)
569 sdp->sd_lockstruct.ls_ops->lm_first_done(sdp);
570
567 kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp); 571 kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
568} 572}
569 573
@@ -944,7 +948,6 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
944 struct gfs2_args *args = &sdp->sd_args; 948 struct gfs2_args *args = &sdp->sd_args;
945 const char *proto = sdp->sd_proto_name; 949 const char *proto = sdp->sd_proto_name;
946 const char *table = sdp->sd_table_name; 950 const char *table = sdp->sd_table_name;
947 const char *fsname;
948 char *o, *options; 951 char *o, *options;
949 int ret; 952 int ret;
950 953
@@ -1004,21 +1007,12 @@ hostdata_error:
1004 } 1007 }
1005 } 1008 }
1006 1009
1007 if (sdp->sd_args.ar_spectator)
1008 snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s", table);
1009 else
1010 snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u", table,
1011 sdp->sd_lockstruct.ls_jid);
1012
1013 fsname = strchr(table, ':');
1014 if (fsname)
1015 fsname++;
1016 if (lm->lm_mount == NULL) { 1010 if (lm->lm_mount == NULL) {
1017 fs_info(sdp, "Now mounting FS...\n"); 1011 fs_info(sdp, "Now mounting FS...\n");
1018 complete_all(&sdp->sd_locking_init); 1012 complete_all(&sdp->sd_locking_init);
1019 return 0; 1013 return 0;
1020 } 1014 }
1021 ret = lm->lm_mount(sdp, fsname); 1015 ret = lm->lm_mount(sdp, table);
1022 if (ret == 0) 1016 if (ret == 0)
1023 fs_info(sdp, "Joined cluster. Now mounting FS...\n"); 1017 fs_info(sdp, "Joined cluster. Now mounting FS...\n");
1024 complete_all(&sdp->sd_locking_init); 1018 complete_all(&sdp->sd_locking_init);
@@ -1084,7 +1078,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
1084 1078
1085 if (sdp->sd_args.ar_spectator) { 1079 if (sdp->sd_args.ar_spectator) {
1086 sb->s_flags |= MS_RDONLY; 1080 sb->s_flags |= MS_RDONLY;
1087 set_bit(SDF_NORECOVERY, &sdp->sd_flags); 1081 set_bit(SDF_RORECOVERY, &sdp->sd_flags);
1088 } 1082 }
1089 if (sdp->sd_args.ar_posix_acl) 1083 if (sdp->sd_args.ar_posix_acl)
1090 sb->s_flags |= MS_POSIXACL; 1084 sb->s_flags |= MS_POSIXACL;
@@ -1124,6 +1118,8 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
1124 if (error) 1118 if (error)
1125 goto fail; 1119 goto fail;
1126 1120
1121 snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s", sdp->sd_table_name);
1122
1127 gfs2_create_debugfs_file(sdp); 1123 gfs2_create_debugfs_file(sdp);
1128 1124
1129 error = gfs2_sys_fs_add(sdp); 1125 error = gfs2_sys_fs_add(sdp);
@@ -1160,6 +1156,13 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
1160 goto fail_sb; 1156 goto fail_sb;
1161 } 1157 }
1162 1158
1159 if (sdp->sd_args.ar_spectator)
1160 snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s",
1161 sdp->sd_table_name);
1162 else
1163 snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u",
1164 sdp->sd_table_name, sdp->sd_lockstruct.ls_jid);
1165
1163 error = init_inodes(sdp, DO); 1166 error = init_inodes(sdp, DO);
1164 if (error) 1167 if (error)
1165 goto fail_sb; 1168 goto fail_sb;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 7e528dc14f85..a45b21b03915 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -494,11 +494,11 @@ static void qdsb_put(struct gfs2_quota_data *qd)
494int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid) 494int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
495{ 495{
496 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 496 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
497 struct gfs2_alloc *al = ip->i_alloc; 497 struct gfs2_qadata *qa = ip->i_qadata;
498 struct gfs2_quota_data **qd = al->al_qd; 498 struct gfs2_quota_data **qd = qa->qa_qd;
499 int error; 499 int error;
500 500
501 if (gfs2_assert_warn(sdp, !al->al_qd_num) || 501 if (gfs2_assert_warn(sdp, !qa->qa_qd_num) ||
502 gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags))) 502 gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)))
503 return -EIO; 503 return -EIO;
504 504
@@ -508,20 +508,20 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
508 error = qdsb_get(sdp, QUOTA_USER, ip->i_inode.i_uid, qd); 508 error = qdsb_get(sdp, QUOTA_USER, ip->i_inode.i_uid, qd);
509 if (error) 509 if (error)
510 goto out; 510 goto out;
511 al->al_qd_num++; 511 qa->qa_qd_num++;
512 qd++; 512 qd++;
513 513
514 error = qdsb_get(sdp, QUOTA_GROUP, ip->i_inode.i_gid, qd); 514 error = qdsb_get(sdp, QUOTA_GROUP, ip->i_inode.i_gid, qd);
515 if (error) 515 if (error)
516 goto out; 516 goto out;
517 al->al_qd_num++; 517 qa->qa_qd_num++;
518 qd++; 518 qd++;
519 519
520 if (uid != NO_QUOTA_CHANGE && uid != ip->i_inode.i_uid) { 520 if (uid != NO_QUOTA_CHANGE && uid != ip->i_inode.i_uid) {
521 error = qdsb_get(sdp, QUOTA_USER, uid, qd); 521 error = qdsb_get(sdp, QUOTA_USER, uid, qd);
522 if (error) 522 if (error)
523 goto out; 523 goto out;
524 al->al_qd_num++; 524 qa->qa_qd_num++;
525 qd++; 525 qd++;
526 } 526 }
527 527
@@ -529,7 +529,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
529 error = qdsb_get(sdp, QUOTA_GROUP, gid, qd); 529 error = qdsb_get(sdp, QUOTA_GROUP, gid, qd);
530 if (error) 530 if (error)
531 goto out; 531 goto out;
532 al->al_qd_num++; 532 qa->qa_qd_num++;
533 qd++; 533 qd++;
534 } 534 }
535 535
@@ -542,16 +542,16 @@ out:
542void gfs2_quota_unhold(struct gfs2_inode *ip) 542void gfs2_quota_unhold(struct gfs2_inode *ip)
543{ 543{
544 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 544 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
545 struct gfs2_alloc *al = ip->i_alloc; 545 struct gfs2_qadata *qa = ip->i_qadata;
546 unsigned int x; 546 unsigned int x;
547 547
548 gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)); 548 gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags));
549 549
550 for (x = 0; x < al->al_qd_num; x++) { 550 for (x = 0; x < qa->qa_qd_num; x++) {
551 qdsb_put(al->al_qd[x]); 551 qdsb_put(qa->qa_qd[x]);
552 al->al_qd[x] = NULL; 552 qa->qa_qd[x] = NULL;
553 } 553 }
554 al->al_qd_num = 0; 554 qa->qa_qd_num = 0;
555} 555}
556 556
557static int sort_qd(const void *a, const void *b) 557static int sort_qd(const void *a, const void *b)
@@ -712,7 +712,7 @@ get_a_page:
712 set_buffer_uptodate(bh); 712 set_buffer_uptodate(bh);
713 713
714 if (!buffer_uptodate(bh)) { 714 if (!buffer_uptodate(bh)) {
715 ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh); 715 ll_rw_block(READ | REQ_META, 1, &bh);
716 wait_on_buffer(bh); 716 wait_on_buffer(bh);
717 if (!buffer_uptodate(bh)) 717 if (!buffer_uptodate(bh))
718 goto unlock_out; 718 goto unlock_out;
@@ -762,7 +762,6 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
762 struct gfs2_quota_data *qd; 762 struct gfs2_quota_data *qd;
763 loff_t offset; 763 loff_t offset;
764 unsigned int nalloc = 0, blocks; 764 unsigned int nalloc = 0, blocks;
765 struct gfs2_alloc *al = NULL;
766 int error; 765 int error;
767 766
768 gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota), 767 gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
@@ -792,26 +791,19 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
792 nalloc++; 791 nalloc++;
793 } 792 }
794 793
795 al = gfs2_alloc_get(ip);
796 if (!al) {
797 error = -ENOMEM;
798 goto out_gunlock;
799 }
800 /* 794 /*
801 * 1 blk for unstuffing inode if stuffed. We add this extra 795 * 1 blk for unstuffing inode if stuffed. We add this extra
802 * block to the reservation unconditionally. If the inode 796 * block to the reservation unconditionally. If the inode
803 * doesn't need unstuffing, the block will be released to the 797 * doesn't need unstuffing, the block will be released to the
804 * rgrp since it won't be allocated during the transaction 798 * rgrp since it won't be allocated during the transaction
805 */ 799 */
806 al->al_requested = 1;
807 /* +3 in the end for unstuffing block, inode size update block 800 /* +3 in the end for unstuffing block, inode size update block
808 * and another block in case quota straddles page boundary and 801 * and another block in case quota straddles page boundary and
809 * two blocks need to be updated instead of 1 */ 802 * two blocks need to be updated instead of 1 */
810 blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3; 803 blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3;
811 804
812 if (nalloc) 805 error = gfs2_inplace_reserve(ip, 1 +
813 al->al_requested += nalloc * (data_blocks + ind_blocks); 806 (nalloc * (data_blocks + ind_blocks)));
814 error = gfs2_inplace_reserve(ip);
815 if (error) 807 if (error)
816 goto out_alloc; 808 goto out_alloc;
817 809
@@ -840,8 +832,6 @@ out_end_trans:
840out_ipres: 832out_ipres:
841 gfs2_inplace_release(ip); 833 gfs2_inplace_release(ip);
842out_alloc: 834out_alloc:
843 gfs2_alloc_put(ip);
844out_gunlock:
845 gfs2_glock_dq_uninit(&i_gh); 835 gfs2_glock_dq_uninit(&i_gh);
846out: 836out:
847 while (qx--) 837 while (qx--)
@@ -925,7 +915,7 @@ fail:
925int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid) 915int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
926{ 916{
927 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 917 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
928 struct gfs2_alloc *al = ip->i_alloc; 918 struct gfs2_qadata *qa = ip->i_qadata;
929 struct gfs2_quota_data *qd; 919 struct gfs2_quota_data *qd;
930 unsigned int x; 920 unsigned int x;
931 int error = 0; 921 int error = 0;
@@ -938,15 +928,15 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
938 sdp->sd_args.ar_quota != GFS2_QUOTA_ON) 928 sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
939 return 0; 929 return 0;
940 930
941 sort(al->al_qd, al->al_qd_num, sizeof(struct gfs2_quota_data *), 931 sort(qa->qa_qd, qa->qa_qd_num, sizeof(struct gfs2_quota_data *),
942 sort_qd, NULL); 932 sort_qd, NULL);
943 933
944 for (x = 0; x < al->al_qd_num; x++) { 934 for (x = 0; x < qa->qa_qd_num; x++) {
945 int force = NO_FORCE; 935 int force = NO_FORCE;
946 qd = al->al_qd[x]; 936 qd = qa->qa_qd[x];
947 if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags)) 937 if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags))
948 force = FORCE; 938 force = FORCE;
949 error = do_glock(qd, force, &al->al_qd_ghs[x]); 939 error = do_glock(qd, force, &qa->qa_qd_ghs[x]);
950 if (error) 940 if (error)
951 break; 941 break;
952 } 942 }
@@ -955,7 +945,7 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
955 set_bit(GIF_QD_LOCKED, &ip->i_flags); 945 set_bit(GIF_QD_LOCKED, &ip->i_flags);
956 else { 946 else {
957 while (x--) 947 while (x--)
958 gfs2_glock_dq_uninit(&al->al_qd_ghs[x]); 948 gfs2_glock_dq_uninit(&qa->qa_qd_ghs[x]);
959 gfs2_quota_unhold(ip); 949 gfs2_quota_unhold(ip);
960 } 950 }
961 951
@@ -1000,7 +990,7 @@ static int need_sync(struct gfs2_quota_data *qd)
1000 990
1001void gfs2_quota_unlock(struct gfs2_inode *ip) 991void gfs2_quota_unlock(struct gfs2_inode *ip)
1002{ 992{
1003 struct gfs2_alloc *al = ip->i_alloc; 993 struct gfs2_qadata *qa = ip->i_qadata;
1004 struct gfs2_quota_data *qda[4]; 994 struct gfs2_quota_data *qda[4];
1005 unsigned int count = 0; 995 unsigned int count = 0;
1006 unsigned int x; 996 unsigned int x;
@@ -1008,14 +998,14 @@ void gfs2_quota_unlock(struct gfs2_inode *ip)
1008 if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags)) 998 if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags))
1009 goto out; 999 goto out;
1010 1000
1011 for (x = 0; x < al->al_qd_num; x++) { 1001 for (x = 0; x < qa->qa_qd_num; x++) {
1012 struct gfs2_quota_data *qd; 1002 struct gfs2_quota_data *qd;
1013 int sync; 1003 int sync;
1014 1004
1015 qd = al->al_qd[x]; 1005 qd = qa->qa_qd[x];
1016 sync = need_sync(qd); 1006 sync = need_sync(qd);
1017 1007
1018 gfs2_glock_dq_uninit(&al->al_qd_ghs[x]); 1008 gfs2_glock_dq_uninit(&qa->qa_qd_ghs[x]);
1019 1009
1020 if (sync && qd_trylock(qd)) 1010 if (sync && qd_trylock(qd))
1021 qda[count++] = qd; 1011 qda[count++] = qd;
@@ -1048,7 +1038,7 @@ static int print_message(struct gfs2_quota_data *qd, char *type)
1048int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) 1038int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
1049{ 1039{
1050 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1040 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1051 struct gfs2_alloc *al = ip->i_alloc; 1041 struct gfs2_qadata *qa = ip->i_qadata;
1052 struct gfs2_quota_data *qd; 1042 struct gfs2_quota_data *qd;
1053 s64 value; 1043 s64 value;
1054 unsigned int x; 1044 unsigned int x;
@@ -1060,8 +1050,8 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
1060 if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON) 1050 if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
1061 return 0; 1051 return 0;
1062 1052
1063 for (x = 0; x < al->al_qd_num; x++) { 1053 for (x = 0; x < qa->qa_qd_num; x++) {
1064 qd = al->al_qd[x]; 1054 qd = qa->qa_qd[x];
1065 1055
1066 if (!((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) || 1056 if (!((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
1067 (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags)))) 1057 (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))))
@@ -1099,7 +1089,7 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
1099void gfs2_quota_change(struct gfs2_inode *ip, s64 change, 1089void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
1100 u32 uid, u32 gid) 1090 u32 uid, u32 gid)
1101{ 1091{
1102 struct gfs2_alloc *al = ip->i_alloc; 1092 struct gfs2_qadata *qa = ip->i_qadata;
1103 struct gfs2_quota_data *qd; 1093 struct gfs2_quota_data *qd;
1104 unsigned int x; 1094 unsigned int x;
1105 1095
@@ -1108,8 +1098,8 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
1108 if (ip->i_diskflags & GFS2_DIF_SYSTEM) 1098 if (ip->i_diskflags & GFS2_DIF_SYSTEM)
1109 return; 1099 return;
1110 1100
1111 for (x = 0; x < al->al_qd_num; x++) { 1101 for (x = 0; x < qa->qa_qd_num; x++) {
1112 qd = al->al_qd[x]; 1102 qd = qa->qa_qd[x];
1113 1103
1114 if ((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) || 1104 if ((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
1115 (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))) { 1105 (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))) {
@@ -1427,8 +1417,8 @@ int gfs2_quotad(void *data)
1427 /* Check for & recover partially truncated inodes */ 1417 /* Check for & recover partially truncated inodes */
1428 quotad_check_trunc_list(sdp); 1418 quotad_check_trunc_list(sdp);
1429 1419
1430 if (freezing(current)) 1420 try_to_freeze();
1431 refrigerator(); 1421
1432 t = min(quotad_timeo, statfs_timeo); 1422 t = min(quotad_timeo, statfs_timeo);
1433 1423
1434 prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_INTERRUPTIBLE); 1424 prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_INTERRUPTIBLE);
@@ -1529,7 +1519,6 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
1529 unsigned int data_blocks, ind_blocks; 1519 unsigned int data_blocks, ind_blocks;
1530 unsigned int blocks = 0; 1520 unsigned int blocks = 0;
1531 int alloc_required; 1521 int alloc_required;
1532 struct gfs2_alloc *al;
1533 loff_t offset; 1522 loff_t offset;
1534 int error; 1523 int error;
1535 1524
@@ -1594,15 +1583,12 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
1594 if (gfs2_is_stuffed(ip)) 1583 if (gfs2_is_stuffed(ip))
1595 alloc_required = 1; 1584 alloc_required = 1;
1596 if (alloc_required) { 1585 if (alloc_required) {
1597 al = gfs2_alloc_get(ip);
1598 if (al == NULL)
1599 goto out_i;
1600 gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota), 1586 gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
1601 &data_blocks, &ind_blocks); 1587 &data_blocks, &ind_blocks);
1602 blocks = al->al_requested = 1 + data_blocks + ind_blocks; 1588 blocks = 1 + data_blocks + ind_blocks;
1603 error = gfs2_inplace_reserve(ip); 1589 error = gfs2_inplace_reserve(ip, blocks);
1604 if (error) 1590 if (error)
1605 goto out_alloc; 1591 goto out_i;
1606 blocks += gfs2_rg_blocks(ip); 1592 blocks += gfs2_rg_blocks(ip);
1607 } 1593 }
1608 1594
@@ -1617,11 +1603,8 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
1617 1603
1618 gfs2_trans_end(sdp); 1604 gfs2_trans_end(sdp);
1619out_release: 1605out_release:
1620 if (alloc_required) { 1606 if (alloc_required)
1621 gfs2_inplace_release(ip); 1607 gfs2_inplace_release(ip);
1622out_alloc:
1623 gfs2_alloc_put(ip);
1624 }
1625out_i: 1608out_i:
1626 gfs2_glock_dq_uninit(&i_gh); 1609 gfs2_glock_dq_uninit(&i_gh);
1627out_q: 1610out_q:
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index f2a02edcac8f..963b2d75200c 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -436,12 +436,16 @@ static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
436 char env_status[20]; 436 char env_status[20];
437 char *envp[] = { env_jid, env_status, NULL }; 437 char *envp[] = { env_jid, env_status, NULL };
438 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 438 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
439
439 ls->ls_recover_jid_done = jid; 440 ls->ls_recover_jid_done = jid;
440 ls->ls_recover_jid_status = message; 441 ls->ls_recover_jid_status = message;
441 sprintf(env_jid, "JID=%d", jid); 442 sprintf(env_jid, "JID=%d", jid);
442 sprintf(env_status, "RECOVERY=%s", 443 sprintf(env_status, "RECOVERY=%s",
443 message == LM_RD_SUCCESS ? "Done" : "Failed"); 444 message == LM_RD_SUCCESS ? "Done" : "Failed");
444 kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp); 445 kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
446
447 if (sdp->sd_lockstruct.ls_ops->lm_recovery_result)
448 sdp->sd_lockstruct.ls_ops->lm_recovery_result(sdp, jid, message);
445} 449}
446 450
447void gfs2_recover_func(struct work_struct *work) 451void gfs2_recover_func(struct work_struct *work)
@@ -512,7 +516,9 @@ void gfs2_recover_func(struct work_struct *work)
512 if (error) 516 if (error)
513 goto fail_gunlock_ji; 517 goto fail_gunlock_ji;
514 518
515 if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) { 519 if (test_bit(SDF_RORECOVERY, &sdp->sd_flags)) {
520 ro = 1;
521 } else if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) {
516 if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) 522 if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
517 ro = 1; 523 ro = 1;
518 } else { 524 } else {
@@ -577,6 +583,7 @@ fail_gunlock_j:
577 583
578 fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done"); 584 fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done");
579fail: 585fail:
586 jd->jd_recover_error = error;
580 gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP); 587 gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
581done: 588done:
582 clear_bit(JDF_RECOVERY, &jd->jd_flags); 589 clear_bit(JDF_RECOVERY, &jd->jd_flags);
@@ -605,6 +612,6 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait)
605 wait_on_bit(&jd->jd_flags, JDF_RECOVERY, gfs2_recovery_wait, 612 wait_on_bit(&jd->jd_flags, JDF_RECOVERY, gfs2_recovery_wait,
606 TASK_UNINTERRUPTIBLE); 613 TASK_UNINTERRUPTIBLE);
607 614
608 return 0; 615 return wait ? jd->jd_recover_error : 0;
609} 616}
610 617
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 96bd6d759f29..981bfa32121a 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -65,8 +65,8 @@ static const char valid_change[16] = {
65}; 65};
66 66
67static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, 67static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
68 unsigned char old_state, unsigned char new_state, 68 unsigned char old_state,
69 unsigned int *n); 69 struct gfs2_bitmap **rbi);
70 70
71/** 71/**
72 * gfs2_setbit - Set a bit in the bitmaps 72 * gfs2_setbit - Set a bit in the bitmaps
@@ -860,22 +860,36 @@ fail:
860} 860}
861 861
862/** 862/**
863 * gfs2_alloc_get - get the struct gfs2_alloc structure for an inode 863 * gfs2_qadata_get - get the struct gfs2_qadata structure for an inode
864 * @ip: the incore GFS2 inode structure 864 * @ip: the incore GFS2 inode structure
865 * 865 *
866 * Returns: the struct gfs2_alloc 866 * Returns: the struct gfs2_qadata
867 */ 867 */
868 868
869struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip) 869struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip)
870{ 870{
871 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 871 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
872 int error; 872 int error;
873 BUG_ON(ip->i_alloc != NULL); 873 BUG_ON(ip->i_qadata != NULL);
874 ip->i_alloc = kzalloc(sizeof(struct gfs2_alloc), GFP_NOFS); 874 ip->i_qadata = kzalloc(sizeof(struct gfs2_qadata), GFP_NOFS);
875 error = gfs2_rindex_update(sdp); 875 error = gfs2_rindex_update(sdp);
876 if (error) 876 if (error)
877 fs_warn(sdp, "rindex update returns %d\n", error); 877 fs_warn(sdp, "rindex update returns %d\n", error);
878 return ip->i_alloc; 878 return ip->i_qadata;
879}
880
881/**
882 * gfs2_blkrsv_get - get the struct gfs2_blkreserv structure for an inode
883 * @ip: the incore GFS2 inode structure
884 *
885 * Returns: the struct gfs2_qadata
886 */
887
888static struct gfs2_blkreserv *gfs2_blkrsv_get(struct gfs2_inode *ip)
889{
890 BUG_ON(ip->i_res != NULL);
891 ip->i_res = kzalloc(sizeof(struct gfs2_blkreserv), GFP_NOFS);
892 return ip->i_res;
879} 893}
880 894
881/** 895/**
@@ -890,15 +904,20 @@ struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip)
890 904
891static int try_rgrp_fit(const struct gfs2_rgrpd *rgd, const struct gfs2_inode *ip) 905static int try_rgrp_fit(const struct gfs2_rgrpd *rgd, const struct gfs2_inode *ip)
892{ 906{
893 const struct gfs2_alloc *al = ip->i_alloc; 907 const struct gfs2_blkreserv *rs = ip->i_res;
894 908
895 if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) 909 if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR))
896 return 0; 910 return 0;
897 if (rgd->rd_free_clone >= al->al_requested) 911 if (rgd->rd_free_clone >= rs->rs_requested)
898 return 1; 912 return 1;
899 return 0; 913 return 0;
900} 914}
901 915
916static inline u32 gfs2_bi2rgd_blk(struct gfs2_bitmap *bi, u32 blk)
917{
918 return (bi->bi_start * GFS2_NBBY) + blk;
919}
920
902/** 921/**
903 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes 922 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
904 * @rgd: The rgrp 923 * @rgd: The rgrp
@@ -912,20 +931,20 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
912 u32 goal = 0, block; 931 u32 goal = 0, block;
913 u64 no_addr; 932 u64 no_addr;
914 struct gfs2_sbd *sdp = rgd->rd_sbd; 933 struct gfs2_sbd *sdp = rgd->rd_sbd;
915 unsigned int n;
916 struct gfs2_glock *gl; 934 struct gfs2_glock *gl;
917 struct gfs2_inode *ip; 935 struct gfs2_inode *ip;
918 int error; 936 int error;
919 int found = 0; 937 int found = 0;
938 struct gfs2_bitmap *bi;
920 939
921 while (goal < rgd->rd_data) { 940 while (goal < rgd->rd_data) {
922 down_write(&sdp->sd_log_flush_lock); 941 down_write(&sdp->sd_log_flush_lock);
923 n = 1; 942 block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, &bi);
924 block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED,
925 GFS2_BLKST_UNLINKED, &n);
926 up_write(&sdp->sd_log_flush_lock); 943 up_write(&sdp->sd_log_flush_lock);
927 if (block == BFITNOENT) 944 if (block == BFITNOENT)
928 break; 945 break;
946
947 block = gfs2_bi2rgd_blk(bi, block);
929 /* rgblk_search can return a block < goal, so we need to 948 /* rgblk_search can return a block < goal, so we need to
930 keep it marching forward. */ 949 keep it marching forward. */
931 no_addr = block + rgd->rd_data0; 950 no_addr = block + rgd->rd_data0;
@@ -977,8 +996,8 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
977{ 996{
978 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 997 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
979 struct gfs2_rgrpd *rgd, *begin = NULL; 998 struct gfs2_rgrpd *rgd, *begin = NULL;
980 struct gfs2_alloc *al = ip->i_alloc; 999 struct gfs2_blkreserv *rs = ip->i_res;
981 int error, rg_locked; 1000 int error, rg_locked, flags = LM_FLAG_TRY;
982 int loops = 0; 1001 int loops = 0;
983 1002
984 if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) 1003 if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal))
@@ -997,7 +1016,7 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
997 error = 0; 1016 error = 0;
998 } else { 1017 } else {
999 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 1018 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
1000 LM_FLAG_TRY, &al->al_rgd_gh); 1019 flags, &rs->rs_rgd_gh);
1001 } 1020 }
1002 switch (error) { 1021 switch (error) {
1003 case 0: 1022 case 0:
@@ -1008,12 +1027,14 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
1008 if (rgd->rd_flags & GFS2_RDF_CHECK) 1027 if (rgd->rd_flags & GFS2_RDF_CHECK)
1009 try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr); 1028 try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr);
1010 if (!rg_locked) 1029 if (!rg_locked)
1011 gfs2_glock_dq_uninit(&al->al_rgd_gh); 1030 gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
1012 /* fall through */ 1031 /* fall through */
1013 case GLR_TRYFAILED: 1032 case GLR_TRYFAILED:
1014 rgd = gfs2_rgrpd_get_next(rgd); 1033 rgd = gfs2_rgrpd_get_next(rgd);
1015 if (rgd == begin) 1034 if (rgd == begin) {
1035 flags = 0;
1016 loops++; 1036 loops++;
1037 }
1017 break; 1038 break;
1018 default: 1039 default:
1019 return error; 1040 return error;
@@ -1023,6 +1044,13 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
1023 return -ENOSPC; 1044 return -ENOSPC;
1024} 1045}
1025 1046
1047static void gfs2_blkrsv_put(struct gfs2_inode *ip)
1048{
1049 BUG_ON(ip->i_res == NULL);
1050 kfree(ip->i_res);
1051 ip->i_res = NULL;
1052}
1053
1026/** 1054/**
1027 * gfs2_inplace_reserve - Reserve space in the filesystem 1055 * gfs2_inplace_reserve - Reserve space in the filesystem
1028 * @ip: the inode to reserve space for 1056 * @ip: the inode to reserve space for
@@ -1030,16 +1058,23 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
1030 * Returns: errno 1058 * Returns: errno
1031 */ 1059 */
1032 1060
1033int gfs2_inplace_reserve(struct gfs2_inode *ip) 1061int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
1034{ 1062{
1035 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1063 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1036 struct gfs2_alloc *al = ip->i_alloc; 1064 struct gfs2_blkreserv *rs;
1037 int error = 0; 1065 int error = 0;
1038 u64 last_unlinked = NO_BLOCK; 1066 u64 last_unlinked = NO_BLOCK;
1039 int tries = 0; 1067 int tries = 0;
1040 1068
1041 if (gfs2_assert_warn(sdp, al->al_requested)) 1069 rs = gfs2_blkrsv_get(ip);
1042 return -EINVAL; 1070 if (!rs)
1071 return -ENOMEM;
1072
1073 rs->rs_requested = requested;
1074 if (gfs2_assert_warn(sdp, requested)) {
1075 error = -EINVAL;
1076 goto out;
1077 }
1043 1078
1044 do { 1079 do {
1045 error = get_local_rgrp(ip, &last_unlinked); 1080 error = get_local_rgrp(ip, &last_unlinked);
@@ -1056,6 +1091,9 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip)
1056 gfs2_log_flush(sdp, NULL); 1091 gfs2_log_flush(sdp, NULL);
1057 } while (tries++ < 3); 1092 } while (tries++ < 3);
1058 1093
1094out:
1095 if (error)
1096 gfs2_blkrsv_put(ip);
1059 return error; 1097 return error;
1060} 1098}
1061 1099
@@ -1068,10 +1106,11 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip)
1068 1106
1069void gfs2_inplace_release(struct gfs2_inode *ip) 1107void gfs2_inplace_release(struct gfs2_inode *ip)
1070{ 1108{
1071 struct gfs2_alloc *al = ip->i_alloc; 1109 struct gfs2_blkreserv *rs = ip->i_res;
1072 1110
1073 if (al->al_rgd_gh.gh_gl) 1111 if (rs->rs_rgd_gh.gh_gl)
1074 gfs2_glock_dq_uninit(&al->al_rgd_gh); 1112 gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
1113 gfs2_blkrsv_put(ip);
1075} 1114}
1076 1115
1077/** 1116/**
@@ -1108,39 +1147,35 @@ static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
1108} 1147}
1109 1148
1110/** 1149/**
1111 * rgblk_search - find a block in @old_state, change allocation 1150 * rgblk_search - find a block in @state
1112 * state to @new_state
1113 * @rgd: the resource group descriptor 1151 * @rgd: the resource group descriptor
1114 * @goal: the goal block within the RG (start here to search for avail block) 1152 * @goal: the goal block within the RG (start here to search for avail block)
1115 * @old_state: GFS2_BLKST_XXX the before-allocation state to find 1153 * @state: GFS2_BLKST_XXX the before-allocation state to find
1116 * @new_state: GFS2_BLKST_XXX the after-allocation block state 1154 * @dinode: TRUE if the first block we allocate is for a dinode
1117 * @n: The extent length 1155 * @rbi: address of the pointer to the bitmap containing the block found
1118 * 1156 *
1119 * Walk rgrp's bitmap to find bits that represent a block in @old_state. 1157 * Walk rgrp's bitmap to find bits that represent a block in @state.
1120 * Add the found bitmap buffer to the transaction.
1121 * Set the found bits to @new_state to change block's allocation state.
1122 * 1158 *
1123 * This function never fails, because we wouldn't call it unless we 1159 * This function never fails, because we wouldn't call it unless we
1124 * know (from reservation results, etc.) that a block is available. 1160 * know (from reservation results, etc.) that a block is available.
1125 * 1161 *
1126 * Scope of @goal and returned block is just within rgrp, not the whole 1162 * Scope of @goal is just within rgrp, not the whole filesystem.
1127 * filesystem. 1163 * Scope of @returned block is just within bitmap, not the whole filesystem.
1128 * 1164 *
1129 * Returns: the block number allocated 1165 * Returns: the block number found relative to the bitmap rbi
1130 */ 1166 */
1131 1167
1132static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, 1168static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
1133 unsigned char old_state, unsigned char new_state, 1169 unsigned char state,
1134 unsigned int *n) 1170 struct gfs2_bitmap **rbi)
1135{ 1171{
1136 struct gfs2_bitmap *bi = NULL; 1172 struct gfs2_bitmap *bi = NULL;
1137 const u32 length = rgd->rd_length; 1173 const u32 length = rgd->rd_length;
1138 u32 blk = BFITNOENT; 1174 u32 blk = BFITNOENT;
1139 unsigned int buf, x; 1175 unsigned int buf, x;
1140 const unsigned int elen = *n;
1141 const u8 *buffer = NULL; 1176 const u8 *buffer = NULL;
1142 1177
1143 *n = 0; 1178 *rbi = NULL;
1144 /* Find bitmap block that contains bits for goal block */ 1179 /* Find bitmap block that contains bits for goal block */
1145 for (buf = 0; buf < length; buf++) { 1180 for (buf = 0; buf < length; buf++) {
1146 bi = rgd->rd_bits + buf; 1181 bi = rgd->rd_bits + buf;
@@ -1163,21 +1198,21 @@ do_search:
1163 bi = rgd->rd_bits + buf; 1198 bi = rgd->rd_bits + buf;
1164 1199
1165 if (test_bit(GBF_FULL, &bi->bi_flags) && 1200 if (test_bit(GBF_FULL, &bi->bi_flags) &&
1166 (old_state == GFS2_BLKST_FREE)) 1201 (state == GFS2_BLKST_FREE))
1167 goto skip; 1202 goto skip;
1168 1203
1169 /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone 1204 /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone
1170 bitmaps, so we must search the originals for that. */ 1205 bitmaps, so we must search the originals for that. */
1171 buffer = bi->bi_bh->b_data + bi->bi_offset; 1206 buffer = bi->bi_bh->b_data + bi->bi_offset;
1172 WARN_ON(!buffer_uptodate(bi->bi_bh)); 1207 WARN_ON(!buffer_uptodate(bi->bi_bh));
1173 if (old_state != GFS2_BLKST_UNLINKED && bi->bi_clone) 1208 if (state != GFS2_BLKST_UNLINKED && bi->bi_clone)
1174 buffer = bi->bi_clone + bi->bi_offset; 1209 buffer = bi->bi_clone + bi->bi_offset;
1175 1210
1176 blk = gfs2_bitfit(buffer, bi->bi_len, goal, old_state); 1211 blk = gfs2_bitfit(buffer, bi->bi_len, goal, state);
1177 if (blk != BFITNOENT) 1212 if (blk != BFITNOENT)
1178 break; 1213 break;
1179 1214
1180 if ((goal == 0) && (old_state == GFS2_BLKST_FREE)) 1215 if ((goal == 0) && (state == GFS2_BLKST_FREE))
1181 set_bit(GBF_FULL, &bi->bi_flags); 1216 set_bit(GBF_FULL, &bi->bi_flags);
1182 1217
1183 /* Try next bitmap block (wrap back to rgrp header if at end) */ 1218 /* Try next bitmap block (wrap back to rgrp header if at end) */
@@ -1187,16 +1222,37 @@ skip:
1187 goal = 0; 1222 goal = 0;
1188 } 1223 }
1189 1224
1190 if (blk == BFITNOENT) 1225 if (blk != BFITNOENT)
1191 return blk; 1226 *rbi = bi;
1192 1227
1193 *n = 1; 1228 return blk;
1194 if (old_state == new_state) 1229}
1195 goto out; 1230
1231/**
1232 * gfs2_alloc_extent - allocate an extent from a given bitmap
1233 * @rgd: the resource group descriptor
1234 * @bi: the bitmap within the rgrp
1235 * @blk: the block within the bitmap
1236 * @dinode: TRUE if the first block we allocate is for a dinode
1237 * @n: The extent length
1238 *
1239 * Add the found bitmap buffer to the transaction.
1240 * Set the found bits to @new_state to change block's allocation state.
1241 * Returns: starting block number of the extent (fs scope)
1242 */
1243static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi,
1244 u32 blk, bool dinode, unsigned int *n)
1245{
1246 const unsigned int elen = *n;
1247 u32 goal;
1248 const u8 *buffer = NULL;
1196 1249
1250 *n = 0;
1251 buffer = bi->bi_bh->b_data + bi->bi_offset;
1197 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); 1252 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
1198 gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, 1253 gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
1199 bi, blk, new_state); 1254 bi, blk, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
1255 (*n)++;
1200 goal = blk; 1256 goal = blk;
1201 while (*n < elen) { 1257 while (*n < elen) {
1202 goal++; 1258 goal++;
@@ -1206,11 +1262,12 @@ skip:
1206 GFS2_BLKST_FREE) 1262 GFS2_BLKST_FREE)
1207 break; 1263 break;
1208 gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, 1264 gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
1209 bi, goal, new_state); 1265 bi, goal, GFS2_BLKST_USED);
1210 (*n)++; 1266 (*n)++;
1211 } 1267 }
1212out: 1268 blk = gfs2_bi2rgd_blk(bi, blk);
1213 return (bi->bi_start * GFS2_NBBY) + blk; 1269 rgd->rd_last_alloc = blk + *n - 1;
1270 return rgd->rd_data0 + blk;
1214} 1271}
1215 1272
1216/** 1273/**
@@ -1298,121 +1355,93 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
1298} 1355}
1299 1356
1300/** 1357/**
1301 * gfs2_alloc_block - Allocate one or more blocks 1358 * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode
1302 * @ip: the inode to allocate the block for 1359 * @ip: the inode to allocate the block for
1303 * @bn: Used to return the starting block number 1360 * @bn: Used to return the starting block number
1304 * @n: requested number of blocks/extent length (value/result) 1361 * @ndata: requested number of blocks/extent length (value/result)
1362 * @dinode: 1 if we're allocating a dinode block, else 0
1363 * @generation: the generation number of the inode
1305 * 1364 *
1306 * Returns: 0 or error 1365 * Returns: 0 or error
1307 */ 1366 */
1308 1367
1309int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n) 1368int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
1369 bool dinode, u64 *generation)
1310{ 1370{
1311 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1371 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1312 struct buffer_head *dibh; 1372 struct buffer_head *dibh;
1313 struct gfs2_alloc *al = ip->i_alloc;
1314 struct gfs2_rgrpd *rgd; 1373 struct gfs2_rgrpd *rgd;
1315 u32 goal, blk; 1374 unsigned int ndata;
1316 u64 block; 1375 u32 goal, blk; /* block, within the rgrp scope */
1376 u64 block; /* block, within the file system scope */
1317 int error; 1377 int error;
1378 struct gfs2_bitmap *bi;
1318 1379
1319 /* Only happens if there is a bug in gfs2, return something distinctive 1380 /* Only happens if there is a bug in gfs2, return something distinctive
1320 * to ensure that it is noticed. 1381 * to ensure that it is noticed.
1321 */ 1382 */
1322 if (al == NULL) 1383 if (ip->i_res == NULL)
1323 return -ECANCELED; 1384 return -ECANCELED;
1324 1385
1325 rgd = ip->i_rgd; 1386 rgd = ip->i_rgd;
1326 1387
1327 if (rgrp_contains_block(rgd, ip->i_goal)) 1388 if (!dinode && rgrp_contains_block(rgd, ip->i_goal))
1328 goal = ip->i_goal - rgd->rd_data0; 1389 goal = ip->i_goal - rgd->rd_data0;
1329 else 1390 else
1330 goal = rgd->rd_last_alloc; 1391 goal = rgd->rd_last_alloc;
1331 1392
1332 blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED, n); 1393 blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi);
1333 1394
1334 /* Since all blocks are reserved in advance, this shouldn't happen */ 1395 /* Since all blocks are reserved in advance, this shouldn't happen */
1335 if (blk == BFITNOENT) 1396 if (blk == BFITNOENT)
1336 goto rgrp_error; 1397 goto rgrp_error;
1337 1398
1338 rgd->rd_last_alloc = blk; 1399 block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks);
1339 block = rgd->rd_data0 + blk; 1400 ndata = *nblocks;
1340 ip->i_goal = block + *n - 1; 1401 if (dinode)
1341 error = gfs2_meta_inode_buffer(ip, &dibh); 1402 ndata--;
1342 if (error == 0) { 1403
1343 struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data; 1404 if (!dinode) {
1344 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1405 ip->i_goal = block + ndata - 1;
1345 di->di_goal_meta = di->di_goal_data = cpu_to_be64(ip->i_goal); 1406 error = gfs2_meta_inode_buffer(ip, &dibh);
1346 brelse(dibh); 1407 if (error == 0) {
1408 struct gfs2_dinode *di =
1409 (struct gfs2_dinode *)dibh->b_data;
1410 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1411 di->di_goal_meta = di->di_goal_data =
1412 cpu_to_be64(ip->i_goal);
1413 brelse(dibh);
1414 }
1347 } 1415 }
1348 if (rgd->rd_free < *n) 1416 if (rgd->rd_free < *nblocks)
1349 goto rgrp_error; 1417 goto rgrp_error;
1350 1418
1351 rgd->rd_free -= *n; 1419 rgd->rd_free -= *nblocks;
1352 1420 if (dinode) {
1353 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 1421 rgd->rd_dinodes++;
1354 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1355
1356 al->al_alloced += *n;
1357
1358 gfs2_statfs_change(sdp, 0, -(s64)*n, 0);
1359 gfs2_quota_change(ip, *n, ip->i_inode.i_uid, ip->i_inode.i_gid);
1360
1361 rgd->rd_free_clone -= *n;
1362 trace_gfs2_block_alloc(ip, block, *n, GFS2_BLKST_USED);
1363 *bn = block;
1364 return 0;
1365
1366rgrp_error:
1367 gfs2_rgrp_error(rgd);
1368 return -EIO;
1369}
1370
1371/**
1372 * gfs2_alloc_di - Allocate a dinode
1373 * @dip: the directory that the inode is going in
1374 * @bn: the block number which is allocated
1375 * @generation: the generation number of the inode
1376 *
1377 * Returns: 0 on success or error
1378 */
1379
1380int gfs2_alloc_di(struct gfs2_inode *dip, u64 *bn, u64 *generation)
1381{
1382 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1383 struct gfs2_alloc *al = dip->i_alloc;
1384 struct gfs2_rgrpd *rgd = dip->i_rgd;
1385 u32 blk;
1386 u64 block;
1387 unsigned int n = 1;
1388
1389 blk = rgblk_search(rgd, rgd->rd_last_alloc,
1390 GFS2_BLKST_FREE, GFS2_BLKST_DINODE, &n);
1391
1392 /* Since all blocks are reserved in advance, this shouldn't happen */
1393 if (blk == BFITNOENT)
1394 goto rgrp_error;
1395
1396 rgd->rd_last_alloc = blk;
1397 block = rgd->rd_data0 + blk;
1398 if (rgd->rd_free == 0)
1399 goto rgrp_error;
1400
1401 rgd->rd_free--;
1402 rgd->rd_dinodes++;
1403 *generation = rgd->rd_igeneration++;
1404 if (*generation == 0)
1405 *generation = rgd->rd_igeneration++; 1422 *generation = rgd->rd_igeneration++;
1423 if (*generation == 0)
1424 *generation = rgd->rd_igeneration++;
1425 }
1426
1406 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 1427 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1407 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 1428 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1408 1429
1409 al->al_alloced++; 1430 gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
1431 if (dinode)
1432 gfs2_trans_add_unrevoke(sdp, block, 1);
1410 1433
1411 gfs2_statfs_change(sdp, 0, -1, +1); 1434 /*
1412 gfs2_trans_add_unrevoke(sdp, block, 1); 1435 * This needs reviewing to see why we cannot do the quota change
1436 * at this point in the dinode case.
1437 */
1438 if (ndata)
1439 gfs2_quota_change(ip, ndata, ip->i_inode.i_uid,
1440 ip->i_inode.i_gid);
1413 1441
1414 rgd->rd_free_clone--; 1442 rgd->rd_free_clone -= *nblocks;
1415 trace_gfs2_block_alloc(dip, block, 1, GFS2_BLKST_DINODE); 1443 trace_gfs2_block_alloc(ip, block, *nblocks,
1444 dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
1416 *bn = block; 1445 *bn = block;
1417 return 0; 1446 return 0;
1418 1447
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index cf5c50180192..ceec9106cdf4 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -28,19 +28,19 @@ extern void gfs2_free_clones(struct gfs2_rgrpd *rgd);
28extern int gfs2_rgrp_go_lock(struct gfs2_holder *gh); 28extern int gfs2_rgrp_go_lock(struct gfs2_holder *gh);
29extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh); 29extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh);
30 30
31extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); 31extern struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip);
32static inline void gfs2_alloc_put(struct gfs2_inode *ip) 32static inline void gfs2_qadata_put(struct gfs2_inode *ip)
33{ 33{
34 BUG_ON(ip->i_alloc == NULL); 34 BUG_ON(ip->i_qadata == NULL);
35 kfree(ip->i_alloc); 35 kfree(ip->i_qadata);
36 ip->i_alloc = NULL; 36 ip->i_qadata = NULL;
37} 37}
38 38
39extern int gfs2_inplace_reserve(struct gfs2_inode *ip); 39extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested);
40extern void gfs2_inplace_release(struct gfs2_inode *ip); 40extern void gfs2_inplace_release(struct gfs2_inode *ip);
41 41
42extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n); 42extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
43extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation); 43 bool dinode, u64 *generation);
44 44
45extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta); 45extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta);
46extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); 46extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 71e420989f77..4553ce515f62 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1284,18 +1284,18 @@ static int is_ancestor(const struct dentry *d1, const struct dentry *d2)
1284/** 1284/**
1285 * gfs2_show_options - Show mount options for /proc/mounts 1285 * gfs2_show_options - Show mount options for /proc/mounts
1286 * @s: seq_file structure 1286 * @s: seq_file structure
1287 * @mnt: vfsmount 1287 * @root: root of this (sub)tree
1288 * 1288 *
1289 * Returns: 0 on success or error code 1289 * Returns: 0 on success or error code
1290 */ 1290 */
1291 1291
1292static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) 1292static int gfs2_show_options(struct seq_file *s, struct dentry *root)
1293{ 1293{
1294 struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info; 1294 struct gfs2_sbd *sdp = root->d_sb->s_fs_info;
1295 struct gfs2_args *args = &sdp->sd_args; 1295 struct gfs2_args *args = &sdp->sd_args;
1296 int val; 1296 int val;
1297 1297
1298 if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir)) 1298 if (is_ancestor(root, sdp->sd_master_dir))
1299 seq_printf(s, ",meta"); 1299 seq_printf(s, ",meta");
1300 if (args->ar_lockproto[0]) 1300 if (args->ar_lockproto[0])
1301 seq_printf(s, ",lockproto=%s", args->ar_lockproto); 1301 seq_printf(s, ",lockproto=%s", args->ar_lockproto);
@@ -1399,8 +1399,9 @@ static void gfs2_final_release_pages(struct gfs2_inode *ip)
1399static int gfs2_dinode_dealloc(struct gfs2_inode *ip) 1399static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
1400{ 1400{
1401 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1401 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1402 struct gfs2_alloc *al; 1402 struct gfs2_qadata *qa;
1403 struct gfs2_rgrpd *rgd; 1403 struct gfs2_rgrpd *rgd;
1404 struct gfs2_holder gh;
1404 int error; 1405 int error;
1405 1406
1406 if (gfs2_get_inode_blocks(&ip->i_inode) != 1) { 1407 if (gfs2_get_inode_blocks(&ip->i_inode) != 1) {
@@ -1408,8 +1409,8 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
1408 return -EIO; 1409 return -EIO;
1409 } 1410 }
1410 1411
1411 al = gfs2_alloc_get(ip); 1412 qa = gfs2_qadata_get(ip);
1412 if (!al) 1413 if (!qa)
1413 return -ENOMEM; 1414 return -ENOMEM;
1414 1415
1415 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 1416 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
@@ -1423,8 +1424,7 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
1423 goto out_qs; 1424 goto out_qs;
1424 } 1425 }
1425 1426
1426 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, 1427 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
1427 &al->al_rgd_gh);
1428 if (error) 1428 if (error)
1429 goto out_qs; 1429 goto out_qs;
1430 1430
@@ -1440,11 +1440,11 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
1440 gfs2_trans_end(sdp); 1440 gfs2_trans_end(sdp);
1441 1441
1442out_rg_gunlock: 1442out_rg_gunlock:
1443 gfs2_glock_dq_uninit(&al->al_rgd_gh); 1443 gfs2_glock_dq_uninit(&gh);
1444out_qs: 1444out_qs:
1445 gfs2_quota_unhold(ip); 1445 gfs2_quota_unhold(ip);
1446out: 1446out:
1447 gfs2_alloc_put(ip); 1447 gfs2_qadata_put(ip);
1448 return error; 1448 return error;
1449} 1449}
1450 1450
@@ -1582,7 +1582,6 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb)
1582static void gfs2_i_callback(struct rcu_head *head) 1582static void gfs2_i_callback(struct rcu_head *head)
1583{ 1583{
1584 struct inode *inode = container_of(head, struct inode, i_rcu); 1584 struct inode *inode = container_of(head, struct inode, i_rcu);
1585 INIT_LIST_HEAD(&inode->i_dentry);
1586 kmem_cache_free(gfs2_inode_cachep, inode); 1585 kmem_cache_free(gfs2_inode_cachep, inode);
1587} 1586}
1588 1587
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 443cabcfcd23..d33172c291ba 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -298,7 +298,7 @@ static ssize_t block_show(struct gfs2_sbd *sdp, char *buf)
298 ssize_t ret; 298 ssize_t ret;
299 int val = 0; 299 int val = 0;
300 300
301 if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags)) 301 if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))
302 val = 1; 302 val = 1;
303 ret = sprintf(buf, "%d\n", val); 303 ret = sprintf(buf, "%d\n", val);
304 return ret; 304 return ret;
@@ -313,9 +313,9 @@ static ssize_t block_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
313 val = simple_strtol(buf, NULL, 0); 313 val = simple_strtol(buf, NULL, 0);
314 314
315 if (val == 1) 315 if (val == 1)
316 set_bit(DFL_BLOCK_LOCKS, &ls->ls_flags); 316 set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
317 else if (val == 0) { 317 else if (val == 0) {
318 clear_bit(DFL_BLOCK_LOCKS, &ls->ls_flags); 318 clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
319 smp_mb__after_clear_bit(); 319 smp_mb__after_clear_bit();
320 gfs2_glock_thaw(sdp); 320 gfs2_glock_thaw(sdp);
321 } else { 321 } else {
@@ -350,8 +350,8 @@ static ssize_t lkfirst_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
350 goto out; 350 goto out;
351 if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) 351 if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL)
352 goto out; 352 goto out;
353 sdp->sd_lockstruct.ls_first = first; 353 sdp->sd_lockstruct.ls_first = first;
354 rv = 0; 354 rv = 0;
355out: 355out:
356 spin_unlock(&sdp->sd_jindex_spin); 356 spin_unlock(&sdp->sd_jindex_spin);
357 return rv ? rv : len; 357 return rv ? rv : len;
@@ -360,19 +360,14 @@ out:
360static ssize_t first_done_show(struct gfs2_sbd *sdp, char *buf) 360static ssize_t first_done_show(struct gfs2_sbd *sdp, char *buf)
361{ 361{
362 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 362 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
363 return sprintf(buf, "%d\n", ls->ls_first_done); 363 return sprintf(buf, "%d\n", !!test_bit(DFL_FIRST_MOUNT_DONE, &ls->ls_recover_flags));
364} 364}
365 365
366static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len) 366int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid)
367{ 367{
368 unsigned jid;
369 struct gfs2_jdesc *jd; 368 struct gfs2_jdesc *jd;
370 int rv; 369 int rv;
371 370
372 rv = sscanf(buf, "%u", &jid);
373 if (rv != 1)
374 return -EINVAL;
375
376 rv = -ESHUTDOWN; 371 rv = -ESHUTDOWN;
377 spin_lock(&sdp->sd_jindex_spin); 372 spin_lock(&sdp->sd_jindex_spin);
378 if (test_bit(SDF_NORECOVERY, &sdp->sd_flags)) 373 if (test_bit(SDF_NORECOVERY, &sdp->sd_flags))
@@ -389,6 +384,20 @@ static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
389 } 384 }
390out: 385out:
391 spin_unlock(&sdp->sd_jindex_spin); 386 spin_unlock(&sdp->sd_jindex_spin);
387 return rv;
388}
389
390static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
391{
392 unsigned jid;
393 int rv;
394
395 rv = sscanf(buf, "%u", &jid);
396 if (rv != 1)
397 return -EINVAL;
398
399 rv = gfs2_recover_set(sdp, jid);
400
392 return rv ? rv : len; 401 return rv ? rv : len;
393} 402}
394 403
diff --git a/fs/gfs2/sys.h b/fs/gfs2/sys.h
index e94560e836d7..79182d6ad6ac 100644
--- a/fs/gfs2/sys.h
+++ b/fs/gfs2/sys.h
@@ -19,5 +19,7 @@ void gfs2_sys_fs_del(struct gfs2_sbd *sdp);
19int gfs2_sys_init(void); 19int gfs2_sys_init(void);
20void gfs2_sys_uninit(void); 20void gfs2_sys_uninit(void);
21 21
22int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid);
23
22#endif /* __SYS_DOT_H__ */ 24#endif /* __SYS_DOT_H__ */
23 25
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
index f8f101ef600c..125d4572e1c0 100644
--- a/fs/gfs2/trans.h
+++ b/fs/gfs2/trans.h
@@ -30,9 +30,9 @@ struct gfs2_glock;
30 * block, or all of the blocks in the rg, whichever is smaller */ 30 * block, or all of the blocks in the rg, whichever is smaller */
31static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip) 31static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip)
32{ 32{
33 const struct gfs2_alloc *al = ip->i_alloc; 33 const struct gfs2_blkreserv *rs = ip->i_res;
34 if (al->al_requested < ip->i_rgd->rd_length) 34 if (rs->rs_requested < ip->i_rgd->rd_length)
35 return al->al_requested + 1; 35 return rs->rs_requested + 1;
36 return ip->i_rgd->rd_length; 36 return ip->i_rgd->rd_length;
37} 37}
38 38
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 71d7bf830c09..e9636591b5d5 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -321,11 +321,11 @@ static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
321 struct gfs2_ea_header *ea, 321 struct gfs2_ea_header *ea,
322 struct gfs2_ea_header *prev, int leave) 322 struct gfs2_ea_header *prev, int leave)
323{ 323{
324 struct gfs2_alloc *al; 324 struct gfs2_qadata *qa;
325 int error; 325 int error;
326 326
327 al = gfs2_alloc_get(ip); 327 qa = gfs2_qadata_get(ip);
328 if (!al) 328 if (!qa)
329 return -ENOMEM; 329 return -ENOMEM;
330 330
331 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 331 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
@@ -336,7 +336,7 @@ static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
336 336
337 gfs2_quota_unhold(ip); 337 gfs2_quota_unhold(ip);
338out_alloc: 338out_alloc:
339 gfs2_alloc_put(ip); 339 gfs2_qadata_put(ip);
340 return error; 340 return error;
341} 341}
342 342
@@ -549,9 +549,10 @@ int gfs2_xattr_acl_get(struct gfs2_inode *ip, const char *name, char **ppdata)
549 goto out; 549 goto out;
550 550
551 error = gfs2_ea_get_copy(ip, &el, data, len); 551 error = gfs2_ea_get_copy(ip, &el, data, len);
552 if (error == 0) 552 if (error < 0)
553 error = len; 553 kfree(data);
554 *ppdata = data; 554 else
555 *ppdata = data;
555out: 556out:
556 brelse(el.el_bh); 557 brelse(el.el_bh);
557 return error; 558 return error;
@@ -609,7 +610,7 @@ static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp)
609 u64 block; 610 u64 block;
610 int error; 611 int error;
611 612
612 error = gfs2_alloc_block(ip, &block, &n); 613 error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
613 if (error) 614 if (error)
614 return error; 615 return error;
615 gfs2_trans_add_unrevoke(sdp, block, 1); 616 gfs2_trans_add_unrevoke(sdp, block, 1);
@@ -671,7 +672,7 @@ static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
671 int mh_size = sizeof(struct gfs2_meta_header); 672 int mh_size = sizeof(struct gfs2_meta_header);
672 unsigned int n = 1; 673 unsigned int n = 1;
673 674
674 error = gfs2_alloc_block(ip, &block, &n); 675 error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
675 if (error) 676 if (error)
676 return error; 677 return error;
677 gfs2_trans_add_unrevoke(sdp, block, 1); 678 gfs2_trans_add_unrevoke(sdp, block, 1);
@@ -708,21 +709,19 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
708 unsigned int blks, 709 unsigned int blks,
709 ea_skeleton_call_t skeleton_call, void *private) 710 ea_skeleton_call_t skeleton_call, void *private)
710{ 711{
711 struct gfs2_alloc *al; 712 struct gfs2_qadata *qa;
712 struct buffer_head *dibh; 713 struct buffer_head *dibh;
713 int error; 714 int error;
714 715
715 al = gfs2_alloc_get(ip); 716 qa = gfs2_qadata_get(ip);
716 if (!al) 717 if (!qa)
717 return -ENOMEM; 718 return -ENOMEM;
718 719
719 error = gfs2_quota_lock_check(ip); 720 error = gfs2_quota_lock_check(ip);
720 if (error) 721 if (error)
721 goto out; 722 goto out;
722 723
723 al->al_requested = blks; 724 error = gfs2_inplace_reserve(ip, blks);
724
725 error = gfs2_inplace_reserve(ip);
726 if (error) 725 if (error)
727 goto out_gunlock_q; 726 goto out_gunlock_q;
728 727
@@ -751,7 +750,7 @@ out_ipres:
751out_gunlock_q: 750out_gunlock_q:
752 gfs2_quota_unlock(ip); 751 gfs2_quota_unlock(ip);
753out: 752out:
754 gfs2_alloc_put(ip); 753 gfs2_qadata_put(ip);
755 return error; 754 return error;
756} 755}
757 756
@@ -991,7 +990,7 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
991 } else { 990 } else {
992 u64 blk; 991 u64 blk;
993 unsigned int n = 1; 992 unsigned int n = 1;
994 error = gfs2_alloc_block(ip, &blk, &n); 993 error = gfs2_alloc_blocks(ip, &blk, &n, 0, NULL);
995 if (error) 994 if (error)
996 return error; 995 return error;
997 gfs2_trans_add_unrevoke(sdp, blk, 1); 996 gfs2_trans_add_unrevoke(sdp, blk, 1);
@@ -1435,9 +1434,9 @@ out:
1435static int ea_dealloc_block(struct gfs2_inode *ip) 1434static int ea_dealloc_block(struct gfs2_inode *ip)
1436{ 1435{
1437 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1436 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1438 struct gfs2_alloc *al = ip->i_alloc;
1439 struct gfs2_rgrpd *rgd; 1437 struct gfs2_rgrpd *rgd;
1440 struct buffer_head *dibh; 1438 struct buffer_head *dibh;
1439 struct gfs2_holder gh;
1441 int error; 1440 int error;
1442 1441
1443 rgd = gfs2_blk2rgrpd(sdp, ip->i_eattr); 1442 rgd = gfs2_blk2rgrpd(sdp, ip->i_eattr);
@@ -1446,8 +1445,7 @@ static int ea_dealloc_block(struct gfs2_inode *ip)
1446 return -EIO; 1445 return -EIO;
1447 } 1446 }
1448 1447
1449 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, 1448 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
1450 &al->al_rgd_gh);
1451 if (error) 1449 if (error)
1452 return error; 1450 return error;
1453 1451
@@ -1471,7 +1469,7 @@ static int ea_dealloc_block(struct gfs2_inode *ip)
1471 gfs2_trans_end(sdp); 1469 gfs2_trans_end(sdp);
1472 1470
1473out_gunlock: 1471out_gunlock:
1474 gfs2_glock_dq_uninit(&al->al_rgd_gh); 1472 gfs2_glock_dq_uninit(&gh);
1475 return error; 1473 return error;
1476} 1474}
1477 1475
@@ -1484,11 +1482,11 @@ out_gunlock:
1484 1482
1485int gfs2_ea_dealloc(struct gfs2_inode *ip) 1483int gfs2_ea_dealloc(struct gfs2_inode *ip)
1486{ 1484{
1487 struct gfs2_alloc *al; 1485 struct gfs2_qadata *qa;
1488 int error; 1486 int error;
1489 1487
1490 al = gfs2_alloc_get(ip); 1488 qa = gfs2_qadata_get(ip);
1491 if (!al) 1489 if (!qa)
1492 return -ENOMEM; 1490 return -ENOMEM;
1493 1491
1494 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 1492 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
@@ -1510,7 +1508,7 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip)
1510out_quota: 1508out_quota:
1511 gfs2_quota_unhold(ip); 1509 gfs2_quota_unhold(ip);
1512out_alloc: 1510out_alloc:
1513 gfs2_alloc_put(ip); 1511 gfs2_qadata_put(ip);
1514 return error; 1512 return error;
1515} 1513}
1516 1514
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index bce4eef91a06..62fc14ea4b73 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -186,7 +186,7 @@ static int hfs_dir_release(struct inode *inode, struct file *file)
186 * a directory and return a corresponding inode, given the inode for 186 * a directory and return a corresponding inode, given the inode for
187 * the directory and the name (and its length) of the new file. 187 * the directory and the name (and its length) of the new file.
188 */ 188 */
189static int hfs_create(struct inode *dir, struct dentry *dentry, int mode, 189static int hfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
190 struct nameidata *nd) 190 struct nameidata *nd)
191{ 191{
192 struct inode *inode; 192 struct inode *inode;
@@ -216,7 +216,7 @@ static int hfs_create(struct inode *dir, struct dentry *dentry, int mode,
216 * in a directory, given the inode for the parent directory and the 216 * in a directory, given the inode for the parent directory and the
217 * name (and its length) of the new directory. 217 * name (and its length) of the new directory.
218 */ 218 */
219static int hfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 219static int hfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
220{ 220{
221 struct inode *inode; 221 struct inode *inode;
222 int res; 222 int res;
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index ad97c2d58287..1bf967c6bfdc 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -184,7 +184,7 @@ extern int hfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
184extern const struct address_space_operations hfs_aops; 184extern const struct address_space_operations hfs_aops;
185extern const struct address_space_operations hfs_btree_aops; 185extern const struct address_space_operations hfs_btree_aops;
186 186
187extern struct inode *hfs_new_inode(struct inode *, struct qstr *, int); 187extern struct inode *hfs_new_inode(struct inode *, struct qstr *, umode_t);
188extern void hfs_inode_write_fork(struct inode *, struct hfs_extent *, __be32 *, __be32 *); 188extern void hfs_inode_write_fork(struct inode *, struct hfs_extent *, __be32 *, __be32 *);
189extern int hfs_write_inode(struct inode *, struct writeback_control *); 189extern int hfs_write_inode(struct inode *, struct writeback_control *);
190extern int hfs_inode_setattr(struct dentry *, struct iattr *); 190extern int hfs_inode_setattr(struct dentry *, struct iattr *);
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index a1a9fdcd2a00..737dbeb64320 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -169,7 +169,7 @@ const struct address_space_operations hfs_aops = {
169/* 169/*
170 * hfs_new_inode 170 * hfs_new_inode
171 */ 171 */
172struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, int mode) 172struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, umode_t mode)
173{ 173{
174 struct super_block *sb = dir->i_sb; 174 struct super_block *sb = dir->i_sb;
175 struct inode *inode = new_inode(sb); 175 struct inode *inode = new_inode(sb);
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 1b55f704fb22..8137fb3e6780 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -133,9 +133,9 @@ static int hfs_remount(struct super_block *sb, int *flags, char *data)
133 return 0; 133 return 0;
134} 134}
135 135
136static int hfs_show_options(struct seq_file *seq, struct vfsmount *mnt) 136static int hfs_show_options(struct seq_file *seq, struct dentry *root)
137{ 137{
138 struct hfs_sb_info *sbi = HFS_SB(mnt->mnt_sb); 138 struct hfs_sb_info *sbi = HFS_SB(root->d_sb);
139 139
140 if (sbi->s_creator != cpu_to_be32(0x3f3f3f3f)) 140 if (sbi->s_creator != cpu_to_be32(0x3f3f3f3f))
141 seq_printf(seq, ",creator=%.4s", (char *)&sbi->s_creator); 141 seq_printf(seq, ",creator=%.4s", (char *)&sbi->s_creator);
@@ -170,7 +170,6 @@ static struct inode *hfs_alloc_inode(struct super_block *sb)
170static void hfs_i_callback(struct rcu_head *head) 170static void hfs_i_callback(struct rcu_head *head)
171{ 171{
172 struct inode *inode = container_of(head, struct inode, i_rcu); 172 struct inode *inode = container_of(head, struct inode, i_rcu);
173 INIT_LIST_HEAD(&inode->i_dentry);
174 kmem_cache_free(hfs_inode_cachep, HFS_I(inode)); 173 kmem_cache_free(hfs_inode_cachep, HFS_I(inode));
175} 174}
176 175
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 4536cd3f15ae..88e155f895c6 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -424,7 +424,7 @@ out:
424} 424}
425 425
426static int hfsplus_mknod(struct inode *dir, struct dentry *dentry, 426static int hfsplus_mknod(struct inode *dir, struct dentry *dentry,
427 int mode, dev_t rdev) 427 umode_t mode, dev_t rdev)
428{ 428{
429 struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb); 429 struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb);
430 struct inode *inode; 430 struct inode *inode;
@@ -453,13 +453,13 @@ out:
453 return res; 453 return res;
454} 454}
455 455
456static int hfsplus_create(struct inode *dir, struct dentry *dentry, int mode, 456static int hfsplus_create(struct inode *dir, struct dentry *dentry, umode_t mode,
457 struct nameidata *nd) 457 struct nameidata *nd)
458{ 458{
459 return hfsplus_mknod(dir, dentry, mode, 0); 459 return hfsplus_mknod(dir, dentry, mode, 0);
460} 460}
461 461
462static int hfsplus_mkdir(struct inode *dir, struct dentry *dentry, int mode) 462static int hfsplus_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
463{ 463{
464 return hfsplus_mknod(dir, dentry, mode | S_IFDIR, 0); 464 return hfsplus_mknod(dir, dentry, mode | S_IFDIR, 0);
465} 465}
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index d7674d051f52..21a5b7fc6db4 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -402,7 +402,7 @@ void hfsplus_inode_read_fork(struct inode *, struct hfsplus_fork_raw *);
402void hfsplus_inode_write_fork(struct inode *, struct hfsplus_fork_raw *); 402void hfsplus_inode_write_fork(struct inode *, struct hfsplus_fork_raw *);
403int hfsplus_cat_read_inode(struct inode *, struct hfs_find_data *); 403int hfsplus_cat_read_inode(struct inode *, struct hfs_find_data *);
404int hfsplus_cat_write_inode(struct inode *); 404int hfsplus_cat_write_inode(struct inode *);
405struct inode *hfsplus_new_inode(struct super_block *, int); 405struct inode *hfsplus_new_inode(struct super_block *, umode_t);
406void hfsplus_delete_inode(struct inode *); 406void hfsplus_delete_inode(struct inode *);
407int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end, 407int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
408 int datasync); 408 int datasync);
@@ -419,7 +419,7 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size);
419int hfsplus_parse_options(char *, struct hfsplus_sb_info *); 419int hfsplus_parse_options(char *, struct hfsplus_sb_info *);
420int hfsplus_parse_options_remount(char *input, int *force); 420int hfsplus_parse_options_remount(char *input, int *force);
421void hfsplus_fill_defaults(struct hfsplus_sb_info *); 421void hfsplus_fill_defaults(struct hfsplus_sb_info *);
422int hfsplus_show_options(struct seq_file *, struct vfsmount *); 422int hfsplus_show_options(struct seq_file *, struct dentry *);
423 423
424/* super.c */ 424/* super.c */
425struct inode *hfsplus_iget(struct super_block *, unsigned long); 425struct inode *hfsplus_iget(struct super_block *, unsigned long);
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 40e1413be4cf..6643b242bdd7 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -378,7 +378,7 @@ static const struct file_operations hfsplus_file_operations = {
378 .unlocked_ioctl = hfsplus_ioctl, 378 .unlocked_ioctl = hfsplus_ioctl,
379}; 379};
380 380
381struct inode *hfsplus_new_inode(struct super_block *sb, int mode) 381struct inode *hfsplus_new_inode(struct super_block *sb, umode_t mode)
382{ 382{
383 struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); 383 struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
384 struct inode *inode = new_inode(sb); 384 struct inode *inode = new_inode(sb);
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index fbaa6690c8e0..f66c7655b3f7 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -43,7 +43,7 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
43 unsigned int flags; 43 unsigned int flags;
44 int err = 0; 44 int err = 0;
45 45
46 err = mnt_want_write(file->f_path.mnt); 46 err = mnt_want_write_file(file);
47 if (err) 47 if (err)
48 goto out; 48 goto out;
49 49
@@ -94,7 +94,7 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
94out_unlock_inode: 94out_unlock_inode:
95 mutex_unlock(&inode->i_mutex); 95 mutex_unlock(&inode->i_mutex);
96out_drop_write: 96out_drop_write:
97 mnt_drop_write(file->f_path.mnt); 97 mnt_drop_write_file(file);
98out: 98out:
99 return err; 99 return err;
100} 100}
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c
index bb62a5882147..06fa5618600c 100644
--- a/fs/hfsplus/options.c
+++ b/fs/hfsplus/options.c
@@ -206,9 +206,9 @@ done:
206 return 1; 206 return 1;
207} 207}
208 208
209int hfsplus_show_options(struct seq_file *seq, struct vfsmount *mnt) 209int hfsplus_show_options(struct seq_file *seq, struct dentry *root)
210{ 210{
211 struct hfsplus_sb_info *sbi = HFSPLUS_SB(mnt->mnt_sb); 211 struct hfsplus_sb_info *sbi = HFSPLUS_SB(root->d_sb);
212 212
213 if (sbi->creator != HFSPLUS_DEF_CR_TYPE) 213 if (sbi->creator != HFSPLUS_DEF_CR_TYPE)
214 seq_printf(seq, ",creator=%.4s", (char *)&sbi->creator); 214 seq_printf(seq, ",creator=%.4s", (char *)&sbi->creator);
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index d24a9b666a23..427682ca9e48 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -499,9 +499,16 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
499 if (!sbi->hidden_dir) { 499 if (!sbi->hidden_dir) {
500 mutex_lock(&sbi->vh_mutex); 500 mutex_lock(&sbi->vh_mutex);
501 sbi->hidden_dir = hfsplus_new_inode(sb, S_IFDIR); 501 sbi->hidden_dir = hfsplus_new_inode(sb, S_IFDIR);
502 hfsplus_create_cat(sbi->hidden_dir->i_ino, root, &str, 502 if (!sbi->hidden_dir) {
503 sbi->hidden_dir); 503 mutex_unlock(&sbi->vh_mutex);
504 err = -ENOMEM;
505 goto out_put_root;
506 }
507 err = hfsplus_create_cat(sbi->hidden_dir->i_ino, root,
508 &str, sbi->hidden_dir);
504 mutex_unlock(&sbi->vh_mutex); 509 mutex_unlock(&sbi->vh_mutex);
510 if (err)
511 goto out_put_hidden_dir;
505 512
506 hfsplus_mark_inode_dirty(sbi->hidden_dir, 513 hfsplus_mark_inode_dirty(sbi->hidden_dir,
507 HFSPLUS_I_CAT_DIRTY); 514 HFSPLUS_I_CAT_DIRTY);
@@ -558,7 +565,6 @@ static void hfsplus_i_callback(struct rcu_head *head)
558{ 565{
559 struct inode *inode = container_of(head, struct inode, i_rcu); 566 struct inode *inode = container_of(head, struct inode, i_rcu);
560 567
561 INIT_LIST_HEAD(&inode->i_dentry);
562 kmem_cache_free(hfsplus_inode_cachep, HFSPLUS_I(inode)); 568 kmem_cache_free(hfsplus_inode_cachep, HFSPLUS_I(inode));
563} 569}
564 570
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index bf15a43016b9..3cbfa93cd782 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -39,7 +39,7 @@
39 39
40struct hostfs_iattr { 40struct hostfs_iattr {
41 unsigned int ia_valid; 41 unsigned int ia_valid;
42 mode_t ia_mode; 42 unsigned short ia_mode;
43 uid_t ia_uid; 43 uid_t ia_uid;
44 gid_t ia_gid; 44 gid_t ia_gid;
45 loff_t ia_size; 45 loff_t ia_size;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 2f72da5ae686..e130bd46d671 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -250,7 +250,6 @@ static void hostfs_evict_inode(struct inode *inode)
250static void hostfs_i_callback(struct rcu_head *head) 250static void hostfs_i_callback(struct rcu_head *head)
251{ 251{
252 struct inode *inode = container_of(head, struct inode, i_rcu); 252 struct inode *inode = container_of(head, struct inode, i_rcu);
253 INIT_LIST_HEAD(&inode->i_dentry);
254 kfree(HOSTFS_I(inode)); 253 kfree(HOSTFS_I(inode));
255} 254}
256 255
@@ -259,9 +258,9 @@ static void hostfs_destroy_inode(struct inode *inode)
259 call_rcu(&inode->i_rcu, hostfs_i_callback); 258 call_rcu(&inode->i_rcu, hostfs_i_callback);
260} 259}
261 260
262static int hostfs_show_options(struct seq_file *seq, struct vfsmount *vfs) 261static int hostfs_show_options(struct seq_file *seq, struct dentry *root)
263{ 262{
264 const char *root_path = vfs->mnt_sb->s_fs_info; 263 const char *root_path = root->d_sb->s_fs_info;
265 size_t offset = strlen(root_ino) + 1; 264 size_t offset = strlen(root_ino) + 1;
266 265
267 if (strlen(root_path) > offset) 266 if (strlen(root_path) > offset)
@@ -552,7 +551,7 @@ static int read_name(struct inode *ino, char *name)
552 return 0; 551 return 0;
553} 552}
554 553
555int hostfs_create(struct inode *dir, struct dentry *dentry, int mode, 554int hostfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
556 struct nameidata *nd) 555 struct nameidata *nd)
557{ 556{
558 struct inode *inode; 557 struct inode *inode;
@@ -677,7 +676,7 @@ int hostfs_symlink(struct inode *ino, struct dentry *dentry, const char *to)
677 return err; 676 return err;
678} 677}
679 678
680int hostfs_mkdir(struct inode *ino, struct dentry *dentry, int mode) 679int hostfs_mkdir(struct inode *ino, struct dentry *dentry, umode_t mode)
681{ 680{
682 char *file; 681 char *file;
683 int err; 682 int err;
@@ -701,7 +700,7 @@ int hostfs_rmdir(struct inode *ino, struct dentry *dentry)
701 return err; 700 return err;
702} 701}
703 702
704int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) 703static int hostfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
705{ 704{
706 struct inode *inode; 705 struct inode *inode;
707 char *name; 706 char *name;
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index ea91fcb0ef9b..30dd7b10b507 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -8,7 +8,7 @@
8#include <linux/sched.h> 8#include <linux/sched.h>
9#include "hpfs_fn.h" 9#include "hpfs_fn.h"
10 10
11static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 11static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
12{ 12{
13 const unsigned char *name = dentry->d_name.name; 13 const unsigned char *name = dentry->d_name.name;
14 unsigned len = dentry->d_name.len; 14 unsigned len = dentry->d_name.len;
@@ -115,7 +115,7 @@ bail:
115 return err; 115 return err;
116} 116}
117 117
118static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) 118static int hpfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd)
119{ 119{
120 const unsigned char *name = dentry->d_name.name; 120 const unsigned char *name = dentry->d_name.name;
121 unsigned len = dentry->d_name.len; 121 unsigned len = dentry->d_name.len;
@@ -201,7 +201,7 @@ bail:
201 return err; 201 return err;
202} 202}
203 203
204static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) 204static int hpfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
205{ 205{
206 const unsigned char *name = dentry->d_name.name; 206 const unsigned char *name = dentry->d_name.name;
207 unsigned len = dentry->d_name.len; 207 unsigned len = dentry->d_name.len;
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 98580a3b5005..3690467c944e 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -181,7 +181,6 @@ static struct inode *hpfs_alloc_inode(struct super_block *sb)
181static void hpfs_i_callback(struct rcu_head *head) 181static void hpfs_i_callback(struct rcu_head *head)
182{ 182{
183 struct inode *inode = container_of(head, struct inode, i_rcu); 183 struct inode *inode = container_of(head, struct inode, i_rcu);
184 INIT_LIST_HEAD(&inode->i_dentry);
185 kmem_cache_free(hpfs_inode_cachep, hpfs_i(inode)); 184 kmem_cache_free(hpfs_inode_cachep, hpfs_i(inode));
186} 185}
187 186
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index f590b1160c6c..d92f4ce80925 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -622,7 +622,6 @@ void hppfs_evict_inode(struct inode *ino)
622static void hppfs_i_callback(struct rcu_head *head) 622static void hppfs_i_callback(struct rcu_head *head)
623{ 623{
624 struct inode *inode = container_of(head, struct inode, i_rcu); 624 struct inode *inode = container_of(head, struct inode, i_rcu);
625 INIT_LIST_HEAD(&inode->i_dentry);
626 kfree(HPPFS_I(inode)); 625 kfree(HPPFS_I(inode));
627} 626}
628 627
@@ -726,7 +725,7 @@ static int hppfs_fill_super(struct super_block *sb, void *d, int silent)
726 sb->s_fs_info = proc_mnt; 725 sb->s_fs_info = proc_mnt;
727 726
728 err = -ENOMEM; 727 err = -ENOMEM;
729 root_inode = get_inode(sb, dget(proc_mnt->mnt_sb->s_root)); 728 root_inode = get_inode(sb, dget(proc_mnt->mnt_root));
730 if (!root_inode) 729 if (!root_inode)
731 goto out_mntput; 730 goto out_mntput;
732 731
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 0be5a78598d0..1e85a7ac0217 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -447,8 +447,8 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
447 return 0; 447 return 0;
448} 448}
449 449
450static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, 450static struct inode *hugetlbfs_get_root(struct super_block *sb,
451 gid_t gid, int mode, dev_t dev) 451 struct hugetlbfs_config *config)
452{ 452{
453 struct inode *inode; 453 struct inode *inode;
454 454
@@ -456,9 +456,31 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
456 if (inode) { 456 if (inode) {
457 struct hugetlbfs_inode_info *info; 457 struct hugetlbfs_inode_info *info;
458 inode->i_ino = get_next_ino(); 458 inode->i_ino = get_next_ino();
459 inode->i_mode = mode; 459 inode->i_mode = S_IFDIR | config->mode;
460 inode->i_uid = uid; 460 inode->i_uid = config->uid;
461 inode->i_gid = gid; 461 inode->i_gid = config->gid;
462 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
463 info = HUGETLBFS_I(inode);
464 mpol_shared_policy_init(&info->policy, NULL);
465 inode->i_op = &hugetlbfs_dir_inode_operations;
466 inode->i_fop = &simple_dir_operations;
467 /* directory inodes start off with i_nlink == 2 (for "." entry) */
468 inc_nlink(inode);
469 }
470 return inode;
471}
472
473static struct inode *hugetlbfs_get_inode(struct super_block *sb,
474 struct inode *dir,
475 umode_t mode, dev_t dev)
476{
477 struct inode *inode;
478
479 inode = new_inode(sb);
480 if (inode) {
481 struct hugetlbfs_inode_info *info;
482 inode->i_ino = get_next_ino();
483 inode_init_owner(inode, dir, mode);
462 inode->i_mapping->a_ops = &hugetlbfs_aops; 484 inode->i_mapping->a_ops = &hugetlbfs_aops;
463 inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info; 485 inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info;
464 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 486 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
@@ -500,20 +522,12 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
500 * File creation. Allocate an inode, and we're done.. 522 * File creation. Allocate an inode, and we're done..
501 */ 523 */
502static int hugetlbfs_mknod(struct inode *dir, 524static int hugetlbfs_mknod(struct inode *dir,
503 struct dentry *dentry, int mode, dev_t dev) 525 struct dentry *dentry, umode_t mode, dev_t dev)
504{ 526{
505 struct inode *inode; 527 struct inode *inode;
506 int error = -ENOSPC; 528 int error = -ENOSPC;
507 gid_t gid; 529
508 530 inode = hugetlbfs_get_inode(dir->i_sb, dir, mode, dev);
509 if (dir->i_mode & S_ISGID) {
510 gid = dir->i_gid;
511 if (S_ISDIR(mode))
512 mode |= S_ISGID;
513 } else {
514 gid = current_fsgid();
515 }
516 inode = hugetlbfs_get_inode(dir->i_sb, current_fsuid(), gid, mode, dev);
517 if (inode) { 531 if (inode) {
518 dir->i_ctime = dir->i_mtime = CURRENT_TIME; 532 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
519 d_instantiate(dentry, inode); 533 d_instantiate(dentry, inode);
@@ -523,7 +537,7 @@ static int hugetlbfs_mknod(struct inode *dir,
523 return error; 537 return error;
524} 538}
525 539
526static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 540static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
527{ 541{
528 int retval = hugetlbfs_mknod(dir, dentry, mode | S_IFDIR, 0); 542 int retval = hugetlbfs_mknod(dir, dentry, mode | S_IFDIR, 0);
529 if (!retval) 543 if (!retval)
@@ -531,7 +545,7 @@ static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
531 return retval; 545 return retval;
532} 546}
533 547
534static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) 548static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd)
535{ 549{
536 return hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0); 550 return hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0);
537} 551}
@@ -541,15 +555,8 @@ static int hugetlbfs_symlink(struct inode *dir,
541{ 555{
542 struct inode *inode; 556 struct inode *inode;
543 int error = -ENOSPC; 557 int error = -ENOSPC;
544 gid_t gid;
545
546 if (dir->i_mode & S_ISGID)
547 gid = dir->i_gid;
548 else
549 gid = current_fsgid();
550 558
551 inode = hugetlbfs_get_inode(dir->i_sb, current_fsuid(), 559 inode = hugetlbfs_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0);
552 gid, S_IFLNK|S_IRWXUGO, 0);
553 if (inode) { 560 if (inode) {
554 int l = strlen(symname)+1; 561 int l = strlen(symname)+1;
555 error = page_symlink(inode, symname, l); 562 error = page_symlink(inode, symname, l);
@@ -576,7 +583,8 @@ static int hugetlbfs_set_page_dirty(struct page *page)
576} 583}
577 584
578static int hugetlbfs_migrate_page(struct address_space *mapping, 585static int hugetlbfs_migrate_page(struct address_space *mapping,
579 struct page *newpage, struct page *page) 586 struct page *newpage, struct page *page,
587 enum migrate_mode mode)
580{ 588{
581 int rc; 589 int rc;
582 590
@@ -666,7 +674,6 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
666static void hugetlbfs_i_callback(struct rcu_head *head) 674static void hugetlbfs_i_callback(struct rcu_head *head)
667{ 675{
668 struct inode *inode = container_of(head, struct inode, i_rcu); 676 struct inode *inode = container_of(head, struct inode, i_rcu);
669 INIT_LIST_HEAD(&inode->i_dentry);
670 kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode)); 677 kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
671} 678}
672 679
@@ -858,8 +865,7 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
858 sb->s_magic = HUGETLBFS_MAGIC; 865 sb->s_magic = HUGETLBFS_MAGIC;
859 sb->s_op = &hugetlbfs_ops; 866 sb->s_op = &hugetlbfs_ops;
860 sb->s_time_gran = 1; 867 sb->s_time_gran = 1;
861 inode = hugetlbfs_get_inode(sb, config.uid, config.gid, 868 inode = hugetlbfs_get_root(sb, &config);
862 S_IFDIR | config.mode, 0);
863 if (!inode) 869 if (!inode)
864 goto out_free; 870 goto out_free;
865 871
@@ -957,8 +963,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
957 963
958 path.mnt = mntget(hugetlbfs_vfsmount); 964 path.mnt = mntget(hugetlbfs_vfsmount);
959 error = -ENOSPC; 965 error = -ENOSPC;
960 inode = hugetlbfs_get_inode(root->d_sb, current_fsuid(), 966 inode = hugetlbfs_get_inode(root->d_sb, NULL, S_IFREG | S_IRWXUGO, 0);
961 current_fsgid(), S_IFREG | S_IRWXUGO, 0);
962 if (!inode) 967 if (!inode)
963 goto out_dentry; 968 goto out_dentry;
964 969
diff --git a/fs/inode.c b/fs/inode.c
index ee4e66b998f4..4fa4f0916af9 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -26,6 +26,7 @@
26#include <linux/ima.h> 26#include <linux/ima.h>
27#include <linux/cred.h> 27#include <linux/cred.h>
28#include <linux/buffer_head.h> /* for inode_has_buffers */ 28#include <linux/buffer_head.h> /* for inode_has_buffers */
29#include <linux/ratelimit.h>
29#include "internal.h" 30#include "internal.h"
30 31
31/* 32/*
@@ -191,6 +192,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
191 } 192 }
192 inode->i_private = NULL; 193 inode->i_private = NULL;
193 inode->i_mapping = mapping; 194 inode->i_mapping = mapping;
195 INIT_LIST_HEAD(&inode->i_dentry); /* buggered by rcu freeing */
194#ifdef CONFIG_FS_POSIX_ACL 196#ifdef CONFIG_FS_POSIX_ACL
195 inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED; 197 inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
196#endif 198#endif
@@ -241,6 +243,11 @@ void __destroy_inode(struct inode *inode)
241 BUG_ON(inode_has_buffers(inode)); 243 BUG_ON(inode_has_buffers(inode));
242 security_inode_free(inode); 244 security_inode_free(inode);
243 fsnotify_inode_delete(inode); 245 fsnotify_inode_delete(inode);
246 if (!inode->i_nlink) {
247 WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0);
248 atomic_long_dec(&inode->i_sb->s_remove_count);
249 }
250
244#ifdef CONFIG_FS_POSIX_ACL 251#ifdef CONFIG_FS_POSIX_ACL
245 if (inode->i_acl && inode->i_acl != ACL_NOT_CACHED) 252 if (inode->i_acl && inode->i_acl != ACL_NOT_CACHED)
246 posix_acl_release(inode->i_acl); 253 posix_acl_release(inode->i_acl);
@@ -254,7 +261,6 @@ EXPORT_SYMBOL(__destroy_inode);
254static void i_callback(struct rcu_head *head) 261static void i_callback(struct rcu_head *head)
255{ 262{
256 struct inode *inode = container_of(head, struct inode, i_rcu); 263 struct inode *inode = container_of(head, struct inode, i_rcu);
257 INIT_LIST_HEAD(&inode->i_dentry);
258 kmem_cache_free(inode_cachep, inode); 264 kmem_cache_free(inode_cachep, inode);
259} 265}
260 266
@@ -268,6 +274,85 @@ static void destroy_inode(struct inode *inode)
268 call_rcu(&inode->i_rcu, i_callback); 274 call_rcu(&inode->i_rcu, i_callback);
269} 275}
270 276
277/**
278 * drop_nlink - directly drop an inode's link count
279 * @inode: inode
280 *
281 * This is a low-level filesystem helper to replace any
282 * direct filesystem manipulation of i_nlink. In cases
283 * where we are attempting to track writes to the
284 * filesystem, a decrement to zero means an imminent
285 * write when the file is truncated and actually unlinked
286 * on the filesystem.
287 */
288void drop_nlink(struct inode *inode)
289{
290 WARN_ON(inode->i_nlink == 0);
291 inode->__i_nlink--;
292 if (!inode->i_nlink)
293 atomic_long_inc(&inode->i_sb->s_remove_count);
294}
295EXPORT_SYMBOL(drop_nlink);
296
297/**
298 * clear_nlink - directly zero an inode's link count
299 * @inode: inode
300 *
301 * This is a low-level filesystem helper to replace any
302 * direct filesystem manipulation of i_nlink. See
303 * drop_nlink() for why we care about i_nlink hitting zero.
304 */
305void clear_nlink(struct inode *inode)
306{
307 if (inode->i_nlink) {
308 inode->__i_nlink = 0;
309 atomic_long_inc(&inode->i_sb->s_remove_count);
310 }
311}
312EXPORT_SYMBOL(clear_nlink);
313
314/**
315 * set_nlink - directly set an inode's link count
316 * @inode: inode
317 * @nlink: new nlink (should be non-zero)
318 *
319 * This is a low-level filesystem helper to replace any
320 * direct filesystem manipulation of i_nlink.
321 */
322void set_nlink(struct inode *inode, unsigned int nlink)
323{
324 if (!nlink) {
325 printk_ratelimited(KERN_INFO
326 "set_nlink() clearing i_nlink on %s inode %li\n",
327 inode->i_sb->s_type->name, inode->i_ino);
328 clear_nlink(inode);
329 } else {
330 /* Yes, some filesystems do change nlink from zero to one */
331 if (inode->i_nlink == 0)
332 atomic_long_dec(&inode->i_sb->s_remove_count);
333
334 inode->__i_nlink = nlink;
335 }
336}
337EXPORT_SYMBOL(set_nlink);
338
339/**
340 * inc_nlink - directly increment an inode's link count
341 * @inode: inode
342 *
343 * This is a low-level filesystem helper to replace any
344 * direct filesystem manipulation of i_nlink. Currently,
345 * it is only here for parity with dec_nlink().
346 */
347void inc_nlink(struct inode *inode)
348{
349 if (WARN_ON(inode->i_nlink == 0))
350 atomic_long_dec(&inode->i_sb->s_remove_count);
351
352 inode->__i_nlink++;
353}
354EXPORT_SYMBOL(inc_nlink);
355
271void address_space_init_once(struct address_space *mapping) 356void address_space_init_once(struct address_space *mapping)
272{ 357{
273 memset(mapping, 0, sizeof(*mapping)); 358 memset(mapping, 0, sizeof(*mapping));
@@ -290,7 +375,6 @@ void inode_init_once(struct inode *inode)
290{ 375{
291 memset(inode, 0, sizeof(*inode)); 376 memset(inode, 0, sizeof(*inode));
292 INIT_HLIST_NODE(&inode->i_hash); 377 INIT_HLIST_NODE(&inode->i_hash);
293 INIT_LIST_HEAD(&inode->i_dentry);
294 INIT_LIST_HEAD(&inode->i_devices); 378 INIT_LIST_HEAD(&inode->i_devices);
295 INIT_LIST_HEAD(&inode->i_wb_list); 379 INIT_LIST_HEAD(&inode->i_wb_list);
296 INIT_LIST_HEAD(&inode->i_lru); 380 INIT_LIST_HEAD(&inode->i_lru);
@@ -692,6 +776,8 @@ void prune_icache_sb(struct super_block *sb, int nr_to_scan)
692 else 776 else
693 __count_vm_events(PGINODESTEAL, reap); 777 __count_vm_events(PGINODESTEAL, reap);
694 spin_unlock(&sb->s_inode_lru_lock); 778 spin_unlock(&sb->s_inode_lru_lock);
779 if (current->reclaim_state)
780 current->reclaim_state->reclaimed_slab += reap;
695 781
696 dispose_list(&freeable); 782 dispose_list(&freeable);
697} 783}
@@ -1508,7 +1594,7 @@ void file_update_time(struct file *file)
1508 if (sync_it & S_MTIME) 1594 if (sync_it & S_MTIME)
1509 inode->i_mtime = now; 1595 inode->i_mtime = now;
1510 mark_inode_dirty_sync(inode); 1596 mark_inode_dirty_sync(inode);
1511 mnt_drop_write(file->f_path.mnt); 1597 mnt_drop_write_file(file);
1512} 1598}
1513EXPORT_SYMBOL(file_update_time); 1599EXPORT_SYMBOL(file_update_time);
1514 1600
@@ -1647,7 +1733,7 @@ EXPORT_SYMBOL(init_special_inode);
1647 * @mode: mode of the new inode 1733 * @mode: mode of the new inode
1648 */ 1734 */
1649void inode_init_owner(struct inode *inode, const struct inode *dir, 1735void inode_init_owner(struct inode *inode, const struct inode *dir,
1650 mode_t mode) 1736 umode_t mode)
1651{ 1737{
1652 inode->i_uid = current_fsuid(); 1738 inode->i_uid = current_fsuid();
1653 if (dir && dir->i_mode & S_ISGID) { 1739 if (dir && dir->i_mode & S_ISGID) {
diff --git a/fs/internal.h b/fs/internal.h
index fe327c20af83..9962c59ba280 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -15,19 +15,14 @@ struct super_block;
15struct file_system_type; 15struct file_system_type;
16struct linux_binprm; 16struct linux_binprm;
17struct path; 17struct path;
18struct mount;
18 19
19/* 20/*
20 * block_dev.c 21 * block_dev.c
21 */ 22 */
22#ifdef CONFIG_BLOCK 23#ifdef CONFIG_BLOCK
23extern struct super_block *blockdev_superblock;
24extern void __init bdev_cache_init(void); 24extern void __init bdev_cache_init(void);
25 25
26static inline int sb_is_blkdev_sb(struct super_block *sb)
27{
28 return sb == blockdev_superblock;
29}
30
31extern int __sync_blockdev(struct block_device *bdev, int wait); 26extern int __sync_blockdev(struct block_device *bdev, int wait);
32 27
33#else 28#else
@@ -35,11 +30,6 @@ static inline void bdev_cache_init(void)
35{ 30{
36} 31}
37 32
38static inline int sb_is_blkdev_sb(struct super_block *sb)
39{
40 return 0;
41}
42
43static inline int __sync_blockdev(struct block_device *bdev, int wait) 33static inline int __sync_blockdev(struct block_device *bdev, int wait)
44{ 34{
45 return 0; 35 return 0;
@@ -52,28 +42,17 @@ static inline int __sync_blockdev(struct block_device *bdev, int wait)
52extern void __init chrdev_init(void); 42extern void __init chrdev_init(void);
53 43
54/* 44/*
55 * exec.c
56 */
57extern int check_unsafe_exec(struct linux_binprm *);
58
59/*
60 * namespace.c 45 * namespace.c
61 */ 46 */
62extern int copy_mount_options(const void __user *, unsigned long *); 47extern int copy_mount_options(const void __user *, unsigned long *);
63extern int copy_mount_string(const void __user *, char **); 48extern int copy_mount_string(const void __user *, char **);
64 49
65extern unsigned int mnt_get_count(struct vfsmount *mnt);
66extern struct vfsmount *__lookup_mnt(struct vfsmount *, struct dentry *, int);
67extern struct vfsmount *lookup_mnt(struct path *); 50extern struct vfsmount *lookup_mnt(struct path *);
68extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *,
69 struct vfsmount *);
70extern void release_mounts(struct list_head *);
71extern void umount_tree(struct vfsmount *, int, struct list_head *);
72extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int);
73extern int finish_automount(struct vfsmount *, struct path *); 51extern int finish_automount(struct vfsmount *, struct path *);
74 52
75extern void mnt_make_longterm(struct vfsmount *); 53extern void mnt_make_longterm(struct vfsmount *);
76extern void mnt_make_shortterm(struct vfsmount *); 54extern void mnt_make_shortterm(struct vfsmount *);
55extern int sb_prepare_remount_readonly(struct super_block *);
77 56
78extern void __init mnt_init(void); 57extern void __init mnt_init(void);
79 58
@@ -98,10 +77,9 @@ extern struct file *get_empty_filp(void);
98 */ 77 */
99extern int do_remount_sb(struct super_block *, int, void *, int); 78extern int do_remount_sb(struct super_block *, int, void *, int);
100extern bool grab_super_passive(struct super_block *sb); 79extern bool grab_super_passive(struct super_block *sb);
101extern void __put_super(struct super_block *sb);
102extern void put_super(struct super_block *sb);
103extern struct dentry *mount_fs(struct file_system_type *, 80extern struct dentry *mount_fs(struct file_system_type *,
104 int, const char *, void *); 81 int, const char *, void *);
82extern struct super_block *user_get_super(dev_t);
105 83
106/* 84/*
107 * open.c 85 * open.c
@@ -111,7 +89,7 @@ extern struct file *nameidata_to_filp(struct nameidata *);
111extern void release_open_intent(struct nameidata *); 89extern void release_open_intent(struct nameidata *);
112struct open_flags { 90struct open_flags {
113 int open_flag; 91 int open_flag;
114 int mode; 92 umode_t mode;
115 int acc_mode; 93 int acc_mode;
116 int intent; 94 int intent;
117}; 95};
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 1d9b9fcb2db4..066836e81848 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -42,7 +42,7 @@ static long vfs_ioctl(struct file *filp, unsigned int cmd,
42 42
43 error = filp->f_op->unlocked_ioctl(filp, cmd, arg); 43 error = filp->f_op->unlocked_ioctl(filp, cmd, arg);
44 if (error == -ENOIOCTLCMD) 44 if (error == -ENOIOCTLCMD)
45 error = -EINVAL; 45 error = -ENOTTY;
46 out: 46 out:
47 return error; 47 return error;
48} 48}
diff --git a/fs/ioprio.c b/fs/ioprio.c
index f79dab83e17b..f84b380d65e5 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -48,28 +48,12 @@ int set_task_ioprio(struct task_struct *task, int ioprio)
48 if (err) 48 if (err)
49 return err; 49 return err;
50 50
51 task_lock(task); 51 ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
52 do { 52 if (ioc) {
53 ioc = task->io_context; 53 ioc_ioprio_changed(ioc, ioprio);
54 /* see wmb() in current_io_context() */ 54 put_io_context(ioc, NULL);
55 smp_read_barrier_depends();
56 if (ioc)
57 break;
58
59 ioc = alloc_io_context(GFP_ATOMIC, -1);
60 if (!ioc) {
61 err = -ENOMEM;
62 break;
63 }
64 task->io_context = ioc;
65 } while (1);
66
67 if (!err) {
68 ioc->ioprio = ioprio;
69 ioc->ioprio_changed = 1;
70 } 55 }
71 56
72 task_unlock(task);
73 return err; 57 return err;
74} 58}
75EXPORT_SYMBOL_GPL(set_task_ioprio); 59EXPORT_SYMBOL_GPL(set_task_ioprio);
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index f950059525fc..bd62c76fb5df 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -85,7 +85,6 @@ static struct inode *isofs_alloc_inode(struct super_block *sb)
85static void isofs_i_callback(struct rcu_head *head) 85static void isofs_i_callback(struct rcu_head *head)
86{ 86{
87 struct inode *inode = container_of(head, struct inode, i_rcu); 87 struct inode *inode = container_of(head, struct inode, i_rcu);
88 INIT_LIST_HEAD(&inode->i_dentry);
89 kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode)); 88 kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode));
90} 89}
91 90
@@ -170,8 +169,8 @@ struct iso9660_options{
170 unsigned char map; 169 unsigned char map;
171 unsigned char check; 170 unsigned char check;
172 unsigned int blocksize; 171 unsigned int blocksize;
173 mode_t fmode; 172 umode_t fmode;
174 mode_t dmode; 173 umode_t dmode;
175 gid_t gid; 174 gid_t gid;
176 uid_t uid; 175 uid_t uid;
177 char *iocharset; 176 char *iocharset;
@@ -949,8 +948,11 @@ root_found:
949 948
950 /* get the root dentry */ 949 /* get the root dentry */
951 s->s_root = d_alloc_root(inode); 950 s->s_root = d_alloc_root(inode);
952 if (!(s->s_root)) 951 if (!(s->s_root)) {
953 goto out_no_root; 952 iput(inode);
953 error = -ENOMEM;
954 goto out_no_inode;
955 }
954 956
955 kfree(opt.iocharset); 957 kfree(opt.iocharset);
956 958
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index 7d33de84f52a..0e73f63d9274 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -50,14 +50,14 @@ struct isofs_sb_info {
50 unsigned int s_uid_set:1; 50 unsigned int s_uid_set:1;
51 unsigned int s_gid_set:1; 51 unsigned int s_gid_set:1;
52 52
53 mode_t s_fmode; 53 umode_t s_fmode;
54 mode_t s_dmode; 54 umode_t s_dmode;
55 gid_t s_gid; 55 gid_t s_gid;
56 uid_t s_uid; 56 uid_t s_uid;
57 struct nls_table *s_nls_iocharset; /* Native language support table */ 57 struct nls_table *s_nls_iocharset; /* Native language support table */
58}; 58};
59 59
60#define ISOFS_INVALID_MODE ((mode_t) -1) 60#define ISOFS_INVALID_MODE ((umode_t) -1)
61 61
62static inline struct isofs_sb_info *ISOFS_SB(struct super_block *sb) 62static inline struct isofs_sb_info *ISOFS_SB(struct super_block *sb)
63{ 63{
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index f94fc48ff3a0..5d1a00a5041b 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -537,7 +537,7 @@ int cleanup_journal_tail(journal_t *journal)
537 * them. 537 * them.
538 * 538 *
539 * Called with j_list_lock held. 539 * Called with j_list_lock held.
540 * Returns number of bufers reaped (for debug) 540 * Returns number of buffers reaped (for debug)
541 */ 541 */
542 542
543static int journal_clean_one_cp_list(struct journal_head *jh, int *released) 543static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 8799207df058..f2b9a571f4cf 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -392,6 +392,12 @@ void journal_commit_transaction(journal_t *journal)
392 jbd_debug (3, "JBD: commit phase 1\n"); 392 jbd_debug (3, "JBD: commit phase 1\n");
393 393
394 /* 394 /*
395 * Clear revoked flag to reflect there is no revoked buffers
396 * in the next transaction which is going to be started.
397 */
398 journal_clear_buffer_revoked_flags(journal);
399
400 /*
395 * Switch to a new revoke table. 401 * Switch to a new revoke table.
396 */ 402 */
397 journal_switch_revoke_table(journal); 403 journal_switch_revoke_table(journal);
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index fea8dd661d2b..59c09f9541b5 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -166,7 +166,7 @@ loop:
166 */ 166 */
167 jbd_debug(1, "Now suspending kjournald\n"); 167 jbd_debug(1, "Now suspending kjournald\n");
168 spin_unlock(&journal->j_state_lock); 168 spin_unlock(&journal->j_state_lock);
169 refrigerator(); 169 try_to_freeze();
170 spin_lock(&journal->j_state_lock); 170 spin_lock(&journal->j_state_lock);
171 } else { 171 } else {
172 /* 172 /*
@@ -721,7 +721,6 @@ static journal_t * journal_init_common (void)
721 init_waitqueue_head(&journal->j_wait_checkpoint); 721 init_waitqueue_head(&journal->j_wait_checkpoint);
722 init_waitqueue_head(&journal->j_wait_commit); 722 init_waitqueue_head(&journal->j_wait_commit);
723 init_waitqueue_head(&journal->j_wait_updates); 723 init_waitqueue_head(&journal->j_wait_updates);
724 mutex_init(&journal->j_barrier);
725 mutex_init(&journal->j_checkpoint_mutex); 724 mutex_init(&journal->j_checkpoint_mutex);
726 spin_lock_init(&journal->j_revoke_lock); 725 spin_lock_init(&journal->j_revoke_lock);
727 spin_lock_init(&journal->j_list_lock); 726 spin_lock_init(&journal->j_list_lock);
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index 305a90763154..25c713e7071c 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -47,6 +47,10 @@
47 * overwriting the new data. We don't even need to clear the revoke 47 * overwriting the new data. We don't even need to clear the revoke
48 * bit here. 48 * bit here.
49 * 49 *
50 * We cache revoke status of a buffer in the current transaction in b_states
51 * bits. As the name says, revokevalid flag indicates that the cached revoke
52 * status of a buffer is valid and we can rely on the cached status.
53 *
50 * Revoke information on buffers is a tri-state value: 54 * Revoke information on buffers is a tri-state value:
51 * 55 *
52 * RevokeValid clear: no cached revoke status, need to look it up 56 * RevokeValid clear: no cached revoke status, need to look it up
@@ -479,6 +483,36 @@ int journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
479 return did_revoke; 483 return did_revoke;
480} 484}
481 485
486/*
487 * journal_clear_revoked_flags clears revoked flag of buffers in
488 * revoke table to reflect there is no revoked buffer in the next
489 * transaction which is going to be started.
490 */
491void journal_clear_buffer_revoked_flags(journal_t *journal)
492{
493 struct jbd_revoke_table_s *revoke = journal->j_revoke;
494 int i = 0;
495
496 for (i = 0; i < revoke->hash_size; i++) {
497 struct list_head *hash_list;
498 struct list_head *list_entry;
499 hash_list = &revoke->hash_table[i];
500
501 list_for_each(list_entry, hash_list) {
502 struct jbd_revoke_record_s *record;
503 struct buffer_head *bh;
504 record = (struct jbd_revoke_record_s *)list_entry;
505 bh = __find_get_block(journal->j_fs_dev,
506 record->blocknr,
507 journal->j_blocksize);
508 if (bh) {
509 clear_buffer_revoked(bh);
510 __brelse(bh);
511 }
512 }
513 }
514}
515
482/* journal_switch_revoke table select j_revoke for next transaction 516/* journal_switch_revoke table select j_revoke for next transaction
483 * we do not want to suspend any processing until all revokes are 517 * we do not want to suspend any processing until all revokes are
484 * written -bzzz 518 * written -bzzz
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 7e59c6e66f9b..7fce94b04bc3 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -426,17 +426,34 @@ int journal_restart(handle_t *handle, int nblocks)
426 * void journal_lock_updates () - establish a transaction barrier. 426 * void journal_lock_updates () - establish a transaction barrier.
427 * @journal: Journal to establish a barrier on. 427 * @journal: Journal to establish a barrier on.
428 * 428 *
429 * This locks out any further updates from being started, and blocks 429 * This locks out any further updates from being started, and blocks until all
430 * until all existing updates have completed, returning only once the 430 * existing updates have completed, returning only once the journal is in a
431 * journal is in a quiescent state with no updates running. 431 * quiescent state with no updates running.
432 * 432 *
433 * The journal lock should not be held on entry. 433 * We do not use simple mutex for synchronization as there are syscalls which
434 * want to return with filesystem locked and that trips up lockdep. Also
435 * hibernate needs to lock filesystem but locked mutex then blocks hibernation.
436 * Since locking filesystem is rare operation, we use simple counter and
437 * waitqueue for locking.
434 */ 438 */
435void journal_lock_updates(journal_t *journal) 439void journal_lock_updates(journal_t *journal)
436{ 440{
437 DEFINE_WAIT(wait); 441 DEFINE_WAIT(wait);
438 442
443wait:
444 /* Wait for previous locked operation to finish */
445 wait_event(journal->j_wait_transaction_locked,
446 journal->j_barrier_count == 0);
447
439 spin_lock(&journal->j_state_lock); 448 spin_lock(&journal->j_state_lock);
449 /*
450 * Check reliably under the lock whether we are the ones winning the race
451 * and locking the journal
452 */
453 if (journal->j_barrier_count > 0) {
454 spin_unlock(&journal->j_state_lock);
455 goto wait;
456 }
440 ++journal->j_barrier_count; 457 ++journal->j_barrier_count;
441 458
442 /* Wait until there are no running updates */ 459 /* Wait until there are no running updates */
@@ -460,14 +477,6 @@ void journal_lock_updates(journal_t *journal)
460 spin_lock(&journal->j_state_lock); 477 spin_lock(&journal->j_state_lock);
461 } 478 }
462 spin_unlock(&journal->j_state_lock); 479 spin_unlock(&journal->j_state_lock);
463
464 /*
465 * We have now established a barrier against other normal updates, but
466 * we also need to barrier against other journal_lock_updates() calls
467 * to make sure that we serialise special journal-locked operations
468 * too.
469 */
470 mutex_lock(&journal->j_barrier);
471} 480}
472 481
473/** 482/**
@@ -475,14 +484,11 @@ void journal_lock_updates(journal_t *journal)
475 * @journal: Journal to release the barrier on. 484 * @journal: Journal to release the barrier on.
476 * 485 *
477 * Release a transaction barrier obtained with journal_lock_updates(). 486 * Release a transaction barrier obtained with journal_lock_updates().
478 *
479 * Should be called without the journal lock held.
480 */ 487 */
481void journal_unlock_updates (journal_t *journal) 488void journal_unlock_updates (journal_t *journal)
482{ 489{
483 J_ASSERT(journal->j_barrier_count != 0); 490 J_ASSERT(journal->j_barrier_count != 0);
484 491
485 mutex_unlock(&journal->j_barrier);
486 spin_lock(&journal->j_state_lock); 492 spin_lock(&journal->j_state_lock);
487 --journal->j_barrier_count; 493 --journal->j_barrier_count;
488 spin_unlock(&journal->j_state_lock); 494 spin_unlock(&journal->j_state_lock);
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 16a698bd906d..d49d202903fb 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -565,7 +565,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
565 * 565 *
566 * Called with the journal locked. 566 * Called with the journal locked.
567 * Called with j_list_lock held. 567 * Called with j_list_lock held.
568 * Returns number of bufers reaped (for debug) 568 * Returns number of buffers reaped (for debug)
569 */ 569 */
570 570
571static int journal_clean_one_cp_list(struct journal_head *jh, int *released) 571static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 68d704db787f..5069b8475150 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -430,6 +430,12 @@ void jbd2_journal_commit_transaction(journal_t *journal)
430 jbd_debug(3, "JBD2: commit phase 1\n"); 430 jbd_debug(3, "JBD2: commit phase 1\n");
431 431
432 /* 432 /*
433 * Clear revoked flag to reflect there is no revoked buffers
434 * in the next transaction which is going to be started.
435 */
436 jbd2_clear_buffer_revoked_flags(journal);
437
438 /*
433 * Switch to a new revoke table. 439 * Switch to a new revoke table.
434 */ 440 */
435 jbd2_journal_switch_revoke_table(journal); 441 jbd2_journal_switch_revoke_table(journal);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 0fa0123151d3..c0a5f9f1b127 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -173,7 +173,7 @@ loop:
173 */ 173 */
174 jbd_debug(1, "Now suspending kjournald2\n"); 174 jbd_debug(1, "Now suspending kjournald2\n");
175 write_unlock(&journal->j_state_lock); 175 write_unlock(&journal->j_state_lock);
176 refrigerator(); 176 try_to_freeze();
177 write_lock(&journal->j_state_lock); 177 write_lock(&journal->j_state_lock);
178 } else { 178 } else {
179 /* 179 /*
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 69fd93588118..30b2867d6cc9 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -47,6 +47,10 @@
47 * overwriting the new data. We don't even need to clear the revoke 47 * overwriting the new data. We don't even need to clear the revoke
48 * bit here. 48 * bit here.
49 * 49 *
50 * We cache revoke status of a buffer in the current transaction in b_states
51 * bits. As the name says, revokevalid flag indicates that the cached revoke
52 * status of a buffer is valid and we can rely on the cached status.
53 *
50 * Revoke information on buffers is a tri-state value: 54 * Revoke information on buffers is a tri-state value:
51 * 55 *
52 * RevokeValid clear: no cached revoke status, need to look it up 56 * RevokeValid clear: no cached revoke status, need to look it up
@@ -478,6 +482,36 @@ int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
478 return did_revoke; 482 return did_revoke;
479} 483}
480 484
485/*
486 * journal_clear_revoked_flag clears revoked flag of buffers in
487 * revoke table to reflect there is no revoked buffers in the next
488 * transaction which is going to be started.
489 */
490void jbd2_clear_buffer_revoked_flags(journal_t *journal)
491{
492 struct jbd2_revoke_table_s *revoke = journal->j_revoke;
493 int i = 0;
494
495 for (i = 0; i < revoke->hash_size; i++) {
496 struct list_head *hash_list;
497 struct list_head *list_entry;
498 hash_list = &revoke->hash_table[i];
499
500 list_for_each(list_entry, hash_list) {
501 struct jbd2_revoke_record_s *record;
502 struct buffer_head *bh;
503 record = (struct jbd2_revoke_record_s *)list_entry;
504 bh = __find_get_block(journal->j_fs_dev,
505 record->blocknr,
506 journal->j_blocksize);
507 if (bh) {
508 clear_buffer_revoked(bh);
509 __brelse(bh);
510 }
511 }
512 }
513}
514
481/* journal_switch_revoke table select j_revoke for next transaction 515/* journal_switch_revoke table select j_revoke for next transaction
482 * we do not want to suspend any processing until all revokes are 516 * we do not want to suspend any processing until all revokes are
483 * written -bzzz 517 * written -bzzz
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index a0e41a4c080e..35ae096bed5d 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -517,12 +517,13 @@ void jbd2_journal_lock_updates(journal_t *journal)
517 break; 517 break;
518 518
519 spin_lock(&transaction->t_handle_lock); 519 spin_lock(&transaction->t_handle_lock);
520 prepare_to_wait(&journal->j_wait_updates, &wait,
521 TASK_UNINTERRUPTIBLE);
520 if (!atomic_read(&transaction->t_updates)) { 522 if (!atomic_read(&transaction->t_updates)) {
521 spin_unlock(&transaction->t_handle_lock); 523 spin_unlock(&transaction->t_handle_lock);
524 finish_wait(&journal->j_wait_updates, &wait);
522 break; 525 break;
523 } 526 }
524 prepare_to_wait(&journal->j_wait_updates, &wait,
525 TASK_UNINTERRUPTIBLE);
526 spin_unlock(&transaction->t_handle_lock); 527 spin_unlock(&transaction->t_handle_lock);
527 write_unlock(&journal->j_state_lock); 528 write_unlock(&journal->j_state_lock);
528 schedule(); 529 schedule();
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index be6169bd8acd..973ac5822bd7 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -22,16 +22,16 @@
22 22
23static int jffs2_readdir (struct file *, void *, filldir_t); 23static int jffs2_readdir (struct file *, void *, filldir_t);
24 24
25static int jffs2_create (struct inode *,struct dentry *,int, 25static int jffs2_create (struct inode *,struct dentry *,umode_t,
26 struct nameidata *); 26 struct nameidata *);
27static struct dentry *jffs2_lookup (struct inode *,struct dentry *, 27static struct dentry *jffs2_lookup (struct inode *,struct dentry *,
28 struct nameidata *); 28 struct nameidata *);
29static int jffs2_link (struct dentry *,struct inode *,struct dentry *); 29static int jffs2_link (struct dentry *,struct inode *,struct dentry *);
30static int jffs2_unlink (struct inode *,struct dentry *); 30static int jffs2_unlink (struct inode *,struct dentry *);
31static int jffs2_symlink (struct inode *,struct dentry *,const char *); 31static int jffs2_symlink (struct inode *,struct dentry *,const char *);
32static int jffs2_mkdir (struct inode *,struct dentry *,int); 32static int jffs2_mkdir (struct inode *,struct dentry *,umode_t);
33static int jffs2_rmdir (struct inode *,struct dentry *); 33static int jffs2_rmdir (struct inode *,struct dentry *);
34static int jffs2_mknod (struct inode *,struct dentry *,int,dev_t); 34static int jffs2_mknod (struct inode *,struct dentry *,umode_t,dev_t);
35static int jffs2_rename (struct inode *, struct dentry *, 35static int jffs2_rename (struct inode *, struct dentry *,
36 struct inode *, struct dentry *); 36 struct inode *, struct dentry *);
37 37
@@ -169,8 +169,8 @@ static int jffs2_readdir(struct file *filp, void *dirent, filldir_t filldir)
169/***********************************************************************/ 169/***********************************************************************/
170 170
171 171
172static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode, 172static int jffs2_create(struct inode *dir_i, struct dentry *dentry,
173 struct nameidata *nd) 173 umode_t mode, struct nameidata *nd)
174{ 174{
175 struct jffs2_raw_inode *ri; 175 struct jffs2_raw_inode *ri;
176 struct jffs2_inode_info *f, *dir_f; 176 struct jffs2_inode_info *f, *dir_f;
@@ -450,7 +450,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
450} 450}
451 451
452 452
453static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode) 453static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, umode_t mode)
454{ 454{
455 struct jffs2_inode_info *f, *dir_f; 455 struct jffs2_inode_info *f, *dir_f;
456 struct jffs2_sb_info *c; 456 struct jffs2_sb_info *c;
@@ -618,7 +618,7 @@ static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry)
618 return ret; 618 return ret;
619} 619}
620 620
621static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, dev_t rdev) 621static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, umode_t mode, dev_t rdev)
622{ 622{
623 struct jffs2_inode_info *f, *dir_f; 623 struct jffs2_inode_info *f, *dir_f;
624 struct jffs2_sb_info *c; 624 struct jffs2_sb_info *c;
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index e513f1913c15..a01cdad6aad1 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -74,7 +74,7 @@ static void jffs2_erase_block(struct jffs2_sb_info *c,
74 ((struct erase_priv_struct *)instr->priv)->jeb = jeb; 74 ((struct erase_priv_struct *)instr->priv)->jeb = jeb;
75 ((struct erase_priv_struct *)instr->priv)->c = c; 75 ((struct erase_priv_struct *)instr->priv)->c = c;
76 76
77 ret = c->mtd->erase(c->mtd, instr); 77 ret = mtd_erase(c->mtd, instr);
78 if (!ret) 78 if (!ret)
79 return; 79 return;
80 80
@@ -336,12 +336,11 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
336 uint32_t ofs; 336 uint32_t ofs;
337 size_t retlen; 337 size_t retlen;
338 int ret = -EIO; 338 int ret = -EIO;
339 unsigned long *wordebuf;
339 340
340 if (c->mtd->point) { 341 ret = mtd_point(c->mtd, jeb->offset, c->sector_size, &retlen,
341 unsigned long *wordebuf; 342 &ebuf, NULL);
342 343 if (ret != -EOPNOTSUPP) {
343 ret = c->mtd->point(c->mtd, jeb->offset, c->sector_size,
344 &retlen, &ebuf, NULL);
345 if (ret) { 344 if (ret) {
346 D1(printk(KERN_DEBUG "MTD point failed %d\n", ret)); 345 D1(printk(KERN_DEBUG "MTD point failed %d\n", ret));
347 goto do_flash_read; 346 goto do_flash_read;
@@ -349,7 +348,7 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
349 if (retlen < c->sector_size) { 348 if (retlen < c->sector_size) {
350 /* Don't muck about if it won't let us point to the whole erase sector */ 349 /* Don't muck about if it won't let us point to the whole erase sector */
351 D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", retlen)); 350 D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", retlen));
352 c->mtd->unpoint(c->mtd, jeb->offset, retlen); 351 mtd_unpoint(c->mtd, jeb->offset, retlen);
353 goto do_flash_read; 352 goto do_flash_read;
354 } 353 }
355 wordebuf = ebuf-sizeof(*wordebuf); 354 wordebuf = ebuf-sizeof(*wordebuf);
@@ -358,7 +357,7 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
358 if (*++wordebuf != ~0) 357 if (*++wordebuf != ~0)
359 break; 358 break;
360 } while(--retlen); 359 } while(--retlen);
361 c->mtd->unpoint(c->mtd, jeb->offset, c->sector_size); 360 mtd_unpoint(c->mtd, jeb->offset, c->sector_size);
362 if (retlen) { 361 if (retlen) {
363 printk(KERN_WARNING "Newly-erased block contained word 0x%lx at offset 0x%08tx\n", 362 printk(KERN_WARNING "Newly-erased block contained word 0x%lx at offset 0x%08tx\n",
364 *wordebuf, jeb->offset + c->sector_size-retlen*sizeof(*wordebuf)); 363 *wordebuf, jeb->offset + c->sector_size-retlen*sizeof(*wordebuf));
@@ -381,7 +380,7 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
381 380
382 *bad_offset = ofs; 381 *bad_offset = ofs;
383 382
384 ret = c->mtd->read(c->mtd, ofs, readlen, &retlen, ebuf); 383 ret = mtd_read(c->mtd, ofs, readlen, &retlen, ebuf);
385 if (ret) { 384 if (ret) {
386 printk(KERN_WARNING "Read of newly-erased block at 0x%08x failed: %d. Putting on bad_list\n", ofs, ret); 385 printk(KERN_WARNING "Read of newly-erased block at 0x%08x failed: %d. Putting on bad_list\n", ofs, ret);
387 ret = -EIO; 386 ret = -EIO;
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 4b8afe39a87f..2e0123867cb1 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -466,7 +466,6 @@ struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_r
466 466
467 if (insert_inode_locked(inode) < 0) { 467 if (insert_inode_locked(inode) < 0) {
468 make_bad_inode(inode); 468 make_bad_inode(inode);
469 unlock_new_inode(inode);
470 iput(inode); 469 iput(inode);
471 return ERR_PTR(-EINVAL); 470 return ERR_PTR(-EINVAL);
472 } 471 }
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index ee57bac1ba6d..3093ac4fb24c 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -62,17 +62,15 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info
62#ifndef __ECOS 62#ifndef __ECOS
63 /* TODO: instead, incapsulate point() stuff to jffs2_flash_read(), 63 /* TODO: instead, incapsulate point() stuff to jffs2_flash_read(),
64 * adding and jffs2_flash_read_end() interface. */ 64 * adding and jffs2_flash_read_end() interface. */
65 if (c->mtd->point) { 65 err = mtd_point(c->mtd, ofs, len, &retlen, (void **)&buffer, NULL);
66 err = c->mtd->point(c->mtd, ofs, len, &retlen, 66 if (!err && retlen < len) {
67 (void **)&buffer, NULL); 67 JFFS2_WARNING("MTD point returned len too short: %zu instead of %u.\n", retlen, tn->csize);
68 if (!err && retlen < len) { 68 mtd_unpoint(c->mtd, ofs, retlen);
69 JFFS2_WARNING("MTD point returned len too short: %zu instead of %u.\n", retlen, tn->csize); 69 } else if (err) {
70 c->mtd->unpoint(c->mtd, ofs, retlen); 70 if (err != -EOPNOTSUPP)
71 } else if (err)
72 JFFS2_WARNING("MTD point failed: error code %d.\n", err); 71 JFFS2_WARNING("MTD point failed: error code %d.\n", err);
73 else 72 } else
74 pointed = 1; /* succefully pointed to device */ 73 pointed = 1; /* succefully pointed to device */
75 }
76#endif 74#endif
77 75
78 if (!pointed) { 76 if (!pointed) {
@@ -101,7 +99,7 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info
101 kfree(buffer); 99 kfree(buffer);
102#ifndef __ECOS 100#ifndef __ECOS
103 else 101 else
104 c->mtd->unpoint(c->mtd, ofs, len); 102 mtd_unpoint(c->mtd, ofs, len);
105#endif 103#endif
106 104
107 if (crc != tn->data_crc) { 105 if (crc != tn->data_crc) {
@@ -137,7 +135,7 @@ free_out:
137 kfree(buffer); 135 kfree(buffer);
138#ifndef __ECOS 136#ifndef __ECOS
139 else 137 else
140 c->mtd->unpoint(c->mtd, ofs, len); 138 mtd_unpoint(c->mtd, ofs, len);
141#endif 139#endif
142 return err; 140 return err;
143} 141}
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 28107ca136e4..f99464833bb2 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -97,15 +97,15 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
97 size_t pointlen, try_size; 97 size_t pointlen, try_size;
98 98
99 if (c->mtd->point) { 99 if (c->mtd->point) {
100 ret = c->mtd->point(c->mtd, 0, c->mtd->size, &pointlen, 100 ret = mtd_point(c->mtd, 0, c->mtd->size, &pointlen,
101 (void **)&flashbuf, NULL); 101 (void **)&flashbuf, NULL);
102 if (!ret && pointlen < c->mtd->size) { 102 if (!ret && pointlen < c->mtd->size) {
103 /* Don't muck about if it won't let us point to the whole flash */ 103 /* Don't muck about if it won't let us point to the whole flash */
104 D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", pointlen)); 104 D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", pointlen));
105 c->mtd->unpoint(c->mtd, 0, pointlen); 105 mtd_unpoint(c->mtd, 0, pointlen);
106 flashbuf = NULL; 106 flashbuf = NULL;
107 } 107 }
108 if (ret) 108 if (ret && ret != -EOPNOTSUPP)
109 D1(printk(KERN_DEBUG "MTD point failed %d\n", ret)); 109 D1(printk(KERN_DEBUG "MTD point failed %d\n", ret));
110 } 110 }
111#endif 111#endif
@@ -273,7 +273,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
273 kfree(flashbuf); 273 kfree(flashbuf);
274#ifndef __ECOS 274#ifndef __ECOS
275 else 275 else
276 c->mtd->unpoint(c->mtd, 0, c->mtd->size); 276 mtd_unpoint(c->mtd, 0, c->mtd->size);
277#endif 277#endif
278 kfree(s); 278 kfree(s);
279 return ret; 279 return ret;
@@ -455,7 +455,7 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo
455 if (jffs2_cleanmarker_oob(c)) { 455 if (jffs2_cleanmarker_oob(c)) {
456 int ret; 456 int ret;
457 457
458 if (c->mtd->block_isbad(c->mtd, jeb->offset)) 458 if (mtd_block_isbad(c->mtd, jeb->offset))
459 return BLK_STATE_BADBLOCK; 459 return BLK_STATE_BADBLOCK;
460 460
461 ret = jffs2_check_nand_cleanmarker(c, jeb); 461 ret = jffs2_check_nand_cleanmarker(c, jeb);
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index e7e974454115..f2d96b5e64f6 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -45,7 +45,6 @@ static struct inode *jffs2_alloc_inode(struct super_block *sb)
45static void jffs2_i_callback(struct rcu_head *head) 45static void jffs2_i_callback(struct rcu_head *head)
46{ 46{
47 struct inode *inode = container_of(head, struct inode, i_rcu); 47 struct inode *inode = container_of(head, struct inode, i_rcu);
48 INIT_LIST_HEAD(&inode->i_dentry);
49 kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode)); 48 kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode));
50} 49}
51 50
@@ -97,9 +96,9 @@ static const char *jffs2_compr_name(unsigned int compr)
97 } 96 }
98} 97}
99 98
100static int jffs2_show_options(struct seq_file *s, struct vfsmount *mnt) 99static int jffs2_show_options(struct seq_file *s, struct dentry *root)
101{ 100{
102 struct jffs2_sb_info *c = JFFS2_SB_INFO(mnt->mnt_sb); 101 struct jffs2_sb_info *c = JFFS2_SB_INFO(root->d_sb);
103 struct jffs2_mount_opts *opts = &c->mount_opts; 102 struct jffs2_mount_opts *opts = &c->mount_opts;
104 103
105 if (opts->override_compr) 104 if (opts->override_compr)
@@ -336,9 +335,7 @@ static void jffs2_put_super (struct super_block *sb)
336 jffs2_flash_cleanup(c); 335 jffs2_flash_cleanup(c);
337 kfree(c->inocache_list); 336 kfree(c->inocache_list);
338 jffs2_clear_xattr_subsystem(c); 337 jffs2_clear_xattr_subsystem(c);
339 if (c->mtd->sync) 338 mtd_sync(c->mtd);
340 c->mtd->sync(c->mtd);
341
342 D1(printk(KERN_DEBUG "jffs2_put_super returning\n")); 339 D1(printk(KERN_DEBUG "jffs2_put_super returning\n"));
343} 340}
344 341
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index b09e51d2f81f..30e8f47e8a23 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -228,7 +228,7 @@ static int jffs2_verify_write(struct jffs2_sb_info *c, unsigned char *buf,
228 size_t retlen; 228 size_t retlen;
229 char *eccstr; 229 char *eccstr;
230 230
231 ret = c->mtd->read(c->mtd, ofs, c->wbuf_pagesize, &retlen, c->wbuf_verify); 231 ret = mtd_read(c->mtd, ofs, c->wbuf_pagesize, &retlen, c->wbuf_verify);
232 if (ret && ret != -EUCLEAN && ret != -EBADMSG) { 232 if (ret && ret != -EUCLEAN && ret != -EBADMSG) {
233 printk(KERN_WARNING "jffs2_verify_write(): Read back of page at %08x failed: %d\n", c->wbuf_ofs, ret); 233 printk(KERN_WARNING "jffs2_verify_write(): Read back of page at %08x failed: %d\n", c->wbuf_ofs, ret);
234 return ret; 234 return ret;
@@ -337,7 +337,8 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
337 } 337 }
338 338
339 /* Do the read... */ 339 /* Do the read... */
340 ret = c->mtd->read(c->mtd, start, c->wbuf_ofs - start, &retlen, buf); 340 ret = mtd_read(c->mtd, start, c->wbuf_ofs - start, &retlen,
341 buf);
341 342
342 /* ECC recovered ? */ 343 /* ECC recovered ? */
343 if ((ret == -EUCLEAN || ret == -EBADMSG) && 344 if ((ret == -EUCLEAN || ret == -EBADMSG) &&
@@ -413,13 +414,12 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
413 if (breakme++ == 20) { 414 if (breakme++ == 20) {
414 printk(KERN_NOTICE "Faking write error at 0x%08x\n", ofs); 415 printk(KERN_NOTICE "Faking write error at 0x%08x\n", ofs);
415 breakme = 0; 416 breakme = 0;
416 c->mtd->write(c->mtd, ofs, towrite, &retlen, 417 mtd_write(c->mtd, ofs, towrite, &retlen, brokenbuf);
417 brokenbuf);
418 ret = -EIO; 418 ret = -EIO;
419 } else 419 } else
420#endif 420#endif
421 ret = c->mtd->write(c->mtd, ofs, towrite, &retlen, 421 ret = mtd_write(c->mtd, ofs, towrite, &retlen,
422 rewrite_buf); 422 rewrite_buf);
423 423
424 if (ret || retlen != towrite || jffs2_verify_write(c, rewrite_buf, ofs)) { 424 if (ret || retlen != towrite || jffs2_verify_write(c, rewrite_buf, ofs)) {
425 /* Argh. We tried. Really we did. */ 425 /* Argh. We tried. Really we did. */
@@ -619,13 +619,14 @@ static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad)
619 if (breakme++ == 20) { 619 if (breakme++ == 20) {
620 printk(KERN_NOTICE "Faking write error at 0x%08x\n", c->wbuf_ofs); 620 printk(KERN_NOTICE "Faking write error at 0x%08x\n", c->wbuf_ofs);
621 breakme = 0; 621 breakme = 0;
622 c->mtd->write(c->mtd, c->wbuf_ofs, c->wbuf_pagesize, &retlen, 622 mtd_write(c->mtd, c->wbuf_ofs, c->wbuf_pagesize, &retlen,
623 brokenbuf); 623 brokenbuf);
624 ret = -EIO; 624 ret = -EIO;
625 } else 625 } else
626#endif 626#endif
627 627
628 ret = c->mtd->write(c->mtd, c->wbuf_ofs, c->wbuf_pagesize, &retlen, c->wbuf); 628 ret = mtd_write(c->mtd, c->wbuf_ofs, c->wbuf_pagesize,
629 &retlen, c->wbuf);
629 630
630 if (ret) { 631 if (ret) {
631 printk(KERN_WARNING "jffs2_flush_wbuf(): Write failed with %d\n", ret); 632 printk(KERN_WARNING "jffs2_flush_wbuf(): Write failed with %d\n", ret);
@@ -861,8 +862,8 @@ int jffs2_flash_writev(struct jffs2_sb_info *c, const struct kvec *invecs,
861 v += wbuf_retlen; 862 v += wbuf_retlen;
862 863
863 if (vlen >= c->wbuf_pagesize) { 864 if (vlen >= c->wbuf_pagesize) {
864 ret = c->mtd->write(c->mtd, outvec_to, PAGE_DIV(vlen), 865 ret = mtd_write(c->mtd, outvec_to, PAGE_DIV(vlen),
865 &wbuf_retlen, v); 866 &wbuf_retlen, v);
866 if (ret < 0 || wbuf_retlen != PAGE_DIV(vlen)) 867 if (ret < 0 || wbuf_retlen != PAGE_DIV(vlen))
867 goto outfile; 868 goto outfile;
868 869
@@ -948,11 +949,11 @@ int jffs2_flash_read(struct jffs2_sb_info *c, loff_t ofs, size_t len, size_t *re
948 int ret; 949 int ret;
949 950
950 if (!jffs2_is_writebuffered(c)) 951 if (!jffs2_is_writebuffered(c))
951 return c->mtd->read(c->mtd, ofs, len, retlen, buf); 952 return mtd_read(c->mtd, ofs, len, retlen, buf);
952 953
953 /* Read flash */ 954 /* Read flash */
954 down_read(&c->wbuf_sem); 955 down_read(&c->wbuf_sem);
955 ret = c->mtd->read(c->mtd, ofs, len, retlen, buf); 956 ret = mtd_read(c->mtd, ofs, len, retlen, buf);
956 957
957 if ( (ret == -EBADMSG || ret == -EUCLEAN) && (*retlen == len) ) { 958 if ( (ret == -EBADMSG || ret == -EUCLEAN) && (*retlen == len) ) {
958 if (ret == -EBADMSG) 959 if (ret == -EBADMSG)
@@ -1031,7 +1032,7 @@ int jffs2_check_oob_empty(struct jffs2_sb_info *c,
1031 ops.len = ops.ooboffs = ops.retlen = ops.oobretlen = 0; 1032 ops.len = ops.ooboffs = ops.retlen = ops.oobretlen = 0;
1032 ops.datbuf = NULL; 1033 ops.datbuf = NULL;
1033 1034
1034 ret = c->mtd->read_oob(c->mtd, jeb->offset, &ops); 1035 ret = mtd_read_oob(c->mtd, jeb->offset, &ops);
1035 if (ret || ops.oobretlen != ops.ooblen) { 1036 if (ret || ops.oobretlen != ops.ooblen) {
1036 printk(KERN_ERR "cannot read OOB for EB at %08x, requested %zd" 1037 printk(KERN_ERR "cannot read OOB for EB at %08x, requested %zd"
1037 " bytes, read %zd bytes, error %d\n", 1038 " bytes, read %zd bytes, error %d\n",
@@ -1074,7 +1075,7 @@ int jffs2_check_nand_cleanmarker(struct jffs2_sb_info *c,
1074 ops.len = ops.ooboffs = ops.retlen = ops.oobretlen = 0; 1075 ops.len = ops.ooboffs = ops.retlen = ops.oobretlen = 0;
1075 ops.datbuf = NULL; 1076 ops.datbuf = NULL;
1076 1077
1077 ret = c->mtd->read_oob(c->mtd, jeb->offset, &ops); 1078 ret = mtd_read_oob(c->mtd, jeb->offset, &ops);
1078 if (ret || ops.oobretlen != ops.ooblen) { 1079 if (ret || ops.oobretlen != ops.ooblen) {
1079 printk(KERN_ERR "cannot read OOB for EB at %08x, requested %zd" 1080 printk(KERN_ERR "cannot read OOB for EB at %08x, requested %zd"
1080 " bytes, read %zd bytes, error %d\n", 1081 " bytes, read %zd bytes, error %d\n",
@@ -1100,7 +1101,7 @@ int jffs2_write_nand_cleanmarker(struct jffs2_sb_info *c,
1100 ops.len = ops.ooboffs = ops.retlen = ops.oobretlen = 0; 1101 ops.len = ops.ooboffs = ops.retlen = ops.oobretlen = 0;
1101 ops.datbuf = NULL; 1102 ops.datbuf = NULL;
1102 1103
1103 ret = c->mtd->write_oob(c->mtd, jeb->offset, &ops); 1104 ret = mtd_write_oob(c->mtd, jeb->offset, &ops);
1104 if (ret || ops.oobretlen != ops.ooblen) { 1105 if (ret || ops.oobretlen != ops.ooblen) {
1105 printk(KERN_ERR "cannot write OOB for EB at %08x, requested %zd" 1106 printk(KERN_ERR "cannot write OOB for EB at %08x, requested %zd"
1106 " bytes, read %zd bytes, error %d\n", 1107 " bytes, read %zd bytes, error %d\n",
@@ -1129,11 +1130,8 @@ int jffs2_write_nand_badblock(struct jffs2_sb_info *c, struct jffs2_eraseblock *
1129 if( ++jeb->bad_count < MAX_ERASE_FAILURES) 1130 if( ++jeb->bad_count < MAX_ERASE_FAILURES)
1130 return 0; 1131 return 0;
1131 1132
1132 if (!c->mtd->block_markbad)
1133 return 1; // What else can we do?
1134
1135 printk(KERN_WARNING "JFFS2: marking eraseblock at %08x\n as bad", bad_offset); 1133 printk(KERN_WARNING "JFFS2: marking eraseblock at %08x\n as bad", bad_offset);
1136 ret = c->mtd->block_markbad(c->mtd, bad_offset); 1134 ret = mtd_block_markbad(c->mtd, bad_offset);
1137 1135
1138 if (ret) { 1136 if (ret) {
1139 D1(printk(KERN_WARNING "jffs2_write_nand_badblock(): Write failed for block at %08x: error %d\n", jeb->offset, ret)); 1137 D1(printk(KERN_WARNING "jffs2_write_nand_badblock(): Write failed for block at %08x: error %d\n", jeb->offset, ret));
diff --git a/fs/jffs2/writev.c b/fs/jffs2/writev.c
index b9276b11bac6..a1bda9dab3f8 100644
--- a/fs/jffs2/writev.c
+++ b/fs/jffs2/writev.c
@@ -13,30 +13,6 @@
13#include <linux/mtd/mtd.h> 13#include <linux/mtd/mtd.h>
14#include "nodelist.h" 14#include "nodelist.h"
15 15
16/* This ought to be in core MTD code. All registered MTD devices
17 without writev should have this put in place. Bug the MTD
18 maintainer */
19static inline int mtd_fake_writev(struct mtd_info *mtd, const struct kvec *vecs,
20 unsigned long count, loff_t to, size_t *retlen)
21{
22 unsigned long i;
23 size_t totlen = 0, thislen;
24 int ret = 0;
25
26 for (i=0; i<count; i++) {
27 if (!vecs[i].iov_len)
28 continue;
29 ret = mtd->write(mtd, to, vecs[i].iov_len, &thislen, vecs[i].iov_base);
30 totlen += thislen;
31 if (ret || thislen != vecs[i].iov_len)
32 break;
33 to += vecs[i].iov_len;
34 }
35 if (retlen)
36 *retlen = totlen;
37 return ret;
38}
39
40int jffs2_flash_direct_writev(struct jffs2_sb_info *c, const struct kvec *vecs, 16int jffs2_flash_direct_writev(struct jffs2_sb_info *c, const struct kvec *vecs,
41 unsigned long count, loff_t to, size_t *retlen) 17 unsigned long count, loff_t to, size_t *retlen)
42{ 18{
@@ -50,18 +26,14 @@ int jffs2_flash_direct_writev(struct jffs2_sb_info *c, const struct kvec *vecs,
50 } 26 }
51 } 27 }
52 28
53 if (c->mtd->writev) 29 return mtd_writev(c->mtd, vecs, count, to, retlen);
54 return c->mtd->writev(c->mtd, vecs, count, to, retlen);
55 else {
56 return mtd_fake_writev(c->mtd, vecs, count, to, retlen);
57 }
58} 30}
59 31
60int jffs2_flash_direct_write(struct jffs2_sb_info *c, loff_t ofs, size_t len, 32int jffs2_flash_direct_write(struct jffs2_sb_info *c, loff_t ofs, size_t len,
61 size_t *retlen, const u_char *buf) 33 size_t *retlen, const u_char *buf)
62{ 34{
63 int ret; 35 int ret;
64 ret = c->mtd->write(c->mtd, ofs, len, retlen, buf); 36 ret = mtd_write(c->mtd, ofs, len, retlen, buf);
65 37
66 if (jffs2_sum_active()) { 38 if (jffs2_sum_active()) {
67 struct kvec vecs[1]; 39 struct kvec vecs[1];
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
index 6f98a1866776..f19d1e04a374 100644
--- a/fs/jfs/ioctl.c
+++ b/fs/jfs/ioctl.c
@@ -68,7 +68,7 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
68 unsigned int oldflags; 68 unsigned int oldflags;
69 int err; 69 int err;
70 70
71 err = mnt_want_write(filp->f_path.mnt); 71 err = mnt_want_write_file(filp);
72 if (err) 72 if (err)
73 return err; 73 return err;
74 74
@@ -120,7 +120,7 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
120 inode->i_ctime = CURRENT_TIME_SEC; 120 inode->i_ctime = CURRENT_TIME_SEC;
121 mark_inode_dirty(inode); 121 mark_inode_dirty(inode);
122setflags_out: 122setflags_out:
123 mnt_drop_write(filp->f_path.mnt); 123 mnt_drop_write_file(filp);
124 return err; 124 return err;
125 } 125 }
126 default: 126 default:
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index cc5f811ed383..2eb952c41a69 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -2349,7 +2349,7 @@ int jfsIOWait(void *arg)
2349 2349
2350 if (freezing(current)) { 2350 if (freezing(current)) {
2351 spin_unlock_irq(&log_redrive_lock); 2351 spin_unlock_irq(&log_redrive_lock);
2352 refrigerator(); 2352 try_to_freeze();
2353 } else { 2353 } else {
2354 set_current_state(TASK_INTERRUPTIBLE); 2354 set_current_state(TASK_INTERRUPTIBLE);
2355 spin_unlock_irq(&log_redrive_lock); 2355 spin_unlock_irq(&log_redrive_lock);
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index af9606057dde..bb8b661bcc50 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -2800,7 +2800,7 @@ int jfs_lazycommit(void *arg)
2800 2800
2801 if (freezing(current)) { 2801 if (freezing(current)) {
2802 LAZY_UNLOCK(flags); 2802 LAZY_UNLOCK(flags);
2803 refrigerator(); 2803 try_to_freeze();
2804 } else { 2804 } else {
2805 DECLARE_WAITQUEUE(wq, current); 2805 DECLARE_WAITQUEUE(wq, current);
2806 2806
@@ -2994,7 +2994,7 @@ int jfs_sync(void *arg)
2994 2994
2995 if (freezing(current)) { 2995 if (freezing(current)) {
2996 TXN_UNLOCK(); 2996 TXN_UNLOCK();
2997 refrigerator(); 2997 try_to_freeze();
2998 } else { 2998 } else {
2999 set_current_state(TASK_INTERRUPTIBLE); 2999 set_current_state(TASK_INTERRUPTIBLE);
3000 TXN_UNLOCK(); 3000 TXN_UNLOCK();
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index a112ad96e474..5f7c160ea64f 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -72,7 +72,7 @@ static inline void free_ea_wmap(struct inode *inode)
72 * RETURN: Errors from subroutines 72 * RETURN: Errors from subroutines
73 * 73 *
74 */ 74 */
75static int jfs_create(struct inode *dip, struct dentry *dentry, int mode, 75static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode,
76 struct nameidata *nd) 76 struct nameidata *nd)
77{ 77{
78 int rc = 0; 78 int rc = 0;
@@ -205,7 +205,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
205 * note: 205 * note:
206 * EACCESS: user needs search+write permission on the parent directory 206 * EACCESS: user needs search+write permission on the parent directory
207 */ 207 */
208static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode) 208static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
209{ 209{
210 int rc = 0; 210 int rc = 0;
211 tid_t tid; /* transaction id */ 211 tid_t tid; /* transaction id */
@@ -1353,7 +1353,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1353 * FUNCTION: Create a special file (device) 1353 * FUNCTION: Create a special file (device)
1354 */ 1354 */
1355static int jfs_mknod(struct inode *dir, struct dentry *dentry, 1355static int jfs_mknod(struct inode *dir, struct dentry *dentry,
1356 int mode, dev_t rdev) 1356 umode_t mode, dev_t rdev)
1357{ 1357{
1358 struct jfs_inode_info *jfs_ip; 1358 struct jfs_inode_info *jfs_ip;
1359 struct btstack btstack; 1359 struct btstack btstack;
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index a44eff076c17..682bca642f38 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -119,7 +119,6 @@ static void jfs_i_callback(struct rcu_head *head)
119{ 119{
120 struct inode *inode = container_of(head, struct inode, i_rcu); 120 struct inode *inode = container_of(head, struct inode, i_rcu);
121 struct jfs_inode_info *ji = JFS_IP(inode); 121 struct jfs_inode_info *ji = JFS_IP(inode);
122 INIT_LIST_HEAD(&inode->i_dentry);
123 kmem_cache_free(jfs_inode_cachep, ji); 122 kmem_cache_free(jfs_inode_cachep, ji);
124} 123}
125 124
@@ -609,9 +608,9 @@ static int jfs_sync_fs(struct super_block *sb, int wait)
609 return 0; 608 return 0;
610} 609}
611 610
612static int jfs_show_options(struct seq_file *seq, struct vfsmount *vfs) 611static int jfs_show_options(struct seq_file *seq, struct dentry *root)
613{ 612{
614 struct jfs_sb_info *sbi = JFS_SBI(vfs->mnt_sb); 613 struct jfs_sb_info *sbi = JFS_SBI(root->d_sb);
615 614
616 if (sbi->uid != -1) 615 if (sbi->uid != -1)
617 seq_printf(seq, ",uid=%d", sbi->uid); 616 seq_printf(seq, ",uid=%d", sbi->uid);
diff --git a/fs/libfs.c b/fs/libfs.c
index f6d411eef1e7..5b2dbb3ba4fc 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -12,7 +12,7 @@
12#include <linux/mutex.h> 12#include <linux/mutex.h>
13#include <linux/exportfs.h> 13#include <linux/exportfs.h>
14#include <linux/writeback.h> 14#include <linux/writeback.h>
15#include <linux/buffer_head.h> 15#include <linux/buffer_head.h> /* sync_mapping_buffers */
16 16
17#include <asm/uaccess.h> 17#include <asm/uaccess.h>
18 18
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 23d7451b2938..65ba36b80a9e 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -55,7 +55,7 @@ static DEFINE_SPINLOCK(nsm_lock);
55 * Local NSM state 55 * Local NSM state
56 */ 56 */
57u32 __read_mostly nsm_local_state; 57u32 __read_mostly nsm_local_state;
58int __read_mostly nsm_use_hostnames; 58bool __read_mostly nsm_use_hostnames;
59 59
60static inline struct sockaddr *nsm_addr(const struct nsm_handle *nsm) 60static inline struct sockaddr *nsm_addr(const struct nsm_handle *nsm)
61{ 61{
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 1ca0679c80bf..2240d384d787 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -403,7 +403,7 @@ nlmsvc_match_sb(void *datap, struct nlm_file *file)
403{ 403{
404 struct super_block *sb = datap; 404 struct super_block *sb = datap;
405 405
406 return sb == file->f_file->f_path.mnt->mnt_sb; 406 return sb == file->f_file->f_path.dentry->d_sb;
407} 407}
408 408
409/** 409/**
diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c
index 339e17e9133d..e97404d611e0 100644
--- a/fs/logfs/dev_mtd.c
+++ b/fs/logfs/dev_mtd.c
@@ -13,13 +13,14 @@
13 13
14#define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1)) 14#define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1))
15 15
16static int mtd_read(struct super_block *sb, loff_t ofs, size_t len, void *buf) 16static int logfs_mtd_read(struct super_block *sb, loff_t ofs, size_t len,
17 void *buf)
17{ 18{
18 struct mtd_info *mtd = logfs_super(sb)->s_mtd; 19 struct mtd_info *mtd = logfs_super(sb)->s_mtd;
19 size_t retlen; 20 size_t retlen;
20 int ret; 21 int ret;
21 22
22 ret = mtd->read(mtd, ofs, len, &retlen, buf); 23 ret = mtd_read(mtd, ofs, len, &retlen, buf);
23 BUG_ON(ret == -EINVAL); 24 BUG_ON(ret == -EINVAL);
24 if (ret) 25 if (ret)
25 return ret; 26 return ret;
@@ -31,7 +32,8 @@ static int mtd_read(struct super_block *sb, loff_t ofs, size_t len, void *buf)
31 return 0; 32 return 0;
32} 33}
33 34
34static int mtd_write(struct super_block *sb, loff_t ofs, size_t len, void *buf) 35static int loffs_mtd_write(struct super_block *sb, loff_t ofs, size_t len,
36 void *buf)
35{ 37{
36 struct logfs_super *super = logfs_super(sb); 38 struct logfs_super *super = logfs_super(sb);
37 struct mtd_info *mtd = super->s_mtd; 39 struct mtd_info *mtd = super->s_mtd;
@@ -47,7 +49,7 @@ static int mtd_write(struct super_block *sb, loff_t ofs, size_t len, void *buf)
47 BUG_ON(len > PAGE_CACHE_SIZE); 49 BUG_ON(len > PAGE_CACHE_SIZE);
48 page_start = ofs & PAGE_CACHE_MASK; 50 page_start = ofs & PAGE_CACHE_MASK;
49 page_end = PAGE_CACHE_ALIGN(ofs + len) - 1; 51 page_end = PAGE_CACHE_ALIGN(ofs + len) - 1;
50 ret = mtd->write(mtd, ofs, len, &retlen, buf); 52 ret = mtd_write(mtd, ofs, len, &retlen, buf);
51 if (ret || (retlen != len)) 53 if (ret || (retlen != len))
52 return -EIO; 54 return -EIO;
53 55
@@ -60,14 +62,15 @@ static int mtd_write(struct super_block *sb, loff_t ofs, size_t len, void *buf)
60 * asynchronous properties. So just to prevent the first implementor of such 62 * asynchronous properties. So just to prevent the first implementor of such
61 * a thing from breaking logfs in 2350, we do the usual pointless dance to 63 * a thing from breaking logfs in 2350, we do the usual pointless dance to
62 * declare a completion variable and wait for completion before returning 64 * declare a completion variable and wait for completion before returning
63 * from mtd_erase(). What an exercise in futility! 65 * from logfs_mtd_erase(). What an exercise in futility!
64 */ 66 */
65static void logfs_erase_callback(struct erase_info *ei) 67static void logfs_erase_callback(struct erase_info *ei)
66{ 68{
67 complete((struct completion *)ei->priv); 69 complete((struct completion *)ei->priv);
68} 70}
69 71
70static int mtd_erase_mapping(struct super_block *sb, loff_t ofs, size_t len) 72static int logfs_mtd_erase_mapping(struct super_block *sb, loff_t ofs,
73 size_t len)
71{ 74{
72 struct logfs_super *super = logfs_super(sb); 75 struct logfs_super *super = logfs_super(sb);
73 struct address_space *mapping = super->s_mapping_inode->i_mapping; 76 struct address_space *mapping = super->s_mapping_inode->i_mapping;
@@ -84,7 +87,7 @@ static int mtd_erase_mapping(struct super_block *sb, loff_t ofs, size_t len)
84 return 0; 87 return 0;
85} 88}
86 89
87static int mtd_erase(struct super_block *sb, loff_t ofs, size_t len, 90static int logfs_mtd_erase(struct super_block *sb, loff_t ofs, size_t len,
88 int ensure_write) 91 int ensure_write)
89{ 92{
90 struct mtd_info *mtd = logfs_super(sb)->s_mtd; 93 struct mtd_info *mtd = logfs_super(sb)->s_mtd;
@@ -102,30 +105,29 @@ static int mtd_erase(struct super_block *sb, loff_t ofs, size_t len,
102 ei.len = len; 105 ei.len = len;
103 ei.callback = logfs_erase_callback; 106 ei.callback = logfs_erase_callback;
104 ei.priv = (long)&complete; 107 ei.priv = (long)&complete;
105 ret = mtd->erase(mtd, &ei); 108 ret = mtd_erase(mtd, &ei);
106 if (ret) 109 if (ret)
107 return -EIO; 110 return -EIO;
108 111
109 wait_for_completion(&complete); 112 wait_for_completion(&complete);
110 if (ei.state != MTD_ERASE_DONE) 113 if (ei.state != MTD_ERASE_DONE)
111 return -EIO; 114 return -EIO;
112 return mtd_erase_mapping(sb, ofs, len); 115 return logfs_mtd_erase_mapping(sb, ofs, len);
113} 116}
114 117
115static void mtd_sync(struct super_block *sb) 118static void logfs_mtd_sync(struct super_block *sb)
116{ 119{
117 struct mtd_info *mtd = logfs_super(sb)->s_mtd; 120 struct mtd_info *mtd = logfs_super(sb)->s_mtd;
118 121
119 if (mtd->sync) 122 mtd_sync(mtd);
120 mtd->sync(mtd);
121} 123}
122 124
123static int mtd_readpage(void *_sb, struct page *page) 125static int logfs_mtd_readpage(void *_sb, struct page *page)
124{ 126{
125 struct super_block *sb = _sb; 127 struct super_block *sb = _sb;
126 int err; 128 int err;
127 129
128 err = mtd_read(sb, page->index << PAGE_SHIFT, PAGE_SIZE, 130 err = logfs_mtd_read(sb, page->index << PAGE_SHIFT, PAGE_SIZE,
129 page_address(page)); 131 page_address(page));
130 if (err == -EUCLEAN || err == -EBADMSG) { 132 if (err == -EUCLEAN || err == -EBADMSG) {
131 /* -EBADMSG happens regularly on power failures */ 133 /* -EBADMSG happens regularly on power failures */
@@ -143,18 +145,18 @@ static int mtd_readpage(void *_sb, struct page *page)
143 return err; 145 return err;
144} 146}
145 147
146static struct page *mtd_find_first_sb(struct super_block *sb, u64 *ofs) 148static struct page *logfs_mtd_find_first_sb(struct super_block *sb, u64 *ofs)
147{ 149{
148 struct logfs_super *super = logfs_super(sb); 150 struct logfs_super *super = logfs_super(sb);
149 struct address_space *mapping = super->s_mapping_inode->i_mapping; 151 struct address_space *mapping = super->s_mapping_inode->i_mapping;
150 filler_t *filler = mtd_readpage; 152 filler_t *filler = logfs_mtd_readpage;
151 struct mtd_info *mtd = super->s_mtd; 153 struct mtd_info *mtd = super->s_mtd;
152 154
153 if (!mtd->block_isbad) 155 if (!mtd_can_have_bb(mtd))
154 return NULL; 156 return NULL;
155 157
156 *ofs = 0; 158 *ofs = 0;
157 while (mtd->block_isbad(mtd, *ofs)) { 159 while (mtd_block_isbad(mtd, *ofs)) {
158 *ofs += mtd->erasesize; 160 *ofs += mtd->erasesize;
159 if (*ofs >= mtd->size) 161 if (*ofs >= mtd->size)
160 return NULL; 162 return NULL;
@@ -163,18 +165,18 @@ static struct page *mtd_find_first_sb(struct super_block *sb, u64 *ofs)
163 return read_cache_page(mapping, *ofs >> PAGE_SHIFT, filler, sb); 165 return read_cache_page(mapping, *ofs >> PAGE_SHIFT, filler, sb);
164} 166}
165 167
166static struct page *mtd_find_last_sb(struct super_block *sb, u64 *ofs) 168static struct page *logfs_mtd_find_last_sb(struct super_block *sb, u64 *ofs)
167{ 169{
168 struct logfs_super *super = logfs_super(sb); 170 struct logfs_super *super = logfs_super(sb);
169 struct address_space *mapping = super->s_mapping_inode->i_mapping; 171 struct address_space *mapping = super->s_mapping_inode->i_mapping;
170 filler_t *filler = mtd_readpage; 172 filler_t *filler = logfs_mtd_readpage;
171 struct mtd_info *mtd = super->s_mtd; 173 struct mtd_info *mtd = super->s_mtd;
172 174
173 if (!mtd->block_isbad) 175 if (!mtd_can_have_bb(mtd))
174 return NULL; 176 return NULL;
175 177
176 *ofs = mtd->size - mtd->erasesize; 178 *ofs = mtd->size - mtd->erasesize;
177 while (mtd->block_isbad(mtd, *ofs)) { 179 while (mtd_block_isbad(mtd, *ofs)) {
178 *ofs -= mtd->erasesize; 180 *ofs -= mtd->erasesize;
179 if (*ofs <= 0) 181 if (*ofs <= 0)
180 return NULL; 182 return NULL;
@@ -184,7 +186,7 @@ static struct page *mtd_find_last_sb(struct super_block *sb, u64 *ofs)
184 return read_cache_page(mapping, *ofs >> PAGE_SHIFT, filler, sb); 186 return read_cache_page(mapping, *ofs >> PAGE_SHIFT, filler, sb);
185} 187}
186 188
187static int __mtd_writeseg(struct super_block *sb, u64 ofs, pgoff_t index, 189static int __logfs_mtd_writeseg(struct super_block *sb, u64 ofs, pgoff_t index,
188 size_t nr_pages) 190 size_t nr_pages)
189{ 191{
190 struct logfs_super *super = logfs_super(sb); 192 struct logfs_super *super = logfs_super(sb);
@@ -196,8 +198,8 @@ static int __mtd_writeseg(struct super_block *sb, u64 ofs, pgoff_t index,
196 page = find_lock_page(mapping, index + i); 198 page = find_lock_page(mapping, index + i);
197 BUG_ON(!page); 199 BUG_ON(!page);
198 200
199 err = mtd_write(sb, page->index << PAGE_SHIFT, PAGE_SIZE, 201 err = loffs_mtd_write(sb, page->index << PAGE_SHIFT, PAGE_SIZE,
200 page_address(page)); 202 page_address(page));
201 unlock_page(page); 203 unlock_page(page);
202 page_cache_release(page); 204 page_cache_release(page);
203 if (err) 205 if (err)
@@ -206,7 +208,7 @@ static int __mtd_writeseg(struct super_block *sb, u64 ofs, pgoff_t index,
206 return 0; 208 return 0;
207} 209}
208 210
209static void mtd_writeseg(struct super_block *sb, u64 ofs, size_t len) 211static void logfs_mtd_writeseg(struct super_block *sb, u64 ofs, size_t len)
210{ 212{
211 struct logfs_super *super = logfs_super(sb); 213 struct logfs_super *super = logfs_super(sb);
212 int head; 214 int head;
@@ -227,15 +229,15 @@ static void mtd_writeseg(struct super_block *sb, u64 ofs, size_t len)
227 len += head; 229 len += head;
228 } 230 }
229 len = PAGE_ALIGN(len); 231 len = PAGE_ALIGN(len);
230 __mtd_writeseg(sb, ofs, ofs >> PAGE_SHIFT, len >> PAGE_SHIFT); 232 __logfs_mtd_writeseg(sb, ofs, ofs >> PAGE_SHIFT, len >> PAGE_SHIFT);
231} 233}
232 234
233static void mtd_put_device(struct logfs_super *s) 235static void logfs_mtd_put_device(struct logfs_super *s)
234{ 236{
235 put_mtd_device(s->s_mtd); 237 put_mtd_device(s->s_mtd);
236} 238}
237 239
238static int mtd_can_write_buf(struct super_block *sb, u64 ofs) 240static int logfs_mtd_can_write_buf(struct super_block *sb, u64 ofs)
239{ 241{
240 struct logfs_super *super = logfs_super(sb); 242 struct logfs_super *super = logfs_super(sb);
241 void *buf; 243 void *buf;
@@ -244,7 +246,7 @@ static int mtd_can_write_buf(struct super_block *sb, u64 ofs)
244 buf = kmalloc(super->s_writesize, GFP_KERNEL); 246 buf = kmalloc(super->s_writesize, GFP_KERNEL);
245 if (!buf) 247 if (!buf)
246 return -ENOMEM; 248 return -ENOMEM;
247 err = mtd_read(sb, ofs, super->s_writesize, buf); 249 err = logfs_mtd_read(sb, ofs, super->s_writesize, buf);
248 if (err) 250 if (err)
249 goto out; 251 goto out;
250 if (memchr_inv(buf, 0xff, super->s_writesize)) 252 if (memchr_inv(buf, 0xff, super->s_writesize))
@@ -255,14 +257,14 @@ out:
255} 257}
256 258
257static const struct logfs_device_ops mtd_devops = { 259static const struct logfs_device_ops mtd_devops = {
258 .find_first_sb = mtd_find_first_sb, 260 .find_first_sb = logfs_mtd_find_first_sb,
259 .find_last_sb = mtd_find_last_sb, 261 .find_last_sb = logfs_mtd_find_last_sb,
260 .readpage = mtd_readpage, 262 .readpage = logfs_mtd_readpage,
261 .writeseg = mtd_writeseg, 263 .writeseg = logfs_mtd_writeseg,
262 .erase = mtd_erase, 264 .erase = logfs_mtd_erase,
263 .can_write_buf = mtd_can_write_buf, 265 .can_write_buf = logfs_mtd_can_write_buf,
264 .sync = mtd_sync, 266 .sync = logfs_mtd_sync,
265 .put_device = mtd_put_device, 267 .put_device = logfs_mtd_put_device,
266}; 268};
267 269
268int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr) 270int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr)
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index b7d7f67cee5a..501043e8966c 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -482,7 +482,7 @@ out:
482 return ret; 482 return ret;
483} 483}
484 484
485static int logfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 485static int logfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
486{ 486{
487 struct inode *inode; 487 struct inode *inode;
488 488
@@ -501,7 +501,7 @@ static int logfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
501 return __logfs_create(dir, dentry, inode, NULL, 0); 501 return __logfs_create(dir, dentry, inode, NULL, 0);
502} 502}
503 503
504static int logfs_create(struct inode *dir, struct dentry *dentry, int mode, 504static int logfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
505 struct nameidata *nd) 505 struct nameidata *nd)
506{ 506{
507 struct inode *inode; 507 struct inode *inode;
@@ -517,7 +517,7 @@ static int logfs_create(struct inode *dir, struct dentry *dentry, int mode,
517 return __logfs_create(dir, dentry, inode, NULL, 0); 517 return __logfs_create(dir, dentry, inode, NULL, 0);
518} 518}
519 519
520static int logfs_mknod(struct inode *dir, struct dentry *dentry, int mode, 520static int logfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
521 dev_t rdev) 521 dev_t rdev)
522{ 522{
523 struct inode *inode; 523 struct inode *inode;
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index 7e441ad5f792..388df1aa35e5 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -144,7 +144,6 @@ struct inode *logfs_safe_iget(struct super_block *sb, ino_t ino, int *is_cached)
144static void logfs_i_callback(struct rcu_head *head) 144static void logfs_i_callback(struct rcu_head *head)
145{ 145{
146 struct inode *inode = container_of(head, struct inode, i_rcu); 146 struct inode *inode = container_of(head, struct inode, i_rcu);
147 INIT_LIST_HEAD(&inode->i_dentry);
148 kmem_cache_free(logfs_inode_cache, logfs_inode(inode)); 147 kmem_cache_free(logfs_inode_cache, logfs_inode(inode));
149} 148}
150 149
@@ -324,7 +323,7 @@ static void logfs_set_ino_generation(struct super_block *sb,
324 mutex_unlock(&super->s_journal_mutex); 323 mutex_unlock(&super->s_journal_mutex);
325} 324}
326 325
327struct inode *logfs_new_inode(struct inode *dir, int mode) 326struct inode *logfs_new_inode(struct inode *dir, umode_t mode)
328{ 327{
329 struct super_block *sb = dir->i_sb; 328 struct super_block *sb = dir->i_sb;
330 struct inode *inode; 329 struct inode *inode;
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index 398ecff6e548..926373866a55 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -520,7 +520,7 @@ extern const struct super_operations logfs_super_operations;
520struct inode *logfs_iget(struct super_block *sb, ino_t ino); 520struct inode *logfs_iget(struct super_block *sb, ino_t ino);
521struct inode *logfs_safe_iget(struct super_block *sb, ino_t ino, int *cookie); 521struct inode *logfs_safe_iget(struct super_block *sb, ino_t ino, int *cookie);
522void logfs_safe_iput(struct inode *inode, int cookie); 522void logfs_safe_iput(struct inode *inode, int cookie);
523struct inode *logfs_new_inode(struct inode *dir, int mode); 523struct inode *logfs_new_inode(struct inode *dir, umode_t mode);
524struct inode *logfs_new_meta_inode(struct super_block *sb, u64 ino); 524struct inode *logfs_new_meta_inode(struct super_block *sb, u64 ino);
525struct inode *logfs_read_meta_inode(struct super_block *sb, u64 ino); 525struct inode *logfs_read_meta_inode(struct super_block *sb, u64 ino);
526int logfs_init_inode_cache(void); 526int logfs_init_inode_cache(void);
diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c
index ef175cb8cfd8..4bc50dac8e97 100644
--- a/fs/minix/bitmap.c
+++ b/fs/minix/bitmap.c
@@ -209,7 +209,7 @@ void minix_free_inode(struct inode * inode)
209 mark_buffer_dirty(bh); 209 mark_buffer_dirty(bh);
210} 210}
211 211
212struct inode *minix_new_inode(const struct inode *dir, int mode, int *error) 212struct inode *minix_new_inode(const struct inode *dir, umode_t mode, int *error)
213{ 213{
214 struct super_block *sb = dir->i_sb; 214 struct super_block *sb = dir->i_sb;
215 struct minix_sb_info *sbi = minix_sb(sb); 215 struct minix_sb_info *sbi = minix_sb(sb);
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 4d46a6a59070..fa8b612b8ce2 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -71,7 +71,6 @@ static struct inode *minix_alloc_inode(struct super_block *sb)
71static void minix_i_callback(struct rcu_head *head) 71static void minix_i_callback(struct rcu_head *head)
72{ 72{
73 struct inode *inode = container_of(head, struct inode, i_rcu); 73 struct inode *inode = container_of(head, struct inode, i_rcu);
74 INIT_LIST_HEAD(&inode->i_dentry);
75 kmem_cache_free(minix_inode_cachep, minix_i(inode)); 74 kmem_cache_free(minix_inode_cachep, minix_i(inode));
76} 75}
77 76
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index 26bbd55e82ea..c889ef0aa571 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -46,7 +46,7 @@ struct minix_sb_info {
46extern struct inode *minix_iget(struct super_block *, unsigned long); 46extern struct inode *minix_iget(struct super_block *, unsigned long);
47extern struct minix_inode * minix_V1_raw_inode(struct super_block *, ino_t, struct buffer_head **); 47extern struct minix_inode * minix_V1_raw_inode(struct super_block *, ino_t, struct buffer_head **);
48extern struct minix2_inode * minix_V2_raw_inode(struct super_block *, ino_t, struct buffer_head **); 48extern struct minix2_inode * minix_V2_raw_inode(struct super_block *, ino_t, struct buffer_head **);
49extern struct inode * minix_new_inode(const struct inode *, int, int *); 49extern struct inode * minix_new_inode(const struct inode *, umode_t, int *);
50extern void minix_free_inode(struct inode * inode); 50extern void minix_free_inode(struct inode * inode);
51extern unsigned long minix_count_free_inodes(struct super_block *sb); 51extern unsigned long minix_count_free_inodes(struct super_block *sb);
52extern int minix_new_block(struct inode * inode); 52extern int minix_new_block(struct inode * inode);
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index 6e6777f1b4b2..2f76e38c2065 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -36,7 +36,7 @@ static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, st
36 return NULL; 36 return NULL;
37} 37}
38 38
39static int minix_mknod(struct inode * dir, struct dentry *dentry, int mode, dev_t rdev) 39static int minix_mknod(struct inode * dir, struct dentry *dentry, umode_t mode, dev_t rdev)
40{ 40{
41 int error; 41 int error;
42 struct inode *inode; 42 struct inode *inode;
@@ -54,7 +54,7 @@ static int minix_mknod(struct inode * dir, struct dentry *dentry, int mode, dev_
54 return error; 54 return error;
55} 55}
56 56
57static int minix_create(struct inode * dir, struct dentry *dentry, int mode, 57static int minix_create(struct inode *dir, struct dentry *dentry, umode_t mode,
58 struct nameidata *nd) 58 struct nameidata *nd)
59{ 59{
60 return minix_mknod(dir, dentry, mode, 0); 60 return minix_mknod(dir, dentry, mode, 0);
@@ -103,7 +103,7 @@ static int minix_link(struct dentry * old_dentry, struct inode * dir,
103 return add_nondir(dentry, inode); 103 return add_nondir(dentry, inode);
104} 104}
105 105
106static int minix_mkdir(struct inode * dir, struct dentry *dentry, int mode) 106static int minix_mkdir(struct inode * dir, struct dentry *dentry, umode_t mode)
107{ 107{
108 struct inode * inode; 108 struct inode * inode;
109 int err = -EMLINK; 109 int err = -EMLINK;
diff --git a/fs/mount.h b/fs/mount.h
new file mode 100644
index 000000000000..4ef36d93e5a2
--- /dev/null
+++ b/fs/mount.h
@@ -0,0 +1,76 @@
1#include <linux/mount.h>
2#include <linux/seq_file.h>
3#include <linux/poll.h>
4
5struct mnt_namespace {
6 atomic_t count;
7 struct mount * root;
8 struct list_head list;
9 wait_queue_head_t poll;
10 int event;
11};
12
13struct mnt_pcp {
14 int mnt_count;
15 int mnt_writers;
16};
17
18struct mount {
19 struct list_head mnt_hash;
20 struct mount *mnt_parent;
21 struct dentry *mnt_mountpoint;
22 struct vfsmount mnt;
23#ifdef CONFIG_SMP
24 struct mnt_pcp __percpu *mnt_pcp;
25 atomic_t mnt_longterm; /* how many of the refs are longterm */
26#else
27 int mnt_count;
28 int mnt_writers;
29#endif
30 struct list_head mnt_mounts; /* list of children, anchored here */
31 struct list_head mnt_child; /* and going through their mnt_child */
32 struct list_head mnt_instance; /* mount instance on sb->s_mounts */
33 const char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */
34 struct list_head mnt_list;
35 struct list_head mnt_expire; /* link in fs-specific expiry list */
36 struct list_head mnt_share; /* circular list of shared mounts */
37 struct list_head mnt_slave_list;/* list of slave mounts */
38 struct list_head mnt_slave; /* slave list entry */
39 struct mount *mnt_master; /* slave is on master->mnt_slave_list */
40 struct mnt_namespace *mnt_ns; /* containing namespace */
41#ifdef CONFIG_FSNOTIFY
42 struct hlist_head mnt_fsnotify_marks;
43 __u32 mnt_fsnotify_mask;
44#endif
45 int mnt_id; /* mount identifier */
46 int mnt_group_id; /* peer group identifier */
47 int mnt_expiry_mark; /* true if marked for expiry */
48 int mnt_pinned;
49 int mnt_ghosts;
50};
51
52static inline struct mount *real_mount(struct vfsmount *mnt)
53{
54 return container_of(mnt, struct mount, mnt);
55}
56
57static inline int mnt_has_parent(struct mount *mnt)
58{
59 return mnt != mnt->mnt_parent;
60}
61
62extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *, int);
63
64static inline void get_mnt_ns(struct mnt_namespace *ns)
65{
66 atomic_inc(&ns->count);
67}
68
69struct proc_mounts {
70 struct seq_file m; /* must be the first element */
71 struct mnt_namespace *ns;
72 struct path root;
73 int (*show)(struct seq_file *, struct vfsmount *);
74};
75
76extern const struct seq_operations mounts_op;
diff --git a/fs/mpage.c b/fs/mpage.c
index fdfae9fa98cd..643e9f55ef29 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -371,9 +371,6 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
371 sector_t last_block_in_bio = 0; 371 sector_t last_block_in_bio = 0;
372 struct buffer_head map_bh; 372 struct buffer_head map_bh;
373 unsigned long first_logical_block = 0; 373 unsigned long first_logical_block = 0;
374 struct blk_plug plug;
375
376 blk_start_plug(&plug);
377 374
378 map_bh.b_state = 0; 375 map_bh.b_state = 0;
379 map_bh.b_size = 0; 376 map_bh.b_size = 0;
@@ -395,7 +392,6 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
395 BUG_ON(!list_empty(pages)); 392 BUG_ON(!list_empty(pages));
396 if (bio) 393 if (bio)
397 mpage_bio_submit(READ, bio); 394 mpage_bio_submit(READ, bio);
398 blk_finish_plug(&plug);
399 return 0; 395 return 0;
400} 396}
401EXPORT_SYMBOL(mpage_readpages); 397EXPORT_SYMBOL(mpage_readpages);
diff --git a/fs/namei.c b/fs/namei.c
index 5008f01787f5..c283a1ec008e 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -36,6 +36,7 @@
36#include <asm/uaccess.h> 36#include <asm/uaccess.h>
37 37
38#include "internal.h" 38#include "internal.h"
39#include "mount.h"
39 40
40/* [Feb-1997 T. Schoebel-Theuer] 41/* [Feb-1997 T. Schoebel-Theuer]
41 * Fundamental changes in the pathname lookup mechanisms (namei) 42 * Fundamental changes in the pathname lookup mechanisms (namei)
@@ -676,36 +677,38 @@ follow_link(struct path *link, struct nameidata *nd, void **p)
676 677
677static int follow_up_rcu(struct path *path) 678static int follow_up_rcu(struct path *path)
678{ 679{
679 struct vfsmount *parent; 680 struct mount *mnt = real_mount(path->mnt);
681 struct mount *parent;
680 struct dentry *mountpoint; 682 struct dentry *mountpoint;
681 683
682 parent = path->mnt->mnt_parent; 684 parent = mnt->mnt_parent;
683 if (parent == path->mnt) 685 if (&parent->mnt == path->mnt)
684 return 0; 686 return 0;
685 mountpoint = path->mnt->mnt_mountpoint; 687 mountpoint = mnt->mnt_mountpoint;
686 path->dentry = mountpoint; 688 path->dentry = mountpoint;
687 path->mnt = parent; 689 path->mnt = &parent->mnt;
688 return 1; 690 return 1;
689} 691}
690 692
691int follow_up(struct path *path) 693int follow_up(struct path *path)
692{ 694{
693 struct vfsmount *parent; 695 struct mount *mnt = real_mount(path->mnt);
696 struct mount *parent;
694 struct dentry *mountpoint; 697 struct dentry *mountpoint;
695 698
696 br_read_lock(vfsmount_lock); 699 br_read_lock(vfsmount_lock);
697 parent = path->mnt->mnt_parent; 700 parent = mnt->mnt_parent;
698 if (parent == path->mnt) { 701 if (&parent->mnt == path->mnt) {
699 br_read_unlock(vfsmount_lock); 702 br_read_unlock(vfsmount_lock);
700 return 0; 703 return 0;
701 } 704 }
702 mntget(parent); 705 mntget(&parent->mnt);
703 mountpoint = dget(path->mnt->mnt_mountpoint); 706 mountpoint = dget(mnt->mnt_mountpoint);
704 br_read_unlock(vfsmount_lock); 707 br_read_unlock(vfsmount_lock);
705 dput(path->dentry); 708 dput(path->dentry);
706 path->dentry = mountpoint; 709 path->dentry = mountpoint;
707 mntput(path->mnt); 710 mntput(path->mnt);
708 path->mnt = parent; 711 path->mnt = &parent->mnt;
709 return 1; 712 return 1;
710} 713}
711 714
@@ -884,7 +887,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
884 struct inode **inode) 887 struct inode **inode)
885{ 888{
886 for (;;) { 889 for (;;) {
887 struct vfsmount *mounted; 890 struct mount *mounted;
888 /* 891 /*
889 * Don't forget we might have a non-mountpoint managed dentry 892 * Don't forget we might have a non-mountpoint managed dentry
890 * that wants to block transit. 893 * that wants to block transit.
@@ -898,8 +901,8 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
898 mounted = __lookup_mnt(path->mnt, path->dentry, 1); 901 mounted = __lookup_mnt(path->mnt, path->dentry, 1);
899 if (!mounted) 902 if (!mounted)
900 break; 903 break;
901 path->mnt = mounted; 904 path->mnt = &mounted->mnt;
902 path->dentry = mounted->mnt_root; 905 path->dentry = mounted->mnt.mnt_root;
903 nd->flags |= LOOKUP_JUMPED; 906 nd->flags |= LOOKUP_JUMPED;
904 nd->seq = read_seqcount_begin(&path->dentry->d_seq); 907 nd->seq = read_seqcount_begin(&path->dentry->d_seq);
905 /* 908 /*
@@ -915,12 +918,12 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
915static void follow_mount_rcu(struct nameidata *nd) 918static void follow_mount_rcu(struct nameidata *nd)
916{ 919{
917 while (d_mountpoint(nd->path.dentry)) { 920 while (d_mountpoint(nd->path.dentry)) {
918 struct vfsmount *mounted; 921 struct mount *mounted;
919 mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry, 1); 922 mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry, 1);
920 if (!mounted) 923 if (!mounted)
921 break; 924 break;
922 nd->path.mnt = mounted; 925 nd->path.mnt = &mounted->mnt;
923 nd->path.dentry = mounted->mnt_root; 926 nd->path.dentry = mounted->mnt.mnt_root;
924 nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); 927 nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
925 } 928 }
926} 929}
@@ -1976,7 +1979,7 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
1976 } 1979 }
1977} 1980}
1978 1981
1979int vfs_create(struct inode *dir, struct dentry *dentry, int mode, 1982int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
1980 struct nameidata *nd) 1983 struct nameidata *nd)
1981{ 1984{
1982 int error = may_create(dir, dentry); 1985 int error = may_create(dir, dentry);
@@ -2177,7 +2180,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
2177 2180
2178 /* Negative dentry, just create the file */ 2181 /* Negative dentry, just create the file */
2179 if (!dentry->d_inode) { 2182 if (!dentry->d_inode) {
2180 int mode = op->mode; 2183 umode_t mode = op->mode;
2181 if (!IS_POSIXACL(dir->d_inode)) 2184 if (!IS_POSIXACL(dir->d_inode))
2182 mode &= ~current_umask(); 2185 mode &= ~current_umask();
2183 /* 2186 /*
@@ -2444,7 +2447,7 @@ struct dentry *user_path_create(int dfd, const char __user *pathname, struct pat
2444} 2447}
2445EXPORT_SYMBOL(user_path_create); 2448EXPORT_SYMBOL(user_path_create);
2446 2449
2447int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) 2450int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
2448{ 2451{
2449 int error = may_create(dir, dentry); 2452 int error = may_create(dir, dentry);
2450 2453
@@ -2472,7 +2475,7 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
2472 return error; 2475 return error;
2473} 2476}
2474 2477
2475static int may_mknod(mode_t mode) 2478static int may_mknod(umode_t mode)
2476{ 2479{
2477 switch (mode & S_IFMT) { 2480 switch (mode & S_IFMT) {
2478 case S_IFREG: 2481 case S_IFREG:
@@ -2489,7 +2492,7 @@ static int may_mknod(mode_t mode)
2489 } 2492 }
2490} 2493}
2491 2494
2492SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, int, mode, 2495SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
2493 unsigned, dev) 2496 unsigned, dev)
2494{ 2497{
2495 struct dentry *dentry; 2498 struct dentry *dentry;
@@ -2536,12 +2539,12 @@ out_dput:
2536 return error; 2539 return error;
2537} 2540}
2538 2541
2539SYSCALL_DEFINE3(mknod, const char __user *, filename, int, mode, unsigned, dev) 2542SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev)
2540{ 2543{
2541 return sys_mknodat(AT_FDCWD, filename, mode, dev); 2544 return sys_mknodat(AT_FDCWD, filename, mode, dev);
2542} 2545}
2543 2546
2544int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 2547int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
2545{ 2548{
2546 int error = may_create(dir, dentry); 2549 int error = may_create(dir, dentry);
2547 2550
@@ -2562,7 +2565,7 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
2562 return error; 2565 return error;
2563} 2566}
2564 2567
2565SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, int, mode) 2568SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
2566{ 2569{
2567 struct dentry *dentry; 2570 struct dentry *dentry;
2568 struct path path; 2571 struct path path;
@@ -2590,7 +2593,7 @@ out_dput:
2590 return error; 2593 return error;
2591} 2594}
2592 2595
2593SYSCALL_DEFINE2(mkdir, const char __user *, pathname, int, mode) 2596SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode)
2594{ 2597{
2595 return sys_mkdirat(AT_FDCWD, pathname, mode); 2598 return sys_mkdirat(AT_FDCWD, pathname, mode);
2596} 2599}
diff --git a/fs/namespace.c b/fs/namespace.c
index cfc6d4448aa5..e6081996c9a2 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -9,30 +9,17 @@
9 */ 9 */
10 10
11#include <linux/syscalls.h> 11#include <linux/syscalls.h>
12#include <linux/slab.h> 12#include <linux/export.h>
13#include <linux/sched.h>
14#include <linux/spinlock.h>
15#include <linux/percpu.h>
16#include <linux/init.h>
17#include <linux/kernel.h>
18#include <linux/acct.h>
19#include <linux/capability.h> 13#include <linux/capability.h>
20#include <linux/cpumask.h>
21#include <linux/module.h>
22#include <linux/sysfs.h>
23#include <linux/seq_file.h>
24#include <linux/mnt_namespace.h> 14#include <linux/mnt_namespace.h>
25#include <linux/namei.h> 15#include <linux/namei.h>
26#include <linux/nsproxy.h>
27#include <linux/security.h> 16#include <linux/security.h>
28#include <linux/mount.h>
29#include <linux/ramfs.h>
30#include <linux/log2.h>
31#include <linux/idr.h> 17#include <linux/idr.h>
32#include <linux/fs_struct.h> 18#include <linux/acct.h> /* acct_auto_close_mnt */
33#include <linux/fsnotify.h> 19#include <linux/ramfs.h> /* init_rootfs */
34#include <asm/uaccess.h> 20#include <linux/fs_struct.h> /* get_fs_root et.al. */
35#include <asm/unistd.h> 21#include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */
22#include <linux/uaccess.h>
36#include "pnode.h" 23#include "pnode.h"
37#include "internal.h" 24#include "internal.h"
38 25
@@ -78,7 +65,7 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
78 * allocation is serialized by namespace_sem, but we need the spinlock to 65 * allocation is serialized by namespace_sem, but we need the spinlock to
79 * serialize with freeing. 66 * serialize with freeing.
80 */ 67 */
81static int mnt_alloc_id(struct vfsmount *mnt) 68static int mnt_alloc_id(struct mount *mnt)
82{ 69{
83 int res; 70 int res;
84 71
@@ -95,7 +82,7 @@ retry:
95 return res; 82 return res;
96} 83}
97 84
98static void mnt_free_id(struct vfsmount *mnt) 85static void mnt_free_id(struct mount *mnt)
99{ 86{
100 int id = mnt->mnt_id; 87 int id = mnt->mnt_id;
101 spin_lock(&mnt_id_lock); 88 spin_lock(&mnt_id_lock);
@@ -110,7 +97,7 @@ static void mnt_free_id(struct vfsmount *mnt)
110 * 97 *
111 * mnt_group_ida is protected by namespace_sem 98 * mnt_group_ida is protected by namespace_sem
112 */ 99 */
113static int mnt_alloc_group_id(struct vfsmount *mnt) 100static int mnt_alloc_group_id(struct mount *mnt)
114{ 101{
115 int res; 102 int res;
116 103
@@ -129,7 +116,7 @@ static int mnt_alloc_group_id(struct vfsmount *mnt)
129/* 116/*
130 * Release a peer group ID 117 * Release a peer group ID
131 */ 118 */
132void mnt_release_group_id(struct vfsmount *mnt) 119void mnt_release_group_id(struct mount *mnt)
133{ 120{
134 int id = mnt->mnt_group_id; 121 int id = mnt->mnt_group_id;
135 ida_remove(&mnt_group_ida, id); 122 ida_remove(&mnt_group_ida, id);
@@ -141,7 +128,7 @@ void mnt_release_group_id(struct vfsmount *mnt)
141/* 128/*
142 * vfsmount lock must be held for read 129 * vfsmount lock must be held for read
143 */ 130 */
144static inline void mnt_add_count(struct vfsmount *mnt, int n) 131static inline void mnt_add_count(struct mount *mnt, int n)
145{ 132{
146#ifdef CONFIG_SMP 133#ifdef CONFIG_SMP
147 this_cpu_add(mnt->mnt_pcp->mnt_count, n); 134 this_cpu_add(mnt->mnt_pcp->mnt_count, n);
@@ -152,35 +139,10 @@ static inline void mnt_add_count(struct vfsmount *mnt, int n)
152#endif 139#endif
153} 140}
154 141
155static inline void mnt_set_count(struct vfsmount *mnt, int n)
156{
157#ifdef CONFIG_SMP
158 this_cpu_write(mnt->mnt_pcp->mnt_count, n);
159#else
160 mnt->mnt_count = n;
161#endif
162}
163
164/*
165 * vfsmount lock must be held for read
166 */
167static inline void mnt_inc_count(struct vfsmount *mnt)
168{
169 mnt_add_count(mnt, 1);
170}
171
172/*
173 * vfsmount lock must be held for read
174 */
175static inline void mnt_dec_count(struct vfsmount *mnt)
176{
177 mnt_add_count(mnt, -1);
178}
179
180/* 142/*
181 * vfsmount lock must be held for write 143 * vfsmount lock must be held for write
182 */ 144 */
183unsigned int mnt_get_count(struct vfsmount *mnt) 145unsigned int mnt_get_count(struct mount *mnt)
184{ 146{
185#ifdef CONFIG_SMP 147#ifdef CONFIG_SMP
186 unsigned int count = 0; 148 unsigned int count = 0;
@@ -196,9 +158,9 @@ unsigned int mnt_get_count(struct vfsmount *mnt)
196#endif 158#endif
197} 159}
198 160
199static struct vfsmount *alloc_vfsmnt(const char *name) 161static struct mount *alloc_vfsmnt(const char *name)
200{ 162{
201 struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); 163 struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
202 if (mnt) { 164 if (mnt) {
203 int err; 165 int err;
204 166
@@ -277,7 +239,7 @@ int __mnt_is_readonly(struct vfsmount *mnt)
277} 239}
278EXPORT_SYMBOL_GPL(__mnt_is_readonly); 240EXPORT_SYMBOL_GPL(__mnt_is_readonly);
279 241
280static inline void mnt_inc_writers(struct vfsmount *mnt) 242static inline void mnt_inc_writers(struct mount *mnt)
281{ 243{
282#ifdef CONFIG_SMP 244#ifdef CONFIG_SMP
283 this_cpu_inc(mnt->mnt_pcp->mnt_writers); 245 this_cpu_inc(mnt->mnt_pcp->mnt_writers);
@@ -286,7 +248,7 @@ static inline void mnt_inc_writers(struct vfsmount *mnt)
286#endif 248#endif
287} 249}
288 250
289static inline void mnt_dec_writers(struct vfsmount *mnt) 251static inline void mnt_dec_writers(struct mount *mnt)
290{ 252{
291#ifdef CONFIG_SMP 253#ifdef CONFIG_SMP
292 this_cpu_dec(mnt->mnt_pcp->mnt_writers); 254 this_cpu_dec(mnt->mnt_pcp->mnt_writers);
@@ -295,7 +257,7 @@ static inline void mnt_dec_writers(struct vfsmount *mnt)
295#endif 257#endif
296} 258}
297 259
298static unsigned int mnt_get_writers(struct vfsmount *mnt) 260static unsigned int mnt_get_writers(struct mount *mnt)
299{ 261{
300#ifdef CONFIG_SMP 262#ifdef CONFIG_SMP
301 unsigned int count = 0; 263 unsigned int count = 0;
@@ -311,6 +273,15 @@ static unsigned int mnt_get_writers(struct vfsmount *mnt)
311#endif 273#endif
312} 274}
313 275
276static int mnt_is_readonly(struct vfsmount *mnt)
277{
278 if (mnt->mnt_sb->s_readonly_remount)
279 return 1;
280 /* Order wrt setting s_flags/s_readonly_remount in do_remount() */
281 smp_rmb();
282 return __mnt_is_readonly(mnt);
283}
284
314/* 285/*
315 * Most r/o checks on a fs are for operations that take 286 * Most r/o checks on a fs are for operations that take
316 * discrete amounts of time, like a write() or unlink(). 287 * discrete amounts of time, like a write() or unlink().
@@ -321,7 +292,7 @@ static unsigned int mnt_get_writers(struct vfsmount *mnt)
321 */ 292 */
322/** 293/**
323 * mnt_want_write - get write access to a mount 294 * mnt_want_write - get write access to a mount
324 * @mnt: the mount on which to take a write 295 * @m: the mount on which to take a write
325 * 296 *
326 * This tells the low-level filesystem that a write is 297 * This tells the low-level filesystem that a write is
327 * about to be performed to it, and makes sure that 298 * about to be performed to it, and makes sure that
@@ -329,8 +300,9 @@ static unsigned int mnt_get_writers(struct vfsmount *mnt)
329 * the write operation is finished, mnt_drop_write() 300 * the write operation is finished, mnt_drop_write()
330 * must be called. This is effectively a refcount. 301 * must be called. This is effectively a refcount.
331 */ 302 */
332int mnt_want_write(struct vfsmount *mnt) 303int mnt_want_write(struct vfsmount *m)
333{ 304{
305 struct mount *mnt = real_mount(m);
334 int ret = 0; 306 int ret = 0;
335 307
336 preempt_disable(); 308 preempt_disable();
@@ -341,7 +313,7 @@ int mnt_want_write(struct vfsmount *mnt)
341 * incremented count after it has set MNT_WRITE_HOLD. 313 * incremented count after it has set MNT_WRITE_HOLD.
342 */ 314 */
343 smp_mb(); 315 smp_mb();
344 while (mnt->mnt_flags & MNT_WRITE_HOLD) 316 while (mnt->mnt.mnt_flags & MNT_WRITE_HOLD)
345 cpu_relax(); 317 cpu_relax();
346 /* 318 /*
347 * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will 319 * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
@@ -349,12 +321,10 @@ int mnt_want_write(struct vfsmount *mnt)
349 * MNT_WRITE_HOLD is cleared. 321 * MNT_WRITE_HOLD is cleared.
350 */ 322 */
351 smp_rmb(); 323 smp_rmb();
352 if (__mnt_is_readonly(mnt)) { 324 if (mnt_is_readonly(m)) {
353 mnt_dec_writers(mnt); 325 mnt_dec_writers(mnt);
354 ret = -EROFS; 326 ret = -EROFS;
355 goto out;
356 } 327 }
357out:
358 preempt_enable(); 328 preempt_enable();
359 return ret; 329 return ret;
360} 330}
@@ -378,7 +348,7 @@ int mnt_clone_write(struct vfsmount *mnt)
378 if (__mnt_is_readonly(mnt)) 348 if (__mnt_is_readonly(mnt))
379 return -EROFS; 349 return -EROFS;
380 preempt_disable(); 350 preempt_disable();
381 mnt_inc_writers(mnt); 351 mnt_inc_writers(real_mount(mnt));
382 preempt_enable(); 352 preempt_enable();
383 return 0; 353 return 0;
384} 354}
@@ -412,17 +382,23 @@ EXPORT_SYMBOL_GPL(mnt_want_write_file);
412void mnt_drop_write(struct vfsmount *mnt) 382void mnt_drop_write(struct vfsmount *mnt)
413{ 383{
414 preempt_disable(); 384 preempt_disable();
415 mnt_dec_writers(mnt); 385 mnt_dec_writers(real_mount(mnt));
416 preempt_enable(); 386 preempt_enable();
417} 387}
418EXPORT_SYMBOL_GPL(mnt_drop_write); 388EXPORT_SYMBOL_GPL(mnt_drop_write);
419 389
420static int mnt_make_readonly(struct vfsmount *mnt) 390void mnt_drop_write_file(struct file *file)
391{
392 mnt_drop_write(file->f_path.mnt);
393}
394EXPORT_SYMBOL(mnt_drop_write_file);
395
396static int mnt_make_readonly(struct mount *mnt)
421{ 397{
422 int ret = 0; 398 int ret = 0;
423 399
424 br_write_lock(vfsmount_lock); 400 br_write_lock(vfsmount_lock);
425 mnt->mnt_flags |= MNT_WRITE_HOLD; 401 mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
426 /* 402 /*
427 * After storing MNT_WRITE_HOLD, we'll read the counters. This store 403 * After storing MNT_WRITE_HOLD, we'll read the counters. This store
428 * should be visible before we do. 404 * should be visible before we do.
@@ -448,25 +424,61 @@ static int mnt_make_readonly(struct vfsmount *mnt)
448 if (mnt_get_writers(mnt) > 0) 424 if (mnt_get_writers(mnt) > 0)
449 ret = -EBUSY; 425 ret = -EBUSY;
450 else 426 else
451 mnt->mnt_flags |= MNT_READONLY; 427 mnt->mnt.mnt_flags |= MNT_READONLY;
452 /* 428 /*
453 * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers 429 * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers
454 * that become unheld will see MNT_READONLY. 430 * that become unheld will see MNT_READONLY.
455 */ 431 */
456 smp_wmb(); 432 smp_wmb();
457 mnt->mnt_flags &= ~MNT_WRITE_HOLD; 433 mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
458 br_write_unlock(vfsmount_lock); 434 br_write_unlock(vfsmount_lock);
459 return ret; 435 return ret;
460} 436}
461 437
462static void __mnt_unmake_readonly(struct vfsmount *mnt) 438static void __mnt_unmake_readonly(struct mount *mnt)
463{ 439{
464 br_write_lock(vfsmount_lock); 440 br_write_lock(vfsmount_lock);
465 mnt->mnt_flags &= ~MNT_READONLY; 441 mnt->mnt.mnt_flags &= ~MNT_READONLY;
442 br_write_unlock(vfsmount_lock);
443}
444
445int sb_prepare_remount_readonly(struct super_block *sb)
446{
447 struct mount *mnt;
448 int err = 0;
449
450 /* Racy optimization. Recheck the counter under MNT_WRITE_HOLD */
451 if (atomic_long_read(&sb->s_remove_count))
452 return -EBUSY;
453
454 br_write_lock(vfsmount_lock);
455 list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
456 if (!(mnt->mnt.mnt_flags & MNT_READONLY)) {
457 mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
458 smp_mb();
459 if (mnt_get_writers(mnt) > 0) {
460 err = -EBUSY;
461 break;
462 }
463 }
464 }
465 if (!err && atomic_long_read(&sb->s_remove_count))
466 err = -EBUSY;
467
468 if (!err) {
469 sb->s_readonly_remount = 1;
470 smp_wmb();
471 }
472 list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
473 if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD)
474 mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
475 }
466 br_write_unlock(vfsmount_lock); 476 br_write_unlock(vfsmount_lock);
477
478 return err;
467} 479}
468 480
469static void free_vfsmnt(struct vfsmount *mnt) 481static void free_vfsmnt(struct mount *mnt)
470{ 482{
471 kfree(mnt->mnt_devname); 483 kfree(mnt->mnt_devname);
472 mnt_free_id(mnt); 484 mnt_free_id(mnt);
@@ -481,20 +493,20 @@ static void free_vfsmnt(struct vfsmount *mnt)
481 * @dir. If @dir is set return the first mount else return the last mount. 493 * @dir. If @dir is set return the first mount else return the last mount.
482 * vfsmount_lock must be held for read or write. 494 * vfsmount_lock must be held for read or write.
483 */ 495 */
484struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry, 496struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
485 int dir) 497 int dir)
486{ 498{
487 struct list_head *head = mount_hashtable + hash(mnt, dentry); 499 struct list_head *head = mount_hashtable + hash(mnt, dentry);
488 struct list_head *tmp = head; 500 struct list_head *tmp = head;
489 struct vfsmount *p, *found = NULL; 501 struct mount *p, *found = NULL;
490 502
491 for (;;) { 503 for (;;) {
492 tmp = dir ? tmp->next : tmp->prev; 504 tmp = dir ? tmp->next : tmp->prev;
493 p = NULL; 505 p = NULL;
494 if (tmp == head) 506 if (tmp == head)
495 break; 507 break;
496 p = list_entry(tmp, struct vfsmount, mnt_hash); 508 p = list_entry(tmp, struct mount, mnt_hash);
497 if (p->mnt_parent == mnt && p->mnt_mountpoint == dentry) { 509 if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) {
498 found = p; 510 found = p;
499 break; 511 break;
500 } 512 }
@@ -508,16 +520,21 @@ struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
508 */ 520 */
509struct vfsmount *lookup_mnt(struct path *path) 521struct vfsmount *lookup_mnt(struct path *path)
510{ 522{
511 struct vfsmount *child_mnt; 523 struct mount *child_mnt;
512 524
513 br_read_lock(vfsmount_lock); 525 br_read_lock(vfsmount_lock);
514 if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1))) 526 child_mnt = __lookup_mnt(path->mnt, path->dentry, 1);
515 mntget(child_mnt); 527 if (child_mnt) {
516 br_read_unlock(vfsmount_lock); 528 mnt_add_count(child_mnt, 1);
517 return child_mnt; 529 br_read_unlock(vfsmount_lock);
530 return &child_mnt->mnt;
531 } else {
532 br_read_unlock(vfsmount_lock);
533 return NULL;
534 }
518} 535}
519 536
520static inline int check_mnt(struct vfsmount *mnt) 537static inline int check_mnt(struct mount *mnt)
521{ 538{
522 return mnt->mnt_ns == current->nsproxy->mnt_ns; 539 return mnt->mnt_ns == current->nsproxy->mnt_ns;
523} 540}
@@ -548,12 +565,12 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns)
548 * Clear dentry's mounted state if it has no remaining mounts. 565 * Clear dentry's mounted state if it has no remaining mounts.
549 * vfsmount_lock must be held for write. 566 * vfsmount_lock must be held for write.
550 */ 567 */
551static void dentry_reset_mounted(struct vfsmount *mnt, struct dentry *dentry) 568static void dentry_reset_mounted(struct dentry *dentry)
552{ 569{
553 unsigned u; 570 unsigned u;
554 571
555 for (u = 0; u < HASH_SIZE; u++) { 572 for (u = 0; u < HASH_SIZE; u++) {
556 struct vfsmount *p; 573 struct mount *p;
557 574
558 list_for_each_entry(p, &mount_hashtable[u], mnt_hash) { 575 list_for_each_entry(p, &mount_hashtable[u], mnt_hash) {
559 if (p->mnt_mountpoint == dentry) 576 if (p->mnt_mountpoint == dentry)
@@ -568,25 +585,26 @@ static void dentry_reset_mounted(struct vfsmount *mnt, struct dentry *dentry)
568/* 585/*
569 * vfsmount lock must be held for write 586 * vfsmount lock must be held for write
570 */ 587 */
571static void detach_mnt(struct vfsmount *mnt, struct path *old_path) 588static void detach_mnt(struct mount *mnt, struct path *old_path)
572{ 589{
573 old_path->dentry = mnt->mnt_mountpoint; 590 old_path->dentry = mnt->mnt_mountpoint;
574 old_path->mnt = mnt->mnt_parent; 591 old_path->mnt = &mnt->mnt_parent->mnt;
575 mnt->mnt_parent = mnt; 592 mnt->mnt_parent = mnt;
576 mnt->mnt_mountpoint = mnt->mnt_root; 593 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
577 list_del_init(&mnt->mnt_child); 594 list_del_init(&mnt->mnt_child);
578 list_del_init(&mnt->mnt_hash); 595 list_del_init(&mnt->mnt_hash);
579 dentry_reset_mounted(old_path->mnt, old_path->dentry); 596 dentry_reset_mounted(old_path->dentry);
580} 597}
581 598
582/* 599/*
583 * vfsmount lock must be held for write 600 * vfsmount lock must be held for write
584 */ 601 */
585void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, 602void mnt_set_mountpoint(struct mount *mnt, struct dentry *dentry,
586 struct vfsmount *child_mnt) 603 struct mount *child_mnt)
587{ 604{
588 child_mnt->mnt_parent = mntget(mnt); 605 mnt_add_count(mnt, 1); /* essentially, that's mntget */
589 child_mnt->mnt_mountpoint = dget(dentry); 606 child_mnt->mnt_mountpoint = dget(dentry);
607 child_mnt->mnt_parent = mnt;
590 spin_lock(&dentry->d_lock); 608 spin_lock(&dentry->d_lock);
591 dentry->d_flags |= DCACHE_MOUNTED; 609 dentry->d_flags |= DCACHE_MOUNTED;
592 spin_unlock(&dentry->d_lock); 610 spin_unlock(&dentry->d_lock);
@@ -595,15 +613,15 @@ void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
595/* 613/*
596 * vfsmount lock must be held for write 614 * vfsmount lock must be held for write
597 */ 615 */
598static void attach_mnt(struct vfsmount *mnt, struct path *path) 616static void attach_mnt(struct mount *mnt, struct path *path)
599{ 617{
600 mnt_set_mountpoint(path->mnt, path->dentry, mnt); 618 mnt_set_mountpoint(real_mount(path->mnt), path->dentry, mnt);
601 list_add_tail(&mnt->mnt_hash, mount_hashtable + 619 list_add_tail(&mnt->mnt_hash, mount_hashtable +
602 hash(path->mnt, path->dentry)); 620 hash(path->mnt, path->dentry));
603 list_add_tail(&mnt->mnt_child, &path->mnt->mnt_mounts); 621 list_add_tail(&mnt->mnt_child, &real_mount(path->mnt)->mnt_mounts);
604} 622}
605 623
606static inline void __mnt_make_longterm(struct vfsmount *mnt) 624static inline void __mnt_make_longterm(struct mount *mnt)
607{ 625{
608#ifdef CONFIG_SMP 626#ifdef CONFIG_SMP
609 atomic_inc(&mnt->mnt_longterm); 627 atomic_inc(&mnt->mnt_longterm);
@@ -611,7 +629,7 @@ static inline void __mnt_make_longterm(struct vfsmount *mnt)
611} 629}
612 630
613/* needs vfsmount lock for write */ 631/* needs vfsmount lock for write */
614static inline void __mnt_make_shortterm(struct vfsmount *mnt) 632static inline void __mnt_make_shortterm(struct mount *mnt)
615{ 633{
616#ifdef CONFIG_SMP 634#ifdef CONFIG_SMP
617 atomic_dec(&mnt->mnt_longterm); 635 atomic_dec(&mnt->mnt_longterm);
@@ -621,10 +639,10 @@ static inline void __mnt_make_shortterm(struct vfsmount *mnt)
621/* 639/*
622 * vfsmount lock must be held for write 640 * vfsmount lock must be held for write
623 */ 641 */
624static void commit_tree(struct vfsmount *mnt) 642static void commit_tree(struct mount *mnt)
625{ 643{
626 struct vfsmount *parent = mnt->mnt_parent; 644 struct mount *parent = mnt->mnt_parent;
627 struct vfsmount *m; 645 struct mount *m;
628 LIST_HEAD(head); 646 LIST_HEAD(head);
629 struct mnt_namespace *n = parent->mnt_ns; 647 struct mnt_namespace *n = parent->mnt_ns;
630 648
@@ -639,12 +657,12 @@ static void commit_tree(struct vfsmount *mnt)
639 list_splice(&head, n->list.prev); 657 list_splice(&head, n->list.prev);
640 658
641 list_add_tail(&mnt->mnt_hash, mount_hashtable + 659 list_add_tail(&mnt->mnt_hash, mount_hashtable +
642 hash(parent, mnt->mnt_mountpoint)); 660 hash(&parent->mnt, mnt->mnt_mountpoint));
643 list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); 661 list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
644 touch_mnt_namespace(n); 662 touch_mnt_namespace(n);
645} 663}
646 664
647static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root) 665static struct mount *next_mnt(struct mount *p, struct mount *root)
648{ 666{
649 struct list_head *next = p->mnt_mounts.next; 667 struct list_head *next = p->mnt_mounts.next;
650 if (next == &p->mnt_mounts) { 668 if (next == &p->mnt_mounts) {
@@ -657,14 +675,14 @@ static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root)
657 p = p->mnt_parent; 675 p = p->mnt_parent;
658 } 676 }
659 } 677 }
660 return list_entry(next, struct vfsmount, mnt_child); 678 return list_entry(next, struct mount, mnt_child);
661} 679}
662 680
663static struct vfsmount *skip_mnt_tree(struct vfsmount *p) 681static struct mount *skip_mnt_tree(struct mount *p)
664{ 682{
665 struct list_head *prev = p->mnt_mounts.prev; 683 struct list_head *prev = p->mnt_mounts.prev;
666 while (prev != &p->mnt_mounts) { 684 while (prev != &p->mnt_mounts) {
667 p = list_entry(prev, struct vfsmount, mnt_child); 685 p = list_entry(prev, struct mount, mnt_child);
668 prev = p->mnt_mounts.prev; 686 prev = p->mnt_mounts.prev;
669 } 687 }
670 return p; 688 return p;
@@ -673,7 +691,7 @@ static struct vfsmount *skip_mnt_tree(struct vfsmount *p)
673struct vfsmount * 691struct vfsmount *
674vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data) 692vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
675{ 693{
676 struct vfsmount *mnt; 694 struct mount *mnt;
677 struct dentry *root; 695 struct dentry *root;
678 696
679 if (!type) 697 if (!type)
@@ -684,7 +702,7 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
684 return ERR_PTR(-ENOMEM); 702 return ERR_PTR(-ENOMEM);
685 703
686 if (flags & MS_KERNMOUNT) 704 if (flags & MS_KERNMOUNT)
687 mnt->mnt_flags = MNT_INTERNAL; 705 mnt->mnt.mnt_flags = MNT_INTERNAL;
688 706
689 root = mount_fs(type, flags, name, data); 707 root = mount_fs(type, flags, name, data);
690 if (IS_ERR(root)) { 708 if (IS_ERR(root)) {
@@ -692,19 +710,22 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
692 return ERR_CAST(root); 710 return ERR_CAST(root);
693 } 711 }
694 712
695 mnt->mnt_root = root; 713 mnt->mnt.mnt_root = root;
696 mnt->mnt_sb = root->d_sb; 714 mnt->mnt.mnt_sb = root->d_sb;
697 mnt->mnt_mountpoint = mnt->mnt_root; 715 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
698 mnt->mnt_parent = mnt; 716 mnt->mnt_parent = mnt;
699 return mnt; 717 br_write_lock(vfsmount_lock);
718 list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts);
719 br_write_unlock(vfsmount_lock);
720 return &mnt->mnt;
700} 721}
701EXPORT_SYMBOL_GPL(vfs_kern_mount); 722EXPORT_SYMBOL_GPL(vfs_kern_mount);
702 723
703static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root, 724static struct mount *clone_mnt(struct mount *old, struct dentry *root,
704 int flag) 725 int flag)
705{ 726{
706 struct super_block *sb = old->mnt_sb; 727 struct super_block *sb = old->mnt.mnt_sb;
707 struct vfsmount *mnt = alloc_vfsmnt(old->mnt_devname); 728 struct mount *mnt = alloc_vfsmnt(old->mnt_devname);
708 729
709 if (mnt) { 730 if (mnt) {
710 if (flag & (CL_SLAVE | CL_PRIVATE)) 731 if (flag & (CL_SLAVE | CL_PRIVATE))
@@ -718,12 +739,15 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
718 goto out_free; 739 goto out_free;
719 } 740 }
720 741
721 mnt->mnt_flags = old->mnt_flags & ~MNT_WRITE_HOLD; 742 mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD;
722 atomic_inc(&sb->s_active); 743 atomic_inc(&sb->s_active);
723 mnt->mnt_sb = sb; 744 mnt->mnt.mnt_sb = sb;
724 mnt->mnt_root = dget(root); 745 mnt->mnt.mnt_root = dget(root);
725 mnt->mnt_mountpoint = mnt->mnt_root; 746 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
726 mnt->mnt_parent = mnt; 747 mnt->mnt_parent = mnt;
748 br_write_lock(vfsmount_lock);
749 list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
750 br_write_unlock(vfsmount_lock);
727 751
728 if (flag & CL_SLAVE) { 752 if (flag & CL_SLAVE) {
729 list_add(&mnt->mnt_slave, &old->mnt_slave_list); 753 list_add(&mnt->mnt_slave, &old->mnt_slave_list);
@@ -753,9 +777,10 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
753 return NULL; 777 return NULL;
754} 778}
755 779
756static inline void mntfree(struct vfsmount *mnt) 780static inline void mntfree(struct mount *mnt)
757{ 781{
758 struct super_block *sb = mnt->mnt_sb; 782 struct vfsmount *m = &mnt->mnt;
783 struct super_block *sb = m->mnt_sb;
759 784
760 /* 785 /*
761 * This probably indicates that somebody messed 786 * This probably indicates that somebody messed
@@ -768,32 +793,32 @@ static inline void mntfree(struct vfsmount *mnt)
768 * so mnt_get_writers() below is safe. 793 * so mnt_get_writers() below is safe.
769 */ 794 */
770 WARN_ON(mnt_get_writers(mnt)); 795 WARN_ON(mnt_get_writers(mnt));
771 fsnotify_vfsmount_delete(mnt); 796 fsnotify_vfsmount_delete(m);
772 dput(mnt->mnt_root); 797 dput(m->mnt_root);
773 free_vfsmnt(mnt); 798 free_vfsmnt(mnt);
774 deactivate_super(sb); 799 deactivate_super(sb);
775} 800}
776 801
777static void mntput_no_expire(struct vfsmount *mnt) 802static void mntput_no_expire(struct mount *mnt)
778{ 803{
779put_again: 804put_again:
780#ifdef CONFIG_SMP 805#ifdef CONFIG_SMP
781 br_read_lock(vfsmount_lock); 806 br_read_lock(vfsmount_lock);
782 if (likely(atomic_read(&mnt->mnt_longterm))) { 807 if (likely(atomic_read(&mnt->mnt_longterm))) {
783 mnt_dec_count(mnt); 808 mnt_add_count(mnt, -1);
784 br_read_unlock(vfsmount_lock); 809 br_read_unlock(vfsmount_lock);
785 return; 810 return;
786 } 811 }
787 br_read_unlock(vfsmount_lock); 812 br_read_unlock(vfsmount_lock);
788 813
789 br_write_lock(vfsmount_lock); 814 br_write_lock(vfsmount_lock);
790 mnt_dec_count(mnt); 815 mnt_add_count(mnt, -1);
791 if (mnt_get_count(mnt)) { 816 if (mnt_get_count(mnt)) {
792 br_write_unlock(vfsmount_lock); 817 br_write_unlock(vfsmount_lock);
793 return; 818 return;
794 } 819 }
795#else 820#else
796 mnt_dec_count(mnt); 821 mnt_add_count(mnt, -1);
797 if (likely(mnt_get_count(mnt))) 822 if (likely(mnt_get_count(mnt)))
798 return; 823 return;
799 br_write_lock(vfsmount_lock); 824 br_write_lock(vfsmount_lock);
@@ -802,9 +827,10 @@ put_again:
802 mnt_add_count(mnt, mnt->mnt_pinned + 1); 827 mnt_add_count(mnt, mnt->mnt_pinned + 1);
803 mnt->mnt_pinned = 0; 828 mnt->mnt_pinned = 0;
804 br_write_unlock(vfsmount_lock); 829 br_write_unlock(vfsmount_lock);
805 acct_auto_close_mnt(mnt); 830 acct_auto_close_mnt(&mnt->mnt);
806 goto put_again; 831 goto put_again;
807 } 832 }
833 list_del(&mnt->mnt_instance);
808 br_write_unlock(vfsmount_lock); 834 br_write_unlock(vfsmount_lock);
809 mntfree(mnt); 835 mntfree(mnt);
810} 836}
@@ -812,10 +838,11 @@ put_again:
812void mntput(struct vfsmount *mnt) 838void mntput(struct vfsmount *mnt)
813{ 839{
814 if (mnt) { 840 if (mnt) {
841 struct mount *m = real_mount(mnt);
815 /* avoid cacheline pingpong, hope gcc doesn't get "smart" */ 842 /* avoid cacheline pingpong, hope gcc doesn't get "smart" */
816 if (unlikely(mnt->mnt_expiry_mark)) 843 if (unlikely(m->mnt_expiry_mark))
817 mnt->mnt_expiry_mark = 0; 844 m->mnt_expiry_mark = 0;
818 mntput_no_expire(mnt); 845 mntput_no_expire(m);
819 } 846 }
820} 847}
821EXPORT_SYMBOL(mntput); 848EXPORT_SYMBOL(mntput);
@@ -823,7 +850,7 @@ EXPORT_SYMBOL(mntput);
823struct vfsmount *mntget(struct vfsmount *mnt) 850struct vfsmount *mntget(struct vfsmount *mnt)
824{ 851{
825 if (mnt) 852 if (mnt)
826 mnt_inc_count(mnt); 853 mnt_add_count(real_mount(mnt), 1);
827 return mnt; 854 return mnt;
828} 855}
829EXPORT_SYMBOL(mntget); 856EXPORT_SYMBOL(mntget);
@@ -831,16 +858,17 @@ EXPORT_SYMBOL(mntget);
831void mnt_pin(struct vfsmount *mnt) 858void mnt_pin(struct vfsmount *mnt)
832{ 859{
833 br_write_lock(vfsmount_lock); 860 br_write_lock(vfsmount_lock);
834 mnt->mnt_pinned++; 861 real_mount(mnt)->mnt_pinned++;
835 br_write_unlock(vfsmount_lock); 862 br_write_unlock(vfsmount_lock);
836} 863}
837EXPORT_SYMBOL(mnt_pin); 864EXPORT_SYMBOL(mnt_pin);
838 865
839void mnt_unpin(struct vfsmount *mnt) 866void mnt_unpin(struct vfsmount *m)
840{ 867{
868 struct mount *mnt = real_mount(m);
841 br_write_lock(vfsmount_lock); 869 br_write_lock(vfsmount_lock);
842 if (mnt->mnt_pinned) { 870 if (mnt->mnt_pinned) {
843 mnt_inc_count(mnt); 871 mnt_add_count(mnt, 1);
844 mnt->mnt_pinned--; 872 mnt->mnt_pinned--;
845 } 873 }
846 br_write_unlock(vfsmount_lock); 874 br_write_unlock(vfsmount_lock);
@@ -858,12 +886,12 @@ static inline void mangle(struct seq_file *m, const char *s)
858 * 886 *
859 * See also save_mount_options(). 887 * See also save_mount_options().
860 */ 888 */
861int generic_show_options(struct seq_file *m, struct vfsmount *mnt) 889int generic_show_options(struct seq_file *m, struct dentry *root)
862{ 890{
863 const char *options; 891 const char *options;
864 892
865 rcu_read_lock(); 893 rcu_read_lock();
866 options = rcu_dereference(mnt->mnt_sb->s_options); 894 options = rcu_dereference(root->d_sb->s_options);
867 895
868 if (options != NULL && options[0]) { 896 if (options != NULL && options[0]) {
869 seq_putc(m, ','); 897 seq_putc(m, ',');
@@ -907,10 +935,10 @@ void replace_mount_options(struct super_block *sb, char *options)
907EXPORT_SYMBOL(replace_mount_options); 935EXPORT_SYMBOL(replace_mount_options);
908 936
909#ifdef CONFIG_PROC_FS 937#ifdef CONFIG_PROC_FS
910/* iterator */ 938/* iterator; we want it to have access to namespace_sem, thus here... */
911static void *m_start(struct seq_file *m, loff_t *pos) 939static void *m_start(struct seq_file *m, loff_t *pos)
912{ 940{
913 struct proc_mounts *p = m->private; 941 struct proc_mounts *p = container_of(m, struct proc_mounts, m);
914 942
915 down_read(&namespace_sem); 943 down_read(&namespace_sem);
916 return seq_list_start(&p->ns->list, *pos); 944 return seq_list_start(&p->ns->list, *pos);
@@ -918,7 +946,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
918 946
919static void *m_next(struct seq_file *m, void *v, loff_t *pos) 947static void *m_next(struct seq_file *m, void *v, loff_t *pos)
920{ 948{
921 struct proc_mounts *p = m->private; 949 struct proc_mounts *p = container_of(m, struct proc_mounts, m);
922 950
923 return seq_list_next(v, &p->ns->list, pos); 951 return seq_list_next(v, &p->ns->list, pos);
924} 952}
@@ -928,219 +956,18 @@ static void m_stop(struct seq_file *m, void *v)
928 up_read(&namespace_sem); 956 up_read(&namespace_sem);
929} 957}
930 958
931int mnt_had_events(struct proc_mounts *p) 959static int m_show(struct seq_file *m, void *v)
932{ 960{
933 struct mnt_namespace *ns = p->ns; 961 struct proc_mounts *p = container_of(m, struct proc_mounts, m);
934 int res = 0; 962 struct mount *r = list_entry(v, struct mount, mnt_list);
935 963 return p->show(m, &r->mnt);
936 br_read_lock(vfsmount_lock);
937 if (p->m.poll_event != ns->event) {
938 p->m.poll_event = ns->event;
939 res = 1;
940 }
941 br_read_unlock(vfsmount_lock);
942
943 return res;
944}
945
946struct proc_fs_info {
947 int flag;
948 const char *str;
949};
950
951static int show_sb_opts(struct seq_file *m, struct super_block *sb)
952{
953 static const struct proc_fs_info fs_info[] = {
954 { MS_SYNCHRONOUS, ",sync" },
955 { MS_DIRSYNC, ",dirsync" },
956 { MS_MANDLOCK, ",mand" },
957 { 0, NULL }
958 };
959 const struct proc_fs_info *fs_infop;
960
961 for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
962 if (sb->s_flags & fs_infop->flag)
963 seq_puts(m, fs_infop->str);
964 }
965
966 return security_sb_show_options(m, sb);
967}
968
969static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
970{
971 static const struct proc_fs_info mnt_info[] = {
972 { MNT_NOSUID, ",nosuid" },
973 { MNT_NODEV, ",nodev" },
974 { MNT_NOEXEC, ",noexec" },
975 { MNT_NOATIME, ",noatime" },
976 { MNT_NODIRATIME, ",nodiratime" },
977 { MNT_RELATIME, ",relatime" },
978 { 0, NULL }
979 };
980 const struct proc_fs_info *fs_infop;
981
982 for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) {
983 if (mnt->mnt_flags & fs_infop->flag)
984 seq_puts(m, fs_infop->str);
985 }
986}
987
988static void show_type(struct seq_file *m, struct super_block *sb)
989{
990 mangle(m, sb->s_type->name);
991 if (sb->s_subtype && sb->s_subtype[0]) {
992 seq_putc(m, '.');
993 mangle(m, sb->s_subtype);
994 }
995}
996
997static int show_vfsmnt(struct seq_file *m, void *v)
998{
999 struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
1000 int err = 0;
1001 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
1002
1003 if (mnt->mnt_sb->s_op->show_devname) {
1004 err = mnt->mnt_sb->s_op->show_devname(m, mnt);
1005 if (err)
1006 goto out;
1007 } else {
1008 mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
1009 }
1010 seq_putc(m, ' ');
1011 seq_path(m, &mnt_path, " \t\n\\");
1012 seq_putc(m, ' ');
1013 show_type(m, mnt->mnt_sb);
1014 seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
1015 err = show_sb_opts(m, mnt->mnt_sb);
1016 if (err)
1017 goto out;
1018 show_mnt_opts(m, mnt);
1019 if (mnt->mnt_sb->s_op->show_options)
1020 err = mnt->mnt_sb->s_op->show_options(m, mnt);
1021 seq_puts(m, " 0 0\n");
1022out:
1023 return err;
1024} 964}
1025 965
1026const struct seq_operations mounts_op = { 966const struct seq_operations mounts_op = {
1027 .start = m_start, 967 .start = m_start,
1028 .next = m_next, 968 .next = m_next,
1029 .stop = m_stop, 969 .stop = m_stop,
1030 .show = show_vfsmnt 970 .show = m_show,
1031};
1032
1033static int show_mountinfo(struct seq_file *m, void *v)
1034{
1035 struct proc_mounts *p = m->private;
1036 struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
1037 struct super_block *sb = mnt->mnt_sb;
1038 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
1039 struct path root = p->root;
1040 int err = 0;
1041
1042 seq_printf(m, "%i %i %u:%u ", mnt->mnt_id, mnt->mnt_parent->mnt_id,
1043 MAJOR(sb->s_dev), MINOR(sb->s_dev));
1044 if (sb->s_op->show_path)
1045 err = sb->s_op->show_path(m, mnt);
1046 else
1047 seq_dentry(m, mnt->mnt_root, " \t\n\\");
1048 if (err)
1049 goto out;
1050 seq_putc(m, ' ');
1051
1052 /* mountpoints outside of chroot jail will give SEQ_SKIP on this */
1053 err = seq_path_root(m, &mnt_path, &root, " \t\n\\");
1054 if (err)
1055 goto out;
1056
1057 seq_puts(m, mnt->mnt_flags & MNT_READONLY ? " ro" : " rw");
1058 show_mnt_opts(m, mnt);
1059
1060 /* Tagged fields ("foo:X" or "bar") */
1061 if (IS_MNT_SHARED(mnt))
1062 seq_printf(m, " shared:%i", mnt->mnt_group_id);
1063 if (IS_MNT_SLAVE(mnt)) {
1064 int master = mnt->mnt_master->mnt_group_id;
1065 int dom = get_dominating_id(mnt, &p->root);
1066 seq_printf(m, " master:%i", master);
1067 if (dom && dom != master)
1068 seq_printf(m, " propagate_from:%i", dom);
1069 }
1070 if (IS_MNT_UNBINDABLE(mnt))
1071 seq_puts(m, " unbindable");
1072
1073 /* Filesystem specific data */
1074 seq_puts(m, " - ");
1075 show_type(m, sb);
1076 seq_putc(m, ' ');
1077 if (sb->s_op->show_devname)
1078 err = sb->s_op->show_devname(m, mnt);
1079 else
1080 mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
1081 if (err)
1082 goto out;
1083 seq_puts(m, sb->s_flags & MS_RDONLY ? " ro" : " rw");
1084 err = show_sb_opts(m, sb);
1085 if (err)
1086 goto out;
1087 if (sb->s_op->show_options)
1088 err = sb->s_op->show_options(m, mnt);
1089 seq_putc(m, '\n');
1090out:
1091 return err;
1092}
1093
1094const struct seq_operations mountinfo_op = {
1095 .start = m_start,
1096 .next = m_next,
1097 .stop = m_stop,
1098 .show = show_mountinfo,
1099};
1100
1101static int show_vfsstat(struct seq_file *m, void *v)
1102{
1103 struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
1104 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
1105 int err = 0;
1106
1107 /* device */
1108 if (mnt->mnt_sb->s_op->show_devname) {
1109 seq_puts(m, "device ");
1110 err = mnt->mnt_sb->s_op->show_devname(m, mnt);
1111 } else {
1112 if (mnt->mnt_devname) {
1113 seq_puts(m, "device ");
1114 mangle(m, mnt->mnt_devname);
1115 } else
1116 seq_puts(m, "no device");
1117 }
1118
1119 /* mount point */
1120 seq_puts(m, " mounted on ");
1121 seq_path(m, &mnt_path, " \t\n\\");
1122 seq_putc(m, ' ');
1123
1124 /* file system type */
1125 seq_puts(m, "with fstype ");
1126 show_type(m, mnt->mnt_sb);
1127
1128 /* optional statistics */
1129 if (mnt->mnt_sb->s_op->show_stats) {
1130 seq_putc(m, ' ');
1131 if (!err)
1132 err = mnt->mnt_sb->s_op->show_stats(m, mnt);
1133 }
1134
1135 seq_putc(m, '\n');
1136 return err;
1137}
1138
1139const struct seq_operations mountstats_op = {
1140 .start = m_start,
1141 .next = m_next,
1142 .stop = m_stop,
1143 .show = show_vfsstat,
1144}; 971};
1145#endif /* CONFIG_PROC_FS */ 972#endif /* CONFIG_PROC_FS */
1146 973
@@ -1152,11 +979,13 @@ const struct seq_operations mountstats_op = {
1152 * open files, pwds, chroots or sub mounts that are 979 * open files, pwds, chroots or sub mounts that are
1153 * busy. 980 * busy.
1154 */ 981 */
1155int may_umount_tree(struct vfsmount *mnt) 982int may_umount_tree(struct vfsmount *m)
1156{ 983{
984 struct mount *mnt = real_mount(m);
1157 int actual_refs = 0; 985 int actual_refs = 0;
1158 int minimum_refs = 0; 986 int minimum_refs = 0;
1159 struct vfsmount *p; 987 struct mount *p;
988 BUG_ON(!m);
1160 989
1161 /* write lock needed for mnt_get_count */ 990 /* write lock needed for mnt_get_count */
1162 br_write_lock(vfsmount_lock); 991 br_write_lock(vfsmount_lock);
@@ -1192,7 +1021,7 @@ int may_umount(struct vfsmount *mnt)
1192 int ret = 1; 1021 int ret = 1;
1193 down_read(&namespace_sem); 1022 down_read(&namespace_sem);
1194 br_write_lock(vfsmount_lock); 1023 br_write_lock(vfsmount_lock);
1195 if (propagate_mount_busy(mnt, 2)) 1024 if (propagate_mount_busy(real_mount(mnt), 2))
1196 ret = 0; 1025 ret = 0;
1197 br_write_unlock(vfsmount_lock); 1026 br_write_unlock(vfsmount_lock);
1198 up_read(&namespace_sem); 1027 up_read(&namespace_sem);
@@ -1203,25 +1032,25 @@ EXPORT_SYMBOL(may_umount);
1203 1032
1204void release_mounts(struct list_head *head) 1033void release_mounts(struct list_head *head)
1205{ 1034{
1206 struct vfsmount *mnt; 1035 struct mount *mnt;
1207 while (!list_empty(head)) { 1036 while (!list_empty(head)) {
1208 mnt = list_first_entry(head, struct vfsmount, mnt_hash); 1037 mnt = list_first_entry(head, struct mount, mnt_hash);
1209 list_del_init(&mnt->mnt_hash); 1038 list_del_init(&mnt->mnt_hash);
1210 if (mnt->mnt_parent != mnt) { 1039 if (mnt_has_parent(mnt)) {
1211 struct dentry *dentry; 1040 struct dentry *dentry;
1212 struct vfsmount *m; 1041 struct mount *m;
1213 1042
1214 br_write_lock(vfsmount_lock); 1043 br_write_lock(vfsmount_lock);
1215 dentry = mnt->mnt_mountpoint; 1044 dentry = mnt->mnt_mountpoint;
1216 m = mnt->mnt_parent; 1045 m = mnt->mnt_parent;
1217 mnt->mnt_mountpoint = mnt->mnt_root; 1046 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
1218 mnt->mnt_parent = mnt; 1047 mnt->mnt_parent = mnt;
1219 m->mnt_ghosts--; 1048 m->mnt_ghosts--;
1220 br_write_unlock(vfsmount_lock); 1049 br_write_unlock(vfsmount_lock);
1221 dput(dentry); 1050 dput(dentry);
1222 mntput(m); 1051 mntput(&m->mnt);
1223 } 1052 }
1224 mntput(mnt); 1053 mntput(&mnt->mnt);
1225 } 1054 }
1226} 1055}
1227 1056
@@ -1229,10 +1058,10 @@ void release_mounts(struct list_head *head)
1229 * vfsmount lock must be held for write 1058 * vfsmount lock must be held for write
1230 * namespace_sem must be held for write 1059 * namespace_sem must be held for write
1231 */ 1060 */
1232void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) 1061void umount_tree(struct mount *mnt, int propagate, struct list_head *kill)
1233{ 1062{
1234 LIST_HEAD(tmp_list); 1063 LIST_HEAD(tmp_list);
1235 struct vfsmount *p; 1064 struct mount *p;
1236 1065
1237 for (p = mnt; p; p = next_mnt(p, mnt)) 1066 for (p = mnt; p; p = next_mnt(p, mnt))
1238 list_move(&p->mnt_hash, &tmp_list); 1067 list_move(&p->mnt_hash, &tmp_list);
@@ -1247,24 +1076,24 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
1247 p->mnt_ns = NULL; 1076 p->mnt_ns = NULL;
1248 __mnt_make_shortterm(p); 1077 __mnt_make_shortterm(p);
1249 list_del_init(&p->mnt_child); 1078 list_del_init(&p->mnt_child);
1250 if (p->mnt_parent != p) { 1079 if (mnt_has_parent(p)) {
1251 p->mnt_parent->mnt_ghosts++; 1080 p->mnt_parent->mnt_ghosts++;
1252 dentry_reset_mounted(p->mnt_parent, p->mnt_mountpoint); 1081 dentry_reset_mounted(p->mnt_mountpoint);
1253 } 1082 }
1254 change_mnt_propagation(p, MS_PRIVATE); 1083 change_mnt_propagation(p, MS_PRIVATE);
1255 } 1084 }
1256 list_splice(&tmp_list, kill); 1085 list_splice(&tmp_list, kill);
1257} 1086}
1258 1087
1259static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts); 1088static void shrink_submounts(struct mount *mnt, struct list_head *umounts);
1260 1089
1261static int do_umount(struct vfsmount *mnt, int flags) 1090static int do_umount(struct mount *mnt, int flags)
1262{ 1091{
1263 struct super_block *sb = mnt->mnt_sb; 1092 struct super_block *sb = mnt->mnt.mnt_sb;
1264 int retval; 1093 int retval;
1265 LIST_HEAD(umount_list); 1094 LIST_HEAD(umount_list);
1266 1095
1267 retval = security_sb_umount(mnt, flags); 1096 retval = security_sb_umount(&mnt->mnt, flags);
1268 if (retval) 1097 if (retval)
1269 return retval; 1098 return retval;
1270 1099
@@ -1275,7 +1104,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
1275 * (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount] 1104 * (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount]
1276 */ 1105 */
1277 if (flags & MNT_EXPIRE) { 1106 if (flags & MNT_EXPIRE) {
1278 if (mnt == current->fs->root.mnt || 1107 if (&mnt->mnt == current->fs->root.mnt ||
1279 flags & (MNT_FORCE | MNT_DETACH)) 1108 flags & (MNT_FORCE | MNT_DETACH))
1280 return -EINVAL; 1109 return -EINVAL;
1281 1110
@@ -1317,7 +1146,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
1317 * /reboot - static binary that would close all descriptors and 1146 * /reboot - static binary that would close all descriptors and
1318 * call reboot(9). Then init(8) could umount root and exec /reboot. 1147 * call reboot(9). Then init(8) could umount root and exec /reboot.
1319 */ 1148 */
1320 if (mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) { 1149 if (&mnt->mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) {
1321 /* 1150 /*
1322 * Special case for "unmounting" root ... 1151 * Special case for "unmounting" root ...
1323 * we just try to remount it readonly. 1152 * we just try to remount it readonly.
@@ -1359,6 +1188,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
1359SYSCALL_DEFINE2(umount, char __user *, name, int, flags) 1188SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
1360{ 1189{
1361 struct path path; 1190 struct path path;
1191 struct mount *mnt;
1362 int retval; 1192 int retval;
1363 int lookup_flags = 0; 1193 int lookup_flags = 0;
1364 1194
@@ -1371,21 +1201,22 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
1371 retval = user_path_at(AT_FDCWD, name, lookup_flags, &path); 1201 retval = user_path_at(AT_FDCWD, name, lookup_flags, &path);
1372 if (retval) 1202 if (retval)
1373 goto out; 1203 goto out;
1204 mnt = real_mount(path.mnt);
1374 retval = -EINVAL; 1205 retval = -EINVAL;
1375 if (path.dentry != path.mnt->mnt_root) 1206 if (path.dentry != path.mnt->mnt_root)
1376 goto dput_and_out; 1207 goto dput_and_out;
1377 if (!check_mnt(path.mnt)) 1208 if (!check_mnt(mnt))
1378 goto dput_and_out; 1209 goto dput_and_out;
1379 1210
1380 retval = -EPERM; 1211 retval = -EPERM;
1381 if (!capable(CAP_SYS_ADMIN)) 1212 if (!capable(CAP_SYS_ADMIN))
1382 goto dput_and_out; 1213 goto dput_and_out;
1383 1214
1384 retval = do_umount(path.mnt, flags); 1215 retval = do_umount(mnt, flags);
1385dput_and_out: 1216dput_and_out:
1386 /* we mustn't call path_put() as that would clear mnt_expiry_mark */ 1217 /* we mustn't call path_put() as that would clear mnt_expiry_mark */
1387 dput(path.dentry); 1218 dput(path.dentry);
1388 mntput_no_expire(path.mnt); 1219 mntput_no_expire(mnt);
1389out: 1220out:
1390 return retval; 1221 return retval;
1391} 1222}
@@ -1420,10 +1251,10 @@ static int mount_is_safe(struct path *path)
1420#endif 1251#endif
1421} 1252}
1422 1253
1423struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry, 1254struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
1424 int flag) 1255 int flag)
1425{ 1256{
1426 struct vfsmount *res, *p, *q, *r, *s; 1257 struct mount *res, *p, *q, *r;
1427 struct path path; 1258 struct path path;
1428 1259
1429 if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt)) 1260 if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt))
@@ -1436,6 +1267,7 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
1436 1267
1437 p = mnt; 1268 p = mnt;
1438 list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) { 1269 list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
1270 struct mount *s;
1439 if (!is_subdir(r->mnt_mountpoint, dentry)) 1271 if (!is_subdir(r->mnt_mountpoint, dentry))
1440 continue; 1272 continue;
1441 1273
@@ -1449,9 +1281,9 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
1449 q = q->mnt_parent; 1281 q = q->mnt_parent;
1450 } 1282 }
1451 p = s; 1283 p = s;
1452 path.mnt = q; 1284 path.mnt = &q->mnt;
1453 path.dentry = p->mnt_mountpoint; 1285 path.dentry = p->mnt_mountpoint;
1454 q = clone_mnt(p, p->mnt_root, flag); 1286 q = clone_mnt(p, p->mnt.mnt_root, flag);
1455 if (!q) 1287 if (!q)
1456 goto Enomem; 1288 goto Enomem;
1457 br_write_lock(vfsmount_lock); 1289 br_write_lock(vfsmount_lock);
@@ -1474,11 +1306,12 @@ Enomem:
1474 1306
1475struct vfsmount *collect_mounts(struct path *path) 1307struct vfsmount *collect_mounts(struct path *path)
1476{ 1308{
1477 struct vfsmount *tree; 1309 struct mount *tree;
1478 down_write(&namespace_sem); 1310 down_write(&namespace_sem);
1479 tree = copy_tree(path->mnt, path->dentry, CL_COPY_ALL | CL_PRIVATE); 1311 tree = copy_tree(real_mount(path->mnt), path->dentry,
1312 CL_COPY_ALL | CL_PRIVATE);
1480 up_write(&namespace_sem); 1313 up_write(&namespace_sem);
1481 return tree; 1314 return tree ? &tree->mnt : NULL;
1482} 1315}
1483 1316
1484void drop_collected_mounts(struct vfsmount *mnt) 1317void drop_collected_mounts(struct vfsmount *mnt)
@@ -1486,7 +1319,7 @@ void drop_collected_mounts(struct vfsmount *mnt)
1486 LIST_HEAD(umount_list); 1319 LIST_HEAD(umount_list);
1487 down_write(&namespace_sem); 1320 down_write(&namespace_sem);
1488 br_write_lock(vfsmount_lock); 1321 br_write_lock(vfsmount_lock);
1489 umount_tree(mnt, 0, &umount_list); 1322 umount_tree(real_mount(mnt), 0, &umount_list);
1490 br_write_unlock(vfsmount_lock); 1323 br_write_unlock(vfsmount_lock);
1491 up_write(&namespace_sem); 1324 up_write(&namespace_sem);
1492 release_mounts(&umount_list); 1325 release_mounts(&umount_list);
@@ -1495,21 +1328,21 @@ void drop_collected_mounts(struct vfsmount *mnt)
1495int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, 1328int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
1496 struct vfsmount *root) 1329 struct vfsmount *root)
1497{ 1330{
1498 struct vfsmount *mnt; 1331 struct mount *mnt;
1499 int res = f(root, arg); 1332 int res = f(root, arg);
1500 if (res) 1333 if (res)
1501 return res; 1334 return res;
1502 list_for_each_entry(mnt, &root->mnt_list, mnt_list) { 1335 list_for_each_entry(mnt, &real_mount(root)->mnt_list, mnt_list) {
1503 res = f(mnt, arg); 1336 res = f(&mnt->mnt, arg);
1504 if (res) 1337 if (res)
1505 return res; 1338 return res;
1506 } 1339 }
1507 return 0; 1340 return 0;
1508} 1341}
1509 1342
1510static void cleanup_group_ids(struct vfsmount *mnt, struct vfsmount *end) 1343static void cleanup_group_ids(struct mount *mnt, struct mount *end)
1511{ 1344{
1512 struct vfsmount *p; 1345 struct mount *p;
1513 1346
1514 for (p = mnt; p != end; p = next_mnt(p, mnt)) { 1347 for (p = mnt; p != end; p = next_mnt(p, mnt)) {
1515 if (p->mnt_group_id && !IS_MNT_SHARED(p)) 1348 if (p->mnt_group_id && !IS_MNT_SHARED(p))
@@ -1517,9 +1350,9 @@ static void cleanup_group_ids(struct vfsmount *mnt, struct vfsmount *end)
1517 } 1350 }
1518} 1351}
1519 1352
1520static int invent_group_ids(struct vfsmount *mnt, bool recurse) 1353static int invent_group_ids(struct mount *mnt, bool recurse)
1521{ 1354{
1522 struct vfsmount *p; 1355 struct mount *p;
1523 1356
1524 for (p = mnt; p; p = recurse ? next_mnt(p, mnt) : NULL) { 1357 for (p = mnt; p; p = recurse ? next_mnt(p, mnt) : NULL) {
1525 if (!p->mnt_group_id && !IS_MNT_SHARED(p)) { 1358 if (!p->mnt_group_id && !IS_MNT_SHARED(p)) {
@@ -1597,13 +1430,13 @@ static int invent_group_ids(struct vfsmount *mnt, bool recurse)
1597 * Must be called without spinlocks held, since this function can sleep 1430 * Must be called without spinlocks held, since this function can sleep
1598 * in allocations. 1431 * in allocations.
1599 */ 1432 */
1600static int attach_recursive_mnt(struct vfsmount *source_mnt, 1433static int attach_recursive_mnt(struct mount *source_mnt,
1601 struct path *path, struct path *parent_path) 1434 struct path *path, struct path *parent_path)
1602{ 1435{
1603 LIST_HEAD(tree_list); 1436 LIST_HEAD(tree_list);
1604 struct vfsmount *dest_mnt = path->mnt; 1437 struct mount *dest_mnt = real_mount(path->mnt);
1605 struct dentry *dest_dentry = path->dentry; 1438 struct dentry *dest_dentry = path->dentry;
1606 struct vfsmount *child, *p; 1439 struct mount *child, *p;
1607 int err; 1440 int err;
1608 1441
1609 if (IS_MNT_SHARED(dest_mnt)) { 1442 if (IS_MNT_SHARED(dest_mnt)) {
@@ -1624,7 +1457,7 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
1624 if (parent_path) { 1457 if (parent_path) {
1625 detach_mnt(source_mnt, parent_path); 1458 detach_mnt(source_mnt, parent_path);
1626 attach_mnt(source_mnt, path); 1459 attach_mnt(source_mnt, path);
1627 touch_mnt_namespace(parent_path->mnt->mnt_ns); 1460 touch_mnt_namespace(source_mnt->mnt_ns);
1628 } else { 1461 } else {
1629 mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt); 1462 mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
1630 commit_tree(source_mnt); 1463 commit_tree(source_mnt);
@@ -1672,13 +1505,13 @@ static void unlock_mount(struct path *path)
1672 mutex_unlock(&path->dentry->d_inode->i_mutex); 1505 mutex_unlock(&path->dentry->d_inode->i_mutex);
1673} 1506}
1674 1507
1675static int graft_tree(struct vfsmount *mnt, struct path *path) 1508static int graft_tree(struct mount *mnt, struct path *path)
1676{ 1509{
1677 if (mnt->mnt_sb->s_flags & MS_NOUSER) 1510 if (mnt->mnt.mnt_sb->s_flags & MS_NOUSER)
1678 return -EINVAL; 1511 return -EINVAL;
1679 1512
1680 if (S_ISDIR(path->dentry->d_inode->i_mode) != 1513 if (S_ISDIR(path->dentry->d_inode->i_mode) !=
1681 S_ISDIR(mnt->mnt_root->d_inode->i_mode)) 1514 S_ISDIR(mnt->mnt.mnt_root->d_inode->i_mode))
1682 return -ENOTDIR; 1515 return -ENOTDIR;
1683 1516
1684 if (d_unlinked(path->dentry)) 1517 if (d_unlinked(path->dentry))
@@ -1709,7 +1542,8 @@ static int flags_to_propagation_type(int flags)
1709 */ 1542 */
1710static int do_change_type(struct path *path, int flag) 1543static int do_change_type(struct path *path, int flag)
1711{ 1544{
1712 struct vfsmount *m, *mnt = path->mnt; 1545 struct mount *m;
1546 struct mount *mnt = real_mount(path->mnt);
1713 int recurse = flag & MS_REC; 1547 int recurse = flag & MS_REC;
1714 int type; 1548 int type;
1715 int err = 0; 1549 int err = 0;
@@ -1749,7 +1583,7 @@ static int do_loopback(struct path *path, char *old_name,
1749{ 1583{
1750 LIST_HEAD(umount_list); 1584 LIST_HEAD(umount_list);
1751 struct path old_path; 1585 struct path old_path;
1752 struct vfsmount *mnt = NULL; 1586 struct mount *mnt = NULL, *old;
1753 int err = mount_is_safe(path); 1587 int err = mount_is_safe(path);
1754 if (err) 1588 if (err)
1755 return err; 1589 return err;
@@ -1763,18 +1597,20 @@ static int do_loopback(struct path *path, char *old_name,
1763 if (err) 1597 if (err)
1764 goto out; 1598 goto out;
1765 1599
1600 old = real_mount(old_path.mnt);
1601
1766 err = -EINVAL; 1602 err = -EINVAL;
1767 if (IS_MNT_UNBINDABLE(old_path.mnt)) 1603 if (IS_MNT_UNBINDABLE(old))
1768 goto out2; 1604 goto out2;
1769 1605
1770 if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt)) 1606 if (!check_mnt(real_mount(path->mnt)) || !check_mnt(old))
1771 goto out2; 1607 goto out2;
1772 1608
1773 err = -ENOMEM; 1609 err = -ENOMEM;
1774 if (recurse) 1610 if (recurse)
1775 mnt = copy_tree(old_path.mnt, old_path.dentry, 0); 1611 mnt = copy_tree(old, old_path.dentry, 0);
1776 else 1612 else
1777 mnt = clone_mnt(old_path.mnt, old_path.dentry, 0); 1613 mnt = clone_mnt(old, old_path.dentry, 0);
1778 1614
1779 if (!mnt) 1615 if (!mnt)
1780 goto out2; 1616 goto out2;
@@ -1804,9 +1640,9 @@ static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
1804 return 0; 1640 return 0;
1805 1641
1806 if (readonly_request) 1642 if (readonly_request)
1807 error = mnt_make_readonly(mnt); 1643 error = mnt_make_readonly(real_mount(mnt));
1808 else 1644 else
1809 __mnt_unmake_readonly(mnt); 1645 __mnt_unmake_readonly(real_mount(mnt));
1810 return error; 1646 return error;
1811} 1647}
1812 1648
@@ -1820,11 +1656,12 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
1820{ 1656{
1821 int err; 1657 int err;
1822 struct super_block *sb = path->mnt->mnt_sb; 1658 struct super_block *sb = path->mnt->mnt_sb;
1659 struct mount *mnt = real_mount(path->mnt);
1823 1660
1824 if (!capable(CAP_SYS_ADMIN)) 1661 if (!capable(CAP_SYS_ADMIN))
1825 return -EPERM; 1662 return -EPERM;
1826 1663
1827 if (!check_mnt(path->mnt)) 1664 if (!check_mnt(mnt))
1828 return -EINVAL; 1665 return -EINVAL;
1829 1666
1830 if (path->dentry != path->mnt->mnt_root) 1667 if (path->dentry != path->mnt->mnt_root)
@@ -1841,22 +1678,22 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
1841 err = do_remount_sb(sb, flags, data, 0); 1678 err = do_remount_sb(sb, flags, data, 0);
1842 if (!err) { 1679 if (!err) {
1843 br_write_lock(vfsmount_lock); 1680 br_write_lock(vfsmount_lock);
1844 mnt_flags |= path->mnt->mnt_flags & MNT_PROPAGATION_MASK; 1681 mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK;
1845 path->mnt->mnt_flags = mnt_flags; 1682 mnt->mnt.mnt_flags = mnt_flags;
1846 br_write_unlock(vfsmount_lock); 1683 br_write_unlock(vfsmount_lock);
1847 } 1684 }
1848 up_write(&sb->s_umount); 1685 up_write(&sb->s_umount);
1849 if (!err) { 1686 if (!err) {
1850 br_write_lock(vfsmount_lock); 1687 br_write_lock(vfsmount_lock);
1851 touch_mnt_namespace(path->mnt->mnt_ns); 1688 touch_mnt_namespace(mnt->mnt_ns);
1852 br_write_unlock(vfsmount_lock); 1689 br_write_unlock(vfsmount_lock);
1853 } 1690 }
1854 return err; 1691 return err;
1855} 1692}
1856 1693
1857static inline int tree_contains_unbindable(struct vfsmount *mnt) 1694static inline int tree_contains_unbindable(struct mount *mnt)
1858{ 1695{
1859 struct vfsmount *p; 1696 struct mount *p;
1860 for (p = mnt; p; p = next_mnt(p, mnt)) { 1697 for (p = mnt; p; p = next_mnt(p, mnt)) {
1861 if (IS_MNT_UNBINDABLE(p)) 1698 if (IS_MNT_UNBINDABLE(p))
1862 return 1; 1699 return 1;
@@ -1867,7 +1704,8 @@ static inline int tree_contains_unbindable(struct vfsmount *mnt)
1867static int do_move_mount(struct path *path, char *old_name) 1704static int do_move_mount(struct path *path, char *old_name)
1868{ 1705{
1869 struct path old_path, parent_path; 1706 struct path old_path, parent_path;
1870 struct vfsmount *p; 1707 struct mount *p;
1708 struct mount *old;
1871 int err = 0; 1709 int err = 0;
1872 if (!capable(CAP_SYS_ADMIN)) 1710 if (!capable(CAP_SYS_ADMIN))
1873 return -EPERM; 1711 return -EPERM;
@@ -1881,8 +1719,11 @@ static int do_move_mount(struct path *path, char *old_name)
1881 if (err < 0) 1719 if (err < 0)
1882 goto out; 1720 goto out;
1883 1721
1722 old = real_mount(old_path.mnt);
1723 p = real_mount(path->mnt);
1724
1884 err = -EINVAL; 1725 err = -EINVAL;
1885 if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt)) 1726 if (!check_mnt(p) || !check_mnt(old))
1886 goto out1; 1727 goto out1;
1887 1728
1888 if (d_unlinked(path->dentry)) 1729 if (d_unlinked(path->dentry))
@@ -1892,7 +1733,7 @@ static int do_move_mount(struct path *path, char *old_name)
1892 if (old_path.dentry != old_path.mnt->mnt_root) 1733 if (old_path.dentry != old_path.mnt->mnt_root)
1893 goto out1; 1734 goto out1;
1894 1735
1895 if (old_path.mnt == old_path.mnt->mnt_parent) 1736 if (!mnt_has_parent(old))
1896 goto out1; 1737 goto out1;
1897 1738
1898 if (S_ISDIR(path->dentry->d_inode->i_mode) != 1739 if (S_ISDIR(path->dentry->d_inode->i_mode) !=
@@ -1901,28 +1742,26 @@ static int do_move_mount(struct path *path, char *old_name)
1901 /* 1742 /*
1902 * Don't move a mount residing in a shared parent. 1743 * Don't move a mount residing in a shared parent.
1903 */ 1744 */
1904 if (old_path.mnt->mnt_parent && 1745 if (IS_MNT_SHARED(old->mnt_parent))
1905 IS_MNT_SHARED(old_path.mnt->mnt_parent))
1906 goto out1; 1746 goto out1;
1907 /* 1747 /*
1908 * Don't move a mount tree containing unbindable mounts to a destination 1748 * Don't move a mount tree containing unbindable mounts to a destination
1909 * mount which is shared. 1749 * mount which is shared.
1910 */ 1750 */
1911 if (IS_MNT_SHARED(path->mnt) && 1751 if (IS_MNT_SHARED(p) && tree_contains_unbindable(old))
1912 tree_contains_unbindable(old_path.mnt))
1913 goto out1; 1752 goto out1;
1914 err = -ELOOP; 1753 err = -ELOOP;
1915 for (p = path->mnt; p->mnt_parent != p; p = p->mnt_parent) 1754 for (; mnt_has_parent(p); p = p->mnt_parent)
1916 if (p == old_path.mnt) 1755 if (p == old)
1917 goto out1; 1756 goto out1;
1918 1757
1919 err = attach_recursive_mnt(old_path.mnt, path, &parent_path); 1758 err = attach_recursive_mnt(old, path, &parent_path);
1920 if (err) 1759 if (err)
1921 goto out1; 1760 goto out1;
1922 1761
1923 /* if the mount is moved, it should no longer be expire 1762 /* if the mount is moved, it should no longer be expire
1924 * automatically */ 1763 * automatically */
1925 list_del_init(&old_path.mnt->mnt_expire); 1764 list_del_init(&old->mnt_expire);
1926out1: 1765out1:
1927 unlock_mount(path); 1766 unlock_mount(path);
1928out: 1767out:
@@ -1955,7 +1794,7 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
1955 return ERR_PTR(err); 1794 return ERR_PTR(err);
1956} 1795}
1957 1796
1958struct vfsmount * 1797static struct vfsmount *
1959do_kern_mount(const char *fstype, int flags, const char *name, void *data) 1798do_kern_mount(const char *fstype, int flags, const char *name, void *data)
1960{ 1799{
1961 struct file_system_type *type = get_fs_type(fstype); 1800 struct file_system_type *type = get_fs_type(fstype);
@@ -1969,12 +1808,11 @@ do_kern_mount(const char *fstype, int flags, const char *name, void *data)
1969 put_filesystem(type); 1808 put_filesystem(type);
1970 return mnt; 1809 return mnt;
1971} 1810}
1972EXPORT_SYMBOL_GPL(do_kern_mount);
1973 1811
1974/* 1812/*
1975 * add a mount into a namespace's mount tree 1813 * add a mount into a namespace's mount tree
1976 */ 1814 */
1977static int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags) 1815static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
1978{ 1816{
1979 int err; 1817 int err;
1980 1818
@@ -1985,20 +1823,20 @@ static int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flag
1985 return err; 1823 return err;
1986 1824
1987 err = -EINVAL; 1825 err = -EINVAL;
1988 if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt)) 1826 if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(real_mount(path->mnt)))
1989 goto unlock; 1827 goto unlock;
1990 1828
1991 /* Refuse the same filesystem on the same mount point */ 1829 /* Refuse the same filesystem on the same mount point */
1992 err = -EBUSY; 1830 err = -EBUSY;
1993 if (path->mnt->mnt_sb == newmnt->mnt_sb && 1831 if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb &&
1994 path->mnt->mnt_root == path->dentry) 1832 path->mnt->mnt_root == path->dentry)
1995 goto unlock; 1833 goto unlock;
1996 1834
1997 err = -EINVAL; 1835 err = -EINVAL;
1998 if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode)) 1836 if (S_ISLNK(newmnt->mnt.mnt_root->d_inode->i_mode))
1999 goto unlock; 1837 goto unlock;
2000 1838
2001 newmnt->mnt_flags = mnt_flags; 1839 newmnt->mnt.mnt_flags = mnt_flags;
2002 err = graft_tree(newmnt, path); 1840 err = graft_tree(newmnt, path);
2003 1841
2004unlock: 1842unlock:
@@ -2027,7 +1865,7 @@ static int do_new_mount(struct path *path, char *type, int flags,
2027 if (IS_ERR(mnt)) 1865 if (IS_ERR(mnt))
2028 return PTR_ERR(mnt); 1866 return PTR_ERR(mnt);
2029 1867
2030 err = do_add_mount(mnt, path, mnt_flags); 1868 err = do_add_mount(real_mount(mnt), path, mnt_flags);
2031 if (err) 1869 if (err)
2032 mntput(mnt); 1870 mntput(mnt);
2033 return err; 1871 return err;
@@ -2035,11 +1873,12 @@ static int do_new_mount(struct path *path, char *type, int flags,
2035 1873
2036int finish_automount(struct vfsmount *m, struct path *path) 1874int finish_automount(struct vfsmount *m, struct path *path)
2037{ 1875{
1876 struct mount *mnt = real_mount(m);
2038 int err; 1877 int err;
2039 /* The new mount record should have at least 2 refs to prevent it being 1878 /* The new mount record should have at least 2 refs to prevent it being
2040 * expired before we get a chance to add it 1879 * expired before we get a chance to add it
2041 */ 1880 */
2042 BUG_ON(mnt_get_count(m) < 2); 1881 BUG_ON(mnt_get_count(mnt) < 2);
2043 1882
2044 if (m->mnt_sb == path->mnt->mnt_sb && 1883 if (m->mnt_sb == path->mnt->mnt_sb &&
2045 m->mnt_root == path->dentry) { 1884 m->mnt_root == path->dentry) {
@@ -2047,15 +1886,15 @@ int finish_automount(struct vfsmount *m, struct path *path)
2047 goto fail; 1886 goto fail;
2048 } 1887 }
2049 1888
2050 err = do_add_mount(m, path, path->mnt->mnt_flags | MNT_SHRINKABLE); 1889 err = do_add_mount(mnt, path, path->mnt->mnt_flags | MNT_SHRINKABLE);
2051 if (!err) 1890 if (!err)
2052 return 0; 1891 return 0;
2053fail: 1892fail:
2054 /* remove m from any expiration list it may be on */ 1893 /* remove m from any expiration list it may be on */
2055 if (!list_empty(&m->mnt_expire)) { 1894 if (!list_empty(&mnt->mnt_expire)) {
2056 down_write(&namespace_sem); 1895 down_write(&namespace_sem);
2057 br_write_lock(vfsmount_lock); 1896 br_write_lock(vfsmount_lock);
2058 list_del_init(&m->mnt_expire); 1897 list_del_init(&mnt->mnt_expire);
2059 br_write_unlock(vfsmount_lock); 1898 br_write_unlock(vfsmount_lock);
2060 up_write(&namespace_sem); 1899 up_write(&namespace_sem);
2061 } 1900 }
@@ -2074,7 +1913,7 @@ void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
2074 down_write(&namespace_sem); 1913 down_write(&namespace_sem);
2075 br_write_lock(vfsmount_lock); 1914 br_write_lock(vfsmount_lock);
2076 1915
2077 list_add_tail(&mnt->mnt_expire, expiry_list); 1916 list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list);
2078 1917
2079 br_write_unlock(vfsmount_lock); 1918 br_write_unlock(vfsmount_lock);
2080 up_write(&namespace_sem); 1919 up_write(&namespace_sem);
@@ -2088,7 +1927,7 @@ EXPORT_SYMBOL(mnt_set_expiry);
2088 */ 1927 */
2089void mark_mounts_for_expiry(struct list_head *mounts) 1928void mark_mounts_for_expiry(struct list_head *mounts)
2090{ 1929{
2091 struct vfsmount *mnt, *next; 1930 struct mount *mnt, *next;
2092 LIST_HEAD(graveyard); 1931 LIST_HEAD(graveyard);
2093 LIST_HEAD(umounts); 1932 LIST_HEAD(umounts);
2094 1933
@@ -2111,7 +1950,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
2111 list_move(&mnt->mnt_expire, &graveyard); 1950 list_move(&mnt->mnt_expire, &graveyard);
2112 } 1951 }
2113 while (!list_empty(&graveyard)) { 1952 while (!list_empty(&graveyard)) {
2114 mnt = list_first_entry(&graveyard, struct vfsmount, mnt_expire); 1953 mnt = list_first_entry(&graveyard, struct mount, mnt_expire);
2115 touch_mnt_namespace(mnt->mnt_ns); 1954 touch_mnt_namespace(mnt->mnt_ns);
2116 umount_tree(mnt, 1, &umounts); 1955 umount_tree(mnt, 1, &umounts);
2117 } 1956 }
@@ -2129,9 +1968,9 @@ EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);
2129 * search the list of submounts for a given mountpoint, and move any 1968 * search the list of submounts for a given mountpoint, and move any
2130 * shrinkable submounts to the 'graveyard' list. 1969 * shrinkable submounts to the 'graveyard' list.
2131 */ 1970 */
2132static int select_submounts(struct vfsmount *parent, struct list_head *graveyard) 1971static int select_submounts(struct mount *parent, struct list_head *graveyard)
2133{ 1972{
2134 struct vfsmount *this_parent = parent; 1973 struct mount *this_parent = parent;
2135 struct list_head *next; 1974 struct list_head *next;
2136 int found = 0; 1975 int found = 0;
2137 1976
@@ -2140,10 +1979,10 @@ repeat:
2140resume: 1979resume:
2141 while (next != &this_parent->mnt_mounts) { 1980 while (next != &this_parent->mnt_mounts) {
2142 struct list_head *tmp = next; 1981 struct list_head *tmp = next;
2143 struct vfsmount *mnt = list_entry(tmp, struct vfsmount, mnt_child); 1982 struct mount *mnt = list_entry(tmp, struct mount, mnt_child);
2144 1983
2145 next = tmp->next; 1984 next = tmp->next;
2146 if (!(mnt->mnt_flags & MNT_SHRINKABLE)) 1985 if (!(mnt->mnt.mnt_flags & MNT_SHRINKABLE))
2147 continue; 1986 continue;
2148 /* 1987 /*
2149 * Descend a level if the d_mounts list is non-empty. 1988 * Descend a level if the d_mounts list is non-empty.
@@ -2175,15 +2014,15 @@ resume:
2175 * 2014 *
2176 * vfsmount_lock must be held for write 2015 * vfsmount_lock must be held for write
2177 */ 2016 */
2178static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts) 2017static void shrink_submounts(struct mount *mnt, struct list_head *umounts)
2179{ 2018{
2180 LIST_HEAD(graveyard); 2019 LIST_HEAD(graveyard);
2181 struct vfsmount *m; 2020 struct mount *m;
2182 2021
2183 /* extract submounts of 'mountpoint' from the expiration list */ 2022 /* extract submounts of 'mountpoint' from the expiration list */
2184 while (select_submounts(mnt, &graveyard)) { 2023 while (select_submounts(mnt, &graveyard)) {
2185 while (!list_empty(&graveyard)) { 2024 while (!list_empty(&graveyard)) {
2186 m = list_first_entry(&graveyard, struct vfsmount, 2025 m = list_first_entry(&graveyard, struct mount,
2187 mnt_expire); 2026 mnt_expire);
2188 touch_mnt_namespace(m->mnt_ns); 2027 touch_mnt_namespace(m->mnt_ns);
2189 umount_tree(m, 1, umounts); 2028 umount_tree(m, 1, umounts);
@@ -2370,12 +2209,13 @@ static struct mnt_namespace *alloc_mnt_ns(void)
2370 2209
2371void mnt_make_longterm(struct vfsmount *mnt) 2210void mnt_make_longterm(struct vfsmount *mnt)
2372{ 2211{
2373 __mnt_make_longterm(mnt); 2212 __mnt_make_longterm(real_mount(mnt));
2374} 2213}
2375 2214
2376void mnt_make_shortterm(struct vfsmount *mnt) 2215void mnt_make_shortterm(struct vfsmount *m)
2377{ 2216{
2378#ifdef CONFIG_SMP 2217#ifdef CONFIG_SMP
2218 struct mount *mnt = real_mount(m);
2379 if (atomic_add_unless(&mnt->mnt_longterm, -1, 1)) 2219 if (atomic_add_unless(&mnt->mnt_longterm, -1, 1))
2380 return; 2220 return;
2381 br_write_lock(vfsmount_lock); 2221 br_write_lock(vfsmount_lock);
@@ -2393,7 +2233,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2393{ 2233{
2394 struct mnt_namespace *new_ns; 2234 struct mnt_namespace *new_ns;
2395 struct vfsmount *rootmnt = NULL, *pwdmnt = NULL; 2235 struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
2396 struct vfsmount *p, *q; 2236 struct mount *p, *q;
2237 struct mount *old = mnt_ns->root;
2238 struct mount *new;
2397 2239
2398 new_ns = alloc_mnt_ns(); 2240 new_ns = alloc_mnt_ns();
2399 if (IS_ERR(new_ns)) 2241 if (IS_ERR(new_ns))
@@ -2401,15 +2243,15 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2401 2243
2402 down_write(&namespace_sem); 2244 down_write(&namespace_sem);
2403 /* First pass: copy the tree topology */ 2245 /* First pass: copy the tree topology */
2404 new_ns->root = copy_tree(mnt_ns->root, mnt_ns->root->mnt_root, 2246 new = copy_tree(old, old->mnt.mnt_root, CL_COPY_ALL | CL_EXPIRE);
2405 CL_COPY_ALL | CL_EXPIRE); 2247 if (!new) {
2406 if (!new_ns->root) {
2407 up_write(&namespace_sem); 2248 up_write(&namespace_sem);
2408 kfree(new_ns); 2249 kfree(new_ns);
2409 return ERR_PTR(-ENOMEM); 2250 return ERR_PTR(-ENOMEM);
2410 } 2251 }
2252 new_ns->root = new;
2411 br_write_lock(vfsmount_lock); 2253 br_write_lock(vfsmount_lock);
2412 list_add_tail(&new_ns->list, &new_ns->root->mnt_list); 2254 list_add_tail(&new_ns->list, &new->mnt_list);
2413 br_write_unlock(vfsmount_lock); 2255 br_write_unlock(vfsmount_lock);
2414 2256
2415 /* 2257 /*
@@ -2417,27 +2259,27 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2417 * as belonging to new namespace. We have already acquired a private 2259 * as belonging to new namespace. We have already acquired a private
2418 * fs_struct, so tsk->fs->lock is not needed. 2260 * fs_struct, so tsk->fs->lock is not needed.
2419 */ 2261 */
2420 p = mnt_ns->root; 2262 p = old;
2421 q = new_ns->root; 2263 q = new;
2422 while (p) { 2264 while (p) {
2423 q->mnt_ns = new_ns; 2265 q->mnt_ns = new_ns;
2424 __mnt_make_longterm(q); 2266 __mnt_make_longterm(q);
2425 if (fs) { 2267 if (fs) {
2426 if (p == fs->root.mnt) { 2268 if (&p->mnt == fs->root.mnt) {
2427 fs->root.mnt = mntget(q); 2269 fs->root.mnt = mntget(&q->mnt);
2428 __mnt_make_longterm(q); 2270 __mnt_make_longterm(q);
2429 mnt_make_shortterm(p); 2271 mnt_make_shortterm(&p->mnt);
2430 rootmnt = p; 2272 rootmnt = &p->mnt;
2431 } 2273 }
2432 if (p == fs->pwd.mnt) { 2274 if (&p->mnt == fs->pwd.mnt) {
2433 fs->pwd.mnt = mntget(q); 2275 fs->pwd.mnt = mntget(&q->mnt);
2434 __mnt_make_longterm(q); 2276 __mnt_make_longterm(q);
2435 mnt_make_shortterm(p); 2277 mnt_make_shortterm(&p->mnt);
2436 pwdmnt = p; 2278 pwdmnt = &p->mnt;
2437 } 2279 }
2438 } 2280 }
2439 p = next_mnt(p, mnt_ns->root); 2281 p = next_mnt(p, old);
2440 q = next_mnt(q, new_ns->root); 2282 q = next_mnt(q, new);
2441 } 2283 }
2442 up_write(&namespace_sem); 2284 up_write(&namespace_sem);
2443 2285
@@ -2470,22 +2312,20 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
2470 * create_mnt_ns - creates a private namespace and adds a root filesystem 2312 * create_mnt_ns - creates a private namespace and adds a root filesystem
2471 * @mnt: pointer to the new root filesystem mountpoint 2313 * @mnt: pointer to the new root filesystem mountpoint
2472 */ 2314 */
2473struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt) 2315static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
2474{ 2316{
2475 struct mnt_namespace *new_ns; 2317 struct mnt_namespace *new_ns = alloc_mnt_ns();
2476
2477 new_ns = alloc_mnt_ns();
2478 if (!IS_ERR(new_ns)) { 2318 if (!IS_ERR(new_ns)) {
2319 struct mount *mnt = real_mount(m);
2479 mnt->mnt_ns = new_ns; 2320 mnt->mnt_ns = new_ns;
2480 __mnt_make_longterm(mnt); 2321 __mnt_make_longterm(mnt);
2481 new_ns->root = mnt; 2322 new_ns->root = mnt;
2482 list_add(&new_ns->list, &new_ns->root->mnt_list); 2323 list_add(&new_ns->list, &mnt->mnt_list);
2483 } else { 2324 } else {
2484 mntput(mnt); 2325 mntput(m);
2485 } 2326 }
2486 return new_ns; 2327 return new_ns;
2487} 2328}
2488EXPORT_SYMBOL(create_mnt_ns);
2489 2329
2490struct dentry *mount_subtree(struct vfsmount *mnt, const char *name) 2330struct dentry *mount_subtree(struct vfsmount *mnt, const char *name)
2491{ 2331{
@@ -2559,6 +2399,31 @@ out_type:
2559} 2399}
2560 2400
2561/* 2401/*
2402 * Return true if path is reachable from root
2403 *
2404 * namespace_sem or vfsmount_lock is held
2405 */
2406bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
2407 const struct path *root)
2408{
2409 while (&mnt->mnt != root->mnt && mnt_has_parent(mnt)) {
2410 dentry = mnt->mnt_mountpoint;
2411 mnt = mnt->mnt_parent;
2412 }
2413 return &mnt->mnt == root->mnt && is_subdir(dentry, root->dentry);
2414}
2415
2416int path_is_under(struct path *path1, struct path *path2)
2417{
2418 int res;
2419 br_read_lock(vfsmount_lock);
2420 res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2);
2421 br_read_unlock(vfsmount_lock);
2422 return res;
2423}
2424EXPORT_SYMBOL(path_is_under);
2425
2426/*
2562 * pivot_root Semantics: 2427 * pivot_root Semantics:
2563 * Moves the root file system of the current process to the directory put_old, 2428 * Moves the root file system of the current process to the directory put_old,
2564 * makes new_root as the new root file system of the current process, and sets 2429 * makes new_root as the new root file system of the current process, and sets
@@ -2586,8 +2451,8 @@ out_type:
2586SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, 2451SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2587 const char __user *, put_old) 2452 const char __user *, put_old)
2588{ 2453{
2589 struct vfsmount *tmp;
2590 struct path new, old, parent_path, root_parent, root; 2454 struct path new, old, parent_path, root_parent, root;
2455 struct mount *new_mnt, *root_mnt;
2591 int error; 2456 int error;
2592 2457
2593 if (!capable(CAP_SYS_ADMIN)) 2458 if (!capable(CAP_SYS_ADMIN))
@@ -2611,11 +2476,13 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2611 goto out3; 2476 goto out3;
2612 2477
2613 error = -EINVAL; 2478 error = -EINVAL;
2614 if (IS_MNT_SHARED(old.mnt) || 2479 new_mnt = real_mount(new.mnt);
2615 IS_MNT_SHARED(new.mnt->mnt_parent) || 2480 root_mnt = real_mount(root.mnt);
2616 IS_MNT_SHARED(root.mnt->mnt_parent)) 2481 if (IS_MNT_SHARED(real_mount(old.mnt)) ||
2482 IS_MNT_SHARED(new_mnt->mnt_parent) ||
2483 IS_MNT_SHARED(root_mnt->mnt_parent))
2617 goto out4; 2484 goto out4;
2618 if (!check_mnt(root.mnt) || !check_mnt(new.mnt)) 2485 if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
2619 goto out4; 2486 goto out4;
2620 error = -ENOENT; 2487 error = -ENOENT;
2621 if (d_unlinked(new.dentry)) 2488 if (d_unlinked(new.dentry))
@@ -2629,33 +2496,22 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2629 error = -EINVAL; 2496 error = -EINVAL;
2630 if (root.mnt->mnt_root != root.dentry) 2497 if (root.mnt->mnt_root != root.dentry)
2631 goto out4; /* not a mountpoint */ 2498 goto out4; /* not a mountpoint */
2632 if (root.mnt->mnt_parent == root.mnt) 2499 if (!mnt_has_parent(root_mnt))
2633 goto out4; /* not attached */ 2500 goto out4; /* not attached */
2634 if (new.mnt->mnt_root != new.dentry) 2501 if (new.mnt->mnt_root != new.dentry)
2635 goto out4; /* not a mountpoint */ 2502 goto out4; /* not a mountpoint */
2636 if (new.mnt->mnt_parent == new.mnt) 2503 if (!mnt_has_parent(new_mnt))
2637 goto out4; /* not attached */ 2504 goto out4; /* not attached */
2638 /* make sure we can reach put_old from new_root */ 2505 /* make sure we can reach put_old from new_root */
2639 tmp = old.mnt; 2506 if (!is_path_reachable(real_mount(old.mnt), old.dentry, &new))
2640 if (tmp != new.mnt) {
2641 for (;;) {
2642 if (tmp->mnt_parent == tmp)
2643 goto out4; /* already mounted on put_old */
2644 if (tmp->mnt_parent == new.mnt)
2645 break;
2646 tmp = tmp->mnt_parent;
2647 }
2648 if (!is_subdir(tmp->mnt_mountpoint, new.dentry))
2649 goto out4;
2650 } else if (!is_subdir(old.dentry, new.dentry))
2651 goto out4; 2507 goto out4;
2652 br_write_lock(vfsmount_lock); 2508 br_write_lock(vfsmount_lock);
2653 detach_mnt(new.mnt, &parent_path); 2509 detach_mnt(new_mnt, &parent_path);
2654 detach_mnt(root.mnt, &root_parent); 2510 detach_mnt(root_mnt, &root_parent);
2655 /* mount old root on put_old */ 2511 /* mount old root on put_old */
2656 attach_mnt(root.mnt, &old); 2512 attach_mnt(root_mnt, &old);
2657 /* mount new_root on / */ 2513 /* mount new_root on / */
2658 attach_mnt(new.mnt, &root_parent); 2514 attach_mnt(new_mnt, &root_parent);
2659 touch_mnt_namespace(current->nsproxy->mnt_ns); 2515 touch_mnt_namespace(current->nsproxy->mnt_ns);
2660 br_write_unlock(vfsmount_lock); 2516 br_write_unlock(vfsmount_lock);
2661 chroot_fs_refs(&root, &new); 2517 chroot_fs_refs(&root, &new);
@@ -2693,8 +2549,8 @@ static void __init init_mount_tree(void)
2693 init_task.nsproxy->mnt_ns = ns; 2549 init_task.nsproxy->mnt_ns = ns;
2694 get_mnt_ns(ns); 2550 get_mnt_ns(ns);
2695 2551
2696 root.mnt = ns->root; 2552 root.mnt = mnt;
2697 root.dentry = ns->root->mnt_root; 2553 root.dentry = mnt->mnt_root;
2698 2554
2699 set_fs_pwd(current->fs, &root); 2555 set_fs_pwd(current->fs, &root);
2700 set_fs_root(current->fs, &root); 2556 set_fs_root(current->fs, &root);
@@ -2707,7 +2563,7 @@ void __init mnt_init(void)
2707 2563
2708 init_rwsem(&namespace_sem); 2564 init_rwsem(&namespace_sem);
2709 2565
2710 mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount), 2566 mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount),
2711 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); 2567 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
2712 2568
2713 mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC); 2569 mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
@@ -2747,7 +2603,6 @@ void put_mnt_ns(struct mnt_namespace *ns)
2747 release_mounts(&umount_list); 2603 release_mounts(&umount_list);
2748 kfree(ns); 2604 kfree(ns);
2749} 2605}
2750EXPORT_SYMBOL(put_mnt_ns);
2751 2606
2752struct vfsmount *kern_mount_data(struct file_system_type *type, void *data) 2607struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
2753{ 2608{
@@ -2776,5 +2631,5 @@ EXPORT_SYMBOL(kern_unmount);
2776 2631
2777bool our_mnt(struct vfsmount *mnt) 2632bool our_mnt(struct vfsmount *mnt)
2778{ 2633{
2779 return check_mnt(mnt); 2634 return check_mnt(real_mount(mnt));
2780} 2635}
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 9c51f621e901..aeed93a6bde0 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -30,15 +30,15 @@ static void ncp_do_readdir(struct file *, void *, filldir_t,
30 30
31static int ncp_readdir(struct file *, void *, filldir_t); 31static int ncp_readdir(struct file *, void *, filldir_t);
32 32
33static int ncp_create(struct inode *, struct dentry *, int, struct nameidata *); 33static int ncp_create(struct inode *, struct dentry *, umode_t, struct nameidata *);
34static struct dentry *ncp_lookup(struct inode *, struct dentry *, struct nameidata *); 34static struct dentry *ncp_lookup(struct inode *, struct dentry *, struct nameidata *);
35static int ncp_unlink(struct inode *, struct dentry *); 35static int ncp_unlink(struct inode *, struct dentry *);
36static int ncp_mkdir(struct inode *, struct dentry *, int); 36static int ncp_mkdir(struct inode *, struct dentry *, umode_t);
37static int ncp_rmdir(struct inode *, struct dentry *); 37static int ncp_rmdir(struct inode *, struct dentry *);
38static int ncp_rename(struct inode *, struct dentry *, 38static int ncp_rename(struct inode *, struct dentry *,
39 struct inode *, struct dentry *); 39 struct inode *, struct dentry *);
40static int ncp_mknod(struct inode * dir, struct dentry *dentry, 40static int ncp_mknod(struct inode * dir, struct dentry *dentry,
41 int mode, dev_t rdev); 41 umode_t mode, dev_t rdev);
42#if defined(CONFIG_NCPFS_EXTRAS) || defined(CONFIG_NCPFS_NFS_NS) 42#if defined(CONFIG_NCPFS_EXTRAS) || defined(CONFIG_NCPFS_NFS_NS)
43extern int ncp_symlink(struct inode *, struct dentry *, const char *); 43extern int ncp_symlink(struct inode *, struct dentry *, const char *);
44#else 44#else
@@ -919,7 +919,7 @@ out_close:
919 goto out; 919 goto out;
920} 920}
921 921
922int ncp_create_new(struct inode *dir, struct dentry *dentry, int mode, 922int ncp_create_new(struct inode *dir, struct dentry *dentry, umode_t mode,
923 dev_t rdev, __le32 attributes) 923 dev_t rdev, __le32 attributes)
924{ 924{
925 struct ncp_server *server = NCP_SERVER(dir); 925 struct ncp_server *server = NCP_SERVER(dir);
@@ -928,7 +928,7 @@ int ncp_create_new(struct inode *dir, struct dentry *dentry, int mode,
928 int opmode; 928 int opmode;
929 __u8 __name[NCP_MAXPATHLEN + 1]; 929 __u8 __name[NCP_MAXPATHLEN + 1];
930 930
931 PPRINTK("ncp_create_new: creating %s/%s, mode=%x\n", 931 PPRINTK("ncp_create_new: creating %s/%s, mode=%hx\n",
932 dentry->d_parent->d_name.name, dentry->d_name.name, mode); 932 dentry->d_parent->d_name.name, dentry->d_name.name, mode);
933 933
934 ncp_age_dentry(server, dentry); 934 ncp_age_dentry(server, dentry);
@@ -979,13 +979,13 @@ out:
979 return error; 979 return error;
980} 980}
981 981
982static int ncp_create(struct inode *dir, struct dentry *dentry, int mode, 982static int ncp_create(struct inode *dir, struct dentry *dentry, umode_t mode,
983 struct nameidata *nd) 983 struct nameidata *nd)
984{ 984{
985 return ncp_create_new(dir, dentry, mode, 0, 0); 985 return ncp_create_new(dir, dentry, mode, 0, 0);
986} 986}
987 987
988static int ncp_mkdir(struct inode *dir, struct dentry *dentry, int mode) 988static int ncp_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
989{ 989{
990 struct ncp_entry_info finfo; 990 struct ncp_entry_info finfo;
991 struct ncp_server *server = NCP_SERVER(dir); 991 struct ncp_server *server = NCP_SERVER(dir);
@@ -1201,12 +1201,12 @@ out:
1201} 1201}
1202 1202
1203static int ncp_mknod(struct inode * dir, struct dentry *dentry, 1203static int ncp_mknod(struct inode * dir, struct dentry *dentry,
1204 int mode, dev_t rdev) 1204 umode_t mode, dev_t rdev)
1205{ 1205{
1206 if (!new_valid_dev(rdev)) 1206 if (!new_valid_dev(rdev))
1207 return -EINVAL; 1207 return -EINVAL;
1208 if (ncp_is_nfs_extras(NCP_SERVER(dir), NCP_FINFO(dir)->volNumber)) { 1208 if (ncp_is_nfs_extras(NCP_SERVER(dir), NCP_FINFO(dir)->volNumber)) {
1209 DPRINTK(KERN_DEBUG "ncp_mknod: mode = 0%o\n", mode); 1209 DPRINTK(KERN_DEBUG "ncp_mknod: mode = 0%ho\n", mode);
1210 return ncp_create_new(dir, dentry, mode, rdev, 0); 1210 return ncp_create_new(dir, dentry, mode, rdev, 0);
1211 } 1211 }
1212 return -EPERM; /* Strange, but true */ 1212 return -EPERM; /* Strange, but true */
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index cbd1a61c110a..3d1e34f8a68e 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -44,7 +44,7 @@
44static void ncp_evict_inode(struct inode *); 44static void ncp_evict_inode(struct inode *);
45static void ncp_put_super(struct super_block *); 45static void ncp_put_super(struct super_block *);
46static int ncp_statfs(struct dentry *, struct kstatfs *); 46static int ncp_statfs(struct dentry *, struct kstatfs *);
47static int ncp_show_options(struct seq_file *, struct vfsmount *); 47static int ncp_show_options(struct seq_file *, struct dentry *);
48 48
49static struct kmem_cache * ncp_inode_cachep; 49static struct kmem_cache * ncp_inode_cachep;
50 50
@@ -60,7 +60,6 @@ static struct inode *ncp_alloc_inode(struct super_block *sb)
60static void ncp_i_callback(struct rcu_head *head) 60static void ncp_i_callback(struct rcu_head *head)
61{ 61{
62 struct inode *inode = container_of(head, struct inode, i_rcu); 62 struct inode *inode = container_of(head, struct inode, i_rcu);
63 INIT_LIST_HEAD(&inode->i_dentry);
64 kmem_cache_free(ncp_inode_cachep, NCP_FINFO(inode)); 63 kmem_cache_free(ncp_inode_cachep, NCP_FINFO(inode));
65} 64}
66 65
@@ -323,9 +322,9 @@ static void ncp_stop_tasks(struct ncp_server *server) {
323 flush_work_sync(&server->timeout_tq); 322 flush_work_sync(&server->timeout_tq);
324} 323}
325 324
326static int ncp_show_options(struct seq_file *seq, struct vfsmount *mnt) 325static int ncp_show_options(struct seq_file *seq, struct dentry *root)
327{ 326{
328 struct ncp_server *server = NCP_SBP(mnt->mnt_sb); 327 struct ncp_server *server = NCP_SBP(root->d_sb);
329 unsigned int tmp; 328 unsigned int tmp;
330 329
331 if (server->m.uid != 0) 330 if (server->m.uid != 0)
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 790e92a9ec63..6958adfaff08 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -901,7 +901,7 @@ long ncp_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
901 ret = __ncp_ioctl(inode, cmd, arg); 901 ret = __ncp_ioctl(inode, cmd, arg);
902outDropWrite: 902outDropWrite:
903 if (need_drop_write) 903 if (need_drop_write)
904 mnt_drop_write(filp->f_path.mnt); 904 mnt_drop_write_file(filp);
905out: 905out:
906 return ret; 906 return ret;
907} 907}
diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h
index 09881e6aa5ad..32c06587351a 100644
--- a/fs/ncpfs/ncplib_kernel.h
+++ b/fs/ncpfs/ncplib_kernel.h
@@ -114,7 +114,7 @@ int ncp_dirhandle_alloc(struct ncp_server *, __u8 vol, __le32 dirent, __u8 *dirh
114int ncp_dirhandle_free(struct ncp_server *, __u8 dirhandle); 114int ncp_dirhandle_free(struct ncp_server *, __u8 dirhandle);
115 115
116int ncp_create_new(struct inode *dir, struct dentry *dentry, 116int ncp_create_new(struct inode *dir, struct dentry *dentry,
117 int mode, dev_t rdev, __le32 attributes); 117 umode_t mode, dev_t rdev, __le32 attributes);
118 118
119static inline int ncp_is_nfs_extras(struct ncp_server* server, unsigned int volnum) { 119static inline int ncp_is_nfs_extras(struct ncp_server* server, unsigned int volnum) {
120#ifdef CONFIG_NCPFS_NFS_NS 120#ifdef CONFIG_NCPFS_NFS_NS
diff --git a/fs/ncpfs/symlink.c b/fs/ncpfs/symlink.c
index 661f861d80c6..52439ddc8de0 100644
--- a/fs/ncpfs/symlink.c
+++ b/fs/ncpfs/symlink.c
@@ -108,7 +108,7 @@ int ncp_symlink(struct inode *dir, struct dentry *dentry, const char *symname) {
108 char *rawlink; 108 char *rawlink;
109 int length, err, i, outlen; 109 int length, err, i, outlen;
110 int kludge; 110 int kludge;
111 int mode; 111 umode_t mode;
112 __le32 attr; 112 __le32 attr;
113 unsigned int hdr; 113 unsigned int hdr;
114 114
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 43926add945b..54cea8ad5a76 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -339,7 +339,7 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args)
339 dprintk("%s enter. slotid %d seqid %d\n", 339 dprintk("%s enter. slotid %d seqid %d\n",
340 __func__, args->csa_slotid, args->csa_sequenceid); 340 __func__, args->csa_slotid, args->csa_sequenceid);
341 341
342 if (args->csa_slotid > NFS41_BC_MAX_CALLBACKS) 342 if (args->csa_slotid >= NFS41_BC_MAX_CALLBACKS)
343 return htonl(NFS4ERR_BADSLOT); 343 return htonl(NFS4ERR_BADSLOT);
344 344
345 slot = tbl->slots + args->csa_slotid; 345 slot = tbl->slots + args->csa_slotid;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 873bf00d51a2..31778f74357d 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -84,7 +84,7 @@ retry:
84/* 84/*
85 * Turn off NFSv4 uid/gid mapping when using AUTH_SYS 85 * Turn off NFSv4 uid/gid mapping when using AUTH_SYS
86 */ 86 */
87static int nfs4_disable_idmapping = 0; 87static bool nfs4_disable_idmapping = true;
88 88
89/* 89/*
90 * RPC cruft for NFS 90 * RPC cruft for NFS
@@ -185,7 +185,7 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
185 clp->cl_minorversion = cl_init->minorversion; 185 clp->cl_minorversion = cl_init->minorversion;
186 clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion]; 186 clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion];
187#endif 187#endif
188 cred = rpc_lookup_machine_cred(); 188 cred = rpc_lookup_machine_cred("*");
189 if (!IS_ERR(cred)) 189 if (!IS_ERR(cred))
190 clp->cl_machine_cred = cred; 190 clp->cl_machine_cred = cred;
191 nfs_fscache_get_client_cookie(clp); 191 nfs_fscache_get_client_cookie(clp);
@@ -250,6 +250,11 @@ static void pnfs_init_server(struct nfs_server *server)
250 rpc_init_wait_queue(&server->roc_rpcwaitq, "pNFS ROC"); 250 rpc_init_wait_queue(&server->roc_rpcwaitq, "pNFS ROC");
251} 251}
252 252
253static void nfs4_destroy_server(struct nfs_server *server)
254{
255 nfs4_purge_state_owners(server);
256}
257
253#else 258#else
254static void nfs4_shutdown_client(struct nfs_client *clp) 259static void nfs4_shutdown_client(struct nfs_client *clp)
255{ 260{
@@ -1065,6 +1070,7 @@ static struct nfs_server *nfs_alloc_server(void)
1065 INIT_LIST_HEAD(&server->master_link); 1070 INIT_LIST_HEAD(&server->master_link);
1066 INIT_LIST_HEAD(&server->delegations); 1071 INIT_LIST_HEAD(&server->delegations);
1067 INIT_LIST_HEAD(&server->layouts); 1072 INIT_LIST_HEAD(&server->layouts);
1073 INIT_LIST_HEAD(&server->state_owners_lru);
1068 1074
1069 atomic_set(&server->active, 0); 1075 atomic_set(&server->active, 0);
1070 1076
@@ -1538,6 +1544,7 @@ static int nfs4_server_common_setup(struct nfs_server *server,
1538 1544
1539 nfs_server_insert_lists(server); 1545 nfs_server_insert_lists(server);
1540 server->mount_time = jiffies; 1546 server->mount_time = jiffies;
1547 server->destroy = nfs4_destroy_server;
1541out: 1548out:
1542 nfs_free_fattr(fattr); 1549 nfs_free_fattr(fattr);
1543 return error; 1550 return error;
@@ -1719,6 +1726,7 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
1719 1726
1720 /* Copy data from the source */ 1727 /* Copy data from the source */
1721 server->nfs_client = source->nfs_client; 1728 server->nfs_client = source->nfs_client;
1729 server->destroy = source->destroy;
1722 atomic_inc(&server->nfs_client->cl_count); 1730 atomic_inc(&server->nfs_client->cl_count);
1723 nfs_server_copy_userdata(server, source); 1731 nfs_server_copy_userdata(server, source);
1724 1732
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index ac2899098147..fd9a872fada0 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -47,13 +47,13 @@ static int nfs_opendir(struct inode *, struct file *);
47static int nfs_closedir(struct inode *, struct file *); 47static int nfs_closedir(struct inode *, struct file *);
48static int nfs_readdir(struct file *, void *, filldir_t); 48static int nfs_readdir(struct file *, void *, filldir_t);
49static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *); 49static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *);
50static int nfs_create(struct inode *, struct dentry *, int, struct nameidata *); 50static int nfs_create(struct inode *, struct dentry *, umode_t, struct nameidata *);
51static int nfs_mkdir(struct inode *, struct dentry *, int); 51static int nfs_mkdir(struct inode *, struct dentry *, umode_t);
52static int nfs_rmdir(struct inode *, struct dentry *); 52static int nfs_rmdir(struct inode *, struct dentry *);
53static int nfs_unlink(struct inode *, struct dentry *); 53static int nfs_unlink(struct inode *, struct dentry *);
54static int nfs_symlink(struct inode *, struct dentry *, const char *); 54static int nfs_symlink(struct inode *, struct dentry *, const char *);
55static int nfs_link(struct dentry *, struct inode *, struct dentry *); 55static int nfs_link(struct dentry *, struct inode *, struct dentry *);
56static int nfs_mknod(struct inode *, struct dentry *, int, dev_t); 56static int nfs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
57static int nfs_rename(struct inode *, struct dentry *, 57static int nfs_rename(struct inode *, struct dentry *,
58 struct inode *, struct dentry *); 58 struct inode *, struct dentry *);
59static int nfs_fsync_dir(struct file *, loff_t, loff_t, int); 59static int nfs_fsync_dir(struct file *, loff_t, loff_t, int);
@@ -112,7 +112,7 @@ const struct inode_operations nfs3_dir_inode_operations = {
112#ifdef CONFIG_NFS_V4 112#ifdef CONFIG_NFS_V4
113 113
114static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *); 114static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *);
115static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd); 115static int nfs_open_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd);
116const struct inode_operations nfs4_dir_inode_operations = { 116const struct inode_operations nfs4_dir_inode_operations = {
117 .create = nfs_open_create, 117 .create = nfs_open_create,
118 .lookup = nfs_atomic_lookup, 118 .lookup = nfs_atomic_lookup,
@@ -1368,18 +1368,7 @@ static fmode_t flags_to_mode(int flags)
1368 1368
1369static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, int open_flags) 1369static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, int open_flags)
1370{ 1370{
1371 struct nfs_open_context *ctx; 1371 return alloc_nfs_open_context(dentry, flags_to_mode(open_flags));
1372 struct rpc_cred *cred;
1373 fmode_t fmode = flags_to_mode(open_flags);
1374
1375 cred = rpc_lookup_cred();
1376 if (IS_ERR(cred))
1377 return ERR_CAST(cred);
1378 ctx = alloc_nfs_open_context(dentry, cred, fmode);
1379 put_rpccred(cred);
1380 if (ctx == NULL)
1381 return ERR_PTR(-ENOMEM);
1382 return ctx;
1383} 1372}
1384 1373
1385static int do_open(struct inode *inode, struct file *filp) 1374static int do_open(struct inode *inode, struct file *filp)
@@ -1584,8 +1573,8 @@ no_open:
1584 return nfs_lookup_revalidate(dentry, nd); 1573 return nfs_lookup_revalidate(dentry, nd);
1585} 1574}
1586 1575
1587static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode, 1576static int nfs_open_create(struct inode *dir, struct dentry *dentry,
1588 struct nameidata *nd) 1577 umode_t mode, struct nameidata *nd)
1589{ 1578{
1590 struct nfs_open_context *ctx = NULL; 1579 struct nfs_open_context *ctx = NULL;
1591 struct iattr attr; 1580 struct iattr attr;
@@ -1675,8 +1664,8 @@ out_error:
1675 * that the operation succeeded on the server, but an error in the 1664 * that the operation succeeded on the server, but an error in the
1676 * reply path made it appear to have failed. 1665 * reply path made it appear to have failed.
1677 */ 1666 */
1678static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, 1667static int nfs_create(struct inode *dir, struct dentry *dentry,
1679 struct nameidata *nd) 1668 umode_t mode, struct nameidata *nd)
1680{ 1669{
1681 struct iattr attr; 1670 struct iattr attr;
1682 int error; 1671 int error;
@@ -1704,7 +1693,7 @@ out_err:
1704 * See comments for nfs_proc_create regarding failed operations. 1693 * See comments for nfs_proc_create regarding failed operations.
1705 */ 1694 */
1706static int 1695static int
1707nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) 1696nfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
1708{ 1697{
1709 struct iattr attr; 1698 struct iattr attr;
1710 int status; 1699 int status;
@@ -1730,7 +1719,7 @@ out_err:
1730/* 1719/*
1731 * See comments for nfs_proc_create regarding failed operations. 1720 * See comments for nfs_proc_create regarding failed operations.
1732 */ 1721 */
1733static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 1722static int nfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
1734{ 1723{
1735 struct iattr attr; 1724 struct iattr attr;
1736 int error; 1725 int error;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 606ef0f20aed..c43a452f7da2 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -272,13 +272,13 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
272 datasync); 272 datasync);
273 273
274 ret = filemap_write_and_wait_range(inode->i_mapping, start, end); 274 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
275 if (ret)
276 return ret;
277 mutex_lock(&inode->i_mutex); 275 mutex_lock(&inode->i_mutex);
278 276
279 nfs_inc_stats(inode, NFSIOS_VFSFSYNC); 277 nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
280 have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); 278 have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
281 status = nfs_commit_inode(inode, FLUSH_SYNC); 279 status = nfs_commit_inode(inode, FLUSH_SYNC);
280 if (status >= 0 && ret < 0)
281 status = ret;
282 have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); 282 have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
283 if (have_error) 283 if (have_error)
284 ret = xchg(&ctx->error, 0); 284 ret = xchg(&ctx->error, 0);
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 47d1c6ff2d8e..2c05f1991e1e 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -38,6 +38,89 @@
38#include <linux/kernel.h> 38#include <linux/kernel.h>
39#include <linux/slab.h> 39#include <linux/slab.h>
40#include <linux/nfs_idmap.h> 40#include <linux/nfs_idmap.h>
41#include <linux/nfs_fs.h>
42
43/**
44 * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields
45 * @fattr: fully initialised struct nfs_fattr
46 * @owner_name: owner name string cache
47 * @group_name: group name string cache
48 */
49void nfs_fattr_init_names(struct nfs_fattr *fattr,
50 struct nfs4_string *owner_name,
51 struct nfs4_string *group_name)
52{
53 fattr->owner_name = owner_name;
54 fattr->group_name = group_name;
55}
56
57static void nfs_fattr_free_owner_name(struct nfs_fattr *fattr)
58{
59 fattr->valid &= ~NFS_ATTR_FATTR_OWNER_NAME;
60 kfree(fattr->owner_name->data);
61}
62
63static void nfs_fattr_free_group_name(struct nfs_fattr *fattr)
64{
65 fattr->valid &= ~NFS_ATTR_FATTR_GROUP_NAME;
66 kfree(fattr->group_name->data);
67}
68
69static bool nfs_fattr_map_owner_name(struct nfs_server *server, struct nfs_fattr *fattr)
70{
71 struct nfs4_string *owner = fattr->owner_name;
72 __u32 uid;
73
74 if (!(fattr->valid & NFS_ATTR_FATTR_OWNER_NAME))
75 return false;
76 if (nfs_map_name_to_uid(server, owner->data, owner->len, &uid) == 0) {
77 fattr->uid = uid;
78 fattr->valid |= NFS_ATTR_FATTR_OWNER;
79 }
80 return true;
81}
82
83static bool nfs_fattr_map_group_name(struct nfs_server *server, struct nfs_fattr *fattr)
84{
85 struct nfs4_string *group = fattr->group_name;
86 __u32 gid;
87
88 if (!(fattr->valid & NFS_ATTR_FATTR_GROUP_NAME))
89 return false;
90 if (nfs_map_group_to_gid(server, group->data, group->len, &gid) == 0) {
91 fattr->gid = gid;
92 fattr->valid |= NFS_ATTR_FATTR_GROUP;
93 }
94 return true;
95}
96
97/**
98 * nfs_fattr_free_names - free up the NFSv4 owner and group strings
99 * @fattr: a fully initialised nfs_fattr structure
100 */
101void nfs_fattr_free_names(struct nfs_fattr *fattr)
102{
103 if (fattr->valid & NFS_ATTR_FATTR_OWNER_NAME)
104 nfs_fattr_free_owner_name(fattr);
105 if (fattr->valid & NFS_ATTR_FATTR_GROUP_NAME)
106 nfs_fattr_free_group_name(fattr);
107}
108
109/**
110 * nfs_fattr_map_and_free_names - map owner/group strings into uid/gid and free
111 * @server: pointer to the filesystem nfs_server structure
112 * @fattr: a fully initialised nfs_fattr structure
113 *
114 * This helper maps the cached NFSv4 owner/group strings in fattr into
115 * their numeric uid/gid equivalents, and then frees the cached strings.
116 */
117void nfs_fattr_map_and_free_names(struct nfs_server *server, struct nfs_fattr *fattr)
118{
119 if (nfs_fattr_map_owner_name(server, fattr))
120 nfs_fattr_free_owner_name(fattr);
121 if (nfs_fattr_map_group_name(server, fattr))
122 nfs_fattr_free_group_name(fattr);
123}
41 124
42static int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res) 125static int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res)
43{ 126{
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 50a15fa8cf98..f649fba8c384 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -38,6 +38,7 @@
38#include <linux/nfs_xdr.h> 38#include <linux/nfs_xdr.h>
39#include <linux/slab.h> 39#include <linux/slab.h>
40#include <linux/compat.h> 40#include <linux/compat.h>
41#include <linux/freezer.h>
41 42
42#include <asm/system.h> 43#include <asm/system.h>
43#include <asm/uaccess.h> 44#include <asm/uaccess.h>
@@ -56,7 +57,7 @@
56#define NFS_64_BIT_INODE_NUMBERS_ENABLED 1 57#define NFS_64_BIT_INODE_NUMBERS_ENABLED 1
57 58
58/* Default is to see 64-bit inode numbers */ 59/* Default is to see 64-bit inode numbers */
59static int enable_ino64 = NFS_64_BIT_INODE_NUMBERS_ENABLED; 60static bool enable_ino64 = NFS_64_BIT_INODE_NUMBERS_ENABLED;
60 61
61static void nfs_invalidate_inode(struct inode *); 62static void nfs_invalidate_inode(struct inode *);
62static int nfs_update_inode(struct inode *, struct nfs_fattr *); 63static int nfs_update_inode(struct inode *, struct nfs_fattr *);
@@ -77,7 +78,7 @@ int nfs_wait_bit_killable(void *word)
77{ 78{
78 if (fatal_signal_pending(current)) 79 if (fatal_signal_pending(current))
79 return -ERESTARTSYS; 80 return -ERESTARTSYS;
80 schedule(); 81 freezable_schedule();
81 return 0; 82 return 0;
82} 83}
83 84
@@ -629,23 +630,28 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync)
629 nfs_revalidate_inode(server, inode); 630 nfs_revalidate_inode(server, inode);
630} 631}
631 632
632struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred, fmode_t f_mode) 633struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f_mode)
633{ 634{
634 struct nfs_open_context *ctx; 635 struct nfs_open_context *ctx;
636 struct rpc_cred *cred = rpc_lookup_cred();
637 if (IS_ERR(cred))
638 return ERR_CAST(cred);
635 639
636 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); 640 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
637 if (ctx != NULL) { 641 if (!ctx) {
638 nfs_sb_active(dentry->d_sb); 642 put_rpccred(cred);
639 ctx->dentry = dget(dentry); 643 return ERR_PTR(-ENOMEM);
640 ctx->cred = get_rpccred(cred);
641 ctx->state = NULL;
642 ctx->mode = f_mode;
643 ctx->flags = 0;
644 ctx->error = 0;
645 nfs_init_lock_context(&ctx->lock_context);
646 ctx->lock_context.open_context = ctx;
647 INIT_LIST_HEAD(&ctx->list);
648 } 644 }
645 nfs_sb_active(dentry->d_sb);
646 ctx->dentry = dget(dentry);
647 ctx->cred = cred;
648 ctx->state = NULL;
649 ctx->mode = f_mode;
650 ctx->flags = 0;
651 ctx->error = 0;
652 nfs_init_lock_context(&ctx->lock_context);
653 ctx->lock_context.open_context = ctx;
654 INIT_LIST_HEAD(&ctx->list);
649 return ctx; 655 return ctx;
650} 656}
651 657
@@ -738,15 +744,10 @@ static void nfs_file_clear_open_context(struct file *filp)
738int nfs_open(struct inode *inode, struct file *filp) 744int nfs_open(struct inode *inode, struct file *filp)
739{ 745{
740 struct nfs_open_context *ctx; 746 struct nfs_open_context *ctx;
741 struct rpc_cred *cred;
742 747
743 cred = rpc_lookup_cred(); 748 ctx = alloc_nfs_open_context(filp->f_path.dentry, filp->f_mode);
744 if (IS_ERR(cred)) 749 if (IS_ERR(ctx))
745 return PTR_ERR(cred); 750 return PTR_ERR(ctx);
746 ctx = alloc_nfs_open_context(filp->f_path.dentry, cred, filp->f_mode);
747 put_rpccred(cred);
748 if (ctx == NULL)
749 return -ENOMEM;
750 nfs_file_set_open_context(filp, ctx); 751 nfs_file_set_open_context(filp, ctx);
751 put_nfs_open_context(ctx); 752 put_nfs_open_context(ctx);
752 nfs_fscache_set_inode_cookie(inode, filp); 753 nfs_fscache_set_inode_cookie(inode, filp);
@@ -1019,6 +1020,8 @@ void nfs_fattr_init(struct nfs_fattr *fattr)
1019 fattr->valid = 0; 1020 fattr->valid = 0;
1020 fattr->time_start = jiffies; 1021 fattr->time_start = jiffies;
1021 fattr->gencount = nfs_inc_attr_generation_counter(); 1022 fattr->gencount = nfs_inc_attr_generation_counter();
1023 fattr->owner_name = NULL;
1024 fattr->group_name = NULL;
1022} 1025}
1023 1026
1024struct nfs_fattr *nfs_alloc_fattr(void) 1027struct nfs_fattr *nfs_alloc_fattr(void)
@@ -1464,7 +1467,6 @@ struct inode *nfs_alloc_inode(struct super_block *sb)
1464static void nfs_i_callback(struct rcu_head *head) 1467static void nfs_i_callback(struct rcu_head *head)
1465{ 1468{
1466 struct inode *inode = container_of(head, struct inode, i_rcu); 1469 struct inode *inode = container_of(head, struct inode, i_rcu);
1467 INIT_LIST_HEAD(&inode->i_dentry);
1468 kmem_cache_free(nfs_inode_cachep, NFS_I(inode)); 1470 kmem_cache_free(nfs_inode_cachep, NFS_I(inode));
1469} 1471}
1470 1472
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 3f4d95751d52..8102db9b926c 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -307,6 +307,8 @@ extern void nfs_readdata_release(struct nfs_read_data *rdata);
307/* write.c */ 307/* write.c */
308extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, 308extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
309 struct list_head *head); 309 struct list_head *head);
310extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
311 struct inode *inode, int ioflags);
310extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); 312extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
311extern void nfs_writedata_release(struct nfs_write_data *wdata); 313extern void nfs_writedata_release(struct nfs_write_data *wdata);
312extern void nfs_commit_free(struct nfs_write_data *p); 314extern void nfs_commit_free(struct nfs_write_data *p);
@@ -330,7 +332,7 @@ void nfs_commit_release_pages(struct nfs_write_data *data);
330 332
331#ifdef CONFIG_MIGRATION 333#ifdef CONFIG_MIGRATION
332extern int nfs_migrate_page(struct address_space *, 334extern int nfs_migrate_page(struct address_space *,
333 struct page *, struct page *); 335 struct page *, struct page *, enum migrate_mode);
334#else 336#else
335#define nfs_migrate_page NULL 337#define nfs_migrate_page NULL
336#endif 338#endif
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index d4bc9ed91748..91943953a370 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -17,6 +17,7 @@
17#include <linux/nfs_page.h> 17#include <linux/nfs_page.h>
18#include <linux/lockd/bind.h> 18#include <linux/lockd/bind.h>
19#include <linux/nfs_mount.h> 19#include <linux/nfs_mount.h>
20#include <linux/freezer.h>
20 21
21#include "iostat.h" 22#include "iostat.h"
22#include "internal.h" 23#include "internal.h"
@@ -32,7 +33,7 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
32 res = rpc_call_sync(clnt, msg, flags); 33 res = rpc_call_sync(clnt, msg, flags);
33 if (res != -EJUKEBOX && res != -EKEYEXPIRED) 34 if (res != -EJUKEBOX && res != -EKEYEXPIRED)
34 break; 35 break;
35 schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME); 36 freezable_schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME);
36 res = -ERESTARTSYS; 37 res = -ERESTARTSYS;
37 } while (!fatal_signal_pending(current)); 38 } while (!fatal_signal_pending(current));
38 return res; 39 return res;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 693ae22f8731..4d7d0aedc101 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -94,6 +94,8 @@ struct nfs_unique_id {
94struct nfs4_state_owner { 94struct nfs4_state_owner {
95 struct nfs_unique_id so_owner_id; 95 struct nfs_unique_id so_owner_id;
96 struct nfs_server *so_server; 96 struct nfs_server *so_server;
97 struct list_head so_lru;
98 unsigned long so_expires;
97 struct rb_node so_server_node; 99 struct rb_node so_server_node;
98 100
99 struct rpc_cred *so_cred; /* Associated cred */ 101 struct rpc_cred *so_cred; /* Associated cred */
@@ -319,6 +321,7 @@ static inline void nfs4_schedule_session_recovery(struct nfs4_session *session)
319 321
320extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); 322extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
321extern void nfs4_put_state_owner(struct nfs4_state_owner *); 323extern void nfs4_put_state_owner(struct nfs4_state_owner *);
324extern void nfs4_purge_state_owners(struct nfs_server *);
322extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); 325extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
323extern void nfs4_put_open_state(struct nfs4_state *); 326extern void nfs4_put_open_state(struct nfs4_state *);
324extern void nfs4_close_state(struct nfs4_state *, fmode_t); 327extern void nfs4_close_state(struct nfs4_state *, fmode_t);
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index a62d36b9a99e..71ec08617e23 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -49,13 +49,14 @@ filelayout_get_dense_offset(struct nfs4_filelayout_segment *flseg,
49 loff_t offset) 49 loff_t offset)
50{ 50{
51 u32 stripe_width = flseg->stripe_unit * flseg->dsaddr->stripe_count; 51 u32 stripe_width = flseg->stripe_unit * flseg->dsaddr->stripe_count;
52 u64 tmp; 52 u64 stripe_no;
53 u32 rem;
53 54
54 offset -= flseg->pattern_offset; 55 offset -= flseg->pattern_offset;
55 tmp = offset; 56 stripe_no = div_u64(offset, stripe_width);
56 do_div(tmp, stripe_width); 57 div_u64_rem(offset, flseg->stripe_unit, &rem);
57 58
58 return tmp * flseg->stripe_unit + do_div(offset, flseg->stripe_unit); 59 return stripe_no * flseg->stripe_unit + rem;
59} 60}
60 61
61/* This function is used by the layout driver to calculate the 62/* This function is used by the layout driver to calculate the
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d9f4d78c3413..75366dc89686 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -52,9 +52,11 @@
52#include <linux/namei.h> 52#include <linux/namei.h>
53#include <linux/mount.h> 53#include <linux/mount.h>
54#include <linux/module.h> 54#include <linux/module.h>
55#include <linux/nfs_idmap.h>
55#include <linux/sunrpc/bc_xprt.h> 56#include <linux/sunrpc/bc_xprt.h>
56#include <linux/xattr.h> 57#include <linux/xattr.h>
57#include <linux/utsname.h> 58#include <linux/utsname.h>
59#include <linux/freezer.h>
58 60
59#include "nfs4_fs.h" 61#include "nfs4_fs.h"
60#include "delegation.h" 62#include "delegation.h"
@@ -243,7 +245,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
243 *timeout = NFS4_POLL_RETRY_MIN; 245 *timeout = NFS4_POLL_RETRY_MIN;
244 if (*timeout > NFS4_POLL_RETRY_MAX) 246 if (*timeout > NFS4_POLL_RETRY_MAX)
245 *timeout = NFS4_POLL_RETRY_MAX; 247 *timeout = NFS4_POLL_RETRY_MAX;
246 schedule_timeout_killable(*timeout); 248 freezable_schedule_timeout_killable(*timeout);
247 if (fatal_signal_pending(current)) 249 if (fatal_signal_pending(current))
248 res = -ERESTARTSYS; 250 res = -ERESTARTSYS;
249 *timeout <<= 1; 251 *timeout <<= 1;
@@ -363,9 +365,8 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp
363 * Must be called while holding tbl->slot_tbl_lock 365 * Must be called while holding tbl->slot_tbl_lock
364 */ 366 */
365static void 367static void
366nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *free_slot) 368nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid)
367{ 369{
368 int free_slotid = free_slot - tbl->slots;
369 int slotid = free_slotid; 370 int slotid = free_slotid;
370 371
371 BUG_ON(slotid < 0 || slotid >= NFS4_MAX_SLOT_TABLE); 372 BUG_ON(slotid < 0 || slotid >= NFS4_MAX_SLOT_TABLE);
@@ -430,7 +431,7 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
430 } 431 }
431 432
432 spin_lock(&tbl->slot_tbl_lock); 433 spin_lock(&tbl->slot_tbl_lock);
433 nfs4_free_slot(tbl, res->sr_slot); 434 nfs4_free_slot(tbl, res->sr_slot - tbl->slots);
434 nfs4_check_drain_fc_complete(res->sr_session); 435 nfs4_check_drain_fc_complete(res->sr_session);
435 spin_unlock(&tbl->slot_tbl_lock); 436 spin_unlock(&tbl->slot_tbl_lock);
436 res->sr_slot = NULL; 437 res->sr_slot = NULL;
@@ -553,13 +554,10 @@ int nfs41_setup_sequence(struct nfs4_session *session,
553 spin_lock(&tbl->slot_tbl_lock); 554 spin_lock(&tbl->slot_tbl_lock);
554 if (test_bit(NFS4_SESSION_DRAINING, &session->session_state) && 555 if (test_bit(NFS4_SESSION_DRAINING, &session->session_state) &&
555 !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) { 556 !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) {
556 /* 557 /* The state manager will wait until the slot table is empty */
557 * The state manager will wait until the slot table is empty.
558 * Schedule the reset thread
559 */
560 rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); 558 rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
561 spin_unlock(&tbl->slot_tbl_lock); 559 spin_unlock(&tbl->slot_tbl_lock);
562 dprintk("%s Schedule Session Reset\n", __func__); 560 dprintk("%s session is draining\n", __func__);
563 return -EAGAIN; 561 return -EAGAIN;
564 } 562 }
565 563
@@ -764,6 +762,8 @@ struct nfs4_opendata {
764 struct nfs_openres o_res; 762 struct nfs_openres o_res;
765 struct nfs_open_confirmargs c_arg; 763 struct nfs_open_confirmargs c_arg;
766 struct nfs_open_confirmres c_res; 764 struct nfs_open_confirmres c_res;
765 struct nfs4_string owner_name;
766 struct nfs4_string group_name;
767 struct nfs_fattr f_attr; 767 struct nfs_fattr f_attr;
768 struct nfs_fattr dir_attr; 768 struct nfs_fattr dir_attr;
769 struct dentry *dir; 769 struct dentry *dir;
@@ -787,6 +787,7 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p)
787 p->o_res.server = p->o_arg.server; 787 p->o_res.server = p->o_arg.server;
788 nfs_fattr_init(&p->f_attr); 788 nfs_fattr_init(&p->f_attr);
789 nfs_fattr_init(&p->dir_attr); 789 nfs_fattr_init(&p->dir_attr);
790 nfs_fattr_init_names(&p->f_attr, &p->owner_name, &p->group_name);
790} 791}
791 792
792static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, 793static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
@@ -818,6 +819,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
818 p->o_arg.name = &dentry->d_name; 819 p->o_arg.name = &dentry->d_name;
819 p->o_arg.server = server; 820 p->o_arg.server = server;
820 p->o_arg.bitmask = server->attr_bitmask; 821 p->o_arg.bitmask = server->attr_bitmask;
822 p->o_arg.dir_bitmask = server->cache_consistency_bitmask;
821 p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; 823 p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
822 if (flags & O_CREAT) { 824 if (flags & O_CREAT) {
823 u32 *s; 825 u32 *s;
@@ -854,6 +856,7 @@ static void nfs4_opendata_free(struct kref *kref)
854 dput(p->dir); 856 dput(p->dir);
855 dput(p->dentry); 857 dput(p->dentry);
856 nfs_sb_deactive(sb); 858 nfs_sb_deactive(sb);
859 nfs_fattr_free_names(&p->f_attr);
857 kfree(p); 860 kfree(p);
858} 861}
859 862
@@ -1578,6 +1581,8 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data)
1578 if (status != 0 || !data->rpc_done) 1581 if (status != 0 || !data->rpc_done)
1579 return status; 1582 return status;
1580 1583
1584 nfs_fattr_map_and_free_names(NFS_SERVER(dir), &data->f_attr);
1585
1581 nfs_refresh_inode(dir, o_res->dir_attr); 1586 nfs_refresh_inode(dir, o_res->dir_attr);
1582 1587
1583 if (o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { 1588 if (o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
@@ -1610,6 +1615,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
1610 return status; 1615 return status;
1611 } 1616 }
1612 1617
1618 nfs_fattr_map_and_free_names(server, &data->f_attr);
1619
1613 if (o_arg->open_flags & O_CREAT) { 1620 if (o_arg->open_flags & O_CREAT) {
1614 update_changeattr(dir, &o_res->cinfo); 1621 update_changeattr(dir, &o_res->cinfo);
1615 nfs_post_op_update_inode(dir, o_res->dir_attr); 1622 nfs_post_op_update_inode(dir, o_res->dir_attr);
@@ -3430,19 +3437,6 @@ static inline int nfs4_server_supports_acls(struct nfs_server *server)
3430 */ 3437 */
3431#define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT) 3438#define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT)
3432 3439
3433static void buf_to_pages(const void *buf, size_t buflen,
3434 struct page **pages, unsigned int *pgbase)
3435{
3436 const void *p = buf;
3437
3438 *pgbase = offset_in_page(buf);
3439 p -= *pgbase;
3440 while (p < buf + buflen) {
3441 *(pages++) = virt_to_page(p);
3442 p += PAGE_CACHE_SIZE;
3443 }
3444}
3445
3446static int buf_to_pages_noslab(const void *buf, size_t buflen, 3440static int buf_to_pages_noslab(const void *buf, size_t buflen,
3447 struct page **pages, unsigned int *pgbase) 3441 struct page **pages, unsigned int *pgbase)
3448{ 3442{
@@ -3539,9 +3533,19 @@ out:
3539 nfs4_set_cached_acl(inode, acl); 3533 nfs4_set_cached_acl(inode, acl);
3540} 3534}
3541 3535
3536/*
3537 * The getxattr API returns the required buffer length when called with a
3538 * NULL buf. The NFSv4 acl tool then calls getxattr again after allocating
3539 * the required buf. On a NULL buf, we send a page of data to the server
3540 * guessing that the ACL request can be serviced by a page. If so, we cache
3541 * up to the page of ACL data, and the 2nd call to getxattr is serviced by
3542 * the cache. If not so, we throw away the page, and cache the required
3543 * length. The next getxattr call will then produce another round trip to
3544 * the server, this time with the input buf of the required size.
3545 */
3542static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) 3546static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
3543{ 3547{
3544 struct page *pages[NFS4ACL_MAXPAGES]; 3548 struct page *pages[NFS4ACL_MAXPAGES] = {NULL, };
3545 struct nfs_getaclargs args = { 3549 struct nfs_getaclargs args = {
3546 .fh = NFS_FH(inode), 3550 .fh = NFS_FH(inode),
3547 .acl_pages = pages, 3551 .acl_pages = pages,
@@ -3556,41 +3560,60 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
3556 .rpc_argp = &args, 3560 .rpc_argp = &args,
3557 .rpc_resp = &res, 3561 .rpc_resp = &res,
3558 }; 3562 };
3559 struct page *localpage = NULL; 3563 int ret = -ENOMEM, npages, i, acl_len = 0;
3560 int ret;
3561 3564
3562 if (buflen < PAGE_SIZE) { 3565 npages = (buflen + PAGE_SIZE - 1) >> PAGE_SHIFT;
3563 /* As long as we're doing a round trip to the server anyway, 3566 /* As long as we're doing a round trip to the server anyway,
3564 * let's be prepared for a page of acl data. */ 3567 * let's be prepared for a page of acl data. */
3565 localpage = alloc_page(GFP_KERNEL); 3568 if (npages == 0)
3566 resp_buf = page_address(localpage); 3569 npages = 1;
3567 if (localpage == NULL) 3570
3568 return -ENOMEM; 3571 for (i = 0; i < npages; i++) {
3569 args.acl_pages[0] = localpage; 3572 pages[i] = alloc_page(GFP_KERNEL);
3570 args.acl_pgbase = 0; 3573 if (!pages[i])
3571 args.acl_len = PAGE_SIZE; 3574 goto out_free;
3572 } else {
3573 resp_buf = buf;
3574 buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase);
3575 } 3575 }
3576 ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), &msg, &args.seq_args, &res.seq_res, 0); 3576 if (npages > 1) {
3577 /* for decoding across pages */
3578 args.acl_scratch = alloc_page(GFP_KERNEL);
3579 if (!args.acl_scratch)
3580 goto out_free;
3581 }
3582 args.acl_len = npages * PAGE_SIZE;
3583 args.acl_pgbase = 0;
3584 /* Let decode_getfacl know not to fail if the ACL data is larger than
3585 * the page we send as a guess */
3586 if (buf == NULL)
3587 res.acl_flags |= NFS4_ACL_LEN_REQUEST;
3588 resp_buf = page_address(pages[0]);
3589
3590 dprintk("%s buf %p buflen %ld npages %d args.acl_len %ld\n",
3591 __func__, buf, buflen, npages, args.acl_len);
3592 ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode),
3593 &msg, &args.seq_args, &res.seq_res, 0);
3577 if (ret) 3594 if (ret)
3578 goto out_free; 3595 goto out_free;
3579 if (res.acl_len > args.acl_len) 3596
3580 nfs4_write_cached_acl(inode, NULL, res.acl_len); 3597 acl_len = res.acl_len - res.acl_data_offset;
3598 if (acl_len > args.acl_len)
3599 nfs4_write_cached_acl(inode, NULL, acl_len);
3581 else 3600 else
3582 nfs4_write_cached_acl(inode, resp_buf, res.acl_len); 3601 nfs4_write_cached_acl(inode, resp_buf + res.acl_data_offset,
3602 acl_len);
3583 if (buf) { 3603 if (buf) {
3584 ret = -ERANGE; 3604 ret = -ERANGE;
3585 if (res.acl_len > buflen) 3605 if (acl_len > buflen)
3586 goto out_free; 3606 goto out_free;
3587 if (localpage) 3607 _copy_from_pages(buf, pages, res.acl_data_offset,
3588 memcpy(buf, resp_buf, res.acl_len); 3608 res.acl_len);
3589 } 3609 }
3590 ret = res.acl_len; 3610 ret = acl_len;
3591out_free: 3611out_free:
3592 if (localpage) 3612 for (i = 0; i < npages; i++)
3593 __free_page(localpage); 3613 if (pages[i])
3614 __free_page(pages[i]);
3615 if (args.acl_scratch)
3616 __free_page(args.acl_scratch);
3594 return ret; 3617 return ret;
3595} 3618}
3596 3619
@@ -3621,6 +3644,8 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
3621 nfs_zap_acl_cache(inode); 3644 nfs_zap_acl_cache(inode);
3622 ret = nfs4_read_cached_acl(inode, buf, buflen); 3645 ret = nfs4_read_cached_acl(inode, buf, buflen);
3623 if (ret != -ENOENT) 3646 if (ret != -ENOENT)
3647 /* -ENOENT is returned if there is no ACL or if there is an ACL
3648 * but no cached acl data, just the acl length */
3624 return ret; 3649 return ret;
3625 return nfs4_get_acl_uncached(inode, buf, buflen); 3650 return nfs4_get_acl_uncached(inode, buf, buflen);
3626} 3651}
@@ -3958,7 +3983,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4
3958static unsigned long 3983static unsigned long
3959nfs4_set_lock_task_retry(unsigned long timeout) 3984nfs4_set_lock_task_retry(unsigned long timeout)
3960{ 3985{
3961 schedule_timeout_killable(timeout); 3986 freezable_schedule_timeout_killable(timeout);
3962 timeout <<= 1; 3987 timeout <<= 1;
3963 if (timeout > NFS4_LOCK_MAXTIMEOUT) 3988 if (timeout > NFS4_LOCK_MAXTIMEOUT)
3964 return NFS4_LOCK_MAXTIMEOUT; 3989 return NFS4_LOCK_MAXTIMEOUT;
@@ -5021,23 +5046,6 @@ out:
5021 return ret; 5046 return ret;
5022} 5047}
5023 5048
5024/*
5025 * Reset the forechannel and backchannel slot tables
5026 */
5027static int nfs4_reset_slot_tables(struct nfs4_session *session)
5028{
5029 int status;
5030
5031 status = nfs4_reset_slot_table(&session->fc_slot_table,
5032 session->fc_attrs.max_reqs, 1);
5033 if (status)
5034 return status;
5035
5036 status = nfs4_reset_slot_table(&session->bc_slot_table,
5037 session->bc_attrs.max_reqs, 0);
5038 return status;
5039}
5040
5041/* Destroy the slot table */ 5049/* Destroy the slot table */
5042static void nfs4_destroy_slot_tables(struct nfs4_session *session) 5050static void nfs4_destroy_slot_tables(struct nfs4_session *session)
5043{ 5051{
@@ -5083,29 +5091,35 @@ out:
5083} 5091}
5084 5092
5085/* 5093/*
5086 * Initialize the forechannel and backchannel tables 5094 * Initialize or reset the forechannel and backchannel tables
5087 */ 5095 */
5088static int nfs4_init_slot_tables(struct nfs4_session *session) 5096static int nfs4_setup_session_slot_tables(struct nfs4_session *ses)
5089{ 5097{
5090 struct nfs4_slot_table *tbl; 5098 struct nfs4_slot_table *tbl;
5091 int status = 0; 5099 int status;
5092 5100
5093 tbl = &session->fc_slot_table; 5101 dprintk("--> %s\n", __func__);
5102 /* Fore channel */
5103 tbl = &ses->fc_slot_table;
5094 if (tbl->slots == NULL) { 5104 if (tbl->slots == NULL) {
5095 status = nfs4_init_slot_table(tbl, 5105 status = nfs4_init_slot_table(tbl, ses->fc_attrs.max_reqs, 1);
5096 session->fc_attrs.max_reqs, 1); 5106 if (status) /* -ENOMEM */
5107 return status;
5108 } else {
5109 status = nfs4_reset_slot_table(tbl, ses->fc_attrs.max_reqs, 1);
5097 if (status) 5110 if (status)
5098 return status; 5111 return status;
5099 } 5112 }
5100 5113 /* Back channel */
5101 tbl = &session->bc_slot_table; 5114 tbl = &ses->bc_slot_table;
5102 if (tbl->slots == NULL) { 5115 if (tbl->slots == NULL) {
5103 status = nfs4_init_slot_table(tbl, 5116 status = nfs4_init_slot_table(tbl, ses->bc_attrs.max_reqs, 0);
5104 session->bc_attrs.max_reqs, 0);
5105 if (status) 5117 if (status)
5106 nfs4_destroy_slot_tables(session); 5118 /* Fore and back channel share a connection so get
5107 } 5119 * both slot tables or neither */
5108 5120 nfs4_destroy_slot_tables(ses);
5121 } else
5122 status = nfs4_reset_slot_table(tbl, ses->bc_attrs.max_reqs, 0);
5109 return status; 5123 return status;
5110} 5124}
5111 5125
@@ -5293,13 +5307,9 @@ int nfs4_proc_create_session(struct nfs_client *clp)
5293 if (status) 5307 if (status)
5294 goto out; 5308 goto out;
5295 5309
5296 /* Init and reset the fore channel */ 5310 /* Init or reset the session slot tables */
5297 status = nfs4_init_slot_tables(session); 5311 status = nfs4_setup_session_slot_tables(session);
5298 dprintk("slot table initialization returned %d\n", status); 5312 dprintk("slot table setup returned %d\n", status);
5299 if (status)
5300 goto out;
5301 status = nfs4_reset_slot_tables(session);
5302 dprintk("slot table reset returned %d\n", status);
5303 if (status) 5313 if (status)
5304 goto out; 5314 goto out;
5305 5315
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 6a7107ae6b72..a53f33b4ac3a 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -49,6 +49,7 @@
49#include <linux/ratelimit.h> 49#include <linux/ratelimit.h>
50#include <linux/workqueue.h> 50#include <linux/workqueue.h>
51#include <linux/bitops.h> 51#include <linux/bitops.h>
52#include <linux/jiffies.h>
52 53
53#include "nfs4_fs.h" 54#include "nfs4_fs.h"
54#include "callback.h" 55#include "callback.h"
@@ -377,31 +378,24 @@ nfs4_find_state_owner_locked(struct nfs_server *server, struct rpc_cred *cred)
377{ 378{
378 struct rb_node **p = &server->state_owners.rb_node, 379 struct rb_node **p = &server->state_owners.rb_node,
379 *parent = NULL; 380 *parent = NULL;
380 struct nfs4_state_owner *sp, *res = NULL; 381 struct nfs4_state_owner *sp;
381 382
382 while (*p != NULL) { 383 while (*p != NULL) {
383 parent = *p; 384 parent = *p;
384 sp = rb_entry(parent, struct nfs4_state_owner, so_server_node); 385 sp = rb_entry(parent, struct nfs4_state_owner, so_server_node);
385 386
386 if (server < sp->so_server) {
387 p = &parent->rb_left;
388 continue;
389 }
390 if (server > sp->so_server) {
391 p = &parent->rb_right;
392 continue;
393 }
394 if (cred < sp->so_cred) 387 if (cred < sp->so_cred)
395 p = &parent->rb_left; 388 p = &parent->rb_left;
396 else if (cred > sp->so_cred) 389 else if (cred > sp->so_cred)
397 p = &parent->rb_right; 390 p = &parent->rb_right;
398 else { 391 else {
392 if (!list_empty(&sp->so_lru))
393 list_del_init(&sp->so_lru);
399 atomic_inc(&sp->so_count); 394 atomic_inc(&sp->so_count);
400 res = sp; 395 return sp;
401 break;
402 } 396 }
403 } 397 }
404 return res; 398 return NULL;
405} 399}
406 400
407static struct nfs4_state_owner * 401static struct nfs4_state_owner *
@@ -421,6 +415,8 @@ nfs4_insert_state_owner_locked(struct nfs4_state_owner *new)
421 else if (new->so_cred > sp->so_cred) 415 else if (new->so_cred > sp->so_cred)
422 p = &parent->rb_right; 416 p = &parent->rb_right;
423 else { 417 else {
418 if (!list_empty(&sp->so_lru))
419 list_del_init(&sp->so_lru);
424 atomic_inc(&sp->so_count); 420 atomic_inc(&sp->so_count);
425 return sp; 421 return sp;
426 } 422 }
@@ -462,6 +458,7 @@ nfs4_alloc_state_owner(void)
462 spin_lock_init(&sp->so_sequence.lock); 458 spin_lock_init(&sp->so_sequence.lock);
463 INIT_LIST_HEAD(&sp->so_sequence.list); 459 INIT_LIST_HEAD(&sp->so_sequence.list);
464 atomic_set(&sp->so_count, 1); 460 atomic_set(&sp->so_count, 1);
461 INIT_LIST_HEAD(&sp->so_lru);
465 return sp; 462 return sp;
466} 463}
467 464
@@ -479,6 +476,38 @@ nfs4_drop_state_owner(struct nfs4_state_owner *sp)
479 } 476 }
480} 477}
481 478
479static void nfs4_free_state_owner(struct nfs4_state_owner *sp)
480{
481 rpc_destroy_wait_queue(&sp->so_sequence.wait);
482 put_rpccred(sp->so_cred);
483 kfree(sp);
484}
485
486static void nfs4_gc_state_owners(struct nfs_server *server)
487{
488 struct nfs_client *clp = server->nfs_client;
489 struct nfs4_state_owner *sp, *tmp;
490 unsigned long time_min, time_max;
491 LIST_HEAD(doomed);
492
493 spin_lock(&clp->cl_lock);
494 time_max = jiffies;
495 time_min = (long)time_max - (long)clp->cl_lease_time;
496 list_for_each_entry_safe(sp, tmp, &server->state_owners_lru, so_lru) {
497 /* NB: LRU is sorted so that oldest is at the head */
498 if (time_in_range(sp->so_expires, time_min, time_max))
499 break;
500 list_move(&sp->so_lru, &doomed);
501 nfs4_remove_state_owner_locked(sp);
502 }
503 spin_unlock(&clp->cl_lock);
504
505 list_for_each_entry_safe(sp, tmp, &doomed, so_lru) {
506 list_del(&sp->so_lru);
507 nfs4_free_state_owner(sp);
508 }
509}
510
482/** 511/**
483 * nfs4_get_state_owner - Look up a state owner given a credential 512 * nfs4_get_state_owner - Look up a state owner given a credential
484 * @server: nfs_server to search 513 * @server: nfs_server to search
@@ -496,10 +525,10 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server,
496 sp = nfs4_find_state_owner_locked(server, cred); 525 sp = nfs4_find_state_owner_locked(server, cred);
497 spin_unlock(&clp->cl_lock); 526 spin_unlock(&clp->cl_lock);
498 if (sp != NULL) 527 if (sp != NULL)
499 return sp; 528 goto out;
500 new = nfs4_alloc_state_owner(); 529 new = nfs4_alloc_state_owner();
501 if (new == NULL) 530 if (new == NULL)
502 return NULL; 531 goto out;
503 new->so_server = server; 532 new->so_server = server;
504 new->so_cred = cred; 533 new->so_cred = cred;
505 spin_lock(&clp->cl_lock); 534 spin_lock(&clp->cl_lock);
@@ -511,26 +540,58 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server,
511 rpc_destroy_wait_queue(&new->so_sequence.wait); 540 rpc_destroy_wait_queue(&new->so_sequence.wait);
512 kfree(new); 541 kfree(new);
513 } 542 }
543out:
544 nfs4_gc_state_owners(server);
514 return sp; 545 return sp;
515} 546}
516 547
517/** 548/**
518 * nfs4_put_state_owner - Release a nfs4_state_owner 549 * nfs4_put_state_owner - Release a nfs4_state_owner
519 * @sp: state owner data to release 550 * @sp: state owner data to release
520 *
521 */ 551 */
522void nfs4_put_state_owner(struct nfs4_state_owner *sp) 552void nfs4_put_state_owner(struct nfs4_state_owner *sp)
523{ 553{
524 struct nfs_client *clp = sp->so_server->nfs_client; 554 struct nfs_server *server = sp->so_server;
525 struct rpc_cred *cred = sp->so_cred; 555 struct nfs_client *clp = server->nfs_client;
526 556
527 if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock)) 557 if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
528 return; 558 return;
529 nfs4_remove_state_owner_locked(sp); 559
560 if (!RB_EMPTY_NODE(&sp->so_server_node)) {
561 sp->so_expires = jiffies;
562 list_add_tail(&sp->so_lru, &server->state_owners_lru);
563 spin_unlock(&clp->cl_lock);
564 } else {
565 nfs4_remove_state_owner_locked(sp);
566 spin_unlock(&clp->cl_lock);
567 nfs4_free_state_owner(sp);
568 }
569}
570
571/**
572 * nfs4_purge_state_owners - Release all cached state owners
573 * @server: nfs_server with cached state owners to release
574 *
575 * Called at umount time. Remaining state owners will be on
576 * the LRU with ref count of zero.
577 */
578void nfs4_purge_state_owners(struct nfs_server *server)
579{
580 struct nfs_client *clp = server->nfs_client;
581 struct nfs4_state_owner *sp, *tmp;
582 LIST_HEAD(doomed);
583
584 spin_lock(&clp->cl_lock);
585 list_for_each_entry_safe(sp, tmp, &server->state_owners_lru, so_lru) {
586 list_move(&sp->so_lru, &doomed);
587 nfs4_remove_state_owner_locked(sp);
588 }
530 spin_unlock(&clp->cl_lock); 589 spin_unlock(&clp->cl_lock);
531 rpc_destroy_wait_queue(&sp->so_sequence.wait); 590
532 put_rpccred(cred); 591 list_for_each_entry_safe(sp, tmp, &doomed, so_lru) {
533 kfree(sp); 592 list_del(&sp->so_lru);
593 nfs4_free_state_owner(sp);
594 }
534} 595}
535 596
536static struct nfs4_state * 597static struct nfs4_state *
@@ -1402,6 +1463,7 @@ static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recov
1402restart: 1463restart:
1403 rcu_read_lock(); 1464 rcu_read_lock();
1404 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { 1465 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
1466 nfs4_purge_state_owners(server);
1405 spin_lock(&clp->cl_lock); 1467 spin_lock(&clp->cl_lock);
1406 for (pos = rb_first(&server->state_owners); 1468 for (pos = rb_first(&server->state_owners);
1407 pos != NULL; 1469 pos != NULL;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index e6161b213ed1..95e92e438407 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -2298,7 +2298,7 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,
2298 encode_getfh(xdr, &hdr); 2298 encode_getfh(xdr, &hdr);
2299 encode_getfattr(xdr, args->bitmask, &hdr); 2299 encode_getfattr(xdr, args->bitmask, &hdr);
2300 encode_restorefh(xdr, &hdr); 2300 encode_restorefh(xdr, &hdr);
2301 encode_getfattr(xdr, args->bitmask, &hdr); 2301 encode_getfattr(xdr, args->dir_bitmask, &hdr);
2302 encode_nops(&hdr); 2302 encode_nops(&hdr);
2303} 2303}
2304 2304
@@ -2517,11 +2517,13 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
2517 encode_compound_hdr(xdr, req, &hdr); 2517 encode_compound_hdr(xdr, req, &hdr);
2518 encode_sequence(xdr, &args->seq_args, &hdr); 2518 encode_sequence(xdr, &args->seq_args, &hdr);
2519 encode_putfh(xdr, args->fh, &hdr); 2519 encode_putfh(xdr, args->fh, &hdr);
2520 replen = hdr.replen + op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz + 1; 2520 replen = hdr.replen + op_decode_hdr_maxsz + 1;
2521 encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr); 2521 encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr);
2522 2522
2523 xdr_inline_pages(&req->rq_rcv_buf, replen << 2, 2523 xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
2524 args->acl_pages, args->acl_pgbase, args->acl_len); 2524 args->acl_pages, args->acl_pgbase, args->acl_len);
2525 xdr_set_scratch_buffer(xdr, page_address(args->acl_scratch), PAGE_SIZE);
2526
2525 encode_nops(&hdr); 2527 encode_nops(&hdr);
2526} 2528}
2527 2529
@@ -3790,7 +3792,8 @@ out_overflow:
3790} 3792}
3791 3793
3792static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, 3794static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
3793 const struct nfs_server *server, uint32_t *uid, int may_sleep) 3795 const struct nfs_server *server, uint32_t *uid,
3796 struct nfs4_string *owner_name)
3794{ 3797{
3795 uint32_t len; 3798 uint32_t len;
3796 __be32 *p; 3799 __be32 *p;
@@ -3807,8 +3810,12 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
3807 p = xdr_inline_decode(xdr, len); 3810 p = xdr_inline_decode(xdr, len);
3808 if (unlikely(!p)) 3811 if (unlikely(!p))
3809 goto out_overflow; 3812 goto out_overflow;
3810 if (!may_sleep) { 3813 if (owner_name != NULL) {
3811 /* do nothing */ 3814 owner_name->data = kmemdup(p, len, GFP_NOWAIT);
3815 if (owner_name->data != NULL) {
3816 owner_name->len = len;
3817 ret = NFS_ATTR_FATTR_OWNER_NAME;
3818 }
3812 } else if (len < XDR_MAX_NETOBJ) { 3819 } else if (len < XDR_MAX_NETOBJ) {
3813 if (nfs_map_name_to_uid(server, (char *)p, len, uid) == 0) 3820 if (nfs_map_name_to_uid(server, (char *)p, len, uid) == 0)
3814 ret = NFS_ATTR_FATTR_OWNER; 3821 ret = NFS_ATTR_FATTR_OWNER;
@@ -3828,7 +3835,8 @@ out_overflow:
3828} 3835}
3829 3836
3830static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, 3837static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
3831 const struct nfs_server *server, uint32_t *gid, int may_sleep) 3838 const struct nfs_server *server, uint32_t *gid,
3839 struct nfs4_string *group_name)
3832{ 3840{
3833 uint32_t len; 3841 uint32_t len;
3834 __be32 *p; 3842 __be32 *p;
@@ -3845,8 +3853,12 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
3845 p = xdr_inline_decode(xdr, len); 3853 p = xdr_inline_decode(xdr, len);
3846 if (unlikely(!p)) 3854 if (unlikely(!p))
3847 goto out_overflow; 3855 goto out_overflow;
3848 if (!may_sleep) { 3856 if (group_name != NULL) {
3849 /* do nothing */ 3857 group_name->data = kmemdup(p, len, GFP_NOWAIT);
3858 if (group_name->data != NULL) {
3859 group_name->len = len;
3860 ret = NFS_ATTR_FATTR_GROUP_NAME;
3861 }
3850 } else if (len < XDR_MAX_NETOBJ) { 3862 } else if (len < XDR_MAX_NETOBJ) {
3851 if (nfs_map_group_to_gid(server, (char *)p, len, gid) == 0) 3863 if (nfs_map_group_to_gid(server, (char *)p, len, gid) == 0)
3852 ret = NFS_ATTR_FATTR_GROUP; 3864 ret = NFS_ATTR_FATTR_GROUP;
@@ -4283,7 +4295,7 @@ xdr_error:
4283 4295
4284static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, 4296static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
4285 struct nfs_fattr *fattr, struct nfs_fh *fh, 4297 struct nfs_fattr *fattr, struct nfs_fh *fh,
4286 const struct nfs_server *server, int may_sleep) 4298 const struct nfs_server *server)
4287{ 4299{
4288 int status; 4300 int status;
4289 umode_t fmode = 0; 4301 umode_t fmode = 0;
@@ -4350,12 +4362,12 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
4350 goto xdr_error; 4362 goto xdr_error;
4351 fattr->valid |= status; 4363 fattr->valid |= status;
4352 4364
4353 status = decode_attr_owner(xdr, bitmap, server, &fattr->uid, may_sleep); 4365 status = decode_attr_owner(xdr, bitmap, server, &fattr->uid, fattr->owner_name);
4354 if (status < 0) 4366 if (status < 0)
4355 goto xdr_error; 4367 goto xdr_error;
4356 fattr->valid |= status; 4368 fattr->valid |= status;
4357 4369
4358 status = decode_attr_group(xdr, bitmap, server, &fattr->gid, may_sleep); 4370 status = decode_attr_group(xdr, bitmap, server, &fattr->gid, fattr->group_name);
4359 if (status < 0) 4371 if (status < 0)
4360 goto xdr_error; 4372 goto xdr_error;
4361 fattr->valid |= status; 4373 fattr->valid |= status;
@@ -4396,7 +4408,7 @@ xdr_error:
4396} 4408}
4397 4409
4398static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fattr, 4410static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fattr,
4399 struct nfs_fh *fh, const struct nfs_server *server, int may_sleep) 4411 struct nfs_fh *fh, const struct nfs_server *server)
4400{ 4412{
4401 __be32 *savep; 4413 __be32 *savep;
4402 uint32_t attrlen, 4414 uint32_t attrlen,
@@ -4415,7 +4427,7 @@ static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fat
4415 if (status < 0) 4427 if (status < 0)
4416 goto xdr_error; 4428 goto xdr_error;
4417 4429
4418 status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, server, may_sleep); 4430 status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, server);
4419 if (status < 0) 4431 if (status < 0)
4420 goto xdr_error; 4432 goto xdr_error;
4421 4433
@@ -4426,9 +4438,9 @@ xdr_error:
4426} 4438}
4427 4439
4428static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, 4440static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
4429 const struct nfs_server *server, int may_sleep) 4441 const struct nfs_server *server)
4430{ 4442{
4431 return decode_getfattr_generic(xdr, fattr, NULL, server, may_sleep); 4443 return decode_getfattr_generic(xdr, fattr, NULL, server);
4432} 4444}
4433 4445
4434/* 4446/*
@@ -4957,17 +4969,18 @@ decode_restorefh(struct xdr_stream *xdr)
4957} 4969}
4958 4970
4959static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, 4971static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
4960 size_t *acl_len) 4972 struct nfs_getaclres *res)
4961{ 4973{
4962 __be32 *savep; 4974 __be32 *savep, *bm_p;
4963 uint32_t attrlen, 4975 uint32_t attrlen,
4964 bitmap[3] = {0}; 4976 bitmap[3] = {0};
4965 struct kvec *iov = req->rq_rcv_buf.head; 4977 struct kvec *iov = req->rq_rcv_buf.head;
4966 int status; 4978 int status;
4967 4979
4968 *acl_len = 0; 4980 res->acl_len = 0;
4969 if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) 4981 if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
4970 goto out; 4982 goto out;
4983 bm_p = xdr->p;
4971 if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) 4984 if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
4972 goto out; 4985 goto out;
4973 if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0) 4986 if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
@@ -4979,18 +4992,30 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
4979 size_t hdrlen; 4992 size_t hdrlen;
4980 u32 recvd; 4993 u32 recvd;
4981 4994
4995 /* The bitmap (xdr len + bitmaps) and the attr xdr len words
4996 * are stored with the acl data to handle the problem of
4997 * variable length bitmaps.*/
4998 xdr->p = bm_p;
4999 res->acl_data_offset = be32_to_cpup(bm_p) + 2;
5000 res->acl_data_offset <<= 2;
5001
4982 /* We ignore &savep and don't do consistency checks on 5002 /* We ignore &savep and don't do consistency checks on
4983 * the attr length. Let userspace figure it out.... */ 5003 * the attr length. Let userspace figure it out.... */
4984 hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base; 5004 hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base;
5005 attrlen += res->acl_data_offset;
4985 recvd = req->rq_rcv_buf.len - hdrlen; 5006 recvd = req->rq_rcv_buf.len - hdrlen;
4986 if (attrlen > recvd) { 5007 if (attrlen > recvd) {
4987 dprintk("NFS: server cheating in getattr" 5008 if (res->acl_flags & NFS4_ACL_LEN_REQUEST) {
4988 " acl reply: attrlen %u > recvd %u\n", 5009 /* getxattr interface called with a NULL buf */
5010 res->acl_len = attrlen;
5011 goto out;
5012 }
5013 dprintk("NFS: acl reply: attrlen %u > recvd %u\n",
4989 attrlen, recvd); 5014 attrlen, recvd);
4990 return -EINVAL; 5015 return -EINVAL;
4991 } 5016 }
4992 xdr_read_pages(xdr, attrlen); 5017 xdr_read_pages(xdr, attrlen);
4993 *acl_len = attrlen; 5018 res->acl_len = attrlen;
4994 } else 5019 } else
4995 status = -EOPNOTSUPP; 5020 status = -EOPNOTSUPP;
4996 5021
@@ -5696,8 +5721,7 @@ static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp,
5696 status = decode_open_downgrade(xdr, res); 5721 status = decode_open_downgrade(xdr, res);
5697 if (status != 0) 5722 if (status != 0)
5698 goto out; 5723 goto out;
5699 decode_getfattr(xdr, res->fattr, res->server, 5724 decode_getfattr(xdr, res->fattr, res->server);
5700 !RPC_IS_ASYNC(rqstp->rq_task));
5701out: 5725out:
5702 return status; 5726 return status;
5703} 5727}
@@ -5723,8 +5747,7 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5723 status = decode_access(xdr, res); 5747 status = decode_access(xdr, res);
5724 if (status != 0) 5748 if (status != 0)
5725 goto out; 5749 goto out;
5726 decode_getfattr(xdr, res->fattr, res->server, 5750 decode_getfattr(xdr, res->fattr, res->server);
5727 !RPC_IS_ASYNC(rqstp->rq_task));
5728out: 5751out:
5729 return status; 5752 return status;
5730} 5753}
@@ -5753,8 +5776,7 @@ static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5753 status = decode_getfh(xdr, res->fh); 5776 status = decode_getfh(xdr, res->fh);
5754 if (status) 5777 if (status)
5755 goto out; 5778 goto out;
5756 status = decode_getfattr(xdr, res->fattr, res->server 5779 status = decode_getfattr(xdr, res->fattr, res->server);
5757 ,!RPC_IS_ASYNC(rqstp->rq_task));
5758out: 5780out:
5759 return status; 5781 return status;
5760} 5782}
@@ -5780,8 +5802,7 @@ static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp,
5780 goto out; 5802 goto out;
5781 status = decode_getfh(xdr, res->fh); 5803 status = decode_getfh(xdr, res->fh);
5782 if (status == 0) 5804 if (status == 0)
5783 status = decode_getfattr(xdr, res->fattr, res->server, 5805 status = decode_getfattr(xdr, res->fattr, res->server);
5784 !RPC_IS_ASYNC(rqstp->rq_task));
5785out: 5806out:
5786 return status; 5807 return status;
5787} 5808}
@@ -5807,8 +5828,7 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5807 status = decode_remove(xdr, &res->cinfo); 5828 status = decode_remove(xdr, &res->cinfo);
5808 if (status) 5829 if (status)
5809 goto out; 5830 goto out;
5810 decode_getfattr(xdr, res->dir_attr, res->server, 5831 decode_getfattr(xdr, res->dir_attr, res->server);
5811 !RPC_IS_ASYNC(rqstp->rq_task));
5812out: 5832out:
5813 return status; 5833 return status;
5814} 5834}
@@ -5841,14 +5861,12 @@ static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5841 if (status) 5861 if (status)
5842 goto out; 5862 goto out;
5843 /* Current FH is target directory */ 5863 /* Current FH is target directory */
5844 if (decode_getfattr(xdr, res->new_fattr, res->server, 5864 if (decode_getfattr(xdr, res->new_fattr, res->server))
5845 !RPC_IS_ASYNC(rqstp->rq_task)) != 0)
5846 goto out; 5865 goto out;
5847 status = decode_restorefh(xdr); 5866 status = decode_restorefh(xdr);
5848 if (status) 5867 if (status)
5849 goto out; 5868 goto out;
5850 decode_getfattr(xdr, res->old_fattr, res->server, 5869 decode_getfattr(xdr, res->old_fattr, res->server);
5851 !RPC_IS_ASYNC(rqstp->rq_task));
5852out: 5870out:
5853 return status; 5871 return status;
5854} 5872}
@@ -5884,14 +5902,12 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5884 * Note order: OP_LINK leaves the directory as the current 5902 * Note order: OP_LINK leaves the directory as the current
5885 * filehandle. 5903 * filehandle.
5886 */ 5904 */
5887 if (decode_getfattr(xdr, res->dir_attr, res->server, 5905 if (decode_getfattr(xdr, res->dir_attr, res->server))
5888 !RPC_IS_ASYNC(rqstp->rq_task)) != 0)
5889 goto out; 5906 goto out;
5890 status = decode_restorefh(xdr); 5907 status = decode_restorefh(xdr);
5891 if (status) 5908 if (status)
5892 goto out; 5909 goto out;
5893 decode_getfattr(xdr, res->fattr, res->server, 5910 decode_getfattr(xdr, res->fattr, res->server);
5894 !RPC_IS_ASYNC(rqstp->rq_task));
5895out: 5911out:
5896 return status; 5912 return status;
5897} 5913}
@@ -5923,14 +5939,12 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5923 status = decode_getfh(xdr, res->fh); 5939 status = decode_getfh(xdr, res->fh);
5924 if (status) 5940 if (status)
5925 goto out; 5941 goto out;
5926 if (decode_getfattr(xdr, res->fattr, res->server, 5942 if (decode_getfattr(xdr, res->fattr, res->server))
5927 !RPC_IS_ASYNC(rqstp->rq_task)) != 0)
5928 goto out; 5943 goto out;
5929 status = decode_restorefh(xdr); 5944 status = decode_restorefh(xdr);
5930 if (status) 5945 if (status)
5931 goto out; 5946 goto out;
5932 decode_getfattr(xdr, res->dir_fattr, res->server, 5947 decode_getfattr(xdr, res->dir_fattr, res->server);
5933 !RPC_IS_ASYNC(rqstp->rq_task));
5934out: 5948out:
5935 return status; 5949 return status;
5936} 5950}
@@ -5962,8 +5976,7 @@ static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5962 status = decode_putfh(xdr); 5976 status = decode_putfh(xdr);
5963 if (status) 5977 if (status)
5964 goto out; 5978 goto out;
5965 status = decode_getfattr(xdr, res->fattr, res->server, 5979 status = decode_getfattr(xdr, res->fattr, res->server);
5966 !RPC_IS_ASYNC(rqstp->rq_task));
5967out: 5980out:
5968 return status; 5981 return status;
5969} 5982}
@@ -6028,7 +6041,7 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
6028 status = decode_putfh(xdr); 6041 status = decode_putfh(xdr);
6029 if (status) 6042 if (status)
6030 goto out; 6043 goto out;
6031 status = decode_getacl(xdr, rqstp, &res->acl_len); 6044 status = decode_getacl(xdr, rqstp, res);
6032 6045
6033out: 6046out:
6034 return status; 6047 return status;
@@ -6061,8 +6074,7 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
6061 * an ESTALE error. Shouldn't be a problem, 6074 * an ESTALE error. Shouldn't be a problem,
6062 * though, since fattr->valid will remain unset. 6075 * though, since fattr->valid will remain unset.
6063 */ 6076 */
6064 decode_getfattr(xdr, res->fattr, res->server, 6077 decode_getfattr(xdr, res->fattr, res->server);
6065 !RPC_IS_ASYNC(rqstp->rq_task));
6066out: 6078out:
6067 return status; 6079 return status;
6068} 6080}
@@ -6093,13 +6105,11 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
6093 goto out; 6105 goto out;
6094 if (decode_getfh(xdr, &res->fh) != 0) 6106 if (decode_getfh(xdr, &res->fh) != 0)
6095 goto out; 6107 goto out;
6096 if (decode_getfattr(xdr, res->f_attr, res->server, 6108 if (decode_getfattr(xdr, res->f_attr, res->server) != 0)
6097 !RPC_IS_ASYNC(rqstp->rq_task)) != 0)
6098 goto out; 6109 goto out;
6099 if (decode_restorefh(xdr) != 0) 6110 if (decode_restorefh(xdr) != 0)
6100 goto out; 6111 goto out;
6101 decode_getfattr(xdr, res->dir_attr, res->server, 6112 decode_getfattr(xdr, res->dir_attr, res->server);
6102 !RPC_IS_ASYNC(rqstp->rq_task));
6103out: 6113out:
6104 return status; 6114 return status;
6105} 6115}
@@ -6147,8 +6157,7 @@ static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp,
6147 status = decode_open(xdr, res); 6157 status = decode_open(xdr, res);
6148 if (status) 6158 if (status)
6149 goto out; 6159 goto out;
6150 decode_getfattr(xdr, res->f_attr, res->server, 6160 decode_getfattr(xdr, res->f_attr, res->server);
6151 !RPC_IS_ASYNC(rqstp->rq_task));
6152out: 6161out:
6153 return status; 6162 return status;
6154} 6163}
@@ -6175,8 +6184,7 @@ static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp,
6175 status = decode_setattr(xdr); 6184 status = decode_setattr(xdr);
6176 if (status) 6185 if (status)
6177 goto out; 6186 goto out;
6178 decode_getfattr(xdr, res->fattr, res->server, 6187 decode_getfattr(xdr, res->fattr, res->server);
6179 !RPC_IS_ASYNC(rqstp->rq_task));
6180out: 6188out:
6181 return status; 6189 return status;
6182} 6190}
@@ -6356,8 +6364,7 @@ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
6356 if (status) 6364 if (status)
6357 goto out; 6365 goto out;
6358 if (res->fattr) 6366 if (res->fattr)
6359 decode_getfattr(xdr, res->fattr, res->server, 6367 decode_getfattr(xdr, res->fattr, res->server);
6360 !RPC_IS_ASYNC(rqstp->rq_task));
6361 if (!status) 6368 if (!status)
6362 status = res->count; 6369 status = res->count;
6363out: 6370out:
@@ -6386,8 +6393,7 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
6386 if (status) 6393 if (status)
6387 goto out; 6394 goto out;
6388 if (res->fattr) 6395 if (res->fattr)
6389 decode_getfattr(xdr, res->fattr, res->server, 6396 decode_getfattr(xdr, res->fattr, res->server);
6390 !RPC_IS_ASYNC(rqstp->rq_task));
6391out: 6397out:
6392 return status; 6398 return status;
6393} 6399}
@@ -6546,8 +6552,7 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp,
6546 status = decode_delegreturn(xdr); 6552 status = decode_delegreturn(xdr);
6547 if (status != 0) 6553 if (status != 0)
6548 goto out; 6554 goto out;
6549 decode_getfattr(xdr, res->fattr, res->server, 6555 decode_getfattr(xdr, res->fattr, res->server);
6550 !RPC_IS_ASYNC(rqstp->rq_task));
6551out: 6556out:
6552 return status; 6557 return status;
6553} 6558}
@@ -6576,8 +6581,7 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req,
6576 goto out; 6581 goto out;
6577 xdr_enter_page(xdr, PAGE_SIZE); 6582 xdr_enter_page(xdr, PAGE_SIZE);
6578 status = decode_getfattr(xdr, &res->fs_locations->fattr, 6583 status = decode_getfattr(xdr, &res->fs_locations->fattr,
6579 res->fs_locations->server, 6584 res->fs_locations->server);
6580 !RPC_IS_ASYNC(req->rq_task));
6581out: 6585out:
6582 return status; 6586 return status;
6583} 6587}
@@ -6826,8 +6830,7 @@ static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp,
6826 status = decode_layoutcommit(xdr, rqstp, res); 6830 status = decode_layoutcommit(xdr, rqstp, res);
6827 if (status) 6831 if (status)
6828 goto out; 6832 goto out;
6829 decode_getfattr(xdr, res->fattr, res->server, 6833 decode_getfattr(xdr, res->fattr, res->server);
6830 !RPC_IS_ASYNC(rqstp->rq_task));
6831out: 6834out:
6832 return status; 6835 return status;
6833} 6836}
@@ -6958,7 +6961,7 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
6958 goto out_overflow; 6961 goto out_overflow;
6959 6962
6960 if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, 6963 if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh,
6961 entry->server, 1) < 0) 6964 entry->server) < 0)
6962 goto out_overflow; 6965 goto out_overflow;
6963 if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) 6966 if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID)
6964 entry->ino = entry->fattr->mounted_on_fileid; 6967 entry->ino = entry->fattr->mounted_on_fileid;
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index c807ab93140e..55d01280a609 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -551,7 +551,8 @@ static const struct nfs_pageio_ops objio_pg_write_ops = {
551static struct pnfs_layoutdriver_type objlayout_type = { 551static struct pnfs_layoutdriver_type objlayout_type = {
552 .id = LAYOUT_OSD2_OBJECTS, 552 .id = LAYOUT_OSD2_OBJECTS,
553 .name = "LAYOUT_OSD2_OBJECTS", 553 .name = "LAYOUT_OSD2_OBJECTS",
554 .flags = PNFS_LAYOUTRET_ON_SETATTR, 554 .flags = PNFS_LAYOUTRET_ON_SETATTR |
555 PNFS_LAYOUTRET_ON_ERROR,
555 556
556 .alloc_layout_hdr = objlayout_alloc_layout_hdr, 557 .alloc_layout_hdr = objlayout_alloc_layout_hdr,
557 .free_layout_hdr = objlayout_free_layout_hdr, 558 .free_layout_hdr = objlayout_free_layout_hdr,
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index 72074e3a04f9..b3c29039f5b8 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -254,6 +254,8 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
254 oir->status = rdata->task.tk_status = status; 254 oir->status = rdata->task.tk_status = status;
255 if (status >= 0) 255 if (status >= 0)
256 rdata->res.count = status; 256 rdata->res.count = status;
257 else
258 rdata->pnfs_error = status;
257 objlayout_iodone(oir); 259 objlayout_iodone(oir);
258 /* must not use oir after this point */ 260 /* must not use oir after this point */
259 261
@@ -334,6 +336,8 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
334 if (status >= 0) { 336 if (status >= 0) {
335 wdata->res.count = status; 337 wdata->res.count = status;
336 wdata->verf.committed = oir->committed; 338 wdata->verf.committed = oir->committed;
339 } else {
340 wdata->pnfs_error = status;
337 } 341 }
338 objlayout_iodone(oir); 342 objlayout_iodone(oir);
339 /* must not use oir after this point */ 343 /* must not use oir after this point */
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 8e672a2b2d69..17149a490065 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1166,6 +1166,33 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
1166} 1166}
1167EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); 1167EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
1168 1168
1169static int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head)
1170{
1171 struct nfs_pageio_descriptor pgio;
1172 LIST_HEAD(failed);
1173
1174 /* Resend all requests through the MDS */
1175 nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE);
1176 while (!list_empty(head)) {
1177 struct nfs_page *req = nfs_list_entry(head->next);
1178
1179 nfs_list_remove_request(req);
1180 if (!nfs_pageio_add_request(&pgio, req))
1181 nfs_list_add_request(req, &failed);
1182 }
1183 nfs_pageio_complete(&pgio);
1184
1185 if (!list_empty(&failed)) {
1186 /* For some reason our attempt to resend pages. Mark the
1187 * overall send request as having failed, and let
1188 * nfs_writeback_release_full deal with the error.
1189 */
1190 list_move(&failed, head);
1191 return -EIO;
1192 }
1193 return 0;
1194}
1195
1169/* 1196/*
1170 * Called by non rpc-based layout drivers 1197 * Called by non rpc-based layout drivers
1171 */ 1198 */
@@ -1175,9 +1202,17 @@ void pnfs_ld_write_done(struct nfs_write_data *data)
1175 pnfs_set_layoutcommit(data); 1202 pnfs_set_layoutcommit(data);
1176 data->mds_ops->rpc_call_done(&data->task, data); 1203 data->mds_ops->rpc_call_done(&data->task, data);
1177 } else { 1204 } else {
1178 put_lseg(data->lseg);
1179 data->lseg = NULL;
1180 dprintk("pnfs write error = %d\n", data->pnfs_error); 1205 dprintk("pnfs write error = %d\n", data->pnfs_error);
1206 if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags &
1207 PNFS_LAYOUTRET_ON_ERROR) {
1208 /* Don't lo_commit on error, Server will needs to
1209 * preform a file recovery.
1210 */
1211 clear_bit(NFS_INO_LAYOUTCOMMIT,
1212 &NFS_I(data->inode)->flags);
1213 pnfs_return_layout(data->inode);
1214 }
1215 data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages);
1181 } 1216 }
1182 data->mds_ops->rpc_release(data); 1217 data->mds_ops->rpc_release(data);
1183} 1218}
@@ -1267,6 +1302,9 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
1267 put_lseg(data->lseg); 1302 put_lseg(data->lseg);
1268 data->lseg = NULL; 1303 data->lseg = NULL;
1269 dprintk("pnfs write error = %d\n", data->pnfs_error); 1304 dprintk("pnfs write error = %d\n", data->pnfs_error);
1305 if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags &
1306 PNFS_LAYOUTRET_ON_ERROR)
1307 pnfs_return_layout(data->inode);
1270 1308
1271 nfs_pageio_init_read_mds(&pgio, data->inode); 1309 nfs_pageio_init_read_mds(&pgio, data->inode);
1272 1310
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 1509530cb111..53d593a0a4f2 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -68,6 +68,7 @@ enum {
68enum layoutdriver_policy_flags { 68enum layoutdriver_policy_flags {
69 /* Should the pNFS client commit and return the layout upon a setattr */ 69 /* Should the pNFS client commit and return the layout upon a setattr */
70 PNFS_LAYOUTRET_ON_SETATTR = 1 << 0, 70 PNFS_LAYOUTRET_ON_SETATTR = 1 << 0,
71 PNFS_LAYOUTRET_ON_ERROR = 1 << 1,
71}; 72};
72 73
73struct nfs4_deviceid_node; 74struct nfs4_deviceid_node;
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index f48125da198a..0c672588fe5a 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -41,6 +41,7 @@
41#include <linux/nfs_fs.h> 41#include <linux/nfs_fs.h>
42#include <linux/nfs_page.h> 42#include <linux/nfs_page.h>
43#include <linux/lockd/bind.h> 43#include <linux/lockd/bind.h>
44#include <linux/freezer.h>
44#include "internal.h" 45#include "internal.h"
45 46
46#define NFSDBG_FACILITY NFSDBG_PROC 47#define NFSDBG_FACILITY NFSDBG_PROC
@@ -59,7 +60,7 @@ nfs_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
59 res = rpc_call_sync(clnt, msg, flags); 60 res = rpc_call_sync(clnt, msg, flags);
60 if (res != -EKEYEXPIRED) 61 if (res != -EKEYEXPIRED)
61 break; 62 break;
62 schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME); 63 freezable_schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME);
63 res = -ERESTARTSYS; 64 res = -ERESTARTSYS;
64 } while (!fatal_signal_pending(current)); 65 } while (!fatal_signal_pending(current));
65 return res; 66 return res;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 134777406ee3..3dfa4f112c0a 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -41,7 +41,6 @@
41#include <linux/lockd/bind.h> 41#include <linux/lockd/bind.h>
42#include <linux/seq_file.h> 42#include <linux/seq_file.h>
43#include <linux/mount.h> 43#include <linux/mount.h>
44#include <linux/mnt_namespace.h>
45#include <linux/namei.h> 44#include <linux/namei.h>
46#include <linux/nfs_idmap.h> 45#include <linux/nfs_idmap.h>
47#include <linux/vfs.h> 46#include <linux/vfs.h>
@@ -263,10 +262,10 @@ static match_table_t nfs_local_lock_tokens = {
263 262
264static void nfs_umount_begin(struct super_block *); 263static void nfs_umount_begin(struct super_block *);
265static int nfs_statfs(struct dentry *, struct kstatfs *); 264static int nfs_statfs(struct dentry *, struct kstatfs *);
266static int nfs_show_options(struct seq_file *, struct vfsmount *); 265static int nfs_show_options(struct seq_file *, struct dentry *);
267static int nfs_show_devname(struct seq_file *, struct vfsmount *); 266static int nfs_show_devname(struct seq_file *, struct dentry *);
268static int nfs_show_path(struct seq_file *, struct vfsmount *); 267static int nfs_show_path(struct seq_file *, struct dentry *);
269static int nfs_show_stats(struct seq_file *, struct vfsmount *); 268static int nfs_show_stats(struct seq_file *, struct dentry *);
270static struct dentry *nfs_fs_mount(struct file_system_type *, 269static struct dentry *nfs_fs_mount(struct file_system_type *,
271 int, const char *, void *); 270 int, const char *, void *);
272static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type, 271static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type,
@@ -721,9 +720,9 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
721/* 720/*
722 * Describe the mount options on this VFS mountpoint 721 * Describe the mount options on this VFS mountpoint
723 */ 722 */
724static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) 723static int nfs_show_options(struct seq_file *m, struct dentry *root)
725{ 724{
726 struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); 725 struct nfs_server *nfss = NFS_SB(root->d_sb);
727 726
728 nfs_show_mount_options(m, nfss, 0); 727 nfs_show_mount_options(m, nfss, 0);
729 728
@@ -761,14 +760,14 @@ static void show_pnfs(struct seq_file *m, struct nfs_server *server) {}
761#endif 760#endif
762#endif 761#endif
763 762
764static int nfs_show_devname(struct seq_file *m, struct vfsmount *mnt) 763static int nfs_show_devname(struct seq_file *m, struct dentry *root)
765{ 764{
766 char *page = (char *) __get_free_page(GFP_KERNEL); 765 char *page = (char *) __get_free_page(GFP_KERNEL);
767 char *devname, *dummy; 766 char *devname, *dummy;
768 int err = 0; 767 int err = 0;
769 if (!page) 768 if (!page)
770 return -ENOMEM; 769 return -ENOMEM;
771 devname = nfs_path(&dummy, mnt->mnt_root, page, PAGE_SIZE); 770 devname = nfs_path(&dummy, root, page, PAGE_SIZE);
772 if (IS_ERR(devname)) 771 if (IS_ERR(devname))
773 err = PTR_ERR(devname); 772 err = PTR_ERR(devname);
774 else 773 else
@@ -777,7 +776,7 @@ static int nfs_show_devname(struct seq_file *m, struct vfsmount *mnt)
777 return err; 776 return err;
778} 777}
779 778
780static int nfs_show_path(struct seq_file *m, struct vfsmount *mnt) 779static int nfs_show_path(struct seq_file *m, struct dentry *dentry)
781{ 780{
782 seq_puts(m, "/"); 781 seq_puts(m, "/");
783 return 0; 782 return 0;
@@ -786,10 +785,10 @@ static int nfs_show_path(struct seq_file *m, struct vfsmount *mnt)
786/* 785/*
787 * Present statistical information for this VFS mountpoint 786 * Present statistical information for this VFS mountpoint
788 */ 787 */
789static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) 788static int nfs_show_stats(struct seq_file *m, struct dentry *root)
790{ 789{
791 int i, cpu; 790 int i, cpu;
792 struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); 791 struct nfs_server *nfss = NFS_SB(root->d_sb);
793 struct rpc_auth *auth = nfss->client->cl_auth; 792 struct rpc_auth *auth = nfss->client->cl_auth;
794 struct nfs_iostats totals = { }; 793 struct nfs_iostats totals = { };
795 794
@@ -799,10 +798,10 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
799 * Display all mount option settings 798 * Display all mount option settings
800 */ 799 */
801 seq_printf(m, "\n\topts:\t"); 800 seq_printf(m, "\n\topts:\t");
802 seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? "ro" : "rw"); 801 seq_puts(m, root->d_sb->s_flags & MS_RDONLY ? "ro" : "rw");
803 seq_puts(m, mnt->mnt_sb->s_flags & MS_SYNCHRONOUS ? ",sync" : ""); 802 seq_puts(m, root->d_sb->s_flags & MS_SYNCHRONOUS ? ",sync" : "");
804 seq_puts(m, mnt->mnt_sb->s_flags & MS_NOATIME ? ",noatime" : ""); 803 seq_puts(m, root->d_sb->s_flags & MS_NOATIME ? ",noatime" : "");
805 seq_puts(m, mnt->mnt_sb->s_flags & MS_NODIRATIME ? ",nodiratime" : ""); 804 seq_puts(m, root->d_sb->s_flags & MS_NODIRATIME ? ",nodiratime" : "");
806 nfs_show_mount_options(m, nfss, 1); 805 nfs_show_mount_options(m, nfss, 1);
807 806
808 seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ); 807 seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ);
@@ -909,10 +908,24 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int ve
909 data->auth_flavor_len = 1; 908 data->auth_flavor_len = 1;
910 data->version = version; 909 data->version = version;
911 data->minorversion = 0; 910 data->minorversion = 0;
911 security_init_mnt_opts(&data->lsm_opts);
912 } 912 }
913 return data; 913 return data;
914} 914}
915 915
916static void nfs_free_parsed_mount_data(struct nfs_parsed_mount_data *data)
917{
918 if (data) {
919 kfree(data->client_address);
920 kfree(data->mount_server.hostname);
921 kfree(data->nfs_server.export_path);
922 kfree(data->nfs_server.hostname);
923 kfree(data->fscache_uniq);
924 security_free_mnt_opts(&data->lsm_opts);
925 kfree(data);
926 }
927}
928
916/* 929/*
917 * Sanity-check a server address provided by the mount command. 930 * Sanity-check a server address provided by the mount command.
918 * 931 *
@@ -2220,9 +2233,7 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
2220 data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION); 2233 data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION);
2221 mntfh = nfs_alloc_fhandle(); 2234 mntfh = nfs_alloc_fhandle();
2222 if (data == NULL || mntfh == NULL) 2235 if (data == NULL || mntfh == NULL)
2223 goto out_free_fh; 2236 goto out;
2224
2225 security_init_mnt_opts(&data->lsm_opts);
2226 2237
2227 /* Validate the mount data */ 2238 /* Validate the mount data */
2228 error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name); 2239 error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name);
@@ -2234,8 +2245,6 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
2234#ifdef CONFIG_NFS_V4 2245#ifdef CONFIG_NFS_V4
2235 if (data->version == 4) { 2246 if (data->version == 4) {
2236 mntroot = nfs4_try_mount(flags, dev_name, data); 2247 mntroot = nfs4_try_mount(flags, dev_name, data);
2237 kfree(data->client_address);
2238 kfree(data->nfs_server.export_path);
2239 goto out; 2248 goto out;
2240 } 2249 }
2241#endif /* CONFIG_NFS_V4 */ 2250#endif /* CONFIG_NFS_V4 */
@@ -2290,13 +2299,8 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
2290 s->s_flags |= MS_ACTIVE; 2299 s->s_flags |= MS_ACTIVE;
2291 2300
2292out: 2301out:
2293 kfree(data->nfs_server.hostname); 2302 nfs_free_parsed_mount_data(data);
2294 kfree(data->mount_server.hostname);
2295 kfree(data->fscache_uniq);
2296 security_free_mnt_opts(&data->lsm_opts);
2297out_free_fh:
2298 nfs_free_fhandle(mntfh); 2303 nfs_free_fhandle(mntfh);
2299 kfree(data);
2300 return mntroot; 2304 return mntroot;
2301 2305
2302out_err_nosb: 2306out_err_nosb:
@@ -2623,9 +2627,7 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags,
2623 2627
2624 mntfh = nfs_alloc_fhandle(); 2628 mntfh = nfs_alloc_fhandle();
2625 if (data == NULL || mntfh == NULL) 2629 if (data == NULL || mntfh == NULL)
2626 goto out_free_fh; 2630 goto out;
2627
2628 security_init_mnt_opts(&data->lsm_opts);
2629 2631
2630 /* Get a volume representation */ 2632 /* Get a volume representation */
2631 server = nfs4_create_server(data, mntfh); 2633 server = nfs4_create_server(data, mntfh);
@@ -2677,13 +2679,10 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags,
2677 2679
2678 s->s_flags |= MS_ACTIVE; 2680 s->s_flags |= MS_ACTIVE;
2679 2681
2680 security_free_mnt_opts(&data->lsm_opts);
2681 nfs_free_fhandle(mntfh); 2682 nfs_free_fhandle(mntfh);
2682 return mntroot; 2683 return mntroot;
2683 2684
2684out: 2685out:
2685 security_free_mnt_opts(&data->lsm_opts);
2686out_free_fh:
2687 nfs_free_fhandle(mntfh); 2686 nfs_free_fhandle(mntfh);
2688 return ERR_PTR(error); 2687 return ERR_PTR(error);
2689 2688
@@ -2788,11 +2787,15 @@ static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt,
2788 const char *export_path) 2787 const char *export_path)
2789{ 2788{
2790 struct dentry *dentry; 2789 struct dentry *dentry;
2791 int ret = nfs_referral_loop_protect(); 2790 int err;
2791
2792 if (IS_ERR(root_mnt))
2793 return ERR_CAST(root_mnt);
2792 2794
2793 if (ret) { 2795 err = nfs_referral_loop_protect();
2796 if (err) {
2794 mntput(root_mnt); 2797 mntput(root_mnt);
2795 return ERR_PTR(ret); 2798 return ERR_PTR(err);
2796 } 2799 }
2797 2800
2798 dentry = mount_subtree(root_mnt, export_path); 2801 dentry = mount_subtree(root_mnt, export_path);
@@ -2816,9 +2819,7 @@ static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
2816 data->nfs_server.hostname); 2819 data->nfs_server.hostname);
2817 data->nfs_server.export_path = export_path; 2820 data->nfs_server.export_path = export_path;
2818 2821
2819 res = ERR_CAST(root_mnt); 2822 res = nfs_follow_remote_path(root_mnt, export_path);
2820 if (!IS_ERR(root_mnt))
2821 res = nfs_follow_remote_path(root_mnt, export_path);
2822 2823
2823 dfprintk(MOUNT, "<-- nfs4_try_mount() = %ld%s\n", 2824 dfprintk(MOUNT, "<-- nfs4_try_mount() = %ld%s\n",
2824 IS_ERR(res) ? PTR_ERR(res) : 0, 2825 IS_ERR(res) ? PTR_ERR(res) : 0,
@@ -2838,7 +2839,7 @@ static struct dentry *nfs4_mount(struct file_system_type *fs_type,
2838 2839
2839 data = nfs_alloc_parsed_mount_data(4); 2840 data = nfs_alloc_parsed_mount_data(4);
2840 if (data == NULL) 2841 if (data == NULL)
2841 goto out_free_data; 2842 goto out;
2842 2843
2843 /* Validate the mount data */ 2844 /* Validate the mount data */
2844 error = nfs4_validate_mount_data(raw_data, data, dev_name); 2845 error = nfs4_validate_mount_data(raw_data, data, dev_name);
@@ -2852,12 +2853,7 @@ static struct dentry *nfs4_mount(struct file_system_type *fs_type,
2852 error = PTR_ERR(res); 2853 error = PTR_ERR(res);
2853 2854
2854out: 2855out:
2855 kfree(data->client_address); 2856 nfs_free_parsed_mount_data(data);
2856 kfree(data->nfs_server.export_path);
2857 kfree(data->nfs_server.hostname);
2858 kfree(data->fscache_uniq);
2859out_free_data:
2860 kfree(data);
2861 dprintk("<-- nfs4_mount() = %d%s\n", error, 2857 dprintk("<-- nfs4_mount() = %d%s\n", error,
2862 error != 0 ? " [error]" : ""); 2858 error != 0 ? " [error]" : "");
2863 return res; 2859 return res;
@@ -3079,9 +3075,7 @@ static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type,
3079 flags, data, data->hostname); 3075 flags, data, data->hostname);
3080 data->mnt_path = export_path; 3076 data->mnt_path = export_path;
3081 3077
3082 res = ERR_CAST(root_mnt); 3078 res = nfs_follow_remote_path(root_mnt, export_path);
3083 if (!IS_ERR(root_mnt))
3084 res = nfs_follow_remote_path(root_mnt, export_path);
3085 dprintk("<-- nfs4_referral_mount() = %ld%s\n", 3079 dprintk("<-- nfs4_referral_mount() = %ld%s\n",
3086 IS_ERR(res) ? PTR_ERR(res) : 0, 3080 IS_ERR(res) ? PTR_ERR(res) : 0,
3087 IS_ERR(res) ? " [error]" : ""); 3081 IS_ERR(res) ? " [error]" : "");
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 1dda78db6a73..834f0fe96f89 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1052,7 +1052,7 @@ static const struct nfs_pageio_ops nfs_pageio_write_ops = {
1052 .pg_doio = nfs_generic_pg_writepages, 1052 .pg_doio = nfs_generic_pg_writepages,
1053}; 1053};
1054 1054
1055static void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, 1055void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
1056 struct inode *inode, int ioflags) 1056 struct inode *inode, int ioflags)
1057{ 1057{
1058 nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, 1058 nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops,
@@ -1166,13 +1166,7 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
1166static void nfs_writeback_release_full(void *calldata) 1166static void nfs_writeback_release_full(void *calldata)
1167{ 1167{
1168 struct nfs_write_data *data = calldata; 1168 struct nfs_write_data *data = calldata;
1169 int ret, status = data->task.tk_status; 1169 int status = data->task.tk_status;
1170 struct nfs_pageio_descriptor pgio;
1171
1172 if (data->pnfs_error) {
1173 nfs_pageio_init_write_mds(&pgio, data->inode, FLUSH_STABLE);
1174 pgio.pg_recoalesce = 1;
1175 }
1176 1170
1177 /* Update attributes as result of writeback. */ 1171 /* Update attributes as result of writeback. */
1178 while (!list_empty(&data->pages)) { 1172 while (!list_empty(&data->pages)) {
@@ -1188,11 +1182,6 @@ static void nfs_writeback_release_full(void *calldata)
1188 req->wb_bytes, 1182 req->wb_bytes,
1189 (long long)req_offset(req)); 1183 (long long)req_offset(req));
1190 1184
1191 if (data->pnfs_error) {
1192 dprintk(", pnfs error = %d\n", data->pnfs_error);
1193 goto next;
1194 }
1195
1196 if (status < 0) { 1185 if (status < 0) {
1197 nfs_set_pageerror(page); 1186 nfs_set_pageerror(page);
1198 nfs_context_set_write_error(req->wb_context, status); 1187 nfs_context_set_write_error(req->wb_context, status);
@@ -1212,19 +1201,7 @@ remove_request:
1212 next: 1201 next:
1213 nfs_clear_page_tag_locked(req); 1202 nfs_clear_page_tag_locked(req);
1214 nfs_end_page_writeback(page); 1203 nfs_end_page_writeback(page);
1215 if (data->pnfs_error) {
1216 lock_page(page);
1217 nfs_pageio_cond_complete(&pgio, page->index);
1218 ret = nfs_page_async_flush(&pgio, page, 0);
1219 if (ret) {
1220 nfs_set_pageerror(page);
1221 dprintk("rewrite to MDS error = %d\n", ret);
1222 }
1223 unlock_page(page);
1224 }
1225 } 1204 }
1226 if (data->pnfs_error)
1227 nfs_pageio_complete(&pgio);
1228 nfs_writedata_release(calldata); 1205 nfs_writedata_release(calldata);
1229} 1206}
1230 1207
@@ -1711,7 +1688,7 @@ out_error:
1711 1688
1712#ifdef CONFIG_MIGRATION 1689#ifdef CONFIG_MIGRATION
1713int nfs_migrate_page(struct address_space *mapping, struct page *newpage, 1690int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
1714 struct page *page) 1691 struct page *page, enum migrate_mode mode)
1715{ 1692{
1716 /* 1693 /*
1717 * If PagePrivate is set, then the page is currently associated with 1694 * If PagePrivate is set, then the page is currently associated with
@@ -1726,7 +1703,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
1726 1703
1727 nfs_fscache_release_page(page, GFP_KERNEL); 1704 nfs_fscache_release_page(page, GFP_KERNEL);
1728 1705
1729 return migrate_page(mapping, newpage, page); 1706 return migrate_page(mapping, newpage, page, mode);
1730} 1707}
1731#endif 1708#endif
1732 1709
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 10e6366608f2..8df1ea4a6ff9 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -80,3 +80,13 @@ config NFSD_V4
80 available from http://linux-nfs.org/. 80 available from http://linux-nfs.org/.
81 81
82 If unsure, say N. 82 If unsure, say N.
83
84config NFSD_FAULT_INJECTION
85 bool "NFS server manual fault injection"
86 depends on NFSD_V4 && DEBUG_KERNEL
87 help
88 This option enables support for manually injecting faults
89 into the NFS server. This is intended to be used for
90 testing error recovery on the NFS client.
91
92 If unsure, say N.
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
index 9b118ee20193..af32ef06b4fe 100644
--- a/fs/nfsd/Makefile
+++ b/fs/nfsd/Makefile
@@ -6,6 +6,7 @@ obj-$(CONFIG_NFSD) += nfsd.o
6 6
7nfsd-y := nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \ 7nfsd-y := nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \
8 export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o 8 export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o
9nfsd-$(CONFIG_NFSD_FAULT_INJECTION) += fault_inject.o
9nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o 10nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
10nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o 11nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o
11nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o 12nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 62f3b9074e84..cf8a6bd062fa 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -87,7 +87,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
87 struct svc_expkey key; 87 struct svc_expkey key;
88 struct svc_expkey *ek = NULL; 88 struct svc_expkey *ek = NULL;
89 89
90 if (mesg[mlen-1] != '\n') 90 if (mlen < 1 || mesg[mlen-1] != '\n')
91 return -EINVAL; 91 return -EINVAL;
92 mesg[mlen-1] = 0; 92 mesg[mlen-1] = 0;
93 93
@@ -1226,12 +1226,12 @@ nfsd_export_init(void)
1226 int rv; 1226 int rv;
1227 dprintk("nfsd: initializing export module.\n"); 1227 dprintk("nfsd: initializing export module.\n");
1228 1228
1229 rv = cache_register(&svc_export_cache); 1229 rv = cache_register_net(&svc_export_cache, &init_net);
1230 if (rv) 1230 if (rv)
1231 return rv; 1231 return rv;
1232 rv = cache_register(&svc_expkey_cache); 1232 rv = cache_register_net(&svc_expkey_cache, &init_net);
1233 if (rv) 1233 if (rv)
1234 cache_unregister(&svc_export_cache); 1234 cache_unregister_net(&svc_export_cache, &init_net);
1235 return rv; 1235 return rv;
1236 1236
1237} 1237}
@@ -1255,8 +1255,8 @@ nfsd_export_shutdown(void)
1255 1255
1256 dprintk("nfsd: shutting down export module.\n"); 1256 dprintk("nfsd: shutting down export module.\n");
1257 1257
1258 cache_unregister(&svc_expkey_cache); 1258 cache_unregister_net(&svc_expkey_cache, &init_net);
1259 cache_unregister(&svc_export_cache); 1259 cache_unregister_net(&svc_export_cache, &init_net);
1260 svcauth_unix_purge(); 1260 svcauth_unix_purge();
1261 1261
1262 dprintk("nfsd: export shutdown complete.\n"); 1262 dprintk("nfsd: export shutdown complete.\n");
diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c
new file mode 100644
index 000000000000..ce7f0758d84c
--- /dev/null
+++ b/fs/nfsd/fault_inject.c
@@ -0,0 +1,91 @@
1/*
2 * Copyright (c) 2011 Bryan Schumaker <bjschuma@netapp.com>
3 *
4 * Uses debugfs to create fault injection points for client testing
5 */
6
7#include <linux/types.h>
8#include <linux/fs.h>
9#include <linux/debugfs.h>
10#include <linux/module.h>
11
12#include "state.h"
13#include "fault_inject.h"
14
15struct nfsd_fault_inject_op {
16 char *file;
17 void (*func)(u64);
18};
19
20static struct nfsd_fault_inject_op inject_ops[] = {
21 {
22 .file = "forget_clients",
23 .func = nfsd_forget_clients,
24 },
25 {
26 .file = "forget_locks",
27 .func = nfsd_forget_locks,
28 },
29 {
30 .file = "forget_openowners",
31 .func = nfsd_forget_openowners,
32 },
33 {
34 .file = "forget_delegations",
35 .func = nfsd_forget_delegations,
36 },
37 {
38 .file = "recall_delegations",
39 .func = nfsd_recall_delegations,
40 },
41};
42
43static long int NUM_INJECT_OPS = sizeof(inject_ops) / sizeof(struct nfsd_fault_inject_op);
44static struct dentry *debug_dir;
45
46static int nfsd_inject_set(void *op_ptr, u64 val)
47{
48 struct nfsd_fault_inject_op *op = op_ptr;
49
50 if (val == 0)
51 printk(KERN_INFO "NFSD Fault Injection: %s (all)", op->file);
52 else
53 printk(KERN_INFO "NFSD Fault Injection: %s (n = %llu)", op->file, val);
54
55 op->func(val);
56 return 0;
57}
58
59static int nfsd_inject_get(void *data, u64 *val)
60{
61 return 0;
62}
63
64DEFINE_SIMPLE_ATTRIBUTE(fops_nfsd, nfsd_inject_get, nfsd_inject_set, "%llu\n");
65
66void nfsd_fault_inject_cleanup(void)
67{
68 debugfs_remove_recursive(debug_dir);
69}
70
71int nfsd_fault_inject_init(void)
72{
73 unsigned int i;
74 struct nfsd_fault_inject_op *op;
75 mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
76
77 debug_dir = debugfs_create_dir("nfsd", NULL);
78 if (!debug_dir)
79 goto fail;
80
81 for (i = 0; i < NUM_INJECT_OPS; i++) {
82 op = &inject_ops[i];
83 if (!debugfs_create_file(op->file, mode, debug_dir, op, &fops_nfsd))
84 goto fail;
85 }
86 return 0;
87
88fail:
89 nfsd_fault_inject_cleanup();
90 return -ENOMEM;
91}
diff --git a/fs/nfsd/fault_inject.h b/fs/nfsd/fault_inject.h
new file mode 100644
index 000000000000..90bd0570956c
--- /dev/null
+++ b/fs/nfsd/fault_inject.h
@@ -0,0 +1,28 @@
1/*
2 * Copyright (c) 2011 Bryan Schumaker <bjschuma@netapp.com>
3 *
4 * Function definitions for fault injection
5 */
6
7#ifndef LINUX_NFSD_FAULT_INJECT_H
8#define LINUX_NFSD_FAULT_INJECT_H
9
10#ifdef CONFIG_NFSD_FAULT_INJECTION
11int nfsd_fault_inject_init(void);
12void nfsd_fault_inject_cleanup(void);
13void nfsd_forget_clients(u64);
14void nfsd_forget_locks(u64);
15void nfsd_forget_openowners(u64);
16void nfsd_forget_delegations(u64);
17void nfsd_recall_delegations(u64);
18#else /* CONFIG_NFSD_FAULT_INJECTION */
19static inline int nfsd_fault_inject_init(void) { return 0; }
20static inline void nfsd_fault_inject_cleanup(void) {}
21static inline void nfsd_forget_clients(u64 num) {}
22static inline void nfsd_forget_locks(u64 num) {}
23static inline void nfsd_forget_openowners(u64 num) {}
24static inline void nfsd_forget_delegations(u64 num) {}
25static inline void nfsd_recall_delegations(u64 num) {}
26#endif /* CONFIG_NFSD_FAULT_INJECTION */
27
28#endif /* LINUX_NFSD_FAULT_INJECT_H */
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 7748d6a18d97..6f3ebb48b12f 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -718,7 +718,7 @@ int set_callback_cred(void)
718{ 718{
719 if (callback_cred) 719 if (callback_cred)
720 return 0; 720 return 0;
721 callback_cred = rpc_lookup_machine_cred(); 721 callback_cred = rpc_lookup_machine_cred("nfs");
722 if (!callback_cred) 722 if (!callback_cred)
723 return -ENOMEM; 723 return -ENOMEM;
724 return 0; 724 return 0;
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 55780a22fdbd..94096273cd6c 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -36,6 +36,7 @@
36#include <linux/seq_file.h> 36#include <linux/seq_file.h>
37#include <linux/sched.h> 37#include <linux/sched.h>
38#include <linux/slab.h> 38#include <linux/slab.h>
39#include <net/net_namespace.h>
39#include "idmap.h" 40#include "idmap.h"
40#include "nfsd.h" 41#include "nfsd.h"
41 42
@@ -466,20 +467,20 @@ nfsd_idmap_init(void)
466{ 467{
467 int rv; 468 int rv;
468 469
469 rv = cache_register(&idtoname_cache); 470 rv = cache_register_net(&idtoname_cache, &init_net);
470 if (rv) 471 if (rv)
471 return rv; 472 return rv;
472 rv = cache_register(&nametoid_cache); 473 rv = cache_register_net(&nametoid_cache, &init_net);
473 if (rv) 474 if (rv)
474 cache_unregister(&idtoname_cache); 475 cache_unregister_net(&idtoname_cache, &init_net);
475 return rv; 476 return rv;
476} 477}
477 478
478void 479void
479nfsd_idmap_shutdown(void) 480nfsd_idmap_shutdown(void)
480{ 481{
481 cache_unregister(&idtoname_cache); 482 cache_unregister_net(&idtoname_cache, &init_net);
482 cache_unregister(&nametoid_cache); 483 cache_unregister_net(&nametoid_cache, &init_net);
483} 484}
484 485
485static int 486static int
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index fa383361bc61..896da74ec563 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -266,10 +266,6 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_
266{ 266{
267 __be32 status; 267 __be32 status;
268 268
269 /* Only reclaims from previously confirmed clients are valid */
270 if ((status = nfs4_check_open_reclaim(&open->op_clientid)))
271 return status;
272
273 /* We don't know the target directory, and therefore can not 269 /* We don't know the target directory, and therefore can not
274 * set the change info 270 * set the change info
275 */ 271 */
@@ -373,6 +369,9 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
373 break; 369 break;
374 case NFS4_OPEN_CLAIM_PREVIOUS: 370 case NFS4_OPEN_CLAIM_PREVIOUS:
375 open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; 371 open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
372 status = nfs4_check_open_reclaim(&open->op_clientid);
373 if (status)
374 goto out;
376 case NFS4_OPEN_CLAIM_FH: 375 case NFS4_OPEN_CLAIM_FH:
377 case NFS4_OPEN_CLAIM_DELEG_CUR_FH: 376 case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
378 status = do_open_fhandle(rqstp, &cstate->current_fh, 377 status = do_open_fhandle(rqstp, &cstate->current_fh,
@@ -838,7 +837,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
838 return status; 837 return status;
839 } 838 }
840 } 839 }
841 status = mnt_want_write(cstate->current_fh.fh_export->ex_path.mnt); 840 status = fh_want_write(&cstate->current_fh);
842 if (status) 841 if (status)
843 return status; 842 return status;
844 status = nfs_ok; 843 status = nfs_ok;
@@ -856,7 +855,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
856 status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr, 855 status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr,
857 0, (time_t)0); 856 0, (time_t)0);
858out: 857out:
859 mnt_drop_write(cstate->current_fh.fh_export->ex_path.mnt); 858 fh_drop_write(&cstate->current_fh);
860 return status; 859 return status;
861} 860}
862 861
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index ed083b9a731b..0b3e875d1abd 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -117,8 +117,7 @@ out_no_tfm:
117 return status; 117 return status;
118} 118}
119 119
120int 120void nfsd4_create_clid_dir(struct nfs4_client *clp)
121nfsd4_create_clid_dir(struct nfs4_client *clp)
122{ 121{
123 const struct cred *original_cred; 122 const struct cred *original_cred;
124 char *dname = clp->cl_recdir; 123 char *dname = clp->cl_recdir;
@@ -127,13 +126,14 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
127 126
128 dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname); 127 dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname);
129 128
130 if (!rec_file || clp->cl_firststate) 129 if (clp->cl_firststate)
131 return 0; 130 return;
132
133 clp->cl_firststate = 1; 131 clp->cl_firststate = 1;
132 if (!rec_file)
133 return;
134 status = nfs4_save_creds(&original_cred); 134 status = nfs4_save_creds(&original_cred);
135 if (status < 0) 135 if (status < 0)
136 return status; 136 return;
137 137
138 dir = rec_file->f_path.dentry; 138 dir = rec_file->f_path.dentry;
139 /* lock the parent */ 139 /* lock the parent */
@@ -144,14 +144,21 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
144 status = PTR_ERR(dentry); 144 status = PTR_ERR(dentry);
145 goto out_unlock; 145 goto out_unlock;
146 } 146 }
147 status = -EEXIST;
148 if (dentry->d_inode) 147 if (dentry->d_inode)
148 /*
149 * In the 4.1 case, where we're called from
150 * reclaim_complete(), records from the previous reboot
151 * may still be left, so this is OK.
152 *
153 * In the 4.0 case, we should never get here; but we may
154 * as well be forgiving and just succeed silently.
155 */
149 goto out_put; 156 goto out_put;
150 status = mnt_want_write(rec_file->f_path.mnt); 157 status = mnt_want_write_file(rec_file);
151 if (status) 158 if (status)
152 goto out_put; 159 goto out_put;
153 status = vfs_mkdir(dir->d_inode, dentry, S_IRWXU); 160 status = vfs_mkdir(dir->d_inode, dentry, S_IRWXU);
154 mnt_drop_write(rec_file->f_path.mnt); 161 mnt_drop_write_file(rec_file);
155out_put: 162out_put:
156 dput(dentry); 163 dput(dentry);
157out_unlock: 164out_unlock:
@@ -164,7 +171,6 @@ out_unlock:
164 " and is writeable", status, 171 " and is writeable", status,
165 user_recovery_dirname); 172 user_recovery_dirname);
166 nfs4_reset_creds(original_cred); 173 nfs4_reset_creds(original_cred);
167 return status;
168} 174}
169 175
170typedef int (recdir_func)(struct dentry *, struct dentry *); 176typedef int (recdir_func)(struct dentry *, struct dentry *);
@@ -268,7 +274,7 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
268 if (!rec_file || !clp->cl_firststate) 274 if (!rec_file || !clp->cl_firststate)
269 return; 275 return;
270 276
271 status = mnt_want_write(rec_file->f_path.mnt); 277 status = mnt_want_write_file(rec_file);
272 if (status) 278 if (status)
273 goto out; 279 goto out;
274 clp->cl_firststate = 0; 280 clp->cl_firststate = 0;
@@ -281,7 +287,7 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
281 nfs4_reset_creds(original_cred); 287 nfs4_reset_creds(original_cred);
282 if (status == 0) 288 if (status == 0)
283 vfs_fsync(rec_file, 0); 289 vfs_fsync(rec_file, 0);
284 mnt_drop_write(rec_file->f_path.mnt); 290 mnt_drop_write_file(rec_file);
285out: 291out:
286 if (status) 292 if (status)
287 printk("NFSD: Failed to remove expired client state directory" 293 printk("NFSD: Failed to remove expired client state directory"
@@ -311,13 +317,13 @@ nfsd4_recdir_purge_old(void) {
311 317
312 if (!rec_file) 318 if (!rec_file)
313 return; 319 return;
314 status = mnt_want_write(rec_file->f_path.mnt); 320 status = mnt_want_write_file(rec_file);
315 if (status) 321 if (status)
316 goto out; 322 goto out;
317 status = nfsd4_list_rec_dir(purge_old); 323 status = nfsd4_list_rec_dir(purge_old);
318 if (status == 0) 324 if (status == 0)
319 vfs_fsync(rec_file, 0); 325 vfs_fsync(rec_file, 0);
320 mnt_drop_write(rec_file->f_path.mnt); 326 mnt_drop_write_file(rec_file);
321out: 327out:
322 if (status) 328 if (status)
323 printk("nfsd4: failed to purge old clients from recovery" 329 printk("nfsd4: failed to purge old clients from recovery"
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 47e94e33a975..e8c98f009670 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -49,12 +49,20 @@
49time_t nfsd4_lease = 90; /* default lease time */ 49time_t nfsd4_lease = 90; /* default lease time */
50time_t nfsd4_grace = 90; 50time_t nfsd4_grace = 90;
51static time_t boot_time; 51static time_t boot_time;
52static stateid_t zerostateid; /* bits all 0 */ 52
53static stateid_t onestateid; /* bits all 1 */ 53#define all_ones {{~0,~0},~0}
54static const stateid_t one_stateid = {
55 .si_generation = ~0,
56 .si_opaque = all_ones,
57};
58static const stateid_t zero_stateid = {
59 /* all fields zero */
60};
61
54static u64 current_sessionid = 1; 62static u64 current_sessionid = 1;
55 63
56#define ZERO_STATEID(stateid) (!memcmp((stateid), &zerostateid, sizeof(stateid_t))) 64#define ZERO_STATEID(stateid) (!memcmp((stateid), &zero_stateid, sizeof(stateid_t)))
57#define ONE_STATEID(stateid) (!memcmp((stateid), &onestateid, sizeof(stateid_t))) 65#define ONE_STATEID(stateid) (!memcmp((stateid), &one_stateid, sizeof(stateid_t)))
58 66
59/* forward declarations */ 67/* forward declarations */
60static int check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner); 68static int check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner);
@@ -133,21 +141,21 @@ unsigned int max_delegations;
133 * Open owner state (share locks) 141 * Open owner state (share locks)
134 */ 142 */
135 143
136/* hash tables for open owners */ 144/* hash tables for lock and open owners */
137#define OPEN_OWNER_HASH_BITS 8 145#define OWNER_HASH_BITS 8
138#define OPEN_OWNER_HASH_SIZE (1 << OPEN_OWNER_HASH_BITS) 146#define OWNER_HASH_SIZE (1 << OWNER_HASH_BITS)
139#define OPEN_OWNER_HASH_MASK (OPEN_OWNER_HASH_SIZE - 1) 147#define OWNER_HASH_MASK (OWNER_HASH_SIZE - 1)
140 148
141static unsigned int open_ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername) 149static unsigned int ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername)
142{ 150{
143 unsigned int ret; 151 unsigned int ret;
144 152
145 ret = opaque_hashval(ownername->data, ownername->len); 153 ret = opaque_hashval(ownername->data, ownername->len);
146 ret += clientid; 154 ret += clientid;
147 return ret & OPEN_OWNER_HASH_MASK; 155 return ret & OWNER_HASH_MASK;
148} 156}
149 157
150static struct list_head open_ownerstr_hashtbl[OPEN_OWNER_HASH_SIZE]; 158static struct list_head ownerstr_hashtbl[OWNER_HASH_SIZE];
151 159
152/* hash table for nfs4_file */ 160/* hash table for nfs4_file */
153#define FILE_HASH_BITS 8 161#define FILE_HASH_BITS 8
@@ -514,6 +522,7 @@ static void unhash_lockowner(struct nfs4_lockowner *lo)
514 522
515 list_del(&lo->lo_owner.so_strhash); 523 list_del(&lo->lo_owner.so_strhash);
516 list_del(&lo->lo_perstateid); 524 list_del(&lo->lo_perstateid);
525 list_del(&lo->lo_owner_ino_hash);
517 while (!list_empty(&lo->lo_owner.so_stateids)) { 526 while (!list_empty(&lo->lo_owner.so_stateids)) {
518 stp = list_first_entry(&lo->lo_owner.so_stateids, 527 stp = list_first_entry(&lo->lo_owner.so_stateids,
519 struct nfs4_ol_stateid, st_perstateowner); 528 struct nfs4_ol_stateid, st_perstateowner);
@@ -658,7 +667,7 @@ static int nfsd4_sanitize_slot_size(u32 size)
658/* 667/*
659 * XXX: If we run out of reserved DRC memory we could (up to a point) 668 * XXX: If we run out of reserved DRC memory we could (up to a point)
660 * re-negotiate active sessions and reduce their slot usage to make 669 * re-negotiate active sessions and reduce their slot usage to make
661 * rooom for new connections. For now we just fail the create session. 670 * room for new connections. For now we just fail the create session.
662 */ 671 */
663static int nfsd4_get_drc_mem(int slotsize, u32 num) 672static int nfsd4_get_drc_mem(int slotsize, u32 num)
664{ 673{
@@ -985,12 +994,11 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
985 clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL); 994 clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL);
986 if (clp == NULL) 995 if (clp == NULL)
987 return NULL; 996 return NULL;
988 clp->cl_name.data = kmalloc(name.len, GFP_KERNEL); 997 clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL);
989 if (clp->cl_name.data == NULL) { 998 if (clp->cl_name.data == NULL) {
990 kfree(clp); 999 kfree(clp);
991 return NULL; 1000 return NULL;
992 } 1001 }
993 memcpy(clp->cl_name.data, name.data, name.len);
994 clp->cl_name.len = name.len; 1002 clp->cl_name.len = name.len;
995 return clp; 1003 return clp;
996} 1004}
@@ -1058,7 +1066,6 @@ expire_client(struct nfs4_client *clp)
1058 spin_unlock(&recall_lock); 1066 spin_unlock(&recall_lock);
1059 while (!list_empty(&reaplist)) { 1067 while (!list_empty(&reaplist)) {
1060 dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); 1068 dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
1061 list_del_init(&dp->dl_recall_lru);
1062 unhash_delegation(dp); 1069 unhash_delegation(dp);
1063 } 1070 }
1064 while (!list_empty(&clp->cl_openowners)) { 1071 while (!list_empty(&clp->cl_openowners)) {
@@ -2301,7 +2308,7 @@ nfsd4_free_slabs(void)
2301 nfsd4_free_slab(&deleg_slab); 2308 nfsd4_free_slab(&deleg_slab);
2302} 2309}
2303 2310
2304static int 2311int
2305nfsd4_init_slabs(void) 2312nfsd4_init_slabs(void)
2306{ 2313{
2307 openowner_slab = kmem_cache_create("nfsd4_openowners", 2314 openowner_slab = kmem_cache_create("nfsd4_openowners",
@@ -2373,7 +2380,7 @@ static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj
2373 2380
2374static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, unsigned int strhashval) 2381static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, unsigned int strhashval)
2375{ 2382{
2376 list_add(&oo->oo_owner.so_strhash, &open_ownerstr_hashtbl[strhashval]); 2383 list_add(&oo->oo_owner.so_strhash, &ownerstr_hashtbl[strhashval]);
2377 list_add(&oo->oo_perclient, &clp->cl_openowners); 2384 list_add(&oo->oo_perclient, &clp->cl_openowners);
2378} 2385}
2379 2386
@@ -2436,7 +2443,9 @@ find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open)
2436 struct nfs4_stateowner *so; 2443 struct nfs4_stateowner *so;
2437 struct nfs4_openowner *oo; 2444 struct nfs4_openowner *oo;
2438 2445
2439 list_for_each_entry(so, &open_ownerstr_hashtbl[hashval], so_strhash) { 2446 list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) {
2447 if (!so->so_is_open_owner)
2448 continue;
2440 if (same_owner_str(so, &open->op_owner, &open->op_clientid)) { 2449 if (same_owner_str(so, &open->op_owner, &open->op_clientid)) {
2441 oo = openowner(so); 2450 oo = openowner(so);
2442 renew_client(oo->oo_owner.so_client); 2451 renew_client(oo->oo_owner.so_client);
@@ -2580,7 +2589,7 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate,
2580 if (open->op_file == NULL) 2589 if (open->op_file == NULL)
2581 return nfserr_jukebox; 2590 return nfserr_jukebox;
2582 2591
2583 strhashval = open_ownerstr_hashval(clientid->cl_id, &open->op_owner); 2592 strhashval = ownerstr_hashval(clientid->cl_id, &open->op_owner);
2584 oo = find_openstateowner_str(strhashval, open); 2593 oo = find_openstateowner_str(strhashval, open);
2585 open->op_openowner = oo; 2594 open->op_openowner = oo;
2586 if (!oo) { 2595 if (!oo) {
@@ -3123,7 +3132,6 @@ nfs4_laundromat(void)
3123 spin_unlock(&recall_lock); 3132 spin_unlock(&recall_lock);
3124 list_for_each_safe(pos, next, &reaplist) { 3133 list_for_each_safe(pos, next, &reaplist) {
3125 dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); 3134 dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
3126 list_del_init(&dp->dl_recall_lru);
3127 unhash_delegation(dp); 3135 unhash_delegation(dp);
3128 } 3136 }
3129 test_val = nfsd4_lease; 3137 test_val = nfsd4_lease;
@@ -3718,13 +3726,11 @@ out:
3718} 3726}
3719 3727
3720 3728
3721/*
3722 * Lock owner state (byte-range locks)
3723 */
3724#define LOFF_OVERFLOW(start, len) ((u64)(len) > ~(u64)(start)) 3729#define LOFF_OVERFLOW(start, len) ((u64)(len) > ~(u64)(start))
3725#define LOCK_HASH_BITS 8 3730
3726#define LOCK_HASH_SIZE (1 << LOCK_HASH_BITS) 3731#define LOCKOWNER_INO_HASH_BITS 8
3727#define LOCK_HASH_MASK (LOCK_HASH_SIZE - 1) 3732#define LOCKOWNER_INO_HASH_SIZE (1 << LOCKOWNER_INO_HASH_BITS)
3733#define LOCKOWNER_INO_HASH_MASK (LOCKOWNER_INO_HASH_SIZE - 1)
3728 3734
3729static inline u64 3735static inline u64
3730end_offset(u64 start, u64 len) 3736end_offset(u64 start, u64 len)
@@ -3746,16 +3752,14 @@ last_byte_offset(u64 start, u64 len)
3746 return end > start ? end - 1: NFS4_MAX_UINT64; 3752 return end > start ? end - 1: NFS4_MAX_UINT64;
3747} 3753}
3748 3754
3749static inline unsigned int 3755static unsigned int lockowner_ino_hashval(struct inode *inode, u32 cl_id, struct xdr_netobj *ownername)
3750lock_ownerstr_hashval(struct inode *inode, u32 cl_id,
3751 struct xdr_netobj *ownername)
3752{ 3756{
3753 return (file_hashval(inode) + cl_id 3757 return (file_hashval(inode) + cl_id
3754 + opaque_hashval(ownername->data, ownername->len)) 3758 + opaque_hashval(ownername->data, ownername->len))
3755 & LOCK_HASH_MASK; 3759 & LOCKOWNER_INO_HASH_MASK;
3756} 3760}
3757 3761
3758static struct list_head lock_ownerstr_hashtbl[LOCK_HASH_SIZE]; 3762static struct list_head lockowner_ino_hashtbl[LOCKOWNER_INO_HASH_SIZE];
3759 3763
3760/* 3764/*
3761 * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that 3765 * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that
@@ -3809,23 +3813,39 @@ nevermind:
3809 deny->ld_type = NFS4_WRITE_LT; 3813 deny->ld_type = NFS4_WRITE_LT;
3810} 3814}
3811 3815
3816static bool same_lockowner_ino(struct nfs4_lockowner *lo, struct inode *inode, clientid_t *clid, struct xdr_netobj *owner)
3817{
3818 struct nfs4_ol_stateid *lst;
3819
3820 if (!same_owner_str(&lo->lo_owner, owner, clid))
3821 return false;
3822 lst = list_first_entry(&lo->lo_owner.so_stateids,
3823 struct nfs4_ol_stateid, st_perstateowner);
3824 return lst->st_file->fi_inode == inode;
3825}
3826
3812static struct nfs4_lockowner * 3827static struct nfs4_lockowner *
3813find_lockowner_str(struct inode *inode, clientid_t *clid, 3828find_lockowner_str(struct inode *inode, clientid_t *clid,
3814 struct xdr_netobj *owner) 3829 struct xdr_netobj *owner)
3815{ 3830{
3816 unsigned int hashval = lock_ownerstr_hashval(inode, clid->cl_id, owner); 3831 unsigned int hashval = lockowner_ino_hashval(inode, clid->cl_id, owner);
3817 struct nfs4_stateowner *op; 3832 struct nfs4_lockowner *lo;
3818 3833
3819 list_for_each_entry(op, &lock_ownerstr_hashtbl[hashval], so_strhash) { 3834 list_for_each_entry(lo, &lockowner_ino_hashtbl[hashval], lo_owner_ino_hash) {
3820 if (same_owner_str(op, owner, clid)) 3835 if (same_lockowner_ino(lo, inode, clid, owner))
3821 return lockowner(op); 3836 return lo;
3822 } 3837 }
3823 return NULL; 3838 return NULL;
3824} 3839}
3825 3840
3826static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp) 3841static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp)
3827{ 3842{
3828 list_add(&lo->lo_owner.so_strhash, &lock_ownerstr_hashtbl[strhashval]); 3843 struct inode *inode = open_stp->st_file->fi_inode;
3844 unsigned int inohash = lockowner_ino_hashval(inode,
3845 clp->cl_clientid.cl_id, &lo->lo_owner.so_owner);
3846
3847 list_add(&lo->lo_owner.so_strhash, &ownerstr_hashtbl[strhashval]);
3848 list_add(&lo->lo_owner_ino_hash, &lockowner_ino_hashtbl[inohash]);
3829 list_add(&lo->lo_perstateid, &open_stp->st_lockowners); 3849 list_add(&lo->lo_perstateid, &open_stp->st_lockowners);
3830} 3850}
3831 3851
@@ -3834,7 +3854,7 @@ static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, s
3834 * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has 3854 * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has
3835 * occurred. 3855 * occurred.
3836 * 3856 *
3837 * strhashval = lock_ownerstr_hashval 3857 * strhashval = ownerstr_hashval
3838 */ 3858 */
3839 3859
3840static struct nfs4_lockowner * 3860static struct nfs4_lockowner *
@@ -3892,6 +3912,37 @@ static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access)
3892 __set_bit(access, &lock_stp->st_access_bmap); 3912 __set_bit(access, &lock_stp->st_access_bmap);
3893} 3913}
3894 3914
3915__be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct nfs4_ol_stateid *ost, struct nfsd4_lock *lock, struct nfs4_ol_stateid **lst, bool *new)
3916{
3917 struct nfs4_file *fi = ost->st_file;
3918 struct nfs4_openowner *oo = openowner(ost->st_stateowner);
3919 struct nfs4_client *cl = oo->oo_owner.so_client;
3920 struct nfs4_lockowner *lo;
3921 unsigned int strhashval;
3922
3923 lo = find_lockowner_str(fi->fi_inode, &cl->cl_clientid, &lock->v.new.owner);
3924 if (lo) {
3925 if (!cstate->minorversion)
3926 return nfserr_bad_seqid;
3927 /* XXX: a lockowner always has exactly one stateid: */
3928 *lst = list_first_entry(&lo->lo_owner.so_stateids,
3929 struct nfs4_ol_stateid, st_perstateowner);
3930 return nfs_ok;
3931 }
3932 strhashval = ownerstr_hashval(cl->cl_clientid.cl_id,
3933 &lock->v.new.owner);
3934 lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock);
3935 if (lo == NULL)
3936 return nfserr_jukebox;
3937 *lst = alloc_init_lock_stateid(lo, fi, ost);
3938 if (*lst == NULL) {
3939 release_lockowner(lo);
3940 return nfserr_jukebox;
3941 }
3942 *new = true;
3943 return nfs_ok;
3944}
3945
3895/* 3946/*
3896 * LOCK operation 3947 * LOCK operation
3897 */ 3948 */
@@ -3907,7 +3958,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3907 struct file_lock file_lock; 3958 struct file_lock file_lock;
3908 struct file_lock conflock; 3959 struct file_lock conflock;
3909 __be32 status = 0; 3960 __be32 status = 0;
3910 unsigned int strhashval; 3961 bool new_state = false;
3911 int lkflg; 3962 int lkflg;
3912 int err; 3963 int err;
3913 3964
@@ -3933,10 +3984,15 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3933 * lock stateid. 3984 * lock stateid.
3934 */ 3985 */
3935 struct nfs4_ol_stateid *open_stp = NULL; 3986 struct nfs4_ol_stateid *open_stp = NULL;
3936 3987
3988 if (nfsd4_has_session(cstate))
3989 /* See rfc 5661 18.10.3: given clientid is ignored: */
3990 memcpy(&lock->v.new.clientid,
3991 &cstate->session->se_client->cl_clientid,
3992 sizeof(clientid_t));
3993
3937 status = nfserr_stale_clientid; 3994 status = nfserr_stale_clientid;
3938 if (!nfsd4_has_session(cstate) && 3995 if (STALE_CLIENTID(&lock->lk_new_clientid))
3939 STALE_CLIENTID(&lock->lk_new_clientid))
3940 goto out; 3996 goto out;
3941 3997
3942 /* validate and update open stateid and open seqid */ 3998 /* validate and update open stateid and open seqid */
@@ -3948,25 +4004,12 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3948 goto out; 4004 goto out;
3949 open_sop = openowner(open_stp->st_stateowner); 4005 open_sop = openowner(open_stp->st_stateowner);
3950 status = nfserr_bad_stateid; 4006 status = nfserr_bad_stateid;
3951 if (!nfsd4_has_session(cstate) && 4007 if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid,
3952 !same_clid(&open_sop->oo_owner.so_client->cl_clientid,
3953 &lock->v.new.clientid)) 4008 &lock->v.new.clientid))
3954 goto out; 4009 goto out;
3955 /* create lockowner and lock stateid */ 4010 status = lookup_or_create_lock_state(cstate, open_stp, lock,
3956 fp = open_stp->st_file; 4011 &lock_stp, &new_state);
3957 strhashval = lock_ownerstr_hashval(fp->fi_inode, 4012 if (status)
3958 open_sop->oo_owner.so_client->cl_clientid.cl_id,
3959 &lock->v.new.owner);
3960 /* XXX: Do we need to check for duplicate stateowners on
3961 * the same file, or should they just be allowed (and
3962 * create new stateids)? */
3963 status = nfserr_jukebox;
3964 lock_sop = alloc_init_lock_stateowner(strhashval,
3965 open_sop->oo_owner.so_client, open_stp, lock);
3966 if (lock_sop == NULL)
3967 goto out;
3968 lock_stp = alloc_init_lock_stateid(lock_sop, fp, open_stp);
3969 if (lock_stp == NULL)
3970 goto out; 4013 goto out;
3971 } else { 4014 } else {
3972 /* lock (lock owner + lock stateid) already exists */ 4015 /* lock (lock owner + lock stateid) already exists */
@@ -3976,10 +4019,9 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3976 NFS4_LOCK_STID, &lock_stp); 4019 NFS4_LOCK_STID, &lock_stp);
3977 if (status) 4020 if (status)
3978 goto out; 4021 goto out;
3979 lock_sop = lockowner(lock_stp->st_stateowner);
3980 fp = lock_stp->st_file;
3981 } 4022 }
3982 /* lock_sop and lock_stp have been created or found */ 4023 lock_sop = lockowner(lock_stp->st_stateowner);
4024 fp = lock_stp->st_file;
3983 4025
3984 lkflg = setlkflg(lock->lk_type); 4026 lkflg = setlkflg(lock->lk_type);
3985 status = nfs4_check_openmode(lock_stp, lkflg); 4027 status = nfs4_check_openmode(lock_stp, lkflg);
@@ -4054,7 +4096,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4054 break; 4096 break;
4055 } 4097 }
4056out: 4098out:
4057 if (status && lock->lk_is_new && lock_sop) 4099 if (status && new_state)
4058 release_lockowner(lock_sop); 4100 release_lockowner(lock_sop);
4059 if (!cstate->replay_owner) 4101 if (!cstate->replay_owner)
4060 nfs4_unlock_state(); 4102 nfs4_unlock_state();
@@ -4251,7 +4293,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
4251 struct nfs4_ol_stateid *stp; 4293 struct nfs4_ol_stateid *stp;
4252 struct xdr_netobj *owner = &rlockowner->rl_owner; 4294 struct xdr_netobj *owner = &rlockowner->rl_owner;
4253 struct list_head matches; 4295 struct list_head matches;
4254 int i; 4296 unsigned int hashval = ownerstr_hashval(clid->cl_id, owner);
4255 __be32 status; 4297 __be32 status;
4256 4298
4257 dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", 4299 dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
@@ -4266,22 +4308,19 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
4266 nfs4_lock_state(); 4308 nfs4_lock_state();
4267 4309
4268 status = nfserr_locks_held; 4310 status = nfserr_locks_held;
4269 /* XXX: we're doing a linear search through all the lockowners.
4270 * Yipes! For now we'll just hope clients aren't really using
4271 * release_lockowner much, but eventually we have to fix these
4272 * data structures. */
4273 INIT_LIST_HEAD(&matches); 4311 INIT_LIST_HEAD(&matches);
4274 for (i = 0; i < LOCK_HASH_SIZE; i++) { 4312
4275 list_for_each_entry(sop, &lock_ownerstr_hashtbl[i], so_strhash) { 4313 list_for_each_entry(sop, &ownerstr_hashtbl[hashval], so_strhash) {
4276 if (!same_owner_str(sop, owner, clid)) 4314 if (sop->so_is_open_owner)
4277 continue; 4315 continue;
4278 list_for_each_entry(stp, &sop->so_stateids, 4316 if (!same_owner_str(sop, owner, clid))
4279 st_perstateowner) { 4317 continue;
4280 lo = lockowner(sop); 4318 list_for_each_entry(stp, &sop->so_stateids,
4281 if (check_for_locks(stp->st_file, lo)) 4319 st_perstateowner) {
4282 goto out; 4320 lo = lockowner(sop);
4283 list_add(&lo->lo_list, &matches); 4321 if (check_for_locks(stp->st_file, lo))
4284 } 4322 goto out;
4323 list_add(&lo->lo_list, &matches);
4285 } 4324 }
4286 } 4325 }
4287 /* Clients probably won't expect us to return with some (but not all) 4326 /* Clients probably won't expect us to return with some (but not all)
@@ -4394,16 +4433,127 @@ nfs4_check_open_reclaim(clientid_t *clid)
4394 return nfs4_find_reclaim_client(clid) ? nfs_ok : nfserr_reclaim_bad; 4433 return nfs4_find_reclaim_client(clid) ? nfs_ok : nfserr_reclaim_bad;
4395} 4434}
4396 4435
4436#ifdef CONFIG_NFSD_FAULT_INJECTION
4437
4438void nfsd_forget_clients(u64 num)
4439{
4440 struct nfs4_client *clp, *next;
4441 int count = 0;
4442
4443 nfs4_lock_state();
4444 list_for_each_entry_safe(clp, next, &client_lru, cl_lru) {
4445 nfsd4_remove_clid_dir(clp);
4446 expire_client(clp);
4447 if (++count == num)
4448 break;
4449 }
4450 nfs4_unlock_state();
4451
4452 printk(KERN_INFO "NFSD: Forgot %d clients", count);
4453}
4454
4455static void release_lockowner_sop(struct nfs4_stateowner *sop)
4456{
4457 release_lockowner(lockowner(sop));
4458}
4459
4460static void release_openowner_sop(struct nfs4_stateowner *sop)
4461{
4462 release_openowner(openowner(sop));
4463}
4464
4465static int nfsd_release_n_owners(u64 num, bool is_open_owner,
4466 void (*release_sop)(struct nfs4_stateowner *))
4467{
4468 int i, count = 0;
4469 struct nfs4_stateowner *sop, *next;
4470
4471 for (i = 0; i < OWNER_HASH_SIZE; i++) {
4472 list_for_each_entry_safe(sop, next, &ownerstr_hashtbl[i], so_strhash) {
4473 if (sop->so_is_open_owner != is_open_owner)
4474 continue;
4475 release_sop(sop);
4476 if (++count == num)
4477 return count;
4478 }
4479 }
4480 return count;
4481}
4482
4483void nfsd_forget_locks(u64 num)
4484{
4485 int count;
4486
4487 nfs4_lock_state();
4488 count = nfsd_release_n_owners(num, false, release_lockowner_sop);
4489 nfs4_unlock_state();
4490
4491 printk(KERN_INFO "NFSD: Forgot %d locks", count);
4492}
4493
4494void nfsd_forget_openowners(u64 num)
4495{
4496 int count;
4497
4498 nfs4_lock_state();
4499 count = nfsd_release_n_owners(num, true, release_openowner_sop);
4500 nfs4_unlock_state();
4501
4502 printk(KERN_INFO "NFSD: Forgot %d open owners", count);
4503}
4504
4505int nfsd_process_n_delegations(u64 num, void (*deleg_func)(struct nfs4_delegation *))
4506{
4507 int i, count = 0;
4508 struct nfs4_file *fp, *fnext;
4509 struct nfs4_delegation *dp, *dnext;
4510
4511 for (i = 0; i < FILE_HASH_SIZE; i++) {
4512 list_for_each_entry_safe(fp, fnext, &file_hashtbl[i], fi_hash) {
4513 list_for_each_entry_safe(dp, dnext, &fp->fi_delegations, dl_perfile) {
4514 deleg_func(dp);
4515 if (++count == num)
4516 return count;
4517 }
4518 }
4519 }
4520
4521 return count;
4522}
4523
4524void nfsd_forget_delegations(u64 num)
4525{
4526 unsigned int count;
4527
4528 nfs4_lock_state();
4529 count = nfsd_process_n_delegations(num, unhash_delegation);
4530 nfs4_unlock_state();
4531
4532 printk(KERN_INFO "NFSD: Forgot %d delegations", count);
4533}
4534
4535void nfsd_recall_delegations(u64 num)
4536{
4537 unsigned int count;
4538
4539 nfs4_lock_state();
4540 spin_lock(&recall_lock);
4541 count = nfsd_process_n_delegations(num, nfsd_break_one_deleg);
4542 spin_unlock(&recall_lock);
4543 nfs4_unlock_state();
4544
4545 printk(KERN_INFO "NFSD: Recalled %d delegations", count);
4546}
4547
4548#endif /* CONFIG_NFSD_FAULT_INJECTION */
4549
4397/* initialization to perform at module load time: */ 4550/* initialization to perform at module load time: */
4398 4551
4399int 4552void
4400nfs4_state_init(void) 4553nfs4_state_init(void)
4401{ 4554{
4402 int i, status; 4555 int i;
4403 4556
4404 status = nfsd4_init_slabs();
4405 if (status)
4406 return status;
4407 for (i = 0; i < CLIENT_HASH_SIZE; i++) { 4557 for (i = 0; i < CLIENT_HASH_SIZE; i++) {
4408 INIT_LIST_HEAD(&conf_id_hashtbl[i]); 4558 INIT_LIST_HEAD(&conf_id_hashtbl[i]);
4409 INIT_LIST_HEAD(&conf_str_hashtbl[i]); 4559 INIT_LIST_HEAD(&conf_str_hashtbl[i]);
@@ -4416,18 +4566,15 @@ nfs4_state_init(void)
4416 for (i = 0; i < FILE_HASH_SIZE; i++) { 4566 for (i = 0; i < FILE_HASH_SIZE; i++) {
4417 INIT_LIST_HEAD(&file_hashtbl[i]); 4567 INIT_LIST_HEAD(&file_hashtbl[i]);
4418 } 4568 }
4419 for (i = 0; i < OPEN_OWNER_HASH_SIZE; i++) { 4569 for (i = 0; i < OWNER_HASH_SIZE; i++) {
4420 INIT_LIST_HEAD(&open_ownerstr_hashtbl[i]); 4570 INIT_LIST_HEAD(&ownerstr_hashtbl[i]);
4421 }
4422 for (i = 0; i < LOCK_HASH_SIZE; i++) {
4423 INIT_LIST_HEAD(&lock_ownerstr_hashtbl[i]);
4424 } 4571 }
4425 memset(&onestateid, ~0, sizeof(stateid_t)); 4572 for (i = 0; i < LOCKOWNER_INO_HASH_SIZE; i++)
4573 INIT_LIST_HEAD(&lockowner_ino_hashtbl[i]);
4426 INIT_LIST_HEAD(&close_lru); 4574 INIT_LIST_HEAD(&close_lru);
4427 INIT_LIST_HEAD(&client_lru); 4575 INIT_LIST_HEAD(&client_lru);
4428 INIT_LIST_HEAD(&del_recall_lru); 4576 INIT_LIST_HEAD(&del_recall_lru);
4429 reclaim_str_hashtbl_size = 0; 4577 reclaim_str_hashtbl_size = 0;
4430 return 0;
4431} 4578}
4432 4579
4433static void 4580static void
@@ -4526,7 +4673,6 @@ __nfs4_state_shutdown(void)
4526 spin_unlock(&recall_lock); 4673 spin_unlock(&recall_lock);
4527 list_for_each_safe(pos, next, &reaplist) { 4674 list_for_each_safe(pos, next, &reaplist) {
4528 dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); 4675 dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
4529 list_del_init(&dp->dl_recall_lru);
4530 unhash_delegation(dp); 4676 unhash_delegation(dp);
4531 } 4677 }
4532 4678
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index b6fa792d6b85..0ec5a1b9700e 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -215,10 +215,9 @@ defer_free(struct nfsd4_compoundargs *argp,
215static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes) 215static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes)
216{ 216{
217 if (p == argp->tmp) { 217 if (p == argp->tmp) {
218 p = kmalloc(nbytes, GFP_KERNEL); 218 p = kmemdup(argp->tmp, nbytes, GFP_KERNEL);
219 if (!p) 219 if (!p)
220 return NULL; 220 return NULL;
221 memcpy(p, argp->tmp, nbytes);
222 } else { 221 } else {
223 BUG_ON(p != argp->tmpp); 222 BUG_ON(p != argp->tmpp);
224 argp->tmpp = NULL; 223 argp->tmpp = NULL;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index c45a2ea4a090..748eda93ce59 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -18,6 +18,7 @@
18#include "idmap.h" 18#include "idmap.h"
19#include "nfsd.h" 19#include "nfsd.h"
20#include "cache.h" 20#include "cache.h"
21#include "fault_inject.h"
21 22
22/* 23/*
23 * We have a single directory with several nodes in it. 24 * We have a single directory with several nodes in it.
@@ -272,7 +273,7 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size)
272 * 2. Is that directory a mount point, or 273 * 2. Is that directory a mount point, or
273 * 3. Is that directory the root of an exported file system? 274 * 3. Is that directory the root of an exported file system?
274 */ 275 */
275 error = nlmsvc_unlock_all_by_sb(path.mnt->mnt_sb); 276 error = nlmsvc_unlock_all_by_sb(path.dentry->d_sb);
276 277
277 path_put(&path); 278 path_put(&path);
278 return error; 279 return error;
@@ -1128,9 +1129,13 @@ static int __init init_nfsd(void)
1128 int retval; 1129 int retval;
1129 printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n"); 1130 printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n");
1130 1131
1131 retval = nfs4_state_init(); /* nfs4 locking state */ 1132 retval = nfsd4_init_slabs();
1132 if (retval) 1133 if (retval)
1133 return retval; 1134 return retval;
1135 nfs4_state_init();
1136 retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */
1137 if (retval)
1138 goto out_free_slabs;
1134 nfsd_stat_init(); /* Statistics */ 1139 nfsd_stat_init(); /* Statistics */
1135 retval = nfsd_reply_cache_init(); 1140 retval = nfsd_reply_cache_init();
1136 if (retval) 1141 if (retval)
@@ -1161,6 +1166,8 @@ out_free_cache:
1161 nfsd_reply_cache_shutdown(); 1166 nfsd_reply_cache_shutdown();
1162out_free_stat: 1167out_free_stat:
1163 nfsd_stat_shutdown(); 1168 nfsd_stat_shutdown();
1169 nfsd_fault_inject_cleanup();
1170out_free_slabs:
1164 nfsd4_free_slabs(); 1171 nfsd4_free_slabs();
1165 return retval; 1172 return retval;
1166} 1173}
@@ -1175,6 +1182,7 @@ static void __exit exit_nfsd(void)
1175 nfsd_lockd_shutdown(); 1182 nfsd_lockd_shutdown();
1176 nfsd_idmap_shutdown(); 1183 nfsd_idmap_shutdown();
1177 nfsd4_free_slabs(); 1184 nfsd4_free_slabs();
1185 nfsd_fault_inject_cleanup();
1178 unregister_filesystem(&nfsd_fs_type); 1186 unregister_filesystem(&nfsd_fs_type);
1179} 1187}
1180 1188
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 58134a23fdfb..1d1e8589b4ce 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -104,14 +104,16 @@ static inline int nfsd_v4client(struct svc_rqst *rq)
104 */ 104 */
105#ifdef CONFIG_NFSD_V4 105#ifdef CONFIG_NFSD_V4
106extern unsigned int max_delegations; 106extern unsigned int max_delegations;
107int nfs4_state_init(void); 107void nfs4_state_init(void);
108int nfsd4_init_slabs(void);
108void nfsd4_free_slabs(void); 109void nfsd4_free_slabs(void);
109int nfs4_state_start(void); 110int nfs4_state_start(void);
110void nfs4_state_shutdown(void); 111void nfs4_state_shutdown(void);
111void nfs4_reset_lease(time_t leasetime); 112void nfs4_reset_lease(time_t leasetime);
112int nfs4_reset_recoverydir(char *recdir); 113int nfs4_reset_recoverydir(char *recdir);
113#else 114#else
114static inline int nfs4_state_init(void) { return 0; } 115static inline void nfs4_state_init(void) { }
116static inline int nfsd4_init_slabs(void) { return 0; }
115static inline void nfsd4_free_slabs(void) { } 117static inline void nfsd4_free_slabs(void) { }
116static inline int nfs4_state_start(void) { return 0; } 118static inline int nfs4_state_start(void) { return 0; }
117static inline void nfs4_state_shutdown(void) { } 119static inline void nfs4_state_shutdown(void) { }
@@ -338,15 +340,15 @@ static inline u32 nfsd_suppattrs2(u32 minorversion)
338} 340}
339 341
340/* These will return ERR_INVAL if specified in GETATTR or READDIR. */ 342/* These will return ERR_INVAL if specified in GETATTR or READDIR. */
341#define NFSD_WRITEONLY_ATTRS_WORD1 \ 343#define NFSD_WRITEONLY_ATTRS_WORD1 \
342(FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) 344 (FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
343 345
344/* These are the only attrs allowed in CREATE/OPEN/SETATTR. */ 346/* These are the only attrs allowed in CREATE/OPEN/SETATTR. */
345#define NFSD_WRITEABLE_ATTRS_WORD0 \ 347#define NFSD_WRITEABLE_ATTRS_WORD0 \
346(FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL ) 348 (FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL)
347#define NFSD_WRITEABLE_ATTRS_WORD1 \ 349#define NFSD_WRITEABLE_ATTRS_WORD1 \
348(FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \ 350 (FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \
349 | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) 351 | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
350#define NFSD_WRITEABLE_ATTRS_WORD2 0 352#define NFSD_WRITEABLE_ATTRS_WORD2 0
351 353
352#define NFSD_SUPPATTR_EXCLCREAT_WORD0 \ 354#define NFSD_SUPPATTR_EXCLCREAT_WORD0 \
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index c763de5c1157..68454e75fce9 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -59,7 +59,7 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry)
59 * the write call). 59 * the write call).
60 */ 60 */
61static inline __be32 61static inline __be32
62nfsd_mode_check(struct svc_rqst *rqstp, umode_t mode, int requested) 62nfsd_mode_check(struct svc_rqst *rqstp, umode_t mode, umode_t requested)
63{ 63{
64 mode &= S_IFMT; 64 mode &= S_IFMT;
65 65
@@ -293,7 +293,7 @@ out:
293 * include/linux/nfsd/nfsd.h. 293 * include/linux/nfsd/nfsd.h.
294 */ 294 */
295__be32 295__be32
296fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) 296fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
297{ 297{
298 struct svc_export *exp; 298 struct svc_export *exp;
299 struct dentry *dentry; 299 struct dentry *dentry;
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index c16f8d8331b5..e5e6707ba687 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -102,7 +102,7 @@ extern char * SVCFH_fmt(struct svc_fh *fhp);
102/* 102/*
103 * Function prototypes 103 * Function prototypes
104 */ 104 */
105__be32 fh_verify(struct svc_rqst *, struct svc_fh *, int, int); 105__be32 fh_verify(struct svc_rqst *, struct svc_fh *, umode_t, int);
106__be32 fh_compose(struct svc_fh *, struct svc_export *, struct dentry *, struct svc_fh *); 106__be32 fh_compose(struct svc_fh *, struct svc_export *, struct dentry *, struct svc_fh *);
107__be32 fh_update(struct svc_fh *); 107__be32 fh_update(struct svc_fh *);
108void fh_put(struct svc_fh *); 108void fh_put(struct svc_fh *);
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index a3cf38476a1b..ffb5df1db94f 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -366,6 +366,7 @@ struct nfs4_openowner {
366 366
367struct nfs4_lockowner { 367struct nfs4_lockowner {
368 struct nfs4_stateowner lo_owner; /* must be first element */ 368 struct nfs4_stateowner lo_owner; /* must be first element */
369 struct list_head lo_owner_ino_hash; /* hash by owner,file */
369 struct list_head lo_perstateid; /* for lockowners only */ 370 struct list_head lo_perstateid; /* for lockowners only */
370 struct list_head lo_list; /* for temporary uses */ 371 struct list_head lo_list; /* for temporary uses */
371}; 372};
@@ -482,7 +483,7 @@ extern void nfsd4_shutdown_recdir(void);
482extern int nfs4_client_to_reclaim(const char *name); 483extern int nfs4_client_to_reclaim(const char *name);
483extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id); 484extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id);
484extern void nfsd4_recdir_purge_old(void); 485extern void nfsd4_recdir_purge_old(void);
485extern int nfsd4_create_clid_dir(struct nfs4_client *clp); 486extern void nfsd4_create_clid_dir(struct nfs4_client *clp);
486extern void nfsd4_remove_clid_dir(struct nfs4_client *clp); 487extern void nfsd4_remove_clid_dir(struct nfs4_client *clp);
487extern void release_session_client(struct nfsd4_session *); 488extern void release_session_client(struct nfsd4_session *);
488extern __be32 nfs4_validate_stateid(struct nfs4_client *, stateid_t *); 489extern __be32 nfs4_validate_stateid(struct nfs4_client *, stateid_t *);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 7a2e442623c8..edf6d3ed8777 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -307,7 +307,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
307 struct dentry *dentry; 307 struct dentry *dentry;
308 struct inode *inode; 308 struct inode *inode;
309 int accmode = NFSD_MAY_SATTR; 309 int accmode = NFSD_MAY_SATTR;
310 int ftype = 0; 310 umode_t ftype = 0;
311 __be32 err; 311 __be32 err;
312 int host_err; 312 int host_err;
313 int size_change = 0; 313 int size_change = 0;
@@ -594,8 +594,19 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_ac
594 return error; 594 return error;
595} 595}
596 596
597#define NFSD_XATTR_JUNCTION_PREFIX XATTR_TRUSTED_PREFIX "junction." 597/*
598#define NFSD_XATTR_JUNCTION_TYPE NFSD_XATTR_JUNCTION_PREFIX "type" 598 * NFS junction information is stored in an extended attribute.
599 */
600#define NFSD_JUNCTION_XATTR_NAME XATTR_TRUSTED_PREFIX "junction.nfs"
601
602/**
603 * nfsd4_is_junction - Test if an object could be an NFS junction
604 *
605 * @dentry: object to test
606 *
607 * Returns 1 if "dentry" appears to contain NFS junction information.
608 * Otherwise 0 is returned.
609 */
599int nfsd4_is_junction(struct dentry *dentry) 610int nfsd4_is_junction(struct dentry *dentry)
600{ 611{
601 struct inode *inode = dentry->d_inode; 612 struct inode *inode = dentry->d_inode;
@@ -606,7 +617,7 @@ int nfsd4_is_junction(struct dentry *dentry)
606 return 0; 617 return 0;
607 if (!(inode->i_mode & S_ISVTX)) 618 if (!(inode->i_mode & S_ISVTX))
608 return 0; 619 return 0;
609 if (vfs_getxattr(dentry, NFSD_XATTR_JUNCTION_TYPE, NULL, 0) <= 0) 620 if (vfs_getxattr(dentry, NFSD_JUNCTION_XATTR_NAME, NULL, 0) <= 0)
610 return 0; 621 return 0;
611 return 1; 622 return 1;
612} 623}
@@ -730,7 +741,7 @@ static int nfsd_open_break_lease(struct inode *inode, int access)
730 * N.B. After this call fhp needs an fh_put 741 * N.B. After this call fhp needs an fh_put
731 */ 742 */
732__be32 743__be32
733nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, 744nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
734 int access, struct file **filp) 745 int access, struct file **filp)
735{ 746{
736 struct dentry *dentry; 747 struct dentry *dentry;
@@ -1300,7 +1311,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1300 goto out; 1311 goto out;
1301 } 1312 }
1302 1313
1303 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); 1314 host_err = fh_want_write(fhp);
1304 if (host_err) 1315 if (host_err)
1305 goto out_nfserr; 1316 goto out_nfserr;
1306 1317
@@ -1325,7 +1336,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1325 break; 1336 break;
1326 } 1337 }
1327 if (host_err < 0) { 1338 if (host_err < 0) {
1328 mnt_drop_write(fhp->fh_export->ex_path.mnt); 1339 fh_drop_write(fhp);
1329 goto out_nfserr; 1340 goto out_nfserr;
1330 } 1341 }
1331 1342
@@ -1339,7 +1350,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1339 err2 = nfserrno(commit_metadata(fhp)); 1350 err2 = nfserrno(commit_metadata(fhp));
1340 if (err2) 1351 if (err2)
1341 err = err2; 1352 err = err2;
1342 mnt_drop_write(fhp->fh_export->ex_path.mnt); 1353 fh_drop_write(fhp);
1343 /* 1354 /*
1344 * Update the file handle to get the new inode info. 1355 * Update the file handle to get the new inode info.
1345 */ 1356 */
@@ -1430,7 +1441,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1430 v_atime = verifier[1]&0x7fffffff; 1441 v_atime = verifier[1]&0x7fffffff;
1431 } 1442 }
1432 1443
1433 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); 1444 host_err = fh_want_write(fhp);
1434 if (host_err) 1445 if (host_err)
1435 goto out_nfserr; 1446 goto out_nfserr;
1436 if (dchild->d_inode) { 1447 if (dchild->d_inode) {
@@ -1469,13 +1480,13 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1469 case NFS3_CREATE_GUARDED: 1480 case NFS3_CREATE_GUARDED:
1470 err = nfserr_exist; 1481 err = nfserr_exist;
1471 } 1482 }
1472 mnt_drop_write(fhp->fh_export->ex_path.mnt); 1483 fh_drop_write(fhp);
1473 goto out; 1484 goto out;
1474 } 1485 }
1475 1486
1476 host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); 1487 host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
1477 if (host_err < 0) { 1488 if (host_err < 0) {
1478 mnt_drop_write(fhp->fh_export->ex_path.mnt); 1489 fh_drop_write(fhp);
1479 goto out_nfserr; 1490 goto out_nfserr;
1480 } 1491 }
1481 if (created) 1492 if (created)
@@ -1503,7 +1514,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1503 if (!err) 1514 if (!err)
1504 err = nfserrno(commit_metadata(fhp)); 1515 err = nfserrno(commit_metadata(fhp));
1505 1516
1506 mnt_drop_write(fhp->fh_export->ex_path.mnt); 1517 fh_drop_write(fhp);
1507 /* 1518 /*
1508 * Update the filehandle to get the new inode info. 1519 * Update the filehandle to get the new inode info.
1509 */ 1520 */
@@ -1600,7 +1611,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
1600 if (IS_ERR(dnew)) 1611 if (IS_ERR(dnew))
1601 goto out_nfserr; 1612 goto out_nfserr;
1602 1613
1603 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); 1614 host_err = fh_want_write(fhp);
1604 if (host_err) 1615 if (host_err)
1605 goto out_nfserr; 1616 goto out_nfserr;
1606 1617
@@ -1621,7 +1632,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
1621 err = nfserrno(commit_metadata(fhp)); 1632 err = nfserrno(commit_metadata(fhp));
1622 fh_unlock(fhp); 1633 fh_unlock(fhp);
1623 1634
1624 mnt_drop_write(fhp->fh_export->ex_path.mnt); 1635 fh_drop_write(fhp);
1625 1636
1626 cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp); 1637 cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp);
1627 dput(dnew); 1638 dput(dnew);
@@ -1674,7 +1685,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
1674 1685
1675 dold = tfhp->fh_dentry; 1686 dold = tfhp->fh_dentry;
1676 1687
1677 host_err = mnt_want_write(tfhp->fh_export->ex_path.mnt); 1688 host_err = fh_want_write(tfhp);
1678 if (host_err) { 1689 if (host_err) {
1679 err = nfserrno(host_err); 1690 err = nfserrno(host_err);
1680 goto out_dput; 1691 goto out_dput;
@@ -1699,7 +1710,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
1699 err = nfserrno(host_err); 1710 err = nfserrno(host_err);
1700 } 1711 }
1701out_drop_write: 1712out_drop_write:
1702 mnt_drop_write(tfhp->fh_export->ex_path.mnt); 1713 fh_drop_write(tfhp);
1703out_dput: 1714out_dput:
1704 dput(dnew); 1715 dput(dnew);
1705out_unlock: 1716out_unlock:
@@ -1776,7 +1787,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1776 host_err = -EXDEV; 1787 host_err = -EXDEV;
1777 if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt) 1788 if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt)
1778 goto out_dput_new; 1789 goto out_dput_new;
1779 host_err = mnt_want_write(ffhp->fh_export->ex_path.mnt); 1790 host_err = fh_want_write(ffhp);
1780 if (host_err) 1791 if (host_err)
1781 goto out_dput_new; 1792 goto out_dput_new;
1782 1793
@@ -1795,7 +1806,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1795 host_err = commit_metadata(ffhp); 1806 host_err = commit_metadata(ffhp);
1796 } 1807 }
1797out_drop_write: 1808out_drop_write:
1798 mnt_drop_write(ffhp->fh_export->ex_path.mnt); 1809 fh_drop_write(ffhp);
1799 out_dput_new: 1810 out_dput_new:
1800 dput(ndentry); 1811 dput(ndentry);
1801 out_dput_old: 1812 out_dput_old:
@@ -1854,7 +1865,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
1854 if (!type) 1865 if (!type)
1855 type = rdentry->d_inode->i_mode & S_IFMT; 1866 type = rdentry->d_inode->i_mode & S_IFMT;
1856 1867
1857 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); 1868 host_err = fh_want_write(fhp);
1858 if (host_err) 1869 if (host_err)
1859 goto out_put; 1870 goto out_put;
1860 1871
@@ -1868,7 +1879,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
1868 if (!host_err) 1879 if (!host_err)
1869 host_err = commit_metadata(fhp); 1880 host_err = commit_metadata(fhp);
1870out_drop_write: 1881out_drop_write:
1871 mnt_drop_write(fhp->fh_export->ex_path.mnt); 1882 fh_drop_write(fhp);
1872out_put: 1883out_put:
1873 dput(rdentry); 1884 dput(rdentry);
1874 1885
@@ -2270,7 +2281,7 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
2270 } else 2281 } else
2271 size = 0; 2282 size = 0;
2272 2283
2273 error = mnt_want_write(fhp->fh_export->ex_path.mnt); 2284 error = fh_want_write(fhp);
2274 if (error) 2285 if (error)
2275 goto getout; 2286 goto getout;
2276 if (size) 2287 if (size)
@@ -2284,7 +2295,7 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
2284 error = 0; 2295 error = 0;
2285 } 2296 }
2286 } 2297 }
2287 mnt_drop_write(fhp->fh_export->ex_path.mnt); 2298 fh_drop_write(fhp);
2288 2299
2289getout: 2300getout:
2290 kfree(value); 2301 kfree(value);
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 3f54ad03bb2b..1dcd238e11a0 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -66,7 +66,7 @@ __be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *,
66__be32 nfsd_commit(struct svc_rqst *, struct svc_fh *, 66__be32 nfsd_commit(struct svc_rqst *, struct svc_fh *,
67 loff_t, unsigned long); 67 loff_t, unsigned long);
68#endif /* CONFIG_NFSD_V3 */ 68#endif /* CONFIG_NFSD_V3 */
69__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, int, 69__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,
70 int, struct file **); 70 int, struct file **);
71void nfsd_close(struct file *); 71void nfsd_close(struct file *);
72__be32 nfsd_read(struct svc_rqst *, struct svc_fh *, 72__be32 nfsd_read(struct svc_rqst *, struct svc_fh *,
@@ -106,4 +106,14 @@ struct posix_acl *nfsd_get_posix_acl(struct svc_fh *, int);
106int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *); 106int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *);
107#endif 107#endif
108 108
109static inline int fh_want_write(struct svc_fh *fh)
110{
111 return mnt_want_write(fh->fh_export->ex_path.mnt);
112}
113
114static inline void fh_drop_write(struct svc_fh *fh)
115{
116 mnt_drop_write(fh->fh_export->ex_path.mnt);
117}
118
109#endif /* LINUX_NFSD_VFS_H */ 119#endif /* LINUX_NFSD_VFS_H */
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index 3a1923943b14..ca35b3a46d17 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -251,7 +251,7 @@ nilfs_type_by_mode[S_IFMT >> S_SHIFT] = {
251 251
252static void nilfs_set_de_type(struct nilfs_dir_entry *de, struct inode *inode) 252static void nilfs_set_de_type(struct nilfs_dir_entry *de, struct inode *inode)
253{ 253{
254 mode_t mode = inode->i_mode; 254 umode_t mode = inode->i_mode;
255 255
256 de->file_type = nilfs_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; 256 de->file_type = nilfs_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
257} 257}
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index b50ffb72e5b3..8f7b95ac1f7e 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -291,7 +291,7 @@ const struct address_space_operations nilfs_aops = {
291 .is_partially_uptodate = block_is_partially_uptodate, 291 .is_partially_uptodate = block_is_partially_uptodate,
292}; 292};
293 293
294struct inode *nilfs_new_inode(struct inode *dir, int mode) 294struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
295{ 295{
296 struct super_block *sb = dir->i_sb; 296 struct super_block *sb = dir->i_sb;
297 struct the_nilfs *nilfs = sb->s_fs_info; 297 struct the_nilfs *nilfs = sb->s_fs_info;
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index ac258beeda3c..886649627c3d 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -27,7 +27,7 @@
27#include <linux/uaccess.h> /* copy_from_user(), copy_to_user() */ 27#include <linux/uaccess.h> /* copy_from_user(), copy_to_user() */
28#include <linux/vmalloc.h> 28#include <linux/vmalloc.h>
29#include <linux/compat.h> /* compat_ptr() */ 29#include <linux/compat.h> /* compat_ptr() */
30#include <linux/mount.h> /* mnt_want_write(), mnt_drop_write() */ 30#include <linux/mount.h> /* mnt_want_write_file(), mnt_drop_write_file() */
31#include <linux/buffer_head.h> 31#include <linux/buffer_head.h>
32#include <linux/nilfs2_fs.h> 32#include <linux/nilfs2_fs.h>
33#include "nilfs.h" 33#include "nilfs.h"
@@ -119,7 +119,7 @@ static int nilfs_ioctl_setflags(struct inode *inode, struct file *filp,
119 if (get_user(flags, (int __user *)argp)) 119 if (get_user(flags, (int __user *)argp))
120 return -EFAULT; 120 return -EFAULT;
121 121
122 ret = mnt_want_write(filp->f_path.mnt); 122 ret = mnt_want_write_file(filp);
123 if (ret) 123 if (ret)
124 return ret; 124 return ret;
125 125
@@ -154,7 +154,7 @@ static int nilfs_ioctl_setflags(struct inode *inode, struct file *filp,
154 ret = nilfs_transaction_commit(inode->i_sb); 154 ret = nilfs_transaction_commit(inode->i_sb);
155out: 155out:
156 mutex_unlock(&inode->i_mutex); 156 mutex_unlock(&inode->i_mutex);
157 mnt_drop_write(filp->f_path.mnt); 157 mnt_drop_write_file(filp);
158 return ret; 158 return ret;
159} 159}
160 160
@@ -174,7 +174,7 @@ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp,
174 if (!capable(CAP_SYS_ADMIN)) 174 if (!capable(CAP_SYS_ADMIN))
175 return -EPERM; 175 return -EPERM;
176 176
177 ret = mnt_want_write(filp->f_path.mnt); 177 ret = mnt_want_write_file(filp);
178 if (ret) 178 if (ret)
179 return ret; 179 return ret;
180 180
@@ -194,7 +194,7 @@ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp,
194 194
195 up_read(&inode->i_sb->s_umount); 195 up_read(&inode->i_sb->s_umount);
196out: 196out:
197 mnt_drop_write(filp->f_path.mnt); 197 mnt_drop_write_file(filp);
198 return ret; 198 return ret;
199} 199}
200 200
@@ -210,7 +210,7 @@ nilfs_ioctl_delete_checkpoint(struct inode *inode, struct file *filp,
210 if (!capable(CAP_SYS_ADMIN)) 210 if (!capable(CAP_SYS_ADMIN))
211 return -EPERM; 211 return -EPERM;
212 212
213 ret = mnt_want_write(filp->f_path.mnt); 213 ret = mnt_want_write_file(filp);
214 if (ret) 214 if (ret)
215 return ret; 215 return ret;
216 216
@@ -225,7 +225,7 @@ nilfs_ioctl_delete_checkpoint(struct inode *inode, struct file *filp,
225 else 225 else
226 nilfs_transaction_commit(inode->i_sb); /* never fails */ 226 nilfs_transaction_commit(inode->i_sb); /* never fails */
227out: 227out:
228 mnt_drop_write(filp->f_path.mnt); 228 mnt_drop_write_file(filp);
229 return ret; 229 return ret;
230} 230}
231 231
@@ -591,7 +591,7 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
591 if (!capable(CAP_SYS_ADMIN)) 591 if (!capable(CAP_SYS_ADMIN))
592 return -EPERM; 592 return -EPERM;
593 593
594 ret = mnt_want_write(filp->f_path.mnt); 594 ret = mnt_want_write_file(filp);
595 if (ret) 595 if (ret)
596 return ret; 596 return ret;
597 597
@@ -675,7 +675,7 @@ out_free:
675 vfree(kbufs[n]); 675 vfree(kbufs[n]);
676 kfree(kbufs[4]); 676 kfree(kbufs[4]);
677out: 677out:
678 mnt_drop_write(filp->f_path.mnt); 678 mnt_drop_write_file(filp);
679 return ret; 679 return ret;
680} 680}
681 681
@@ -710,7 +710,7 @@ static int nilfs_ioctl_resize(struct inode *inode, struct file *filp,
710 if (!capable(CAP_SYS_ADMIN)) 710 if (!capable(CAP_SYS_ADMIN))
711 goto out; 711 goto out;
712 712
713 ret = mnt_want_write(filp->f_path.mnt); 713 ret = mnt_want_write_file(filp);
714 if (ret) 714 if (ret)
715 goto out; 715 goto out;
716 716
@@ -721,7 +721,7 @@ static int nilfs_ioctl_resize(struct inode *inode, struct file *filp,
721 ret = nilfs_resize_fs(inode->i_sb, newsize); 721 ret = nilfs_resize_fs(inode->i_sb, newsize);
722 722
723out_drop_write: 723out_drop_write:
724 mnt_drop_write(filp->f_path.mnt); 724 mnt_drop_write_file(filp);
725out: 725out:
726 return ret; 726 return ret;
727} 727}
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 768982de10e4..1cd3f624dffc 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -84,7 +84,7 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
84 * If the create succeeds, we fill in the inode information 84 * If the create succeeds, we fill in the inode information
85 * with d_instantiate(). 85 * with d_instantiate().
86 */ 86 */
87static int nilfs_create(struct inode *dir, struct dentry *dentry, int mode, 87static int nilfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
88 struct nameidata *nd) 88 struct nameidata *nd)
89{ 89{
90 struct inode *inode; 90 struct inode *inode;
@@ -112,7 +112,7 @@ static int nilfs_create(struct inode *dir, struct dentry *dentry, int mode,
112} 112}
113 113
114static int 114static int
115nilfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) 115nilfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
116{ 116{
117 struct inode *inode; 117 struct inode *inode;
118 struct nilfs_transaction_info ti; 118 struct nilfs_transaction_info ti;
@@ -213,7 +213,7 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir,
213 return err; 213 return err;
214} 214}
215 215
216static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 216static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
217{ 217{
218 struct inode *inode; 218 struct inode *inode;
219 struct nilfs_transaction_info ti; 219 struct nilfs_transaction_info ti;
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 3777d138f895..250add84da76 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -246,7 +246,7 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *, struct nilfs_argv *,
246/* inode.c */ 246/* inode.c */
247void nilfs_inode_add_blocks(struct inode *inode, int n); 247void nilfs_inode_add_blocks(struct inode *inode, int n);
248void nilfs_inode_sub_blocks(struct inode *inode, int n); 248void nilfs_inode_sub_blocks(struct inode *inode, int n);
249extern struct inode *nilfs_new_inode(struct inode *, int); 249extern struct inode *nilfs_new_inode(struct inode *, umode_t);
250extern void nilfs_free_inode(struct inode *); 250extern void nilfs_free_inode(struct inode *);
251extern int nilfs_get_block(struct inode *, sector_t, struct buffer_head *, int); 251extern int nilfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
252extern void nilfs_set_inode_flags(struct inode *); 252extern void nilfs_set_inode_flags(struct inode *);
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index bb24ab6c282f..0e72ad6f22aa 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2470,7 +2470,7 @@ static int nilfs_segctor_thread(void *arg)
2470 2470
2471 if (freezing(current)) { 2471 if (freezing(current)) {
2472 spin_unlock(&sci->sc_state_lock); 2472 spin_unlock(&sci->sc_state_lock);
2473 refrigerator(); 2473 try_to_freeze();
2474 spin_lock(&sci->sc_state_lock); 2474 spin_lock(&sci->sc_state_lock);
2475 } else { 2475 } else {
2476 DEFINE_WAIT(wait); 2476 DEFINE_WAIT(wait);
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 8351c44a7320..08e3d4f9df18 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -175,8 +175,6 @@ static void nilfs_i_callback(struct rcu_head *head)
175 struct inode *inode = container_of(head, struct inode, i_rcu); 175 struct inode *inode = container_of(head, struct inode, i_rcu);
176 struct nilfs_mdt_info *mdi = NILFS_MDT(inode); 176 struct nilfs_mdt_info *mdi = NILFS_MDT(inode);
177 177
178 INIT_LIST_HEAD(&inode->i_dentry);
179
180 if (mdi) { 178 if (mdi) {
181 kfree(mdi->mi_bgl); /* kfree(NULL) is safe */ 179 kfree(mdi->mi_bgl); /* kfree(NULL) is safe */
182 kfree(mdi); 180 kfree(mdi);
@@ -650,11 +648,11 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf)
650 return 0; 648 return 0;
651} 649}
652 650
653static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs) 651static int nilfs_show_options(struct seq_file *seq, struct dentry *dentry)
654{ 652{
655 struct super_block *sb = vfs->mnt_sb; 653 struct super_block *sb = dentry->d_sb;
656 struct the_nilfs *nilfs = sb->s_fs_info; 654 struct the_nilfs *nilfs = sb->s_fs_info;
657 struct nilfs_root *root = NILFS_I(vfs->mnt_root->d_inode)->i_root; 655 struct nilfs_root *root = NILFS_I(dentry->d_inode)->i_root;
658 656
659 if (!nilfs_test_opt(nilfs, BARRIER)) 657 if (!nilfs_test_opt(nilfs, BARRIER))
660 seq_puts(seq, ",nobarrier"); 658 seq_puts(seq, ",nobarrier");
diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c
index 44a88a9fa2c8..fea6bd5831dc 100644
--- a/fs/nls/nls_base.c
+++ b/fs/nls/nls_base.c
@@ -52,7 +52,7 @@ static const struct utf8_table utf8_table[] =
52#define SURROGATE_LOW 0x00000400 52#define SURROGATE_LOW 0x00000400
53#define SURROGATE_BITS 0x000003ff 53#define SURROGATE_BITS 0x000003ff
54 54
55int utf8_to_utf32(const u8 *s, int len, unicode_t *pu) 55int utf8_to_utf32(const u8 *s, int inlen, unicode_t *pu)
56{ 56{
57 unsigned long l; 57 unsigned long l;
58 int c0, c, nc; 58 int c0, c, nc;
@@ -71,7 +71,7 @@ int utf8_to_utf32(const u8 *s, int len, unicode_t *pu)
71 *pu = (unicode_t) l; 71 *pu = (unicode_t) l;
72 return nc; 72 return nc;
73 } 73 }
74 if (len <= nc) 74 if (inlen <= nc)
75 return -1; 75 return -1;
76 s++; 76 s++;
77 c = (*s ^ 0x80) & 0xFF; 77 c = (*s ^ 0x80) & 0xFF;
@@ -83,7 +83,7 @@ int utf8_to_utf32(const u8 *s, int len, unicode_t *pu)
83} 83}
84EXPORT_SYMBOL(utf8_to_utf32); 84EXPORT_SYMBOL(utf8_to_utf32);
85 85
86int utf32_to_utf8(unicode_t u, u8 *s, int maxlen) 86int utf32_to_utf8(unicode_t u, u8 *s, int maxout)
87{ 87{
88 unsigned long l; 88 unsigned long l;
89 int c, nc; 89 int c, nc;
@@ -97,7 +97,7 @@ int utf32_to_utf8(unicode_t u, u8 *s, int maxlen)
97 return -1; 97 return -1;
98 98
99 nc = 0; 99 nc = 0;
100 for (t = utf8_table; t->cmask && maxlen; t++, maxlen--) { 100 for (t = utf8_table; t->cmask && maxout; t++, maxout--) {
101 nc++; 101 nc++;
102 if (l <= t->lmask) { 102 if (l <= t->lmask) {
103 c = t->shift; 103 c = t->shift;
@@ -114,34 +114,57 @@ int utf32_to_utf8(unicode_t u, u8 *s, int maxlen)
114} 114}
115EXPORT_SYMBOL(utf32_to_utf8); 115EXPORT_SYMBOL(utf32_to_utf8);
116 116
117int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs) 117static inline void put_utf16(wchar_t *s, unsigned c, enum utf16_endian endian)
118{
119 switch (endian) {
120 default:
121 *s = (wchar_t) c;
122 break;
123 case UTF16_LITTLE_ENDIAN:
124 *s = __cpu_to_le16(c);
125 break;
126 case UTF16_BIG_ENDIAN:
127 *s = __cpu_to_be16(c);
128 break;
129 }
130}
131
132int utf8s_to_utf16s(const u8 *s, int inlen, enum utf16_endian endian,
133 wchar_t *pwcs, int maxout)
118{ 134{
119 u16 *op; 135 u16 *op;
120 int size; 136 int size;
121 unicode_t u; 137 unicode_t u;
122 138
123 op = pwcs; 139 op = pwcs;
124 while (*s && len > 0) { 140 while (inlen > 0 && maxout > 0 && *s) {
125 if (*s & 0x80) { 141 if (*s & 0x80) {
126 size = utf8_to_utf32(s, len, &u); 142 size = utf8_to_utf32(s, inlen, &u);
127 if (size < 0) 143 if (size < 0)
128 return -EINVAL; 144 return -EINVAL;
145 s += size;
146 inlen -= size;
129 147
130 if (u >= PLANE_SIZE) { 148 if (u >= PLANE_SIZE) {
149 if (maxout < 2)
150 break;
131 u -= PLANE_SIZE; 151 u -= PLANE_SIZE;
132 *op++ = (wchar_t) (SURROGATE_PAIR | 152 put_utf16(op++, SURROGATE_PAIR |
133 ((u >> 10) & SURROGATE_BITS)); 153 ((u >> 10) & SURROGATE_BITS),
134 *op++ = (wchar_t) (SURROGATE_PAIR | 154 endian);
155 put_utf16(op++, SURROGATE_PAIR |
135 SURROGATE_LOW | 156 SURROGATE_LOW |
136 (u & SURROGATE_BITS)); 157 (u & SURROGATE_BITS),
158 endian);
159 maxout -= 2;
137 } else { 160 } else {
138 *op++ = (wchar_t) u; 161 put_utf16(op++, u, endian);
162 maxout--;
139 } 163 }
140 s += size;
141 len -= size;
142 } else { 164 } else {
143 *op++ = *s++; 165 put_utf16(op++, *s++, endian);
144 len--; 166 inlen--;
167 maxout--;
145 } 168 }
146 } 169 }
147 return op - pwcs; 170 return op - pwcs;
@@ -160,27 +183,27 @@ static inline unsigned long get_utf16(unsigned c, enum utf16_endian endian)
160 } 183 }
161} 184}
162 185
163int utf16s_to_utf8s(const wchar_t *pwcs, int len, enum utf16_endian endian, 186int utf16s_to_utf8s(const wchar_t *pwcs, int inlen, enum utf16_endian endian,
164 u8 *s, int maxlen) 187 u8 *s, int maxout)
165{ 188{
166 u8 *op; 189 u8 *op;
167 int size; 190 int size;
168 unsigned long u, v; 191 unsigned long u, v;
169 192
170 op = s; 193 op = s;
171 while (len > 0 && maxlen > 0) { 194 while (inlen > 0 && maxout > 0) {
172 u = get_utf16(*pwcs, endian); 195 u = get_utf16(*pwcs, endian);
173 if (!u) 196 if (!u)
174 break; 197 break;
175 pwcs++; 198 pwcs++;
176 len--; 199 inlen--;
177 if (u > 0x7f) { 200 if (u > 0x7f) {
178 if ((u & SURROGATE_MASK) == SURROGATE_PAIR) { 201 if ((u & SURROGATE_MASK) == SURROGATE_PAIR) {
179 if (u & SURROGATE_LOW) { 202 if (u & SURROGATE_LOW) {
180 /* Ignore character and move on */ 203 /* Ignore character and move on */
181 continue; 204 continue;
182 } 205 }
183 if (len <= 0) 206 if (inlen <= 0)
184 break; 207 break;
185 v = get_utf16(*pwcs, endian); 208 v = get_utf16(*pwcs, endian);
186 if ((v & SURROGATE_MASK) != SURROGATE_PAIR || 209 if ((v & SURROGATE_MASK) != SURROGATE_PAIR ||
@@ -191,18 +214,18 @@ int utf16s_to_utf8s(const wchar_t *pwcs, int len, enum utf16_endian endian,
191 u = PLANE_SIZE + ((u & SURROGATE_BITS) << 10) 214 u = PLANE_SIZE + ((u & SURROGATE_BITS) << 10)
192 + (v & SURROGATE_BITS); 215 + (v & SURROGATE_BITS);
193 pwcs++; 216 pwcs++;
194 len--; 217 inlen--;
195 } 218 }
196 size = utf32_to_utf8(u, op, maxlen); 219 size = utf32_to_utf8(u, op, maxout);
197 if (size == -1) { 220 if (size == -1) {
198 /* Ignore character and move on */ 221 /* Ignore character and move on */
199 } else { 222 } else {
200 op += size; 223 op += size;
201 maxlen -= size; 224 maxout -= size;
202 } 225 }
203 } else { 226 } else {
204 *op++ = (u8) u; 227 *op++ = (u8) u;
205 maxlen--; 228 maxout--;
206 } 229 }
207 } 230 }
208 return op - s; 231 return op - s;
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 9fde1c00a296..3568c8a8b138 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -16,6 +16,8 @@
16 16
17#include <asm/ioctls.h> 17#include <asm/ioctls.h>
18 18
19#include "../../mount.h"
20
19#define FANOTIFY_DEFAULT_MAX_EVENTS 16384 21#define FANOTIFY_DEFAULT_MAX_EVENTS 16384
20#define FANOTIFY_DEFAULT_MAX_MARKS 8192 22#define FANOTIFY_DEFAULT_MAX_MARKS 8192
21#define FANOTIFY_DEFAULT_MAX_LISTENERS 128 23#define FANOTIFY_DEFAULT_MAX_LISTENERS 128
@@ -546,7 +548,7 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group,
546 548
547 removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags); 549 removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags);
548 fsnotify_put_mark(fsn_mark); 550 fsnotify_put_mark(fsn_mark);
549 if (removed & mnt->mnt_fsnotify_mask) 551 if (removed & real_mount(mnt)->mnt_fsnotify_mask)
550 fsnotify_recalc_vfsmount_mask(mnt); 552 fsnotify_recalc_vfsmount_mask(mnt);
551 553
552 return 0; 554 return 0;
@@ -623,7 +625,7 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
623 } 625 }
624 added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); 626 added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
625 627
626 if (added & ~mnt->mnt_fsnotify_mask) 628 if (added & ~real_mount(mnt)->mnt_fsnotify_mask)
627 fsnotify_recalc_vfsmount_mask(mnt); 629 fsnotify_recalc_vfsmount_mask(mnt);
628err: 630err:
629 fsnotify_put_mark(fsn_mark); 631 fsnotify_put_mark(fsn_mark);
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 79b47cbb5cd8..ccb14d3fc0de 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -26,6 +26,7 @@
26 26
27#include <linux/fsnotify_backend.h> 27#include <linux/fsnotify_backend.h>
28#include "fsnotify.h" 28#include "fsnotify.h"
29#include "../mount.h"
29 30
30/* 31/*
31 * Clear all of the marks on an inode when it is being evicted from core 32 * Clear all of the marks on an inode when it is being evicted from core
@@ -205,13 +206,13 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
205 struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL; 206 struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL;
206 struct fsnotify_group *inode_group, *vfsmount_group; 207 struct fsnotify_group *inode_group, *vfsmount_group;
207 struct fsnotify_event *event = NULL; 208 struct fsnotify_event *event = NULL;
208 struct vfsmount *mnt; 209 struct mount *mnt;
209 int idx, ret = 0; 210 int idx, ret = 0;
210 /* global tests shouldn't care about events on child only the specific event */ 211 /* global tests shouldn't care about events on child only the specific event */
211 __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD); 212 __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
212 213
213 if (data_is == FSNOTIFY_EVENT_PATH) 214 if (data_is == FSNOTIFY_EVENT_PATH)
214 mnt = ((struct path *)data)->mnt; 215 mnt = real_mount(((struct path *)data)->mnt);
215 else 216 else
216 mnt = NULL; 217 mnt = NULL;
217 218
@@ -262,11 +263,11 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
262 /* we didn't use the vfsmount_mark */ 263 /* we didn't use the vfsmount_mark */
263 vfsmount_group = NULL; 264 vfsmount_group = NULL;
264 } else if (vfsmount_group > inode_group) { 265 } else if (vfsmount_group > inode_group) {
265 ret = send_to_group(to_tell, mnt, NULL, vfsmount_mark, mask, data, 266 ret = send_to_group(to_tell, &mnt->mnt, NULL, vfsmount_mark, mask, data,
266 data_is, cookie, file_name, &event); 267 data_is, cookie, file_name, &event);
267 inode_group = NULL; 268 inode_group = NULL;
268 } else { 269 } else {
269 ret = send_to_group(to_tell, mnt, inode_mark, vfsmount_mark, 270 ret = send_to_group(to_tell, &mnt->mnt, inode_mark, vfsmount_mark,
270 mask, data, data_is, cookie, file_name, 271 mask, data, data_is, cookie, file_name,
271 &event); 272 &event);
272 } 273 }
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index e14587d55689..f104d565b682 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -135,9 +135,6 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark)
135 135
136 mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE; 136 mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
137 137
138 /* 1 from caller and 1 for being on i_list/g_list */
139 BUG_ON(atomic_read(&mark->refcnt) < 2);
140
141 spin_lock(&group->mark_lock); 138 spin_lock(&group->mark_lock);
142 139
143 if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) { 140 if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) {
@@ -182,6 +179,11 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark)
182 iput(inode); 179 iput(inode);
183 180
184 /* 181 /*
182 * We don't necessarily have a ref on mark from caller so the above iput
183 * may have already destroyed it. Don't touch from now on.
184 */
185
186 /*
185 * it's possible that this group tried to destroy itself, but this 187 * it's possible that this group tried to destroy itself, but this
186 * this mark was simultaneously being freed by inode. If that's the 188 * this mark was simultaneously being freed by inode. If that's the
187 * case, we finish freeing the group here. 189 * case, we finish freeing the group here.
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index 778fe6cae3b0..b7b4b0e8554f 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -28,15 +28,17 @@
28 28
29#include <linux/fsnotify_backend.h> 29#include <linux/fsnotify_backend.h>
30#include "fsnotify.h" 30#include "fsnotify.h"
31#include "../mount.h"
31 32
32void fsnotify_clear_marks_by_mount(struct vfsmount *mnt) 33void fsnotify_clear_marks_by_mount(struct vfsmount *mnt)
33{ 34{
34 struct fsnotify_mark *mark, *lmark; 35 struct fsnotify_mark *mark, *lmark;
35 struct hlist_node *pos, *n; 36 struct hlist_node *pos, *n;
37 struct mount *m = real_mount(mnt);
36 LIST_HEAD(free_list); 38 LIST_HEAD(free_list);
37 39
38 spin_lock(&mnt->mnt_root->d_lock); 40 spin_lock(&mnt->mnt_root->d_lock);
39 hlist_for_each_entry_safe(mark, pos, n, &mnt->mnt_fsnotify_marks, m.m_list) { 41 hlist_for_each_entry_safe(mark, pos, n, &m->mnt_fsnotify_marks, m.m_list) {
40 list_add(&mark->m.free_m_list, &free_list); 42 list_add(&mark->m.free_m_list, &free_list);
41 hlist_del_init_rcu(&mark->m.m_list); 43 hlist_del_init_rcu(&mark->m.m_list);
42 fsnotify_get_mark(mark); 44 fsnotify_get_mark(mark);
@@ -59,15 +61,16 @@ void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group)
59 */ 61 */
60static void fsnotify_recalc_vfsmount_mask_locked(struct vfsmount *mnt) 62static void fsnotify_recalc_vfsmount_mask_locked(struct vfsmount *mnt)
61{ 63{
64 struct mount *m = real_mount(mnt);
62 struct fsnotify_mark *mark; 65 struct fsnotify_mark *mark;
63 struct hlist_node *pos; 66 struct hlist_node *pos;
64 __u32 new_mask = 0; 67 __u32 new_mask = 0;
65 68
66 assert_spin_locked(&mnt->mnt_root->d_lock); 69 assert_spin_locked(&mnt->mnt_root->d_lock);
67 70
68 hlist_for_each_entry(mark, pos, &mnt->mnt_fsnotify_marks, m.m_list) 71 hlist_for_each_entry(mark, pos, &m->mnt_fsnotify_marks, m.m_list)
69 new_mask |= mark->mask; 72 new_mask |= mark->mask;
70 mnt->mnt_fsnotify_mask = new_mask; 73 m->mnt_fsnotify_mask = new_mask;
71} 74}
72 75
73/* 76/*
@@ -101,12 +104,13 @@ void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark)
101static struct fsnotify_mark *fsnotify_find_vfsmount_mark_locked(struct fsnotify_group *group, 104static struct fsnotify_mark *fsnotify_find_vfsmount_mark_locked(struct fsnotify_group *group,
102 struct vfsmount *mnt) 105 struct vfsmount *mnt)
103{ 106{
107 struct mount *m = real_mount(mnt);
104 struct fsnotify_mark *mark; 108 struct fsnotify_mark *mark;
105 struct hlist_node *pos; 109 struct hlist_node *pos;
106 110
107 assert_spin_locked(&mnt->mnt_root->d_lock); 111 assert_spin_locked(&mnt->mnt_root->d_lock);
108 112
109 hlist_for_each_entry(mark, pos, &mnt->mnt_fsnotify_marks, m.m_list) { 113 hlist_for_each_entry(mark, pos, &m->mnt_fsnotify_marks, m.m_list) {
110 if (mark->group == group) { 114 if (mark->group == group) {
111 fsnotify_get_mark(mark); 115 fsnotify_get_mark(mark);
112 return mark; 116 return mark;
@@ -140,6 +144,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
140 struct fsnotify_group *group, struct vfsmount *mnt, 144 struct fsnotify_group *group, struct vfsmount *mnt,
141 int allow_dups) 145 int allow_dups)
142{ 146{
147 struct mount *m = real_mount(mnt);
143 struct fsnotify_mark *lmark; 148 struct fsnotify_mark *lmark;
144 struct hlist_node *node, *last = NULL; 149 struct hlist_node *node, *last = NULL;
145 int ret = 0; 150 int ret = 0;
@@ -154,13 +159,13 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
154 mark->m.mnt = mnt; 159 mark->m.mnt = mnt;
155 160
156 /* is mark the first mark? */ 161 /* is mark the first mark? */
157 if (hlist_empty(&mnt->mnt_fsnotify_marks)) { 162 if (hlist_empty(&m->mnt_fsnotify_marks)) {
158 hlist_add_head_rcu(&mark->m.m_list, &mnt->mnt_fsnotify_marks); 163 hlist_add_head_rcu(&mark->m.m_list, &m->mnt_fsnotify_marks);
159 goto out; 164 goto out;
160 } 165 }
161 166
162 /* should mark be in the middle of the current list? */ 167 /* should mark be in the middle of the current list? */
163 hlist_for_each_entry(lmark, node, &mnt->mnt_fsnotify_marks, m.m_list) { 168 hlist_for_each_entry(lmark, node, &m->mnt_fsnotify_marks, m.m_list) {
164 last = node; 169 last = node;
165 170
166 if ((lmark->group == group) && !allow_dups) { 171 if ((lmark->group == group) && !allow_dups) {
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 97e2dacbc867..2eaa66652944 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -335,7 +335,6 @@ struct inode *ntfs_alloc_big_inode(struct super_block *sb)
335static void ntfs_i_callback(struct rcu_head *head) 335static void ntfs_i_callback(struct rcu_head *head)
336{ 336{
337 struct inode *inode = container_of(head, struct inode, i_rcu); 337 struct inode *inode = container_of(head, struct inode, i_rcu);
338 INIT_LIST_HEAD(&inode->i_dentry);
339 kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode)); 338 kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode));
340} 339}
341 340
@@ -2301,16 +2300,16 @@ void ntfs_evict_big_inode(struct inode *vi)
2301/** 2300/**
2302 * ntfs_show_options - show mount options in /proc/mounts 2301 * ntfs_show_options - show mount options in /proc/mounts
2303 * @sf: seq_file in which to write our mount options 2302 * @sf: seq_file in which to write our mount options
2304 * @mnt: vfs mount whose mount options to display 2303 * @root: root of the mounted tree whose mount options to display
2305 * 2304 *
2306 * Called by the VFS once for each mounted ntfs volume when someone reads 2305 * Called by the VFS once for each mounted ntfs volume when someone reads
2307 * /proc/mounts in order to display the NTFS specific mount options of each 2306 * /proc/mounts in order to display the NTFS specific mount options of each
2308 * mount. The mount options of the vfs mount @mnt are written to the seq file 2307 * mount. The mount options of fs specified by @root are written to the seq file
2309 * @sf and success is returned. 2308 * @sf and success is returned.
2310 */ 2309 */
2311int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt) 2310int ntfs_show_options(struct seq_file *sf, struct dentry *root)
2312{ 2311{
2313 ntfs_volume *vol = NTFS_SB(mnt->mnt_sb); 2312 ntfs_volume *vol = NTFS_SB(root->d_sb);
2314 int i; 2313 int i;
2315 2314
2316 seq_printf(sf, ",uid=%i", vol->uid); 2315 seq_printf(sf, ",uid=%i", vol->uid);
diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h
index fe8e7e928889..db29695f845c 100644
--- a/fs/ntfs/inode.h
+++ b/fs/ntfs/inode.h
@@ -298,7 +298,7 @@ extern void ntfs_clear_extent_inode(ntfs_inode *ni);
298 298
299extern int ntfs_read_inode_mount(struct inode *vi); 299extern int ntfs_read_inode_mount(struct inode *vi);
300 300
301extern int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt); 301extern int ntfs_show_options(struct seq_file *sf, struct dentry *root);
302 302
303#ifdef NTFS_RW 303#ifdef NTFS_RW
304 304
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index b52706da4645..5a4a8af5c406 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -104,7 +104,7 @@ static bool parse_options(ntfs_volume *vol, char *opt)
104 int errors = 0, sloppy = 0; 104 int errors = 0, sloppy = 0;
105 uid_t uid = (uid_t)-1; 105 uid_t uid = (uid_t)-1;
106 gid_t gid = (gid_t)-1; 106 gid_t gid = (gid_t)-1;
107 mode_t fmask = (mode_t)-1, dmask = (mode_t)-1; 107 umode_t fmask = (umode_t)-1, dmask = (umode_t)-1;
108 int mft_zone_multiplier = -1, on_errors = -1; 108 int mft_zone_multiplier = -1, on_errors = -1;
109 int show_sys_files = -1, case_sensitive = -1, disable_sparse = -1; 109 int show_sys_files = -1, case_sensitive = -1, disable_sparse = -1;
110 struct nls_table *nls_map = NULL, *old_nls; 110 struct nls_table *nls_map = NULL, *old_nls;
@@ -287,9 +287,9 @@ no_mount_options:
287 vol->uid = uid; 287 vol->uid = uid;
288 if (gid != (gid_t)-1) 288 if (gid != (gid_t)-1)
289 vol->gid = gid; 289 vol->gid = gid;
290 if (fmask != (mode_t)-1) 290 if (fmask != (umode_t)-1)
291 vol->fmask = fmask; 291 vol->fmask = fmask;
292 if (dmask != (mode_t)-1) 292 if (dmask != (umode_t)-1)
293 vol->dmask = dmask; 293 vol->dmask = dmask;
294 if (show_sys_files != -1) { 294 if (show_sys_files != -1) {
295 if (show_sys_files) 295 if (show_sys_files)
@@ -3198,7 +3198,7 @@ MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2011 Anton Altaparm
3198MODULE_VERSION(NTFS_VERSION); 3198MODULE_VERSION(NTFS_VERSION);
3199MODULE_LICENSE("GPL"); 3199MODULE_LICENSE("GPL");
3200#ifdef DEBUG 3200#ifdef DEBUG
3201module_param(debug_msgs, bool, 0); 3201module_param(debug_msgs, bint, 0);
3202MODULE_PARM_DESC(debug_msgs, "Enable debug messages."); 3202MODULE_PARM_DESC(debug_msgs, "Enable debug messages.");
3203#endif 3203#endif
3204 3204
diff --git a/fs/ntfs/volume.h b/fs/ntfs/volume.h
index 406ab55dfb32..15e3ba8d521a 100644
--- a/fs/ntfs/volume.h
+++ b/fs/ntfs/volume.h
@@ -48,8 +48,8 @@ typedef struct {
48 unsigned long flags; /* Miscellaneous flags, see below. */ 48 unsigned long flags; /* Miscellaneous flags, see below. */
49 uid_t uid; /* uid that files will be mounted as. */ 49 uid_t uid; /* uid that files will be mounted as. */
50 gid_t gid; /* gid that files will be mounted as. */ 50 gid_t gid; /* gid that files will be mounted as. */
51 mode_t fmask; /* The mask for file permissions. */ 51 umode_t fmask; /* The mask for file permissions. */
52 mode_t dmask; /* The mask for directory 52 umode_t dmask; /* The mask for directory
53 permissions. */ 53 permissions. */
54 u8 mft_zone_multiplier; /* Initial mft zone multiplier. */ 54 u8 mft_zone_multiplier; /* Initial mft zone multiplier. */
55 u8 on_errors; /* What to do on filesystem errors. */ 55 u8 on_errors; /* What to do on filesystem errors. */
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c
index dc45deb19e68..73ba81928bce 100644
--- a/fs/ocfs2/cluster/netdebug.c
+++ b/fs/ocfs2/cluster/netdebug.c
@@ -553,7 +553,7 @@ void o2net_debugfs_exit(void)
553 553
554int o2net_debugfs_init(void) 554int o2net_debugfs_init(void)
555{ 555{
556 mode_t mode = S_IFREG|S_IRUSR; 556 umode_t mode = S_IFREG|S_IRUSR;
557 557
558 o2net_dentry = debugfs_create_dir(O2NET_DEBUG_DIR, NULL); 558 o2net_dentry = debugfs_create_dir(O2NET_DEBUG_DIR, NULL);
559 if (o2net_dentry) 559 if (o2net_dentry)
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index b42076797049..abfac0d7ae9c 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -354,7 +354,6 @@ static struct inode *dlmfs_alloc_inode(struct super_block *sb)
354static void dlmfs_i_callback(struct rcu_head *head) 354static void dlmfs_i_callback(struct rcu_head *head)
355{ 355{
356 struct inode *inode = container_of(head, struct inode, i_rcu); 356 struct inode *inode = container_of(head, struct inode, i_rcu);
357 INIT_LIST_HEAD(&inode->i_dentry);
358 kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode)); 357 kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode));
359} 358}
360 359
@@ -401,16 +400,14 @@ static struct backing_dev_info dlmfs_backing_dev_info = {
401static struct inode *dlmfs_get_root_inode(struct super_block *sb) 400static struct inode *dlmfs_get_root_inode(struct super_block *sb)
402{ 401{
403 struct inode *inode = new_inode(sb); 402 struct inode *inode = new_inode(sb);
404 int mode = S_IFDIR | 0755; 403 umode_t mode = S_IFDIR | 0755;
405 struct dlmfs_inode_private *ip; 404 struct dlmfs_inode_private *ip;
406 405
407 if (inode) { 406 if (inode) {
408 ip = DLMFS_I(inode); 407 ip = DLMFS_I(inode);
409 408
410 inode->i_ino = get_next_ino(); 409 inode->i_ino = get_next_ino();
411 inode->i_mode = mode; 410 inode_init_owner(inode, NULL, mode);
412 inode->i_uid = current_fsuid();
413 inode->i_gid = current_fsgid();
414 inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; 411 inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
415 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 412 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
416 inc_nlink(inode); 413 inc_nlink(inode);
@@ -424,7 +421,7 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb)
424 421
425static struct inode *dlmfs_get_inode(struct inode *parent, 422static struct inode *dlmfs_get_inode(struct inode *parent,
426 struct dentry *dentry, 423 struct dentry *dentry,
427 int mode) 424 umode_t mode)
428{ 425{
429 struct super_block *sb = parent->i_sb; 426 struct super_block *sb = parent->i_sb;
430 struct inode * inode = new_inode(sb); 427 struct inode * inode = new_inode(sb);
@@ -434,9 +431,7 @@ static struct inode *dlmfs_get_inode(struct inode *parent,
434 return NULL; 431 return NULL;
435 432
436 inode->i_ino = get_next_ino(); 433 inode->i_ino = get_next_ino();
437 inode->i_mode = mode; 434 inode_init_owner(inode, parent, mode);
438 inode->i_uid = current_fsuid();
439 inode->i_gid = current_fsgid();
440 inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; 435 inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
441 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 436 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
442 437
@@ -473,13 +468,6 @@ static struct inode *dlmfs_get_inode(struct inode *parent,
473 inc_nlink(inode); 468 inc_nlink(inode);
474 break; 469 break;
475 } 470 }
476
477 if (parent->i_mode & S_ISGID) {
478 inode->i_gid = parent->i_gid;
479 if (S_ISDIR(mode))
480 inode->i_mode |= S_ISGID;
481 }
482
483 return inode; 471 return inode;
484} 472}
485 473
@@ -489,7 +477,7 @@ static struct inode *dlmfs_get_inode(struct inode *parent,
489/* SMP-safe */ 477/* SMP-safe */
490static int dlmfs_mkdir(struct inode * dir, 478static int dlmfs_mkdir(struct inode * dir,
491 struct dentry * dentry, 479 struct dentry * dentry,
492 int mode) 480 umode_t mode)
493{ 481{
494 int status; 482 int status;
495 struct inode *inode = NULL; 483 struct inode *inode = NULL;
@@ -537,7 +525,7 @@ bail:
537 525
538static int dlmfs_create(struct inode *dir, 526static int dlmfs_create(struct inode *dir,
539 struct dentry *dentry, 527 struct dentry *dentry,
540 int mode, 528 umode_t mode,
541 struct nameidata *nd) 529 struct nameidata *nd)
542{ 530{
543 int status = 0; 531 int status = 0;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 6e396683c3d4..061591a3ab08 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2128,7 +2128,7 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
2128 * remove_suid() calls ->setattr without any hint that 2128 * remove_suid() calls ->setattr without any hint that
2129 * we may have already done our cluster locking. Since 2129 * we may have already done our cluster locking. Since
2130 * ocfs2_setattr() *must* take cluster locks to 2130 * ocfs2_setattr() *must* take cluster locks to
2131 * proceeed, this will lead us to recursively lock the 2131 * proceed, this will lead us to recursively lock the
2132 * inode. There's also the dinode i_size state which 2132 * inode. There's also the dinode i_size state which
2133 * can be lost via setattr during extending writes (we 2133 * can be lost via setattr during extending writes (we
2134 * set inode->i_size at the end of a write. */ 2134 * set inode->i_size at the end of a write. */
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 726ff265b296..a6fda3c188aa 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -906,12 +906,12 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
906 if (get_user(flags, (int __user *) arg)) 906 if (get_user(flags, (int __user *) arg))
907 return -EFAULT; 907 return -EFAULT;
908 908
909 status = mnt_want_write(filp->f_path.mnt); 909 status = mnt_want_write_file(filp);
910 if (status) 910 if (status)
911 return status; 911 return status;
912 status = ocfs2_set_inode_attr(inode, flags, 912 status = ocfs2_set_inode_attr(inode, flags,
913 OCFS2_FL_MODIFIABLE); 913 OCFS2_FL_MODIFIABLE);
914 mnt_drop_write(filp->f_path.mnt); 914 mnt_drop_write_file(filp);
915 return status; 915 return status;
916 case OCFS2_IOC_RESVSP: 916 case OCFS2_IOC_RESVSP:
917 case OCFS2_IOC_RESVSP64: 917 case OCFS2_IOC_RESVSP64:
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 184c76b8c293..b1e3fce72ea4 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -1059,7 +1059,7 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp)
1059 struct ocfs2_move_extents range; 1059 struct ocfs2_move_extents range;
1060 struct ocfs2_move_extents_context *context = NULL; 1060 struct ocfs2_move_extents_context *context = NULL;
1061 1061
1062 status = mnt_want_write(filp->f_path.mnt); 1062 status = mnt_want_write_file(filp);
1063 if (status) 1063 if (status)
1064 return status; 1064 return status;
1065 1065
@@ -1145,7 +1145,7 @@ out:
1145 1145
1146 kfree(context); 1146 kfree(context);
1147 1147
1148 mnt_drop_write(filp->f_path.mnt); 1148 mnt_drop_write_file(filp);
1149 1149
1150 return status; 1150 return status;
1151} 1151}
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index a8b2bfea574e..be244692550d 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -185,7 +185,7 @@ bail:
185 return ret; 185 return ret;
186} 186}
187 187
188static struct inode *ocfs2_get_init_inode(struct inode *dir, int mode) 188static struct inode *ocfs2_get_init_inode(struct inode *dir, umode_t mode)
189{ 189{
190 struct inode *inode; 190 struct inode *inode;
191 191
@@ -207,7 +207,7 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, int mode)
207 207
208static int ocfs2_mknod(struct inode *dir, 208static int ocfs2_mknod(struct inode *dir,
209 struct dentry *dentry, 209 struct dentry *dentry,
210 int mode, 210 umode_t mode,
211 dev_t dev) 211 dev_t dev)
212{ 212{
213 int status = 0; 213 int status = 0;
@@ -602,7 +602,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
602 602
603static int ocfs2_mkdir(struct inode *dir, 603static int ocfs2_mkdir(struct inode *dir,
604 struct dentry *dentry, 604 struct dentry *dentry,
605 int mode) 605 umode_t mode)
606{ 606{
607 int ret; 607 int ret;
608 608
@@ -617,7 +617,7 @@ static int ocfs2_mkdir(struct inode *dir,
617 617
618static int ocfs2_create(struct inode *dir, 618static int ocfs2_create(struct inode *dir,
619 struct dentry *dentry, 619 struct dentry *dentry,
620 int mode, 620 umode_t mode,
621 struct nameidata *nd) 621 struct nameidata *nd)
622{ 622{
623 int ret; 623 int ret;
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index a5ebe421195f..286edf1e231f 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -827,8 +827,8 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
827 goto out; 827 goto out;
828 } 828 }
829 829
830 rc = dlm_new_lockspace(conn->cc_name, strlen(conn->cc_name), 830 rc = dlm_new_lockspace(conn->cc_name, NULL, DLM_LSFL_FS, DLM_LVB_LEN,
831 &fsdlm, DLM_LSFL_FS, DLM_LVB_LEN); 831 NULL, NULL, NULL, &fsdlm);
832 if (rc) { 832 if (rc) {
833 ocfs2_live_connection_drop(control); 833 ocfs2_live_connection_drop(control);
834 goto out; 834 goto out;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 4994f8b0e604..604e12c4e979 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -108,7 +108,7 @@ static int ocfs2_parse_options(struct super_block *sb, char *options,
108 int is_remount); 108 int is_remount);
109static int ocfs2_check_set_options(struct super_block *sb, 109static int ocfs2_check_set_options(struct super_block *sb,
110 struct mount_options *options); 110 struct mount_options *options);
111static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt); 111static int ocfs2_show_options(struct seq_file *s, struct dentry *root);
112static void ocfs2_put_super(struct super_block *sb); 112static void ocfs2_put_super(struct super_block *sb);
113static int ocfs2_mount_volume(struct super_block *sb); 113static int ocfs2_mount_volume(struct super_block *sb);
114static int ocfs2_remount(struct super_block *sb, int *flags, char *data); 114static int ocfs2_remount(struct super_block *sb, int *flags, char *data);
@@ -569,7 +569,6 @@ static struct inode *ocfs2_alloc_inode(struct super_block *sb)
569static void ocfs2_i_callback(struct rcu_head *head) 569static void ocfs2_i_callback(struct rcu_head *head)
570{ 570{
571 struct inode *inode = container_of(head, struct inode, i_rcu); 571 struct inode *inode = container_of(head, struct inode, i_rcu);
572 INIT_LIST_HEAD(&inode->i_dentry);
573 kmem_cache_free(ocfs2_inode_cachep, OCFS2_I(inode)); 572 kmem_cache_free(ocfs2_inode_cachep, OCFS2_I(inode));
574} 573}
575 574
@@ -1534,9 +1533,9 @@ bail:
1534 return status; 1533 return status;
1535} 1534}
1536 1535
1537static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) 1536static int ocfs2_show_options(struct seq_file *s, struct dentry *root)
1538{ 1537{
1539 struct ocfs2_super *osb = OCFS2_SB(mnt->mnt_sb); 1538 struct ocfs2_super *osb = OCFS2_SB(root->d_sb);
1540 unsigned long opts = osb->s_mount_opt; 1539 unsigned long opts = osb->s_mount_opt;
1541 unsigned int local_alloc_megs; 1540 unsigned int local_alloc_megs;
1542 1541
@@ -1568,8 +1567,7 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1568 if (osb->preferred_slot != OCFS2_INVALID_SLOT) 1567 if (osb->preferred_slot != OCFS2_INVALID_SLOT)
1569 seq_printf(s, ",preferred_slot=%d", osb->preferred_slot); 1568 seq_printf(s, ",preferred_slot=%d", osb->preferred_slot);
1570 1569
1571 if (!(mnt->mnt_flags & MNT_NOATIME) && !(mnt->mnt_flags & MNT_RELATIME)) 1570 seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum);
1572 seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum);
1573 1571
1574 if (osb->osb_commit_interval) 1572 if (osb->osb_commit_interval)
1575 seq_printf(s, ",commit=%u", 1573 seq_printf(s, ",commit=%u",
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index aa9e8777b09a..0ba9ea1e7961 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -623,7 +623,7 @@ int ocfs2_calc_security_init(struct inode *dir,
623 623
624int ocfs2_calc_xattr_init(struct inode *dir, 624int ocfs2_calc_xattr_init(struct inode *dir,
625 struct buffer_head *dir_bh, 625 struct buffer_head *dir_bh,
626 int mode, 626 umode_t mode,
627 struct ocfs2_security_xattr_info *si, 627 struct ocfs2_security_xattr_info *si,
628 int *want_clusters, 628 int *want_clusters,
629 int *xattr_credits, 629 int *xattr_credits,
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index d63cfb72316b..e5c7f15465b4 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -68,7 +68,7 @@ int ocfs2_calc_security_init(struct inode *,
68 struct ocfs2_security_xattr_info *, 68 struct ocfs2_security_xattr_info *,
69 int *, int *, struct ocfs2_alloc_context **); 69 int *, int *, struct ocfs2_alloc_context **);
70int ocfs2_calc_xattr_init(struct inode *, struct buffer_head *, 70int ocfs2_calc_xattr_init(struct inode *, struct buffer_head *,
71 int, struct ocfs2_security_xattr_info *, 71 umode_t, struct ocfs2_security_xattr_info *,
72 int *, int *, int *); 72 int *, int *, int *);
73 73
74/* 74/*
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index 98e544274390..f00576ec320f 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -255,7 +255,7 @@ static int omfs_remove(struct inode *dir, struct dentry *dentry)
255 return 0; 255 return 0;
256} 256}
257 257
258static int omfs_add_node(struct inode *dir, struct dentry *dentry, int mode) 258static int omfs_add_node(struct inode *dir, struct dentry *dentry, umode_t mode)
259{ 259{
260 int err; 260 int err;
261 struct inode *inode = omfs_new_inode(dir, mode); 261 struct inode *inode = omfs_new_inode(dir, mode);
@@ -279,12 +279,12 @@ out_free_inode:
279 return err; 279 return err;
280} 280}
281 281
282static int omfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 282static int omfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
283{ 283{
284 return omfs_add_node(dir, dentry, mode | S_IFDIR); 284 return omfs_add_node(dir, dentry, mode | S_IFDIR);
285} 285}
286 286
287static int omfs_create(struct inode *dir, struct dentry *dentry, int mode, 287static int omfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
288 struct nameidata *nd) 288 struct nameidata *nd)
289{ 289{
290 return omfs_add_node(dir, dentry, mode | S_IFREG); 290 return omfs_add_node(dir, dentry, mode | S_IFREG);
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index e043c4cb9a97..6065bb0ba207 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -28,7 +28,7 @@ struct buffer_head *omfs_bread(struct super_block *sb, sector_t block)
28 return sb_bread(sb, clus_to_blk(sbi, block)); 28 return sb_bread(sb, clus_to_blk(sbi, block));
29} 29}
30 30
31struct inode *omfs_new_inode(struct inode *dir, int mode) 31struct inode *omfs_new_inode(struct inode *dir, umode_t mode)
32{ 32{
33 struct inode *inode; 33 struct inode *inode;
34 u64 new_block; 34 u64 new_block;
diff --git a/fs/omfs/omfs.h b/fs/omfs/omfs.h
index 7d414fef501a..8941f12c6b01 100644
--- a/fs/omfs/omfs.h
+++ b/fs/omfs/omfs.h
@@ -60,7 +60,7 @@ extern int omfs_shrink_inode(struct inode *inode);
60/* inode.c */ 60/* inode.c */
61extern struct buffer_head *omfs_bread(struct super_block *sb, sector_t block); 61extern struct buffer_head *omfs_bread(struct super_block *sb, sector_t block);
62extern struct inode *omfs_iget(struct super_block *sb, ino_t inode); 62extern struct inode *omfs_iget(struct super_block *sb, ino_t inode);
63extern struct inode *omfs_new_inode(struct inode *dir, int mode); 63extern struct inode *omfs_new_inode(struct inode *dir, umode_t mode);
64extern int omfs_reserve_block(struct super_block *sb, sector_t block); 64extern int omfs_reserve_block(struct super_block *sb, sector_t block);
65extern int omfs_find_empty_block(struct super_block *sb, int mode, ino_t *ino); 65extern int omfs_find_empty_block(struct super_block *sb, int mode, ino_t *ino);
66extern int omfs_sync_inode(struct inode *inode); 66extern int omfs_sync_inode(struct inode *inode);
diff --git a/fs/open.c b/fs/open.c
index 22c41b543f2d..77becc041149 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -456,7 +456,7 @@ static int chmod_common(struct path *path, umode_t mode)
456 if (error) 456 if (error)
457 return error; 457 return error;
458 mutex_lock(&inode->i_mutex); 458 mutex_lock(&inode->i_mutex);
459 error = security_path_chmod(path->dentry, path->mnt, mode); 459 error = security_path_chmod(path, mode);
460 if (error) 460 if (error)
461 goto out_unlock; 461 goto out_unlock;
462 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); 462 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
@@ -468,7 +468,7 @@ out_unlock:
468 return error; 468 return error;
469} 469}
470 470
471SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode) 471SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode)
472{ 472{
473 struct file * file; 473 struct file * file;
474 int err = -EBADF; 474 int err = -EBADF;
@@ -482,7 +482,7 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode)
482 return err; 482 return err;
483} 483}
484 484
485SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, mode_t, mode) 485SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, umode_t, mode)
486{ 486{
487 struct path path; 487 struct path path;
488 int error; 488 int error;
@@ -495,7 +495,7 @@ SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, mode_t, mode)
495 return error; 495 return error;
496} 496}
497 497
498SYSCALL_DEFINE2(chmod, const char __user *, filename, mode_t, mode) 498SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
499{ 499{
500 return sys_fchmodat(AT_FDCWD, filename, mode); 500 return sys_fchmodat(AT_FDCWD, filename, mode);
501} 501}
@@ -608,7 +608,7 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
608 dentry = file->f_path.dentry; 608 dentry = file->f_path.dentry;
609 audit_inode(NULL, dentry); 609 audit_inode(NULL, dentry);
610 error = chown_common(&file->f_path, user, group); 610 error = chown_common(&file->f_path, user, group);
611 mnt_drop_write(file->f_path.mnt); 611 mnt_drop_write_file(file);
612out_fput: 612out_fput:
613 fput(file); 613 fput(file);
614out: 614out:
@@ -877,7 +877,7 @@ void fd_install(unsigned int fd, struct file *file)
877 877
878EXPORT_SYMBOL(fd_install); 878EXPORT_SYMBOL(fd_install);
879 879
880static inline int build_open_flags(int flags, int mode, struct open_flags *op) 880static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op)
881{ 881{
882 int lookup_flags = 0; 882 int lookup_flags = 0;
883 int acc_mode; 883 int acc_mode;
@@ -948,7 +948,7 @@ static inline int build_open_flags(int flags, int mode, struct open_flags *op)
948 * have to. But in generally you should not do this, so please move 948 * have to. But in generally you should not do this, so please move
949 * along, nothing to see here.. 949 * along, nothing to see here..
950 */ 950 */
951struct file *filp_open(const char *filename, int flags, int mode) 951struct file *filp_open(const char *filename, int flags, umode_t mode)
952{ 952{
953 struct open_flags op; 953 struct open_flags op;
954 int lookup = build_open_flags(flags, mode, &op); 954 int lookup = build_open_flags(flags, mode, &op);
@@ -970,7 +970,7 @@ struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
970} 970}
971EXPORT_SYMBOL(file_open_root); 971EXPORT_SYMBOL(file_open_root);
972 972
973long do_sys_open(int dfd, const char __user *filename, int flags, int mode) 973long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
974{ 974{
975 struct open_flags op; 975 struct open_flags op;
976 int lookup = build_open_flags(flags, mode, &op); 976 int lookup = build_open_flags(flags, mode, &op);
@@ -994,7 +994,7 @@ long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
994 return fd; 994 return fd;
995} 995}
996 996
997SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode) 997SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
998{ 998{
999 long ret; 999 long ret;
1000 1000
@@ -1008,7 +1008,7 @@ SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode)
1008} 1008}
1009 1009
1010SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags, 1010SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags,
1011 int, mode) 1011 umode_t, mode)
1012{ 1012{
1013 long ret; 1013 long ret;
1014 1014
@@ -1027,7 +1027,7 @@ SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags,
1027 * For backward compatibility? Maybe this should be moved 1027 * For backward compatibility? Maybe this should be moved
1028 * into arch/i386 instead? 1028 * into arch/i386 instead?
1029 */ 1029 */
1030SYSCALL_DEFINE2(creat, const char __user *, pathname, int, mode) 1030SYSCALL_DEFINE2(creat, const char __user *, pathname, umode_t, mode)
1031{ 1031{
1032 return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode); 1032 return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode);
1033} 1033}
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index e4e0ff7962e2..a88c03bc749d 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -346,7 +346,6 @@ static struct inode *openprom_alloc_inode(struct super_block *sb)
346static void openprom_i_callback(struct rcu_head *head) 346static void openprom_i_callback(struct rcu_head *head)
347{ 347{
348 struct inode *inode = container_of(head, struct inode, i_rcu); 348 struct inode *inode = container_of(head, struct inode, i_rcu);
349 INIT_LIST_HEAD(&inode->i_dentry);
350 kmem_cache_free(op_inode_cachep, OP_I(inode)); 349 kmem_cache_free(op_inode_cachep, OP_I(inode));
351} 350}
352 351
diff --git a/fs/partitions/Kconfig b/fs/partitions/Kconfig
deleted file mode 100644
index cb5f0a3f1b03..000000000000
--- a/fs/partitions/Kconfig
+++ /dev/null
@@ -1,251 +0,0 @@
1#
2# Partition configuration
3#
4config PARTITION_ADVANCED
5 bool "Advanced partition selection"
6 help
7 Say Y here if you would like to use hard disks under Linux which
8 were partitioned under an operating system running on a different
9 architecture than your Linux system.
10
11 Note that the answer to this question won't directly affect the
12 kernel: saying N will just cause the configurator to skip all
13 the questions about foreign partitioning schemes.
14
15 If unsure, say N.
16
17config ACORN_PARTITION
18 bool "Acorn partition support" if PARTITION_ADVANCED
19 default y if ARCH_ACORN
20 help
21 Support hard disks partitioned under Acorn operating systems.
22
23config ACORN_PARTITION_CUMANA
24 bool "Cumana partition support" if PARTITION_ADVANCED
25 default y if ARCH_ACORN
26 depends on ACORN_PARTITION
27 help
28 Say Y here if you would like to use hard disks under Linux which
29 were partitioned using the Cumana interface on Acorn machines.
30
31config ACORN_PARTITION_EESOX
32 bool "EESOX partition support" if PARTITION_ADVANCED
33 default y if ARCH_ACORN
34 depends on ACORN_PARTITION
35
36config ACORN_PARTITION_ICS
37 bool "ICS partition support" if PARTITION_ADVANCED
38 default y if ARCH_ACORN
39 depends on ACORN_PARTITION
40 help
41 Say Y here if you would like to use hard disks under Linux which
42 were partitioned using the ICS interface on Acorn machines.
43
44config ACORN_PARTITION_ADFS
45 bool "Native filecore partition support" if PARTITION_ADVANCED
46 default y if ARCH_ACORN
47 depends on ACORN_PARTITION
48 help
49 The Acorn Disc Filing System is the standard file system of the
50 RiscOS operating system which runs on Acorn's ARM-based Risc PC
51 systems and the Acorn Archimedes range of machines. If you say
52 `Y' here, Linux will support disk partitions created under ADFS.
53
54config ACORN_PARTITION_POWERTEC
55 bool "PowerTec partition support" if PARTITION_ADVANCED
56 default y if ARCH_ACORN
57 depends on ACORN_PARTITION
58 help
59 Support reading partition tables created on Acorn machines using
60 the PowerTec SCSI drive.
61
62config ACORN_PARTITION_RISCIX
63 bool "RISCiX partition support" if PARTITION_ADVANCED
64 default y if ARCH_ACORN
65 depends on ACORN_PARTITION
66 help
67 Once upon a time, there was a native Unix port for the Acorn series
68 of machines called RISCiX. If you say 'Y' here, Linux will be able
69 to read disks partitioned under RISCiX.
70
71config OSF_PARTITION
72 bool "Alpha OSF partition support" if PARTITION_ADVANCED
73 default y if ALPHA
74 help
75 Say Y here if you would like to use hard disks under Linux which
76 were partitioned on an Alpha machine.
77
78config AMIGA_PARTITION
79 bool "Amiga partition table support" if PARTITION_ADVANCED
80 default y if (AMIGA || AFFS_FS=y)
81 help
82 Say Y here if you would like to use hard disks under Linux which
83 were partitioned under AmigaOS.
84
85config ATARI_PARTITION
86 bool "Atari partition table support" if PARTITION_ADVANCED
87 default y if ATARI
88 help
89 Say Y here if you would like to use hard disks under Linux which
90 were partitioned under the Atari OS.
91
92config IBM_PARTITION
93 bool "IBM disk label and partition support"
94 depends on PARTITION_ADVANCED && S390
95 help
96 Say Y here if you would like to be able to read the hard disk
97 partition table format used by IBM DASD disks operating under CMS.
98 Otherwise, say N.
99
100config MAC_PARTITION
101 bool "Macintosh partition map support" if PARTITION_ADVANCED
102 default y if (MAC || PPC_PMAC)
103 help
104 Say Y here if you would like to use hard disks under Linux which
105 were partitioned on a Macintosh.
106
107config MSDOS_PARTITION
108 bool "PC BIOS (MSDOS partition tables) support" if PARTITION_ADVANCED
109 default y
110 help
111 Say Y here.
112
113config BSD_DISKLABEL
114 bool "BSD disklabel (FreeBSD partition tables) support"
115 depends on PARTITION_ADVANCED && MSDOS_PARTITION
116 help
117 FreeBSD uses its own hard disk partition scheme on your PC. It
118 requires only one entry in the primary partition table of your disk
119 and manages it similarly to DOS extended partitions, putting in its
120 first sector a new partition table in BSD disklabel format. Saying Y
121 here allows you to read these disklabels and further mount FreeBSD
122 partitions from within Linux if you have also said Y to "UFS
123 file system support", above. If you don't know what all this is
124 about, say N.
125
126config MINIX_SUBPARTITION
127 bool "Minix subpartition support"
128 depends on PARTITION_ADVANCED && MSDOS_PARTITION
129 help
130 Minix 2.0.0/2.0.2 subpartition table support for Linux.
131 Say Y here if you want to mount and use Minix 2.0.0/2.0.2
132 subpartitions.
133
134config SOLARIS_X86_PARTITION
135 bool "Solaris (x86) partition table support"
136 depends on PARTITION_ADVANCED && MSDOS_PARTITION
137 help
138 Like most systems, Solaris x86 uses its own hard disk partition
139 table format, incompatible with all others. Saying Y here allows you
140 to read these partition tables and further mount Solaris x86
141 partitions from within Linux if you have also said Y to "UFS
142 file system support", above.
143
144config UNIXWARE_DISKLABEL
145 bool "Unixware slices support"
146 depends on PARTITION_ADVANCED && MSDOS_PARTITION
147 ---help---
148 Like some systems, UnixWare uses its own slice table inside a
149 partition (VTOC - Virtual Table of Contents). Its format is
150 incompatible with all other OSes. Saying Y here allows you to read
151 VTOC and further mount UnixWare partitions read-only from within
152 Linux if you have also said Y to "UFS file system support" or
153 "System V and Coherent file system support", above.
154
155 This is mainly used to carry data from a UnixWare box to your
156 Linux box via a removable medium like magneto-optical, ZIP or
157 removable IDE drives. Note, however, that a good portable way to
158 transport files and directories between unixes (and even other
159 operating systems) is given by the tar program ("man tar" or
160 preferably "info tar").
161
162 If you don't know what all this is about, say N.
163
164config LDM_PARTITION
165 bool "Windows Logical Disk Manager (Dynamic Disk) support"
166 depends on PARTITION_ADVANCED
167 ---help---
168 Say Y here if you would like to use hard disks under Linux which
169 were partitioned using Windows 2000's/XP's or Vista's Logical Disk
170 Manager. They are also known as "Dynamic Disks".
171
172 Note this driver only supports Dynamic Disks with a protective MBR
173 label, i.e. DOS partition table. It does not support GPT labelled
174 Dynamic Disks yet as can be created with Vista.
175
176 Windows 2000 introduced the concept of Dynamic Disks to get around
177 the limitations of the PC's partitioning scheme. The Logical Disk
178 Manager allows the user to repartition a disk and create spanned,
179 mirrored, striped or RAID volumes, all without the need for
180 rebooting.
181
182 Normal partitions are now called Basic Disks under Windows 2000, XP,
183 and Vista.
184
185 For a fuller description read <file:Documentation/ldm.txt>.
186
187 If unsure, say N.
188
189config LDM_DEBUG
190 bool "Windows LDM extra logging"
191 depends on LDM_PARTITION
192 help
193 Say Y here if you would like LDM to log verbosely. This could be
194 helpful if the driver doesn't work as expected and you'd like to
195 report a bug.
196
197 If unsure, say N.
198
199config SGI_PARTITION
200 bool "SGI partition support" if PARTITION_ADVANCED
201 default y if DEFAULT_SGI_PARTITION
202 help
203 Say Y here if you would like to be able to read the hard disk
204 partition table format used by SGI machines.
205
206config ULTRIX_PARTITION
207 bool "Ultrix partition table support" if PARTITION_ADVANCED
208 default y if MACH_DECSTATION
209 help
210 Say Y here if you would like to be able to read the hard disk
211 partition table format used by DEC (now Compaq) Ultrix machines.
212 Otherwise, say N.
213
214config SUN_PARTITION
215 bool "Sun partition tables support" if PARTITION_ADVANCED
216 default y if (SPARC || SUN3 || SUN3X)
217 ---help---
218 Like most systems, SunOS uses its own hard disk partition table
219 format, incompatible with all others. Saying Y here allows you to
220 read these partition tables and further mount SunOS partitions from
221 within Linux if you have also said Y to "UFS file system support",
222 above. This is mainly used to carry data from a SPARC under SunOS to
223 your Linux box via a removable medium like magneto-optical or ZIP
224 drives; note however that a good portable way to transport files and
225 directories between unixes (and even other operating systems) is
226 given by the tar program ("man tar" or preferably "info tar"). If
227 you don't know what all this is about, say N.
228
229config KARMA_PARTITION
230 bool "Karma Partition support"
231 depends on PARTITION_ADVANCED
232 help
233 Say Y here if you would like to mount the Rio Karma MP3 player, as it
234 uses a proprietary partition table.
235
236config EFI_PARTITION
237 bool "EFI GUID Partition support"
238 depends on PARTITION_ADVANCED
239 select CRC32
240 help
241 Say Y here if you would like to use hard disks under Linux which
242 were partitioned using EFI GPT.
243
244config SYSV68_PARTITION
245 bool "SYSV68 partition table support" if PARTITION_ADVANCED
246 default y if VME
247 help
248 Say Y here if you would like to be able to read the hard disk
249 partition table format used by Motorola Delta machines (using
250 sysv68).
251 Otherwise, say N.
diff --git a/fs/partitions/Makefile b/fs/partitions/Makefile
deleted file mode 100644
index 03af8eac51da..000000000000
--- a/fs/partitions/Makefile
+++ /dev/null
@@ -1,20 +0,0 @@
1#
2# Makefile for the linux kernel.
3#
4
5obj-$(CONFIG_BLOCK) := check.o
6
7obj-$(CONFIG_ACORN_PARTITION) += acorn.o
8obj-$(CONFIG_AMIGA_PARTITION) += amiga.o
9obj-$(CONFIG_ATARI_PARTITION) += atari.o
10obj-$(CONFIG_MAC_PARTITION) += mac.o
11obj-$(CONFIG_LDM_PARTITION) += ldm.o
12obj-$(CONFIG_MSDOS_PARTITION) += msdos.o
13obj-$(CONFIG_OSF_PARTITION) += osf.o
14obj-$(CONFIG_SGI_PARTITION) += sgi.o
15obj-$(CONFIG_SUN_PARTITION) += sun.o
16obj-$(CONFIG_ULTRIX_PARTITION) += ultrix.o
17obj-$(CONFIG_IBM_PARTITION) += ibm.o
18obj-$(CONFIG_EFI_PARTITION) += efi.o
19obj-$(CONFIG_KARMA_PARTITION) += karma.o
20obj-$(CONFIG_SYSV68_PARTITION) += sysv68.o
diff --git a/fs/partitions/acorn.c b/fs/partitions/acorn.c
deleted file mode 100644
index fbeb697374d5..000000000000
--- a/fs/partitions/acorn.c
+++ /dev/null
@@ -1,556 +0,0 @@
1/*
2 * linux/fs/partitions/acorn.c
3 *
4 * Copyright (c) 1996-2000 Russell King.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 * Scan ADFS partitions on hard disk drives. Unfortunately, there
11 * isn't a standard for partitioning drives on Acorn machines, so
12 * every single manufacturer of SCSI and IDE cards created their own
13 * method.
14 */
15#include <linux/buffer_head.h>
16#include <linux/adfs_fs.h>
17
18#include "check.h"
19#include "acorn.h"
20
21/*
22 * Partition types. (Oh for reusability)
23 */
24#define PARTITION_RISCIX_MFM 1
25#define PARTITION_RISCIX_SCSI 2
26#define PARTITION_LINUX 9
27
28#if defined(CONFIG_ACORN_PARTITION_CUMANA) || \
29 defined(CONFIG_ACORN_PARTITION_ADFS)
30static struct adfs_discrecord *
31adfs_partition(struct parsed_partitions *state, char *name, char *data,
32 unsigned long first_sector, int slot)
33{
34 struct adfs_discrecord *dr;
35 unsigned int nr_sects;
36
37 if (adfs_checkbblk(data))
38 return NULL;
39
40 dr = (struct adfs_discrecord *)(data + 0x1c0);
41
42 if (dr->disc_size == 0 && dr->disc_size_high == 0)
43 return NULL;
44
45 nr_sects = (le32_to_cpu(dr->disc_size_high) << 23) |
46 (le32_to_cpu(dr->disc_size) >> 9);
47
48 if (name) {
49 strlcat(state->pp_buf, " [", PAGE_SIZE);
50 strlcat(state->pp_buf, name, PAGE_SIZE);
51 strlcat(state->pp_buf, "]", PAGE_SIZE);
52 }
53 put_partition(state, slot, first_sector, nr_sects);
54 return dr;
55}
56#endif
57
58#ifdef CONFIG_ACORN_PARTITION_RISCIX
59
60struct riscix_part {
61 __le32 start;
62 __le32 length;
63 __le32 one;
64 char name[16];
65};
66
67struct riscix_record {
68 __le32 magic;
69#define RISCIX_MAGIC cpu_to_le32(0x4a657320)
70 __le32 date;
71 struct riscix_part part[8];
72};
73
74#if defined(CONFIG_ACORN_PARTITION_CUMANA) || \
75 defined(CONFIG_ACORN_PARTITION_ADFS)
76static int riscix_partition(struct parsed_partitions *state,
77 unsigned long first_sect, int slot,
78 unsigned long nr_sects)
79{
80 Sector sect;
81 struct riscix_record *rr;
82
83 rr = read_part_sector(state, first_sect, &sect);
84 if (!rr)
85 return -1;
86
87 strlcat(state->pp_buf, " [RISCiX]", PAGE_SIZE);
88
89
90 if (rr->magic == RISCIX_MAGIC) {
91 unsigned long size = nr_sects > 2 ? 2 : nr_sects;
92 int part;
93
94 strlcat(state->pp_buf, " <", PAGE_SIZE);
95
96 put_partition(state, slot++, first_sect, size);
97 for (part = 0; part < 8; part++) {
98 if (rr->part[part].one &&
99 memcmp(rr->part[part].name, "All\0", 4)) {
100 put_partition(state, slot++,
101 le32_to_cpu(rr->part[part].start),
102 le32_to_cpu(rr->part[part].length));
103 strlcat(state->pp_buf, "(", PAGE_SIZE);
104 strlcat(state->pp_buf, rr->part[part].name, PAGE_SIZE);
105 strlcat(state->pp_buf, ")", PAGE_SIZE);
106 }
107 }
108
109 strlcat(state->pp_buf, " >\n", PAGE_SIZE);
110 } else {
111 put_partition(state, slot++, first_sect, nr_sects);
112 }
113
114 put_dev_sector(sect);
115 return slot;
116}
117#endif
118#endif
119
120#define LINUX_NATIVE_MAGIC 0xdeafa1de
121#define LINUX_SWAP_MAGIC 0xdeafab1e
122
123struct linux_part {
124 __le32 magic;
125 __le32 start_sect;
126 __le32 nr_sects;
127};
128
129#if defined(CONFIG_ACORN_PARTITION_CUMANA) || \
130 defined(CONFIG_ACORN_PARTITION_ADFS)
131static int linux_partition(struct parsed_partitions *state,
132 unsigned long first_sect, int slot,
133 unsigned long nr_sects)
134{
135 Sector sect;
136 struct linux_part *linuxp;
137 unsigned long size = nr_sects > 2 ? 2 : nr_sects;
138
139 strlcat(state->pp_buf, " [Linux]", PAGE_SIZE);
140
141 put_partition(state, slot++, first_sect, size);
142
143 linuxp = read_part_sector(state, first_sect, &sect);
144 if (!linuxp)
145 return -1;
146
147 strlcat(state->pp_buf, " <", PAGE_SIZE);
148 while (linuxp->magic == cpu_to_le32(LINUX_NATIVE_MAGIC) ||
149 linuxp->magic == cpu_to_le32(LINUX_SWAP_MAGIC)) {
150 if (slot == state->limit)
151 break;
152 put_partition(state, slot++, first_sect +
153 le32_to_cpu(linuxp->start_sect),
154 le32_to_cpu(linuxp->nr_sects));
155 linuxp ++;
156 }
157 strlcat(state->pp_buf, " >", PAGE_SIZE);
158
159 put_dev_sector(sect);
160 return slot;
161}
162#endif
163
164#ifdef CONFIG_ACORN_PARTITION_CUMANA
165int adfspart_check_CUMANA(struct parsed_partitions *state)
166{
167 unsigned long first_sector = 0;
168 unsigned int start_blk = 0;
169 Sector sect;
170 unsigned char *data;
171 char *name = "CUMANA/ADFS";
172 int first = 1;
173 int slot = 1;
174
175 /*
176 * Try Cumana style partitions - sector 6 contains ADFS boot block
177 * with pointer to next 'drive'.
178 *
179 * There are unknowns in this code - is the 'cylinder number' of the
180 * next partition relative to the start of this one - I'm assuming
181 * it is.
182 *
183 * Also, which ID did Cumana use?
184 *
185 * This is totally unfinished, and will require more work to get it
186 * going. Hence it is totally untested.
187 */
188 do {
189 struct adfs_discrecord *dr;
190 unsigned int nr_sects;
191
192 data = read_part_sector(state, start_blk * 2 + 6, &sect);
193 if (!data)
194 return -1;
195
196 if (slot == state->limit)
197 break;
198
199 dr = adfs_partition(state, name, data, first_sector, slot++);
200 if (!dr)
201 break;
202
203 name = NULL;
204
205 nr_sects = (data[0x1fd] + (data[0x1fe] << 8)) *
206 (dr->heads + (dr->lowsector & 0x40 ? 1 : 0)) *
207 dr->secspertrack;
208
209 if (!nr_sects)
210 break;
211
212 first = 0;
213 first_sector += nr_sects;
214 start_blk += nr_sects >> (BLOCK_SIZE_BITS - 9);
215 nr_sects = 0; /* hmm - should be partition size */
216
217 switch (data[0x1fc] & 15) {
218 case 0: /* No partition / ADFS? */
219 break;
220
221#ifdef CONFIG_ACORN_PARTITION_RISCIX
222 case PARTITION_RISCIX_SCSI:
223 /* RISCiX - we don't know how to find the next one. */
224 slot = riscix_partition(state, first_sector, slot,
225 nr_sects);
226 break;
227#endif
228
229 case PARTITION_LINUX:
230 slot = linux_partition(state, first_sector, slot,
231 nr_sects);
232 break;
233 }
234 put_dev_sector(sect);
235 if (slot == -1)
236 return -1;
237 } while (1);
238 put_dev_sector(sect);
239 return first ? 0 : 1;
240}
241#endif
242
243#ifdef CONFIG_ACORN_PARTITION_ADFS
244/*
245 * Purpose: allocate ADFS partitions.
246 *
247 * Params : hd - pointer to gendisk structure to store partition info.
248 * dev - device number to access.
249 *
250 * Returns: -1 on error, 0 for no ADFS boot sector, 1 for ok.
251 *
252 * Alloc : hda = whole drive
253 * hda1 = ADFS partition on first drive.
254 * hda2 = non-ADFS partition.
255 */
256int adfspart_check_ADFS(struct parsed_partitions *state)
257{
258 unsigned long start_sect, nr_sects, sectscyl, heads;
259 Sector sect;
260 unsigned char *data;
261 struct adfs_discrecord *dr;
262 unsigned char id;
263 int slot = 1;
264
265 data = read_part_sector(state, 6, &sect);
266 if (!data)
267 return -1;
268
269 dr = adfs_partition(state, "ADFS", data, 0, slot++);
270 if (!dr) {
271 put_dev_sector(sect);
272 return 0;
273 }
274
275 heads = dr->heads + ((dr->lowsector >> 6) & 1);
276 sectscyl = dr->secspertrack * heads;
277 start_sect = ((data[0x1fe] << 8) + data[0x1fd]) * sectscyl;
278 id = data[0x1fc] & 15;
279 put_dev_sector(sect);
280
281 /*
282 * Work out start of non-adfs partition.
283 */
284 nr_sects = (state->bdev->bd_inode->i_size >> 9) - start_sect;
285
286 if (start_sect) {
287 switch (id) {
288#ifdef CONFIG_ACORN_PARTITION_RISCIX
289 case PARTITION_RISCIX_SCSI:
290 case PARTITION_RISCIX_MFM:
291 slot = riscix_partition(state, start_sect, slot,
292 nr_sects);
293 break;
294#endif
295
296 case PARTITION_LINUX:
297 slot = linux_partition(state, start_sect, slot,
298 nr_sects);
299 break;
300 }
301 }
302 strlcat(state->pp_buf, "\n", PAGE_SIZE);
303 return 1;
304}
305#endif
306
307#ifdef CONFIG_ACORN_PARTITION_ICS
308
309struct ics_part {
310 __le32 start;
311 __le32 size;
312};
313
314static int adfspart_check_ICSLinux(struct parsed_partitions *state,
315 unsigned long block)
316{
317 Sector sect;
318 unsigned char *data = read_part_sector(state, block, &sect);
319 int result = 0;
320
321 if (data) {
322 if (memcmp(data, "LinuxPart", 9) == 0)
323 result = 1;
324 put_dev_sector(sect);
325 }
326
327 return result;
328}
329
330/*
331 * Check for a valid ICS partition using the checksum.
332 */
333static inline int valid_ics_sector(const unsigned char *data)
334{
335 unsigned long sum;
336 int i;
337
338 for (i = 0, sum = 0x50617274; i < 508; i++)
339 sum += data[i];
340
341 sum -= le32_to_cpu(*(__le32 *)(&data[508]));
342
343 return sum == 0;
344}
345
346/*
347 * Purpose: allocate ICS partitions.
348 * Params : hd - pointer to gendisk structure to store partition info.
349 * dev - device number to access.
350 * Returns: -1 on error, 0 for no ICS table, 1 for partitions ok.
351 * Alloc : hda = whole drive
352 * hda1 = ADFS partition 0 on first drive.
353 * hda2 = ADFS partition 1 on first drive.
354 * ..etc..
355 */
356int adfspart_check_ICS(struct parsed_partitions *state)
357{
358 const unsigned char *data;
359 const struct ics_part *p;
360 int slot;
361 Sector sect;
362
363 /*
364 * Try ICS style partitions - sector 0 contains partition info.
365 */
366 data = read_part_sector(state, 0, &sect);
367 if (!data)
368 return -1;
369
370 if (!valid_ics_sector(data)) {
371 put_dev_sector(sect);
372 return 0;
373 }
374
375 strlcat(state->pp_buf, " [ICS]", PAGE_SIZE);
376
377 for (slot = 1, p = (const struct ics_part *)data; p->size; p++) {
378 u32 start = le32_to_cpu(p->start);
379 s32 size = le32_to_cpu(p->size); /* yes, it's signed. */
380
381 if (slot == state->limit)
382 break;
383
384 /*
385 * Negative sizes tell the RISC OS ICS driver to ignore
386 * this partition - in effect it says that this does not
387 * contain an ADFS filesystem.
388 */
389 if (size < 0) {
390 size = -size;
391
392 /*
393 * Our own extension - We use the first sector
394 * of the partition to identify what type this
395 * partition is. We must not make this visible
396 * to the filesystem.
397 */
398 if (size > 1 && adfspart_check_ICSLinux(state, start)) {
399 start += 1;
400 size -= 1;
401 }
402 }
403
404 if (size)
405 put_partition(state, slot++, start, size);
406 }
407
408 put_dev_sector(sect);
409 strlcat(state->pp_buf, "\n", PAGE_SIZE);
410 return 1;
411}
412#endif
413
414#ifdef CONFIG_ACORN_PARTITION_POWERTEC
415struct ptec_part {
416 __le32 unused1;
417 __le32 unused2;
418 __le32 start;
419 __le32 size;
420 __le32 unused5;
421 char type[8];
422};
423
424static inline int valid_ptec_sector(const unsigned char *data)
425{
426 unsigned char checksum = 0x2a;
427 int i;
428
429 /*
430 * If it looks like a PC/BIOS partition, then it
431 * probably isn't PowerTec.
432 */
433 if (data[510] == 0x55 && data[511] == 0xaa)
434 return 0;
435
436 for (i = 0; i < 511; i++)
437 checksum += data[i];
438
439 return checksum == data[511];
440}
441
442/*
443 * Purpose: allocate ICS partitions.
444 * Params : hd - pointer to gendisk structure to store partition info.
445 * dev - device number to access.
446 * Returns: -1 on error, 0 for no ICS table, 1 for partitions ok.
447 * Alloc : hda = whole drive
448 * hda1 = ADFS partition 0 on first drive.
449 * hda2 = ADFS partition 1 on first drive.
450 * ..etc..
451 */
452int adfspart_check_POWERTEC(struct parsed_partitions *state)
453{
454 Sector sect;
455 const unsigned char *data;
456 const struct ptec_part *p;
457 int slot = 1;
458 int i;
459
460 data = read_part_sector(state, 0, &sect);
461 if (!data)
462 return -1;
463
464 if (!valid_ptec_sector(data)) {
465 put_dev_sector(sect);
466 return 0;
467 }
468
469 strlcat(state->pp_buf, " [POWERTEC]", PAGE_SIZE);
470
471 for (i = 0, p = (const struct ptec_part *)data; i < 12; i++, p++) {
472 u32 start = le32_to_cpu(p->start);
473 u32 size = le32_to_cpu(p->size);
474
475 if (size)
476 put_partition(state, slot++, start, size);
477 }
478
479 put_dev_sector(sect);
480 strlcat(state->pp_buf, "\n", PAGE_SIZE);
481 return 1;
482}
483#endif
484
485#ifdef CONFIG_ACORN_PARTITION_EESOX
486struct eesox_part {
487 char magic[6];
488 char name[10];
489 __le32 start;
490 __le32 unused6;
491 __le32 unused7;
492 __le32 unused8;
493};
494
495/*
496 * Guess who created this format?
497 */
498static const char eesox_name[] = {
499 'N', 'e', 'i', 'l', ' ',
500 'C', 'r', 'i', 't', 'c', 'h', 'e', 'l', 'l', ' ', ' '
501};
502
503/*
504 * EESOX SCSI partition format.
505 *
506 * This is a goddamned awful partition format. We don't seem to store
507 * the size of the partition in this table, only the start addresses.
508 *
509 * There are two possibilities where the size comes from:
510 * 1. The individual ADFS boot block entries that are placed on the disk.
511 * 2. The start address of the next entry.
512 */
513int adfspart_check_EESOX(struct parsed_partitions *state)
514{
515 Sector sect;
516 const unsigned char *data;
517 unsigned char buffer[256];
518 struct eesox_part *p;
519 sector_t start = 0;
520 int i, slot = 1;
521
522 data = read_part_sector(state, 7, &sect);
523 if (!data)
524 return -1;
525
526 /*
527 * "Decrypt" the partition table. God knows why...
528 */
529 for (i = 0; i < 256; i++)
530 buffer[i] = data[i] ^ eesox_name[i & 15];
531
532 put_dev_sector(sect);
533
534 for (i = 0, p = (struct eesox_part *)buffer; i < 8; i++, p++) {
535 sector_t next;
536
537 if (memcmp(p->magic, "Eesox", 6))
538 break;
539
540 next = le32_to_cpu(p->start);
541 if (i)
542 put_partition(state, slot++, start, next - start);
543 start = next;
544 }
545
546 if (i != 0) {
547 sector_t size;
548
549 size = get_capacity(state->bdev->bd_disk);
550 put_partition(state, slot++, start, size - start);
551 strlcat(state->pp_buf, "\n", PAGE_SIZE);
552 }
553
554 return i ? 1 : 0;
555}
556#endif
diff --git a/fs/partitions/acorn.h b/fs/partitions/acorn.h
deleted file mode 100644
index ede828529692..000000000000
--- a/fs/partitions/acorn.h
+++ /dev/null
@@ -1,14 +0,0 @@
1/*
2 * linux/fs/partitions/acorn.h
3 *
4 * Copyright (C) 1996-2001 Russell King.
5 *
6 * I _hate_ this partitioning mess - why can't we have one defined
7 * format, and everyone stick to it?
8 */
9
10int adfspart_check_CUMANA(struct parsed_partitions *state);
11int adfspart_check_ADFS(struct parsed_partitions *state);
12int adfspart_check_ICS(struct parsed_partitions *state);
13int adfspart_check_POWERTEC(struct parsed_partitions *state);
14int adfspart_check_EESOX(struct parsed_partitions *state);
diff --git a/fs/partitions/amiga.c b/fs/partitions/amiga.c
deleted file mode 100644
index 70cbf44a1560..000000000000
--- a/fs/partitions/amiga.c
+++ /dev/null
@@ -1,139 +0,0 @@
1/*
2 * fs/partitions/amiga.c
3 *
4 * Code extracted from drivers/block/genhd.c
5 *
6 * Copyright (C) 1991-1998 Linus Torvalds
7 * Re-organised Feb 1998 Russell King
8 */
9
10#include <linux/types.h>
11#include <linux/affs_hardblocks.h>
12
13#include "check.h"
14#include "amiga.h"
15
16static __inline__ u32
17checksum_block(__be32 *m, int size)
18{
19 u32 sum = 0;
20
21 while (size--)
22 sum += be32_to_cpu(*m++);
23 return sum;
24}
25
26int amiga_partition(struct parsed_partitions *state)
27{
28 Sector sect;
29 unsigned char *data;
30 struct RigidDiskBlock *rdb;
31 struct PartitionBlock *pb;
32 int start_sect, nr_sects, blk, part, res = 0;
33 int blksize = 1; /* Multiplier for disk block size */
34 int slot = 1;
35 char b[BDEVNAME_SIZE];
36
37 for (blk = 0; ; blk++, put_dev_sector(sect)) {
38 if (blk == RDB_ALLOCATION_LIMIT)
39 goto rdb_done;
40 data = read_part_sector(state, blk, &sect);
41 if (!data) {
42 if (warn_no_part)
43 printk("Dev %s: unable to read RDB block %d\n",
44 bdevname(state->bdev, b), blk);
45 res = -1;
46 goto rdb_done;
47 }
48 if (*(__be32 *)data != cpu_to_be32(IDNAME_RIGIDDISK))
49 continue;
50
51 rdb = (struct RigidDiskBlock *)data;
52 if (checksum_block((__be32 *)data, be32_to_cpu(rdb->rdb_SummedLongs) & 0x7F) == 0)
53 break;
54 /* Try again with 0xdc..0xdf zeroed, Windows might have
55 * trashed it.
56 */
57 *(__be32 *)(data+0xdc) = 0;
58 if (checksum_block((__be32 *)data,
59 be32_to_cpu(rdb->rdb_SummedLongs) & 0x7F)==0) {
60 printk("Warning: Trashed word at 0xd0 in block %d "
61 "ignored in checksum calculation\n",blk);
62 break;
63 }
64
65 printk("Dev %s: RDB in block %d has bad checksum\n",
66 bdevname(state->bdev, b), blk);
67 }
68
69 /* blksize is blocks per 512 byte standard block */
70 blksize = be32_to_cpu( rdb->rdb_BlockBytes ) / 512;
71
72 {
73 char tmp[7 + 10 + 1 + 1];
74
75 /* Be more informative */
76 snprintf(tmp, sizeof(tmp), " RDSK (%d)", blksize * 512);
77 strlcat(state->pp_buf, tmp, PAGE_SIZE);
78 }
79 blk = be32_to_cpu(rdb->rdb_PartitionList);
80 put_dev_sector(sect);
81 for (part = 1; blk>0 && part<=16; part++, put_dev_sector(sect)) {
82 blk *= blksize; /* Read in terms partition table understands */
83 data = read_part_sector(state, blk, &sect);
84 if (!data) {
85 if (warn_no_part)
86 printk("Dev %s: unable to read partition block %d\n",
87 bdevname(state->bdev, b), blk);
88 res = -1;
89 goto rdb_done;
90 }
91 pb = (struct PartitionBlock *)data;
92 blk = be32_to_cpu(pb->pb_Next);
93 if (pb->pb_ID != cpu_to_be32(IDNAME_PARTITION))
94 continue;
95 if (checksum_block((__be32 *)pb, be32_to_cpu(pb->pb_SummedLongs) & 0x7F) != 0 )
96 continue;
97
98 /* Tell Kernel about it */
99
100 nr_sects = (be32_to_cpu(pb->pb_Environment[10]) + 1 -
101 be32_to_cpu(pb->pb_Environment[9])) *
102 be32_to_cpu(pb->pb_Environment[3]) *
103 be32_to_cpu(pb->pb_Environment[5]) *
104 blksize;
105 if (!nr_sects)
106 continue;
107 start_sect = be32_to_cpu(pb->pb_Environment[9]) *
108 be32_to_cpu(pb->pb_Environment[3]) *
109 be32_to_cpu(pb->pb_Environment[5]) *
110 blksize;
111 put_partition(state,slot++,start_sect,nr_sects);
112 {
113 /* Be even more informative to aid mounting */
114 char dostype[4];
115 char tmp[42];
116
117 __be32 *dt = (__be32 *)dostype;
118 *dt = pb->pb_Environment[16];
119 if (dostype[3] < ' ')
120 snprintf(tmp, sizeof(tmp), " (%c%c%c^%c)",
121 dostype[0], dostype[1],
122 dostype[2], dostype[3] + '@' );
123 else
124 snprintf(tmp, sizeof(tmp), " (%c%c%c%c)",
125 dostype[0], dostype[1],
126 dostype[2], dostype[3]);
127 strlcat(state->pp_buf, tmp, PAGE_SIZE);
128 snprintf(tmp, sizeof(tmp), "(res %d spb %d)",
129 be32_to_cpu(pb->pb_Environment[6]),
130 be32_to_cpu(pb->pb_Environment[4]));
131 strlcat(state->pp_buf, tmp, PAGE_SIZE);
132 }
133 res = 1;
134 }
135 strlcat(state->pp_buf, "\n", PAGE_SIZE);
136
137rdb_done:
138 return res;
139}
diff --git a/fs/partitions/amiga.h b/fs/partitions/amiga.h
deleted file mode 100644
index d094585cadaa..000000000000
--- a/fs/partitions/amiga.h
+++ /dev/null
@@ -1,6 +0,0 @@
1/*
2 * fs/partitions/amiga.h
3 */
4
5int amiga_partition(struct parsed_partitions *state);
6
diff --git a/fs/partitions/atari.c b/fs/partitions/atari.c
deleted file mode 100644
index 9875b05e80a2..000000000000
--- a/fs/partitions/atari.c
+++ /dev/null
@@ -1,149 +0,0 @@
1/*
2 * fs/partitions/atari.c
3 *
4 * Code extracted from drivers/block/genhd.c
5 *
6 * Copyright (C) 1991-1998 Linus Torvalds
7 * Re-organised Feb 1998 Russell King
8 */
9
10#include <linux/ctype.h>
11#include "check.h"
12#include "atari.h"
13
14/* ++guenther: this should be settable by the user ("make config")?.
15 */
16#define ICD_PARTS
17
18/* check if a partition entry looks valid -- Atari format is assumed if at
19 least one of the primary entries is ok this way */
20#define VALID_PARTITION(pi,hdsiz) \
21 (((pi)->flg & 1) && \
22 isalnum((pi)->id[0]) && isalnum((pi)->id[1]) && isalnum((pi)->id[2]) && \
23 be32_to_cpu((pi)->st) <= (hdsiz) && \
24 be32_to_cpu((pi)->st) + be32_to_cpu((pi)->siz) <= (hdsiz))
25
/* Accept only the partition type IDs we know how to use:
 * GEM, BGM, LNX, SWP, RAW. */
static inline int OK_id(char *s)
{
	static const char *const known[] = { "GEM", "BGM", "LNX", "SWP", "RAW" };
	unsigned int i;

	for (i = 0; i < sizeof(known) / sizeof(known[0]); i++)
		if (memcmp(s, known[i], 3) == 0)
			return 1;

	return 0;
}
32
/*
 * Parse an Atari AHDI root sector.  Primary entries live in sector 0;
 * "XGM" entries chain to extended partition sectors whose offsets are
 * relative to the first XGM start.  Falls back to the ICD/Supra table
 * when no XGM chain was seen.  Returns 1 / 0 / -1 (found / none / error).
 */
int atari_partition(struct parsed_partitions *state)
{
	Sector sect;
	struct rootsector *rs;
	struct partition_info *pi;
	u32 extensect;
	u32 hd_size;
	int slot;
#ifdef ICD_PARTS
	int part_fmt = 0; /* 0:unknown, 1:AHDI, 2:ICD/Supra */
#endif

	rs = read_part_sector(state, 0, &sect);
	if (!rs)
		return -1;

	/* Verify this is an Atari rootsector: */
	hd_size = state->bdev->bd_inode->i_size >> 9;
	if (!VALID_PARTITION(&rs->part[0], hd_size) &&
	    !VALID_PARTITION(&rs->part[1], hd_size) &&
	    !VALID_PARTITION(&rs->part[2], hd_size) &&
	    !VALID_PARTITION(&rs->part[3], hd_size)) {
		/*
		 * if there's no valid primary partition, assume that no Atari
		 * format partition table (there's no reliable magic or the like
		 * :-()
		 */
		put_dev_sector(sect);
		return 0;
	}

	pi = &rs->part[0];
	strlcat(state->pp_buf, " AHDI", PAGE_SIZE);
	for (slot = 1; pi < &rs->part[4] && slot < state->limit; slot++, pi++) {
		struct rootsector *xrs;
		Sector sect2;
		ulong partsect;

		if ( !(pi->flg & 1) )
			continue;
		/* active partition */
		if (memcmp (pi->id, "XGM", 3) != 0) {
			/* we don't care about other id's */
			put_partition (state, slot, be32_to_cpu(pi->st),
					be32_to_cpu(pi->siz));
			continue;
		}
		/* extension partition */
#ifdef ICD_PARTS
		part_fmt = 1;
#endif
		strlcat(state->pp_buf, " XGM<", PAGE_SIZE);
		/* extensect anchors the chain: link offsets are relative to it */
		partsect = extensect = be32_to_cpu(pi->st);
		while (1) {
			xrs = read_part_sector(state, partsect, &sect2);
			if (!xrs) {
				printk (" block %ld read failed\n", partsect);
				put_dev_sector(sect);
				return -1;
			}

			/* ++roman: sanity check: bit 0 of flg field must be set */
			if (!(xrs->part[0].flg & 1)) {
				printk( "\nFirst sub-partition in extended partition is not valid!\n" );
				put_dev_sector(sect2);
				break;
			}

			/* part[0] of each chain sector is the data partition;
			 * its start is relative to the chain sector itself */
			put_partition(state, slot,
				   partsect + be32_to_cpu(xrs->part[0].st),
				   be32_to_cpu(xrs->part[0].siz));

			if (!(xrs->part[1].flg & 1)) {
				/* end of linked partition list */
				put_dev_sector(sect2);
				break;
			}
			if (memcmp( xrs->part[1].id, "XGM", 3 ) != 0) {
				printk("\nID of extended partition is not XGM!\n");
				put_dev_sector(sect2);
				break;
			}

			/* part[1] links to the next chain sector */
			partsect = be32_to_cpu(xrs->part[1].st) + extensect;
			put_dev_sector(sect2);
			if (++slot == state->limit) {
				printk( "\nMaximum number of partitions reached!\n" );
				break;
			}
		}
		strlcat(state->pp_buf, " >", PAGE_SIZE);
	}
#ifdef ICD_PARTS
	if ( part_fmt!=1 ) { /* no extended partitions -> test ICD-format */
		pi = &rs->icdpart[0];
		/* sanity check: no ICD format if first partition invalid */
		if (OK_id(pi->id)) {
			strlcat(state->pp_buf, " ICD<", PAGE_SIZE);
			for (; pi < &rs->icdpart[8] && slot < state->limit; slot++, pi++) {
				/* accept only GEM,BGM,RAW,LNX,SWP partitions */
				if (!((pi->flg & 1) && OK_id(pi->id)))
					continue;
				part_fmt = 2;
				put_partition (state, slot,
						be32_to_cpu(pi->st),
						be32_to_cpu(pi->siz));
			}
			strlcat(state->pp_buf, " >", PAGE_SIZE);
		}
	}
#endif
	put_dev_sector(sect);

	strlcat(state->pp_buf, "\n", PAGE_SIZE);

	return 1;
}
diff --git a/fs/partitions/atari.h b/fs/partitions/atari.h
deleted file mode 100644
index fe2d32a89f36..000000000000
--- a/fs/partitions/atari.h
+++ /dev/null
@@ -1,34 +0,0 @@
1/*
2 * fs/partitions/atari.h
3 * Moved by Russell King from:
4 *
5 * linux/include/linux/atari_rootsec.h
6 * definitions for Atari Rootsector layout
7 * by Andreas Schwab (schwab@ls5.informatik.uni-dortmund.de)
8 *
9 * modified for ICD/Supra partitioning scheme restricted to at most 12
10 * partitions
11 * by Guenther Kelleter (guenther@pool.informatik.rwth-aachen.de)
12 */
13
/* One AHDI partition entry as stored on disk (st/siz are big-endian). */
struct partition_info
{
  u8 flg;		/* bit 0: active; bit 7: bootable */
  char id[3];		/* "GEM", "BGM", "XGM", or other */
  __be32 st;		/* start of partition */
  __be32 siz;		/* length of partition */
};
21
/*
 * AHDI root sector (sector 0) layout.  Packed because the fields are
 * placed by on-disk offset, not by natural alignment.
 */
struct rootsector
{
  char unused[0x156];	/* room for boot code */
  struct partition_info icdpart[8];	/* info for ICD-partitions 5..12 */
  char unused2[0xc];
  u32 hd_siz;		/* size of disk in blocks */
  struct partition_info part[4];
  u32 bsl_st;		/* start of bad sector list */
  u32 bsl_cnt;		/* length of bad sector list */
  u16 checksum;		/* checksum for bootable disks */
} __attribute__((__packed__));
33
34int atari_partition(struct parsed_partitions *state);
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
deleted file mode 100644
index e3c63d1c5e13..000000000000
--- a/fs/partitions/check.c
+++ /dev/null
@@ -1,687 +0,0 @@
1/*
2 * fs/partitions/check.c
3 *
4 * Code extracted from drivers/block/genhd.c
5 * Copyright (C) 1991-1998 Linus Torvalds
6 * Re-organised Feb 1998 Russell King
7 *
8 * We now have independent partition support from the
9 * block drivers, which allows all the partition code to
10 * be grouped in one location, and it to be mostly self
11 * contained.
12 *
13 * Added needed MAJORS for new pairs, {hdi,hdj}, {hdk,hdl}
14 */
15
16#include <linux/init.h>
17#include <linux/module.h>
18#include <linux/fs.h>
19#include <linux/slab.h>
20#include <linux/kmod.h>
21#include <linux/ctype.h>
22#include <linux/genhd.h>
23#include <linux/blktrace_api.h>
24
25#include "check.h"
26
27#include "acorn.h"
28#include "amiga.h"
29#include "atari.h"
30#include "ldm.h"
31#include "mac.h"
32#include "msdos.h"
33#include "osf.h"
34#include "sgi.h"
35#include "sun.h"
36#include "ibm.h"
37#include "ultrix.h"
38#include "efi.h"
39#include "karma.h"
40#include "sysv68.h"
41
#ifdef CONFIG_BLK_DEV_MD
extern void md_autodetect_dev(dev_t dev);
#endif

/* Non-zero: warn when a partition table cannot be read. */
int warn_no_part = 1; /*This is ugly: should make genhd removable media aware*/

/*
 * NULL-terminated list of partition parsers, tried in order by
 * check_partition() until one claims the disk.  Ordering matters:
 * see the inline comments about entries that must precede msdos.
 */
static int (*check_part[])(struct parsed_partitions *) = {
	/*
	 * Probe partition formats with tables at disk address 0
	 * that also have an ADFS boot block at 0xdc0.
	 */
#ifdef CONFIG_ACORN_PARTITION_ICS
	adfspart_check_ICS,
#endif
#ifdef CONFIG_ACORN_PARTITION_POWERTEC
	adfspart_check_POWERTEC,
#endif
#ifdef CONFIG_ACORN_PARTITION_EESOX
	adfspart_check_EESOX,
#endif

	/*
	 * Now move on to formats that only have partition info at
	 * disk address 0xdc0.  Since these may also have stale
	 * PC/BIOS partition tables, they need to come before
	 * the msdos entry.
	 */
#ifdef CONFIG_ACORN_PARTITION_CUMANA
	adfspart_check_CUMANA,
#endif
#ifdef CONFIG_ACORN_PARTITION_ADFS
	adfspart_check_ADFS,
#endif

#ifdef CONFIG_EFI_PARTITION
	efi_partition,		/* this must come before msdos */
#endif
#ifdef CONFIG_SGI_PARTITION
	sgi_partition,
#endif
#ifdef CONFIG_LDM_PARTITION
	ldm_partition,		/* this must come before msdos */
#endif
#ifdef CONFIG_MSDOS_PARTITION
	msdos_partition,
#endif
#ifdef CONFIG_OSF_PARTITION
	osf_partition,
#endif
#ifdef CONFIG_SUN_PARTITION
	sun_partition,
#endif
#ifdef CONFIG_AMIGA_PARTITION
	amiga_partition,
#endif
#ifdef CONFIG_ATARI_PARTITION
	atari_partition,
#endif
#ifdef CONFIG_MAC_PARTITION
	mac_partition,
#endif
#ifdef CONFIG_ULTRIX_PARTITION
	ultrix_partition,
#endif
#ifdef CONFIG_IBM_PARTITION
	ibm_partition,
#endif
#ifdef CONFIG_KARMA_PARTITION
	karma_partition,
#endif
#ifdef CONFIG_SYSV68_PARTITION
	sysv68_partition,
#endif
	NULL
};
117
118/*
119 * disk_name() is used by partition check code and the genhd driver.
120 * It formats the devicename of the indicated disk into
121 * the supplied buffer (of size at least 32), and returns
122 * a pointer to that same buffer (for convenience).
123 */
124
125char *disk_name(struct gendisk *hd, int partno, char *buf)
126{
127 if (!partno)
128 snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name);
129 else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1]))
130 snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno);
131 else
132 snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno);
133
134 return buf;
135}
136
137const char *bdevname(struct block_device *bdev, char *buf)
138{
139 return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf);
140}
141
142EXPORT_SYMBOL(bdevname);
143
144/*
145 * There's very little reason to use this, you should really
146 * have a struct block_device just about everywhere and use
147 * bdevname() instead.
148 */
/*
 * Format @dev as "unknown-block(major,minor)" into @buffer (at least
 * BDEVNAME_SIZE bytes) and return @buffer.  Used when no struct
 * block_device is available (see the comment above).
 */
const char *__bdevname(dev_t dev, char *buffer)
{
	scnprintf(buffer, BDEVNAME_SIZE, "unknown-block(%u,%u)",
				MAJOR(dev), MINOR(dev));
	return buffer;
}

EXPORT_SYMBOL(__bdevname);
157
/*
 * Run every parser in check_part[] against @bdev until one claims it.
 *
 * Returns the parsed state on success (note: state->pp_buf has already
 * been printed and freed by this point, so it must not be touched by
 * the caller), NULL on allocation failure, or ERR_PTR(res) when no
 * table was recognized (res holds any recorded I/O error).  The caller
 * owns a successfully returned state and must kfree() it.
 */
static struct parsed_partitions *
check_partition(struct gendisk *hd, struct block_device *bdev)
{
	struct parsed_partitions *state;
	int i, res, err;

	state = kzalloc(sizeof(struct parsed_partitions), GFP_KERNEL);
	if (!state)
		return NULL;
	state->pp_buf = (char *)__get_free_page(GFP_KERNEL);
	if (!state->pp_buf) {
		kfree(state);
		return NULL;
	}
	state->pp_buf[0] = '\0';

	state->bdev = bdev;
	disk_name(hd, 0, state->name);
	snprintf(state->pp_buf, PAGE_SIZE, " %s:", state->name);
	/* If the disk name ends in a digit, print partitions as "p<N>". */
	if (isdigit(state->name[strlen(state->name)-1]))
		sprintf(state->name, "p");

	state->limit = disk_max_parts(hd);
	i = res = err = 0;
	while (!res && check_part[i]) {
		memset(&state->parts, 0, sizeof(state->parts));
		res = check_part[i++](state);
		if (res < 0) {
			/* We have hit an I/O error which we don't report now.
		 	* But record it, and let the others do their job.
		 	*/
			err = res;
			res = 0;
		}

	}
	if (res > 0) {
		printk(KERN_INFO "%s", state->pp_buf);

		free_page((unsigned long)state->pp_buf);
		return state;
	}
	/* Access beyond EOD is mapped to -ENOSPC so rescan_partitions()
	 * can retry after unlocking native capacity. */
	if (state->access_beyond_eod)
		err = -ENOSPC;
	if (err)
	/* The partition is unrecognized. So report I/O errors if there were any */
		res = err;
	if (!res)
		strlcat(state->pp_buf, " unknown partition table\n", PAGE_SIZE);
	else if (warn_no_part)
		strlcat(state->pp_buf, " unable to read partition table\n", PAGE_SIZE);

	printk(KERN_INFO "%s", state->pp_buf);

	free_page((unsigned long)state->pp_buf);
	kfree(state);
	return ERR_PTR(res);
}
216
217static ssize_t part_partition_show(struct device *dev,
218 struct device_attribute *attr, char *buf)
219{
220 struct hd_struct *p = dev_to_part(dev);
221
222 return sprintf(buf, "%d\n", p->partno);
223}
224
225static ssize_t part_start_show(struct device *dev,
226 struct device_attribute *attr, char *buf)
227{
228 struct hd_struct *p = dev_to_part(dev);
229
230 return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect);
231}
232
233ssize_t part_size_show(struct device *dev,
234 struct device_attribute *attr, char *buf)
235{
236 struct hd_struct *p = dev_to_part(dev);
237 return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects);
238}
239
240static ssize_t part_ro_show(struct device *dev,
241 struct device_attribute *attr, char *buf)
242{
243 struct hd_struct *p = dev_to_part(dev);
244 return sprintf(buf, "%d\n", p->policy ? 1 : 0);
245}
246
247static ssize_t part_alignment_offset_show(struct device *dev,
248 struct device_attribute *attr, char *buf)
249{
250 struct hd_struct *p = dev_to_part(dev);
251 return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset);
252}
253
254static ssize_t part_discard_alignment_show(struct device *dev,
255 struct device_attribute *attr, char *buf)
256{
257 struct hd_struct *p = dev_to_part(dev);
258 return sprintf(buf, "%u\n", p->discard_alignment);
259}
260
/*
 * sysfs: one line of per-partition I/O accounting.  Columns are:
 * read ios, read merges, read sectors, read ticks(ms),
 * write ios, write merges, write sectors, write ticks(ms),
 * in-flight, io_ticks(ms), time_in_queue(ms).
 */
ssize_t part_stat_show(struct device *dev,
		       struct device_attribute *attr, char *buf)
{
	struct hd_struct *p = dev_to_part(dev);
	int cpu;

	/* Fold any pending per-cpu counts before reporting. */
	cpu = part_stat_lock();
	part_round_stats(cpu, p);
	part_stat_unlock();
	return sprintf(buf,
		"%8lu %8lu %8llu %8u "
		"%8lu %8lu %8llu %8u "
		"%8u %8u %8u"
		"\n",
		part_stat_read(p, ios[READ]),
		part_stat_read(p, merges[READ]),
		(unsigned long long)part_stat_read(p, sectors[READ]),
		jiffies_to_msecs(part_stat_read(p, ticks[READ])),
		part_stat_read(p, ios[WRITE]),
		part_stat_read(p, merges[WRITE]),
		(unsigned long long)part_stat_read(p, sectors[WRITE]),
		jiffies_to_msecs(part_stat_read(p, ticks[WRITE])),
		part_in_flight(p),
		jiffies_to_msecs(part_stat_read(p, io_ticks)),
		jiffies_to_msecs(part_stat_read(p, time_in_queue)));
}
287
288ssize_t part_inflight_show(struct device *dev,
289 struct device_attribute *attr, char *buf)
290{
291 struct hd_struct *p = dev_to_part(dev);
292
293 return sprintf(buf, "%8u %8u\n", atomic_read(&p->in_flight[0]),
294 atomic_read(&p->in_flight[1]));
295}
296
#ifdef CONFIG_FAIL_MAKE_REQUEST
/* sysfs: read the per-partition fault-injection flag. */
ssize_t part_fail_show(struct device *dev,
		       struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%d\n", dev_to_part(dev)->make_it_fail);
}

/* sysfs: set/clear the per-partition fault-injection flag. */
ssize_t part_fail_store(struct device *dev,
			struct device_attribute *attr,
			const char *buf, size_t count)
{
	struct hd_struct *part = dev_to_part(dev);
	int val;

	if (count > 0 && sscanf(buf, "%d", &val) > 0)
		part->make_it_fail = (val != 0);

	return count;
}
#endif
319
/* Read-only sysfs attributes backed by the show functions above. */
static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL);
static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL);
static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
static DEVICE_ATTR(ro, S_IRUGO, part_ro_show, NULL);
static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL);
static DEVICE_ATTR(discard_alignment, S_IRUGO, part_discard_alignment_show,
		   NULL);
static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
#ifdef CONFIG_FAIL_MAKE_REQUEST
/* Writable: lets userspace toggle fault injection per partition. */
static struct device_attribute dev_attr_fail =
	__ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
#endif
333
/* NULL-terminated attribute list registered on every partition device. */
static struct attribute *part_attrs[] = {
	&dev_attr_partition.attr,
	&dev_attr_start.attr,
	&dev_attr_size.attr,
	&dev_attr_ro.attr,
	&dev_attr_alignment_offset.attr,
	&dev_attr_discard_alignment.attr,
	&dev_attr_stat.attr,
	&dev_attr_inflight.attr,
#ifdef CONFIG_FAIL_MAKE_REQUEST
	&dev_attr_fail.attr,
#endif
	NULL
};

static struct attribute_group part_attr_group = {
	.attrs = part_attrs,
};

/* Groups attached via part_type; blktrace adds its own when built in. */
static const struct attribute_group *part_attr_groups[] = {
	&part_attr_group,
#ifdef CONFIG_BLK_DEV_IO_TRACE
	&blk_trace_attr_group,
#endif
	NULL
};
360
/* Device-model release: frees the hd_struct once its last ref drops. */
static void part_release(struct device *dev)
{
	struct hd_struct *part = dev_to_part(dev);

	free_part_stats(part);
	free_part_info(part);
	kfree(part);
}
368
/* Device type shared by all partition devices; wires up the sysfs
 * attribute groups and the release handler above. */
struct device_type part_type = {
	.name		= "partition",
	.groups		= part_attr_groups,
	.release	= part_release,
};
374
/*
 * Runs after an RCU grace period, so no reader can still see the
 * partition via the part table: clear its extent and stats, then drop
 * the device reference (which may trigger part_release()).
 */
static void delete_partition_rcu_cb(struct rcu_head *head)
{
	struct hd_struct *part = container_of(head, struct hd_struct, rcu_head);

	part->start_sect = 0;
	part->nr_sects = 0;
	part_stat_set_all(part, 0);
	put_device(part_to_dev(part));
}
384
/* Final teardown, deferred past an RCU grace period (see callback). */
void __delete_partition(struct hd_struct *part)
{
	call_rcu(&part->rcu_head, delete_partition_rcu_cb);
}
389
/*
 * Unpublish partition @partno of @disk: unhook it from the RCU-visible
 * part table (and the lookup cache) before dropping references, so new
 * lookups cannot find it while it is being torn down.
 */
void delete_partition(struct gendisk *disk, int partno)
{
	struct disk_part_tbl *ptbl = disk->part_tbl;
	struct hd_struct *part;

	if (partno >= ptbl->len)
		return;

	part = ptbl->part[partno];
	if (!part)
		return;

	blk_free_devt(part_devt(part));
	rcu_assign_pointer(ptbl->part[partno], NULL);
	rcu_assign_pointer(ptbl->last_lookup, NULL);
	kobject_put(part->holder_dir);
	device_del(part_to_dev(part));

	/* Actual freeing happens after the RCU grace period. */
	hd_struct_put(part);
}
410
/*
 * Attribute with an empty value: its mere presence marks a partition
 * created with ADDPART_FLAG_WHOLEDISK (see add_partition()).
 */
static ssize_t whole_disk_show(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	return 0;
}
static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH,
		   whole_disk_show, NULL);
418
419struct hd_struct *add_partition(struct gendisk *disk, int partno,
420 sector_t start, sector_t len, int flags,
421 struct partition_meta_info *info)
422{
423 struct hd_struct *p;
424 dev_t devt = MKDEV(0, 0);
425 struct device *ddev = disk_to_dev(disk);
426 struct device *pdev;
427 struct disk_part_tbl *ptbl;
428 const char *dname;
429 int err;
430
431 err = disk_expand_part_tbl(disk, partno);
432 if (err)
433 return ERR_PTR(err);
434 ptbl = disk->part_tbl;
435
436 if (ptbl->part[partno])
437 return ERR_PTR(-EBUSY);
438
439 p = kzalloc(sizeof(*p), GFP_KERNEL);
440 if (!p)
441 return ERR_PTR(-EBUSY);
442
443 if (!init_part_stats(p)) {
444 err = -ENOMEM;
445 goto out_free;
446 }
447 pdev = part_to_dev(p);
448
449 p->start_sect = start;
450 p->alignment_offset =
451 queue_limit_alignment_offset(&disk->queue->limits, start);
452 p->discard_alignment =
453 queue_limit_discard_alignment(&disk->queue->limits, start);
454 p->nr_sects = len;
455 p->partno = partno;
456 p->policy = get_disk_ro(disk);
457
458 if (info) {
459 struct partition_meta_info *pinfo = alloc_part_info(disk);
460 if (!pinfo)
461 goto out_free_stats;
462 memcpy(pinfo, info, sizeof(*info));
463 p->info = pinfo;
464 }
465
466 dname = dev_name(ddev);
467 if (isdigit(dname[strlen(dname) - 1]))
468 dev_set_name(pdev, "%sp%d", dname, partno);
469 else
470 dev_set_name(pdev, "%s%d", dname, partno);
471
472 device_initialize(pdev);
473 pdev->class = &block_class;
474 pdev->type = &part_type;
475 pdev->parent = ddev;
476
477 err = blk_alloc_devt(p, &devt);
478 if (err)
479 goto out_free_info;
480 pdev->devt = devt;
481
482 /* delay uevent until 'holders' subdir is created */
483 dev_set_uevent_suppress(pdev, 1);
484 err = device_add(pdev);
485 if (err)
486 goto out_put;
487
488 err = -ENOMEM;
489 p->holder_dir = kobject_create_and_add("holders", &pdev->kobj);
490 if (!p->holder_dir)
491 goto out_del;
492
493 dev_set_uevent_suppress(pdev, 0);
494 if (flags & ADDPART_FLAG_WHOLEDISK) {
495 err = device_create_file(pdev, &dev_attr_whole_disk);
496 if (err)
497 goto out_del;
498 }
499
500 /* everything is up and running, commence */
501 rcu_assign_pointer(ptbl->part[partno], p);
502
503 /* suppress uevent if the disk suppresses it */
504 if (!dev_get_uevent_suppress(ddev))
505 kobject_uevent(&pdev->kobj, KOBJ_ADD);
506
507 hd_ref_init(p);
508 return p;
509
510out_free_info:
511 free_part_info(p);
512out_free_stats:
513 free_part_stats(p);
514out_free:
515 kfree(p);
516 return ERR_PTR(err);
517out_del:
518 kobject_put(p->holder_dir);
519 device_del(pdev);
520out_put:
521 put_device(pdev);
522 blk_free_devt(devt);
523 return ERR_PTR(err);
524}
525
526static bool disk_unlock_native_capacity(struct gendisk *disk)
527{
528 const struct block_device_operations *bdops = disk->fops;
529
530 if (bdops->unlock_native_capacity &&
531 !(disk->flags & GENHD_FL_NATIVE_CAPACITY)) {
532 printk(KERN_CONT "enabling native capacity\n");
533 bdops->unlock_native_capacity(disk);
534 disk->flags |= GENHD_FL_NATIVE_CAPACITY;
535 return true;
536 } else {
537 printk(KERN_CONT "truncated\n");
538 return false;
539 }
540}
541
542int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
543{
544 struct parsed_partitions *state = NULL;
545 struct disk_part_iter piter;
546 struct hd_struct *part;
547 int p, highest, res;
548rescan:
549 if (state && !IS_ERR(state)) {
550 kfree(state);
551 state = NULL;
552 }
553
554 if (bdev->bd_part_count)
555 return -EBUSY;
556 res = invalidate_partition(disk, 0);
557 if (res)
558 return res;
559
560 disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
561 while ((part = disk_part_iter_next(&piter)))
562 delete_partition(disk, part->partno);
563 disk_part_iter_exit(&piter);
564
565 if (disk->fops->revalidate_disk)
566 disk->fops->revalidate_disk(disk);
567 check_disk_size_change(disk, bdev);
568 bdev->bd_invalidated = 0;
569 if (!get_capacity(disk) || !(state = check_partition(disk, bdev)))
570 return 0;
571 if (IS_ERR(state)) {
572 /*
573 * I/O error reading the partition table. If any
574 * partition code tried to read beyond EOD, retry
575 * after unlocking native capacity.
576 */
577 if (PTR_ERR(state) == -ENOSPC) {
578 printk(KERN_WARNING "%s: partition table beyond EOD, ",
579 disk->disk_name);
580 if (disk_unlock_native_capacity(disk))
581 goto rescan;
582 }
583 return -EIO;
584 }
585 /*
586 * If any partition code tried to read beyond EOD, try
587 * unlocking native capacity even if partition table is
588 * successfully read as we could be missing some partitions.
589 */
590 if (state->access_beyond_eod) {
591 printk(KERN_WARNING
592 "%s: partition table partially beyond EOD, ",
593 disk->disk_name);
594 if (disk_unlock_native_capacity(disk))
595 goto rescan;
596 }
597
598 /* tell userspace that the media / partition table may have changed */
599 kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
600
601 /* Detect the highest partition number and preallocate
602 * disk->part_tbl. This is an optimization and not strictly
603 * necessary.
604 */
605 for (p = 1, highest = 0; p < state->limit; p++)
606 if (state->parts[p].size)
607 highest = p;
608
609 disk_expand_part_tbl(disk, highest);
610
611 /* add partitions */
612 for (p = 1; p < state->limit; p++) {
613 sector_t size, from;
614 struct partition_meta_info *info = NULL;
615
616 size = state->parts[p].size;
617 if (!size)
618 continue;
619
620 from = state->parts[p].from;
621 if (from >= get_capacity(disk)) {
622 printk(KERN_WARNING
623 "%s: p%d start %llu is beyond EOD, ",
624 disk->disk_name, p, (unsigned long long) from);
625 if (disk_unlock_native_capacity(disk))
626 goto rescan;
627 continue;
628 }
629
630 if (from + size > get_capacity(disk)) {
631 printk(KERN_WARNING
632 "%s: p%d size %llu extends beyond EOD, ",
633 disk->disk_name, p, (unsigned long long) size);
634
635 if (disk_unlock_native_capacity(disk)) {
636 /* free state and restart */
637 goto rescan;
638 } else {
639 /*
640 * we can not ignore partitions of broken tables
641 * created by for example camera firmware, but
642 * we limit them to the end of the disk to avoid
643 * creating invalid block devices
644 */
645 size = get_capacity(disk) - from;
646 }
647 }
648
649 if (state->parts[p].has_info)
650 info = &state->parts[p].info;
651 part = add_partition(disk, p, from, size,
652 state->parts[p].flags,
653 &state->parts[p].info);
654 if (IS_ERR(part)) {
655 printk(KERN_ERR " %s: p%d could not be added: %ld\n",
656 disk->disk_name, p, -PTR_ERR(part));
657 continue;
658 }
659#ifdef CONFIG_BLK_DEV_MD
660 if (state->parts[p].flags & ADDPART_FLAG_RAID)
661 md_autodetect_dev(part_to_dev(part)->devt);
662#endif
663 }
664 kfree(state);
665 return 0;
666}
667
/*
 * Read the 512-byte sector @n of @bdev through the page cache.
 *
 * On success, stores the page holding the sector in p->v — the caller
 * must drop it with put_dev_sector() — and returns a pointer to the
 * sector's data inside that page.  On failure, sets p->v to NULL and
 * returns NULL.
 */
unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
{
	struct address_space *mapping = bdev->bd_inode->i_mapping;
	struct page *page;

	/* Sector -> page index: PAGE_CACHE_SHIFT-9 sectors per page. */
	page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)),
				 NULL);
	if (!IS_ERR(page)) {
		if (PageError(page))
			goto fail;
		p->v = page;
		return (unsigned char *)page_address(page) +  ((n & ((1 << (PAGE_CACHE_SHIFT - 9)) - 1)) << 9);
fail:
		page_cache_release(page);
	}
	p->v = NULL;
	return NULL;
}

EXPORT_SYMBOL(read_dev_sector);
diff --git a/fs/partitions/check.h b/fs/partitions/check.h
deleted file mode 100644
index d68bf4dc3bc2..000000000000
--- a/fs/partitions/check.h
+++ /dev/null
@@ -1,49 +0,0 @@
1#include <linux/pagemap.h>
2#include <linux/blkdev.h>
3#include <linux/genhd.h>
4
5/*
6 * add_gd_partition adds a partitions details to the devices partition
7 * description.
8 */
/* Scratch state passed to every partition parser by check_partition(). */
struct parsed_partitions {
	struct block_device *bdev;
	char name[BDEVNAME_SIZE];	/* prefix used when printing entries */
	struct {
		sector_t from;		/* first sector */
		sector_t size;		/* length in sectors; 0 = unused slot */
		int flags;		/* ADDPART_FLAG_* */
		bool has_info;		/* true when info below is valid */
		struct partition_meta_info info;
	} parts[DISK_MAX_PARTS];
	int next;
	int limit;			/* max partitions for this disk */
	bool access_beyond_eod;		/* a parser read past end of device */
	char *pp_buf;			/* one-page log line built by parsers */
};
24
25static inline void *read_part_sector(struct parsed_partitions *state,
26 sector_t n, Sector *p)
27{
28 if (n >= get_capacity(state->bdev->bd_disk)) {
29 state->access_beyond_eod = true;
30 return NULL;
31 }
32 return read_dev_sector(state->bdev, n, p);
33}
34
35static inline void
36put_partition(struct parsed_partitions *p, int n, sector_t from, sector_t size)
37{
38 if (n < p->limit) {
39 char tmp[1 + BDEVNAME_SIZE + 10 + 1];
40
41 p->parts[n].from = from;
42 p->parts[n].size = size;
43 snprintf(tmp, sizeof(tmp), " %s%d", p->name, n);
44 strlcat(p->pp_buf, tmp, PAGE_SIZE);
45 }
46}
47
48extern int warn_no_part;
49
diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c
deleted file mode 100644
index 6296b403c67a..000000000000
--- a/fs/partitions/efi.c
+++ /dev/null
@@ -1,675 +0,0 @@
1/************************************************************
2 * EFI GUID Partition Table handling
3 *
4 * http://www.uefi.org/specs/
5 * http://www.intel.com/technology/efi/
6 *
7 * efi.[ch] by Matt Domsch <Matt_Domsch@dell.com>
8 * Copyright 2000,2001,2002,2004 Dell Inc.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 *
24 *
25 * TODO:
26 *
27 * Changelog:
28 * Mon Nov 09 2004 Matt Domsch <Matt_Domsch@dell.com>
29 * - test for valid PMBR and valid PGPT before ever reading
30 * AGPT, allow override with 'gpt' kernel command line option.
31 * - check for first/last_usable_lba outside of size of disk
32 *
33 * Tue Mar 26 2002 Matt Domsch <Matt_Domsch@dell.com>
34 * - Ported to 2.5.7-pre1 and 2.5.7-dj2
35 * - Applied patch to avoid fault in alternate header handling
36 * - cleaned up find_valid_gpt
37 * - On-disk structure and copy in memory is *always* LE now -
38 * swab fields as needed
39 * - remove print_gpt_header()
40 * - only use first max_p partition entries, to keep the kernel minor number
41 * and partition numbers tied.
42 *
43 * Mon Feb 04 2002 Matt Domsch <Matt_Domsch@dell.com>
44 * - Removed __PRIPTR_PREFIX - not being used
45 *
46 * Mon Jan 14 2002 Matt Domsch <Matt_Domsch@dell.com>
47 * - Ported to 2.5.2-pre11 + library crc32 patch Linus applied
48 *
49 * Thu Dec 6 2001 Matt Domsch <Matt_Domsch@dell.com>
50 * - Added compare_gpts().
51 * - moved le_efi_guid_to_cpus() back into this file. GPT is the only
52 * thing that keeps EFI GUIDs on disk.
53 * - Changed gpt structure names and members to be simpler and more Linux-like.
54 *
55 * Wed Oct 17 2001 Matt Domsch <Matt_Domsch@dell.com>
56 * - Removed CONFIG_DEVFS_VOLUMES_UUID code entirely per Martin Wilck
57 *
58 * Wed Oct 10 2001 Matt Domsch <Matt_Domsch@dell.com>
59 * - Changed function comments to DocBook style per Andreas Dilger suggestion.
60 *
61 * Mon Oct 08 2001 Matt Domsch <Matt_Domsch@dell.com>
62 * - Change read_lba() to use the page cache per Al Viro's work.
63 * - print u64s properly on all architectures
64 * - fixed debug_printk(), now Dprintk()
65 *
66 * Mon Oct 01 2001 Matt Domsch <Matt_Domsch@dell.com>
67 * - Style cleanups
68 * - made most functions static
69 * - Endianness addition
70 * - remove test for second alternate header, as it's not per spec,
71 * and is unnecessary. There's now a method to read/write the last
72 * sector of an odd-sized disk from user space. No tools have ever
73 * been released which used this code, so it's effectively dead.
74 * - Per Asit Mallick of Intel, added a test for a valid PMBR.
75 * - Added kernel command line option 'gpt' to override valid PMBR test.
76 *
77 * Wed Jun 6 2001 Martin Wilck <Martin.Wilck@Fujitsu-Siemens.com>
78 * - added devfs volume UUID support (/dev/volumes/uuids) for
79 * mounting file systems by the partition GUID.
80 *
81 * Tue Dec 5 2000 Matt Domsch <Matt_Domsch@dell.com>
82 * - Moved crc32() to linux/lib, added efi_crc32().
83 *
84 * Thu Nov 30 2000 Matt Domsch <Matt_Domsch@dell.com>
85 * - Replaced Intel's CRC32 function with an equivalent
86 * non-license-restricted version.
87 *
88 * Wed Oct 25 2000 Matt Domsch <Matt_Domsch@dell.com>
89 * - Fixed the last_lba() call to return the proper last block
90 *
91 * Thu Oct 12 2000 Matt Domsch <Matt_Domsch@dell.com>
92 * - Thanks to Andries Brouwer for his debugging assistance.
93 * - Code works, detects all the partitions.
94 *
95 ************************************************************/
96#include <linux/crc32.h>
97#include <linux/ctype.h>
98#include <linux/math64.h>
99#include <linux/slab.h>
100#include "check.h"
101#include "efi.h"
102
/* This allows a kernel command line option 'gpt' to override
 * the test for invalid PMBR. Not __initdata because reloading
 * the partition tables happens after init too.
 */
static int force_gpt;
static int __init
force_gpt_fn(char *str)
{
	/* The mere presence of "gpt" on the command line is enough; any
	 * value in @str is ignored.  Returning 1 marks the option handled. */
	force_gpt = 1;
	return 1;
}
__setup("gpt", force_gpt_fn);
115
116
117/**
118 * efi_crc32() - EFI version of crc32 function
119 * @buf: buffer to calculate crc32 of
120 * @len - length of buf
121 *
122 * Description: Returns EFI-style CRC32 value for @buf
123 *
124 * This function uses the little endian Ethernet polynomial
125 * but seeds the function with ~0, and xor's with ~0 at the end.
126 * Note, the EFI Specification, v1.02, has a reference to
127 * Dr. Dobbs Journal, May 1994 (actually it's in May 1992).
128 */
129static inline u32
130efi_crc32(const void *buf, unsigned long len)
131{
132 return (crc32(~0L, buf, len) ^ ~0L);
133}
134
135/**
136 * last_lba(): return number of last logical block of device
137 * @bdev: block device
138 *
139 * Description: Returns last LBA value on success, 0 on error.
140 * This is stored (by sd and ide-geometry) in
141 * the part[0] entry for this disk, and is the number of
142 * physical sectors available on the disk.
143 */
144static u64 last_lba(struct block_device *bdev)
145{
146 if (!bdev || !bdev->bd_inode)
147 return 0;
148 return div_u64(bdev->bd_inode->i_size,
149 bdev_logical_block_size(bdev)) - 1ULL;
150}
151
152static inline int
153pmbr_part_valid(struct partition *part)
154{
155 if (part->sys_ind == EFI_PMBR_OSTYPE_EFI_GPT &&
156 le32_to_cpu(part->start_sect) == 1UL)
157 return 1;
158 return 0;
159}
160
161/**
162 * is_pmbr_valid(): test Protective MBR for validity
163 * @mbr: pointer to a legacy mbr structure
164 *
165 * Description: Returns 1 if PMBR is valid, 0 otherwise.
166 * Validity depends on two things:
167 * 1) MSDOS signature is in the last two bytes of the MBR
168 * 2) One partition of type 0xEE is found
169 */
170static int
171is_pmbr_valid(legacy_mbr *mbr)
172{
173 int i;
174 if (!mbr || le16_to_cpu(mbr->signature) != MSDOS_MBR_SIGNATURE)
175 return 0;
176 for (i = 0; i < 4; i++)
177 if (pmbr_part_valid(&mbr->partition_record[i]))
178 return 1;
179 return 0;
180}
181
182/**
183 * read_lba(): Read bytes from disk, starting at given LBA
184 * @state
185 * @lba
186 * @buffer
187 * @size_t
188 *
189 * Description: Reads @count bytes from @state->bdev into @buffer.
190 * Returns number of bytes read on success, 0 on error.
191 */
192static size_t read_lba(struct parsed_partitions *state,
193 u64 lba, u8 *buffer, size_t count)
194{
195 size_t totalreadcount = 0;
196 struct block_device *bdev = state->bdev;
197 sector_t n = lba * (bdev_logical_block_size(bdev) / 512);
198
199 if (!buffer || lba > last_lba(bdev))
200 return 0;
201
202 while (count) {
203 int copied = 512;
204 Sector sect;
205 unsigned char *data = read_part_sector(state, n++, &sect);
206 if (!data)
207 break;
208 if (copied > count)
209 copied = count;
210 memcpy(buffer, data, copied);
211 put_dev_sector(sect);
212 buffer += copied;
213 totalreadcount +=copied;
214 count -= copied;
215 }
216 return totalreadcount;
217}
218
219/**
220 * alloc_read_gpt_entries(): reads partition entries from disk
221 * @state
222 * @gpt - GPT header
223 *
224 * Description: Returns ptes on success, NULL on error.
225 * Allocates space for PTEs based on information found in @gpt.
226 * Notes: remember to free pte when you're done!
227 */
228static gpt_entry *alloc_read_gpt_entries(struct parsed_partitions *state,
229 gpt_header *gpt)
230{
231 size_t count;
232 gpt_entry *pte;
233
234 if (!gpt)
235 return NULL;
236
237 count = le32_to_cpu(gpt->num_partition_entries) *
238 le32_to_cpu(gpt->sizeof_partition_entry);
239 if (!count)
240 return NULL;
241 pte = kzalloc(count, GFP_KERNEL);
242 if (!pte)
243 return NULL;
244
245 if (read_lba(state, le64_to_cpu(gpt->partition_entry_lba),
246 (u8 *) pte,
247 count) < count) {
248 kfree(pte);
249 pte=NULL;
250 return NULL;
251 }
252 return pte;
253}
254
255/**
256 * alloc_read_gpt_header(): Allocates GPT header, reads into it from disk
257 * @state
258 * @lba is the Logical Block Address of the partition table
259 *
260 * Description: returns GPT header on success, NULL on error. Allocates
261 * and fills a GPT header starting at @ from @state->bdev.
262 * Note: remember to free gpt when finished with it.
263 */
264static gpt_header *alloc_read_gpt_header(struct parsed_partitions *state,
265 u64 lba)
266{
267 gpt_header *gpt;
268 unsigned ssz = bdev_logical_block_size(state->bdev);
269
270 gpt = kzalloc(ssz, GFP_KERNEL);
271 if (!gpt)
272 return NULL;
273
274 if (read_lba(state, lba, (u8 *) gpt, ssz) < ssz) {
275 kfree(gpt);
276 gpt=NULL;
277 return NULL;
278 }
279
280 return gpt;
281}
282
/**
 * is_gpt_valid() - tests one GPT header and PTEs for validity
 * @state: disk parsed partitions
 * @lba: the logical block address of the GPT header to test
 * @gpt: a GPT header ptr, filled on return.
 * @ptes: a PTEs ptr, filled on return.
 *
 * Description: returns 1 if valid, 0 on error.
 * If valid, returns pointers to newly allocated GPT header and PTEs.
 * On failure both *gpt and *ptes are freed and set to NULL.
 */
static int is_gpt_valid(struct parsed_partitions *state, u64 lba,
			gpt_header **gpt, gpt_entry **ptes)
{
	u32 crc, origcrc;
	u64 lastlba;

	if (!ptes)
		return 0;
	if (!(*gpt = alloc_read_gpt_header(state, lba)))
		return 0;

	/* Check the GUID Partition Table signature */
	if (le64_to_cpu((*gpt)->signature) != GPT_HEADER_SIGNATURE) {
		pr_debug("GUID Partition Table Header signature is wrong:"
			 "%lld != %lld\n",
			 (unsigned long long)le64_to_cpu((*gpt)->signature),
			 (unsigned long long)GPT_HEADER_SIGNATURE);
		goto fail;
	}

	/* Check the GUID Partition Table header size */
	if (le32_to_cpu((*gpt)->header_size) >
			bdev_logical_block_size(state->bdev)) {
		pr_debug("GUID Partition Table Header size is wrong: %u > %u\n",
			le32_to_cpu((*gpt)->header_size),
			bdev_logical_block_size(state->bdev));
		goto fail;
	}

	/* Check the GUID Partition Table CRC.  The stored CRC is computed
	 * with the header_crc32 field zeroed, so zero it temporarily,
	 * recompute, and restore it afterwards. */
	origcrc = le32_to_cpu((*gpt)->header_crc32);
	(*gpt)->header_crc32 = 0;
	crc = efi_crc32((const unsigned char *) (*gpt), le32_to_cpu((*gpt)->header_size));

	if (crc != origcrc) {
		pr_debug("GUID Partition Table Header CRC is wrong: %x != %x\n",
			 crc, origcrc);
		goto fail;
	}
	(*gpt)->header_crc32 = cpu_to_le32(origcrc);

	/* Check that the my_lba entry points to the LBA that contains
	 * the GUID Partition Table */
	if (le64_to_cpu((*gpt)->my_lba) != lba) {
		pr_debug("GPT my_lba incorrect: %lld != %lld\n",
			 (unsigned long long)le64_to_cpu((*gpt)->my_lba),
			 (unsigned long long)lba);
		goto fail;
	}

	/* Check the first_usable_lba and last_usable_lba are
	 * within the disk.
	 */
	lastlba = last_lba(state->bdev);
	if (le64_to_cpu((*gpt)->first_usable_lba) > lastlba) {
		pr_debug("GPT: first_usable_lba incorrect: %lld > %lld\n",
			 (unsigned long long)le64_to_cpu((*gpt)->first_usable_lba),
			 (unsigned long long)lastlba);
		goto fail;
	}
	if (le64_to_cpu((*gpt)->last_usable_lba) > lastlba) {
		pr_debug("GPT: last_usable_lba incorrect: %lld > %lld\n",
			 (unsigned long long)le64_to_cpu((*gpt)->last_usable_lba),
			 (unsigned long long)lastlba);
		goto fail;
	}

	/* Check that sizeof_partition_entry has the correct value */
	if (le32_to_cpu((*gpt)->sizeof_partition_entry) != sizeof(gpt_entry)) {
		pr_debug("GUID Partitition Entry Size check failed.\n");
		goto fail;
	}

	/* Header looks sane: now fetch the partition entry array. */
	if (!(*ptes = alloc_read_gpt_entries(state, *gpt)))
		goto fail;

	/* Check the GUID Partition Entry Array CRC */
	crc = efi_crc32((const unsigned char *) (*ptes),
			le32_to_cpu((*gpt)->num_partition_entries) *
			le32_to_cpu((*gpt)->sizeof_partition_entry));

	if (crc != le32_to_cpu((*gpt)->partition_entry_array_crc32)) {
		pr_debug("GUID Partitition Entry Array CRC check failed.\n");
		goto fail_ptes;
	}

	/* We're done, all's well */
	return 1;

 fail_ptes:
	kfree(*ptes);
	*ptes = NULL;
 fail:
	kfree(*gpt);
	*gpt = NULL;
	return 0;
}
390
391/**
392 * is_pte_valid() - tests one PTE for validity
393 * @pte is the pte to check
394 * @lastlba is last lba of the disk
395 *
396 * Description: returns 1 if valid, 0 on error.
397 */
398static inline int
399is_pte_valid(const gpt_entry *pte, const u64 lastlba)
400{
401 if ((!efi_guidcmp(pte->partition_type_guid, NULL_GUID)) ||
402 le64_to_cpu(pte->starting_lba) > lastlba ||
403 le64_to_cpu(pte->ending_lba) > lastlba)
404 return 0;
405 return 1;
406}
407
/**
 * compare_gpts() - Search disk for valid GPT headers and PTEs
 * @pgpt: the primary GPT header
 * @agpt: the alternate GPT header
 * @lastlba: the last LBA number
 *
 * Description: Returns nothing. Sanity checks pgpt and agpt fields
 * and prints warnings on discrepancies.  A healthy disk has the two
 * headers cross-referencing each other and agreeing on every field
 * checked below.
 */
static void
compare_gpts(gpt_header *pgpt, gpt_header *agpt, u64 lastlba)
{
	int error_found = 0;
	if (!pgpt || !agpt)
		return;
	/* Each header must point at the other via my_lba/alternate_lba. */
	if (le64_to_cpu(pgpt->my_lba) != le64_to_cpu(agpt->alternate_lba)) {
		printk(KERN_WARNING
		       "GPT:Primary header LBA != Alt. header alternate_lba\n");
		printk(KERN_WARNING "GPT:%lld != %lld\n",
		       (unsigned long long)le64_to_cpu(pgpt->my_lba),
		       (unsigned long long)le64_to_cpu(agpt->alternate_lba));
		error_found++;
	}
	if (le64_to_cpu(pgpt->alternate_lba) != le64_to_cpu(agpt->my_lba)) {
		printk(KERN_WARNING
		       "GPT:Primary header alternate_lba != Alt. header my_lba\n");
		printk(KERN_WARNING "GPT:%lld != %lld\n",
		       (unsigned long long)le64_to_cpu(pgpt->alternate_lba),
		       (unsigned long long)le64_to_cpu(agpt->my_lba));
		error_found++;
	}
	/* The usable-LBA window must be identical in both headers. */
	if (le64_to_cpu(pgpt->first_usable_lba) !=
	    le64_to_cpu(agpt->first_usable_lba)) {
		printk(KERN_WARNING "GPT:first_usable_lbas don't match.\n");
		printk(KERN_WARNING "GPT:%lld != %lld\n",
		       (unsigned long long)le64_to_cpu(pgpt->first_usable_lba),
		       (unsigned long long)le64_to_cpu(agpt->first_usable_lba));
		error_found++;
	}
	if (le64_to_cpu(pgpt->last_usable_lba) !=
	    le64_to_cpu(agpt->last_usable_lba)) {
		printk(KERN_WARNING "GPT:last_usable_lbas don't match.\n");
		printk(KERN_WARNING "GPT:%lld != %lld\n",
		       (unsigned long long)le64_to_cpu(pgpt->last_usable_lba),
		       (unsigned long long)le64_to_cpu(agpt->last_usable_lba));
		error_found++;
	}
	if (efi_guidcmp(pgpt->disk_guid, agpt->disk_guid)) {
		printk(KERN_WARNING "GPT:disk_guids don't match.\n");
		error_found++;
	}
	/* Both headers must describe identical partition entry arrays. */
	if (le32_to_cpu(pgpt->num_partition_entries) !=
	    le32_to_cpu(agpt->num_partition_entries)) {
		printk(KERN_WARNING "GPT:num_partition_entries don't match: "
		       "0x%x != 0x%x\n",
		       le32_to_cpu(pgpt->num_partition_entries),
		       le32_to_cpu(agpt->num_partition_entries));
		error_found++;
	}
	if (le32_to_cpu(pgpt->sizeof_partition_entry) !=
	    le32_to_cpu(agpt->sizeof_partition_entry)) {
		printk(KERN_WARNING
		       "GPT:sizeof_partition_entry values don't match: "
		       "0x%x != 0x%x\n",
		       le32_to_cpu(pgpt->sizeof_partition_entry),
		       le32_to_cpu(agpt->sizeof_partition_entry));
		error_found++;
	}
	if (le32_to_cpu(pgpt->partition_entry_array_crc32) !=
	    le32_to_cpu(agpt->partition_entry_array_crc32)) {
		printk(KERN_WARNING
		       "GPT:partition_entry_array_crc32 values don't match: "
		       "0x%x != 0x%x\n",
		       le32_to_cpu(pgpt->partition_entry_array_crc32),
		       le32_to_cpu(agpt->partition_entry_array_crc32));
		error_found++;
	}
	/* The alternate header belongs in the very last block of the disk. */
	if (le64_to_cpu(pgpt->alternate_lba) != lastlba) {
		printk(KERN_WARNING
		       "GPT:Primary header thinks Alt. header is not at the end of the disk.\n");
		printk(KERN_WARNING "GPT:%lld != %lld\n",
		       (unsigned long long)le64_to_cpu(pgpt->alternate_lba),
		       (unsigned long long)lastlba);
		error_found++;
	}

	if (le64_to_cpu(agpt->my_lba) != lastlba) {
		printk(KERN_WARNING
		       "GPT:Alternate GPT header not at the end of the disk.\n");
		printk(KERN_WARNING "GPT:%lld != %lld\n",
		       (unsigned long long)le64_to_cpu(agpt->my_lba),
		       (unsigned long long)lastlba);
		error_found++;
	}

	if (error_found)
		printk(KERN_WARNING
		       "GPT: Use GNU Parted to correct GPT errors.\n");
	return;
}
508
/**
 * find_valid_gpt() - Search disk for valid GPT headers and PTEs
 * @state: disk parsed partitions
 * @gpt: a GPT header ptr, filled on return.
 * @ptes: a PTEs ptr, filled on return.
 *
 * Description: Returns 1 if valid, 0 on error.
 * If valid, returns pointers to newly allocated GPT header and PTEs.
 * Validity depends on PMBR being valid (or being overridden by the
 * 'gpt' kernel command line option) and finding either the Primary
 * GPT header and PTEs valid, or the Alternate GPT header and PTEs
 * valid.  If the Primary GPT header is not valid, the Alternate GPT header
 * is not checked unless the 'gpt' kernel command line option is passed.
 * This protects against devices which misreport their size, and forces
 * the user to decide to use the Alternate GPT.
 */
static int find_valid_gpt(struct parsed_partitions *state, gpt_header **gpt,
			  gpt_entry **ptes)
{
	int good_pgpt = 0, good_agpt = 0, good_pmbr = 0;
	gpt_header *pgpt = NULL, *agpt = NULL;
	gpt_entry *pptes = NULL, *aptes = NULL;
	legacy_mbr *legacymbr;
	u64 lastlba;

	if (!ptes)
		return 0;

	lastlba = last_lba(state->bdev);
	if (!force_gpt) {
		/* This will be added to the EFI Spec. per Intel after v1.02. */
		legacymbr = kzalloc(sizeof (*legacymbr), GFP_KERNEL);
		if (legacymbr) {
			/* Short/failed reads leave the zeroed buffer, which
			 * simply fails the PMBR signature test below. */
			read_lba(state, 0, (u8 *) legacymbr,
				 sizeof (*legacymbr));
			good_pmbr = is_pmbr_valid(legacymbr);
			kfree(legacymbr);
		}
		if (!good_pmbr)
			goto fail;
	}

	/* Primary header first; the alternate is located via the primary's
	 * alternate_lba, or at the last LBA when forced. */
	good_pgpt = is_gpt_valid(state, GPT_PRIMARY_PARTITION_TABLE_LBA,
				 &pgpt, &pptes);
	if (good_pgpt)
		good_agpt = is_gpt_valid(state,
					 le64_to_cpu(pgpt->alternate_lba),
					 &agpt, &aptes);
	if (!good_agpt && force_gpt)
		good_agpt = is_gpt_valid(state, lastlba, &agpt, &aptes);

	/* The obviously unsuccessful case */
	if (!good_pgpt && !good_agpt)
		goto fail;

	compare_gpts(pgpt, agpt, lastlba);

	/* The good cases: hand ownership of the chosen header/entries to
	 * the caller and free the other pair. */
	if (good_pgpt) {
		*gpt = pgpt;
		*ptes = pptes;
		kfree(agpt);
		kfree(aptes);
		if (!good_agpt) {
			printk(KERN_WARNING
			       "Alternate GPT is invalid, "
			       "using primary GPT.\n");
		}
		return 1;
	}
	else if (good_agpt) {
		*gpt = agpt;
		*ptes = aptes;
		kfree(pgpt);
		kfree(pptes);
		printk(KERN_WARNING
		       "Primary GPT is invalid, using alternate GPT.\n");
		return 1;
	}

 fail:
	kfree(pgpt);
	kfree(agpt);
	kfree(pptes);
	kfree(aptes);
	*gpt = NULL;
	*ptes = NULL;
	return 0;
}
597
/**
 * efi_partition(struct parsed_partitions *state)
 * @state: disk parsed partitions
 *
 * Description: called from check.c, if the disk contains GPT
 * partitions, sets up partition entries in the kernel.
 *
 * If the first block on the disk is a legacy MBR,
 * it will get handled by msdos_partition().
 * If it's a Protective MBR, we'll handle it here.
 *
 * We do not create a Linux partition for GPT, but
 * only for the actual data partitions.
 * Returns:
 * -1 if unable to read the partition table
 *  0 if this isn't our partition table
 *  1 if successful
 *
 */
int efi_partition(struct parsed_partitions *state)
{
	gpt_header *gpt = NULL;
	gpt_entry *ptes = NULL;
	u32 i;
	/* Conversion factor from logical blocks to 512-byte sectors. */
	unsigned ssz = bdev_logical_block_size(state->bdev) / 512;
	/* 36-char ASCII GUID plus terminating NUL. */
	u8 unparsed_guid[37];

	if (!find_valid_gpt(state, &gpt, &ptes) || !gpt || !ptes) {
		kfree(gpt);
		kfree(ptes);
		return 0;
	}

	pr_debug("GUID Partition Table is valid!  Yea!\n");

	/* GPT entry i maps to kernel partition slot i+1 (slot 0 is the
	 * whole device), hence the state->limit-1 bound. */
	for (i = 0; i < le32_to_cpu(gpt->num_partition_entries) && i < state->limit-1; i++) {
		struct partition_meta_info *info;
		unsigned label_count = 0;
		unsigned label_max;
		u64 start = le64_to_cpu(ptes[i].starting_lba);
		u64 size = le64_to_cpu(ptes[i].ending_lba) -
			   le64_to_cpu(ptes[i].starting_lba) + 1ULL;

		if (!is_pte_valid(&ptes[i], last_lba(state->bdev)))
			continue;

		put_partition(state, i+1, start * ssz, size * ssz);

		/* If this is a RAID volume, tell md */
		if (!efi_guidcmp(ptes[i].partition_type_guid,
				 PARTITION_LINUX_RAID_GUID))
			state->parts[i + 1].flags = ADDPART_FLAG_RAID;

		info = &state->parts[i + 1].info;
		/* Instead of doing a manual swap to big endian, reuse the
		 * common ASCII hex format as the interim.
		 */
		efi_guid_unparse(&ptes[i].unique_partition_guid, unparsed_guid);
		part_pack_uuid(unparsed_guid, info->uuid);

		/* Naively convert UTF16-LE to 7 bits. */
		label_max = min(sizeof(info->volname) - 1,
				sizeof(ptes[i].partition_name));
		info->volname[label_max] = 0;
		while (label_count < label_max) {
			/* Keep only the low byte of each UTF-16 code unit;
			 * non-printable results are replaced with '!'. */
			u8 c = ptes[i].partition_name[label_count] & 0xff;
			if (c && !isprint(c))
				c = '!';
			info->volname[label_count] = c;
			label_count++;
		}
		state->parts[i + 1].has_info = true;
	}
	kfree(ptes);
	kfree(gpt);
	strlcat(state->pp_buf, "\n", PAGE_SIZE);
	return 1;
}
diff --git a/fs/partitions/efi.h b/fs/partitions/efi.h
deleted file mode 100644
index b69ab729558f..000000000000
--- a/fs/partitions/efi.h
+++ /dev/null
@@ -1,134 +0,0 @@
1/************************************************************
2 * EFI GUID Partition Table
3 * Per Intel EFI Specification v1.02
4 * http://developer.intel.com/technology/efi/efi.htm
5 *
6 * By Matt Domsch <Matt_Domsch@dell.com> Fri Sep 22 22:15:56 CDT 2000
7 * Copyright 2000,2001 Dell Inc.
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 *
23 ************************************************************/
24
#ifndef FS_PART_EFI_H_INCLUDED
#define FS_PART_EFI_H_INCLUDED

#include <linux/types.h>
#include <linux/fs.h>
#include <linux/genhd.h>
#include <linux/kernel.h>
#include <linux/major.h>
#include <linux/string.h>
#include <linux/efi.h>

#define MSDOS_MBR_SIGNATURE 0xaa55
#define EFI_PMBR_OSTYPE_EFI 0xEF
#define EFI_PMBR_OSTYPE_EFI_GPT 0xEE

/* "EFI PART" as a little-endian 64-bit integer. */
#define GPT_HEADER_SIGNATURE 0x5452415020494645ULL
#define GPT_HEADER_REVISION_V1 0x00010000
#define GPT_PRIMARY_PARTITION_TABLE_LBA 1

/* Well-known partition type GUIDs. */
#define PARTITION_SYSTEM_GUID \
    EFI_GUID( 0xC12A7328, 0xF81F, 0x11d2, \
              0xBA, 0x4B, 0x00, 0xA0, 0xC9, 0x3E, 0xC9, 0x3B)
#define LEGACY_MBR_PARTITION_GUID \
    EFI_GUID( 0x024DEE41, 0x33E7, 0x11d3, \
              0x9D, 0x69, 0x00, 0x08, 0xC7, 0x81, 0xF3, 0x9F)
#define PARTITION_MSFT_RESERVED_GUID \
    EFI_GUID( 0xE3C9E316, 0x0B5C, 0x4DB8, \
              0x81, 0x7D, 0xF9, 0x2D, 0xF0, 0x02, 0x15, 0xAE)
#define PARTITION_BASIC_DATA_GUID \
    EFI_GUID( 0xEBD0A0A2, 0xB9E5, 0x4433, \
              0x87, 0xC0, 0x68, 0xB6, 0xB7, 0x26, 0x99, 0xC7)
#define PARTITION_LINUX_RAID_GUID \
    EFI_GUID( 0xa19d880f, 0x05fc, 0x4d3b, \
              0xa0, 0x06, 0x74, 0x3f, 0x0f, 0x84, 0x91, 0x1e)
#define PARTITION_LINUX_SWAP_GUID \
    EFI_GUID( 0x0657fd6d, 0xa4ab, 0x43c4, \
              0x84, 0xe5, 0x09, 0x33, 0xc8, 0x4b, 0x4f, 0x4f)
#define PARTITION_LINUX_LVM_GUID \
    EFI_GUID( 0xe6d6d379, 0xf507, 0x44c2, \
              0xa2, 0x3c, 0x23, 0x8f, 0x2a, 0x3d, 0xf9, 0x28)

/* On-disk GPT header; all multi-byte fields are little-endian. */
typedef struct _gpt_header {
	__le64 signature;		/* must equal GPT_HEADER_SIGNATURE */
	__le32 revision;
	__le32 header_size;		/* bytes covered by header_crc32 */
	__le32 header_crc32;		/* CRC computed with this field zeroed */
	__le32 reserved1;
	__le64 my_lba;			/* LBA of this header copy */
	__le64 alternate_lba;		/* LBA of the other header copy */
	__le64 first_usable_lba;
	__le64 last_usable_lba;
	efi_guid_t disk_guid;
	__le64 partition_entry_lba;	/* start of the entry array */
	__le32 num_partition_entries;
	__le32 sizeof_partition_entry;	/* bytes per entry */
	__le32 partition_entry_array_crc32;

	/* The rest of the logical block is reserved by UEFI and must be zero.
	 * EFI standard handles this by:
	 *
	 * uint8_t		reserved2[ BlockSize - 92 ];
	 */
} __attribute__ ((packed)) gpt_header;

typedef struct _gpt_entry_attributes {
	u64 required_to_function:1;
	u64 reserved:47;
	u64 type_guid_specific:16;
} __attribute__ ((packed)) gpt_entry_attributes;

/* On-disk GPT partition entry. */
typedef struct _gpt_entry {
	efi_guid_t partition_type_guid;
	efi_guid_t unique_partition_guid;
	__le64 starting_lba;
	__le64 ending_lba;
	gpt_entry_attributes attributes;
	efi_char16_t partition_name[72 / sizeof (efi_char16_t)];	/* UTF-16LE */
} __attribute__ ((packed)) gpt_entry;

/* Layout of a classic MBR boot sector (LBA 0). */
typedef struct _legacy_mbr {
	u8 boot_code[440];
	__le32 unique_mbr_signature;
	__le16 unknown;
	struct partition partition_record[4];
	__le16 signature;		/* must equal MSDOS_MBR_SIGNATURE */
} __attribute__ ((packed)) legacy_mbr;

/* Functions */
extern int efi_partition(struct parsed_partitions *state);

#endif
116
117/*
118 * Overrides for Emacs so that we follow Linus's tabbing style.
119 * Emacs will notice this stuff at the end of the file and automatically
120 * adjust the settings for this buffer only. This must remain at the end
121 * of the file.
122 * --------------------------------------------------------------------------
123 * Local variables:
124 * c-indent-level: 4
125 * c-brace-imaginary-offset: 0
126 * c-brace-offset: -4
127 * c-argdecl-indent: 4
128 * c-label-offset: -4
129 * c-continued-statement-offset: 4
130 * c-continued-brace-offset: 0
131 * indent-tabs-mode: nil
132 * tab-width: 8
133 * End:
134 */
diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c
deleted file mode 100644
index d513a07f44bb..000000000000
--- a/fs/partitions/ibm.c
+++ /dev/null
@@ -1,275 +0,0 @@
1/*
2 * File...........: linux/fs/partitions/ibm.c
3 * Author(s)......: Holger Smolinski <Holger.Smolinski@de.ibm.com>
4 * Volker Sameske <sameske@de.ibm.com>
5 * Bugreports.to..: <Linux390@de.ibm.com>
6 * (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 1999,2000
7 */
8
9#include <linux/buffer_head.h>
10#include <linux/hdreg.h>
11#include <linux/slab.h>
12#include <asm/dasd.h>
13#include <asm/ebcdic.h>
14#include <asm/uaccess.h>
15#include <asm/vtoc.h>
16
17#include "check.h"
18#include "ibm.h"
19
20/*
21 * compute the block number from a
22 * cyl-cyl-head-head structure
23 */
24static sector_t
25cchh2blk (struct vtoc_cchh *ptr, struct hd_geometry *geo) {
26
27 sector_t cyl;
28 __u16 head;
29
30 /*decode cylinder and heads for large volumes */
31 cyl = ptr->hh & 0xFFF0;
32 cyl <<= 12;
33 cyl |= ptr->cc;
34 head = ptr->hh & 0x000F;
35 return cyl * geo->heads * geo->sectors +
36 head * geo->sectors;
37}
38
39/*
40 * compute the block number from a
41 * cyl-cyl-head-head-block structure
42 */
43static sector_t
44cchhb2blk (struct vtoc_cchhb *ptr, struct hd_geometry *geo) {
45
46 sector_t cyl;
47 __u16 head;
48
49 /*decode cylinder and heads for large volumes */
50 cyl = ptr->hh & 0xFFF0;
51 cyl <<= 12;
52 cyl |= ptr->cc;
53 head = ptr->hh & 0x000F;
54 return cyl * geo->heads * geo->sectors +
55 head * geo->sectors +
56 ptr->b;
57}
58
/*
 * ibm_partition() - scan an s390 DASD volume label (and, for CDL disks,
 * the VTOC) for partitions.  Handles CDL, LDL and CMS formatted disks.
 * Returns 1 if the label was recognised, 0 if this isn't our format,
 * -1 on a read error.
 */
int ibm_partition(struct parsed_partitions *state)
{
	struct block_device *bdev = state->bdev;
	int blocksize, res;
	loff_t i_size, offset, size, fmt_size;
	dasd_information2_t *info;
	struct hd_geometry *geo;
	char type[5] = {0,};
	char name[7] = {0,};
	union label_t {
		struct vtoc_volume_label_cdl vol;
		struct vtoc_volume_label_ldl lnx;
		struct vtoc_cms_label cms;
	} *label;
	unsigned char *data;
	Sector sect;
	sector_t labelsect;
	char tmp[64];

	res = 0;
	blocksize = bdev_logical_block_size(bdev);
	if (blocksize <= 0)
		goto out_exit;
	i_size = i_size_read(bdev->bd_inode);
	if (i_size == 0)
		goto out_exit;

	info = kmalloc(sizeof(dasd_information2_t), GFP_KERNEL);
	if (info == NULL)
		goto out_exit;
	geo = kmalloc(sizeof(struct hd_geometry), GFP_KERNEL);
	if (geo == NULL)
		goto out_nogeo;
	label = kmalloc(sizeof(union label_t), GFP_KERNEL);
	if (label == NULL)
		goto out_nolab;

	/* Ask the DASD driver for device characteristics and geometry. */
	if (ioctl_by_bdev(bdev, BIODASDINFO2, (unsigned long)info) != 0 ||
	    ioctl_by_bdev(bdev, HDIO_GETGEO, (unsigned long)geo) != 0)
		goto out_freeall;

	/*
	 * Special case for FBA disks: label sector does not depend on
	 * blocksize.
	 */
	if ((info->cu_type == 0x6310 && info->dev_type == 0x9336) ||
	    (info->cu_type == 0x3880 && info->dev_type == 0x3370))
		labelsect = info->label_block;
	else
		labelsect = info->label_block * (blocksize >> 9);

	/*
	 * Get volume label, extract name and type.
	 */
	data = read_part_sector(state, labelsect, &sect);
	if (data == NULL)
		goto out_readerr;

	memcpy(label, data, sizeof(union label_t));
	put_dev_sector(sect);

	/* CDL ECKD disks carry the label in CDL layout, everything else
	 * in LDL layout; both are stored in EBCDIC and converted here. */
	if ((!info->FBA_layout) && (!strcmp(info->type, "ECKD"))) {
		strncpy(type, label->vol.vollbl, 4);
		strncpy(name, label->vol.volid, 6);
	} else {
		strncpy(type, label->lnx.vollbl, 4);
		strncpy(name, label->lnx.volid, 6);
	}
	EBCASC(type, 4);
	EBCASC(name, 6);

	res = 1;

	/*
	 * Three different formats: LDL, CDL and unformated disk
	 *
	 * identified by info->format
	 *
	 * unformated disks we do not have to care about
	 */
	if (info->format == DASD_FORMAT_LDL) {
		if (strncmp(type, "CMS1", 4) == 0) {
			/*
			 * VM style CMS1 labeled disk
			 */
			blocksize = label->cms.block_size;
			if (label->cms.disk_offset != 0) {
				snprintf(tmp, sizeof(tmp), "CMS1/%8s(MDSK):", name);
				strlcat(state->pp_buf, tmp, PAGE_SIZE);
				/* disk is reserved minidisk */
				offset = label->cms.disk_offset;
				size = (label->cms.block_count - 1)
					* (blocksize >> 9);
			} else {
				snprintf(tmp, sizeof(tmp), "CMS1/%8s:", name);
				strlcat(state->pp_buf, tmp, PAGE_SIZE);
				offset = (info->label_block + 1);
				size = label->cms.block_count
					* (blocksize >> 9);
			}
			put_partition(state, 1, offset*(blocksize >> 9),
				      size-offset*(blocksize >> 9));
		} else {
			if (strncmp(type, "LNX1", 4) == 0) {
				snprintf(tmp, sizeof(tmp), "LNX1/%8s:", name);
				strlcat(state->pp_buf, tmp, PAGE_SIZE);
				if (label->lnx.ldl_version == 0xf2) {
					fmt_size = label->lnx.formatted_blocks
						* (blocksize >> 9);
				} else if (!strcmp(info->type, "ECKD")) {
					/* formated w/o large volume support */
					fmt_size = geo->cylinders * geo->heads
						* geo->sectors * (blocksize >> 9);
				} else {
					/* old label and no usable disk geometry
					 * (e.g. DIAG) */
					fmt_size = i_size >> 9;
				}
				/* never report beyond the device size */
				size = i_size >> 9;
				if (fmt_size < size)
					size = fmt_size;
				offset = (info->label_block + 1);
			} else {
				/* unlabeled disk */
				strlcat(state->pp_buf, "(nonl)", PAGE_SIZE);
				size = i_size >> 9;
				offset = (info->label_block + 1);
			}
			put_partition(state, 1, offset*(blocksize >> 9),
				      size-offset*(blocksize >> 9));
		}
	} else if (info->format == DASD_FORMAT_CDL) {
		/*
		 * New style CDL formatted disk
		 */
		sector_t blk;
		int counter;

		/*
		 * check if VOL1 label is available
		 * if not, something is wrong, skipping partition detection
		 */
		if (strncmp(type, "VOL1",  4) == 0) {
			snprintf(tmp, sizeof(tmp), "VOL1/%8s:", name);
			strlcat(state->pp_buf, tmp, PAGE_SIZE);
			/*
			 * get block number and read then go through format1
			 * labels
			 */
			blk = cchhb2blk(&label->vol.vtoc, geo) + 1;
			counter = 0;
			data = read_part_sector(state, blk * (blocksize/512),
						&sect);
			while (data != NULL) {
				struct vtoc_format1_label f1;

				memcpy(&f1, data,
				       sizeof(struct vtoc_format1_label));
				put_dev_sector(sect);

				/* skip FMT4 / FMT5 / FMT7 labels */
				if (f1.DS1FMTID == _ascebc['4']
				    || f1.DS1FMTID == _ascebc['5']
				    || f1.DS1FMTID == _ascebc['7']
				    || f1.DS1FMTID == _ascebc['9']) {
					blk++;
					data = read_part_sector(state,
						blk * (blocksize/512), &sect);
					continue;
				}

				/* only FMT1 and 8 labels valid at this point */
				if (f1.DS1FMTID != _ascebc['1'] &&
				    f1.DS1FMTID != _ascebc['8'])
					break;

				/* OK, we got valid partition data */
				offset = cchh2blk(&f1.DS1EXT1.llimit, geo);
				size  = cchh2blk(&f1.DS1EXT1.ulimit, geo) -
					offset + geo->sectors;
				if (counter >= state->limit)
					break;
				put_partition(state, counter + 1,
					      offset * (blocksize >> 9),
					      size * (blocksize >> 9));
				counter++;
				blk++;
				data = read_part_sector(state,
						blk * (blocksize/512), &sect);
			}

			if (!data)
				/* Are we not supposed to report this ? */
				goto out_readerr;
		} else
			printk(KERN_WARNING "Warning, expected Label VOL1 not "
			       "found, treating as CDL formated Disk");

	}

	strlcat(state->pp_buf, "\n", PAGE_SIZE);
	goto out_freeall;


out_readerr:
	res = -1;
out_freeall:
	kfree(label);
out_nolab:
	kfree(geo);
out_nogeo:
	kfree(info);
out_exit:
	return res;
}
diff --git a/fs/partitions/ibm.h b/fs/partitions/ibm.h
deleted file mode 100644
index 08fb0804a812..000000000000
--- a/fs/partitions/ibm.h
+++ /dev/null
@@ -1 +0,0 @@
1int ibm_partition(struct parsed_partitions *);
diff --git a/fs/partitions/karma.c b/fs/partitions/karma.c
deleted file mode 100644
index 0ea19312706b..000000000000
--- a/fs/partitions/karma.c
+++ /dev/null
@@ -1,57 +0,0 @@
1/*
2 * fs/partitions/karma.c
3 * Rio Karma partition info.
4 *
5 * Copyright (C) 2006 Bob Copeland (me@bobcopeland.com)
6 * based on osf.c
7 */
8
9#include "check.h"
10#include "karma.h"
11
12int karma_partition(struct parsed_partitions *state)
13{
14 int i;
15 int slot = 1;
16 Sector sect;
17 unsigned char *data;
18 struct disklabel {
19 u8 d_reserved[270];
20 struct d_partition {
21 __le32 p_res;
22 u8 p_fstype;
23 u8 p_res2[3];
24 __le32 p_offset;
25 __le32 p_size;
26 } d_partitions[2];
27 u8 d_blank[208];
28 __le16 d_magic;
29 } __attribute__((packed)) *label;
30 struct d_partition *p;
31
32 data = read_part_sector(state, 0, &sect);
33 if (!data)
34 return -1;
35
36 label = (struct disklabel *)data;
37 if (le16_to_cpu(label->d_magic) != KARMA_LABEL_MAGIC) {
38 put_dev_sector(sect);
39 return 0;
40 }
41
42 p = label->d_partitions;
43 for (i = 0 ; i < 2; i++, p++) {
44 if (slot == state->limit)
45 break;
46
47 if (p->p_fstype == 0x4d && le32_to_cpu(p->p_size)) {
48 put_partition(state, slot, le32_to_cpu(p->p_offset),
49 le32_to_cpu(p->p_size));
50 }
51 slot++;
52 }
53 strlcat(state->pp_buf, "\n", PAGE_SIZE);
54 put_dev_sector(sect);
55 return 1;
56}
57
diff --git a/fs/partitions/karma.h b/fs/partitions/karma.h
deleted file mode 100644
index c764b2e9df21..000000000000
--- a/fs/partitions/karma.h
+++ /dev/null
@@ -1,8 +0,0 @@
1/*
2 * fs/partitions/karma.h
3 */
4
5#define KARMA_LABEL_MAGIC 0xAB56
6
7int karma_partition(struct parsed_partitions *state);
8
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
deleted file mode 100644
index bd8ae788f689..000000000000
--- a/fs/partitions/ldm.c
+++ /dev/null
@@ -1,1570 +0,0 @@
1/**
2 * ldm - Support for Windows Logical Disk Manager (Dynamic Disks)
3 *
4 * Copyright (C) 2001,2002 Richard Russon <ldm@flatcap.org>
5 * Copyright (c) 2001-2007 Anton Altaparmakov
6 * Copyright (C) 2001,2002 Jakob Kemi <jakob.kemi@telia.com>
7 *
8 * Documentation is available at http://www.linux-ntfs.org/doku.php?id=downloads
9 *
10 * This program is free software; you can redistribute it and/or modify it under
11 * the terms of the GNU General Public License as published by the Free Software
12 * Foundation; either version 2 of the License, or (at your option) any later
13 * version.
14 *
15 * This program is distributed in the hope that it will be useful, but WITHOUT
16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
18 * details.
19 *
20 * You should have received a copy of the GNU General Public License along with
21 * this program (in the main directory of the source in the file COPYING); if
22 * not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
23 * Boston, MA 02111-1307 USA
24 */
25
26#include <linux/slab.h>
27#include <linux/pagemap.h>
28#include <linux/stringify.h>
29#include <linux/kernel.h>
30#include "ldm.h"
31#include "check.h"
32#include "msdos.h"
33
34/**
35 * ldm_debug/info/error/crit - Output an error message
36 * @f: A printf format string containing the message
37 * @...: Variables to substitute into @f
38 *
39 * ldm_debug() writes a DEBUG level message to the syslog but only if the
40 * driver was compiled with debug enabled. Otherwise, the call turns into a NOP.
41 */
42#ifndef CONFIG_LDM_DEBUG
43#define ldm_debug(...) do {} while (0)
44#else
45#define ldm_debug(f, a...) _ldm_printk (KERN_DEBUG, __func__, f, ##a)
46#endif
47
48#define ldm_crit(f, a...) _ldm_printk (KERN_CRIT, __func__, f, ##a)
49#define ldm_error(f, a...) _ldm_printk (KERN_ERR, __func__, f, ##a)
50#define ldm_info(f, a...) _ldm_printk (KERN_INFO, __func__, f, ##a)
51
52static __printf(3, 4)
53void _ldm_printk(const char *level, const char *function, const char *fmt, ...)
54{
55 struct va_format vaf;
56 va_list args;
57
58 va_start (args, fmt);
59
60 vaf.fmt = fmt;
61 vaf.va = &args;
62
63 printk("%s%s(): %pV\n", level, function, &vaf);
64
65 va_end(args);
66}
67
68/**
69 * ldm_parse_hexbyte - Convert a ASCII hex number to a byte
70 * @src: Pointer to at least 2 characters to convert.
71 *
72 * Convert a two character ASCII hex string to a number.
73 *
74 * Return: 0-255 Success, the byte was parsed correctly
75 * -1 Error, an invalid character was supplied
76 */
77static int ldm_parse_hexbyte (const u8 *src)
78{
79 unsigned int x; /* For correct wrapping */
80 int h;
81
82 /* high part */
83 x = h = hex_to_bin(src[0]);
84 if (h < 0)
85 return -1;
86
87 /* low part */
88 h = hex_to_bin(src[1]);
89 if (h < 0)
90 return -1;
91
92 return (x << 4) + h;
93}
94
95/**
96 * ldm_parse_guid - Convert GUID from ASCII to binary
97 * @src: 36 char string of the form fa50ff2b-f2e8-45de-83fa-65417f2f49ba
98 * @dest: Memory block to hold binary GUID (16 bytes)
99 *
100 * N.B. The GUID need not be NULL terminated.
101 *
102 * Return: 'true' @dest contains binary GUID
103 * 'false' @dest contents are undefined
104 */
105static bool ldm_parse_guid (const u8 *src, u8 *dest)
106{
107 static const int size[] = { 4, 2, 2, 2, 6 };
108 int i, j, v;
109
110 if (src[8] != '-' || src[13] != '-' ||
111 src[18] != '-' || src[23] != '-')
112 return false;
113
114 for (j = 0; j < 5; j++, src++)
115 for (i = 0; i < size[j]; i++, src+=2, *dest++ = v)
116 if ((v = ldm_parse_hexbyte (src)) < 0)
117 return false;
118
119 return true;
120}
121
122/**
123 * ldm_parse_privhead - Read the LDM Database PRIVHEAD structure
124 * @data: Raw database PRIVHEAD structure loaded from the device
125 * @ph: In-memory privhead structure in which to return parsed information
126 *
127 * This parses the LDM database PRIVHEAD structure supplied in @data and
128 * sets up the in-memory privhead structure @ph with the obtained information.
129 *
130 * Return: 'true' @ph contains the PRIVHEAD data
131 * 'false' @ph contents are undefined
132 */
static bool ldm_parse_privhead(const u8 *data, struct privhead *ph)
{
	bool is_vista = false;

	BUG_ON(!data || !ph);
	/* The sector must start with the PRIVHEAD magic signature. */
	if (MAGIC_PRIVHEAD != get_unaligned_be64(data)) {
		ldm_error("Cannot find PRIVHEAD structure. LDM database is"
			" corrupt. Aborting.");
		return false;
	}
	/* Fixed on-disk offsets of the PRIVHEAD fields; all big-endian. */
	ph->ver_major = get_unaligned_be16(data + 0x000C);
	ph->ver_minor = get_unaligned_be16(data + 0x000E);
	ph->logical_disk_start = get_unaligned_be64(data + 0x011B);
	ph->logical_disk_size = get_unaligned_be64(data + 0x0123);
	ph->config_start = get_unaligned_be64(data + 0x012B);
	ph->config_size = get_unaligned_be64(data + 0x0133);
	/* Version 2.11 is Win2k/XP and version 2.12 is Vista. */
	if (ph->ver_major == 2 && ph->ver_minor == 12)
		is_vista = true;
	/* Only versions 2.11 and 2.12 are understood by this parser. */
	if (!is_vista && (ph->ver_major != 2 || ph->ver_minor != 11)) {
		ldm_error("Expected PRIVHEAD version 2.11 or 2.12, got %d.%d."
			" Aborting.", ph->ver_major, ph->ver_minor);
		return false;
	}
	ldm_debug("PRIVHEAD version %d.%d (Windows %s).", ph->ver_major,
			ph->ver_minor, is_vista ? "Vista" : "2000/XP");
	/* An unexpected database size is suspicious but not fatal. */
	if (ph->config_size != LDM_DB_SIZE) {	/* 1 MiB in sectors. */
		/* Warn the user and continue, carefully. */
		ldm_info("Database is normally %u bytes, it claims to "
			"be %llu bytes.", LDM_DB_SIZE,
			(unsigned long long)ph->config_size);
	}
	/* The data area must be non-empty and must not reach into the DB. */
	if ((ph->logical_disk_size == 0) || (ph->logical_disk_start +
			ph->logical_disk_size > ph->config_start)) {
		ldm_error("PRIVHEAD disk size doesn't match real disk size");
		return false;
	}
	/* The disk GUID is stored as a 36-char ASCII string at 0x30. */
	if (!ldm_parse_guid(data + 0x0030, ph->disk_id)) {
		ldm_error("PRIVHEAD contains an invalid GUID.");
		return false;
	}
	ldm_debug("Parsed PRIVHEAD successfully.");
	return true;
}
177
178/**
179 * ldm_parse_tocblock - Read the LDM Database TOCBLOCK structure
180 * @data: Raw database TOCBLOCK structure loaded from the device
181 * @toc: In-memory toc structure in which to return parsed information
182 *
183 * This parses the LDM Database TOCBLOCK (table of contents) structure supplied
184 * in @data and sets up the in-memory tocblock structure @toc with the obtained
185 * information.
186 *
187 * N.B. The *_start and *_size values returned in @toc are not range-checked.
188 *
189 * Return: 'true' @toc contains the TOCBLOCK data
190 * 'false' @toc contents are undefined
191 */
192static bool ldm_parse_tocblock (const u8 *data, struct tocblock *toc)
193{
194 BUG_ON (!data || !toc);
195
196 if (MAGIC_TOCBLOCK != get_unaligned_be64(data)) {
197 ldm_crit ("Cannot find TOCBLOCK, database may be corrupt.");
198 return false;
199 }
200 strncpy (toc->bitmap1_name, data + 0x24, sizeof (toc->bitmap1_name));
201 toc->bitmap1_name[sizeof (toc->bitmap1_name) - 1] = 0;
202 toc->bitmap1_start = get_unaligned_be64(data + 0x2E);
203 toc->bitmap1_size = get_unaligned_be64(data + 0x36);
204
205 if (strncmp (toc->bitmap1_name, TOC_BITMAP1,
206 sizeof (toc->bitmap1_name)) != 0) {
207 ldm_crit ("TOCBLOCK's first bitmap is '%s', should be '%s'.",
208 TOC_BITMAP1, toc->bitmap1_name);
209 return false;
210 }
211 strncpy (toc->bitmap2_name, data + 0x46, sizeof (toc->bitmap2_name));
212 toc->bitmap2_name[sizeof (toc->bitmap2_name) - 1] = 0;
213 toc->bitmap2_start = get_unaligned_be64(data + 0x50);
214 toc->bitmap2_size = get_unaligned_be64(data + 0x58);
215 if (strncmp (toc->bitmap2_name, TOC_BITMAP2,
216 sizeof (toc->bitmap2_name)) != 0) {
217 ldm_crit ("TOCBLOCK's second bitmap is '%s', should be '%s'.",
218 TOC_BITMAP2, toc->bitmap2_name);
219 return false;
220 }
221 ldm_debug ("Parsed TOCBLOCK successfully.");
222 return true;
223}
224
225/**
226 * ldm_parse_vmdb - Read the LDM Database VMDB structure
227 * @data: Raw database VMDB structure loaded from the device
228 * @vm: In-memory vmdb structure in which to return parsed information
229 *
230 * This parses the LDM Database VMDB structure supplied in @data and sets up
231 * the in-memory vmdb structure @vm with the obtained information.
232 *
233 * N.B. The *_start, *_size and *_seq values will be range-checked later.
234 *
235 * Return: 'true' @vm contains VMDB info
236 * 'false' @vm contents are undefined
237 */
static bool ldm_parse_vmdb (const u8 *data, struct vmdb *vm)
{
	BUG_ON (!data || !vm);

	/* The sector must start with the VMDB magic signature. */
	if (MAGIC_VMDB != get_unaligned_be32(data)) {
		ldm_crit ("Cannot find the VMDB, database may be corrupt.");
		return false;
	}

	/* Fixed on-disk offsets of the VMDB header fields; big-endian. */
	vm->ver_major = get_unaligned_be16(data + 0x12);
	vm->ver_minor = get_unaligned_be16(data + 0x14);
	/* Only VMDB version 4.10 is understood by this parser. */
	if ((vm->ver_major != 4) || (vm->ver_minor != 10)) {
		ldm_error ("Expected VMDB version %d.%d, got %d.%d. "
			"Aborting.", 4, 10, vm->ver_major, vm->ver_minor);
		return false;
	}

	/* A zero VBLK size would make every VBLK record degenerate. */
	vm->vblk_size = get_unaligned_be32(data + 0x08);
	if (vm->vblk_size == 0) {
		ldm_error ("Illegal VBLK size");
		return false;
	}

	vm->vblk_offset = get_unaligned_be32(data + 0x0C);
	vm->last_vblk_seq = get_unaligned_be32(data + 0x04);

	ldm_debug ("Parsed VMDB successfully.");
	return true;
}
267
268/**
269 * ldm_compare_privheads - Compare two privhead objects
270 * @ph1: First privhead
271 * @ph2: Second privhead
272 *
273 * This compares the two privhead structures @ph1 and @ph2.
274 *
275 * Return: 'true' Identical
276 * 'false' Different
277 */
278static bool ldm_compare_privheads (const struct privhead *ph1,
279 const struct privhead *ph2)
280{
281 BUG_ON (!ph1 || !ph2);
282
283 return ((ph1->ver_major == ph2->ver_major) &&
284 (ph1->ver_minor == ph2->ver_minor) &&
285 (ph1->logical_disk_start == ph2->logical_disk_start) &&
286 (ph1->logical_disk_size == ph2->logical_disk_size) &&
287 (ph1->config_start == ph2->config_start) &&
288 (ph1->config_size == ph2->config_size) &&
289 !memcmp (ph1->disk_id, ph2->disk_id, GUID_SIZE));
290}
291
292/**
293 * ldm_compare_tocblocks - Compare two tocblock objects
294 * @toc1: First toc
295 * @toc2: Second toc
296 *
297 * This compares the two tocblock structures @toc1 and @toc2.
298 *
299 * Return: 'true' Identical
300 * 'false' Different
301 */
302static bool ldm_compare_tocblocks (const struct tocblock *toc1,
303 const struct tocblock *toc2)
304{
305 BUG_ON (!toc1 || !toc2);
306
307 return ((toc1->bitmap1_start == toc2->bitmap1_start) &&
308 (toc1->bitmap1_size == toc2->bitmap1_size) &&
309 (toc1->bitmap2_start == toc2->bitmap2_start) &&
310 (toc1->bitmap2_size == toc2->bitmap2_size) &&
311 !strncmp (toc1->bitmap1_name, toc2->bitmap1_name,
312 sizeof (toc1->bitmap1_name)) &&
313 !strncmp (toc1->bitmap2_name, toc2->bitmap2_name,
314 sizeof (toc1->bitmap2_name)));
315}
316
317/**
318 * ldm_validate_privheads - Compare the primary privhead with its backups
319 * @state: Partition check state including device holding the LDM Database
320 * @ph1: Memory struct to fill with ph contents
321 *
322 * Read and compare all three privheads from disk.
323 *
324 * The privheads on disk show the size and location of the main disk area and
325 * the configuration area (the database). The values are range-checked against
326 * @hd, which contains the real size of the disk.
327 *
328 * Return: 'true' Success
329 * 'false' Error
330 */
static bool ldm_validate_privheads(struct parsed_partitions *state,
			struct privhead *ph1)
{
	/* On-disk sector offsets of the primary PRIVHEAD and its backups. */
	static const int off[3] = { OFF_PRIV1, OFF_PRIV2, OFF_PRIV3 };
	struct privhead *ph[3] = { ph1 };	/* ph[1], ph[2] allocated below */
	Sector sect;
	u8 *data;
	bool result = false;
	long num_sects;
	int i;

	BUG_ON (!state || !ph1);

	/* Temporary copies for the two backup privheads. */
	ph[1] = kmalloc (sizeof (*ph[1]), GFP_KERNEL);
	ph[2] = kmalloc (sizeof (*ph[2]), GFP_KERNEL);
	if (!ph[1] || !ph[2]) {
		ldm_crit ("Out of memory.");
		goto out;
	}

	/* off[1 & 2] are relative to ph[0]->config_start */
	ph[0]->config_start = 0;

	/* Read and parse privheads */
	for (i = 0; i < 3; i++) {
		data = read_part_sector(state, ph[0]->config_start + off[i],
					&sect);
		if (!data) {
			ldm_crit ("Disk read failed.");
			goto out;
		}
		result = ldm_parse_privhead (data, ph[i]);
		put_dev_sector (sect);	/* release the sector reference */
		if (!result) {
			ldm_error ("Cannot find PRIVHEAD %d.", i+1); /* Log again */
			if (i < 2)
				goto out;	/* Already logged */
			else
				break;	/* FIXME ignore for now, 3rd PH can fail on odd-sized disks */
		}
	}

	/* Device size in 512-byte sectors, for range-checking the DB. */
	num_sects = state->bdev->bd_inode->i_size >> 9;

	if ((ph[0]->config_start > num_sects) ||
	   ((ph[0]->config_start + ph[0]->config_size) > num_sects)) {
		ldm_crit ("Database extends beyond the end of the disk.");
		goto out;
	}

	/* The data area must end before the database begins. */
	if ((ph[0]->logical_disk_start > ph[0]->config_start) ||
	   ((ph[0]->logical_disk_start + ph[0]->logical_disk_size)
			> ph[0]->config_start)) {
		ldm_crit ("Disk and database overlap.");
		goto out;
	}

	/* The primary PRIVHEAD must agree with its first backup. */
	if (!ldm_compare_privheads (ph[0], ph[1])) {
		ldm_crit ("Primary and backup PRIVHEADs don't match.");
		goto out;
	}
	/* FIXME ignore this for now
	if (!ldm_compare_privheads (ph[0], ph[2])) {
		ldm_crit ("Primary and backup PRIVHEADs don't match.");
		goto out;
	}*/
	ldm_debug ("Validated PRIVHEADs successfully.");
	result = true;
out:
	kfree (ph[1]);
	kfree (ph[2]);
	return result;
}
404
405/**
406 * ldm_validate_tocblocks - Validate the table of contents and its backups
407 * @state: Partition check state including device holding the LDM Database
408 * @base: Offset, into @state->bdev, of the database
409 * @ldb: Cache of the database structures
410 *
411 * Find and compare the four tables of contents of the LDM Database stored on
412 * @state->bdev and return the parsed information into @toc1.
413 *
414 * The offsets and sizes of the configs are range-checked against a privhead.
415 *
416 * Return: 'true' @toc1 contains validated TOCBLOCK info
417 * 'false' @toc1 contents are undefined
418 */
static bool ldm_validate_tocblocks(struct parsed_partitions *state,
				   unsigned long base, struct ldmdb *ldb)
{
	/* On-disk offsets, relative to @base, of the four TOCBLOCK copies. */
	static const int off[4] = { OFF_TOCB1, OFF_TOCB2, OFF_TOCB3, OFF_TOCB4};
	struct tocblock *tb[4];
	struct privhead *ph;
	Sector sect;
	u8 *data;
	int i, nr_tbs;
	bool result = false;

	BUG_ON(!state || !ldb);
	ph = &ldb->ph;
	/* tb[0] parses straight into the cached database copy... */
	tb[0] = &ldb->toc;
	/* ...while the three spares share one temporary allocation. */
	tb[1] = kmalloc(sizeof(*tb[1]) * 3, GFP_KERNEL);
	if (!tb[1]) {
		ldm_crit("Out of memory.");
		goto err;
	}
	tb[2] = (struct tocblock*)((u8*)tb[1] + sizeof(*tb[1]));
	tb[3] = (struct tocblock*)((u8*)tb[2] + sizeof(*tb[2]));
	/*
	 * Try to read and parse all four TOCBLOCKs.
	 *
	 * Windows Vista LDM v2.12 does not always have all four TOCBLOCKs so
	 * skip any that fail as long as we get at least one valid TOCBLOCK.
	 */
	for (nr_tbs = i = 0; i < 4; i++) {
		data = read_part_sector(state, base + off[i], &sect);
		if (!data) {
			ldm_error("Disk read failed for TOCBLOCK %d.", i);
			continue;
		}
		/* Successfully parsed copies are packed to the front of tb[]. */
		if (ldm_parse_tocblock(data, tb[nr_tbs]))
			nr_tbs++;
		put_dev_sector(sect);
	}
	if (!nr_tbs) {
		ldm_crit("Failed to find a valid TOCBLOCK.");
		goto err;
	}
	/* Range check the TOCBLOCK against a privhead. */
	if (((tb[0]->bitmap1_start + tb[0]->bitmap1_size) > ph->config_size) ||
	    ((tb[0]->bitmap2_start + tb[0]->bitmap2_size) >
			ph->config_size)) {
		ldm_crit("The bitmaps are out of range. Giving up.");
		goto err;
	}
	/* Compare all loaded TOCBLOCKs. */
	for (i = 1; i < nr_tbs; i++) {
		if (!ldm_compare_tocblocks(tb[0], tb[i])) {
			ldm_crit("TOCBLOCKs 0 and %d do not match.", i);
			goto err;
		}
	}
	ldm_debug("Validated %d TOCBLOCKs successfully.", nr_tbs);
	result = true;
err:
	kfree(tb[1]);	/* frees tb[2] and tb[3] too; tb[0] lives in @ldb */
	return result;
}
480
481/**
482 * ldm_validate_vmdb - Read the VMDB and validate it
483 * @state: Partition check state including device holding the LDM Database
484 * @base: Offset, into @bdev, of the database
485 * @ldb: Cache of the database structures
486 *
487 * Find the vmdb of the LDM Database stored on @bdev and return the parsed
488 * information in @ldb.
489 *
490 * Return: 'true' @ldb contains validated VBDB info
491 * 'false' @ldb contents are undefined
492 */
static bool ldm_validate_vmdb(struct parsed_partitions *state,
			      unsigned long base, struct ldmdb *ldb)
{
	Sector sect;
	u8 *data;
	bool result = false;
	struct vmdb *vm;
	struct tocblock *toc;

	BUG_ON (!state || !ldb);

	/* Parse into the cached database copy; toc is used for bounds. */
	vm = &ldb->vm;
	toc = &ldb->toc;

	data = read_part_sector(state, base + OFF_VMDB, &sect);
	if (!data) {
		ldm_crit ("Disk read failed.");
		return false;
	}

	if (!ldm_parse_vmdb (data, vm))
		goto out;				/* Already logged */

	/* Are there uncommitted transactions? */
	if (get_unaligned_be16(data + 0x10) != 0x01) {
		ldm_crit ("Database is not in a consistent state. Aborting.");
		goto out;
	}

	/* Unusual, but not fatal: just note where the VBLKs begin. */
	if (vm->vblk_offset != 512)
		ldm_info ("VBLKs start at offset 0x%04x.", vm->vblk_offset);

	/*
	 * The last_vblkd_seq can be before the end of the vmdb, just make sure
	 * it is not out of bounds.
	 */
	if ((vm->vblk_size * vm->last_vblk_seq) > (toc->bitmap1_size << 9)) {
		ldm_crit ("VMDB exceeds allowed size specified by TOCBLOCK. "
			"Database is corrupt. Aborting.");
		goto out;
	}

	result = true;
out:
	put_dev_sector (sect);
	return result;
}
540
541
542/**
543 * ldm_validate_partition_table - Determine whether bdev might be a dynamic disk
544 * @state: Partition check state including device holding the LDM Database
545 *
546 * This function provides a weak test to decide whether the device is a dynamic
547 * disk or not. It looks for an MS-DOS-style partition table containing at
548 * least one partition of type 0x42 (formerly SFS, now used by Windows for
549 * dynamic disks).
550 *
551 * N.B. The only possible error can come from the read_part_sector and that is
552 * only likely to happen if the underlying device is strange. If that IS
553 * the case we should return zero to let someone else try.
554 *
555 * Return: 'true' @state->bdev is a dynamic disk
556 * 'false' @state->bdev is not a dynamic disk, or an error occurred
557 */
static bool ldm_validate_partition_table(struct parsed_partitions *state)
{
	Sector sect;
	u8 *data;
	struct partition *p;
	int i;
	bool result = false;

	BUG_ON(!state);

	data = read_part_sector(state, 0, &sect);
	if (!data) {
		ldm_info ("Disk read failed.");
		return false;
	}

	/* Check the MS-DOS boot sector signature at offset 0x1FE. */
	if (*(__le16*) (data + 0x01FE) != cpu_to_le16 (MSDOS_LABEL_MAGIC))
		goto out;

	/* Scan the four primary partition entries for the LDM type. */
	p = (struct partition*)(data + 0x01BE);
	for (i = 0; i < 4; i++, p++)
		if (SYS_IND (p) == LDM_PARTITION) {
			result = true;
			break;
		}

	if (result)
		ldm_debug ("Found W2K dynamic disk partition type.");

out:
	put_dev_sector (sect);
	return result;
}
591
592/**
593 * ldm_get_disk_objid - Search a linked list of vblk's for a given Disk Id
594 * @ldb: Cache of the database structures
595 *
596 * The LDM Database contains a list of all partitions on all dynamic disks.
597 * The primary PRIVHEAD, at the beginning of the physical disk, tells us
598 * the GUID of this disk. This function searches for the GUID in a linked
599 * list of vblk's.
600 *
601 * Return: Pointer, A matching vblk was found
602 * NULL, No match, or an error
603 */
604static struct vblk * ldm_get_disk_objid (const struct ldmdb *ldb)
605{
606 struct list_head *item;
607
608 BUG_ON (!ldb);
609
610 list_for_each (item, &ldb->v_disk) {
611 struct vblk *v = list_entry (item, struct vblk, list);
612 if (!memcmp (v->vblk.disk.disk_id, ldb->ph.disk_id, GUID_SIZE))
613 return v;
614 }
615
616 return NULL;
617}
618
619/**
620 * ldm_create_data_partitions - Create data partitions for this device
621 * @pp: List of the partitions parsed so far
622 * @ldb: Cache of the database structures
623 *
624 * The database contains ALL the partitions for ALL disk groups, so we need to
625 * filter out this specific disk. Using the disk's object id, we can find all
626 * the partitions in the database that belong to this disk.
627 *
628 * Add each partition in our database, to the parsed_partitions structure.
629 *
630 * N.B. This function creates the partitions in the order it finds partition
631 * objects in the linked list.
632 *
633 * Return: 'true' Partition created
634 * 'false' Error, probably a range checking problem
635 */
636static bool ldm_create_data_partitions (struct parsed_partitions *pp,
637 const struct ldmdb *ldb)
638{
639 struct list_head *item;
640 struct vblk *vb;
641 struct vblk *disk;
642 struct vblk_part *part;
643 int part_num = 1;
644
645 BUG_ON (!pp || !ldb);
646
647 disk = ldm_get_disk_objid (ldb);
648 if (!disk) {
649 ldm_crit ("Can't find the ID of this disk in the database.");
650 return false;
651 }
652
653 strlcat(pp->pp_buf, " [LDM]", PAGE_SIZE);
654
655 /* Create the data partitions */
656 list_for_each (item, &ldb->v_part) {
657 vb = list_entry (item, struct vblk, list);
658 part = &vb->vblk.part;
659
660 if (part->disk_id != disk->obj_id)
661 continue;
662
663 put_partition (pp, part_num, ldb->ph.logical_disk_start +
664 part->start, part->size);
665 part_num++;
666 }
667
668 strlcat(pp->pp_buf, "\n", PAGE_SIZE);
669 return true;
670}
671
672
673/**
674 * ldm_relative - Calculate the next relative offset
675 * @buffer: Block of data being worked on
676 * @buflen: Size of the block of data
677 * @base: Size of the previous fixed width fields
678 * @offset: Cumulative size of the previous variable-width fields
679 *
680 * Because many of the VBLK fields are variable-width, it's necessary
681 * to calculate each offset based on the previous one and the length
682 * of the field it pointed to.
683 *
684 * Return: -1 Error, the calculated offset exceeded the size of the buffer
685 * n OK, a range-checked offset into buffer
686 */
687static int ldm_relative(const u8 *buffer, int buflen, int base, int offset)
688{
689
690 base += offset;
691 if (!buffer || offset < 0 || base > buflen) {
692 if (!buffer)
693 ldm_error("!buffer");
694 if (offset < 0)
695 ldm_error("offset (%d) < 0", offset);
696 if (base > buflen)
697 ldm_error("base (%d) > buflen (%d)", base, buflen);
698 return -1;
699 }
700 if (base + buffer[base] >= buflen) {
701 ldm_error("base (%d) + buffer[base] (%d) >= buflen (%d)", base,
702 buffer[base], buflen);
703 return -1;
704 }
705 return buffer[base] + offset + 1;
706}
707
708/**
709 * ldm_get_vnum - Convert a variable-width, big endian number, into cpu order
710 * @block: Pointer to the variable-width number to convert
711 *
712 * Large numbers in the LDM Database are often stored in a packed format. Each
713 * number is prefixed by a one byte width marker. All numbers in the database
714 * are stored in big-endian byte order. This function reads one of these
715 * numbers and returns the result
716 *
717 * N.B. This function DOES NOT perform any range checking, though the most
718 * it will read is eight bytes.
719 *
720 * Return: n A number
721 * 0 Zero, or an error occurred
722 */
723static u64 ldm_get_vnum (const u8 *block)
724{
725 u64 tmp = 0;
726 u8 length;
727
728 BUG_ON (!block);
729
730 length = *block++;
731
732 if (length && length <= 8)
733 while (length--)
734 tmp = (tmp << 8) | *block++;
735 else
736 ldm_error ("Illegal length %d.", length);
737
738 return tmp;
739}
740
741/**
742 * ldm_get_vstr - Read a length-prefixed string into a buffer
743 * @block: Pointer to the length marker
744 * @buffer: Location to copy string to
745 * @buflen: Size of the output buffer
746 *
747 * Many of the strings in the LDM Database are not NULL terminated. Instead
748 * they are prefixed by a one byte length marker. This function copies one of
749 * these strings into a buffer.
750 *
751 * N.B. This function DOES NOT perform any range checking on the input.
752 * If the buffer is too small, the output will be truncated.
753 *
754 * Return: 0, Error and @buffer contents are undefined
755 * n, String length in characters (excluding NULL)
756 * buflen-1, String was truncated.
757 */
758static int ldm_get_vstr (const u8 *block, u8 *buffer, int buflen)
759{
760 int length;
761
762 BUG_ON (!block || !buffer);
763
764 length = block[0];
765 if (length >= buflen) {
766 ldm_error ("Truncating string %d -> %d.", length, buflen);
767 length = buflen - 1;
768 }
769 memcpy (buffer, block + 1, length);
770 buffer[length] = 0;
771 return length;
772}
773
774
775/**
776 * ldm_parse_cmp3 - Read a raw VBLK Component object into a vblk structure
777 * @buffer: Block of data being worked on
778 * @buflen: Size of the block of data
779 * @vb: In-memory vblk in which to return information
780 *
781 * Read a raw VBLK Component object (version 3) into a vblk structure.
782 *
783 * Return: 'true' @vb contains a Component VBLK
784 * 'false' @vb contents are not defined
785 */
static bool ldm_parse_cmp3 (const u8 *buffer, int buflen, struct vblk *vb)
{
	int r_objid, r_name, r_vstate, r_child, r_parent, r_stripe, r_cols, len;
	struct vblk_comp *comp;

	BUG_ON (!buffer || !vb);

	/* Walk the variable-width fields; each offset builds on the last. */
	r_objid = ldm_relative (buffer, buflen, 0x18, 0);
	r_name = ldm_relative (buffer, buflen, 0x18, r_objid);
	r_vstate = ldm_relative (buffer, buflen, 0x18, r_name);
	r_child = ldm_relative (buffer, buflen, 0x1D, r_vstate);
	r_parent = ldm_relative (buffer, buflen, 0x2D, r_child);

	/* Stripe/column fields exist only if the flag byte says so. */
	if (buffer[0x12] & VBLK_FLAG_COMP_STRIPE) {
		r_stripe = ldm_relative (buffer, buflen, 0x2E, r_parent);
		r_cols = ldm_relative (buffer, buflen, 0x2E, r_stripe);
		len = r_cols;
	} else {
		r_stripe = 0;
		r_cols = 0;
		len = r_parent;
	}
	/* A negative offset from ldm_relative marks a malformed record. */
	if (len < 0)
		return false;

	/* The computed length must match the size in the VBLK header. */
	len += VBLK_SIZE_CMP3;
	if (len != get_unaligned_be32(buffer + 0x14))
		return false;

	comp = &vb->vblk.comp;
	ldm_get_vstr (buffer + 0x18 + r_name, comp->state,
		sizeof (comp->state));
	comp->type = buffer[0x18 + r_vstate];
	comp->children = ldm_get_vnum (buffer + 0x1D + r_vstate);
	comp->parent_id = ldm_get_vnum (buffer + 0x2D + r_child);
	/* Chunk size is only meaningful for striped components. */
	comp->chunksize = r_stripe ? ldm_get_vnum (buffer+r_parent+0x2E) : 0;

	return true;
}
825
826/**
827 * ldm_parse_dgr3 - Read a raw VBLK Disk Group object into a vblk structure
828 * @buffer: Block of data being worked on
829 * @buflen: Size of the block of data
830 * @vb: In-memory vblk in which to return information
831 *
832 * Read a raw VBLK Disk Group object (version 3) into a vblk structure.
833 *
834 * Return: 'true' @vb contains a Disk Group VBLK
835 * 'false' @vb contents are not defined
836 */
837static int ldm_parse_dgr3 (const u8 *buffer, int buflen, struct vblk *vb)
838{
839 int r_objid, r_name, r_diskid, r_id1, r_id2, len;
840 struct vblk_dgrp *dgrp;
841
842 BUG_ON (!buffer || !vb);
843
844 r_objid = ldm_relative (buffer, buflen, 0x18, 0);
845 r_name = ldm_relative (buffer, buflen, 0x18, r_objid);
846 r_diskid = ldm_relative (buffer, buflen, 0x18, r_name);
847
848 if (buffer[0x12] & VBLK_FLAG_DGR3_IDS) {
849 r_id1 = ldm_relative (buffer, buflen, 0x24, r_diskid);
850 r_id2 = ldm_relative (buffer, buflen, 0x24, r_id1);
851 len = r_id2;
852 } else {
853 r_id1 = 0;
854 r_id2 = 0;
855 len = r_diskid;
856 }
857 if (len < 0)
858 return false;
859
860 len += VBLK_SIZE_DGR3;
861 if (len != get_unaligned_be32(buffer + 0x14))
862 return false;
863
864 dgrp = &vb->vblk.dgrp;
865 ldm_get_vstr (buffer + 0x18 + r_name, dgrp->disk_id,
866 sizeof (dgrp->disk_id));
867 return true;
868}
869
870/**
871 * ldm_parse_dgr4 - Read a raw VBLK Disk Group object into a vblk structure
872 * @buffer: Block of data being worked on
873 * @buflen: Size of the block of data
874 * @vb: In-memory vblk in which to return information
875 *
876 * Read a raw VBLK Disk Group object (version 4) into a vblk structure.
877 *
878 * Return: 'true' @vb contains a Disk Group VBLK
879 * 'false' @vb contents are not defined
880 */
881static bool ldm_parse_dgr4 (const u8 *buffer, int buflen, struct vblk *vb)
882{
883 char buf[64];
884 int r_objid, r_name, r_id1, r_id2, len;
885 struct vblk_dgrp *dgrp;
886
887 BUG_ON (!buffer || !vb);
888
889 r_objid = ldm_relative (buffer, buflen, 0x18, 0);
890 r_name = ldm_relative (buffer, buflen, 0x18, r_objid);
891
892 if (buffer[0x12] & VBLK_FLAG_DGR4_IDS) {
893 r_id1 = ldm_relative (buffer, buflen, 0x44, r_name);
894 r_id2 = ldm_relative (buffer, buflen, 0x44, r_id1);
895 len = r_id2;
896 } else {
897 r_id1 = 0;
898 r_id2 = 0;
899 len = r_name;
900 }
901 if (len < 0)
902 return false;
903
904 len += VBLK_SIZE_DGR4;
905 if (len != get_unaligned_be32(buffer + 0x14))
906 return false;
907
908 dgrp = &vb->vblk.dgrp;
909
910 ldm_get_vstr (buffer + 0x18 + r_objid, buf, sizeof (buf));
911 return true;
912}
913
914/**
915 * ldm_parse_dsk3 - Read a raw VBLK Disk object into a vblk structure
916 * @buffer: Block of data being worked on
917 * @buflen: Size of the block of data
918 * @vb: In-memory vblk in which to return information
919 *
920 * Read a raw VBLK Disk object (version 3) into a vblk structure.
921 *
922 * Return: 'true' @vb contains a Disk VBLK
923 * 'false' @vb contents are not defined
924 */
static bool ldm_parse_dsk3 (const u8 *buffer, int buflen, struct vblk *vb)
{
	int r_objid, r_name, r_diskid, r_altname, len;
	struct vblk_disk *disk;

	BUG_ON (!buffer || !vb);

	/* Walk the variable-width fields; each offset builds on the last. */
	r_objid = ldm_relative (buffer, buflen, 0x18, 0);
	r_name = ldm_relative (buffer, buflen, 0x18, r_objid);
	r_diskid = ldm_relative (buffer, buflen, 0x18, r_name);
	r_altname = ldm_relative (buffer, buflen, 0x18, r_diskid);
	len = r_altname;
	/* A negative offset from ldm_relative marks a malformed record. */
	if (len < 0)
		return false;

	/* The computed length must match the size in the VBLK header. */
	len += VBLK_SIZE_DSK3;
	if (len != get_unaligned_be32(buffer + 0x14))
		return false;

	disk = &vb->vblk.disk;
	ldm_get_vstr (buffer + 0x18 + r_diskid, disk->alt_name,
		sizeof (disk->alt_name));
	/* v3 Disk records store the GUID as a 36-char ASCII string. */
	if (!ldm_parse_guid (buffer + 0x19 + r_name, disk->disk_id))
		return false;

	return true;
}
952
953/**
954 * ldm_parse_dsk4 - Read a raw VBLK Disk object into a vblk structure
955 * @buffer: Block of data being worked on
956 * @buflen: Size of the block of data
957 * @vb: In-memory vblk in which to return information
958 *
959 * Read a raw VBLK Disk object (version 4) into a vblk structure.
960 *
961 * Return: 'true' @vb contains a Disk VBLK
962 * 'false' @vb contents are not defined
963 */
964static bool ldm_parse_dsk4 (const u8 *buffer, int buflen, struct vblk *vb)
965{
966 int r_objid, r_name, len;
967 struct vblk_disk *disk;
968
969 BUG_ON (!buffer || !vb);
970
971 r_objid = ldm_relative (buffer, buflen, 0x18, 0);
972 r_name = ldm_relative (buffer, buflen, 0x18, r_objid);
973 len = r_name;
974 if (len < 0)
975 return false;
976
977 len += VBLK_SIZE_DSK4;
978 if (len != get_unaligned_be32(buffer + 0x14))
979 return false;
980
981 disk = &vb->vblk.disk;
982 memcpy (disk->disk_id, buffer + 0x18 + r_name, GUID_SIZE);
983 return true;
984}
985
/**
 * ldm_parse_prt3 - Read a raw VBLK Partition object into a vblk structure
 * @buffer:  Block of data being worked on
 * @buflen:  Size of the block of data
 * @vb:      In-memory vblk in which to return information
 *
 * Read a raw VBLK Partition object (version 3) into a vblk structure.
 *
 * Return:  'true'   @vb contains a Partition VBLK
 *          'false'  @vb contents are not defined
 */
static bool ldm_parse_prt3(const u8 *buffer, int buflen, struct vblk *vb)
{
	int r_objid, r_name, r_size, r_parent, r_diskid, r_index, len;
	struct vblk_part *part;

	BUG_ON(!buffer || !vb);
	/* Each ldm_relative() returns the cumulative offset of the next
	 * variable-width field, or a negative value on corruption; every
	 * step is checked so a bad record can never index out of bounds. */
	r_objid = ldm_relative(buffer, buflen, 0x18, 0);
	if (r_objid < 0) {
		ldm_error("r_objid %d < 0", r_objid);
		return false;
	}
	r_name = ldm_relative(buffer, buflen, 0x18, r_objid);
	if (r_name < 0) {
		ldm_error("r_name %d < 0", r_name);
		return false;
	}
	r_size = ldm_relative(buffer, buflen, 0x34, r_name);
	if (r_size < 0) {
		ldm_error("r_size %d < 0", r_size);
		return false;
	}
	r_parent = ldm_relative(buffer, buflen, 0x34, r_size);
	if (r_parent < 0) {
		ldm_error("r_parent %d < 0", r_parent);
		return false;
	}
	r_diskid = ldm_relative(buffer, buflen, 0x34, r_parent);
	if (r_diskid < 0) {
		ldm_error("r_diskid %d < 0", r_diskid);
		return false;
	}
	/* The partition-index field is optional, gated by a flag bit. */
	if (buffer[0x12] & VBLK_FLAG_PART_INDEX) {
		r_index = ldm_relative(buffer, buflen, 0x34, r_diskid);
		if (r_index < 0) {
			ldm_error("r_index %d < 0", r_index);
			return false;
		}
		len = r_index;
	} else {
		r_index = 0;
		len = r_diskid;
	}
	if (len < 0) {
		ldm_error("len %d < 0", len);
		return false;
	}
	/* The computed length may not exceed the record's declared size
	 * (big-endian word at offset 0x14). */
	len += VBLK_SIZE_PRT3;
	if (len > get_unaligned_be32(buffer + 0x14)) {
		ldm_error("len %d > BE32(buffer + 0x14) %d", len,
			get_unaligned_be32(buffer + 0x14));
		return false;
	}
	/* All offsets validated — now decode the fixed fields relative to
	 * the ends of the variable-width fields computed above. */
	part = &vb->vblk.part;
	part->start = get_unaligned_be64(buffer + 0x24 + r_name);
	part->volume_offset = get_unaligned_be64(buffer + 0x2C + r_name);
	part->size = ldm_get_vnum(buffer + 0x34 + r_name);
	part->parent_id = ldm_get_vnum(buffer + 0x34 + r_size);
	part->disk_id = ldm_get_vnum(buffer + 0x34 + r_parent);
	/* vb->flags was copied from buffer[0x12] by ldm_parse_vblk(). */
	if (vb->flags & VBLK_FLAG_PART_INDEX)
		part->partnum = buffer[0x35 + r_diskid];
	else
		part->partnum = 0;
	return true;
}
1061
/**
 * ldm_parse_vol5 - Read a raw VBLK Volume object into a vblk structure
 * @buffer:  Block of data being worked on
 * @buflen:  Size of the block of data
 * @vb:      In-memory vblk in which to return information
 *
 * Read a raw VBLK Volume object (version 5) into a vblk structure.
 *
 * Return:  'true'   @vb contains a Volume VBLK
 *          'false'  @vb contents are not defined
 */
static bool ldm_parse_vol5(const u8 *buffer, int buflen, struct vblk *vb)
{
	int r_objid, r_name, r_vtype, r_disable_drive_letter, r_child, r_size;
	int r_id1, r_id2, r_size2, r_drive, len;
	struct vblk_volu *volu;

	BUG_ON(!buffer || !vb);
	/* Walk the variable-width fields one by one; every ldm_relative()
	 * result is checked so corrupt records are rejected early. */
	r_objid = ldm_relative(buffer, buflen, 0x18, 0);
	if (r_objid < 0) {
		ldm_error("r_objid %d < 0", r_objid);
		return false;
	}
	r_name = ldm_relative(buffer, buflen, 0x18, r_objid);
	if (r_name < 0) {
		ldm_error("r_name %d < 0", r_name);
		return false;
	}
	r_vtype = ldm_relative(buffer, buflen, 0x18, r_name);
	if (r_vtype < 0) {
		ldm_error("r_vtype %d < 0", r_vtype);
		return false;
	}
	r_disable_drive_letter = ldm_relative(buffer, buflen, 0x18, r_vtype);
	if (r_disable_drive_letter < 0) {
		ldm_error("r_disable_drive_letter %d < 0",
			  r_disable_drive_letter);
		return false;
	}
	r_child = ldm_relative(buffer, buflen, 0x2D, r_disable_drive_letter);
	if (r_child < 0) {
		ldm_error("r_child %d < 0", r_child);
		return false;
	}
	r_size = ldm_relative(buffer, buflen, 0x3D, r_child);
	if (r_size < 0) {
		ldm_error("r_size %d < 0", r_size);
		return false;
	}
	/* The next four fields are each optional, controlled by separate
	 * bits in the flag byte at offset 0x12; an absent field keeps the
	 * running offset of its predecessor. */
	if (buffer[0x12] & VBLK_FLAG_VOLU_ID1) {
		r_id1 = ldm_relative(buffer, buflen, 0x52, r_size);
		if (r_id1 < 0) {
			ldm_error("r_id1 %d < 0", r_id1);
			return false;
		}
	} else
		r_id1 = r_size;
	if (buffer[0x12] & VBLK_FLAG_VOLU_ID2) {
		r_id2 = ldm_relative(buffer, buflen, 0x52, r_id1);
		if (r_id2 < 0) {
			ldm_error("r_id2 %d < 0", r_id2);
			return false;
		}
	} else
		r_id2 = r_id1;
	if (buffer[0x12] & VBLK_FLAG_VOLU_SIZE) {
		r_size2 = ldm_relative(buffer, buflen, 0x52, r_id2);
		if (r_size2 < 0) {
			ldm_error("r_size2 %d < 0", r_size2);
			return false;
		}
	} else
		r_size2 = r_id2;
	if (buffer[0x12] & VBLK_FLAG_VOLU_DRIVE) {
		r_drive = ldm_relative(buffer, buflen, 0x52, r_size2);
		if (r_drive < 0) {
			ldm_error("r_drive %d < 0", r_drive);
			return false;
		}
	} else
		r_drive = r_size2;
	len = r_drive;
	if (len < 0) {
		ldm_error("len %d < 0", len);
		return false;
	}
	/* Computed length may not exceed the record's declared size. */
	len += VBLK_SIZE_VOL5;
	if (len > get_unaligned_be32(buffer + 0x14)) {
		ldm_error("len %d > BE32(buffer + 0x14) %d", len,
			  get_unaligned_be32(buffer + 0x14));
		return false;
	}
	/* All offsets validated — decode the volume fields. */
	volu = &vb->vblk.volu;
	ldm_get_vstr(buffer + 0x18 + r_name, volu->volume_type,
		     sizeof(volu->volume_type));
	memcpy(volu->volume_state, buffer + 0x18 + r_disable_drive_letter,
	       sizeof(volu->volume_state));
	volu->size = ldm_get_vnum(buffer + 0x3D + r_child);
	volu->partition_type = buffer[0x41 + r_size];
	memcpy(volu->guid, buffer + 0x42 + r_size, sizeof(volu->guid));
	/* The drive hint only exists when its flag bit is set. */
	if (buffer[0x12] & VBLK_FLAG_VOLU_DRIVE) {
		ldm_get_vstr(buffer + 0x52 + r_size, volu->drive_hint,
			     sizeof(volu->drive_hint));
	}
	return true;
}
1168
/**
 * ldm_parse_vblk - Read a raw VBLK object into a vblk structure
 * @buf:  Block of data being worked on
 * @len:  Size of the block of data
 * @vb:   In-memory vblk in which to return information
 *
 * Read a raw VBLK object into a vblk structure.  This function just reads the
 * information common to all VBLK types, then delegates the rest of the work to
 * helper functions: ldm_parse_*.
 *
 * Return:  'true'   @vb contains a VBLK
 *          'false'  @vb contents are not defined
 */
static bool ldm_parse_vblk (const u8 *buf, int len, struct vblk *vb)
{
	bool result = false;
	int r_objid;

	BUG_ON (!buf || !vb);

	r_objid = ldm_relative (buf, len, 0x18, 0);
	if (r_objid < 0) {
		ldm_error ("VBLK header is corrupt.");
		return false;
	}

	/* Common header fields shared by every VBLK type; the type-specific
	 * parsers below rely on vb->flags already being filled in. */
	vb->flags = buf[0x12];
	vb->type = buf[0x13];
	vb->obj_id = ldm_get_vnum (buf + 0x18);
	ldm_get_vstr (buf+0x18+r_objid, vb->name, sizeof (vb->name));

	/* Dispatch on the record type; unknown types leave result false. */
	switch (vb->type) {
	case VBLK_CMP3: result = ldm_parse_cmp3 (buf, len, vb); break;
	case VBLK_DSK3: result = ldm_parse_dsk3 (buf, len, vb); break;
	case VBLK_DSK4: result = ldm_parse_dsk4 (buf, len, vb); break;
	case VBLK_DGR3: result = ldm_parse_dgr3 (buf, len, vb); break;
	case VBLK_DGR4: result = ldm_parse_dgr4 (buf, len, vb); break;
	case VBLK_PRT3: result = ldm_parse_prt3 (buf, len, vb); break;
	case VBLK_VOL5: result = ldm_parse_vol5 (buf, len, vb); break;
	}

	if (result)
		ldm_debug ("Parsed VBLK 0x%llx (type: 0x%02x) ok.",
			 (unsigned long long) vb->obj_id, vb->type);
	else
		ldm_error ("Failed to parse VBLK 0x%llx (type: 0x%02x).",
			(unsigned long long) vb->obj_id, vb->type);

	return result;
}
1219
1220
1221/**
1222 * ldm_ldmdb_add - Adds a raw VBLK entry to the ldmdb database
1223 * @data: Raw VBLK to add to the database
1224 * @len: Size of the raw VBLK
1225 * @ldb: Cache of the database structures
1226 *
1227 * The VBLKs are sorted into categories. Partitions are also sorted by offset.
1228 *
1229 * N.B. This function does not check the validity of the VBLKs.
1230 *
1231 * Return: 'true' The VBLK was added
1232 * 'false' An error occurred
1233 */
1234static bool ldm_ldmdb_add (u8 *data, int len, struct ldmdb *ldb)
1235{
1236 struct vblk *vb;
1237 struct list_head *item;
1238
1239 BUG_ON (!data || !ldb);
1240
1241 vb = kmalloc (sizeof (*vb), GFP_KERNEL);
1242 if (!vb) {
1243 ldm_crit ("Out of memory.");
1244 return false;
1245 }
1246
1247 if (!ldm_parse_vblk (data, len, vb)) {
1248 kfree(vb);
1249 return false; /* Already logged */
1250 }
1251
1252 /* Put vblk into the correct list. */
1253 switch (vb->type) {
1254 case VBLK_DGR3:
1255 case VBLK_DGR4:
1256 list_add (&vb->list, &ldb->v_dgrp);
1257 break;
1258 case VBLK_DSK3:
1259 case VBLK_DSK4:
1260 list_add (&vb->list, &ldb->v_disk);
1261 break;
1262 case VBLK_VOL5:
1263 list_add (&vb->list, &ldb->v_volu);
1264 break;
1265 case VBLK_CMP3:
1266 list_add (&vb->list, &ldb->v_comp);
1267 break;
1268 case VBLK_PRT3:
1269 /* Sort by the partition's start sector. */
1270 list_for_each (item, &ldb->v_part) {
1271 struct vblk *v = list_entry (item, struct vblk, list);
1272 if ((v->vblk.part.disk_id == vb->vblk.part.disk_id) &&
1273 (v->vblk.part.start > vb->vblk.part.start)) {
1274 list_add_tail (&vb->list, &v->list);
1275 return true;
1276 }
1277 }
1278 list_add_tail (&vb->list, &ldb->v_part);
1279 break;
1280 }
1281 return true;
1282}
1283
1284/**
1285 * ldm_frag_add - Add a VBLK fragment to a list
1286 * @data: Raw fragment to be added to the list
1287 * @size: Size of the raw fragment
1288 * @frags: Linked list of VBLK fragments
1289 *
1290 * Fragmented VBLKs may not be consecutive in the database, so they are placed
1291 * in a list so they can be pieced together later.
1292 *
1293 * Return: 'true' Success, the VBLK was added to the list
1294 * 'false' Error, a problem occurred
1295 */
1296static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags)
1297{
1298 struct frag *f;
1299 struct list_head *item;
1300 int rec, num, group;
1301
1302 BUG_ON (!data || !frags);
1303
1304 if (size < 2 * VBLK_SIZE_HEAD) {
1305 ldm_error("Value of size is to small.");
1306 return false;
1307 }
1308
1309 group = get_unaligned_be32(data + 0x08);
1310 rec = get_unaligned_be16(data + 0x0C);
1311 num = get_unaligned_be16(data + 0x0E);
1312 if ((num < 1) || (num > 4)) {
1313 ldm_error ("A VBLK claims to have %d parts.", num);
1314 return false;
1315 }
1316 if (rec >= num) {
1317 ldm_error("REC value (%d) exceeds NUM value (%d)", rec, num);
1318 return false;
1319 }
1320
1321 list_for_each (item, frags) {
1322 f = list_entry (item, struct frag, list);
1323 if (f->group == group)
1324 goto found;
1325 }
1326
1327 f = kmalloc (sizeof (*f) + size*num, GFP_KERNEL);
1328 if (!f) {
1329 ldm_crit ("Out of memory.");
1330 return false;
1331 }
1332
1333 f->group = group;
1334 f->num = num;
1335 f->rec = rec;
1336 f->map = 0xFF << num;
1337
1338 list_add_tail (&f->list, frags);
1339found:
1340 if (rec >= f->num) {
1341 ldm_error("REC value (%d) exceeds NUM value (%d)", rec, f->num);
1342 return false;
1343 }
1344
1345 if (f->map & (1 << rec)) {
1346 ldm_error ("Duplicate VBLK, part %d.", rec);
1347 f->map &= 0x7F; /* Mark the group as broken */
1348 return false;
1349 }
1350
1351 f->map |= (1 << rec);
1352
1353 data += VBLK_SIZE_HEAD;
1354 size -= VBLK_SIZE_HEAD;
1355
1356 memcpy (f->data+rec*(size-VBLK_SIZE_HEAD)+VBLK_SIZE_HEAD, data, size);
1357
1358 return true;
1359}
1360
1361/**
1362 * ldm_frag_free - Free a linked list of VBLK fragments
1363 * @list: Linked list of fragments
1364 *
1365 * Free a linked list of VBLK fragments
1366 *
1367 * Return: none
1368 */
1369static void ldm_frag_free (struct list_head *list)
1370{
1371 struct list_head *item, *tmp;
1372
1373 BUG_ON (!list);
1374
1375 list_for_each_safe (item, tmp, list)
1376 kfree (list_entry (item, struct frag, list));
1377}
1378
1379/**
1380 * ldm_frag_commit - Validate fragmented VBLKs and add them to the database
1381 * @frags: Linked list of VBLK fragments
1382 * @ldb: Cache of the database structures
1383 *
1384 * Now that all the fragmented VBLKs have been collected, they must be added to
1385 * the database for later use.
1386 *
 * Return:  'true'   All the fragments were added successfully
 *          'false'  One or more of the fragments were invalid
1389 */
static bool ldm_frag_commit (struct list_head *frags, struct ldmdb *ldb)
{
	struct frag *f;
	struct list_head *item;

	BUG_ON (!frags || !ldb);

	list_for_each (item, frags) {
		f = list_entry (item, struct frag, list);

		/* All eight map bits set means every record was collected
		 * (missing records were pre-set by ldm_frag_add). */
		if (f->map != 0xFF) {
			ldm_error ("VBLK group %d is incomplete (0x%02x).",
				f->group, f->map);
			return false;
		}

		/* The reassembled buffer spans f->num whole VBLKs. */
		if (!ldm_ldmdb_add (f->data, f->num*ldb->vm.vblk_size, ldb))
			return false;		/* Already logged */
	}
	return true;
}
1411
1412/**
1413 * ldm_get_vblks - Read the on-disk database of VBLKs into memory
1414 * @state: Partition check state including device holding the LDM Database
1415 * @base: Offset, into @state->bdev, of the database
1416 * @ldb: Cache of the database structures
1417 *
1418 * To use the information from the VBLKs, they need to be read from the disk,
1419 * unpacked and validated. We cache them in @ldb according to their type.
1420 *
1421 * Return: 'true' All the VBLKs were read successfully
1422 * 'false' An error occurred
1423 */
1424static bool ldm_get_vblks(struct parsed_partitions *state, unsigned long base,
1425 struct ldmdb *ldb)
1426{
1427 int size, perbuf, skip, finish, s, v, recs;
1428 u8 *data = NULL;
1429 Sector sect;
1430 bool result = false;
1431 LIST_HEAD (frags);
1432
1433 BUG_ON(!state || !ldb);
1434
1435 size = ldb->vm.vblk_size;
1436 perbuf = 512 / size;
1437 skip = ldb->vm.vblk_offset >> 9; /* Bytes to sectors */
1438 finish = (size * ldb->vm.last_vblk_seq) >> 9;
1439
1440 for (s = skip; s < finish; s++) { /* For each sector */
1441 data = read_part_sector(state, base + OFF_VMDB + s, &sect);
1442 if (!data) {
1443 ldm_crit ("Disk read failed.");
1444 goto out;
1445 }
1446
1447 for (v = 0; v < perbuf; v++, data+=size) { /* For each vblk */
1448 if (MAGIC_VBLK != get_unaligned_be32(data)) {
1449 ldm_error ("Expected to find a VBLK.");
1450 goto out;
1451 }
1452
1453 recs = get_unaligned_be16(data + 0x0E); /* Number of records */
1454 if (recs == 1) {
1455 if (!ldm_ldmdb_add (data, size, ldb))
1456 goto out; /* Already logged */
1457 } else if (recs > 1) {
1458 if (!ldm_frag_add (data, size, &frags))
1459 goto out; /* Already logged */
1460 }
1461 /* else Record is not in use, ignore it. */
1462 }
1463 put_dev_sector (sect);
1464 data = NULL;
1465 }
1466
1467 result = ldm_frag_commit (&frags, ldb); /* Failures, already logged */
1468out:
1469 if (data)
1470 put_dev_sector (sect);
1471 ldm_frag_free (&frags);
1472
1473 return result;
1474}
1475
1476/**
1477 * ldm_free_vblks - Free a linked list of vblk's
1478 * @lh: Head of a linked list of struct vblk
1479 *
1480 * Free a list of vblk's and free the memory used to maintain the list.
1481 *
1482 * Return: none
1483 */
1484static void ldm_free_vblks (struct list_head *lh)
1485{
1486 struct list_head *item, *tmp;
1487
1488 BUG_ON (!lh);
1489
1490 list_for_each_safe (item, tmp, lh)
1491 kfree (list_entry (item, struct vblk, list));
1492}
1493
1494
/**
 * ldm_partition - Find out whether a device is a dynamic disk and handle it
 * @state:  Partition check state including device holding the LDM Database
 *
 * This determines whether the device @bdev is a dynamic disk and if so creates
 * the partitions necessary in the gendisk structure pointed to by @hd.
 *
 * We create a dummy device 1, which contains the LDM database, and then create
 * each partition described by the LDM database in sequence as devices 2+.  For
 * example, if the device is hda, we would have: hda1: LDM database, hda2, hda3,
 * and so on: the actual data containing partitions.
 *
 * Return:  1  Success, @state->bdev is a dynamic disk and we handled it
 *          0  Success, @state->bdev is not a dynamic disk
 *         -1  An error occurred before enough information had been read
 *             Or @state->bdev is a dynamic disk, but it may be corrupted
 */
int ldm_partition(struct parsed_partitions *state)
{
	struct ldmdb *ldb;
	unsigned long base;
	int result = -1;

	BUG_ON(!state);

	/* Look for signs of a Dynamic Disk */
	if (!ldm_validate_partition_table(state))
		return 0;

	ldb = kmalloc (sizeof (*ldb), GFP_KERNEL);
	if (!ldb) {
		ldm_crit ("Out of memory.");
		goto out;
	}

	/* Parse and check privheads. */
	if (!ldm_validate_privheads(state, &ldb->ph))
		goto out;		/* Already logged */

	/* All further references are relative to base (database start). */
	base = ldb->ph.config_start;

	/* Parse and check tocs and vmdb. */
	if (!ldm_validate_tocblocks(state, base, ldb) ||
	    !ldm_validate_vmdb(state, base, ldb))
		goto out;		/* Already logged */

	/* Initialize vblk lists in ldmdb struct */
	INIT_LIST_HEAD (&ldb->v_dgrp);
	INIT_LIST_HEAD (&ldb->v_disk);
	INIT_LIST_HEAD (&ldb->v_volu);
	INIT_LIST_HEAD (&ldb->v_comp);
	INIT_LIST_HEAD (&ldb->v_part);

	if (!ldm_get_vblks(state, base, ldb)) {
		ldm_crit ("Failed to read the VBLKs from the database.");
		goto cleanup;
	}

	/* Finally, create the data partition devices. */
	if (ldm_create_data_partitions(state, ldb)) {
		ldm_debug ("Parsed LDM database successfully.");
		result = 1;
	}
	/* else Already logged */

	/* Note: 'cleanup' is only safe once the five lists above have been
	 * initialised; earlier failures jump straight to 'out'. */
cleanup:
	ldm_free_vblks (&ldb->v_dgrp);
	ldm_free_vblks (&ldb->v_disk);
	ldm_free_vblks (&ldb->v_volu);
	ldm_free_vblks (&ldb->v_comp);
	ldm_free_vblks (&ldb->v_part);
out:
	kfree (ldb);
	return result;
}
diff --git a/fs/partitions/ldm.h b/fs/partitions/ldm.h
deleted file mode 100644
index 374242c0971a..000000000000
--- a/fs/partitions/ldm.h
+++ /dev/null
@@ -1,215 +0,0 @@
1/**
2 * ldm - Part of the Linux-NTFS project.
3 *
4 * Copyright (C) 2001,2002 Richard Russon <ldm@flatcap.org>
5 * Copyright (c) 2001-2007 Anton Altaparmakov
6 * Copyright (C) 2001,2002 Jakob Kemi <jakob.kemi@telia.com>
7 *
8 * Documentation is available at http://www.linux-ntfs.org/doku.php?id=downloads
9 *
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the Free
12 * Software Foundation; either version 2 of the License, or (at your option)
13 * any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program (in the main directory of the Linux-NTFS source
22 * in the file COPYING); if not, write to the Free Software Foundation,
23 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */
25
26#ifndef _FS_PT_LDM_H_
27#define _FS_PT_LDM_H_
28
29#include <linux/types.h>
30#include <linux/list.h>
31#include <linux/genhd.h>
32#include <linux/fs.h>
33#include <asm/unaligned.h>
34#include <asm/byteorder.h>
35
36struct parsed_partitions;
37
38/* Magic numbers in CPU format. */
39#define MAGIC_VMDB 0x564D4442 /* VMDB */
40#define MAGIC_VBLK 0x56424C4B /* VBLK */
41#define MAGIC_PRIVHEAD 0x5052495648454144ULL /* PRIVHEAD */
42#define MAGIC_TOCBLOCK 0x544F43424C4F434BULL /* TOCBLOCK */
43
44/* The defined vblk types. */
45#define VBLK_VOL5 0x51 /* Volume, version 5 */
46#define VBLK_CMP3 0x32 /* Component, version 3 */
47#define VBLK_PRT3 0x33 /* Partition, version 3 */
48#define VBLK_DSK3 0x34 /* Disk, version 3 */
49#define VBLK_DSK4 0x44 /* Disk, version 4 */
50#define VBLK_DGR3 0x35 /* Disk Group, version 3 */
51#define VBLK_DGR4 0x45 /* Disk Group, version 4 */
52
53/* vblk flags indicating extra information will be present */
54#define VBLK_FLAG_COMP_STRIPE 0x10
55#define VBLK_FLAG_PART_INDEX 0x08
56#define VBLK_FLAG_DGR3_IDS 0x08
57#define VBLK_FLAG_DGR4_IDS 0x08
58#define VBLK_FLAG_VOLU_ID1 0x08
59#define VBLK_FLAG_VOLU_ID2 0x20
60#define VBLK_FLAG_VOLU_SIZE 0x80
61#define VBLK_FLAG_VOLU_DRIVE 0x02
62
63/* size of a vblk's static parts */
64#define VBLK_SIZE_HEAD 16
65#define VBLK_SIZE_CMP3 22 /* Name and version */
66#define VBLK_SIZE_DGR3 12
67#define VBLK_SIZE_DGR4 44
68#define VBLK_SIZE_DSK3 12
69#define VBLK_SIZE_DSK4 45
70#define VBLK_SIZE_PRT3 28
71#define VBLK_SIZE_VOL5 58
72
73/* component types */
74#define COMP_STRIPE 0x01 /* Stripe-set */
75#define COMP_BASIC 0x02 /* Basic disk */
76#define COMP_RAID 0x03 /* Raid-set */
77
78/* Other constants. */
79#define LDM_DB_SIZE 2048 /* Size in sectors (= 1MiB). */
80
81#define OFF_PRIV1 6 /* Offset of the first privhead
82 relative to the start of the
83 device in sectors */
84
85/* Offsets to structures within the LDM Database in sectors. */
86#define OFF_PRIV2 1856 /* Backup private headers. */
87#define OFF_PRIV3 2047
88
89#define OFF_TOCB1 1 /* Tables of contents. */
90#define OFF_TOCB2 2
91#define OFF_TOCB3 2045
92#define OFF_TOCB4 2046
93
94#define OFF_VMDB 17 /* List of partitions. */
95
96#define LDM_PARTITION 0x42 /* Formerly SFS (Landis). */
97
98#define TOC_BITMAP1 "config" /* Names of the two defined */
99#define TOC_BITMAP2 "log" /* bitmaps in the TOCBLOCK. */
100
101/* Borrowed from msdos.c */
102#define SYS_IND(p) (get_unaligned(&(p)->sys_ind))
103
104struct frag { /* VBLK Fragment handling */
105 struct list_head list;
106 u32 group;
107 u8 num; /* Total number of records */
108 u8 rec; /* This is record number n */
109 u8 map; /* Which portions are in use */
110 u8 data[0];
111};
112
113/* In memory LDM database structures. */
114
115#define GUID_SIZE 16
116
117struct privhead { /* Offsets and sizes are in sectors. */
118 u16 ver_major;
119 u16 ver_minor;
120 u64 logical_disk_start;
121 u64 logical_disk_size;
122 u64 config_start;
123 u64 config_size;
124 u8 disk_id[GUID_SIZE];
125};
126
127struct tocblock { /* We have exactly two bitmaps. */
128 u8 bitmap1_name[16];
129 u64 bitmap1_start;
130 u64 bitmap1_size;
131 u8 bitmap2_name[16];
132 u64 bitmap2_start;
133 u64 bitmap2_size;
134};
135
136struct vmdb { /* VMDB: The database header */
137 u16 ver_major;
138 u16 ver_minor;
139 u32 vblk_size;
140 u32 vblk_offset;
141 u32 last_vblk_seq;
142};
143
144struct vblk_comp { /* VBLK Component */
145 u8 state[16];
146 u64 parent_id;
147 u8 type;
148 u8 children;
149 u16 chunksize;
150};
151
152struct vblk_dgrp { /* VBLK Disk Group */
153 u8 disk_id[64];
154};
155
156struct vblk_disk { /* VBLK Disk */
157 u8 disk_id[GUID_SIZE];
158 u8 alt_name[128];
159};
160
161struct vblk_part { /* VBLK Partition */
162 u64 start;
163 u64 size; /* start, size and vol_off in sectors */
164 u64 volume_offset;
165 u64 parent_id;
166 u64 disk_id;
167 u8 partnum;
168};
169
170struct vblk_volu { /* VBLK Volume */
171 u8 volume_type[16];
172 u8 volume_state[16];
173 u8 guid[16];
174 u8 drive_hint[4];
175 u64 size;
176 u8 partition_type;
177};
178
179struct vblk_head { /* VBLK standard header */
180 u32 group;
181 u16 rec;
182 u16 nrec;
183};
184
struct vblk {				/* Generalised VBLK */
	u8 name[64];			/* Object name, filled in by ldm_get_vstr() */
	u64 obj_id;			/* Database-wide object identifier */
	u32 sequence;
	u8 flags;			/* Raw flag byte (offset 0x12 of the record) */
	u8 type;			/* One of the VBLK_* type codes */
	union {				/* Type-specific payload, selected by 'type' */
		struct vblk_comp comp;
		struct vblk_dgrp dgrp;
		struct vblk_disk disk;
		struct vblk_part part;
		struct vblk_volu volu;
	} vblk;
	struct list_head list;		/* Linkage into one of the ldmdb v_* lists */
};
200
201struct ldmdb { /* Cache of the database */
202 struct privhead ph;
203 struct tocblock toc;
204 struct vmdb vm;
205 struct list_head v_dgrp;
206 struct list_head v_disk;
207 struct list_head v_volu;
208 struct list_head v_comp;
209 struct list_head v_part;
210};
211
212int ldm_partition(struct parsed_partitions *state);
213
214#endif /* _FS_PT_LDM_H_ */
215
diff --git a/fs/partitions/mac.c b/fs/partitions/mac.c
deleted file mode 100644
index 11f688bd76c5..000000000000
--- a/fs/partitions/mac.c
+++ /dev/null
@@ -1,134 +0,0 @@
1/*
2 * fs/partitions/mac.c
3 *
4 * Code extracted from drivers/block/genhd.c
5 * Copyright (C) 1991-1998 Linus Torvalds
6 * Re-organised Feb 1998 Russell King
7 */
8
9#include <linux/ctype.h>
10#include "check.h"
11#include "mac.h"
12
13#ifdef CONFIG_PPC_PMAC
14#include <asm/machdep.h>
15extern void note_bootable_part(dev_t dev, int part, int goodness);
16#endif
17
18/*
19 * Code to understand MacOS partition tables.
20 */
21
/*
 * Strip the trailing space padding from a fixed-width on-disk string,
 * NUL-terminating it in place.  A field with no trailing spaces is
 * left untouched.
 */
static inline void mac_fix_string(char *stg, int len)
{
	int pos = len;

	while (pos-- > 0 && stg[pos] == ' ')
		stg[pos] = 0;
}
29
/*
 * mac_partition - Detect and parse a MacOS (Apple) partition map.
 * Returns 1 if a valid map was found and partitions were registered,
 * 0 if the disk is not Mac-partitioned, -1 on a read error.
 */
int mac_partition(struct parsed_partitions *state)
{
	Sector sect;
	unsigned char *data;
	int slot, blocks_in_map;
	unsigned secsize;
#ifdef CONFIG_PPC_PMAC
	int found_root = 0;
	int found_root_goodness = 0;
#endif
	struct mac_partition *part;
	struct mac_driver_desc *md;

	/* Get 0th block and look at the first partition map entry. */
	md = read_part_sector(state, 0, &sect);
	if (!md)
		return -1;
	if (be16_to_cpu(md->signature) != MAC_DRIVER_MAGIC) {
		put_dev_sector(sect);
		return 0;
	}
	/* Map entries are secsize bytes apart, not 512. */
	secsize = be16_to_cpu(md->block_size);
	put_dev_sector(sect);
	data = read_part_sector(state, secsize/512, &sect);
	if (!data)
		return -1;
	part = (struct mac_partition *) (data + secsize%512);
	if (be16_to_cpu(part->signature) != MAC_PARTITION_MAGIC) {
		put_dev_sector(sect);
		return 0; /* not a MacOS disk */
	}
	/* Sanity-bound the entry count before looping over it. */
	blocks_in_map = be32_to_cpu(part->map_count);
	if (blocks_in_map < 0 || blocks_in_map >= DISK_MAX_PARTS) {
		put_dev_sector(sect);
		return 0;
	}
	strlcat(state->pp_buf, " [mac]", PAGE_SIZE);
	for (slot = 1; slot <= blocks_in_map; ++slot) {
		int pos = slot * secsize;
		/* Release the previous sector before reading the next one. */
		put_dev_sector(sect);
		data = read_part_sector(state, pos/512, &sect);
		if (!data)
			return -1;
		part = (struct mac_partition *) (data + pos%512);
		if (be16_to_cpu(part->signature) != MAC_PARTITION_MAGIC)
			break;
		put_partition(state, slot,
			be32_to_cpu(part->start_block) * (secsize/512),
			be32_to_cpu(part->block_count) * (secsize/512));

		if (!strnicmp(part->type, "Linux_RAID", 10))
			state->parts[slot].flags = ADDPART_FLAG_RAID;
#ifdef CONFIG_PPC_PMAC
		/*
		 * If this is the first bootable partition, tell the
		 * setup code, in case it wants to make this the root.
		 */
		if (machine_is(powermac)) {
			int goodness = 0;

			mac_fix_string(part->processor, 16);
			mac_fix_string(part->name, 32);
			mac_fix_string(part->type, 32);

			if ((be32_to_cpu(part->status) & MAC_STATUS_BOOTABLE)
			    && strcasecmp(part->processor, "powerpc") == 0)
				goodness++;

			/* Prefer Linux-ish partitions, scoring names that
			 * look like a root filesystem higher than swap. */
			if (strcasecmp(part->type, "Apple_UNIX_SVR2") == 0
			    || (strnicmp(part->type, "Linux", 5) == 0
			        && strcasecmp(part->type, "Linux_swap") != 0)) {
				int i, l;

				goodness++;
				l = strlen(part->name);
				if (strcmp(part->name, "/") == 0)
					goodness++;
				for (i = 0; i <= l - 4; ++i) {
					if (strnicmp(part->name + i, "root",
						     4) == 0) {
						goodness += 2;
						break;
					}
				}
				if (strnicmp(part->name, "swap", 4) == 0)
					goodness--;
			}

			if (goodness > found_root_goodness) {
				found_root = slot;
				found_root_goodness = goodness;
			}
		}
#endif /* CONFIG_PPC_PMAC */
	}
#ifdef CONFIG_PPC_PMAC
	if (found_root_goodness)
		note_bootable_part(state->bdev->bd_dev, found_root,
				found_root_goodness);
#endif

	put_dev_sector(sect);
	strlcat(state->pp_buf, "\n", PAGE_SIZE);
	return 1;
}
diff --git a/fs/partitions/mac.h b/fs/partitions/mac.h
deleted file mode 100644
index 3c7d98436380..000000000000
--- a/fs/partitions/mac.h
+++ /dev/null
@@ -1,44 +0,0 @@
1/*
2 * fs/partitions/mac.h
3 */
4
5#define MAC_PARTITION_MAGIC 0x504d
6
7/* type field value for A/UX or other Unix partitions */
8#define APPLE_AUX_TYPE "Apple_UNIX_SVR2"
9
10struct mac_partition {
11 __be16 signature; /* expected to be MAC_PARTITION_MAGIC */
12 __be16 res1;
13 __be32 map_count; /* # blocks in partition map */
14 __be32 start_block; /* absolute starting block # of partition */
15 __be32 block_count; /* number of blocks in partition */
16 char name[32]; /* partition name */
17 char type[32]; /* string type description */
18 __be32 data_start; /* rel block # of first data block */
19 __be32 data_count; /* number of data blocks */
20 __be32 status; /* partition status bits */
21 __be32 boot_start;
22 __be32 boot_size;
23 __be32 boot_load;
24 __be32 boot_load2;
25 __be32 boot_entry;
26 __be32 boot_entry2;
27 __be32 boot_cksum;
28 char processor[16]; /* identifies ISA of boot */
29 /* there is more stuff after this that we don't need */
30};
31
32#define MAC_STATUS_BOOTABLE 8 /* partition is bootable */
33
34#define MAC_DRIVER_MAGIC 0x4552
35
36/* Driver descriptor structure, in block 0 */
37struct mac_driver_desc {
38 __be16 signature; /* expected to be MAC_DRIVER_MAGIC */
39 __be16 block_size;
40 __be32 block_count;
41 /* ... more stuff */
42};
43
44int mac_partition(struct parsed_partitions *state);
diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c
deleted file mode 100644
index 5f79a6677c69..000000000000
--- a/fs/partitions/msdos.c
+++ /dev/null
@@ -1,552 +0,0 @@
1/*
2 * fs/partitions/msdos.c
3 *
4 * Code extracted from drivers/block/genhd.c
5 * Copyright (C) 1991-1998 Linus Torvalds
6 *
7 * Thanks to Branko Lankester, lankeste@fwi.uva.nl, who found a bug
8 * in the early extended-partition checks and added DM partitions
9 *
10 * Support for DiskManager v6.0x added by Mark Lord,
11 * with information provided by OnTrack. This now works for linux fdisk
12 * and LILO, as well as loadlin and bootln. Note that disks other than
13 * /dev/hda *must* have a "DOS" type 0x51 partition in the first slot (hda1).
14 *
15 * More flexible handling of extended partitions - aeb, 950831
16 *
17 * Check partition table on IDE disks for common CHS translations
18 *
19 * Re-organised Feb 1998 Russell King
20 */
21#include <linux/msdos_fs.h>
22
23#include "check.h"
24#include "msdos.h"
25#include "efi.h"
26
27/*
28 * Many architectures don't like unaligned accesses, while
29 * the nr_sects and start_sect partition table entries are
30 * at a 2 (mod 4) address.
31 */
32#include <asm/unaligned.h>
33
34#define SYS_IND(p) get_unaligned(&p->sys_ind)
35
36static inline sector_t nr_sects(struct partition *p)
37{
38 return (sector_t)get_unaligned_le32(&p->nr_sects);
39}
40
41static inline sector_t start_sect(struct partition *p)
42{
43 return (sector_t)get_unaligned_le32(&p->start_sect);
44}
45
46static inline int is_extended_partition(struct partition *p)
47{
48 return (SYS_IND(p) == DOS_EXTENDED_PARTITION ||
49 SYS_IND(p) == WIN98_EXTENDED_PARTITION ||
50 SYS_IND(p) == LINUX_EXTENDED_PARTITION);
51}
52
53#define MSDOS_LABEL_MAGIC1 0x55
54#define MSDOS_LABEL_MAGIC2 0xAA
55
56static inline int
57msdos_magic_present(unsigned char *p)
58{
59 return (p[0] == MSDOS_LABEL_MAGIC1 && p[1] == MSDOS_LABEL_MAGIC2);
60}
61
62/* Value is EBCDIC 'IBMA' */
63#define AIX_LABEL_MAGIC1 0xC9
64#define AIX_LABEL_MAGIC2 0xC2
65#define AIX_LABEL_MAGIC3 0xD4
66#define AIX_LABEL_MAGIC4 0xC1
67static int aix_magic_present(struct parsed_partitions *state, unsigned char *p)
68{
69 struct partition *pt = (struct partition *) (p + 0x1be);
70 Sector sect;
71 unsigned char *d;
72 int slot, ret = 0;
73
74 if (!(p[0] == AIX_LABEL_MAGIC1 &&
75 p[1] == AIX_LABEL_MAGIC2 &&
76 p[2] == AIX_LABEL_MAGIC3 &&
77 p[3] == AIX_LABEL_MAGIC4))
78 return 0;
79 /* Assume the partition table is valid if Linux partitions exists */
80 for (slot = 1; slot <= 4; slot++, pt++) {
81 if (pt->sys_ind == LINUX_SWAP_PARTITION ||
82 pt->sys_ind == LINUX_RAID_PARTITION ||
83 pt->sys_ind == LINUX_DATA_PARTITION ||
84 pt->sys_ind == LINUX_LVM_PARTITION ||
85 is_extended_partition(pt))
86 return 0;
87 }
88 d = read_part_sector(state, 7, &sect);
89 if (d) {
90 if (d[0] == '_' && d[1] == 'L' && d[2] == 'V' && d[3] == 'M')
91 ret = 1;
92 put_dev_sector(sect);
93 };
94 return ret;
95}
96
97/*
98 * Create devices for each logical partition in an extended partition.
99 * The logical partitions form a linked list, with each entry being
100 * a partition table with two entries. The first entry
101 * is the real data partition (with a start relative to the partition
102 * table start). The second is a pointer to the next logical partition
103 * (with a start relative to the entire extended partition).
104 * We do not create a Linux partition for the partition tables, but
105 * only for the actual data partitions.
106 */
107
108static void parse_extended(struct parsed_partitions *state,
109 sector_t first_sector, sector_t first_size)
110{
111 struct partition *p;
112 Sector sect;
113 unsigned char *data;
114 sector_t this_sector, this_size;
115 sector_t sector_size = bdev_logical_block_size(state->bdev) / 512;
116 int loopct = 0; /* number of links followed
117 without finding a data partition */
118 int i;
119
120 this_sector = first_sector;
121 this_size = first_size;
122
123 while (1) {
124 if (++loopct > 100)
125 return;
126 if (state->next == state->limit)
127 return;
128 data = read_part_sector(state, this_sector, &sect);
129 if (!data)
130 return;
131
132 if (!msdos_magic_present(data + 510))
133 goto done;
134
135 p = (struct partition *) (data + 0x1be);
136
137 /*
138 * Usually, the first entry is the real data partition,
139 * the 2nd entry is the next extended partition, or empty,
140 * and the 3rd and 4th entries are unused.
141 * However, DRDOS sometimes has the extended partition as
142 * the first entry (when the data partition is empty),
143 * and OS/2 seems to use all four entries.
144 */
145
146 /*
147 * First process the data partition(s)
148 */
149 for (i=0; i<4; i++, p++) {
150 sector_t offs, size, next;
151 if (!nr_sects(p) || is_extended_partition(p))
152 continue;
153
154 /* Check the 3rd and 4th entries -
155 these sometimes contain random garbage */
156 offs = start_sect(p)*sector_size;
157 size = nr_sects(p)*sector_size;
158 next = this_sector + offs;
159 if (i >= 2) {
160 if (offs + size > this_size)
161 continue;
162 if (next < first_sector)
163 continue;
164 if (next + size > first_sector + first_size)
165 continue;
166 }
167
168 put_partition(state, state->next, next, size);
169 if (SYS_IND(p) == LINUX_RAID_PARTITION)
170 state->parts[state->next].flags = ADDPART_FLAG_RAID;
171 loopct = 0;
172 if (++state->next == state->limit)
173 goto done;
174 }
175 /*
176 * Next, process the (first) extended partition, if present.
177 * (So far, there seems to be no reason to make
178 * parse_extended() recursive and allow a tree
179 * of extended partitions.)
180 * It should be a link to the next logical partition.
181 */
182 p -= 4;
183 for (i=0; i<4; i++, p++)
184 if (nr_sects(p) && is_extended_partition(p))
185 break;
186 if (i == 4)
187 goto done; /* nothing left to do */
188
189 this_sector = first_sector + start_sect(p) * sector_size;
190 this_size = nr_sects(p) * sector_size;
191 put_dev_sector(sect);
192 }
193done:
194 put_dev_sector(sect);
195}
196
197/* james@bpgc.com: Solaris has a nasty indicator: 0x82 which also
198 indicates linux swap. Be careful before believing this is Solaris. */
199
200static void parse_solaris_x86(struct parsed_partitions *state,
201 sector_t offset, sector_t size, int origin)
202{
203#ifdef CONFIG_SOLARIS_X86_PARTITION
204 Sector sect;
205 struct solaris_x86_vtoc *v;
206 int i;
207 short max_nparts;
208
209 v = read_part_sector(state, offset + 1, &sect);
210 if (!v)
211 return;
212 if (le32_to_cpu(v->v_sanity) != SOLARIS_X86_VTOC_SANE) {
213 put_dev_sector(sect);
214 return;
215 }
216 {
217 char tmp[1 + BDEVNAME_SIZE + 10 + 11 + 1];
218
219 snprintf(tmp, sizeof(tmp), " %s%d: <solaris:", state->name, origin);
220 strlcat(state->pp_buf, tmp, PAGE_SIZE);
221 }
222 if (le32_to_cpu(v->v_version) != 1) {
223 char tmp[64];
224
225 snprintf(tmp, sizeof(tmp), " cannot handle version %d vtoc>\n",
226 le32_to_cpu(v->v_version));
227 strlcat(state->pp_buf, tmp, PAGE_SIZE);
228 put_dev_sector(sect);
229 return;
230 }
231 /* Ensure we can handle previous case of VTOC with 8 entries gracefully */
232 max_nparts = le16_to_cpu (v->v_nparts) > 8 ? SOLARIS_X86_NUMSLICE : 8;
233 for (i=0; i<max_nparts && state->next<state->limit; i++) {
234 struct solaris_x86_slice *s = &v->v_slice[i];
235 char tmp[3 + 10 + 1 + 1];
236
237 if (s->s_size == 0)
238 continue;
239 snprintf(tmp, sizeof(tmp), " [s%d]", i);
240 strlcat(state->pp_buf, tmp, PAGE_SIZE);
241 /* solaris partitions are relative to current MS-DOS
242 * one; must add the offset of the current partition */
243 put_partition(state, state->next++,
244 le32_to_cpu(s->s_start)+offset,
245 le32_to_cpu(s->s_size));
246 }
247 put_dev_sector(sect);
248 strlcat(state->pp_buf, " >\n", PAGE_SIZE);
249#endif
250}
251
252#if defined(CONFIG_BSD_DISKLABEL)
253/*
254 * Create devices for BSD partitions listed in a disklabel, under a
255 * dos-like partition. See parse_extended() for more information.
256 */
257static void parse_bsd(struct parsed_partitions *state,
258 sector_t offset, sector_t size, int origin, char *flavour,
259 int max_partitions)
260{
261 Sector sect;
262 struct bsd_disklabel *l;
263 struct bsd_partition *p;
264 char tmp[64];
265
266 l = read_part_sector(state, offset + 1, &sect);
267 if (!l)
268 return;
269 if (le32_to_cpu(l->d_magic) != BSD_DISKMAGIC) {
270 put_dev_sector(sect);
271 return;
272 }
273
274 snprintf(tmp, sizeof(tmp), " %s%d: <%s:", state->name, origin, flavour);
275 strlcat(state->pp_buf, tmp, PAGE_SIZE);
276
277 if (le16_to_cpu(l->d_npartitions) < max_partitions)
278 max_partitions = le16_to_cpu(l->d_npartitions);
279 for (p = l->d_partitions; p - l->d_partitions < max_partitions; p++) {
280 sector_t bsd_start, bsd_size;
281
282 if (state->next == state->limit)
283 break;
284 if (p->p_fstype == BSD_FS_UNUSED)
285 continue;
286 bsd_start = le32_to_cpu(p->p_offset);
287 bsd_size = le32_to_cpu(p->p_size);
288 if (offset == bsd_start && size == bsd_size)
289 /* full parent partition, we have it already */
290 continue;
291 if (offset > bsd_start || offset+size < bsd_start+bsd_size) {
292 strlcat(state->pp_buf, "bad subpartition - ignored\n", PAGE_SIZE);
293 continue;
294 }
295 put_partition(state, state->next++, bsd_start, bsd_size);
296 }
297 put_dev_sector(sect);
298 if (le16_to_cpu(l->d_npartitions) > max_partitions) {
299 snprintf(tmp, sizeof(tmp), " (ignored %d more)",
300 le16_to_cpu(l->d_npartitions) - max_partitions);
301 strlcat(state->pp_buf, tmp, PAGE_SIZE);
302 }
303 strlcat(state->pp_buf, " >\n", PAGE_SIZE);
304}
305#endif
306
307static void parse_freebsd(struct parsed_partitions *state,
308 sector_t offset, sector_t size, int origin)
309{
310#ifdef CONFIG_BSD_DISKLABEL
311 parse_bsd(state, offset, size, origin, "bsd", BSD_MAXPARTITIONS);
312#endif
313}
314
315static void parse_netbsd(struct parsed_partitions *state,
316 sector_t offset, sector_t size, int origin)
317{
318#ifdef CONFIG_BSD_DISKLABEL
319 parse_bsd(state, offset, size, origin, "netbsd", BSD_MAXPARTITIONS);
320#endif
321}
322
323static void parse_openbsd(struct parsed_partitions *state,
324 sector_t offset, sector_t size, int origin)
325{
326#ifdef CONFIG_BSD_DISKLABEL
327 parse_bsd(state, offset, size, origin, "openbsd",
328 OPENBSD_MAXPARTITIONS);
329#endif
330}
331
332/*
333 * Create devices for Unixware partitions listed in a disklabel, under a
334 * dos-like partition. See parse_extended() for more information.
335 */
336static void parse_unixware(struct parsed_partitions *state,
337 sector_t offset, sector_t size, int origin)
338{
339#ifdef CONFIG_UNIXWARE_DISKLABEL
340 Sector sect;
341 struct unixware_disklabel *l;
342 struct unixware_slice *p;
343
344 l = read_part_sector(state, offset + 29, &sect);
345 if (!l)
346 return;
347 if (le32_to_cpu(l->d_magic) != UNIXWARE_DISKMAGIC ||
348 le32_to_cpu(l->vtoc.v_magic) != UNIXWARE_DISKMAGIC2) {
349 put_dev_sector(sect);
350 return;
351 }
352 {
353 char tmp[1 + BDEVNAME_SIZE + 10 + 12 + 1];
354
355 snprintf(tmp, sizeof(tmp), " %s%d: <unixware:", state->name, origin);
356 strlcat(state->pp_buf, tmp, PAGE_SIZE);
357 }
358 p = &l->vtoc.v_slice[1];
359 /* I omit the 0th slice as it is the same as whole disk. */
360 while (p - &l->vtoc.v_slice[0] < UNIXWARE_NUMSLICE) {
361 if (state->next == state->limit)
362 break;
363
364 if (p->s_label != UNIXWARE_FS_UNUSED)
365 put_partition(state, state->next++,
366 le32_to_cpu(p->start_sect),
367 le32_to_cpu(p->nr_sects));
368 p++;
369 }
370 put_dev_sector(sect);
371 strlcat(state->pp_buf, " >\n", PAGE_SIZE);
372#endif
373}
374
375/*
376 * Minix 2.0.0/2.0.2 subpartition support.
377 * Anand Krishnamurthy <anandk@wiproge.med.ge.com>
378 * Rajeev V. Pillai <rajeevvp@yahoo.com>
379 */
380static void parse_minix(struct parsed_partitions *state,
381 sector_t offset, sector_t size, int origin)
382{
383#ifdef CONFIG_MINIX_SUBPARTITION
384 Sector sect;
385 unsigned char *data;
386 struct partition *p;
387 int i;
388
389 data = read_part_sector(state, offset, &sect);
390 if (!data)
391 return;
392
393 p = (struct partition *)(data + 0x1be);
394
395 /* The first sector of a Minix partition can have either
396 * a secondary MBR describing its subpartitions, or
397 * the normal boot sector. */
398 if (msdos_magic_present (data + 510) &&
399 SYS_IND(p) == MINIX_PARTITION) { /* subpartition table present */
400 char tmp[1 + BDEVNAME_SIZE + 10 + 9 + 1];
401
402 snprintf(tmp, sizeof(tmp), " %s%d: <minix:", state->name, origin);
403 strlcat(state->pp_buf, tmp, PAGE_SIZE);
404 for (i = 0; i < MINIX_NR_SUBPARTITIONS; i++, p++) {
405 if (state->next == state->limit)
406 break;
407 /* add each partition in use */
408 if (SYS_IND(p) == MINIX_PARTITION)
409 put_partition(state, state->next++,
410 start_sect(p), nr_sects(p));
411 }
412 strlcat(state->pp_buf, " >\n", PAGE_SIZE);
413 }
414 put_dev_sector(sect);
415#endif /* CONFIG_MINIX_SUBPARTITION */
416}
417
418static struct {
419 unsigned char id;
420 void (*parse)(struct parsed_partitions *, sector_t, sector_t, int);
421} subtypes[] = {
422 {FREEBSD_PARTITION, parse_freebsd},
423 {NETBSD_PARTITION, parse_netbsd},
424 {OPENBSD_PARTITION, parse_openbsd},
425 {MINIX_PARTITION, parse_minix},
426 {UNIXWARE_PARTITION, parse_unixware},
427 {SOLARIS_X86_PARTITION, parse_solaris_x86},
428 {NEW_SOLARIS_X86_PARTITION, parse_solaris_x86},
429 {0, NULL},
430};
431
432int msdos_partition(struct parsed_partitions *state)
433{
434 sector_t sector_size = bdev_logical_block_size(state->bdev) / 512;
435 Sector sect;
436 unsigned char *data;
437 struct partition *p;
438 struct fat_boot_sector *fb;
439 int slot;
440
441 data = read_part_sector(state, 0, &sect);
442 if (!data)
443 return -1;
444 if (!msdos_magic_present(data + 510)) {
445 put_dev_sector(sect);
446 return 0;
447 }
448
449 if (aix_magic_present(state, data)) {
450 put_dev_sector(sect);
451 strlcat(state->pp_buf, " [AIX]", PAGE_SIZE);
452 return 0;
453 }
454
455 /*
456 * Now that the 55aa signature is present, this is probably
457 * either the boot sector of a FAT filesystem or a DOS-type
458 * partition table. Reject this in case the boot indicator
459 * is not 0 or 0x80.
460 */
461 p = (struct partition *) (data + 0x1be);
462 for (slot = 1; slot <= 4; slot++, p++) {
463 if (p->boot_ind != 0 && p->boot_ind != 0x80) {
464 /*
465 * Even without a valid boot inidicator value
466 * its still possible this is valid FAT filesystem
467 * without a partition table.
468 */
469 fb = (struct fat_boot_sector *) data;
470 if (slot == 1 && fb->reserved && fb->fats
471 && fat_valid_media(fb->media)) {
472 strlcat(state->pp_buf, "\n", PAGE_SIZE);
473 put_dev_sector(sect);
474 return 1;
475 } else {
476 put_dev_sector(sect);
477 return 0;
478 }
479 }
480 }
481
482#ifdef CONFIG_EFI_PARTITION
483 p = (struct partition *) (data + 0x1be);
484 for (slot = 1 ; slot <= 4 ; slot++, p++) {
485 /* If this is an EFI GPT disk, msdos should ignore it. */
486 if (SYS_IND(p) == EFI_PMBR_OSTYPE_EFI_GPT) {
487 put_dev_sector(sect);
488 return 0;
489 }
490 }
491#endif
492 p = (struct partition *) (data + 0x1be);
493
494 /*
495 * Look for partitions in two passes:
496 * First find the primary and DOS-type extended partitions.
497 * On the second pass look inside *BSD, Unixware and Solaris partitions.
498 */
499
500 state->next = 5;
501 for (slot = 1 ; slot <= 4 ; slot++, p++) {
502 sector_t start = start_sect(p)*sector_size;
503 sector_t size = nr_sects(p)*sector_size;
504 if (!size)
505 continue;
506 if (is_extended_partition(p)) {
507 /*
508 * prevent someone doing mkfs or mkswap on an
509 * extended partition, but leave room for LILO
510 * FIXME: this uses one logical sector for > 512b
511 * sector, although it may not be enough/proper.
512 */
513 sector_t n = 2;
514 n = min(size, max(sector_size, n));
515 put_partition(state, slot, start, n);
516
517 strlcat(state->pp_buf, " <", PAGE_SIZE);
518 parse_extended(state, start, size);
519 strlcat(state->pp_buf, " >", PAGE_SIZE);
520 continue;
521 }
522 put_partition(state, slot, start, size);
523 if (SYS_IND(p) == LINUX_RAID_PARTITION)
524 state->parts[slot].flags = ADDPART_FLAG_RAID;
525 if (SYS_IND(p) == DM6_PARTITION)
526 strlcat(state->pp_buf, "[DM]", PAGE_SIZE);
527 if (SYS_IND(p) == EZD_PARTITION)
528 strlcat(state->pp_buf, "[EZD]", PAGE_SIZE);
529 }
530
531 strlcat(state->pp_buf, "\n", PAGE_SIZE);
532
533 /* second pass - output for each on a separate line */
534 p = (struct partition *) (0x1be + data);
535 for (slot = 1 ; slot <= 4 ; slot++, p++) {
536 unsigned char id = SYS_IND(p);
537 int n;
538
539 if (!nr_sects(p))
540 continue;
541
542 for (n = 0; subtypes[n].parse && id != subtypes[n].id; n++)
543 ;
544
545 if (!subtypes[n].parse)
546 continue;
547 subtypes[n].parse(state, start_sect(p) * sector_size,
548 nr_sects(p) * sector_size, slot);
549 }
550 put_dev_sector(sect);
551 return 1;
552}
diff --git a/fs/partitions/msdos.h b/fs/partitions/msdos.h
deleted file mode 100644
index 38c781c490b3..000000000000
--- a/fs/partitions/msdos.h
+++ /dev/null
@@ -1,8 +0,0 @@
1/*
2 * fs/partitions/msdos.h
3 */
4
5#define MSDOS_LABEL_MAGIC 0xAA55
6
7int msdos_partition(struct parsed_partitions *state);
8
diff --git a/fs/partitions/osf.c b/fs/partitions/osf.c
deleted file mode 100644
index 764b86a01965..000000000000
--- a/fs/partitions/osf.c
+++ /dev/null
@@ -1,86 +0,0 @@
1/*
2 * fs/partitions/osf.c
3 *
4 * Code extracted from drivers/block/genhd.c
5 *
6 * Copyright (C) 1991-1998 Linus Torvalds
7 * Re-organised Feb 1998 Russell King
8 */
9
10#include "check.h"
11#include "osf.h"
12
13#define MAX_OSF_PARTITIONS 18
14
15int osf_partition(struct parsed_partitions *state)
16{
17 int i;
18 int slot = 1;
19 unsigned int npartitions;
20 Sector sect;
21 unsigned char *data;
22 struct disklabel {
23 __le32 d_magic;
24 __le16 d_type,d_subtype;
25 u8 d_typename[16];
26 u8 d_packname[16];
27 __le32 d_secsize;
28 __le32 d_nsectors;
29 __le32 d_ntracks;
30 __le32 d_ncylinders;
31 __le32 d_secpercyl;
32 __le32 d_secprtunit;
33 __le16 d_sparespertrack;
34 __le16 d_sparespercyl;
35 __le32 d_acylinders;
36 __le16 d_rpm, d_interleave, d_trackskew, d_cylskew;
37 __le32 d_headswitch, d_trkseek, d_flags;
38 __le32 d_drivedata[5];
39 __le32 d_spare[5];
40 __le32 d_magic2;
41 __le16 d_checksum;
42 __le16 d_npartitions;
43 __le32 d_bbsize, d_sbsize;
44 struct d_partition {
45 __le32 p_size;
46 __le32 p_offset;
47 __le32 p_fsize;
48 u8 p_fstype;
49 u8 p_frag;
50 __le16 p_cpg;
51 } d_partitions[MAX_OSF_PARTITIONS];
52 } * label;
53 struct d_partition * partition;
54
55 data = read_part_sector(state, 0, &sect);
56 if (!data)
57 return -1;
58
59 label = (struct disklabel *) (data+64);
60 partition = label->d_partitions;
61 if (le32_to_cpu(label->d_magic) != DISKLABELMAGIC) {
62 put_dev_sector(sect);
63 return 0;
64 }
65 if (le32_to_cpu(label->d_magic2) != DISKLABELMAGIC) {
66 put_dev_sector(sect);
67 return 0;
68 }
69 npartitions = le16_to_cpu(label->d_npartitions);
70 if (npartitions > MAX_OSF_PARTITIONS) {
71 put_dev_sector(sect);
72 return 0;
73 }
74 for (i = 0 ; i < npartitions; i++, partition++) {
75 if (slot == state->limit)
76 break;
77 if (le32_to_cpu(partition->p_size))
78 put_partition(state, slot,
79 le32_to_cpu(partition->p_offset),
80 le32_to_cpu(partition->p_size));
81 slot++;
82 }
83 strlcat(state->pp_buf, "\n", PAGE_SIZE);
84 put_dev_sector(sect);
85 return 1;
86}
diff --git a/fs/partitions/osf.h b/fs/partitions/osf.h
deleted file mode 100644
index 20ed2315ec16..000000000000
--- a/fs/partitions/osf.h
+++ /dev/null
@@ -1,7 +0,0 @@
1/*
2 * fs/partitions/osf.h
3 */
4
5#define DISKLABELMAGIC (0x82564557UL)
6
7int osf_partition(struct parsed_partitions *state);
diff --git a/fs/partitions/sgi.c b/fs/partitions/sgi.c
deleted file mode 100644
index ea8a86dceaf4..000000000000
--- a/fs/partitions/sgi.c
+++ /dev/null
@@ -1,82 +0,0 @@
1/*
2 * fs/partitions/sgi.c
3 *
4 * Code extracted from drivers/block/genhd.c
5 */
6
7#include "check.h"
8#include "sgi.h"
9
10struct sgi_disklabel {
11 __be32 magic_mushroom; /* Big fat spliff... */
12 __be16 root_part_num; /* Root partition number */
13 __be16 swap_part_num; /* Swap partition number */
14 s8 boot_file[16]; /* Name of boot file for ARCS */
15 u8 _unused0[48]; /* Device parameter useless crapola.. */
16 struct sgi_volume {
17 s8 name[8]; /* Name of volume */
18 __be32 block_num; /* Logical block number */
19 __be32 num_bytes; /* How big, in bytes */
20 } volume[15];
21 struct sgi_partition {
22 __be32 num_blocks; /* Size in logical blocks */
23 __be32 first_block; /* First logical block */
24 __be32 type; /* Type of this partition */
25 } partitions[16];
26 __be32 csum; /* Disk label checksum */
27 __be32 _unused1; /* Padding */
28};
29
30int sgi_partition(struct parsed_partitions *state)
31{
32 int i, csum;
33 __be32 magic;
34 int slot = 1;
35 unsigned int start, blocks;
36 __be32 *ui, cs;
37 Sector sect;
38 struct sgi_disklabel *label;
39 struct sgi_partition *p;
40 char b[BDEVNAME_SIZE];
41
42 label = read_part_sector(state, 0, &sect);
43 if (!label)
44 return -1;
45 p = &label->partitions[0];
46 magic = label->magic_mushroom;
47 if(be32_to_cpu(magic) != SGI_LABEL_MAGIC) {
48 /*printk("Dev %s SGI disklabel: bad magic %08x\n",
49 bdevname(bdev, b), be32_to_cpu(magic));*/
50 put_dev_sector(sect);
51 return 0;
52 }
53 ui = ((__be32 *) (label + 1)) - 1;
54 for(csum = 0; ui >= ((__be32 *) label);) {
55 cs = *ui--;
56 csum += be32_to_cpu(cs);
57 }
58 if(csum) {
59 printk(KERN_WARNING "Dev %s SGI disklabel: csum bad, label corrupted\n",
60 bdevname(state->bdev, b));
61 put_dev_sector(sect);
62 return 0;
63 }
64 /* All SGI disk labels have 16 partitions, disks under Linux only
65 * have 15 minor's. Luckily there are always a few zero length
66 * partitions which we don't care about so we never overflow the
67 * current_minor.
68 */
69 for(i = 0; i < 16; i++, p++) {
70 blocks = be32_to_cpu(p->num_blocks);
71 start = be32_to_cpu(p->first_block);
72 if (blocks) {
73 put_partition(state, slot, start, blocks);
74 if (be32_to_cpu(p->type) == LINUX_RAID_PARTITION)
75 state->parts[slot].flags = ADDPART_FLAG_RAID;
76 }
77 slot++;
78 }
79 strlcat(state->pp_buf, "\n", PAGE_SIZE);
80 put_dev_sector(sect);
81 return 1;
82}
diff --git a/fs/partitions/sgi.h b/fs/partitions/sgi.h
deleted file mode 100644
index b9553ebdd5a9..000000000000
--- a/fs/partitions/sgi.h
+++ /dev/null
@@ -1,8 +0,0 @@
1/*
2 * fs/partitions/sgi.h
3 */
4
5extern int sgi_partition(struct parsed_partitions *state);
6
7#define SGI_LABEL_MAGIC 0x0be5a941
8
diff --git a/fs/partitions/sun.c b/fs/partitions/sun.c
deleted file mode 100644
index b5b6fcfb3d36..000000000000
--- a/fs/partitions/sun.c
+++ /dev/null
@@ -1,122 +0,0 @@
1/*
2 * fs/partitions/sun.c
3 *
4 * Code extracted from drivers/block/genhd.c
5 *
6 * Copyright (C) 1991-1998 Linus Torvalds
7 * Re-organised Feb 1998 Russell King
8 */
9
10#include "check.h"
11#include "sun.h"
12
13int sun_partition(struct parsed_partitions *state)
14{
15 int i;
16 __be16 csum;
17 int slot = 1;
18 __be16 *ush;
19 Sector sect;
20 struct sun_disklabel {
21 unsigned char info[128]; /* Informative text string */
22 struct sun_vtoc {
23 __be32 version; /* Layout version */
24 char volume[8]; /* Volume name */
25 __be16 nparts; /* Number of partitions */
26 struct sun_info { /* Partition hdrs, sec 2 */
27 __be16 id;
28 __be16 flags;
29 } infos[8];
30 __be16 padding; /* Alignment padding */
31 __be32 bootinfo[3]; /* Info needed by mboot */
32 __be32 sanity; /* To verify vtoc sanity */
33 __be32 reserved[10]; /* Free space */
34 __be32 timestamp[8]; /* Partition timestamp */
35 } vtoc;
36 __be32 write_reinstruct; /* sectors to skip, writes */
37 __be32 read_reinstruct; /* sectors to skip, reads */
38 unsigned char spare[148]; /* Padding */
39 __be16 rspeed; /* Disk rotational speed */
40 __be16 pcylcount; /* Physical cylinder count */
41 __be16 sparecyl; /* extra sects per cylinder */
42 __be16 obs1; /* gap1 */
43 __be16 obs2; /* gap2 */
44 __be16 ilfact; /* Interleave factor */
45 __be16 ncyl; /* Data cylinder count */
46 __be16 nacyl; /* Alt. cylinder count */
47 __be16 ntrks; /* Tracks per cylinder */
48 __be16 nsect; /* Sectors per track */
49 __be16 obs3; /* bhead - Label head offset */
50 __be16 obs4; /* ppart - Physical Partition */
51 struct sun_partition {
52 __be32 start_cylinder;
53 __be32 num_sectors;
54 } partitions[8];
55 __be16 magic; /* Magic number */
56 __be16 csum; /* Label xor'd checksum */
57 } * label;
58 struct sun_partition *p;
59 unsigned long spc;
60 char b[BDEVNAME_SIZE];
61 int use_vtoc;
62 int nparts;
63
64 label = read_part_sector(state, 0, &sect);
65 if (!label)
66 return -1;
67
68 p = label->partitions;
69 if (be16_to_cpu(label->magic) != SUN_LABEL_MAGIC) {
70/* printk(KERN_INFO "Dev %s Sun disklabel: bad magic %04x\n",
71 bdevname(bdev, b), be16_to_cpu(label->magic)); */
72 put_dev_sector(sect);
73 return 0;
74 }
75 /* Look at the checksum */
76 ush = ((__be16 *) (label+1)) - 1;
77 for (csum = 0; ush >= ((__be16 *) label);)
78 csum ^= *ush--;
79 if (csum) {
80 printk("Dev %s Sun disklabel: Csum bad, label corrupted\n",
81 bdevname(state->bdev, b));
82 put_dev_sector(sect);
83 return 0;
84 }
85
86 /* Check to see if we can use the VTOC table */
87 use_vtoc = ((be32_to_cpu(label->vtoc.sanity) == SUN_VTOC_SANITY) &&
88 (be32_to_cpu(label->vtoc.version) == 1) &&
89 (be16_to_cpu(label->vtoc.nparts) <= 8));
90
91 /* Use 8 partition entries if not specified in validated VTOC */
92 nparts = (use_vtoc) ? be16_to_cpu(label->vtoc.nparts) : 8;
93
94 /*
95 * So that old Linux-Sun partitions continue to work,
96 * alow the VTOC to be used under the additional condition ...
97 */
98 use_vtoc = use_vtoc || !(label->vtoc.sanity ||
99 label->vtoc.version || label->vtoc.nparts);
100 spc = be16_to_cpu(label->ntrks) * be16_to_cpu(label->nsect);
101 for (i = 0; i < nparts; i++, p++) {
102 unsigned long st_sector;
103 unsigned int num_sectors;
104
105 st_sector = be32_to_cpu(p->start_cylinder) * spc;
106 num_sectors = be32_to_cpu(p->num_sectors);
107 if (num_sectors) {
108 put_partition(state, slot, st_sector, num_sectors);
109 state->parts[slot].flags = 0;
110 if (use_vtoc) {
111 if (be16_to_cpu(label->vtoc.infos[i].id) == LINUX_RAID_PARTITION)
112 state->parts[slot].flags |= ADDPART_FLAG_RAID;
113 else if (be16_to_cpu(label->vtoc.infos[i].id) == SUN_WHOLE_DISK)
114 state->parts[slot].flags |= ADDPART_FLAG_WHOLEDISK;
115 }
116 }
117 slot++;
118 }
119 strlcat(state->pp_buf, "\n", PAGE_SIZE);
120 put_dev_sector(sect);
121 return 1;
122}
diff --git a/fs/partitions/sun.h b/fs/partitions/sun.h
deleted file mode 100644
index 2424baa8319f..000000000000
--- a/fs/partitions/sun.h
+++ /dev/null
@@ -1,8 +0,0 @@
1/*
2 * fs/partitions/sun.h
3 */
4
5#define SUN_LABEL_MAGIC 0xDABE
6#define SUN_VTOC_SANITY 0x600DDEEE
7
8int sun_partition(struct parsed_partitions *state);
diff --git a/fs/partitions/sysv68.c b/fs/partitions/sysv68.c
deleted file mode 100644
index 9627ccffc1c4..000000000000
--- a/fs/partitions/sysv68.c
+++ /dev/null
@@ -1,95 +0,0 @@
1/*
2 * fs/partitions/sysv68.c
3 *
4 * Copyright (C) 2007 Philippe De Muyter <phdm@macqel.be>
5 */
6
7#include "check.h"
8#include "sysv68.h"
9
10/*
11 * Volume ID structure: on first 256-bytes sector of disk
12 */
13
14struct volumeid {
15 u8 vid_unused[248];
16 u8 vid_mac[8]; /* ASCII string "MOTOROLA" */
17};
18
19/*
20 * config block: second 256-bytes sector on disk
21 */
22
23struct dkconfig {
24 u8 ios_unused0[128];
25 __be32 ios_slcblk; /* Slice table block number */
26 __be16 ios_slccnt; /* Number of entries in slice table */
27 u8 ios_unused1[122];
28};
29
30/*
31 * combined volumeid and dkconfig block
32 */
33
34struct dkblk0 {
35 struct volumeid dk_vid;
36 struct dkconfig dk_ios;
37};
38
39/*
40 * Slice Table Structure
41 */
42
43struct slice {
44 __be32 nblocks; /* slice size (in blocks) */
45 __be32 blkoff; /* block offset of slice */
46};
47
48
49int sysv68_partition(struct parsed_partitions *state)
50{
51 int i, slices;
52 int slot = 1;
53 Sector sect;
54 unsigned char *data;
55 struct dkblk0 *b;
56 struct slice *slice;
57 char tmp[64];
58
59 data = read_part_sector(state, 0, &sect);
60 if (!data)
61 return -1;
62
63 b = (struct dkblk0 *)data;
64 if (memcmp(b->dk_vid.vid_mac, "MOTOROLA", sizeof(b->dk_vid.vid_mac))) {
65 put_dev_sector(sect);
66 return 0;
67 }
68 slices = be16_to_cpu(b->dk_ios.ios_slccnt);
69 i = be32_to_cpu(b->dk_ios.ios_slcblk);
70 put_dev_sector(sect);
71
72 data = read_part_sector(state, i, &sect);
73 if (!data)
74 return -1;
75
76 slices -= 1; /* last slice is the whole disk */
77 snprintf(tmp, sizeof(tmp), "sysV68: %s(s%u)", state->name, slices);
78 strlcat(state->pp_buf, tmp, PAGE_SIZE);
79 slice = (struct slice *)data;
80 for (i = 0; i < slices; i++, slice++) {
81 if (slot == state->limit)
82 break;
83 if (be32_to_cpu(slice->nblocks)) {
84 put_partition(state, slot,
85 be32_to_cpu(slice->blkoff),
86 be32_to_cpu(slice->nblocks));
87 snprintf(tmp, sizeof(tmp), "(s%u)", i);
88 strlcat(state->pp_buf, tmp, PAGE_SIZE);
89 }
90 slot++;
91 }
92 strlcat(state->pp_buf, "\n", PAGE_SIZE);
93 put_dev_sector(sect);
94 return 1;
95}
diff --git a/fs/partitions/sysv68.h b/fs/partitions/sysv68.h
deleted file mode 100644
index bf2f5ffa97ac..000000000000
--- a/fs/partitions/sysv68.h
+++ /dev/null
@@ -1 +0,0 @@
1extern int sysv68_partition(struct parsed_partitions *state);
diff --git a/fs/partitions/ultrix.c b/fs/partitions/ultrix.c
deleted file mode 100644
index 8dbaf9f77a99..000000000000
--- a/fs/partitions/ultrix.c
+++ /dev/null
@@ -1,48 +0,0 @@
1/*
2 * fs/partitions/ultrix.c
3 *
4 * Code extracted from drivers/block/genhd.c
5 *
6 * Re-organised Jul 1999 Russell King
7 */
8
9#include "check.h"
10#include "ultrix.h"
11
12int ultrix_partition(struct parsed_partitions *state)
13{
14 int i;
15 Sector sect;
16 unsigned char *data;
17 struct ultrix_disklabel {
18 s32 pt_magic; /* magic no. indicating part. info exits */
19 s32 pt_valid; /* set by driver if pt is current */
20 struct pt_info {
21 s32 pi_nblocks; /* no. of sectors */
22 u32 pi_blkoff; /* block offset for start */
23 } pt_part[8];
24 } *label;
25
26#define PT_MAGIC 0x032957 /* Partition magic number */
27#define PT_VALID 1 /* Indicates if struct is valid */
28
29 data = read_part_sector(state, (16384 - sizeof(*label))/512, &sect);
30 if (!data)
31 return -1;
32
33 label = (struct ultrix_disklabel *)(data + 512 - sizeof(*label));
34
35 if (label->pt_magic == PT_MAGIC && label->pt_valid == PT_VALID) {
36 for (i=0; i<8; i++)
37 if (label->pt_part[i].pi_nblocks)
38 put_partition(state, i+1,
39 label->pt_part[i].pi_blkoff,
40 label->pt_part[i].pi_nblocks);
41 put_dev_sector(sect);
42 strlcat(state->pp_buf, "\n", PAGE_SIZE);
43 return 1;
44 } else {
45 put_dev_sector(sect);
46 return 0;
47 }
48}
diff --git a/fs/partitions/ultrix.h b/fs/partitions/ultrix.h
deleted file mode 100644
index a3cc00b2bded..000000000000
--- a/fs/partitions/ultrix.h
+++ /dev/null
@@ -1,5 +0,0 @@
1/*
2 * fs/partitions/ultrix.h
3 */
4
5int ultrix_partition(struct parsed_partitions *state);
diff --git a/fs/pipe.c b/fs/pipe.c
index 4065f07366b3..a932ced92a16 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1137,7 +1137,7 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages)
1137 if (nr_pages < pipe->nrbufs) 1137 if (nr_pages < pipe->nrbufs)
1138 return -EBUSY; 1138 return -EBUSY;
1139 1139
1140 bufs = kcalloc(nr_pages, sizeof(struct pipe_buffer), GFP_KERNEL); 1140 bufs = kcalloc(nr_pages, sizeof(*bufs), GFP_KERNEL | __GFP_NOWARN);
1141 if (unlikely(!bufs)) 1141 if (unlikely(!bufs))
1142 return -ENOMEM; 1142 return -ENOMEM;
1143 1143
@@ -1290,11 +1290,4 @@ static int __init init_pipe_fs(void)
1290 return err; 1290 return err;
1291} 1291}
1292 1292
1293static void __exit exit_pipe_fs(void)
1294{
1295 kern_unmount(pipe_mnt);
1296 unregister_filesystem(&pipe_fs_type);
1297}
1298
1299fs_initcall(init_pipe_fs); 1293fs_initcall(init_pipe_fs);
1300module_exit(exit_pipe_fs);
diff --git a/fs/pnode.c b/fs/pnode.c
index d42514e32380..ab5fa9e1a79a 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -13,45 +13,30 @@
13#include "pnode.h" 13#include "pnode.h"
14 14
15/* return the next shared peer mount of @p */ 15/* return the next shared peer mount of @p */
16static inline struct vfsmount *next_peer(struct vfsmount *p) 16static inline struct mount *next_peer(struct mount *p)
17{ 17{
18 return list_entry(p->mnt_share.next, struct vfsmount, mnt_share); 18 return list_entry(p->mnt_share.next, struct mount, mnt_share);
19} 19}
20 20
21static inline struct vfsmount *first_slave(struct vfsmount *p) 21static inline struct mount *first_slave(struct mount *p)
22{ 22{
23 return list_entry(p->mnt_slave_list.next, struct vfsmount, mnt_slave); 23 return list_entry(p->mnt_slave_list.next, struct mount, mnt_slave);
24} 24}
25 25
26static inline struct vfsmount *next_slave(struct vfsmount *p) 26static inline struct mount *next_slave(struct mount *p)
27{ 27{
28 return list_entry(p->mnt_slave.next, struct vfsmount, mnt_slave); 28 return list_entry(p->mnt_slave.next, struct mount, mnt_slave);
29} 29}
30 30
31/* 31static struct mount *get_peer_under_root(struct mount *mnt,
32 * Return true if path is reachable from root 32 struct mnt_namespace *ns,
33 * 33 const struct path *root)
34 * namespace_sem is held, and mnt is attached
35 */
36static bool is_path_reachable(struct vfsmount *mnt, struct dentry *dentry,
37 const struct path *root)
38{
39 while (mnt != root->mnt && mnt->mnt_parent != mnt) {
40 dentry = mnt->mnt_mountpoint;
41 mnt = mnt->mnt_parent;
42 }
43 return mnt == root->mnt && is_subdir(dentry, root->dentry);
44}
45
46static struct vfsmount *get_peer_under_root(struct vfsmount *mnt,
47 struct mnt_namespace *ns,
48 const struct path *root)
49{ 34{
50 struct vfsmount *m = mnt; 35 struct mount *m = mnt;
51 36
52 do { 37 do {
53 /* Check the namespace first for optimization */ 38 /* Check the namespace first for optimization */
54 if (m->mnt_ns == ns && is_path_reachable(m, m->mnt_root, root)) 39 if (m->mnt_ns == ns && is_path_reachable(m, m->mnt.mnt_root, root))
55 return m; 40 return m;
56 41
57 m = next_peer(m); 42 m = next_peer(m);
@@ -66,12 +51,12 @@ static struct vfsmount *get_peer_under_root(struct vfsmount *mnt,
66 * 51 *
67 * Caller must hold namespace_sem 52 * Caller must hold namespace_sem
68 */ 53 */
69int get_dominating_id(struct vfsmount *mnt, const struct path *root) 54int get_dominating_id(struct mount *mnt, const struct path *root)
70{ 55{
71 struct vfsmount *m; 56 struct mount *m;
72 57
73 for (m = mnt->mnt_master; m != NULL; m = m->mnt_master) { 58 for (m = mnt->mnt_master; m != NULL; m = m->mnt_master) {
74 struct vfsmount *d = get_peer_under_root(m, mnt->mnt_ns, root); 59 struct mount *d = get_peer_under_root(m, mnt->mnt_ns, root);
75 if (d) 60 if (d)
76 return d->mnt_group_id; 61 return d->mnt_group_id;
77 } 62 }
@@ -79,10 +64,10 @@ int get_dominating_id(struct vfsmount *mnt, const struct path *root)
79 return 0; 64 return 0;
80} 65}
81 66
82static int do_make_slave(struct vfsmount *mnt) 67static int do_make_slave(struct mount *mnt)
83{ 68{
84 struct vfsmount *peer_mnt = mnt, *master = mnt->mnt_master; 69 struct mount *peer_mnt = mnt, *master = mnt->mnt_master;
85 struct vfsmount *slave_mnt; 70 struct mount *slave_mnt;
86 71
87 /* 72 /*
88 * slave 'mnt' to a peer mount that has the 73 * slave 'mnt' to a peer mount that has the
@@ -90,7 +75,7 @@ static int do_make_slave(struct vfsmount *mnt)
90 * slave it to anything that is available. 75 * slave it to anything that is available.
91 */ 76 */
92 while ((peer_mnt = next_peer(peer_mnt)) != mnt && 77 while ((peer_mnt = next_peer(peer_mnt)) != mnt &&
93 peer_mnt->mnt_root != mnt->mnt_root) ; 78 peer_mnt->mnt.mnt_root != mnt->mnt.mnt_root) ;
94 79
95 if (peer_mnt == mnt) { 80 if (peer_mnt == mnt) {
96 peer_mnt = next_peer(mnt); 81 peer_mnt = next_peer(mnt);
@@ -116,7 +101,7 @@ static int do_make_slave(struct vfsmount *mnt)
116 struct list_head *p = &mnt->mnt_slave_list; 101 struct list_head *p = &mnt->mnt_slave_list;
117 while (!list_empty(p)) { 102 while (!list_empty(p)) {
118 slave_mnt = list_first_entry(p, 103 slave_mnt = list_first_entry(p,
119 struct vfsmount, mnt_slave); 104 struct mount, mnt_slave);
120 list_del_init(&slave_mnt->mnt_slave); 105 list_del_init(&slave_mnt->mnt_slave);
121 slave_mnt->mnt_master = NULL; 106 slave_mnt->mnt_master = NULL;
122 } 107 }
@@ -129,7 +114,7 @@ static int do_make_slave(struct vfsmount *mnt)
129/* 114/*
130 * vfsmount lock must be held for write 115 * vfsmount lock must be held for write
131 */ 116 */
132void change_mnt_propagation(struct vfsmount *mnt, int type) 117void change_mnt_propagation(struct mount *mnt, int type)
133{ 118{
134 if (type == MS_SHARED) { 119 if (type == MS_SHARED) {
135 set_mnt_shared(mnt); 120 set_mnt_shared(mnt);
@@ -140,9 +125,9 @@ void change_mnt_propagation(struct vfsmount *mnt, int type)
140 list_del_init(&mnt->mnt_slave); 125 list_del_init(&mnt->mnt_slave);
141 mnt->mnt_master = NULL; 126 mnt->mnt_master = NULL;
142 if (type == MS_UNBINDABLE) 127 if (type == MS_UNBINDABLE)
143 mnt->mnt_flags |= MNT_UNBINDABLE; 128 mnt->mnt.mnt_flags |= MNT_UNBINDABLE;
144 else 129 else
145 mnt->mnt_flags &= ~MNT_UNBINDABLE; 130 mnt->mnt.mnt_flags &= ~MNT_UNBINDABLE;
146 } 131 }
147} 132}
148 133
@@ -156,20 +141,19 @@ void change_mnt_propagation(struct vfsmount *mnt, int type)
156 * vfsmount found while iterating with propagation_next() is 141 * vfsmount found while iterating with propagation_next() is
157 * a peer of one we'd found earlier. 142 * a peer of one we'd found earlier.
158 */ 143 */
159static struct vfsmount *propagation_next(struct vfsmount *m, 144static struct mount *propagation_next(struct mount *m,
160 struct vfsmount *origin) 145 struct mount *origin)
161{ 146{
162 /* are there any slaves of this mount? */ 147 /* are there any slaves of this mount? */
163 if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list)) 148 if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
164 return first_slave(m); 149 return first_slave(m);
165 150
166 while (1) { 151 while (1) {
167 struct vfsmount *next; 152 struct mount *master = m->mnt_master;
168 struct vfsmount *master = m->mnt_master;
169 153
170 if (master == origin->mnt_master) { 154 if (master == origin->mnt_master) {
171 next = next_peer(m); 155 struct mount *next = next_peer(m);
172 return ((next == origin) ? NULL : next); 156 return (next == origin) ? NULL : next;
173 } else if (m->mnt_slave.next != &master->mnt_slave_list) 157 } else if (m->mnt_slave.next != &master->mnt_slave_list)
174 return next_slave(m); 158 return next_slave(m);
175 159
@@ -187,13 +171,13 @@ static struct vfsmount *propagation_next(struct vfsmount *m,
187 * @type return CL_SLAVE if the new mount has to be 171 * @type return CL_SLAVE if the new mount has to be
188 * cloned as a slave. 172 * cloned as a slave.
189 */ 173 */
190static struct vfsmount *get_source(struct vfsmount *dest, 174static struct mount *get_source(struct mount *dest,
191 struct vfsmount *last_dest, 175 struct mount *last_dest,
192 struct vfsmount *last_src, 176 struct mount *last_src,
193 int *type) 177 int *type)
194{ 178{
195 struct vfsmount *p_last_src = NULL; 179 struct mount *p_last_src = NULL;
196 struct vfsmount *p_last_dest = NULL; 180 struct mount *p_last_dest = NULL;
197 181
198 while (last_dest != dest->mnt_master) { 182 while (last_dest != dest->mnt_master) {
199 p_last_dest = last_dest; 183 p_last_dest = last_dest;
@@ -233,33 +217,33 @@ static struct vfsmount *get_source(struct vfsmount *dest,
233 * @source_mnt: source mount. 217 * @source_mnt: source mount.
234 * @tree_list : list of heads of trees to be attached. 218 * @tree_list : list of heads of trees to be attached.
235 */ 219 */
236int propagate_mnt(struct vfsmount *dest_mnt, struct dentry *dest_dentry, 220int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry,
237 struct vfsmount *source_mnt, struct list_head *tree_list) 221 struct mount *source_mnt, struct list_head *tree_list)
238{ 222{
239 struct vfsmount *m, *child; 223 struct mount *m, *child;
240 int ret = 0; 224 int ret = 0;
241 struct vfsmount *prev_dest_mnt = dest_mnt; 225 struct mount *prev_dest_mnt = dest_mnt;
242 struct vfsmount *prev_src_mnt = source_mnt; 226 struct mount *prev_src_mnt = source_mnt;
243 LIST_HEAD(tmp_list); 227 LIST_HEAD(tmp_list);
244 LIST_HEAD(umount_list); 228 LIST_HEAD(umount_list);
245 229
246 for (m = propagation_next(dest_mnt, dest_mnt); m; 230 for (m = propagation_next(dest_mnt, dest_mnt); m;
247 m = propagation_next(m, dest_mnt)) { 231 m = propagation_next(m, dest_mnt)) {
248 int type; 232 int type;
249 struct vfsmount *source; 233 struct mount *source;
250 234
251 if (IS_MNT_NEW(m)) 235 if (IS_MNT_NEW(m))
252 continue; 236 continue;
253 237
254 source = get_source(m, prev_dest_mnt, prev_src_mnt, &type); 238 source = get_source(m, prev_dest_mnt, prev_src_mnt, &type);
255 239
256 if (!(child = copy_tree(source, source->mnt_root, type))) { 240 if (!(child = copy_tree(source, source->mnt.mnt_root, type))) {
257 ret = -ENOMEM; 241 ret = -ENOMEM;
258 list_splice(tree_list, tmp_list.prev); 242 list_splice(tree_list, tmp_list.prev);
259 goto out; 243 goto out;
260 } 244 }
261 245
262 if (is_subdir(dest_dentry, m->mnt_root)) { 246 if (is_subdir(dest_dentry, m->mnt.mnt_root)) {
263 mnt_set_mountpoint(m, dest_dentry, child); 247 mnt_set_mountpoint(m, dest_dentry, child);
264 list_add_tail(&child->mnt_hash, tree_list); 248 list_add_tail(&child->mnt_hash, tree_list);
265 } else { 249 } else {
@@ -275,7 +259,7 @@ int propagate_mnt(struct vfsmount *dest_mnt, struct dentry *dest_dentry,
275out: 259out:
276 br_write_lock(vfsmount_lock); 260 br_write_lock(vfsmount_lock);
277 while (!list_empty(&tmp_list)) { 261 while (!list_empty(&tmp_list)) {
278 child = list_first_entry(&tmp_list, struct vfsmount, mnt_hash); 262 child = list_first_entry(&tmp_list, struct mount, mnt_hash);
279 umount_tree(child, 0, &umount_list); 263 umount_tree(child, 0, &umount_list);
280 } 264 }
281 br_write_unlock(vfsmount_lock); 265 br_write_unlock(vfsmount_lock);
@@ -286,7 +270,7 @@ out:
286/* 270/*
287 * return true if the refcount is greater than count 271 * return true if the refcount is greater than count
288 */ 272 */
289static inline int do_refcount_check(struct vfsmount *mnt, int count) 273static inline int do_refcount_check(struct mount *mnt, int count)
290{ 274{
291 int mycount = mnt_get_count(mnt) - mnt->mnt_ghosts; 275 int mycount = mnt_get_count(mnt) - mnt->mnt_ghosts;
292 return (mycount > count); 276 return (mycount > count);
@@ -302,10 +286,10 @@ static inline int do_refcount_check(struct vfsmount *mnt, int count)
302 * 286 *
303 * vfsmount lock must be held for write 287 * vfsmount lock must be held for write
304 */ 288 */
305int propagate_mount_busy(struct vfsmount *mnt, int refcnt) 289int propagate_mount_busy(struct mount *mnt, int refcnt)
306{ 290{
307 struct vfsmount *m, *child; 291 struct mount *m, *child;
308 struct vfsmount *parent = mnt->mnt_parent; 292 struct mount *parent = mnt->mnt_parent;
309 int ret = 0; 293 int ret = 0;
310 294
311 if (mnt == parent) 295 if (mnt == parent)
@@ -321,7 +305,7 @@ int propagate_mount_busy(struct vfsmount *mnt, int refcnt)
321 305
322 for (m = propagation_next(parent, parent); m; 306 for (m = propagation_next(parent, parent); m;
323 m = propagation_next(m, parent)) { 307 m = propagation_next(m, parent)) {
324 child = __lookup_mnt(m, mnt->mnt_mountpoint, 0); 308 child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint, 0);
325 if (child && list_empty(&child->mnt_mounts) && 309 if (child && list_empty(&child->mnt_mounts) &&
326 (ret = do_refcount_check(child, 1))) 310 (ret = do_refcount_check(child, 1)))
327 break; 311 break;
@@ -333,17 +317,17 @@ int propagate_mount_busy(struct vfsmount *mnt, int refcnt)
333 * NOTE: unmounting 'mnt' naturally propagates to all other mounts its 317 * NOTE: unmounting 'mnt' naturally propagates to all other mounts its
334 * parent propagates to. 318 * parent propagates to.
335 */ 319 */
336static void __propagate_umount(struct vfsmount *mnt) 320static void __propagate_umount(struct mount *mnt)
337{ 321{
338 struct vfsmount *parent = mnt->mnt_parent; 322 struct mount *parent = mnt->mnt_parent;
339 struct vfsmount *m; 323 struct mount *m;
340 324
341 BUG_ON(parent == mnt); 325 BUG_ON(parent == mnt);
342 326
343 for (m = propagation_next(parent, parent); m; 327 for (m = propagation_next(parent, parent); m;
344 m = propagation_next(m, parent)) { 328 m = propagation_next(m, parent)) {
345 329
346 struct vfsmount *child = __lookup_mnt(m, 330 struct mount *child = __lookup_mnt(&m->mnt,
347 mnt->mnt_mountpoint, 0); 331 mnt->mnt_mountpoint, 0);
348 /* 332 /*
349 * umount the child only if the child has no 333 * umount the child only if the child has no
@@ -363,7 +347,7 @@ static void __propagate_umount(struct vfsmount *mnt)
363 */ 347 */
364int propagate_umount(struct list_head *list) 348int propagate_umount(struct list_head *list)
365{ 349{
366 struct vfsmount *mnt; 350 struct mount *mnt;
367 351
368 list_for_each_entry(mnt, list, mnt_hash) 352 list_for_each_entry(mnt, list, mnt_hash)
369 __propagate_umount(mnt); 353 __propagate_umount(mnt);
diff --git a/fs/pnode.h b/fs/pnode.h
index 1ea4ae1efcd3..65c60979d541 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -9,13 +9,13 @@
9#define _LINUX_PNODE_H 9#define _LINUX_PNODE_H
10 10
11#include <linux/list.h> 11#include <linux/list.h>
12#include <linux/mount.h> 12#include "mount.h"
13 13
14#define IS_MNT_SHARED(mnt) (mnt->mnt_flags & MNT_SHARED) 14#define IS_MNT_SHARED(m) ((m)->mnt.mnt_flags & MNT_SHARED)
15#define IS_MNT_SLAVE(mnt) (mnt->mnt_master) 15#define IS_MNT_SLAVE(m) ((m)->mnt_master)
16#define IS_MNT_NEW(mnt) (!mnt->mnt_ns) 16#define IS_MNT_NEW(m) (!(m)->mnt_ns)
17#define CLEAR_MNT_SHARED(mnt) (mnt->mnt_flags &= ~MNT_SHARED) 17#define CLEAR_MNT_SHARED(m) ((m)->mnt.mnt_flags &= ~MNT_SHARED)
18#define IS_MNT_UNBINDABLE(mnt) (mnt->mnt_flags & MNT_UNBINDABLE) 18#define IS_MNT_UNBINDABLE(m) ((m)->mnt.mnt_flags & MNT_UNBINDABLE)
19 19
20#define CL_EXPIRE 0x01 20#define CL_EXPIRE 0x01
21#define CL_SLAVE 0x02 21#define CL_SLAVE 0x02
@@ -23,17 +23,25 @@
23#define CL_MAKE_SHARED 0x08 23#define CL_MAKE_SHARED 0x08
24#define CL_PRIVATE 0x10 24#define CL_PRIVATE 0x10
25 25
26static inline void set_mnt_shared(struct vfsmount *mnt) 26static inline void set_mnt_shared(struct mount *mnt)
27{ 27{
28 mnt->mnt_flags &= ~MNT_SHARED_MASK; 28 mnt->mnt.mnt_flags &= ~MNT_SHARED_MASK;
29 mnt->mnt_flags |= MNT_SHARED; 29 mnt->mnt.mnt_flags |= MNT_SHARED;
30} 30}
31 31
32void change_mnt_propagation(struct vfsmount *, int); 32void change_mnt_propagation(struct mount *, int);
33int propagate_mnt(struct vfsmount *, struct dentry *, struct vfsmount *, 33int propagate_mnt(struct mount *, struct dentry *, struct mount *,
34 struct list_head *); 34 struct list_head *);
35int propagate_umount(struct list_head *); 35int propagate_umount(struct list_head *);
36int propagate_mount_busy(struct vfsmount *, int); 36int propagate_mount_busy(struct mount *, int);
37void mnt_release_group_id(struct vfsmount *); 37void mnt_release_group_id(struct mount *);
38int get_dominating_id(struct vfsmount *mnt, const struct path *root); 38int get_dominating_id(struct mount *mnt, const struct path *root);
39unsigned int mnt_get_count(struct mount *mnt);
40void mnt_set_mountpoint(struct mount *, struct dentry *,
41 struct mount *);
42void release_mounts(struct list_head *);
43void umount_tree(struct mount *, int, struct list_head *);
44struct mount *copy_tree(struct mount *, struct dentry *, int);
45bool is_path_reachable(struct mount *, struct dentry *,
46 const struct path *root);
39#endif /* _LINUX_PNODE_H */ 47#endif /* _LINUX_PNODE_H */
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 3a1dafd228d1..c602b8d20f06 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -380,7 +380,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
380 380
381 state = *get_task_state(task); 381 state = *get_task_state(task);
382 vsize = eip = esp = 0; 382 vsize = eip = esp = 0;
383 permitted = ptrace_may_access(task, PTRACE_MODE_READ); 383 permitted = ptrace_may_access(task, PTRACE_MODE_READ | PTRACE_MODE_NOAUDIT);
384 mm = get_task_mm(task); 384 mm = get_task_mm(task);
385 if (mm) { 385 if (mm) {
386 vsize = task_vsize(mm); 386 vsize = task_vsize(mm);
@@ -394,8 +394,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
394 394
395 sigemptyset(&sigign); 395 sigemptyset(&sigign);
396 sigemptyset(&sigcatch); 396 sigemptyset(&sigcatch);
397 cutime = cstime = utime = stime = cputime_zero; 397 cutime = cstime = utime = stime = 0;
398 cgtime = gtime = cputime_zero; 398 cgtime = gtime = 0;
399 399
400 if (lock_task_sighand(task, &flags)) { 400 if (lock_task_sighand(task, &flags)) {
401 struct signal_struct *sig = task->signal; 401 struct signal_struct *sig = task->signal;
@@ -423,14 +423,14 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
423 do { 423 do {
424 min_flt += t->min_flt; 424 min_flt += t->min_flt;
425 maj_flt += t->maj_flt; 425 maj_flt += t->maj_flt;
426 gtime = cputime_add(gtime, t->gtime); 426 gtime += t->gtime;
427 t = next_thread(t); 427 t = next_thread(t);
428 } while (t != task); 428 } while (t != task);
429 429
430 min_flt += sig->min_flt; 430 min_flt += sig->min_flt;
431 maj_flt += sig->maj_flt; 431 maj_flt += sig->maj_flt;
432 thread_group_times(task, &utime, &stime); 432 thread_group_times(task, &utime, &stime);
433 gtime = cputime_add(gtime, sig->gtime); 433 gtime += sig->gtime;
434 } 434 }
435 435
436 sid = task_session_nr_ns(task, ns); 436 sid = task_session_nr_ns(task, ns);
@@ -464,7 +464,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
464 464
465 seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \ 465 seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \
466%lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \ 466%lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
467%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld\n", 467%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld %lu %lu %lu\n",
468 pid_nr_ns(pid, ns), 468 pid_nr_ns(pid, ns),
469 tcomm, 469 tcomm,
470 state, 470 state,
@@ -511,7 +511,10 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
511 task->policy, 511 task->policy,
512 (unsigned long long)delayacct_blkio_ticks(task), 512 (unsigned long long)delayacct_blkio_ticks(task),
513 cputime_to_clock_t(gtime), 513 cputime_to_clock_t(gtime),
514 cputime_to_clock_t(cgtime)); 514 cputime_to_clock_t(cgtime),
515 (mm && permitted) ? mm->start_data : 0,
516 (mm && permitted) ? mm->end_data : 0,
517 (mm && permitted) ? mm->start_brk : 0);
515 if (mm) 518 if (mm)
516 mmput(mm); 519 mmput(mm);
517 return 0; 520 return 0;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 851ba3dcdc29..5485a5388ecb 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -83,9 +83,11 @@
83#include <linux/pid_namespace.h> 83#include <linux/pid_namespace.h>
84#include <linux/fs_struct.h> 84#include <linux/fs_struct.h>
85#include <linux/slab.h> 85#include <linux/slab.h>
86#include <linux/flex_array.h>
86#ifdef CONFIG_HARDWALL 87#ifdef CONFIG_HARDWALL
87#include <asm/hardwall.h> 88#include <asm/hardwall.h>
88#endif 89#endif
90#include <trace/events/oom.h>
89#include "internal.h" 91#include "internal.h"
90 92
91/* NOTE: 93/* NOTE:
@@ -101,7 +103,7 @@
101struct pid_entry { 103struct pid_entry {
102 char *name; 104 char *name;
103 int len; 105 int len;
104 mode_t mode; 106 umode_t mode;
105 const struct inode_operations *iop; 107 const struct inode_operations *iop;
106 const struct file_operations *fop; 108 const struct file_operations *fop;
107 union proc_op op; 109 union proc_op op;
@@ -133,6 +135,8 @@ struct pid_entry {
133 NULL, &proc_single_file_operations, \ 135 NULL, &proc_single_file_operations, \
134 { .proc_show = show } ) 136 { .proc_show = show } )
135 137
138static int proc_fd_permission(struct inode *inode, int mask);
139
136/* 140/*
137 * Count the number of hardlinks for the pid_entry table, excluding the . 141 * Count the number of hardlinks for the pid_entry table, excluding the .
138 * and .. links. 142 * and .. links.
@@ -165,9 +169,9 @@ static int get_task_root(struct task_struct *task, struct path *root)
165 return result; 169 return result;
166} 170}
167 171
168static int proc_cwd_link(struct inode *inode, struct path *path) 172static int proc_cwd_link(struct dentry *dentry, struct path *path)
169{ 173{
170 struct task_struct *task = get_proc_task(inode); 174 struct task_struct *task = get_proc_task(dentry->d_inode);
171 int result = -ENOENT; 175 int result = -ENOENT;
172 176
173 if (task) { 177 if (task) {
@@ -182,9 +186,9 @@ static int proc_cwd_link(struct inode *inode, struct path *path)
182 return result; 186 return result;
183} 187}
184 188
185static int proc_root_link(struct inode *inode, struct path *path) 189static int proc_root_link(struct dentry *dentry, struct path *path)
186{ 190{
187 struct task_struct *task = get_proc_task(inode); 191 struct task_struct *task = get_proc_task(dentry->d_inode);
188 int result = -ENOENT; 192 int result = -ENOENT;
189 193
190 if (task) { 194 if (task) {
@@ -627,122 +631,54 @@ int proc_setattr(struct dentry *dentry, struct iattr *attr)
627 return 0; 631 return 0;
628} 632}
629 633
630static const struct inode_operations proc_def_inode_operations = { 634/*
631 .setattr = proc_setattr, 635 * May current process learn task's sched/cmdline info (for hide_pid_min=1)
632}; 636 * or euid/egid (for hide_pid_min=2)?
633 637 */
634static int mounts_open_common(struct inode *inode, struct file *file, 638static bool has_pid_permissions(struct pid_namespace *pid,
635 const struct seq_operations *op) 639 struct task_struct *task,
640 int hide_pid_min)
636{ 641{
637 struct task_struct *task = get_proc_task(inode); 642 if (pid->hide_pid < hide_pid_min)
638 struct nsproxy *nsp; 643 return true;
639 struct mnt_namespace *ns = NULL; 644 if (in_group_p(pid->pid_gid))
640 struct path root; 645 return true;
641 struct proc_mounts *p; 646 return ptrace_may_access(task, PTRACE_MODE_READ);
642 int ret = -EINVAL;
643
644 if (task) {
645 rcu_read_lock();
646 nsp = task_nsproxy(task);
647 if (nsp) {
648 ns = nsp->mnt_ns;
649 if (ns)
650 get_mnt_ns(ns);
651 }
652 rcu_read_unlock();
653 if (ns && get_task_root(task, &root) == 0)
654 ret = 0;
655 put_task_struct(task);
656 }
657
658 if (!ns)
659 goto err;
660 if (ret)
661 goto err_put_ns;
662
663 ret = -ENOMEM;
664 p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL);
665 if (!p)
666 goto err_put_path;
667
668 file->private_data = &p->m;
669 ret = seq_open(file, op);
670 if (ret)
671 goto err_free;
672
673 p->m.private = p;
674 p->ns = ns;
675 p->root = root;
676 p->m.poll_event = ns->event;
677
678 return 0;
679
680 err_free:
681 kfree(p);
682 err_put_path:
683 path_put(&root);
684 err_put_ns:
685 put_mnt_ns(ns);
686 err:
687 return ret;
688} 647}
689 648
690static int mounts_release(struct inode *inode, struct file *file)
691{
692 struct proc_mounts *p = file->private_data;
693 path_put(&p->root);
694 put_mnt_ns(p->ns);
695 return seq_release(inode, file);
696}
697 649
698static unsigned mounts_poll(struct file *file, poll_table *wait) 650static int proc_pid_permission(struct inode *inode, int mask)
699{ 651{
700 struct proc_mounts *p = file->private_data; 652 struct pid_namespace *pid = inode->i_sb->s_fs_info;
701 unsigned res = POLLIN | POLLRDNORM; 653 struct task_struct *task;
702 654 bool has_perms;
703 poll_wait(file, &p->ns->poll, wait);
704 if (mnt_had_events(p))
705 res |= POLLERR | POLLPRI;
706
707 return res;
708}
709 655
710static int mounts_open(struct inode *inode, struct file *file) 656 task = get_proc_task(inode);
711{ 657 if (!task)
712 return mounts_open_common(inode, file, &mounts_op); 658 return -ESRCH;
713} 659 has_perms = has_pid_permissions(pid, task, 1);
660 put_task_struct(task);
714 661
715static const struct file_operations proc_mounts_operations = { 662 if (!has_perms) {
716 .open = mounts_open, 663 if (pid->hide_pid == 2) {
717 .read = seq_read, 664 /*
718 .llseek = seq_lseek, 665 * Let's make getdents(), stat(), and open()
719 .release = mounts_release, 666 * consistent with each other. If a process
720 .poll = mounts_poll, 667 * may not stat() a file, it shouldn't be seen
721}; 668 * in procfs at all.
669 */
670 return -ENOENT;
671 }
722 672
723static int mountinfo_open(struct inode *inode, struct file *file) 673 return -EPERM;
724{ 674 }
725 return mounts_open_common(inode, file, &mountinfo_op); 675 return generic_permission(inode, mask);
726} 676}
727 677
728static const struct file_operations proc_mountinfo_operations = {
729 .open = mountinfo_open,
730 .read = seq_read,
731 .llseek = seq_lseek,
732 .release = mounts_release,
733 .poll = mounts_poll,
734};
735 678
736static int mountstats_open(struct inode *inode, struct file *file)
737{
738 return mounts_open_common(inode, file, &mountstats_op);
739}
740 679
741static const struct file_operations proc_mountstats_operations = { 680static const struct inode_operations proc_def_inode_operations = {
742 .open = mountstats_open, 681 .setattr = proc_setattr,
743 .read = seq_read,
744 .llseek = seq_lseek,
745 .release = mounts_release,
746}; 682};
747 683
748#define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */ 684#define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */
@@ -1124,6 +1060,7 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
1124 else 1060 else
1125 task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) / 1061 task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) /
1126 -OOM_DISABLE; 1062 -OOM_DISABLE;
1063 trace_oom_score_adj_update(task);
1127err_sighand: 1064err_sighand:
1128 unlock_task_sighand(task, &flags); 1065 unlock_task_sighand(task, &flags);
1129err_task_lock: 1066err_task_lock:
@@ -1211,6 +1148,7 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
1211 task->signal->oom_score_adj = oom_score_adj; 1148 task->signal->oom_score_adj = oom_score_adj;
1212 if (has_capability_noaudit(current, CAP_SYS_RESOURCE)) 1149 if (has_capability_noaudit(current, CAP_SYS_RESOURCE))
1213 task->signal->oom_score_adj_min = oom_score_adj; 1150 task->signal->oom_score_adj_min = oom_score_adj;
1151 trace_oom_score_adj_update(task);
1214 /* 1152 /*
1215 * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is 1153 * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is
1216 * always attainable. 1154 * always attainable.
@@ -1567,13 +1505,13 @@ static const struct file_operations proc_pid_set_comm_operations = {
1567 .release = single_release, 1505 .release = single_release,
1568}; 1506};
1569 1507
1570static int proc_exe_link(struct inode *inode, struct path *exe_path) 1508static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
1571{ 1509{
1572 struct task_struct *task; 1510 struct task_struct *task;
1573 struct mm_struct *mm; 1511 struct mm_struct *mm;
1574 struct file *exe_file; 1512 struct file *exe_file;
1575 1513
1576 task = get_proc_task(inode); 1514 task = get_proc_task(dentry->d_inode);
1577 if (!task) 1515 if (!task)
1578 return -ENOENT; 1516 return -ENOENT;
1579 mm = get_task_mm(task); 1517 mm = get_task_mm(task);
@@ -1603,7 +1541,7 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
1603 if (!proc_fd_access_allowed(inode)) 1541 if (!proc_fd_access_allowed(inode))
1604 goto out; 1542 goto out;
1605 1543
1606 error = PROC_I(inode)->op.proc_get_link(inode, &nd->path); 1544 error = PROC_I(inode)->op.proc_get_link(dentry, &nd->path);
1607out: 1545out:
1608 return ERR_PTR(error); 1546 return ERR_PTR(error);
1609} 1547}
@@ -1642,7 +1580,7 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b
1642 if (!proc_fd_access_allowed(inode)) 1580 if (!proc_fd_access_allowed(inode))
1643 goto out; 1581 goto out;
1644 1582
1645 error = PROC_I(inode)->op.proc_get_link(inode, &path); 1583 error = PROC_I(inode)->op.proc_get_link(dentry, &path);
1646 if (error) 1584 if (error)
1647 goto out; 1585 goto out;
1648 1586
@@ -1723,6 +1661,7 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
1723 struct inode *inode = dentry->d_inode; 1661 struct inode *inode = dentry->d_inode;
1724 struct task_struct *task; 1662 struct task_struct *task;
1725 const struct cred *cred; 1663 const struct cred *cred;
1664 struct pid_namespace *pid = dentry->d_sb->s_fs_info;
1726 1665
1727 generic_fillattr(inode, stat); 1666 generic_fillattr(inode, stat);
1728 1667
@@ -1731,6 +1670,14 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
1731 stat->gid = 0; 1670 stat->gid = 0;
1732 task = pid_task(proc_pid(inode), PIDTYPE_PID); 1671 task = pid_task(proc_pid(inode), PIDTYPE_PID);
1733 if (task) { 1672 if (task) {
1673 if (!has_pid_permissions(pid, task, 2)) {
1674 rcu_read_unlock();
1675 /*
1676 * This doesn't prevent learning whether PID exists,
1677 * it only makes getattr() consistent with readdir().
1678 */
1679 return -ENOENT;
1680 }
1734 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || 1681 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
1735 task_dumpable(task)) { 1682 task_dumpable(task)) {
1736 cred = __task_cred(task); 1683 cred = __task_cred(task);
@@ -1934,9 +1881,9 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info)
1934 return -ENOENT; 1881 return -ENOENT;
1935} 1882}
1936 1883
1937static int proc_fd_link(struct inode *inode, struct path *path) 1884static int proc_fd_link(struct dentry *dentry, struct path *path)
1938{ 1885{
1939 return proc_fd_info(inode, path, NULL); 1886 return proc_fd_info(dentry->d_inode, path, NULL);
1940} 1887}
1941 1888
1942static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) 1889static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
@@ -2157,6 +2104,355 @@ static const struct file_operations proc_fd_operations = {
2157 .llseek = default_llseek, 2104 .llseek = default_llseek,
2158}; 2105};
2159 2106
2107#ifdef CONFIG_CHECKPOINT_RESTORE
2108
2109/*
2110 * dname_to_vma_addr - maps a dentry name into two unsigned longs
2111 * which represent vma start and end addresses.
2112 */
2113static int dname_to_vma_addr(struct dentry *dentry,
2114 unsigned long *start, unsigned long *end)
2115{
2116 if (sscanf(dentry->d_name.name, "%lx-%lx", start, end) != 2)
2117 return -EINVAL;
2118
2119 return 0;
2120}
2121
2122static int map_files_d_revalidate(struct dentry *dentry, struct nameidata *nd)
2123{
2124 unsigned long vm_start, vm_end;
2125 bool exact_vma_exists = false;
2126 struct mm_struct *mm = NULL;
2127 struct task_struct *task;
2128 const struct cred *cred;
2129 struct inode *inode;
2130 int status = 0;
2131
2132 if (nd && nd->flags & LOOKUP_RCU)
2133 return -ECHILD;
2134
2135 if (!capable(CAP_SYS_ADMIN)) {
2136 status = -EACCES;
2137 goto out_notask;
2138 }
2139
2140 inode = dentry->d_inode;
2141 task = get_proc_task(inode);
2142 if (!task)
2143 goto out_notask;
2144
2145 if (!ptrace_may_access(task, PTRACE_MODE_READ))
2146 goto out;
2147
2148 mm = get_task_mm(task);
2149 if (!mm)
2150 goto out;
2151
2152 if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) {
2153 down_read(&mm->mmap_sem);
2154 exact_vma_exists = !!find_exact_vma(mm, vm_start, vm_end);
2155 up_read(&mm->mmap_sem);
2156 }
2157
2158 mmput(mm);
2159
2160 if (exact_vma_exists) {
2161 if (task_dumpable(task)) {
2162 rcu_read_lock();
2163 cred = __task_cred(task);
2164 inode->i_uid = cred->euid;
2165 inode->i_gid = cred->egid;
2166 rcu_read_unlock();
2167 } else {
2168 inode->i_uid = 0;
2169 inode->i_gid = 0;
2170 }
2171 security_task_to_inode(task, inode);
2172 status = 1;
2173 }
2174
2175out:
2176 put_task_struct(task);
2177
2178out_notask:
2179 if (status <= 0)
2180 d_drop(dentry);
2181
2182 return status;
2183}
2184
2185static const struct dentry_operations tid_map_files_dentry_operations = {
2186 .d_revalidate = map_files_d_revalidate,
2187 .d_delete = pid_delete_dentry,
2188};
2189
2190static int proc_map_files_get_link(struct dentry *dentry, struct path *path)
2191{
2192 unsigned long vm_start, vm_end;
2193 struct vm_area_struct *vma;
2194 struct task_struct *task;
2195 struct mm_struct *mm;
2196 int rc;
2197
2198 rc = -ENOENT;
2199 task = get_proc_task(dentry->d_inode);
2200 if (!task)
2201 goto out;
2202
2203 mm = get_task_mm(task);
2204 put_task_struct(task);
2205 if (!mm)
2206 goto out;
2207
2208 rc = dname_to_vma_addr(dentry, &vm_start, &vm_end);
2209 if (rc)
2210 goto out_mmput;
2211
2212 down_read(&mm->mmap_sem);
2213 vma = find_exact_vma(mm, vm_start, vm_end);
2214 if (vma && vma->vm_file) {
2215 *path = vma->vm_file->f_path;
2216 path_get(path);
2217 rc = 0;
2218 }
2219 up_read(&mm->mmap_sem);
2220
2221out_mmput:
2222 mmput(mm);
2223out:
2224 return rc;
2225}
2226
2227struct map_files_info {
2228 struct file *file;
2229 unsigned long len;
2230 unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */
2231};
2232
2233static struct dentry *
2234proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
2235 struct task_struct *task, const void *ptr)
2236{
2237 const struct file *file = ptr;
2238 struct proc_inode *ei;
2239 struct inode *inode;
2240
2241 if (!file)
2242 return ERR_PTR(-ENOENT);
2243
2244 inode = proc_pid_make_inode(dir->i_sb, task);
2245 if (!inode)
2246 return ERR_PTR(-ENOENT);
2247
2248 ei = PROC_I(inode);
2249 ei->op.proc_get_link = proc_map_files_get_link;
2250
2251 inode->i_op = &proc_pid_link_inode_operations;
2252 inode->i_size = 64;
2253 inode->i_mode = S_IFLNK;
2254
2255 if (file->f_mode & FMODE_READ)
2256 inode->i_mode |= S_IRUSR;
2257 if (file->f_mode & FMODE_WRITE)
2258 inode->i_mode |= S_IWUSR;
2259
2260 d_set_d_op(dentry, &tid_map_files_dentry_operations);
2261 d_add(dentry, inode);
2262
2263 return NULL;
2264}
2265
2266static struct dentry *proc_map_files_lookup(struct inode *dir,
2267 struct dentry *dentry, struct nameidata *nd)
2268{
2269 unsigned long vm_start, vm_end;
2270 struct vm_area_struct *vma;
2271 struct task_struct *task;
2272 struct dentry *result;
2273 struct mm_struct *mm;
2274
2275 result = ERR_PTR(-EACCES);
2276 if (!capable(CAP_SYS_ADMIN))
2277 goto out;
2278
2279 result = ERR_PTR(-ENOENT);
2280 task = get_proc_task(dir);
2281 if (!task)
2282 goto out;
2283
2284 result = ERR_PTR(-EACCES);
2285 if (lock_trace(task))
2286 goto out_put_task;
2287
2288 result = ERR_PTR(-ENOENT);
2289 if (dname_to_vma_addr(dentry, &vm_start, &vm_end))
2290 goto out_unlock;
2291
2292 mm = get_task_mm(task);
2293 if (!mm)
2294 goto out_unlock;
2295
2296 down_read(&mm->mmap_sem);
2297 vma = find_exact_vma(mm, vm_start, vm_end);
2298 if (!vma)
2299 goto out_no_vma;
2300
2301 result = proc_map_files_instantiate(dir, dentry, task, vma->vm_file);
2302
2303out_no_vma:
2304 up_read(&mm->mmap_sem);
2305 mmput(mm);
2306out_unlock:
2307 unlock_trace(task);
2308out_put_task:
2309 put_task_struct(task);
2310out:
2311 return result;
2312}
2313
2314static const struct inode_operations proc_map_files_inode_operations = {
2315 .lookup = proc_map_files_lookup,
2316 .permission = proc_fd_permission,
2317 .setattr = proc_setattr,
2318};
2319
2320static int
2321proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
2322{
2323 struct dentry *dentry = filp->f_path.dentry;
2324 struct inode *inode = dentry->d_inode;
2325 struct vm_area_struct *vma;
2326 struct task_struct *task;
2327 struct mm_struct *mm;
2328 ino_t ino;
2329 int ret;
2330
2331 ret = -EACCES;
2332 if (!capable(CAP_SYS_ADMIN))
2333 goto out;
2334
2335 ret = -ENOENT;
2336 task = get_proc_task(inode);
2337 if (!task)
2338 goto out;
2339
2340 ret = -EACCES;
2341 if (lock_trace(task))
2342 goto out_put_task;
2343
2344 ret = 0;
2345 switch (filp->f_pos) {
2346 case 0:
2347 ino = inode->i_ino;
2348 if (filldir(dirent, ".", 1, 0, ino, DT_DIR) < 0)
2349 goto out_unlock;
2350 filp->f_pos++;
2351 case 1:
2352 ino = parent_ino(dentry);
2353 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
2354 goto out_unlock;
2355 filp->f_pos++;
2356 default:
2357 {
2358 unsigned long nr_files, pos, i;
2359 struct flex_array *fa = NULL;
2360 struct map_files_info info;
2361 struct map_files_info *p;
2362
2363 mm = get_task_mm(task);
2364 if (!mm)
2365 goto out_unlock;
2366 down_read(&mm->mmap_sem);
2367
2368 nr_files = 0;
2369
2370 /*
2371 * We need two passes here:
2372 *
2373 * 1) Collect vmas of mapped files with mmap_sem taken
2374 * 2) Release mmap_sem and instantiate entries
2375 *
2376 * otherwise we get lockdep complained, since filldir()
2377 * routine might require mmap_sem taken in might_fault().
2378 */
2379
2380 for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) {
2381 if (vma->vm_file && ++pos > filp->f_pos)
2382 nr_files++;
2383 }
2384
2385 if (nr_files) {
2386 fa = flex_array_alloc(sizeof(info), nr_files,
2387 GFP_KERNEL);
2388 if (!fa || flex_array_prealloc(fa, 0, nr_files,
2389 GFP_KERNEL)) {
2390 ret = -ENOMEM;
2391 if (fa)
2392 flex_array_free(fa);
2393 up_read(&mm->mmap_sem);
2394 mmput(mm);
2395 goto out_unlock;
2396 }
2397 for (i = 0, vma = mm->mmap, pos = 2; vma;
2398 vma = vma->vm_next) {
2399 if (!vma->vm_file)
2400 continue;
2401 if (++pos <= filp->f_pos)
2402 continue;
2403
2404 get_file(vma->vm_file);
2405 info.file = vma->vm_file;
2406 info.len = snprintf(info.name,
2407 sizeof(info.name), "%lx-%lx",
2408 vma->vm_start, vma->vm_end);
2409 if (flex_array_put(fa, i++, &info, GFP_KERNEL))
2410 BUG();
2411 }
2412 }
2413 up_read(&mm->mmap_sem);
2414
2415 for (i = 0; i < nr_files; i++) {
2416 p = flex_array_get(fa, i);
2417 ret = proc_fill_cache(filp, dirent, filldir,
2418 p->name, p->len,
2419 proc_map_files_instantiate,
2420 task, p->file);
2421 if (ret)
2422 break;
2423 filp->f_pos++;
2424 fput(p->file);
2425 }
2426 for (; i < nr_files; i++) {
2427 /*
2428 * In case of error don't forget
2429 * to put rest of file refs.
2430 */
2431 p = flex_array_get(fa, i);
2432 fput(p->file);
2433 }
2434 if (fa)
2435 flex_array_free(fa);
2436 mmput(mm);
2437 }
2438 }
2439
2440out_unlock:
2441 unlock_trace(task);
2442out_put_task:
2443 put_task_struct(task);
2444out:
2445 return ret;
2446}
2447
2448static const struct file_operations proc_map_files_operations = {
2449 .read = generic_read_dir,
2450 .readdir = proc_map_files_readdir,
2451 .llseek = default_llseek,
2452};
2453
2454#endif /* CONFIG_CHECKPOINT_RESTORE */
2455
2160/* 2456/*
2161 * /proc/pid/fd needs a special permission handler so that a process can still 2457 * /proc/pid/fd needs a special permission handler so that a process can still
2162 * access /proc/self/fd after it has executed a setuid(). 2458 * access /proc/self/fd after it has executed a setuid().
@@ -2772,6 +3068,9 @@ static const struct inode_operations proc_task_inode_operations;
2772static const struct pid_entry tgid_base_stuff[] = { 3068static const struct pid_entry tgid_base_stuff[] = {
2773 DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), 3069 DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
2774 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), 3070 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
3071#ifdef CONFIG_CHECKPOINT_RESTORE
3072 DIR("map_files", S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations),
3073#endif
2775 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), 3074 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
2776 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), 3075 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
2777#ifdef CONFIG_NET 3076#ifdef CONFIG_NET
@@ -2875,6 +3174,7 @@ static const struct inode_operations proc_tgid_base_inode_operations = {
2875 .lookup = proc_tgid_base_lookup, 3174 .lookup = proc_tgid_base_lookup,
2876 .getattr = pid_getattr, 3175 .getattr = pid_getattr,
2877 .setattr = proc_setattr, 3176 .setattr = proc_setattr,
3177 .permission = proc_pid_permission,
2878}; 3178};
2879 3179
2880static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) 3180static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
@@ -3078,6 +3378,12 @@ static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldi
3078 proc_pid_instantiate, iter.task, NULL); 3378 proc_pid_instantiate, iter.task, NULL);
3079} 3379}
3080 3380
3381static int fake_filldir(void *buf, const char *name, int namelen,
3382 loff_t offset, u64 ino, unsigned d_type)
3383{
3384 return 0;
3385}
3386
3081/* for the /proc/ directory itself, after non-process stuff has been done */ 3387/* for the /proc/ directory itself, after non-process stuff has been done */
3082int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) 3388int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
3083{ 3389{
@@ -3085,6 +3391,7 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
3085 struct task_struct *reaper; 3391 struct task_struct *reaper;
3086 struct tgid_iter iter; 3392 struct tgid_iter iter;
3087 struct pid_namespace *ns; 3393 struct pid_namespace *ns;
3394 filldir_t __filldir;
3088 3395
3089 if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET) 3396 if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET)
3090 goto out_no_task; 3397 goto out_no_task;
@@ -3106,8 +3413,13 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
3106 for (iter = next_tgid(ns, iter); 3413 for (iter = next_tgid(ns, iter);
3107 iter.task; 3414 iter.task;
3108 iter.tgid += 1, iter = next_tgid(ns, iter)) { 3415 iter.tgid += 1, iter = next_tgid(ns, iter)) {
3416 if (has_pid_permissions(ns, iter.task, 2))
3417 __filldir = filldir;
3418 else
3419 __filldir = fake_filldir;
3420
3109 filp->f_pos = iter.tgid + TGID_OFFSET; 3421 filp->f_pos = iter.tgid + TGID_OFFSET;
3110 if (proc_pid_fill_cache(filp, dirent, filldir, iter) < 0) { 3422 if (proc_pid_fill_cache(filp, dirent, __filldir, iter) < 0) {
3111 put_task_struct(iter.task); 3423 put_task_struct(iter.task);
3112 goto out; 3424 goto out;
3113 } 3425 }
@@ -3442,6 +3754,7 @@ static const struct inode_operations proc_task_inode_operations = {
3442 .lookup = proc_task_lookup, 3754 .lookup = proc_task_lookup,
3443 .getattr = proc_task_getattr, 3755 .getattr = proc_task_getattr,
3444 .setattr = proc_setattr, 3756 .setattr = proc_setattr,
3757 .permission = proc_pid_permission,
3445}; 3758};
3446 3759
3447static const struct file_operations proc_task_operations = { 3760static const struct file_operations proc_task_operations = {
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 10090d9c7ad5..2edf34f2eb61 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -597,7 +597,7 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
597 597
598static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, 598static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
599 const char *name, 599 const char *name,
600 mode_t mode, 600 umode_t mode,
601 nlink_t nlink) 601 nlink_t nlink)
602{ 602{
603 struct proc_dir_entry *ent = NULL; 603 struct proc_dir_entry *ent = NULL;
@@ -659,7 +659,7 @@ struct proc_dir_entry *proc_symlink(const char *name,
659} 659}
660EXPORT_SYMBOL(proc_symlink); 660EXPORT_SYMBOL(proc_symlink);
661 661
662struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode, 662struct proc_dir_entry *proc_mkdir_mode(const char *name, umode_t mode,
663 struct proc_dir_entry *parent) 663 struct proc_dir_entry *parent)
664{ 664{
665 struct proc_dir_entry *ent; 665 struct proc_dir_entry *ent;
@@ -699,7 +699,7 @@ struct proc_dir_entry *proc_mkdir(const char *name,
699} 699}
700EXPORT_SYMBOL(proc_mkdir); 700EXPORT_SYMBOL(proc_mkdir);
701 701
702struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, 702struct proc_dir_entry *create_proc_entry(const char *name, umode_t mode,
703 struct proc_dir_entry *parent) 703 struct proc_dir_entry *parent)
704{ 704{
705 struct proc_dir_entry *ent; 705 struct proc_dir_entry *ent;
@@ -728,7 +728,7 @@ struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
728} 728}
729EXPORT_SYMBOL(create_proc_entry); 729EXPORT_SYMBOL(create_proc_entry);
730 730
731struct proc_dir_entry *proc_create_data(const char *name, mode_t mode, 731struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
732 struct proc_dir_entry *parent, 732 struct proc_dir_entry *parent,
733 const struct file_operations *proc_fops, 733 const struct file_operations *proc_fops,
734 void *data) 734 void *data)
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 7737c5468a40..84fd3235a590 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -7,6 +7,7 @@
7#include <linux/time.h> 7#include <linux/time.h>
8#include <linux/proc_fs.h> 8#include <linux/proc_fs.h>
9#include <linux/kernel.h> 9#include <linux/kernel.h>
10#include <linux/pid_namespace.h>
10#include <linux/mm.h> 11#include <linux/mm.h>
11#include <linux/string.h> 12#include <linux/string.h>
12#include <linux/stat.h> 13#include <linux/stat.h>
@@ -17,7 +18,9 @@
17#include <linux/init.h> 18#include <linux/init.h>
18#include <linux/module.h> 19#include <linux/module.h>
19#include <linux/sysctl.h> 20#include <linux/sysctl.h>
21#include <linux/seq_file.h>
20#include <linux/slab.h> 22#include <linux/slab.h>
23#include <linux/mount.h>
21 24
22#include <asm/system.h> 25#include <asm/system.h>
23#include <asm/uaccess.h> 26#include <asm/uaccess.h>
@@ -77,7 +80,6 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
77static void proc_i_callback(struct rcu_head *head) 80static void proc_i_callback(struct rcu_head *head)
78{ 81{
79 struct inode *inode = container_of(head, struct inode, i_rcu); 82 struct inode *inode = container_of(head, struct inode, i_rcu);
80 INIT_LIST_HEAD(&inode->i_dentry);
81 kmem_cache_free(proc_inode_cachep, PROC_I(inode)); 83 kmem_cache_free(proc_inode_cachep, PROC_I(inode));
82} 84}
83 85
@@ -102,12 +104,27 @@ void __init proc_init_inodecache(void)
102 init_once); 104 init_once);
103} 105}
104 106
107static int proc_show_options(struct seq_file *seq, struct dentry *root)
108{
109 struct super_block *sb = root->d_sb;
110 struct pid_namespace *pid = sb->s_fs_info;
111
112 if (pid->pid_gid)
113 seq_printf(seq, ",gid=%lu", (unsigned long)pid->pid_gid);
114 if (pid->hide_pid != 0)
115 seq_printf(seq, ",hidepid=%u", pid->hide_pid);
116
117 return 0;
118}
119
105static const struct super_operations proc_sops = { 120static const struct super_operations proc_sops = {
106 .alloc_inode = proc_alloc_inode, 121 .alloc_inode = proc_alloc_inode,
107 .destroy_inode = proc_destroy_inode, 122 .destroy_inode = proc_destroy_inode,
108 .drop_inode = generic_delete_inode, 123 .drop_inode = generic_delete_inode,
109 .evict_inode = proc_evict_inode, 124 .evict_inode = proc_evict_inode,
110 .statfs = simple_statfs, 125 .statfs = simple_statfs,
126 .remount_fs = proc_remount,
127 .show_options = proc_show_options,
111}; 128};
112 129
113static void __pde_users_dec(struct proc_dir_entry *pde) 130static void __pde_users_dec(struct proc_dir_entry *pde)
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 7838e5cfec14..292577531ad1 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -117,6 +117,7 @@ void pde_put(struct proc_dir_entry *pde);
117 117
118int proc_fill_super(struct super_block *); 118int proc_fill_super(struct super_block *);
119struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); 119struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
120int proc_remount(struct super_block *sb, int *flags, char *data);
120 121
121/* 122/*
122 * These are generic /proc routines that use the internal 123 * These are generic /proc routines that use the internal
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index be177f702acb..27da860115c6 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -9,7 +9,6 @@
9#include <linux/file.h> 9#include <linux/file.h>
10#include <linux/utsname.h> 10#include <linux/utsname.h>
11#include <net/net_namespace.h> 11#include <net/net_namespace.h>
12#include <linux/mnt_namespace.h>
13#include <linux/ipc_namespace.h> 12#include <linux/ipc_namespace.h>
14#include <linux/pid_namespace.h> 13#include <linux/pid_namespace.h>
15#include "internal.h" 14#include "internal.h"
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index f738024ccc8e..06e1cc17caf6 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -179,7 +179,7 @@ const struct file_operations proc_net_operations = {
179 179
180 180
181struct proc_dir_entry *proc_net_fops_create(struct net *net, 181struct proc_dir_entry *proc_net_fops_create(struct net *net,
182 const char *name, mode_t mode, const struct file_operations *fops) 182 const char *name, umode_t mode, const struct file_operations *fops)
183{ 183{
184 return proc_create(name, mode, net->proc_net, fops); 184 return proc_create(name, mode, net->proc_net, fops);
185} 185}
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 03102d978180..46a15d8a29ca 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -18,6 +18,7 @@
18#include <linux/bitops.h> 18#include <linux/bitops.h>
19#include <linux/mount.h> 19#include <linux/mount.h>
20#include <linux/pid_namespace.h> 20#include <linux/pid_namespace.h>
21#include <linux/parser.h>
21 22
22#include "internal.h" 23#include "internal.h"
23 24
@@ -36,6 +37,63 @@ static int proc_set_super(struct super_block *sb, void *data)
36 return err; 37 return err;
37} 38}
38 39
40enum {
41 Opt_gid, Opt_hidepid, Opt_err,
42};
43
44static const match_table_t tokens = {
45 {Opt_hidepid, "hidepid=%u"},
46 {Opt_gid, "gid=%u"},
47 {Opt_err, NULL},
48};
49
50static int proc_parse_options(char *options, struct pid_namespace *pid)
51{
52 char *p;
53 substring_t args[MAX_OPT_ARGS];
54 int option;
55
56 if (!options)
57 return 1;
58
59 while ((p = strsep(&options, ",")) != NULL) {
60 int token;
61 if (!*p)
62 continue;
63
64 args[0].to = args[0].from = 0;
65 token = match_token(p, tokens, args);
66 switch (token) {
67 case Opt_gid:
68 if (match_int(&args[0], &option))
69 return 0;
70 pid->pid_gid = option;
71 break;
72 case Opt_hidepid:
73 if (match_int(&args[0], &option))
74 return 0;
75 if (option < 0 || option > 2) {
76 pr_err("proc: hidepid value must be between 0 and 2.\n");
77 return 0;
78 }
79 pid->hide_pid = option;
80 break;
81 default:
82 pr_err("proc: unrecognized mount option \"%s\" "
83 "or missing value\n", p);
84 return 0;
85 }
86 }
87
88 return 1;
89}
90
91int proc_remount(struct super_block *sb, int *flags, char *data)
92{
93 struct pid_namespace *pid = sb->s_fs_info;
94 return !proc_parse_options(data, pid);
95}
96
39static struct dentry *proc_mount(struct file_system_type *fs_type, 97static struct dentry *proc_mount(struct file_system_type *fs_type,
40 int flags, const char *dev_name, void *data) 98 int flags, const char *dev_name, void *data)
41{ 99{
@@ -43,11 +101,15 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
43 struct super_block *sb; 101 struct super_block *sb;
44 struct pid_namespace *ns; 102 struct pid_namespace *ns;
45 struct proc_inode *ei; 103 struct proc_inode *ei;
104 char *options;
46 105
47 if (flags & MS_KERNMOUNT) 106 if (flags & MS_KERNMOUNT) {
48 ns = (struct pid_namespace *)data; 107 ns = (struct pid_namespace *)data;
49 else 108 options = NULL;
109 } else {
50 ns = current->nsproxy->pid_ns; 110 ns = current->nsproxy->pid_ns;
111 options = data;
112 }
51 113
52 sb = sget(fs_type, proc_test_super, proc_set_super, ns); 114 sb = sget(fs_type, proc_test_super, proc_set_super, ns);
53 if (IS_ERR(sb)) 115 if (IS_ERR(sb))
@@ -55,6 +117,10 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
55 117
56 if (!sb->s_root) { 118 if (!sb->s_root) {
57 sb->s_flags = flags; 119 sb->s_flags = flags;
120 if (!proc_parse_options(options, ns)) {
121 deactivate_locked_super(sb);
122 return ERR_PTR(-EINVAL);
123 }
58 err = proc_fill_super(sb); 124 err = proc_fill_super(sb);
59 if (err) { 125 if (err) {
60 deactivate_locked_super(sb); 126 deactivate_locked_super(sb);
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 0855e6f20391..d76ca6ae2b1b 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -22,29 +22,27 @@
22#define arch_idle_time(cpu) 0 22#define arch_idle_time(cpu) 0
23#endif 23#endif
24 24
25static cputime64_t get_idle_time(int cpu) 25static u64 get_idle_time(int cpu)
26{ 26{
27 u64 idle_time = get_cpu_idle_time_us(cpu, NULL); 27 u64 idle, idle_time = get_cpu_idle_time_us(cpu, NULL);
28 cputime64_t idle;
29 28
30 if (idle_time == -1ULL) { 29 if (idle_time == -1ULL) {
31 /* !NO_HZ so we can rely on cpustat.idle */ 30 /* !NO_HZ so we can rely on cpustat.idle */
32 idle = kstat_cpu(cpu).cpustat.idle; 31 idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];
33 idle = cputime64_add(idle, arch_idle_time(cpu)); 32 idle += arch_idle_time(cpu);
34 } else 33 } else
35 idle = usecs_to_cputime64(idle_time); 34 idle = usecs_to_cputime64(idle_time);
36 35
37 return idle; 36 return idle;
38} 37}
39 38
40static cputime64_t get_iowait_time(int cpu) 39static u64 get_iowait_time(int cpu)
41{ 40{
42 u64 iowait_time = get_cpu_iowait_time_us(cpu, NULL); 41 u64 iowait, iowait_time = get_cpu_iowait_time_us(cpu, NULL);
43 cputime64_t iowait;
44 42
45 if (iowait_time == -1ULL) 43 if (iowait_time == -1ULL)
46 /* !NO_HZ so we can rely on cpustat.iowait */ 44 /* !NO_HZ so we can rely on cpustat.iowait */
47 iowait = kstat_cpu(cpu).cpustat.iowait; 45 iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
48 else 46 else
49 iowait = usecs_to_cputime64(iowait_time); 47 iowait = usecs_to_cputime64(iowait_time);
50 48
@@ -55,33 +53,30 @@ static int show_stat(struct seq_file *p, void *v)
55{ 53{
56 int i, j; 54 int i, j;
57 unsigned long jif; 55 unsigned long jif;
58 cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; 56 u64 user, nice, system, idle, iowait, irq, softirq, steal;
59 cputime64_t guest, guest_nice; 57 u64 guest, guest_nice;
60 u64 sum = 0; 58 u64 sum = 0;
61 u64 sum_softirq = 0; 59 u64 sum_softirq = 0;
62 unsigned int per_softirq_sums[NR_SOFTIRQS] = {0}; 60 unsigned int per_softirq_sums[NR_SOFTIRQS] = {0};
63 struct timespec boottime; 61 struct timespec boottime;
64 62
65 user = nice = system = idle = iowait = 63 user = nice = system = idle = iowait =
66 irq = softirq = steal = cputime64_zero; 64 irq = softirq = steal = 0;
67 guest = guest_nice = cputime64_zero; 65 guest = guest_nice = 0;
68 getboottime(&boottime); 66 getboottime(&boottime);
69 jif = boottime.tv_sec; 67 jif = boottime.tv_sec;
70 68
71 for_each_possible_cpu(i) { 69 for_each_possible_cpu(i) {
72 user = cputime64_add(user, kstat_cpu(i).cpustat.user); 70 user += kcpustat_cpu(i).cpustat[CPUTIME_USER];
73 nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice); 71 nice += kcpustat_cpu(i).cpustat[CPUTIME_NICE];
74 system = cputime64_add(system, kstat_cpu(i).cpustat.system); 72 system += kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM];
75 idle = cputime64_add(idle, get_idle_time(i)); 73 idle += get_idle_time(i);
76 iowait = cputime64_add(iowait, get_iowait_time(i)); 74 iowait += get_iowait_time(i);
77 irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq); 75 irq += kcpustat_cpu(i).cpustat[CPUTIME_IRQ];
78 softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); 76 softirq += kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ];
79 steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); 77 steal += kcpustat_cpu(i).cpustat[CPUTIME_STEAL];
80 guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); 78 guest += kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
81 guest_nice = cputime64_add(guest_nice, 79 guest_nice += kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
82 kstat_cpu(i).cpustat.guest_nice);
83 sum += kstat_cpu_irqs_sum(i);
84 sum += arch_irq_stat_cpu(i);
85 80
86 for (j = 0; j < NR_SOFTIRQS; j++) { 81 for (j = 0; j < NR_SOFTIRQS; j++) {
87 unsigned int softirq_stat = kstat_softirqs_cpu(j, i); 82 unsigned int softirq_stat = kstat_softirqs_cpu(j, i);
@@ -106,16 +101,16 @@ static int show_stat(struct seq_file *p, void *v)
106 (unsigned long long)cputime64_to_clock_t(guest_nice)); 101 (unsigned long long)cputime64_to_clock_t(guest_nice));
107 for_each_online_cpu(i) { 102 for_each_online_cpu(i) {
108 /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ 103 /* Copy values here to work around gcc-2.95.3, gcc-2.96 */
109 user = kstat_cpu(i).cpustat.user; 104 user = kcpustat_cpu(i).cpustat[CPUTIME_USER];
110 nice = kstat_cpu(i).cpustat.nice; 105 nice = kcpustat_cpu(i).cpustat[CPUTIME_NICE];
111 system = kstat_cpu(i).cpustat.system; 106 system = kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM];
112 idle = get_idle_time(i); 107 idle = get_idle_time(i);
113 iowait = get_iowait_time(i); 108 iowait = get_iowait_time(i);
114 irq = kstat_cpu(i).cpustat.irq; 109 irq = kcpustat_cpu(i).cpustat[CPUTIME_IRQ];
115 softirq = kstat_cpu(i).cpustat.softirq; 110 softirq = kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ];
116 steal = kstat_cpu(i).cpustat.steal; 111 steal = kcpustat_cpu(i).cpustat[CPUTIME_STEAL];
117 guest = kstat_cpu(i).cpustat.guest; 112 guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
118 guest_nice = kstat_cpu(i).cpustat.guest_nice; 113 guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
119 seq_printf(p, 114 seq_printf(p,
120 "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu " 115 "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu "
121 "%llu\n", 116 "%llu\n",
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c
index 766b1d456050..9610ac772d7e 100644
--- a/fs/proc/uptime.c
+++ b/fs/proc/uptime.c
@@ -11,15 +11,20 @@ static int uptime_proc_show(struct seq_file *m, void *v)
11{ 11{
12 struct timespec uptime; 12 struct timespec uptime;
13 struct timespec idle; 13 struct timespec idle;
14 u64 idletime;
15 u64 nsec;
16 u32 rem;
14 int i; 17 int i;
15 cputime_t idletime = cputime_zero;
16 18
19 idletime = 0;
17 for_each_possible_cpu(i) 20 for_each_possible_cpu(i)
18 idletime = cputime64_add(idletime, kstat_cpu(i).cpustat.idle); 21 idletime += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE];
19 22
20 do_posix_clock_monotonic_gettime(&uptime); 23 do_posix_clock_monotonic_gettime(&uptime);
21 monotonic_to_bootbased(&uptime); 24 monotonic_to_bootbased(&uptime);
22 cputime_to_timespec(idletime, &idle); 25 nsec = cputime64_to_jiffies64(idletime) * TICK_NSEC;
26 idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem);
27 idle.tv_nsec = rem;
23 seq_printf(m, "%lu.%02lu %lu.%02lu\n", 28 seq_printf(m, "%lu.%02lu %lu.%02lu\n",
24 (unsigned long) uptime.tv_sec, 29 (unsigned long) uptime.tv_sec,
25 (uptime.tv_nsec / (NSEC_PER_SEC / 100)), 30 (uptime.tv_nsec / (NSEC_PER_SEC / 100)),
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
new file mode 100644
index 000000000000..12412852d88a
--- /dev/null
+++ b/fs/proc_namespace.c
@@ -0,0 +1,333 @@
1/*
2 * fs/proc_namespace.c - handling of /proc/<pid>/{mounts,mountinfo,mountstats}
3 *
4 * In fact, that's a piece of procfs; it's *almost* isolated from
5 * the rest of fs/proc, but has rather close relationships with
6 * fs/namespace.c, thus here instead of fs/proc
7 *
8 */
9#include <linux/mnt_namespace.h>
10#include <linux/nsproxy.h>
11#include <linux/security.h>
12#include <linux/fs_struct.h>
13#include "proc/internal.h" /* only for get_proc_task() in ->open() */
14
15#include "pnode.h"
16#include "internal.h"
17
18static unsigned mounts_poll(struct file *file, poll_table *wait)
19{
20 struct proc_mounts *p = file->private_data;
21 struct mnt_namespace *ns = p->ns;
22 unsigned res = POLLIN | POLLRDNORM;
23
24 poll_wait(file, &p->ns->poll, wait);
25
26 br_read_lock(vfsmount_lock);
27 if (p->m.poll_event != ns->event) {
28 p->m.poll_event = ns->event;
29 res |= POLLERR | POLLPRI;
30 }
31 br_read_unlock(vfsmount_lock);
32
33 return res;
34}
35
36struct proc_fs_info {
37 int flag;
38 const char *str;
39};
40
41static int show_sb_opts(struct seq_file *m, struct super_block *sb)
42{
43 static const struct proc_fs_info fs_info[] = {
44 { MS_SYNCHRONOUS, ",sync" },
45 { MS_DIRSYNC, ",dirsync" },
46 { MS_MANDLOCK, ",mand" },
47 { 0, NULL }
48 };
49 const struct proc_fs_info *fs_infop;
50
51 for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
52 if (sb->s_flags & fs_infop->flag)
53 seq_puts(m, fs_infop->str);
54 }
55
56 return security_sb_show_options(m, sb);
57}
58
59static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
60{
61 static const struct proc_fs_info mnt_info[] = {
62 { MNT_NOSUID, ",nosuid" },
63 { MNT_NODEV, ",nodev" },
64 { MNT_NOEXEC, ",noexec" },
65 { MNT_NOATIME, ",noatime" },
66 { MNT_NODIRATIME, ",nodiratime" },
67 { MNT_RELATIME, ",relatime" },
68 { 0, NULL }
69 };
70 const struct proc_fs_info *fs_infop;
71
72 for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) {
73 if (mnt->mnt_flags & fs_infop->flag)
74 seq_puts(m, fs_infop->str);
75 }
76}
77
78static inline void mangle(struct seq_file *m, const char *s)
79{
80 seq_escape(m, s, " \t\n\\");
81}
82
83static void show_type(struct seq_file *m, struct super_block *sb)
84{
85 mangle(m, sb->s_type->name);
86 if (sb->s_subtype && sb->s_subtype[0]) {
87 seq_putc(m, '.');
88 mangle(m, sb->s_subtype);
89 }
90}
91
92static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt)
93{
94 struct mount *r = real_mount(mnt);
95 int err = 0;
96 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
97 struct super_block *sb = mnt_path.dentry->d_sb;
98
99 if (sb->s_op->show_devname) {
100 err = sb->s_op->show_devname(m, mnt_path.dentry);
101 if (err)
102 goto out;
103 } else {
104 mangle(m, r->mnt_devname ? r->mnt_devname : "none");
105 }
106 seq_putc(m, ' ');
107 seq_path(m, &mnt_path, " \t\n\\");
108 seq_putc(m, ' ');
109 show_type(m, sb);
110 seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
111 err = show_sb_opts(m, sb);
112 if (err)
113 goto out;
114 show_mnt_opts(m, mnt);
115 if (sb->s_op->show_options)
116 err = sb->s_op->show_options(m, mnt_path.dentry);
117 seq_puts(m, " 0 0\n");
118out:
119 return err;
120}
121
122static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt)
123{
124 struct proc_mounts *p = m->private;
125 struct mount *r = real_mount(mnt);
126 struct super_block *sb = mnt->mnt_sb;
127 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
128 struct path root = p->root;
129 int err = 0;
130
131 seq_printf(m, "%i %i %u:%u ", r->mnt_id, r->mnt_parent->mnt_id,
132 MAJOR(sb->s_dev), MINOR(sb->s_dev));
133 if (sb->s_op->show_path)
134 err = sb->s_op->show_path(m, mnt->mnt_root);
135 else
136 seq_dentry(m, mnt->mnt_root, " \t\n\\");
137 if (err)
138 goto out;
139 seq_putc(m, ' ');
140
141 /* mountpoints outside of chroot jail will give SEQ_SKIP on this */
142 err = seq_path_root(m, &mnt_path, &root, " \t\n\\");
143 if (err)
144 goto out;
145
146 seq_puts(m, mnt->mnt_flags & MNT_READONLY ? " ro" : " rw");
147 show_mnt_opts(m, mnt);
148
149 /* Tagged fields ("foo:X" or "bar") */
150 if (IS_MNT_SHARED(r))
151 seq_printf(m, " shared:%i", r->mnt_group_id);
152 if (IS_MNT_SLAVE(r)) {
153 int master = r->mnt_master->mnt_group_id;
154 int dom = get_dominating_id(r, &p->root);
155 seq_printf(m, " master:%i", master);
156 if (dom && dom != master)
157 seq_printf(m, " propagate_from:%i", dom);
158 }
159 if (IS_MNT_UNBINDABLE(r))
160 seq_puts(m, " unbindable");
161
162 /* Filesystem specific data */
163 seq_puts(m, " - ");
164 show_type(m, sb);
165 seq_putc(m, ' ');
166 if (sb->s_op->show_devname)
167 err = sb->s_op->show_devname(m, mnt->mnt_root);
168 else
169 mangle(m, r->mnt_devname ? r->mnt_devname : "none");
170 if (err)
171 goto out;
172 seq_puts(m, sb->s_flags & MS_RDONLY ? " ro" : " rw");
173 err = show_sb_opts(m, sb);
174 if (err)
175 goto out;
176 if (sb->s_op->show_options)
177 err = sb->s_op->show_options(m, mnt->mnt_root);
178 seq_putc(m, '\n');
179out:
180 return err;
181}
182
183static int show_vfsstat(struct seq_file *m, struct vfsmount *mnt)
184{
185 struct mount *r = real_mount(mnt);
186 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
187 struct super_block *sb = mnt_path.dentry->d_sb;
188 int err = 0;
189
190 /* device */
191 if (sb->s_op->show_devname) {
192 seq_puts(m, "device ");
193 err = sb->s_op->show_devname(m, mnt_path.dentry);
194 } else {
195 if (r->mnt_devname) {
196 seq_puts(m, "device ");
197 mangle(m, r->mnt_devname);
198 } else
199 seq_puts(m, "no device");
200 }
201
202 /* mount point */
203 seq_puts(m, " mounted on ");
204 seq_path(m, &mnt_path, " \t\n\\");
205 seq_putc(m, ' ');
206
207 /* file system type */
208 seq_puts(m, "with fstype ");
209 show_type(m, sb);
210
211 /* optional statistics */
212 if (sb->s_op->show_stats) {
213 seq_putc(m, ' ');
214 if (!err)
215 err = sb->s_op->show_stats(m, mnt_path.dentry);
216 }
217
218 seq_putc(m, '\n');
219 return err;
220}
221
222static int mounts_open_common(struct inode *inode, struct file *file,
223 int (*show)(struct seq_file *, struct vfsmount *))
224{
225 struct task_struct *task = get_proc_task(inode);
226 struct nsproxy *nsp;
227 struct mnt_namespace *ns = NULL;
228 struct path root;
229 struct proc_mounts *p;
230 int ret = -EINVAL;
231
232 if (!task)
233 goto err;
234
235 rcu_read_lock();
236 nsp = task_nsproxy(task);
237 if (!nsp) {
238 rcu_read_unlock();
239 put_task_struct(task);
240 goto err;
241 }
242 ns = nsp->mnt_ns;
243 if (!ns) {
244 rcu_read_unlock();
245 put_task_struct(task);
246 goto err;
247 }
248 get_mnt_ns(ns);
249 rcu_read_unlock();
250 task_lock(task);
251 if (!task->fs) {
252 task_unlock(task);
253 put_task_struct(task);
254 ret = -ENOENT;
255 goto err_put_ns;
256 }
257 get_fs_root(task->fs, &root);
258 task_unlock(task);
259 put_task_struct(task);
260
261 ret = -ENOMEM;
262 p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL);
263 if (!p)
264 goto err_put_path;
265
266 file->private_data = &p->m;
267 ret = seq_open(file, &mounts_op);
268 if (ret)
269 goto err_free;
270
271 p->m.private = p;
272 p->ns = ns;
273 p->root = root;
274 p->m.poll_event = ns->event;
275 p->show = show;
276
277 return 0;
278
279 err_free:
280 kfree(p);
281 err_put_path:
282 path_put(&root);
283 err_put_ns:
284 put_mnt_ns(ns);
285 err:
286 return ret;
287}
288
289static int mounts_release(struct inode *inode, struct file *file)
290{
291 struct proc_mounts *p = file->private_data;
292 path_put(&p->root);
293 put_mnt_ns(p->ns);
294 return seq_release(inode, file);
295}
296
297static int mounts_open(struct inode *inode, struct file *file)
298{
299 return mounts_open_common(inode, file, show_vfsmnt);
300}
301
302static int mountinfo_open(struct inode *inode, struct file *file)
303{
304 return mounts_open_common(inode, file, show_mountinfo);
305}
306
307static int mountstats_open(struct inode *inode, struct file *file)
308{
309 return mounts_open_common(inode, file, show_vfsstat);
310}
311
312const struct file_operations proc_mounts_operations = {
313 .open = mounts_open,
314 .read = seq_read,
315 .llseek = seq_lseek,
316 .release = mounts_release,
317 .poll = mounts_poll,
318};
319
320const struct file_operations proc_mountinfo_operations = {
321 .open = mountinfo_open,
322 .read = seq_read,
323 .llseek = seq_lseek,
324 .release = mounts_release,
325 .poll = mounts_poll,
326};
327
328const struct file_operations proc_mountstats_operations = {
329 .open = mountstats_open,
330 .read = seq_read,
331 .llseek = seq_lseek,
332 .release = mounts_release,
333};
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 379a02dc1217..b3b426edb2fd 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -80,7 +80,8 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry)
80{ 80{
81 struct pstore_private *p = dentry->d_inode->i_private; 81 struct pstore_private *p = dentry->d_inode->i_private;
82 82
83 p->psi->erase(p->type, p->id, p->psi); 83 if (p->psi->erase)
84 p->psi->erase(p->type, p->id, p->psi);
84 85
85 return simple_unlink(dir, dentry); 86 return simple_unlink(dir, dentry);
86} 87}
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 57bbf9078ac8..9ec22d3b4293 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -122,7 +122,7 @@ static void pstore_dump(struct kmsg_dumper *dumper,
122 memcpy(dst, s1 + s1_start, l1_cpy); 122 memcpy(dst, s1 + s1_start, l1_cpy);
123 memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy); 123 memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy);
124 124
125 ret = psinfo->write(PSTORE_TYPE_DMESG, &id, part, 125 ret = psinfo->write(PSTORE_TYPE_DMESG, reason, &id, part,
126 hsize + l1_cpy + l2_cpy, psinfo); 126 hsize + l1_cpy + l2_cpy, psinfo);
127 if (ret == 0 && reason == KMSG_DUMP_OOPS && pstore_is_mounted()) 127 if (ret == 0 && reason == KMSG_DUMP_OOPS && pstore_is_mounted())
128 pstore_new_entry = 1; 128 pstore_new_entry = 1;
@@ -207,8 +207,7 @@ void pstore_get_records(int quiet)
207 return; 207 return;
208 208
209 mutex_lock(&psi->read_mutex); 209 mutex_lock(&psi->read_mutex);
210 rc = psi->open(psi); 210 if (psi->open && psi->open(psi))
211 if (rc)
212 goto out; 211 goto out;
213 212
214 while ((size = psi->read(&id, &type, &time, &buf, psi)) > 0) { 213 while ((size = psi->read(&id, &type, &time, &buf, psi)) > 0) {
@@ -219,7 +218,8 @@ void pstore_get_records(int quiet)
219 if (rc && (rc != -EEXIST || !quiet)) 218 if (rc && (rc != -EEXIST || !quiet))
220 failed++; 219 failed++;
221 } 220 }
222 psi->close(psi); 221 if (psi->close)
222 psi->close(psi);
223out: 223out:
224 mutex_unlock(&psi->read_mutex); 224 mutex_unlock(&psi->read_mutex);
225 225
@@ -243,33 +243,5 @@ static void pstore_timefunc(unsigned long dummy)
243 mod_timer(&pstore_timer, jiffies + PSTORE_INTERVAL); 243 mod_timer(&pstore_timer, jiffies + PSTORE_INTERVAL);
244} 244}
245 245
246/*
247 * Call platform driver to write a record to the
248 * persistent store.
249 */
250int pstore_write(enum pstore_type_id type, char *buf, size_t size)
251{
252 u64 id;
253 int ret;
254 unsigned long flags;
255
256 if (!psinfo)
257 return -ENODEV;
258
259 if (size > psinfo->bufsize)
260 return -EFBIG;
261
262 spin_lock_irqsave(&psinfo->buf_lock, flags);
263 memcpy(psinfo->buf, buf, size);
264 ret = psinfo->write(type, &id, 0, size, psinfo);
265 if (ret == 0 && pstore_is_mounted())
266 pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, psinfo->buf,
267 size, CURRENT_TIME, psinfo);
268 spin_unlock_irqrestore(&psinfo->buf_lock, flags);
269
270 return 0;
271}
272EXPORT_SYMBOL_GPL(pstore_write);
273
274module_param(backend, charp, 0444); 246module_param(backend, charp, 0444);
275MODULE_PARM_DESC(backend, "Pstore backend to use"); 247MODULE_PARM_DESC(backend, "Pstore backend to use");
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 3bdd21418432..2bfd987f4853 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -199,12 +199,13 @@ static const char *qnx4_checkroot(struct super_block *sb)
199 if (!strcmp(rootdir->di_fname, 199 if (!strcmp(rootdir->di_fname,
200 QNX4_BMNAME)) { 200 QNX4_BMNAME)) {
201 found = 1; 201 found = 1;
202 qnx4_sb(sb)->BitMap = kmalloc( sizeof( struct qnx4_inode_entry ), GFP_KERNEL ); 202 qnx4_sb(sb)->BitMap = kmemdup(rootdir,
203 sizeof(struct qnx4_inode_entry),
204 GFP_KERNEL);
203 if (!qnx4_sb(sb)->BitMap) { 205 if (!qnx4_sb(sb)->BitMap) {
204 brelse (bh); 206 brelse (bh);
205 return "not enough memory for bitmap inode"; 207 return "not enough memory for bitmap inode";
206 } 208 }/* keep bitmap inode known */
207 memcpy( qnx4_sb(sb)->BitMap, rootdir, sizeof( struct qnx4_inode_entry ) ); /* keep bitmap inode known */
208 break; 209 break;
209 } 210 }
210 } 211 }
@@ -427,7 +428,6 @@ static struct inode *qnx4_alloc_inode(struct super_block *sb)
427static void qnx4_i_callback(struct rcu_head *head) 428static void qnx4_i_callback(struct rcu_head *head)
428{ 429{
429 struct inode *inode = container_of(head, struct inode, i_rcu); 430 struct inode *inode = container_of(head, struct inode, i_rcu);
430 INIT_LIST_HEAD(&inode->i_dentry);
431 kmem_cache_free(qnx4_inode_cachep, qnx4_i(inode)); 431 kmem_cache_free(qnx4_inode_cachep, qnx4_i(inode));
432} 432}
433 433
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 5b572c89e6c4..5ec59b20cf76 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -73,7 +73,6 @@
73#include <linux/security.h> 73#include <linux/security.h>
74#include <linux/kmod.h> 74#include <linux/kmod.h>
75#include <linux/namei.h> 75#include <linux/namei.h>
76#include <linux/buffer_head.h>
77#include <linux/capability.h> 76#include <linux/capability.h>
78#include <linux/quotaops.h> 77#include <linux/quotaops.h>
79#include "../internal.h" /* ugh */ 78#include "../internal.h" /* ugh */
@@ -2199,7 +2198,7 @@ int dquot_quota_on(struct super_block *sb, int type, int format_id,
2199 if (error) 2198 if (error)
2200 return error; 2199 return error;
2201 /* Quota file not on the same filesystem? */ 2200 /* Quota file not on the same filesystem? */
2202 if (path->mnt->mnt_sb != sb) 2201 if (path->dentry->d_sb != sb)
2203 error = -EXDEV; 2202 error = -EXDEV;
2204 else 2203 else
2205 error = vfs_load_quota_inode(path->dentry->d_inode, type, 2204 error = vfs_load_quota_inode(path->dentry->d_inode, type,
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 35f4b0ecdeb3..7898cd688a00 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -13,7 +13,6 @@
13#include <linux/kernel.h> 13#include <linux/kernel.h>
14#include <linux/security.h> 14#include <linux/security.h>
15#include <linux/syscalls.h> 15#include <linux/syscalls.h>
16#include <linux/buffer_head.h>
17#include <linux/capability.h> 16#include <linux/capability.h>
18#include <linux/quotaops.h> 17#include <linux/quotaops.h>
19#include <linux/types.h> 18#include <linux/types.h>
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 462ceb38fec6..aec766abe3af 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -52,7 +52,7 @@ static struct backing_dev_info ramfs_backing_dev_info = {
52}; 52};
53 53
54struct inode *ramfs_get_inode(struct super_block *sb, 54struct inode *ramfs_get_inode(struct super_block *sb,
55 const struct inode *dir, int mode, dev_t dev) 55 const struct inode *dir, umode_t mode, dev_t dev)
56{ 56{
57 struct inode * inode = new_inode(sb); 57 struct inode * inode = new_inode(sb);
58 58
@@ -92,7 +92,7 @@ struct inode *ramfs_get_inode(struct super_block *sb,
92 */ 92 */
93/* SMP-safe */ 93/* SMP-safe */
94static int 94static int
95ramfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) 95ramfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
96{ 96{
97 struct inode * inode = ramfs_get_inode(dir->i_sb, dir, mode, dev); 97 struct inode * inode = ramfs_get_inode(dir->i_sb, dir, mode, dev);
98 int error = -ENOSPC; 98 int error = -ENOSPC;
@@ -106,7 +106,7 @@ ramfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
106 return error; 106 return error;
107} 107}
108 108
109static int ramfs_mkdir(struct inode * dir, struct dentry * dentry, int mode) 109static int ramfs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
110{ 110{
111 int retval = ramfs_mknod(dir, dentry, mode | S_IFDIR, 0); 111 int retval = ramfs_mknod(dir, dentry, mode | S_IFDIR, 0);
112 if (!retval) 112 if (!retval)
@@ -114,7 +114,7 @@ static int ramfs_mkdir(struct inode * dir, struct dentry * dentry, int mode)
114 return retval; 114 return retval;
115} 115}
116 116
117static int ramfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) 117static int ramfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd)
118{ 118{
119 return ramfs_mknod(dir, dentry, mode | S_IFREG, 0); 119 return ramfs_mknod(dir, dentry, mode | S_IFREG, 0);
120} 120}
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index d1aca1df4f92..70de42f09f1d 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -13,6 +13,7 @@
13#include <linux/reiserfs_fs_sb.h> 13#include <linux/reiserfs_fs_sb.h>
14#include <linux/reiserfs_fs_i.h> 14#include <linux/reiserfs_fs_i.h>
15#include <linux/quotaops.h> 15#include <linux/quotaops.h>
16#include <linux/seq_file.h>
16 17
17#define PREALLOCATION_SIZE 9 18#define PREALLOCATION_SIZE 9
18 19
@@ -634,6 +635,96 @@ int reiserfs_parse_alloc_options(struct super_block *s, char *options)
634 return 0; 635 return 0;
635} 636}
636 637
638static void print_sep(struct seq_file *seq, int *first)
639{
640 if (!*first)
641 seq_puts(seq, ":");
642 else
643 *first = 0;
644}
645
646void show_alloc_options(struct seq_file *seq, struct super_block *s)
647{
648 int first = 1;
649
650 if (SB_ALLOC_OPTS(s) == ((1 << _ALLOC_skip_busy) |
651 (1 << _ALLOC_dirid_groups) | (1 << _ALLOC_packing_groups)))
652 return;
653
654 seq_puts(seq, ",alloc=");
655
656 if (TEST_OPTION(concentrating_formatted_nodes, s)) {
657 print_sep(seq, &first);
658 if (REISERFS_SB(s)->s_alloc_options.border != 10) {
659 seq_printf(seq, "concentrating_formatted_nodes=%d",
660 100 / REISERFS_SB(s)->s_alloc_options.border);
661 } else
662 seq_puts(seq, "concentrating_formatted_nodes");
663 }
664 if (TEST_OPTION(displacing_large_files, s)) {
665 print_sep(seq, &first);
666 if (REISERFS_SB(s)->s_alloc_options.large_file_size != 16) {
667 seq_printf(seq, "displacing_large_files=%lu",
668 REISERFS_SB(s)->s_alloc_options.large_file_size);
669 } else
670 seq_puts(seq, "displacing_large_files");
671 }
672 if (TEST_OPTION(displacing_new_packing_localities, s)) {
673 print_sep(seq, &first);
674 seq_puts(seq, "displacing_new_packing_localities");
675 }
676 if (TEST_OPTION(old_hashed_relocation, s)) {
677 print_sep(seq, &first);
678 seq_puts(seq, "old_hashed_relocation");
679 }
680 if (TEST_OPTION(new_hashed_relocation, s)) {
681 print_sep(seq, &first);
682 seq_puts(seq, "new_hashed_relocation");
683 }
684 if (TEST_OPTION(dirid_groups, s)) {
685 print_sep(seq, &first);
686 seq_puts(seq, "dirid_groups");
687 }
688 if (TEST_OPTION(oid_groups, s)) {
689 print_sep(seq, &first);
690 seq_puts(seq, "oid_groups");
691 }
692 if (TEST_OPTION(packing_groups, s)) {
693 print_sep(seq, &first);
694 seq_puts(seq, "packing_groups");
695 }
696 if (TEST_OPTION(hashed_formatted_nodes, s)) {
697 print_sep(seq, &first);
698 seq_puts(seq, "hashed_formatted_nodes");
699 }
700 if (TEST_OPTION(skip_busy, s)) {
701 print_sep(seq, &first);
702 seq_puts(seq, "skip_busy");
703 }
704 if (TEST_OPTION(hundredth_slices, s)) {
705 print_sep(seq, &first);
706 seq_puts(seq, "hundredth_slices");
707 }
708 if (TEST_OPTION(old_way, s)) {
709 print_sep(seq, &first);
710 seq_puts(seq, "old_way");
711 }
712 if (TEST_OPTION(displace_based_on_dirid, s)) {
713 print_sep(seq, &first);
714 seq_puts(seq, "displace_based_on_dirid");
715 }
716 if (REISERFS_SB(s)->s_alloc_options.preallocmin != 0) {
717 print_sep(seq, &first);
718 seq_printf(seq, "preallocmin=%d",
719 REISERFS_SB(s)->s_alloc_options.preallocmin);
720 }
721 if (REISERFS_SB(s)->s_alloc_options.preallocsize != 17) {
722 print_sep(seq, &first);
723 seq_printf(seq, "preallocsize=%d",
724 REISERFS_SB(s)->s_alloc_options.preallocsize);
725 }
726}
727
637static inline void new_hashed_relocation(reiserfs_blocknr_hint_t * hint) 728static inline void new_hashed_relocation(reiserfs_blocknr_hint_t * hint)
638{ 729{
639 char *hash_in; 730 char *hash_in;
@@ -1273,10 +1364,7 @@ int reiserfs_init_bitmap_cache(struct super_block *sb)
1273 struct reiserfs_bitmap_info *bitmap; 1364 struct reiserfs_bitmap_info *bitmap;
1274 unsigned int bmap_nr = reiserfs_bmap_count(sb); 1365 unsigned int bmap_nr = reiserfs_bmap_count(sb);
1275 1366
1276 /* Avoid lock recursion in fault case */
1277 reiserfs_write_unlock(sb);
1278 bitmap = vmalloc(sizeof(*bitmap) * bmap_nr); 1367 bitmap = vmalloc(sizeof(*bitmap) * bmap_nr);
1279 reiserfs_write_lock(sb);
1280 if (bitmap == NULL) 1368 if (bitmap == NULL)
1281 return -ENOMEM; 1369 return -ENOMEM;
1282 1370
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 950f13af0951..9e8cd5acd79c 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1766,7 +1766,7 @@ static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, struct i
1766 for the fresh inode. This can only be done outside a transaction, so 1766 for the fresh inode. This can only be done outside a transaction, so
1767 if we return non-zero, we also end the transaction. */ 1767 if we return non-zero, we also end the transaction. */
1768int reiserfs_new_inode(struct reiserfs_transaction_handle *th, 1768int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1769 struct inode *dir, int mode, const char *symname, 1769 struct inode *dir, umode_t mode, const char *symname,
1770 /* 0 for regular, EMTRY_DIR_SIZE for dirs, 1770 /* 0 for regular, EMTRY_DIR_SIZE for dirs,
1771 strlen (symname) for symlinks) */ 1771 strlen (symname) for symlinks) */
1772 loff_t i_size, struct dentry *dentry, 1772 loff_t i_size, struct dentry *dentry,
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 4e153051bc75..950e3d1b5c9e 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -55,7 +55,7 @@ long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
55 break; 55 break;
56 } 56 }
57 57
58 err = mnt_want_write(filp->f_path.mnt); 58 err = mnt_want_write_file(filp);
59 if (err) 59 if (err)
60 break; 60 break;
61 61
@@ -96,7 +96,7 @@ long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
96 inode->i_ctime = CURRENT_TIME_SEC; 96 inode->i_ctime = CURRENT_TIME_SEC;
97 mark_inode_dirty(inode); 97 mark_inode_dirty(inode);
98setflags_out: 98setflags_out:
99 mnt_drop_write(filp->f_path.mnt); 99 mnt_drop_write_file(filp);
100 break; 100 break;
101 } 101 }
102 case REISERFS_IOC_GETVERSION: 102 case REISERFS_IOC_GETVERSION:
@@ -107,7 +107,7 @@ setflags_out:
107 err = -EPERM; 107 err = -EPERM;
108 break; 108 break;
109 } 109 }
110 err = mnt_want_write(filp->f_path.mnt); 110 err = mnt_want_write_file(filp);
111 if (err) 111 if (err)
112 break; 112 break;
113 if (get_user(inode->i_generation, (int __user *)arg)) { 113 if (get_user(inode->i_generation, (int __user *)arg)) {
@@ -117,7 +117,7 @@ setflags_out:
117 inode->i_ctime = CURRENT_TIME_SEC; 117 inode->i_ctime = CURRENT_TIME_SEC;
118 mark_inode_dirty(inode); 118 mark_inode_dirty(inode);
119setversion_out: 119setversion_out:
120 mnt_drop_write(filp->f_path.mnt); 120 mnt_drop_write_file(filp);
121 break; 121 break;
122 default: 122 default:
123 err = -ENOTTY; 123 err = -ENOTTY;
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index eb711060a6f2..c3cf54fd4de3 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2678,16 +2678,10 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2678 char b[BDEVNAME_SIZE]; 2678 char b[BDEVNAME_SIZE];
2679 int ret; 2679 int ret;
2680 2680
2681 /*
2682 * Unlock here to avoid various RECLAIM-FS-ON <-> IN-RECLAIM-FS
2683 * dependency inversion warnings.
2684 */
2685 reiserfs_write_unlock(sb);
2686 journal = SB_JOURNAL(sb) = vzalloc(sizeof(struct reiserfs_journal)); 2681 journal = SB_JOURNAL(sb) = vzalloc(sizeof(struct reiserfs_journal));
2687 if (!journal) { 2682 if (!journal) {
2688 reiserfs_warning(sb, "journal-1256", 2683 reiserfs_warning(sb, "journal-1256",
2689 "unable to get memory for journal structure"); 2684 "unable to get memory for journal structure");
2690 reiserfs_write_lock(sb);
2691 return 1; 2685 return 1;
2692 } 2686 }
2693 INIT_LIST_HEAD(&journal->j_bitmap_nodes); 2687 INIT_LIST_HEAD(&journal->j_bitmap_nodes);
@@ -2695,10 +2689,8 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2695 INIT_LIST_HEAD(&journal->j_working_list); 2689 INIT_LIST_HEAD(&journal->j_working_list);
2696 INIT_LIST_HEAD(&journal->j_journal_list); 2690 INIT_LIST_HEAD(&journal->j_journal_list);
2697 journal->j_persistent_trans = 0; 2691 journal->j_persistent_trans = 0;
2698 ret = reiserfs_allocate_list_bitmaps(sb, journal->j_list_bitmap, 2692 if (reiserfs_allocate_list_bitmaps(sb, journal->j_list_bitmap,
2699 reiserfs_bmap_count(sb)); 2693 reiserfs_bmap_count(sb)))
2700 reiserfs_write_lock(sb);
2701 if (ret)
2702 goto free_and_return; 2694 goto free_and_return;
2703 2695
2704 allocate_bitmap_nodes(sb); 2696 allocate_bitmap_nodes(sb);
@@ -2727,27 +2719,11 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2727 goto free_and_return; 2719 goto free_and_return;
2728 } 2720 }
2729 2721
2730 /*
2731 * We need to unlock here to avoid creating the following
2732 * dependency:
2733 * reiserfs_lock -> sysfs_mutex
2734 * Because the reiserfs mmap path creates the following dependency:
2735 * mm->mmap -> reiserfs_lock, hence we have
2736 * mm->mmap -> reiserfs_lock ->sysfs_mutex
2737 * This would ends up in a circular dependency with sysfs readdir path
2738 * which does sysfs_mutex -> mm->mmap_sem
2739 * This is fine because the reiserfs lock is useless in mount path,
2740 * at least until we call journal_begin. We keep it for paranoid
2741 * reasons.
2742 */
2743 reiserfs_write_unlock(sb);
2744 if (journal_init_dev(sb, journal, j_dev_name) != 0) { 2722 if (journal_init_dev(sb, journal, j_dev_name) != 0) {
2745 reiserfs_write_lock(sb);
2746 reiserfs_warning(sb, "sh-462", 2723 reiserfs_warning(sb, "sh-462",
2747 "unable to initialize jornal device"); 2724 "unable to initialize jornal device");
2748 goto free_and_return; 2725 goto free_and_return;
2749 } 2726 }
2750 reiserfs_write_lock(sb);
2751 2727
2752 rs = SB_DISK_SUPER_BLOCK(sb); 2728 rs = SB_DISK_SUPER_BLOCK(sb);
2753 2729
@@ -2829,9 +2805,7 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2829 journal->j_mount_id = 10; 2805 journal->j_mount_id = 10;
2830 journal->j_state = 0; 2806 journal->j_state = 0;
2831 atomic_set(&(journal->j_jlock), 0); 2807 atomic_set(&(journal->j_jlock), 0);
2832 reiserfs_write_unlock(sb);
2833 journal->j_cnode_free_list = allocate_cnodes(num_cnodes); 2808 journal->j_cnode_free_list = allocate_cnodes(num_cnodes);
2834 reiserfs_write_lock(sb);
2835 journal->j_cnode_free_orig = journal->j_cnode_free_list; 2809 journal->j_cnode_free_orig = journal->j_cnode_free_list;
2836 journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0; 2810 journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0;
2837 journal->j_cnode_used = 0; 2811 journal->j_cnode_used = 0;
@@ -2848,24 +2822,37 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2848 2822
2849 init_journal_hash(sb); 2823 init_journal_hash(sb);
2850 jl = journal->j_current_jl; 2824 jl = journal->j_current_jl;
2825
2826 /*
2827 * get_list_bitmap() may call flush_commit_list() which
2828 * requires the lock. Calling flush_commit_list() shouldn't happen
2829 * this early but I like to be paranoid.
2830 */
2831 reiserfs_write_lock(sb);
2851 jl->j_list_bitmap = get_list_bitmap(sb, jl); 2832 jl->j_list_bitmap = get_list_bitmap(sb, jl);
2833 reiserfs_write_unlock(sb);
2852 if (!jl->j_list_bitmap) { 2834 if (!jl->j_list_bitmap) {
2853 reiserfs_warning(sb, "journal-2005", 2835 reiserfs_warning(sb, "journal-2005",
2854 "get_list_bitmap failed for journal list 0"); 2836 "get_list_bitmap failed for journal list 0");
2855 goto free_and_return; 2837 goto free_and_return;
2856 } 2838 }
2857 if (journal_read(sb) < 0) { 2839
2840 /*
2841 * Journal_read needs to be inspected in order to push down
2842 * the lock further inside (or even remove it).
2843 */
2844 reiserfs_write_lock(sb);
2845 ret = journal_read(sb);
2846 reiserfs_write_unlock(sb);
2847 if (ret < 0) {
2858 reiserfs_warning(sb, "reiserfs-2006", 2848 reiserfs_warning(sb, "reiserfs-2006",
2859 "Replay Failure, unable to mount"); 2849 "Replay Failure, unable to mount");
2860 goto free_and_return; 2850 goto free_and_return;
2861 } 2851 }
2862 2852
2863 reiserfs_mounted_fs_count++; 2853 reiserfs_mounted_fs_count++;
2864 if (reiserfs_mounted_fs_count <= 1) { 2854 if (reiserfs_mounted_fs_count <= 1)
2865 reiserfs_write_unlock(sb);
2866 commit_wq = alloc_workqueue("reiserfs", WQ_MEM_RECLAIM, 0); 2855 commit_wq = alloc_workqueue("reiserfs", WQ_MEM_RECLAIM, 0);
2867 reiserfs_write_lock(sb);
2868 }
2869 2856
2870 INIT_DELAYED_WORK(&journal->j_work, flush_async_commits); 2857 INIT_DELAYED_WORK(&journal->j_work, flush_async_commits);
2871 journal->j_work_sb = sb; 2858 journal->j_work_sb = sb;
@@ -2896,14 +2883,13 @@ int journal_transaction_should_end(struct reiserfs_transaction_handle *th,
2896 journal->j_cnode_free < (journal->j_trans_max * 3)) { 2883 journal->j_cnode_free < (journal->j_trans_max * 3)) {
2897 return 1; 2884 return 1;
2898 } 2885 }
2899 /* protected by the BKL here */ 2886
2900 journal->j_len_alloc += new_alloc; 2887 journal->j_len_alloc += new_alloc;
2901 th->t_blocks_allocated += new_alloc ; 2888 th->t_blocks_allocated += new_alloc ;
2902 return 0; 2889 return 0;
2903} 2890}
2904 2891
2905/* this must be called inside a transaction, and requires the 2892/* this must be called inside a transaction
2906** kernel_lock to be held
2907*/ 2893*/
2908void reiserfs_block_writes(struct reiserfs_transaction_handle *th) 2894void reiserfs_block_writes(struct reiserfs_transaction_handle *th)
2909{ 2895{
@@ -2914,8 +2900,7 @@ void reiserfs_block_writes(struct reiserfs_transaction_handle *th)
2914 return; 2900 return;
2915} 2901}
2916 2902
2917/* this must be called without a transaction started, and does not 2903/* this must be called without a transaction started
2918** require BKL
2919*/ 2904*/
2920void reiserfs_allow_writes(struct super_block *s) 2905void reiserfs_allow_writes(struct super_block *s)
2921{ 2906{
@@ -2924,8 +2909,7 @@ void reiserfs_allow_writes(struct super_block *s)
2924 wake_up(&journal->j_join_wait); 2909 wake_up(&journal->j_join_wait);
2925} 2910}
2926 2911
2927/* this must be called without a transaction started, and does not 2912/* this must be called without a transaction started
2928** require BKL
2929*/ 2913*/
2930void reiserfs_wait_on_write_block(struct super_block *s) 2914void reiserfs_wait_on_write_block(struct super_block *s)
2931{ 2915{
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 80058e8ce361..146378865239 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -559,7 +559,7 @@ static int drop_new_inode(struct inode *inode)
559** outside of a transaction, so we had to pull some bits of 559** outside of a transaction, so we had to pull some bits of
560** reiserfs_new_inode out into this func. 560** reiserfs_new_inode out into this func.
561*/ 561*/
562static int new_inode_init(struct inode *inode, struct inode *dir, int mode) 562static int new_inode_init(struct inode *inode, struct inode *dir, umode_t mode)
563{ 563{
564 /* Make inode invalid - just in case we are going to drop it before 564 /* Make inode invalid - just in case we are going to drop it before
565 * the initialization happens */ 565 * the initialization happens */
@@ -572,7 +572,7 @@ static int new_inode_init(struct inode *inode, struct inode *dir, int mode)
572 return 0; 572 return 0;
573} 573}
574 574
575static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode, 575static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
576 struct nameidata *nd) 576 struct nameidata *nd)
577{ 577{
578 int retval; 578 int retval;
@@ -643,7 +643,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode,
643 return retval; 643 return retval;
644} 644}
645 645
646static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode, 646static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
647 dev_t rdev) 647 dev_t rdev)
648{ 648{
649 int retval; 649 int retval;
@@ -721,7 +721,7 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
721 return retval; 721 return retval;
722} 722}
723 723
724static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 724static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
725{ 725{
726 int retval; 726 int retval;
727 struct inode *inode; 727 struct inode *inode;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 14363b96b6af..e12d8b97cd4d 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -28,6 +28,7 @@
28#include <linux/mount.h> 28#include <linux/mount.h>
29#include <linux/namei.h> 29#include <linux/namei.h>
30#include <linux/crc32.h> 30#include <linux/crc32.h>
31#include <linux/seq_file.h>
31 32
32struct file_system_type reiserfs_fs_type; 33struct file_system_type reiserfs_fs_type;
33 34
@@ -61,6 +62,7 @@ static int is_any_reiserfs_magic_string(struct reiserfs_super_block *rs)
61 62
62static int reiserfs_remount(struct super_block *s, int *flags, char *data); 63static int reiserfs_remount(struct super_block *s, int *flags, char *data);
63static int reiserfs_statfs(struct dentry *dentry, struct kstatfs *buf); 64static int reiserfs_statfs(struct dentry *dentry, struct kstatfs *buf);
65void show_alloc_options(struct seq_file *seq, struct super_block *s);
64 66
65static int reiserfs_sync_fs(struct super_block *s, int wait) 67static int reiserfs_sync_fs(struct super_block *s, int wait)
66{ 68{
@@ -453,16 +455,20 @@ int remove_save_link(struct inode *inode, int truncate)
453static void reiserfs_kill_sb(struct super_block *s) 455static void reiserfs_kill_sb(struct super_block *s)
454{ 456{
455 if (REISERFS_SB(s)) { 457 if (REISERFS_SB(s)) {
456 if (REISERFS_SB(s)->xattr_root) { 458 /*
457 d_invalidate(REISERFS_SB(s)->xattr_root); 459 * Force any pending inode evictions to occur now. Any
458 dput(REISERFS_SB(s)->xattr_root); 460 * inodes to be removed that have extended attributes
459 REISERFS_SB(s)->xattr_root = NULL; 461 * associated with them need to clean them up before
460 } 462 * we can release the extended attribute root dentries.
461 if (REISERFS_SB(s)->priv_root) { 463 * shrink_dcache_for_umount will BUG if we don't release
462 d_invalidate(REISERFS_SB(s)->priv_root); 464 * those before it's called so ->put_super is too late.
463 dput(REISERFS_SB(s)->priv_root); 465 */
464 REISERFS_SB(s)->priv_root = NULL; 466 shrink_dcache_sb(s);
465 } 467
468 dput(REISERFS_SB(s)->xattr_root);
469 REISERFS_SB(s)->xattr_root = NULL;
470 dput(REISERFS_SB(s)->priv_root);
471 REISERFS_SB(s)->priv_root = NULL;
466 } 472 }
467 473
468 kill_block_super(s); 474 kill_block_super(s);
@@ -532,7 +538,6 @@ static struct inode *reiserfs_alloc_inode(struct super_block *sb)
532static void reiserfs_i_callback(struct rcu_head *head) 538static void reiserfs_i_callback(struct rcu_head *head)
533{ 539{
534 struct inode *inode = container_of(head, struct inode, i_rcu); 540 struct inode *inode = container_of(head, struct inode, i_rcu);
535 INIT_LIST_HEAD(&inode->i_dentry);
536 kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode)); 541 kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode));
537} 542}
538 543
@@ -597,6 +602,82 @@ out:
597 reiserfs_write_unlock_once(inode->i_sb, lock_depth); 602 reiserfs_write_unlock_once(inode->i_sb, lock_depth);
598} 603}
599 604
605static int reiserfs_show_options(struct seq_file *seq, struct dentry *root)
606{
607 struct super_block *s = root->d_sb;
608 struct reiserfs_journal *journal = SB_JOURNAL(s);
609 long opts = REISERFS_SB(s)->s_mount_opt;
610
611 if (opts & (1 << REISERFS_LARGETAIL))
612 seq_puts(seq, ",tails=on");
613 else if (!(opts & (1 << REISERFS_SMALLTAIL)))
614 seq_puts(seq, ",notail");
615 /* tails=small is default so we don't show it */
616
617 if (!(opts & (1 << REISERFS_BARRIER_FLUSH)))
618 seq_puts(seq, ",barrier=none");
619 /* barrier=flush is default so we don't show it */
620
621 if (opts & (1 << REISERFS_ERROR_CONTINUE))
622 seq_puts(seq, ",errors=continue");
623 else if (opts & (1 << REISERFS_ERROR_PANIC))
624 seq_puts(seq, ",errors=panic");
625 /* errors=ro is default so we don't show it */
626
627 if (opts & (1 << REISERFS_DATA_LOG))
628 seq_puts(seq, ",data=journal");
629 else if (opts & (1 << REISERFS_DATA_WRITEBACK))
630 seq_puts(seq, ",data=writeback");
631 /* data=ordered is default so we don't show it */
632
633 if (opts & (1 << REISERFS_ATTRS))
634 seq_puts(seq, ",attrs");
635
636 if (opts & (1 << REISERFS_XATTRS_USER))
637 seq_puts(seq, ",user_xattr");
638
639 if (opts & (1 << REISERFS_EXPOSE_PRIVROOT))
640 seq_puts(seq, ",expose_privroot");
641
642 if (opts & (1 << REISERFS_POSIXACL))
643 seq_puts(seq, ",acl");
644
645 if (REISERFS_SB(s)->s_jdev)
646 seq_printf(seq, ",jdev=%s", REISERFS_SB(s)->s_jdev);
647
648 if (journal->j_max_commit_age != journal->j_default_max_commit_age)
649 seq_printf(seq, ",commit=%d", journal->j_max_commit_age);
650
651#ifdef CONFIG_QUOTA
652 if (REISERFS_SB(s)->s_qf_names[USRQUOTA])
653 seq_printf(seq, ",usrjquota=%s", REISERFS_SB(s)->s_qf_names[USRQUOTA]);
654 else if (opts & (1 << REISERFS_USRQUOTA))
655 seq_puts(seq, ",usrquota");
656 if (REISERFS_SB(s)->s_qf_names[GRPQUOTA])
657 seq_printf(seq, ",grpjquota=%s", REISERFS_SB(s)->s_qf_names[GRPQUOTA]);
658 else if (opts & (1 << REISERFS_GRPQUOTA))
659 seq_puts(seq, ",grpquota");
660 if (REISERFS_SB(s)->s_jquota_fmt) {
661 if (REISERFS_SB(s)->s_jquota_fmt == QFMT_VFS_OLD)
662 seq_puts(seq, ",jqfmt=vfsold");
663 else if (REISERFS_SB(s)->s_jquota_fmt == QFMT_VFS_V0)
664 seq_puts(seq, ",jqfmt=vfsv0");
665 }
666#endif
667
668 /* Block allocator options */
669 if (opts & (1 << REISERFS_NO_BORDER))
670 seq_puts(seq, ",block-allocator=noborder");
671 if (opts & (1 << REISERFS_NO_UNHASHED_RELOCATION))
672 seq_puts(seq, ",block-allocator=no_unhashed_relocation");
673 if (opts & (1 << REISERFS_HASHED_RELOCATION))
674 seq_puts(seq, ",block-allocator=hashed_relocation");
675 if (opts & (1 << REISERFS_TEST4))
676 seq_puts(seq, ",block-allocator=test4");
677 show_alloc_options(seq, s);
678 return 0;
679}
680
600#ifdef CONFIG_QUOTA 681#ifdef CONFIG_QUOTA
601static ssize_t reiserfs_quota_write(struct super_block *, int, const char *, 682static ssize_t reiserfs_quota_write(struct super_block *, int, const char *,
602 size_t, loff_t); 683 size_t, loff_t);
@@ -617,7 +698,7 @@ static const struct super_operations reiserfs_sops = {
617 .unfreeze_fs = reiserfs_unfreeze, 698 .unfreeze_fs = reiserfs_unfreeze,
618 .statfs = reiserfs_statfs, 699 .statfs = reiserfs_statfs,
619 .remount_fs = reiserfs_remount, 700 .remount_fs = reiserfs_remount,
620 .show_options = generic_show_options, 701 .show_options = reiserfs_show_options,
621#ifdef CONFIG_QUOTA 702#ifdef CONFIG_QUOTA
622 .quota_read = reiserfs_quota_read, 703 .quota_read = reiserfs_quota_read,
623 .quota_write = reiserfs_quota_write, 704 .quota_write = reiserfs_quota_write,
@@ -915,9 +996,9 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
915 {"jdev",.arg_required = 'j',.values = NULL}, 996 {"jdev",.arg_required = 'j',.values = NULL},
916 {"nolargeio",.arg_required = 'w',.values = NULL}, 997 {"nolargeio",.arg_required = 'w',.values = NULL},
917 {"commit",.arg_required = 'c',.values = NULL}, 998 {"commit",.arg_required = 'c',.values = NULL},
918 {"usrquota",.setmask = 1 << REISERFS_QUOTA}, 999 {"usrquota",.setmask = 1 << REISERFS_USRQUOTA},
919 {"grpquota",.setmask = 1 << REISERFS_QUOTA}, 1000 {"grpquota",.setmask = 1 << REISERFS_GRPQUOTA},
920 {"noquota",.clrmask = 1 << REISERFS_QUOTA}, 1001 {"noquota",.clrmask = 1 << REISERFS_USRQUOTA | 1 << REISERFS_GRPQUOTA},
921 {"errors",.arg_required = 'e',.values = error_actions}, 1002 {"errors",.arg_required = 'e',.values = error_actions},
922 {"usrjquota",.arg_required = 1003 {"usrjquota",.arg_required =
923 'u' | (1 << REISERFS_OPT_ALLOWEMPTY),.values = NULL}, 1004 'u' | (1 << REISERFS_OPT_ALLOWEMPTY),.values = NULL},
@@ -1031,12 +1112,19 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
1031 return 0; 1112 return 0;
1032 } 1113 }
1033 strcpy(qf_names[qtype], arg); 1114 strcpy(qf_names[qtype], arg);
1034 *mount_options |= 1 << REISERFS_QUOTA; 1115 if (qtype == USRQUOTA)
1116 *mount_options |= 1 << REISERFS_USRQUOTA;
1117 else
1118 *mount_options |= 1 << REISERFS_GRPQUOTA;
1035 } else { 1119 } else {
1036 if (qf_names[qtype] != 1120 if (qf_names[qtype] !=
1037 REISERFS_SB(s)->s_qf_names[qtype]) 1121 REISERFS_SB(s)->s_qf_names[qtype])
1038 kfree(qf_names[qtype]); 1122 kfree(qf_names[qtype]);
1039 qf_names[qtype] = NULL; 1123 qf_names[qtype] = NULL;
1124 if (qtype == USRQUOTA)
1125 *mount_options &= ~(1 << REISERFS_USRQUOTA);
1126 else
1127 *mount_options &= ~(1 << REISERFS_GRPQUOTA);
1040 } 1128 }
1041 } 1129 }
1042 if (c == 'f') { 1130 if (c == 'f') {
@@ -1075,9 +1163,10 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
1075 "journaled quota format not specified."); 1163 "journaled quota format not specified.");
1076 return 0; 1164 return 0;
1077 } 1165 }
1078 /* This checking is not precise wrt the quota type but for our purposes it is sufficient */ 1166 if ((!(*mount_options & (1 << REISERFS_USRQUOTA)) &&
1079 if (!(*mount_options & (1 << REISERFS_QUOTA)) 1167 sb_has_quota_loaded(s, USRQUOTA)) ||
1080 && sb_any_quota_loaded(s)) { 1168 (!(*mount_options & (1 << REISERFS_GRPQUOTA)) &&
1169 sb_has_quota_loaded(s, GRPQUOTA))) {
1081 reiserfs_warning(s, "super-6516", "quota options must " 1170 reiserfs_warning(s, "super-6516", "quota options must "
1082 "be present when quota is turned on."); 1171 "be present when quota is turned on.");
1083 return 0; 1172 return 0;
@@ -1164,7 +1253,8 @@ static void handle_quota_files(struct super_block *s, char **qf_names,
1164 kfree(REISERFS_SB(s)->s_qf_names[i]); 1253 kfree(REISERFS_SB(s)->s_qf_names[i]);
1165 REISERFS_SB(s)->s_qf_names[i] = qf_names[i]; 1254 REISERFS_SB(s)->s_qf_names[i] = qf_names[i];
1166 } 1255 }
1167 REISERFS_SB(s)->s_jquota_fmt = *qfmt; 1256 if (*qfmt)
1257 REISERFS_SB(s)->s_jquota_fmt = *qfmt;
1168} 1258}
1169#endif 1259#endif
1170 1260
@@ -1225,7 +1315,8 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1225 safe_mask |= 1 << REISERFS_ERROR_RO; 1315 safe_mask |= 1 << REISERFS_ERROR_RO;
1226 safe_mask |= 1 << REISERFS_ERROR_CONTINUE; 1316 safe_mask |= 1 << REISERFS_ERROR_CONTINUE;
1227 safe_mask |= 1 << REISERFS_ERROR_PANIC; 1317 safe_mask |= 1 << REISERFS_ERROR_PANIC;
1228 safe_mask |= 1 << REISERFS_QUOTA; 1318 safe_mask |= 1 << REISERFS_USRQUOTA;
1319 safe_mask |= 1 << REISERFS_GRPQUOTA;
1229 1320
1230 /* Update the bitmask, taking care to keep 1321 /* Update the bitmask, taking care to keep
1231 * the bits we're not allowed to change here */ 1322 * the bits we're not allowed to change here */
@@ -1428,9 +1519,7 @@ static int read_super_block(struct super_block *s, int offset)
1428static int reread_meta_blocks(struct super_block *s) 1519static int reread_meta_blocks(struct super_block *s)
1429{ 1520{
1430 ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s))); 1521 ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s)));
1431 reiserfs_write_unlock(s);
1432 wait_on_buffer(SB_BUFFER_WITH_SB(s)); 1522 wait_on_buffer(SB_BUFFER_WITH_SB(s));
1433 reiserfs_write_lock(s);
1434 if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) { 1523 if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) {
1435 reiserfs_warning(s, "reiserfs-2504", "error reading the super"); 1524 reiserfs_warning(s, "reiserfs-2504", "error reading the super");
1436 return 1; 1525 return 1;
@@ -1655,22 +1744,19 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1655 mutex_init(&REISERFS_SB(s)->lock); 1744 mutex_init(&REISERFS_SB(s)->lock);
1656 REISERFS_SB(s)->lock_depth = -1; 1745 REISERFS_SB(s)->lock_depth = -1;
1657 1746
1658 /*
1659 * This function is called with the bkl, which also was the old
1660 * locking used here.
1661 * do_journal_begin() will soon check if we hold the lock (ie: was the
1662 * bkl). This is likely because do_journal_begin() has several another
1663 * callers because at this time, it doesn't seem to be necessary to
1664 * protect against anything.
1665 * Anyway, let's be conservative and lock for now.
1666 */
1667 reiserfs_write_lock(s);
1668
1669 jdev_name = NULL; 1747 jdev_name = NULL;
1670 if (reiserfs_parse_options 1748 if (reiserfs_parse_options
1671 (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name, 1749 (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name,
1672 &commit_max_age, qf_names, &qfmt) == 0) { 1750 &commit_max_age, qf_names, &qfmt) == 0) {
1673 goto error; 1751 goto error_unlocked;
1752 }
1753 if (jdev_name && jdev_name[0]) {
1754 REISERFS_SB(s)->s_jdev = kstrdup(jdev_name, GFP_KERNEL);
1755 if (!REISERFS_SB(s)->s_jdev) {
1756 SWARN(silent, s, "", "Cannot allocate memory for "
1757 "journal device name");
1758 goto error;
1759 }
1674 } 1760 }
1675#ifdef CONFIG_QUOTA 1761#ifdef CONFIG_QUOTA
1676 handle_quota_files(s, qf_names, &qfmt); 1762 handle_quota_files(s, qf_names, &qfmt);
@@ -1678,7 +1764,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1678 1764
1679 if (blocks) { 1765 if (blocks) {
1680 SWARN(silent, s, "jmacd-7", "resize option for remount only"); 1766 SWARN(silent, s, "jmacd-7", "resize option for remount only");
1681 goto error; 1767 goto error_unlocked;
1682 } 1768 }
1683 1769
1684 /* try old format (undistributed bitmap, super block in 8-th 1k block of a device) */ 1770 /* try old format (undistributed bitmap, super block in 8-th 1k block of a device) */
@@ -1688,7 +1774,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1688 else if (read_super_block(s, REISERFS_DISK_OFFSET_IN_BYTES)) { 1774 else if (read_super_block(s, REISERFS_DISK_OFFSET_IN_BYTES)) {
1689 SWARN(silent, s, "sh-2021", "can not find reiserfs on %s", 1775 SWARN(silent, s, "sh-2021", "can not find reiserfs on %s",
1690 reiserfs_bdevname(s)); 1776 reiserfs_bdevname(s));
1691 goto error; 1777 goto error_unlocked;
1692 } 1778 }
1693 1779
1694 rs = SB_DISK_SUPER_BLOCK(s); 1780 rs = SB_DISK_SUPER_BLOCK(s);
@@ -1704,7 +1790,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1704 "or increase size of your LVM partition"); 1790 "or increase size of your LVM partition");
1705 SWARN(silent, s, "", "Or may be you forgot to " 1791 SWARN(silent, s, "", "Or may be you forgot to "
1706 "reboot after fdisk when it told you to"); 1792 "reboot after fdisk when it told you to");
1707 goto error; 1793 goto error_unlocked;
1708 } 1794 }
1709 1795
1710 sbi->s_mount_state = SB_REISERFS_STATE(s); 1796 sbi->s_mount_state = SB_REISERFS_STATE(s);
@@ -1712,8 +1798,9 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1712 1798
1713 if ((errval = reiserfs_init_bitmap_cache(s))) { 1799 if ((errval = reiserfs_init_bitmap_cache(s))) {
1714 SWARN(silent, s, "jmacd-8", "unable to read bitmap"); 1800 SWARN(silent, s, "jmacd-8", "unable to read bitmap");
1715 goto error; 1801 goto error_unlocked;
1716 } 1802 }
1803
1717 errval = -EINVAL; 1804 errval = -EINVAL;
1718#ifdef CONFIG_REISERFS_CHECK 1805#ifdef CONFIG_REISERFS_CHECK
1719 SWARN(silent, s, "", "CONFIG_REISERFS_CHECK is set ON"); 1806 SWARN(silent, s, "", "CONFIG_REISERFS_CHECK is set ON");
@@ -1736,24 +1823,26 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1736 if (reiserfs_barrier_flush(s)) { 1823 if (reiserfs_barrier_flush(s)) {
1737 printk("reiserfs: using flush barriers\n"); 1824 printk("reiserfs: using flush barriers\n");
1738 } 1825 }
1826
1739 // set_device_ro(s->s_dev, 1) ; 1827 // set_device_ro(s->s_dev, 1) ;
1740 if (journal_init(s, jdev_name, old_format, commit_max_age)) { 1828 if (journal_init(s, jdev_name, old_format, commit_max_age)) {
1741 SWARN(silent, s, "sh-2022", 1829 SWARN(silent, s, "sh-2022",
1742 "unable to initialize journal space"); 1830 "unable to initialize journal space");
1743 goto error; 1831 goto error_unlocked;
1744 } else { 1832 } else {
1745 jinit_done = 1; /* once this is set, journal_release must be called 1833 jinit_done = 1; /* once this is set, journal_release must be called
1746 ** if we error out of the mount 1834 ** if we error out of the mount
1747 */ 1835 */
1748 } 1836 }
1837
1749 if (reread_meta_blocks(s)) { 1838 if (reread_meta_blocks(s)) {
1750 SWARN(silent, s, "jmacd-9", 1839 SWARN(silent, s, "jmacd-9",
1751 "unable to reread meta blocks after journal init"); 1840 "unable to reread meta blocks after journal init");
1752 goto error; 1841 goto error_unlocked;
1753 } 1842 }
1754 1843
1755 if (replay_only(s)) 1844 if (replay_only(s))
1756 goto error; 1845 goto error_unlocked;
1757 1846
1758 if (bdev_read_only(s->s_bdev) && !(s->s_flags & MS_RDONLY)) { 1847 if (bdev_read_only(s->s_bdev) && !(s->s_flags & MS_RDONLY)) {
1759 SWARN(silent, s, "clm-7000", 1848 SWARN(silent, s, "clm-7000",
@@ -1767,9 +1856,19 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1767 reiserfs_init_locked_inode, (void *)(&args)); 1856 reiserfs_init_locked_inode, (void *)(&args));
1768 if (!root_inode) { 1857 if (!root_inode) {
1769 SWARN(silent, s, "jmacd-10", "get root inode failed"); 1858 SWARN(silent, s, "jmacd-10", "get root inode failed");
1770 goto error; 1859 goto error_unlocked;
1771 } 1860 }
1772 1861
1862 /*
1863 * This path assumed to be called with the BKL in the old times.
1864 * Now we have inherited the big reiserfs lock from it and many
1865 * reiserfs helpers called in the mount path and elsewhere require
1866 * this lock to be held even if it's not always necessary. Let's be
1867 * conservative and hold it early. The window can be reduced after
1868 * careful review of the code.
1869 */
1870 reiserfs_write_lock(s);
1871
1773 if (root_inode->i_state & I_NEW) { 1872 if (root_inode->i_state & I_NEW) {
1774 reiserfs_read_locked_inode(root_inode, &args); 1873 reiserfs_read_locked_inode(root_inode, &args);
1775 unlock_new_inode(root_inode); 1874 unlock_new_inode(root_inode);
@@ -1896,12 +1995,16 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1896 return (0); 1995 return (0);
1897 1996
1898error: 1997error:
1899 if (jinit_done) { /* kill the commit thread, free journal ram */ 1998 reiserfs_write_unlock(s);
1999
2000error_unlocked:
2001 /* kill the commit thread, free journal ram */
2002 if (jinit_done) {
2003 reiserfs_write_lock(s);
1900 journal_release_error(NULL, s); 2004 journal_release_error(NULL, s);
2005 reiserfs_write_unlock(s);
1901 } 2006 }
1902 2007
1903 reiserfs_write_unlock(s);
1904
1905 reiserfs_free_bitmap_cache(s); 2008 reiserfs_free_bitmap_cache(s);
1906 if (SB_BUFFER_WITH_SB(s)) 2009 if (SB_BUFFER_WITH_SB(s))
1907 brelse(SB_BUFFER_WITH_SB(s)); 2010 brelse(SB_BUFFER_WITH_SB(s));
@@ -2054,12 +2157,13 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2054 int err; 2157 int err;
2055 struct inode *inode; 2158 struct inode *inode;
2056 struct reiserfs_transaction_handle th; 2159 struct reiserfs_transaction_handle th;
2160 int opt = type == USRQUOTA ? REISERFS_USRQUOTA : REISERFS_GRPQUOTA;
2057 2161
2058 if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA))) 2162 if (!(REISERFS_SB(sb)->s_mount_opt & (1 << opt)))
2059 return -EINVAL; 2163 return -EINVAL;
2060 2164
2061 /* Quotafile not on the same filesystem? */ 2165 /* Quotafile not on the same filesystem? */
2062 if (path->mnt->mnt_sb != sb) { 2166 if (path->dentry->d_sb != sb) {
2063 err = -EXDEV; 2167 err = -EXDEV;
2064 goto out; 2168 goto out;
2065 } 2169 }
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 6bc346c160e7..c24deda8a8bc 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -66,7 +66,7 @@ static int xattr_create(struct inode *dir, struct dentry *dentry, int mode)
66} 66}
67#endif 67#endif
68 68
69static int xattr_mkdir(struct inode *dir, struct dentry *dentry, int mode) 69static int xattr_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
70{ 70{
71 BUG_ON(!mutex_is_locked(&dir->i_mutex)); 71 BUG_ON(!mutex_is_locked(&dir->i_mutex));
72 return dir->i_op->mkdir(dir, dentry, mode); 72 return dir->i_op->mkdir(dir, dentry, mode);
diff --git a/fs/romfs/mmap-nommu.c b/fs/romfs/mmap-nommu.c
index eed99428f104..e1a7779dd3cb 100644
--- a/fs/romfs/mmap-nommu.c
+++ b/fs/romfs/mmap-nommu.c
@@ -28,9 +28,10 @@ static unsigned long romfs_get_unmapped_area(struct file *file,
28 struct inode *inode = file->f_mapping->host; 28 struct inode *inode = file->f_mapping->host;
29 struct mtd_info *mtd = inode->i_sb->s_mtd; 29 struct mtd_info *mtd = inode->i_sb->s_mtd;
30 unsigned long isize, offset, maxpages, lpages; 30 unsigned long isize, offset, maxpages, lpages;
31 int ret;
31 32
32 if (!mtd) 33 if (!mtd)
33 goto cant_map_directly; 34 return (unsigned long) -ENOSYS;
34 35
35 /* the mapping mustn't extend beyond the EOF */ 36 /* the mapping mustn't extend beyond the EOF */
36 lpages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; 37 lpages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -41,23 +42,20 @@ static unsigned long romfs_get_unmapped_area(struct file *file,
41 if ((pgoff >= maxpages) || (maxpages - pgoff < lpages)) 42 if ((pgoff >= maxpages) || (maxpages - pgoff < lpages))
42 return (unsigned long) -EINVAL; 43 return (unsigned long) -EINVAL;
43 44
44 /* we need to call down to the MTD layer to do the actual mapping */ 45 if (addr != 0)
45 if (mtd->get_unmapped_area) { 46 return (unsigned long) -EINVAL;
46 if (addr != 0)
47 return (unsigned long) -EINVAL;
48
49 if (len > mtd->size || pgoff >= (mtd->size >> PAGE_SHIFT))
50 return (unsigned long) -EINVAL;
51 47
52 offset += ROMFS_I(inode)->i_dataoffset; 48 if (len > mtd->size || pgoff >= (mtd->size >> PAGE_SHIFT))
53 if (offset > mtd->size - len) 49 return (unsigned long) -EINVAL;
54 return (unsigned long) -EINVAL;
55 50
56 return mtd->get_unmapped_area(mtd, len, offset, flags); 51 offset += ROMFS_I(inode)->i_dataoffset;
57 } 52 if (offset > mtd->size - len)
53 return (unsigned long) -EINVAL;
58 54
59cant_map_directly: 55 ret = mtd_get_unmapped_area(mtd, len, offset, flags);
60 return (unsigned long) -ENOSYS; 56 if (ret == -EOPNOTSUPP)
57 ret = -ENOSYS;
58 return (unsigned long) ret;
61} 59}
62 60
63/* 61/*
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 8b4089f30408..bb36ab74eb45 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -403,7 +403,6 @@ static struct inode *romfs_alloc_inode(struct super_block *sb)
403static void romfs_i_callback(struct rcu_head *head) 403static void romfs_i_callback(struct rcu_head *head)
404{ 404{
405 struct inode *inode = container_of(head, struct inode, i_rcu); 405 struct inode *inode = container_of(head, struct inode, i_rcu);
406 INIT_LIST_HEAD(&inode->i_dentry);
407 kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode)); 406 kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode));
408} 407}
409 408
diff --git a/fs/seq_file.c b/fs/seq_file.c
index dba43c3ea3af..4023d6be939b 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -397,7 +397,7 @@ EXPORT_SYMBOL(seq_printf);
397 * Returns pointer past last written character in @s, or NULL in case of 397 * Returns pointer past last written character in @s, or NULL in case of
398 * failure. 398 * failure.
399 */ 399 */
400char *mangle_path(char *s, char *p, char *esc) 400char *mangle_path(char *s, const char *p, const char *esc)
401{ 401{
402 while (s <= p) { 402 while (s <= p) {
403 char c = *p++; 403 char c = *p++;
@@ -427,7 +427,7 @@ EXPORT_SYMBOL(mangle_path);
427 * return the absolute path of 'path', as represented by the 427 * return the absolute path of 'path', as represented by the
428 * dentry / mnt pair in the path parameter. 428 * dentry / mnt pair in the path parameter.
429 */ 429 */
430int seq_path(struct seq_file *m, struct path *path, char *esc) 430int seq_path(struct seq_file *m, const struct path *path, const char *esc)
431{ 431{
432 char *buf; 432 char *buf;
433 size_t size = seq_get_buf(m, &buf); 433 size_t size = seq_get_buf(m, &buf);
@@ -450,8 +450,8 @@ EXPORT_SYMBOL(seq_path);
450/* 450/*
451 * Same as seq_path, but relative to supplied root. 451 * Same as seq_path, but relative to supplied root.
452 */ 452 */
453int seq_path_root(struct seq_file *m, struct path *path, struct path *root, 453int seq_path_root(struct seq_file *m, const struct path *path,
454 char *esc) 454 const struct path *root, const char *esc)
455{ 455{
456 char *buf; 456 char *buf;
457 size_t size = seq_get_buf(m, &buf); 457 size_t size = seq_get_buf(m, &buf);
@@ -480,7 +480,7 @@ int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
480/* 480/*
481 * returns the path of the 'dentry' from the root of its filesystem. 481 * returns the path of the 'dentry' from the root of its filesystem.
482 */ 482 */
483int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc) 483int seq_dentry(struct seq_file *m, struct dentry *dentry, const char *esc)
484{ 484{
485 char *buf; 485 char *buf;
486 size_t size = seq_get_buf(m, &buf); 486 size_t size = seq_get_buf(m, &buf);
diff --git a/fs/splice.c b/fs/splice.c
index fa2defa8afcf..1ec0493266b3 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -25,7 +25,6 @@
25#include <linux/mm_inline.h> 25#include <linux/mm_inline.h>
26#include <linux/swap.h> 26#include <linux/swap.h>
27#include <linux/writeback.h> 27#include <linux/writeback.h>
28#include <linux/buffer_head.h>
29#include <linux/module.h> 28#include <linux/module.h>
30#include <linux/syscalls.h> 29#include <linux/syscalls.h>
31#include <linux/uio.h> 30#include <linux/uio.h>
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index f744be98cd5a..af0b73802592 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -70,11 +70,15 @@ struct squashfs_cache_entry *squashfs_cache_get(struct super_block *sb,
70 spin_lock(&cache->lock); 70 spin_lock(&cache->lock);
71 71
72 while (1) { 72 while (1) {
73 for (i = 0; i < cache->entries; i++) 73 for (i = cache->curr_blk, n = 0; n < cache->entries; n++) {
74 if (cache->entry[i].block == block) 74 if (cache->entry[i].block == block) {
75 cache->curr_blk = i;
75 break; 76 break;
77 }
78 i = (i + 1) % cache->entries;
79 }
76 80
77 if (i == cache->entries) { 81 if (n == cache->entries) {
78 /* 82 /*
79 * Block not in cache, if all cache entries are used 83 * Block not in cache, if all cache entries are used
80 * go to sleep waiting for one to become available. 84 * go to sleep waiting for one to become available.
@@ -245,6 +249,7 @@ struct squashfs_cache *squashfs_cache_init(char *name, int entries,
245 goto cleanup; 249 goto cleanup;
246 } 250 }
247 251
252 cache->curr_blk = 0;
248 cache->next_blk = 0; 253 cache->next_blk = 0;
249 cache->unused = entries; 254 cache->unused = entries;
250 cache->entries = entries; 255 cache->entries = entries;
@@ -332,17 +337,20 @@ int squashfs_read_metadata(struct super_block *sb, void *buffer,
332 u64 *block, int *offset, int length) 337 u64 *block, int *offset, int length)
333{ 338{
334 struct squashfs_sb_info *msblk = sb->s_fs_info; 339 struct squashfs_sb_info *msblk = sb->s_fs_info;
335 int bytes, copied = length; 340 int bytes, res = length;
336 struct squashfs_cache_entry *entry; 341 struct squashfs_cache_entry *entry;
337 342
338 TRACE("Entered squashfs_read_metadata [%llx:%x]\n", *block, *offset); 343 TRACE("Entered squashfs_read_metadata [%llx:%x]\n", *block, *offset);
339 344
340 while (length) { 345 while (length) {
341 entry = squashfs_cache_get(sb, msblk->block_cache, *block, 0); 346 entry = squashfs_cache_get(sb, msblk->block_cache, *block, 0);
342 if (entry->error) 347 if (entry->error) {
343 return entry->error; 348 res = entry->error;
344 else if (*offset >= entry->length) 349 goto error;
345 return -EIO; 350 } else if (*offset >= entry->length) {
351 res = -EIO;
352 goto error;
353 }
346 354
347 bytes = squashfs_copy_data(buffer, entry, *offset, length); 355 bytes = squashfs_copy_data(buffer, entry, *offset, length);
348 if (buffer) 356 if (buffer)
@@ -358,7 +366,11 @@ int squashfs_read_metadata(struct super_block *sb, void *buffer,
358 squashfs_cache_put(entry); 366 squashfs_cache_put(entry);
359 } 367 }
360 368
361 return copied; 369 return res;
370
371error:
372 squashfs_cache_put(entry);
373 return res;
362} 374}
363 375
364 376
diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c
index fd7b3b3bda13..81afbccfa843 100644
--- a/fs/squashfs/inode.c
+++ b/fs/squashfs/inode.c
@@ -208,8 +208,8 @@ int squashfs_read_inode(struct inode *inode, long long ino)
208 inode->i_op = &squashfs_inode_ops; 208 inode->i_op = &squashfs_inode_ops;
209 inode->i_fop = &generic_ro_fops; 209 inode->i_fop = &generic_ro_fops;
210 inode->i_mode |= S_IFREG; 210 inode->i_mode |= S_IFREG;
211 inode->i_blocks = ((inode->i_size - 211 inode->i_blocks = (inode->i_size -
212 le64_to_cpu(sqsh_ino->sparse) - 1) >> 9) + 1; 212 le64_to_cpu(sqsh_ino->sparse) + 511) >> 9;
213 213
214 squashfs_i(inode)->fragment_block = frag_blk; 214 squashfs_i(inode)->fragment_block = frag_blk;
215 squashfs_i(inode)->fragment_size = frag_size; 215 squashfs_i(inode)->fragment_size = frag_size;
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index 651f0b31d296..52934a22f296 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -28,6 +28,7 @@
28struct squashfs_cache { 28struct squashfs_cache {
29 char *name; 29 char *name;
30 int entries; 30 int entries;
31 int curr_blk;
31 int next_blk; 32 int next_blk;
32 int num_waiters; 33 int num_waiters;
33 int unused; 34 int unused;
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 2da1715452ac..ecaa2f7bdb8f 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -290,7 +290,7 @@ handle_fragments:
290 290
291check_directory_table: 291check_directory_table:
292 /* Sanity check directory_table */ 292 /* Sanity check directory_table */
293 if (msblk->directory_table >= next_table) { 293 if (msblk->directory_table > next_table) {
294 err = -EINVAL; 294 err = -EINVAL;
295 goto failed_mount; 295 goto failed_mount;
296 } 296 }
@@ -464,7 +464,6 @@ static struct inode *squashfs_alloc_inode(struct super_block *sb)
464static void squashfs_i_callback(struct rcu_head *head) 464static void squashfs_i_callback(struct rcu_head *head)
465{ 465{
466 struct inode *inode = container_of(head, struct inode, i_rcu); 466 struct inode *inode = container_of(head, struct inode, i_rcu);
467 INIT_LIST_HEAD(&inode->i_dentry);
468 kmem_cache_free(squashfs_inode_cachep, squashfs_i(inode)); 467 kmem_cache_free(squashfs_inode_cachep, squashfs_i(inode));
469} 468}
470 469
diff --git a/fs/statfs.c b/fs/statfs.c
index 9cf04a118965..2aa6a22e0be2 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -7,6 +7,7 @@
7#include <linux/statfs.h> 7#include <linux/statfs.h>
8#include <linux/security.h> 8#include <linux/security.h>
9#include <linux/uaccess.h> 9#include <linux/uaccess.h>
10#include "internal.h"
10 11
11static int flags_by_mnt(int mnt_flags) 12static int flags_by_mnt(int mnt_flags)
12{ 13{
@@ -45,7 +46,7 @@ static int calculate_f_flags(struct vfsmount *mnt)
45 flags_by_sb(mnt->mnt_sb->s_flags); 46 flags_by_sb(mnt->mnt_sb->s_flags);
46} 47}
47 48
48int statfs_by_dentry(struct dentry *dentry, struct kstatfs *buf) 49static int statfs_by_dentry(struct dentry *dentry, struct kstatfs *buf)
49{ 50{
50 int retval; 51 int retval;
51 52
@@ -205,19 +206,23 @@ SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user
205 return error; 206 return error;
206} 207}
207 208
208SYSCALL_DEFINE2(ustat, unsigned, dev, struct ustat __user *, ubuf) 209int vfs_ustat(dev_t dev, struct kstatfs *sbuf)
209{ 210{
210 struct super_block *s; 211 struct super_block *s = user_get_super(dev);
211 struct ustat tmp;
212 struct kstatfs sbuf;
213 int err; 212 int err;
214
215 s = user_get_super(new_decode_dev(dev));
216 if (!s) 213 if (!s)
217 return -EINVAL; 214 return -EINVAL;
218 215
219 err = statfs_by_dentry(s->s_root, &sbuf); 216 err = statfs_by_dentry(s->s_root, sbuf);
220 drop_super(s); 217 drop_super(s);
218 return err;
219}
220
221SYSCALL_DEFINE2(ustat, unsigned, dev, struct ustat __user *, ubuf)
222{
223 struct ustat tmp;
224 struct kstatfs sbuf;
225 int err = vfs_ustat(new_decode_dev(dev), &sbuf);
221 if (err) 226 if (err)
222 return err; 227 return err;
223 228
diff --git a/fs/super.c b/fs/super.c
index afd0f1ad45e0..de41e1e46f09 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -136,12 +136,13 @@ static struct super_block *alloc_super(struct file_system_type *type)
136 INIT_LIST_HEAD(&s->s_files); 136 INIT_LIST_HEAD(&s->s_files);
137#endif 137#endif
138 s->s_bdi = &default_backing_dev_info; 138 s->s_bdi = &default_backing_dev_info;
139 INIT_LIST_HEAD(&s->s_instances); 139 INIT_HLIST_NODE(&s->s_instances);
140 INIT_HLIST_BL_HEAD(&s->s_anon); 140 INIT_HLIST_BL_HEAD(&s->s_anon);
141 INIT_LIST_HEAD(&s->s_inodes); 141 INIT_LIST_HEAD(&s->s_inodes);
142 INIT_LIST_HEAD(&s->s_dentry_lru); 142 INIT_LIST_HEAD(&s->s_dentry_lru);
143 INIT_LIST_HEAD(&s->s_inode_lru); 143 INIT_LIST_HEAD(&s->s_inode_lru);
144 spin_lock_init(&s->s_inode_lru_lock); 144 spin_lock_init(&s->s_inode_lru_lock);
145 INIT_LIST_HEAD(&s->s_mounts);
145 init_rwsem(&s->s_umount); 146 init_rwsem(&s->s_umount);
146 mutex_init(&s->s_lock); 147 mutex_init(&s->s_lock);
147 lockdep_set_class(&s->s_umount, &type->s_umount_key); 148 lockdep_set_class(&s->s_umount, &type->s_umount_key);
@@ -200,6 +201,7 @@ static inline void destroy_super(struct super_block *s)
200 free_percpu(s->s_files); 201 free_percpu(s->s_files);
201#endif 202#endif
202 security_sb_free(s); 203 security_sb_free(s);
204 WARN_ON(!list_empty(&s->s_mounts));
203 kfree(s->s_subtype); 205 kfree(s->s_subtype);
204 kfree(s->s_options); 206 kfree(s->s_options);
205 kfree(s); 207 kfree(s);
@@ -210,7 +212,7 @@ static inline void destroy_super(struct super_block *s)
210/* 212/*
211 * Drop a superblock's refcount. The caller must hold sb_lock. 213 * Drop a superblock's refcount. The caller must hold sb_lock.
212 */ 214 */
213void __put_super(struct super_block *sb) 215static void __put_super(struct super_block *sb)
214{ 216{
215 if (!--sb->s_count) { 217 if (!--sb->s_count) {
216 list_del_init(&sb->s_list); 218 list_del_init(&sb->s_list);
@@ -225,7 +227,7 @@ void __put_super(struct super_block *sb)
225 * Drops a temporary reference, frees superblock if there's no 227 * Drops a temporary reference, frees superblock if there's no
226 * references left. 228 * references left.
227 */ 229 */
228void put_super(struct super_block *sb) 230static void put_super(struct super_block *sb)
229{ 231{
230 spin_lock(&sb_lock); 232 spin_lock(&sb_lock);
231 __put_super(sb); 233 __put_super(sb);
@@ -328,7 +330,7 @@ static int grab_super(struct super_block *s) __releases(sb_lock)
328bool grab_super_passive(struct super_block *sb) 330bool grab_super_passive(struct super_block *sb)
329{ 331{
330 spin_lock(&sb_lock); 332 spin_lock(&sb_lock);
331 if (list_empty(&sb->s_instances)) { 333 if (hlist_unhashed(&sb->s_instances)) {
332 spin_unlock(&sb_lock); 334 spin_unlock(&sb_lock);
333 return false; 335 return false;
334 } 336 }
@@ -337,7 +339,7 @@ bool grab_super_passive(struct super_block *sb)
337 spin_unlock(&sb_lock); 339 spin_unlock(&sb_lock);
338 340
339 if (down_read_trylock(&sb->s_umount)) { 341 if (down_read_trylock(&sb->s_umount)) {
340 if (sb->s_root) 342 if (sb->s_root && (sb->s_flags & MS_BORN))
341 return true; 343 return true;
342 up_read(&sb->s_umount); 344 up_read(&sb->s_umount);
343 } 345 }
@@ -400,7 +402,7 @@ void generic_shutdown_super(struct super_block *sb)
400 } 402 }
401 spin_lock(&sb_lock); 403 spin_lock(&sb_lock);
402 /* should be initialized for __put_super_and_need_restart() */ 404 /* should be initialized for __put_super_and_need_restart() */
403 list_del_init(&sb->s_instances); 405 hlist_del_init(&sb->s_instances);
404 spin_unlock(&sb_lock); 406 spin_unlock(&sb_lock);
405 up_write(&sb->s_umount); 407 up_write(&sb->s_umount);
406} 408}
@@ -420,13 +422,14 @@ struct super_block *sget(struct file_system_type *type,
420 void *data) 422 void *data)
421{ 423{
422 struct super_block *s = NULL; 424 struct super_block *s = NULL;
425 struct hlist_node *node;
423 struct super_block *old; 426 struct super_block *old;
424 int err; 427 int err;
425 428
426retry: 429retry:
427 spin_lock(&sb_lock); 430 spin_lock(&sb_lock);
428 if (test) { 431 if (test) {
429 list_for_each_entry(old, &type->fs_supers, s_instances) { 432 hlist_for_each_entry(old, node, &type->fs_supers, s_instances) {
430 if (!test(old, data)) 433 if (!test(old, data))
431 continue; 434 continue;
432 if (!grab_super(old)) 435 if (!grab_super(old))
@@ -462,7 +465,7 @@ retry:
462 s->s_type = type; 465 s->s_type = type;
463 strlcpy(s->s_id, type->name, sizeof(s->s_id)); 466 strlcpy(s->s_id, type->name, sizeof(s->s_id));
464 list_add_tail(&s->s_list, &super_blocks); 467 list_add_tail(&s->s_list, &super_blocks);
465 list_add(&s->s_instances, &type->fs_supers); 468 hlist_add_head(&s->s_instances, &type->fs_supers);
466 spin_unlock(&sb_lock); 469 spin_unlock(&sb_lock);
467 get_filesystem(type); 470 get_filesystem(type);
468 register_shrinker(&s->s_shrink); 471 register_shrinker(&s->s_shrink);
@@ -497,14 +500,14 @@ void sync_supers(void)
497 500
498 spin_lock(&sb_lock); 501 spin_lock(&sb_lock);
499 list_for_each_entry(sb, &super_blocks, s_list) { 502 list_for_each_entry(sb, &super_blocks, s_list) {
500 if (list_empty(&sb->s_instances)) 503 if (hlist_unhashed(&sb->s_instances))
501 continue; 504 continue;
502 if (sb->s_op->write_super && sb->s_dirt) { 505 if (sb->s_op->write_super && sb->s_dirt) {
503 sb->s_count++; 506 sb->s_count++;
504 spin_unlock(&sb_lock); 507 spin_unlock(&sb_lock);
505 508
506 down_read(&sb->s_umount); 509 down_read(&sb->s_umount);
507 if (sb->s_root && sb->s_dirt) 510 if (sb->s_root && sb->s_dirt && (sb->s_flags & MS_BORN))
508 sb->s_op->write_super(sb); 511 sb->s_op->write_super(sb);
509 up_read(&sb->s_umount); 512 up_read(&sb->s_umount);
510 513
@@ -533,13 +536,13 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
533 536
534 spin_lock(&sb_lock); 537 spin_lock(&sb_lock);
535 list_for_each_entry(sb, &super_blocks, s_list) { 538 list_for_each_entry(sb, &super_blocks, s_list) {
536 if (list_empty(&sb->s_instances)) 539 if (hlist_unhashed(&sb->s_instances))
537 continue; 540 continue;
538 sb->s_count++; 541 sb->s_count++;
539 spin_unlock(&sb_lock); 542 spin_unlock(&sb_lock);
540 543
541 down_read(&sb->s_umount); 544 down_read(&sb->s_umount);
542 if (sb->s_root) 545 if (sb->s_root && (sb->s_flags & MS_BORN))
543 f(sb, arg); 546 f(sb, arg);
544 up_read(&sb->s_umount); 547 up_read(&sb->s_umount);
545 548
@@ -566,14 +569,15 @@ void iterate_supers_type(struct file_system_type *type,
566 void (*f)(struct super_block *, void *), void *arg) 569 void (*f)(struct super_block *, void *), void *arg)
567{ 570{
568 struct super_block *sb, *p = NULL; 571 struct super_block *sb, *p = NULL;
572 struct hlist_node *node;
569 573
570 spin_lock(&sb_lock); 574 spin_lock(&sb_lock);
571 list_for_each_entry(sb, &type->fs_supers, s_instances) { 575 hlist_for_each_entry(sb, node, &type->fs_supers, s_instances) {
572 sb->s_count++; 576 sb->s_count++;
573 spin_unlock(&sb_lock); 577 spin_unlock(&sb_lock);
574 578
575 down_read(&sb->s_umount); 579 down_read(&sb->s_umount);
576 if (sb->s_root) 580 if (sb->s_root && (sb->s_flags & MS_BORN))
577 f(sb, arg); 581 f(sb, arg);
578 up_read(&sb->s_umount); 582 up_read(&sb->s_umount);
579 583
@@ -607,14 +611,14 @@ struct super_block *get_super(struct block_device *bdev)
607 spin_lock(&sb_lock); 611 spin_lock(&sb_lock);
608rescan: 612rescan:
609 list_for_each_entry(sb, &super_blocks, s_list) { 613 list_for_each_entry(sb, &super_blocks, s_list) {
610 if (list_empty(&sb->s_instances)) 614 if (hlist_unhashed(&sb->s_instances))
611 continue; 615 continue;
612 if (sb->s_bdev == bdev) { 616 if (sb->s_bdev == bdev) {
613 sb->s_count++; 617 sb->s_count++;
614 spin_unlock(&sb_lock); 618 spin_unlock(&sb_lock);
615 down_read(&sb->s_umount); 619 down_read(&sb->s_umount);
616 /* still alive? */ 620 /* still alive? */
617 if (sb->s_root) 621 if (sb->s_root && (sb->s_flags & MS_BORN))
618 return sb; 622 return sb;
619 up_read(&sb->s_umount); 623 up_read(&sb->s_umount);
620 /* nope, got unmounted */ 624 /* nope, got unmounted */
@@ -647,7 +651,7 @@ struct super_block *get_active_super(struct block_device *bdev)
647restart: 651restart:
648 spin_lock(&sb_lock); 652 spin_lock(&sb_lock);
649 list_for_each_entry(sb, &super_blocks, s_list) { 653 list_for_each_entry(sb, &super_blocks, s_list) {
650 if (list_empty(&sb->s_instances)) 654 if (hlist_unhashed(&sb->s_instances))
651 continue; 655 continue;
652 if (sb->s_bdev == bdev) { 656 if (sb->s_bdev == bdev) {
653 if (grab_super(sb)) /* drops sb_lock */ 657 if (grab_super(sb)) /* drops sb_lock */
@@ -667,14 +671,14 @@ struct super_block *user_get_super(dev_t dev)
667 spin_lock(&sb_lock); 671 spin_lock(&sb_lock);
668rescan: 672rescan:
669 list_for_each_entry(sb, &super_blocks, s_list) { 673 list_for_each_entry(sb, &super_blocks, s_list) {
670 if (list_empty(&sb->s_instances)) 674 if (hlist_unhashed(&sb->s_instances))
671 continue; 675 continue;
672 if (sb->s_dev == dev) { 676 if (sb->s_dev == dev) {
673 sb->s_count++; 677 sb->s_count++;
674 spin_unlock(&sb_lock); 678 spin_unlock(&sb_lock);
675 down_read(&sb->s_umount); 679 down_read(&sb->s_umount);
676 /* still alive? */ 680 /* still alive? */
677 if (sb->s_root) 681 if (sb->s_root && (sb->s_flags & MS_BORN))
678 return sb; 682 return sb;
679 up_read(&sb->s_umount); 683 up_read(&sb->s_umount);
680 /* nope, got unmounted */ 684 /* nope, got unmounted */
@@ -719,23 +723,29 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
719 /* If we are remounting RDONLY and current sb is read/write, 723 /* If we are remounting RDONLY and current sb is read/write,
720 make sure there are no rw files opened */ 724 make sure there are no rw files opened */
721 if (remount_ro) { 725 if (remount_ro) {
722 if (force) 726 if (force) {
723 mark_files_ro(sb); 727 mark_files_ro(sb);
724 else if (!fs_may_remount_ro(sb)) 728 } else {
725 return -EBUSY; 729 retval = sb_prepare_remount_readonly(sb);
730 if (retval)
731 return retval;
732 }
726 } 733 }
727 734
728 if (sb->s_op->remount_fs) { 735 if (sb->s_op->remount_fs) {
729 retval = sb->s_op->remount_fs(sb, &flags, data); 736 retval = sb->s_op->remount_fs(sb, &flags, data);
730 if (retval) { 737 if (retval) {
731 if (!force) 738 if (!force)
732 return retval; 739 goto cancel_readonly;
733 /* If forced remount, go ahead despite any errors */ 740 /* If forced remount, go ahead despite any errors */
734 WARN(1, "forced remount of a %s fs returned %i\n", 741 WARN(1, "forced remount of a %s fs returned %i\n",
735 sb->s_type->name, retval); 742 sb->s_type->name, retval);
736 } 743 }
737 } 744 }
738 sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK); 745 sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
746 /* Needs to be ordered wrt mnt_is_readonly() */
747 smp_wmb();
748 sb->s_readonly_remount = 0;
739 749
740 /* 750 /*
741 * Some filesystems modify their metadata via some other path than the 751 * Some filesystems modify their metadata via some other path than the
@@ -748,6 +758,10 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
748 if (remount_ro && sb->s_bdev) 758 if (remount_ro && sb->s_bdev)
749 invalidate_bdev(sb->s_bdev); 759 invalidate_bdev(sb->s_bdev);
750 return 0; 760 return 0;
761
762cancel_readonly:
763 sb->s_readonly_remount = 0;
764 return retval;
751} 765}
752 766
753static void do_emergency_remount(struct work_struct *work) 767static void do_emergency_remount(struct work_struct *work)
@@ -756,12 +770,13 @@ static void do_emergency_remount(struct work_struct *work)
756 770
757 spin_lock(&sb_lock); 771 spin_lock(&sb_lock);
758 list_for_each_entry(sb, &super_blocks, s_list) { 772 list_for_each_entry(sb, &super_blocks, s_list) {
759 if (list_empty(&sb->s_instances)) 773 if (hlist_unhashed(&sb->s_instances))
760 continue; 774 continue;
761 sb->s_count++; 775 sb->s_count++;
762 spin_unlock(&sb_lock); 776 spin_unlock(&sb_lock);
763 down_write(&sb->s_umount); 777 down_write(&sb->s_umount);
764 if (sb->s_root && sb->s_bdev && !(sb->s_flags & MS_RDONLY)) { 778 if (sb->s_root && sb->s_bdev && (sb->s_flags & MS_BORN) &&
779 !(sb->s_flags & MS_RDONLY)) {
765 /* 780 /*
766 * What lock protects sb->s_flags?? 781 * What lock protects sb->s_flags??
767 */ 782 */
@@ -1144,6 +1159,11 @@ int freeze_super(struct super_block *sb)
1144 return -EBUSY; 1159 return -EBUSY;
1145 } 1160 }
1146 1161
1162 if (!(sb->s_flags & MS_BORN)) {
1163 up_write(&sb->s_umount);
1164 return 0; /* sic - it's "nothing to do" */
1165 }
1166
1147 if (sb->s_flags & MS_RDONLY) { 1167 if (sb->s_flags & MS_RDONLY) {
1148 sb->s_frozen = SB_FREEZE_TRANS; 1168 sb->s_frozen = SB_FREEZE_TRANS;
1149 smp_wmb(); 1169 smp_wmb();
diff --git a/fs/sync.c b/fs/sync.c
index 101b8ef901d7..f3501ef39235 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -14,7 +14,6 @@
14#include <linux/linkage.h> 14#include <linux/linkage.h>
15#include <linux/pagemap.h> 15#include <linux/pagemap.h>
16#include <linux/quotaops.h> 16#include <linux/quotaops.h>
17#include <linux/buffer_head.h>
18#include <linux/backing-dev.h> 17#include <linux/backing-dev.h>
19#include "internal.h" 18#include "internal.h"
20 19
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index d4e6080b4b20..62f4fb37789e 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -518,7 +518,7 @@ out:
518} 518}
519 519
520int sysfs_add_file_mode(struct sysfs_dirent *dir_sd, 520int sysfs_add_file_mode(struct sysfs_dirent *dir_sd,
521 const struct attribute *attr, int type, mode_t amode) 521 const struct attribute *attr, int type, umode_t amode)
522{ 522{
523 umode_t mode = (amode & S_IALLUGO) | S_IFREG; 523 umode_t mode = (amode & S_IALLUGO) | S_IFREG;
524 struct sysfs_addrm_cxt acxt; 524 struct sysfs_addrm_cxt acxt;
@@ -618,7 +618,7 @@ EXPORT_SYMBOL_GPL(sysfs_add_file_to_group);
618 * 618 *
619 */ 619 */
620int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr, 620int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr,
621 mode_t mode) 621 umode_t mode)
622{ 622{
623 struct sysfs_dirent *sd; 623 struct sysfs_dirent *sd;
624 struct iattr newattrs; 624 struct iattr newattrs;
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 194414f8298c..dd1701caecc9 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -33,7 +33,7 @@ static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
33 int error = 0, i; 33 int error = 0, i;
34 34
35 for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) { 35 for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) {
36 mode_t mode = 0; 36 umode_t mode = 0;
37 37
38 /* in update mode, we're changing the permissions or 38 /* in update mode, we're changing the permissions or
39 * visibility. Do this by first removing then 39 * visibility. Do this by first removing then
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index c81b22f3ace1..4a802b4a9056 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -187,7 +187,7 @@ out:
187 return error; 187 return error;
188} 188}
189 189
190static inline void set_default_inode_attr(struct inode * inode, mode_t mode) 190static inline void set_default_inode_attr(struct inode * inode, umode_t mode)
191{ 191{
192 inode->i_mode = mode; 192 inode->i_mode = mode;
193 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 193 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index ce29e28b766d..7484a36ee678 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -79,7 +79,7 @@ struct sysfs_dirent {
79 }; 79 };
80 80
81 unsigned int s_flags; 81 unsigned int s_flags;
82 unsigned short s_mode; 82 umode_t s_mode;
83 ino_t s_ino; 83 ino_t s_ino;
84 struct sysfs_inode_attrs *s_iattr; 84 struct sysfs_inode_attrs *s_iattr;
85}; 85};
@@ -229,7 +229,7 @@ int sysfs_add_file(struct sysfs_dirent *dir_sd,
229 const struct attribute *attr, int type); 229 const struct attribute *attr, int type);
230 230
231int sysfs_add_file_mode(struct sysfs_dirent *dir_sd, 231int sysfs_add_file_mode(struct sysfs_dirent *dir_sd,
232 const struct attribute *attr, int type, mode_t amode); 232 const struct attribute *attr, int type, umode_t amode);
233/* 233/*
234 * bin.c 234 * bin.c
235 */ 235 */
diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c
index 0c96c98bd1db..8233b02eccae 100644
--- a/fs/sysv/ialloc.c
+++ b/fs/sysv/ialloc.c
@@ -132,7 +132,7 @@ void sysv_free_inode(struct inode * inode)
132 brelse(bh); 132 brelse(bh);
133} 133}
134 134
135struct inode * sysv_new_inode(const struct inode * dir, mode_t mode) 135struct inode * sysv_new_inode(const struct inode * dir, umode_t mode)
136{ 136{
137 struct super_block *sb = dir->i_sb; 137 struct super_block *sb = dir->i_sb;
138 struct sysv_sb_info *sbi = SYSV_SB(sb); 138 struct sysv_sb_info *sbi = SYSV_SB(sb);
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 25ffb3e9a3f8..3da5ce25faf0 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -336,7 +336,6 @@ static struct inode *sysv_alloc_inode(struct super_block *sb)
336static void sysv_i_callback(struct rcu_head *head) 336static void sysv_i_callback(struct rcu_head *head)
337{ 337{
338 struct inode *inode = container_of(head, struct inode, i_rcu); 338 struct inode *inode = container_of(head, struct inode, i_rcu);
339 INIT_LIST_HEAD(&inode->i_dentry);
340 kmem_cache_free(sysv_inode_cachep, SYSV_I(inode)); 339 kmem_cache_free(sysv_inode_cachep, SYSV_I(inode));
341} 340}
342 341
diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c
index fa8d43c92bb8..90b54b438789 100644
--- a/fs/sysv/itree.c
+++ b/fs/sysv/itree.c
@@ -442,7 +442,7 @@ static unsigned sysv_nblocks(struct super_block *s, loff_t size)
442 442
443int sysv_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 443int sysv_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
444{ 444{
445 struct super_block *s = mnt->mnt_sb; 445 struct super_block *s = dentry->d_sb;
446 generic_fillattr(dentry->d_inode, stat); 446 generic_fillattr(dentry->d_inode, stat);
447 stat->blocks = (s->s_blocksize / 512) * sysv_nblocks(s, stat->size); 447 stat->blocks = (s->s_blocksize / 512) * sysv_nblocks(s, stat->size);
448 stat->blksize = s->s_blocksize; 448 stat->blksize = s->s_blocksize;
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index e474fbcf8bde..b217797e621b 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -61,7 +61,7 @@ static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry, st
61 return NULL; 61 return NULL;
62} 62}
63 63
64static int sysv_mknod(struct inode * dir, struct dentry * dentry, int mode, dev_t rdev) 64static int sysv_mknod(struct inode * dir, struct dentry * dentry, umode_t mode, dev_t rdev)
65{ 65{
66 struct inode * inode; 66 struct inode * inode;
67 int err; 67 int err;
@@ -80,7 +80,7 @@ static int sysv_mknod(struct inode * dir, struct dentry * dentry, int mode, dev_
80 return err; 80 return err;
81} 81}
82 82
83static int sysv_create(struct inode * dir, struct dentry * dentry, int mode, struct nameidata *nd) 83static int sysv_create(struct inode * dir, struct dentry * dentry, umode_t mode, struct nameidata *nd)
84{ 84{
85 return sysv_mknod(dir, dentry, mode, 0); 85 return sysv_mknod(dir, dentry, mode, 0);
86} 86}
@@ -131,7 +131,7 @@ static int sysv_link(struct dentry * old_dentry, struct inode * dir,
131 return add_nondir(dentry, inode); 131 return add_nondir(dentry, inode);
132} 132}
133 133
134static int sysv_mkdir(struct inode * dir, struct dentry *dentry, int mode) 134static int sysv_mkdir(struct inode * dir, struct dentry *dentry, umode_t mode)
135{ 135{
136 struct inode * inode; 136 struct inode * inode;
137 int err = -EMLINK; 137 int err = -EMLINK;
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index bb55cdb394bf..0e4b821c5691 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -125,7 +125,7 @@ static inline void dirty_sb(struct super_block *sb)
125/* ialloc.c */ 125/* ialloc.c */
126extern struct sysv_inode *sysv_raw_inode(struct super_block *, unsigned, 126extern struct sysv_inode *sysv_raw_inode(struct super_block *, unsigned,
127 struct buffer_head **); 127 struct buffer_head **);
128extern struct inode * sysv_new_inode(const struct inode *, mode_t); 128extern struct inode * sysv_new_inode(const struct inode *, umode_t);
129extern void sysv_free_inode(struct inode *); 129extern void sysv_free_inode(struct inode *);
130extern unsigned long sysv_count_free_inodes(struct super_block *); 130extern unsigned long sysv_count_free_inodes(struct super_block *);
131 131
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index b09ba2dd8b62..f922cbacdb96 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -38,9 +38,6 @@
38 38
39DEFINE_SPINLOCK(dbg_lock); 39DEFINE_SPINLOCK(dbg_lock);
40 40
41static char dbg_key_buf0[128];
42static char dbg_key_buf1[128];
43
44static const char *get_key_fmt(int fmt) 41static const char *get_key_fmt(int fmt)
45{ 42{
46 switch (fmt) { 43 switch (fmt) {
@@ -103,8 +100,8 @@ static const char *get_dent_type(int type)
103 } 100 }
104} 101}
105 102
106static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key, 103const char *dbg_snprintf_key(const struct ubifs_info *c,
107 char *buffer) 104 const union ubifs_key *key, char *buffer, int len)
108{ 105{
109 char *p = buffer; 106 char *p = buffer;
110 int type = key_type(c, key); 107 int type = key_type(c, key);
@@ -112,45 +109,34 @@ static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key,
112 if (c->key_fmt == UBIFS_SIMPLE_KEY_FMT) { 109 if (c->key_fmt == UBIFS_SIMPLE_KEY_FMT) {
113 switch (type) { 110 switch (type) {
114 case UBIFS_INO_KEY: 111 case UBIFS_INO_KEY:
115 sprintf(p, "(%lu, %s)", (unsigned long)key_inum(c, key), 112 len -= snprintf(p, len, "(%lu, %s)",
116 get_key_type(type)); 113 (unsigned long)key_inum(c, key),
114 get_key_type(type));
117 break; 115 break;
118 case UBIFS_DENT_KEY: 116 case UBIFS_DENT_KEY:
119 case UBIFS_XENT_KEY: 117 case UBIFS_XENT_KEY:
120 sprintf(p, "(%lu, %s, %#08x)", 118 len -= snprintf(p, len, "(%lu, %s, %#08x)",
121 (unsigned long)key_inum(c, key), 119 (unsigned long)key_inum(c, key),
122 get_key_type(type), key_hash(c, key)); 120 get_key_type(type), key_hash(c, key));
123 break; 121 break;
124 case UBIFS_DATA_KEY: 122 case UBIFS_DATA_KEY:
125 sprintf(p, "(%lu, %s, %u)", 123 len -= snprintf(p, len, "(%lu, %s, %u)",
126 (unsigned long)key_inum(c, key), 124 (unsigned long)key_inum(c, key),
127 get_key_type(type), key_block(c, key)); 125 get_key_type(type), key_block(c, key));
128 break; 126 break;
129 case UBIFS_TRUN_KEY: 127 case UBIFS_TRUN_KEY:
130 sprintf(p, "(%lu, %s)", 128 len -= snprintf(p, len, "(%lu, %s)",
131 (unsigned long)key_inum(c, key), 129 (unsigned long)key_inum(c, key),
132 get_key_type(type)); 130 get_key_type(type));
133 break; 131 break;
134 default: 132 default:
135 sprintf(p, "(bad key type: %#08x, %#08x)", 133 len -= snprintf(p, len, "(bad key type: %#08x, %#08x)",
136 key->u32[0], key->u32[1]); 134 key->u32[0], key->u32[1]);
137 } 135 }
138 } else 136 } else
139 sprintf(p, "bad key format %d", c->key_fmt); 137 len -= snprintf(p, len, "bad key format %d", c->key_fmt);
140} 138 ubifs_assert(len > 0);
141 139 return p;
142const char *dbg_key_str0(const struct ubifs_info *c, const union ubifs_key *key)
143{
144 /* dbg_lock must be held */
145 sprintf_key(c, key, dbg_key_buf0);
146 return dbg_key_buf0;
147}
148
149const char *dbg_key_str1(const struct ubifs_info *c, const union ubifs_key *key)
150{
151 /* dbg_lock must be held */
152 sprintf_key(c, key, dbg_key_buf1);
153 return dbg_key_buf1;
154} 140}
155 141
156const char *dbg_ntype(int type) 142const char *dbg_ntype(int type)
@@ -319,6 +305,7 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
319 int i, n; 305 int i, n;
320 union ubifs_key key; 306 union ubifs_key key;
321 const struct ubifs_ch *ch = node; 307 const struct ubifs_ch *ch = node;
308 char key_buf[DBG_KEY_BUF_LEN];
322 309
323 if (dbg_is_tst_rcvry(c)) 310 if (dbg_is_tst_rcvry(c))
324 return; 311 return;
@@ -474,7 +461,8 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
474 const struct ubifs_ino_node *ino = node; 461 const struct ubifs_ino_node *ino = node;
475 462
476 key_read(c, &ino->key, &key); 463 key_read(c, &ino->key, &key);
477 printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key)); 464 printk(KERN_DEBUG "\tkey %s\n",
465 dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN));
478 printk(KERN_DEBUG "\tcreat_sqnum %llu\n", 466 printk(KERN_DEBUG "\tcreat_sqnum %llu\n",
479 (unsigned long long)le64_to_cpu(ino->creat_sqnum)); 467 (unsigned long long)le64_to_cpu(ino->creat_sqnum));
480 printk(KERN_DEBUG "\tsize %llu\n", 468 printk(KERN_DEBUG "\tsize %llu\n",
@@ -517,7 +505,8 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
517 int nlen = le16_to_cpu(dent->nlen); 505 int nlen = le16_to_cpu(dent->nlen);
518 506
519 key_read(c, &dent->key, &key); 507 key_read(c, &dent->key, &key);
520 printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key)); 508 printk(KERN_DEBUG "\tkey %s\n",
509 dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN));
521 printk(KERN_DEBUG "\tinum %llu\n", 510 printk(KERN_DEBUG "\tinum %llu\n",
522 (unsigned long long)le64_to_cpu(dent->inum)); 511 (unsigned long long)le64_to_cpu(dent->inum));
523 printk(KERN_DEBUG "\ttype %d\n", (int)dent->type); 512 printk(KERN_DEBUG "\ttype %d\n", (int)dent->type);
@@ -541,7 +530,8 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
541 int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ; 530 int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ;
542 531
543 key_read(c, &dn->key, &key); 532 key_read(c, &dn->key, &key);
544 printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key)); 533 printk(KERN_DEBUG "\tkey %s\n",
534 dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN));
545 printk(KERN_DEBUG "\tsize %u\n", 535 printk(KERN_DEBUG "\tsize %u\n",
546 le32_to_cpu(dn->size)); 536 le32_to_cpu(dn->size));
547 printk(KERN_DEBUG "\tcompr_typ %d\n", 537 printk(KERN_DEBUG "\tcompr_typ %d\n",
@@ -582,7 +572,9 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
582 key_read(c, &br->key, &key); 572 key_read(c, &br->key, &key);
583 printk(KERN_DEBUG "\t%d: LEB %d:%d len %d key %s\n", 573 printk(KERN_DEBUG "\t%d: LEB %d:%d len %d key %s\n",
584 i, le32_to_cpu(br->lnum), le32_to_cpu(br->offs), 574 i, le32_to_cpu(br->lnum), le32_to_cpu(br->offs),
585 le32_to_cpu(br->len), DBGKEY(&key)); 575 le32_to_cpu(br->len),
576 dbg_snprintf_key(c, &key, key_buf,
577 DBG_KEY_BUF_LEN));
586 } 578 }
587 break; 579 break;
588 } 580 }
@@ -934,6 +926,7 @@ void dbg_dump_znode(const struct ubifs_info *c,
934{ 926{
935 int n; 927 int n;
936 const struct ubifs_zbranch *zbr; 928 const struct ubifs_zbranch *zbr;
929 char key_buf[DBG_KEY_BUF_LEN];
937 930
938 spin_lock(&dbg_lock); 931 spin_lock(&dbg_lock);
939 if (znode->parent) 932 if (znode->parent)
@@ -958,12 +951,16 @@ void dbg_dump_znode(const struct ubifs_info *c,
958 printk(KERN_DEBUG "\t%d: znode %p LEB %d:%d len %d key " 951 printk(KERN_DEBUG "\t%d: znode %p LEB %d:%d len %d key "
959 "%s\n", n, zbr->znode, zbr->lnum, 952 "%s\n", n, zbr->znode, zbr->lnum,
960 zbr->offs, zbr->len, 953 zbr->offs, zbr->len,
961 DBGKEY(&zbr->key)); 954 dbg_snprintf_key(c, &zbr->key,
955 key_buf,
956 DBG_KEY_BUF_LEN));
962 else 957 else
963 printk(KERN_DEBUG "\t%d: LNC %p LEB %d:%d len %d key " 958 printk(KERN_DEBUG "\t%d: LNC %p LEB %d:%d len %d key "
964 "%s\n", n, zbr->znode, zbr->lnum, 959 "%s\n", n, zbr->znode, zbr->lnum,
965 zbr->offs, zbr->len, 960 zbr->offs, zbr->len,
966 DBGKEY(&zbr->key)); 961 dbg_snprintf_key(c, &zbr->key,
962 key_buf,
963 DBG_KEY_BUF_LEN));
967 } 964 }
968 spin_unlock(&dbg_lock); 965 spin_unlock(&dbg_lock);
969} 966}
@@ -1260,6 +1257,7 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
1260 int err, nlen1, nlen2, cmp; 1257 int err, nlen1, nlen2, cmp;
1261 struct ubifs_dent_node *dent1, *dent2; 1258 struct ubifs_dent_node *dent1, *dent2;
1262 union ubifs_key key; 1259 union ubifs_key key;
1260 char key_buf[DBG_KEY_BUF_LEN];
1263 1261
1264 ubifs_assert(!keys_cmp(c, &zbr1->key, &zbr2->key)); 1262 ubifs_assert(!keys_cmp(c, &zbr1->key, &zbr2->key));
1265 dent1 = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS); 1263 dent1 = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS);
@@ -1290,9 +1288,11 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
1290 key_read(c, &dent1->key, &key); 1288 key_read(c, &dent1->key, &key);
1291 if (keys_cmp(c, &zbr1->key, &key)) { 1289 if (keys_cmp(c, &zbr1->key, &key)) {
1292 dbg_err("1st entry at %d:%d has key %s", zbr1->lnum, 1290 dbg_err("1st entry at %d:%d has key %s", zbr1->lnum,
1293 zbr1->offs, DBGKEY(&key)); 1291 zbr1->offs, dbg_snprintf_key(c, &key, key_buf,
1292 DBG_KEY_BUF_LEN));
1294 dbg_err("but it should have key %s according to tnc", 1293 dbg_err("but it should have key %s according to tnc",
1295 DBGKEY(&zbr1->key)); 1294 dbg_snprintf_key(c, &zbr1->key, key_buf,
1295 DBG_KEY_BUF_LEN));
1296 dbg_dump_node(c, dent1); 1296 dbg_dump_node(c, dent1);
1297 goto out_free; 1297 goto out_free;
1298 } 1298 }
@@ -1300,9 +1300,11 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
1300 key_read(c, &dent2->key, &key); 1300 key_read(c, &dent2->key, &key);
1301 if (keys_cmp(c, &zbr2->key, &key)) { 1301 if (keys_cmp(c, &zbr2->key, &key)) {
1302 dbg_err("2nd entry at %d:%d has key %s", zbr1->lnum, 1302 dbg_err("2nd entry at %d:%d has key %s", zbr1->lnum,
1303 zbr1->offs, DBGKEY(&key)); 1303 zbr1->offs, dbg_snprintf_key(c, &key, key_buf,
1304 DBG_KEY_BUF_LEN));
1304 dbg_err("but it should have key %s according to tnc", 1305 dbg_err("but it should have key %s according to tnc",
1305 DBGKEY(&zbr2->key)); 1306 dbg_snprintf_key(c, &zbr2->key, key_buf,
1307 DBG_KEY_BUF_LEN));
1306 dbg_dump_node(c, dent2); 1308 dbg_dump_node(c, dent2);
1307 goto out_free; 1309 goto out_free;
1308 } 1310 }
@@ -1319,7 +1321,7 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
1319 dbg_err("2 xent/dent nodes with the same name"); 1321 dbg_err("2 xent/dent nodes with the same name");
1320 else 1322 else
1321 dbg_err("bad order of colliding key %s", 1323 dbg_err("bad order of colliding key %s",
1322 DBGKEY(&key)); 1324 dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN));
1323 1325
1324 ubifs_msg("first node at %d:%d\n", zbr1->lnum, zbr1->offs); 1326 ubifs_msg("first node at %d:%d\n", zbr1->lnum, zbr1->offs);
1325 dbg_dump_node(c, dent1); 1327 dbg_dump_node(c, dent1);
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 8d9c46810189..ad1a6fee6010 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -169,40 +169,39 @@ struct ubifs_global_debug_info {
169 spin_unlock(&dbg_lock); \ 169 spin_unlock(&dbg_lock); \
170} while (0) 170} while (0)
171 171
172const char *dbg_key_str0(const struct ubifs_info *c, 172#define ubifs_dbg_msg(type, fmt, ...) \
173 const union ubifs_key *key); 173 pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__)
174const char *dbg_key_str1(const struct ubifs_info *c, 174
175 const union ubifs_key *key); 175#define DBG_KEY_BUF_LEN 32
176 176#define ubifs_dbg_msg_key(type, key, fmt, ...) do { \
177/* 177 char __tmp_key_buf[DBG_KEY_BUF_LEN]; \
178 * DBGKEY macros require @dbg_lock to be held, which it is in the dbg message 178 pr_debug("UBIFS DBG " type ": " fmt "%s\n", ##__VA_ARGS__, \
179 * macros. 179 dbg_snprintf_key(c, key, __tmp_key_buf, DBG_KEY_BUF_LEN)); \
180 */
181#define DBGKEY(key) dbg_key_str0(c, (key))
182#define DBGKEY1(key) dbg_key_str1(c, (key))
183
184extern spinlock_t dbg_lock;
185
186#define ubifs_dbg_msg(type, fmt, ...) do { \
187 spin_lock(&dbg_lock); \
188 pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__); \
189 spin_unlock(&dbg_lock); \
190} while (0) 180} while (0)
191 181
192/* Just a debugging messages not related to any specific UBIFS subsystem */ 182/* Just a debugging messages not related to any specific UBIFS subsystem */
193#define dbg_msg(fmt, ...) ubifs_dbg_msg("msg", fmt, ##__VA_ARGS__) 183#define dbg_msg(fmt, ...) \
184 printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid, \
185 __func__, ##__VA_ARGS__)
186
194/* General messages */ 187/* General messages */
195#define dbg_gen(fmt, ...) ubifs_dbg_msg("gen", fmt, ##__VA_ARGS__) 188#define dbg_gen(fmt, ...) ubifs_dbg_msg("gen", fmt, ##__VA_ARGS__)
196/* Additional journal messages */ 189/* Additional journal messages */
197#define dbg_jnl(fmt, ...) ubifs_dbg_msg("jnl", fmt, ##__VA_ARGS__) 190#define dbg_jnl(fmt, ...) ubifs_dbg_msg("jnl", fmt, ##__VA_ARGS__)
191#define dbg_jnlk(key, fmt, ...) \
192 ubifs_dbg_msg_key("jnl", key, fmt, ##__VA_ARGS__)
198/* Additional TNC messages */ 193/* Additional TNC messages */
199#define dbg_tnc(fmt, ...) ubifs_dbg_msg("tnc", fmt, ##__VA_ARGS__) 194#define dbg_tnc(fmt, ...) ubifs_dbg_msg("tnc", fmt, ##__VA_ARGS__)
195#define dbg_tnck(key, fmt, ...) \
196 ubifs_dbg_msg_key("tnc", key, fmt, ##__VA_ARGS__)
200/* Additional lprops messages */ 197/* Additional lprops messages */
201#define dbg_lp(fmt, ...) ubifs_dbg_msg("lp", fmt, ##__VA_ARGS__) 198#define dbg_lp(fmt, ...) ubifs_dbg_msg("lp", fmt, ##__VA_ARGS__)
202/* Additional LEB find messages */ 199/* Additional LEB find messages */
203#define dbg_find(fmt, ...) ubifs_dbg_msg("find", fmt, ##__VA_ARGS__) 200#define dbg_find(fmt, ...) ubifs_dbg_msg("find", fmt, ##__VA_ARGS__)
204/* Additional mount messages */ 201/* Additional mount messages */
205#define dbg_mnt(fmt, ...) ubifs_dbg_msg("mnt", fmt, ##__VA_ARGS__) 202#define dbg_mnt(fmt, ...) ubifs_dbg_msg("mnt", fmt, ##__VA_ARGS__)
203#define dbg_mntk(key, fmt, ...) \
204 ubifs_dbg_msg_key("mnt", key, fmt, ##__VA_ARGS__)
206/* Additional I/O messages */ 205/* Additional I/O messages */
207#define dbg_io(fmt, ...) ubifs_dbg_msg("io", fmt, ##__VA_ARGS__) 206#define dbg_io(fmt, ...) ubifs_dbg_msg("io", fmt, ##__VA_ARGS__)
208/* Additional commit messages */ 207/* Additional commit messages */
@@ -218,6 +217,7 @@ extern spinlock_t dbg_lock;
218/* Additional recovery messages */ 217/* Additional recovery messages */
219#define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__) 218#define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__)
220 219
220extern spinlock_t dbg_lock;
221extern struct ubifs_global_debug_info ubifs_dbg; 221extern struct ubifs_global_debug_info ubifs_dbg;
222 222
223static inline int dbg_is_chk_gen(const struct ubifs_info *c) 223static inline int dbg_is_chk_gen(const struct ubifs_info *c)
@@ -258,6 +258,8 @@ const char *dbg_cstate(int cmt_state);
258const char *dbg_jhead(int jhead); 258const char *dbg_jhead(int jhead);
259const char *dbg_get_key_dump(const struct ubifs_info *c, 259const char *dbg_get_key_dump(const struct ubifs_info *c,
260 const union ubifs_key *key); 260 const union ubifs_key *key);
261const char *dbg_snprintf_key(const struct ubifs_info *c,
262 const union ubifs_key *key, char *buffer, int len);
261void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode); 263void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode);
262void dbg_dump_node(const struct ubifs_info *c, const void *node); 264void dbg_dump_node(const struct ubifs_info *c, const void *node);
263void dbg_dump_lpt_node(const struct ubifs_info *c, void *node, int lnum, 265void dbg_dump_lpt_node(const struct ubifs_info *c, void *node, int lnum,
@@ -345,20 +347,23 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c);
345#define dbg_dump_stack() 347#define dbg_dump_stack()
346#define ubifs_assert_cmt_locked(c) 348#define ubifs_assert_cmt_locked(c)
347 349
348#define dbg_msg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) 350#define dbg_msg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
349#define dbg_gen(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) 351#define dbg_gen(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
350#define dbg_jnl(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) 352#define dbg_jnl(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
351#define dbg_tnc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) 353#define dbg_jnlk(key, fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
352#define dbg_lp(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) 354#define dbg_tnc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
353#define dbg_find(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) 355#define dbg_tnck(key, fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
354#define dbg_mnt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) 356#define dbg_lp(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
355#define dbg_io(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) 357#define dbg_find(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
356#define dbg_cmt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) 358#define dbg_mnt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
357#define dbg_budg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) 359#define dbg_mntk(key, fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
358#define dbg_log(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) 360#define dbg_io(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
359#define dbg_gc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) 361#define dbg_cmt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
360#define dbg_scan(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) 362#define dbg_budg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
361#define dbg_rcvry(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) 363#define dbg_log(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
364#define dbg_gc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
365#define dbg_scan(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
366#define dbg_rcvry(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
362 367
363static inline int ubifs_debugging_init(struct ubifs_info *c) { return 0; } 368static inline int ubifs_debugging_init(struct ubifs_info *c) { return 0; }
364static inline void ubifs_debugging_exit(struct ubifs_info *c) { return; } 369static inline void ubifs_debugging_exit(struct ubifs_info *c) { return; }
@@ -368,6 +373,10 @@ static inline const char *dbg_jhead(int jhead) { return ""; }
368static inline const char * 373static inline const char *
369dbg_get_key_dump(const struct ubifs_info *c, 374dbg_get_key_dump(const struct ubifs_info *c,
370 const union ubifs_key *key) { return ""; } 375 const union ubifs_key *key) { return ""; }
376static inline const char *
377dbg_snprintf_key(const struct ubifs_info *c,
378 const union ubifs_key *key, char *buffer,
379 int len) { return ""; }
371static inline void dbg_dump_inode(struct ubifs_info *c, 380static inline void dbg_dump_inode(struct ubifs_info *c,
372 const struct inode *inode) { return; } 381 const struct inode *inode) { return; }
373static inline void dbg_dump_node(const struct ubifs_info *c, 382static inline void dbg_dump_node(const struct ubifs_info *c,
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 683492043317..d6fe1c79f18b 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -56,7 +56,7 @@
56 * 56 *
57 * This function returns the inherited flags. 57 * This function returns the inherited flags.
58 */ 58 */
59static int inherit_flags(const struct inode *dir, int mode) 59static int inherit_flags(const struct inode *dir, umode_t mode)
60{ 60{
61 int flags; 61 int flags;
62 const struct ubifs_inode *ui = ubifs_inode(dir); 62 const struct ubifs_inode *ui = ubifs_inode(dir);
@@ -86,7 +86,7 @@ static int inherit_flags(const struct inode *dir, int mode)
86 * case of failure. 86 * case of failure.
87 */ 87 */
88struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, 88struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
89 int mode) 89 umode_t mode)
90{ 90{
91 struct inode *inode; 91 struct inode *inode;
92 struct ubifs_inode *ui; 92 struct ubifs_inode *ui;
@@ -253,7 +253,7 @@ out:
253 return ERR_PTR(err); 253 return ERR_PTR(err);
254} 254}
255 255
256static int ubifs_create(struct inode *dir, struct dentry *dentry, int mode, 256static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
257 struct nameidata *nd) 257 struct nameidata *nd)
258{ 258{
259 struct inode *inode; 259 struct inode *inode;
@@ -268,7 +268,7 @@ static int ubifs_create(struct inode *dir, struct dentry *dentry, int mode,
268 * parent directory inode. 268 * parent directory inode.
269 */ 269 */
270 270
271 dbg_gen("dent '%.*s', mode %#x in dir ino %lu", 271 dbg_gen("dent '%.*s', mode %#hx in dir ino %lu",
272 dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino); 272 dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino);
273 273
274 err = ubifs_budget_space(c, &req); 274 err = ubifs_budget_space(c, &req);
@@ -712,7 +712,7 @@ out_cancel:
712 return err; 712 return err;
713} 713}
714 714
715static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 715static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
716{ 716{
717 struct inode *inode; 717 struct inode *inode;
718 struct ubifs_inode *dir_ui = ubifs_inode(dir); 718 struct ubifs_inode *dir_ui = ubifs_inode(dir);
@@ -725,7 +725,7 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
725 * directory inode. 725 * directory inode.
726 */ 726 */
727 727
728 dbg_gen("dent '%.*s', mode %#x in dir ino %lu", 728 dbg_gen("dent '%.*s', mode %#hx in dir ino %lu",
729 dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino); 729 dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino);
730 730
731 err = ubifs_budget_space(c, &req); 731 err = ubifs_budget_space(c, &req);
@@ -769,7 +769,7 @@ out_budg:
769} 769}
770 770
771static int ubifs_mknod(struct inode *dir, struct dentry *dentry, 771static int ubifs_mknod(struct inode *dir, struct dentry *dentry,
772 int mode, dev_t rdev) 772 umode_t mode, dev_t rdev)
773{ 773{
774 struct inode *inode; 774 struct inode *inode;
775 struct ubifs_inode *ui; 775 struct ubifs_inode *ui;
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
index 548acf494afd..1a7e2d8bdbe9 100644
--- a/fs/ubifs/ioctl.c
+++ b/fs/ubifs/ioctl.c
@@ -173,12 +173,12 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
173 * Make sure the file-system is read-write and make sure it 173 * Make sure the file-system is read-write and make sure it
174 * will not become read-only while we are changing the flags. 174 * will not become read-only while we are changing the flags.
175 */ 175 */
176 err = mnt_want_write(file->f_path.mnt); 176 err = mnt_want_write_file(file);
177 if (err) 177 if (err)
178 return err; 178 return err;
179 dbg_gen("set flags: %#x, i_flags %#x", flags, inode->i_flags); 179 dbg_gen("set flags: %#x, i_flags %#x", flags, inode->i_flags);
180 err = setflags(inode, flags); 180 err = setflags(inode, flags);
181 mnt_drop_write(file->f_path.mnt); 181 mnt_drop_write_file(file);
182 return err; 182 return err;
183 } 183 }
184 184
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index cef0460f4c54..2f438ab2e7a2 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -697,9 +697,8 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
697 int dlen = COMPRESSED_DATA_NODE_BUF_SZ, allocated = 1; 697 int dlen = COMPRESSED_DATA_NODE_BUF_SZ, allocated = 1;
698 struct ubifs_inode *ui = ubifs_inode(inode); 698 struct ubifs_inode *ui = ubifs_inode(inode);
699 699
700 dbg_jnl("ino %lu, blk %u, len %d, key %s", 700 dbg_jnlk(key, "ino %lu, blk %u, len %d, key ",
701 (unsigned long)key_inum(c, key), key_block(c, key), len, 701 (unsigned long)key_inum(c, key), key_block(c, key), len);
702 DBGKEY(key));
703 ubifs_assert(len <= UBIFS_BLOCK_SIZE); 702 ubifs_assert(len <= UBIFS_BLOCK_SIZE);
704 703
705 data = kmalloc(dlen, GFP_NOFS | __GFP_NOWARN); 704 data = kmalloc(dlen, GFP_NOFS | __GFP_NOWARN);
@@ -1177,7 +1176,7 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
1177 dn = (void *)trun + UBIFS_TRUN_NODE_SZ; 1176 dn = (void *)trun + UBIFS_TRUN_NODE_SZ;
1178 blk = new_size >> UBIFS_BLOCK_SHIFT; 1177 blk = new_size >> UBIFS_BLOCK_SHIFT;
1179 data_key_init(c, &key, inum, blk); 1178 data_key_init(c, &key, inum, blk);
1180 dbg_jnl("last block key %s", DBGKEY(&key)); 1179 dbg_jnlk(&key, "last block key ");
1181 err = ubifs_tnc_lookup(c, &key, dn); 1180 err = ubifs_tnc_lookup(c, &key, dn);
1182 if (err == -ENOENT) 1181 if (err == -ENOENT)
1183 dlen = 0; /* Not found (so it is a hole) */ 1182 dlen = 0; /* Not found (so it is a hole) */
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index 6189c74d97f0..66d59d0a1402 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -1986,12 +1986,11 @@ again:
1986 1986
1987 if (path[h].in_tree) 1987 if (path[h].in_tree)
1988 continue; 1988 continue;
1989 nnode = kmalloc(sz, GFP_NOFS); 1989 nnode = kmemdup(&path[h].nnode, sz, GFP_NOFS);
1990 if (!nnode) { 1990 if (!nnode) {
1991 err = -ENOMEM; 1991 err = -ENOMEM;
1992 goto out; 1992 goto out;
1993 } 1993 }
1994 memcpy(nnode, &path[h].nnode, sz);
1995 parent = nnode->parent; 1994 parent = nnode->parent;
1996 parent->nbranch[nnode->iip].nnode = nnode; 1995 parent->nbranch[nnode->iip].nnode = nnode;
1997 path[h].ptr.nnode = nnode; 1996 path[h].ptr.nnode = nnode;
@@ -2004,12 +2003,11 @@ again:
2004 const size_t sz = sizeof(struct ubifs_pnode); 2003 const size_t sz = sizeof(struct ubifs_pnode);
2005 struct ubifs_nnode *parent; 2004 struct ubifs_nnode *parent;
2006 2005
2007 pnode = kmalloc(sz, GFP_NOFS); 2006 pnode = kmemdup(&path[h].pnode, sz, GFP_NOFS);
2008 if (!pnode) { 2007 if (!pnode) {
2009 err = -ENOMEM; 2008 err = -ENOMEM;
2010 goto out; 2009 goto out;
2011 } 2010 }
2012 memcpy(pnode, &path[h].pnode, sz);
2013 parent = pnode->parent; 2011 parent = pnode->parent;
2014 parent->nbranch[pnode->iip].pnode = pnode; 2012 parent->nbranch[pnode->iip].pnode = pnode;
2015 path[h].ptr.pnode = pnode; 2013 path[h].ptr.pnode = pnode;
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index ccabaf1164b3..b007637f0406 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -221,8 +221,8 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
221{ 221{
222 int err; 222 int err;
223 223
224 dbg_mnt("LEB %d:%d len %d deletion %d sqnum %llu %s", r->lnum, 224 dbg_mntk(&r->key, "LEB %d:%d len %d deletion %d sqnum %llu key ",
225 r->offs, r->len, r->deletion, r->sqnum, DBGKEY(&r->key)); 225 r->lnum, r->offs, r->len, r->deletion, r->sqnum);
226 226
227 /* Set c->replay_sqnum to help deal with dangling branches. */ 227 /* Set c->replay_sqnum to help deal with dangling branches. */
228 c->replay_sqnum = r->sqnum; 228 c->replay_sqnum = r->sqnum;
@@ -361,7 +361,7 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
361{ 361{
362 struct replay_entry *r; 362 struct replay_entry *r;
363 363
364 dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); 364 dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs);
365 365
366 if (key_inum(c, key) >= c->highest_inum) 366 if (key_inum(c, key) >= c->highest_inum)
367 c->highest_inum = key_inum(c, key); 367 c->highest_inum = key_inum(c, key);
@@ -409,7 +409,7 @@ static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
409 struct replay_entry *r; 409 struct replay_entry *r;
410 char *nbuf; 410 char *nbuf;
411 411
412 dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); 412 dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs);
413 if (key_inum(c, key) >= c->highest_inum) 413 if (key_inum(c, key) >= c->highest_inum)
414 c->highest_inum = key_inum(c, key); 414 c->highest_inum = key_inum(c, key);
415 415
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index ae0e76bb6ebf..63765d58445b 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -276,7 +276,6 @@ static void ubifs_i_callback(struct rcu_head *head)
276{ 276{
277 struct inode *inode = container_of(head, struct inode, i_rcu); 277 struct inode *inode = container_of(head, struct inode, i_rcu);
278 struct ubifs_inode *ui = ubifs_inode(inode); 278 struct ubifs_inode *ui = ubifs_inode(inode);
279 INIT_LIST_HEAD(&inode->i_dentry);
280 kmem_cache_free(ubifs_inode_slab, ui); 279 kmem_cache_free(ubifs_inode_slab, ui);
281} 280}
282 281
@@ -420,9 +419,9 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf)
420 return 0; 419 return 0;
421} 420}
422 421
423static int ubifs_show_options(struct seq_file *s, struct vfsmount *mnt) 422static int ubifs_show_options(struct seq_file *s, struct dentry *root)
424{ 423{
425 struct ubifs_info *c = mnt->mnt_sb->s_fs_info; 424 struct ubifs_info *c = root->d_sb->s_fs_info;
426 425
427 if (c->mount_opts.unmount_mode == 2) 426 if (c->mount_opts.unmount_mode == 2)
428 seq_printf(s, ",fast_unmount"); 427 seq_printf(s, ",fast_unmount");
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index 066738647685..16ad84d8402f 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -344,12 +344,11 @@ static int lnc_add(struct ubifs_info *c, struct ubifs_zbranch *zbr,
344 return err; 344 return err;
345 } 345 }
346 346
347 lnc_node = kmalloc(zbr->len, GFP_NOFS); 347 lnc_node = kmemdup(node, zbr->len, GFP_NOFS);
348 if (!lnc_node) 348 if (!lnc_node)
349 /* We don't have to have the cache, so no error */ 349 /* We don't have to have the cache, so no error */
350 return 0; 350 return 0;
351 351
352 memcpy(lnc_node, node, zbr->len);
353 zbr->leaf = lnc_node; 352 zbr->leaf = lnc_node;
354 return 0; 353 return 0;
355} 354}
@@ -506,7 +505,7 @@ static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key,
506{ 505{
507 int ret; 506 int ret;
508 507
509 dbg_tnc("LEB %d:%d, key %s", zbr->lnum, zbr->offs, DBGKEY(key)); 508 dbg_tnck(key, "LEB %d:%d, key ", zbr->lnum, zbr->offs);
510 509
511 ret = try_read_node(c, node, key_type(c, key), zbr->len, zbr->lnum, 510 ret = try_read_node(c, node, key_type(c, key), zbr->len, zbr->lnum,
512 zbr->offs); 511 zbr->offs);
@@ -520,8 +519,8 @@ static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key,
520 ret = 0; 519 ret = 0;
521 } 520 }
522 if (ret == 0 && c->replaying) 521 if (ret == 0 && c->replaying)
523 dbg_mnt("dangling branch LEB %d:%d len %d, key %s", 522 dbg_mntk(key, "dangling branch LEB %d:%d len %d, key ",
524 zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); 523 zbr->lnum, zbr->offs, zbr->len);
525 return ret; 524 return ret;
526} 525}
527 526
@@ -996,9 +995,9 @@ static int fallible_resolve_collision(struct ubifs_info *c,
996 if (adding || !o_znode) 995 if (adding || !o_znode)
997 return 0; 996 return 0;
998 997
999 dbg_mnt("dangling match LEB %d:%d len %d %s", 998 dbg_mntk(key, "dangling match LEB %d:%d len %d key ",
1000 o_znode->zbranch[o_n].lnum, o_znode->zbranch[o_n].offs, 999 o_znode->zbranch[o_n].lnum, o_znode->zbranch[o_n].offs,
1001 o_znode->zbranch[o_n].len, DBGKEY(key)); 1000 o_znode->zbranch[o_n].len);
1002 *zn = o_znode; 1001 *zn = o_znode;
1003 *n = o_n; 1002 *n = o_n;
1004 return 1; 1003 return 1;
@@ -1180,7 +1179,7 @@ int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key,
1180 struct ubifs_znode *znode; 1179 struct ubifs_znode *znode;
1181 unsigned long time = get_seconds(); 1180 unsigned long time = get_seconds();
1182 1181
1183 dbg_tnc("search key %s", DBGKEY(key)); 1182 dbg_tnck(key, "search key ");
1184 ubifs_assert(key_type(c, key) < UBIFS_INVALID_KEY); 1183 ubifs_assert(key_type(c, key) < UBIFS_INVALID_KEY);
1185 1184
1186 znode = c->zroot.znode; 1185 znode = c->zroot.znode;
@@ -1316,7 +1315,7 @@ static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key,
1316 struct ubifs_znode *znode; 1315 struct ubifs_znode *znode;
1317 unsigned long time = get_seconds(); 1316 unsigned long time = get_seconds();
1318 1317
1319 dbg_tnc("search and dirty key %s", DBGKEY(key)); 1318 dbg_tnck(key, "search and dirty key ");
1320 1319
1321 znode = c->zroot.znode; 1320 znode = c->zroot.znode;
1322 if (unlikely(!znode)) { 1321 if (unlikely(!znode)) {
@@ -1723,8 +1722,8 @@ static int validate_data_node(struct ubifs_info *c, void *buf,
1723 if (!keys_eq(c, &zbr->key, &key1)) { 1722 if (!keys_eq(c, &zbr->key, &key1)) {
1724 ubifs_err("bad key in node at LEB %d:%d", 1723 ubifs_err("bad key in node at LEB %d:%d",
1725 zbr->lnum, zbr->offs); 1724 zbr->lnum, zbr->offs);
1726 dbg_tnc("looked for key %s found node's key %s", 1725 dbg_tnck(&zbr->key, "looked for key ");
1727 DBGKEY(&zbr->key), DBGKEY1(&key1)); 1726 dbg_tnck(&key1, "found node's key ");
1728 goto out_err; 1727 goto out_err;
1729 } 1728 }
1730 1729
@@ -1777,7 +1776,7 @@ int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu)
1777 ubifs_err("failed to read from LEB %d:%d, error %d", 1776 ubifs_err("failed to read from LEB %d:%d, error %d",
1778 lnum, offs, err); 1777 lnum, offs, err);
1779 dbg_dump_stack(); 1778 dbg_dump_stack();
1780 dbg_tnc("key %s", DBGKEY(&bu->key)); 1779 dbg_tnck(&bu->key, "key ");
1781 return err; 1780 return err;
1782 } 1781 }
1783 1782
@@ -1812,7 +1811,7 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
1812 int found, n, err; 1811 int found, n, err;
1813 struct ubifs_znode *znode; 1812 struct ubifs_znode *znode;
1814 1813
1815 dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key)); 1814 dbg_tnck(key, "name '%.*s' key ", nm->len, nm->name);
1816 mutex_lock(&c->tnc_mutex); 1815 mutex_lock(&c->tnc_mutex);
1817 found = ubifs_lookup_level0(c, key, &znode, &n); 1816 found = ubifs_lookup_level0(c, key, &znode, &n);
1818 if (!found) { 1817 if (!found) {
@@ -1986,8 +1985,7 @@ again:
1986 zp = znode->parent; 1985 zp = znode->parent;
1987 if (znode->child_cnt < c->fanout) { 1986 if (znode->child_cnt < c->fanout) {
1988 ubifs_assert(n != c->fanout); 1987 ubifs_assert(n != c->fanout);
1989 dbg_tnc("inserted at %d level %d, key %s", n, znode->level, 1988 dbg_tnck(key, "inserted at %d level %d, key ", n, znode->level);
1990 DBGKEY(key));
1991 1989
1992 insert_zbranch(znode, zbr, n); 1990 insert_zbranch(znode, zbr, n);
1993 1991
@@ -2002,7 +2000,7 @@ again:
2002 * Unfortunately, @znode does not have more empty slots and we have to 2000 * Unfortunately, @znode does not have more empty slots and we have to
2003 * split it. 2001 * split it.
2004 */ 2002 */
2005 dbg_tnc("splitting level %d, key %s", znode->level, DBGKEY(key)); 2003 dbg_tnck(key, "splitting level %d, key ", znode->level);
2006 2004
2007 if (znode->alt) 2005 if (znode->alt)
2008 /* 2006 /*
@@ -2096,7 +2094,7 @@ do_split:
2096 } 2094 }
2097 2095
2098 /* Insert new key and branch */ 2096 /* Insert new key and branch */
2099 dbg_tnc("inserting at %d level %d, key %s", n, zn->level, DBGKEY(key)); 2097 dbg_tnck(key, "inserting at %d level %d, key ", n, zn->level);
2100 2098
2101 insert_zbranch(zi, zbr, n); 2099 insert_zbranch(zi, zbr, n);
2102 2100
@@ -2172,7 +2170,7 @@ int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum,
2172 struct ubifs_znode *znode; 2170 struct ubifs_znode *znode;
2173 2171
2174 mutex_lock(&c->tnc_mutex); 2172 mutex_lock(&c->tnc_mutex);
2175 dbg_tnc("%d:%d, len %d, key %s", lnum, offs, len, DBGKEY(key)); 2173 dbg_tnck(key, "%d:%d, len %d, key ", lnum, offs, len);
2176 found = lookup_level0_dirty(c, key, &znode, &n); 2174 found = lookup_level0_dirty(c, key, &znode, &n);
2177 if (!found) { 2175 if (!found) {
2178 struct ubifs_zbranch zbr; 2176 struct ubifs_zbranch zbr;
@@ -2221,8 +2219,8 @@ int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key,
2221 struct ubifs_znode *znode; 2219 struct ubifs_znode *znode;
2222 2220
2223 mutex_lock(&c->tnc_mutex); 2221 mutex_lock(&c->tnc_mutex);
2224 dbg_tnc("old LEB %d:%d, new LEB %d:%d, len %d, key %s", old_lnum, 2222 dbg_tnck(key, "old LEB %d:%d, new LEB %d:%d, len %d, key ", old_lnum,
2225 old_offs, lnum, offs, len, DBGKEY(key)); 2223 old_offs, lnum, offs, len);
2226 found = lookup_level0_dirty(c, key, &znode, &n); 2224 found = lookup_level0_dirty(c, key, &znode, &n);
2227 if (found < 0) { 2225 if (found < 0) {
2228 err = found; 2226 err = found;
@@ -2304,8 +2302,8 @@ int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key,
2304 struct ubifs_znode *znode; 2302 struct ubifs_znode *znode;
2305 2303
2306 mutex_lock(&c->tnc_mutex); 2304 mutex_lock(&c->tnc_mutex);
2307 dbg_tnc("LEB %d:%d, name '%.*s', key %s", lnum, offs, nm->len, nm->name, 2305 dbg_tnck(key, "LEB %d:%d, name '%.*s', key ",
2308 DBGKEY(key)); 2306 lnum, offs, nm->len, nm->name);
2309 found = lookup_level0_dirty(c, key, &znode, &n); 2307 found = lookup_level0_dirty(c, key, &znode, &n);
2310 if (found < 0) { 2308 if (found < 0) {
2311 err = found; 2309 err = found;
@@ -2398,7 +2396,7 @@ static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n)
2398 /* Delete without merge for now */ 2396 /* Delete without merge for now */
2399 ubifs_assert(znode->level == 0); 2397 ubifs_assert(znode->level == 0);
2400 ubifs_assert(n >= 0 && n < c->fanout); 2398 ubifs_assert(n >= 0 && n < c->fanout);
2401 dbg_tnc("deleting %s", DBGKEY(&znode->zbranch[n].key)); 2399 dbg_tnck(&znode->zbranch[n].key, "deleting key ");
2402 2400
2403 zbr = &znode->zbranch[n]; 2401 zbr = &znode->zbranch[n];
2404 lnc_free(zbr); 2402 lnc_free(zbr);
@@ -2508,7 +2506,7 @@ int ubifs_tnc_remove(struct ubifs_info *c, const union ubifs_key *key)
2508 struct ubifs_znode *znode; 2506 struct ubifs_znode *znode;
2509 2507
2510 mutex_lock(&c->tnc_mutex); 2508 mutex_lock(&c->tnc_mutex);
2511 dbg_tnc("key %s", DBGKEY(key)); 2509 dbg_tnck(key, "key ");
2512 found = lookup_level0_dirty(c, key, &znode, &n); 2510 found = lookup_level0_dirty(c, key, &znode, &n);
2513 if (found < 0) { 2511 if (found < 0) {
2514 err = found; 2512 err = found;
@@ -2539,7 +2537,7 @@ int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key,
2539 struct ubifs_znode *znode; 2537 struct ubifs_znode *znode;
2540 2538
2541 mutex_lock(&c->tnc_mutex); 2539 mutex_lock(&c->tnc_mutex);
2542 dbg_tnc("%.*s, key %s", nm->len, nm->name, DBGKEY(key)); 2540 dbg_tnck(key, "%.*s, key ", nm->len, nm->name);
2543 err = lookup_level0_dirty(c, key, &znode, &n); 2541 err = lookup_level0_dirty(c, key, &znode, &n);
2544 if (err < 0) 2542 if (err < 0)
2545 goto out_unlock; 2543 goto out_unlock;
@@ -2654,7 +2652,7 @@ int ubifs_tnc_remove_range(struct ubifs_info *c, union ubifs_key *from_key,
2654 dbg_dump_znode(c, znode); 2652 dbg_dump_znode(c, znode);
2655 goto out_unlock; 2653 goto out_unlock;
2656 } 2654 }
2657 dbg_tnc("removing %s", DBGKEY(key)); 2655 dbg_tnck(key, "removing key ");
2658 } 2656 }
2659 if (k) { 2657 if (k) {
2660 for (i = n + 1 + k; i < znode->child_cnt; i++) 2658 for (i = n + 1 + k; i < znode->child_cnt; i++)
@@ -2774,7 +2772,7 @@ struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c,
2774 struct ubifs_zbranch *zbr; 2772 struct ubifs_zbranch *zbr;
2775 union ubifs_key *dkey; 2773 union ubifs_key *dkey;
2776 2774
2777 dbg_tnc("%s %s", nm->name ? (char *)nm->name : "(lowest)", DBGKEY(key)); 2775 dbg_tnck(key, "%s ", nm->name ? (char *)nm->name : "(lowest)");
2778 ubifs_assert(is_hash_key(c, key)); 2776 ubifs_assert(is_hash_key(c, key));
2779 2777
2780 mutex_lock(&c->tnc_mutex); 2778 mutex_lock(&c->tnc_mutex);
@@ -3333,9 +3331,9 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode,
3333 3331
3334out_dump: 3332out_dump:
3335 block = key_block(c, key); 3333 block = key_block(c, key);
3336 ubifs_err("inode %lu has size %lld, but there are data at offset %lld " 3334 ubifs_err("inode %lu has size %lld, but there are data at offset %lld",
3337 "(data key %s)", (unsigned long)inode->i_ino, size, 3335 (unsigned long)inode->i_ino, size,
3338 ((loff_t)block) << UBIFS_BLOCK_SHIFT, DBGKEY(key)); 3336 ((loff_t)block) << UBIFS_BLOCK_SHIFT);
3339 mutex_unlock(&c->tnc_mutex); 3337 mutex_unlock(&c->tnc_mutex);
3340 dbg_dump_inode(c, inode); 3338 dbg_dump_inode(c, inode);
3341 dbg_dump_stack(); 3339 dbg_dump_stack();
diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c
index b48db999903e..dc28fe6ec07a 100644
--- a/fs/ubifs/tnc_misc.c
+++ b/fs/ubifs/tnc_misc.c
@@ -328,8 +328,8 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len,
328 case UBIFS_XENT_KEY: 328 case UBIFS_XENT_KEY:
329 break; 329 break;
330 default: 330 default:
331 dbg_msg("bad key type at slot %d: %s", i, 331 dbg_msg("bad key type at slot %d: %d",
332 DBGKEY(&zbr->key)); 332 i, key_type(c, &zbr->key));
333 err = 3; 333 err = 3;
334 goto out_dump; 334 goto out_dump;
335 } 335 }
@@ -475,7 +475,7 @@ int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr,
475 zbr->offs); 475 zbr->offs);
476 476
477 if (err) { 477 if (err) {
478 dbg_tnc("key %s", DBGKEY(key)); 478 dbg_tnck(key, "key ");
479 return err; 479 return err;
480 } 480 }
481 481
@@ -484,8 +484,8 @@ int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr,
484 if (!keys_eq(c, key, &key1)) { 484 if (!keys_eq(c, key, &key1)) {
485 ubifs_err("bad key in node at LEB %d:%d", 485 ubifs_err("bad key in node at LEB %d:%d",
486 zbr->lnum, zbr->offs); 486 zbr->lnum, zbr->offs);
487 dbg_tnc("looked for key %s found node's key %s", 487 dbg_tnck(key, "looked for key ");
488 DBGKEY(key), DBGKEY1(&key1)); 488 dbg_tnck(&key1, "but found node's key ");
489 dbg_dump_node(c, node); 489 dbg_dump_node(c, node);
490 return -EINVAL; 490 return -EINVAL;
491 } 491 }
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 27f22551f805..12e94774aa88 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1734,7 +1734,7 @@ int ubifs_setattr(struct dentry *dentry, struct iattr *attr);
1734 1734
1735/* dir.c */ 1735/* dir.c */
1736struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, 1736struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
1737 int mode); 1737 umode_t mode);
1738int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry, 1738int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
1739 struct kstat *stat); 1739 struct kstat *stat);
1740 1740
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index bf18f7a04544..85b272268754 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -138,12 +138,11 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
138 ui = ubifs_inode(inode); 138 ui = ubifs_inode(inode);
139 ui->xattr = 1; 139 ui->xattr = 1;
140 ui->flags |= UBIFS_XATTR_FL; 140 ui->flags |= UBIFS_XATTR_FL;
141 ui->data = kmalloc(size, GFP_NOFS); 141 ui->data = kmemdup(value, size, GFP_NOFS);
142 if (!ui->data) { 142 if (!ui->data) {
143 err = -ENOMEM; 143 err = -ENOMEM;
144 goto out_free; 144 goto out_free;
145 } 145 }
146 memcpy(ui->data, value, size);
147 inode->i_size = ui->ui_size = size; 146 inode->i_size = ui->ui_size = size;
148 ui->data_len = size; 147 ui->data_len = size;
149 148
@@ -204,12 +203,11 @@ static int change_xattr(struct ubifs_info *c, struct inode *host,
204 return err; 203 return err;
205 204
206 kfree(ui->data); 205 kfree(ui->data);
207 ui->data = kmalloc(size, GFP_NOFS); 206 ui->data = kmemdup(value, size, GFP_NOFS);
208 if (!ui->data) { 207 if (!ui->data) {
209 err = -ENOMEM; 208 err = -ENOMEM;
210 goto out_free; 209 goto out_free;
211 } 210 }
212 memcpy(ui->data, value, size);
213 inode->i_size = ui->ui_size = size; 211 inode->i_size = ui->ui_size = size;
214 ui->data_len = size; 212 ui->data_len = size;
215 213
diff --git a/fs/udf/file.c b/fs/udf/file.c
index d8ffa7cc661d..dca0c3881e82 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -125,7 +125,6 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
125 err = udf_expand_file_adinicb(inode); 125 err = udf_expand_file_adinicb(inode);
126 if (err) { 126 if (err) {
127 udf_debug("udf_expand_adinicb: err=%d\n", err); 127 udf_debug("udf_expand_adinicb: err=%d\n", err);
128 up_write(&iinfo->i_data_sem);
129 return err; 128 return err;
130 } 129 }
131 } else { 130 } else {
@@ -133,9 +132,10 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
133 iinfo->i_lenAlloc = pos + count; 132 iinfo->i_lenAlloc = pos + count;
134 else 133 else
135 iinfo->i_lenAlloc = inode->i_size; 134 iinfo->i_lenAlloc = inode->i_size;
135 up_write(&iinfo->i_data_sem);
136 } 136 }
137 } 137 } else
138 up_write(&iinfo->i_data_sem); 138 up_write(&iinfo->i_data_sem);
139 139
140 retval = generic_file_aio_write(iocb, iov, nr_segs, ppos); 140 retval = generic_file_aio_write(iocb, iov, nr_segs, ppos);
141 if (retval > 0) 141 if (retval > 0)
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index 6fb7e0adcda0..05ab48195be9 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -46,7 +46,7 @@ void udf_free_inode(struct inode *inode)
46 udf_free_blocks(sb, NULL, &UDF_I(inode)->i_location, 0, 1); 46 udf_free_blocks(sb, NULL, &UDF_I(inode)->i_location, 0, 1);
47} 47}
48 48
49struct inode *udf_new_inode(struct inode *dir, int mode, int *err) 49struct inode *udf_new_inode(struct inode *dir, umode_t mode, int *err)
50{ 50{
51 struct super_block *sb = dir->i_sb; 51 struct super_block *sb = dir->i_sb;
52 struct udf_sb_info *sbi = UDF_SB(sb); 52 struct udf_sb_info *sbi = UDF_SB(sb);
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 4fd1d809738c..7699df7b3198 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -48,13 +48,12 @@ MODULE_LICENSE("GPL");
48 48
49#define EXTENT_MERGE_SIZE 5 49#define EXTENT_MERGE_SIZE 5
50 50
51static mode_t udf_convert_permissions(struct fileEntry *); 51static umode_t udf_convert_permissions(struct fileEntry *);
52static int udf_update_inode(struct inode *, int); 52static int udf_update_inode(struct inode *, int);
53static void udf_fill_inode(struct inode *, struct buffer_head *); 53static void udf_fill_inode(struct inode *, struct buffer_head *);
54static int udf_sync_inode(struct inode *inode); 54static int udf_sync_inode(struct inode *inode);
55static int udf_alloc_i_data(struct inode *inode, size_t size); 55static int udf_alloc_i_data(struct inode *inode, size_t size);
56static struct buffer_head *inode_getblk(struct inode *, sector_t, int *, 56static sector_t inode_getblk(struct inode *, sector_t, int *, int *);
57 sector_t *, int *);
58static int8_t udf_insert_aext(struct inode *, struct extent_position, 57static int8_t udf_insert_aext(struct inode *, struct extent_position,
59 struct kernel_lb_addr, uint32_t); 58 struct kernel_lb_addr, uint32_t);
60static void udf_split_extents(struct inode *, int *, int, int, 59static void udf_split_extents(struct inode *, int *, int, int,
@@ -151,6 +150,12 @@ const struct address_space_operations udf_aops = {
151 .bmap = udf_bmap, 150 .bmap = udf_bmap,
152}; 151};
153 152
153/*
154 * Expand file stored in ICB to a normal one-block-file
155 *
156 * This function requires i_data_sem for writing and releases it.
157 * This function requires i_mutex held
158 */
154int udf_expand_file_adinicb(struct inode *inode) 159int udf_expand_file_adinicb(struct inode *inode)
155{ 160{
156 struct page *page; 161 struct page *page;
@@ -169,9 +174,15 @@ int udf_expand_file_adinicb(struct inode *inode)
169 iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG; 174 iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG;
170 /* from now on we have normal address_space methods */ 175 /* from now on we have normal address_space methods */
171 inode->i_data.a_ops = &udf_aops; 176 inode->i_data.a_ops = &udf_aops;
177 up_write(&iinfo->i_data_sem);
172 mark_inode_dirty(inode); 178 mark_inode_dirty(inode);
173 return 0; 179 return 0;
174 } 180 }
181 /*
182 * Release i_data_sem so that we can lock a page - page lock ranks
183 * above i_data_sem. i_mutex still protects us against file changes.
184 */
185 up_write(&iinfo->i_data_sem);
175 186
176 page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS); 187 page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
177 if (!page) 188 if (!page)
@@ -187,6 +198,7 @@ int udf_expand_file_adinicb(struct inode *inode)
187 SetPageUptodate(page); 198 SetPageUptodate(page);
188 kunmap(page); 199 kunmap(page);
189 } 200 }
201 down_write(&iinfo->i_data_sem);
190 memset(iinfo->i_ext.i_data + iinfo->i_lenEAttr, 0x00, 202 memset(iinfo->i_ext.i_data + iinfo->i_lenEAttr, 0x00,
191 iinfo->i_lenAlloc); 203 iinfo->i_lenAlloc);
192 iinfo->i_lenAlloc = 0; 204 iinfo->i_lenAlloc = 0;
@@ -196,17 +208,20 @@ int udf_expand_file_adinicb(struct inode *inode)
196 iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG; 208 iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG;
197 /* from now on we have normal address_space methods */ 209 /* from now on we have normal address_space methods */
198 inode->i_data.a_ops = &udf_aops; 210 inode->i_data.a_ops = &udf_aops;
211 up_write(&iinfo->i_data_sem);
199 err = inode->i_data.a_ops->writepage(page, &udf_wbc); 212 err = inode->i_data.a_ops->writepage(page, &udf_wbc);
200 if (err) { 213 if (err) {
201 /* Restore everything back so that we don't lose data... */ 214 /* Restore everything back so that we don't lose data... */
202 lock_page(page); 215 lock_page(page);
203 kaddr = kmap(page); 216 kaddr = kmap(page);
217 down_write(&iinfo->i_data_sem);
204 memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr, kaddr, 218 memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr, kaddr,
205 inode->i_size); 219 inode->i_size);
206 kunmap(page); 220 kunmap(page);
207 unlock_page(page); 221 unlock_page(page);
208 iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; 222 iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB;
209 inode->i_data.a_ops = &udf_adinicb_aops; 223 inode->i_data.a_ops = &udf_adinicb_aops;
224 up_write(&iinfo->i_data_sem);
210 } 225 }
211 page_cache_release(page); 226 page_cache_release(page);
212 mark_inode_dirty(inode); 227 mark_inode_dirty(inode);
@@ -310,7 +325,6 @@ static int udf_get_block(struct inode *inode, sector_t block,
310 struct buffer_head *bh_result, int create) 325 struct buffer_head *bh_result, int create)
311{ 326{
312 int err, new; 327 int err, new;
313 struct buffer_head *bh;
314 sector_t phys = 0; 328 sector_t phys = 0;
315 struct udf_inode_info *iinfo; 329 struct udf_inode_info *iinfo;
316 330
@@ -323,7 +337,6 @@ static int udf_get_block(struct inode *inode, sector_t block,
323 337
324 err = -EIO; 338 err = -EIO;
325 new = 0; 339 new = 0;
326 bh = NULL;
327 iinfo = UDF_I(inode); 340 iinfo = UDF_I(inode);
328 341
329 down_write(&iinfo->i_data_sem); 342 down_write(&iinfo->i_data_sem);
@@ -332,13 +345,10 @@ static int udf_get_block(struct inode *inode, sector_t block,
332 iinfo->i_next_alloc_goal++; 345 iinfo->i_next_alloc_goal++;
333 } 346 }
334 347
335 err = 0;
336 348
337 bh = inode_getblk(inode, block, &err, &phys, &new); 349 phys = inode_getblk(inode, block, &err, &new);
338 BUG_ON(bh); 350 if (!phys)
339 if (err)
340 goto abort; 351 goto abort;
341 BUG_ON(!phys);
342 352
343 if (new) 353 if (new)
344 set_buffer_new(bh_result); 354 set_buffer_new(bh_result);
@@ -547,11 +557,10 @@ out:
547 return err; 557 return err;
548} 558}
549 559
550static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, 560static sector_t inode_getblk(struct inode *inode, sector_t block,
551 int *err, sector_t *phys, int *new) 561 int *err, int *new)
552{ 562{
553 static sector_t last_block; 563 static sector_t last_block;
554 struct buffer_head *result = NULL;
555 struct kernel_long_ad laarr[EXTENT_MERGE_SIZE]; 564 struct kernel_long_ad laarr[EXTENT_MERGE_SIZE];
556 struct extent_position prev_epos, cur_epos, next_epos; 565 struct extent_position prev_epos, cur_epos, next_epos;
557 int count = 0, startnum = 0, endnum = 0; 566 int count = 0, startnum = 0, endnum = 0;
@@ -566,6 +575,8 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block,
566 int goal = 0, pgoal = iinfo->i_location.logicalBlockNum; 575 int goal = 0, pgoal = iinfo->i_location.logicalBlockNum;
567 int lastblock = 0; 576 int lastblock = 0;
568 577
578 *err = 0;
579 *new = 0;
569 prev_epos.offset = udf_file_entry_alloc_offset(inode); 580 prev_epos.offset = udf_file_entry_alloc_offset(inode);
570 prev_epos.block = iinfo->i_location; 581 prev_epos.block = iinfo->i_location;
571 prev_epos.bh = NULL; 582 prev_epos.bh = NULL;
@@ -635,8 +646,7 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block,
635 brelse(cur_epos.bh); 646 brelse(cur_epos.bh);
636 brelse(next_epos.bh); 647 brelse(next_epos.bh);
637 newblock = udf_get_lb_pblock(inode->i_sb, &eloc, offset); 648 newblock = udf_get_lb_pblock(inode->i_sb, &eloc, offset);
638 *phys = newblock; 649 return newblock;
639 return NULL;
640 } 650 }
641 651
642 last_block = block; 652 last_block = block;
@@ -664,7 +674,7 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block,
664 brelse(cur_epos.bh); 674 brelse(cur_epos.bh);
665 brelse(next_epos.bh); 675 brelse(next_epos.bh);
666 *err = ret; 676 *err = ret;
667 return NULL; 677 return 0;
668 } 678 }
669 c = 0; 679 c = 0;
670 offset = 0; 680 offset = 0;
@@ -729,7 +739,7 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block,
729 if (!newblocknum) { 739 if (!newblocknum) {
730 brelse(prev_epos.bh); 740 brelse(prev_epos.bh);
731 *err = -ENOSPC; 741 *err = -ENOSPC;
732 return NULL; 742 return 0;
733 } 743 }
734 iinfo->i_lenExtents += inode->i_sb->s_blocksize; 744 iinfo->i_lenExtents += inode->i_sb->s_blocksize;
735 } 745 }
@@ -761,10 +771,10 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block,
761 771
762 newblock = udf_get_pblock(inode->i_sb, newblocknum, 772 newblock = udf_get_pblock(inode->i_sb, newblocknum,
763 iinfo->i_location.partitionReferenceNum, 0); 773 iinfo->i_location.partitionReferenceNum, 0);
764 if (!newblock) 774 if (!newblock) {
765 return NULL; 775 *err = -EIO;
766 *phys = newblock; 776 return 0;
767 *err = 0; 777 }
768 *new = 1; 778 *new = 1;
769 iinfo->i_next_alloc_block = block; 779 iinfo->i_next_alloc_block = block;
770 iinfo->i_next_alloc_goal = newblocknum; 780 iinfo->i_next_alloc_goal = newblocknum;
@@ -775,7 +785,7 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block,
775 else 785 else
776 mark_inode_dirty(inode); 786 mark_inode_dirty(inode);
777 787
778 return result; 788 return newblock;
779} 789}
780 790
781static void udf_split_extents(struct inode *inode, int *c, int offset, 791static void udf_split_extents(struct inode *inode, int *c, int offset,
@@ -1111,10 +1121,9 @@ int udf_setsize(struct inode *inode, loff_t newsize)
1111 if (bsize < 1121 if (bsize <
1112 (udf_file_entry_alloc_offset(inode) + newsize)) { 1122 (udf_file_entry_alloc_offset(inode) + newsize)) {
1113 err = udf_expand_file_adinicb(inode); 1123 err = udf_expand_file_adinicb(inode);
1114 if (err) { 1124 if (err)
1115 up_write(&iinfo->i_data_sem);
1116 return err; 1125 return err;
1117 } 1126 down_write(&iinfo->i_data_sem);
1118 } else 1127 } else
1119 iinfo->i_lenAlloc = newsize; 1128 iinfo->i_lenAlloc = newsize;
1120 } 1129 }
@@ -1452,9 +1461,9 @@ static int udf_alloc_i_data(struct inode *inode, size_t size)
1452 return 0; 1461 return 0;
1453} 1462}
1454 1463
1455static mode_t udf_convert_permissions(struct fileEntry *fe) 1464static umode_t udf_convert_permissions(struct fileEntry *fe)
1456{ 1465{
1457 mode_t mode; 1466 umode_t mode;
1458 uint32_t permissions; 1467 uint32_t permissions;
1459 uint32_t flags; 1468 uint32_t flags;
1460 1469
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 4639e137222f..08bf46edf9c4 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -552,7 +552,7 @@ static int udf_delete_entry(struct inode *inode, struct fileIdentDesc *fi,
552 return udf_write_fi(inode, cfi, fi, fibh, NULL, NULL); 552 return udf_write_fi(inode, cfi, fi, fibh, NULL, NULL);
553} 553}
554 554
555static int udf_create(struct inode *dir, struct dentry *dentry, int mode, 555static int udf_create(struct inode *dir, struct dentry *dentry, umode_t mode,
556 struct nameidata *nd) 556 struct nameidata *nd)
557{ 557{
558 struct udf_fileident_bh fibh; 558 struct udf_fileident_bh fibh;
@@ -596,7 +596,7 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode,
596 return 0; 596 return 0;
597} 597}
598 598
599static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode, 599static int udf_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
600 dev_t rdev) 600 dev_t rdev)
601{ 601{
602 struct inode *inode; 602 struct inode *inode;
@@ -640,7 +640,7 @@ out:
640 return err; 640 return err;
641} 641}
642 642
643static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode) 643static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
644{ 644{
645 struct inode *inode; 645 struct inode *inode;
646 struct udf_fileident_bh fibh; 646 struct udf_fileident_bh fibh;
diff --git a/fs/udf/super.c b/fs/udf/super.c
index e185253470df..c09a84daaf50 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -89,7 +89,7 @@ static void udf_open_lvid(struct super_block *);
89static void udf_close_lvid(struct super_block *); 89static void udf_close_lvid(struct super_block *);
90static unsigned int udf_count_free(struct super_block *); 90static unsigned int udf_count_free(struct super_block *);
91static int udf_statfs(struct dentry *, struct kstatfs *); 91static int udf_statfs(struct dentry *, struct kstatfs *);
92static int udf_show_options(struct seq_file *, struct vfsmount *); 92static int udf_show_options(struct seq_file *, struct dentry *);
93 93
94struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi) 94struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi)
95{ 95{
@@ -138,7 +138,6 @@ static struct inode *udf_alloc_inode(struct super_block *sb)
138static void udf_i_callback(struct rcu_head *head) 138static void udf_i_callback(struct rcu_head *head)
139{ 139{
140 struct inode *inode = container_of(head, struct inode, i_rcu); 140 struct inode *inode = container_of(head, struct inode, i_rcu);
141 INIT_LIST_HEAD(&inode->i_dentry);
142 kmem_cache_free(udf_inode_cachep, UDF_I(inode)); 141 kmem_cache_free(udf_inode_cachep, UDF_I(inode));
143} 142}
144 143
@@ -196,11 +195,11 @@ struct udf_options {
196 unsigned int fileset; 195 unsigned int fileset;
197 unsigned int rootdir; 196 unsigned int rootdir;
198 unsigned int flags; 197 unsigned int flags;
199 mode_t umask; 198 umode_t umask;
200 gid_t gid; 199 gid_t gid;
201 uid_t uid; 200 uid_t uid;
202 mode_t fmode; 201 umode_t fmode;
203 mode_t dmode; 202 umode_t dmode;
204 struct nls_table *nls_map; 203 struct nls_table *nls_map;
205}; 204};
206 205
@@ -250,9 +249,9 @@ static int udf_sb_alloc_partition_maps(struct super_block *sb, u32 count)
250 return 0; 249 return 0;
251} 250}
252 251
253static int udf_show_options(struct seq_file *seq, struct vfsmount *mnt) 252static int udf_show_options(struct seq_file *seq, struct dentry *root)
254{ 253{
255 struct super_block *sb = mnt->mnt_sb; 254 struct super_block *sb = root->d_sb;
256 struct udf_sb_info *sbi = UDF_SB(sb); 255 struct udf_sb_info *sbi = UDF_SB(sb);
257 256
258 if (!UDF_QUERY_FLAG(sb, UDF_FLAG_STRICT)) 257 if (!UDF_QUERY_FLAG(sb, UDF_FLAG_STRICT))
@@ -280,11 +279,11 @@ static int udf_show_options(struct seq_file *seq, struct vfsmount *mnt)
280 if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_SET)) 279 if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_SET))
281 seq_printf(seq, ",gid=%u", sbi->s_gid); 280 seq_printf(seq, ",gid=%u", sbi->s_gid);
282 if (sbi->s_umask != 0) 281 if (sbi->s_umask != 0)
283 seq_printf(seq, ",umask=%o", sbi->s_umask); 282 seq_printf(seq, ",umask=%ho", sbi->s_umask);
284 if (sbi->s_fmode != UDF_INVALID_MODE) 283 if (sbi->s_fmode != UDF_INVALID_MODE)
285 seq_printf(seq, ",mode=%o", sbi->s_fmode); 284 seq_printf(seq, ",mode=%ho", sbi->s_fmode);
286 if (sbi->s_dmode != UDF_INVALID_MODE) 285 if (sbi->s_dmode != UDF_INVALID_MODE)
287 seq_printf(seq, ",dmode=%o", sbi->s_dmode); 286 seq_printf(seq, ",dmode=%ho", sbi->s_dmode);
288 if (UDF_QUERY_FLAG(sb, UDF_FLAG_SESSION_SET)) 287 if (UDF_QUERY_FLAG(sb, UDF_FLAG_SESSION_SET))
289 seq_printf(seq, ",session=%u", sbi->s_session); 288 seq_printf(seq, ",session=%u", sbi->s_session);
290 if (UDF_QUERY_FLAG(sb, UDF_FLAG_LASTBLOCK_SET)) 289 if (UDF_QUERY_FLAG(sb, UDF_FLAG_LASTBLOCK_SET))
@@ -1799,6 +1798,12 @@ static void udf_close_lvid(struct super_block *sb)
1799 le16_to_cpu(lvid->descTag.descCRCLength))); 1798 le16_to_cpu(lvid->descTag.descCRCLength)));
1800 1799
1801 lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag); 1800 lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag);
1801 /*
1802 * We set buffer uptodate unconditionally here to avoid spurious
1803 * warnings from mark_buffer_dirty() when previous EIO has marked
1804 * the buffer as !uptodate
1805 */
1806 set_buffer_uptodate(bh);
1802 mark_buffer_dirty(bh); 1807 mark_buffer_dirty(bh);
1803 sbi->s_lvid_dirty = 0; 1808 sbi->s_lvid_dirty = 0;
1804 mutex_unlock(&sbi->s_alloc_mutex); 1809 mutex_unlock(&sbi->s_alloc_mutex);
diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c
index b1d4488b0f14..d7c6dbe4194b 100644
--- a/fs/udf/symlink.c
+++ b/fs/udf/symlink.c
@@ -41,10 +41,16 @@ static void udf_pc_to_char(struct super_block *sb, unsigned char *from,
41 pc = (struct pathComponent *)(from + elen); 41 pc = (struct pathComponent *)(from + elen);
42 switch (pc->componentType) { 42 switch (pc->componentType) {
43 case 1: 43 case 1:
44 if (pc->lengthComponentIdent == 0) { 44 /*
45 p = to; 45 * Symlink points to some place which should be agreed
46 *p++ = '/'; 46 * upon between originator and receiver of the media. Ignore.
47 } 47 */
48 if (pc->lengthComponentIdent > 0)
49 break;
50 /* Fall through */
51 case 2:
52 p = to;
53 *p++ = '/';
48 break; 54 break;
49 case 3: 55 case 3:
50 memcpy(p, "../", 3); 56 memcpy(p, "../", 3);
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
index 5142a82e3276..42ad69ac9576 100644
--- a/fs/udf/udf_sb.h
+++ b/fs/udf/udf_sb.h
@@ -50,7 +50,7 @@
50#define UDF_SPARABLE_MAP15 0x1522U 50#define UDF_SPARABLE_MAP15 0x1522U
51#define UDF_METADATA_MAP25 0x2511U 51#define UDF_METADATA_MAP25 0x2511U
52 52
53#define UDF_INVALID_MODE ((mode_t)-1) 53#define UDF_INVALID_MODE ((umode_t)-1)
54 54
55#pragma pack(1) /* XXX(hch): Why? This file just defines in-core structures */ 55#pragma pack(1) /* XXX(hch): Why? This file just defines in-core structures */
56 56
@@ -127,11 +127,11 @@ struct udf_sb_info {
127 struct buffer_head *s_lvid_bh; 127 struct buffer_head *s_lvid_bh;
128 128
129 /* Default permissions */ 129 /* Default permissions */
130 mode_t s_umask; 130 umode_t s_umask;
131 gid_t s_gid; 131 gid_t s_gid;
132 uid_t s_uid; 132 uid_t s_uid;
133 mode_t s_fmode; 133 umode_t s_fmode;
134 mode_t s_dmode; 134 umode_t s_dmode;
135 /* Lock protecting consistency of above permission settings */ 135 /* Lock protecting consistency of above permission settings */
136 rwlock_t s_cred_lock; 136 rwlock_t s_cred_lock;
137 137
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index f34e6fc0cdaa..ebe10314e512 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -215,7 +215,7 @@ extern int udf_CS0toUTF8(struct ustr *, const struct ustr *);
215 215
216/* ialloc.c */ 216/* ialloc.c */
217extern void udf_free_inode(struct inode *); 217extern void udf_free_inode(struct inode *);
218extern struct inode *udf_new_inode(struct inode *, int, int *); 218extern struct inode *udf_new_inode(struct inode *, umode_t, int *);
219 219
220/* truncate.c */ 220/* truncate.c */
221extern void udf_truncate_tail_extent(struct inode *); 221extern void udf_truncate_tail_extent(struct inode *);
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 78a4c70d46b5..4ec5c1085a87 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -170,7 +170,7 @@ static void ufs2_init_inodes_chunk(struct super_block *sb,
170 * For other inodes, search forward from the parent directory's block 170 * For other inodes, search forward from the parent directory's block
171 * group to find a free inode. 171 * group to find a free inode.
172 */ 172 */
173struct inode * ufs_new_inode(struct inode * dir, int mode) 173struct inode *ufs_new_inode(struct inode *dir, umode_t mode)
174{ 174{
175 struct super_block * sb; 175 struct super_block * sb;
176 struct ufs_sb_info * sbi; 176 struct ufs_sb_info * sbi;
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 879b13436fa4..9094e1d917be 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -583,7 +583,7 @@ static int ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode)
583{ 583{
584 struct ufs_inode_info *ufsi = UFS_I(inode); 584 struct ufs_inode_info *ufsi = UFS_I(inode);
585 struct super_block *sb = inode->i_sb; 585 struct super_block *sb = inode->i_sb;
586 mode_t mode; 586 umode_t mode;
587 587
588 /* 588 /*
589 * Copy data to the in-core inode. 589 * Copy data to the in-core inode.
@@ -630,7 +630,7 @@ static int ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode)
630{ 630{
631 struct ufs_inode_info *ufsi = UFS_I(inode); 631 struct ufs_inode_info *ufsi = UFS_I(inode);
632 struct super_block *sb = inode->i_sb; 632 struct super_block *sb = inode->i_sb;
633 mode_t mode; 633 umode_t mode;
634 634
635 UFSD("Reading ufs2 inode, ino %lu\n", inode->i_ino); 635 UFSD("Reading ufs2 inode, ino %lu\n", inode->i_ino);
636 /* 636 /*
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 639d49162241..38cac199edff 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -70,7 +70,7 @@ static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, stru
70 * If the create succeeds, we fill in the inode information 70 * If the create succeeds, we fill in the inode information
71 * with d_instantiate(). 71 * with d_instantiate().
72 */ 72 */
73static int ufs_create (struct inode * dir, struct dentry * dentry, int mode, 73static int ufs_create (struct inode * dir, struct dentry * dentry, umode_t mode,
74 struct nameidata *nd) 74 struct nameidata *nd)
75{ 75{
76 struct inode *inode; 76 struct inode *inode;
@@ -94,7 +94,7 @@ static int ufs_create (struct inode * dir, struct dentry * dentry, int mode,
94 return err; 94 return err;
95} 95}
96 96
97static int ufs_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t rdev) 97static int ufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
98{ 98{
99 struct inode *inode; 99 struct inode *inode;
100 int err; 100 int err;
@@ -180,7 +180,7 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir,
180 return error; 180 return error;
181} 181}
182 182
183static int ufs_mkdir(struct inode * dir, struct dentry * dentry, int mode) 183static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
184{ 184{
185 struct inode * inode; 185 struct inode * inode;
186 int err = -EMLINK; 186 int err = -EMLINK;
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 3915ade6f9a8..5246ee3e5607 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1351,9 +1351,9 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
1351 return 0; 1351 return 0;
1352} 1352}
1353 1353
1354static int ufs_show_options(struct seq_file *seq, struct vfsmount *vfs) 1354static int ufs_show_options(struct seq_file *seq, struct dentry *root)
1355{ 1355{
1356 struct ufs_sb_info *sbi = UFS_SB(vfs->mnt_sb); 1356 struct ufs_sb_info *sbi = UFS_SB(root->d_sb);
1357 unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE; 1357 unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE;
1358 const struct match_token *tp = tokens; 1358 const struct match_token *tp = tokens;
1359 1359
@@ -1425,7 +1425,6 @@ static struct inode *ufs_alloc_inode(struct super_block *sb)
1425static void ufs_i_callback(struct rcu_head *head) 1425static void ufs_i_callback(struct rcu_head *head)
1426{ 1426{
1427 struct inode *inode = container_of(head, struct inode, i_rcu); 1427 struct inode *inode = container_of(head, struct inode, i_rcu);
1428 INIT_LIST_HEAD(&inode->i_dentry);
1429 kmem_cache_free(ufs_inode_cachep, UFS_I(inode)); 1428 kmem_cache_free(ufs_inode_cachep, UFS_I(inode));
1430} 1429}
1431 1430
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index c26f2bcec264..528750b7e701 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -104,7 +104,7 @@ extern const struct address_space_operations ufs_aops;
104 104
105/* ialloc.c */ 105/* ialloc.c */
106extern void ufs_free_inode (struct inode *inode); 106extern void ufs_free_inode (struct inode *inode);
107extern struct inode * ufs_new_inode (struct inode *, int); 107extern struct inode * ufs_new_inode (struct inode *, umode_t);
108 108
109/* inode.c */ 109/* inode.c */
110extern struct inode *ufs_iget(struct super_block *, unsigned long); 110extern struct inode *ufs_iget(struct super_block *, unsigned long);
diff --git a/fs/xattr.c b/fs/xattr.c
index 67583de8218c..82f43376c7cd 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -397,7 +397,7 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
397 error = mnt_want_write_file(f); 397 error = mnt_want_write_file(f);
398 if (!error) { 398 if (!error) {
399 error = setxattr(dentry, name, value, size, flags); 399 error = setxattr(dentry, name, value, size, flags);
400 mnt_drop_write(f->f_path.mnt); 400 mnt_drop_write_file(f);
401 } 401 }
402 fput(f); 402 fput(f);
403 return error; 403 return error;
@@ -624,7 +624,7 @@ SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name)
624 error = mnt_want_write_file(f); 624 error = mnt_want_write_file(f);
625 if (!error) { 625 if (!error) {
626 error = removexattr(dentry, name); 626 error = removexattr(dentry, name);
627 mnt_drop_write(f->f_path.mnt); 627 mnt_drop_write_file(f);
628 } 628 }
629 fput(f); 629 fput(f);
630 return error; 630 return error;
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 76e4266d2e7e..ac702a6eab9b 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -39,7 +39,7 @@ xfs_acl_from_disk(struct xfs_acl *aclp)
39 struct posix_acl_entry *acl_e; 39 struct posix_acl_entry *acl_e;
40 struct posix_acl *acl; 40 struct posix_acl *acl;
41 struct xfs_acl_entry *ace; 41 struct xfs_acl_entry *ace;
42 int count, i; 42 unsigned int count, i;
43 43
44 count = be32_to_cpu(aclp->acl_cnt); 44 count = be32_to_cpu(aclp->acl_cnt);
45 if (count > XFS_ACL_MAX_ENTRIES) 45 if (count > XFS_ACL_MAX_ENTRIES)
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index cf0ac056815f..4dff85c7d7eb 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1370,7 +1370,7 @@ restart:
1370 goto restart; 1370 goto restart;
1371 } 1371 }
1372 /* 1372 /*
1373 * clear the LRU reference count so the bufer doesn't get 1373 * clear the LRU reference count so the buffer doesn't get
1374 * ignored in xfs_buf_rele(). 1374 * ignored in xfs_buf_rele().
1375 */ 1375 */
1376 atomic_set(&bp->b_lru_ref, 0); 1376 atomic_set(&bp->b_lru_ref, 0);
@@ -1701,12 +1701,8 @@ xfsbufd(
1701 struct list_head tmp; 1701 struct list_head tmp;
1702 struct blk_plug plug; 1702 struct blk_plug plug;
1703 1703
1704 if (unlikely(freezing(current))) { 1704 if (unlikely(freezing(current)))
1705 set_bit(XBT_FORCE_SLEEP, &target->bt_flags); 1705 try_to_freeze();
1706 refrigerator();
1707 } else {
1708 clear_bit(XBT_FORCE_SLEEP, &target->bt_flags);
1709 }
1710 1706
1711 /* sleep for a long time if there is nothing to do. */ 1707 /* sleep for a long time if there is nothing to do. */
1712 if (list_empty(&target->bt_delwri_queue)) 1708 if (list_empty(&target->bt_delwri_queue))
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 5bab046e859f..df7ffb0affe7 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -90,8 +90,7 @@ typedef unsigned int xfs_buf_flags_t;
90 { _XBF_DELWRI_Q, "DELWRI_Q" } 90 { _XBF_DELWRI_Q, "DELWRI_Q" }
91 91
92typedef enum { 92typedef enum {
93 XBT_FORCE_SLEEP = 0, 93 XBT_FORCE_FLUSH = 0,
94 XBT_FORCE_FLUSH = 1,
95} xfs_buftarg_flags_t; 94} xfs_buftarg_flags_t;
96 95
97typedef struct xfs_buftarg { 96typedef struct xfs_buftarg {
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index 8a24f0c6c860..286a051f12cf 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -68,7 +68,7 @@ xfs_trim_extents(
68 * Look up the longest btree in the AGF and start with it. 68 * Look up the longest btree in the AGF and start with it.
69 */ 69 */
70 error = xfs_alloc_lookup_le(cur, 0, 70 error = xfs_alloc_lookup_le(cur, 0,
71 XFS_BUF_TO_AGF(agbp)->agf_longest, &i); 71 be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest), &i);
72 if (error) 72 if (error)
73 goto out_del_cursor; 73 goto out_del_cursor;
74 74
@@ -84,7 +84,7 @@ xfs_trim_extents(
84 if (error) 84 if (error)
85 goto out_del_cursor; 85 goto out_del_cursor;
86 XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor); 86 XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor);
87 ASSERT(flen <= XFS_BUF_TO_AGF(agbp)->agf_longest); 87 ASSERT(flen <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest));
88 88
89 /* 89 /*
90 * Too small? Give up. 90 * Too small? Give up.
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 25d7280e9f6b..b4ff40b5f918 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -39,20 +39,19 @@
39#include "xfs_qm.h" 39#include "xfs_qm.h"
40#include "xfs_trace.h" 40#include "xfs_trace.h"
41 41
42
43/* 42/*
44 LOCK ORDER 43 * Lock order:
45 44 *
46 inode lock (ilock) 45 * ip->i_lock
47 dquot hash-chain lock (hashlock) 46 * qh->qh_lock
48 xqm dquot freelist lock (freelistlock 47 * qi->qi_dqlist_lock
49 mount's dquot list lock (mplistlock) 48 * dquot->q_qlock (xfs_dqlock() and friends)
50 user dquot lock - lock ordering among dquots is based on the uid or gid 49 * dquot->q_flush (xfs_dqflock() and friends)
51 group dquot lock - similar to udquots. Between the two dquots, the udquot 50 * xfs_Gqm->qm_dqfrlist_lock
52 has to be locked first. 51 *
53 pin lock - the dquot lock must be held to take this lock. 52 * If two dquots need to be locked the order is user before group/project,
54 flush lock - ditto. 53 * otherwise by the lowest id first, see xfs_dqlock2.
55*/ 54 */
56 55
57#ifdef DEBUG 56#ifdef DEBUG
58xfs_buftarg_t *xfs_dqerror_target; 57xfs_buftarg_t *xfs_dqerror_target;
@@ -155,24 +154,6 @@ xfs_qm_dqdestroy(
155} 154}
156 155
157/* 156/*
158 * This is what a 'fresh' dquot inside a dquot chunk looks like on disk.
159 */
160STATIC void
161xfs_qm_dqinit_core(
162 xfs_dqid_t id,
163 uint type,
164 xfs_dqblk_t *d)
165{
166 /*
167 * Caller has zero'd the entire dquot 'chunk' already.
168 */
169 d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
170 d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
171 d->dd_diskdq.d_id = cpu_to_be32(id);
172 d->dd_diskdq.d_flags = type;
173}
174
175/*
176 * If default limits are in force, push them into the dquot now. 157 * If default limits are in force, push them into the dquot now.
177 * We overwrite the dquot limits only if they are zero and this 158 * We overwrite the dquot limits only if they are zero and this
178 * is not the root dquot. 159 * is not the root dquot.
@@ -328,8 +309,13 @@ xfs_qm_init_dquot_blk(
328 curid = id - (id % q->qi_dqperchunk); 309 curid = id - (id % q->qi_dqperchunk);
329 ASSERT(curid >= 0); 310 ASSERT(curid >= 0);
330 memset(d, 0, BBTOB(q->qi_dqchunklen)); 311 memset(d, 0, BBTOB(q->qi_dqchunklen));
331 for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) 312 for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) {
332 xfs_qm_dqinit_core(curid, type, d); 313 d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
314 d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
315 d->dd_diskdq.d_id = cpu_to_be32(curid);
316 d->dd_diskdq.d_flags = type;
317 }
318
333 xfs_trans_dquot_buf(tp, bp, 319 xfs_trans_dquot_buf(tp, bp,
334 (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF : 320 (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF :
335 ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF : 321 ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF :
@@ -564,36 +550,62 @@ xfs_qm_dqtobp(
564 * Read in the ondisk dquot using dqtobp() then copy it to an incore version, 550 * Read in the ondisk dquot using dqtobp() then copy it to an incore version,
565 * and release the buffer immediately. 551 * and release the buffer immediately.
566 * 552 *
553 * If XFS_QMOPT_DQALLOC is set, allocate a dquot on disk if it needed.
567 */ 554 */
568/* ARGSUSED */ 555int
569STATIC int
570xfs_qm_dqread( 556xfs_qm_dqread(
571 xfs_trans_t **tpp, 557 struct xfs_mount *mp,
572 xfs_dqid_t id, 558 xfs_dqid_t id,
573 xfs_dquot_t *dqp, /* dquot to get filled in */ 559 uint type,
574 uint flags) 560 uint flags,
561 struct xfs_dquot **O_dqpp)
575{ 562{
576 xfs_disk_dquot_t *ddqp; 563 struct xfs_dquot *dqp;
577 xfs_buf_t *bp; 564 struct xfs_disk_dquot *ddqp;
578 int error; 565 struct xfs_buf *bp;
579 xfs_trans_t *tp; 566 struct xfs_trans *tp = NULL;
567 int error;
568 int cancelflags = 0;
580 569
581 ASSERT(tpp); 570 dqp = xfs_qm_dqinit(mp, id, type);
582 571
583 trace_xfs_dqread(dqp); 572 trace_xfs_dqread(dqp);
584 573
574 if (flags & XFS_QMOPT_DQALLOC) {
575 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
576 error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp),
577 XFS_WRITE_LOG_RES(mp) +
578 /*
579 * Round the chunklen up to the next multiple
580 * of 128 (buf log item chunk size)).
581 */
582 BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 + 128,
583 0,
584 XFS_TRANS_PERM_LOG_RES,
585 XFS_WRITE_LOG_COUNT);
586 if (error)
587 goto error1;
588 cancelflags = XFS_TRANS_RELEASE_LOG_RES;
589 }
590
585 /* 591 /*
586 * get a pointer to the on-disk dquot and the buffer containing it 592 * get a pointer to the on-disk dquot and the buffer containing it
587 * dqp already knows its own type (GROUP/USER). 593 * dqp already knows its own type (GROUP/USER).
588 */ 594 */
589 if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) { 595 error = xfs_qm_dqtobp(&tp, dqp, &ddqp, &bp, flags);
590 return (error); 596 if (error) {
597 /*
598 * This can happen if quotas got turned off (ESRCH),
599 * or if the dquot didn't exist on disk and we ask to
600 * allocate (ENOENT).
601 */
602 trace_xfs_dqread_fail(dqp);
603 cancelflags |= XFS_TRANS_ABORT;
604 goto error1;
591 } 605 }
592 tp = *tpp;
593 606
594 /* copy everything from disk dquot to the incore dquot */ 607 /* copy everything from disk dquot to the incore dquot */
595 memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t)); 608 memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
596 ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
597 xfs_qm_dquot_logitem_init(dqp); 609 xfs_qm_dquot_logitem_init(dqp);
598 610
599 /* 611 /*
@@ -622,77 +634,22 @@ xfs_qm_dqread(
622 ASSERT(xfs_buf_islocked(bp)); 634 ASSERT(xfs_buf_islocked(bp));
623 xfs_trans_brelse(tp, bp); 635 xfs_trans_brelse(tp, bp);
624 636
625 return (error);
626}
627
628
629/*
630 * allocate an incore dquot from the kernel heap,
631 * and fill its core with quota information kept on disk.
632 * If XFS_QMOPT_DQALLOC is set, it'll allocate a dquot on disk
633 * if it wasn't already allocated.
634 */
635STATIC int
636xfs_qm_idtodq(
637 xfs_mount_t *mp,
638 xfs_dqid_t id, /* gid or uid, depending on type */
639 uint type, /* UDQUOT or GDQUOT */
640 uint flags, /* DQALLOC, DQREPAIR */
641 xfs_dquot_t **O_dqpp)/* OUT : incore dquot, not locked */
642{
643 xfs_dquot_t *dqp;
644 int error;
645 xfs_trans_t *tp;
646 int cancelflags=0;
647
648 dqp = xfs_qm_dqinit(mp, id, type);
649 tp = NULL;
650 if (flags & XFS_QMOPT_DQALLOC) {
651 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
652 error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp),
653 XFS_WRITE_LOG_RES(mp) +
654 BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 +
655 128,
656 0,
657 XFS_TRANS_PERM_LOG_RES,
658 XFS_WRITE_LOG_COUNT);
659 if (error) {
660 cancelflags = 0;
661 goto error0;
662 }
663 cancelflags = XFS_TRANS_RELEASE_LOG_RES;
664 }
665
666 /*
667 * Read it from disk; xfs_dqread() takes care of
668 * all the necessary initialization of dquot's fields (locks, etc)
669 */
670 if ((error = xfs_qm_dqread(&tp, id, dqp, flags))) {
671 /*
672 * This can happen if quotas got turned off (ESRCH),
673 * or if the dquot didn't exist on disk and we ask to
674 * allocate (ENOENT).
675 */
676 trace_xfs_dqread_fail(dqp);
677 cancelflags |= XFS_TRANS_ABORT;
678 goto error0;
679 }
680 if (tp) { 637 if (tp) {
681 if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) 638 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
682 goto error1; 639 if (error)
640 goto error0;
683 } 641 }
684 642
685 *O_dqpp = dqp; 643 *O_dqpp = dqp;
686 return (0); 644 return error;
687 645
688 error0: 646error1:
689 ASSERT(error);
690 if (tp) 647 if (tp)
691 xfs_trans_cancel(tp, cancelflags); 648 xfs_trans_cancel(tp, cancelflags);
692 error1: 649error0:
693 xfs_qm_dqdestroy(dqp); 650 xfs_qm_dqdestroy(dqp);
694 *O_dqpp = NULL; 651 *O_dqpp = NULL;
695 return (error); 652 return error;
696} 653}
697 654
698/* 655/*
@@ -710,12 +667,9 @@ xfs_qm_dqlookup(
710 xfs_dquot_t **O_dqpp) 667 xfs_dquot_t **O_dqpp)
711{ 668{
712 xfs_dquot_t *dqp; 669 xfs_dquot_t *dqp;
713 uint flist_locked;
714 670
715 ASSERT(mutex_is_locked(&qh->qh_lock)); 671 ASSERT(mutex_is_locked(&qh->qh_lock));
716 672
717 flist_locked = B_FALSE;
718
719 /* 673 /*
720 * Traverse the hashchain looking for a match 674 * Traverse the hashchain looking for a match
721 */ 675 */
@@ -725,70 +679,31 @@ xfs_qm_dqlookup(
725 * dqlock to look at the id field of the dquot, since the 679 * dqlock to look at the id field of the dquot, since the
726 * id can't be modified without the hashlock anyway. 680 * id can't be modified without the hashlock anyway.
727 */ 681 */
728 if (be32_to_cpu(dqp->q_core.d_id) == id && dqp->q_mount == mp) { 682 if (be32_to_cpu(dqp->q_core.d_id) != id || dqp->q_mount != mp)
729 trace_xfs_dqlookup_found(dqp); 683 continue;
730 684
731 /* 685 trace_xfs_dqlookup_found(dqp);
732 * All in core dquots must be on the dqlist of mp
733 */
734 ASSERT(!list_empty(&dqp->q_mplist));
735
736 xfs_dqlock(dqp);
737 if (dqp->q_nrefs == 0) {
738 ASSERT(!list_empty(&dqp->q_freelist));
739 if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
740 trace_xfs_dqlookup_want(dqp);
741
742 /*
743 * We may have raced with dqreclaim_one()
744 * (and lost). So, flag that we don't
745 * want the dquot to be reclaimed.
746 */
747 dqp->dq_flags |= XFS_DQ_WANT;
748 xfs_dqunlock(dqp);
749 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
750 xfs_dqlock(dqp);
751 dqp->dq_flags &= ~(XFS_DQ_WANT);
752 }
753 flist_locked = B_TRUE;
754 }
755 686
756 /* 687 xfs_dqlock(dqp);
757 * id couldn't have changed; we had the hashlock all 688 if (dqp->dq_flags & XFS_DQ_FREEING) {
758 * along 689 *O_dqpp = NULL;
759 */ 690 xfs_dqunlock(dqp);
760 ASSERT(be32_to_cpu(dqp->q_core.d_id) == id); 691 return -1;
761 692 }
762 if (flist_locked) {
763 if (dqp->q_nrefs != 0) {
764 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
765 flist_locked = B_FALSE;
766 } else {
767 /* take it off the freelist */
768 trace_xfs_dqlookup_freelist(dqp);
769 list_del_init(&dqp->q_freelist);
770 xfs_Gqm->qm_dqfrlist_cnt--;
771 }
772 }
773 693
774 XFS_DQHOLD(dqp); 694 dqp->q_nrefs++;
775 695
776 if (flist_locked) 696 /*
777 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); 697 * move the dquot to the front of the hashchain
778 /* 698 */
779 * move the dquot to the front of the hashchain 699 list_move(&dqp->q_hashlist, &qh->qh_list);
780 */ 700 trace_xfs_dqlookup_done(dqp);
781 ASSERT(mutex_is_locked(&qh->qh_lock)); 701 *O_dqpp = dqp;
782 list_move(&dqp->q_hashlist, &qh->qh_list); 702 return 0;
783 trace_xfs_dqlookup_done(dqp);
784 *O_dqpp = dqp;
785 return 0;
786 }
787 } 703 }
788 704
789 *O_dqpp = NULL; 705 *O_dqpp = NULL;
790 ASSERT(mutex_is_locked(&qh->qh_lock)); 706 return 1;
791 return (1);
792} 707}
793 708
794/* 709/*
@@ -829,11 +744,7 @@ xfs_qm_dqget(
829 return (EIO); 744 return (EIO);
830 } 745 }
831 } 746 }
832#endif
833
834 again:
835 747
836#ifdef DEBUG
837 ASSERT(type == XFS_DQ_USER || 748 ASSERT(type == XFS_DQ_USER ||
838 type == XFS_DQ_PROJ || 749 type == XFS_DQ_PROJ ||
839 type == XFS_DQ_GROUP); 750 type == XFS_DQ_GROUP);
@@ -845,13 +756,21 @@ xfs_qm_dqget(
845 ASSERT(ip->i_gdquot == NULL); 756 ASSERT(ip->i_gdquot == NULL);
846 } 757 }
847#endif 758#endif
759
760restart:
848 mutex_lock(&h->qh_lock); 761 mutex_lock(&h->qh_lock);
849 762
850 /* 763 /*
851 * Look in the cache (hashtable). 764 * Look in the cache (hashtable).
852 * The chain is kept locked during lookup. 765 * The chain is kept locked during lookup.
853 */ 766 */
854 if (xfs_qm_dqlookup(mp, id, h, O_dqpp) == 0) { 767 switch (xfs_qm_dqlookup(mp, id, h, O_dqpp)) {
768 case -1:
769 XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
770 mutex_unlock(&h->qh_lock);
771 delay(1);
772 goto restart;
773 case 0:
855 XQM_STATS_INC(xqmstats.xs_qm_dqcachehits); 774 XQM_STATS_INC(xqmstats.xs_qm_dqcachehits);
856 /* 775 /*
857 * The dquot was found, moved to the front of the chain, 776 * The dquot was found, moved to the front of the chain,
@@ -862,9 +781,11 @@ xfs_qm_dqget(
862 ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp)); 781 ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
863 mutex_unlock(&h->qh_lock); 782 mutex_unlock(&h->qh_lock);
864 trace_xfs_dqget_hit(*O_dqpp); 783 trace_xfs_dqget_hit(*O_dqpp);
865 return (0); /* success */ 784 return 0; /* success */
785 default:
786 XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
787 break;
866 } 788 }
867 XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
868 789
869 /* 790 /*
870 * Dquot cache miss. We don't want to keep the inode lock across 791 * Dquot cache miss. We don't want to keep the inode lock across
@@ -882,41 +803,18 @@ xfs_qm_dqget(
882 version = h->qh_version; 803 version = h->qh_version;
883 mutex_unlock(&h->qh_lock); 804 mutex_unlock(&h->qh_lock);
884 805
885 /* 806 error = xfs_qm_dqread(mp, id, type, flags, &dqp);
886 * Allocate the dquot on the kernel heap, and read the ondisk
887 * portion off the disk. Also, do all the necessary initialization
888 * This can return ENOENT if dquot didn't exist on disk and we didn't
889 * ask it to allocate; ESRCH if quotas got turned off suddenly.
890 */
891 if ((error = xfs_qm_idtodq(mp, id, type,
892 flags & (XFS_QMOPT_DQALLOC|XFS_QMOPT_DQREPAIR|
893 XFS_QMOPT_DOWARN),
894 &dqp))) {
895 if (ip)
896 xfs_ilock(ip, XFS_ILOCK_EXCL);
897 return (error);
898 }
899 807
900 /* 808 if (ip)
901 * See if this is mount code calling to look at the overall quota limits 809 xfs_ilock(ip, XFS_ILOCK_EXCL);
902 * which are stored in the id == 0 user or group's dquot. 810
903 * Since we may not have done a quotacheck by this point, just return 811 if (error)
904 * the dquot without attaching it to any hashtables, lists, etc, or even 812 return error;
905 * taking a reference.
906 * The caller must dqdestroy this once done.
907 */
908 if (flags & XFS_QMOPT_DQSUSER) {
909 ASSERT(id == 0);
910 ASSERT(! ip);
911 goto dqret;
912 }
913 813
914 /* 814 /*
915 * Dquot lock comes after hashlock in the lock ordering 815 * Dquot lock comes after hashlock in the lock ordering
916 */ 816 */
917 if (ip) { 817 if (ip) {
918 xfs_ilock(ip, XFS_ILOCK_EXCL);
919
920 /* 818 /*
921 * A dquot could be attached to this inode by now, since 819 * A dquot could be attached to this inode by now, since
922 * we had dropped the ilock. 820 * we had dropped the ilock.
@@ -961,16 +859,21 @@ xfs_qm_dqget(
961 * lock order between the two dquots here since dqp isn't 859 * lock order between the two dquots here since dqp isn't
962 * on any findable lists yet. 860 * on any findable lists yet.
963 */ 861 */
964 if (xfs_qm_dqlookup(mp, id, h, &tmpdqp) == 0) { 862 switch (xfs_qm_dqlookup(mp, id, h, &tmpdqp)) {
863 case 0:
864 case -1:
965 /* 865 /*
966 * Duplicate found. Just throw away the new dquot 866 * Duplicate found, either in cache or on its way out.
967 * and start over. 867 * Just throw away the new dquot and start over.
968 */ 868 */
969 xfs_qm_dqput(tmpdqp); 869 if (tmpdqp)
870 xfs_qm_dqput(tmpdqp);
970 mutex_unlock(&h->qh_lock); 871 mutex_unlock(&h->qh_lock);
971 xfs_qm_dqdestroy(dqp); 872 xfs_qm_dqdestroy(dqp);
972 XQM_STATS_INC(xqmstats.xs_qm_dquot_dups); 873 XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
973 goto again; 874 goto restart;
875 default:
876 break;
974 } 877 }
975 } 878 }
976 879
@@ -1015,67 +918,49 @@ xfs_qm_dqget(
1015 */ 918 */
1016void 919void
1017xfs_qm_dqput( 920xfs_qm_dqput(
1018 xfs_dquot_t *dqp) 921 struct xfs_dquot *dqp)
1019{ 922{
1020 xfs_dquot_t *gdqp; 923 struct xfs_dquot *gdqp;
1021 924
1022 ASSERT(dqp->q_nrefs > 0); 925 ASSERT(dqp->q_nrefs > 0);
1023 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 926 ASSERT(XFS_DQ_IS_LOCKED(dqp));
1024 927
1025 trace_xfs_dqput(dqp); 928 trace_xfs_dqput(dqp);
1026 929
1027 if (dqp->q_nrefs != 1) { 930recurse:
1028 dqp->q_nrefs--; 931 if (--dqp->q_nrefs > 0) {
1029 xfs_dqunlock(dqp); 932 xfs_dqunlock(dqp);
1030 return; 933 return;
1031 } 934 }
1032 935
936 trace_xfs_dqput_free(dqp);
937
938 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
939 if (list_empty(&dqp->q_freelist)) {
940 list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
941 xfs_Gqm->qm_dqfrlist_cnt++;
942 }
943 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
944
1033 /* 945 /*
1034 * drop the dqlock and acquire the freelist and dqlock 946 * If we just added a udquot to the freelist, then we want to release
1035 * in the right order; but try to get it out-of-order first 947 * the gdquot reference that it (probably) has. Otherwise it'll keep
948 * the gdquot from getting reclaimed.
1036 */ 949 */
1037 if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) { 950 gdqp = dqp->q_gdquot;
1038 trace_xfs_dqput_wait(dqp); 951 if (gdqp) {
1039 xfs_dqunlock(dqp); 952 xfs_dqlock(gdqp);
1040 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); 953 dqp->q_gdquot = NULL;
1041 xfs_dqlock(dqp);
1042 } 954 }
955 xfs_dqunlock(dqp);
1043 956
1044 while (1) { 957 /*
1045 gdqp = NULL; 958 * If we had a group quota hint, release it now.
1046 959 */
1047 /* We can't depend on nrefs being == 1 here */ 960 if (gdqp) {
1048 if (--dqp->q_nrefs == 0) {
1049 trace_xfs_dqput_free(dqp);
1050
1051 list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
1052 xfs_Gqm->qm_dqfrlist_cnt++;
1053
1054 /*
1055 * If we just added a udquot to the freelist, then
1056 * we want to release the gdquot reference that
1057 * it (probably) has. Otherwise it'll keep the
1058 * gdquot from getting reclaimed.
1059 */
1060 if ((gdqp = dqp->q_gdquot)) {
1061 /*
1062 * Avoid a recursive dqput call
1063 */
1064 xfs_dqlock(gdqp);
1065 dqp->q_gdquot = NULL;
1066 }
1067 }
1068 xfs_dqunlock(dqp);
1069
1070 /*
1071 * If we had a group quota inside the user quota as a hint,
1072 * release it now.
1073 */
1074 if (! gdqp)
1075 break;
1076 dqp = gdqp; 961 dqp = gdqp;
962 goto recurse;
1077 } 963 }
1078 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
1079} 964}
1080 965
1081/* 966/*
@@ -1169,7 +1054,7 @@ xfs_qm_dqflush(
1169 * If not dirty, or it's pinned and we are not supposed to block, nada. 1054 * If not dirty, or it's pinned and we are not supposed to block, nada.
1170 */ 1055 */
1171 if (!XFS_DQ_IS_DIRTY(dqp) || 1056 if (!XFS_DQ_IS_DIRTY(dqp) ||
1172 (!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) { 1057 ((flags & SYNC_TRYLOCK) && atomic_read(&dqp->q_pincount) > 0)) {
1173 xfs_dqfunlock(dqp); 1058 xfs_dqfunlock(dqp);
1174 return 0; 1059 return 0;
1175 } 1060 }
@@ -1257,40 +1142,17 @@ xfs_qm_dqflush(
1257 1142
1258} 1143}
1259 1144
1260int
1261xfs_qm_dqlock_nowait(
1262 xfs_dquot_t *dqp)
1263{
1264 return mutex_trylock(&dqp->q_qlock);
1265}
1266
1267void
1268xfs_dqlock(
1269 xfs_dquot_t *dqp)
1270{
1271 mutex_lock(&dqp->q_qlock);
1272}
1273
1274void 1145void
1275xfs_dqunlock( 1146xfs_dqunlock(
1276 xfs_dquot_t *dqp) 1147 xfs_dquot_t *dqp)
1277{ 1148{
1278 mutex_unlock(&(dqp->q_qlock)); 1149 xfs_dqunlock_nonotify(dqp);
1279 if (dqp->q_logitem.qli_dquot == dqp) { 1150 if (dqp->q_logitem.qli_dquot == dqp) {
1280 /* Once was dqp->q_mount, but might just have been cleared */
1281 xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_ailp, 1151 xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_ailp,
1282 (xfs_log_item_t*)&(dqp->q_logitem)); 1152 &dqp->q_logitem.qli_item);
1283 } 1153 }
1284} 1154}
1285 1155
1286
1287void
1288xfs_dqunlock_nonotify(
1289 xfs_dquot_t *dqp)
1290{
1291 mutex_unlock(&(dqp->q_qlock));
1292}
1293
1294/* 1156/*
1295 * Lock two xfs_dquot structures. 1157 * Lock two xfs_dquot structures.
1296 * 1158 *
@@ -1319,43 +1181,18 @@ xfs_dqlock2(
1319 } 1181 }
1320} 1182}
1321 1183
1322
1323/* 1184/*
1324 * Take a dquot out of the mount's dqlist as well as the hashlist. 1185 * Take a dquot out of the mount's dqlist as well as the hashlist. This is
1325 * This is called via unmount as well as quotaoff, and the purge 1186 * called via unmount as well as quotaoff, and the purge will always succeed.
1326 * will always succeed unless there are soft (temp) references
1327 * outstanding.
1328 *
1329 * This returns 0 if it was purged, 1 if it wasn't. It's not an error code
1330 * that we're returning! XXXsup - not cool.
1331 */ 1187 */
1332/* ARGSUSED */ 1188void
1333int
1334xfs_qm_dqpurge( 1189xfs_qm_dqpurge(
1335 xfs_dquot_t *dqp) 1190 struct xfs_dquot *dqp)
1336{ 1191{
1337 xfs_dqhash_t *qh = dqp->q_hash; 1192 struct xfs_mount *mp = dqp->q_mount;
1338 xfs_mount_t *mp = dqp->q_mount; 1193 struct xfs_dqhash *qh = dqp->q_hash;
1339
1340 ASSERT(mutex_is_locked(&mp->m_quotainfo->qi_dqlist_lock));
1341 ASSERT(mutex_is_locked(&dqp->q_hash->qh_lock));
1342 1194
1343 xfs_dqlock(dqp); 1195 xfs_dqlock(dqp);
1344 /*
1345 * We really can't afford to purge a dquot that is
1346 * referenced, because these are hard refs.
1347 * It shouldn't happen in general because we went thru _all_ inodes in
1348 * dqrele_all_inodes before calling this and didn't let the mountlock go.
1349 * However it is possible that we have dquots with temporary
1350 * references that are not attached to an inode. e.g. see xfs_setattr().
1351 */
1352 if (dqp->q_nrefs != 0) {
1353 xfs_dqunlock(dqp);
1354 mutex_unlock(&dqp->q_hash->qh_lock);
1355 return (1);
1356 }
1357
1358 ASSERT(!list_empty(&dqp->q_freelist));
1359 1196
1360 /* 1197 /*
1361 * If we're turning off quotas, we have to make sure that, for 1198 * If we're turning off quotas, we have to make sure that, for
@@ -1370,23 +1207,18 @@ xfs_qm_dqpurge(
1370 * Block on the flush lock after nudging dquot buffer, 1207 * Block on the flush lock after nudging dquot buffer,
1371 * if it is incore. 1208 * if it is incore.
1372 */ 1209 */
1373 xfs_qm_dqflock_pushbuf_wait(dqp); 1210 xfs_dqflock_pushbuf_wait(dqp);
1374 } 1211 }
1375 1212
1376 /* 1213 /*
1377 * XXXIf we're turning this type of quotas off, we don't care 1214 * If we are turning this type of quotas off, we don't care
1378 * about the dirty metadata sitting in this dquot. OTOH, if 1215 * about the dirty metadata sitting in this dquot. OTOH, if
1379 * we're unmounting, we do care, so we flush it and wait. 1216 * we're unmounting, we do care, so we flush it and wait.
1380 */ 1217 */
1381 if (XFS_DQ_IS_DIRTY(dqp)) { 1218 if (XFS_DQ_IS_DIRTY(dqp)) {
1382 int error; 1219 int error;
1383 1220
1384 /* dqflush unlocks dqflock */
1385 /* 1221 /*
1386 * Given that dqpurge is a very rare occurrence, it is OK
1387 * that we're holding the hashlist and mplist locks
1388 * across the disk write. But, ... XXXsup
1389 *
1390 * We don't care about getting disk errors here. We need 1222 * We don't care about getting disk errors here. We need
1391 * to purge this dquot anyway, so we go ahead regardless. 1223 * to purge this dquot anyway, so we go ahead regardless.
1392 */ 1224 */
@@ -1396,38 +1228,44 @@ xfs_qm_dqpurge(
1396 __func__, dqp); 1228 __func__, dqp);
1397 xfs_dqflock(dqp); 1229 xfs_dqflock(dqp);
1398 } 1230 }
1231
1399 ASSERT(atomic_read(&dqp->q_pincount) == 0); 1232 ASSERT(atomic_read(&dqp->q_pincount) == 0);
1400 ASSERT(XFS_FORCED_SHUTDOWN(mp) || 1233 ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
1401 !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL)); 1234 !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
1402 1235
1236 xfs_dqfunlock(dqp);
1237 xfs_dqunlock(dqp);
1238
1239 mutex_lock(&qh->qh_lock);
1403 list_del_init(&dqp->q_hashlist); 1240 list_del_init(&dqp->q_hashlist);
1404 qh->qh_version++; 1241 qh->qh_version++;
1242 mutex_unlock(&qh->qh_lock);
1243
1244 mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
1405 list_del_init(&dqp->q_mplist); 1245 list_del_init(&dqp->q_mplist);
1406 mp->m_quotainfo->qi_dqreclaims++; 1246 mp->m_quotainfo->qi_dqreclaims++;
1407 mp->m_quotainfo->qi_dquots--; 1247 mp->m_quotainfo->qi_dquots--;
1248 mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
1249
1408 /* 1250 /*
1409 * XXX Move this to the front of the freelist, if we can get the 1251 * We move dquots to the freelist as soon as their reference count
1410 * freelist lock. 1252 * hits zero, so it really should be on the freelist here.
1411 */ 1253 */
1254 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
1412 ASSERT(!list_empty(&dqp->q_freelist)); 1255 ASSERT(!list_empty(&dqp->q_freelist));
1256 list_del_init(&dqp->q_freelist);
1257 xfs_Gqm->qm_dqfrlist_cnt--;
1258 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
1413 1259
1414 dqp->q_mount = NULL; 1260 xfs_qm_dqdestroy(dqp);
1415 dqp->q_hash = NULL;
1416 dqp->dq_flags = XFS_DQ_INACTIVE;
1417 memset(&dqp->q_core, 0, sizeof(dqp->q_core));
1418 xfs_dqfunlock(dqp);
1419 xfs_dqunlock(dqp);
1420 mutex_unlock(&qh->qh_lock);
1421 return (0);
1422} 1261}
1423 1262
1424
1425/* 1263/*
1426 * Give the buffer a little push if it is incore and 1264 * Give the buffer a little push if it is incore and
1427 * wait on the flush lock. 1265 * wait on the flush lock.
1428 */ 1266 */
1429void 1267void
1430xfs_qm_dqflock_pushbuf_wait( 1268xfs_dqflock_pushbuf_wait(
1431 xfs_dquot_t *dqp) 1269 xfs_dquot_t *dqp)
1432{ 1270{
1433 xfs_mount_t *mp = dqp->q_mount; 1271 xfs_mount_t *mp = dqp->q_mount;
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index 34b7e945dbfa..a1d91d8f1802 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -80,8 +80,6 @@ enum {
80 XFS_QLOCK_NESTED, 80 XFS_QLOCK_NESTED,
81}; 81};
82 82
83#define XFS_DQHOLD(dqp) ((dqp)->q_nrefs++)
84
85/* 83/*
86 * Manage the q_flush completion queue embedded in the dquot. This completion 84 * Manage the q_flush completion queue embedded in the dquot. This completion
87 * queue synchronizes processes attempting to flush the in-core dquot back to 85 * queue synchronizes processes attempting to flush the in-core dquot back to
@@ -102,6 +100,21 @@ static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
102 complete(&dqp->q_flush); 100 complete(&dqp->q_flush);
103} 101}
104 102
103static inline int xfs_dqlock_nowait(struct xfs_dquot *dqp)
104{
105 return mutex_trylock(&dqp->q_qlock);
106}
107
108static inline void xfs_dqlock(struct xfs_dquot *dqp)
109{
110 mutex_lock(&dqp->q_qlock);
111}
112
113static inline void xfs_dqunlock_nonotify(struct xfs_dquot *dqp)
114{
115 mutex_unlock(&dqp->q_qlock);
116}
117
105#define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock))) 118#define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock)))
106#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) 119#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY)
107#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) 120#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER)
@@ -116,12 +129,12 @@ static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
116 (XFS_IS_UQUOTA_ON((d)->q_mount)) : \ 129 (XFS_IS_UQUOTA_ON((d)->q_mount)) : \
117 (XFS_IS_OQUOTA_ON((d)->q_mount)))) 130 (XFS_IS_OQUOTA_ON((d)->q_mount))))
118 131
132extern int xfs_qm_dqread(struct xfs_mount *, xfs_dqid_t, uint,
133 uint, struct xfs_dquot **);
119extern void xfs_qm_dqdestroy(xfs_dquot_t *); 134extern void xfs_qm_dqdestroy(xfs_dquot_t *);
120extern int xfs_qm_dqflush(xfs_dquot_t *, uint); 135extern int xfs_qm_dqflush(xfs_dquot_t *, uint);
121extern int xfs_qm_dqpurge(xfs_dquot_t *); 136extern void xfs_qm_dqpurge(xfs_dquot_t *);
122extern void xfs_qm_dqunpin_wait(xfs_dquot_t *); 137extern void xfs_qm_dqunpin_wait(xfs_dquot_t *);
123extern int xfs_qm_dqlock_nowait(xfs_dquot_t *);
124extern void xfs_qm_dqflock_pushbuf_wait(xfs_dquot_t *dqp);
125extern void xfs_qm_adjust_dqtimers(xfs_mount_t *, 138extern void xfs_qm_adjust_dqtimers(xfs_mount_t *,
126 xfs_disk_dquot_t *); 139 xfs_disk_dquot_t *);
127extern void xfs_qm_adjust_dqlimits(xfs_mount_t *, 140extern void xfs_qm_adjust_dqlimits(xfs_mount_t *,
@@ -129,9 +142,17 @@ extern void xfs_qm_adjust_dqlimits(xfs_mount_t *,
129extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *, 142extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,
130 xfs_dqid_t, uint, uint, xfs_dquot_t **); 143 xfs_dqid_t, uint, uint, xfs_dquot_t **);
131extern void xfs_qm_dqput(xfs_dquot_t *); 144extern void xfs_qm_dqput(xfs_dquot_t *);
132extern void xfs_dqlock(xfs_dquot_t *); 145
133extern void xfs_dqlock2(xfs_dquot_t *, xfs_dquot_t *); 146extern void xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *);
134extern void xfs_dqunlock(xfs_dquot_t *); 147extern void xfs_dqunlock(struct xfs_dquot *);
135extern void xfs_dqunlock_nonotify(xfs_dquot_t *); 148extern void xfs_dqflock_pushbuf_wait(struct xfs_dquot *dqp);
149
150static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp)
151{
152 xfs_dqlock(dqp);
153 dqp->q_nrefs++;
154 xfs_dqunlock(dqp);
155 return dqp;
156}
136 157
137#endif /* __XFS_DQUOT_H__ */ 158#endif /* __XFS_DQUOT_H__ */
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
index 0dee0b71029d..34baeae45265 100644
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -73,7 +73,6 @@ xfs_qm_dquot_logitem_format(
73 logvec->i_len = sizeof(xfs_disk_dquot_t); 73 logvec->i_len = sizeof(xfs_disk_dquot_t);
74 logvec->i_type = XLOG_REG_TYPE_DQUOT; 74 logvec->i_type = XLOG_REG_TYPE_DQUOT;
75 75
76 ASSERT(2 == lip->li_desc->lid_size);
77 qlip->qli_format.qlf_size = 2; 76 qlip->qli_format.qlf_size = 2;
78 77
79} 78}
@@ -134,7 +133,7 @@ xfs_qm_dquot_logitem_push(
134 * lock without sleeping, then there must not have been 133 * lock without sleeping, then there must not have been
135 * anyone in the process of flushing the dquot. 134 * anyone in the process of flushing the dquot.
136 */ 135 */
137 error = xfs_qm_dqflush(dqp, 0); 136 error = xfs_qm_dqflush(dqp, SYNC_TRYLOCK);
138 if (error) 137 if (error)
139 xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p", 138 xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p",
140 __func__, error, dqp); 139 __func__, error, dqp);
@@ -237,7 +236,7 @@ xfs_qm_dquot_logitem_trylock(
237 if (atomic_read(&dqp->q_pincount) > 0) 236 if (atomic_read(&dqp->q_pincount) > 0)
238 return XFS_ITEM_PINNED; 237 return XFS_ITEM_PINNED;
239 238
240 if (!xfs_qm_dqlock_nowait(dqp)) 239 if (!xfs_dqlock_nowait(dqp))
241 return XFS_ITEM_LOCKED; 240 return XFS_ITEM_LOCKED;
242 241
243 if (!xfs_dqflock_nowait(dqp)) { 242 if (!xfs_dqflock_nowait(dqp)) {
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 753ed9b5c70b..f675f3d9d7b3 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -209,10 +209,10 @@ xfs_file_fsync(
209 209
210 /* 210 /*
211 * First check if the VFS inode is marked dirty. All the dirtying 211 * First check if the VFS inode is marked dirty. All the dirtying
212 * of non-transactional updates no goes through mark_inode_dirty*, 212 * of non-transactional updates do not go through mark_inode_dirty*,
213 * which allows us to distinguish beteeen pure timestamp updates 213 * which allows us to distinguish between pure timestamp updates
214 * and i_size updates which need to be caught for fdatasync. 214 * and i_size updates which need to be caught for fdatasync.
215 * After that also theck for the dirty state in the XFS inode, which 215 * After that also check for the dirty state in the XFS inode, which
216 * might gets cleared when the inode gets written out via the AIL 216 * might gets cleared when the inode gets written out via the AIL
217 * or xfs_iflush_cluster. 217 * or xfs_iflush_cluster.
218 */ 218 */
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 169380e66057..dad1a31aa4fc 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -447,7 +447,7 @@ STATIC xfs_buf_t * /* allocation group buffer */
447xfs_ialloc_ag_select( 447xfs_ialloc_ag_select(
448 xfs_trans_t *tp, /* transaction pointer */ 448 xfs_trans_t *tp, /* transaction pointer */
449 xfs_ino_t parent, /* parent directory inode number */ 449 xfs_ino_t parent, /* parent directory inode number */
450 mode_t mode, /* bits set to indicate file type */ 450 umode_t mode, /* bits set to indicate file type */
451 int okalloc) /* ok to allocate more space */ 451 int okalloc) /* ok to allocate more space */
452{ 452{
453 xfs_buf_t *agbp; /* allocation group header buffer */ 453 xfs_buf_t *agbp; /* allocation group header buffer */
@@ -640,7 +640,7 @@ int
640xfs_dialloc( 640xfs_dialloc(
641 xfs_trans_t *tp, /* transaction pointer */ 641 xfs_trans_t *tp, /* transaction pointer */
642 xfs_ino_t parent, /* parent inode (directory) */ 642 xfs_ino_t parent, /* parent inode (directory) */
643 mode_t mode, /* mode bits for new inode */ 643 umode_t mode, /* mode bits for new inode */
644 int okalloc, /* ok to allocate more space */ 644 int okalloc, /* ok to allocate more space */
645 xfs_buf_t **IO_agbp, /* in/out ag header's buffer */ 645 xfs_buf_t **IO_agbp, /* in/out ag header's buffer */
646 boolean_t *alloc_done, /* true if we needed to replenish 646 boolean_t *alloc_done, /* true if we needed to replenish
diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h
index bb5385475e1f..666a037398d6 100644
--- a/fs/xfs/xfs_ialloc.h
+++ b/fs/xfs/xfs_ialloc.h
@@ -81,7 +81,7 @@ int /* error */
81xfs_dialloc( 81xfs_dialloc(
82 struct xfs_trans *tp, /* transaction pointer */ 82 struct xfs_trans *tp, /* transaction pointer */
83 xfs_ino_t parent, /* parent inode (directory) */ 83 xfs_ino_t parent, /* parent inode (directory) */
84 mode_t mode, /* mode bits for new inode */ 84 umode_t mode, /* mode bits for new inode */
85 int okalloc, /* ok to allocate more space */ 85 int okalloc, /* ok to allocate more space */
86 struct xfs_buf **agbp, /* buf for a.g. inode header */ 86 struct xfs_buf **agbp, /* buf for a.g. inode header */
87 boolean_t *alloc_done, /* an allocation was done to replenish 87 boolean_t *alloc_done, /* an allocation was done to replenish
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 0fa98b1c70ea..3960a066d7ff 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -107,7 +107,6 @@ xfs_inode_free_callback(
107 struct inode *inode = container_of(head, struct inode, i_rcu); 107 struct inode *inode = container_of(head, struct inode, i_rcu);
108 struct xfs_inode *ip = XFS_I(inode); 108 struct xfs_inode *ip = XFS_I(inode);
109 109
110 INIT_LIST_HEAD(&inode->i_dentry);
111 kmem_zone_free(xfs_inode_zone, ip); 110 kmem_zone_free(xfs_inode_zone, ip);
112} 111}
113 112
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 755ee8164880..9dda7cc32848 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -961,7 +961,7 @@ int
961xfs_ialloc( 961xfs_ialloc(
962 xfs_trans_t *tp, 962 xfs_trans_t *tp,
963 xfs_inode_t *pip, 963 xfs_inode_t *pip,
964 mode_t mode, 964 umode_t mode,
965 xfs_nlink_t nlink, 965 xfs_nlink_t nlink,
966 xfs_dev_t rdev, 966 xfs_dev_t rdev,
967 prid_t prid, 967 prid_t prid,
@@ -1002,7 +1002,7 @@ xfs_ialloc(
1002 return error; 1002 return error;
1003 ASSERT(ip != NULL); 1003 ASSERT(ip != NULL);
1004 1004
1005 ip->i_d.di_mode = (__uint16_t)mode; 1005 ip->i_d.di_mode = mode;
1006 ip->i_d.di_onlink = 0; 1006 ip->i_d.di_onlink = 0;
1007 ip->i_d.di_nlink = nlink; 1007 ip->i_d.di_nlink = nlink;
1008 ASSERT(ip->i_d.di_nlink == nlink); 1008 ASSERT(ip->i_d.di_nlink == nlink);
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index b4cd4739f98e..f0e6b151ba37 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -481,7 +481,7 @@ void xfs_inode_free(struct xfs_inode *ip);
481/* 481/*
482 * xfs_inode.c prototypes. 482 * xfs_inode.c prototypes.
483 */ 483 */
484int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, 484int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, umode_t,
485 xfs_nlink_t, xfs_dev_t, prid_t, int, 485 xfs_nlink_t, xfs_dev_t, prid_t, int,
486 struct xfs_buf **, boolean_t *, xfs_inode_t **); 486 struct xfs_buf **, boolean_t *, xfs_inode_t **);
487 487
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index abaafdbb3e65..cfd6c7f8cc3c 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -437,7 +437,6 @@ xfs_inode_item_format(
437 * Assert that no attribute-related log flags are set. 437 * Assert that no attribute-related log flags are set.
438 */ 438 */
439 if (!XFS_IFORK_Q(ip)) { 439 if (!XFS_IFORK_Q(ip)) {
440 ASSERT(nvecs == lip->li_desc->lid_size);
441 iip->ili_format.ilf_size = nvecs; 440 iip->ili_format.ilf_size = nvecs;
442 ASSERT(!(iip->ili_format.ilf_fields & 441 ASSERT(!(iip->ili_format.ilf_fields &
443 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT))); 442 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT)));
@@ -521,7 +520,6 @@ xfs_inode_item_format(
521 break; 520 break;
522 } 521 }
523 522
524 ASSERT(nvecs == lip->li_desc->lid_size);
525 iip->ili_format.ilf_size = nvecs; 523 iip->ili_format.ilf_size = nvecs;
526} 524}
527 525
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index d99a90518909..76f3ca5cfc36 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -559,23 +559,23 @@ xfs_attrmulti_by_handle(
559 ops[i].am_flags); 559 ops[i].am_flags);
560 break; 560 break;
561 case ATTR_OP_SET: 561 case ATTR_OP_SET:
562 ops[i].am_error = mnt_want_write(parfilp->f_path.mnt); 562 ops[i].am_error = mnt_want_write_file(parfilp);
563 if (ops[i].am_error) 563 if (ops[i].am_error)
564 break; 564 break;
565 ops[i].am_error = xfs_attrmulti_attr_set( 565 ops[i].am_error = xfs_attrmulti_attr_set(
566 dentry->d_inode, attr_name, 566 dentry->d_inode, attr_name,
567 ops[i].am_attrvalue, ops[i].am_length, 567 ops[i].am_attrvalue, ops[i].am_length,
568 ops[i].am_flags); 568 ops[i].am_flags);
569 mnt_drop_write(parfilp->f_path.mnt); 569 mnt_drop_write_file(parfilp);
570 break; 570 break;
571 case ATTR_OP_REMOVE: 571 case ATTR_OP_REMOVE:
572 ops[i].am_error = mnt_want_write(parfilp->f_path.mnt); 572 ops[i].am_error = mnt_want_write_file(parfilp);
573 if (ops[i].am_error) 573 if (ops[i].am_error)
574 break; 574 break;
575 ops[i].am_error = xfs_attrmulti_attr_remove( 575 ops[i].am_error = xfs_attrmulti_attr_remove(
576 dentry->d_inode, attr_name, 576 dentry->d_inode, attr_name,
577 ops[i].am_flags); 577 ops[i].am_flags);
578 mnt_drop_write(parfilp->f_path.mnt); 578 mnt_drop_write_file(parfilp);
579 break; 579 break;
580 default: 580 default:
581 ops[i].am_error = EINVAL; 581 ops[i].am_error = EINVAL;
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index 54e623bfbb85..f9ccb7b7c043 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -454,23 +454,23 @@ xfs_compat_attrmulti_by_handle(
454 &ops[i].am_length, ops[i].am_flags); 454 &ops[i].am_length, ops[i].am_flags);
455 break; 455 break;
456 case ATTR_OP_SET: 456 case ATTR_OP_SET:
457 ops[i].am_error = mnt_want_write(parfilp->f_path.mnt); 457 ops[i].am_error = mnt_want_write_file(parfilp);
458 if (ops[i].am_error) 458 if (ops[i].am_error)
459 break; 459 break;
460 ops[i].am_error = xfs_attrmulti_attr_set( 460 ops[i].am_error = xfs_attrmulti_attr_set(
461 dentry->d_inode, attr_name, 461 dentry->d_inode, attr_name,
462 compat_ptr(ops[i].am_attrvalue), 462 compat_ptr(ops[i].am_attrvalue),
463 ops[i].am_length, ops[i].am_flags); 463 ops[i].am_length, ops[i].am_flags);
464 mnt_drop_write(parfilp->f_path.mnt); 464 mnt_drop_write_file(parfilp);
465 break; 465 break;
466 case ATTR_OP_REMOVE: 466 case ATTR_OP_REMOVE:
467 ops[i].am_error = mnt_want_write(parfilp->f_path.mnt); 467 ops[i].am_error = mnt_want_write_file(parfilp);
468 if (ops[i].am_error) 468 if (ops[i].am_error)
469 break; 469 break;
470 ops[i].am_error = xfs_attrmulti_attr_remove( 470 ops[i].am_error = xfs_attrmulti_attr_remove(
471 dentry->d_inode, attr_name, 471 dentry->d_inode, attr_name,
472 ops[i].am_flags); 472 ops[i].am_flags);
473 mnt_drop_write(parfilp->f_path.mnt); 473 mnt_drop_write_file(parfilp);
474 break; 474 break;
475 default: 475 default:
476 ops[i].am_error = EINVAL; 476 ops[i].am_error = EINVAL;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 23ce927973a4..f9babd179223 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -168,7 +168,7 @@ STATIC int
168xfs_vn_mknod( 168xfs_vn_mknod(
169 struct inode *dir, 169 struct inode *dir,
170 struct dentry *dentry, 170 struct dentry *dentry,
171 int mode, 171 umode_t mode,
172 dev_t rdev) 172 dev_t rdev)
173{ 173{
174 struct inode *inode; 174 struct inode *inode;
@@ -231,7 +231,7 @@ STATIC int
231xfs_vn_create( 231xfs_vn_create(
232 struct inode *dir, 232 struct inode *dir,
233 struct dentry *dentry, 233 struct dentry *dentry,
234 int mode, 234 umode_t mode,
235 struct nameidata *nd) 235 struct nameidata *nd)
236{ 236{
237 return xfs_vn_mknod(dir, dentry, mode, 0); 237 return xfs_vn_mknod(dir, dentry, mode, 0);
@@ -241,7 +241,7 @@ STATIC int
241xfs_vn_mkdir( 241xfs_vn_mkdir(
242 struct inode *dir, 242 struct inode *dir,
243 struct dentry *dentry, 243 struct dentry *dentry,
244 int mode) 244 umode_t mode)
245{ 245{
246 return xfs_vn_mknod(dir, dentry, mode|S_IFDIR, 0); 246 return xfs_vn_mknod(dir, dentry, mode|S_IFDIR, 0);
247} 247}
@@ -366,7 +366,7 @@ xfs_vn_symlink(
366 struct xfs_inode *cip = NULL; 366 struct xfs_inode *cip = NULL;
367 struct xfs_name name; 367 struct xfs_name name;
368 int error; 368 int error;
369 mode_t mode; 369 umode_t mode;
370 370
371 mode = S_IFLNK | 371 mode = S_IFLNK |
372 (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO); 372 (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 34817adf4b9e..e2cc3568c299 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -760,38 +760,6 @@ xfs_log_item_init(
760 INIT_LIST_HEAD(&item->li_cil); 760 INIT_LIST_HEAD(&item->li_cil);
761} 761}
762 762
763/*
764 * Write region vectors to log. The write happens using the space reservation
765 * of the ticket (tic). It is not a requirement that all writes for a given
766 * transaction occur with one call to xfs_log_write(). However, it is important
767 * to note that the transaction reservation code makes an assumption about the
768 * number of log headers a transaction requires that may be violated if you
769 * don't pass all the transaction vectors in one call....
770 */
771int
772xfs_log_write(
773 struct xfs_mount *mp,
774 struct xfs_log_iovec reg[],
775 int nentries,
776 struct xlog_ticket *tic,
777 xfs_lsn_t *start_lsn)
778{
779 struct log *log = mp->m_log;
780 int error;
781 struct xfs_log_vec vec = {
782 .lv_niovecs = nentries,
783 .lv_iovecp = reg,
784 };
785
786 if (XLOG_FORCED_SHUTDOWN(log))
787 return XFS_ERROR(EIO);
788
789 error = xlog_write(log, &vec, tic, start_lsn, NULL, 0);
790 if (error)
791 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
792 return error;
793}
794
795void 763void
796xfs_log_move_tail(xfs_mount_t *mp, 764xfs_log_move_tail(xfs_mount_t *mp,
797 xfs_lsn_t tail_lsn) 765 xfs_lsn_t tail_lsn)
@@ -1685,7 +1653,7 @@ xlog_print_tic_res(
1685 }; 1653 };
1686 1654
1687 xfs_warn(mp, 1655 xfs_warn(mp,
1688 "xfs_log_write: reservation summary:\n" 1656 "xlog_write: reservation summary:\n"
1689 " trans type = %s (%u)\n" 1657 " trans type = %s (%u)\n"
1690 " unit res = %d bytes\n" 1658 " unit res = %d bytes\n"
1691 " current res = %d bytes\n" 1659 " current res = %d bytes\n"
@@ -1714,7 +1682,7 @@ xlog_print_tic_res(
1714 } 1682 }
1715 1683
1716 xfs_alert_tag(mp, XFS_PTAG_LOGRES, 1684 xfs_alert_tag(mp, XFS_PTAG_LOGRES,
1717 "xfs_log_write: reservation ran out. Need to up reservation"); 1685 "xlog_write: reservation ran out. Need to up reservation");
1718 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 1686 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1719} 1687}
1720 1688
@@ -1968,23 +1936,21 @@ xlog_write(
1968 *start_lsn = 0; 1936 *start_lsn = 0;
1969 1937
1970 len = xlog_write_calc_vec_length(ticket, log_vector); 1938 len = xlog_write_calc_vec_length(ticket, log_vector);
1971 if (log->l_cilp) {
1972 /*
1973 * Region headers and bytes are already accounted for.
1974 * We only need to take into account start records and
1975 * split regions in this function.
1976 */
1977 if (ticket->t_flags & XLOG_TIC_INITED)
1978 ticket->t_curr_res -= sizeof(xlog_op_header_t);
1979 1939
1980 /* 1940 /*
1981 * Commit record headers need to be accounted for. These 1941 * Region headers and bytes are already accounted for.
1982 * come in as separate writes so are easy to detect. 1942 * We only need to take into account start records and
1983 */ 1943 * split regions in this function.
1984 if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS)) 1944 */
1985 ticket->t_curr_res -= sizeof(xlog_op_header_t); 1945 if (ticket->t_flags & XLOG_TIC_INITED)
1986 } else 1946 ticket->t_curr_res -= sizeof(xlog_op_header_t);
1987 ticket->t_curr_res -= len; 1947
1948 /*
1949 * Commit record headers need to be accounted for. These
1950 * come in as separate writes so are easy to detect.
1951 */
1952 if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS))
1953 ticket->t_curr_res -= sizeof(xlog_op_header_t);
1988 1954
1989 if (ticket->t_curr_res < 0) 1955 if (ticket->t_curr_res < 0)
1990 xlog_print_tic_res(log->l_mp, ticket); 1956 xlog_print_tic_res(log->l_mp, ticket);
@@ -2931,8 +2897,7 @@ _xfs_log_force(
2931 2897
2932 XFS_STATS_INC(xs_log_force); 2898 XFS_STATS_INC(xs_log_force);
2933 2899
2934 if (log->l_cilp) 2900 xlog_cil_force(log);
2935 xlog_cil_force(log);
2936 2901
2937 spin_lock(&log->l_icloglock); 2902 spin_lock(&log->l_icloglock);
2938 2903
@@ -3081,11 +3046,9 @@ _xfs_log_force_lsn(
3081 3046
3082 XFS_STATS_INC(xs_log_force); 3047 XFS_STATS_INC(xs_log_force);
3083 3048
3084 if (log->l_cilp) { 3049 lsn = xlog_cil_force_lsn(log, lsn);
3085 lsn = xlog_cil_force_lsn(log, lsn); 3050 if (lsn == NULLCOMMITLSN)
3086 if (lsn == NULLCOMMITLSN) 3051 return 0;
3087 return 0;
3088 }
3089 3052
3090try_again: 3053try_again:
3091 spin_lock(&log->l_icloglock); 3054 spin_lock(&log->l_icloglock);
@@ -3653,7 +3616,7 @@ xfs_log_force_umount(
3653 * completed transactions are flushed to disk with the xfs_log_force() 3616 * completed transactions are flushed to disk with the xfs_log_force()
3654 * call below. 3617 * call below.
3655 */ 3618 */
3656 if (!logerror && (mp->m_flags & XFS_MOUNT_DELAYLOG)) 3619 if (!logerror)
3657 xlog_cil_force(log); 3620 xlog_cil_force(log);
3658 3621
3659 /* 3622 /*
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 3f7bf451c034..2aee3b22d29c 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -174,11 +174,6 @@ int xfs_log_reserve(struct xfs_mount *mp,
174 __uint8_t clientid, 174 __uint8_t clientid,
175 uint flags, 175 uint flags,
176 uint t_type); 176 uint t_type);
177int xfs_log_write(struct xfs_mount *mp,
178 xfs_log_iovec_t region[],
179 int nentries,
180 struct xlog_ticket *ticket,
181 xfs_lsn_t *start_lsn);
182int xfs_log_unmount_write(struct xfs_mount *mp); 177int xfs_log_unmount_write(struct xfs_mount *mp);
183void xfs_log_unmount(struct xfs_mount *mp); 178void xfs_log_unmount(struct xfs_mount *mp);
184int xfs_log_force_umount(struct xfs_mount *mp, int logerror); 179int xfs_log_force_umount(struct xfs_mount *mp, int logerror);
@@ -189,8 +184,7 @@ void xlog_iodone(struct xfs_buf *);
189struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket); 184struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
190void xfs_log_ticket_put(struct xlog_ticket *ticket); 185void xfs_log_ticket_put(struct xlog_ticket *ticket);
191 186
192void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, 187int xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
193 struct xfs_log_vec *log_vector,
194 xfs_lsn_t *commit_lsn, int flags); 188 xfs_lsn_t *commit_lsn, int flags);
195bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); 189bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
196 190
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index c7755d5a5fbe..d4fadbe8ac90 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -32,10 +32,7 @@
32#include "xfs_discard.h" 32#include "xfs_discard.h"
33 33
34/* 34/*
35 * Perform initial CIL structure initialisation. If the CIL is not 35 * Perform initial CIL structure initialisation.
36 * enabled in this filesystem, ensure the log->l_cilp is null so
37 * we can check this conditional to determine if we are doing delayed
38 * logging or not.
39 */ 36 */
40int 37int
41xlog_cil_init( 38xlog_cil_init(
@@ -44,10 +41,6 @@ xlog_cil_init(
44 struct xfs_cil *cil; 41 struct xfs_cil *cil;
45 struct xfs_cil_ctx *ctx; 42 struct xfs_cil_ctx *ctx;
46 43
47 log->l_cilp = NULL;
48 if (!(log->l_mp->m_flags & XFS_MOUNT_DELAYLOG))
49 return 0;
50
51 cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL); 44 cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL);
52 if (!cil) 45 if (!cil)
53 return ENOMEM; 46 return ENOMEM;
@@ -80,9 +73,6 @@ void
80xlog_cil_destroy( 73xlog_cil_destroy(
81 struct log *log) 74 struct log *log)
82{ 75{
83 if (!log->l_cilp)
84 return;
85
86 if (log->l_cilp->xc_ctx) { 76 if (log->l_cilp->xc_ctx) {
87 if (log->l_cilp->xc_ctx->ticket) 77 if (log->l_cilp->xc_ctx->ticket)
88 xfs_log_ticket_put(log->l_cilp->xc_ctx->ticket); 78 xfs_log_ticket_put(log->l_cilp->xc_ctx->ticket);
@@ -137,9 +127,6 @@ void
137xlog_cil_init_post_recovery( 127xlog_cil_init_post_recovery(
138 struct log *log) 128 struct log *log)
139{ 129{
140 if (!log->l_cilp)
141 return;
142
143 log->l_cilp->xc_ctx->ticket = xlog_cil_ticket_alloc(log); 130 log->l_cilp->xc_ctx->ticket = xlog_cil_ticket_alloc(log);
144 log->l_cilp->xc_ctx->sequence = 1; 131 log->l_cilp->xc_ctx->sequence = 1;
145 log->l_cilp->xc_ctx->commit_lsn = xlog_assign_lsn(log->l_curr_cycle, 132 log->l_cilp->xc_ctx->commit_lsn = xlog_assign_lsn(log->l_curr_cycle,
@@ -172,37 +159,73 @@ xlog_cil_init_post_recovery(
172 * format the regions into the iclog as though they are being formatted 159 * format the regions into the iclog as though they are being formatted
173 * directly out of the objects themselves. 160 * directly out of the objects themselves.
174 */ 161 */
175static void 162static struct xfs_log_vec *
176xlog_cil_format_items( 163xlog_cil_prepare_log_vecs(
177 struct log *log, 164 struct xfs_trans *tp)
178 struct xfs_log_vec *log_vector)
179{ 165{
180 struct xfs_log_vec *lv; 166 struct xfs_log_item_desc *lidp;
167 struct xfs_log_vec *lv = NULL;
168 struct xfs_log_vec *ret_lv = NULL;
181 169
182 ASSERT(log_vector); 170
183 for (lv = log_vector; lv; lv = lv->lv_next) { 171 /* Bail out if we didn't find a log item. */
172 if (list_empty(&tp->t_items)) {
173 ASSERT(0);
174 return NULL;
175 }
176
177 list_for_each_entry(lidp, &tp->t_items, lid_trans) {
178 struct xfs_log_vec *new_lv;
184 void *ptr; 179 void *ptr;
185 int index; 180 int index;
186 int len = 0; 181 int len = 0;
182 uint niovecs;
183
184 /* Skip items which aren't dirty in this transaction. */
185 if (!(lidp->lid_flags & XFS_LID_DIRTY))
186 continue;
187
188 /* Skip items that do not have any vectors for writing */
189 niovecs = IOP_SIZE(lidp->lid_item);
190 if (!niovecs)
191 continue;
192
193 new_lv = kmem_zalloc(sizeof(*new_lv) +
194 niovecs * sizeof(struct xfs_log_iovec),
195 KM_SLEEP);
196
197 /* The allocated iovec region lies beyond the log vector. */
198 new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1];
199 new_lv->lv_niovecs = niovecs;
200 new_lv->lv_item = lidp->lid_item;
187 201
188 /* build the vector array and calculate it's length */ 202 /* build the vector array and calculate it's length */
189 IOP_FORMAT(lv->lv_item, lv->lv_iovecp); 203 IOP_FORMAT(new_lv->lv_item, new_lv->lv_iovecp);
190 for (index = 0; index < lv->lv_niovecs; index++) 204 for (index = 0; index < new_lv->lv_niovecs; index++)
191 len += lv->lv_iovecp[index].i_len; 205 len += new_lv->lv_iovecp[index].i_len;
192 206
193 lv->lv_buf_len = len; 207 new_lv->lv_buf_len = len;
194 lv->lv_buf = kmem_alloc(lv->lv_buf_len, KM_SLEEP|KM_NOFS); 208 new_lv->lv_buf = kmem_alloc(new_lv->lv_buf_len,
195 ptr = lv->lv_buf; 209 KM_SLEEP|KM_NOFS);
210 ptr = new_lv->lv_buf;
196 211
197 for (index = 0; index < lv->lv_niovecs; index++) { 212 for (index = 0; index < new_lv->lv_niovecs; index++) {
198 struct xfs_log_iovec *vec = &lv->lv_iovecp[index]; 213 struct xfs_log_iovec *vec = &new_lv->lv_iovecp[index];
199 214
200 memcpy(ptr, vec->i_addr, vec->i_len); 215 memcpy(ptr, vec->i_addr, vec->i_len);
201 vec->i_addr = ptr; 216 vec->i_addr = ptr;
202 ptr += vec->i_len; 217 ptr += vec->i_len;
203 } 218 }
204 ASSERT(ptr == lv->lv_buf + lv->lv_buf_len); 219 ASSERT(ptr == new_lv->lv_buf + new_lv->lv_buf_len);
220
221 if (!ret_lv)
222 ret_lv = new_lv;
223 else
224 lv->lv_next = new_lv;
225 lv = new_lv;
205 } 226 }
227
228 return ret_lv;
206} 229}
207 230
208/* 231/*
@@ -256,7 +279,7 @@ xfs_cil_prepare_item(
256 * Insert the log items into the CIL and calculate the difference in space 279 * Insert the log items into the CIL and calculate the difference in space
257 * consumed by the item. Add the space to the checkpoint ticket and calculate 280 * consumed by the item. Add the space to the checkpoint ticket and calculate
258 * if the change requires additional log metadata. If it does, take that space 281 * if the change requires additional log metadata. If it does, take that space
259 * as well. Remove the amount of space we addded to the checkpoint ticket from 282 * as well. Remove the amount of space we added to the checkpoint ticket from
260 * the current transaction ticket so that the accounting works out correctly. 283 * the current transaction ticket so that the accounting works out correctly.
261 */ 284 */
262static void 285static void
@@ -635,28 +658,30 @@ out_abort:
635 * background commit, returns without it held once background commits are 658 * background commit, returns without it held once background commits are
636 * allowed again. 659 * allowed again.
637 */ 660 */
638void 661int
639xfs_log_commit_cil( 662xfs_log_commit_cil(
640 struct xfs_mount *mp, 663 struct xfs_mount *mp,
641 struct xfs_trans *tp, 664 struct xfs_trans *tp,
642 struct xfs_log_vec *log_vector,
643 xfs_lsn_t *commit_lsn, 665 xfs_lsn_t *commit_lsn,
644 int flags) 666 int flags)
645{ 667{
646 struct log *log = mp->m_log; 668 struct log *log = mp->m_log;
647 int log_flags = 0; 669 int log_flags = 0;
648 int push = 0; 670 int push = 0;
671 struct xfs_log_vec *log_vector;
649 672
650 if (flags & XFS_TRANS_RELEASE_LOG_RES) 673 if (flags & XFS_TRANS_RELEASE_LOG_RES)
651 log_flags = XFS_LOG_REL_PERM_RESERV; 674 log_flags = XFS_LOG_REL_PERM_RESERV;
652 675
653 /* 676 /*
654 * do all the hard work of formatting items (including memory 677 * Do all the hard work of formatting items (including memory
655 * allocation) outside the CIL context lock. This prevents stalling CIL 678 * allocation) outside the CIL context lock. This prevents stalling CIL
656 * pushes when we are low on memory and a transaction commit spends a 679 * pushes when we are low on memory and a transaction commit spends a
657 * lot of time in memory reclaim. 680 * lot of time in memory reclaim.
658 */ 681 */
659 xlog_cil_format_items(log, log_vector); 682 log_vector = xlog_cil_prepare_log_vecs(tp);
683 if (!log_vector)
684 return ENOMEM;
660 685
661 /* lock out background commit */ 686 /* lock out background commit */
662 down_read(&log->l_cilp->xc_ctx_lock); 687 down_read(&log->l_cilp->xc_ctx_lock);
@@ -709,6 +734,7 @@ xfs_log_commit_cil(
709 */ 734 */
710 if (push) 735 if (push)
711 xlog_cil_push(log, 0); 736 xlog_cil_push(log, 0);
737 return 0;
712} 738}
713 739
714/* 740/*
@@ -786,8 +812,6 @@ xfs_log_item_in_current_chkpt(
786{ 812{
787 struct xfs_cil_ctx *ctx; 813 struct xfs_cil_ctx *ctx;
788 814
789 if (!(lip->li_mountp->m_flags & XFS_MOUNT_DELAYLOG))
790 return false;
791 if (list_empty(&lip->li_cil)) 815 if (list_empty(&lip->li_cil))
792 return false; 816 return false;
793 817
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index bb24dac42a25..19f69e232509 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -219,7 +219,6 @@ typedef struct xfs_mount {
219#define XFS_MOUNT_WSYNC (1ULL << 0) /* for nfs - all metadata ops 219#define XFS_MOUNT_WSYNC (1ULL << 0) /* for nfs - all metadata ops
220 must be synchronous except 220 must be synchronous except
221 for space allocations */ 221 for space allocations */
222#define XFS_MOUNT_DELAYLOG (1ULL << 1) /* delayed logging is enabled */
223#define XFS_MOUNT_WAS_CLEAN (1ULL << 3) 222#define XFS_MOUNT_WAS_CLEAN (1ULL << 3)
224#define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem 223#define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem
225 operations, typically for 224 operations, typically for
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 0bbb1a41998b..671f37eae1c7 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -154,12 +154,17 @@ STATIC void
154xfs_qm_destroy( 154xfs_qm_destroy(
155 struct xfs_qm *xqm) 155 struct xfs_qm *xqm)
156{ 156{
157 struct xfs_dquot *dqp, *n;
158 int hsize, i; 157 int hsize, i;
159 158
160 ASSERT(xqm != NULL); 159 ASSERT(xqm != NULL);
161 ASSERT(xqm->qm_nrefs == 0); 160 ASSERT(xqm->qm_nrefs == 0);
161
162 unregister_shrinker(&xfs_qm_shaker); 162 unregister_shrinker(&xfs_qm_shaker);
163
164 mutex_lock(&xqm->qm_dqfrlist_lock);
165 ASSERT(list_empty(&xqm->qm_dqfrlist));
166 mutex_unlock(&xqm->qm_dqfrlist_lock);
167
163 hsize = xqm->qm_dqhashmask + 1; 168 hsize = xqm->qm_dqhashmask + 1;
164 for (i = 0; i < hsize; i++) { 169 for (i = 0; i < hsize; i++) {
165 xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i])); 170 xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
@@ -171,17 +176,6 @@ xfs_qm_destroy(
171 xqm->qm_grp_dqhtable = NULL; 176 xqm->qm_grp_dqhtable = NULL;
172 xqm->qm_dqhashmask = 0; 177 xqm->qm_dqhashmask = 0;
173 178
174 /* frlist cleanup */
175 mutex_lock(&xqm->qm_dqfrlist_lock);
176 list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) {
177 xfs_dqlock(dqp);
178 list_del_init(&dqp->q_freelist);
179 xfs_Gqm->qm_dqfrlist_cnt--;
180 xfs_dqunlock(dqp);
181 xfs_qm_dqdestroy(dqp);
182 }
183 mutex_unlock(&xqm->qm_dqfrlist_lock);
184 mutex_destroy(&xqm->qm_dqfrlist_lock);
185 kmem_free(xqm); 179 kmem_free(xqm);
186} 180}
187 181
@@ -232,34 +226,10 @@ STATIC void
232xfs_qm_rele_quotafs_ref( 226xfs_qm_rele_quotafs_ref(
233 struct xfs_mount *mp) 227 struct xfs_mount *mp)
234{ 228{
235 xfs_dquot_t *dqp, *n;
236
237 ASSERT(xfs_Gqm); 229 ASSERT(xfs_Gqm);
238 ASSERT(xfs_Gqm->qm_nrefs > 0); 230 ASSERT(xfs_Gqm->qm_nrefs > 0);
239 231
240 /* 232 /*
241 * Go thru the freelist and destroy all inactive dquots.
242 */
243 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
244
245 list_for_each_entry_safe(dqp, n, &xfs_Gqm->qm_dqfrlist, q_freelist) {
246 xfs_dqlock(dqp);
247 if (dqp->dq_flags & XFS_DQ_INACTIVE) {
248 ASSERT(dqp->q_mount == NULL);
249 ASSERT(! XFS_DQ_IS_DIRTY(dqp));
250 ASSERT(list_empty(&dqp->q_hashlist));
251 ASSERT(list_empty(&dqp->q_mplist));
252 list_del_init(&dqp->q_freelist);
253 xfs_Gqm->qm_dqfrlist_cnt--;
254 xfs_dqunlock(dqp);
255 xfs_qm_dqdestroy(dqp);
256 } else {
257 xfs_dqunlock(dqp);
258 }
259 }
260 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
261
262 /*
263 * Destroy the entire XQM. If somebody mounts with quotaon, this'll 233 * Destroy the entire XQM. If somebody mounts with quotaon, this'll
264 * be restarted. 234 * be restarted.
265 */ 235 */
@@ -415,8 +385,7 @@ xfs_qm_unmount_quotas(
415 */ 385 */
416STATIC int 386STATIC int
417xfs_qm_dqflush_all( 387xfs_qm_dqflush_all(
418 struct xfs_mount *mp, 388 struct xfs_mount *mp)
419 int sync_mode)
420{ 389{
421 struct xfs_quotainfo *q = mp->m_quotainfo; 390 struct xfs_quotainfo *q = mp->m_quotainfo;
422 int recl; 391 int recl;
@@ -429,7 +398,8 @@ again:
429 mutex_lock(&q->qi_dqlist_lock); 398 mutex_lock(&q->qi_dqlist_lock);
430 list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) { 399 list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
431 xfs_dqlock(dqp); 400 xfs_dqlock(dqp);
432 if (! XFS_DQ_IS_DIRTY(dqp)) { 401 if ((dqp->dq_flags & XFS_DQ_FREEING) ||
402 !XFS_DQ_IS_DIRTY(dqp)) {
433 xfs_dqunlock(dqp); 403 xfs_dqunlock(dqp);
434 continue; 404 continue;
435 } 405 }
@@ -444,14 +414,14 @@ again:
444 * out immediately. We'll be able to acquire 414 * out immediately. We'll be able to acquire
445 * the flush lock when the I/O completes. 415 * the flush lock when the I/O completes.
446 */ 416 */
447 xfs_qm_dqflock_pushbuf_wait(dqp); 417 xfs_dqflock_pushbuf_wait(dqp);
448 } 418 }
449 /* 419 /*
450 * Let go of the mplist lock. We don't want to hold it 420 * Let go of the mplist lock. We don't want to hold it
451 * across a disk write. 421 * across a disk write.
452 */ 422 */
453 mutex_unlock(&q->qi_dqlist_lock); 423 mutex_unlock(&q->qi_dqlist_lock);
454 error = xfs_qm_dqflush(dqp, sync_mode); 424 error = xfs_qm_dqflush(dqp, 0);
455 xfs_dqunlock(dqp); 425 xfs_dqunlock(dqp);
456 if (error) 426 if (error)
457 return error; 427 return error;
@@ -468,6 +438,7 @@ again:
468 /* return ! busy */ 438 /* return ! busy */
469 return 0; 439 return 0;
470} 440}
441
471/* 442/*
472 * Release the group dquot pointers the user dquots may be 443 * Release the group dquot pointers the user dquots may be
473 * carrying around as a hint. mplist is locked on entry and exit. 444 * carrying around as a hint. mplist is locked on entry and exit.
@@ -478,31 +449,26 @@ xfs_qm_detach_gdquots(
478{ 449{
479 struct xfs_quotainfo *q = mp->m_quotainfo; 450 struct xfs_quotainfo *q = mp->m_quotainfo;
480 struct xfs_dquot *dqp, *gdqp; 451 struct xfs_dquot *dqp, *gdqp;
481 int nrecl;
482 452
483 again: 453 again:
484 ASSERT(mutex_is_locked(&q->qi_dqlist_lock)); 454 ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
485 list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) { 455 list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
486 xfs_dqlock(dqp); 456 xfs_dqlock(dqp);
487 if ((gdqp = dqp->q_gdquot)) { 457 if (dqp->dq_flags & XFS_DQ_FREEING) {
488 xfs_dqlock(gdqp); 458 xfs_dqunlock(dqp);
489 dqp->q_gdquot = NULL;
490 }
491 xfs_dqunlock(dqp);
492
493 if (gdqp) {
494 /*
495 * Can't hold the mplist lock across a dqput.
496 * XXXmust convert to marker based iterations here.
497 */
498 nrecl = q->qi_dqreclaims;
499 mutex_unlock(&q->qi_dqlist_lock); 459 mutex_unlock(&q->qi_dqlist_lock);
500 xfs_qm_dqput(gdqp); 460 delay(1);
501
502 mutex_lock(&q->qi_dqlist_lock); 461 mutex_lock(&q->qi_dqlist_lock);
503 if (nrecl != q->qi_dqreclaims) 462 goto again;
504 goto again;
505 } 463 }
464
465 gdqp = dqp->q_gdquot;
466 if (gdqp)
467 dqp->q_gdquot = NULL;
468 xfs_dqunlock(dqp);
469
470 if (gdqp)
471 xfs_qm_dqrele(gdqp);
506 } 472 }
507} 473}
508 474
@@ -520,8 +486,8 @@ xfs_qm_dqpurge_int(
520 struct xfs_quotainfo *q = mp->m_quotainfo; 486 struct xfs_quotainfo *q = mp->m_quotainfo;
521 struct xfs_dquot *dqp, *n; 487 struct xfs_dquot *dqp, *n;
522 uint dqtype; 488 uint dqtype;
523 int nrecl; 489 int nmisses = 0;
524 int nmisses; 490 LIST_HEAD (dispose_list);
525 491
526 if (!q) 492 if (!q)
527 return 0; 493 return 0;
@@ -540,47 +506,26 @@ xfs_qm_dqpurge_int(
540 */ 506 */
541 xfs_qm_detach_gdquots(mp); 507 xfs_qm_detach_gdquots(mp);
542 508
543 again:
544 nmisses = 0;
545 ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
546 /* 509 /*
547 * Try to get rid of all of the unwanted dquots. The idea is to 510 * Try to get rid of all of the unwanted dquots.
548 * get them off mplist and hashlist, but leave them on freelist.
549 */ 511 */
550 list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) { 512 list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) {
551 /* 513 xfs_dqlock(dqp);
552 * It's OK to look at the type without taking dqlock here. 514 if ((dqp->dq_flags & dqtype) != 0 &&
553 * We're holding the mplist lock here, and that's needed for 515 !(dqp->dq_flags & XFS_DQ_FREEING)) {
554 * a dqreclaim. 516 if (dqp->q_nrefs == 0) {
555 */ 517 dqp->dq_flags |= XFS_DQ_FREEING;
556 if ((dqp->dq_flags & dqtype) == 0) 518 list_move_tail(&dqp->q_mplist, &dispose_list);
557 continue; 519 } else
558 520 nmisses++;
559 if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
560 nrecl = q->qi_dqreclaims;
561 mutex_unlock(&q->qi_dqlist_lock);
562 mutex_lock(&dqp->q_hash->qh_lock);
563 mutex_lock(&q->qi_dqlist_lock);
564
565 /*
566 * XXXTheoretically, we can get into a very long
567 * ping pong game here.
568 * No one can be adding dquots to the mplist at
569 * this point, but somebody might be taking things off.
570 */
571 if (nrecl != q->qi_dqreclaims) {
572 mutex_unlock(&dqp->q_hash->qh_lock);
573 goto again;
574 }
575 } 521 }
576 522 xfs_dqunlock(dqp);
577 /*
578 * Take the dquot off the mplist and hashlist. It may remain on
579 * freelist in INACTIVE state.
580 */
581 nmisses += xfs_qm_dqpurge(dqp);
582 } 523 }
583 mutex_unlock(&q->qi_dqlist_lock); 524 mutex_unlock(&q->qi_dqlist_lock);
525
526 list_for_each_entry_safe(dqp, n, &dispose_list, q_mplist)
527 xfs_qm_dqpurge(dqp);
528
584 return nmisses; 529 return nmisses;
585} 530}
586 531
@@ -648,12 +593,9 @@ xfs_qm_dqattach_one(
648 */ 593 */
649 dqp = udqhint->q_gdquot; 594 dqp = udqhint->q_gdquot;
650 if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) { 595 if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) {
651 xfs_dqlock(dqp);
652 XFS_DQHOLD(dqp);
653 ASSERT(*IO_idqpp == NULL); 596 ASSERT(*IO_idqpp == NULL);
654 *IO_idqpp = dqp;
655 597
656 xfs_dqunlock(dqp); 598 *IO_idqpp = xfs_qm_dqhold(dqp);
657 xfs_dqunlock(udqhint); 599 xfs_dqunlock(udqhint);
658 return 0; 600 return 0;
659 } 601 }
@@ -693,11 +635,7 @@ xfs_qm_dqattach_one(
693 635
694/* 636/*
695 * Given a udquot and gdquot, attach a ptr to the group dquot in the 637 * Given a udquot and gdquot, attach a ptr to the group dquot in the
696 * udquot as a hint for future lookups. The idea sounds simple, but the 638 * udquot as a hint for future lookups.
697 * execution isn't, because the udquot might have a group dquot attached
698 * already and getting rid of that gets us into lock ordering constraints.
699 * The process is complicated more by the fact that the dquots may or may not
700 * be locked on entry.
701 */ 639 */
702STATIC void 640STATIC void
703xfs_qm_dqattach_grouphint( 641xfs_qm_dqattach_grouphint(
@@ -708,45 +646,17 @@ xfs_qm_dqattach_grouphint(
708 646
709 xfs_dqlock(udq); 647 xfs_dqlock(udq);
710 648
711 if ((tmp = udq->q_gdquot)) { 649 tmp = udq->q_gdquot;
712 if (tmp == gdq) { 650 if (tmp) {
713 xfs_dqunlock(udq); 651 if (tmp == gdq)
714 return; 652 goto done;
715 }
716 653
717 udq->q_gdquot = NULL; 654 udq->q_gdquot = NULL;
718 /*
719 * We can't keep any dqlocks when calling dqrele,
720 * because the freelist lock comes before dqlocks.
721 */
722 xfs_dqunlock(udq);
723 /*
724 * we took a hard reference once upon a time in dqget,
725 * so give it back when the udquot no longer points at it
726 * dqput() does the unlocking of the dquot.
727 */
728 xfs_qm_dqrele(tmp); 655 xfs_qm_dqrele(tmp);
729
730 xfs_dqlock(udq);
731 xfs_dqlock(gdq);
732
733 } else {
734 ASSERT(XFS_DQ_IS_LOCKED(udq));
735 xfs_dqlock(gdq);
736 }
737
738 ASSERT(XFS_DQ_IS_LOCKED(udq));
739 ASSERT(XFS_DQ_IS_LOCKED(gdq));
740 /*
741 * Somebody could have attached a gdquot here,
742 * when we dropped the uqlock. If so, just do nothing.
743 */
744 if (udq->q_gdquot == NULL) {
745 XFS_DQHOLD(gdq);
746 udq->q_gdquot = gdq;
747 } 656 }
748 657
749 xfs_dqunlock(gdq); 658 udq->q_gdquot = xfs_qm_dqhold(gdq);
659done:
750 xfs_dqunlock(udq); 660 xfs_dqunlock(udq);
751} 661}
752 662
@@ -813,17 +723,13 @@ xfs_qm_dqattach_locked(
813 ASSERT(ip->i_gdquot); 723 ASSERT(ip->i_gdquot);
814 724
815 /* 725 /*
816 * We may or may not have the i_udquot locked at this point, 726 * We do not have i_udquot locked at this point, but this check
817 * but this check is OK since we don't depend on the i_gdquot to 727 * is OK since we don't depend on the i_gdquot to be accurate
818 * be accurate 100% all the time. It is just a hint, and this 728 * 100% all the time. It is just a hint, and this will
819 * will succeed in general. 729 * succeed in general.
820 */
821 if (ip->i_udquot->q_gdquot == ip->i_gdquot)
822 goto done;
823 /*
824 * Attach i_gdquot to the gdquot hint inside the i_udquot.
825 */ 730 */
826 xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot); 731 if (ip->i_udquot->q_gdquot != ip->i_gdquot)
732 xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot);
827 } 733 }
828 734
829 done: 735 done:
@@ -879,100 +785,6 @@ xfs_qm_dqdetach(
879 } 785 }
880} 786}
881 787
882int
883xfs_qm_sync(
884 struct xfs_mount *mp,
885 int flags)
886{
887 struct xfs_quotainfo *q = mp->m_quotainfo;
888 int recl, restarts;
889 struct xfs_dquot *dqp;
890 int error;
891
892 if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
893 return 0;
894
895 restarts = 0;
896
897 again:
898 mutex_lock(&q->qi_dqlist_lock);
899 /*
900 * dqpurge_all() also takes the mplist lock and iterate thru all dquots
901 * in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared
902 * when we have the mplist lock, we know that dquots will be consistent
903 * as long as we have it locked.
904 */
905 if (!XFS_IS_QUOTA_ON(mp)) {
906 mutex_unlock(&q->qi_dqlist_lock);
907 return 0;
908 }
909 ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
910 list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
911 /*
912 * If this is vfs_sync calling, then skip the dquots that
913 * don't 'seem' to be dirty. ie. don't acquire dqlock.
914 * This is very similar to what xfs_sync does with inodes.
915 */
916 if (flags & SYNC_TRYLOCK) {
917 if (!XFS_DQ_IS_DIRTY(dqp))
918 continue;
919 if (!xfs_qm_dqlock_nowait(dqp))
920 continue;
921 } else {
922 xfs_dqlock(dqp);
923 }
924
925 /*
926 * Now, find out for sure if this dquot is dirty or not.
927 */
928 if (! XFS_DQ_IS_DIRTY(dqp)) {
929 xfs_dqunlock(dqp);
930 continue;
931 }
932
933 /* XXX a sentinel would be better */
934 recl = q->qi_dqreclaims;
935 if (!xfs_dqflock_nowait(dqp)) {
936 if (flags & SYNC_TRYLOCK) {
937 xfs_dqunlock(dqp);
938 continue;
939 }
940 /*
941 * If we can't grab the flush lock then if the caller
942 * really wanted us to give this our best shot, so
943 * see if we can give a push to the buffer before we wait
944 * on the flush lock. At this point, we know that
945 * even though the dquot is being flushed,
946 * it has (new) dirty data.
947 */
948 xfs_qm_dqflock_pushbuf_wait(dqp);
949 }
950 /*
951 * Let go of the mplist lock. We don't want to hold it
952 * across a disk write
953 */
954 mutex_unlock(&q->qi_dqlist_lock);
955 error = xfs_qm_dqflush(dqp, flags);
956 xfs_dqunlock(dqp);
957 if (error && XFS_FORCED_SHUTDOWN(mp))
958 return 0; /* Need to prevent umount failure */
959 else if (error)
960 return error;
961
962 mutex_lock(&q->qi_dqlist_lock);
963 if (recl != q->qi_dqreclaims) {
964 if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS)
965 break;
966
967 mutex_unlock(&q->qi_dqlist_lock);
968 goto again;
969 }
970 }
971
972 mutex_unlock(&q->qi_dqlist_lock);
973 return 0;
974}
975
976/* 788/*
977 * The hash chains and the mplist use the same xfs_dqhash structure as 789 * The hash chains and the mplist use the same xfs_dqhash structure as
978 * their list head, but we can take the mplist qh_lock and one of the 790 * their list head, but we can take the mplist qh_lock and one of the
@@ -1034,18 +846,21 @@ xfs_qm_init_quotainfo(
1034 /* 846 /*
1035 * We try to get the limits from the superuser's limits fields. 847 * We try to get the limits from the superuser's limits fields.
1036 * This is quite hacky, but it is standard quota practice. 848 * This is quite hacky, but it is standard quota practice.
849 *
1037 * We look at the USR dquot with id == 0 first, but if user quotas 850 * We look at the USR dquot with id == 0 first, but if user quotas
1038 * are not enabled we goto the GRP dquot with id == 0. 851 * are not enabled we goto the GRP dquot with id == 0.
1039 * We don't really care to keep separate default limits for user 852 * We don't really care to keep separate default limits for user
1040 * and group quotas, at least not at this point. 853 * and group quotas, at least not at this point.
854 *
855 * Since we may not have done a quotacheck by this point, just read
856 * the dquot without attaching it to any hashtables or lists.
1041 */ 857 */
1042 error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0, 858 error = xfs_qm_dqread(mp, 0,
1043 XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : 859 XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER :
1044 (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP : 860 (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
1045 XFS_DQ_PROJ), 861 XFS_DQ_PROJ),
1046 XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN, 862 XFS_QMOPT_DOWARN, &dqp);
1047 &dqp); 863 if (!error) {
1048 if (! error) {
1049 xfs_disk_dquot_t *ddqp = &dqp->q_core; 864 xfs_disk_dquot_t *ddqp = &dqp->q_core;
1050 865
1051 /* 866 /*
@@ -1072,11 +887,6 @@ xfs_qm_init_quotainfo(
1072 qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit); 887 qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
1073 qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit); 888 qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
1074 889
1075 /*
1076 * We sent the XFS_QMOPT_DQSUSER flag to dqget because
1077 * we don't want this dquot cached. We haven't done a
1078 * quotacheck yet, and quotacheck doesn't like incore dquots.
1079 */
1080 xfs_qm_dqdestroy(dqp); 890 xfs_qm_dqdestroy(dqp);
1081 } else { 891 } else {
1082 qinf->qi_btimelimit = XFS_QM_BTIMELIMIT; 892 qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
@@ -1661,7 +1471,7 @@ xfs_qm_quotacheck(
1661 * successfully. 1471 * successfully.
1662 */ 1472 */
1663 if (!error) 1473 if (!error)
1664 error = xfs_qm_dqflush_all(mp, 0); 1474 error = xfs_qm_dqflush_all(mp);
1665 1475
1666 /* 1476 /*
1667 * We can get this error if we couldn't do a dquot allocation inside 1477 * We can get this error if we couldn't do a dquot allocation inside
@@ -1793,59 +1603,33 @@ xfs_qm_init_quotainos(
1793 1603
1794 1604
1795/* 1605/*
1796 * Just pop the least recently used dquot off the freelist and 1606 * Pop the least recently used dquot off the freelist and recycle it.
1797 * recycle it. The returned dquot is locked.
1798 */ 1607 */
1799STATIC xfs_dquot_t * 1608STATIC struct xfs_dquot *
1800xfs_qm_dqreclaim_one(void) 1609xfs_qm_dqreclaim_one(void)
1801{ 1610{
1802 xfs_dquot_t *dqpout; 1611 struct xfs_dquot *dqp;
1803 xfs_dquot_t *dqp; 1612 int restarts = 0;
1804 int restarts;
1805 int startagain;
1806
1807 restarts = 0;
1808 dqpout = NULL;
1809 1613
1810 /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
1811again:
1812 startagain = 0;
1813 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); 1614 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
1814 1615restart:
1815 list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) { 1616 list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) {
1816 struct xfs_mount *mp = dqp->q_mount; 1617 struct xfs_mount *mp = dqp->q_mount;
1817 xfs_dqlock(dqp); 1618
1619 if (!xfs_dqlock_nowait(dqp))
1620 continue;
1818 1621
1819 /* 1622 /*
1820 * We are racing with dqlookup here. Naturally we don't 1623 * This dquot has already been grabbed by dqlookup.
1821 * want to reclaim a dquot that lookup wants. We release the 1624 * Remove it from the freelist and try again.
1822 * freelist lock and start over, so that lookup will grab
1823 * both the dquot and the freelistlock.
1824 */ 1625 */
1825 if (dqp->dq_flags & XFS_DQ_WANT) { 1626 if (dqp->q_nrefs) {
1826 ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
1827
1828 trace_xfs_dqreclaim_want(dqp); 1627 trace_xfs_dqreclaim_want(dqp);
1829 XQM_STATS_INC(xqmstats.xs_qm_dqwants); 1628 XQM_STATS_INC(xqmstats.xs_qm_dqwants);
1830 restarts++;
1831 startagain = 1;
1832 goto dqunlock;
1833 }
1834 1629
1835 /*
1836 * If the dquot is inactive, we are assured that it is
1837 * not on the mplist or the hashlist, and that makes our
1838 * life easier.
1839 */
1840 if (dqp->dq_flags & XFS_DQ_INACTIVE) {
1841 ASSERT(mp == NULL);
1842 ASSERT(! XFS_DQ_IS_DIRTY(dqp));
1843 ASSERT(list_empty(&dqp->q_hashlist));
1844 ASSERT(list_empty(&dqp->q_mplist));
1845 list_del_init(&dqp->q_freelist); 1630 list_del_init(&dqp->q_freelist);
1846 xfs_Gqm->qm_dqfrlist_cnt--; 1631 xfs_Gqm->qm_dqfrlist_cnt--;
1847 dqpout = dqp; 1632 restarts++;
1848 XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
1849 goto dqunlock; 1633 goto dqunlock;
1850 } 1634 }
1851 1635
@@ -1874,64 +1658,49 @@ again:
1874 * We flush it delayed write, so don't bother 1658 * We flush it delayed write, so don't bother
1875 * releasing the freelist lock. 1659 * releasing the freelist lock.
1876 */ 1660 */
1877 error = xfs_qm_dqflush(dqp, 0); 1661 error = xfs_qm_dqflush(dqp, SYNC_TRYLOCK);
1878 if (error) { 1662 if (error) {
1879 xfs_warn(mp, "%s: dquot %p flush failed", 1663 xfs_warn(mp, "%s: dquot %p flush failed",
1880 __func__, dqp); 1664 __func__, dqp);
1881 } 1665 }
1882 goto dqunlock; 1666 goto dqunlock;
1883 } 1667 }
1668 xfs_dqfunlock(dqp);
1884 1669
1885 /* 1670 /*
1886 * We're trying to get the hashlock out of order. This races 1671 * Prevent lookup now that we are going to reclaim the dquot.
1887 * with dqlookup; so, we giveup and goto the next dquot if 1672 * Once XFS_DQ_FREEING is set lookup won't touch the dquot,
1888 * we couldn't get the hashlock. This way, we won't starve 1673 * thus we can drop the lock now.
1889 * a dqlookup process that holds the hashlock that is
1890 * waiting for the freelist lock.
1891 */ 1674 */
1892 if (!mutex_trylock(&dqp->q_hash->qh_lock)) { 1675 dqp->dq_flags |= XFS_DQ_FREEING;
1893 restarts++; 1676 xfs_dqunlock(dqp);
1894 goto dqfunlock;
1895 }
1896 1677
1897 /* 1678 mutex_lock(&dqp->q_hash->qh_lock);
1898 * This races with dquot allocation code as well as dqflush_all 1679 list_del_init(&dqp->q_hashlist);
1899 * and reclaim code. So, if we failed to grab the mplist lock, 1680 dqp->q_hash->qh_version++;
1900 * giveup everything and start over. 1681 mutex_unlock(&dqp->q_hash->qh_lock);
1901 */
1902 if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) {
1903 restarts++;
1904 startagain = 1;
1905 goto qhunlock;
1906 }
1907 1682
1908 ASSERT(dqp->q_nrefs == 0); 1683 mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
1909 list_del_init(&dqp->q_mplist); 1684 list_del_init(&dqp->q_mplist);
1910 mp->m_quotainfo->qi_dquots--; 1685 mp->m_quotainfo->qi_dquots--;
1911 mp->m_quotainfo->qi_dqreclaims++; 1686 mp->m_quotainfo->qi_dqreclaims++;
1912 list_del_init(&dqp->q_hashlist); 1687 mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
1913 dqp->q_hash->qh_version++; 1688
1689 ASSERT(dqp->q_nrefs == 0);
1914 list_del_init(&dqp->q_freelist); 1690 list_del_init(&dqp->q_freelist);
1915 xfs_Gqm->qm_dqfrlist_cnt--; 1691 xfs_Gqm->qm_dqfrlist_cnt--;
1916 dqpout = dqp; 1692
1917 mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); 1693 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
1918qhunlock: 1694 return dqp;
1919 mutex_unlock(&dqp->q_hash->qh_lock);
1920dqfunlock:
1921 xfs_dqfunlock(dqp);
1922dqunlock: 1695dqunlock:
1923 xfs_dqunlock(dqp); 1696 xfs_dqunlock(dqp);
1924 if (dqpout)
1925 break;
1926 if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) 1697 if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
1927 break; 1698 break;
1928 if (startagain) { 1699 goto restart;
1929 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
1930 goto again;
1931 }
1932 } 1700 }
1701
1933 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); 1702 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
1934 return dqpout; 1703 return NULL;
1935} 1704}
1936 1705
1937/* 1706/*
@@ -2151,10 +1920,7 @@ xfs_qm_vop_dqalloc(
2151 * this to caller 1920 * this to caller
2152 */ 1921 */
2153 ASSERT(ip->i_udquot); 1922 ASSERT(ip->i_udquot);
2154 uq = ip->i_udquot; 1923 uq = xfs_qm_dqhold(ip->i_udquot);
2155 xfs_dqlock(uq);
2156 XFS_DQHOLD(uq);
2157 xfs_dqunlock(uq);
2158 } 1924 }
2159 } 1925 }
2160 if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) { 1926 if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
@@ -2175,10 +1941,7 @@ xfs_qm_vop_dqalloc(
2175 xfs_ilock(ip, lockflags); 1941 xfs_ilock(ip, lockflags);
2176 } else { 1942 } else {
2177 ASSERT(ip->i_gdquot); 1943 ASSERT(ip->i_gdquot);
2178 gq = ip->i_gdquot; 1944 gq = xfs_qm_dqhold(ip->i_gdquot);
2179 xfs_dqlock(gq);
2180 XFS_DQHOLD(gq);
2181 xfs_dqunlock(gq);
2182 } 1945 }
2183 } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) { 1946 } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
2184 if (xfs_get_projid(ip) != prid) { 1947 if (xfs_get_projid(ip) != prid) {
@@ -2198,10 +1961,7 @@ xfs_qm_vop_dqalloc(
2198 xfs_ilock(ip, lockflags); 1961 xfs_ilock(ip, lockflags);
2199 } else { 1962 } else {
2200 ASSERT(ip->i_gdquot); 1963 ASSERT(ip->i_gdquot);
2201 gq = ip->i_gdquot; 1964 gq = xfs_qm_dqhold(ip->i_gdquot);
2202 xfs_dqlock(gq);
2203 XFS_DQHOLD(gq);
2204 xfs_dqunlock(gq);
2205 } 1965 }
2206 } 1966 }
2207 if (uq) 1967 if (uq)
@@ -2251,14 +2011,10 @@ xfs_qm_vop_chown(
2251 xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1); 2011 xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
2252 2012
2253 /* 2013 /*
2254 * Take an extra reference, because the inode 2014 * Take an extra reference, because the inode is going to keep
2255 * is going to keep this dquot pointer even 2015 * this dquot pointer even after the trans_commit.
2256 * after the trans_commit.
2257 */ 2016 */
2258 xfs_dqlock(newdq); 2017 *IO_olddq = xfs_qm_dqhold(newdq);
2259 XFS_DQHOLD(newdq);
2260 xfs_dqunlock(newdq);
2261 *IO_olddq = newdq;
2262 2018
2263 return prevdq; 2019 return prevdq;
2264} 2020}
@@ -2390,25 +2146,21 @@ xfs_qm_vop_create_dqattach(
2390 ASSERT(XFS_IS_QUOTA_RUNNING(mp)); 2146 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
2391 2147
2392 if (udqp) { 2148 if (udqp) {
2393 xfs_dqlock(udqp);
2394 XFS_DQHOLD(udqp);
2395 xfs_dqunlock(udqp);
2396 ASSERT(ip->i_udquot == NULL); 2149 ASSERT(ip->i_udquot == NULL);
2397 ip->i_udquot = udqp;
2398 ASSERT(XFS_IS_UQUOTA_ON(mp)); 2150 ASSERT(XFS_IS_UQUOTA_ON(mp));
2399 ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id)); 2151 ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
2152
2153 ip->i_udquot = xfs_qm_dqhold(udqp);
2400 xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1); 2154 xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
2401 } 2155 }
2402 if (gdqp) { 2156 if (gdqp) {
2403 xfs_dqlock(gdqp);
2404 XFS_DQHOLD(gdqp);
2405 xfs_dqunlock(gdqp);
2406 ASSERT(ip->i_gdquot == NULL); 2157 ASSERT(ip->i_gdquot == NULL);
2407 ip->i_gdquot = gdqp;
2408 ASSERT(XFS_IS_OQUOTA_ON(mp)); 2158 ASSERT(XFS_IS_OQUOTA_ON(mp));
2409 ASSERT((XFS_IS_GQUOTA_ON(mp) ? 2159 ASSERT((XFS_IS_GQUOTA_ON(mp) ?
2410 ip->i_d.di_gid : xfs_get_projid(ip)) == 2160 ip->i_d.di_gid : xfs_get_projid(ip)) ==
2411 be32_to_cpu(gdqp->q_core.d_id)); 2161 be32_to_cpu(gdqp->q_core.d_id));
2162
2163 ip->i_gdquot = xfs_qm_dqhold(gdqp);
2412 xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); 2164 xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
2413 } 2165 }
2414} 2166}
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index 43b9abe1052c..9b4f3adefbc5 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -33,12 +33,6 @@ extern kmem_zone_t *qm_dqzone;
33extern kmem_zone_t *qm_dqtrxzone; 33extern kmem_zone_t *qm_dqtrxzone;
34 34
35/* 35/*
36 * Used in xfs_qm_sync called by xfs_sync to count the max times that it can
37 * iterate over the mountpt's dquot list in one call.
38 */
39#define XFS_QM_SYNC_MAX_RESTARTS 7
40
41/*
42 * Ditto, for xfs_qm_dqreclaim_one. 36 * Ditto, for xfs_qm_dqreclaim_one.
43 */ 37 */
44#define XFS_QM_RECLAIM_MAX_RESTARTS 4 38#define XFS_QM_RECLAIM_MAX_RESTARTS 4
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index a595f29567fe..8a0807e0f979 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -87,8 +87,7 @@ typedef struct xfs_dqblk {
87#define XFS_DQ_PROJ 0x0002 /* project quota */ 87#define XFS_DQ_PROJ 0x0002 /* project quota */
88#define XFS_DQ_GROUP 0x0004 /* a group quota */ 88#define XFS_DQ_GROUP 0x0004 /* a group quota */
89#define XFS_DQ_DIRTY 0x0008 /* dquot is dirty */ 89#define XFS_DQ_DIRTY 0x0008 /* dquot is dirty */
90#define XFS_DQ_WANT 0x0010 /* for lookup/reclaim race */ 90#define XFS_DQ_FREEING 0x0010 /* dquot is beeing torn down */
91#define XFS_DQ_INACTIVE 0x0020 /* dq off mplist & hashlist */
92 91
93#define XFS_DQ_ALLTYPES (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP) 92#define XFS_DQ_ALLTYPES (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP)
94 93
@@ -97,8 +96,7 @@ typedef struct xfs_dqblk {
97 { XFS_DQ_PROJ, "PROJ" }, \ 96 { XFS_DQ_PROJ, "PROJ" }, \
98 { XFS_DQ_GROUP, "GROUP" }, \ 97 { XFS_DQ_GROUP, "GROUP" }, \
99 { XFS_DQ_DIRTY, "DIRTY" }, \ 98 { XFS_DQ_DIRTY, "DIRTY" }, \
100 { XFS_DQ_WANT, "WANT" }, \ 99 { XFS_DQ_FREEING, "FREEING" }
101 { XFS_DQ_INACTIVE, "INACTIVE" }
102 100
103/* 101/*
104 * In the worst case, when both user and group quotas are on, 102 * In the worst case, when both user and group quotas are on,
@@ -199,7 +197,6 @@ typedef struct xfs_qoff_logformat {
199#define XFS_QMOPT_UQUOTA 0x0000004 /* user dquot requested */ 197#define XFS_QMOPT_UQUOTA 0x0000004 /* user dquot requested */
200#define XFS_QMOPT_PQUOTA 0x0000008 /* project dquot requested */ 198#define XFS_QMOPT_PQUOTA 0x0000008 /* project dquot requested */
201#define XFS_QMOPT_FORCE_RES 0x0000010 /* ignore quota limits */ 199#define XFS_QMOPT_FORCE_RES 0x0000010 /* ignore quota limits */
202#define XFS_QMOPT_DQSUSER 0x0000020 /* don't cache super users dquot */
203#define XFS_QMOPT_SBVERSION 0x0000040 /* change superblock version num */ 200#define XFS_QMOPT_SBVERSION 0x0000040 /* change superblock version num */
204#define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */ 201#define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */
205#define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */ 202#define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */
@@ -326,7 +323,6 @@ extern int xfs_qm_dqattach_locked(struct xfs_inode *, uint);
326extern void xfs_qm_dqdetach(struct xfs_inode *); 323extern void xfs_qm_dqdetach(struct xfs_inode *);
327extern void xfs_qm_dqrele(struct xfs_dquot *); 324extern void xfs_qm_dqrele(struct xfs_dquot *);
328extern void xfs_qm_statvfs(struct xfs_inode *, struct kstatfs *); 325extern void xfs_qm_statvfs(struct xfs_inode *, struct kstatfs *);
329extern int xfs_qm_sync(struct xfs_mount *, int);
330extern int xfs_qm_newmount(struct xfs_mount *, uint *, uint *); 326extern int xfs_qm_newmount(struct xfs_mount *, uint *, uint *);
331extern void xfs_qm_mount_quotas(struct xfs_mount *); 327extern void xfs_qm_mount_quotas(struct xfs_mount *);
332extern void xfs_qm_unmount(struct xfs_mount *); 328extern void xfs_qm_unmount(struct xfs_mount *);
@@ -366,10 +362,6 @@ static inline int xfs_trans_reserve_quota_bydquots(struct xfs_trans *tp,
366#define xfs_qm_dqdetach(ip) 362#define xfs_qm_dqdetach(ip)
367#define xfs_qm_dqrele(d) 363#define xfs_qm_dqrele(d)
368#define xfs_qm_statvfs(ip, s) 364#define xfs_qm_statvfs(ip, s)
369static inline int xfs_qm_sync(struct xfs_mount *mp, int flags)
370{
371 return 0;
372}
373#define xfs_qm_newmount(mp, a, b) (0) 365#define xfs_qm_newmount(mp, a, b) (0)
374#define xfs_qm_mount_quotas(mp) 366#define xfs_qm_mount_quotas(mp)
375#define xfs_qm_unmount(mp) 367#define xfs_qm_unmount(mp)
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 8a899496fd5f..281961c1d81a 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -199,7 +199,6 @@ xfs_parseargs(
199 mp->m_flags |= XFS_MOUNT_BARRIER; 199 mp->m_flags |= XFS_MOUNT_BARRIER;
200 mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; 200 mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
201 mp->m_flags |= XFS_MOUNT_SMALL_INUMS; 201 mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
202 mp->m_flags |= XFS_MOUNT_DELAYLOG;
203 202
204 /* 203 /*
205 * These can be overridden by the mount option parsing. 204 * These can be overridden by the mount option parsing.
@@ -353,11 +352,11 @@ xfs_parseargs(
353 mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); 352 mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
354 mp->m_qflags &= ~XFS_OQUOTA_ENFD; 353 mp->m_qflags &= ~XFS_OQUOTA_ENFD;
355 } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) { 354 } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
356 mp->m_flags |= XFS_MOUNT_DELAYLOG; 355 xfs_warn(mp,
356 "delaylog is the default now, option is deprecated.");
357 } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) { 357 } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
358 mp->m_flags &= ~XFS_MOUNT_DELAYLOG;
359 xfs_warn(mp, 358 xfs_warn(mp,
360 "nodelaylog is deprecated and will be removed in Linux 3.3"); 359 "nodelaylog support has been removed, option is deprecated.");
361 } else if (!strcmp(this_char, MNTOPT_DISCARD)) { 360 } else if (!strcmp(this_char, MNTOPT_DISCARD)) {
362 mp->m_flags |= XFS_MOUNT_DISCARD; 361 mp->m_flags |= XFS_MOUNT_DISCARD;
363 } else if (!strcmp(this_char, MNTOPT_NODISCARD)) { 362 } else if (!strcmp(this_char, MNTOPT_NODISCARD)) {
@@ -395,13 +394,6 @@ xfs_parseargs(
395 return EINVAL; 394 return EINVAL;
396 } 395 }
397 396
398 if ((mp->m_flags & XFS_MOUNT_DISCARD) &&
399 !(mp->m_flags & XFS_MOUNT_DELAYLOG)) {
400 xfs_warn(mp,
401 "the discard option is incompatible with the nodelaylog option");
402 return EINVAL;
403 }
404
405#ifndef CONFIG_XFS_QUOTA 397#ifndef CONFIG_XFS_QUOTA
406 if (XFS_IS_QUOTA_RUNNING(mp)) { 398 if (XFS_IS_QUOTA_RUNNING(mp)) {
407 xfs_warn(mp, "quota support not available in this kernel."); 399 xfs_warn(mp, "quota support not available in this kernel.");
@@ -501,7 +493,6 @@ xfs_showargs(
501 { XFS_MOUNT_ATTR2, "," MNTOPT_ATTR2 }, 493 { XFS_MOUNT_ATTR2, "," MNTOPT_ATTR2 },
502 { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM }, 494 { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM },
503 { XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, 495 { XFS_MOUNT_GRPID, "," MNTOPT_GRPID },
504 { XFS_MOUNT_DELAYLOG, "," MNTOPT_DELAYLOG },
505 { XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD }, 496 { XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD },
506 { 0, NULL } 497 { 0, NULL }
507 }; 498 };
@@ -1014,17 +1005,10 @@ xfs_fs_sync_fs(
1014 int error; 1005 int error;
1015 1006
1016 /* 1007 /*
1017 * Not much we can do for the first async pass. Writing out the 1008 * Doing anything during the async pass would be counterproductive.
1018 * superblock would be counter-productive as we are going to redirty
1019 * when writing out other data and metadata (and writing out a single
1020 * block is quite fast anyway).
1021 *
1022 * Try to asynchronously kick off quota syncing at least.
1023 */ 1009 */
1024 if (!wait) { 1010 if (!wait)
1025 xfs_qm_sync(mp, SYNC_TRYLOCK);
1026 return 0; 1011 return 0;
1027 }
1028 1012
1029 error = xfs_quiesce_data(mp); 1013 error = xfs_quiesce_data(mp);
1030 if (error) 1014 if (error)
@@ -1238,9 +1222,9 @@ xfs_fs_unfreeze(
1238STATIC int 1222STATIC int
1239xfs_fs_show_options( 1223xfs_fs_show_options(
1240 struct seq_file *m, 1224 struct seq_file *m,
1241 struct vfsmount *mnt) 1225 struct dentry *root)
1242{ 1226{
1243 return -xfs_showargs(XFS_M(mnt->mnt_sb), m); 1227 return -xfs_showargs(XFS_M(root->d_sb), m);
1244} 1228}
1245 1229
1246/* 1230/*
@@ -1621,12 +1605,12 @@ STATIC int __init
1621xfs_init_workqueues(void) 1605xfs_init_workqueues(void)
1622{ 1606{
1623 /* 1607 /*
1624 * max_active is set to 8 to give enough concurency to allow 1608 * We never want to the same work item to run twice, reclaiming inodes
1625 * multiple work operations on each CPU to run. This allows multiple 1609 * or idling the log is not going to get any faster by multiple CPUs
1626 * filesystems to be running sync work concurrently, and scales with 1610 * competing for ressources. Use the default large max_active value
1627 * the number of CPUs in the system. 1611 * so that even lots of filesystems can perform these task in parallel.
1628 */ 1612 */
1629 xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8); 1613 xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_NON_REENTRANT, 0);
1630 if (!xfs_syncd_wq) 1614 if (!xfs_syncd_wq)
1631 return -ENOMEM; 1615 return -ENOMEM;
1632 return 0; 1616 return 0;
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index f0994aedcd15..72c01a1c16e7 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -395,10 +395,7 @@ xfs_quiesce_data(
395 */ 395 */
396 xfs_inode_ag_iterator(mp, xfs_log_dirty_inode, 0); 396 xfs_inode_ag_iterator(mp, xfs_log_dirty_inode, 0);
397 397
398 xfs_qm_sync(mp, SYNC_TRYLOCK); 398 /* force out the log */
399 xfs_qm_sync(mp, SYNC_WAIT);
400
401 /* force out the newly dirtied log buffers */
402 xfs_log_force(mp, XFS_LOG_SYNC); 399 xfs_log_force(mp, XFS_LOG_SYNC);
403 400
404 /* write superblock and hoover up shutdown errors */ 401 /* write superblock and hoover up shutdown errors */
@@ -506,7 +503,6 @@ xfs_sync_worker(
506 error = xfs_fs_log_dummy(mp); 503 error = xfs_fs_log_dummy(mp);
507 else 504 else
508 xfs_log_force(mp, 0); 505 xfs_log_force(mp, 0);
509 error = xfs_qm_sync(mp, SYNC_TRYLOCK);
510 506
511 /* start pushing all the metadata that is currently dirty */ 507 /* start pushing all the metadata that is currently dirty */
512 xfs_ail_push_all(mp->m_ail); 508 xfs_ail_push_all(mp->m_ail);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 494035798873..a9d5b1e06efe 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -743,8 +743,6 @@ DEFINE_DQUOT_EVENT(xfs_dqtobp_read);
743DEFINE_DQUOT_EVENT(xfs_dqread); 743DEFINE_DQUOT_EVENT(xfs_dqread);
744DEFINE_DQUOT_EVENT(xfs_dqread_fail); 744DEFINE_DQUOT_EVENT(xfs_dqread_fail);
745DEFINE_DQUOT_EVENT(xfs_dqlookup_found); 745DEFINE_DQUOT_EVENT(xfs_dqlookup_found);
746DEFINE_DQUOT_EVENT(xfs_dqlookup_want);
747DEFINE_DQUOT_EVENT(xfs_dqlookup_freelist);
748DEFINE_DQUOT_EVENT(xfs_dqlookup_done); 746DEFINE_DQUOT_EVENT(xfs_dqlookup_done);
749DEFINE_DQUOT_EVENT(xfs_dqget_hit); 747DEFINE_DQUOT_EVENT(xfs_dqget_hit);
750DEFINE_DQUOT_EVENT(xfs_dqget_miss); 748DEFINE_DQUOT_EVENT(xfs_dqget_miss);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 1f35b2feca97..329b06aba1c2 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1158,7 +1158,6 @@ xfs_trans_add_item(
1158 1158
1159 lidp->lid_item = lip; 1159 lidp->lid_item = lip;
1160 lidp->lid_flags = 0; 1160 lidp->lid_flags = 0;
1161 lidp->lid_size = 0;
1162 list_add_tail(&lidp->lid_trans, &tp->t_items); 1161 list_add_tail(&lidp->lid_trans, &tp->t_items);
1163 1162
1164 lip->li_desc = lidp; 1163 lip->li_desc = lidp;
@@ -1210,219 +1209,6 @@ xfs_trans_free_items(
1210 } 1209 }
1211} 1210}
1212 1211
1213/*
1214 * Unlock the items associated with a transaction.
1215 *
1216 * Items which were not logged should be freed. Those which were logged must
1217 * still be tracked so they can be unpinned when the transaction commits.
1218 */
1219STATIC void
1220xfs_trans_unlock_items(
1221 struct xfs_trans *tp,
1222 xfs_lsn_t commit_lsn)
1223{
1224 struct xfs_log_item_desc *lidp, *next;
1225
1226 list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) {
1227 struct xfs_log_item *lip = lidp->lid_item;
1228
1229 lip->li_desc = NULL;
1230
1231 if (commit_lsn != NULLCOMMITLSN)
1232 IOP_COMMITTING(lip, commit_lsn);
1233 IOP_UNLOCK(lip);
1234
1235 /*
1236 * Free the descriptor if the item is not dirty
1237 * within this transaction.
1238 */
1239 if (!(lidp->lid_flags & XFS_LID_DIRTY))
1240 xfs_trans_free_item_desc(lidp);
1241 }
1242}
1243
1244/*
1245 * Total up the number of log iovecs needed to commit this
1246 * transaction. The transaction itself needs one for the
1247 * transaction header. Ask each dirty item in turn how many
1248 * it needs to get the total.
1249 */
1250static uint
1251xfs_trans_count_vecs(
1252 struct xfs_trans *tp)
1253{
1254 int nvecs;
1255 struct xfs_log_item_desc *lidp;
1256
1257 nvecs = 1;
1258
1259 /* In the non-debug case we need to start bailing out if we
1260 * didn't find a log_item here, return zero and let trans_commit
1261 * deal with it.
1262 */
1263 if (list_empty(&tp->t_items)) {
1264 ASSERT(0);
1265 return 0;
1266 }
1267
1268 list_for_each_entry(lidp, &tp->t_items, lid_trans) {
1269 /*
1270 * Skip items which aren't dirty in this transaction.
1271 */
1272 if (!(lidp->lid_flags & XFS_LID_DIRTY))
1273 continue;
1274 lidp->lid_size = IOP_SIZE(lidp->lid_item);
1275 nvecs += lidp->lid_size;
1276 }
1277
1278 return nvecs;
1279}
1280
1281/*
1282 * Fill in the vector with pointers to data to be logged
1283 * by this transaction. The transaction header takes
1284 * the first vector, and then each dirty item takes the
1285 * number of vectors it indicated it needed in xfs_trans_count_vecs().
1286 *
1287 * As each item fills in the entries it needs, also pin the item
1288 * so that it cannot be flushed out until the log write completes.
1289 */
1290static void
1291xfs_trans_fill_vecs(
1292 struct xfs_trans *tp,
1293 struct xfs_log_iovec *log_vector)
1294{
1295 struct xfs_log_item_desc *lidp;
1296 struct xfs_log_iovec *vecp;
1297 uint nitems;
1298
1299 /*
1300 * Skip over the entry for the transaction header, we'll
1301 * fill that in at the end.
1302 */
1303 vecp = log_vector + 1;
1304
1305 nitems = 0;
1306 ASSERT(!list_empty(&tp->t_items));
1307 list_for_each_entry(lidp, &tp->t_items, lid_trans) {
1308 /* Skip items which aren't dirty in this transaction. */
1309 if (!(lidp->lid_flags & XFS_LID_DIRTY))
1310 continue;
1311
1312 /*
1313 * The item may be marked dirty but not log anything. This can
1314 * be used to get called when a transaction is committed.
1315 */
1316 if (lidp->lid_size)
1317 nitems++;
1318 IOP_FORMAT(lidp->lid_item, vecp);
1319 vecp += lidp->lid_size;
1320 IOP_PIN(lidp->lid_item);
1321 }
1322
1323 /*
1324 * Now that we've counted the number of items in this transaction, fill
1325 * in the transaction header. Note that the transaction header does not
1326 * have a log item.
1327 */
1328 tp->t_header.th_magic = XFS_TRANS_HEADER_MAGIC;
1329 tp->t_header.th_type = tp->t_type;
1330 tp->t_header.th_num_items = nitems;
1331 log_vector->i_addr = (xfs_caddr_t)&tp->t_header;
1332 log_vector->i_len = sizeof(xfs_trans_header_t);
1333 log_vector->i_type = XLOG_REG_TYPE_TRANSHDR;
1334}
1335
1336/*
1337 * The committed item processing consists of calling the committed routine of
1338 * each logged item, updating the item's position in the AIL if necessary, and
1339 * unpinning each item. If the committed routine returns -1, then do nothing
1340 * further with the item because it may have been freed.
1341 *
1342 * Since items are unlocked when they are copied to the incore log, it is
1343 * possible for two transactions to be completing and manipulating the same
1344 * item simultaneously. The AIL lock will protect the lsn field of each item.
1345 * The value of this field can never go backwards.
1346 *
1347 * We unpin the items after repositioning them in the AIL, because otherwise
1348 * they could be immediately flushed and we'd have to race with the flusher
1349 * trying to pull the item from the AIL as we add it.
1350 */
1351static void
1352xfs_trans_item_committed(
1353 struct xfs_log_item *lip,
1354 xfs_lsn_t commit_lsn,
1355 int aborted)
1356{
1357 xfs_lsn_t item_lsn;
1358 struct xfs_ail *ailp;
1359
1360 if (aborted)
1361 lip->li_flags |= XFS_LI_ABORTED;
1362 item_lsn = IOP_COMMITTED(lip, commit_lsn);
1363
1364 /* item_lsn of -1 means the item needs no further processing */
1365 if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
1366 return;
1367
1368 /*
1369 * If the returned lsn is greater than what it contained before, update
1370 * the location of the item in the AIL. If it is not, then do nothing.
1371 * Items can never move backwards in the AIL.
1372 *
1373 * While the new lsn should usually be greater, it is possible that a
1374 * later transaction completing simultaneously with an earlier one
1375 * using the same item could complete first with a higher lsn. This
1376 * would cause the earlier transaction to fail the test below.
1377 */
1378 ailp = lip->li_ailp;
1379 spin_lock(&ailp->xa_lock);
1380 if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) {
1381 /*
1382 * This will set the item's lsn to item_lsn and update the
1383 * position of the item in the AIL.
1384 *
1385 * xfs_trans_ail_update() drops the AIL lock.
1386 */
1387 xfs_trans_ail_update(ailp, lip, item_lsn);
1388 } else {
1389 spin_unlock(&ailp->xa_lock);
1390 }
1391
1392 /*
1393 * Now that we've repositioned the item in the AIL, unpin it so it can
1394 * be flushed. Pass information about buffer stale state down from the
1395 * log item flags, if anyone else stales the buffer we do not want to
1396 * pay any attention to it.
1397 */
1398 IOP_UNPIN(lip, 0);
1399}
1400
1401/*
1402 * This is typically called by the LM when a transaction has been fully
1403 * committed to disk. It needs to unpin the items which have
1404 * been logged by the transaction and update their positions
1405 * in the AIL if necessary.
1406 *
1407 * This also gets called when the transactions didn't get written out
1408 * because of an I/O error. Abortflag & XFS_LI_ABORTED is set then.
1409 */
1410STATIC void
1411xfs_trans_committed(
1412 void *arg,
1413 int abortflag)
1414{
1415 struct xfs_trans *tp = arg;
1416 struct xfs_log_item_desc *lidp, *next;
1417
1418 list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) {
1419 xfs_trans_item_committed(lidp->lid_item, tp->t_lsn, abortflag);
1420 xfs_trans_free_item_desc(lidp);
1421 }
1422
1423 xfs_trans_free(tp);
1424}
1425
1426static inline void 1212static inline void
1427xfs_log_item_batch_insert( 1213xfs_log_item_batch_insert(
1428 struct xfs_ail *ailp, 1214 struct xfs_ail *ailp,
@@ -1538,258 +1324,6 @@ xfs_trans_committed_bulk(
1538} 1324}
1539 1325
1540/* 1326/*
1541 * Called from the trans_commit code when we notice that the filesystem is in
1542 * the middle of a forced shutdown.
1543 *
1544 * When we are called here, we have already pinned all the items in the
1545 * transaction. However, neither IOP_COMMITTING or IOP_UNLOCK has been called
1546 * so we can simply walk the items in the transaction, unpin them with an abort
1547 * flag and then free the items. Note that unpinning the items can result in
1548 * them being freed immediately, so we need to use a safe list traversal method
1549 * here.
1550 */
1551STATIC void
1552xfs_trans_uncommit(
1553 struct xfs_trans *tp,
1554 uint flags)
1555{
1556 struct xfs_log_item_desc *lidp, *n;
1557
1558 list_for_each_entry_safe(lidp, n, &tp->t_items, lid_trans) {
1559 if (lidp->lid_flags & XFS_LID_DIRTY)
1560 IOP_UNPIN(lidp->lid_item, 1);
1561 }
1562
1563 xfs_trans_unreserve_and_mod_sb(tp);
1564 xfs_trans_unreserve_and_mod_dquots(tp);
1565
1566 xfs_trans_free_items(tp, NULLCOMMITLSN, flags);
1567 xfs_trans_free(tp);
1568}
1569
/*
 * Format the transaction direct to the iclog. This isolates the physical
 * transaction commit operation from the logical operation and hence allows
 * other methods to be introduced without affecting the existing commit path.
 *
 * Returns 0 on success, ENOMEM when no log vectors could be counted (the
 * caller treats this as a shutdown trigger), or EIO on a log write error.
 *
 * NOTE(review): the ordering of steps in this function is load-bearing
 * (callback attachment before unlock, shutdown handling after unlock) —
 * see the inline comments below before rearranging anything.
 */
static int
xfs_trans_commit_iclog(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	xfs_lsn_t		*commit_lsn,	/* out: LSN of the commit record */
	int			flags)		/* e.g. XFS_TRANS_RELEASE_LOG_RES */
{
	int			shutdown;
	int			error;
	int			log_flags = 0;
	struct xlog_in_core	*commit_iclog;
#define XFS_TRANS_LOGVEC_COUNT 16
	struct xfs_log_iovec	log_vector_fast[XFS_TRANS_LOGVEC_COUNT];
	struct xfs_log_iovec	*log_vector;
	uint			nvec;


	/*
	 * Ask each log item how many log_vector entries it will
	 * need so we can figure out how many to allocate.
	 * Try to avoid the kmem_alloc() call in the common case
	 * by using a vector from the stack when it fits.
	 */
	nvec = xfs_trans_count_vecs(tp);
	if (nvec == 0) {
		return ENOMEM;	/* triggers a shutdown! */
	} else if (nvec <= XFS_TRANS_LOGVEC_COUNT) {
		log_vector = log_vector_fast;
	} else {
		log_vector = (xfs_log_iovec_t *)kmem_alloc(nvec *
						   sizeof(xfs_log_iovec_t),
						   KM_SLEEP);
	}

	/*
	 * Fill in the log_vector and pin the logged items, and
	 * then write the transaction to the log.
	 */
	xfs_trans_fill_vecs(tp, log_vector);

	if (flags & XFS_TRANS_RELEASE_LOG_RES)
		log_flags = XFS_LOG_REL_PERM_RESERV;

	error = xfs_log_write(mp, log_vector, nvec, tp->t_ticket, &(tp->t_lsn));

	/*
	 * The transaction is committed incore here, and can go out to disk
	 * at any time after this call.  However, all the items associated
	 * with the transaction are still locked and pinned in memory.
	 */
	*commit_lsn = xfs_log_done(mp, tp->t_ticket, &commit_iclog, log_flags);

	tp->t_commit_lsn = *commit_lsn;
	trace_xfs_trans_commit_lsn(tp);

	/* Only free the vector array if we fell back to the heap above. */
	if (nvec > XFS_TRANS_LOGVEC_COUNT)
		kmem_free(log_vector);

	/*
	 * If we got a log write error, unpin the logitems that we had
	 * pinned, clean up, free the trans structure, and return the error.
	 */
	if (error || *commit_lsn == -1) {
		current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
		xfs_trans_uncommit(tp, flags|XFS_TRANS_ABORT);
		return XFS_ERROR(EIO);
	}

	/*
	 * Once the transaction has committed, unused
	 * reservations need to be released and changes to
	 * the superblock need to be reflected in the in-core
	 * version.  Do that now.
	 */
	xfs_trans_unreserve_and_mod_sb(tp);

	/*
	 * Tell the LM to call the transaction completion routine
	 * when the log write with LSN commit_lsn completes (e.g.
	 * when the transaction commit really hits the on-disk log).
	 * After this call we cannot reference tp, because the call
	 * can happen at any time and the call will free the transaction
	 * structure pointed to by tp.  The only case where we call
	 * the completion routine (xfs_trans_committed) directly is
	 * if the log is turned off on a debug kernel or we're
	 * running in simulation mode (the log is explicitly turned
	 * off).
	 */
	tp->t_logcb.cb_func = xfs_trans_committed;
	tp->t_logcb.cb_arg = tp;

	/*
	 * We need to pass the iclog buffer which was used for the
	 * transaction commit record into this function, and attach
	 * the callback to it.  The callback must be attached before
	 * the items are unlocked to avoid racing with other threads
	 * waiting for an item to unlock.
	 */
	shutdown = xfs_log_notify(mp, commit_iclog, &(tp->t_logcb));

	/*
	 * Mark this thread as no longer being in a transaction
	 */
	current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);

	/*
	 * Once all the items of the transaction have been copied
	 * to the in core log and the callback is attached, the
	 * items can be unlocked.
	 *
	 * This will free descriptors pointing to items which were
	 * not logged since there is nothing more to do with them.
	 * For items which were logged, we will keep pointers to them
	 * so they can be unpinned after the transaction commits to disk.
	 * This will also stamp each modified meta-data item with
	 * the commit lsn of this transaction for dependency tracking
	 * purposes.
	 */
	xfs_trans_unlock_items(tp, *commit_lsn);

	/*
	 * If we detected a log error earlier, finish committing
	 * the transaction now (unpin log items, etc).
	 *
	 * Order is critical here, to avoid using the transaction
	 * pointer after its been freed (by xfs_trans_committed
	 * either here now, or as a callback).  We cannot do this
	 * step inside xfs_log_notify as was done earlier because
	 * of this issue.
	 */
	if (shutdown)
		xfs_trans_committed(tp, XFS_LI_ABORTED);

	/*
	 * Now that the xfs_trans_committed callback has been attached,
	 * and the items are released we can finally allow the iclog to
	 * go to disk.
	 */
	return xfs_log_release_iclog(mp, commit_iclog);
}
1715
1716/*
1717 * Walk the log items and allocate log vector structures for
1718 * each item large enough to fit all the vectors they require.
1719 * Note that this format differs from the old log vector format in
1720 * that there is no transaction header in these log vectors.
1721 */
1722STATIC struct xfs_log_vec *
1723xfs_trans_alloc_log_vecs(
1724 xfs_trans_t *tp)
1725{
1726 struct xfs_log_item_desc *lidp;
1727 struct xfs_log_vec *lv = NULL;
1728 struct xfs_log_vec *ret_lv = NULL;
1729
1730
1731 /* Bail out if we didn't find a log item. */
1732 if (list_empty(&tp->t_items)) {
1733 ASSERT(0);
1734 return NULL;
1735 }
1736
1737 list_for_each_entry(lidp, &tp->t_items, lid_trans) {
1738 struct xfs_log_vec *new_lv;
1739
1740 /* Skip items which aren't dirty in this transaction. */
1741 if (!(lidp->lid_flags & XFS_LID_DIRTY))
1742 continue;
1743
1744 /* Skip items that do not have any vectors for writing */
1745 lidp->lid_size = IOP_SIZE(lidp->lid_item);
1746 if (!lidp->lid_size)
1747 continue;
1748
1749 new_lv = kmem_zalloc(sizeof(*new_lv) +
1750 lidp->lid_size * sizeof(struct xfs_log_iovec),
1751 KM_SLEEP);
1752
1753 /* The allocated iovec region lies beyond the log vector. */
1754 new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1];
1755 new_lv->lv_niovecs = lidp->lid_size;
1756 new_lv->lv_item = lidp->lid_item;
1757 if (!ret_lv)
1758 ret_lv = new_lv;
1759 else
1760 lv->lv_next = new_lv;
1761 lv = new_lv;
1762 }
1763
1764 return ret_lv;
1765}
1766
1767static int
1768xfs_trans_commit_cil(
1769 struct xfs_mount *mp,
1770 struct xfs_trans *tp,
1771 xfs_lsn_t *commit_lsn,
1772 int flags)
1773{
1774 struct xfs_log_vec *log_vector;
1775
1776 /*
1777 * Get each log item to allocate a vector structure for
1778 * the log item to to pass to the log write code. The
1779 * CIL commit code will format the vector and save it away.
1780 */
1781 log_vector = xfs_trans_alloc_log_vecs(tp);
1782 if (!log_vector)
1783 return ENOMEM;
1784
1785 xfs_log_commit_cil(mp, tp, log_vector, commit_lsn, flags);
1786
1787 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
1788 xfs_trans_free(tp);
1789 return 0;
1790}
1791
1792/*
1793 * Commit the given transaction to the log. 1327 * Commit the given transaction to the log.
1794 * 1328 *
1795 * XFS disk error handling mechanism is not based on a typical 1329 * XFS disk error handling mechanism is not based on a typical
@@ -1845,17 +1379,16 @@ xfs_trans_commit(
1845 xfs_trans_apply_sb_deltas(tp); 1379 xfs_trans_apply_sb_deltas(tp);
1846 xfs_trans_apply_dquot_deltas(tp); 1380 xfs_trans_apply_dquot_deltas(tp);
1847 1381
1848 if (mp->m_flags & XFS_MOUNT_DELAYLOG) 1382 error = xfs_log_commit_cil(mp, tp, &commit_lsn, flags);
1849 error = xfs_trans_commit_cil(mp, tp, &commit_lsn, flags);
1850 else
1851 error = xfs_trans_commit_iclog(mp, tp, &commit_lsn, flags);
1852
1853 if (error == ENOMEM) { 1383 if (error == ENOMEM) {
1854 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); 1384 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
1855 error = XFS_ERROR(EIO); 1385 error = XFS_ERROR(EIO);
1856 goto out_unreserve; 1386 goto out_unreserve;
1857 } 1387 }
1858 1388
1389 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
1390 xfs_trans_free(tp);
1391
1859 /* 1392 /*
1860 * If the transaction needs to be synchronous, then force the 1393 * If the transaction needs to be synchronous, then force the
1861 * log out now and wait for it. 1394 * log out now and wait for it.
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 3ae713c0abd9..f6118703f20d 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -163,9 +163,8 @@ typedef struct xfs_trans_header {
163 */ 163 */
164struct xfs_log_item_desc { 164struct xfs_log_item_desc {
165 struct xfs_log_item *lid_item; 165 struct xfs_log_item *lid_item;
166 ushort lid_size;
167 unsigned char lid_flags;
168 struct list_head lid_trans; 166 struct list_head lid_trans;
167 unsigned char lid_flags;
169}; 168};
170 169
171#define XFS_LID_DIRTY 0x1 170#define XFS_LID_DIRTY 0x1
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 8b32d1a4c5a1..89dbb4a50872 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -53,7 +53,7 @@ xfs_dir_ialloc(
53 output: may be a new transaction. */ 53 output: may be a new transaction. */
54 xfs_inode_t *dp, /* directory within whose allocate 54 xfs_inode_t *dp, /* directory within whose allocate
55 the inode. */ 55 the inode. */
56 mode_t mode, 56 umode_t mode,
57 xfs_nlink_t nlink, 57 xfs_nlink_t nlink,
58 xfs_dev_t rdev, 58 xfs_dev_t rdev,
59 prid_t prid, /* project id */ 59 prid_t prid, /* project id */
diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h
index 456fca314933..5eeab4690cfe 100644
--- a/fs/xfs/xfs_utils.h
+++ b/fs/xfs/xfs_utils.h
@@ -18,7 +18,7 @@
18#ifndef __XFS_UTILS_H__ 18#ifndef __XFS_UTILS_H__
19#define __XFS_UTILS_H__ 19#define __XFS_UTILS_H__
20 20
21extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t, 21extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, umode_t, xfs_nlink_t,
22 xfs_dev_t, prid_t, int, xfs_inode_t **, int *); 22 xfs_dev_t, prid_t, int, xfs_inode_t **, int *);
23extern int xfs_droplink(xfs_trans_t *, xfs_inode_t *); 23extern int xfs_droplink(xfs_trans_t *, xfs_inode_t *);
24extern int xfs_bumplink(xfs_trans_t *, xfs_inode_t *); 24extern int xfs_bumplink(xfs_trans_t *, xfs_inode_t *);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index ce9268a2f56b..f2fea868d4db 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -822,7 +822,7 @@ int
822xfs_create( 822xfs_create(
823 xfs_inode_t *dp, 823 xfs_inode_t *dp,
824 struct xfs_name *name, 824 struct xfs_name *name,
825 mode_t mode, 825 umode_t mode,
826 xfs_dev_t rdev, 826 xfs_dev_t rdev,
827 xfs_inode_t **ipp) 827 xfs_inode_t **ipp)
828{ 828{
@@ -1481,7 +1481,7 @@ xfs_symlink(
1481 xfs_inode_t *dp, 1481 xfs_inode_t *dp,
1482 struct xfs_name *link_name, 1482 struct xfs_name *link_name,
1483 const char *target_path, 1483 const char *target_path,
1484 mode_t mode, 1484 umode_t mode,
1485 xfs_inode_t **ipp) 1485 xfs_inode_t **ipp)
1486{ 1486{
1487 xfs_mount_t *mp = dp->i_mount; 1487 xfs_mount_t *mp = dp->i_mount;
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 35d3d513e1e9..0c877cbde142 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -26,7 +26,7 @@ int xfs_release(struct xfs_inode *ip);
26int xfs_inactive(struct xfs_inode *ip); 26int xfs_inactive(struct xfs_inode *ip);
27int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, 27int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
28 struct xfs_inode **ipp, struct xfs_name *ci_name); 28 struct xfs_inode **ipp, struct xfs_name *ci_name);
29int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode, 29int xfs_create(struct xfs_inode *dp, struct xfs_name *name, umode_t mode,
30 xfs_dev_t rdev, struct xfs_inode **ipp); 30 xfs_dev_t rdev, struct xfs_inode **ipp);
31int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, 31int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
32 struct xfs_inode *ip); 32 struct xfs_inode *ip);
@@ -35,7 +35,7 @@ int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
35int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize, 35int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize,
36 xfs_off_t *offset, filldir_t filldir); 36 xfs_off_t *offset, filldir_t filldir);
37int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, 37int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
38 const char *target_path, mode_t mode, struct xfs_inode **ipp); 38 const char *target_path, umode_t mode, struct xfs_inode **ipp);
39int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state); 39int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state);
40int xfs_change_file_space(struct xfs_inode *ip, int cmd, 40int xfs_change_file_space(struct xfs_inode *ip, int cmd,
41 xfs_flock64_t *bf, xfs_off_t offset, int attr_flags); 41 xfs_flock64_t *bf, xfs_off_t offset, int attr_flags);